diff.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. // Package diff implements helpers for comparing two filesystems.
  2. package diff
  3. import (
  4. "context"
  5. "fmt"
  6. "io"
  7. "os"
  8. "os/exec"
  9. "path/filepath"
  10. "sort"
  11. "github.com/pkg/errors"
  12. "github.com/kopia/kopia/fs"
  13. "github.com/kopia/kopia/internal/iocopy"
  14. "github.com/kopia/kopia/repo"
  15. "github.com/kopia/kopia/repo/logging"
  16. "github.com/kopia/kopia/repo/object"
  17. "github.com/kopia/kopia/snapshot"
  18. "github.com/kopia/kopia/snapshot/snapshotfs"
  19. )
  20. const dirMode = 0o700
  21. var log = logging.Module("diff")
  // EntryTypeStats holds the change counters collected for one kind of
  // filesystem entry (files or directories) during a comparison.
  type EntryTypeStats struct {
  	// Number of entries counted as added, removed and modified.
  	Added    uint32 `json:"added"`
  	Removed  uint32 `json:"removed"`
  	Modified uint32 `json:"modified"`

  	// Aggregate: entries with identical content where at least one of the
  	// metadata attributes below differs (counted once per entry).
  	SameContentButDifferentMetadata uint32 `json:"sameContentButDifferentMetadata"`

  	// Per-attribute breakdown of entries with identical content; a single
  	// entry may be counted in more than one of these.
  	SameContentButDifferentMode             uint32 `json:"sameContentButDifferentMode"`
  	SameContentButDifferentModificationTime uint32 `json:"sameContentButDifferentModificationTime"`
  	SameContentButDifferentUserOwner        uint32 `json:"sameContentButDifferentUserOwner"`
  	SameContentButDifferentGroupOwner       uint32 `json:"sameContentButDifferentGroupOwner"`
  }
  35. // Stats accumulates stats between snapshots being compared.
  36. type Stats struct {
  37. FileEntries EntryTypeStats `json:"fileEntries"`
  38. DirectoryEntries EntryTypeStats `json:"directoryEntries"`
  39. }
  40. // Comparer outputs diff information between two filesystems.
  41. type Comparer struct {
  42. stats Stats
  43. out io.Writer
  44. tmpDir string
  45. statsOnly bool
  46. DiffCommand string
  47. DiffArguments []string
  48. }
  49. // Compare compares two filesystem entries and emits their diff information.
  50. func (c *Comparer) Compare(ctx context.Context, e1, e2 fs.Entry) (Stats, error) {
  51. c.stats = Stats{}
  52. err := c.compareEntry(ctx, e1, e2, ".")
  53. if err != nil {
  54. return c.stats, err
  55. }
  56. return c.stats, errors.Wrap(err, "error comparing fs entries")
  57. }
  58. // Close removes all temporary files used by the comparer.
  59. func (c *Comparer) Close() error {
  60. //nolint:wrapcheck
  61. return os.RemoveAll(c.tmpDir)
  62. }
  63. func maybeOID(e fs.Entry) string {
  64. if h, ok := e.(object.HasObjectID); ok {
  65. return h.ObjectID().String()
  66. }
  67. return ""
  68. }
  69. func (c *Comparer) compareDirectories(ctx context.Context, dir1, dir2 fs.Directory, parent string) error {
  70. log(ctx).Debugf("comparing directories %v (%v and %v)", parent, maybeOID(dir1), maybeOID(dir2))
  71. var entries1, entries2 []fs.Entry
  72. var err error
  73. if dir1 != nil {
  74. entries1, err = fs.GetAllEntries(ctx, dir1)
  75. if err != nil {
  76. return errors.Wrapf(err, "unable to read first directory %v", parent)
  77. }
  78. }
  79. if dir2 != nil {
  80. entries2, err = fs.GetAllEntries(ctx, dir2)
  81. if err != nil {
  82. return errors.Wrapf(err, "unable to read second directory %v", parent)
  83. }
  84. }
  85. return c.compareDirectoryEntries(ctx, entries1, entries2, parent)
  86. }
  87. //nolint:gocyclo
  88. func (c *Comparer) compareEntry(ctx context.Context, e1, e2 fs.Entry, path string) error {
  89. // see if we have the same object IDs, which implies identical objects, thanks to content-addressable-storage
  90. h1, e1HasObjectID := e1.(object.HasObjectID)
  91. h2, e2HasObjectID := e2.(object.HasObjectID)
  92. if e1HasObjectID && e2HasObjectID {
  93. if h1.ObjectID() == h2.ObjectID() {
  94. if _, isDir := e1.(fs.Directory); isDir {
  95. compareMetadata(ctx, e1, e2, path, &c.stats.DirectoryEntries)
  96. } else {
  97. compareMetadata(ctx, e1, e2, path, &c.stats.FileEntries)
  98. }
  99. return nil
  100. }
  101. }
  102. if e1 == nil {
  103. if dir2, isDir2 := e2.(fs.Directory); isDir2 {
  104. c.output(c.statsOnly, "added directory %v\n", path)
  105. c.stats.DirectoryEntries.Added++
  106. return c.compareDirectories(ctx, nil, dir2, path)
  107. }
  108. c.output(c.statsOnly, "added file %v (%v bytes)\n", path, e2.Size())
  109. c.stats.FileEntries.Added++
  110. if f, ok := e2.(fs.File); ok {
  111. if err := c.compareFiles(ctx, nil, f, path); err != nil {
  112. return err
  113. }
  114. }
  115. return nil
  116. }
  117. if e2 == nil {
  118. if dir1, isDir1 := e1.(fs.Directory); isDir1 {
  119. c.output(c.statsOnly, "removed directory %v\n", path)
  120. c.stats.DirectoryEntries.Removed++
  121. return c.compareDirectories(ctx, dir1, nil, path)
  122. }
  123. c.output(c.statsOnly, "removed file %v (%v bytes)\n", path, e1.Size())
  124. c.stats.FileEntries.Removed++
  125. if f, ok := e1.(fs.File); ok {
  126. if err := c.compareFiles(ctx, f, nil, path); err != nil {
  127. return err
  128. }
  129. }
  130. return nil
  131. }
  132. c.compareEntryMetadata(e1, e2, path)
  133. dir1, isDir1 := e1.(fs.Directory)
  134. dir2, isDir2 := e2.(fs.Directory)
  135. if isDir1 {
  136. if !isDir2 {
  137. // right is a non-directory, left is a directory
  138. c.output(c.statsOnly, "changed %v from directory to non-directory\n", path)
  139. return nil
  140. }
  141. return c.compareDirectories(ctx, dir1, dir2, path)
  142. }
  143. if isDir2 {
  144. // left is non-directory, right is a directory
  145. c.output(c.statsOnly, "changed %v from non-directory to a directory\n", path)
  146. return nil
  147. }
  148. if f1, ok := e1.(fs.File); ok {
  149. if f2, ok := e2.(fs.File); ok {
  150. c.output(c.statsOnly, "changed %v at %v (size %v -> %v)\n", path, e2.ModTime().String(), e1.Size(), e2.Size())
  151. c.stats.FileEntries.Modified++
  152. if err := c.compareFiles(ctx, f1, f2, path); err != nil {
  153. return err
  154. }
  155. }
  156. }
  157. // don't compare filesystem boundaries (e1.Device()), it's pretty useless and is not stored in backups
  158. return nil
  159. }
  160. // Checks for changes in e1's and e2's metadata when they have the same content,
  161. // and updates the stats accordingly.
  162. // The function is not concurrency safe, as it updates st without any locking.
  163. func compareMetadata(ctx context.Context, e1, e2 fs.Entry, path string, st *EntryTypeStats) {
  164. var changed bool
  165. if m1, m2 := e1.Mode(), e2.Mode(); m1 != m2 {
  166. changed = true
  167. st.SameContentButDifferentMode++
  168. }
  169. if mt1, mt2 := e1.ModTime(), e2.ModTime(); !mt1.Equal(mt2) {
  170. changed = true
  171. st.SameContentButDifferentModificationTime++
  172. }
  173. o1, o2 := e1.Owner(), e2.Owner()
  174. if o1.UserID != o2.UserID {
  175. changed = true
  176. st.SameContentButDifferentUserOwner++
  177. }
  178. if o1.GroupID != o2.GroupID {
  179. changed = true
  180. st.SameContentButDifferentGroupOwner++
  181. }
  182. if changed {
  183. st.SameContentButDifferentMetadata++
  184. log(ctx).Debugf("content unchanged but metadata has been modified: %v", path)
  185. }
  186. }
  187. func (c *Comparer) compareEntryMetadata(e1, e2 fs.Entry, fullpath string) {
  188. switch {
  189. case e1 == e2: // in particular e1 == nil && e2 == nil
  190. return
  191. case e1 == nil:
  192. c.output(c.statsOnly, "%v does not exist in source directory\n", fullpath)
  193. return
  194. case e2 == nil:
  195. c.output(c.statsOnly, "%v does not exist in destination directory\n", fullpath)
  196. return
  197. }
  198. var changed bool
  199. if m1, m2 := e1.Mode(), e2.Mode(); m1 != m2 {
  200. changed = true
  201. c.output(c.statsOnly, "%v modes differ: %v %v\n", fullpath, m1, m2)
  202. }
  203. if s1, s2 := e1.Size(), e2.Size(); s1 != s2 {
  204. changed = true
  205. c.output(c.statsOnly, "%v sizes differ: %v %v\n", fullpath, s1, s2)
  206. }
  207. if mt1, mt2 := e1.ModTime(), e2.ModTime(); !mt1.Equal(mt2) {
  208. changed = true
  209. c.output(c.statsOnly, "%v modification times differ: %v %v\n", fullpath, mt1, mt2)
  210. }
  211. o1, o2 := e1.Owner(), e2.Owner()
  212. if o1.UserID != o2.UserID {
  213. changed = true
  214. c.output(c.statsOnly, "%v owner users differ: %v %v\n", fullpath, o1.UserID, o2.UserID)
  215. }
  216. if o1.GroupID != o2.GroupID {
  217. changed = true
  218. c.output(c.statsOnly, "%v owner groups differ: %v %v\n", fullpath, o1.GroupID, o2.GroupID)
  219. }
  220. _, isDir1 := e1.(fs.Directory)
  221. _, isDir2 := e2.(fs.Directory)
  222. if changed {
  223. if isDir1 && isDir2 {
  224. c.stats.DirectoryEntries.Modified++
  225. } else {
  226. c.stats.FileEntries.Modified++
  227. }
  228. }
  229. }
  230. func (c *Comparer) compareDirectoryEntries(ctx context.Context, entries1, entries2 []fs.Entry, dirPath string) error {
  231. e1byname := map[string]fs.Entry{}
  232. for _, e1 := range entries1 {
  233. e1byname[e1.Name()] = e1
  234. }
  235. for _, e2 := range entries2 {
  236. entryName := e2.Name()
  237. if err := c.compareEntry(ctx, e1byname[entryName], e2, dirPath+"/"+entryName); err != nil {
  238. return errors.Wrapf(err, "error comparing %v", entryName)
  239. }
  240. delete(e1byname, entryName)
  241. }
  242. // at this point e1byname only has entries present in entries1 but not entries2, those are the deleted ones
  243. for _, e1 := range entries1 {
  244. entryName := e1.Name()
  245. if _, ok := e1byname[entryName]; ok {
  246. if err := c.compareEntry(ctx, e1, nil, dirPath+"/"+entryName); err != nil {
  247. return errors.Wrapf(err, "error comparing %v", entryName)
  248. }
  249. }
  250. }
  251. return nil
  252. }
  253. func (c *Comparer) compareFiles(ctx context.Context, f1, f2 fs.File, fname string) error {
  254. if c.DiffCommand == "" {
  255. return nil
  256. }
  257. oldName := "/dev/null"
  258. newName := "/dev/null"
  259. if f1 != nil {
  260. oldName = filepath.Join("old", fname)
  261. oldFile := filepath.Join(c.tmpDir, oldName)
  262. if err := downloadFile(ctx, f1, oldFile); err != nil {
  263. return errors.Wrap(err, "error downloading old file")
  264. }
  265. defer os.Remove(oldFile) //nolint:errcheck
  266. }
  267. if f2 != nil {
  268. newName = filepath.Join("new", fname)
  269. newFile := filepath.Join(c.tmpDir, newName)
  270. if err := downloadFile(ctx, f2, newFile); err != nil {
  271. return errors.Wrap(err, "error downloading new file")
  272. }
  273. defer os.Remove(newFile) //nolint:errcheck
  274. }
  275. var args []string
  276. args = append(args, c.DiffArguments...)
  277. args = append(args, oldName, newName)
  278. cmd := exec.CommandContext(ctx, c.DiffCommand, args...) //nolint:gosec
  279. cmd.Dir = c.tmpDir
  280. cmd.Stdout = c.out
  281. cmd.Stderr = c.out
  282. cmd.Run() //nolint:errcheck
  283. return nil
  284. }
  285. func downloadFile(ctx context.Context, f fs.File, fname string) error {
  286. if err := os.MkdirAll(filepath.Dir(fname), dirMode); err != nil {
  287. return errors.Wrap(err, "error making directory")
  288. }
  289. src, err := f.Open(ctx)
  290. if err != nil {
  291. return errors.Wrap(err, "error opening object")
  292. }
  293. defer src.Close() //nolint:errcheck
  294. dst, err := os.Create(fname) //nolint:gosec
  295. if err != nil {
  296. return errors.Wrap(err, "error creating file to edit")
  297. }
  298. defer dst.Close() //nolint:errcheck
  299. return errors.Wrap(iocopy.JustCopy(dst, src), "error downloading file")
  300. }
  301. // Stats returns aggregated statistics computed during snapshot comparison
  302. // must be invoked after a call to Compare which populates ComparerStats struct.
  303. func (c *Comparer) Stats() Stats {
  304. return c.stats
  305. }
  306. func (c *Comparer) output(statsOnly bool, msg string, args ...any) {
  307. if !statsOnly {
  308. fmt.Fprintf(c.out, msg, args...) //nolint:errcheck
  309. }
  310. }
  311. // NewComparer creates a comparer for a given repository that will output the results to a given writer.
  312. func NewComparer(out io.Writer, statsOnly bool) (*Comparer, error) {
  313. tmp, err := os.MkdirTemp("", "kopia")
  314. if err != nil {
  315. return nil, errors.Wrap(err, "error creating temp directory")
  316. }
  317. return &Comparer{out: out, tmpDir: tmp, statsOnly: statsOnly}, nil
  318. }
  319. // GetPrecedingSnapshot fetches the snapshot manifest for the snapshot immediately preceding the given snapshotID if it exists.
  320. func GetPrecedingSnapshot(ctx context.Context, rep repo.Repository, snapshotID string) (*snapshot.Manifest, error) {
  321. snapshotManifest, err := snapshotfs.FindSnapshotByRootObjectIDOrManifestID(ctx, rep, snapshotID, true)
  322. if err != nil {
  323. return nil, errors.Wrap(err, "failed to get snapshot manifest for the given snapshotID")
  324. }
  325. snapshotList, err := snapshot.ListSnapshots(ctx, rep, snapshotManifest.Source)
  326. if err != nil {
  327. return nil, errors.Wrap(err, "failed to list snapshots")
  328. }
  329. // sort snapshots in ascending order based on start time
  330. sort.Slice(snapshotList, func(i, j int) bool {
  331. return snapshotList[i].StartTime.Before(snapshotList[j].StartTime)
  332. })
  333. for i, snap := range snapshotList {
  334. if snap.ID == snapshotManifest.ID {
  335. if i == 0 {
  336. return nil, errors.New("there is no immediately preceding snapshot")
  337. }
  338. return snapshotList[i-1], nil
  339. }
  340. }
  341. return nil, errors.New("couldn't find immediately preceding snapshot")
  342. }
  343. // GetTwoLatestSnapshotsForASource fetches two latest snapshot manifests for a given source if they exist.
  344. func GetTwoLatestSnapshotsForASource(ctx context.Context, rep repo.Repository, source snapshot.SourceInfo) (secondLast, last *snapshot.Manifest, err error) {
  345. snapshots, err := snapshot.ListSnapshots(ctx, rep, source)
  346. if err != nil {
  347. return nil, nil, errors.Wrap(err, "failed to list all snapshots")
  348. }
  349. const minimumReqSnapshots = 2
  350. sizeSnapshots := len(snapshots)
  351. if sizeSnapshots < minimumReqSnapshots {
  352. return nil, nil, errors.New("snapshot source has less than two snapshots")
  353. }
  354. sort.Slice(snapshots, func(i, j int) bool {
  355. return snapshots[i].StartTime.Before(snapshots[j].StartTime)
  356. })
  357. return snapshots[sizeSnapshots-2], snapshots[sizeSnapshots-1], nil
  358. }