123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464 |
- // Package diff implements helpers for comparing two filesystems.
- package diff
- import (
- "context"
- "fmt"
- "io"
- "os"
- "os/exec"
- "path/filepath"
- "sort"
- "github.com/pkg/errors"
- "github.com/kopia/kopia/fs"
- "github.com/kopia/kopia/internal/iocopy"
- "github.com/kopia/kopia/repo"
- "github.com/kopia/kopia/repo/logging"
- "github.com/kopia/kopia/repo/object"
- "github.com/kopia/kopia/snapshot"
- "github.com/kopia/kopia/snapshot/snapshotfs"
- )
// dirMode is the permission mode passed to os.MkdirAll when creating the
// directories that hold downloaded files.
const dirMode = 0o700

// log is this package's logger, registered under the "diff" module name.
var log = logging.Module("diff")
// EntryTypeStats accumulates stats for an FS entry type.
type EntryTypeStats struct {
	// Added is incremented for each entry that has no counterpart on the first side.
	Added uint32 `json:"added"`
	// Removed is incremented for each entry that has no counterpart on the second side.
	Removed uint32 `json:"removed"`
	// Modified is incremented when two files with different object IDs are compared
	// and, separately, when the metadata of two compared entries differs.
	Modified uint32 `json:"modified"`

	// aggregate stats

	// SameContentButDifferentMetadata is incremented once per entry pair that has
	// the same object ID but differs in at least one of the metadata categories below.
	SameContentButDifferentMetadata uint32 `json:"sameContentButDifferentMetadata"`

	// stats categorized based on metadata

	// SameContentButDifferentMode counts same-object-ID pairs whose Mode() differs.
	SameContentButDifferentMode uint32 `json:"sameContentButDifferentMode"`
	// SameContentButDifferentModificationTime counts same-object-ID pairs whose ModTime() differs.
	SameContentButDifferentModificationTime uint32 `json:"sameContentButDifferentModificationTime"`
	// SameContentButDifferentUserOwner counts same-object-ID pairs whose owner UserID differs.
	SameContentButDifferentUserOwner uint32 `json:"sameContentButDifferentUserOwner"`
	// SameContentButDifferentGroupOwner counts same-object-ID pairs whose owner GroupID differs.
	SameContentButDifferentGroupOwner uint32 `json:"sameContentButDifferentGroupOwner"`
}
// Stats accumulates stats between snapshots being compared.
// File and directory entries are tracked separately.
type Stats struct {
	// FileEntries holds the counters for non-directory entries.
	FileEntries EntryTypeStats `json:"fileEntries"`
	// DirectoryEntries holds the counters for directory entries.
	DirectoryEntries EntryTypeStats `json:"directoryEntries"`
}
// Comparer outputs diff information between two filesystems.
type Comparer struct {
	// stats holds the counters accumulated by the most recent Compare call;
	// Compare resets it before starting.
	stats Stats
	// out receives the textual diff messages as well as the stdout and stderr
	// of the external diff command.
	out io.Writer
	// tmpDir is the directory into which files are downloaded before the
	// external diff command is run; Close removes it.
	tmpDir string
	// statsOnly, when true, suppresses the messages written through output.
	statsOnly bool

	// DiffCommand is the external command run to compare file contents.
	// When empty, file contents are not compared.
	DiffCommand string
	// DiffArguments are passed to DiffCommand ahead of the old and new file names.
	DiffArguments []string
}
- // Compare compares two filesystem entries and emits their diff information.
- func (c *Comparer) Compare(ctx context.Context, e1, e2 fs.Entry) (Stats, error) {
- c.stats = Stats{}
- err := c.compareEntry(ctx, e1, e2, ".")
- if err != nil {
- return c.stats, err
- }
- return c.stats, errors.Wrap(err, "error comparing fs entries")
- }
- // Close removes all temporary files used by the comparer.
- func (c *Comparer) Close() error {
- //nolint:wrapcheck
- return os.RemoveAll(c.tmpDir)
- }
- func maybeOID(e fs.Entry) string {
- if h, ok := e.(object.HasObjectID); ok {
- return h.ObjectID().String()
- }
- return ""
- }
- func (c *Comparer) compareDirectories(ctx context.Context, dir1, dir2 fs.Directory, parent string) error {
- log(ctx).Debugf("comparing directories %v (%v and %v)", parent, maybeOID(dir1), maybeOID(dir2))
- var entries1, entries2 []fs.Entry
- var err error
- if dir1 != nil {
- entries1, err = fs.GetAllEntries(ctx, dir1)
- if err != nil {
- return errors.Wrapf(err, "unable to read first directory %v", parent)
- }
- }
- if dir2 != nil {
- entries2, err = fs.GetAllEntries(ctx, dir2)
- if err != nil {
- return errors.Wrapf(err, "unable to read second directory %v", parent)
- }
- }
- return c.compareDirectoryEntries(ctx, entries1, entries2, parent)
- }
- //nolint:gocyclo
- func (c *Comparer) compareEntry(ctx context.Context, e1, e2 fs.Entry, path string) error {
- // see if we have the same object IDs, which implies identical objects, thanks to content-addressable-storage
- h1, e1HasObjectID := e1.(object.HasObjectID)
- h2, e2HasObjectID := e2.(object.HasObjectID)
- if e1HasObjectID && e2HasObjectID {
- if h1.ObjectID() == h2.ObjectID() {
- if _, isDir := e1.(fs.Directory); isDir {
- compareMetadata(ctx, e1, e2, path, &c.stats.DirectoryEntries)
- } else {
- compareMetadata(ctx, e1, e2, path, &c.stats.FileEntries)
- }
- return nil
- }
- }
- if e1 == nil {
- if dir2, isDir2 := e2.(fs.Directory); isDir2 {
- c.output(c.statsOnly, "added directory %v\n", path)
- c.stats.DirectoryEntries.Added++
- return c.compareDirectories(ctx, nil, dir2, path)
- }
- c.output(c.statsOnly, "added file %v (%v bytes)\n", path, e2.Size())
- c.stats.FileEntries.Added++
- if f, ok := e2.(fs.File); ok {
- if err := c.compareFiles(ctx, nil, f, path); err != nil {
- return err
- }
- }
- return nil
- }
- if e2 == nil {
- if dir1, isDir1 := e1.(fs.Directory); isDir1 {
- c.output(c.statsOnly, "removed directory %v\n", path)
- c.stats.DirectoryEntries.Removed++
- return c.compareDirectories(ctx, dir1, nil, path)
- }
- c.output(c.statsOnly, "removed file %v (%v bytes)\n", path, e1.Size())
- c.stats.FileEntries.Removed++
- if f, ok := e1.(fs.File); ok {
- if err := c.compareFiles(ctx, f, nil, path); err != nil {
- return err
- }
- }
- return nil
- }
- c.compareEntryMetadata(e1, e2, path)
- dir1, isDir1 := e1.(fs.Directory)
- dir2, isDir2 := e2.(fs.Directory)
- if isDir1 {
- if !isDir2 {
- // right is a non-directory, left is a directory
- c.output(c.statsOnly, "changed %v from directory to non-directory\n", path)
- return nil
- }
- return c.compareDirectories(ctx, dir1, dir2, path)
- }
- if isDir2 {
- // left is non-directory, right is a directory
- c.output(c.statsOnly, "changed %v from non-directory to a directory\n", path)
- return nil
- }
- if f1, ok := e1.(fs.File); ok {
- if f2, ok := e2.(fs.File); ok {
- c.output(c.statsOnly, "changed %v at %v (size %v -> %v)\n", path, e2.ModTime().String(), e1.Size(), e2.Size())
- c.stats.FileEntries.Modified++
- if err := c.compareFiles(ctx, f1, f2, path); err != nil {
- return err
- }
- }
- }
- // don't compare filesystem boundaries (e1.Device()), it's pretty useless and is not stored in backups
- return nil
- }
- // Checks for changes in e1's and e2's metadata when they have the same content,
- // and updates the stats accordingly.
- // The function is not concurrency safe, as it updates st without any locking.
- func compareMetadata(ctx context.Context, e1, e2 fs.Entry, path string, st *EntryTypeStats) {
- var changed bool
- if m1, m2 := e1.Mode(), e2.Mode(); m1 != m2 {
- changed = true
- st.SameContentButDifferentMode++
- }
- if mt1, mt2 := e1.ModTime(), e2.ModTime(); !mt1.Equal(mt2) {
- changed = true
- st.SameContentButDifferentModificationTime++
- }
- o1, o2 := e1.Owner(), e2.Owner()
- if o1.UserID != o2.UserID {
- changed = true
- st.SameContentButDifferentUserOwner++
- }
- if o1.GroupID != o2.GroupID {
- changed = true
- st.SameContentButDifferentGroupOwner++
- }
- if changed {
- st.SameContentButDifferentMetadata++
- log(ctx).Debugf("content unchanged but metadata has been modified: %v", path)
- }
- }
- func (c *Comparer) compareEntryMetadata(e1, e2 fs.Entry, fullpath string) {
- switch {
- case e1 == e2: // in particular e1 == nil && e2 == nil
- return
- case e1 == nil:
- c.output(c.statsOnly, "%v does not exist in source directory\n", fullpath)
- return
- case e2 == nil:
- c.output(c.statsOnly, "%v does not exist in destination directory\n", fullpath)
- return
- }
- var changed bool
- if m1, m2 := e1.Mode(), e2.Mode(); m1 != m2 {
- changed = true
- c.output(c.statsOnly, "%v modes differ: %v %v\n", fullpath, m1, m2)
- }
- if s1, s2 := e1.Size(), e2.Size(); s1 != s2 {
- changed = true
- c.output(c.statsOnly, "%v sizes differ: %v %v\n", fullpath, s1, s2)
- }
- if mt1, mt2 := e1.ModTime(), e2.ModTime(); !mt1.Equal(mt2) {
- changed = true
- c.output(c.statsOnly, "%v modification times differ: %v %v\n", fullpath, mt1, mt2)
- }
- o1, o2 := e1.Owner(), e2.Owner()
- if o1.UserID != o2.UserID {
- changed = true
- c.output(c.statsOnly, "%v owner users differ: %v %v\n", fullpath, o1.UserID, o2.UserID)
- }
- if o1.GroupID != o2.GroupID {
- changed = true
- c.output(c.statsOnly, "%v owner groups differ: %v %v\n", fullpath, o1.GroupID, o2.GroupID)
- }
- _, isDir1 := e1.(fs.Directory)
- _, isDir2 := e2.(fs.Directory)
- if changed {
- if isDir1 && isDir2 {
- c.stats.DirectoryEntries.Modified++
- } else {
- c.stats.FileEntries.Modified++
- }
- }
- }
- func (c *Comparer) compareDirectoryEntries(ctx context.Context, entries1, entries2 []fs.Entry, dirPath string) error {
- e1byname := map[string]fs.Entry{}
- for _, e1 := range entries1 {
- e1byname[e1.Name()] = e1
- }
- for _, e2 := range entries2 {
- entryName := e2.Name()
- if err := c.compareEntry(ctx, e1byname[entryName], e2, dirPath+"/"+entryName); err != nil {
- return errors.Wrapf(err, "error comparing %v", entryName)
- }
- delete(e1byname, entryName)
- }
- // at this point e1byname only has entries present in entries1 but not entries2, those are the deleted ones
- for _, e1 := range entries1 {
- entryName := e1.Name()
- if _, ok := e1byname[entryName]; ok {
- if err := c.compareEntry(ctx, e1, nil, dirPath+"/"+entryName); err != nil {
- return errors.Wrapf(err, "error comparing %v", entryName)
- }
- }
- }
- return nil
- }
- func (c *Comparer) compareFiles(ctx context.Context, f1, f2 fs.File, fname string) error {
- if c.DiffCommand == "" {
- return nil
- }
- oldName := "/dev/null"
- newName := "/dev/null"
- if f1 != nil {
- oldName = filepath.Join("old", fname)
- oldFile := filepath.Join(c.tmpDir, oldName)
- if err := downloadFile(ctx, f1, oldFile); err != nil {
- return errors.Wrap(err, "error downloading old file")
- }
- defer os.Remove(oldFile) //nolint:errcheck
- }
- if f2 != nil {
- newName = filepath.Join("new", fname)
- newFile := filepath.Join(c.tmpDir, newName)
- if err := downloadFile(ctx, f2, newFile); err != nil {
- return errors.Wrap(err, "error downloading new file")
- }
- defer os.Remove(newFile) //nolint:errcheck
- }
- var args []string
- args = append(args, c.DiffArguments...)
- args = append(args, oldName, newName)
- cmd := exec.CommandContext(ctx, c.DiffCommand, args...) //nolint:gosec
- cmd.Dir = c.tmpDir
- cmd.Stdout = c.out
- cmd.Stderr = c.out
- cmd.Run() //nolint:errcheck
- return nil
- }
- func downloadFile(ctx context.Context, f fs.File, fname string) error {
- if err := os.MkdirAll(filepath.Dir(fname), dirMode); err != nil {
- return errors.Wrap(err, "error making directory")
- }
- src, err := f.Open(ctx)
- if err != nil {
- return errors.Wrap(err, "error opening object")
- }
- defer src.Close() //nolint:errcheck
- dst, err := os.Create(fname) //nolint:gosec
- if err != nil {
- return errors.Wrap(err, "error creating file to edit")
- }
- defer dst.Close() //nolint:errcheck
- return errors.Wrap(iocopy.JustCopy(dst, src), "error downloading file")
- }
- // Stats returns aggregated statistics computed during snapshot comparison
- // must be invoked after a call to Compare which populates ComparerStats struct.
- func (c *Comparer) Stats() Stats {
- return c.stats
- }
- func (c *Comparer) output(statsOnly bool, msg string, args ...any) {
- if !statsOnly {
- fmt.Fprintf(c.out, msg, args...) //nolint:errcheck
- }
- }
- // NewComparer creates a comparer for a given repository that will output the results to a given writer.
- func NewComparer(out io.Writer, statsOnly bool) (*Comparer, error) {
- tmp, err := os.MkdirTemp("", "kopia")
- if err != nil {
- return nil, errors.Wrap(err, "error creating temp directory")
- }
- return &Comparer{out: out, tmpDir: tmp, statsOnly: statsOnly}, nil
- }
- // GetPrecedingSnapshot fetches the snapshot manifest for the snapshot immediately preceding the given snapshotID if it exists.
- func GetPrecedingSnapshot(ctx context.Context, rep repo.Repository, snapshotID string) (*snapshot.Manifest, error) {
- snapshotManifest, err := snapshotfs.FindSnapshotByRootObjectIDOrManifestID(ctx, rep, snapshotID, true)
- if err != nil {
- return nil, errors.Wrap(err, "failed to get snapshot manifest for the given snapshotID")
- }
- snapshotList, err := snapshot.ListSnapshots(ctx, rep, snapshotManifest.Source)
- if err != nil {
- return nil, errors.Wrap(err, "failed to list snapshots")
- }
- // sort snapshots in ascending order based on start time
- sort.Slice(snapshotList, func(i, j int) bool {
- return snapshotList[i].StartTime.Before(snapshotList[j].StartTime)
- })
- for i, snap := range snapshotList {
- if snap.ID == snapshotManifest.ID {
- if i == 0 {
- return nil, errors.New("there is no immediately preceding snapshot")
- }
- return snapshotList[i-1], nil
- }
- }
- return nil, errors.New("couldn't find immediately preceding snapshot")
- }
- // GetTwoLatestSnapshotsForASource fetches two latest snapshot manifests for a given source if they exist.
- func GetTwoLatestSnapshotsForASource(ctx context.Context, rep repo.Repository, source snapshot.SourceInfo) (secondLast, last *snapshot.Manifest, err error) {
- snapshots, err := snapshot.ListSnapshots(ctx, rep, source)
- if err != nil {
- return nil, nil, errors.Wrap(err, "failed to list all snapshots")
- }
- const minimumReqSnapshots = 2
- sizeSnapshots := len(snapshots)
- if sizeSnapshots < minimumReqSnapshots {
- return nil, nil, errors.New("snapshot source has less than two snapshots")
- }
- sort.Slice(snapshots, func(i, j int) bool {
- return snapshots[i].StartTime.Before(snapshots[j].StartTime)
- })
- return snapshots[sizeSnapshots-2], snapshots[sizeSnapshots-1], nil
- }
|