Ver código fonte

feat(restore): added support for deleting extra files in restore path (#4725)

Ankit Jain 2 semanas atrás
pai
commit
35089a4894

+ 18 - 3
cli/command_restore.go

@@ -123,6 +123,7 @@ type commandRestore struct {
 	restoreSkipOwners             bool
 	restoreSkipPermissions        bool
 	restoreIncremental            bool
+	restoreDeleteExtra            bool
 	restoreIgnoreErrors           bool
 	restoreShallowAtDepth         int32
 	minSizeForPlaceholder         int32
@@ -153,6 +154,7 @@ func (c *commandRestore) setup(svc appServices, parent commandParent) {
 	cmd.Flag("write-files-atomically", "Write files atomically to disk, ensuring they are either fully committed, or not written at all, preventing partially written files").Default("false").BoolVar(&c.restoreWriteFilesAtomically)
 	cmd.Flag("ignore-errors", "Ignore all errors").BoolVar(&c.restoreIgnoreErrors)
 	cmd.Flag("skip-existing", "Skip files and symlinks that exist in the output").BoolVar(&c.restoreIncremental)
+	cmd.Flag("delete-extra", "Delete additional files, directories and symlinks that exist in the restore path but do not exist in the snapshot").BoolVar(&c.restoreDeleteExtra)
 	cmd.Flag("shallow", "Shallow restore the directory hierarchy starting at this level (default is to deep restore the entire hierarchy.)").Int32Var(&c.restoreShallowAtDepth)
 	cmd.Flag("shallow-minsize", "When doing a shallow restore, write actual files instead of placeholders smaller than this size.").Int32Var(&c.minSizeForPlaceholder)
 	cmd.Flag("snapshot-time", "When using a path as the source, use the latest snapshot available before this date. Default is latest").Default("latest").StringVar(&c.snapshotTime)
@@ -334,22 +336,34 @@ func (c *commandRestore) detectRestoreMode(ctx context.Context, m, targetpath st
 }
 
 func printRestoreStats(ctx context.Context, st *restore.Stats) {
-	var maybeSkipped, maybeErrors string
+	var maybeSkipped, maybeDeletedDirs, maybeDeletedFiles, maybeDeletedSymlinks, maybeErrors string
 
 	if st.SkippedCount > 0 {
 		maybeSkipped = fmt.Sprintf(", skipped %v (%v)", st.SkippedCount, units.BytesString(st.SkippedTotalFileSize))
 	}
 
+	if st.DeletedDirCount > 0 {
+		maybeDeletedDirs = fmt.Sprintf(", deleted directories %v", st.DeletedDirCount)
+	}
+
+	if st.DeletedFilesCount > 0 {
+		maybeDeletedFiles = fmt.Sprintf(", deleted files %v", st.DeletedFilesCount)
+	}
+
+	if st.DeletedSymlinkCount > 0 {
+		maybeDeletedSymlinks = fmt.Sprintf(", deleted symbolic links %v", st.DeletedSymlinkCount)
+	}
+
 	if st.IgnoredErrorCount > 0 {
 		maybeErrors = fmt.Sprintf(", ignored %v errors", st.IgnoredErrorCount)
 	}
 
-	log(ctx).Infof("Restored %v files, %v directories and %v symbolic links (%v)%v%v.\n",
+	log(ctx).Infof("Restored %v files, %v directories and %v symbolic links (%v)%v%v%v%v%v.\n",
 		st.RestoredFileCount,
 		st.RestoredDirCount,
 		st.RestoredSymlinkCount,
 		units.BytesString(st.RestoredTotalFileSize),
-		maybeSkipped, maybeErrors)
+		maybeSkipped, maybeDeletedDirs, maybeDeletedFiles, maybeDeletedSymlinks, maybeErrors)
 }
 
 func (c *commandRestore) setupPlaceholderExpansion(ctx context.Context, rep repo.Repository, rstp restoreSourceTarget, output restore.Output) (fs.Entry, error) {
@@ -426,6 +440,7 @@ func (c *commandRestore) run(ctx context.Context, rep repo.Repository) error {
 		st, err := restore.Entry(ctx, rep, output, rootEntry, restore.Options{
 			Parallel:               c.restoreParallel,
 			Incremental:            c.restoreIncremental,
+			DeleteExtra:            c.restoreDeleteExtra,
 			IgnoreErrors:           c.restoreIgnoreErrors,
 			RestoreDirEntryAtDepth: c.restoreShallowAtDepth,
 			MinSizeForPlaceholder:  c.minSizeForPlaceholder,

+ 93 - 0
snapshot/restore/restore.go

@@ -2,6 +2,7 @@ package restore
 
 import (
 	"context"
+	"os"
 	"path"
 	"runtime"
 	"sync/atomic"
@@ -46,6 +47,9 @@ type Stats struct {
 	EnqueuedDirCount     int32
 	EnqueuedSymlinkCount int32
 	SkippedCount         int32
+	DeletedFilesCount    int32
+	DeletedSymlinkCount  int32
+	DeletedDirCount      int32
 	IgnoredErrorCount    int32
 }
 
@@ -62,6 +66,9 @@ type statsInternal struct {
 	EnqueuedDirCount     atomic.Int32
 	EnqueuedSymlinkCount atomic.Int32
 	SkippedCount         atomic.Int32
+	DeletedFilesCount    atomic.Int32
+	DeletedSymlinkCount  atomic.Int32
+	DeletedDirCount      atomic.Int32
 	IgnoredErrorCount    atomic.Int32
 }
 
@@ -77,6 +84,9 @@ func (s *statsInternal) clone() Stats {
 		EnqueuedDirCount:      s.EnqueuedDirCount.Load(),
 		EnqueuedSymlinkCount:  s.EnqueuedSymlinkCount.Load(),
 		SkippedCount:          s.SkippedCount.Load(),
+		DeletedFilesCount:     s.DeletedFilesCount.Load(),
+		DeletedSymlinkCount:   s.DeletedSymlinkCount.Load(),
+		DeletedDirCount:       s.DeletedDirCount.Load(),
 		IgnoredErrorCount:     s.IgnoredErrorCount.Load(),
 	}
 }
@@ -90,6 +100,7 @@ type Options struct {
 	// required bindings in the UI.
 	Parallel               int   `json:"parallel"`
 	Incremental            bool  `json:"incremental"`
+	DeleteExtra            bool  `json:"deleteExtra"`
 	IgnoreErrors           bool  `json:"ignoreErrors"`
 	RestoreDirEntryAtDepth int32 `json:"restoreDirEntryAtDepth"`
 	MinSizeForPlaceholder  int32 `json:"minSizeForPlaceholder"`
@@ -107,6 +118,7 @@ func Entry(ctx context.Context, rep repo.Repository, output Output, rootEntry fs
 		shallowoutput:    makeShallowFilesystemOutput(output, options),
 		q:                parallelwork.NewQueue(),
 		incremental:      options.Incremental,
+		deleteExtra:      options.DeleteExtra,
 		ignoreErrors:     options.IgnoreErrors,
 		cancel:           options.Cancel,
 		progressCallback: options.ProgressCallback,
@@ -149,6 +161,7 @@ type copier struct {
 	shallowoutput Output
 	q             *parallelwork.Queue
 	incremental   bool
+	deleteExtra   bool
 	ignoreErrors  bool
 	cancel        chan struct{}
 
@@ -274,6 +287,15 @@ func (c *copier) copyDirectory(ctx context.Context, d fs.Directory, targetPath s
 		return errors.Wrap(err, "create directory")
 	}
 
+	if c.deleteExtra {
+		// deleting existing files only makes sense in the context of an actual filesystem (compared to a tar or zip)
+		if fsOutput, isFileSystem := c.output.(*FilesystemOutput); isFileSystem {
+			if err := c.deleteExtraFilesInDir(ctx, fsOutput, d, targetPath); err != nil {
+				return errors.Wrap(err, "delete extra")
+			}
+		}
+	}
+
 	return errors.Wrap(c.copyDirectoryContent(ctx, d, targetPath, currentdepth+1, maxdepth, func() error {
 		if err := c.output.FinishDirectory(ctx, targetPath, d); err != nil {
 			return errors.Wrap(err, "finish directory")
@@ -283,6 +305,77 @@ func (c *copier) copyDirectory(ctx context.Context, d fs.Directory, targetPath s
 	}), "copy directory contents")
 }
 
+// deleteExtraFilesInDir removes entries from the on-disk directory at
+// o.TargetPath joined with targetPath that have no same-named entry of the
+// same kind (directory vs. non-directory) among the entries of snapshot
+// directory d.
+//
+// A missing on-disk directory is not an error and returns nil. Any other
+// failure to read the directory, list the snapshot entries, or remove an entry
+// is returned wrapped, and stops processing at the first failure; this
+// function does not consult c.ignoreErrors. Every successful removal
+// increments the matching Deleted*Count counter in c.stats.
+//
+// NOTE(review): paths are built with path.Join, which always uses forward
+// slashes; confirm whether filepath.Join is needed for OS paths on Windows.
+func (c *copier) deleteExtraFilesInDir(ctx context.Context, o *FilesystemOutput, d fs.Directory, targetPath string) error {
+	existingEntries, err := os.ReadDir(path.Join(o.TargetPath, targetPath))
+	if os.IsNotExist(err) {
+		return nil
+	}
+
+	if err != nil {
+		return errors.Wrap(err, "read existing dir entries ('"+path.Join(o.TargetPath, targetPath)+"')")
+	}
+
+	snapshotEntries, err := fs.GetAllEntries(ctx, d)
+	if err != nil {
+		return errors.Wrap(err, "error reading directory")
+	}
+
+	// First classify snapshot entries by name into directories and everything else (symlinks are treated like normal files).
+	snapshotDirectories := map[string]struct{}{}
+	snapshotFiles := map[string]struct{}{}
+
+	for _, snapshotEntry := range snapshotEntries {
+		if snapshotEntry.IsDir() {
+			snapshotDirectories[snapshotEntry.Name()] = struct{}{}
+			continue
+		}
+
+		snapshotFiles[snapshotEntry.Name()] = struct{}{}
+	}
+
+	// Delete entries that exist in the target path and are not in the snapshot.
+	// Also, delete entries with a mismatching directory / file type, that is:
+	// - delete existing directories that are files in the snapshot; and
+	// - delete existing files that are directories in the snapshot.
+	// This allows "overwriting" existing entries when their (directory vs. file) types do not match.
+	for _, existingEntry := range existingEntries {
+		if existingEntry.IsDir() {
+			if _, ok := snapshotDirectories[existingEntry.Name()]; !ok {
+				entryPath := path.Join(o.TargetPath, targetPath, existingEntry.Name())
+
+				log(ctx).Debugf("deleting directory %v since it does not exist in snapshot", entryPath)
+
+				if err := os.RemoveAll(entryPath); err != nil {
+					return errors.Wrap(err, "delete directory "+path.Join(o.TargetPath, targetPath, existingEntry.Name()))
+				}
+
+				c.stats.DeletedDirCount.Add(1)
+			}
+
+			continue
+		}
+
+		if _, ok := snapshotFiles[existingEntry.Name()]; !ok {
+			entryPath := path.Join(o.TargetPath, targetPath, existingEntry.Name())
+
+			log(ctx).Debugf("deleting file %v since it does not exist in snapshot", entryPath)
+
+			if err := os.Remove(entryPath); err != nil {
+				return errors.Wrap(err, "delete file "+path.Join(o.TargetPath, targetPath, existingEntry.Name()))
+			}
+
+			if existingEntry.Type() == os.ModeSymlink {
+				c.stats.DeletedSymlinkCount.Add(1)
+			} else {
+				// Anything that is not a symlink is counted as a file; file types are not classified further.
+				c.stats.DeletedFilesCount.Add(1)
+			}
+		}
+	}
+
+	return nil
+}
+
 func (c *copier) copyDirectoryContent(ctx context.Context, d fs.Directory, targetPath string, currentdepth, maxdepth int32, onCompletion parallelwork.CallbackFunc) error {
 	entries, err := fs.GetAllEntries(ctx, d)
 	if err != nil {

+ 56 - 2
tests/end_to_end_test/restore_test.go

@@ -41,6 +41,8 @@ const (
 
 	overriddenFilePermissions = 0o651
 	overriddenDirPermissions  = 0o752
+
+	statsOnly = false
 )
 
 type fakeRestoreProgress struct {
@@ -172,13 +174,35 @@ func TestRestoreCommand(t *testing.T) {
 
 	// Attempt to restore into a target directory that already exists
 	e.RunAndExpectFailure(t, "restore", rootID, restoreDir, "--no-overwrite-files")
+
+	// Attempt to restore into a target directory with extra files, and check they have been deleted
+	extraFile := filepath.Join(restoreDir, "extraFile.txt")
+
+	err := os.WriteFile(extraFile, []byte("extra file contents"), 0o644)
+	require.NoError(t, err)
+
+	extraDir := filepath.Join(restoreDir, "extraDir")
+	err = os.Mkdir(extraDir, 0o766)
+	require.NoError(t, err)
+
+	// Add extra files to the extra directory
+	for i := range 10 {
+		extraFileInDir := filepath.Join(extraDir, fmt.Sprint("extraFile-", i))
+
+		err := os.WriteFile(extraFileInDir, []byte("extra file contents"), 0o644)
+		require.NoError(t, err)
+	}
+
+	e.RunAndExpectSuccess(t, "restore", rootID, restoreDir)
+	compareDirsWithChange(t, source, restoreDir, 11, 1)
+
+	e.RunAndExpectSuccess(t, "restore", rootID, restoreDir, "--delete-extra")
+	compareDirs(t, source, restoreDir)
 }
 
 func compareDirs(t *testing.T, source, restoreDir string) {
 	t.Helper()
 
-	const statsOnly = false
-
 	// Restored contents should match source
 	s, err := localfs.Directory(source)
 	require.NoError(t, err)
@@ -202,6 +226,36 @@ func compareDirs(t *testing.T, source, restoreDir string) {
 	}
 }
 
+// compareDirsWithChange asserts that restoreDir differs from source and that
+// the difference reported by the diff comparer consists of exactly
+// expectedExtraFiles added file entries and expectedExtraDirs added directory
+// entries. The comparer writes its output to os.Stderr.
+func compareDirsWithChange(t *testing.T, source, restoreDir string, expectedExtraFiles, expectedExtraDirs int) {
+	t.Helper()
+
+	// Hash the source directory to obtain the reference hash.
+	s, err := localfs.Directory(source)
+	require.NoError(t, err)
+	wantHash, err := fshasher.Hash(testlogging.Context(t), s)
+	require.NoError(t, err)
+
+	// Hash the restored directory.
+	r, err := localfs.Directory(restoreDir)
+	require.NoError(t, err)
+
+	ctx := testlogging.Context(t)
+	gotHash, err := fshasher.Hash(ctx, r)
+	require.NoError(t, err)
+
+	assert.NotEqual(t, wantHash, gotHash)
+
+	cmp, err := diff.NewComparer(os.Stderr, statsOnly)
+	require.NoError(t, err)
+
+	stats, err := cmp.Compare(ctx, s, r)
+	require.NoError(t, err)
+
+	require.Equal(t, uint32(expectedExtraFiles), stats.FileEntries.Added, "unexpected number of extra files")
+
+	require.Equal(t, uint32(expectedExtraDirs), stats.DirectoryEntries.Added, "unexpected number of extra directories")
+}
+
 func TestSnapshotRestore(t *testing.T) {
 	t.Parallel()