
First step to implement a full garbage collector for image layers

Refactored the existing logic so that layers are first marked as being
under removal; if the actual removal fails, they can still be found on
disk and cleaned up.

The full garbage collector will be implemented as part of the containerd
migration.

Signed-off-by: Olli Janatuinen <olli.janatuinen@gmail.com>
Olli Janatuinen 6 years ago
parent
commit
213681b66a
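
The idea in a minimal standalone sketch (toy helper names and paths, not the real moby types): a layer's metadata directory is renamed with a "-removing" suffix before its data is deleted, so a failed removal leaves a discoverable marker that a later cleanup pass can sweep.

package main

import (
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"strings"
)

// markForRemoval mirrors the rename done in deleteLayer below: the metadata
// directory gets a "-removing" suffix before any driver data is touched, so
// a failed removal leaves a discoverable marker behind.
func markForRemoval(layerDir string) (string, error) {
	marked := layerDir + "-removing"
	return marked, os.Rename(layerDir, marked)
}

// sweepOrphans mirrors the new Cleanup path: any directory still carrying
// the marker is an orphan from a failed removal and can be deleted.
func sweepOrphans(root string) error {
	entries, err := ioutil.ReadDir(root)
	if err != nil {
		return err
	}
	for _, fi := range entries {
		if fi.IsDir() && strings.HasSuffix(fi.Name(), "-removing") {
			// The real code also removes the graphdriver data (driver.Remove)
			// using the cache-id stored inside the directory.
			if err := os.RemoveAll(filepath.Join(root, fi.Name())); err != nil {
				return err
			}
		}
	}
	return nil
}

func main() {
	root, _ := ioutil.TempDir("", "layerdb")
	defer os.RemoveAll(root)
	layerDir := filepath.Join(root, "deadbeef")
	os.MkdirAll(layerDir, 0700)

	marked, _ := markForRemoval(layerDir)
	sweepOrphans(root)
	_, err := os.Stat(marked)
	fmt.Println("orphan swept:", os.IsNotExist(err))
}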

+ 88 - 0
integration/image/remove_unix_test.go

@@ -0,0 +1,88 @@
+// +build !windows
+
+package image // import "github.com/docker/docker/integration/image"
+
+import (
+	"context"
+	"io"
+	"io/ioutil"
+	"os"
+	"strconv"
+	"syscall"
+	"testing"
+	"unsafe"
+
+	"github.com/docker/docker/api/types"
+	"github.com/docker/docker/internal/test/daemon"
+	"github.com/docker/docker/internal/test/fakecontext"
+	"gotest.tools/assert"
+	"gotest.tools/skip"
+)
+
+// This is a regression test for #38488
+// It ensures that orphan layers can be found and cleaned up
+// after an unsuccessful image removal.
+func TestRemoveImageGarbageCollector(t *testing.T) {
+	// This test uses a very platform-specific way to prevent the
+	// daemon from removing an image layer.
+	skip.If(t, testEnv.DaemonInfo.OSType != "linux")
+	skip.If(t, os.Getenv("DOCKER_ENGINE_GOARCH") != "amd64")
+
+	// Create a daemon with the overlay2 graphdriver because vfs uses the disk
+	// differently and this test case would not work with it.
+	d := daemon.New(t, daemon.WithStorageDriver("overlay2"), daemon.WithImageService)
+	d.Start(t)
+	defer d.Stop(t)
+
+	ctx := context.Background()
+	client := d.NewClientT(t)
+	i := d.ImageService()
+
+	img := "test-garbage-collector"
+
+	// Build an image with multiple layers
+	dockerfile := `FROM busybox
+	RUN echo echo Running... > /run.sh`
+	source := fakecontext.New(t, "", fakecontext.WithDockerfile(dockerfile))
+	defer source.Close()
+	resp, err := client.ImageBuild(ctx,
+		source.AsTarReader(t),
+		types.ImageBuildOptions{
+			Remove:      true,
+			ForceRemove: true,
+			Tags:        []string{img},
+		})
+	assert.NilError(t, err)
+	_, err = io.Copy(ioutil.Discard, resp.Body)
+	resp.Body.Close()
+	assert.NilError(t, err)
+	image, _, err := client.ImageInspectWithRaw(ctx, img)
+	assert.NilError(t, err)
+
+	// Mark the latest image layer as immutable
+	data := image.GraphDriver.Data
+	file, _ := os.Open(data["UpperDir"])
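+	// (0x00000010 is FS_IMMUTABLE_FL and 0x40086602 is FS_IOC_SETFLAGS on
+	// 64-bit Linux; hard-coded because the test already assumes linux/amd64.)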
+	attr := 0x00000010
+	fsflags := uintptr(0x40086602)
+	argp := uintptr(unsafe.Pointer(&attr))
+	_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), fsflags, argp)
+	assert.Equal(t, "errno 0", errno.Error())
+
+	// Try to remove the image; it should fail. Mark the layer as mutable
+	// again before checking the error (so we don't break the CI server).
+	_, err = client.ImageRemove(ctx, img, types.ImageRemoveOptions{})
+	attr = 0x00000000
+	argp = uintptr(unsafe.Pointer(&attr))
+	_, _, errno = syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), fsflags, argp)
+	assert.Equal(t, "errno 0", errno.Error())
+	assert.ErrorContains(t, err, "permission denied")
+
+	// Verify that the layer remains on disk
+	dir, _ := os.Stat(data["UpperDir"])
+	assert.Equal(t, "true", strconv.FormatBool(dir.IsDir()))
+
+	// Run imageService.Cleanup() and make sure that the layer was removed from disk
+	i.Cleanup()
+	dir, err = os.Stat(data["UpperDir"])
+	assert.ErrorContains(t, err, "no such file or directory")
+}

+ 7 - 0
internal/test/daemon/daemon.go

@@ -16,6 +16,7 @@ import (
 	"github.com/docker/docker/api/types"
 	"github.com/docker/docker/api/types"
 	"github.com/docker/docker/api/types/events"
 	"github.com/docker/docker/api/types/events"
 	"github.com/docker/docker/client"
 	"github.com/docker/docker/client"
+	"github.com/docker/docker/daemon/images"
 	"github.com/docker/docker/internal/test"
 	"github.com/docker/docker/internal/test"
 	"github.com/docker/docker/internal/test/request"
 	"github.com/docker/docker/internal/test/request"
 	"github.com/docker/docker/opts"
 	"github.com/docker/docker/opts"
@@ -71,6 +72,7 @@ type Daemon struct {
 	init                       bool
 	dockerdBinary              string
 	log                        logT
+	imageService               *images.ImageService
 
 	// swarm related field
 	swarmListenAddr string
@@ -700,3 +702,8 @@ func cleanupRaftDir(t testingT, rootPath string) {
 		t.Logf("error removing %v: %v", walDir, err)
 	}
 }
+
+// ImageService returns the Daemon's ImageService
+func (d *Daemon) ImageService() *images.ImageService {
+	return d.imageService
+}

+ 7 - 0
internal/test/daemon/ops.go

@@ -70,3 +70,10 @@ func WithEnvironment(e environment.Execution) func(*Daemon) {
 		}
 	}
 }
+
+// WithStorageDriver sets the storage driver option
+func WithStorageDriver(driver string) func(d *Daemon) {
+	return func(d *Daemon) {
+		d.storageDriver = driver
+	}
+}

+ 34 - 0
internal/test/daemon/ops_unix.go

@@ -0,0 +1,34 @@
+// +build !windows
+
+package daemon
+
+import (
+	"path/filepath"
+	"runtime"
+
+	"github.com/docker/docker/daemon/images"
+	"github.com/docker/docker/layer"
+
+	// register graph drivers
+	_ "github.com/docker/docker/daemon/graphdriver/register"
+	"github.com/docker/docker/pkg/idtools"
+)
+
+// WithImageService sets the imageService option
+func WithImageService(d *Daemon) {
+	layerStores := make(map[string]layer.Store)
+	os := runtime.GOOS
+	layerStores[os], _ = layer.NewStoreFromOptions(layer.StoreOptions{
+		Root:                      d.Root,
+		MetadataStorePathTemplate: filepath.Join(d.RootDir(), "image", "%s", "layerdb"),
+		GraphDriver:               d.storageDriver,
+		GraphDriverOptions:        nil,
+		IDMapping:                 &idtools.IdentityMapping{},
+		PluginGetter:              nil,
+		ExperimentalEnabled:       false,
+		OS:                        os,
+	})
+	d.imageService = images.NewImageService(images.ImageServiceConfig{
+		LayerStores: layerStores,
+	})
+}

+ 74 - 2
layer/filestore.go

@@ -305,6 +305,47 @@ func (fms *fileMetadataStore) GetMountParent(mount string) (ChainID, error) {
 	return ChainID(dgst), nil
 }
 
+func (fms *fileMetadataStore) getOrphan() ([]roLayer, error) {
+	var orphanLayers []roLayer
+	for _, algorithm := range supportedAlgorithms {
+		fileInfos, err := ioutil.ReadDir(filepath.Join(fms.root, string(algorithm)))
+		if err != nil {
+			if os.IsNotExist(err) {
+				continue
+			}
+			return nil, err
+		}
+
+		for _, fi := range fileInfos {
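+			// Layer directories renamed by deleteLayer carry a
+			// "<digest>-<random>-removing" suffix; anything still named like
+			// that is an orphan left behind by a failed removal.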
+			if fi.IsDir() && strings.Contains(fi.Name(), "-removing") {
+				nameSplit := strings.Split(fi.Name(), "-")
+				dgst := digest.NewDigestFromEncoded(algorithm, nameSplit[0])
+				if err := dgst.Validate(); err != nil {
+					logrus.Debugf("Ignoring invalid digest %s:%s", algorithm, nameSplit[0])
+				} else {
+					chainID := ChainID(dgst)
+					chainFile := filepath.Join(fms.root, string(algorithm), fi.Name(), "cache-id")
+					contentBytes, err := ioutil.ReadFile(chainFile)
+					if err != nil {
+						logrus.WithError(err).WithField("digest", dgst).Error("cannot get cache ID")
+					}
+					cacheID := strings.TrimSpace(string(contentBytes))
+					if cacheID == "" {
+						logrus.Errorf("invalid cache id value")
+					}
+
+					l := &roLayer{
+						chainID: chainID,
+						cacheID: cacheID,
+					}
+					orphanLayers = append(orphanLayers, *l)
+				}
+			}
+		}
+	}
+	return orphanLayers, nil
+}
+
 func (fms *fileMetadataStore) List() ([]ChainID, []string, error) {
 	var ids []ChainID
 	for _, algorithm := range supportedAlgorithms {
@@ -346,8 +387,39 @@ func (fms *fileMetadataStore) List() ([]ChainID, []string, error) {
 	return ids, mounts, nil
 }
 
-func (fms *fileMetadataStore) Remove(layer ChainID) error {
-	return os.RemoveAll(fms.getLayerDirectory(layer))
+// Remove removes the layerdb folder if it is marked for removal
+func (fms *fileMetadataStore) Remove(layer ChainID, cache string) error {
+	dgst := digest.Digest(layer)
+	files, err := ioutil.ReadDir(filepath.Join(fms.root, string(dgst.Algorithm())))
+	if err != nil {
+		return err
+	}
+	for _, f := range files {
+		if !strings.HasSuffix(f.Name(), "-removing") || !strings.HasPrefix(f.Name(), dgst.String()) {
+			continue
+		}
+
+		// Make sure that we only remove the layerdb folder which points to
+		// the requested cacheID
+		dir := filepath.Join(fms.root, string(dgst.Algorithm()), f.Name())
+		chainFile := filepath.Join(dir, "cache-id")
+		contentBytes, err := ioutil.ReadFile(chainFile)
+		if err != nil {
+			logrus.WithError(err).WithField("file", chainFile).Error("cannot get cache ID")
+			continue
+		}
+		cacheID := strings.TrimSpace(string(contentBytes))
+		if cacheID != cache {
+			continue
+		}
+		logrus.Debugf("Removing folder: %s", dir)
+		err = os.RemoveAll(dir)
+		if err != nil && !os.IsNotExist(err) {
+			logrus.WithError(err).WithField("name", f.Name()).Error("cannot remove layer")
+			continue
+		}
+	}
+	return nil
 }
 
 func (fms *fileMetadataStore) RemoveMount(mount string) error {

+ 37 - 3
layer/layer_store.go

@@ -5,6 +5,8 @@ import (
 	"fmt"
 	"fmt"
 	"io"
 	"io"
 	"io/ioutil"
 	"io/ioutil"
+	"os"
+	"path/filepath"
 	"sync"
 	"sync"
 
 
 	"github.com/docker/distribution"
 	"github.com/docker/distribution"
@@ -415,11 +417,24 @@ func (ls *layerStore) Map() map[ChainID]Layer {
 }
 
 func (ls *layerStore) deleteLayer(layer *roLayer, metadata *Metadata) error {
+	// Rename layer digest folder first so we detect orphan layer(s)
+	// if ls.driver.Remove fails
+	var dir string
+	for {
+		dgst := digest.Digest(layer.chainID)
+		tmpID := fmt.Sprintf("%s-%s-removing", dgst.Hex(), stringid.GenerateRandomID())
+		dir = filepath.Join(ls.store.root, string(dgst.Algorithm()), tmpID)
+		err := os.Rename(ls.store.getLayerDirectory(layer.chainID), dir)
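+		// If the randomly generated destination name already exists (left
+		// over from an earlier attempt), pick a new suffix and retry.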
+		if os.IsExist(err) {
+			continue
+		}
+		break
+	}
 	err := ls.driver.Remove(layer.cacheID)
 	if err != nil {
 		return err
 	}
-	err = ls.store.Remove(layer.chainID)
+	err = os.RemoveAll(dir)
 	if err != nil {
 		return err
 	}
@@ -452,12 +467,14 @@ func (ls *layerStore) releaseLayer(l *roLayer) ([]Metadata, error) {
 		if l.hasReferences() {
 			panic("cannot delete referenced layer")
 		}
+		// Remove the layer from the layer map first so it is not considered
+		// to exist if ls.deleteLayer fails.
+		delete(ls.layerMap, l.chainID)
+
 		var metadata Metadata
 		if err := ls.deleteLayer(l, &metadata); err != nil {
 			return nil, err
 		}
-
-		delete(ls.layerMap, l.chainID)
 		removed = append(removed, metadata)
 
 		if l.parent == nil {
@@ -743,6 +760,23 @@ func (ls *layerStore) assembleTarTo(graphID string, metadata io.ReadCloser, size
 }
 
 func (ls *layerStore) Cleanup() error {
+	orphanLayers, err := ls.store.getOrphan()
+	if err != nil {
+		logrus.Errorf("Cannot get orphan layers: %v", err)
+	}
+	logrus.Debugf("found %v orphan layers", len(orphanLayers))
+	for _, orphan := range orphanLayers {
+		logrus.Debugf("removing orphan layer, chain ID: %v , cache ID: %v", orphan.chainID, orphan.cacheID)
+		err = ls.driver.Remove(orphan.cacheID)
+		if err != nil && !os.IsNotExist(err) {
+			logrus.WithError(err).WithField("cache-id", orphan.cacheID).Error("cannot remove orphan layer")
+			continue
+		}
+		err = ls.store.Remove(orphan.chainID, orphan.cacheID)
+		if err != nil {
+			logrus.WithError(err).WithField("chain-id", orphan.chainID).Error("cannot remove orphan layer metadata")
+		}
+	}
 	return ls.driver.Cleanup()
 	return ls.driver.Cleanup()
 }