浏览代码

c8d: Use reference counting while mounting a snapshot

Some snapshotters (like overlayfs or zfs) can't mount the same
directories twice. For example if the same directroy is used as an upper
directory in two mounts the kernel will output this warning:

    overlayfs: upperdir is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.

And indeed accessing the files from both mounts will result in an "No
such file or directory" error.

This change introduces reference counts for the mounts, if a directory
is already mounted the mount interface will only increment the mount
counter and return the mount target effectively making sure that the
filesystem doesn't end up in an undefined behavior.

Signed-off-by: Djordje Lukic <djordje.lukic@docker.com>
Djordje Lukic 2 年之前
父节点
当前提交
32d58144fd

+ 6 - 17
daemon/containerd/mount.go

@@ -3,9 +3,7 @@ package containerd
 import (
 	"context"
 	"fmt"
-	"os"
 
-	"github.com/containerd/containerd/mount"
 	"github.com/docker/docker/container"
 	"github.com/sirupsen/logrus"
 )
@@ -19,17 +17,13 @@ func (i *ImageService) Mount(ctx context.Context, container *container.Container
 		return err
 	}
 
-	// The temporary location will be under /var/lib/docker/... because
-	// we set the `TMPDIR`
-	root, err := os.MkdirTemp("", fmt.Sprintf("%s_rootfs-mount", container.ID))
-	if err != nil {
-		return fmt.Errorf("failed to create temp dir: %w", err)
-	}
-
-	if err := mount.All(mounts, root); err != nil {
+	var root string
+	if root, err = i.refCountMounter.Mount(mounts, container.ID); err != nil {
 		return fmt.Errorf("failed to mount %s: %w", root, err)
 	}
 
+	logrus.WithField("container", container.ID).Debugf("container mounted via snapshotter: %v", root)
+
 	container.BaseFS = root
 	return nil
 }
@@ -38,15 +32,10 @@ func (i *ImageService) Mount(ctx context.Context, container *container.Container
 func (i *ImageService) Unmount(ctx context.Context, container *container.Container) error {
 	root := container.BaseFS
 
-	if err := mount.UnmountAll(root, 0); err != nil {
+	if err := i.refCountMounter.Unmount(root); err != nil {
+		logrus.WithField("container", container.ID).WithError(err).Error("error unmounting container")
 		return fmt.Errorf("failed to unmount %s: %w", root, err)
 	}
 
-	if err := os.Remove(root); err != nil {
-		logrus.WithError(err).WithField("dir", root).Error("failed to remove mount temp dir")
-	}
-
-	container.BaseFS = ""
-
 	return nil
 }

+ 10 - 6
daemon/containerd/service.go

@@ -15,6 +15,7 @@ import (
 	"github.com/docker/docker/container"
 	daemonevents "github.com/docker/docker/daemon/events"
 	"github.com/docker/docker/daemon/images"
+	"github.com/docker/docker/daemon/snapshotter"
 	"github.com/docker/docker/errdefs"
 	"github.com/docker/docker/image"
 	"github.com/docker/docker/layer"
@@ -35,6 +36,7 @@ type ImageService struct {
 	registryService RegistryConfigProvider
 	eventsService   *daemonevents.Events
 	pruneRunning    atomic.Bool
+	refCountMounter snapshotter.Mounter
 }
 
 type RegistryConfigProvider interface {
@@ -43,12 +45,13 @@ type RegistryConfigProvider interface {
 }
 
 type ImageServiceConfig struct {
-	Client        *containerd.Client
-	Containers    container.Store
-	Snapshotter   string
-	RegistryHosts docker.RegistryHosts
-	Registry      RegistryConfigProvider
-	EventsService *daemonevents.Events
+	Client          *containerd.Client
+	Containers      container.Store
+	Snapshotter     string
+	RegistryHosts   docker.RegistryHosts
+	Registry        RegistryConfigProvider
+	EventsService   *daemonevents.Events
+	RefCountMounter snapshotter.Mounter
 }
 
 // NewService creates a new ImageService.
@@ -60,6 +63,7 @@ func NewService(config ImageServiceConfig) *ImageService {
 		registryHosts:   config.RegistryHosts,
 		registryService: config.Registry,
 		eventsService:   config.EventsService,
+		refCountMounter: config.RefCountMounter,
 	}
 }
 

+ 8 - 6
daemon/daemon.go

@@ -41,6 +41,7 @@ import (
 	"github.com/docker/docker/daemon/images"
 	dlogger "github.com/docker/docker/daemon/logger"
 	"github.com/docker/docker/daemon/network"
+	"github.com/docker/docker/daemon/snapshotter"
 	"github.com/docker/docker/daemon/stats"
 	"github.com/docker/docker/distribution"
 	dmetadata "github.com/docker/docker/distribution/metadata"
@@ -1054,12 +1055,13 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
 			return nil, err
 		}
 		d.imageService = ctrd.NewService(ctrd.ImageServiceConfig{
-			Client:        d.containerdCli,
-			Containers:    d.containers,
-			Snapshotter:   driverName,
-			RegistryHosts: d.RegistryHosts,
-			Registry:      d.registryService,
-			EventsService: d.EventsService,
+			Client:          d.containerdCli,
+			Containers:      d.containers,
+			Snapshotter:     driverName,
+			RegistryHosts:   d.RegistryHosts,
+			Registry:        d.registryService,
+			EventsService:   d.EventsService,
+			RefCountMounter: snapshotter.NewMounter(config.Root, driverName, idMapping),
 		})
 	} else {
 		layerStore, err := layer.NewStoreFromOptions(layer.StoreOptions{

+ 2 - 8
daemon/daemon_unix.go

@@ -1384,19 +1384,13 @@ func (daemon *Daemon) registerLinks(container *container.Container, hostConfig *
 // conditionalMountOnStart is a platform specific helper function during the
 // container start to call mount.
 func (daemon *Daemon) conditionalMountOnStart(container *container.Container) error {
-	if !daemon.UsesSnapshotter() {
-		return daemon.Mount(container)
-	}
-	return nil
+	return daemon.Mount(container)
 }
 
 // conditionalUnmountOnCleanup is a platform specific helper function called
 // during the cleanup of a container to unmount.
 func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) error {
-	if !daemon.UsesSnapshotter() {
-		return daemon.Unmount(container)
-	}
-	return nil
+	return daemon.Unmount(container)
 }
 
 // setDefaultIsolation determines the default isolation mode for the

+ 8 - 22
daemon/oci_linux.go

@@ -735,21 +735,19 @@ func sysctlExists(s string) bool {
 // withCommonOptions sets common docker options
 func withCommonOptions(daemon *Daemon, daemonCfg *dconfig.Config, c *container.Container) coci.SpecOpts {
 	return func(ctx context.Context, _ coci.Client, _ *containers.Container, s *coci.Spec) error {
-		if c.BaseFS == "" && !daemon.UsesSnapshotter() {
+		if c.BaseFS == "" {
 			return errors.New("populateCommonSpec: BaseFS of container " + c.ID + " is unexpectedly empty")
 		}
 		linkedEnv, err := daemon.setupLinkedContainers(c)
 		if err != nil {
 			return err
 		}
-		if !daemon.UsesSnapshotter() {
-			s.Root = &specs.Root{
-				Path:     c.BaseFS,
-				Readonly: c.HostConfig.ReadonlyRootfs,
-			}
-			if err := c.SetupWorkingDirectory(daemon.idMapping.RootPair()); err != nil {
-				return err
-			}
+		s.Root = &specs.Root{
+			Path:     c.BaseFS,
+			Readonly: c.HostConfig.ReadonlyRootfs,
+		}
+		if err := c.SetupWorkingDirectory(daemon.idMapping.RootPair()); err != nil {
+			return err
 		}
 		cwd := c.Config.WorkingDir
 		if len(cwd) == 0 {
@@ -1040,20 +1038,8 @@ func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c
 		WithSelinux(c),
 		WithOOMScore(&c.HostConfig.OomScoreAdj),
 		coci.WithAnnotations(c.HostConfig.Annotations),
+		WithUser(c),
 	)
-	if daemon.UsesSnapshotter() {
-		s.Root = &specs.Root{
-			Path: "rootfs",
-		}
-		if c.Config.User != "" {
-			opts = append(opts, coci.WithUser(c.Config.User))
-		}
-		if c.Config.WorkingDir != "" {
-			opts = append(opts, coci.WithProcessCwd(c.Config.WorkingDir))
-		}
-	} else {
-		opts = append(opts, WithUser(c))
-	}
 
 	if c.NoNewPrivileges {
 		opts = append(opts, coci.WithNoNewPrivileges)

+ 141 - 0
daemon/snapshotter/mount.go

@@ -0,0 +1,141 @@
+package snapshotter
+
+import (
+	"os"
+	"path/filepath"
+
+	"github.com/containerd/containerd/mount"
+	"github.com/docker/docker/daemon/graphdriver"
+	"github.com/docker/docker/pkg/idtools"
+	"github.com/moby/locker"
+	"github.com/sirupsen/logrus"
+)
+
+const mountsDir = "rootfs"
+
+// List of known filesystems that can't be re-mounted or have shared layers
+var refCountedFileSystems = []string{"overlayfs", "zfs", "fuse-overlayfs"}
+
+// Mounter handles mounting/unmounting things coming in from a snapshotter
+// with optional reference counting if needed by the filesystem
+type Mounter interface {
+	// Mount mounts the rootfs for a container and returns the mount point
+	Mount(mounts []mount.Mount, containerID string) (string, error)
+	// Unmount unmounts the container rootfs
+	Unmount(target string) error
+}
+
+// inSlice tests whether a string is contained in a slice of strings or not.
+// Comparison is case sensitive
+func inSlice(slice []string, s string) bool {
+	for _, ss := range slice {
+		if s == ss {
+			return true
+		}
+	}
+	return false
+}
+
+// NewMounter creates a new mounter for the provided snapshotter
+func NewMounter(home string, snapshotter string, idMap idtools.IdentityMapping) Mounter {
+	if inSlice(refCountedFileSystems, snapshotter) {
+		return &refCountMounter{
+			home:        home,
+			snapshotter: snapshotter,
+			rc:          graphdriver.NewRefCounter(checker()),
+			locker:      locker.New(),
+			idMap:       idMap,
+		}
+	}
+
+	return mounter{
+		home:        home,
+		snapshotter: snapshotter,
+		idMap:       idMap,
+	}
+}
+
+type refCountMounter struct {
+	home        string
+	snapshotter string
+	rc          *graphdriver.RefCounter
+	locker      *locker.Locker
+	idMap       idtools.IdentityMapping
+}
+
+func (m *refCountMounter) Mount(mounts []mount.Mount, containerID string) (target string, retErr error) {
+	target = filepath.Join(m.home, mountsDir, m.snapshotter, containerID)
+
+	_, err := os.Stat(target)
+	if err != nil && !os.IsNotExist(err) {
+		return "", err
+	}
+
+	if count := m.rc.Increment(target); count > 1 {
+		return target, nil
+	}
+
+	m.locker.Lock(target)
+	defer m.locker.Unlock(target)
+
+	defer func() {
+		if retErr != nil {
+			if c := m.rc.Decrement(target); c <= 0 {
+				if mntErr := unmount(target); mntErr != nil {
+					logrus.Errorf("error unmounting %s: %v", target, mntErr)
+				}
+				if rmErr := os.Remove(target); rmErr != nil && !os.IsNotExist(rmErr) {
+					logrus.Debugf("Failed to remove %s: %v: %v", target, rmErr, err)
+				}
+			}
+		}
+	}()
+
+	root := m.idMap.RootPair()
+	if err := idtools.MkdirAllAndChown(target, 0700, root); err != nil {
+		return "", err
+	}
+
+	return target, mount.All(mounts, target)
+}
+
+func (m *refCountMounter) Unmount(target string) error {
+	if count := m.rc.Decrement(target); count > 0 {
+		return nil
+	}
+
+	m.locker.Lock(target)
+	defer m.locker.Unlock(target)
+
+	if err := unmount(target); err != nil {
+		logrus.Debugf("Failed to unmount %s: %v", target, err)
+	}
+
+	if err := os.Remove(target); err != nil {
+		logrus.WithError(err).WithField("dir", target).Error("failed to remove mount temp dir")
+	}
+
+	return nil
+}
+
+type mounter struct {
+	home        string
+	snapshotter string
+	idMap       idtools.IdentityMapping
+}
+
+func (m mounter) Mount(mounts []mount.Mount, containerID string) (string, error) {
+	target := filepath.Join(m.home, mountsDir, m.snapshotter, containerID)
+
+	root := m.idMap.RootPair()
+	if err := idtools.MkdirAndChown(target, 0700, root); err != nil {
+		return "", err
+	}
+
+	return target, mount.All(mounts, target)
+}
+
+func (m mounter) Unmount(target string) error {
+	return unmount(target)
+
+}

+ 17 - 0
daemon/snapshotter/mount_default.go

@@ -0,0 +1,17 @@
+//go:build !windows
+
+package snapshotter
+
+import (
+	"github.com/containerd/containerd/mount"
+	"github.com/docker/docker/daemon/graphdriver"
+	"golang.org/x/sys/unix"
+)
+
+func checker() graphdriver.Checker {
+	return graphdriver.NewDefaultChecker()
+}
+
+func unmount(target string) error {
+	return mount.Unmount(target, unix.MNT_DETACH)
+}

+ 18 - 0
daemon/snapshotter/mount_windows.go

@@ -0,0 +1,18 @@
+package snapshotter
+
+import "github.com/containerd/containerd/mount"
+
+type winChecker struct {
+}
+
+func (c *winChecker) IsMounted(path string) bool {
+	return false
+}
+
+func checker() *winChecker {
+	return &winChecker{}
+}
+
+func unmount(target string) error {
+	return mount.Unmount(target, 0)
+}

+ 1 - 8
daemon/start.go

@@ -5,7 +5,6 @@ import (
 	"runtime"
 	"time"
 
-	"github.com/containerd/containerd"
 	"github.com/docker/docker/api/types"
 	containertypes "github.com/docker/docker/api/types/container"
 	"github.com/docker/docker/container"
@@ -179,13 +178,7 @@ func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore
 		return err
 	}
 
-	newContainerOpts := []containerd.NewContainerOpts{}
-	if daemon.UsesSnapshotter() {
-		newContainerOpts = append(newContainerOpts, containerd.WithSnapshotter(container.Driver))
-		newContainerOpts = append(newContainerOpts, containerd.WithSnapshot(container.ID))
-	}
-
-	ctr, err := libcontainerd.ReplaceContainer(ctx, daemon.containerd, container.ID, spec, shim, createOptions, newContainerOpts...)
+	ctr, err := libcontainerd.ReplaceContainer(ctx, daemon.containerd, container.ID, spec, shim, createOptions)
 	if err != nil {
 		return setExitCodeFromError(container.SetExitCode, err)
 	}