From aec7a80c6f0e0ff848a2370a3e0b3ece065ee823 Mon Sep 17 00:00:00 2001 From: Djordje Lukic Date: Sat, 17 Jun 2023 17:23:01 -0600 Subject: [PATCH] c8d: Use reference counting while mounting a snapshot Some snapshotters (like overlayfs or zfs) can't mount the same directories twice. For example if the same directroy is used as an upper directory in two mounts the kernel will output this warning: overlayfs: upperdir is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior. And indeed accessing the files from both mounts will result in an "No such file or directory" error. This change introduces reference counts for the mounts, if a directory is already mounted the mount interface will only increment the mount counter and return the mount target effectively making sure that the filesystem doesn't end up in an undefined behavior. Signed-off-by: Djordje Lukic (cherry picked from commit 32d58144fd5ed2020a3a4629d651e69ea5c5177a) Signed-off-by: Bjorn Neergaard --- daemon/containerd/mount.go | 23 ++--- daemon/containerd/service.go | 16 ++-- daemon/daemon.go | 14 +-- daemon/daemon_unix.go | 10 +- daemon/oci_linux.go | 30 ++---- daemon/snapshotter/mount.go | 141 ++++++++++++++++++++++++++++ daemon/snapshotter/mount_default.go | 17 ++++ daemon/snapshotter/mount_windows.go | 18 ++++ daemon/start.go | 9 +- 9 files changed, 211 insertions(+), 67 deletions(-) create mode 100644 daemon/snapshotter/mount.go create mode 100644 daemon/snapshotter/mount_default.go create mode 100644 daemon/snapshotter/mount_windows.go diff --git a/daemon/containerd/mount.go b/daemon/containerd/mount.go index ed58e761dc..6f36364f7c 100644 --- a/daemon/containerd/mount.go +++ b/daemon/containerd/mount.go @@ -3,9 +3,7 @@ package containerd import ( "context" "fmt" - "os" - "github.com/containerd/containerd/mount" "github.com/docker/docker/container" "github.com/sirupsen/logrus" ) @@ -19,17 +17,13 @@ func (i *ImageService) Mount(ctx context.Context, container *container.Container return err } - // The temporary location will be under /var/lib/docker/... because - // we set the `TMPDIR` - root, err := os.MkdirTemp("", fmt.Sprintf("%s_rootfs-mount", container.ID)) - if err != nil { - return fmt.Errorf("failed to create temp dir: %w", err) - } - - if err := mount.All(mounts, root); err != nil { + var root string + if root, err = i.refCountMounter.Mount(mounts, container.ID); err != nil { return fmt.Errorf("failed to mount %s: %w", root, err) } + logrus.WithField("container", container.ID).Debugf("container mounted via snapshotter: %v", root) + container.BaseFS = root return nil } @@ -38,15 +32,10 @@ func (i *ImageService) Mount(ctx context.Context, container *container.Container func (i *ImageService) Unmount(ctx context.Context, container *container.Container) error { root := container.BaseFS - if err := mount.UnmountAll(root, 0); err != nil { + if err := i.refCountMounter.Unmount(root); err != nil { + logrus.WithField("container", container.ID).WithError(err).Error("error unmounting container") return fmt.Errorf("failed to unmount %s: %w", root, err) } - if err := os.Remove(root); err != nil { - logrus.WithError(err).WithField("dir", root).Error("failed to remove mount temp dir") - } - - container.BaseFS = "" - return nil } diff --git a/daemon/containerd/service.go b/daemon/containerd/service.go index 0aa0c39146..fce32665a8 100644 --- a/daemon/containerd/service.go +++ b/daemon/containerd/service.go @@ -15,6 +15,7 @@ import ( "github.com/docker/docker/container" daemonevents "github.com/docker/docker/daemon/events" "github.com/docker/docker/daemon/images" + "github.com/docker/docker/daemon/snapshotter" "github.com/docker/docker/errdefs" "github.com/docker/docker/image" "github.com/docker/docker/layer" @@ -35,6 +36,7 @@ type ImageService struct { registryService RegistryConfigProvider eventsService *daemonevents.Events pruneRunning atomic.Bool + refCountMounter snapshotter.Mounter } type RegistryHostsProvider interface { @@ -47,12 +49,13 @@ type RegistryConfigProvider interface { } type ImageServiceConfig struct { - Client *containerd.Client - Containers container.Store - Snapshotter string - HostsProvider RegistryHostsProvider - Registry RegistryConfigProvider - EventsService *daemonevents.Events + Client *containerd.Client + Containers container.Store + Snapshotter string + HostsProvider RegistryHostsProvider + Registry RegistryConfigProvider + EventsService *daemonevents.Events + RefCountMounter snapshotter.Mounter } // NewService creates a new ImageService. @@ -64,6 +67,7 @@ func NewService(config ImageServiceConfig) *ImageService { registryHosts: config.HostsProvider, registryService: config.Registry, eventsService: config.EventsService, + refCountMounter: config.RefCountMounter, } } diff --git a/daemon/daemon.go b/daemon/daemon.go index 9be2f28969..a00a405dc1 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -40,6 +40,7 @@ import ( "github.com/docker/docker/daemon/images" dlogger "github.com/docker/docker/daemon/logger" "github.com/docker/docker/daemon/network" + "github.com/docker/docker/daemon/snapshotter" "github.com/docker/docker/daemon/stats" "github.com/docker/docker/distribution" dmetadata "github.com/docker/docker/distribution/metadata" @@ -1023,12 +1024,13 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S return nil, err } d.imageService = ctrd.NewService(ctrd.ImageServiceConfig{ - Client: d.containerdCli, - Containers: d.containers, - Snapshotter: driverName, - HostsProvider: d, - Registry: d.registryService, - EventsService: d.EventsService, + Client: d.containerdCli, + Containers: d.containers, + Snapshotter: driverName, + HostsProvider: d, + Registry: d.registryService, + EventsService: d.EventsService, + RefCountMounter: snapshotter.NewMounter(config.Root, driverName, idMapping), }) } else { layerStore, err := layer.NewStoreFromOptions(layer.StoreOptions{ diff --git a/daemon/daemon_unix.go b/daemon/daemon_unix.go index 4b504c871e..11e691a2b7 100644 --- a/daemon/daemon_unix.go +++ b/daemon/daemon_unix.go @@ -1394,19 +1394,13 @@ func (daemon *Daemon) registerLinks(container *container.Container, hostConfig * // conditionalMountOnStart is a platform specific helper function during the // container start to call mount. func (daemon *Daemon) conditionalMountOnStart(container *container.Container) error { - if !daemon.UsesSnapshotter() { - return daemon.Mount(container) - } - return nil + return daemon.Mount(container) } // conditionalUnmountOnCleanup is a platform specific helper function called // during the cleanup of a container to unmount. func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) error { - if !daemon.UsesSnapshotter() { - return daemon.Unmount(container) - } - return nil + return daemon.Unmount(container) } // setDefaultIsolation determines the default isolation mode for the diff --git a/daemon/oci_linux.go b/daemon/oci_linux.go index 015a429944..5ad7677c84 100644 --- a/daemon/oci_linux.go +++ b/daemon/oci_linux.go @@ -718,21 +718,19 @@ func sysctlExists(s string) bool { // WithCommonOptions sets common docker options func WithCommonOptions(daemon *Daemon, c *container.Container) coci.SpecOpts { return func(ctx context.Context, _ coci.Client, _ *containers.Container, s *coci.Spec) error { - if c.BaseFS == "" && !daemon.UsesSnapshotter() { + if c.BaseFS == "" { return errors.New("populateCommonSpec: BaseFS of container " + c.ID + " is unexpectedly empty") } linkedEnv, err := daemon.setupLinkedContainers(c) if err != nil { return err } - if !daemon.UsesSnapshotter() { - s.Root = &specs.Root{ - Path: c.BaseFS, - Readonly: c.HostConfig.ReadonlyRootfs, - } - if err := c.SetupWorkingDirectory(daemon.idMapping.RootPair()); err != nil { - return err - } + s.Root = &specs.Root{ + Path: c.BaseFS, + Readonly: c.HostConfig.ReadonlyRootfs, + } + if err := c.SetupWorkingDirectory(daemon.idMapping.RootPair()); err != nil { + return err } cwd := c.Config.WorkingDir if len(cwd) == 0 { @@ -1023,20 +1021,8 @@ func (daemon *Daemon) createSpec(ctx context.Context, c *container.Container) (r WithSelinux(c), WithOOMScore(&c.HostConfig.OomScoreAdj), coci.WithAnnotations(c.HostConfig.Annotations), + WithUser(c), ) - if daemon.UsesSnapshotter() { - s.Root = &specs.Root{ - Path: "rootfs", - } - if c.Config.User != "" { - opts = append(opts, coci.WithUser(c.Config.User)) - } - if c.Config.WorkingDir != "" { - opts = append(opts, coci.WithProcessCwd(c.Config.WorkingDir)) - } - } else { - opts = append(opts, WithUser(c)) - } if c.NoNewPrivileges { opts = append(opts, coci.WithNoNewPrivileges) diff --git a/daemon/snapshotter/mount.go b/daemon/snapshotter/mount.go new file mode 100644 index 0000000000..0dc3f0bdd2 --- /dev/null +++ b/daemon/snapshotter/mount.go @@ -0,0 +1,141 @@ +package snapshotter + +import ( + "os" + "path/filepath" + + "github.com/containerd/containerd/mount" + "github.com/docker/docker/daemon/graphdriver" + "github.com/docker/docker/pkg/idtools" + "github.com/moby/locker" + "github.com/sirupsen/logrus" +) + +const mountsDir = "rootfs" + +// List of known filesystems that can't be re-mounted or have shared layers +var refCountedFileSystems = []string{"overlayfs", "zfs", "fuse-overlayfs"} + +// Mounter handles mounting/unmounting things coming in from a snapshotter +// with optional reference counting if needed by the filesystem +type Mounter interface { + // Mount mounts the rootfs for a container and returns the mount point + Mount(mounts []mount.Mount, containerID string) (string, error) + // Unmount unmounts the container rootfs + Unmount(target string) error +} + +// inSlice tests whether a string is contained in a slice of strings or not. +// Comparison is case sensitive +func inSlice(slice []string, s string) bool { + for _, ss := range slice { + if s == ss { + return true + } + } + return false +} + +// NewMounter creates a new mounter for the provided snapshotter +func NewMounter(home string, snapshotter string, idMap idtools.IdentityMapping) Mounter { + if inSlice(refCountedFileSystems, snapshotter) { + return &refCountMounter{ + home: home, + snapshotter: snapshotter, + rc: graphdriver.NewRefCounter(checker()), + locker: locker.New(), + idMap: idMap, + } + } + + return mounter{ + home: home, + snapshotter: snapshotter, + idMap: idMap, + } +} + +type refCountMounter struct { + home string + snapshotter string + rc *graphdriver.RefCounter + locker *locker.Locker + idMap idtools.IdentityMapping +} + +func (m *refCountMounter) Mount(mounts []mount.Mount, containerID string) (target string, retErr error) { + target = filepath.Join(m.home, mountsDir, m.snapshotter, containerID) + + _, err := os.Stat(target) + if err != nil && !os.IsNotExist(err) { + return "", err + } + + if count := m.rc.Increment(target); count > 1 { + return target, nil + } + + m.locker.Lock(target) + defer m.locker.Unlock(target) + + defer func() { + if retErr != nil { + if c := m.rc.Decrement(target); c <= 0 { + if mntErr := unmount(target); mntErr != nil { + logrus.Errorf("error unmounting %s: %v", target, mntErr) + } + if rmErr := os.Remove(target); rmErr != nil && !os.IsNotExist(rmErr) { + logrus.Debugf("Failed to remove %s: %v: %v", target, rmErr, err) + } + } + } + }() + + root := m.idMap.RootPair() + if err := idtools.MkdirAllAndChown(target, 0700, root); err != nil { + return "", err + } + + return target, mount.All(mounts, target) +} + +func (m *refCountMounter) Unmount(target string) error { + if count := m.rc.Decrement(target); count > 0 { + return nil + } + + m.locker.Lock(target) + defer m.locker.Unlock(target) + + if err := unmount(target); err != nil { + logrus.Debugf("Failed to unmount %s: %v", target, err) + } + + if err := os.Remove(target); err != nil { + logrus.WithError(err).WithField("dir", target).Error("failed to remove mount temp dir") + } + + return nil +} + +type mounter struct { + home string + snapshotter string + idMap idtools.IdentityMapping +} + +func (m mounter) Mount(mounts []mount.Mount, containerID string) (string, error) { + target := filepath.Join(m.home, mountsDir, m.snapshotter, containerID) + + root := m.idMap.RootPair() + if err := idtools.MkdirAndChown(target, 0700, root); err != nil { + return "", err + } + + return target, mount.All(mounts, target) +} + +func (m mounter) Unmount(target string) error { + return unmount(target) + +} diff --git a/daemon/snapshotter/mount_default.go b/daemon/snapshotter/mount_default.go new file mode 100644 index 0000000000..8203a9c479 --- /dev/null +++ b/daemon/snapshotter/mount_default.go @@ -0,0 +1,17 @@ +//go:build !windows + +package snapshotter + +import ( + "github.com/containerd/containerd/mount" + "github.com/docker/docker/daemon/graphdriver" + "golang.org/x/sys/unix" +) + +func checker() graphdriver.Checker { + return graphdriver.NewDefaultChecker() +} + +func unmount(target string) error { + return mount.Unmount(target, unix.MNT_DETACH) +} diff --git a/daemon/snapshotter/mount_windows.go b/daemon/snapshotter/mount_windows.go new file mode 100644 index 0000000000..f43cfe24c4 --- /dev/null +++ b/daemon/snapshotter/mount_windows.go @@ -0,0 +1,18 @@ +package snapshotter + +import "github.com/containerd/containerd/mount" + +type winChecker struct { +} + +func (c *winChecker) IsMounted(path string) bool { + return false +} + +func checker() *winChecker { + return &winChecker{} +} + +func unmount(target string) error { + return mount.Unmount(target, 0) +} diff --git a/daemon/start.go b/daemon/start.go index 0b4eb6d67b..2e0b9e6be8 100644 --- a/daemon/start.go +++ b/daemon/start.go @@ -5,7 +5,6 @@ import ( "runtime" "time" - "github.com/containerd/containerd" "github.com/docker/docker/api/types" containertypes "github.com/docker/docker/api/types/container" "github.com/docker/docker/container" @@ -178,13 +177,7 @@ func (daemon *Daemon) containerStart(ctx context.Context, container *container.C return err } - newContainerOpts := []containerd.NewContainerOpts{} - if daemon.UsesSnapshotter() { - newContainerOpts = append(newContainerOpts, containerd.WithSnapshotter(container.Driver)) - newContainerOpts = append(newContainerOpts, containerd.WithSnapshot(container.ID)) - } - - ctr, err := libcontainerd.ReplaceContainer(ctx, daemon.containerd, container.ID, spec, shim, createOptions, newContainerOpts...) + ctr, err := libcontainerd.ReplaceContainer(ctx, daemon.containerd, container.ID, spec, shim, createOptions) if err != nil { return setExitCodeFromError(container.SetExitCode, err) }