c8d: Use reference counting while mounting a snapshot

Some snapshotters (like overlayfs or zfs) can't mount the same
directories twice. For example if the same directroy is used as an upper
directory in two mounts the kernel will output this warning:

    overlayfs: upperdir is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.

And indeed accessing the files from both mounts will result in an "No
such file or directory" error.

This change introduces reference counts for the mounts, if a directory
is already mounted the mount interface will only increment the mount
counter and return the mount target effectively making sure that the
filesystem doesn't end up in an undefined behavior.

Signed-off-by: Djordje Lukic <djordje.lukic@docker.com>
This commit is contained in:
Djordje Lukic 2023-05-15 17:42:37 +02:00
parent 717cd0aaa8
commit 32d58144fd
9 changed files with 211 additions and 67 deletions

View file

@ -3,9 +3,7 @@ package containerd
import (
"context"
"fmt"
"os"
"github.com/containerd/containerd/mount"
"github.com/docker/docker/container"
"github.com/sirupsen/logrus"
)
@ -19,17 +17,13 @@ func (i *ImageService) Mount(ctx context.Context, container *container.Container
return err
}
// The temporary location will be under /var/lib/docker/... because
// we set the `TMPDIR`
root, err := os.MkdirTemp("", fmt.Sprintf("%s_rootfs-mount", container.ID))
if err != nil {
return fmt.Errorf("failed to create temp dir: %w", err)
}
if err := mount.All(mounts, root); err != nil {
var root string
if root, err = i.refCountMounter.Mount(mounts, container.ID); err != nil {
return fmt.Errorf("failed to mount %s: %w", root, err)
}
logrus.WithField("container", container.ID).Debugf("container mounted via snapshotter: %v", root)
container.BaseFS = root
return nil
}
@ -38,15 +32,10 @@ func (i *ImageService) Mount(ctx context.Context, container *container.Container
func (i *ImageService) Unmount(ctx context.Context, container *container.Container) error {
root := container.BaseFS
if err := mount.UnmountAll(root, 0); err != nil {
if err := i.refCountMounter.Unmount(root); err != nil {
logrus.WithField("container", container.ID).WithError(err).Error("error unmounting container")
return fmt.Errorf("failed to unmount %s: %w", root, err)
}
if err := os.Remove(root); err != nil {
logrus.WithError(err).WithField("dir", root).Error("failed to remove mount temp dir")
}
container.BaseFS = ""
return nil
}

View file

@ -15,6 +15,7 @@ import (
"github.com/docker/docker/container"
daemonevents "github.com/docker/docker/daemon/events"
"github.com/docker/docker/daemon/images"
"github.com/docker/docker/daemon/snapshotter"
"github.com/docker/docker/errdefs"
"github.com/docker/docker/image"
"github.com/docker/docker/layer"
@ -35,6 +36,7 @@ type ImageService struct {
registryService RegistryConfigProvider
eventsService *daemonevents.Events
pruneRunning atomic.Bool
refCountMounter snapshotter.Mounter
}
type RegistryConfigProvider interface {
@ -43,12 +45,13 @@ type RegistryConfigProvider interface {
}
type ImageServiceConfig struct {
Client *containerd.Client
Containers container.Store
Snapshotter string
RegistryHosts docker.RegistryHosts
Registry RegistryConfigProvider
EventsService *daemonevents.Events
Client *containerd.Client
Containers container.Store
Snapshotter string
RegistryHosts docker.RegistryHosts
Registry RegistryConfigProvider
EventsService *daemonevents.Events
RefCountMounter snapshotter.Mounter
}
// NewService creates a new ImageService.
@ -60,6 +63,7 @@ func NewService(config ImageServiceConfig) *ImageService {
registryHosts: config.RegistryHosts,
registryService: config.Registry,
eventsService: config.EventsService,
refCountMounter: config.RefCountMounter,
}
}

View file

@ -41,6 +41,7 @@ import (
"github.com/docker/docker/daemon/images"
dlogger "github.com/docker/docker/daemon/logger"
"github.com/docker/docker/daemon/network"
"github.com/docker/docker/daemon/snapshotter"
"github.com/docker/docker/daemon/stats"
"github.com/docker/docker/distribution"
dmetadata "github.com/docker/docker/distribution/metadata"
@ -1054,12 +1055,13 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
return nil, err
}
d.imageService = ctrd.NewService(ctrd.ImageServiceConfig{
Client: d.containerdCli,
Containers: d.containers,
Snapshotter: driverName,
RegistryHosts: d.RegistryHosts,
Registry: d.registryService,
EventsService: d.EventsService,
Client: d.containerdCli,
Containers: d.containers,
Snapshotter: driverName,
RegistryHosts: d.RegistryHosts,
Registry: d.registryService,
EventsService: d.EventsService,
RefCountMounter: snapshotter.NewMounter(config.Root, driverName, idMapping),
})
} else {
layerStore, err := layer.NewStoreFromOptions(layer.StoreOptions{

View file

@ -1384,19 +1384,13 @@ func (daemon *Daemon) registerLinks(container *container.Container, hostConfig *
// conditionalMountOnStart is a platform specific helper function during the
// container start to call mount.
func (daemon *Daemon) conditionalMountOnStart(container *container.Container) error {
if !daemon.UsesSnapshotter() {
return daemon.Mount(container)
}
return nil
return daemon.Mount(container)
}
// conditionalUnmountOnCleanup is a platform specific helper function called
// during the cleanup of a container to unmount.
func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) error {
if !daemon.UsesSnapshotter() {
return daemon.Unmount(container)
}
return nil
return daemon.Unmount(container)
}
// setDefaultIsolation determines the default isolation mode for the

View file

@ -735,21 +735,19 @@ func sysctlExists(s string) bool {
// withCommonOptions sets common docker options
func withCommonOptions(daemon *Daemon, daemonCfg *dconfig.Config, c *container.Container) coci.SpecOpts {
return func(ctx context.Context, _ coci.Client, _ *containers.Container, s *coci.Spec) error {
if c.BaseFS == "" && !daemon.UsesSnapshotter() {
if c.BaseFS == "" {
return errors.New("populateCommonSpec: BaseFS of container " + c.ID + " is unexpectedly empty")
}
linkedEnv, err := daemon.setupLinkedContainers(c)
if err != nil {
return err
}
if !daemon.UsesSnapshotter() {
s.Root = &specs.Root{
Path: c.BaseFS,
Readonly: c.HostConfig.ReadonlyRootfs,
}
if err := c.SetupWorkingDirectory(daemon.idMapping.RootPair()); err != nil {
return err
}
s.Root = &specs.Root{
Path: c.BaseFS,
Readonly: c.HostConfig.ReadonlyRootfs,
}
if err := c.SetupWorkingDirectory(daemon.idMapping.RootPair()); err != nil {
return err
}
cwd := c.Config.WorkingDir
if len(cwd) == 0 {
@ -1040,20 +1038,8 @@ func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c
WithSelinux(c),
WithOOMScore(&c.HostConfig.OomScoreAdj),
coci.WithAnnotations(c.HostConfig.Annotations),
WithUser(c),
)
if daemon.UsesSnapshotter() {
s.Root = &specs.Root{
Path: "rootfs",
}
if c.Config.User != "" {
opts = append(opts, coci.WithUser(c.Config.User))
}
if c.Config.WorkingDir != "" {
opts = append(opts, coci.WithProcessCwd(c.Config.WorkingDir))
}
} else {
opts = append(opts, WithUser(c))
}
if c.NoNewPrivileges {
opts = append(opts, coci.WithNoNewPrivileges)

141
daemon/snapshotter/mount.go Normal file
View file

@ -0,0 +1,141 @@
package snapshotter
import (
"os"
"path/filepath"
"github.com/containerd/containerd/mount"
"github.com/docker/docker/daemon/graphdriver"
"github.com/docker/docker/pkg/idtools"
"github.com/moby/locker"
"github.com/sirupsen/logrus"
)
const mountsDir = "rootfs"
// List of known filesystems that can't be re-mounted or have shared layers
var refCountedFileSystems = []string{"overlayfs", "zfs", "fuse-overlayfs"}
// Mounter handles mounting/unmounting things coming in from a snapshotter
// with optional reference counting if needed by the filesystem
type Mounter interface {
// Mount mounts the rootfs for a container and returns the mount point
Mount(mounts []mount.Mount, containerID string) (string, error)
// Unmount unmounts the container rootfs
Unmount(target string) error
}
// inSlice tests whether a string is contained in a slice of strings or not.
// Comparison is case sensitive
func inSlice(slice []string, s string) bool {
for _, ss := range slice {
if s == ss {
return true
}
}
return false
}
// NewMounter creates a new mounter for the provided snapshotter
func NewMounter(home string, snapshotter string, idMap idtools.IdentityMapping) Mounter {
if inSlice(refCountedFileSystems, snapshotter) {
return &refCountMounter{
home: home,
snapshotter: snapshotter,
rc: graphdriver.NewRefCounter(checker()),
locker: locker.New(),
idMap: idMap,
}
}
return mounter{
home: home,
snapshotter: snapshotter,
idMap: idMap,
}
}
type refCountMounter struct {
home string
snapshotter string
rc *graphdriver.RefCounter
locker *locker.Locker
idMap idtools.IdentityMapping
}
func (m *refCountMounter) Mount(mounts []mount.Mount, containerID string) (target string, retErr error) {
target = filepath.Join(m.home, mountsDir, m.snapshotter, containerID)
_, err := os.Stat(target)
if err != nil && !os.IsNotExist(err) {
return "", err
}
if count := m.rc.Increment(target); count > 1 {
return target, nil
}
m.locker.Lock(target)
defer m.locker.Unlock(target)
defer func() {
if retErr != nil {
if c := m.rc.Decrement(target); c <= 0 {
if mntErr := unmount(target); mntErr != nil {
logrus.Errorf("error unmounting %s: %v", target, mntErr)
}
if rmErr := os.Remove(target); rmErr != nil && !os.IsNotExist(rmErr) {
logrus.Debugf("Failed to remove %s: %v: %v", target, rmErr, err)
}
}
}
}()
root := m.idMap.RootPair()
if err := idtools.MkdirAllAndChown(target, 0700, root); err != nil {
return "", err
}
return target, mount.All(mounts, target)
}
func (m *refCountMounter) Unmount(target string) error {
if count := m.rc.Decrement(target); count > 0 {
return nil
}
m.locker.Lock(target)
defer m.locker.Unlock(target)
if err := unmount(target); err != nil {
logrus.Debugf("Failed to unmount %s: %v", target, err)
}
if err := os.Remove(target); err != nil {
logrus.WithError(err).WithField("dir", target).Error("failed to remove mount temp dir")
}
return nil
}
type mounter struct {
home string
snapshotter string
idMap idtools.IdentityMapping
}
func (m mounter) Mount(mounts []mount.Mount, containerID string) (string, error) {
target := filepath.Join(m.home, mountsDir, m.snapshotter, containerID)
root := m.idMap.RootPair()
if err := idtools.MkdirAndChown(target, 0700, root); err != nil {
return "", err
}
return target, mount.All(mounts, target)
}
func (m mounter) Unmount(target string) error {
return unmount(target)
}

View file

@ -0,0 +1,17 @@
//go:build !windows
package snapshotter
import (
"github.com/containerd/containerd/mount"
"github.com/docker/docker/daemon/graphdriver"
"golang.org/x/sys/unix"
)
func checker() graphdriver.Checker {
return graphdriver.NewDefaultChecker()
}
func unmount(target string) error {
return mount.Unmount(target, unix.MNT_DETACH)
}

View file

@ -0,0 +1,18 @@
package snapshotter
import "github.com/containerd/containerd/mount"
type winChecker struct {
}
func (c *winChecker) IsMounted(path string) bool {
return false
}
func checker() *winChecker {
return &winChecker{}
}
func unmount(target string) error {
return mount.Unmount(target, 0)
}

View file

@ -5,7 +5,6 @@ import (
"runtime"
"time"
"github.com/containerd/containerd"
"github.com/docker/docker/api/types"
containertypes "github.com/docker/docker/api/types/container"
"github.com/docker/docker/container"
@ -179,13 +178,7 @@ func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore
return err
}
newContainerOpts := []containerd.NewContainerOpts{}
if daemon.UsesSnapshotter() {
newContainerOpts = append(newContainerOpts, containerd.WithSnapshotter(container.Driver))
newContainerOpts = append(newContainerOpts, containerd.WithSnapshot(container.ID))
}
ctr, err := libcontainerd.ReplaceContainer(ctx, daemon.containerd, container.ID, spec, shim, createOptions, newContainerOpts...)
ctr, err := libcontainerd.ReplaceContainer(ctx, daemon.containerd, container.ID, spec, shim, createOptions)
if err != nil {
return setExitCodeFromError(container.SetExitCode, err)
}