Explorar o código

Use rslave propagation for mounts from daemon root

By default, if a user requests a bind mount it uses private propagation.
When the source path is a path within the daemon root this, along with
some other propagation values that the user can use, causes issues when
the daemon tries to remove a mountpoint because a container will then
have a private reference to that mount which prevents removal.

Unmouting with MNT_DETATCH can help this scenario on newer kernels, but
ultimately this is just covering up the problem and doesn't actually
free up the underlying resources until all references are destroyed.

This change does essentially 2 things:

1. Change the default propagation when unspecified to `rslave` when the
source path is within the daemon root path or a parent of the daemon
root (because everything is using rbinds).
2. Creates a validation error on create when the user tries to specify
an unacceptable propagation mode for these paths...
basically the only two acceptable modes are `rslave` and `rshared`.

In cases where we have used the new default propagation but the
underlying filesystem is not setup to handle it (fs must hvae at least
rshared propagation) instead of erroring out like we normally would,
this falls back to the old default mode of `private`, which preserves
backwards compatibility.

Signed-off-by: Brian Goff <cpuguy83@gmail.com>
Brian Goff %!s(int64=7) %!d(string=hai) anos
pai
achega
589a0afa8c

+ 28 - 6
daemon/oci_linux.go

@@ -604,7 +604,8 @@ func setMounts(daemon *Daemon, s *specs.Spec, c *container.Container, mounts []c
 		//
 		// For private volumes any root propagation value should work.
 		pFlag := mountPropagationMap[m.Propagation]
-		if pFlag == mount.SHARED || pFlag == mount.RSHARED {
+		switch pFlag {
+		case mount.SHARED, mount.RSHARED:
 			if err := ensureShared(m.Source); err != nil {
 				return err
 			}
@@ -612,13 +613,34 @@ func setMounts(daemon *Daemon, s *specs.Spec, c *container.Container, mounts []c
 			if rootpg != mount.SHARED && rootpg != mount.RSHARED {
 				s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.SHARED]
 			}
-		} else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE {
+		case mount.SLAVE, mount.RSLAVE:
+			var fallback bool
 			if err := ensureSharedOrSlave(m.Source); err != nil {
-				return err
+				// For backwards compatability purposes, treat mounts from the daemon root
+				// as special since we automatically add rslave propagation to these mounts
+				// when the user did not set anything, so we should fallback to the old
+				// behavior which is to use private propagation which is normally the
+				// default.
+				if !strings.HasPrefix(m.Source, daemon.root) && !strings.HasPrefix(daemon.root, m.Source) {
+					return err
+				}
+
+				cm, ok := c.MountPoints[m.Destination]
+				if !ok {
+					return err
+				}
+				if cm.Spec.BindOptions != nil && cm.Spec.BindOptions.Propagation != "" {
+					// This means the user explicitly set a propagation, do not fallback in that case.
+					return err
+				}
+				fallback = true
+				logrus.WithField("container", c.ID).WithField("source", m.Source).Warn("Falling back to default propagation for bind source in daemon root")
 			}
-			rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
-			if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
-				s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.RSLAVE]
+			if !fallback {
+				rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
+				if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
+					s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.RSLAVE]
+				}
 			}
 		}
 

+ 15 - 0
daemon/volumes.go

@@ -10,6 +10,7 @@ import (
 
 	"github.com/docker/docker/api/types"
 	containertypes "github.com/docker/docker/api/types/container"
+	"github.com/docker/docker/api/types/mount"
 	mounttypes "github.com/docker/docker/api/types/mount"
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/errdefs"
@@ -146,6 +147,13 @@ func (daemon *Daemon) registerMountPoints(container *container.Container, hostCo
 		if err != nil {
 			return err
 		}
+		needsSlavePropagation, err := daemon.validateBindDaemonRoot(bind.Spec)
+		if err != nil {
+			return err
+		}
+		if needsSlavePropagation {
+			bind.Propagation = mount.PropagationRSlave
+		}
 
 		// #10618
 		_, tmpfsExists := hostConfig.Tmpfs[bind.Destination]
@@ -178,6 +186,13 @@ func (daemon *Daemon) registerMountPoints(container *container.Container, hostCo
 		if err != nil {
 			return errdefs.InvalidParameter(err)
 		}
+		needsSlavePropagation, err := daemon.validateBindDaemonRoot(mp.Spec)
+		if err != nil {
+			return err
+		}
+		if needsSlavePropagation {
+			mp.Propagation = mount.PropagationRSlave
+		}
 
 		if binds[mp.Destination] {
 			return duplicateMountPointError(cfg.Target)

+ 36 - 0
daemon/volumes_linux.go

@@ -0,0 +1,36 @@
+package daemon
+
+import (
+	"strings"
+
+	"github.com/docker/docker/api/types/mount"
+	"github.com/docker/docker/errdefs"
+	"github.com/pkg/errors"
+)
+
+// validateBindDaemonRoot ensures that if a given mountpoint's source is within
+// the daemon root path, that the propagation is setup to prevent a container
+// from holding private refereneces to a mount within the daemon root, which
+// can cause issues when the daemon attempts to remove the mountpoint.
+func (daemon *Daemon) validateBindDaemonRoot(m mount.Mount) (bool, error) {
+	if m.Type != mount.TypeBind {
+		return false, nil
+	}
+
+	// check if the source is within the daemon root, or if the daemon root is within the source
+	if !strings.HasPrefix(m.Source, daemon.root) && !strings.HasPrefix(daemon.root, m.Source) {
+		return false, nil
+	}
+
+	if m.BindOptions == nil {
+		return true, nil
+	}
+
+	switch m.BindOptions.Propagation {
+	case mount.PropagationRSlave, mount.PropagationRShared, "":
+		return m.BindOptions.Propagation == "", nil
+	default:
+	}
+
+	return false, errdefs.InvalidParameter(errors.Errorf(`invalid mount config: must use either propagation mode "rslave" or "rshared" when mount source is within the daemon root, daemon root: %q, bind mount source: %q, propagation: %q`, daemon.root, m.Source, m.BindOptions.Propagation))
+}

+ 56 - 0
daemon/volumes_linux_test.go

@@ -0,0 +1,56 @@
+package daemon
+
+import (
+	"path/filepath"
+	"testing"
+
+	"github.com/docker/docker/api/types/mount"
+)
+
+func TestBindDaemonRoot(t *testing.T) {
+	t.Parallel()
+	d := &Daemon{root: "/a/b/c/daemon"}
+	for _, test := range []struct {
+		desc      string
+		opts      *mount.BindOptions
+		needsProp bool
+		err       bool
+	}{
+		{desc: "nil propagation settings", opts: nil, needsProp: true, err: false},
+		{desc: "empty propagation settings", opts: &mount.BindOptions{}, needsProp: true, err: false},
+		{desc: "private propagation", opts: &mount.BindOptions{Propagation: mount.PropagationPrivate}, err: true},
+		{desc: "rprivate propagation", opts: &mount.BindOptions{Propagation: mount.PropagationRPrivate}, err: true},
+		{desc: "slave propagation", opts: &mount.BindOptions{Propagation: mount.PropagationSlave}, err: true},
+		{desc: "rslave propagation", opts: &mount.BindOptions{Propagation: mount.PropagationRSlave}, err: false, needsProp: false},
+		{desc: "shared propagation", opts: &mount.BindOptions{Propagation: mount.PropagationShared}, err: true},
+		{desc: "rshared propagation", opts: &mount.BindOptions{Propagation: mount.PropagationRSlave}, err: false, needsProp: false},
+	} {
+		t.Run(test.desc, func(t *testing.T) {
+			test := test
+			for desc, source := range map[string]string{
+				"source is root":    d.root,
+				"source is subpath": filepath.Join(d.root, "a", "b"),
+				"source is parent":  filepath.Dir(d.root),
+				"source is /":       "/",
+			} {
+				t.Run(desc, func(t *testing.T) {
+					mount := mount.Mount{
+						Type:        mount.TypeBind,
+						Source:      source,
+						BindOptions: test.opts,
+					}
+					needsProp, err := d.validateBindDaemonRoot(mount)
+					if (err != nil) != test.err {
+						t.Fatalf("expected err=%v, got: %v", test.err, err)
+					}
+					if test.err {
+						return
+					}
+					if test.needsProp != needsProp {
+						t.Fatalf("expected needsProp=%v, got: %v", test.needsProp, needsProp)
+					}
+				})
+			}
+		})
+	}
+}

+ 5 - 0
daemon/volumes_windows.go

@@ -3,6 +3,7 @@ package daemon
 import (
 	"sort"
 
+	"github.com/docker/docker/api/types/mount"
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/volume"
@@ -44,3 +45,7 @@ func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, er
 func setBindModeIfNull(bind *volume.MountPoint) {
 	return
 }
+
+func (daemon *Daemon) validateBindDaemonRoot(m mount.Mount) (bool, error) {
+	return false, nil
+}

+ 132 - 4
integration/container/mounts_linux_test.go

@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"context"
 	"fmt"
+	"path/filepath"
 	"testing"
 
 	"github.com/docker/docker/api/types"
@@ -12,6 +13,7 @@ import (
 	"github.com/docker/docker/api/types/network"
 	"github.com/docker/docker/client"
 	"github.com/docker/docker/integration-cli/daemon"
+	"github.com/docker/docker/integration/util/request"
 	"github.com/docker/docker/pkg/stdcopy"
 	"github.com/docker/docker/pkg/system"
 	"github.com/gotestyourself/gotestyourself/fs"
@@ -51,10 +53,10 @@ func TestContainerShmNoLeak(t *testing.T) {
 	hc := container.HostConfig{
 		Mounts: []mount.Mount{
 			{
-				Type:        mount.TypeBind,
-				Source:      d.Root,
-				Target:      "/testdaemonroot",
-				BindOptions: &mount.BindOptions{Propagation: mount.PropagationRPrivate}},
+				Type:   mount.TypeBind,
+				Source: d.Root,
+				Target: "/testdaemonroot",
+			},
 		},
 	}
 	cfg.Cmd = []string{"/bin/sh", "-c", fmt.Sprintf("mount | grep testdaemonroot | grep containers | grep %s", ctr.ID)}
@@ -141,3 +143,129 @@ func TestContainerNetworkMountsNoChown(t *testing.T) {
 	require.NoError(t, err)
 	assert.Equal(t, uint32(0), statT.UID(), "bind mounted network file should not change ownership from root")
 }
+
+func TestMountDaemonRoot(t *testing.T) {
+	t.Parallel()
+
+	client := request.NewAPIClient(t)
+	ctx := context.Background()
+	info, err := client.Info(ctx)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	for _, test := range []struct {
+		desc        string
+		propagation mount.Propagation
+		expected    mount.Propagation
+	}{
+		{
+			desc:        "default",
+			propagation: "",
+			expected:    mount.PropagationRSlave,
+		},
+		{
+			desc:        "private",
+			propagation: mount.PropagationPrivate,
+		},
+		{
+			desc:        "rprivate",
+			propagation: mount.PropagationRPrivate,
+		},
+		{
+			desc:        "slave",
+			propagation: mount.PropagationSlave,
+		},
+		{
+			desc:        "rslave",
+			propagation: mount.PropagationRSlave,
+			expected:    mount.PropagationRSlave,
+		},
+		{
+			desc:        "shared",
+			propagation: mount.PropagationShared,
+		},
+		{
+			desc:        "rshared",
+			propagation: mount.PropagationRShared,
+			expected:    mount.PropagationRShared,
+		},
+	} {
+		t.Run(test.desc, func(t *testing.T) {
+			test := test
+			t.Parallel()
+
+			propagationSpec := fmt.Sprintf(":%s", test.propagation)
+			if test.propagation == "" {
+				propagationSpec = ""
+			}
+			bindSpecRoot := info.DockerRootDir + ":" + "/foo" + propagationSpec
+			bindSpecSub := filepath.Join(info.DockerRootDir, "containers") + ":/foo" + propagationSpec
+
+			for name, hc := range map[string]*container.HostConfig{
+				"bind root":    {Binds: []string{bindSpecRoot}},
+				"bind subpath": {Binds: []string{bindSpecSub}},
+				"mount root": {
+					Mounts: []mount.Mount{
+						{
+							Type:        mount.TypeBind,
+							Source:      info.DockerRootDir,
+							Target:      "/foo",
+							BindOptions: &mount.BindOptions{Propagation: test.propagation},
+						},
+					},
+				},
+				"mount subpath": {
+					Mounts: []mount.Mount{
+						{
+							Type:        mount.TypeBind,
+							Source:      filepath.Join(info.DockerRootDir, "containers"),
+							Target:      "/foo",
+							BindOptions: &mount.BindOptions{Propagation: test.propagation},
+						},
+					},
+				},
+			} {
+				t.Run(name, func(t *testing.T) {
+					hc := hc
+					t.Parallel()
+
+					c, err := client.ContainerCreate(ctx, &container.Config{
+						Image: "busybox",
+						Cmd:   []string{"true"},
+					}, hc, nil, "")
+
+					if err != nil {
+						if test.expected != "" {
+							t.Fatal(err)
+						}
+						// expected an error, so this is ok and should not continue
+						return
+					}
+					if test.expected == "" {
+						t.Fatal("expected create to fail")
+					}
+
+					defer func() {
+						if err := client.ContainerRemove(ctx, c.ID, types.ContainerRemoveOptions{Force: true}); err != nil {
+							panic(err)
+						}
+					}()
+
+					inspect, err := client.ContainerInspect(ctx, c.ID)
+					if err != nil {
+						t.Fatal(err)
+					}
+					if len(inspect.Mounts) != 1 {
+						t.Fatalf("unexpected number of mounts: %+v", inspect.Mounts)
+					}
+
+					m := inspect.Mounts[0]
+					if m.Propagation != test.expected {
+						t.Fatalf("got unexpected propagation mode, expected %q, got: %v", test.expected, m.Propagation)
+					}
+				})
+			}
+		})
+	}
+}