Forráskód Böngészése

Merge pull request #47588 from vvoland/v25.0-47558

[25.0 backport] plugin: fix mounting /etc/hosts when running in UserNS
Paweł Gronowski 1 éve
szülő
commit
817bccb1c6

+ 2 - 34
daemon/oci_linux.go

@@ -19,6 +19,7 @@ import (
 	"github.com/docker/docker/container"
 	dconfig "github.com/docker/docker/daemon/config"
 	"github.com/docker/docker/errdefs"
+	"github.com/docker/docker/internal/rootless/mountopts"
 	"github.com/docker/docker/oci"
 	"github.com/docker/docker/oci/caps"
 	"github.com/docker/docker/pkg/idtools"
@@ -31,7 +32,6 @@ import (
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/pkg/errors"
-	"golang.org/x/sys/unix"
 )
 
 const inContainerInitPath = "/sbin/" + dconfig.DefaultInitBinary
@@ -468,38 +468,6 @@ func ensureSharedOrSlave(path string) error {
 	return nil
 }
 
-// Get the set of mount flags that are set on the mount that contains the given
-// path and are locked by CL_UNPRIVILEGED. This is necessary to ensure that
-// bind-mounting "with options" will not fail with user namespaces, due to
-// kernel restrictions that require user namespace mounts to preserve
-// CL_UNPRIVILEGED locked flags.
-func getUnprivilegedMountFlags(path string) ([]string, error) {
-	var statfs unix.Statfs_t
-	if err := unix.Statfs(path, &statfs); err != nil {
-		return nil, err
-	}
-
-	// The set of keys come from https://github.com/torvalds/linux/blob/v4.13/fs/namespace.c#L1034-L1048.
-	unprivilegedFlags := map[uint64]string{
-		unix.MS_RDONLY:     "ro",
-		unix.MS_NODEV:      "nodev",
-		unix.MS_NOEXEC:     "noexec",
-		unix.MS_NOSUID:     "nosuid",
-		unix.MS_NOATIME:    "noatime",
-		unix.MS_RELATIME:   "relatime",
-		unix.MS_NODIRATIME: "nodiratime",
-	}
-
-	var flags []string
-	for mask, flag := range unprivilegedFlags {
-		if uint64(statfs.Flags)&mask == mask {
-			flags = append(flags, flag)
-		}
-	}
-
-	return flags, nil
-}
-
 var (
 	mountPropagationMap = map[string]int{
 		"private":  mount.PRIVATE,
@@ -723,7 +691,7 @@ func withMounts(daemon *Daemon, daemonCfg *configStore, c *container.Container)
 			// when runc sets up the root filesystem, it is already inside a user
 			// namespace, and thus cannot change any flags that are locked.
 			if daemonCfg.RemappedRoot != "" || userns.RunningInUserNS() {
-				unprivOpts, err := getUnprivilegedMountFlags(m.Source)
+				unprivOpts, err := mountopts.UnprivilegedMountFlags(m.Source)
 				if err != nil {
 					return err
 				}

+ 39 - 0
internal/rootless/mountopts/mountopts_linux.go

@@ -0,0 +1,39 @@
+package mountopts
+
+import (
+	"golang.org/x/sys/unix"
+)
+
+// UnprivilegedMountFlags gets the set of mount flags that are set on the mount that contains the given
+// path and are locked by CL_UNPRIVILEGED. This is necessary to ensure that
+// bind-mounting "with options" will not fail with user namespaces, due to
+// kernel restrictions that require user namespace mounts to preserve
+// CL_UNPRIVILEGED locked flags.
+//
+// TODO: Move to github.com/moby/sys/mount, and update BuildKit copy of this code as well (https://github.com/moby/buildkit/blob/v0.13.0/util/rootless/mountopts/mountopts_linux.go#L11-L18)
+func UnprivilegedMountFlags(path string) ([]string, error) {
+	var statfs unix.Statfs_t
+	if err := unix.Statfs(path, &statfs); err != nil {
+		return nil, err
+	}
+
+	// The set of keys comes from https://github.com/torvalds/linux/blob/v4.13/fs/namespace.c#L1034-L1048.
+	unprivilegedFlags := map[uint64]string{
+		unix.MS_RDONLY:     "ro",
+		unix.MS_NODEV:      "nodev",
+		unix.MS_NOEXEC:     "noexec",
+		unix.MS_NOSUID:     "nosuid",
+		unix.MS_NOATIME:    "noatime",
+		unix.MS_RELATIME:   "relatime",
+		unix.MS_NODIRATIME: "nodiratime",
+	}
+
+	var flags []string
+	for mask, flag := range unprivilegedFlags {
+		if uint64(statfs.Flags)&mask == mask {
+			flags = append(flags, flag)
+		}
+	}
+
+	return flags, nil
+}

+ 36 - 0
plugin/v2/plugin_linux.go

@@ -1,3 +1,6 @@
+// FIXME(thaJeztah): remove once we are a module; the go:build directive prevents go from downgrading language version to go1.16:
+//go:build go1.19
+
 package v2 // import "github.com/docker/docker/plugin/v2"
 
 import (
@@ -6,7 +9,10 @@ import (
 	"runtime"
 	"strings"
 
+	"github.com/containerd/containerd/pkg/userns"
 	"github.com/docker/docker/api/types"
+	"github.com/docker/docker/internal/rootless/mountopts"
+	"github.com/docker/docker/internal/sliceutil"
 	"github.com/docker/docker/oci"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/pkg/errors"
@@ -136,5 +142,35 @@ func (p *Plugin) InitSpec(execRoot string) (*specs.Spec, error) {
 		p.modifyRuntimeSpec(&s)
 	}
 
+	// Rootless mode requires modifying the mount flags
+	// https://github.com/moby/moby/issues/47248#issuecomment-1927776700
+	// https://github.com/moby/moby/pull/47558
+	if userns.RunningInUserNS() {
+		for i := range s.Mounts {
+			m := &s.Mounts[i]
+			for _, o := range m.Options {
+				switch o {
+				case "bind", "rbind":
+					if _, err := os.Lstat(m.Source); err != nil {
+						if errors.Is(err, os.ErrNotExist) {
+							continue
+						}
+						return nil, err
+					}
+					// UnprivilegedMountFlags gets the set of mount flags that are set on the mount that contains the given
+					// path and are locked by CL_UNPRIVILEGED. This is necessary to ensure that
+					// bind-mounting "with options" will not fail with user namespaces, due to
+					// kernel restrictions that require user namespace mounts to preserve
+					// CL_UNPRIVILEGED locked flags.
+					unpriv, err := mountopts.UnprivilegedMountFlags(m.Source)
+					if err != nil {
+						return nil, errors.Wrapf(err, "failed to get unprivileged mount flags for %+v", m)
+					}
+					m.Options = sliceutil.Dedup(append(m.Options, unpriv...))
+				}
+			}
+		}
+	}
+
 	return &s, nil
 }