浏览代码

Merge fb64cc3cce8a8d8758da71c7f7fda5b8c23603e6 into 801fd16e3e48b7638e6fee83facbb62de9a03cef

Brian Goff 1 年之前
父节点
当前提交
a68d9c325b
共有 3 个文件被更改,包括 75 次插入5 次删除
  1. 16 5
      daemon/oci_linux.go
  2. 58 0
      integration/container/default_userns_privs_test.go
  3. 1 0
      oci/caps/defaults.go

+ 16 - 5
daemon/oci_linux.go

@@ -255,11 +255,21 @@ func WithNamespaces(daemon *Daemon, c *container.Container) coci.SpecOpts {
 		if c.HostConfig.UsernsMode.IsPrivate() {
 		if c.HostConfig.UsernsMode.IsPrivate() {
 			if uidMap := daemon.idMapping.UIDMaps; uidMap != nil {
 			if uidMap := daemon.idMapping.UIDMaps; uidMap != nil {
 				userNS = true
 				userNS = true
-				setNamespace(s, specs.LinuxNamespace{
-					Type: specs.UserNamespace,
-				})
 				s.Linux.UIDMappings = specMapping(uidMap)
 				s.Linux.UIDMappings = specMapping(uidMap)
 				s.Linux.GIDMappings = specMapping(daemon.idMapping.GIDMaps)
 				s.Linux.GIDMappings = specMapping(daemon.idMapping.GIDMaps)
+			} else {
+				if !c.HostConfig.Privileged {
+					setNamespace(s, specs.LinuxNamespace{
+						Type: specs.UserNamespace,
+					})
+					userNS = true
+					s.Linux.UIDMappings = []specs.LinuxIDMapping{
+						{Size: 65536},
+					}
+					s.Linux.GIDMappings = []specs.LinuxIDMapping{
+						{Size: 65536},
+					}
+				}
 			}
 			}
 		}
 		}
 		// network
 		// network
@@ -771,10 +781,11 @@ func withCommonOptions(daemon *Daemon, daemonCfg *dconfig.Config, c *container.C
 		// joining an existing namespace, only if we create a new net namespace.
 		// joining an existing namespace, only if we create a new net namespace.
 		if c.HostConfig.NetworkMode.IsPrivate() {
 		if c.HostConfig.NetworkMode.IsPrivate() {
 			// We cannot set up ping socket support in a user namespace
 			// We cannot set up ping socket support in a user namespace
-			userNS := daemonCfg.RemappedRoot != "" && c.HostConfig.UsernsMode.IsPrivate()
+			// userNS := daemonCfg.RemappedRoot != "" && c.HostConfig.UsernsMode.IsPrivate()
+			userNS := !c.HostConfig.Privileged
 			if !userNS && !userns.RunningInUserNS() && sysctlExists("net.ipv4.ping_group_range") {
 			if !userNS && !userns.RunningInUserNS() && sysctlExists("net.ipv4.ping_group_range") {
 				// allow unprivileged ICMP echo sockets without CAP_NET_RAW
 				// allow unprivileged ICMP echo sockets without CAP_NET_RAW
-				s.Linux.Sysctl["net.ipv4.ping_group_range"] = "0 2147483647"
+				// s.Linux.Sysctl["net.ipv4.ping_group_range"] = "0 2147483647"
 			}
 			}
 			// allow opening any port less than 1024 without CAP_NET_BIND_SERVICE
 			// allow opening any port less than 1024 without CAP_NET_BIND_SERVICE
 			if sysctlExists("net.ipv4.ip_unprivileged_port_start") {
 			if sysctlExists("net.ipv4.ip_unprivileged_port_start") {

+ 58 - 0
integration/container/default_userns_privs_test.go

@@ -0,0 +1,58 @@
+package container // import "github.com/docker/docker/integration/container"
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/docker/docker/integration/internal/container"
+	"gotest.tools/v3/assert"
+	"gotest.tools/v3/icmd"
+)
+
+// TestDefaultUsernsPrivs checks the user namespace a container is started in
+// depending on whether it is privileged:
+//
+//   - two privileged containers must report the same user namespace, and on a
+//     local daemon that namespace must match the one of the test process;
+//   - a non-privileged container must report a different user namespace;
+//   - a non-privileged container must still be able to perform a bind mount.
+func TestDefaultUsernsPrivs(t *testing.T) {
+	ctx := setupTest(t)
+
+	apiClient := testEnv.APIClient()
+
+	// Make sure that 2 privileged containers have the same user namespace
+	hostNs1Res := container.RunAttach(ctx, t, apiClient, container.WithPrivileged(true), container.WithCmd("readlink", "/proc/self/ns/user"))
+	assert.Equal(t, hostNs1Res.ExitCode, 0, hostNs1Res.Stderr)
+	hostns1 := strings.TrimSpace(hostNs1Res.Stdout.String())
+	assert.Assert(t, hostns1 != "", "user namespace should not be empty")
+
+	hostNs2Res := container.RunAttach(ctx, t, apiClient, container.WithPrivileged(true), container.WithCmd("readlink", "/proc/self/ns/user"))
+	assert.Equal(t, hostNs2Res.ExitCode, 0, hostNs2Res.Stderr)
+	// Read the output of the *second* container; reading the first result
+	// again would make the equality check below compare a value with itself.
+	hostns2 := strings.TrimSpace(hostNs2Res.Stdout.String())
+	assert.Assert(t, hostns2 != "", "user namespace should not be empty")
+
+	assert.Equal(t, hostns1, hostns2, "privileged user namespaces should be the same")
+
+	if testEnv.IsLocalDaemon() {
+		// Make sure the privileged container has the same user namespace as the host
+		res := icmd.RunCommand("readlink", "/proc/self/ns/user")
+		res.Assert(t, icmd.Success)
+
+		out := strings.TrimSpace(res.Combined())
+		assert.NilError(t, res.Error, out)
+		assert.Equal(t, hostns1, out, "privileged user namespace should be the same as the host")
+	}
+
+	// A non-privileged container is expected to run in its own user namespace.
+	res := container.RunAttach(ctx, t, apiClient, container.WithCmd("readlink", "/proc/self/ns/user"))
+	assert.Equal(t, res.ExitCode, 0, res.Stderr)
+	cUserns := strings.TrimSpace(res.Stdout.String())
+	assert.Assert(t, cUserns != "", "user namespace should not be empty")
+	assert.Assert(t, cUserns != hostns1, "user namespace should not be the same as the host")
+
+	// Shell script run inside a non-privileged container: bind-mount one
+	// directory onto another and verify the file is visible through the mount.
+	cmd := `
+set -e
+mkdir /test1
+mkdir /test2
+touch /test1/hello
+mount --bind /test1 /test2
+[ -f /test2/hello ]
+`
+
+	// TODO: For some reason this is failing in the test env but works just fine when running manually.
+	res = container.RunAttach(ctx, t, apiClient, container.WithCmd("sh", "-c", cmd))
+	assert.Equal(t, res.ExitCode, 0, res.Stderr)
+}

+ 1 - 0
oci/caps/defaults.go

@@ -3,6 +3,7 @@ package caps // import "github.com/docker/docker/oci/caps"
 // DefaultCapabilities returns a Linux kernel default capabilities
 // DefaultCapabilities returns a Linux kernel default capabilities
 func DefaultCapabilities() []string {
 func DefaultCapabilities() []string {
 	return []string{
 	return []string{
+		"CAP_SYS_ADMIN",
 		"CAP_CHOWN",
 		"CAP_CHOWN",
 		"CAP_DAC_OVERRIDE",
 		"CAP_DAC_OVERRIDE",
 		"CAP_FSETID",
 		"CAP_FSETID",