Browse Source

Merge pull request #41030 from justincormack/default-sysctls

Add default sysctls to allow ping sockets and privileged ports with no capabilities
Sebastiaan van Stijn 5 years ago
parent
commit
888da28d42
3 changed files with 67 additions and 3 deletions
  1. 25 0
      daemon/oci_linux.go
  2. 39 1
      daemon/oci_linux_test.go
  3. 3 2
      integration-cli/docker_cli_run_unix_test.go

+ 25 - 0
daemon/oci_linux.go

@@ -716,6 +716,14 @@ func WithMounts(daemon *Daemon, c *container.Container) coci.SpecOpts {
 	}
 }
 
+// sysctlExists reports whether sysctl s has an entry under /proc/sys (dots in the name become path
+// separators). runc errors on sysctls that do not exist, so defaults are skipped on old kernels.
+func sysctlExists(s string) bool {
+	f := filepath.Join("/proc", "sys", strings.Replace(s, ".", "/", -1))
+	_, err := os.Stat(f)
+	return err == nil // any Stat failure, not just a missing file, reports false
+}
+
 // WithCommonOptions sets common docker options
 func WithCommonOptions(daemon *Daemon, c *container.Container) coci.SpecOpts {
 	return func(ctx context.Context, _ coci.Client, _ *containers.Container, s *coci.Spec) error {
@@ -768,6 +776,23 @@ func WithCommonOptions(daemon *Daemon, c *container.Container) coci.SpecOpts {
 		s.Hostname = c.Config.Hostname
 		setLinuxDomainname(c, s)
 
+		// Add default sysctls that are generally safe and useful; currently we
+		// grant the capabilities to allow these anyway. You can override if
+		// you want to restore the original behaviour.
+		// We do not set network sysctls if network namespace is host, or if we are
+		// joining an existing namespace, only if we create a new net namespace.
+		if c.HostConfig.NetworkMode.IsPrivate() {
+			// We cannot set up ping socket support in a user namespace
+			if !c.HostConfig.UsernsMode.IsPrivate() && sysctlExists("net.ipv4.ping_group_range") {
+				// allow unprivileged ICMP echo sockets without CAP_NET_RAW
+				s.Linux.Sysctl["net.ipv4.ping_group_range"] = "0 2147483647"
+			}
+			// allow opening any port less than 1024 without CAP_NET_BIND_SERVICE
+			if sysctlExists("net.ipv4.ip_unprivileged_port_start") {
+				s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"] = "0"
+			}
+		}
+
 		return nil
 	}
 }

+ 39 - 1
daemon/oci_linux_test.go

@@ -114,7 +114,9 @@ func TestSysctlOverride(t *testing.T) {
 			Domainname: "baz.cyphar.com",
 		},
 		HostConfig: &containertypes.HostConfig{
-			Sysctls: map[string]string{},
+			NetworkMode: "bridge",
+			Sysctls:     map[string]string{},
+			UsernsMode:  "host",
 		},
 	}
 	d := setupFakeDaemon(t, c)
@@ -125,15 +127,51 @@ func TestSysctlOverride(t *testing.T) {
 	assert.NilError(t, err)
 	assert.Equal(t, s.Hostname, "foobar")
 	assert.Equal(t, s.Linux.Sysctl["kernel.domainname"], c.Config.Domainname)
+	if sysctlExists("net.ipv4.ip_unprivileged_port_start") {
+		assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0")
+	}
+	if sysctlExists("net.ipv4.ping_group_range") {
+		assert.Equal(t, s.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
+	}
 
 	// Set an explicit sysctl.
 	c.HostConfig.Sysctls["kernel.domainname"] = "foobar.net"
 	assert.Assert(t, c.HostConfig.Sysctls["kernel.domainname"] != c.Config.Domainname)
+	c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"] = "1024"
 
 	s, err = d.createSpec(c)
 	assert.NilError(t, err)
 	assert.Equal(t, s.Hostname, "foobar")
 	assert.Equal(t, s.Linux.Sysctl["kernel.domainname"], c.HostConfig.Sysctls["kernel.domainname"])
+	assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"])
+}
+
+// TestSysctlOverrideHost ensures that the implicit network sysctls are not added
+// when the container uses host networking, while explicit ones are still applied.
+func TestSysctlOverrideHost(t *testing.T) {
+	c := &container.Container{
+		Config: &containertypes.Config{},
+		HostConfig: &containertypes.HostConfig{
+			NetworkMode: "host",
+			Sysctls:     map[string]string{},
+			UsernsMode:  "host",
+		},
+	}
+	d := setupFakeDaemon(t, c)
+	defer cleanupFakeContainer(c)
+
+	// With no sysctls requested, neither default may appear in the generated spec.
+	s, err := d.createSpec(c)
+	assert.NilError(t, err)
+	assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "")
+	assert.Equal(t, s.Linux.Sysctl["net.ipv4.ping_group_range"], "")
+
+	// An explicitly requested sysctl must be carried into the spec unchanged.
+	c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"] = "1024"
+
+	s, err = d.createSpec(c)
+	assert.NilError(t, err)
+	assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"])
 }
 
 func TestGetSourceMount(t *testing.T) {

+ 3 - 2
integration-cli/docker_cli_run_unix_test.go

@@ -1252,12 +1252,13 @@ func (s *DockerSuite) TestUserNoEffectiveCapabilitiesNetBindService(c *testing.T
 	// test that a root user has default capability CAP_NET_BIND_SERVICE
 	dockerCmd(c, "run", "syscall-test", "socket-test")
 	// test that non root user does not have default capability CAP_NET_BIND_SERVICE
-	icmd.RunCommand(dockerBinary, "run", "--user", "1000:1000", "syscall-test", "socket-test").Assert(c, icmd.Expected{
+	// as we allow this via sysctl, also tweak the sysctl back to default
+	icmd.RunCommand(dockerBinary, "run", "--user", "1000:1000", "--sysctl", "net.ipv4.ip_unprivileged_port_start=1024", "syscall-test", "socket-test").Assert(c, icmd.Expected{
 		ExitCode: 1,
 		Err:      "Permission denied",
 	})
 	// test that root user can drop default capability CAP_NET_BIND_SERVICE
-	icmd.RunCommand(dockerBinary, "run", "--cap-drop", "net_bind_service", "syscall-test", "socket-test").Assert(c, icmd.Expected{
+	icmd.RunCommand(dockerBinary, "run", "--cap-drop", "net_bind_service", "--sysctl", "net.ipv4.ip_unprivileged_port_start=1024", "syscall-test", "socket-test").Assert(c, icmd.Expected{
 		ExitCode: 1,
 		Err:      "Permission denied",
 	})