Merge pull request #41030 from justincormack/default-sysctls

Add default sysctls to allow ping sockets and privileged ports with no capabilities
This commit is contained in:
Sebastiaan van Stijn 2020-06-04 22:31:51 +02:00 committed by GitHub
commit 888da28d42
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 67 additions and 3 deletions

View file

@ -716,6 +716,14 @@ func WithMounts(daemon *Daemon, c *container.Container) coci.SpecOpts {
}
}
// sysctlExists checks if a sysctl exists; runc will error if we add any that do not actually
// exist, so do not add the default ones if running on an old kernel.
func sysctlExists(s string) bool {
f := filepath.Join("/proc", "sys", strings.Replace(s, ".", "/", -1))
_, err := os.Stat(f)
return err == nil
}
// WithCommonOptions sets common docker options
func WithCommonOptions(daemon *Daemon, c *container.Container) coci.SpecOpts {
return func(ctx context.Context, _ coci.Client, _ *containers.Container, s *coci.Spec) error {
@ -768,6 +776,23 @@ func WithCommonOptions(daemon *Daemon, c *container.Container) coci.SpecOpts {
s.Hostname = c.Config.Hostname
setLinuxDomainname(c, s)
// Add default sysctls that are generally safe and useful; currently we
// grant the capabilities to allow these anyway. You can override if
// you want to restore the original behaviour.
// We do not set network sysctls if network namespace is host, or if we are
// joining an existing namespace, only if we create a new net namespace.
if c.HostConfig.NetworkMode.IsPrivate() {
// We cannot set up ping socket support in a user namespace
if !c.HostConfig.UsernsMode.IsPrivate() && sysctlExists("net.ipv4.ping_group_range") {
// allow unprivileged ICMP echo sockets without CAP_NET_RAW
s.Linux.Sysctl["net.ipv4.ping_group_range"] = "0 2147483647"
}
// allow opening any port less than 1024 without CAP_NET_BIND_SERVICE
if sysctlExists("net.ipv4.ip_unprivileged_port_start") {
s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"] = "0"
}
}
return nil
}
}

View file

@ -114,7 +114,9 @@ func TestSysctlOverride(t *testing.T) {
Domainname: "baz.cyphar.com",
},
HostConfig: &containertypes.HostConfig{
Sysctls: map[string]string{},
NetworkMode: "bridge",
Sysctls: map[string]string{},
UsernsMode: "host",
},
}
d := setupFakeDaemon(t, c)
@ -125,15 +127,51 @@ func TestSysctlOverride(t *testing.T) {
assert.NilError(t, err)
assert.Equal(t, s.Hostname, "foobar")
assert.Equal(t, s.Linux.Sysctl["kernel.domainname"], c.Config.Domainname)
if sysctlExists("net.ipv4.ip_unprivileged_port_start") {
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "0")
}
if sysctlExists("net.ipv4.ping_group_range") {
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ping_group_range"], "0 2147483647")
}
// Set an explicit sysctl.
c.HostConfig.Sysctls["kernel.domainname"] = "foobar.net"
assert.Assert(t, c.HostConfig.Sysctls["kernel.domainname"] != c.Config.Domainname)
c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"] = "1024"
s, err = d.createSpec(c)
assert.NilError(t, err)
assert.Equal(t, s.Hostname, "foobar")
assert.Equal(t, s.Linux.Sysctl["kernel.domainname"], c.HostConfig.Sysctls["kernel.domainname"])
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"])
}
// TestSysctlOverrideHost ensures that any implicit network sysctls are not set
// with host networking
func TestSysctlOverrideHost(t *testing.T) {
c := &container.Container{
Config: &containertypes.Config{},
HostConfig: &containertypes.HostConfig{
NetworkMode: "host",
Sysctls: map[string]string{},
UsernsMode: "host",
},
}
d := setupFakeDaemon(t, c)
defer cleanupFakeContainer(c)
// Ensure that the implicit sysctl is not set
s, err := d.createSpec(c)
assert.NilError(t, err)
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], "")
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ping_group_range"], "")
// Set an explicit sysctl.
c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"] = "1024"
s, err = d.createSpec(c)
assert.NilError(t, err)
assert.Equal(t, s.Linux.Sysctl["net.ipv4.ip_unprivileged_port_start"], c.HostConfig.Sysctls["net.ipv4.ip_unprivileged_port_start"])
}
func TestGetSourceMount(t *testing.T) {

View file

@ -1252,12 +1252,13 @@ func (s *DockerSuite) TestUserNoEffectiveCapabilitiesNetBindService(c *testing.T
// test that a root user has default capability CAP_NET_BIND_SERVICE
dockerCmd(c, "run", "syscall-test", "socket-test")
// test that non root user does not have default capability CAP_NET_BIND_SERVICE
icmd.RunCommand(dockerBinary, "run", "--user", "1000:1000", "syscall-test", "socket-test").Assert(c, icmd.Expected{
// as we allow this via sysctl, also tweak the sysctl back to default
icmd.RunCommand(dockerBinary, "run", "--user", "1000:1000", "--sysctl", "net.ipv4.ip_unprivileged_port_start=1024", "syscall-test", "socket-test").Assert(c, icmd.Expected{
ExitCode: 1,
Err: "Permission denied",
})
// test that root user can drop default capability CAP_NET_BIND_SERVICE
icmd.RunCommand(dockerBinary, "run", "--cap-drop", "net_bind_service", "syscall-test", "socket-test").Assert(c, icmd.Expected{
icmd.RunCommand(dockerBinary, "run", "--cap-drop", "net_bind_service", "--sysctl", "net.ipv4.ip_unprivileged_port_start=1024", "syscall-test", "socket-test").Assert(c, icmd.Expected{
ExitCode: 1,
Err: "Permission denied",
})