Start containers in their own cgroup namespaces

This is enabled for all containers that are not run with --privileged,
if the kernel supports it.

Fixes #38332

Signed-off-by: Rob Gulewich <rgulewich@netflix.com>
This commit is contained in:
Rob Gulewich 2018-12-14 15:07:19 -08:00
parent b20a14b546
commit 256eb04d69
10 changed files with 178 additions and 43 deletions

View file

@ -81,26 +81,27 @@ var (
// Daemon holds information about the Docker daemon. // Daemon holds information about the Docker daemon.
type Daemon struct { type Daemon struct {
ID string ID string
repository string repository string
containers container.Store containers container.Store
containersReplica container.ViewDB containersReplica container.ViewDB
execCommands *exec.Store execCommands *exec.Store
imageService *images.ImageService imageService *images.ImageService
idIndex *truncindex.TruncIndex idIndex *truncindex.TruncIndex
configStore *config.Config configStore *config.Config
statsCollector *stats.Collector statsCollector *stats.Collector
defaultLogConfig containertypes.LogConfig defaultLogConfig containertypes.LogConfig
RegistryService registry.Service RegistryService registry.Service
EventsService *events.Events EventsService *events.Events
netController libnetwork.NetworkController netController libnetwork.NetworkController
volumes *volumesservice.VolumesService volumes *volumesservice.VolumesService
discoveryWatcher discovery.Reloader discoveryWatcher discovery.Reloader
root string root string
seccompEnabled bool seccompEnabled bool
apparmorEnabled bool apparmorEnabled bool
shutdown bool cgroupNamespacesEnabled bool
idMapping *idtools.IdentityMapping shutdown bool
idMapping *idtools.IdentityMapping
// TODO: move graphDrivers field to an InfoService // TODO: move graphDrivers field to an InfoService
graphDrivers map[string]string // By operating system graphDrivers map[string]string // By operating system
@ -1020,6 +1021,7 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
d.idMapping = idMapping d.idMapping = idMapping
d.seccompEnabled = sysInfo.Seccomp d.seccompEnabled = sysInfo.Seccomp
d.apparmorEnabled = sysInfo.AppArmor d.apparmorEnabled = sysInfo.AppArmor
d.cgroupNamespacesEnabled = sysInfo.CgroupNamespaces
d.linkIndex = newLinkIndex() d.linkIndex = newLinkIndex()

View file

@ -307,8 +307,13 @@ func WithNamespaces(daemon *Daemon, c *container.Container) coci.SpecOpts {
s.Hostname = "" s.Hostname = ""
} }
return nil // cgroup
if daemon.cgroupNamespacesEnabled && !c.HostConfig.Privileged {
nsCgroup := specs.LinuxNamespace{Type: "cgroup"}
setNamespace(s, nsCgroup)
} }
return nil
} }
func specMapping(s []idtools.IDMap) []specs.LinuxIDMapping { func specMapping(s []idtools.IDMap) []specs.LinuxIDMapping {

View file

@ -3984,18 +3984,40 @@ func (s *DockerSuite) TestBuildContainerWithCgroupParent(c *check.C) {
if !found { if !found {
c.Fatalf("unable to find self memory cgroup path. CgroupsPath: %v", selfCgroupPaths) c.Fatalf("unable to find self memory cgroup path. CgroupsPath: %v", selfCgroupPaths)
} }
result := buildImage("buildcgroupparent",
cli.WithFlags("--cgroup-parent", cgroupParent), doneCh := make(chan string)
build.WithDockerfile(`
// If cgroup namespaces are enabled, then processes running inside the container won't
// be able to see the parent namespace. Check that they have the correct parents from
// the host, which has the non-namespaced view of the hierarchy.
go func() {
result := buildImage("buildcgroupparent",
cli.WithFlags("--cgroup-parent", cgroupParent),
build.WithDockerfile(`
FROM busybox FROM busybox
RUN cat /proc/self/cgroup RUN sleep 10
`)) `))
result.Assert(c, icmd.Success) result.Assert(c, icmd.Success)
m, err := regexp.MatchString(fmt.Sprintf("memory:.*/%s/.*", cgroupParent), result.Combined()) doneCh <- "done"
assert.NilError(c, err) }()
// Wait until the build is well into the sleep
time.Sleep(3 * time.Second)
out, _, err := dockerCmdWithError("ps", "-q", "-l")
c.Assert(err, check.IsNil)
cID := strings.TrimSpace(out)
pid := inspectField(c, cID, "State.Pid")
paths := ReadCgroupPathsForPid(c, pid)
m, err := regexp.MatchString(fmt.Sprintf("memory:.*/%s/.*", cgroupParent), paths)
c.Assert(err, check.IsNil)
if !m { if !m {
c.Fatalf("There is no expected memory cgroup with parent /%s/: %s", cgroupParent, result.Combined()) c.Fatalf("There is no expected memory cgroup with parent /%s/: %s", cgroupParent, paths)
} }
// Wait for the build to complete, otherwise it will exit with an error
<-doneCh
} }
// FIXME(vdemeester) could be a unit test // FIXME(vdemeester) could be a unit test

View file

@ -1787,7 +1787,8 @@ func (s *DockerDaemonSuite) TestDaemonRestartContainerLinksRestart(c *check.C) {
} }
func (s *DockerDaemonSuite) TestDaemonCgroupParent(c *check.C) { func (s *DockerDaemonSuite) TestDaemonCgroupParent(c *check.C) {
testRequires(c, DaemonIsLinux) // Test requires local filesystem access on a Linux host
testRequires(c, DaemonIsLinux, testEnv.IsLocalDaemon)
cgroupParent := "test" cgroupParent := "test"
name := "cgroup-test" name := "cgroup-test"
@ -1795,10 +1796,20 @@ func (s *DockerDaemonSuite) TestDaemonCgroupParent(c *check.C) {
s.d.StartWithBusybox(c, "--cgroup-parent", cgroupParent) s.d.StartWithBusybox(c, "--cgroup-parent", cgroupParent)
defer s.d.Restart(c) defer s.d.Restart(c)
out, err := s.d.Cmd("run", "--name", name, "busybox", "cat", "/proc/self/cgroup") out, err := s.d.Cmd("run", "--name", name, "-d", "busybox", "top")
assert.NilError(c, err) c.Assert(err, checker.IsNil)
cgroupPaths := ParseCgroupPaths(string(out))
c.Assert(len(cgroupPaths), checker.Not(checker.Equals), 0, check.Commentf("unexpected output - %q", string(out))) // If cgroup namespaces are enabled, then processes running inside the container won't
// be able to see the parent namespace. Check that they have the correct parents from
// the host, which has the non-namespaced view of the hierarchy.
pid, err := s.d.Cmd("inspect", "-f", "{{.State.Pid}}", name)
c.Assert(err, checker.IsNil)
pid = strings.TrimSpace(string(pid))
paths := ReadCgroupPathsForPid(c, pid)
cgroupPaths := ParseCgroupPaths(paths)
c.Assert(len(cgroupPaths), checker.Not(checker.Equals), 0, check.Commentf("unexpected output - %q", paths))
out, err = s.d.Cmd("inspect", "-f", "{{.Id}}", name) out, err = s.d.Cmd("inspect", "-f", "{{.Id}}", name)
assert.NilError(c, err) assert.NilError(c, err)
id := strings.TrimSpace(string(out)) id := strings.TrimSpace(string(out))

View file

@ -3241,8 +3241,8 @@ func (s *DockerSuite) TestRunWithUlimits(c *check.C) {
} }
func (s *DockerSuite) TestRunContainerWithCgroupParent(c *check.C) { func (s *DockerSuite) TestRunContainerWithCgroupParent(c *check.C) {
// Not applicable on Windows as uses Unix specific functionality // Test requires local filesystem access on a Linux host
testRequires(c, DaemonIsLinux) testRequires(c, DaemonIsLinux, testEnv.IsLocalDaemon)
// cgroup-parent relative path // cgroup-parent relative path
testRunContainerWithCgroupParent(c, "test", "cgroup-test") testRunContainerWithCgroupParent(c, "test", "cgroup-test")
@ -3252,14 +3252,23 @@ func (s *DockerSuite) TestRunContainerWithCgroupParent(c *check.C) {
} }
func testRunContainerWithCgroupParent(c *check.C, cgroupParent, name string) { func testRunContainerWithCgroupParent(c *check.C, cgroupParent, name string) {
out, _, err := dockerCmdWithError("run", "--cgroup-parent", cgroupParent, "--name", name, "busybox", "cat", "/proc/self/cgroup") out, _, err := dockerCmdWithError("run", "--cgroup-parent", cgroupParent, "--name", name, "-d", "busybox", "top")
if err != nil { if err != nil {
c.Fatalf("unexpected failure when running container with --cgroup-parent option - %s\n%v", string(out), err) c.Fatalf("unexpected failure when running container with --cgroup-parent option - %s\n%v", string(out), err)
} }
cgroupPaths := ParseCgroupPaths(string(out)) cID := strings.TrimSpace(out)
// If cgroup namespaces are enabled, then processes running inside the container won't
// be able to see the parent namespace. Check that they have the correct parents from
// the host, which has the non-namespaced view of the hierarchy.
pid := inspectField(c, cID, "State.Pid")
paths := ReadCgroupPathsForPid(c, pid)
cgroupPaths := ParseCgroupPaths(paths)
if len(cgroupPaths) == 0 { if len(cgroupPaths) == 0 {
c.Fatalf("unexpected output - %q", string(out)) c.Fatalf("unexpected output - %q", string(paths))
} }
id := getIDByName(c, name) id := getIDByName(c, name)
expectedCgroup := path.Join(cgroupParent, id) expectedCgroup := path.Join(cgroupParent, id)
found := false found := false
@ -3285,21 +3294,29 @@ func (s *DockerSuite) TestRunInvalidCgroupParent(c *check.C) {
} }
func testRunInvalidCgroupParent(c *check.C, cgroupParent, cleanCgroupParent, name string) { func testRunInvalidCgroupParent(c *check.C, cgroupParent, cleanCgroupParent, name string) {
out, _, err := dockerCmdWithError("run", "--cgroup-parent", cgroupParent, "--name", name, "busybox", "cat", "/proc/self/cgroup") out, _, err := dockerCmdWithError("run", "--cgroup-parent", cgroupParent, "--name", name, "-d", "busybox", "top")
if err != nil { if err != nil {
// XXX: This may include a daemon crash. // XXX: This may include a daemon crash.
c.Fatalf("unexpected failure when running container with --cgroup-parent option - %s\n%v", string(out), err) c.Fatalf("unexpected failure when running container with --cgroup-parent option - %s\n%v", string(out), err)
} }
cID := strings.TrimSpace(out)
// We expect "/SHOULD_NOT_EXIST" to not exist. If not, we have a security issue. // We expect "/SHOULD_NOT_EXIST" to not exist. If not, we have a security issue.
if _, err := os.Stat("/SHOULD_NOT_EXIST"); err == nil || !os.IsNotExist(err) { if _, err := os.Stat("/SHOULD_NOT_EXIST"); err == nil || !os.IsNotExist(err) {
c.Fatalf("SECURITY: --cgroup-parent with ../../ relative paths cause files to be created in the host (this is bad) !!") c.Fatalf("SECURITY: --cgroup-parent with ../../ relative paths cause files to be created in the host (this is bad) !!")
} }
cgroupPaths := ParseCgroupPaths(string(out)) // If cgroup namespaces are enabled, then processes running inside the container won't
// be able to see the parent namespace. Check that they have the correct parents from
// the host, which has the non-namespaced view of the hierarchy.
pid := inspectField(c, cID, "State.Pid")
paths := ReadCgroupPathsForPid(c, pid)
cgroupPaths := ParseCgroupPaths(paths)
if len(cgroupPaths) == 0 { if len(cgroupPaths) == 0 {
c.Fatalf("unexpected output - %q", string(out)) c.Fatalf("unexpected output - %q", string(paths))
} }
id := getIDByName(c, name) id := getIDByName(c, name)
expectedCgroup := path.Join(cleanCgroupParent, id) expectedCgroup := path.Join(cleanCgroupParent, id)
found := false found := false

View file

@ -2,6 +2,7 @@ package main
import ( import (
"fmt" "fmt"
"io/ioutil"
"os" "os"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
@ -38,6 +39,17 @@ func transformCmd(execCmd *exec.Cmd) icmd.Cmd {
} }
} }
// ReadCgroupPathsForPid returns the contents of '/proc/<pid>/cgroup' for the
// given pid as a string. If the file cannot be read, c.Fatalf is called with
// the file path and the underlying error.
func ReadCgroupPathsForPid(c *check.C, pid string) string {
	procFile := fmt.Sprintf("/proc/%s/cgroup", pid)
	data, readErr := ioutil.ReadFile(procFile)
	if readErr == nil {
		return string(data)
	}
	c.Fatalf("unexpected failure when reading cgroup file %s\n%v", procFile, readErr)
	return string(data)
}
// ParseCgroupPaths parses 'procCgroupData', which is output of '/proc/<pid>/cgroup', and returns // ParseCgroupPaths parses 'procCgroupData', which is output of '/proc/<pid>/cgroup', and returns
// a map which cgroup name as key and path as value. // a map which cgroup name as key and path as value.
func ParseCgroupPaths(procCgroupData string) map[string]string { func ParseCgroupPaths(procCgroupData string) map[string]string {

View file

@ -2,6 +2,10 @@ package container // import "github.com/docker/docker/integration/container"
import ( import (
"context" "context"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv" "strconv"
"strings" "strings"
"testing" "testing"
@ -93,3 +97,32 @@ func TestNISDomainname(t *testing.T) {
assert.Equal(t, 0, res.ExitCode) assert.Equal(t, 0, res.ExitCode)
assert.Check(t, is.Equal(domainname, strings.TrimSpace(res.Stdout()))) assert.Check(t, is.Equal(domainname, strings.TrimSpace(res.Stdout())))
} }
// TestCgroupNamespaces checks that a container started with default options
// does not share the daemon's cgroup namespace. It compares the
// '/proc/<pid>/ns/cgroup' link of the daemon (pid taken from $DEST/docker.pid)
// with the link seen for pid 1 inside the container.
//
// The test is skipped on non-Linux daemons, on remote daemons (it reads the
// daemon's /proc entries directly), and when '/proc/self/ns/cgroup' does not exist.
func TestCgroupNamespaces(t *testing.T) {
	skip.If(t, testEnv.DaemonInfo.OSType != "linux")
	skip.If(t, testEnv.IsRemoteDaemon())
	if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) {
		t.Skip("cgroup namespaces are unsupported")
	}

	defer setupTest(t)()
	client := testEnv.APIClient()
	ctx := context.Background()

	cID := container.Run(t, ctx, client)
	poll.WaitOn(t, container.IsInState(ctx, client, cID, "running"), poll.WithDelay(100*time.Millisecond))

	path := filepath.Join(os.Getenv("DEST"), "docker.pid")
	b, err := ioutil.ReadFile(path)
	assert.NilError(t, err)

	// Trim surrounding whitespace so that a trailing newline in the pid file
	// does not end up embedded in the /proc path below.
	daemonPid := strings.TrimSpace(string(b))
	link, err := os.Readlink(fmt.Sprintf("/proc/%s/ns/cgroup", daemonPid))
	assert.NilError(t, err)

	// Check that the container's cgroup namespace doesn't match the docker daemon's
	res, err := container.Exec(ctx, client, cID, []string{"readlink", "/proc/1/ns/cgroup"})
	assert.NilError(t, err)
	assert.Assert(t, is.Len(res.Stderr(), 0))
	assert.Equal(t, 0, res.ExitCode)
	assert.Assert(t, link != strings.TrimSpace(res.Stdout()))
}

View file

@ -16,6 +16,9 @@ type SysInfo struct {
cgroupCpusetInfo cgroupCpusetInfo
cgroupPids cgroupPids
// Whether the kernel supports cgroup namespaces or not
CgroupNamespaces bool
// Whether IPv4 forwarding is supported or not, if this was disabled, networking will not work // Whether IPv4 forwarding is supported or not, if this was disabled, networking will not work
IPv4ForwardingDisabled bool IPv4ForwardingDisabled bool

View file

@ -53,6 +53,7 @@ func New(quiet bool) *SysInfo {
applyNetworkingInfo, applyNetworkingInfo,
applyAppArmorInfo, applyAppArmorInfo,
applySeccompInfo, applySeccompInfo,
applyCgroupNsInfo,
}...) }...)
for _, o := range ops { for _, o := range ops {
@ -250,6 +251,15 @@ func applyAppArmorInfo(info *SysInfo, _ map[string]string) []string {
return warnings return warnings
} }
// applyCgroupNsInfo records in info whether cgroup namespaces are available,
// determined by probing '/proc/self/ns/cgroup'. CgroupNamespaces is set to true
// unless the stat reports that the path does not exist. The second argument is
// ignored and the returned warnings slice is always nil.
func applyCgroupNsInfo(info *SysInfo, _ map[string]string) []string {
	_, statErr := os.Stat("/proc/self/ns/cgroup")
	if os.IsNotExist(statErr) {
		return nil
	}
	info.CgroupNamespaces = true
	return nil
}
// applySeccompInfo checks if Seccomp is supported, via CONFIG_SECCOMP. // applySeccompInfo checks if Seccomp is supported, via CONFIG_SECCOMP.
func applySeccompInfo(info *SysInfo, _ map[string]string) []string { func applySeccompInfo(info *SysInfo, _ map[string]string) []string {
var warnings []string var warnings []string

View file

@ -96,6 +96,26 @@ func TestNewAppArmorDisabled(t *testing.T) {
assert.Assert(t, !sysInfo.AppArmor) assert.Assert(t, !sysInfo.AppArmor)
} }
// TestNewCgroupNamespacesEnabled asserts that New reports CgroupNamespaces as
// true when '/proc/self/ns/cgroup' can be stat'ed. The test is skipped when the
// stat returns any error.
func TestNewCgroupNamespacesEnabled(t *testing.T) {
	_, statErr := os.Stat("/proc/self/ns/cgroup")
	if statErr != nil {
		t.Skip("cgroup namespaces must be enabled")
	}

	sysInfo := New(true)
	assert.Assert(t, sysInfo.CgroupNamespaces)
}
// TestNewCgroupNamespacesDisabled asserts that New reports CgroupNamespaces as
// false when '/proc/self/ns/cgroup' does not exist. The test is skipped unless
// the stat fails with a "does not exist" error.
func TestNewCgroupNamespacesDisabled(t *testing.T) {
	_, statErr := os.Stat("/proc/self/ns/cgroup")
	if !os.IsNotExist(statErr) {
		t.Skip("cgroup namespaces must be disabled")
	}

	sysInfo := New(true)
	assert.Assert(t, !sysInfo.CgroupNamespaces)
}
func TestNumCPU(t *testing.T) { func TestNumCPU(t *testing.T) {
cpuNumbers := NumCPU() cpuNumbers := NumCPU()
if cpuNumbers <= 0 { if cpuNumbers <= 0 {