Start containers in their own cgroup namespaces
This is enabled for all containers that are not run with --privileged, if the kernel supports cgroup namespaces. Fixes #38332. Signed-off-by: Rob Gulewich <rgulewich@netflix.com>
This commit is contained in:
parent
b20a14b546
commit
256eb04d69
10 changed files with 178 additions and 43 deletions
|
@ -81,26 +81,27 @@ var (
|
|||
|
||||
// Daemon holds information about the Docker daemon.
|
||||
type Daemon struct {
|
||||
ID string
|
||||
repository string
|
||||
containers container.Store
|
||||
containersReplica container.ViewDB
|
||||
execCommands *exec.Store
|
||||
imageService *images.ImageService
|
||||
idIndex *truncindex.TruncIndex
|
||||
configStore *config.Config
|
||||
statsCollector *stats.Collector
|
||||
defaultLogConfig containertypes.LogConfig
|
||||
RegistryService registry.Service
|
||||
EventsService *events.Events
|
||||
netController libnetwork.NetworkController
|
||||
volumes *volumesservice.VolumesService
|
||||
discoveryWatcher discovery.Reloader
|
||||
root string
|
||||
seccompEnabled bool
|
||||
apparmorEnabled bool
|
||||
shutdown bool
|
||||
idMapping *idtools.IdentityMapping
|
||||
ID string
|
||||
repository string
|
||||
containers container.Store
|
||||
containersReplica container.ViewDB
|
||||
execCommands *exec.Store
|
||||
imageService *images.ImageService
|
||||
idIndex *truncindex.TruncIndex
|
||||
configStore *config.Config
|
||||
statsCollector *stats.Collector
|
||||
defaultLogConfig containertypes.LogConfig
|
||||
RegistryService registry.Service
|
||||
EventsService *events.Events
|
||||
netController libnetwork.NetworkController
|
||||
volumes *volumesservice.VolumesService
|
||||
discoveryWatcher discovery.Reloader
|
||||
root string
|
||||
seccompEnabled bool
|
||||
apparmorEnabled bool
|
||||
cgroupNamespacesEnabled bool
|
||||
shutdown bool
|
||||
idMapping *idtools.IdentityMapping
|
||||
// TODO: move graphDrivers field to an InfoService
|
||||
graphDrivers map[string]string // By operating system
|
||||
|
||||
|
@ -1020,6 +1021,7 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
|
|||
d.idMapping = idMapping
|
||||
d.seccompEnabled = sysInfo.Seccomp
|
||||
d.apparmorEnabled = sysInfo.AppArmor
|
||||
d.cgroupNamespacesEnabled = sysInfo.CgroupNamespaces
|
||||
|
||||
d.linkIndex = newLinkIndex()
|
||||
|
||||
|
|
|
@ -307,8 +307,13 @@ func WithNamespaces(daemon *Daemon, c *container.Container) coci.SpecOpts {
|
|||
s.Hostname = ""
|
||||
}
|
||||
|
||||
return nil
|
||||
// cgroup
|
||||
if daemon.cgroupNamespacesEnabled && !c.HostConfig.Privileged {
|
||||
nsCgroup := specs.LinuxNamespace{Type: "cgroup"}
|
||||
setNamespace(s, nsCgroup)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func specMapping(s []idtools.IDMap) []specs.LinuxIDMapping {
|
||||
|
|
|
@ -3984,18 +3984,40 @@ func (s *DockerSuite) TestBuildContainerWithCgroupParent(c *check.C) {
|
|||
if !found {
|
||||
c.Fatalf("unable to find self memory cgroup path. CgroupsPath: %v", selfCgroupPaths)
|
||||
}
|
||||
result := buildImage("buildcgroupparent",
|
||||
cli.WithFlags("--cgroup-parent", cgroupParent),
|
||||
build.WithDockerfile(`
|
||||
|
||||
doneCh := make(chan string)
|
||||
|
||||
// If cgroup namespaces are enabled, then processes running inside the container won't
|
||||
// be able to see the parent namespace. Check that they have the correct parents from
|
||||
// the host, which has the non-namespaced view of the hierarchy.
|
||||
|
||||
go func() {
|
||||
result := buildImage("buildcgroupparent",
|
||||
cli.WithFlags("--cgroup-parent", cgroupParent),
|
||||
build.WithDockerfile(`
|
||||
FROM busybox
|
||||
RUN cat /proc/self/cgroup
|
||||
`))
|
||||
result.Assert(c, icmd.Success)
|
||||
m, err := regexp.MatchString(fmt.Sprintf("memory:.*/%s/.*", cgroupParent), result.Combined())
|
||||
assert.NilError(c, err)
|
||||
RUN sleep 10
|
||||
`))
|
||||
result.Assert(c, icmd.Success)
|
||||
doneCh <- "done"
|
||||
}()
|
||||
|
||||
// Wait until the build is well into the sleep
|
||||
time.Sleep(3 * time.Second)
|
||||
out, _, err := dockerCmdWithError("ps", "-q", "-l")
|
||||
c.Assert(err, check.IsNil)
|
||||
cID := strings.TrimSpace(out)
|
||||
|
||||
pid := inspectField(c, cID, "State.Pid")
|
||||
paths := ReadCgroupPathsForPid(c, pid)
|
||||
m, err := regexp.MatchString(fmt.Sprintf("memory:.*/%s/.*", cgroupParent), paths)
|
||||
c.Assert(err, check.IsNil)
|
||||
if !m {
|
||||
c.Fatalf("There is no expected memory cgroup with parent /%s/: %s", cgroupParent, result.Combined())
|
||||
c.Fatalf("There is no expected memory cgroup with parent /%s/: %s", cgroupParent, paths)
|
||||
}
|
||||
|
||||
// Wait for the build to complete, otherwise it will exit with an error
|
||||
<-doneCh
|
||||
}
|
||||
|
||||
// FIXME(vdemeester) could be a unit test
|
||||
|
|
|
@ -1787,7 +1787,8 @@ func (s *DockerDaemonSuite) TestDaemonRestartContainerLinksRestart(c *check.C) {
|
|||
}
|
||||
|
||||
func (s *DockerDaemonSuite) TestDaemonCgroupParent(c *check.C) {
|
||||
testRequires(c, DaemonIsLinux)
|
||||
// Test requires local filesystem access on a Linux host
|
||||
testRequires(c, DaemonIsLinux, testEnv.IsLocalDaemon)
|
||||
|
||||
cgroupParent := "test"
|
||||
name := "cgroup-test"
|
||||
|
@ -1795,10 +1796,20 @@ func (s *DockerDaemonSuite) TestDaemonCgroupParent(c *check.C) {
|
|||
s.d.StartWithBusybox(c, "--cgroup-parent", cgroupParent)
|
||||
defer s.d.Restart(c)
|
||||
|
||||
out, err := s.d.Cmd("run", "--name", name, "busybox", "cat", "/proc/self/cgroup")
|
||||
assert.NilError(c, err)
|
||||
cgroupPaths := ParseCgroupPaths(string(out))
|
||||
c.Assert(len(cgroupPaths), checker.Not(checker.Equals), 0, check.Commentf("unexpected output - %q", string(out)))
|
||||
out, err := s.d.Cmd("run", "--name", name, "-d", "busybox", "top")
|
||||
c.Assert(err, checker.IsNil)
|
||||
|
||||
// If cgroup namespaces are enabled, then processes running inside the container won't
|
||||
// be able to see the parent namespace. Check that they have the correct parents from
|
||||
// the host, which has the non-namespaced view of the hierarchy.
|
||||
|
||||
pid, err := s.d.Cmd("inspect", "-f", "{{.State.Pid}}", name)
|
||||
c.Assert(err, checker.IsNil)
|
||||
pid = strings.TrimSpace(string(pid))
|
||||
paths := ReadCgroupPathsForPid(c, pid)
|
||||
cgroupPaths := ParseCgroupPaths(paths)
|
||||
c.Assert(len(cgroupPaths), checker.Not(checker.Equals), 0, check.Commentf("unexpected output - %q", paths))
|
||||
|
||||
out, err = s.d.Cmd("inspect", "-f", "{{.Id}}", name)
|
||||
assert.NilError(c, err)
|
||||
id := strings.TrimSpace(string(out))
|
||||
|
|
|
@ -3241,8 +3241,8 @@ func (s *DockerSuite) TestRunWithUlimits(c *check.C) {
|
|||
}
|
||||
|
||||
func (s *DockerSuite) TestRunContainerWithCgroupParent(c *check.C) {
|
||||
// Not applicable on Windows as uses Unix specific functionality
|
||||
testRequires(c, DaemonIsLinux)
|
||||
// Test requires local filesystem access on a Linux host
|
||||
testRequires(c, DaemonIsLinux, testEnv.IsLocalDaemon)
|
||||
|
||||
// cgroup-parent relative path
|
||||
testRunContainerWithCgroupParent(c, "test", "cgroup-test")
|
||||
|
@ -3252,14 +3252,23 @@ func (s *DockerSuite) TestRunContainerWithCgroupParent(c *check.C) {
|
|||
}
|
||||
|
||||
func testRunContainerWithCgroupParent(c *check.C, cgroupParent, name string) {
|
||||
out, _, err := dockerCmdWithError("run", "--cgroup-parent", cgroupParent, "--name", name, "busybox", "cat", "/proc/self/cgroup")
|
||||
out, _, err := dockerCmdWithError("run", "--cgroup-parent", cgroupParent, "--name", name, "-d", "busybox", "top")
|
||||
if err != nil {
|
||||
c.Fatalf("unexpected failure when running container with --cgroup-parent option - %s\n%v", string(out), err)
|
||||
}
|
||||
cgroupPaths := ParseCgroupPaths(string(out))
|
||||
cID := strings.TrimSpace(out)
|
||||
|
||||
// If cgroup namespaces are enabled, then processes running inside the container won't
|
||||
// be able to see the parent namespace. Check that they have the correct parents from
|
||||
// the host, which has the non-namespaced view of the hierarchy.
|
||||
|
||||
pid := inspectField(c, cID, "State.Pid")
|
||||
paths := ReadCgroupPathsForPid(c, pid)
|
||||
cgroupPaths := ParseCgroupPaths(paths)
|
||||
if len(cgroupPaths) == 0 {
|
||||
c.Fatalf("unexpected output - %q", string(out))
|
||||
c.Fatalf("unexpected output - %q", string(paths))
|
||||
}
|
||||
|
||||
id := getIDByName(c, name)
|
||||
expectedCgroup := path.Join(cgroupParent, id)
|
||||
found := false
|
||||
|
@ -3285,21 +3294,29 @@ func (s *DockerSuite) TestRunInvalidCgroupParent(c *check.C) {
|
|||
}
|
||||
|
||||
func testRunInvalidCgroupParent(c *check.C, cgroupParent, cleanCgroupParent, name string) {
|
||||
out, _, err := dockerCmdWithError("run", "--cgroup-parent", cgroupParent, "--name", name, "busybox", "cat", "/proc/self/cgroup")
|
||||
out, _, err := dockerCmdWithError("run", "--cgroup-parent", cgroupParent, "--name", name, "-d", "busybox", "top")
|
||||
if err != nil {
|
||||
// XXX: This may include a daemon crash.
|
||||
c.Fatalf("unexpected failure when running container with --cgroup-parent option - %s\n%v", string(out), err)
|
||||
}
|
||||
cID := strings.TrimSpace(out)
|
||||
|
||||
// We expect "/SHOULD_NOT_EXIST" to not exist. If not, we have a security issue.
|
||||
if _, err := os.Stat("/SHOULD_NOT_EXIST"); err == nil || !os.IsNotExist(err) {
|
||||
c.Fatalf("SECURITY: --cgroup-parent with ../../ relative paths cause files to be created in the host (this is bad) !!")
|
||||
}
|
||||
|
||||
cgroupPaths := ParseCgroupPaths(string(out))
|
||||
// If cgroup namespaces are enabled, then processes running inside the container won't
|
||||
// be able to see the parent namespace. Check that they have the correct parents from
|
||||
// the host, which has the non-namespaced view of the hierarchy.
|
||||
|
||||
pid := inspectField(c, cID, "State.Pid")
|
||||
paths := ReadCgroupPathsForPid(c, pid)
|
||||
cgroupPaths := ParseCgroupPaths(paths)
|
||||
if len(cgroupPaths) == 0 {
|
||||
c.Fatalf("unexpected output - %q", string(out))
|
||||
c.Fatalf("unexpected output - %q", string(paths))
|
||||
}
|
||||
|
||||
id := getIDByName(c, name)
|
||||
expectedCgroup := path.Join(cleanCgroupParent, id)
|
||||
found := false
|
||||
|
|
|
@ -2,6 +2,7 @@ package main
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
|
@ -38,6 +39,17 @@ func transformCmd(execCmd *exec.Cmd) icmd.Cmd {
|
|||
}
|
||||
}
|
||||
|
||||
// ReadCgroupPathsForPid reads the cgroup path file for a pid in '/proc/<pid>/cgroup'
|
||||
func ReadCgroupPathsForPid(c *check.C, pid string) string {
|
||||
cgroupFile := fmt.Sprintf("/proc/%s/cgroup", pid)
|
||||
out, err := ioutil.ReadFile(cgroupFile)
|
||||
if err != nil {
|
||||
c.Fatalf("unexpected failure when reading cgroup file %s\n%v", cgroupFile, err)
|
||||
}
|
||||
|
||||
return string(out)
|
||||
}
|
||||
|
||||
// ParseCgroupPaths parses 'procCgroupData', which is output of '/proc/<pid>/cgroup', and returns
|
||||
// a map with the cgroup name as key and the path as value.
|
||||
func ParseCgroupPaths(procCgroupData string) map[string]string {
|
||||
|
|
|
@ -2,6 +2,10 @@ package container // import "github.com/docker/docker/integration/container"
|
|||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
|
@ -93,3 +97,32 @@ func TestNISDomainname(t *testing.T) {
|
|||
assert.Equal(t, 0, res.ExitCode)
|
||||
assert.Check(t, is.Equal(domainname, strings.TrimSpace(res.Stdout())))
|
||||
}
|
||||
|
||||
func TestCgroupNamespaces(t *testing.T) {
|
||||
skip.If(t, testEnv.DaemonInfo.OSType != "linux")
|
||||
skip.If(t, testEnv.IsRemoteDaemon())
|
||||
|
||||
if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) {
|
||||
t.Skip("cgroup namespaces are unsupported")
|
||||
}
|
||||
|
||||
defer setupTest(t)()
|
||||
client := testEnv.APIClient()
|
||||
ctx := context.Background()
|
||||
|
||||
cID := container.Run(t, ctx, client)
|
||||
poll.WaitOn(t, container.IsInState(ctx, client, cID, "running"), poll.WithDelay(100*time.Millisecond))
|
||||
|
||||
path := filepath.Join(os.Getenv("DEST"), "docker.pid")
|
||||
b, err := ioutil.ReadFile(path)
|
||||
assert.NilError(t, err)
|
||||
link, err := os.Readlink(fmt.Sprintf("/proc/%s/ns/cgroup", string(b)))
|
||||
assert.NilError(t, err)
|
||||
|
||||
// Check that the container's cgroup doesn't match the docker daemon's
|
||||
res, err := container.Exec(ctx, client, cID, []string{"readlink", "/proc/1/ns/cgroup"})
|
||||
assert.NilError(t, err)
|
||||
assert.Assert(t, is.Len(res.Stderr(), 0))
|
||||
assert.Equal(t, 0, res.ExitCode)
|
||||
assert.Assert(t, link != strings.TrimSpace(res.Stdout()))
|
||||
}
|
||||
|
|
|
@ -16,6 +16,9 @@ type SysInfo struct {
|
|||
cgroupCpusetInfo
|
||||
cgroupPids
|
||||
|
||||
// Whether the kernel supports cgroup namespaces or not
|
||||
CgroupNamespaces bool
|
||||
|
||||
// Whether IPv4 forwarding is supported or not, if this was disabled, networking will not work
|
||||
IPv4ForwardingDisabled bool
|
||||
|
||||
|
|
|
@ -53,6 +53,7 @@ func New(quiet bool) *SysInfo {
|
|||
applyNetworkingInfo,
|
||||
applyAppArmorInfo,
|
||||
applySeccompInfo,
|
||||
applyCgroupNsInfo,
|
||||
}...)
|
||||
|
||||
for _, o := range ops {
|
||||
|
@ -250,6 +251,15 @@ func applyAppArmorInfo(info *SysInfo, _ map[string]string) []string {
|
|||
return warnings
|
||||
}
|
||||
|
||||
// applyCgroupNsInfo adds cgroup namespace information to the info.
|
||||
func applyCgroupNsInfo(info *SysInfo, _ map[string]string) []string {
|
||||
var warnings []string
|
||||
if _, err := os.Stat("/proc/self/ns/cgroup"); !os.IsNotExist(err) {
|
||||
info.CgroupNamespaces = true
|
||||
}
|
||||
return warnings
|
||||
}
|
||||
|
||||
// applySeccompInfo checks if Seccomp is supported, via CONFIG_SECCOMP.
|
||||
func applySeccompInfo(info *SysInfo, _ map[string]string) []string {
|
||||
var warnings []string
|
||||
|
|
|
@ -96,6 +96,26 @@ func TestNewAppArmorDisabled(t *testing.T) {
|
|||
assert.Assert(t, !sysInfo.AppArmor)
|
||||
}
|
||||
|
||||
func TestNewCgroupNamespacesEnabled(t *testing.T) {
|
||||
// If cgroup namespaces are supported in the kernel, then sysInfo.CgroupNamespaces should be TRUE
|
||||
if _, err := os.Stat("/proc/self/ns/cgroup"); err != nil {
|
||||
t.Skip("cgroup namespaces must be enabled")
|
||||
}
|
||||
|
||||
sysInfo := New(true)
|
||||
assert.Assert(t, sysInfo.CgroupNamespaces)
|
||||
}
|
||||
|
||||
func TestNewCgroupNamespacesDisabled(t *testing.T) {
|
||||
// If cgroup namespaces are *not* supported in the kernel, then sysInfo.CgroupNamespaces should be FALSE
|
||||
if _, err := os.Stat("/proc/self/ns/cgroup"); !os.IsNotExist(err) {
|
||||
t.Skip("cgroup namespaces must be disabled")
|
||||
}
|
||||
|
||||
sysInfo := New(true)
|
||||
assert.Assert(t, !sysInfo.CgroupNamespaces)
|
||||
}
|
||||
|
||||
func TestNumCPU(t *testing.T) {
|
||||
cpuNumbers := NumCPU()
|
||||
if cpuNumbers <= 0 {
|
||||
|
|
Loading…
Reference in a new issue