Start containers in their own cgroup namespaces
Cgroup namespaces are enabled for all containers that are not run with --privileged, provided the kernel supports them.

Fixes #38332

Signed-off-by: Rob Gulewich <rgulewich@netflix.com>
This commit is contained in:
parent
b20a14b546
commit
256eb04d69
10 changed files with 178 additions and 43 deletions
|
@ -81,26 +81,27 @@ var (
|
||||||
|
|
||||||
// Daemon holds information about the Docker daemon.
|
// Daemon holds information about the Docker daemon.
|
||||||
type Daemon struct {
|
type Daemon struct {
|
||||||
ID string
|
ID string
|
||||||
repository string
|
repository string
|
||||||
containers container.Store
|
containers container.Store
|
||||||
containersReplica container.ViewDB
|
containersReplica container.ViewDB
|
||||||
execCommands *exec.Store
|
execCommands *exec.Store
|
||||||
imageService *images.ImageService
|
imageService *images.ImageService
|
||||||
idIndex *truncindex.TruncIndex
|
idIndex *truncindex.TruncIndex
|
||||||
configStore *config.Config
|
configStore *config.Config
|
||||||
statsCollector *stats.Collector
|
statsCollector *stats.Collector
|
||||||
defaultLogConfig containertypes.LogConfig
|
defaultLogConfig containertypes.LogConfig
|
||||||
RegistryService registry.Service
|
RegistryService registry.Service
|
||||||
EventsService *events.Events
|
EventsService *events.Events
|
||||||
netController libnetwork.NetworkController
|
netController libnetwork.NetworkController
|
||||||
volumes *volumesservice.VolumesService
|
volumes *volumesservice.VolumesService
|
||||||
discoveryWatcher discovery.Reloader
|
discoveryWatcher discovery.Reloader
|
||||||
root string
|
root string
|
||||||
seccompEnabled bool
|
seccompEnabled bool
|
||||||
apparmorEnabled bool
|
apparmorEnabled bool
|
||||||
shutdown bool
|
cgroupNamespacesEnabled bool
|
||||||
idMapping *idtools.IdentityMapping
|
shutdown bool
|
||||||
|
idMapping *idtools.IdentityMapping
|
||||||
// TODO: move graphDrivers field to an InfoService
|
// TODO: move graphDrivers field to an InfoService
|
||||||
graphDrivers map[string]string // By operating system
|
graphDrivers map[string]string // By operating system
|
||||||
|
|
||||||
|
@ -1020,6 +1021,7 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
|
||||||
d.idMapping = idMapping
|
d.idMapping = idMapping
|
||||||
d.seccompEnabled = sysInfo.Seccomp
|
d.seccompEnabled = sysInfo.Seccomp
|
||||||
d.apparmorEnabled = sysInfo.AppArmor
|
d.apparmorEnabled = sysInfo.AppArmor
|
||||||
|
d.cgroupNamespacesEnabled = sysInfo.CgroupNamespaces
|
||||||
|
|
||||||
d.linkIndex = newLinkIndex()
|
d.linkIndex = newLinkIndex()
|
||||||
|
|
||||||
|
|
|
@ -307,8 +307,13 @@ func WithNamespaces(daemon *Daemon, c *container.Container) coci.SpecOpts {
|
||||||
s.Hostname = ""
|
s.Hostname = ""
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
// cgroup
|
||||||
|
if daemon.cgroupNamespacesEnabled && !c.HostConfig.Privileged {
|
||||||
|
nsCgroup := specs.LinuxNamespace{Type: "cgroup"}
|
||||||
|
setNamespace(s, nsCgroup)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func specMapping(s []idtools.IDMap) []specs.LinuxIDMapping {
|
func specMapping(s []idtools.IDMap) []specs.LinuxIDMapping {
|
||||||
|
|
|
@ -3984,18 +3984,40 @@ func (s *DockerSuite) TestBuildContainerWithCgroupParent(c *check.C) {
|
||||||
if !found {
|
if !found {
|
||||||
c.Fatalf("unable to find self memory cgroup path. CgroupsPath: %v", selfCgroupPaths)
|
c.Fatalf("unable to find self memory cgroup path. CgroupsPath: %v", selfCgroupPaths)
|
||||||
}
|
}
|
||||||
result := buildImage("buildcgroupparent",
|
|
||||||
cli.WithFlags("--cgroup-parent", cgroupParent),
|
doneCh := make(chan string)
|
||||||
build.WithDockerfile(`
|
|
||||||
|
// If cgroup namespaces are enabled, then processes running inside the container won't
|
||||||
|
// be able to see the parent namespace. Check that they have the correct parents from
|
||||||
|
// the host, which has the non-namespaced view of the hierarchy.
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
result := buildImage("buildcgroupparent",
|
||||||
|
cli.WithFlags("--cgroup-parent", cgroupParent),
|
||||||
|
build.WithDockerfile(`
|
||||||
FROM busybox
|
FROM busybox
|
||||||
RUN cat /proc/self/cgroup
|
RUN sleep 10
|
||||||
`))
|
`))
|
||||||
result.Assert(c, icmd.Success)
|
result.Assert(c, icmd.Success)
|
||||||
m, err := regexp.MatchString(fmt.Sprintf("memory:.*/%s/.*", cgroupParent), result.Combined())
|
doneCh <- "done"
|
||||||
assert.NilError(c, err)
|
}()
|
||||||
|
|
||||||
|
// Wait until the build is well into the sleep
|
||||||
|
time.Sleep(3 * time.Second)
|
||||||
|
out, _, err := dockerCmdWithError("ps", "-q", "-l")
|
||||||
|
c.Assert(err, check.IsNil)
|
||||||
|
cID := strings.TrimSpace(out)
|
||||||
|
|
||||||
|
pid := inspectField(c, cID, "State.Pid")
|
||||||
|
paths := ReadCgroupPathsForPid(c, pid)
|
||||||
|
m, err := regexp.MatchString(fmt.Sprintf("memory:.*/%s/.*", cgroupParent), paths)
|
||||||
|
c.Assert(err, check.IsNil)
|
||||||
if !m {
|
if !m {
|
||||||
c.Fatalf("There is no expected memory cgroup with parent /%s/: %s", cgroupParent, result.Combined())
|
c.Fatalf("There is no expected memory cgroup with parent /%s/: %s", cgroupParent, paths)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Wait for the build to complete, otherwise it will exit with an error
|
||||||
|
<-doneCh
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME(vdemeester) could be a unit test
|
// FIXME(vdemeester) could be a unit test
|
||||||
|
|
|
@ -1787,7 +1787,8 @@ func (s *DockerDaemonSuite) TestDaemonRestartContainerLinksRestart(c *check.C) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *DockerDaemonSuite) TestDaemonCgroupParent(c *check.C) {
|
func (s *DockerDaemonSuite) TestDaemonCgroupParent(c *check.C) {
|
||||||
testRequires(c, DaemonIsLinux)
|
// Test requires local filesystem access on a Linux host
|
||||||
|
testRequires(c, DaemonIsLinux, testEnv.IsLocalDaemon)
|
||||||
|
|
||||||
cgroupParent := "test"
|
cgroupParent := "test"
|
||||||
name := "cgroup-test"
|
name := "cgroup-test"
|
||||||
|
@ -1795,10 +1796,20 @@ func (s *DockerDaemonSuite) TestDaemonCgroupParent(c *check.C) {
|
||||||
s.d.StartWithBusybox(c, "--cgroup-parent", cgroupParent)
|
s.d.StartWithBusybox(c, "--cgroup-parent", cgroupParent)
|
||||||
defer s.d.Restart(c)
|
defer s.d.Restart(c)
|
||||||
|
|
||||||
out, err := s.d.Cmd("run", "--name", name, "busybox", "cat", "/proc/self/cgroup")
|
out, err := s.d.Cmd("run", "--name", name, "-d", "busybox", "top")
|
||||||
assert.NilError(c, err)
|
c.Assert(err, checker.IsNil)
|
||||||
cgroupPaths := ParseCgroupPaths(string(out))
|
|
||||||
c.Assert(len(cgroupPaths), checker.Not(checker.Equals), 0, check.Commentf("unexpected output - %q", string(out)))
|
// If cgroup namespaces are enabled, then processes running inside the container won't
|
||||||
|
// be able to see the parent namespace. Check that they have the correct parents from
|
||||||
|
// the host, which has the non-namespaced view of the hierarchy.
|
||||||
|
|
||||||
|
pid, err := s.d.Cmd("inspect", "-f", "{{.State.Pid}}", name)
|
||||||
|
c.Assert(err, checker.IsNil)
|
||||||
|
pid = strings.TrimSpace(string(pid))
|
||||||
|
paths := ReadCgroupPathsForPid(c, pid)
|
||||||
|
cgroupPaths := ParseCgroupPaths(paths)
|
||||||
|
c.Assert(len(cgroupPaths), checker.Not(checker.Equals), 0, check.Commentf("unexpected output - %q", paths))
|
||||||
|
|
||||||
out, err = s.d.Cmd("inspect", "-f", "{{.Id}}", name)
|
out, err = s.d.Cmd("inspect", "-f", "{{.Id}}", name)
|
||||||
assert.NilError(c, err)
|
assert.NilError(c, err)
|
||||||
id := strings.TrimSpace(string(out))
|
id := strings.TrimSpace(string(out))
|
||||||
|
|
|
@ -3241,8 +3241,8 @@ func (s *DockerSuite) TestRunWithUlimits(c *check.C) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *DockerSuite) TestRunContainerWithCgroupParent(c *check.C) {
|
func (s *DockerSuite) TestRunContainerWithCgroupParent(c *check.C) {
|
||||||
// Not applicable on Windows as uses Unix specific functionality
|
// Test requires local filesystem access on a Linux host
|
||||||
testRequires(c, DaemonIsLinux)
|
testRequires(c, DaemonIsLinux, testEnv.IsLocalDaemon)
|
||||||
|
|
||||||
// cgroup-parent relative path
|
// cgroup-parent relative path
|
||||||
testRunContainerWithCgroupParent(c, "test", "cgroup-test")
|
testRunContainerWithCgroupParent(c, "test", "cgroup-test")
|
||||||
|
@ -3252,14 +3252,23 @@ func (s *DockerSuite) TestRunContainerWithCgroupParent(c *check.C) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func testRunContainerWithCgroupParent(c *check.C, cgroupParent, name string) {
|
func testRunContainerWithCgroupParent(c *check.C, cgroupParent, name string) {
|
||||||
out, _, err := dockerCmdWithError("run", "--cgroup-parent", cgroupParent, "--name", name, "busybox", "cat", "/proc/self/cgroup")
|
out, _, err := dockerCmdWithError("run", "--cgroup-parent", cgroupParent, "--name", name, "-d", "busybox", "top")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.Fatalf("unexpected failure when running container with --cgroup-parent option - %s\n%v", string(out), err)
|
c.Fatalf("unexpected failure when running container with --cgroup-parent option - %s\n%v", string(out), err)
|
||||||
}
|
}
|
||||||
cgroupPaths := ParseCgroupPaths(string(out))
|
cID := strings.TrimSpace(out)
|
||||||
|
|
||||||
|
// If cgroup namespaces are enabled, then processes running inside the container won't
|
||||||
|
// be able to see the parent namespace. Check that they have the correct parents from
|
||||||
|
// the host, which has the non-namespaced view of the hierarchy.
|
||||||
|
|
||||||
|
pid := inspectField(c, cID, "State.Pid")
|
||||||
|
paths := ReadCgroupPathsForPid(c, pid)
|
||||||
|
cgroupPaths := ParseCgroupPaths(paths)
|
||||||
if len(cgroupPaths) == 0 {
|
if len(cgroupPaths) == 0 {
|
||||||
c.Fatalf("unexpected output - %q", string(out))
|
c.Fatalf("unexpected output - %q", string(paths))
|
||||||
}
|
}
|
||||||
|
|
||||||
id := getIDByName(c, name)
|
id := getIDByName(c, name)
|
||||||
expectedCgroup := path.Join(cgroupParent, id)
|
expectedCgroup := path.Join(cgroupParent, id)
|
||||||
found := false
|
found := false
|
||||||
|
@ -3285,21 +3294,29 @@ func (s *DockerSuite) TestRunInvalidCgroupParent(c *check.C) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func testRunInvalidCgroupParent(c *check.C, cgroupParent, cleanCgroupParent, name string) {
|
func testRunInvalidCgroupParent(c *check.C, cgroupParent, cleanCgroupParent, name string) {
|
||||||
out, _, err := dockerCmdWithError("run", "--cgroup-parent", cgroupParent, "--name", name, "busybox", "cat", "/proc/self/cgroup")
|
out, _, err := dockerCmdWithError("run", "--cgroup-parent", cgroupParent, "--name", name, "-d", "busybox", "top")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// XXX: This may include a daemon crash.
|
// XXX: This may include a daemon crash.
|
||||||
c.Fatalf("unexpected failure when running container with --cgroup-parent option - %s\n%v", string(out), err)
|
c.Fatalf("unexpected failure when running container with --cgroup-parent option - %s\n%v", string(out), err)
|
||||||
}
|
}
|
||||||
|
cID := strings.TrimSpace(out)
|
||||||
|
|
||||||
// We expect "/SHOULD_NOT_EXIST" to not exist. If not, we have a security issue.
|
// We expect "/SHOULD_NOT_EXIST" to not exist. If not, we have a security issue.
|
||||||
if _, err := os.Stat("/SHOULD_NOT_EXIST"); err == nil || !os.IsNotExist(err) {
|
if _, err := os.Stat("/SHOULD_NOT_EXIST"); err == nil || !os.IsNotExist(err) {
|
||||||
c.Fatalf("SECURITY: --cgroup-parent with ../../ relative paths cause files to be created in the host (this is bad) !!")
|
c.Fatalf("SECURITY: --cgroup-parent with ../../ relative paths cause files to be created in the host (this is bad) !!")
|
||||||
}
|
}
|
||||||
|
|
||||||
cgroupPaths := ParseCgroupPaths(string(out))
|
// If cgroup namespaces are enabled, then processes running inside the container won't
|
||||||
|
// be able to see the parent namespace. Check that they have the correct parents from
|
||||||
|
// the host, which has the non-namespaced view of the hierarchy.
|
||||||
|
|
||||||
|
pid := inspectField(c, cID, "State.Pid")
|
||||||
|
paths := ReadCgroupPathsForPid(c, pid)
|
||||||
|
cgroupPaths := ParseCgroupPaths(paths)
|
||||||
if len(cgroupPaths) == 0 {
|
if len(cgroupPaths) == 0 {
|
||||||
c.Fatalf("unexpected output - %q", string(out))
|
c.Fatalf("unexpected output - %q", string(paths))
|
||||||
}
|
}
|
||||||
|
|
||||||
id := getIDByName(c, name)
|
id := getIDByName(c, name)
|
||||||
expectedCgroup := path.Join(cleanCgroupParent, id)
|
expectedCgroup := path.Join(cleanCgroupParent, id)
|
||||||
found := false
|
found := false
|
||||||
|
|
|
@ -2,6 +2,7 @@ package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
@ -38,6 +39,17 @@ func transformCmd(execCmd *exec.Cmd) icmd.Cmd {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ReadCgroupPathsForPid reads the cgroup path file for a pid in '/proc/<pid>/cgroup'
|
||||||
|
func ReadCgroupPathsForPid(c *check.C, pid string) string {
|
||||||
|
cgroupFile := fmt.Sprintf("/proc/%s/cgroup", pid)
|
||||||
|
out, err := ioutil.ReadFile(cgroupFile)
|
||||||
|
if err != nil {
|
||||||
|
c.Fatalf("unexpected failure when reading cgroup file %s\n%v", cgroupFile, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return string(out)
|
||||||
|
}
|
||||||
|
|
||||||
// ParseCgroupPaths parses 'procCgroupData', which is output of '/proc/<pid>/cgroup', and returns
|
// ParseCgroupPaths parses 'procCgroupData', which is output of '/proc/<pid>/cgroup', and returns
|
||||||
// a map with the cgroup name as key and the path as value.
|
// a map with the cgroup name as key and the path as value.
|
||||||
func ParseCgroupPaths(procCgroupData string) map[string]string {
|
func ParseCgroupPaths(procCgroupData string) map[string]string {
|
||||||
|
|
|
@ -2,6 +2,10 @@ package container // import "github.com/docker/docker/integration/container"
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
@ -93,3 +97,32 @@ func TestNISDomainname(t *testing.T) {
|
||||||
assert.Equal(t, 0, res.ExitCode)
|
assert.Equal(t, 0, res.ExitCode)
|
||||||
assert.Check(t, is.Equal(domainname, strings.TrimSpace(res.Stdout())))
|
assert.Check(t, is.Equal(domainname, strings.TrimSpace(res.Stdout())))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestCgroupNamespaces(t *testing.T) {
|
||||||
|
skip.If(t, testEnv.DaemonInfo.OSType != "linux")
|
||||||
|
skip.If(t, testEnv.IsRemoteDaemon())
|
||||||
|
|
||||||
|
if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) {
|
||||||
|
t.Skip("cgroup namespaces are unsupported")
|
||||||
|
}
|
||||||
|
|
||||||
|
defer setupTest(t)()
|
||||||
|
client := testEnv.APIClient()
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
cID := container.Run(t, ctx, client)
|
||||||
|
poll.WaitOn(t, container.IsInState(ctx, client, cID, "running"), poll.WithDelay(100*time.Millisecond))
|
||||||
|
|
||||||
|
path := filepath.Join(os.Getenv("DEST"), "docker.pid")
|
||||||
|
b, err := ioutil.ReadFile(path)
|
||||||
|
assert.NilError(t, err)
|
||||||
|
link, err := os.Readlink(fmt.Sprintf("/proc/%s/ns/cgroup", string(b)))
|
||||||
|
assert.NilError(t, err)
|
||||||
|
|
||||||
|
// Check that the container's cgroup doesn't match the docker daemon's
|
||||||
|
res, err := container.Exec(ctx, client, cID, []string{"readlink", "/proc/1/ns/cgroup"})
|
||||||
|
assert.NilError(t, err)
|
||||||
|
assert.Assert(t, is.Len(res.Stderr(), 0))
|
||||||
|
assert.Equal(t, 0, res.ExitCode)
|
||||||
|
assert.Assert(t, link != strings.TrimSpace(res.Stdout()))
|
||||||
|
}
|
||||||
|
|
|
@ -16,6 +16,9 @@ type SysInfo struct {
|
||||||
cgroupCpusetInfo
|
cgroupCpusetInfo
|
||||||
cgroupPids
|
cgroupPids
|
||||||
|
|
||||||
|
// Whether the kernel supports cgroup namespaces or not
|
||||||
|
CgroupNamespaces bool
|
||||||
|
|
||||||
// Whether IPv4 forwarding is supported or not, if this was disabled, networking will not work
|
// Whether IPv4 forwarding is supported or not, if this was disabled, networking will not work
|
||||||
IPv4ForwardingDisabled bool
|
IPv4ForwardingDisabled bool
|
||||||
|
|
||||||
|
|
|
@ -53,6 +53,7 @@ func New(quiet bool) *SysInfo {
|
||||||
applyNetworkingInfo,
|
applyNetworkingInfo,
|
||||||
applyAppArmorInfo,
|
applyAppArmorInfo,
|
||||||
applySeccompInfo,
|
applySeccompInfo,
|
||||||
|
applyCgroupNsInfo,
|
||||||
}...)
|
}...)
|
||||||
|
|
||||||
for _, o := range ops {
|
for _, o := range ops {
|
||||||
|
@ -250,6 +251,15 @@ func applyAppArmorInfo(info *SysInfo, _ map[string]string) []string {
|
||||||
return warnings
|
return warnings
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// applyCgroupNsInfo adds cgroup namespace information to the info.
|
||||||
|
func applyCgroupNsInfo(info *SysInfo, _ map[string]string) []string {
|
||||||
|
var warnings []string
|
||||||
|
if _, err := os.Stat("/proc/self/ns/cgroup"); !os.IsNotExist(err) {
|
||||||
|
info.CgroupNamespaces = true
|
||||||
|
}
|
||||||
|
return warnings
|
||||||
|
}
|
||||||
|
|
||||||
// applySeccompInfo checks if Seccomp is supported, via CONFIG_SECCOMP.
|
// applySeccompInfo checks if Seccomp is supported, via CONFIG_SECCOMP.
|
||||||
func applySeccompInfo(info *SysInfo, _ map[string]string) []string {
|
func applySeccompInfo(info *SysInfo, _ map[string]string) []string {
|
||||||
var warnings []string
|
var warnings []string
|
||||||
|
|
|
@ -96,6 +96,26 @@ func TestNewAppArmorDisabled(t *testing.T) {
|
||||||
assert.Assert(t, !sysInfo.AppArmor)
|
assert.Assert(t, !sysInfo.AppArmor)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNewCgroupNamespacesEnabled(t *testing.T) {
|
||||||
|
// If cgroup namespaces are supported in the kernel, then sysInfo.CgroupNamespaces should be TRUE
|
||||||
|
if _, err := os.Stat("/proc/self/ns/cgroup"); err != nil {
|
||||||
|
t.Skip("cgroup namespaces must be enabled")
|
||||||
|
}
|
||||||
|
|
||||||
|
sysInfo := New(true)
|
||||||
|
assert.Assert(t, sysInfo.CgroupNamespaces)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewCgroupNamespacesDisabled(t *testing.T) {
|
||||||
|
// If cgroup namespaces are *not* supported in the kernel, then sysInfo.CgroupNamespaces should be FALSE
|
||||||
|
if _, err := os.Stat("/proc/self/ns/cgroup"); !os.IsNotExist(err) {
|
||||||
|
t.Skip("cgroup namespaces must be disabled")
|
||||||
|
}
|
||||||
|
|
||||||
|
sysInfo := New(true)
|
||||||
|
assert.Assert(t, !sysInfo.CgroupNamespaces)
|
||||||
|
}
|
||||||
|
|
||||||
func TestNumCPU(t *testing.T) {
|
func TestNumCPU(t *testing.T) {
|
||||||
cpuNumbers := NumCPU()
|
cpuNumbers := NumCPU()
|
||||||
if cpuNumbers <= 0 {
|
if cpuNumbers <= 0 {
|
||||||
|
|
Loading…
Reference in a new issue