Browse Source

Merge pull request #38377 from rgulewich/38332-cgroup-ns

Start containers in their own cgroup namespaces
Yong Tang 6 years ago
parent
commit
3042254a87

+ 5 - 0
api/server/router/container/container_routes.go

@@ -489,6 +489,11 @@ func (s *containerRouter) postContainersCreate(ctx context.Context, w http.Respo
 		if hostConfig.IpcMode.IsEmpty() {
 			hostConfig.IpcMode = container.IpcMode("shareable")
 		}
+
+		// Older clients expect the default to be "host"
+		if hostConfig.CgroupnsMode.IsEmpty() {
+			hostConfig.CgroupnsMode = container.CgroupnsMode("host")
+		}
 	}
 
 	if hostConfig != nil && hostConfig.PidsLimit != nil && *hostConfig.PidsLimit <= 0 {

+ 13 - 0
api/swagger.yaml

@@ -703,6 +703,19 @@ definitions:
             description: "A list of kernel capabilities to drop from the container. Conflicts with option 'Capabilities'"
             items:
               type: "string"
+          CgroupnsMode:
+            type: "string"
+            enum:
+              - "private"
+              - "host"
+            description: |
+                    cgroup namespace mode for the container. Possible values are:
+
+                    - `"private"`: the container runs in its own private cgroup namespace
+                    - `"host"`: use the host system's cgroup namespace
+
+                    If not specified, the daemon default is used, which can either be `"private"`
+                    or `"host"`, depending on daemon version, kernel support and configuration.
           Dns:
             type: "array"
             description: "A list of DNS servers for the container to use."

+ 27 - 3
api/types/container/host_config.go

@@ -10,6 +10,29 @@ import (
 	"github.com/docker/go-units"
 )
 
+// CgroupnsMode is the cgroup namespace mode requested for a container.
+// The recognised values are "private", "host" and the empty string (unset).
+type CgroupnsMode string
+
+// IsPrivate reports whether the mode asks for a private cgroup namespace.
+func (c CgroupnsMode) IsPrivate() bool {
+	return string(c) == "private"
+}
+
+// IsHost reports whether the mode asks for the host's cgroup namespace.
+func (c CgroupnsMode) IsHost() bool {
+	return string(c) == "host"
+}
+
+// IsEmpty reports whether no cgroup namespace mode has been set.
+func (c CgroupnsMode) IsEmpty() bool {
+	return len(c) == 0
+}
+
+// Valid reports whether the mode is unset, "private" or "host".
+func (c CgroupnsMode) Valid() bool {
+	switch c {
+	case "", "private", "host":
+		return true
+	}
+	return false
+}
+
 // Isolation represents the isolation technology of a container. The supported
 // values are platform specific
 type Isolation string
@@ -381,9 +404,10 @@ type HostConfig struct {
 	CapAdd          strslice.StrSlice // List of kernel capabilities to add to the container
 	CapDrop         strslice.StrSlice // List of kernel capabilities to remove from the container
 	Capabilities    []string          `json:"Capabilities"` // List of kernel capabilities to be available for container (this overrides the default set)
-	DNS             []string          `json:"Dns"`          // List of DNS server to lookup
-	DNSOptions      []string          `json:"DnsOptions"`   // List of DNSOption to look for
-	DNSSearch       []string          `json:"DnsSearch"`    // List of DNSSearch to look for
+	CgroupnsMode    CgroupnsMode      // Cgroup namespace mode to use for the container
+	DNS             []string          `json:"Dns"`        // List of DNS server to lookup
+	DNSOptions      []string          `json:"DnsOptions"` // List of DNSOption to look for
+	DNSSearch       []string          `json:"DnsSearch"`  // List of DNSSearch to look for
 	ExtraHosts      []string          // List of extra hosts
 	GroupAdd        []string          // List of additional groups that the container process will run as
 	IpcMode         IpcMode           // IPC namespace to use for the container

+ 1 - 0
cmd/dockerd/config_unix.go

@@ -64,5 +64,6 @@ func installConfigFlags(conf *config.Config, flags *pflag.FlagSet) error {
 	// rootless needs to be explicitly specified for running "rootful" dockerd in rootless dockerd (#38702)
 	// Note that defaultUserlandProxyPath and honorXDG are configured according to the value of rootless.RunningWithRootlessKit, not the value of --rootless.
 	flags.BoolVar(&conf.Rootless, "rootless", rootless.RunningWithRootlessKit(), "Enable rootless mode; typically used with RootlessKit (experimental)")
+	flags.StringVar(&conf.CgroupNamespaceMode, "default-cgroupns-mode", config.DefaultCgroupNamespaceMode, `Default mode for containers cgroup namespace ("host" | "private")`)
 	return nil
 }

+ 17 - 1
daemon/config/config_unix.go

@@ -11,6 +11,8 @@ import (
 )
 
 const (
+	// DefaultCgroupNamespaceMode is the default for a container's CgroupnsMode, if not set otherwise
+	DefaultCgroupNamespaceMode = "host" // TODO: change to private
 	// DefaultIpcMode is default for container's IpcMode, if not set otherwise
 	DefaultIpcMode = "private"
 )
@@ -37,6 +39,7 @@ type Config struct {
 	ShmSize              opts.MemBytes            `json:"default-shm-size,omitempty"`
 	NoNewPrivileges      bool                     `json:"no-new-privileges,omitempty"`
 	IpcMode              string                   `json:"default-ipc-mode,omitempty"`
+	CgroupNamespaceMode  string                   `json:"default-cgroupns-mode,omitempty"`
 	// ResolvConf is the path to the configuration of the host resolver
 	ResolvConf string `json:"resolv-conf,omitempty"`
 	Rootless   bool   `json:"rootless,omitempty"`
@@ -84,9 +87,22 @@ func verifyDefaultIpcMode(mode string) error {
 	return nil
 }
 
+// verifyDefaultCgroupNsMode returns an error when mode does not pass
+// CgroupnsMode.Valid, and nil otherwise.
+func verifyDefaultCgroupNsMode(mode string) error {
+	cm := containertypes.CgroupnsMode(mode)
+	if cm.Valid() {
+		return nil
+	}
+
+	return fmt.Errorf("Default cgroup namespace mode (%v) is invalid. Use \"host\" or \"private\".", cm) // nolint: golint
+}
+
 // ValidatePlatformConfig checks if any platform-specific configuration settings are invalid.
 func (conf *Config) ValidatePlatformConfig() error {
-	return verifyDefaultIpcMode(conf.IpcMode)
+	if err := verifyDefaultIpcMode(conf.IpcMode); err != nil {
+		return err
+	}
+
+	return verifyDefaultCgroupNsMode(conf.CgroupNamespaceMode)
 }
 
 // IsRootless returns conf.Rootless

+ 22 - 0
daemon/daemon_unix.go

@@ -356,6 +356,15 @@ func (daemon *Daemon) adaptContainerSettings(hostConfig *containertypes.HostConf
 		hostConfig.IpcMode = containertypes.IpcMode(m)
 	}
 
+	// Set default cgroup namespace mode, if unset for container
+	if hostConfig.CgroupnsMode.IsEmpty() {
+		m := config.DefaultCgroupNamespaceMode
+		if daemon.configStore != nil {
+			m = daemon.configStore.CgroupNamespaceMode
+		}
+		hostConfig.CgroupnsMode = containertypes.CgroupnsMode(m)
+	}
+
 	adaptSharedNamespaceContainer(daemon, hostConfig)
 
 	var err error
@@ -675,6 +684,19 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.
 		}
 	}
 
+	if !hostConfig.CgroupnsMode.Valid() {
+		return warnings, fmt.Errorf("invalid cgroup namespace mode: %v", hostConfig.CgroupnsMode)
+	}
+	if hostConfig.CgroupnsMode.IsPrivate() {
+		if !sysInfo.CgroupNamespaces {
+			warnings = append(warnings, "Your kernel does not support cgroup namespaces.  Cgroup namespace setting discarded.")
+		}
+
+		if hostConfig.Privileged {
+			return warnings, fmt.Errorf("privileged mode is incompatible with private cgroup namespaces.  You must run the container in the host cgroup namespace when running privileged mode")
+		}
+	}
+
 	return warnings, nil
 }
 

+ 4 - 0
daemon/info.go

@@ -178,6 +178,10 @@ func (daemon *Daemon) fillSecurityOptions(v *types.Info, sysInfo *sysinfo.SysInf
 	if daemon.Rootless() {
 		securityOptions = append(securityOptions, "name=rootless")
 	}
+	if daemon.cgroupNamespacesEnabled(sysInfo) {
+		securityOptions = append(securityOptions, "name=cgroupns")
+	}
+
 	v.SecurityOptions = securityOptions
 }
 

+ 5 - 0
daemon/info_unix.go

@@ -10,6 +10,7 @@ import (
 	"strings"
 
 	"github.com/docker/docker/api/types"
+	containertypes "github.com/docker/docker/api/types/container"
 	"github.com/docker/docker/dockerversion"
 	"github.com/docker/docker/pkg/sysinfo"
 	"github.com/pkg/errors"
@@ -247,6 +248,10 @@ func parseRuncVersion(v string) (version string, commit string, err error) {
 	return version, commit, err
 }
 
+// cgroupNamespacesEnabled reports whether sysInfo indicates cgroup namespace
+// support and the daemon's configured default cgroup namespace mode is "private".
+func (daemon *Daemon) cgroupNamespacesEnabled(sysInfo *sysinfo.SysInfo) bool {
+	if !sysInfo.CgroupNamespaces {
+		return false
+	}
+	defaultMode := containertypes.CgroupnsMode(daemon.configStore.CgroupNamespaceMode)
+	return defaultMode.IsPrivate()
+}
+
 // Rootless returns true if daemon is running in rootless mode
 func (daemon *Daemon) Rootless() bool {
 	return daemon.configStore.Rootless

+ 4 - 0
daemon/info_windows.go

@@ -14,6 +14,10 @@ func (daemon *Daemon) fillPlatformVersion(v *types.Version) {}
 func fillDriverWarnings(v *types.Info) {
 }
 
+// cgroupNamespacesEnabled always reports false in this Windows build of the
+// daemon; the sysInfo argument is not consulted.
+func (daemon *Daemon) cgroupNamespacesEnabled(sysInfo *sysinfo.SysInfo) bool {
+	return false
+}
+
 // Rootless returns true if daemon is running in rootless mode
 func (daemon *Daemon) Rootless() bool {
 	return false

+ 13 - 0
daemon/oci_linux.go

@@ -307,6 +307,19 @@ func WithNamespaces(daemon *Daemon, c *container.Container) coci.SpecOpts {
 			s.Hostname = ""
 		}
 
+		// cgroup
+		if !c.HostConfig.CgroupnsMode.IsEmpty() {
+			cgroupNsMode := c.HostConfig.CgroupnsMode
+			if !cgroupNsMode.Valid() {
+				return fmt.Errorf("invalid cgroup namespace mode: %v", cgroupNsMode)
+			}
+
+			if cgroupNsMode.IsPrivate() && !c.HostConfig.Privileged {
+				nsCgroup := specs.LinuxNamespace{Type: "cgroup"}
+				setNamespace(s, nsCgroup)
+			}
+		}
+
 		return nil
 	}
 }

+ 5 - 0
daemon/reload_unix.go

@@ -34,6 +34,10 @@ func (daemon *Daemon) reloadPlatform(conf *config.Config, attributes map[string]
 		daemon.configStore.ShmSize = conf.ShmSize
 	}
 
+	if conf.CgroupNamespaceMode != "" {
+		daemon.configStore.CgroupNamespaceMode = conf.CgroupNamespaceMode
+	}
+
 	if conf.IpcMode != "" {
 		daemon.configStore.IpcMode = conf.IpcMode
 	}
@@ -51,6 +55,7 @@ func (daemon *Daemon) reloadPlatform(conf *config.Config, attributes map[string]
 	attributes["default-runtime"] = daemon.configStore.DefaultRuntime
 	attributes["default-shm-size"] = fmt.Sprintf("%d", daemon.configStore.ShmSize)
 	attributes["default-ipc-mode"] = daemon.configStore.IpcMode
+	attributes["default-cgroupns-mode"] = daemon.configStore.CgroupNamespaceMode
 
 	return nil
 }

+ 5 - 0
docs/api/version-history.md

@@ -68,6 +68,11 @@ keywords: "API, Docker, rcli, REST, documentation"
 * `POST /containers/{id}/update` now accepts a `PidsLimit` field to tune a container's
   PID limit. Set `0` or `-1` for unlimited. Leave `null` to not change the current value.
 * `POST /build` now accepts `outputs` key for configuring build outputs when using BuildKit mode.
+* `POST /containers/create` on Linux now accepts the `HostConfig.CgroupnsMode` property.
+  Set the property to `host` to create the container in the daemon's cgroup namespace, or
+  `private` to create the container in its own private cgroup namespace.  The per-daemon
+  default is `host`, and can be changed by using the `CgroupNamespaceMode` daemon configuration
+  parameter.
 
 ## V1.39 API changes
 

+ 92 - 0
integration/build/build_cgroupns_linux_test.go

@@ -0,0 +1,92 @@
+package build // import "github.com/docker/docker/integration/build"
+
+import (
+	"context"
+	"encoding/json"
+	"io"
+	"strings"
+	"testing"
+
+	"github.com/docker/docker/api/types"
+	"github.com/docker/docker/integration/internal/requirement"
+	"github.com/docker/docker/internal/test/daemon"
+	"github.com/docker/docker/internal/test/fakecontext"
+	"github.com/docker/docker/pkg/jsonmessage"
+	"gotest.tools/assert"
+	"gotest.tools/skip"
+)
+
+// getCgroupFromBuildOutput scans the JSON message stream produced by a build
+// and returns the first stream entry that starts with "cgroup:" (the output of
+// `readlink /proc/<pid>/ns/cgroup`), with surrounding whitespace trimmed.
+//
+// If the stream ends without such an entry, io.ErrUnexpectedEOF is returned
+// instead of an empty string, so that callers comparing namespaces cannot pass
+// by accident when the expected output is missing. Any other decode error is
+// returned unchanged.
+func getCgroupFromBuildOutput(buildOutput io.Reader) (string, error) {
+	const prefix = "cgroup:"
+
+	dec := json.NewDecoder(buildOutput)
+	for {
+		var m jsonmessage.JSONMessage
+		switch err := dec.Decode(&m); {
+		case err == io.EOF:
+			// End of build output and the readlink line never showed up.
+			return "", io.ErrUnexpectedEOF
+		case err != nil:
+			return "", err
+		}
+		if strings.HasPrefix(m.Stream, prefix) {
+			return strings.TrimSpace(m.Stream), nil
+		}
+	}
+}
+
+// testBuildWithCgroupNs starts a test daemon whose default cgroup namespace
+// mode is daemonNsMode, builds a busybox-based image whose only RUN step prints
+// the build container's cgroup namespace link, and returns
+// (containerCgroup, daemonCgroup): the value extracted from the build output
+// and the daemon's own cgroup namespace as reported by d.CgroupNamespace.
+// The daemon is stopped when the function returns.
+func testBuildWithCgroupNs(t *testing.T, daemonNsMode string) (string, string) {
+	d := daemon.New(t, daemon.WithDefaultCgroupNamespaceMode(daemonNsMode))
+	d.StartWithBusybox(t)
+	defer d.Stop(t)
+
+	// The RUN step prints the namespace link of the build container's own process.
+	dockerfile := `
+		FROM busybox
+		RUN readlink /proc/self/ns/cgroup
+	`
+	ctx := context.Background()
+	source := fakecontext.New(t, "", fakecontext.WithDockerfile(dockerfile))
+	defer source.Close()
+
+	client := d.NewClientT(t)
+	resp, err := client.ImageBuild(ctx,
+		source.AsTarReader(t),
+		types.ImageBuildOptions{
+			Remove:      true,
+			ForceRemove: true,
+			Tags:        []string{"buildcgroupns"},
+		})
+	assert.NilError(t, err)
+	defer resp.Body.Close()
+
+	// Pull the "cgroup:..." line out of the streamed build output.
+	containerCgroup, err := getCgroupFromBuildOutput(resp.Body)
+	assert.NilError(t, err)
+	daemonCgroup := d.CgroupNamespace(t)
+
+	return containerCgroup, daemonCgroup
+}
+
+// TestCgroupNamespacesBuild checks that, with a daemon default of "private",
+// a build container reports a cgroup namespace different from the daemon's.
+func TestCgroupNamespacesBuild(t *testing.T) {
+	skip.If(t, testEnv.DaemonInfo.OSType != "linux")
+	skip.If(t, testEnv.IsRemoteDaemon())
+	skip.If(t, !requirement.CgroupNamespacesEnabled())
+
+	// Daemon default is "private": the build container must not share the
+	// daemon's cgroup namespace.
+	buildNs, daemonNs := testBuildWithCgroupNs(t, "private")
+	assert.Assert(t, daemonNs != buildNs)
+}
+
+// TestCgroupNamespacesBuildDaemonHostMode checks that, with a daemon default
+// of "host", a build container reports the same cgroup namespace as the daemon.
+func TestCgroupNamespacesBuildDaemonHostMode(t *testing.T) {
+	skip.If(t, testEnv.DaemonInfo.OSType != "linux")
+	skip.If(t, testEnv.IsRemoteDaemon())
+	skip.If(t, !requirement.CgroupNamespacesEnabled())
+
+	// Daemon default is "host": the build container must share the daemon's
+	// cgroup namespace.
+	buildNs, daemonNs := testBuildWithCgroupNs(t, "host")
+	assert.Assert(t, daemonNs == buildNs)
+}

+ 152 - 0
integration/container/run_cgroupns_linux_test.go

@@ -0,0 +1,152 @@
+package container // import "github.com/docker/docker/integration/container"
+
+import (
+	"context"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/docker/docker/client"
+	"github.com/docker/docker/integration/internal/container"
+	"github.com/docker/docker/integration/internal/requirement"
+	"github.com/docker/docker/internal/test/daemon"
+	"gotest.tools/assert"
+	is "gotest.tools/assert/cmp"
+	"gotest.tools/poll"
+	"gotest.tools/skip"
+)
+
+// containerCgroupNamespace execs `readlink /proc/1/ns/cgroup` inside container
+// cID and returns its trimmed stdout. The test fails if the exec errors, writes
+// anything to stderr, or exits non-zero.
+func containerCgroupNamespace(ctx context.Context, t *testing.T, client *client.Client, cID string) string {
+	readlinkCmd := []string{"readlink", "/proc/1/ns/cgroup"}
+	result, err := container.Exec(ctx, client, cID, readlinkCmd)
+	assert.NilError(t, err)
+	assert.Assert(t, is.Len(result.Stderr(), 0))
+	assert.Equal(t, 0, result.ExitCode)
+	return strings.TrimSpace(result.Stdout())
+}
+
+// testRunWithCgroupNs starts a test daemon whose default cgroup namespace mode
+// is daemonNsMode, runs a container configured by containerOpts, waits for it
+// to reach the "running" state, and returns (containerCgroup, daemonCgroup):
+// the cgroup namespace link of the container's pid 1 and that of the daemon.
+// The daemon is stopped when the function returns.
+func testRunWithCgroupNs(t *testing.T, daemonNsMode string, containerOpts ...func(*container.TestContainerConfig)) (string, string) {
+	d := daemon.New(t, daemon.WithDefaultCgroupNamespaceMode(daemonNsMode))
+	client := d.NewClientT(t)
+	ctx := context.Background()
+
+	d.StartWithBusybox(t)
+	defer d.Stop(t)
+
+	cID := container.Run(t, ctx, client, containerOpts...)
+	poll.WaitOn(t, container.IsInState(ctx, client, cID, "running"), poll.WithDelay(100*time.Millisecond))
+
+	daemonCgroup := d.CgroupNamespace(t)
+	containerCgroup := containerCgroupNamespace(ctx, t, client, cID)
+	return containerCgroup, daemonCgroup
+}
+
+// testCreateFailureWithCgroupNs starts a test daemon whose default cgroup
+// namespace mode is daemonNsMode and attempts to create a container configured
+// by containerOpts, asserting via container.CreateExpectingErr that creation
+// fails with an error containing errStr. The daemon is stopped when the
+// function returns.
+func testCreateFailureWithCgroupNs(t *testing.T, daemonNsMode string, errStr string, containerOpts ...func(*container.TestContainerConfig)) {
+	d := daemon.New(t, daemon.WithDefaultCgroupNamespaceMode(daemonNsMode))
+	client := d.NewClientT(t)
+	ctx := context.Background()
+
+	d.StartWithBusybox(t)
+	defer d.Stop(t)
+	container.CreateExpectingErr(t, ctx, client, errStr, containerOpts...)
+}
+
+// TestCgroupNamespacesRun checks that, with a daemon default of "private", a
+// container started without an explicit mode is in its own cgroup namespace.
+func TestCgroupNamespacesRun(t *testing.T) {
+	skip.If(t, testEnv.DaemonInfo.OSType != "linux")
+	skip.If(t, testEnv.IsRemoteDaemon())
+	skip.If(t, !requirement.CgroupNamespacesEnabled())
+
+	// Daemon default is "private": the container must not share the daemon's
+	// cgroup namespace.
+	ctrNs, daemonNs := testRunWithCgroupNs(t, "private")
+	assert.Assert(t, daemonNs != ctrNs)
+}
+
+// TestCgroupNamespacesRunPrivileged checks that a privileged container started
+// without an explicit cgroup namespace mode stays in the daemon's cgroup
+// namespace even when the daemon default is "private".
+func TestCgroupNamespacesRunPrivileged(t *testing.T) {
+	skip.If(t, testEnv.DaemonInfo.OSType != "linux")
+	skip.If(t, testEnv.IsRemoteDaemon())
+	// Skip when the host lacks cgroup namespaces: the test compares
+	// /proc/<pid>/ns/cgroup links, which only exist with kernel support.
+	skip.If(t, !requirement.CgroupNamespacesEnabled())
+
+	// When the daemon defaults to private cgroup namespaces, privileged containers
+	// launched should not be inside their own cgroup namespaces
+	containerCgroup, daemonCgroup := testRunWithCgroupNs(t, "private", container.WithPrivileged(true))
+	assert.Assert(t, daemonCgroup == containerCgroup)
+}
+
+// TestCgroupNamespacesRunDaemonHostMode checks that, with a daemon default of
+// "host", a container started without an explicit mode shares the daemon's
+// cgroup namespace.
+func TestCgroupNamespacesRunDaemonHostMode(t *testing.T) {
+	skip.If(t, testEnv.DaemonInfo.OSType != "linux")
+	skip.If(t, testEnv.IsRemoteDaemon())
+	skip.If(t, !requirement.CgroupNamespacesEnabled())
+
+	// Daemon default is "host": the container must share the daemon's cgroup
+	// namespace.
+	ctrNs, daemonNs := testRunWithCgroupNs(t, "host")
+	assert.Assert(t, daemonNs == ctrNs)
+}
+
+// TestCgroupNamespacesRunHostMode checks that an explicit container mode of
+// "host" overrides a daemon default of "private".
+func TestCgroupNamespacesRunHostMode(t *testing.T) {
+	skip.If(t, testEnv.DaemonInfo.OSType != "linux")
+	skip.If(t, testEnv.IsRemoteDaemon())
+	skip.If(t, !requirement.CgroupNamespacesEnabled())
+
+	// Daemon default is "private" but the container asks for "host": it must
+	// share the daemon's cgroup namespace.
+	hostMode := container.WithCgroupnsMode("host")
+	ctrNs, daemonNs := testRunWithCgroupNs(t, "private", hostMode)
+	assert.Assert(t, daemonNs == ctrNs)
+}
+
+// TestCgroupNamespacesRunPrivateMode checks that a container explicitly asking
+// for "private" on a daemon defaulting to "private" gets its own cgroup namespace.
+func TestCgroupNamespacesRunPrivateMode(t *testing.T) {
+	skip.If(t, testEnv.DaemonInfo.OSType != "linux")
+	skip.If(t, testEnv.IsRemoteDaemon())
+	skip.If(t, !requirement.CgroupNamespacesEnabled())
+
+	// Daemon default and container mode are both "private": the container must
+	// not share the daemon's cgroup namespace.
+	privateMode := container.WithCgroupnsMode("private")
+	ctrNs, daemonNs := testRunWithCgroupNs(t, "private", privateMode)
+	assert.Assert(t, daemonNs != ctrNs)
+}
+
+func TestCgroupNamespacesRunPrivilegedAndPrivate(t *testing.T) {
+	skip.If(t, testEnv.DaemonInfo.OSType != "linux")
+	skip.If(t, testEnv.IsRemoteDaemon())
+	skip.If(t, !requirement.CgroupNamespacesEnabled())
+
+	// Running with both privileged and cgroupns=private is not allowed
+	errStr := "privileged mode is incompatible with private cgroup namespaces.  You must run the container in the host cgroup namespace when running privileged mode"
+	testCreateFailureWithCgroupNs(t, "private", errStr, container.WithPrivileged(true), container.WithCgroupnsMode("private"))
+}
+
+func TestCgroupNamespacesRunInvalidMode(t *testing.T) {
+	skip.If(t, testEnv.DaemonInfo.OSType != "linux")
+	skip.If(t, testEnv.IsRemoteDaemon())
+	skip.If(t, !requirement.CgroupNamespacesEnabled())
+
+	// An invalid cgroup namespace mode should return an error on container creation
+	errStr := "invalid cgroup namespace mode: invalid"
+	testCreateFailureWithCgroupNs(t, "private", errStr, container.WithCgroupnsMode("invalid"))
+}
+
+// TestCgroupNamespacesRunOlderClient checks that a container created through a
+// client pinned to API version 1.39 ends up in the daemon's cgroup namespace
+// even though the daemon default is "private".
+// Clients before 1.40 expect containers to be created in the host cgroup namespace,
+// regardless of the default setting of the daemon
+func TestCgroupNamespacesRunOlderClient(t *testing.T) {
+	skip.If(t, testEnv.DaemonInfo.OSType != "linux")
+	skip.If(t, testEnv.IsRemoteDaemon())
+	skip.If(t, !requirement.CgroupNamespacesEnabled())
+
+	d := daemon.New(t, daemon.WithDefaultCgroupNamespaceMode("private"))
+	// Pin the client to the older API version under test.
+	client := d.NewClientT(t, client.WithVersion("1.39"))
+
+	ctx := context.Background()
+	d.StartWithBusybox(t)
+	defer d.Stop(t)
+
+	cID := container.Run(t, ctx, client)
+	poll.WaitOn(t, container.IsInState(ctx, client, cID, "running"), poll.WithDelay(100*time.Millisecond))
+
+	daemonCgroup := d.CgroupNamespace(t)
+	containerCgroup := containerCgroupNamespace(ctx, t, client, cID)
+	assert.Assert(t, daemonCgroup == containerCgroup)
+}

+ 14 - 3
integration/internal/container/container.go

@@ -20,9 +20,9 @@ type TestContainerConfig struct {
 	NetworkingConfig *network.NetworkingConfig
 }
 
-// Create creates a container with the specified options
+// create creates a container with the specified options
 // nolint: golint
-func Create(t *testing.T, ctx context.Context, client client.APIClient, ops ...func(*TestContainerConfig)) string { // nolint: golint
+func create(t *testing.T, ctx context.Context, client client.APIClient, ops ...func(*TestContainerConfig)) (container.ContainerCreateCreatedBody, error) { // nolint: golint
 	t.Helper()
 	config := &TestContainerConfig{
 		Config: &container.Config{
@@ -37,12 +37,23 @@ func Create(t *testing.T, ctx context.Context, client client.APIClient, ops ...f
 		op(config)
 	}
 
-	c, err := client.ContainerCreate(ctx, config.Config, config.HostConfig, config.NetworkingConfig, config.Name)
+	return client.ContainerCreate(ctx, config.Config, config.HostConfig, config.NetworkingConfig, config.Name)
+}
+
+// Create creates a container with the specified options, asserting that there was no error
+func Create(t *testing.T, ctx context.Context, client client.APIClient, ops ...func(*TestContainerConfig)) string { // nolint: golint
+	c, err := create(t, ctx, client, ops...)
 	assert.NilError(t, err)
 
 	return c.ID
 }
 
+// CreateExpectingErr attempts to create a container with the specified options
+// and asserts that the attempt fails with an error containing errMsg.
+func CreateExpectingErr(t *testing.T, ctx context.Context, client client.APIClient, errMsg string, ops ...func(*TestContainerConfig)) { // nolint: golint
+	_, createErr := create(t, ctx, client, ops...)
+	assert.ErrorContains(t, createErr, errMsg)
+}
+
 // Run creates and start a container with the specified options
 // nolint: golint
 func Run(t *testing.T, ctx context.Context, client client.APIClient, ops ...func(*TestContainerConfig)) string { // nolint: golint

+ 20 - 0
integration/internal/container/ops.go

@@ -160,3 +160,23 @@ func WithUser(user string) func(c *TestContainerConfig) {
 		c.Config.User = user
 	}
 }
+
+// WithPrivileged returns an option that sets HostConfig.Privileged on the test
+// container, allocating the HostConfig first if it is nil.
+func WithPrivileged(privileged bool) func(*TestContainerConfig) {
+	return func(cfg *TestContainerConfig) {
+		hc := cfg.HostConfig
+		if hc == nil {
+			hc = &containertypes.HostConfig{}
+			cfg.HostConfig = hc
+		}
+		hc.Privileged = privileged
+	}
+}
+
+// WithCgroupnsMode returns an option that sets HostConfig.CgroupnsMode on the
+// test container, allocating the HostConfig first if it is nil.
+func WithCgroupnsMode(mode string) func(*TestContainerConfig) {
+	return func(cfg *TestContainerConfig) {
+		hc := cfg.HostConfig
+		if hc == nil {
+			hc = &containertypes.HostConfig{}
+			cfg.HostConfig = hc
+		}
+		hc.CgroupnsMode = containertypes.CgroupnsMode(mode)
+	}
+}

+ 10 - 0
integration/internal/requirement/requirement_linux.go

@@ -1,12 +1,22 @@
 package requirement // import "github.com/docker/docker/integration/internal/requirement"
 
 import (
+	"os"
 	"strings"
 
 	"github.com/docker/docker/pkg/parsers/kernel"
 	"gotest.tools/icmd"
 )
 
+// CgroupNamespacesEnabled reports whether /proc/self/ns/cgroup is present on
+// this host. Any stat outcome other than "does not exist" counts as present.
+func CgroupNamespacesEnabled() bool {
+	_, statErr := os.Stat("/proc/self/ns/cgroup")
+	return !os.IsNotExist(statErr)
+}
+
 func overlayFSSupported() bool {
 	result := icmd.RunCommand("/bin/sh", "-c", "cat /proc/filesystems")
 	if result.Error != nil {

+ 22 - 13
internal/test/daemon/daemon.go

@@ -60,16 +60,17 @@ type Daemon struct {
 	UseDefaultHost    bool
 	UseDefaultTLSHost bool
 
-	id            string
-	logFile       *os.File
-	cmd           *exec.Cmd
-	storageDriver string
-	userlandProxy bool
-	execRoot      string
-	experimental  bool
-	init          bool
-	dockerdBinary string
-	log           logT
+	id                         string
+	logFile                    *os.File
+	cmd                        *exec.Cmd
+	storageDriver              string
+	userlandProxy              bool
+	defaultCgroupNamespaceMode string
+	execRoot                   string
+	experimental               bool
+	init                       bool
+	dockerdBinary              string
+	log                        logT
 
 	// swarm related field
 	swarmListenAddr string
@@ -169,13 +170,18 @@ func (d *Daemon) ReadLogFile() ([]byte, error) {
 }
 
 // NewClientT creates new client based on daemon's socket path
-func (d *Daemon) NewClientT(t assert.TestingT) *client.Client {
+func (d *Daemon) NewClientT(t assert.TestingT, extraOpts ...client.Opt) *client.Client {
 	if ht, ok := t.(test.HelperT); ok {
 		ht.Helper()
 	}
-	c, err := client.NewClientWithOpts(
+
+	clientOpts := []client.Opt{
 		client.FromEnv,
-		client.WithHost(d.Sock()))
+		client.WithHost(d.Sock()),
+	}
+	clientOpts = append(clientOpts, extraOpts...)
+
+	c, err := client.NewClientWithOpts(clientOpts...)
 	assert.NilError(t, err, "cannot create daemon client")
 	return c
 }
@@ -225,6 +231,9 @@ func (d *Daemon) StartWithLogFile(out *os.File, providedArgs ...string) error {
 		"--pidfile", fmt.Sprintf("%s/docker.pid", d.Folder),
 		fmt.Sprintf("--userland-proxy=%t", d.userlandProxy),
 	)
+	if d.defaultCgroupNamespaceMode != "" {
+		args = append(args, []string{"--default-cgroupns-mode", d.defaultCgroupNamespaceMode}...)
+	}
 	if d.experimental {
 		args = append(args, "--experimental")
 	}

+ 11 - 0
internal/test/daemon/daemon_unix.go

@@ -3,11 +3,14 @@
 package daemon // import "github.com/docker/docker/internal/test/daemon"
 
 import (
+	"fmt"
 	"os"
 	"path/filepath"
+	"strings"
 
 	"github.com/docker/docker/internal/test"
 	"golang.org/x/sys/unix"
+	"gotest.tools/assert"
 )
 
 func cleanupNetworkNamespace(t testingT, execRoot string) {
@@ -29,6 +32,14 @@ func cleanupNetworkNamespace(t testingT, execRoot string) {
 	})
 }
 
+// CgroupNamespace returns the target of the /proc/<pid>/ns/cgroup symlink for
+// the daemon's pid, with surrounding whitespace trimmed. The test fails if the
+// link cannot be read.
+func (d *Daemon) CgroupNamespace(t assert.TestingT) string {
+	nsPath := fmt.Sprintf("/proc/%d/ns/cgroup", d.Pid())
+	target, err := os.Readlink(nsPath)
+	assert.NilError(t, err)
+
+	return strings.TrimSpace(target)
+}
+
 // SignalDaemonDump sends a signal to the daemon to write a dump file
 func SignalDaemonDump(pid int) {
 	unix.Kill(pid, unix.SIGQUIT)

+ 7 - 0
internal/test/daemon/daemon_windows.go

@@ -5,6 +5,7 @@ import (
 	"strconv"
 
 	"golang.org/x/sys/windows"
+	"gotest.tools/assert"
 )
 
 // SignalDaemonDump sends a signal to the daemon to write a dump file
@@ -23,3 +24,9 @@ func signalDaemonReload(pid int) error {
 
 func cleanupNetworkNamespace(t testingT, execRoot string) {
 }
+
+// CgroupNamespace is the Windows stand-in for the cgroup namespace lookup.
+// It unconditionally fails the assertion on t and returns an explanatory
+// message rather than a namespace value.
+func (d *Daemon) CgroupNamespace(t assert.TestingT) string {
+	assert.Assert(t, false)
+	return "cgroup namespaces are not supported on Windows"
+}

+ 7 - 0
internal/test/daemon/ops.go

@@ -2,6 +2,13 @@ package daemon
 
 import "github.com/docker/docker/internal/test/environment"
 
+// WithDefaultCgroupNamespaceMode returns a daemon option that records mode as
+// the daemon's default cgroup namespace mode.
+func WithDefaultCgroupNamespaceMode(mode string) func(*Daemon) {
+	setMode := func(d *Daemon) {
+		d.defaultCgroupNamespaceMode = mode
+	}
+	return setMode
+}
+
 // WithExperimental sets the daemon in experimental mode
 func WithExperimental(d *Daemon) {
 	d.experimental = true

+ 3 - 0
pkg/sysinfo/sysinfo.go

@@ -16,6 +16,9 @@ type SysInfo struct {
 	cgroupCpusetInfo
 	cgroupPids
 
+	// Whether the kernel supports cgroup namespaces or not
+	CgroupNamespaces bool
+
 	// Whether IPv4 forwarding is supported or not, if this was disabled, networking will not work
 	IPv4ForwardingDisabled bool
 

+ 10 - 0
pkg/sysinfo/sysinfo_linux.go

@@ -53,6 +53,7 @@ func New(quiet bool) *SysInfo {
 		applyNetworkingInfo,
 		applyAppArmorInfo,
 		applySeccompInfo,
+		applyCgroupNsInfo,
 	}...)
 
 	for _, o := range ops {
@@ -250,6 +251,15 @@ func applyAppArmorInfo(info *SysInfo, _ map[string]string) []string {
 	return warnings
 }
 
+// applyCgroupNsInfo adds cgroup namespace information to the info.
+func applyCgroupNsInfo(info *SysInfo, _ map[string]string) []string {
+	var warnings []string
+	if _, err := os.Stat("/proc/self/ns/cgroup"); !os.IsNotExist(err) {
+		info.CgroupNamespaces = true
+	}
+	return warnings
+}
+
 // applySeccompInfo checks if Seccomp is supported, via CONFIG_SECCOMP.
 func applySeccompInfo(info *SysInfo, _ map[string]string) []string {
 	var warnings []string

+ 20 - 0
pkg/sysinfo/sysinfo_linux_test.go

@@ -96,6 +96,26 @@ func TestNewAppArmorDisabled(t *testing.T) {
 	assert.Assert(t, !sysInfo.AppArmor)
 }
 
+// TestNewCgroupNamespacesEnabled checks that New reports cgroup namespace
+// support when /proc/self/ns/cgroup can be stat'ed; otherwise it is skipped.
+func TestNewCgroupNamespacesEnabled(t *testing.T) {
+	_, statErr := os.Stat("/proc/self/ns/cgroup")
+	if statErr != nil {
+		t.Skip("cgroup namespaces must be enabled")
+	}
+
+	// The kernel exposes the namespace link, so the flag must be set.
+	info := New(true)
+	assert.Assert(t, info.CgroupNamespaces)
+}
+
+// TestNewCgroupNamespacesDisabled checks that New reports no cgroup namespace
+// support when /proc/self/ns/cgroup does not exist; otherwise it is skipped.
+func TestNewCgroupNamespacesDisabled(t *testing.T) {
+	_, statErr := os.Stat("/proc/self/ns/cgroup")
+	if !os.IsNotExist(statErr) {
+		t.Skip("cgroup namespaces must be disabled")
+	}
+
+	// The namespace link is absent, so the flag must stay unset.
+	info := New(true)
+	assert.Assert(t, !info.CgroupNamespaces)
+}
+
 func TestNumCPU(t *testing.T) {
 	cpuNumbers := NumCPU()
 	if cpuNumbers <= 0 {

+ 26 - 0
runconfig/hostconfig_test.go

@@ -14,6 +14,32 @@ import (
 	is "gotest.tools/assert/cmp"
 )
 
+// TestCgroupnsModeTest checks the IsPrivate, IsHost, IsEmpty and Valid
+// predicates of container.CgroupnsMode against a table of mode strings.
+func TestCgroupnsModeTest(t *testing.T) {
+	cgroupNsModes := map[container.CgroupnsMode][]bool{
+		// private, host, empty, valid
+		"":                {false, false, true, true},
+		"something:weird": {false, false, false, false},
+		"host":            {false, true, false, true},
+		"host:name":       {false, false, false, false},
+		"private":         {true, false, false, true},
+		"private:name":    {false, false, false, false},
+	}
+	for cgroupNsMode, state := range cgroupNsModes {
+		if cgroupNsMode.IsPrivate() != state[0] {
+			t.Fatalf("CgroupnsMode.IsPrivate for %v should have been %v but was %v", cgroupNsMode, state[0], cgroupNsMode.IsPrivate())
+		}
+		if cgroupNsMode.IsHost() != state[1] {
+			t.Fatalf("CgroupnsMode.IsHost for %v should have been %v but was %v", cgroupNsMode, state[1], cgroupNsMode.IsHost())
+		}
+		// Each failure message names the predicate under test and prints its
+		// own expected/actual pair.
+		if cgroupNsMode.IsEmpty() != state[2] {
+			t.Fatalf("CgroupnsMode.IsEmpty for %v should have been %v but was %v", cgroupNsMode, state[2], cgroupNsMode.IsEmpty())
+		}
+		if cgroupNsMode.Valid() != state[3] {
+			t.Fatalf("CgroupnsMode.Valid for %v should have been %v but was %v", cgroupNsMode, state[3], cgroupNsMode.Valid())
+		}
+	}
+}
+
 // TODO Windows: This will need addressing for a Windows daemon.
 func TestNetworkModeTest(t *testing.T) {
 	networkModes := map[container.NetworkMode][]bool{