Browse Source

Merge pull request #5922 from crosbymichael/host-dev-priv

Mount /dev in tmpfs for privileged containers
Victor Vieux 11 years ago
parent
commit
5a0a03e394

+ 7 - 3
daemon/execdriver/native/create.go

@@ -10,6 +10,7 @@ import (
 	"github.com/dotcloud/docker/daemon/execdriver/native/template"
 	"github.com/dotcloud/docker/pkg/apparmor"
 	"github.com/dotcloud/docker/pkg/libcontainer"
+	"github.com/dotcloud/docker/pkg/libcontainer/mount/nodes"
 )
 
 // createContainer populates and configures the container type with the
@@ -34,8 +35,6 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Container
 		if err := d.setPrivileged(container); err != nil {
 			return nil, err
 		}
-	} else {
-		container.Mounts = append(container.Mounts, libcontainer.Mount{Type: "devtmpfs"})
 	}
 	if err := d.setupCgroups(container, c); err != nil {
 		return nil, err
@@ -97,12 +96,17 @@ func (d *driver) createNetwork(container *libcontainer.Container, c *execdriver.
 	return nil
 }
 
-func (d *driver) setPrivileged(container *libcontainer.Container) error {
+func (d *driver) setPrivileged(container *libcontainer.Container) (err error) {
 	container.Capabilities = libcontainer.GetAllCapabilities()
 	container.Cgroups.DeviceAccess = true
 
 	delete(container.Context, "restrictions")
 
+	container.OptionalDeviceNodes = nil
+	if container.RequiredDeviceNodes, err = nodes.GetHostDeviceNodes(); err != nil {
+		return err
+	}
+
 	if apparmor.IsEnabled() {
 		container.Context["apparmor_profile"] = "unconfined"
 	}

+ 4 - 1
daemon/execdriver/native/template/default_template.go

@@ -4,6 +4,7 @@ import (
 	"github.com/dotcloud/docker/pkg/apparmor"
 	"github.com/dotcloud/docker/pkg/libcontainer"
 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
+	"github.com/dotcloud/docker/pkg/libcontainer/mount/nodes"
 )
 
 // New returns the docker default configuration for libcontainer
@@ -32,7 +33,9 @@ func New() *libcontainer.Container {
 			Parent:       "docker",
 			DeviceAccess: false,
 		},
-		Context: libcontainer.Context{},
+		Context:             libcontainer.Context{},
+		RequiredDeviceNodes: nodes.DefaultNodes,
+		OptionalDeviceNodes: []string{"fuse"},
 	}
 	if apparmor.IsEnabled() {
 		container.Context["apparmor_profile"] = "docker-default"

+ 76 - 22
pkg/libcontainer/container.go

@@ -4,26 +4,69 @@ import (
 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
 )
 
-// Context is a generic key value pair that allows
-// arbatrary data to be sent
+// Context is a generic key value map that allows arbitrary data to be sent
 type Context map[string]string
 
-// Container defines configuration options for how a
-// container is setup inside a directory and how a process should be executed
+// Container defines configuration options for executing a process inside a contained environment
 type Container struct {
-	Hostname     string          `json:"hostname,omitempty"`      // hostname
-	ReadonlyFs   bool            `json:"readonly_fs,omitempty"`   // set the containers rootfs as readonly
-	NoPivotRoot  bool            `json:"no_pivot_root,omitempty"` // this can be enabled if you are running in ramdisk
-	User         string          `json:"user,omitempty"`          // user to execute the process as
-	WorkingDir   string          `json:"working_dir,omitempty"`   // current working directory
-	Env          []string        `json:"environment,omitempty"`   // environment to set
-	Tty          bool            `json:"tty,omitempty"`           // setup a proper tty or not
-	Namespaces   map[string]bool `json:"namespaces,omitempty"`    // namespaces to apply
-	Capabilities []string        `json:"capabilities,omitempty"`  // capabilities given to the container
-	Networks     []*Network      `json:"networks,omitempty"`      // nil for host's network stack
-	Cgroups      *cgroups.Cgroup `json:"cgroups,omitempty"`       // cgroups
-	Context      Context         `json:"context,omitempty"`       // generic context for specific options (apparmor, selinux)
-	Mounts       Mounts          `json:"mounts,omitempty"`
+	// Hostname optionally sets the container's hostname if provided
+	Hostname string `json:"hostname,omitempty"`
+
+	// ReadonlyFs will remount the container's rootfs as readonly where only externally mounted
+	// bind mounts are writable
+	ReadonlyFs bool `json:"readonly_fs,omitempty"`
+
+	// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs
+	// This is a common option when the container is running in ramdisk
+	NoPivotRoot bool `json:"no_pivot_root,omitempty"`
+
+	// User will set the uid and gid of the executing process running inside the container
+	User string `json:"user,omitempty"`
+
+	// WorkingDir will change the process's current working directory inside the container's rootfs
+	WorkingDir string `json:"working_dir,omitempty"`
+
+	// Env will populate the process's environment with the provided values
+	// Any values from the parent processes will be cleared before the values
+	// provided in Env are provided to the process
+	Env []string `json:"environment,omitempty"`
+
+	// Tty when true will allocate a pty slave on the host for access by the container's process
+	// and ensure that it is mounted inside the container's rootfs
+	Tty bool `json:"tty,omitempty"`
+
+	// Namespaces specifies the container's namespaces that it should setup when cloning the init process
+	// If a namespace is not provided that namespace is shared from the container's parent process
+	Namespaces map[string]bool `json:"namespaces,omitempty"`
+
+	// Capabilities specify the capabilities to keep when executing the process inside the container
+	// All capabilities not specified will be dropped from the process's capability mask
+	Capabilities []string `json:"capabilities,omitempty"`
+
+	// Networks specifies the container's network setup to be created
+	Networks []*Network `json:"networks,omitempty"`
+
+	// Cgroups specifies specific cgroup settings for the various subsystems that the container is
+	// placed into to limit the resources the container has available
+	Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"`
+
+	// Context is a generic key value format that allows for additional settings to be passed
+	// on the container's creation
+	// This is commonly used to specify apparmor profiles, selinux labels, and different restrictions
+	// placed on the container's processes
+	Context Context `json:"context,omitempty"`
+
+	// Mounts specify additional source and destination paths that will be mounted inside the container's
+	// rootfs and mount namespace if specified
+	Mounts Mounts `json:"mounts,omitempty"`
+
+	// RequiredDeviceNodes are a list of device nodes that will be mknod into the container's rootfs at /dev
+	// If the host system does not support the device that the container requests an error is returned
+	RequiredDeviceNodes []string `json:"required_device_nodes,omitempty"`
+
+	// OptionalDeviceNodes are a list of device nodes that will be mknod into the container's rootfs at /dev
+	// If the host system does not support the device that the container requests the error is ignored
+	OptionalDeviceNodes []string `json:"optional_device_nodes,omitempty"`
 }
 
 // Network defines configuration for a container's networking stack
@@ -31,9 +74,20 @@ type Container struct {
 // The network configuration can be omitted from a container causing the
 // container to be setup with the host's networking stack
 type Network struct {
-	Type    string  `json:"type,omitempty"`    // type of networking to setup i.e. veth, macvlan, etc
-	Context Context `json:"context,omitempty"` // generic context for type specific networking options
-	Address string  `json:"address,omitempty"`
-	Gateway string  `json:"gateway,omitempty"`
-	Mtu     int     `json:"mtu,omitempty"`
+	// Type sets the network's type, commonly veth and loopback
+	Type string `json:"type,omitempty"`
+
+	// Context is a generic key value format for setting additional options that are specific to
+	// the network type
+	Context Context `json:"context,omitempty"`
+
+	// Address contains the IP and mask to set on the network interface
+	Address string `json:"address,omitempty"`
+
+	// Gateway sets the gateway address that is used as the default for the interface
+	Gateway string `json:"gateway,omitempty"`
+
+	// Mtu sets the mtu value for the interface and will be mirrored on both the host and
+	// container's interfaces if a pair is created, specifically in the case of type veth
+	Mtu int `json:"mtu,omitempty"`
 }

+ 8 - 0
pkg/libcontainer/container.json

@@ -43,5 +43,13 @@
     {
       "type": "devtmpfs"
     }
+  ],
+  "required_device_nodes": [
+      "null",
+      "zero",
+      "full",
+      "random",
+      "urandom",
+      "tty"
   ]
 }

+ 15 - 6
pkg/libcontainer/container_test.go

@@ -4,12 +4,14 @@ import (
 	"encoding/json"
 	"os"
 	"testing"
+
+	"github.com/dotcloud/docker/pkg/libcontainer/mount/nodes"
 )
 
 // contains reports whether expected is present in values.
-func hasCapability(expected string, capabilities []string) bool {
-	for _, capability := range capabilities {
-		if capability == expected {
+func contains(expected string, values []string) bool {
+	for _, v := range values {
+		if v == expected {
 			return true
 		}
 	}
@@ -47,18 +49,25 @@ func TestContainerJsonFormat(t *testing.T) {
 		t.Fail()
 	}
 
-	if hasCapability("SYS_ADMIN", container.Capabilities) {
+	if contains("SYS_ADMIN", container.Capabilities) {
 		t.Log("SYS_ADMIN should not be enabled in capabilities mask")
 		t.Fail()
 	}
 
-	if !hasCapability("MKNOD", container.Capabilities) {
+	if !contains("MKNOD", container.Capabilities) {
 		t.Log("MKNOD should be enabled in capabilities mask")
 		t.Fail()
 	}
 
-	if hasCapability("SYS_CHROOT", container.Capabilities) {
+	if contains("SYS_CHROOT", container.Capabilities) {
 		t.Log("capabilities mask should not contain SYS_CHROOT")
 		t.Fail()
 	}
+
+	for _, n := range nodes.DefaultNodes {
+		if !contains(n, container.RequiredDeviceNodes) {
+			t.Logf("devices should contain %s", n)
+			t.Fail()
+		}
+	}
 }

+ 5 - 7
pkg/libcontainer/mount/init.go

@@ -48,11 +48,11 @@ func InitializeMountNamespace(rootfs, console string, container *libcontainer.Co
 	if err := setupBindmounts(rootfs, container.Mounts); err != nil {
 		return fmt.Errorf("bind mounts %s", err)
 	}
-	if err := nodes.CopyN(rootfs, nodes.DefaultNodes, true); err != nil {
-		return fmt.Errorf("copy dev nodes %s", err)
+	if err := nodes.CopyN(rootfs, container.RequiredDeviceNodes, true); err != nil {
+		return fmt.Errorf("copy required dev nodes %s", err)
 	}
-	if err := nodes.CopyN(rootfs, nodes.AdditionalNodes, false); err != nil {
-		return fmt.Errorf("copy additional dev nodes %s", err)
+	if err := nodes.CopyN(rootfs, container.OptionalDeviceNodes, false); err != nil {
+		return fmt.Errorf("copy optional dev nodes %s", err)
 	}
 	if err := SetupPtmx(rootfs, console, container.Context["mount_label"]); err != nil {
 		return err
@@ -195,12 +195,10 @@ func newSystemMounts(rootfs, mountLabel string, mounts libcontainer.Mounts) []mo
 	systemMounts := []mount{
 		{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
 		{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags},
+		{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)},
 		{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
 		{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
 	}
 
-	if len(mounts.OfType("devtmpfs")) == 1 {
-		systemMounts = append([]mount{{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)}}, systemMounts...)
-	}
 	return systemMounts
 }

+ 16 - 5
pkg/libcontainer/mount/nodes/nodes.go

@@ -4,6 +4,7 @@ package nodes
 
 import (
 	"fmt"
+	"io/ioutil"
 	"os"
 	"path/filepath"
 	"syscall"
@@ -21,11 +22,6 @@ var DefaultNodes = []string{
 	"tty",
 }
 
-// AdditionalNodes includes nodes that are not required
-var AdditionalNodes = []string{
-	"fuse",
-}
-
 // CopyN copies the device node from the host into the rootfs
 func CopyN(rootfs string, nodesToCopy []string, shouldExist bool) error {
 	oldMask := system.Umask(0000)
@@ -61,3 +57,18 @@ func Copy(rootfs, node string, shouldExist bool) error {
 	}
 	return nil
 }
+
+// GetHostDeviceNodes lists the host's /dev directory and returns the names
+// (not full paths) of the entries whose mode has the os.ModeDevice bit set.
+// Only the entries returned by reading /dev itself are examined; nothing is
+// read from subdirectories. If /dev cannot be read, the error from
+// ioutil.ReadDir is returned together with a nil slice.
+func GetHostDeviceNodes() ([]string, error) {
+	files, err := ioutil.ReadDir("/dev")
+	if err != nil {
+		return nil, err
+	}
+
+	// Start from an empty, non-nil slice so a /dev without device nodes
+	// still yields a non-nil result.
+	out := []string{}
+	for _, f := range files {
+		// Keep only entries flagged as device files.
+		if f.Mode()&os.ModeDevice == os.ModeDevice {
+			out = append(out, f.Name())
+		}
+	}
+	return out, nil
+}

+ 11 - 0
pkg/libcontainer/mount/nodes/nodes_unsupported.go

@@ -0,0 +1,11 @@
+// +build !linux
+
+package nodes
+
+import "github.com/dotcloud/docker/pkg/libcontainer"
+
+// DefaultNodes is empty in this build of the package, which the build
+// constraint at the top of the file selects for non-linux targets.
+var DefaultNodes = []string{}
+
+// GetHostDeviceNodes is the non-linux stand-in: it performs no work and
+// always returns a nil slice together with libcontainer.ErrUnsupported.
+func GetHostDeviceNodes() ([]string, error) {
+	return nil, libcontainer.ErrUnsupported
+}