Selaa lähdekoodia

Refactor device handling code

We now have one place that keeps track of (most) devices that are allowed and created within the container.  That place is pkg/libcontainer/devices/devices.go

This fixes several inconsistencies between which devices were created in the lxc backend and the native backend.  It also fixes inconsistencies between which devices were created and which were allowed.  For example, /dev/full was being created but it was not allowed within the cgroup.  It also declares the file modes and permissions of the default devices, rather than copying them from the host.  This is in line with docker's philosophy of not being host dependent.

Docker-DCO-1.1-Signed-off-by: Timothy Hobbs <timothyhobbs@seznam.cz> (github: https://github.com/timthelion)
Timothy Hobbs 11 vuotta sitten
vanhempi
commit
608702b980

+ 15 - 12
daemon/container.go

@@ -23,6 +23,7 @@ import (
 	"github.com/dotcloud/docker/links"
 	"github.com/dotcloud/docker/links"
 	"github.com/dotcloud/docker/nat"
 	"github.com/dotcloud/docker/nat"
 	"github.com/dotcloud/docker/pkg/label"
 	"github.com/dotcloud/docker/pkg/label"
+	"github.com/dotcloud/docker/pkg/libcontainer/devices"
 	"github.com/dotcloud/docker/pkg/networkfs/etchosts"
 	"github.com/dotcloud/docker/pkg/networkfs/etchosts"
 	"github.com/dotcloud/docker/pkg/networkfs/resolvconf"
 	"github.com/dotcloud/docker/pkg/networkfs/resolvconf"
 	"github.com/dotcloud/docker/pkg/symlink"
 	"github.com/dotcloud/docker/pkg/symlink"
@@ -230,18 +231,20 @@ func populateCommand(c *Container, env []string) error {
 		Cpuset:     c.Config.Cpuset,
 		Cpuset:     c.Config.Cpuset,
 	}
 	}
 	c.command = &execdriver.Command{
 	c.command = &execdriver.Command{
-		ID:         c.ID,
-		Privileged: c.hostConfig.Privileged,
-		Rootfs:     c.RootfsPath(),
-		InitPath:   "/.dockerinit",
-		Entrypoint: c.Path,
-		Arguments:  c.Args,
-		WorkingDir: c.Config.WorkingDir,
-		Network:    en,
-		Tty:        c.Config.Tty,
-		User:       c.Config.User,
-		Config:     context,
-		Resources:  resources,
+		ID:                 c.ID,
+		Privileged:         c.hostConfig.Privileged,
+		Rootfs:             c.RootfsPath(),
+		InitPath:           "/.dockerinit",
+		Entrypoint:         c.Path,
+		Arguments:          c.Args,
+		WorkingDir:         c.Config.WorkingDir,
+		Network:            en,
+		Tty:                c.Config.Tty,
+		User:               c.Config.User,
+		Config:             context,
+		Resources:          resources,
+		AllowedDevices:     devices.DefaultAllowedDevices,
+		AutoCreatedDevices: devices.DefaultAutoCreatedDevices,
 	}
 	}
 	c.command.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
 	c.command.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
 	c.command.Env = env
 	c.command.Env = env

+ 18 - 14
daemon/execdriver/driver.go

@@ -5,6 +5,8 @@ import (
 	"io"
 	"io"
 	"os"
 	"os"
 	"os/exec"
 	"os/exec"
+
+	"github.com/dotcloud/docker/pkg/libcontainer/devices"
 )
 )
 
 
 // Context is a generic key value pair that allows
 // Context is a generic key value pair that allows
@@ -120,20 +122,22 @@ type Mount struct {
 type Command struct {
 type Command struct {
 	exec.Cmd `json:"-"`
 	exec.Cmd `json:"-"`
 
 
-	ID         string              `json:"id"`
-	Privileged bool                `json:"privileged"`
-	User       string              `json:"user"`
-	Rootfs     string              `json:"rootfs"`   // root fs of the container
-	InitPath   string              `json:"initpath"` // dockerinit
-	Entrypoint string              `json:"entrypoint"`
-	Arguments  []string            `json:"arguments"`
-	WorkingDir string              `json:"working_dir"`
-	ConfigPath string              `json:"config_path"` // this should be able to be removed when the lxc template is moved into the driver
-	Tty        bool                `json:"tty"`
-	Network    *Network            `json:"network"`
-	Config     map[string][]string `json:"config"` //  generic values that specific drivers can consume
-	Resources  *Resources          `json:"resources"`
-	Mounts     []Mount             `json:"mounts"`
+	ID                 string              `json:"id"`
+	Privileged         bool                `json:"privileged"`
+	User               string              `json:"user"`
+	Rootfs             string              `json:"rootfs"`   // root fs of the container
+	InitPath           string              `json:"initpath"` // dockerinit
+	Entrypoint         string              `json:"entrypoint"`
+	Arguments          []string            `json:"arguments"`
+	WorkingDir         string              `json:"working_dir"`
+	ConfigPath         string              `json:"config_path"` // this should be able to be removed when the lxc template is moved into the driver
+	Tty                bool                `json:"tty"`
+	Network            *Network            `json:"network"`
+	Config             map[string][]string `json:"config"` //  generic values that specific drivers can consume
+	Resources          *Resources          `json:"resources"`
+	Mounts             []Mount             `json:"mounts"`
+	AllowedDevices     []devices.Device    `json:"allowed_devices"`
+	AutoCreatedDevices []devices.Device    `json:"autocreated_devices"`
 
 
 	Terminal     Terminal `json:"-"`             // standard or tty terminal
 	Terminal     Terminal `json:"-"`             // standard or tty terminal
 	Console      string   `json:"-"`             // dev/console path
 	Console      string   `json:"-"`             // dev/console path

+ 5 - 0
daemon/execdriver/lxc/driver.go

@@ -17,6 +17,7 @@ import (
 	"github.com/dotcloud/docker/daemon/execdriver"
 	"github.com/dotcloud/docker/daemon/execdriver"
 	"github.com/dotcloud/docker/pkg/label"
 	"github.com/dotcloud/docker/pkg/label"
 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
+	"github.com/dotcloud/docker/pkg/libcontainer/mount/nodes"
 	"github.com/dotcloud/docker/pkg/system"
 	"github.com/dotcloud/docker/pkg/system"
 	"github.com/dotcloud/docker/utils"
 	"github.com/dotcloud/docker/utils"
 )
 )
@@ -159,6 +160,10 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
 	c.Path = aname
 	c.Path = aname
 	c.Args = append([]string{name}, arg...)
 	c.Args = append([]string{name}, arg...)
 
 
+	if err := nodes.CreateDeviceNodes(c.Rootfs, c.AutoCreatedDevices); err != nil {
+		return -1, err
+	}
+
 	if err := c.Start(); err != nil {
 	if err := c.Start(); err != nil {
 		return -1, err
 		return -1, err
 	}
 	}

+ 4 - 31
daemon/execdriver/lxc/lxc_template.go

@@ -47,37 +47,10 @@ lxc.cgroup.devices.allow = a
 {{else}}
 {{else}}
 # no implicit access to devices
 # no implicit access to devices
 lxc.cgroup.devices.deny = a
 lxc.cgroup.devices.deny = a
-
-# but allow mknod for any device
-lxc.cgroup.devices.allow = c *:* m
-lxc.cgroup.devices.allow = b *:* m
-
-# /dev/null and zero
-lxc.cgroup.devices.allow = c 1:3 rwm
-lxc.cgroup.devices.allow = c 1:5 rwm
-
-# consoles
-lxc.cgroup.devices.allow = c 5:1 rwm
-lxc.cgroup.devices.allow = c 5:0 rwm
-lxc.cgroup.devices.allow = c 4:0 rwm
-lxc.cgroup.devices.allow = c 4:1 rwm
-
-# /dev/urandom,/dev/random
-lxc.cgroup.devices.allow = c 1:9 rwm
-lxc.cgroup.devices.allow = c 1:8 rwm
-
-# /dev/pts/ - pts namespaces are "coming soon"
-lxc.cgroup.devices.allow = c 136:* rwm
-lxc.cgroup.devices.allow = c 5:2 rwm
-
-# tuntap
-lxc.cgroup.devices.allow = c 10:200 rwm
-
-# fuse
-#lxc.cgroup.devices.allow = c 10:229 rwm
-
-# rtc
-#lxc.cgroup.devices.allow = c 254:0 rwm
+#Allow the devices passed to us in the AllowedDevices list.
+{{range $allowedDevice := .AllowedDevices}}
+lxc.cgroup.devices.allow = {{$allowedDevice.GetCgroupAllowString}}
+{{end}}
 {{end}}
 {{end}}
 
 
 # standard mount point
 # standard mount point

+ 3 - 0
daemon/execdriver/lxc/lxc_template_unit_test.go

@@ -11,6 +11,8 @@ import (
 	"strings"
 	"strings"
 	"testing"
 	"testing"
 	"time"
 	"time"
+
+	"github.com/dotcloud/docker/pkg/libcontainer/devices"
 )
 )
 
 
 func TestLXCConfig(t *testing.T) {
 func TestLXCConfig(t *testing.T) {
@@ -47,6 +49,7 @@ func TestLXCConfig(t *testing.T) {
 			Mtu:       1500,
 			Mtu:       1500,
 			Interface: nil,
 			Interface: nil,
 		},
 		},
+		AllowedDevices: make([]devices.Device, 0),
 	}
 	}
 	p, err := driver.generateLXCConfig(command)
 	p, err := driver.generateLXCConfig(command)
 	if err != nil {
 	if err != nil {

+ 3 - 7
daemon/execdriver/native/create.go

@@ -11,7 +11,6 @@ import (
 	"github.com/dotcloud/docker/daemon/execdriver/native/template"
 	"github.com/dotcloud/docker/daemon/execdriver/native/template"
 	"github.com/dotcloud/docker/pkg/apparmor"
 	"github.com/dotcloud/docker/pkg/apparmor"
 	"github.com/dotcloud/docker/pkg/libcontainer"
 	"github.com/dotcloud/docker/pkg/libcontainer"
-	"github.com/dotcloud/docker/pkg/libcontainer/mount/nodes"
 )
 )
 
 
 // createContainer populates and configures the container type with the
 // createContainer populates and configures the container type with the
@@ -25,6 +24,8 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Container
 	container.WorkingDir = c.WorkingDir
 	container.WorkingDir = c.WorkingDir
 	container.Env = c.Env
 	container.Env = c.Env
 	container.Cgroups.Name = c.ID
 	container.Cgroups.Name = c.ID
+	container.Cgroups.AllowedDevices = c.AllowedDevices
+	container.DeviceNodes = c.AutoCreatedDevices
 	// check to see if we are running in ramdisk to disable pivot root
 	// check to see if we are running in ramdisk to disable pivot root
 	container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
 	container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
 	container.Context["restrictions"] = "true"
 	container.Context["restrictions"] = "true"
@@ -105,15 +106,10 @@ func (d *driver) createNetwork(container *libcontainer.Container, c *execdriver.
 
 
 func (d *driver) setPrivileged(container *libcontainer.Container) (err error) {
 func (d *driver) setPrivileged(container *libcontainer.Container) (err error) {
 	container.Capabilities = libcontainer.GetAllCapabilities()
 	container.Capabilities = libcontainer.GetAllCapabilities()
-	container.Cgroups.DeviceAccess = true
+	container.Cgroups.AllowAllDevices = true
 
 
 	delete(container.Context, "restrictions")
 	delete(container.Context, "restrictions")
 
 
-	container.OptionalDeviceNodes = nil
-	if container.RequiredDeviceNodes, err = nodes.GetHostDeviceNodes(); err != nil {
-		return err
-	}
-
 	if apparmor.IsEnabled() {
 	if apparmor.IsEnabled() {
 		container.Context["apparmor_profile"] = "unconfined"
 		container.Context["apparmor_profile"] = "unconfined"
 	}
 	}

+ 3 - 6
daemon/execdriver/native/template/default_template.go

@@ -4,7 +4,6 @@ import (
 	"github.com/dotcloud/docker/pkg/apparmor"
 	"github.com/dotcloud/docker/pkg/apparmor"
 	"github.com/dotcloud/docker/pkg/libcontainer"
 	"github.com/dotcloud/docker/pkg/libcontainer"
 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
-	"github.com/dotcloud/docker/pkg/libcontainer/mount/nodes"
 )
 )
 
 
 // New returns the docker default configuration for libcontainer
 // New returns the docker default configuration for libcontainer
@@ -30,12 +29,10 @@ func New() *libcontainer.Container {
 			"NEWNET": true,
 			"NEWNET": true,
 		},
 		},
 		Cgroups: &cgroups.Cgroup{
 		Cgroups: &cgroups.Cgroup{
-			Parent:       "docker",
-			DeviceAccess: false,
+			Parent:          "docker",
+			AllowAllDevices: false,
 		},
 		},
-		Context:             libcontainer.Context{},
-		RequiredDeviceNodes: nodes.DefaultNodes,
-		OptionalDeviceNodes: []string{"/dev/fuse"},
+		Context: libcontainer.Context{},
 	}
 	}
 	if apparmor.IsEnabled() {
 	if apparmor.IsEnabled() {
 		container.Context["apparmor_profile"] = "docker-default"
 		container.Context["apparmor_profile"] = "docker-default"

+ 38 - 0
integration-cli/docker_cli_run_test.go

@@ -5,6 +5,7 @@ import (
 	"os"
 	"os"
 	"os/exec"
 	"os/exec"
 	"path/filepath"
 	"path/filepath"
+	"reflect"
 	"regexp"
 	"regexp"
 	"sort"
 	"sort"
 	"strings"
 	"strings"
@@ -835,3 +836,40 @@ func TestRunWithCpuset(t *testing.T) {
 
 
 	logDone("run - cpuset 0")
 	logDone("run - cpuset 0")
 }
 }
+
+func TestDeviceNumbers(t *testing.T) {
+	cmd := exec.Command(dockerBinary, "run", "busybox", "sh", "-c", "ls -l /dev/null")
+
+	out, _, err := runCommandWithOutput(cmd)
+	if err != nil {
+		t.Fatal(err, out)
+	}
+	deviceLineFields := strings.Fields(out)
+	deviceLineFields[6] = ""
+	deviceLineFields[7] = ""
+	deviceLineFields[8] = ""
+	expected := []string{"crw-rw-rw-", "1", "root", "root", "1,", "3", "", "", "", "/dev/null"}
+
+	if !(reflect.DeepEqual(deviceLineFields, expected)) {
+		t.Fatalf("expected output\ncrw-rw-rw- 1 root root 1, 3 May 24 13:29 /dev/null\n received\n %s\n", out)
+	}
+	deleteAllContainers()
+
+	logDone("run - test device numbers")
+}
+
+func TestThatCharacterDevicesActLikeCharacterDevices(t *testing.T) {
+	cmd := exec.Command(dockerBinary, "run", "busybox", "sh", "-c", "dd if=/dev/zero of=/zero bs=1k count=5 2> /dev/null ; du -h /zero")
+
+	out, _, err := runCommandWithOutput(cmd)
+	if err != nil {
+		t.Fatal(err, out)
+	}
+
+	if actual := strings.Trim(out, "\r\n"); actual[0] == '0' {
+		t.Fatalf("expected a new file called /zero to be create that is greater than 0 bytes long, but du says: %s", actual)
+	}
+	deleteAllContainers()
+
+	logDone("run - test that character devices work.")
+}

+ 13 - 10
pkg/libcontainer/cgroups/cgroups.go

@@ -2,6 +2,8 @@ package cgroups
 
 
 import (
 import (
 	"errors"
 	"errors"
+
+	"github.com/dotcloud/docker/pkg/libcontainer/devices"
 )
 )
 
 
 var (
 var (
@@ -10,17 +12,18 @@ var (
 
 
 type Cgroup struct {
 type Cgroup struct {
 	Name   string `json:"name,omitempty"`
 	Name   string `json:"name,omitempty"`
-	Parent string `json:"parent,omitempty"`
+	Parent string `json:"parent,omitempty"` // name of parent cgroup or slice
 
 
-	DeviceAccess      bool   `json:"device_access,omitempty"`      // name of parent cgroup or slice
-	Memory            int64  `json:"memory,omitempty"`             // Memory limit (in bytes)
-	MemoryReservation int64  `json:"memory_reservation,omitempty"` // Memory reservation or soft_limit (in bytes)
-	MemorySwap        int64  `json:"memory_swap,omitempty"`        // Total memory usage (memory + swap); set `-1' to disable swap
-	CpuShares         int64  `json:"cpu_shares,omitempty"`         // CPU shares (relative weight vs. other containers)
-	CpuQuota          int64  `json:"cpu_quota,omitempty"`          // CPU hardcap limit (in usecs). Allowed cpu time in a given period.
-	CpuPeriod         int64  `json:"cpu_period,omitempty"`         // CPU period to be used for hardcapping (in usecs). 0 to use system default.
-	CpusetCpus        string `json:"cpuset_cpus,omitempty"`        // CPU to use
-	Freezer           string `json:"freezer,omitempty"`            // set the freeze value for the process
+	AllowAllDevices   bool             `json:"allow_all_devices,omitempty"` // If this is true allow access to any kind of device within the container.  If false, allow access only to devices explicitly listed in the allowed_devices list.
+	AllowedDevices    []devices.Device `json:"allowed_devices,omitempty"`
+	Memory            int64            `json:"memory,omitempty"`             // Memory limit (in bytes)
+	MemoryReservation int64            `json:"memory_reservation,omitempty"` // Memory reservation or soft_limit (in bytes)
+	MemorySwap        int64            `json:"memory_swap,omitempty"`        // Total memory usage (memory + swap); set `-1' to disable swap
+	CpuShares         int64            `json:"cpu_shares,omitempty"`         // CPU shares (relative weight vs. other containers)
+	CpuQuota          int64            `json:"cpu_quota,omitempty"`          // CPU hardcap limit (in usecs). Allowed cpu time in a given period.
+	CpuPeriod         int64            `json:"cpu_period,omitempty"`         // CPU period to be used for hardcapping (in usecs). 0 to use system default.
+	CpusetCpus        string           `json:"cpuset_cpus,omitempty"`        // CPU to use
+	Freezer           string           `json:"freezer,omitempty"`            // set the freeze value for the process
 
 
 	Slice string `json:"slice,omitempty"` // Parent slice to use for systemd
 	Slice string `json:"slice,omitempty"` // Parent slice to use for systemd
 }
 }

+ 3 - 31
pkg/libcontainer/cgroups/fs/devices.go

@@ -9,41 +9,13 @@ func (s *devicesGroup) Set(d *data) error {
 		return err
 		return err
 	}
 	}
 
 
-	if !d.c.DeviceAccess {
+	if !d.c.AllowAllDevices {
 		if err := writeFile(dir, "devices.deny", "a"); err != nil {
 		if err := writeFile(dir, "devices.deny", "a"); err != nil {
 			return err
 			return err
 		}
 		}
 
 
-		allow := []string{
-			// allow mknod for any device
-			"c *:* m",
-			"b *:* m",
-
-			// /dev/null, zero, full
-			"c 1:3 rwm",
-			"c 1:5 rwm",
-			"c 1:7 rwm",
-
-			// consoles
-			"c 5:1 rwm",
-			"c 5:0 rwm",
-			"c 4:0 rwm",
-			"c 4:1 rwm",
-
-			// /dev/urandom,/dev/random
-			"c 1:9 rwm",
-			"c 1:8 rwm",
-
-			// /dev/pts/ - pts namespaces are "coming soon"
-			"c 136:* rwm",
-			"c 5:2 rwm",
-
-			// tuntap
-			"c 10:200 rwm",
-		}
-
-		for _, val := range allow {
-			if err := writeFile(dir, "devices.allow", val); err != nil {
+		for _, dev := range d.c.AllowedDevices {
+			if err := writeFile(dir, "devices.allow", dev.GetCgroupAllowString()); err != nil {
 				return err
 				return err
 			}
 			}
 		}
 		}

+ 11 - 39
pkg/libcontainer/cgroups/systemd/apply_systemd.go

@@ -21,11 +21,6 @@ type systemdCgroup struct {
 	cleanupDirs []string
 	cleanupDirs []string
 }
 }
 
 
-type DeviceAllow struct {
-	Node        string
-	Permissions string
-}
-
 var (
 var (
 	connLock              sync.Mutex
 	connLock              sync.Mutex
 	theConn               *systemd1.Conn
 	theConn               *systemd1.Conn
@@ -116,24 +111,9 @@ func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {
 		systemd1.Property{"PIDs", dbus.MakeVariant([]uint32{uint32(pid)})},
 		systemd1.Property{"PIDs", dbus.MakeVariant([]uint32{uint32(pid)})},
 	)
 	)
 
 
-	if !c.DeviceAccess {
+	if !c.AllowAllDevices {
 		properties = append(properties,
 		properties = append(properties,
-			systemd1.Property{"DevicePolicy", dbus.MakeVariant("strict")},
-			systemd1.Property{"DeviceAllow", dbus.MakeVariant([]DeviceAllow{
-				{"/dev/null", "rwm"},
-				{"/dev/zero", "rwm"},
-				{"/dev/full", "rwm"},
-				{"/dev/random", "rwm"},
-				{"/dev/urandom", "rwm"},
-				{"/dev/tty", "rwm"},
-				{"/dev/console", "rwm"},
-				{"/dev/tty0", "rwm"},
-				{"/dev/tty1", "rwm"},
-				{"/dev/pts/ptmx", "rwm"},
-				// There is no way to add /dev/pts/* here atm, so we hack this manually below
-				// /dev/pts/* (how to add this?)
-				// Same with tuntap, which doesn't exist as a node most of the time
-			})})
+			systemd1.Property{"DevicePolicy", dbus.MakeVariant("strict")})
 	}
 	}
 
 
 	// Always enable accounting, this gets us the same behaviour as the fs implementation,
 	// Always enable accounting, this gets us the same behaviour as the fs implementation,
@@ -167,28 +147,16 @@ func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {
 
 
 	cgroup := props["ControlGroup"].(string)
 	cgroup := props["ControlGroup"].(string)
 
 
-	if !c.DeviceAccess {
+	if !c.AllowAllDevices {
 		mountpoint, err := cgroups.FindCgroupMountpoint("devices")
 		mountpoint, err := cgroups.FindCgroupMountpoint("devices")
 		if err != nil {
 		if err != nil {
 			return nil, err
 			return nil, err
 		}
 		}
 
 
-		path := filepath.Join(mountpoint, cgroup)
-
-		allow := []string{
-			// allow mknod for any device
-			"c *:* m",
-			"b *:* m",
-
-			// /dev/pts/ - pts namespaces are "coming soon"
-			"c 136:* rwm",
-
-			// tuntap
-			"c 10:200 rwm",
-		}
-
-		for _, val := range allow {
-			if err := ioutil.WriteFile(filepath.Join(path, "devices.allow"), []byte(val), 0700); err != nil {
+		dir := filepath.Join(mountpoint, cgroup)
+		// We use the same method of allowing devices as in the fs backend.  This needs to be changed to use DBUS as soon as possible.  However, that change has to wait until http://cgit.freedesktop.org/systemd/systemd/commit/?id=90060676c442604780634c0a993e3f9c3733f8e6 has been applied in most commonly used systemd versions.
+		for _, dev := range c.AllowedDevices {
+			if err := writeFile(dir, "devices.allow", dev.GetCgroupAllowString()); err != nil {
 				return nil, err
 				return nil, err
 			}
 			}
 		}
 		}
@@ -295,6 +263,10 @@ func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {
 	return &res, nil
 	return &res, nil
 }
 }
 
 
+func writeFile(dir, file, data string) error {
+	return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
+}
+
 func (c *systemdCgroup) Cleanup() error {
 func (c *systemdCgroup) Cleanup() error {
 	// systemd cleans up, we don't need to do much
 	// systemd cleans up, we don't need to do much
 
 

+ 3 - 7
pkg/libcontainer/container.go

@@ -2,6 +2,7 @@ package libcontainer
 
 
 import (
 import (
 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
+	"github.com/dotcloud/docker/pkg/libcontainer/devices"
 )
 )
 
 
 // Context is a generic key value pair that allows arbitrary data to be sent
 // Context is a generic key value pair that allows arbitrary data to be sent
@@ -60,13 +61,8 @@ type Container struct {
 	// rootfs and mount namespace if specified
 	// rootfs and mount namespace if specified
 	Mounts Mounts `json:"mounts,omitempty"`
 	Mounts Mounts `json:"mounts,omitempty"`
 
 
-	// RequiredDeviceNodes are a list of device nodes that will be mknod into the container's rootfs at /dev
-	// If the host system does not support the device that the container requests an error is returned
-	RequiredDeviceNodes []string `json:"required_device_nodes,omitempty"`
-
-	// OptionalDeviceNodes are a list of device nodes that will be mknod into the container's rootfs at /dev
-	// If the host system does not support the device that the container requests the error is ignored
-	OptionalDeviceNodes []string `json:"optional_device_nodes,omitempty"`
+	// The device nodes that should be automatically created within the container upon container start.  Note, make sure that the node is marked as allowed in the cgroup as well!
+	DeviceNodes []devices.Device `json:"device_nodes,omitempty"`
 }
 }
 
 
 // Network defines configuration for a container's networking stack
 // Network defines configuration for a container's networking stack

+ 49 - 7
pkg/libcontainer/container.json

@@ -44,12 +44,54 @@
       "type": "devtmpfs"
       "type": "devtmpfs"
     }
     }
   ],
   ],
-  "required_device_nodes": [
-      "/dev/null",
-      "/dev/zero",
-      "/dev/full",
-      "/dev/random",
-      "/dev/urandom",
-      "/dev/tty"
+  "device_nodes": [
+		{
+			"path":  "/dev/null",
+			"type":        99,
+			"major_number": 1,
+			"minor_number": 3,
+			"cgroup_permissions": "rwm",
+			"file_mode": 438
+		},
+		{
+			"path":  "/dev/zero",
+			"type":        99,
+			"major_number": 1,
+			"minor_number": 5,
+			"cgroup_permissions": "rwm",
+			"file_mode": 438
+		},
+		{
+			"path":  "/dev/full",
+			"type":        99,
+			"major_number": 1,
+			"minor_number": 7,
+			"cgroup_permissions": "rwm",
+			"file_mode": 438
+		},
+		{
+			"path":  "/dev/tty",
+			"type":        99,
+			"major_number": 5,
+			"minor_number": 0,
+			"cgroup_permissions": "rwm",
+			"file_mode": 438
+		},
+		{
+			"path":  "/dev/urandom",
+			"type":        99,
+			"major_number": 1,
+			"minor_number": 9,
+			"cgroup_permissions": "rwm",
+			"file_mode": 438
+		},
+		{
+			"path":  "/dev/random",
+			"type":        99,
+			"major_number": 1,
+			"minor_number": 8,
+			"cgroup_permissions": "rwm",
+			"file_mode": 438
+		}
   ]
   ]
 }
 }

+ 0 - 9
pkg/libcontainer/container_test.go

@@ -4,8 +4,6 @@ import (
 	"encoding/json"
 	"encoding/json"
 	"os"
 	"os"
 	"testing"
 	"testing"
-
-	"github.com/dotcloud/docker/pkg/libcontainer/mount/nodes"
 )
 )
 
 
 // Checks whether the expected capability is specified in the capabilities.
 // Checks whether the expected capability is specified in the capabilities.
@@ -63,11 +61,4 @@ func TestContainerJsonFormat(t *testing.T) {
 		t.Log("capabilities mask should not contain SYS_CHROOT")
 		t.Log("capabilities mask should not contain SYS_CHROOT")
 		t.Fail()
 		t.Fail()
 	}
 	}
-
-	for _, n := range nodes.DefaultNodes {
-		if !contains(n, container.RequiredDeviceNodes) {
-			t.Logf("devices should contain %s", n)
-			t.Fail()
-		}
-	}
 }
 }

+ 239 - 0
pkg/libcontainer/devices/devices.go

@@ -0,0 +1,239 @@
+package devices
+
+import (
+	"fmt"
+	"os"
+	"syscall"
+)
+
+const (
+	Wildcard = -1
+)
+
+type Device struct {
+	Type              rune        `json:"type,omitempty"`
+	Path              string      `json:"path,omitempty"`               // It is fine if this is an empty string in the case that you are using Wildcards
+	MajorNumber       int64       `json:"major_number,omitempty"`       // Use the wildcard constant for wildcards.
+	MinorNumber       int64       `json:"minor_number,omitempty"`       // Use the wildcard constant for wildcards.
+	CgroupPermissions string      `json:"cgroup_permissions,omitempty"` // Typically just "rwm"
+	FileMode          os.FileMode `json:"file_mode,omitempty"`          // The permission bits of the file's mode
+}
+
+func GetDeviceNumberString(deviceNumber int64) string {
+	if deviceNumber == Wildcard {
+		return "*"
+	} else {
+		return fmt.Sprintf("%d", deviceNumber)
+	}
+}
+
+func (device Device) GetCgroupAllowString() string {
+	return fmt.Sprintf("%c %s:%s %s", device.Type, GetDeviceNumberString(device.MajorNumber), GetDeviceNumberString(device.MinorNumber), device.CgroupPermissions)
+}
+
+// Given the path to a device and its cgroup_permissions (which cannot be easily queried), look up the information about a linux device and return that information as a Device struct.
+func GetDevice(path string, cgroupPermissions string) (Device, error) {
+	var (
+		err                    error
+		fileInfo               os.FileInfo
+		mode                   os.FileMode
+		fileModePermissionBits os.FileMode
+		devType                rune
+		devNumber              int
+		stat_t                 *syscall.Stat_t
+		ok                     bool
+		device                 Device
+	)
+
+	fileInfo, err = os.Stat(path)
+	if err != nil {
+		return Device{}, err
+	}
+
+	mode = fileInfo.Mode()
+	fileModePermissionBits = os.FileMode.Perm(mode)
+	switch {
+	case (mode & os.ModeDevice) == 0:
+		return Device{}, fmt.Errorf("%s is not a device", path)
+	case (mode & os.ModeCharDevice) != 0:
+		fileModePermissionBits |= syscall.S_IFCHR
+		devType = 'c'
+	default:
+		fileModePermissionBits |= syscall.S_IFBLK
+		devType = 'b'
+	}
+
+	stat_t, ok = fileInfo.Sys().(*syscall.Stat_t)
+	if !ok {
+		return Device{}, fmt.Errorf("cannot determine the device number for device %s", path)
+	}
+	devNumber = int(stat_t.Rdev)
+
+	device = Device{
+		Type:              devType,
+		Path:              path,
+		MajorNumber:       Major(devNumber),
+		MinorNumber:       Minor(devNumber),
+		CgroupPermissions: cgroupPermissions,
+		FileMode:          fileModePermissionBits,
+	}
+	return device, nil
+}
+
+var (
+	// These are devices that are to be both allowed and created.
+
+	DefaultSimpleDevices = []Device{
+		// /dev/null and zero
+		{
+			Path:              "/dev/null",
+			Type:              'c',
+			MajorNumber:       1,
+			MinorNumber:       3,
+			CgroupPermissions: "rwm",
+			FileMode:          0666,
+		},
+		{
+			Path:              "/dev/zero",
+			Type:              'c',
+			MajorNumber:       1,
+			MinorNumber:       5,
+			CgroupPermissions: "rwm",
+			FileMode:          0666,
+		},
+
+		{
+			Path:              "/dev/full",
+			Type:              'c',
+			MajorNumber:       1,
+			MinorNumber:       7,
+			CgroupPermissions: "rwm",
+			FileMode:          0666,
+		},
+
+		// consoles and ttys
+		{
+			Path:              "/dev/tty",
+			Type:              'c',
+			MajorNumber:       5,
+			MinorNumber:       0,
+			CgroupPermissions: "rwm",
+			FileMode:          0666,
+		},
+
+		// /dev/urandom,/dev/random
+		{
+			Path:              "/dev/urandom",
+			Type:              'c',
+			MajorNumber:       1,
+			MinorNumber:       9,
+			CgroupPermissions: "rwm",
+			FileMode:          0666,
+		},
+		{
+			Path:              "/dev/random",
+			Type:              'c',
+			MajorNumber:       1,
+			MinorNumber:       8,
+			CgroupPermissions: "rwm",
+			FileMode:          0666,
+		},
+	}
+
+	DefaultAllowedDevices = append([]Device{
+		// allow mknod for any device
+		{
+			Type:              'c',
+			MajorNumber:       Wildcard,
+			MinorNumber:       Wildcard,
+			CgroupPermissions: "m",
+		},
+		{
+			Type:              'b',
+			MajorNumber:       Wildcard,
+			MinorNumber:       Wildcard,
+			CgroupPermissions: "m",
+		},
+
+		{
+			Path:              "/dev/console",
+			Type:              'c',
+			MajorNumber:       5,
+			MinorNumber:       1,
+			CgroupPermissions: "rwm",
+		},
+		{
+			Path:              "/dev/tty0",
+			Type:              'c',
+			MajorNumber:       4,
+			MinorNumber:       0,
+			CgroupPermissions: "rwm",
+		},
+		{
+			Path:              "/dev/tty1",
+			Type:              'c',
+			MajorNumber:       4,
+			MinorNumber:       1,
+			CgroupPermissions: "rwm",
+		},
+		// /dev/pts/ - pts namespaces are "coming soon"
+		{
+			Path:              "",
+			Type:              'c',
+			MajorNumber:       136,
+			MinorNumber:       Wildcard,
+			CgroupPermissions: "rwm",
+		},
+		{
+			Path:              "",
+			Type:              'c',
+			MajorNumber:       5,
+			MinorNumber:       2,
+			CgroupPermissions: "rwm",
+		},
+
+		// tuntap
+		{
+			Path:              "",
+			Type:              'c',
+			MajorNumber:       10,
+			MinorNumber:       200,
+			CgroupPermissions: "rwm",
+		},
+
+		/*// fuse
+		   {
+		    Path: "",
+		    Type: 'c',
+		    MajorNumber: 10,
+		    MinorNumber: 229,
+		    CgroupPermissions: "rwm",
+		   },
+
+		// rtc
+		   {
+		    Path: "",
+		    Type: 'c',
+		    MajorNumber: 254,
+		    MinorNumber: 0,
+		    CgroupPermissions: "rwm",
+		   },
+		*/
+	}, DefaultSimpleDevices...)
+
+	DefaultAutoCreatedDevices = append([]Device{
+		{
+			// /dev/fuse is created but not allowed.
+			// This is to allow java to work.  Because java
+			// Insists on there being a /dev/fuse
+			// https://github.com/dotcloud/docker/issues/514
+			// https://github.com/dotcloud/docker/issues/2393
+			//
+			Path:              "/dev/fuse",
+			Type:              'c',
+			MajorNumber:       10,
+			MinorNumber:       229,
+			CgroupPermissions: "rwm",
+		},
+	}, DefaultSimpleDevices...)
+)

+ 26 - 0
pkg/libcontainer/devices/number.go

@@ -0,0 +1,26 @@
+package devices
+
+/*
+
+This code provides support for manipulating linux device numbers.  It should be replaced by normal syscall functions once http://code.google.com/p/go/issues/detail?id=8106 is solved.
+
+You can read what they are here:
+
+ - http://www.makelinux.net/ldd3/chp-3-sect-2
+ - http://www.linux-tutorial.info/modules.php?name=MContent&pageid=94
+
+Note! These are NOT the same as the MAJOR(dev_t device);, MINOR(dev_t device); and MKDEV(int major, int minor); functions as defined in <linux/kdev_t.h> as the representation of device numbers used by go is different than the one used internally to the kernel! - https://github.com/torvalds/linux/blob/master/include/linux/kdev_t.h#L9
+
+*/
+
+func Major(devNumber int) int64 {
+	return int64((devNumber >> 8) & 0xfff)
+}
+
+func Minor(devNumber int) int64 {
+	return int64((devNumber & 0xff) | ((devNumber >> 12) & 0xfff00))
+}
+
+func Mkdev(majorNumber int64, minorNumber int64) int {
+	return int((majorNumber << 8) | (minorNumber & 0xff) | ((minorNumber & 0xfff00) << 12))
+}

+ 2 - 5
pkg/libcontainer/mount/init.go

@@ -48,11 +48,8 @@ func InitializeMountNamespace(rootfs, console string, container *libcontainer.Co
 	if err := setupBindmounts(rootfs, container.Mounts); err != nil {
 	if err := setupBindmounts(rootfs, container.Mounts); err != nil {
 		return fmt.Errorf("bind mounts %s", err)
 		return fmt.Errorf("bind mounts %s", err)
 	}
 	}
-	if err := nodes.CopyN(rootfs, container.RequiredDeviceNodes, true); err != nil {
-		return fmt.Errorf("copy required dev nodes %s", err)
-	}
-	if err := nodes.CopyN(rootfs, container.OptionalDeviceNodes, false); err != nil {
-		return fmt.Errorf("copy optional dev nodes %s", err)
+	if err := nodes.CreateDeviceNodes(rootfs, container.DeviceNodes); err != nil {
+		return fmt.Errorf("create device nodes %s", err)
 	}
 	}
 	if err := SetupPtmx(rootfs, console, container.Context["mount_label"]); err != nil {
 	if err := SetupPtmx(rootfs, console, container.Context["mount_label"]); err != nil {
 		return err
 		return err

+ 23 - 33
pkg/libcontainer/mount/nodes/nodes.go

@@ -9,47 +9,27 @@ import (
 	"path/filepath"
 	"path/filepath"
 	"syscall"
 	"syscall"
 
 
+	"github.com/dotcloud/docker/pkg/libcontainer/devices"
 	"github.com/dotcloud/docker/pkg/system"
 	"github.com/dotcloud/docker/pkg/system"
 )
 )
 
 
-// Default list of device nodes to copy
-var DefaultNodes = []string{
-	"/dev/null",
-	"/dev/zero",
-	"/dev/full",
-	"/dev/random",
-	"/dev/urandom",
-	"/dev/tty",
-}
-
-// CopyN copies the device node from the host into the rootfs
-func CopyN(rootfs string, nodesToCopy []string, shouldExist bool) error {
+// CreateDeviceNodes creates each of the given device nodes inside the container's rootfs.
+func CreateDeviceNodes(rootfs string, nodesToCreate []devices.Device) error {
 	oldMask := system.Umask(0000)
 	oldMask := system.Umask(0000)
 	defer system.Umask(oldMask)
 	defer system.Umask(oldMask)
 
 
-	for _, node := range nodesToCopy {
-		if err := Copy(rootfs, node, shouldExist); err != nil {
+	for _, node := range nodesToCreate {
+		if err := CreateDeviceNode(rootfs, node); err != nil {
 			return err
 			return err
 		}
 		}
 	}
 	}
 	return nil
 	return nil
 }
 }
 
 
-// Copy copies the device node into the rootfs.  If the node
-// on the host system does not exist and the boolean flag is passed
-// an error will be returned
-func Copy(rootfs, node string, shouldExist bool) error {
-	stat, err := os.Stat(node)
-	if err != nil {
-		if os.IsNotExist(err) && !shouldExist {
-			return nil
-		}
-		return err
-	}
-
+// CreateDeviceNode creates a single device node in the rootfs of the container.
+func CreateDeviceNode(rootfs string, node devices.Device) error {
 	var (
 	var (
-		dest   = filepath.Join(rootfs, node)
-		st     = stat.Sys().(*syscall.Stat_t)
+		dest   = filepath.Join(rootfs, node.Path)
 		parent = filepath.Dir(dest)
 		parent = filepath.Dir(dest)
 	)
 	)
 
 
@@ -57,13 +37,23 @@ func Copy(rootfs, node string, shouldExist bool) error {
 		return err
 		return err
 	}
 	}
 
 
-	if err := system.Mknod(dest, st.Mode, int(st.Rdev)); err != nil && !os.IsExist(err) {
-		return fmt.Errorf("mknod %s %s", node, err)
+	fileMode := node.FileMode
+	switch node.Type {
+	case 'c':
+		fileMode |= syscall.S_IFCHR
+	case 'b':
+		fileMode |= syscall.S_IFBLK
+	default:
+		return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
+	}
+
+	if err := system.Mknod(dest, uint32(fileMode), devices.Mkdev(node.MajorNumber, node.MinorNumber)); err != nil && !os.IsExist(err) {
+		return fmt.Errorf("mknod %s %s", node.Path, err)
 	}
 	}
 	return nil
 	return nil
 }
 }
 
 
-func getNodes(path string) ([]string, error) {
+func getDeviceNodes(path string) ([]string, error) {
 	out := []string{}
 	out := []string{}
 	files, err := ioutil.ReadDir(path)
 	files, err := ioutil.ReadDir(path)
 	if err != nil {
 	if err != nil {
@@ -71,7 +61,7 @@ func getNodes(path string) ([]string, error) {
 	}
 	}
 	for _, f := range files {
 	for _, f := range files {
 		if f.IsDir() && f.Name() != "pts" && f.Name() != "shm" {
 		if f.IsDir() && f.Name() != "pts" && f.Name() != "shm" {
-			sub, err := getNodes(filepath.Join(path, f.Name()))
+			sub, err := getDeviceNodes(filepath.Join(path, f.Name()))
 			if err != nil {
 			if err != nil {
 				return nil, err
 				return nil, err
 			}
 			}
@@ -84,5 +74,5 @@ func getNodes(path string) ([]string, error) {
 }
 }
 
 
 func GetHostDeviceNodes() ([]string, error) {
 func GetHostDeviceNodes() ([]string, error) {
-	return getNodes("/dev")
+	return getDeviceNodes("/dev")
 }
 }

+ 8 - 3
pkg/libcontainer/mount/nodes/nodes_unsupported.go

@@ -2,10 +2,15 @@
 
 
 package nodes
 package nodes
 
 
-import "github.com/dotcloud/docker/pkg/libcontainer"
-
-var DefaultNodes = []string{}
+import (
+	"github.com/dotcloud/docker/pkg/libcontainer"
+	"github.com/dotcloud/docker/pkg/libcontainer/devices"
+)
 
 
 func GetHostDeviceNodes() ([]string, error) {
 func GetHostDeviceNodes() ([]string, error) {
 	return nil, libcontainer.ErrUnsupported
 	return nil, libcontainer.ErrUnsupported
 }
 }
+
+// CreateDeviceNodes is the stub used by this file's build: it creates no
+// device nodes and always returns libcontainer.ErrUnsupported.
+func CreateDeviceNodes(rootfs string, nodesToCreate []devices.Device) error {
+	return libcontainer.ErrUnsupported
+}