
Execdriver implementation on new libcontainer API

Signed-off-by: Alexander Morozov <lk4d4@docker.com>
Alexander Morozov 10 years ago
parent
commit
68ba5f0b69

+ 7 - 5
daemon/container.go

@@ -14,6 +14,8 @@ import (
 	"syscall"
 	"syscall"
 	"time"
 	"time"
 
 
+	"github.com/docker/libcontainer"
+	"github.com/docker/libcontainer/configs"
 	"github.com/docker/libcontainer/devices"
 	"github.com/docker/libcontainer/devices"
 	"github.com/docker/libcontainer/label"
 	"github.com/docker/libcontainer/label"
 
 
@@ -259,18 +261,18 @@ func populateCommand(c *Container, env []string) error {
 	pid.HostPid = c.hostConfig.PidMode.IsHost()
 	pid.HostPid = c.hostConfig.PidMode.IsHost()
 
 
 	// Build lists of devices allowed and created within the container.
 	// Build lists of devices allowed and created within the container.
-	userSpecifiedDevices := make([]*devices.Device, len(c.hostConfig.Devices))
+	userSpecifiedDevices := make([]*configs.Device, len(c.hostConfig.Devices))
 	for i, deviceMapping := range c.hostConfig.Devices {
 	for i, deviceMapping := range c.hostConfig.Devices {
-		device, err := devices.GetDevice(deviceMapping.PathOnHost, deviceMapping.CgroupPermissions)
+		device, err := devices.DeviceFromPath(deviceMapping.PathOnHost, deviceMapping.CgroupPermissions)
 		if err != nil {
 		if err != nil {
 			return fmt.Errorf("error gathering device information while adding custom device %q: %s", deviceMapping.PathOnHost, err)
 			return fmt.Errorf("error gathering device information while adding custom device %q: %s", deviceMapping.PathOnHost, err)
 		}
 		}
 		device.Path = deviceMapping.PathInContainer
 		device.Path = deviceMapping.PathInContainer
 		userSpecifiedDevices[i] = device
 		userSpecifiedDevices[i] = device
 	}
 	}
-	allowedDevices := append(devices.DefaultAllowedDevices, userSpecifiedDevices...)
+	allowedDevices := append(configs.DefaultAllowedDevices, userSpecifiedDevices...)
 
 
-	autoCreatedDevices := append(devices.DefaultAutoCreatedDevices, userSpecifiedDevices...)
+	autoCreatedDevices := append(configs.DefaultAutoCreatedDevices, userSpecifiedDevices...)
 
 
 	// TODO: this can be removed after lxc-conf is fully deprecated
 	// TODO: this can be removed after lxc-conf is fully deprecated
 	lxcConfig, err := mergeLxcConfIntoOptions(c.hostConfig)
 	lxcConfig, err := mergeLxcConfIntoOptions(c.hostConfig)
@@ -972,7 +974,7 @@ func (container *Container) Exposes(p nat.Port) bool {
 	return exists
 	return exists
 }
 }
 
 
-func (container *Container) GetPtyMaster() (*os.File, error) {
+func (container *Container) GetPtyMaster() (libcontainer.Console, error) {
 	ttyConsole, ok := container.command.ProcessConfig.Terminal.(execdriver.TtyTerminal)
 	ttyConsole, ok := container.command.ProcessConfig.Terminal.(execdriver.TtyTerminal)
 	if !ok {
 	if !ok {
 		return nil, ErrNoTTY
 		return nil, ErrNoTTY
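
Note on the device API change above: devices.GetDevice is replaced by devices.DeviceFromPath, which returns a *configs.Device describing a host node; populateCommand then overwrites its Path with the in-container path. A minimal, hedged sketch of that call pattern (the /dev/fuse path is just an illustrative choice):

package main

import (
	"fmt"

	"github.com/docker/libcontainer/devices"
)

func main() {
	// Look up the host device node, asking for "rwm" cgroup permissions.
	device, err := devices.DeviceFromPath("/dev/fuse", "rwm")
	if err != nil {
		fmt.Println("device lookup failed:", err)
		return
	}
	// As in populateCommand, the path is rewritten to the location the
	// container should see (here it happens to be the same path).
	device.Path = "/dev/fuse"
	fmt.Printf("device %q of type %c\n", device.Path, device.Type)
}
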

+ 96 - 25
daemon/execdriver/driver.go

@@ -1,17 +1,22 @@
 package execdriver
 
 import (
+	"encoding/json"
 	"errors"
 	"io"
+	"io/ioutil"
 	"os"
 	"os/exec"
+	"path/filepath"
+	"strconv"
 	"strings"
 	"time"
 
 	"github.com/docker/docker/daemon/execdriver/native/template"
 	"github.com/docker/docker/pkg/ulimit"
 	"github.com/docker/libcontainer"
-	"github.com/docker/libcontainer/devices"
+	"github.com/docker/libcontainer/cgroups/fs"
+	"github.com/docker/libcontainer/configs"
 )
 
 // Context is a generic key value pair that allows
@@ -42,7 +47,7 @@ type Terminal interface {
 }
 
 type TtyTerminal interface {
-	Master() *os.File
+	Master() libcontainer.Console
 }
 
 // ExitStatus provides exit reasons for a container.
@@ -109,7 +114,7 @@ type Resources struct {
 }
 
 type ResourceStats struct {
-	*libcontainer.ContainerStats
+	*libcontainer.Stats
 	Read        time.Time `json:"read"`
 	MemoryLimit int64     `json:"memory_limit"`
 	SystemUsage uint64    `json:"system_usage"`
@@ -149,8 +154,8 @@ type Command struct {
 	Pid                *Pid              `json:"pid"`
 	Resources          *Resources        `json:"resources"`
 	Mounts             []Mount           `json:"mounts"`
-	AllowedDevices     []*devices.Device `json:"allowed_devices"`
-	AutoCreatedDevices []*devices.Device `json:"autocreated_devices"`
+	AllowedDevices     []*configs.Device `json:"allowed_devices"`
+	AutoCreatedDevices []*configs.Device `json:"autocreated_devices"`
 	CapAdd             []string          `json:"cap_add"`
 	CapDrop            []string          `json:"cap_drop"`
 	ContainerPid       int               `json:"container_pid"`  // the pid for the process inside a container
@@ -161,23 +166,19 @@ type Command struct {
 	AppArmorProfile    string            `json:"apparmor_profile"`
 }
 
-func InitContainer(c *Command) *libcontainer.Config {
+func InitContainer(c *Command) *configs.Config {
 	container := template.New()
 
 	container.Hostname = getEnv("HOSTNAME", c.ProcessConfig.Env)
-	container.Tty = c.ProcessConfig.Tty
-	container.User = c.ProcessConfig.User
-	container.WorkingDir = c.WorkingDir
-	container.Env = c.ProcessConfig.Env
 	container.Cgroups.Name = c.ID
 	container.Cgroups.AllowedDevices = c.AllowedDevices
-	container.MountConfig.DeviceNodes = c.AutoCreatedDevices
-	container.RootFs = c.Rootfs
-	container.MountConfig.ReadonlyFs = c.ReadonlyRootfs
+	container.Readonlyfs = c.ReadonlyRootfs
+	container.Devices = c.AutoCreatedDevices
+	container.Rootfs = c.Rootfs
+	container.Readonlyfs = c.ReadonlyRootfs
 
 	// check to see if we are running in ramdisk to disable pivot root
-	container.MountConfig.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
-	container.RestrictSys = true
+	container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
 	return container
 }
 
@@ -191,7 +192,7 @@ func getEnv(key string, env []string) string {
 	return ""
 }
 
-func SetupCgroups(container *libcontainer.Config, c *Command) error {
+func SetupCgroups(container *configs.Config, c *Command) error {
 	if c.Resources != nil {
 		container.Cgroups.CpuShares = c.Resources.CpuShares
 		container.Cgroups.Memory = c.Resources.Memory
@@ -203,28 +204,98 @@ func SetupCgroups(container *libcontainer.Config, c *Command) error {
 	return nil
 }
 
-func Stats(stateFile string, containerMemoryLimit int64, machineMemory int64) (*ResourceStats, error) {
-	state, err := libcontainer.GetState(stateFile)
-	if err != nil {
-		if os.IsNotExist(err) {
-			return nil, ErrNotRunning
+// Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo.
+func getNetworkInterfaceStats(interfaceName string) (*libcontainer.NetworkInterface, error) {
+	out := &libcontainer.NetworkInterface{Name: interfaceName}
+	// This can happen if the network runtime information is missing - possible if the
+	// container was created by an old version of libcontainer.
+	if interfaceName == "" {
+		return out, nil
+	}
+	type netStatsPair struct {
+		// Where to write the output.
+		Out *uint64
+		// The network stats file to read.
+		File string
+	}
+	// Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container.
+	netStats := []netStatsPair{
+		{Out: &out.RxBytes, File: "tx_bytes"},
+		{Out: &out.RxPackets, File: "tx_packets"},
+		{Out: &out.RxErrors, File: "tx_errors"},
+		{Out: &out.RxDropped, File: "tx_dropped"},
+
+		{Out: &out.TxBytes, File: "rx_bytes"},
+		{Out: &out.TxPackets, File: "rx_packets"},
+		{Out: &out.TxErrors, File: "rx_errors"},
+		{Out: &out.TxDropped, File: "rx_dropped"},
+	}
+	for _, netStat := range netStats {
+		data, err := readSysfsNetworkStats(interfaceName, netStat.File)
+		if err != nil {
+			return nil, err
 		}
+		*(netStat.Out) = data
+	}
+	return out, nil
+}
+
+// Reads the specified statistics available under /sys/class/net/<EthInterface>/statistics
+func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) {
+	data, err := ioutil.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile))
+	if err != nil {
+		return 0, err
+	}
+	return strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
+}
+
+func Stats(containerDir string, containerMemoryLimit int64, machineMemory int64) (*ResourceStats, error) {
+	f, err := os.Open(filepath.Join(containerDir, "state.json"))
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	type network struct {
+		Type              string
+		HostInterfaceName string
+	}
+
+	state := struct {
+		CgroupPaths map[string]string `json:"cgroup_paths"`
+		Networks    []network
+	}{}
+
+	if err := json.NewDecoder(f).Decode(&state); err != nil {
 		return nil, err
 	}
 	now := time.Now()
-	stats, err := libcontainer.GetStats(nil, state)
+
+	mgr := fs.Manager{Paths: state.CgroupPaths}
+	cstats, err := mgr.GetStats()
 	if err != nil {
 		return nil, err
 	}
+	stats := &libcontainer.Stats{CgroupStats: cstats}
 	// if the container does not have any memory limit specified set the
 	// limit to the machines memory
 	memoryLimit := containerMemoryLimit
 	if memoryLimit == 0 {
 		memoryLimit = machineMemory
 	}
+	for _, iface := range state.Networks {
+		switch iface.Type {
+		case "veth":
+			istats, err := getNetworkInterfaceStats(iface.HostInterfaceName)
+			if err != nil {
+				return nil, err
+			}
+			stats.Interfaces = append(stats.Interfaces, istats)
+		}
+	}
 	return &ResourceStats{
-		Read:           now,
-		ContainerStats: stats,
-		MemoryLimit:    memoryLimit,
+		Stats:       stats,
+		Read:        now,
+		MemoryLimit: memoryLimit,
 	}, nil
 }
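
The Stats rewrite above stops calling libcontainer.GetStats and instead reads cgroup stats through fs.Manager and per-interface counters straight from sysfs. A small, self-contained sketch of the sysfs read, with the veth name as a placeholder; note the tx/rx swap, since the host side of the veth pair sees the container's traffic mirrored:

package main

import (
	"fmt"
	"io/ioutil"
	"path/filepath"
	"strconv"
	"strings"
)

// readNetStat reads one counter file from /sys/class/net/<iface>/statistics.
func readNetStat(iface, file string) (uint64, error) {
	data, err := ioutil.ReadFile(filepath.Join("/sys/class/net", iface, "statistics", file))
	if err != nil {
		return 0, err
	}
	return strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
}

func main() {
	iface := "veth1234abc" // placeholder host-side veth name
	// tx_bytes on the host veth counts traffic received by the container.
	rx, err := readNetStat(iface, "tx_bytes")
	if err != nil {
		fmt.Println("stat read failed:", err)
		return
	}
	fmt.Printf("container received %d bytes\n", rx)
}
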

+ 146 - 8
daemon/execdriver/lxc/driver.go

@@ -23,7 +23,9 @@ import (
 	"github.com/docker/docker/utils"
 	"github.com/docker/docker/utils"
 	"github.com/docker/libcontainer"
 	"github.com/docker/libcontainer"
 	"github.com/docker/libcontainer/cgroups"
 	"github.com/docker/libcontainer/cgroups"
-	"github.com/docker/libcontainer/mount/nodes"
+	"github.com/docker/libcontainer/configs"
+	"github.com/docker/libcontainer/system"
+	"github.com/docker/libcontainer/user"
 	"github.com/kr/pty"
 	"github.com/kr/pty"
 )
 )
 
 
@@ -42,7 +44,7 @@ type driver struct {
 }
 }
 
 
 type activeContainer struct {
 type activeContainer struct {
-	container *libcontainer.Config
+	container *configs.Config
 	cmd       *exec.Cmd
 	cmd       *exec.Cmd
 }
 }
 
 
@@ -190,7 +192,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
 	c.ProcessConfig.Path = aname
 	c.ProcessConfig.Path = aname
 	c.ProcessConfig.Args = append([]string{name}, arg...)
 	c.ProcessConfig.Args = append([]string{name}, arg...)
 
 
-	if err := nodes.CreateDeviceNodes(c.Rootfs, c.AutoCreatedDevices); err != nil {
+	if err := createDeviceNodes(c.Rootfs, c.AutoCreatedDevices); err != nil {
 		return execdriver.ExitStatus{ExitCode: -1}, err
 		return execdriver.ExitStatus{ExitCode: -1}, err
 	}
 	}
 
 
@@ -231,11 +233,17 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
 	}
 	}
 
 
 	state := &libcontainer.State{
 	state := &libcontainer.State{
-		InitPid:     pid,
-		CgroupPaths: cgroupPaths,
+		InitProcessPid: pid,
+		CgroupPaths:    cgroupPaths,
 	}
 	}
 
 
-	if err := libcontainer.SaveState(dataPath, state); err != nil {
+	f, err := os.Create(filepath.Join(dataPath, "state.json"))
+	if err != nil {
+		return terminate(err)
+	}
+	defer f.Close()
+
+	if err := json.NewEncoder(f).Encode(state); err != nil {
 		return terminate(err)
 		return terminate(err)
 	}
 	}
 
 
@@ -245,8 +253,9 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
 		log.Debugf("Invoking startCallback")
 		log.Debugf("Invoking startCallback")
 		startCallback(&c.ProcessConfig, pid)
 		startCallback(&c.ProcessConfig, pid)
 	}
 	}
+
 	oomKill := false
 	oomKill := false
-	oomKillNotification, err := libcontainer.NotifyOnOOM(state)
+	oomKillNotification, err := notifyOnOOM(cgroupPaths)
 	if err == nil {
 	if err == nil {
 		_, oomKill = <-oomKillNotification
 		_, oomKill = <-oomKillNotification
 		log.Debugf("oomKill error %s waitErr %s", oomKill, waitErr)
 		log.Debugf("oomKill error %s waitErr %s", oomKill, waitErr)
@@ -265,9 +274,57 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
 	return execdriver.ExitStatus{ExitCode: exitCode, OOMKilled: oomKill}, waitErr
 	return execdriver.ExitStatus{ExitCode: exitCode, OOMKilled: oomKill}, waitErr
 }
 }
 
 
+// copy from libcontainer
+func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) {
+	dir := paths["memory"]
+	if dir == "" {
+		return nil, fmt.Errorf("There is no path for %q in state", "memory")
+	}
+	oomControl, err := os.Open(filepath.Join(dir, "memory.oom_control"))
+	if err != nil {
+		return nil, err
+	}
+	fd, _, syserr := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.FD_CLOEXEC, 0)
+	if syserr != 0 {
+		oomControl.Close()
+		return nil, syserr
+	}
+
+	eventfd := os.NewFile(fd, "eventfd")
+
+	eventControlPath := filepath.Join(dir, "cgroup.event_control")
+	data := fmt.Sprintf("%d %d", eventfd.Fd(), oomControl.Fd())
+	if err := ioutil.WriteFile(eventControlPath, []byte(data), 0700); err != nil {
+		eventfd.Close()
+		oomControl.Close()
+		return nil, err
+	}
+	ch := make(chan struct{})
+	go func() {
+		defer func() {
+			close(ch)
+			eventfd.Close()
+			oomControl.Close()
+		}()
+		buf := make([]byte, 8)
+		for {
+			if _, err := eventfd.Read(buf); err != nil {
+				return
+			}
+			// When a cgroup is destroyed, an event is sent to eventfd.
+			// So if the control path is gone, return instead of notifying.
+			if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) {
+				return
+			}
+			ch <- struct{}{}
+		}
+	}()
+	return ch, nil
+}
+
 // createContainer populates and configures the container type with the
 // data provided by the execdriver.Command
-func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, error) {
+func (d *driver) createContainer(c *execdriver.Command) (*configs.Config, error) {
 	container := execdriver.InitContainer(c)
 	if err := execdriver.SetupCgroups(container, c); err != nil {
 		return nil, err
@@ -297,6 +354,87 @@ func cgroupPaths(containerId string) (map[string]string, error) {
 	return paths, nil
 }
 
+// this is copy from old libcontainer nodes.go
+func createDeviceNodes(rootfs string, nodesToCreate []*configs.Device) error {
+	oldMask := syscall.Umask(0000)
+	defer syscall.Umask(oldMask)
+
+	for _, node := range nodesToCreate {
+		if err := createDeviceNode(rootfs, node); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// Creates the device node in the rootfs of the container.
+func createDeviceNode(rootfs string, node *configs.Device) error {
+	var (
+		dest   = filepath.Join(rootfs, node.Path)
+		parent = filepath.Dir(dest)
+	)
+
+	if err := os.MkdirAll(parent, 0755); err != nil {
+		return err
+	}
+
+	fileMode := node.FileMode
+	switch node.Type {
+	case 'c':
+		fileMode |= syscall.S_IFCHR
+	case 'b':
+		fileMode |= syscall.S_IFBLK
+	default:
+		return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
+	}
+
+	if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil && !os.IsExist(err) {
+		return fmt.Errorf("mknod %s %s", node.Path, err)
+	}
+
+	if err := syscall.Chown(dest, int(node.Uid), int(node.Gid)); err != nil {
+		return fmt.Errorf("chown %s to %d:%d", node.Path, node.Uid, node.Gid)
+	}
+
+	return nil
+}
+
+// setupUser changes the groups, gid, and uid for the user inside the container
+// copy from libcontainer, cause not it's private
+func setupUser(userSpec string) error {
+	// Set up defaults.
+	defaultExecUser := user.ExecUser{
+		Uid:  syscall.Getuid(),
+		Gid:  syscall.Getgid(),
+		Home: "/",
+	}
+	passwdPath, err := user.GetPasswdPath()
+	if err != nil {
+		return err
+	}
+	groupPath, err := user.GetGroupPath()
+	if err != nil {
+		return err
+	}
+	execUser, err := user.GetExecUserPath(userSpec, &defaultExecUser, passwdPath, groupPath)
+	if err != nil {
+		return err
+	}
+	if err := system.Setgid(execUser.Gid); err != nil {
+		return err
+	}
+	if err := system.Setuid(execUser.Uid); err != nil {
+		return err
+	}
+	// if we didn't get HOME already, set it based on the user's HOME
+	if envHome := os.Getenv("HOME"); envHome == "" {
+		if err := os.Setenv("HOME", execUser.Home); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
 /// Return the exit code of the process
 // if the process has not exited -1 will be returned
 func getExitCode(c *execdriver.Command) int {
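
createDeviceNode above re-implements what libcontainer's nodes package used to do: mkdir the parent, mknod with the device type OR-ed into the mode, then chown. A hedged standalone sketch for a single character device (/dev/null, major 1, minor 3) under an assumed rootfs path; the packed device number below only covers small major/minor values:

package main

import (
	"fmt"
	"os"
	"path/filepath"
	"syscall"
)

func main() {
	rootfs := "/tmp/rootfs" // assumption: an unpacked container root
	dest := filepath.Join(rootfs, "dev", "null")

	if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
		fmt.Println(err)
		return
	}
	// 0666 character device; needs root, just like the driver itself.
	mode := uint32(0666) | syscall.S_IFCHR
	dev := (1 << 8) | 3 // traditional encoding for major 1, minor 3
	if err := syscall.Mknod(dest, mode, dev); err != nil && !os.IsExist(err) {
		fmt.Println("mknod:", err)
		return
	}
	if err := syscall.Chown(dest, 0, 0); err != nil {
		fmt.Println("chown:", err)
	}
}
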

+ 1 - 5
daemon/execdriver/lxc/lxc_init_linux.go

@@ -3,8 +3,6 @@ package lxc
 import (
 	"fmt"
 
-	"github.com/docker/libcontainer"
-	"github.com/docker/libcontainer/namespaces"
 	"github.com/docker/libcontainer/utils"
 )
 
@@ -12,9 +10,7 @@ func finalizeNamespace(args *InitArgs) error {
 	if err := utils.CloseExecFrom(3); err != nil {
 		return err
 	}
-	if err := namespaces.SetupUser(&libcontainer.Config{
-		User: args.User,
-	}); err != nil {
+	if err := setupUser(args.User); err != nil {
 		return fmt.Errorf("setup user %s", err)
 	}
 	if err := setupWorkingDirectory(args); err != nil {

+ 5 - 9
daemon/execdriver/lxc/lxc_template.go

@@ -11,7 +11,6 @@ import (
 	nativeTemplate "github.com/docker/docker/daemon/execdriver/native/template"
 	nativeTemplate "github.com/docker/docker/daemon/execdriver/native/template"
 	"github.com/docker/docker/utils"
 	"github.com/docker/docker/utils"
 	"github.com/docker/libcontainer/label"
 	"github.com/docker/libcontainer/label"
-	"github.com/docker/libcontainer/security/capabilities"
 )
 )
 
 
 const LxcTemplate = `
 const LxcTemplate = `
@@ -169,7 +168,7 @@ func keepCapabilities(adds []string, drops []string) ([]string, error) {
 	var newCaps []string
 	var newCaps []string
 	for _, cap := range caps {
 	for _, cap := range caps {
 		log.Debugf("cap %s\n", cap)
 		log.Debugf("cap %s\n", cap)
-		realCap := capabilities.GetCapability(cap)
+		realCap := execdriver.GetCapability(cap)
 		numCap := fmt.Sprintf("%d", realCap.Value)
 		numCap := fmt.Sprintf("%d", realCap.Value)
 		newCaps = append(newCaps, numCap)
 		newCaps = append(newCaps, numCap)
 	}
 	}
@@ -180,13 +179,10 @@ func keepCapabilities(adds []string, drops []string) ([]string, error) {
 func dropList(drops []string) ([]string, error) {
 func dropList(drops []string) ([]string, error) {
 	if utils.StringsContainsNoCase(drops, "all") {
 	if utils.StringsContainsNoCase(drops, "all") {
 		var newCaps []string
 		var newCaps []string
-		for _, cap := range capabilities.GetAllCapabilities() {
-			log.Debugf("drop cap %s\n", cap)
-			realCap := capabilities.GetCapability(cap)
-			if realCap == nil {
-				return nil, fmt.Errorf("Invalid capability '%s'", cap)
-			}
-			numCap := fmt.Sprintf("%d", realCap.Value)
+		for _, capName := range execdriver.GetAllCapabilities() {
+			cap := execdriver.GetCapability(capName)
+			log.Debugf("drop cap %s\n", cap.Key)
+			numCap := fmt.Sprintf("%d", cap.Value)
 			newCaps = append(newCaps, numCap)
 			newCaps = append(newCaps, numCap)
 		}
 		}
 		return newCaps, nil
 		return newCaps, nil

+ 8 - 8
daemon/execdriver/lxc/lxc_template_unit_test.go

@@ -5,11 +5,6 @@ package lxc
 import (
 	"bufio"
 	"fmt"
-	"github.com/docker/docker/daemon/execdriver"
-	nativeTemplate "github.com/docker/docker/daemon/execdriver/native/template"
-	"github.com/docker/libcontainer/devices"
-	"github.com/docker/libcontainer/security/capabilities"
-	"github.com/syndtr/gocapability/capability"
 	"io/ioutil"
 	"math/rand"
 	"os"
@@ -17,6 +12,11 @@ import (
 	"strings"
 	"testing"
 	"time"
+
+	"github.com/docker/docker/daemon/execdriver"
+	nativeTemplate "github.com/docker/docker/daemon/execdriver/native/template"
+	"github.com/docker/libcontainer/configs"
+	"github.com/syndtr/gocapability/capability"
 )
 
 func TestLXCConfig(t *testing.T) {
@@ -53,7 +53,7 @@ func TestLXCConfig(t *testing.T) {
 			Mtu:       1500,
 			Interface: nil,
 		},
-		AllowedDevices: make([]*devices.Device, 0),
+		AllowedDevices: make([]*configs.Device, 0),
 		ProcessConfig:  execdriver.ProcessConfig{},
 	}
 	p, err := driver.generateLXCConfig(command)
@@ -295,7 +295,7 @@ func TestCustomLxcConfigMisc(t *testing.T) {
 	grepFile(t, p, "lxc.cgroup.cpuset.cpus = 0,1")
 	container := nativeTemplate.New()
 	for _, cap := range container.Capabilities {
-		realCap := capabilities.GetCapability(cap)
+		realCap := execdriver.GetCapability(cap)
 		numCap := fmt.Sprintf("%d", realCap.Value)
 		if cap != "MKNOD" && cap != "KILL" {
 			grepFile(t, p, fmt.Sprintf("lxc.cap.keep = %s", numCap))
@@ -359,7 +359,7 @@ func TestCustomLxcConfigMiscOverride(t *testing.T) {
 	grepFile(t, p, "lxc.cgroup.cpuset.cpus = 0,1")
 	container := nativeTemplate.New()
 	for _, cap := range container.Capabilities {
-		realCap := capabilities.GetCapability(cap)
+		realCap := execdriver.GetCapability(cap)
 		numCap := fmt.Sprintf("%d", realCap.Value)
 		if cap != "MKNOD" && cap != "KILL" {
 			grepFile(t, p, fmt.Sprintf("lxc.cap.keep = %s", numCap))

+ 94 - 58
daemon/execdriver/native/create.go

@@ -3,21 +3,24 @@
 package native
 
 import (
+	"errors"
 	"fmt"
-	"os/exec"
+	"net"
 	"path/filepath"
+	"strings"
+	"syscall"
 
 	"github.com/docker/docker/daemon/execdriver"
-	"github.com/docker/libcontainer"
+	"github.com/docker/docker/pkg/symlink"
 	"github.com/docker/libcontainer/apparmor"
+	"github.com/docker/libcontainer/configs"
 	"github.com/docker/libcontainer/devices"
-	"github.com/docker/libcontainer/mount"
-	"github.com/docker/libcontainer/security/capabilities"
+	"github.com/docker/libcontainer/utils"
 )
 
 // createContainer populates and configures the container type with the
 // data provided by the execdriver.Command
-func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, error) {
+func (d *driver) createContainer(c *execdriver.Command) (*configs.Config, error) {
 	container := execdriver.InitContainer(c)
 
 	if err := d.createIpc(container, c); err != nil {
@@ -33,6 +36,13 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e
 	}
 
 	if c.ProcessConfig.Privileged {
+		// clear readonly for /sys
+		for i := range container.Mounts {
+			if container.Mounts[i].Destination == "/sys" {
+				container.Mounts[i].Flags &= ^syscall.MS_RDONLY
+			}
+		}
+		container.ReadonlyPaths = nil
 		if err := d.setPrivileged(container); err != nil {
 			return nil, err
 		}
@@ -57,43 +67,52 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e
 	if err := d.setupLabels(container, c); err != nil {
 		return nil, err
 	}
-
 	d.setupRlimits(container, c)
+	return container, nil
+}
 
-	cmds := make(map[string]*exec.Cmd)
-	d.Lock()
-	for k, v := range d.activeContainers {
-		cmds[k] = v.cmd
+func generateIfaceName() (string, error) {
+	for i := 0; i < 10; i++ {
+		name, err := utils.GenerateRandomName("veth", 7)
+		if err != nil {
+			continue
+		}
+		if _, err := net.InterfaceByName(name); err != nil {
+			if strings.Contains(err.Error(), "no such") {
+				return name, nil
+			}
+			return "", err
+		}
 	}
-	d.Unlock()
-
-	return container, nil
+	return "", errors.New("Failed to find name for new interface")
 }
 
-func (d *driver) createNetwork(container *libcontainer.Config, c *execdriver.Command) error {
+func (d *driver) createNetwork(container *configs.Config, c *execdriver.Command) error {
 	if c.Network.HostNetworking {
-		container.Namespaces.Remove(libcontainer.NEWNET)
+		container.Namespaces.Remove(configs.NEWNET)
 		return nil
 	}
 
-	container.Networks = []*libcontainer.Network{
+	container.Networks = []*configs.Network{
 		{
-			Mtu:     c.Network.Mtu,
-			Address: fmt.Sprintf("%s/%d", "127.0.0.1", 0),
-			Gateway: "localhost",
-			Type:    "loopback",
+			Type: "loopback",
 		},
 	}
 
+	iName, err := generateIfaceName()
+	if err != nil {
+		return err
+	}
 	if c.Network.Interface != nil {
-		vethNetwork := libcontainer.Network{
-			Mtu:        c.Network.Mtu,
-			Address:    fmt.Sprintf("%s/%d", c.Network.Interface.IPAddress, c.Network.Interface.IPPrefixLen),
-			MacAddress: c.Network.Interface.MacAddress,
-			Gateway:    c.Network.Interface.Gateway,
-			Type:       "veth",
-			Bridge:     c.Network.Interface.Bridge,
-			VethPrefix: "veth",
+		vethNetwork := configs.Network{
+			Name:              "eth0",
+			HostInterfaceName: iName,
+			Mtu:               c.Network.Mtu,
+			Address:           fmt.Sprintf("%s/%d", c.Network.Interface.IPAddress, c.Network.Interface.IPPrefixLen),
+			MacAddress:        c.Network.Interface.MacAddress,
+			Gateway:           c.Network.Interface.Gateway,
+			Type:              "veth",
+			Bridge:            c.Network.Interface.Bridge,
 		}
 		if c.Network.Interface.GlobalIPv6Address != "" {
 			vethNetwork.IPv6Address = fmt.Sprintf("%s/%d", c.Network.Interface.GlobalIPv6Address, c.Network.Interface.GlobalIPv6PrefixLen)
@@ -107,21 +126,24 @@ func (d *driver) createNetwork(container *libcontainer.Config, c *execdriver.Com
 		active := d.activeContainers[c.Network.ContainerID]
 		d.Unlock()
 
-		if active == nil || active.cmd.Process == nil {
+		if active == nil {
 			return fmt.Errorf("%s is not a valid running container to join", c.Network.ContainerID)
 		}
-		cmd := active.cmd
 
-		nspath := filepath.Join("/proc", fmt.Sprint(cmd.Process.Pid), "ns", "net")
-		container.Namespaces.Add(libcontainer.NEWNET, nspath)
+		state, err := active.State()
+		if err != nil {
+			return err
+		}
+
+		container.Namespaces.Add(configs.NEWNET, state.NamespacePaths[configs.NEWNET])
 	}
 
 	return nil
 }
 
-func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command) error {
+func (d *driver) createIpc(container *configs.Config, c *execdriver.Command) error {
 	if c.Ipc.HostIpc {
-		container.Namespaces.Remove(libcontainer.NEWIPC)
+		container.Namespaces.Remove(configs.NEWIPC)
 		return nil
 	}
 
@@ -130,37 +152,38 @@ func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command
 		active := d.activeContainers[c.Ipc.ContainerID]
 		d.Unlock()
 
-		if active == nil || active.cmd.Process == nil {
+		if active == nil {
 			return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID)
 		}
-		cmd := active.cmd
 
-		container.Namespaces.Add(libcontainer.NEWIPC, filepath.Join("/proc", fmt.Sprint(cmd.Process.Pid), "ns", "ipc"))
+		state, err := active.State()
+		if err != nil {
+			return err
+		}
+		container.Namespaces.Add(configs.NEWIPC, state.NamespacePaths[configs.NEWIPC])
 	}
 
 	return nil
 }
 
-func (d *driver) createPid(container *libcontainer.Config, c *execdriver.Command) error {
+func (d *driver) createPid(container *configs.Config, c *execdriver.Command) error {
 	if c.Pid.HostPid {
-		container.Namespaces.Remove(libcontainer.NEWPID)
+		container.Namespaces.Remove(configs.NEWPID)
 		return nil
 	}
 
 	return nil
 }
 
-func (d *driver) setPrivileged(container *libcontainer.Config) (err error) {
-	container.Capabilities = capabilities.GetAllCapabilities()
+func (d *driver) setPrivileged(container *configs.Config) (err error) {
+	container.Capabilities = execdriver.GetAllCapabilities()
 	container.Cgroups.AllowAllDevices = true
 
-	hostDeviceNodes, err := devices.GetHostDeviceNodes()
+	hostDevices, err := devices.HostDevices()
 	if err != nil {
 		return err
 	}
-	container.MountConfig.DeviceNodes = hostDeviceNodes
-
-	container.RestrictSys = false
+	container.Devices = hostDevices
 
 	if apparmor.IsEnabled() {
 		container.AppArmorProfile = "unconfined"
@@ -169,39 +192,52 @@ func (d *driver) setPrivileged(container *libcontainer.Config) (err error) {
 	return nil
 }
 
-func (d *driver) setCapabilities(container *libcontainer.Config, c *execdriver.Command) (err error) {
+func (d *driver) setCapabilities(container *configs.Config, c *execdriver.Command) (err error) {
 	container.Capabilities, err = execdriver.TweakCapabilities(container.Capabilities, c.CapAdd, c.CapDrop)
 	return err
 }
 
-func (d *driver) setupRlimits(container *libcontainer.Config, c *execdriver.Command) {
+func (d *driver) setupRlimits(container *configs.Config, c *execdriver.Command) {
 	if c.Resources == nil {
 		return
 	}
 
 	for _, rlimit := range c.Resources.Rlimits {
-		container.Rlimits = append(container.Rlimits, libcontainer.Rlimit((*rlimit)))
+		container.Rlimits = append(container.Rlimits, configs.Rlimit{
+			Type: rlimit.Type,
+			Hard: rlimit.Hard,
+			Soft: rlimit.Soft,
+		})
 	}
 }
 
-func (d *driver) setupMounts(container *libcontainer.Config, c *execdriver.Command) error {
+func (d *driver) setupMounts(container *configs.Config, c *execdriver.Command) error {
 	for _, m := range c.Mounts {
-		container.MountConfig.Mounts = append(container.MountConfig.Mounts, &mount.Mount{
-			Type:        "bind",
+		dest, err := symlink.FollowSymlinkInScope(filepath.Join(c.Rootfs, m.Destination), c.Rootfs)
+		if err != nil {
+			return err
+		}
+		flags := syscall.MS_BIND | syscall.MS_REC
+		if !m.Writable {
+			flags |= syscall.MS_RDONLY
+		}
+		if m.Slave {
+			flags |= syscall.MS_SLAVE
+		}
+
+		container.Mounts = append(container.Mounts, &configs.Mount{
 			Source:      m.Source,
-			Destination: m.Destination,
-			Writable:    m.Writable,
-			Private:     m.Private,
-			Slave:       m.Slave,
+			Destination: dest,
+			Device:      "bind",
+			Flags:       flags,
 		})
 	}
-
 	return nil
 }
 
-func (d *driver) setupLabels(container *libcontainer.Config, c *execdriver.Command) error {
+func (d *driver) setupLabels(container *configs.Config, c *execdriver.Command) error {
 	container.ProcessLabel = c.ProcessLabel
-	container.MountConfig.MountLabel = c.MountLabel
+	container.MountLabel = c.MountLabel
 
 	return nil
 }
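
setupMounts above converts the old Writable/Private/Slave booleans into MS_* flags on a recursive bind mount. A small sketch of that translation with made-up inputs, assuming the same flag semantics:

package main

import (
	"fmt"
	"syscall"
)

// bindFlags mirrors the translation done in setupMounts: every volume is a
// recursive bind mount, optionally read-only and/or slave-propagated.
func bindFlags(writable, slave bool) int {
	flags := syscall.MS_BIND | syscall.MS_REC
	if !writable {
		flags |= syscall.MS_RDONLY
	}
	if slave {
		flags |= syscall.MS_SLAVE
	}
	return flags
}

func main() {
	fmt.Printf("read-only bind: %#x\n", bindFlags(false, false))
	fmt.Printf("writable slave bind: %#x\n", bindFlags(true, true))
}
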

+ 183 - 139
daemon/execdriver/native/driver.go

@@ -4,28 +4,28 @@ package native
 
 
 import (
 	"encoding/json"
-	"errors"
 	"fmt"
 	"io"
 	"io/ioutil"
 	"os"
 	"os/exec"
 	"path/filepath"
+	"strings"
 	"sync"
 	"syscall"
+	"time"
 
 	log "github.com/Sirupsen/logrus"
 	"github.com/docker/docker/daemon/execdriver"
+	"github.com/docker/docker/pkg/reexec"
 	sysinfo "github.com/docker/docker/pkg/system"
 	"github.com/docker/docker/pkg/term"
 	"github.com/docker/libcontainer"
 	"github.com/docker/libcontainer/apparmor"
-	"github.com/docker/libcontainer/cgroups/fs"
 	"github.com/docker/libcontainer/cgroups/systemd"
-	consolepkg "github.com/docker/libcontainer/console"
-	"github.com/docker/libcontainer/namespaces"
-	_ "github.com/docker/libcontainer/namespaces/nsenter"
+	"github.com/docker/libcontainer/configs"
 	"github.com/docker/libcontainer/system"
+	"github.com/docker/libcontainer/utils"
 )
 
 const (
@@ -33,16 +33,12 @@ const (
 	Version    = "0.2"
 	Version    = "0.2"
 )
 )
 
 
-type activeContainer struct {
-	container *libcontainer.Config
-	cmd       *exec.Cmd
-}
-
 type driver struct {
 type driver struct {
 	root             string
 	root             string
 	initPath         string
 	initPath         string
-	activeContainers map[string]*activeContainer
+	activeContainers map[string]libcontainer.Container
 	machineMemory    int64
 	machineMemory    int64
+	factory          libcontainer.Factory
 	sync.Mutex
 	sync.Mutex
 }
 }
 
 
@@ -59,11 +55,22 @@ func NewDriver(root, initPath string) (*driver, error) {
 	if err := apparmor.InstallDefaultProfile(); err != nil {
 	if err := apparmor.InstallDefaultProfile(); err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
+	cgm := libcontainer.Cgroupfs
+	if systemd.UseSystemd() {
+		cgm = libcontainer.SystemdCgroups
+	}
+
+	f, err := libcontainer.New(root, cgm, libcontainer.InitPath(reexec.Self(), DriverName))
+	if err != nil {
+		return nil, err
+	}
+
 	return &driver{
 	return &driver{
 		root:             root,
 		root:             root,
 		initPath:         initPath,
 		initPath:         initPath,
-		activeContainers: make(map[string]*activeContainer),
+		activeContainers: make(map[string]libcontainer.Container),
 		machineMemory:    meminfo.MemTotal,
 		machineMemory:    meminfo.MemTotal,
+		factory:          f,
 	}, nil
 	}, nil
 }
 }
 
 
@@ -81,101 +88,141 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
 
 
 	var term execdriver.Terminal
 
+	p := &libcontainer.Process{
+		Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...),
+		Env:  c.ProcessConfig.Env,
+		Cwd:  c.WorkingDir,
+		User: c.ProcessConfig.User,
+	}
+
 	if c.ProcessConfig.Tty {
-		term, err = NewTtyConsole(&c.ProcessConfig, pipes)
+		rootuid, err := container.HostUID()
+		if err != nil {
+			return execdriver.ExitStatus{ExitCode: -1}, err
+		}
+		cons, err := p.NewConsole(rootuid)
+		if err != nil {
+			return execdriver.ExitStatus{ExitCode: -1}, err
+		}
+		term, err = NewTtyConsole(cons, pipes, rootuid)
 	} else {
 	} else {
-		term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes)
+		p.Stdout = pipes.Stdout
+		p.Stderr = pipes.Stderr
+		r, w, err := os.Pipe()
+		if err != nil {
+			return execdriver.ExitStatus{ExitCode: -1}, err
+		}
+		if pipes.Stdin != nil {
+			go func() {
+				io.Copy(w, pipes.Stdin)
+				w.Close()
+			}()
+			p.Stdin = r
+		}
+		term = &execdriver.StdConsole{}
 	}
 	if err != nil {
 		return execdriver.ExitStatus{ExitCode: -1}, err
 	}
 	c.ProcessConfig.Terminal = term
 
-	d.Lock()
-	d.activeContainers[c.ID] = &activeContainer{
-		container: container,
-		cmd:       &c.ProcessConfig.Cmd,
+	cont, err := d.factory.Create(c.ID, container)
+	if err != nil {
+		return execdriver.ExitStatus{ExitCode: -1}, err
 	}
+	d.Lock()
+	d.activeContainers[c.ID] = cont
 	d.Unlock()
+	defer func() {
+		cont.Destroy()
+		d.cleanContainer(c.ID)
+	}()
 
-	var (
-		dataPath = filepath.Join(d.root, c.ID)
-		args     = append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...)
-	)
-
-	if err := d.createContainerRoot(c.ID); err != nil {
+	if err := cont.Start(p); err != nil {
 		return execdriver.ExitStatus{ExitCode: -1}, err
 	}
-	defer d.cleanContainer(c.ID)
 
-	if err := d.writeContainerFile(container, c.ID); err != nil {
-		return execdriver.ExitStatus{ExitCode: -1}, err
+	if startCallback != nil {
+		pid, err := p.Pid()
+		if err != nil {
+			p.Signal(os.Kill)
+			p.Wait()
+			return execdriver.ExitStatus{ExitCode: -1}, err
+		}
+		startCallback(&c.ProcessConfig, pid)
+	}
+
+	oomKillNotification, err := cont.NotifyOOM()
+	if err != nil {
+		oomKillNotification = nil
+		log.Warnf("WARNING: Your kernel does not support OOM notifications: %s", err)
+	}
+	waitF := p.Wait
+	if nss := cont.Config().Namespaces; nss.Contains(configs.NEWPID) {
+		// we need such hack for tracking processes with inerited fds,
+		// because cmd.Wait() waiting for all streams to be copied
+		waitF = waitInPIDHost(p, cont)
+	}
+	ps, err := waitF()
+	if err != nil {
+		if err, ok := err.(*exec.ExitError); !ok {
+			return execdriver.ExitStatus{ExitCode: -1}, err
+		} else {
+			ps = err.ProcessState
+		}
 	}
+	cont.Destroy()
 
-	execOutputChan := make(chan execOutput, 1)
-	waitForStart := make(chan struct{})
+	_, oomKill := <-oomKillNotification
 
-	go func() {
-		exitCode, err := namespaces.Exec(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, dataPath, args, func(container *libcontainer.Config, console, dataPath, init string, child *os.File, args []string) *exec.Cmd {
-			c.ProcessConfig.Path = d.initPath
-			c.ProcessConfig.Args = append([]string{
-				DriverName,
-				"-console", console,
-				"-pipe", "3",
-				"-root", filepath.Join(d.root, c.ID),
-				"--",
-			}, args...)
-
-			// set this to nil so that when we set the clone flags anything else is reset
-			c.ProcessConfig.SysProcAttr = &syscall.SysProcAttr{
-				Cloneflags: uintptr(namespaces.GetNamespaceFlags(container.Namespaces)),
-			}
-			c.ProcessConfig.ExtraFiles = []*os.File{child}
+	return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
+}
 
 
-			c.ProcessConfig.Env = container.Env
-			c.ProcessConfig.Dir = container.RootFs
+func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*os.ProcessState, error) {
+	return func() (*os.ProcessState, error) {
+		pid, err := p.Pid()
+		if err != nil {
+			return nil, err
+		}
 
 
-			return &c.ProcessConfig.Cmd
-		}, func() {
-			close(waitForStart)
-			if startCallback != nil {
-				c.ContainerPid = c.ProcessConfig.Process.Pid
-				startCallback(&c.ProcessConfig, c.ContainerPid)
+		process, err := os.FindProcess(pid)
+		s, err := process.Wait()
+		if err != nil {
+			if err, ok := err.(*exec.ExitError); !ok {
+				return s, err
+			} else {
+				s = err.ProcessState
 			}
 			}
-		})
-		execOutputChan <- execOutput{exitCode, err}
-	}()
-
-	select {
-	case execOutput := <-execOutputChan:
-		return execdriver.ExitStatus{ExitCode: execOutput.exitCode}, execOutput.err
-	case <-waitForStart:
-		break
-	}
+		}
+		processes, err := c.Processes()
+		if err != nil {
+			return s, err
+		}
 
 
-	oomKill := false
-	state, err := libcontainer.GetState(filepath.Join(d.root, c.ID))
-	if err == nil {
-		oomKillNotification, err := libcontainer.NotifyOnOOM(state)
-		if err == nil {
-			_, oomKill = <-oomKillNotification
-		} else {
-			log.Warnf("WARNING: Your kernel does not support OOM notifications: %s", err)
+		for _, pid := range processes {
+			process, err := os.FindProcess(pid)
+			if err != nil {
+				log.Errorf("Failed to kill process: %d", pid)
+				continue
+			}
+			process.Kill()
 		}
 		}
-	} else {
-		log.Warnf("Failed to get container state, oom notify will not work: %s", err)
-	}
-	// wait for the container to exit.
-	execOutput := <-execOutputChan
 
 
-	return execdriver.ExitStatus{ExitCode: execOutput.exitCode, OOMKilled: oomKill}, execOutput.err
+		p.Wait()
+		return s, err
+	}
 }
 }
 
 
-func (d *driver) Kill(p *execdriver.Command, sig int) error {
-	if p.ProcessConfig.Process == nil {
-		return errors.New("exec: not started")
+func (d *driver) Kill(c *execdriver.Command, sig int) error {
+	active := d.activeContainers[c.ID]
+	if active == nil {
+		return fmt.Errorf("active container for %s does not exist", c.ID)
 	}
 	}
-	return syscall.Kill(p.ProcessConfig.Process.Pid, syscall.Signal(sig))
+	state, err := active.State()
+	if err != nil {
+		return err
+	}
+	return syscall.Kill(state.InitProcessPid, syscall.Signal(sig))
 }
 
 func (d *driver) Pause(c *execdriver.Command) error {
@@ -183,11 +230,7 @@ func (d *driver) Pause(c *execdriver.Command) error {
 	if active == nil {
 		return fmt.Errorf("active container for %s does not exist", c.ID)
 	}
-	active.container.Cgroups.Freezer = "FROZEN"
-	if systemd.UseSystemd() {
-		return systemd.Freeze(active.container.Cgroups, active.container.Cgroups.Freezer)
-	}
-	return fs.Freeze(active.container.Cgroups, active.container.Cgroups.Freezer)
+	return active.Pause()
 }
 
 func (d *driver) Unpause(c *execdriver.Command) error {
@@ -195,44 +238,31 @@ func (d *driver) Unpause(c *execdriver.Command) error {
 	if active == nil {
 		return fmt.Errorf("active container for %s does not exist", c.ID)
 	}
-	active.container.Cgroups.Freezer = "THAWED"
-	if systemd.UseSystemd() {
-		return systemd.Freeze(active.container.Cgroups, active.container.Cgroups.Freezer)
-	}
-	return fs.Freeze(active.container.Cgroups, active.container.Cgroups.Freezer)
+	return active.Resume()
 }
 
-func (d *driver) Terminate(p *execdriver.Command) error {
+func (d *driver) Terminate(c *execdriver.Command) error {
 	// lets check the start time for the process
-	state, err := libcontainer.GetState(filepath.Join(d.root, p.ID))
+	active := d.activeContainers[c.ID]
+	if active == nil {
+		return fmt.Errorf("active container for %s does not exist", c.ID)
+	}
+	state, err := active.State()
 	if err != nil {
-		if !os.IsNotExist(err) {
-			return err
-		}
-		// TODO: Remove this part for version 1.2.0
-		// This is added only to ensure smooth upgrades from pre 1.1.0 to 1.1.0
-		data, err := ioutil.ReadFile(filepath.Join(d.root, p.ID, "start"))
-		if err != nil {
-			// if we don't have the data on disk then we can assume the process is gone
-			// because this is only removed after we know the process has stopped
-			if os.IsNotExist(err) {
-				return nil
-			}
-			return err
-		}
-		state = &libcontainer.State{InitStartTime: string(data)}
+		return err
 	}
+	pid := state.InitProcessPid
 
-	currentStartTime, err := system.GetProcessStartTime(p.ProcessConfig.Process.Pid)
+	currentStartTime, err := system.GetProcessStartTime(pid)
 	if err != nil {
 		return err
 	}
 
-	if state.InitStartTime == currentStartTime {
-		err = syscall.Kill(p.ProcessConfig.Process.Pid, 9)
-		syscall.Wait4(p.ProcessConfig.Process.Pid, nil, 0, nil)
+	if state.InitProcessStartTime == currentStartTime {
+		err = syscall.Kill(pid, 9)
+		syscall.Wait4(pid, nil, 0, nil)
 	}
-	d.cleanContainer(p.ID)
+	d.cleanContainer(c.ID)
 
 	return err
 
@@ -257,15 +287,10 @@ func (d *driver) GetPidsForContainer(id string) ([]int, error) {
 	if active == nil {
 		return nil, fmt.Errorf("active container for %s does not exist", id)
 	}
-	c := active.container.Cgroups
-
-	if systemd.UseSystemd() {
-		return systemd.GetPids(c)
-	}
-	return fs.GetPids(c)
+	return active.Processes()
 }
 
-func (d *driver) writeContainerFile(container *libcontainer.Config, id string) error {
+func (d *driver) writeContainerFile(container *configs.Config, id string) error {
 	data, err := json.Marshal(container)
 	if err != nil {
 		return err
@@ -289,42 +314,61 @@ func (d *driver) Clean(id string) error {
 }
 
 func (d *driver) Stats(id string) (*execdriver.ResourceStats, error) {
-	return execdriver.Stats(filepath.Join(d.root, id), d.activeContainers[id].container.Cgroups.Memory, d.machineMemory)
+	c := d.activeContainers[id]
+	now := time.Now()
+	stats, err := c.Stats()
+	if err != nil {
+		return nil, err
+	}
+	memoryLimit := c.Config().Cgroups.Memory
+	// if the container does not have any memory limit specified set the
+	// limit to the machines memory
+	if memoryLimit == 0 {
+		memoryLimit = d.machineMemory
+	}
+	return &execdriver.ResourceStats{
+		Stats:       stats,
+		Read:        now,
+		MemoryLimit: memoryLimit,
+	}, nil
 }
 
-type TtyConsole struct {
-	MasterPty *os.File
+func getEnv(key string, env []string) string {
+	for _, pair := range env {
+		parts := strings.Split(pair, "=")
+		if parts[0] == key {
+			return parts[1]
+		}
+	}
+	return ""
 }
 
-func NewTtyConsole(processConfig *execdriver.ProcessConfig, pipes *execdriver.Pipes) (*TtyConsole, error) {
-	ptyMaster, console, err := consolepkg.CreateMasterAndConsole()
-	if err != nil {
-		return nil, err
-	}
+type TtyConsole struct {
+	console libcontainer.Console
+}
 
 
+func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes, rootuid int) (*TtyConsole, error) {
 	tty := &TtyConsole{
-		MasterPty: ptyMaster,
+		console: console,
 	}
 
-	if err := tty.AttachPipes(&processConfig.Cmd, pipes); err != nil {
+	if err := tty.AttachPipes(pipes); err != nil {
 		tty.Close()
 		return nil, err
 	}
 
-	processConfig.Console = console
-
 	return tty, nil
 }
 
-func (t *TtyConsole) Master() *os.File {
-	return t.MasterPty
+func (t *TtyConsole) Master() libcontainer.Console {
+	return t.console
 }
 
 func (t *TtyConsole) Resize(h, w int) error {
-	return term.SetWinsize(t.MasterPty.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
+	return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
 }
 
-func (t *TtyConsole) AttachPipes(command *exec.Cmd, pipes *execdriver.Pipes) error {
+func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes) error {
 	go func() {
 		if wb, ok := pipes.Stdout.(interface {
 			CloseWriters() error
@@ -332,12 +376,12 @@ func (t *TtyConsole) AttachPipes(command *exec.Cmd, pipes *execdriver.Pipes) err
 			defer wb.CloseWriters()
 		}
 
-		io.Copy(pipes.Stdout, t.MasterPty)
+		io.Copy(pipes.Stdout, t.console)
 	}()
 
 	if pipes.Stdin != nil {
 		go func() {
-			io.Copy(t.MasterPty, pipes.Stdin)
+			io.Copy(t.console, pipes.Stdin)
 
 			pipes.Stdin.Close()
 		}()
@@ -347,5 +391,5 @@ func (t *TtyConsole) AttachPipes(command *exec.Cmd, pipes *execdriver.Pipes) err
 }
 
 func (t *TtyConsole) Close() error {
-	return t.MasterPty.Close()
+	return t.console.Close()
 }
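
Taken together, the native driver now delegates the whole container lifecycle to libcontainer's Factory/Container/Process API instead of shelling out through namespaces.Exec. The sketch below strings together only the calls visible in this diff (New, Create, Start, Wait, Destroy); the root path and the prepared *configs.Config are assumptions, and in the real driver the config comes from execdriver.InitContainer plus the create.go helpers:

package main

import (
	"fmt"
	"os"

	"github.com/docker/libcontainer"
	"github.com/docker/libcontainer/configs"
)

// runOnce starts one process in a new container and waits for it to exit.
func runOnce(id string, config *configs.Config) error {
	// Cgroupfs is the non-systemd cgroup manager; the driver switches to
	// SystemdCgroups when systemd.UseSystemd() reports true.
	factory, err := libcontainer.New("/var/run/docker/execdriver/native", libcontainer.Cgroupfs)
	if err != nil {
		return err
	}
	container, err := factory.Create(id, config)
	if err != nil {
		return err
	}
	defer container.Destroy()

	p := &libcontainer.Process{
		Args:   []string{"/bin/true"},
		Env:    []string{"PATH=/usr/bin:/bin"},
		Stdout: os.Stdout,
		Stderr: os.Stderr,
	}
	if err := container.Start(p); err != nil {
		return err
	}
	ps, err := p.Wait() // *exec.ExitError on a non-zero exit, as handled in Run above
	if err != nil {
		return err
	}
	fmt.Println("container process exited:", ps)
	return nil
}

func main() {
	// A real caller would pass a config built by execdriver.InitContainer.
}
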

+ 50 - 40
daemon/execdriver/native/exec.go

@@ -4,67 +4,77 @@ package native
 
 
 import (
 	"fmt"
-	"log"
 	"os"
 	"os/exec"
-	"path/filepath"
-	"runtime"
+	"syscall"
 
 	"github.com/docker/docker/daemon/execdriver"
-	"github.com/docker/docker/pkg/reexec"
 	"github.com/docker/libcontainer"
-	"github.com/docker/libcontainer/namespaces"
+	_ "github.com/docker/libcontainer/nsenter"
+	"github.com/docker/libcontainer/utils"
 )
 
-const execCommandName = "nsenter-exec"
-
-func init() {
-	reexec.Register(execCommandName, nsenterExec)
-}
-
-func nsenterExec() {
-	runtime.LockOSThread()
-
-	// User args are passed after '--' in the command line.
-	userArgs := findUserArgs()
-
-	config, err := loadConfigFromFd()
-	if err != nil {
-		log.Fatalf("docker-exec: unable to receive config from sync pipe: %s", err)
-	}
-
-	if err := namespaces.FinalizeSetns(config, userArgs); err != nil {
-		log.Fatalf("docker-exec: failed to exec: %s", err)
-	}
-}
-
 // TODO(vishh): Add support for running in priviledged mode and running as a different user.
 func (d *driver) Exec(c *execdriver.Command, processConfig *execdriver.ProcessConfig, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
 	active := d.activeContainers[c.ID]
 	if active == nil {
 		return -1, fmt.Errorf("No active container exists with ID %s", c.ID)
 	}
-	state, err := libcontainer.GetState(filepath.Join(d.root, c.ID))
-	if err != nil {
-		return -1, fmt.Errorf("State unavailable for container with ID %s. The container may have been cleaned up already. Error: %s", c.ID, err)
-	}
 
 	var term execdriver.Terminal
+	var err error
+
+	p := &libcontainer.Process{
+		Args: append([]string{processConfig.Entrypoint}, processConfig.Arguments...),
+		Env:  c.ProcessConfig.Env,
+		Cwd:  c.WorkingDir,
+		User: c.ProcessConfig.User,
+	}
 
 
 	if processConfig.Tty {
-		term, err = NewTtyConsole(processConfig, pipes)
+		config := active.Config()
+		rootuid, err := config.HostUID()
+		if err != nil {
+			return -1, err
+		}
+		cons, err := p.NewConsole(rootuid)
+		if err != nil {
+			return -1, err
+		}
+		term, err = NewTtyConsole(cons, pipes, rootuid)
 	} else {
-		term, err = execdriver.NewStdConsole(processConfig, pipes)
+		p.Stdout = pipes.Stdout
+		p.Stderr = pipes.Stderr
+		p.Stdin = pipes.Stdin
+		term = &execdriver.StdConsole{}
+	}
+	if err != nil {
+		return -1, err
 	}
 
 	processConfig.Terminal = term
 
-	args := append([]string{processConfig.Entrypoint}, processConfig.Arguments...)
+	if err := active.Start(p); err != nil {
+		return -1, err
+	}
+
+	if startCallback != nil {
+		pid, err := p.Pid()
+		if err != nil {
+			p.Signal(os.Kill)
+			p.Wait()
+			return -1, err
+		}
+		startCallback(&c.ProcessConfig, pid)
+	}
 
 
-	return namespaces.ExecIn(active.container, state, args, os.Args[0], "exec", processConfig.Stdin, processConfig.Stdout, processConfig.Stderr, processConfig.Console,
-		func(cmd *exec.Cmd) {
-			if startCallback != nil {
-				startCallback(&c.ProcessConfig, cmd.Process.Pid)
-			}
-		})
+	ps, err := p.Wait()
+	if err != nil {
+		exitErr, ok := err.(*exec.ExitError)
+		if !ok {
+			return -1, err
+		}
+		ps = exitErr.ProcessState
+	}
+	return utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), nil
 }
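
Both Run and Exec now recover the exit code from the *exec.ExitError that Process.Wait returns for a non-zero exit, by way of the syscall.WaitStatus. A standalone illustration of that pattern using only the standard library:

package main

import (
	"fmt"
	"os/exec"
	"syscall"
)

func main() {
	cmd := exec.Command("/bin/sh", "-c", "exit 3")
	err := cmd.Run()

	ps := cmd.ProcessState
	if exitErr, ok := err.(*exec.ExitError); ok {
		// Same unwrapping as in the driver: the ProcessState lives on the error.
		ps = exitErr.ProcessState
	}
	ws := ps.Sys().(syscall.WaitStatus)
	fmt.Println("exit code:", ws.ExitStatus()) // prints 3
}
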

+ 2 - 16
daemon/execdriver/native/info.go

@@ -2,13 +2,6 @@
 
 
 package native
 
-import (
-	"os"
-	"path/filepath"
-
-	"github.com/docker/libcontainer"
-)
-
 type info struct {
 	ID     string
 	driver *driver
@@ -18,13 +11,6 @@ type info struct {
 // pid file for a container.  If the file exists then the
 // container is currently running
 func (i *info) IsRunning() bool {
-	if _, err := libcontainer.GetState(filepath.Join(i.driver.root, i.ID)); err == nil {
-		return true
-	}
-	// TODO: Remove this part for version 1.2.0
-	// This is added only to ensure smooth upgrades from pre 1.1.0 to 1.1.0
-	if _, err := os.Stat(filepath.Join(i.driver.root, i.ID, "pid")); err == nil {
-		return true
-	}
-	return false
+	_, ok := i.driver.activeContainers[i.ID]
+	return ok
 }

+ 15 - 30
daemon/execdriver/native/init.go

@@ -3,55 +3,40 @@
 package native
 
 import (
-	"encoding/json"
-	"flag"
 	"fmt"
 	"os"
-	"path/filepath"
 	"runtime"
 
 	"github.com/docker/docker/pkg/reexec"
 	"github.com/docker/libcontainer"
-	"github.com/docker/libcontainer/namespaces"
 )
 
 func init() {
 	reexec.Register(DriverName, initializer)
 }
 
-func initializer() {
-	runtime.LockOSThread()
-
-	var (
-		pipe    = flag.Int("pipe", 0, "sync pipe fd")
-		console = flag.String("console", "", "console (pty slave) path")
-		root    = flag.String("root", ".", "root path for configuration files")
-	)
-
-	flag.Parse()
-
-	var container *libcontainer.Config
-	f, err := os.Open(filepath.Join(*root, "container.json"))
-	if err != nil {
-		writeError(err)
+func fatal(err error) {
+	if lerr, ok := err.(libcontainer.Error); ok {
+		lerr.Detail(os.Stderr)
+		os.Exit(1)
 	}
 
-	if err := json.NewDecoder(f).Decode(&container); err != nil {
-		f.Close()
-		writeError(err)
-	}
-	f.Close()
+	fmt.Fprintln(os.Stderr, err)
+	os.Exit(1)
+}
 
-	rootfs, err := os.Getwd()
+func initializer() {
+	runtime.GOMAXPROCS(1)
+	runtime.LockOSThread()
+	factory, err := libcontainer.New("")
 	if err != nil {
-		writeError(err)
+		fatal(err)
 	}
-
-	if err := namespaces.Init(container, rootfs, *console, os.NewFile(uintptr(*pipe), "child"), flag.Args()); err != nil {
-		writeError(err)
+	if err := factory.StartInitialization(3); err != nil {
+		fatal(err)
 	}
 
-	panic("Unreachable")
+	panic("unreachable")
 }
 
 func writeError(err error) {
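
The rewritten initializer leans on docker's reexec package: the driver registers a named entrypoint at import time, the container's first process re-executes the docker binary under that name, and factory.StartInitialization(3) then takes over using the pipe the parent passed down as fd 3. A self-contained sketch of the register/dispatch half of that pattern; the entrypoint name and printed messages here are made up.

package main

import (
	"fmt"
	"os"

	"github.com/docker/docker/pkg/reexec"
)

func init() {
	// Same shape as reexec.Register(DriverName, initializer) above.
	reexec.Register("example-init", func() {
		fmt.Println("running as the re-exec'd init process")
		os.Exit(0)
	})
}

func main() {
	// reexec.Init runs the registered function when argv[0] matches a
	// registered name and reports whether it did so.
	if reexec.Init() {
		return
	}
	fmt.Println("running as the ordinary parent process")
}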

+ 43 - 7
daemon/execdriver/native/template/default_template.go

@@ -1,14 +1,17 @@
 package template
 
 import (
-	"github.com/docker/libcontainer"
+	"syscall"
+
 	"github.com/docker/libcontainer/apparmor"
-	"github.com/docker/libcontainer/cgroups"
+	"github.com/docker/libcontainer/configs"
 )
 
+const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
+
 // New returns the docker default configuration for libcontainer
-func New() *libcontainer.Config {
-	container := &libcontainer.Config{
+func New() *configs.Config {
+	container := &configs.Config{
 		Capabilities: []string{
 			"CHOWN",
 			"DAC_OVERRIDE",
@@ -25,18 +28,51 @@ func New() *libcontainer.Config {
 			"KILL",
 			"AUDIT_WRITE",
 		},
-		Namespaces: libcontainer.Namespaces([]libcontainer.Namespace{
+		Namespaces: configs.Namespaces([]configs.Namespace{
 			{Type: "NEWNS"},
 			{Type: "NEWUTS"},
 			{Type: "NEWIPC"},
 			{Type: "NEWPID"},
 			{Type: "NEWNET"},
 		}),
-		Cgroups: &cgroups.Cgroup{
+		Cgroups: &configs.Cgroup{
 			Parent:          "docker",
 			AllowAllDevices: false,
 		},
-		MountConfig: &libcontainer.MountConfig{},
+		Mounts: []*configs.Mount{
+			{
+				Device:      "tmpfs",
+				Source:      "shm",
+				Destination: "/dev/shm",
+				Data:        "mode=1777,size=65536k",
+				Flags:       defaultMountFlags,
+			},
+			{
+				Source:      "mqueue",
+				Destination: "/dev/mqueue",
+				Device:      "mqueue",
+				Flags:       defaultMountFlags,
+			},
+			{
+				Source:      "sysfs",
+				Destination: "/sys",
+				Device:      "sysfs",
+				Flags:       defaultMountFlags | syscall.MS_RDONLY,
+			},
+		},
+		MaskPaths: []string{
+			"/proc/kcore",
+		},
+		ReadonlyPaths: []string{
+			"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
+		},
+		Rlimits: []configs.Rlimit{
+			{
+				Type: syscall.RLIMIT_NOFILE,
+				Hard: 1024,
+				Soft: 1024,
+			},
+		},
 	}
 
 	if apparmor.IsEnabled() {
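
The template now hands back a *configs.Config pre-seeded with namespaces, a "docker" cgroup parent, the /dev/shm, /dev/mqueue and /sys mounts, masked and read-only /proc paths, and an RLIMIT_NOFILE of 1024. A hedged sketch of how a caller might layer per-container settings on top; only fields visible in this hunk are touched, and the bind-mount values are invented for illustration.

package example

import (
	"syscall"

	"github.com/docker/docker/daemon/execdriver/native/template"
	"github.com/docker/libcontainer/configs"
)

func exampleConfig() *configs.Config {
	cfg := template.New()

	// Add a container-specific bind mount next to the template's defaults
	// (shm, mqueue and sysfs are already present).
	cfg.Mounts = append(cfg.Mounts, &configs.Mount{
		Device:      "bind",
		Source:      "/var/lib/docker/volumes/example", // illustrative host path
		Destination: "/data",
		Flags:       syscall.MS_BIND | syscall.MS_RDONLY,
	})

	// Raise the open-files limit the template sets to 1024.
	for i := range cfg.Rlimits {
		if cfg.Rlimits[i].Type == syscall.RLIMIT_NOFILE {
			cfg.Rlimits[i].Hard = 4096
			cfg.Rlimits[i].Soft = 4096
		}
	}
	return cfg
}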

+ 17 - 24
daemon/execdriver/native/utils.go

@@ -2,28 +2,21 @@
 
 package native
 
-import (
-	"encoding/json"
-	"os"
+//func findUserArgs() []string {
+//for i, a := range os.Args {
+//if a == "--" {
+//return os.Args[i+1:]
+//}
+//}
+//return []string{}
+//}
 
-	"github.com/docker/libcontainer"
-)
-
-func findUserArgs() []string {
-	for i, a := range os.Args {
-		if a == "--" {
-			return os.Args[i+1:]
-		}
-	}
-	return []string{}
-}
-
-// loadConfigFromFd loads a container's config from the sync pipe that is provided by
-// fd 3 when running a process
-func loadConfigFromFd() (*libcontainer.Config, error) {
-	var config *libcontainer.Config
-	if err := json.NewDecoder(os.NewFile(3, "child")).Decode(&config); err != nil {
-		return nil, err
-	}
-	return config, nil
-}
+//// loadConfigFromFd loads a container's config from the sync pipe that is provided by
+//// fd 3 when running a process
+//func loadConfigFromFd() (*configs.Config, error) {
+//var config *libcontainer.Config
+//if err := json.NewDecoder(os.NewFile(3, "child")).Decode(&config); err != nil {
+//return nil, err
+//}
+//return config, nil
+//}

+ 73 - 3
daemon/execdriver/utils.go

@@ -5,13 +5,83 @@ import (
 	"strings"

 	"github.com/docker/docker/utils"
-	"github.com/docker/libcontainer/security/capabilities"
+	"github.com/syndtr/gocapability/capability"
 )
 
+var capabilityList = Capabilities{
+	{Key: "SETPCAP", Value: capability.CAP_SETPCAP},
+	{Key: "SYS_MODULE", Value: capability.CAP_SYS_MODULE},
+	{Key: "SYS_RAWIO", Value: capability.CAP_SYS_RAWIO},
+	{Key: "SYS_PACCT", Value: capability.CAP_SYS_PACCT},
+	{Key: "SYS_ADMIN", Value: capability.CAP_SYS_ADMIN},
+	{Key: "SYS_NICE", Value: capability.CAP_SYS_NICE},
+	{Key: "SYS_RESOURCE", Value: capability.CAP_SYS_RESOURCE},
+	{Key: "SYS_TIME", Value: capability.CAP_SYS_TIME},
+	{Key: "SYS_TTY_CONFIG", Value: capability.CAP_SYS_TTY_CONFIG},
+	{Key: "MKNOD", Value: capability.CAP_MKNOD},
+	{Key: "AUDIT_WRITE", Value: capability.CAP_AUDIT_WRITE},
+	{Key: "AUDIT_CONTROL", Value: capability.CAP_AUDIT_CONTROL},
+	{Key: "MAC_OVERRIDE", Value: capability.CAP_MAC_OVERRIDE},
+	{Key: "MAC_ADMIN", Value: capability.CAP_MAC_ADMIN},
+	{Key: "NET_ADMIN", Value: capability.CAP_NET_ADMIN},
+	{Key: "SYSLOG", Value: capability.CAP_SYSLOG},
+	{Key: "CHOWN", Value: capability.CAP_CHOWN},
+	{Key: "NET_RAW", Value: capability.CAP_NET_RAW},
+	{Key: "DAC_OVERRIDE", Value: capability.CAP_DAC_OVERRIDE},
+	{Key: "FOWNER", Value: capability.CAP_FOWNER},
+	{Key: "DAC_READ_SEARCH", Value: capability.CAP_DAC_READ_SEARCH},
+	{Key: "FSETID", Value: capability.CAP_FSETID},
+	{Key: "KILL", Value: capability.CAP_KILL},
+	{Key: "SETGID", Value: capability.CAP_SETGID},
+	{Key: "SETUID", Value: capability.CAP_SETUID},
+	{Key: "LINUX_IMMUTABLE", Value: capability.CAP_LINUX_IMMUTABLE},
+	{Key: "NET_BIND_SERVICE", Value: capability.CAP_NET_BIND_SERVICE},
+	{Key: "NET_BROADCAST", Value: capability.CAP_NET_BROADCAST},
+	{Key: "IPC_LOCK", Value: capability.CAP_IPC_LOCK},
+	{Key: "IPC_OWNER", Value: capability.CAP_IPC_OWNER},
+	{Key: "SYS_CHROOT", Value: capability.CAP_SYS_CHROOT},
+	{Key: "SYS_PTRACE", Value: capability.CAP_SYS_PTRACE},
+	{Key: "SYS_BOOT", Value: capability.CAP_SYS_BOOT},
+	{Key: "LEASE", Value: capability.CAP_LEASE},
+	{Key: "SETFCAP", Value: capability.CAP_SETFCAP},
+	{Key: "WAKE_ALARM", Value: capability.CAP_WAKE_ALARM},
+	{Key: "BLOCK_SUSPEND", Value: capability.CAP_BLOCK_SUSPEND},
+}
+
+type (
+	CapabilityMapping struct {
+		Key   string         `json:"key,omitempty"`
+		Value capability.Cap `json:"value,omitempty"`
+	}
+	Capabilities []*CapabilityMapping
+)
+
+func (c *CapabilityMapping) String() string {
+	return c.Key
+}
+
+func GetCapability(key string) *CapabilityMapping {
+	for _, capp := range capabilityList {
+		if capp.Key == key {
+			cpy := *capp
+			return &cpy
+		}
+	}
+	return nil
+}
+
+func GetAllCapabilities() []string {
+	output := make([]string, len(capabilityList))
+	for i, capability := range capabilityList {
+		output[i] = capability.String()
+	}
+	return output
+}
+
 func TweakCapabilities(basics, adds, drops []string) ([]string, error) {
 	var (
 		newCaps []string
-		allCaps = capabilities.GetAllCapabilities()
+		allCaps = GetAllCapabilities()
 	)
 
 	// look for invalid cap in the drop list
@@ -26,7 +96,7 @@ func TweakCapabilities(basics, adds, drops []string) ([]string, error) {
 
 	// handle --cap-add=all
 	if utils.StringsContainsNoCase(adds, "all") {
-		basics = capabilities.GetAllCapabilities()
+		basics = allCaps
 	}
 
 	if !utils.StringsContainsNoCase(drops, "all") {
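
With the helpers above, capability handling lives in execdriver itself on top of gocapability instead of libcontainer's security/capabilities package. A short usage sketch; the exact add/drop semantics of TweakCapabilities are only partly visible in this diff, so treat the call below as illustrative.

package main

import (
	"fmt"
	"log"

	"github.com/docker/docker/daemon/execdriver"
)

func main() {
	// Look up one capability mapping by its docker-style name.
	if c := execdriver.GetCapability("NET_ADMIN"); c != nil {
		fmt.Println("resolved", c.Key, "to", c.Value)
	}

	// Roughly what docker run --cap-add/--cap-drop handling feeds in:
	// a basic set plus user-requested additions and removals.
	caps, err := execdriver.TweakCapabilities(
		[]string{"CHOWN", "KILL", "MKNOD"}, // basics
		[]string{"SYS_ADMIN"},              // adds
		[]string{"MKNOD"},                  // drops
	)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(caps)
}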

+ 14 - 13
daemon/stats.go

@@ -18,7 +18,7 @@ func (daemon *Daemon) ContainerStats(job *engine.Job) engine.Status {
 	enc := json.NewEncoder(job.Stdout)
 	for v := range updates {
 		update := v.(*execdriver.ResourceStats)
-		ss := convertToAPITypes(update.ContainerStats)
+		ss := convertToAPITypes(update.Stats)
 		ss.MemoryStats.Limit = uint64(update.MemoryLimit)
 		ss.Read = update.Read
 		ss.CpuStats.SystemUsage = update.SystemUsage
@@ -31,20 +31,21 @@ func (daemon *Daemon) ContainerStats(job *engine.Job) engine.Status {
 	return engine.StatusOK
 }
 
-// convertToAPITypes converts the libcontainer.ContainerStats to the api specific
+// convertToAPITypes converts the libcontainer.Stats to the api specific
 // structs.  This is done to preserve API compatibility and versioning.
-func convertToAPITypes(ls *libcontainer.ContainerStats) *types.Stats {
+func convertToAPITypes(ls *libcontainer.Stats) *types.Stats {
 	s := &types.Stats{}
-	if ls.NetworkStats != nil {
-		s.Network = types.Network{
-			RxBytes:   ls.NetworkStats.RxBytes,
-			RxPackets: ls.NetworkStats.RxPackets,
-			RxErrors:  ls.NetworkStats.RxErrors,
-			RxDropped: ls.NetworkStats.RxDropped,
-			TxBytes:   ls.NetworkStats.TxBytes,
-			TxPackets: ls.NetworkStats.TxPackets,
-			TxErrors:  ls.NetworkStats.TxErrors,
-			TxDropped: ls.NetworkStats.TxDropped,
+	if ls.Interfaces != nil {
+		s.Network = types.Network{}
+		for _, iface := range ls.Interfaces {
+			s.Network.RxBytes += iface.RxBytes
+			s.Network.RxPackets += iface.RxPackets
+			s.Network.RxErrors += iface.RxErrors
+			s.Network.RxDropped += iface.RxDropped
+			s.Network.TxBytes += iface.TxBytes
+			s.Network.TxPackets += iface.TxPackets
+			s.Network.TxErrors += iface.TxErrors
+			s.Network.TxDropped += iface.TxDropped
 		}
 		}
 	cs := ls.CgroupStats
 	cs := ls.CgroupStats
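
Because libcontainer now reports one set of counters per interface instead of a single NetworkStats block, the API totals above are produced by summing over ls.Interfaces. A tiny self-contained illustration of the same aggregation using stand-in types; the real code works on libcontainer.Stats and types.Network.

package main

import "fmt"

// netCounters stands in for libcontainer's per-interface statistics.
type netCounters struct {
	RxBytes, RxPackets, TxBytes, TxPackets uint64
}

// sumInterfaces mirrors the loop above: the totals are the sum over all interfaces.
func sumInterfaces(ifaces []netCounters) netCounters {
	var total netCounters
	for _, i := range ifaces {
		total.RxBytes += i.RxBytes
		total.RxPackets += i.RxPackets
		total.TxBytes += i.TxBytes
		total.TxPackets += i.TxPackets
	}
	return total
}

func main() {
	fmt.Printf("%+v\n", sumInterfaces([]netCounters{
		{RxBytes: 1500, RxPackets: 2, TxBytes: 700, TxPackets: 1},
		{RxBytes: 300, RxPackets: 1},
	}))
}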

+ 4 - 24
integration-cli/docker_cli_exec_test.go

@@ -60,7 +60,7 @@ func TestExecInteractiveStdinClose(t *testing.T) {
 
 		out, err := cmd.CombinedOutput()
 		if err != nil {
-			t.Fatal(err, out)
+			t.Fatal(err, string(out))
 		}
 
 		if string(out) == "" {
@@ -538,7 +538,6 @@ func TestRunExecDir(t *testing.T) {
 	id := strings.TrimSpace(out)
 	execDir := filepath.Join(execDriverPath, id)
 	stateFile := filepath.Join(execDir, "state.json")
-	contFile := filepath.Join(execDir, "container.json")
 
 	{
 		fi, err := os.Stat(execDir)
@@ -552,10 +551,6 @@ func TestRunExecDir(t *testing.T) {
 		if err != nil {
 			t.Fatal(err)
 		}
-		fi, err = os.Stat(contFile)
-		if err != nil {
-			t.Fatal(err)
-		}
 	}
 
 	stopCmd := exec.Command(dockerBinary, "stop", id)
@@ -564,23 +559,12 @@ func TestRunExecDir(t *testing.T) {
 		t.Fatal(err, out)
 	}
 	{
-		fi, err := os.Stat(execDir)
-		if err != nil {
-			t.Fatal(err)
-		}
-		if !fi.IsDir() {
-			t.Fatalf("%q must be a directory", execDir)
-		}
-		fi, err = os.Stat(stateFile)
+		_, err := os.Stat(execDir)
 		if err == nil {
-			t.Fatalf("Statefile %q is exists for stopped container!", stateFile)
-		}
-		if !os.IsNotExist(err) {
-			t.Fatalf("Error should be about non-existing, got %s", err)
+			t.Fatal(err)
 		}
-		fi, err = os.Stat(contFile)
 		if err == nil {
-			t.Fatalf("Container file %q is exists for stopped container!", contFile)
+			t.Fatalf("Exec directory %q exists for removed container!", execDir)
 		}
 		if !os.IsNotExist(err) {
 			t.Fatalf("Error should be about non-existing, got %s", err)
@@ -603,10 +587,6 @@ func TestRunExecDir(t *testing.T) {
 		if err != nil {
 			t.Fatal(err)
 		}
-		fi, err = os.Stat(contFile)
-		if err != nil {
-			t.Fatal(err)
-		}
 	}
 	rmCmd := exec.Command(dockerBinary, "rm", "-f", id)
 	out, _, err = runCommandWithOutput(rmCmd)