diff --git a/daemon/container.go b/daemon/container.go index bcd50f9953..a065d00b57 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -14,6 +14,8 @@ import ( "syscall" "time" + "github.com/docker/libcontainer" + "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/devices" "github.com/docker/libcontainer/label" @@ -259,18 +261,18 @@ func populateCommand(c *Container, env []string) error { pid.HostPid = c.hostConfig.PidMode.IsHost() // Build lists of devices allowed and created within the container. - userSpecifiedDevices := make([]*devices.Device, len(c.hostConfig.Devices)) + userSpecifiedDevices := make([]*configs.Device, len(c.hostConfig.Devices)) for i, deviceMapping := range c.hostConfig.Devices { - device, err := devices.GetDevice(deviceMapping.PathOnHost, deviceMapping.CgroupPermissions) + device, err := devices.DeviceFromPath(deviceMapping.PathOnHost, deviceMapping.CgroupPermissions) if err != nil { return fmt.Errorf("error gathering device information while adding custom device %q: %s", deviceMapping.PathOnHost, err) } device.Path = deviceMapping.PathInContainer userSpecifiedDevices[i] = device } - allowedDevices := append(devices.DefaultAllowedDevices, userSpecifiedDevices...) + allowedDevices := append(configs.DefaultAllowedDevices, userSpecifiedDevices...) - autoCreatedDevices := append(devices.DefaultAutoCreatedDevices, userSpecifiedDevices...) + autoCreatedDevices := append(configs.DefaultAutoCreatedDevices, userSpecifiedDevices...) // TODO: this can be removed after lxc-conf is fully deprecated lxcConfig, err := mergeLxcConfIntoOptions(c.hostConfig) @@ -972,7 +974,7 @@ func (container *Container) Exposes(p nat.Port) bool { return exists } -func (container *Container) GetPtyMaster() (*os.File, error) { +func (container *Container) GetPtyMaster() (libcontainer.Console, error) { ttyConsole, ok := container.command.ProcessConfig.Terminal.(execdriver.TtyTerminal) if !ok { return nil, ErrNoTTY diff --git a/daemon/execdriver/driver.go b/daemon/execdriver/driver.go index 24bbc62416..bc66c0e542 100644 --- a/daemon/execdriver/driver.go +++ b/daemon/execdriver/driver.go @@ -1,17 +1,22 @@ package execdriver import ( + "encoding/json" "errors" "io" + "io/ioutil" "os" "os/exec" + "path/filepath" + "strconv" "strings" "time" "github.com/docker/docker/daemon/execdriver/native/template" "github.com/docker/docker/pkg/ulimit" "github.com/docker/libcontainer" - "github.com/docker/libcontainer/devices" + "github.com/docker/libcontainer/cgroups/fs" + "github.com/docker/libcontainer/configs" ) // Context is a generic key value pair that allows @@ -42,7 +47,7 @@ type Terminal interface { } type TtyTerminal interface { - Master() *os.File + Master() libcontainer.Console } // ExitStatus provides exit reasons for a container. @@ -109,7 +114,7 @@ type Resources struct { } type ResourceStats struct { - *libcontainer.ContainerStats + *libcontainer.Stats Read time.Time `json:"read"` MemoryLimit int64 `json:"memory_limit"` SystemUsage uint64 `json:"system_usage"` @@ -149,8 +154,8 @@ type Command struct { Pid *Pid `json:"pid"` Resources *Resources `json:"resources"` Mounts []Mount `json:"mounts"` - AllowedDevices []*devices.Device `json:"allowed_devices"` - AutoCreatedDevices []*devices.Device `json:"autocreated_devices"` + AllowedDevices []*configs.Device `json:"allowed_devices"` + AutoCreatedDevices []*configs.Device `json:"autocreated_devices"` CapAdd []string `json:"cap_add"` CapDrop []string `json:"cap_drop"` ContainerPid int `json:"container_pid"` // the pid for the process inside a container @@ -161,23 +166,19 @@ type Command struct { AppArmorProfile string `json:"apparmor_profile"` } -func InitContainer(c *Command) *libcontainer.Config { +func InitContainer(c *Command) *configs.Config { container := template.New() container.Hostname = getEnv("HOSTNAME", c.ProcessConfig.Env) - container.Tty = c.ProcessConfig.Tty - container.User = c.ProcessConfig.User - container.WorkingDir = c.WorkingDir - container.Env = c.ProcessConfig.Env container.Cgroups.Name = c.ID container.Cgroups.AllowedDevices = c.AllowedDevices - container.MountConfig.DeviceNodes = c.AutoCreatedDevices - container.RootFs = c.Rootfs - container.MountConfig.ReadonlyFs = c.ReadonlyRootfs + container.Readonlyfs = c.ReadonlyRootfs + container.Devices = c.AutoCreatedDevices + container.Rootfs = c.Rootfs + container.Readonlyfs = c.ReadonlyRootfs // check to see if we are running in ramdisk to disable pivot root - container.MountConfig.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != "" - container.RestrictSys = true + container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != "" return container } @@ -191,7 +192,7 @@ func getEnv(key string, env []string) string { return "" } -func SetupCgroups(container *libcontainer.Config, c *Command) error { +func SetupCgroups(container *configs.Config, c *Command) error { if c.Resources != nil { container.Cgroups.CpuShares = c.Resources.CpuShares container.Cgroups.Memory = c.Resources.Memory @@ -203,28 +204,98 @@ func SetupCgroups(container *libcontainer.Config, c *Command) error { return nil } -func Stats(stateFile string, containerMemoryLimit int64, machineMemory int64) (*ResourceStats, error) { - state, err := libcontainer.GetState(stateFile) - if err != nil { - if os.IsNotExist(err) { - return nil, ErrNotRunning +// Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo. +func getNetworkInterfaceStats(interfaceName string) (*libcontainer.NetworkInterface, error) { + out := &libcontainer.NetworkInterface{Name: interfaceName} + // This can happen if the network runtime information is missing - possible if the + // container was created by an old version of libcontainer. + if interfaceName == "" { + return out, nil + } + type netStatsPair struct { + // Where to write the output. + Out *uint64 + // The network stats file to read. + File string + } + // Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container. + netStats := []netStatsPair{ + {Out: &out.RxBytes, File: "tx_bytes"}, + {Out: &out.RxPackets, File: "tx_packets"}, + {Out: &out.RxErrors, File: "tx_errors"}, + {Out: &out.RxDropped, File: "tx_dropped"}, + + {Out: &out.TxBytes, File: "rx_bytes"}, + {Out: &out.TxPackets, File: "rx_packets"}, + {Out: &out.TxErrors, File: "rx_errors"}, + {Out: &out.TxDropped, File: "rx_dropped"}, + } + for _, netStat := range netStats { + data, err := readSysfsNetworkStats(interfaceName, netStat.File) + if err != nil { + return nil, err } + *(netStat.Out) = data + } + return out, nil +} + +// Reads the specified statistics available under /sys/class/net//statistics +func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) { + data, err := ioutil.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile)) + if err != nil { + return 0, err + } + return strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64) +} + +func Stats(containerDir string, containerMemoryLimit int64, machineMemory int64) (*ResourceStats, error) { + f, err := os.Open(filepath.Join(containerDir, "state.json")) + if err != nil { + return nil, err + } + defer f.Close() + + type network struct { + Type string + HostInterfaceName string + } + + state := struct { + CgroupPaths map[string]string `json:"cgroup_paths"` + Networks []network + }{} + + if err := json.NewDecoder(f).Decode(&state); err != nil { return nil, err } now := time.Now() - stats, err := libcontainer.GetStats(nil, state) + + mgr := fs.Manager{Paths: state.CgroupPaths} + cstats, err := mgr.GetStats() if err != nil { return nil, err } + stats := &libcontainer.Stats{CgroupStats: cstats} // if the container does not have any memory limit specified set the // limit to the machines memory memoryLimit := containerMemoryLimit if memoryLimit == 0 { memoryLimit = machineMemory } + for _, iface := range state.Networks { + switch iface.Type { + case "veth": + istats, err := getNetworkInterfaceStats(iface.HostInterfaceName) + if err != nil { + return nil, err + } + stats.Interfaces = append(stats.Interfaces, istats) + } + } return &ResourceStats{ - Read: now, - ContainerStats: stats, - MemoryLimit: memoryLimit, + Stats: stats, + Read: now, + MemoryLimit: memoryLimit, }, nil } diff --git a/daemon/execdriver/lxc/driver.go b/daemon/execdriver/lxc/driver.go index f467b696c1..2daff50d91 100644 --- a/daemon/execdriver/lxc/driver.go +++ b/daemon/execdriver/lxc/driver.go @@ -23,7 +23,9 @@ import ( "github.com/docker/docker/utils" "github.com/docker/libcontainer" "github.com/docker/libcontainer/cgroups" - "github.com/docker/libcontainer/mount/nodes" + "github.com/docker/libcontainer/configs" + "github.com/docker/libcontainer/system" + "github.com/docker/libcontainer/user" "github.com/kr/pty" ) @@ -42,7 +44,7 @@ type driver struct { } type activeContainer struct { - container *libcontainer.Config + container *configs.Config cmd *exec.Cmd } @@ -190,7 +192,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba c.ProcessConfig.Path = aname c.ProcessConfig.Args = append([]string{name}, arg...) - if err := nodes.CreateDeviceNodes(c.Rootfs, c.AutoCreatedDevices); err != nil { + if err := createDeviceNodes(c.Rootfs, c.AutoCreatedDevices); err != nil { return execdriver.ExitStatus{ExitCode: -1}, err } @@ -231,11 +233,17 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba } state := &libcontainer.State{ - InitPid: pid, - CgroupPaths: cgroupPaths, + InitProcessPid: pid, + CgroupPaths: cgroupPaths, } - if err := libcontainer.SaveState(dataPath, state); err != nil { + f, err := os.Create(filepath.Join(dataPath, "state.json")) + if err != nil { + return terminate(err) + } + defer f.Close() + + if err := json.NewEncoder(f).Encode(state); err != nil { return terminate(err) } @@ -245,8 +253,9 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba log.Debugf("Invoking startCallback") startCallback(&c.ProcessConfig, pid) } + oomKill := false - oomKillNotification, err := libcontainer.NotifyOnOOM(state) + oomKillNotification, err := notifyOnOOM(cgroupPaths) if err == nil { _, oomKill = <-oomKillNotification log.Debugf("oomKill error %s waitErr %s", oomKill, waitErr) @@ -265,9 +274,57 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba return execdriver.ExitStatus{ExitCode: exitCode, OOMKilled: oomKill}, waitErr } +// copy from libcontainer +func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) { + dir := paths["memory"] + if dir == "" { + return nil, fmt.Errorf("There is no path for %q in state", "memory") + } + oomControl, err := os.Open(filepath.Join(dir, "memory.oom_control")) + if err != nil { + return nil, err + } + fd, _, syserr := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.FD_CLOEXEC, 0) + if syserr != 0 { + oomControl.Close() + return nil, syserr + } + + eventfd := os.NewFile(fd, "eventfd") + + eventControlPath := filepath.Join(dir, "cgroup.event_control") + data := fmt.Sprintf("%d %d", eventfd.Fd(), oomControl.Fd()) + if err := ioutil.WriteFile(eventControlPath, []byte(data), 0700); err != nil { + eventfd.Close() + oomControl.Close() + return nil, err + } + ch := make(chan struct{}) + go func() { + defer func() { + close(ch) + eventfd.Close() + oomControl.Close() + }() + buf := make([]byte, 8) + for { + if _, err := eventfd.Read(buf); err != nil { + return + } + // When a cgroup is destroyed, an event is sent to eventfd. + // So if the control path is gone, return instead of notifying. + if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) { + return + } + ch <- struct{}{} + } + }() + return ch, nil +} + // createContainer populates and configures the container type with the // data provided by the execdriver.Command -func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, error) { +func (d *driver) createContainer(c *execdriver.Command) (*configs.Config, error) { container := execdriver.InitContainer(c) if err := execdriver.SetupCgroups(container, c); err != nil { return nil, err @@ -297,6 +354,87 @@ func cgroupPaths(containerId string) (map[string]string, error) { return paths, nil } +// this is copy from old libcontainer nodes.go +func createDeviceNodes(rootfs string, nodesToCreate []*configs.Device) error { + oldMask := syscall.Umask(0000) + defer syscall.Umask(oldMask) + + for _, node := range nodesToCreate { + if err := createDeviceNode(rootfs, node); err != nil { + return err + } + } + return nil +} + +// Creates the device node in the rootfs of the container. +func createDeviceNode(rootfs string, node *configs.Device) error { + var ( + dest = filepath.Join(rootfs, node.Path) + parent = filepath.Dir(dest) + ) + + if err := os.MkdirAll(parent, 0755); err != nil { + return err + } + + fileMode := node.FileMode + switch node.Type { + case 'c': + fileMode |= syscall.S_IFCHR + case 'b': + fileMode |= syscall.S_IFBLK + default: + return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path) + } + + if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil && !os.IsExist(err) { + return fmt.Errorf("mknod %s %s", node.Path, err) + } + + if err := syscall.Chown(dest, int(node.Uid), int(node.Gid)); err != nil { + return fmt.Errorf("chown %s to %d:%d", node.Path, node.Uid, node.Gid) + } + + return nil +} + +// setupUser changes the groups, gid, and uid for the user inside the container +// copy from libcontainer, cause not it's private +func setupUser(userSpec string) error { + // Set up defaults. + defaultExecUser := user.ExecUser{ + Uid: syscall.Getuid(), + Gid: syscall.Getgid(), + Home: "/", + } + passwdPath, err := user.GetPasswdPath() + if err != nil { + return err + } + groupPath, err := user.GetGroupPath() + if err != nil { + return err + } + execUser, err := user.GetExecUserPath(userSpec, &defaultExecUser, passwdPath, groupPath) + if err != nil { + return err + } + if err := system.Setgid(execUser.Gid); err != nil { + return err + } + if err := system.Setuid(execUser.Uid); err != nil { + return err + } + // if we didn't get HOME already, set it based on the user's HOME + if envHome := os.Getenv("HOME"); envHome == "" { + if err := os.Setenv("HOME", execUser.Home); err != nil { + return err + } + } + return nil +} + /// Return the exit code of the process // if the process has not exited -1 will be returned func getExitCode(c *execdriver.Command) int { diff --git a/daemon/execdriver/lxc/lxc_init_linux.go b/daemon/execdriver/lxc/lxc_init_linux.go index 956a283fc2..e7bc2b5f3a 100644 --- a/daemon/execdriver/lxc/lxc_init_linux.go +++ b/daemon/execdriver/lxc/lxc_init_linux.go @@ -3,8 +3,6 @@ package lxc import ( "fmt" - "github.com/docker/libcontainer" - "github.com/docker/libcontainer/namespaces" "github.com/docker/libcontainer/utils" ) @@ -12,9 +10,7 @@ func finalizeNamespace(args *InitArgs) error { if err := utils.CloseExecFrom(3); err != nil { return err } - if err := namespaces.SetupUser(&libcontainer.Config{ - User: args.User, - }); err != nil { + if err := setupUser(args.User); err != nil { return fmt.Errorf("setup user %s", err) } if err := setupWorkingDirectory(args); err != nil { diff --git a/daemon/execdriver/lxc/lxc_template.go b/daemon/execdriver/lxc/lxc_template.go index 9de799dd52..425df01da8 100644 --- a/daemon/execdriver/lxc/lxc_template.go +++ b/daemon/execdriver/lxc/lxc_template.go @@ -11,7 +11,6 @@ import ( nativeTemplate "github.com/docker/docker/daemon/execdriver/native/template" "github.com/docker/docker/utils" "github.com/docker/libcontainer/label" - "github.com/docker/libcontainer/security/capabilities" ) const LxcTemplate = ` @@ -169,7 +168,7 @@ func keepCapabilities(adds []string, drops []string) ([]string, error) { var newCaps []string for _, cap := range caps { log.Debugf("cap %s\n", cap) - realCap := capabilities.GetCapability(cap) + realCap := execdriver.GetCapability(cap) numCap := fmt.Sprintf("%d", realCap.Value) newCaps = append(newCaps, numCap) } @@ -180,13 +179,10 @@ func keepCapabilities(adds []string, drops []string) ([]string, error) { func dropList(drops []string) ([]string, error) { if utils.StringsContainsNoCase(drops, "all") { var newCaps []string - for _, cap := range capabilities.GetAllCapabilities() { - log.Debugf("drop cap %s\n", cap) - realCap := capabilities.GetCapability(cap) - if realCap == nil { - return nil, fmt.Errorf("Invalid capability '%s'", cap) - } - numCap := fmt.Sprintf("%d", realCap.Value) + for _, capName := range execdriver.GetAllCapabilities() { + cap := execdriver.GetCapability(capName) + log.Debugf("drop cap %s\n", cap.Key) + numCap := fmt.Sprintf("%d", cap.Value) newCaps = append(newCaps, numCap) } return newCaps, nil diff --git a/daemon/execdriver/lxc/lxc_template_unit_test.go b/daemon/execdriver/lxc/lxc_template_unit_test.go index bb622d4bc5..65e7b6d551 100644 --- a/daemon/execdriver/lxc/lxc_template_unit_test.go +++ b/daemon/execdriver/lxc/lxc_template_unit_test.go @@ -5,11 +5,6 @@ package lxc import ( "bufio" "fmt" - "github.com/docker/docker/daemon/execdriver" - nativeTemplate "github.com/docker/docker/daemon/execdriver/native/template" - "github.com/docker/libcontainer/devices" - "github.com/docker/libcontainer/security/capabilities" - "github.com/syndtr/gocapability/capability" "io/ioutil" "math/rand" "os" @@ -17,6 +12,11 @@ import ( "strings" "testing" "time" + + "github.com/docker/docker/daemon/execdriver" + nativeTemplate "github.com/docker/docker/daemon/execdriver/native/template" + "github.com/docker/libcontainer/configs" + "github.com/syndtr/gocapability/capability" ) func TestLXCConfig(t *testing.T) { @@ -53,7 +53,7 @@ func TestLXCConfig(t *testing.T) { Mtu: 1500, Interface: nil, }, - AllowedDevices: make([]*devices.Device, 0), + AllowedDevices: make([]*configs.Device, 0), ProcessConfig: execdriver.ProcessConfig{}, } p, err := driver.generateLXCConfig(command) @@ -295,7 +295,7 @@ func TestCustomLxcConfigMisc(t *testing.T) { grepFile(t, p, "lxc.cgroup.cpuset.cpus = 0,1") container := nativeTemplate.New() for _, cap := range container.Capabilities { - realCap := capabilities.GetCapability(cap) + realCap := execdriver.GetCapability(cap) numCap := fmt.Sprintf("%d", realCap.Value) if cap != "MKNOD" && cap != "KILL" { grepFile(t, p, fmt.Sprintf("lxc.cap.keep = %s", numCap)) @@ -359,7 +359,7 @@ func TestCustomLxcConfigMiscOverride(t *testing.T) { grepFile(t, p, "lxc.cgroup.cpuset.cpus = 0,1") container := nativeTemplate.New() for _, cap := range container.Capabilities { - realCap := capabilities.GetCapability(cap) + realCap := execdriver.GetCapability(cap) numCap := fmt.Sprintf("%d", realCap.Value) if cap != "MKNOD" && cap != "KILL" { grepFile(t, p, fmt.Sprintf("lxc.cap.keep = %s", numCap)) diff --git a/daemon/execdriver/native/create.go b/daemon/execdriver/native/create.go index 3442f66a00..dd5b40a13d 100644 --- a/daemon/execdriver/native/create.go +++ b/daemon/execdriver/native/create.go @@ -3,21 +3,24 @@ package native import ( + "errors" "fmt" - "os/exec" + "net" "path/filepath" + "strings" + "syscall" "github.com/docker/docker/daemon/execdriver" - "github.com/docker/libcontainer" + "github.com/docker/docker/pkg/symlink" "github.com/docker/libcontainer/apparmor" + "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/devices" - "github.com/docker/libcontainer/mount" - "github.com/docker/libcontainer/security/capabilities" + "github.com/docker/libcontainer/utils" ) // createContainer populates and configures the container type with the // data provided by the execdriver.Command -func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, error) { +func (d *driver) createContainer(c *execdriver.Command) (*configs.Config, error) { container := execdriver.InitContainer(c) if err := d.createIpc(container, c); err != nil { @@ -33,6 +36,13 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e } if c.ProcessConfig.Privileged { + // clear readonly for /sys + for i := range container.Mounts { + if container.Mounts[i].Destination == "/sys" { + container.Mounts[i].Flags &= ^syscall.MS_RDONLY + } + } + container.ReadonlyPaths = nil if err := d.setPrivileged(container); err != nil { return nil, err } @@ -57,43 +67,52 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e if err := d.setupLabels(container, c); err != nil { return nil, err } - d.setupRlimits(container, c) - - cmds := make(map[string]*exec.Cmd) - d.Lock() - for k, v := range d.activeContainers { - cmds[k] = v.cmd - } - d.Unlock() - return container, nil } -func (d *driver) createNetwork(container *libcontainer.Config, c *execdriver.Command) error { +func generateIfaceName() (string, error) { + for i := 0; i < 10; i++ { + name, err := utils.GenerateRandomName("veth", 7) + if err != nil { + continue + } + if _, err := net.InterfaceByName(name); err != nil { + if strings.Contains(err.Error(), "no such") { + return name, nil + } + return "", err + } + } + return "", errors.New("Failed to find name for new interface") +} + +func (d *driver) createNetwork(container *configs.Config, c *execdriver.Command) error { if c.Network.HostNetworking { - container.Namespaces.Remove(libcontainer.NEWNET) + container.Namespaces.Remove(configs.NEWNET) return nil } - container.Networks = []*libcontainer.Network{ + container.Networks = []*configs.Network{ { - Mtu: c.Network.Mtu, - Address: fmt.Sprintf("%s/%d", "127.0.0.1", 0), - Gateway: "localhost", - Type: "loopback", + Type: "loopback", }, } + iName, err := generateIfaceName() + if err != nil { + return err + } if c.Network.Interface != nil { - vethNetwork := libcontainer.Network{ - Mtu: c.Network.Mtu, - Address: fmt.Sprintf("%s/%d", c.Network.Interface.IPAddress, c.Network.Interface.IPPrefixLen), - MacAddress: c.Network.Interface.MacAddress, - Gateway: c.Network.Interface.Gateway, - Type: "veth", - Bridge: c.Network.Interface.Bridge, - VethPrefix: "veth", + vethNetwork := configs.Network{ + Name: "eth0", + HostInterfaceName: iName, + Mtu: c.Network.Mtu, + Address: fmt.Sprintf("%s/%d", c.Network.Interface.IPAddress, c.Network.Interface.IPPrefixLen), + MacAddress: c.Network.Interface.MacAddress, + Gateway: c.Network.Interface.Gateway, + Type: "veth", + Bridge: c.Network.Interface.Bridge, } if c.Network.Interface.GlobalIPv6Address != "" { vethNetwork.IPv6Address = fmt.Sprintf("%s/%d", c.Network.Interface.GlobalIPv6Address, c.Network.Interface.GlobalIPv6PrefixLen) @@ -107,21 +126,24 @@ func (d *driver) createNetwork(container *libcontainer.Config, c *execdriver.Com active := d.activeContainers[c.Network.ContainerID] d.Unlock() - if active == nil || active.cmd.Process == nil { + if active == nil { return fmt.Errorf("%s is not a valid running container to join", c.Network.ContainerID) } - cmd := active.cmd - nspath := filepath.Join("/proc", fmt.Sprint(cmd.Process.Pid), "ns", "net") - container.Namespaces.Add(libcontainer.NEWNET, nspath) + state, err := active.State() + if err != nil { + return err + } + + container.Namespaces.Add(configs.NEWNET, state.NamespacePaths[configs.NEWNET]) } return nil } -func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command) error { +func (d *driver) createIpc(container *configs.Config, c *execdriver.Command) error { if c.Ipc.HostIpc { - container.Namespaces.Remove(libcontainer.NEWIPC) + container.Namespaces.Remove(configs.NEWIPC) return nil } @@ -130,37 +152,38 @@ func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command active := d.activeContainers[c.Ipc.ContainerID] d.Unlock() - if active == nil || active.cmd.Process == nil { + if active == nil { return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID) } - cmd := active.cmd - container.Namespaces.Add(libcontainer.NEWIPC, filepath.Join("/proc", fmt.Sprint(cmd.Process.Pid), "ns", "ipc")) + state, err := active.State() + if err != nil { + return err + } + container.Namespaces.Add(configs.NEWIPC, state.NamespacePaths[configs.NEWIPC]) } return nil } -func (d *driver) createPid(container *libcontainer.Config, c *execdriver.Command) error { +func (d *driver) createPid(container *configs.Config, c *execdriver.Command) error { if c.Pid.HostPid { - container.Namespaces.Remove(libcontainer.NEWPID) + container.Namespaces.Remove(configs.NEWPID) return nil } return nil } -func (d *driver) setPrivileged(container *libcontainer.Config) (err error) { - container.Capabilities = capabilities.GetAllCapabilities() +func (d *driver) setPrivileged(container *configs.Config) (err error) { + container.Capabilities = execdriver.GetAllCapabilities() container.Cgroups.AllowAllDevices = true - hostDeviceNodes, err := devices.GetHostDeviceNodes() + hostDevices, err := devices.HostDevices() if err != nil { return err } - container.MountConfig.DeviceNodes = hostDeviceNodes - - container.RestrictSys = false + container.Devices = hostDevices if apparmor.IsEnabled() { container.AppArmorProfile = "unconfined" @@ -169,39 +192,52 @@ func (d *driver) setPrivileged(container *libcontainer.Config) (err error) { return nil } -func (d *driver) setCapabilities(container *libcontainer.Config, c *execdriver.Command) (err error) { +func (d *driver) setCapabilities(container *configs.Config, c *execdriver.Command) (err error) { container.Capabilities, err = execdriver.TweakCapabilities(container.Capabilities, c.CapAdd, c.CapDrop) return err } -func (d *driver) setupRlimits(container *libcontainer.Config, c *execdriver.Command) { +func (d *driver) setupRlimits(container *configs.Config, c *execdriver.Command) { if c.Resources == nil { return } for _, rlimit := range c.Resources.Rlimits { - container.Rlimits = append(container.Rlimits, libcontainer.Rlimit((*rlimit))) - } -} - -func (d *driver) setupMounts(container *libcontainer.Config, c *execdriver.Command) error { - for _, m := range c.Mounts { - container.MountConfig.Mounts = append(container.MountConfig.Mounts, &mount.Mount{ - Type: "bind", - Source: m.Source, - Destination: m.Destination, - Writable: m.Writable, - Private: m.Private, - Slave: m.Slave, + container.Rlimits = append(container.Rlimits, configs.Rlimit{ + Type: rlimit.Type, + Hard: rlimit.Hard, + Soft: rlimit.Soft, }) } +} +func (d *driver) setupMounts(container *configs.Config, c *execdriver.Command) error { + for _, m := range c.Mounts { + dest, err := symlink.FollowSymlinkInScope(filepath.Join(c.Rootfs, m.Destination), c.Rootfs) + if err != nil { + return err + } + flags := syscall.MS_BIND | syscall.MS_REC + if !m.Writable { + flags |= syscall.MS_RDONLY + } + if m.Slave { + flags |= syscall.MS_SLAVE + } + + container.Mounts = append(container.Mounts, &configs.Mount{ + Source: m.Source, + Destination: dest, + Device: "bind", + Flags: flags, + }) + } return nil } -func (d *driver) setupLabels(container *libcontainer.Config, c *execdriver.Command) error { +func (d *driver) setupLabels(container *configs.Config, c *execdriver.Command) error { container.ProcessLabel = c.ProcessLabel - container.MountConfig.MountLabel = c.MountLabel + container.MountLabel = c.MountLabel return nil } diff --git a/daemon/execdriver/native/driver.go b/daemon/execdriver/native/driver.go index f5abcf02e9..d31e159704 100644 --- a/daemon/execdriver/native/driver.go +++ b/daemon/execdriver/native/driver.go @@ -4,28 +4,28 @@ package native import ( "encoding/json" - "errors" "fmt" "io" "io/ioutil" "os" "os/exec" "path/filepath" + "strings" "sync" "syscall" + "time" log "github.com/Sirupsen/logrus" "github.com/docker/docker/daemon/execdriver" + "github.com/docker/docker/pkg/reexec" sysinfo "github.com/docker/docker/pkg/system" "github.com/docker/docker/pkg/term" "github.com/docker/libcontainer" "github.com/docker/libcontainer/apparmor" - "github.com/docker/libcontainer/cgroups/fs" "github.com/docker/libcontainer/cgroups/systemd" - consolepkg "github.com/docker/libcontainer/console" - "github.com/docker/libcontainer/namespaces" - _ "github.com/docker/libcontainer/namespaces/nsenter" + "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/system" + "github.com/docker/libcontainer/utils" ) const ( @@ -33,16 +33,12 @@ const ( Version = "0.2" ) -type activeContainer struct { - container *libcontainer.Config - cmd *exec.Cmd -} - type driver struct { root string initPath string - activeContainers map[string]*activeContainer + activeContainers map[string]libcontainer.Container machineMemory int64 + factory libcontainer.Factory sync.Mutex } @@ -59,11 +55,22 @@ func NewDriver(root, initPath string) (*driver, error) { if err := apparmor.InstallDefaultProfile(); err != nil { return nil, err } + cgm := libcontainer.Cgroupfs + if systemd.UseSystemd() { + cgm = libcontainer.SystemdCgroups + } + + f, err := libcontainer.New(root, cgm, libcontainer.InitPath(reexec.Self(), DriverName)) + if err != nil { + return nil, err + } + return &driver{ root: root, initPath: initPath, - activeContainers: make(map[string]*activeContainer), + activeContainers: make(map[string]libcontainer.Container), machineMemory: meminfo.MemTotal, + factory: f, }, nil } @@ -81,101 +88,141 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba var term execdriver.Terminal + p := &libcontainer.Process{ + Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...), + Env: c.ProcessConfig.Env, + Cwd: c.WorkingDir, + User: c.ProcessConfig.User, + } + if c.ProcessConfig.Tty { - term, err = NewTtyConsole(&c.ProcessConfig, pipes) + rootuid, err := container.HostUID() + if err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err + } + cons, err := p.NewConsole(rootuid) + if err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err + } + term, err = NewTtyConsole(cons, pipes, rootuid) } else { - term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes) + p.Stdout = pipes.Stdout + p.Stderr = pipes.Stderr + r, w, err := os.Pipe() + if err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err + } + if pipes.Stdin != nil { + go func() { + io.Copy(w, pipes.Stdin) + w.Close() + }() + p.Stdin = r + } + term = &execdriver.StdConsole{} } if err != nil { return execdriver.ExitStatus{ExitCode: -1}, err } c.ProcessConfig.Terminal = term + cont, err := d.factory.Create(c.ID, container) + if err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err + } d.Lock() - d.activeContainers[c.ID] = &activeContainer{ - container: container, - cmd: &c.ProcessConfig.Cmd, - } + d.activeContainers[c.ID] = cont d.Unlock() - - var ( - dataPath = filepath.Join(d.root, c.ID) - args = append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...) - ) - - if err := d.createContainerRoot(c.ID); err != nil { - return execdriver.ExitStatus{ExitCode: -1}, err - } - defer d.cleanContainer(c.ID) - - if err := d.writeContainerFile(container, c.ID); err != nil { - return execdriver.ExitStatus{ExitCode: -1}, err - } - - execOutputChan := make(chan execOutput, 1) - waitForStart := make(chan struct{}) - - go func() { - exitCode, err := namespaces.Exec(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, dataPath, args, func(container *libcontainer.Config, console, dataPath, init string, child *os.File, args []string) *exec.Cmd { - c.ProcessConfig.Path = d.initPath - c.ProcessConfig.Args = append([]string{ - DriverName, - "-console", console, - "-pipe", "3", - "-root", filepath.Join(d.root, c.ID), - "--", - }, args...) - - // set this to nil so that when we set the clone flags anything else is reset - c.ProcessConfig.SysProcAttr = &syscall.SysProcAttr{ - Cloneflags: uintptr(namespaces.GetNamespaceFlags(container.Namespaces)), - } - c.ProcessConfig.ExtraFiles = []*os.File{child} - - c.ProcessConfig.Env = container.Env - c.ProcessConfig.Dir = container.RootFs - - return &c.ProcessConfig.Cmd - }, func() { - close(waitForStart) - if startCallback != nil { - c.ContainerPid = c.ProcessConfig.Process.Pid - startCallback(&c.ProcessConfig, c.ContainerPid) - } - }) - execOutputChan <- execOutput{exitCode, err} + defer func() { + cont.Destroy() + d.cleanContainer(c.ID) }() - select { - case execOutput := <-execOutputChan: - return execdriver.ExitStatus{ExitCode: execOutput.exitCode}, execOutput.err - case <-waitForStart: - break + if err := cont.Start(p); err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err } - oomKill := false - state, err := libcontainer.GetState(filepath.Join(d.root, c.ID)) - if err == nil { - oomKillNotification, err := libcontainer.NotifyOnOOM(state) - if err == nil { - _, oomKill = <-oomKillNotification - } else { - log.Warnf("WARNING: Your kernel does not support OOM notifications: %s", err) + if startCallback != nil { + pid, err := p.Pid() + if err != nil { + p.Signal(os.Kill) + p.Wait() + return execdriver.ExitStatus{ExitCode: -1}, err } - } else { - log.Warnf("Failed to get container state, oom notify will not work: %s", err) + startCallback(&c.ProcessConfig, pid) } - // wait for the container to exit. - execOutput := <-execOutputChan - return execdriver.ExitStatus{ExitCode: execOutput.exitCode, OOMKilled: oomKill}, execOutput.err + oomKillNotification, err := cont.NotifyOOM() + if err != nil { + oomKillNotification = nil + log.Warnf("WARNING: Your kernel does not support OOM notifications: %s", err) + } + waitF := p.Wait + if nss := cont.Config().Namespaces; nss.Contains(configs.NEWPID) { + // we need such hack for tracking processes with inerited fds, + // because cmd.Wait() waiting for all streams to be copied + waitF = waitInPIDHost(p, cont) + } + ps, err := waitF() + if err != nil { + if err, ok := err.(*exec.ExitError); !ok { + return execdriver.ExitStatus{ExitCode: -1}, err + } else { + ps = err.ProcessState + } + } + cont.Destroy() + + _, oomKill := <-oomKillNotification + + return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil } -func (d *driver) Kill(p *execdriver.Command, sig int) error { - if p.ProcessConfig.Process == nil { - return errors.New("exec: not started") +func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*os.ProcessState, error) { + return func() (*os.ProcessState, error) { + pid, err := p.Pid() + if err != nil { + return nil, err + } + + process, err := os.FindProcess(pid) + s, err := process.Wait() + if err != nil { + if err, ok := err.(*exec.ExitError); !ok { + return s, err + } else { + s = err.ProcessState + } + } + processes, err := c.Processes() + if err != nil { + return s, err + } + + for _, pid := range processes { + process, err := os.FindProcess(pid) + if err != nil { + log.Errorf("Failed to kill process: %d", pid) + continue + } + process.Kill() + } + + p.Wait() + return s, err } - return syscall.Kill(p.ProcessConfig.Process.Pid, syscall.Signal(sig)) +} + +func (d *driver) Kill(c *execdriver.Command, sig int) error { + active := d.activeContainers[c.ID] + if active == nil { + return fmt.Errorf("active container for %s does not exist", c.ID) + } + state, err := active.State() + if err != nil { + return err + } + return syscall.Kill(state.InitProcessPid, syscall.Signal(sig)) } func (d *driver) Pause(c *execdriver.Command) error { @@ -183,11 +230,7 @@ func (d *driver) Pause(c *execdriver.Command) error { if active == nil { return fmt.Errorf("active container for %s does not exist", c.ID) } - active.container.Cgroups.Freezer = "FROZEN" - if systemd.UseSystemd() { - return systemd.Freeze(active.container.Cgroups, active.container.Cgroups.Freezer) - } - return fs.Freeze(active.container.Cgroups, active.container.Cgroups.Freezer) + return active.Pause() } func (d *driver) Unpause(c *execdriver.Command) error { @@ -195,44 +238,31 @@ func (d *driver) Unpause(c *execdriver.Command) error { if active == nil { return fmt.Errorf("active container for %s does not exist", c.ID) } - active.container.Cgroups.Freezer = "THAWED" - if systemd.UseSystemd() { - return systemd.Freeze(active.container.Cgroups, active.container.Cgroups.Freezer) - } - return fs.Freeze(active.container.Cgroups, active.container.Cgroups.Freezer) + return active.Resume() } -func (d *driver) Terminate(p *execdriver.Command) error { +func (d *driver) Terminate(c *execdriver.Command) error { // lets check the start time for the process - state, err := libcontainer.GetState(filepath.Join(d.root, p.ID)) - if err != nil { - if !os.IsNotExist(err) { - return err - } - // TODO: Remove this part for version 1.2.0 - // This is added only to ensure smooth upgrades from pre 1.1.0 to 1.1.0 - data, err := ioutil.ReadFile(filepath.Join(d.root, p.ID, "start")) - if err != nil { - // if we don't have the data on disk then we can assume the process is gone - // because this is only removed after we know the process has stopped - if os.IsNotExist(err) { - return nil - } - return err - } - state = &libcontainer.State{InitStartTime: string(data)} + active := d.activeContainers[c.ID] + if active == nil { + return fmt.Errorf("active container for %s does not exist", c.ID) } + state, err := active.State() + if err != nil { + return err + } + pid := state.InitProcessPid - currentStartTime, err := system.GetProcessStartTime(p.ProcessConfig.Process.Pid) + currentStartTime, err := system.GetProcessStartTime(pid) if err != nil { return err } - if state.InitStartTime == currentStartTime { - err = syscall.Kill(p.ProcessConfig.Process.Pid, 9) - syscall.Wait4(p.ProcessConfig.Process.Pid, nil, 0, nil) + if state.InitProcessStartTime == currentStartTime { + err = syscall.Kill(pid, 9) + syscall.Wait4(pid, nil, 0, nil) } - d.cleanContainer(p.ID) + d.cleanContainer(c.ID) return err @@ -257,15 +287,10 @@ func (d *driver) GetPidsForContainer(id string) ([]int, error) { if active == nil { return nil, fmt.Errorf("active container for %s does not exist", id) } - c := active.container.Cgroups - - if systemd.UseSystemd() { - return systemd.GetPids(c) - } - return fs.GetPids(c) + return active.Processes() } -func (d *driver) writeContainerFile(container *libcontainer.Config, id string) error { +func (d *driver) writeContainerFile(container *configs.Config, id string) error { data, err := json.Marshal(container) if err != nil { return err @@ -289,42 +314,61 @@ func (d *driver) Clean(id string) error { } func (d *driver) Stats(id string) (*execdriver.ResourceStats, error) { - return execdriver.Stats(filepath.Join(d.root, id), d.activeContainers[id].container.Cgroups.Memory, d.machineMemory) -} - -type TtyConsole struct { - MasterPty *os.File -} - -func NewTtyConsole(processConfig *execdriver.ProcessConfig, pipes *execdriver.Pipes) (*TtyConsole, error) { - ptyMaster, console, err := consolepkg.CreateMasterAndConsole() + c := d.activeContainers[id] + now := time.Now() + stats, err := c.Stats() if err != nil { return nil, err } + memoryLimit := c.Config().Cgroups.Memory + // if the container does not have any memory limit specified set the + // limit to the machines memory + if memoryLimit == 0 { + memoryLimit = d.machineMemory + } + return &execdriver.ResourceStats{ + Stats: stats, + Read: now, + MemoryLimit: memoryLimit, + }, nil +} +func getEnv(key string, env []string) string { + for _, pair := range env { + parts := strings.Split(pair, "=") + if parts[0] == key { + return parts[1] + } + } + return "" +} + +type TtyConsole struct { + console libcontainer.Console +} + +func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes, rootuid int) (*TtyConsole, error) { tty := &TtyConsole{ - MasterPty: ptyMaster, + console: console, } - if err := tty.AttachPipes(&processConfig.Cmd, pipes); err != nil { + if err := tty.AttachPipes(pipes); err != nil { tty.Close() return nil, err } - processConfig.Console = console - return tty, nil } -func (t *TtyConsole) Master() *os.File { - return t.MasterPty +func (t *TtyConsole) Master() libcontainer.Console { + return t.console } func (t *TtyConsole) Resize(h, w int) error { - return term.SetWinsize(t.MasterPty.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)}) + return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)}) } -func (t *TtyConsole) AttachPipes(command *exec.Cmd, pipes *execdriver.Pipes) error { +func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes) error { go func() { if wb, ok := pipes.Stdout.(interface { CloseWriters() error @@ -332,12 +376,12 @@ func (t *TtyConsole) AttachPipes(command *exec.Cmd, pipes *execdriver.Pipes) err defer wb.CloseWriters() } - io.Copy(pipes.Stdout, t.MasterPty) + io.Copy(pipes.Stdout, t.console) }() if pipes.Stdin != nil { go func() { - io.Copy(t.MasterPty, pipes.Stdin) + io.Copy(t.console, pipes.Stdin) pipes.Stdin.Close() }() @@ -347,5 +391,5 @@ func (t *TtyConsole) AttachPipes(command *exec.Cmd, pipes *execdriver.Pipes) err } func (t *TtyConsole) Close() error { - return t.MasterPty.Close() + return t.console.Close() } diff --git a/daemon/execdriver/native/exec.go b/daemon/execdriver/native/exec.go index 84ad096725..af6dcd2adb 100644 --- a/daemon/execdriver/native/exec.go +++ b/daemon/execdriver/native/exec.go @@ -4,67 +4,77 @@ package native import ( "fmt" - "log" "os" "os/exec" - "path/filepath" - "runtime" + "syscall" "github.com/docker/docker/daemon/execdriver" - "github.com/docker/docker/pkg/reexec" "github.com/docker/libcontainer" - "github.com/docker/libcontainer/namespaces" + _ "github.com/docker/libcontainer/nsenter" + "github.com/docker/libcontainer/utils" ) -const execCommandName = "nsenter-exec" - -func init() { - reexec.Register(execCommandName, nsenterExec) -} - -func nsenterExec() { - runtime.LockOSThread() - - // User args are passed after '--' in the command line. - userArgs := findUserArgs() - - config, err := loadConfigFromFd() - if err != nil { - log.Fatalf("docker-exec: unable to receive config from sync pipe: %s", err) - } - - if err := namespaces.FinalizeSetns(config, userArgs); err != nil { - log.Fatalf("docker-exec: failed to exec: %s", err) - } -} - // TODO(vishh): Add support for running in priviledged mode and running as a different user. func (d *driver) Exec(c *execdriver.Command, processConfig *execdriver.ProcessConfig, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) { active := d.activeContainers[c.ID] if active == nil { return -1, fmt.Errorf("No active container exists with ID %s", c.ID) } - state, err := libcontainer.GetState(filepath.Join(d.root, c.ID)) - if err != nil { - return -1, fmt.Errorf("State unavailable for container with ID %s. The container may have been cleaned up already. Error: %s", c.ID, err) - } var term execdriver.Terminal + var err error + + p := &libcontainer.Process{ + Args: append([]string{processConfig.Entrypoint}, processConfig.Arguments...), + Env: c.ProcessConfig.Env, + Cwd: c.WorkingDir, + User: c.ProcessConfig.User, + } if processConfig.Tty { - term, err = NewTtyConsole(processConfig, pipes) + config := active.Config() + rootuid, err := config.HostUID() + if err != nil { + return -1, err + } + cons, err := p.NewConsole(rootuid) + if err != nil { + return -1, err + } + term, err = NewTtyConsole(cons, pipes, rootuid) } else { - term, err = execdriver.NewStdConsole(processConfig, pipes) + p.Stdout = pipes.Stdout + p.Stderr = pipes.Stderr + p.Stdin = pipes.Stdin + term = &execdriver.StdConsole{} + } + if err != nil { + return -1, err } processConfig.Terminal = term - args := append([]string{processConfig.Entrypoint}, processConfig.Arguments...) + if err := active.Start(p); err != nil { + return -1, err + } - return namespaces.ExecIn(active.container, state, args, os.Args[0], "exec", processConfig.Stdin, processConfig.Stdout, processConfig.Stderr, processConfig.Console, - func(cmd *exec.Cmd) { - if startCallback != nil { - startCallback(&c.ProcessConfig, cmd.Process.Pid) - } - }) + if startCallback != nil { + pid, err := p.Pid() + if err != nil { + p.Signal(os.Kill) + p.Wait() + return -1, err + } + startCallback(&c.ProcessConfig, pid) + } + + ps, err := p.Wait() + if err != nil { + exitErr, ok := err.(*exec.ExitError) + if !ok { + return -1, err + } + ps = exitErr.ProcessState + } + return utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), nil } diff --git a/daemon/execdriver/native/info.go b/daemon/execdriver/native/info.go index 601b97e810..9d7342da86 100644 --- a/daemon/execdriver/native/info.go +++ b/daemon/execdriver/native/info.go @@ -2,13 +2,6 @@ package native -import ( - "os" - "path/filepath" - - "github.com/docker/libcontainer" -) - type info struct { ID string driver *driver @@ -18,13 +11,6 @@ type info struct { // pid file for a container. If the file exists then the // container is currently running func (i *info) IsRunning() bool { - if _, err := libcontainer.GetState(filepath.Join(i.driver.root, i.ID)); err == nil { - return true - } - // TODO: Remove this part for version 1.2.0 - // This is added only to ensure smooth upgrades from pre 1.1.0 to 1.1.0 - if _, err := os.Stat(filepath.Join(i.driver.root, i.ID, "pid")); err == nil { - return true - } - return false + _, ok := i.driver.activeContainers[i.ID] + return ok } diff --git a/daemon/execdriver/native/init.go b/daemon/execdriver/native/init.go index 754d842c3b..f57d6cddec 100644 --- a/daemon/execdriver/native/init.go +++ b/daemon/execdriver/native/init.go @@ -3,55 +3,40 @@ package native import ( - "encoding/json" - "flag" "fmt" "os" - "path/filepath" "runtime" "github.com/docker/docker/pkg/reexec" "github.com/docker/libcontainer" - "github.com/docker/libcontainer/namespaces" ) func init() { reexec.Register(DriverName, initializer) } +func fatal(err error) { + if lerr, ok := err.(libcontainer.Error); ok { + lerr.Detail(os.Stderr) + os.Exit(1) + } + + fmt.Fprintln(os.Stderr, err) + os.Exit(1) +} + func initializer() { + runtime.GOMAXPROCS(1) runtime.LockOSThread() - - var ( - pipe = flag.Int("pipe", 0, "sync pipe fd") - console = flag.String("console", "", "console (pty slave) path") - root = flag.String("root", ".", "root path for configuration files") - ) - - flag.Parse() - - var container *libcontainer.Config - f, err := os.Open(filepath.Join(*root, "container.json")) + factory, err := libcontainer.New("") if err != nil { - writeError(err) + fatal(err) + } + if err := factory.StartInitialization(3); err != nil { + fatal(err) } - if err := json.NewDecoder(f).Decode(&container); err != nil { - f.Close() - writeError(err) - } - f.Close() - - rootfs, err := os.Getwd() - if err != nil { - writeError(err) - } - - if err := namespaces.Init(container, rootfs, *console, os.NewFile(uintptr(*pipe), "child"), flag.Args()); err != nil { - writeError(err) - } - - panic("Unreachable") + panic("unreachable") } func writeError(err error) { diff --git a/daemon/execdriver/native/template/default_template.go b/daemon/execdriver/native/template/default_template.go index f7d6be746d..cd6ac8bcaf 100644 --- a/daemon/execdriver/native/template/default_template.go +++ b/daemon/execdriver/native/template/default_template.go @@ -1,14 +1,17 @@ package template import ( - "github.com/docker/libcontainer" + "syscall" + "github.com/docker/libcontainer/apparmor" - "github.com/docker/libcontainer/cgroups" + "github.com/docker/libcontainer/configs" ) +const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV + // New returns the docker default configuration for libcontainer -func New() *libcontainer.Config { - container := &libcontainer.Config{ +func New() *configs.Config { + container := &configs.Config{ Capabilities: []string{ "CHOWN", "DAC_OVERRIDE", @@ -25,18 +28,51 @@ func New() *libcontainer.Config { "KILL", "AUDIT_WRITE", }, - Namespaces: libcontainer.Namespaces([]libcontainer.Namespace{ + Namespaces: configs.Namespaces([]configs.Namespace{ {Type: "NEWNS"}, {Type: "NEWUTS"}, {Type: "NEWIPC"}, {Type: "NEWPID"}, {Type: "NEWNET"}, }), - Cgroups: &cgroups.Cgroup{ + Cgroups: &configs.Cgroup{ Parent: "docker", AllowAllDevices: false, }, - MountConfig: &libcontainer.MountConfig{}, + Mounts: []*configs.Mount{ + { + Device: "tmpfs", + Source: "shm", + Destination: "/dev/shm", + Data: "mode=1777,size=65536k", + Flags: defaultMountFlags, + }, + { + Source: "mqueue", + Destination: "/dev/mqueue", + Device: "mqueue", + Flags: defaultMountFlags, + }, + { + Source: "sysfs", + Destination: "/sys", + Device: "sysfs", + Flags: defaultMountFlags | syscall.MS_RDONLY, + }, + }, + MaskPaths: []string{ + "/proc/kcore", + }, + ReadonlyPaths: []string{ + "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", + }, + Rlimits: []configs.Rlimit{ + { + Type: syscall.RLIMIT_NOFILE, + Hard: 1024, + Soft: 1024, + }, + }, } if apparmor.IsEnabled() { diff --git a/daemon/execdriver/native/utils.go b/daemon/execdriver/native/utils.go index 88aefaf382..a703926453 100644 --- a/daemon/execdriver/native/utils.go +++ b/daemon/execdriver/native/utils.go @@ -2,28 +2,21 @@ package native -import ( - "encoding/json" - "os" +//func findUserArgs() []string { +//for i, a := range os.Args { +//if a == "--" { +//return os.Args[i+1:] +//} +//} +//return []string{} +//} - "github.com/docker/libcontainer" -) - -func findUserArgs() []string { - for i, a := range os.Args { - if a == "--" { - return os.Args[i+1:] - } - } - return []string{} -} - -// loadConfigFromFd loads a container's config from the sync pipe that is provided by -// fd 3 when running a process -func loadConfigFromFd() (*libcontainer.Config, error) { - var config *libcontainer.Config - if err := json.NewDecoder(os.NewFile(3, "child")).Decode(&config); err != nil { - return nil, err - } - return config, nil -} +//// loadConfigFromFd loads a container's config from the sync pipe that is provided by +//// fd 3 when running a process +//func loadConfigFromFd() (*configs.Config, error) { +//var config *libcontainer.Config +//if err := json.NewDecoder(os.NewFile(3, "child")).Decode(&config); err != nil { +//return nil, err +//} +//return config, nil +//} diff --git a/daemon/execdriver/utils.go b/daemon/execdriver/utils.go index 37042ef83a..e1fc9b9014 100644 --- a/daemon/execdriver/utils.go +++ b/daemon/execdriver/utils.go @@ -5,13 +5,83 @@ import ( "strings" "github.com/docker/docker/utils" - "github.com/docker/libcontainer/security/capabilities" + "github.com/syndtr/gocapability/capability" ) +var capabilityList = Capabilities{ + {Key: "SETPCAP", Value: capability.CAP_SETPCAP}, + {Key: "SYS_MODULE", Value: capability.CAP_SYS_MODULE}, + {Key: "SYS_RAWIO", Value: capability.CAP_SYS_RAWIO}, + {Key: "SYS_PACCT", Value: capability.CAP_SYS_PACCT}, + {Key: "SYS_ADMIN", Value: capability.CAP_SYS_ADMIN}, + {Key: "SYS_NICE", Value: capability.CAP_SYS_NICE}, + {Key: "SYS_RESOURCE", Value: capability.CAP_SYS_RESOURCE}, + {Key: "SYS_TIME", Value: capability.CAP_SYS_TIME}, + {Key: "SYS_TTY_CONFIG", Value: capability.CAP_SYS_TTY_CONFIG}, + {Key: "MKNOD", Value: capability.CAP_MKNOD}, + {Key: "AUDIT_WRITE", Value: capability.CAP_AUDIT_WRITE}, + {Key: "AUDIT_CONTROL", Value: capability.CAP_AUDIT_CONTROL}, + {Key: "MAC_OVERRIDE", Value: capability.CAP_MAC_OVERRIDE}, + {Key: "MAC_ADMIN", Value: capability.CAP_MAC_ADMIN}, + {Key: "NET_ADMIN", Value: capability.CAP_NET_ADMIN}, + {Key: "SYSLOG", Value: capability.CAP_SYSLOG}, + {Key: "CHOWN", Value: capability.CAP_CHOWN}, + {Key: "NET_RAW", Value: capability.CAP_NET_RAW}, + {Key: "DAC_OVERRIDE", Value: capability.CAP_DAC_OVERRIDE}, + {Key: "FOWNER", Value: capability.CAP_FOWNER}, + {Key: "DAC_READ_SEARCH", Value: capability.CAP_DAC_READ_SEARCH}, + {Key: "FSETID", Value: capability.CAP_FSETID}, + {Key: "KILL", Value: capability.CAP_KILL}, + {Key: "SETGID", Value: capability.CAP_SETGID}, + {Key: "SETUID", Value: capability.CAP_SETUID}, + {Key: "LINUX_IMMUTABLE", Value: capability.CAP_LINUX_IMMUTABLE}, + {Key: "NET_BIND_SERVICE", Value: capability.CAP_NET_BIND_SERVICE}, + {Key: "NET_BROADCAST", Value: capability.CAP_NET_BROADCAST}, + {Key: "IPC_LOCK", Value: capability.CAP_IPC_LOCK}, + {Key: "IPC_OWNER", Value: capability.CAP_IPC_OWNER}, + {Key: "SYS_CHROOT", Value: capability.CAP_SYS_CHROOT}, + {Key: "SYS_PTRACE", Value: capability.CAP_SYS_PTRACE}, + {Key: "SYS_BOOT", Value: capability.CAP_SYS_BOOT}, + {Key: "LEASE", Value: capability.CAP_LEASE}, + {Key: "SETFCAP", Value: capability.CAP_SETFCAP}, + {Key: "WAKE_ALARM", Value: capability.CAP_WAKE_ALARM}, + {Key: "BLOCK_SUSPEND", Value: capability.CAP_BLOCK_SUSPEND}, +} + +type ( + CapabilityMapping struct { + Key string `json:"key,omitempty"` + Value capability.Cap `json:"value,omitempty"` + } + Capabilities []*CapabilityMapping +) + +func (c *CapabilityMapping) String() string { + return c.Key +} + +func GetCapability(key string) *CapabilityMapping { + for _, capp := range capabilityList { + if capp.Key == key { + cpy := *capp + return &cpy + } + } + return nil +} + +func GetAllCapabilities() []string { + output := make([]string, len(capabilityList)) + for i, capability := range capabilityList { + output[i] = capability.String() + } + return output +} + func TweakCapabilities(basics, adds, drops []string) ([]string, error) { var ( newCaps []string - allCaps = capabilities.GetAllCapabilities() + allCaps = GetAllCapabilities() ) // look for invalid cap in the drop list @@ -26,7 +96,7 @@ func TweakCapabilities(basics, adds, drops []string) ([]string, error) { // handle --cap-add=all if utils.StringsContainsNoCase(adds, "all") { - basics = capabilities.GetAllCapabilities() + basics = allCaps } if !utils.StringsContainsNoCase(drops, "all") { diff --git a/daemon/stats.go b/daemon/stats.go index b36b28ae72..85d4a08550 100644 --- a/daemon/stats.go +++ b/daemon/stats.go @@ -18,7 +18,7 @@ func (daemon *Daemon) ContainerStats(job *engine.Job) engine.Status { enc := json.NewEncoder(job.Stdout) for v := range updates { update := v.(*execdriver.ResourceStats) - ss := convertToAPITypes(update.ContainerStats) + ss := convertToAPITypes(update.Stats) ss.MemoryStats.Limit = uint64(update.MemoryLimit) ss.Read = update.Read ss.CpuStats.SystemUsage = update.SystemUsage @@ -31,20 +31,21 @@ func (daemon *Daemon) ContainerStats(job *engine.Job) engine.Status { return engine.StatusOK } -// convertToAPITypes converts the libcontainer.ContainerStats to the api specific +// convertToAPITypes converts the libcontainer.Stats to the api specific // structs. This is done to preserve API compatibility and versioning. -func convertToAPITypes(ls *libcontainer.ContainerStats) *types.Stats { +func convertToAPITypes(ls *libcontainer.Stats) *types.Stats { s := &types.Stats{} - if ls.NetworkStats != nil { - s.Network = types.Network{ - RxBytes: ls.NetworkStats.RxBytes, - RxPackets: ls.NetworkStats.RxPackets, - RxErrors: ls.NetworkStats.RxErrors, - RxDropped: ls.NetworkStats.RxDropped, - TxBytes: ls.NetworkStats.TxBytes, - TxPackets: ls.NetworkStats.TxPackets, - TxErrors: ls.NetworkStats.TxErrors, - TxDropped: ls.NetworkStats.TxDropped, + if ls.Interfaces != nil { + s.Network = types.Network{} + for _, iface := range ls.Interfaces { + s.Network.RxBytes += iface.RxBytes + s.Network.RxPackets += iface.RxPackets + s.Network.RxErrors += iface.RxErrors + s.Network.RxDropped += iface.RxDropped + s.Network.TxBytes += iface.TxBytes + s.Network.TxPackets += iface.TxPackets + s.Network.TxErrors += iface.TxErrors + s.Network.TxDropped += iface.TxDropped } } cs := ls.CgroupStats diff --git a/integration-cli/docker_cli_exec_test.go b/integration-cli/docker_cli_exec_test.go index be8a042868..b0cc5902b4 100644 --- a/integration-cli/docker_cli_exec_test.go +++ b/integration-cli/docker_cli_exec_test.go @@ -60,7 +60,7 @@ func TestExecInteractiveStdinClose(t *testing.T) { out, err := cmd.CombinedOutput() if err != nil { - t.Fatal(err, out) + t.Fatal(err, string(out)) } if string(out) == "" { @@ -538,7 +538,6 @@ func TestRunExecDir(t *testing.T) { id := strings.TrimSpace(out) execDir := filepath.Join(execDriverPath, id) stateFile := filepath.Join(execDir, "state.json") - contFile := filepath.Join(execDir, "container.json") { fi, err := os.Stat(execDir) @@ -552,10 +551,6 @@ func TestRunExecDir(t *testing.T) { if err != nil { t.Fatal(err) } - fi, err = os.Stat(contFile) - if err != nil { - t.Fatal(err) - } } stopCmd := exec.Command(dockerBinary, "stop", id) @@ -564,23 +559,12 @@ func TestRunExecDir(t *testing.T) { t.Fatal(err, out) } { - fi, err := os.Stat(execDir) - if err != nil { + _, err := os.Stat(execDir) + if err == nil { t.Fatal(err) } - if !fi.IsDir() { - t.Fatalf("%q must be a directory", execDir) - } - fi, err = os.Stat(stateFile) if err == nil { - t.Fatalf("Statefile %q is exists for stopped container!", stateFile) - } - if !os.IsNotExist(err) { - t.Fatalf("Error should be about non-existing, got %s", err) - } - fi, err = os.Stat(contFile) - if err == nil { - t.Fatalf("Container file %q is exists for stopped container!", contFile) + t.Fatalf("Exec directory %q exists for removed container!", execDir) } if !os.IsNotExist(err) { t.Fatalf("Error should be about non-existing, got %s", err) @@ -603,10 +587,6 @@ func TestRunExecDir(t *testing.T) { if err != nil { t.Fatal(err) } - fi, err = os.Stat(contFile) - if err != nil { - t.Fatal(err) - } } rmCmd := exec.Command(dockerBinary, "rm", "-f", id) out, _, err = runCommandWithOutput(rmCmd)