diff --git a/hack/vendor.sh b/hack/vendor.sh index 4dd124994e..652126769a 100755 --- a/hack/vendor.sh +++ b/hack/vendor.sh @@ -59,7 +59,7 @@ rm -rf src/code.google.com/p/go mkdir -p src/code.google.com/p/go/src/pkg/archive mv tmp-tar src/code.google.com/p/go/src/pkg/archive/tar -clone git github.com/docker/libcontainer f2e78425c377acc7a67a35c3148069b6285a3c4b +clone git github.com/docker/libcontainer 29363e2d2d7b8f62a5f353be333758f83df540a9 # see src/github.com/docker/libcontainer/update-vendor.sh which is the "source of truth" for libcontainer deps (just like this file) rm -rf src/github.com/docker/libcontainer/vendor eval "$(grep '^clone ' src/github.com/docker/libcontainer/update-vendor.sh | grep -v 'github.com/codegangsta/cli')" diff --git a/vendor/src/github.com/docker/libcontainer/MAINTAINERS b/vendor/src/github.com/docker/libcontainer/MAINTAINERS index 798d3ca6bf..24011b0540 100644 --- a/vendor/src/github.com/docker/libcontainer/MAINTAINERS +++ b/vendor/src/github.com/docker/libcontainer/MAINTAINERS @@ -1,5 +1,6 @@ Michael Crosby (@crosbymichael) Rohit Jnagal (@rjnagal) Victor Marmol (@vmarmol) +Mrunal Patel (@mrunalp) .travis.yml: Tianon Gravi (@tianon) update-vendor.sh: Tianon Gravi (@tianon) diff --git a/vendor/src/github.com/docker/libcontainer/Makefile b/vendor/src/github.com/docker/libcontainer/Makefile index f7b3031bbd..d6852b24ab 100644 --- a/vendor/src/github.com/docker/libcontainer/Makefile +++ b/vendor/src/github.com/docker/libcontainer/Makefile @@ -2,7 +2,7 @@ all: docker build -t docker/libcontainer . -test: +test: # we need NET_ADMIN for the netlink tests and SYS_ADMIN for mounting docker run --rm -it --cap-add NET_ADMIN --cap-add SYS_ADMIN docker/libcontainer diff --git a/vendor/src/github.com/docker/libcontainer/README.md b/vendor/src/github.com/docker/libcontainer/README.md index ee14a57ce1..b80d2841f8 100644 --- a/vendor/src/github.com/docker/libcontainer/README.md +++ b/vendor/src/github.com/docker/libcontainer/README.md @@ -1,4 +1,4 @@ -## libcontainer - reference implementation for containers +## libcontainer - reference implementation for containers [![Build Status](https://travis-ci.org/docker/libcontainer.png?branch=master)](https://travis-ci.org/docker/libcontainer) ### Note on API changes: diff --git a/vendor/src/github.com/docker/libcontainer/cgroups/cgroups.go b/vendor/src/github.com/docker/libcontainer/cgroups/cgroups.go index 64ece568c3..598454862b 100644 --- a/vendor/src/github.com/docker/libcontainer/cgroups/cgroups.go +++ b/vendor/src/github.com/docker/libcontainer/cgroups/cgroups.go @@ -37,4 +37,5 @@ type Cgroup struct { type ActiveCgroup interface { Cleanup() error + Paths() (map[string]string, error) } diff --git a/vendor/src/github.com/docker/libcontainer/cgroups/fs/apply_raw.go b/vendor/src/github.com/docker/libcontainer/cgroups/fs/apply_raw.go index 2d1a15239f..e20cdbb926 100644 --- a/vendor/src/github.com/docker/libcontainer/cgroups/fs/apply_raw.go +++ b/vendor/src/github.com/docker/libcontainer/cgroups/fs/apply_raw.go @@ -21,12 +21,16 @@ var ( "perf_event": &PerfEventGroup{}, "freezer": &FreezerGroup{}, } + CgroupProcesses = "cgroup.procs" ) type subsystem interface { - Set(*data) error + // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. + GetStats(path string, stats *cgroups.Stats) error + // Removes the cgroup represented by 'data'. Remove(*data) error - GetStats(string, *cgroups.Stats) error + // Creates and joins the cgroup represented by data. + Set(*data) error } type data struct { @@ -149,6 +153,18 @@ func (raw *data) parent(subsystem string) (string, error) { return filepath.Join(raw.root, subsystem, initPath), nil } +func (raw *data) Paths() (map[string]string, error) { + paths := make(map[string]string) + for sysname := range subsystems { + path, err := raw.path(sysname) + if err != nil { + return nil, err + } + paths[sysname] = path + } + return paths, nil +} + func (raw *data) path(subsystem string) (string, error) { // If the cgroup name/path is absolute do not look relative to the cgroup of the init process. if filepath.IsAbs(raw.cgroup) { @@ -169,7 +185,7 @@ func (raw *data) join(subsystem string) (string, error) { if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { return "", err } - if err := writeFile(path, "cgroup.procs", strconv.Itoa(raw.pid)); err != nil { + if err := writeFile(path, CgroupProcesses, strconv.Itoa(raw.pid)); err != nil { return "", err } return path, nil diff --git a/vendor/src/github.com/docker/libcontainer/cgroups/fs/cpuacct.go b/vendor/src/github.com/docker/libcontainer/cgroups/fs/cpuacct.go index 7979009c08..7761d4c283 100644 --- a/vendor/src/github.com/docker/libcontainer/cgroups/fs/cpuacct.go +++ b/vendor/src/github.com/docker/libcontainer/cgroups/fs/cpuacct.go @@ -54,7 +54,7 @@ func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error { return err } // sample for 100ms - time.Sleep(100 * time.Millisecond) + time.Sleep(1000 * time.Millisecond) if kernelModeUsage, userModeUsage, err = getCpuUsage(path); err != nil { return err } @@ -73,7 +73,7 @@ func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error { deltaUsage = lastUsage - startUsage ) if deltaSystem > 0.0 { - percentage = ((deltaProc / deltaSystem) * clockTicks) * cpuCount + percentage = uint64((float64(deltaProc) / float64(deltaSystem)) * float64(clockTicks*cpuCount)) } // NOTE: a percentage over 100% is valid for POSIX because that means the // processes is using multiple cores diff --git a/vendor/src/github.com/docker/libcontainer/cgroups/fs/cpuset.go b/vendor/src/github.com/docker/libcontainer/cgroups/fs/cpuset.go index 9570125fd4..8847739464 100644 --- a/vendor/src/github.com/docker/libcontainer/cgroups/fs/cpuset.go +++ b/vendor/src/github.com/docker/libcontainer/cgroups/fs/cpuset.go @@ -20,19 +20,10 @@ func (s *CpusetGroup) Set(d *data) error { if err != nil { return err } - if err := s.ensureParent(dir); err != nil { - return err - } - // because we are not using d.join we need to place the pid into the procs file - // unlike the other subsystems - if err := writeFile(dir, "cgroup.procs", strconv.Itoa(d.pid)); err != nil { - return err - } - if err := writeFile(dir, "cpuset.cpus", d.c.CpusetCpus); err != nil { - return err - } + return s.SetDir(dir, d.c.CpusetCpus, d.pid) } + return nil } @@ -44,6 +35,24 @@ func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error { return nil } +func (s *CpusetGroup) SetDir(dir, value string, pid int) error { + if err := s.ensureParent(dir); err != nil { + return err + } + + // because we are not using d.join we need to place the pid into the procs file + // unlike the other subsystems + if err := writeFile(dir, "cgroup.procs", strconv.Itoa(pid)); err != nil { + return err + } + + if err := writeFile(dir, "cpuset.cpus", value); err != nil { + return err + } + + return nil +} + func (s *CpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) { if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus")); err != nil { return diff --git a/vendor/src/github.com/docker/libcontainer/cgroups/fs/memory.go b/vendor/src/github.com/docker/libcontainer/cgroups/fs/memory.go index c27150d2bf..ea92934a0f 100644 --- a/vendor/src/github.com/docker/libcontainer/cgroups/fs/memory.go +++ b/vendor/src/github.com/docker/libcontainer/cgroups/fs/memory.go @@ -14,7 +14,7 @@ type MemoryGroup struct { func (s *MemoryGroup) Set(d *data) error { dir, err := d.join("memory") - // only return an error for memory if it was not specified + // only return an error for memory if it was specified if err != nil && (d.c.Memory != 0 || d.c.MemoryReservation != 0 || d.c.MemorySwap != 0) { return err } diff --git a/vendor/src/github.com/docker/libcontainer/cgroups/systemd/apply_systemd.go b/vendor/src/github.com/docker/libcontainer/cgroups/systemd/apply_systemd.go index 01e5bf49b1..7fcd99bceb 100644 --- a/vendor/src/github.com/docker/libcontainer/cgroups/systemd/apply_systemd.go +++ b/vendor/src/github.com/docker/libcontainer/cgroups/systemd/apply_systemd.go @@ -21,7 +21,7 @@ import ( ) type systemdCgroup struct { - cleanupDirs []string + cgroup *cgroups.Cgroup } type subsystem interface { @@ -84,39 +84,15 @@ func getIfaceForUnit(unitName string) string { return "Unit" } -type cgroupArg struct { - File string - Value string -} - func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { var ( unitName = getUnitName(c) slice = "system.slice" properties []systemd1.Property - cpuArgs []cgroupArg - cpusetArgs []cgroupArg - memoryArgs []cgroupArg - res systemdCgroup + res = &systemdCgroup{} ) - // First set up things not supported by systemd - - // -1 disables memorySwap - if c.MemorySwap >= 0 && (c.Memory != 0 || c.MemorySwap > 0) { - memorySwap := c.MemorySwap - - if memorySwap == 0 { - // By default, MemorySwap is set to twice the size of RAM. - memorySwap = c.Memory * 2 - } - - memoryArgs = append(memoryArgs, cgroupArg{"memory.memsw.limit_in_bytes", strconv.FormatInt(memorySwap, 10)}) - } - - if c.CpusetCpus != "" { - cpusetArgs = append(cpusetArgs, cgroupArg{"cpuset.cpus", c.CpusetCpus}) - } + res.cgroup = c if c.Slice != "" { slice = c.Slice @@ -150,201 +126,84 @@ func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { return nil, err } - // To work around the lack of /dev/pts/* support above we need to manually add these - // so, ask systemd for the cgroup used - props, err := theConn.GetUnitTypeProperties(unitName, getIfaceForUnit(unitName)) - if err != nil { - return nil, err - } - - cgroup := props["ControlGroup"].(string) - if !c.AllowAllDevices { - // Atm we can't use the systemd device support because of two missing things: - // * Support for wildcards to allow mknod on any device - // * Support for wildcards to allow /dev/pts support - // - // The second is available in more recent systemd as "char-pts", but not in e.g. v208 which is - // in wide use. When both these are availalable we will be able to switch, but need to keep the old - // implementation for backwards compat. - // - // Note: we can't use systemd to set up the initial limits, and then change the cgroup - // because systemd will re-write the device settings if it needs to re-apply the cgroup context. - // This happens at least for v208 when any sibling unit is started. - - mountpoint, err := cgroups.FindCgroupMountpoint("devices") - if err != nil { + if err := joinDevices(c, pid); err != nil { return nil, err } - - initPath, err := cgroups.GetInitCgroupDir("devices") - if err != nil { - return nil, err - } - - dir := filepath.Join(mountpoint, initPath, c.Parent, c.Name) - - res.cleanupDirs = append(res.cleanupDirs, dir) - - if err := os.MkdirAll(dir, 0755); err != nil && !os.IsExist(err) { - return nil, err - } - - if err := ioutil.WriteFile(filepath.Join(dir, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700); err != nil { - return nil, err - } - - if err := writeFile(dir, "devices.deny", "a"); err != nil { - return nil, err - } - - for _, dev := range c.AllowedDevices { - if err := writeFile(dir, "devices.allow", dev.GetCgroupAllowString()); err != nil { - return nil, err - } - } } - if len(cpuArgs) != 0 { - mountpoint, err := cgroups.FindCgroupMountpoint("cpu") - if err != nil { + // -1 disables memorySwap + if c.MemorySwap >= 0 && (c.Memory != 0 || c.MemorySwap > 0) { + if err := joinMemory(c, pid); err != nil { return nil, err } - path := filepath.Join(mountpoint, cgroup) - - for _, arg := range cpuArgs { - if err := ioutil.WriteFile(filepath.Join(path, arg.File), []byte(arg.Value), 0700); err != nil { - return nil, err - } - } - } - - if len(memoryArgs) != 0 { - mountpoint, err := cgroups.FindCgroupMountpoint("memory") - if err != nil { - return nil, err - } - - path := filepath.Join(mountpoint, cgroup) - - for _, arg := range memoryArgs { - if err := ioutil.WriteFile(filepath.Join(path, arg.File), []byte(arg.Value), 0700); err != nil { - return nil, err - } - } } // we need to manually join the freezer cgroup in systemd because it does not currently support it // via the dbus api - freezerPath, err := joinFreezer(c, pid) - if err != nil { + if err := joinFreezer(c, pid); err != nil { return nil, err } - res.cleanupDirs = append(res.cleanupDirs, freezerPath) - if len(cpusetArgs) != 0 { - // systemd does not atm set up the cpuset controller, so we must manually - // join it. Additionally that is a very finicky controller where each - // level must have a full setup as the default for a new directory is "no cpus", - // so we avoid using any hierarchies here, creating a toplevel directory. - mountpoint, err := cgroups.FindCgroupMountpoint("cpuset") - if err != nil { - return nil, err - } - - initPath, err := cgroups.GetInitCgroupDir("cpuset") - if err != nil { - return nil, err - } - - var ( - foundCpus bool - foundMems bool - - rootPath = filepath.Join(mountpoint, initPath) - path = filepath.Join(mountpoint, initPath, c.Parent+"-"+c.Name) - ) - - res.cleanupDirs = append(res.cleanupDirs, path) - - if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { - return nil, err - } - - for _, arg := range cpusetArgs { - if arg.File == "cpuset.cpus" { - foundCpus = true - } - if arg.File == "cpuset.mems" { - foundMems = true - } - if err := ioutil.WriteFile(filepath.Join(path, arg.File), []byte(arg.Value), 0700); err != nil { - return nil, err - } - } - - // These are required, if not specified inherit from parent - if !foundCpus { - s, err := ioutil.ReadFile(filepath.Join(rootPath, "cpuset.cpus")) - if err != nil { - return nil, err - } - - if err := ioutil.WriteFile(filepath.Join(path, "cpuset.cpus"), s, 0700); err != nil { - return nil, err - } - } - - // These are required, if not specified inherit from parent - if !foundMems { - s, err := ioutil.ReadFile(filepath.Join(rootPath, "cpuset.mems")) - if err != nil { - return nil, err - } - - if err := ioutil.WriteFile(filepath.Join(path, "cpuset.mems"), s, 0700); err != nil { - return nil, err - } - } - - if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700); err != nil { + if c.CpusetCpus != "" { + if err := joinCpuset(c, pid); err != nil { return nil, err } } - return &res, nil + return res, nil } func writeFile(dir, file, data string) error { return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) } +func (c *systemdCgroup) Paths() (map[string]string, error) { + paths := make(map[string]string) + + for sysname := range subsystems { + subsystemPath, err := getSubsystemPath(c.cgroup, sysname) + if err != nil { + // Don't fail if a cgroup hierarchy was not found, just skip this subsystem + if err == cgroups.ErrNotFound { + continue + } + + return nil, err + } + + paths[sysname] = subsystemPath + } + + return paths, nil +} + func (c *systemdCgroup) Cleanup() error { // systemd cleans up, we don't need to do much + paths, err := c.Paths() + if err != nil { + return err + } - for _, path := range c.cleanupDirs { + for _, path := range paths { os.RemoveAll(path) } return nil } -func joinFreezer(c *cgroups.Cgroup, pid int) (string, error) { +func joinFreezer(c *cgroups.Cgroup, pid int) error { path, err := getSubsystemPath(c, "freezer") if err != nil { - return "", err + return err } if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { - return "", err + return err } - if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700); err != nil { - return "", err - } - - return path, nil + return ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700) } func getSubsystemPath(c *cgroups.Cgroup, subsystem string) (string, error) { @@ -389,20 +248,12 @@ func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error { } func GetPids(c *cgroups.Cgroup) ([]int, error) { - unitName := getUnitName(c) - - mountpoint, err := cgroups.FindCgroupMountpoint("cpu") + path, err := getSubsystemPath(c, "cpu") if err != nil { return nil, err } - props, err := theConn.GetUnitTypeProperties(unitName, getIfaceForUnit(unitName)) - if err != nil { - return nil, err - } - cgroup := props["ControlGroup"].(string) - - return cgroups.ReadProcsFile(filepath.Join(mountpoint, cgroup)) + return cgroups.ReadProcsFile(path) } func getUnitName(c *cgroups.Cgroup) string { @@ -437,3 +288,71 @@ func GetStats(c *cgroups.Cgroup) (*cgroups.Stats, error) { return stats, nil } + +// Atm we can't use the systemd device support because of two missing things: +// * Support for wildcards to allow mknod on any device +// * Support for wildcards to allow /dev/pts support +// +// The second is available in more recent systemd as "char-pts", but not in e.g. v208 which is +// in wide use. When both these are availalable we will be able to switch, but need to keep the old +// implementation for backwards compat. +// +// Note: we can't use systemd to set up the initial limits, and then change the cgroup +// because systemd will re-write the device settings if it needs to re-apply the cgroup context. +// This happens at least for v208 when any sibling unit is started. +func joinDevices(c *cgroups.Cgroup, pid int) error { + path, err := getSubsystemPath(c, "devices") + if err != nil { + return err + } + + if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { + return err + } + + if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700); err != nil { + return err + } + + if err := writeFile(path, "devices.deny", "a"); err != nil { + return err + } + + for _, dev := range c.AllowedDevices { + if err := writeFile(path, "devices.allow", dev.GetCgroupAllowString()); err != nil { + return err + } + } + + return nil +} + +func joinMemory(c *cgroups.Cgroup, pid int) error { + memorySwap := c.MemorySwap + + if memorySwap == 0 { + // By default, MemorySwap is set to twice the size of RAM. + memorySwap = c.Memory * 2 + } + + path, err := getSubsystemPath(c, "memory") + if err != nil { + return err + } + + return ioutil.WriteFile(filepath.Join(path, "memory.memsw.limit_in_bytes"), []byte(strconv.FormatInt(memorySwap, 10)), 0700) +} + +// systemd does not atm set up the cpuset controller, so we must manually +// join it. Additionally that is a very finicky controller where each +// level must have a full setup as the default for a new directory is "no cpus" +func joinCpuset(c *cgroups.Cgroup, pid int) error { + path, err := getSubsystemPath(c, "cpuset") + if err != nil { + return err + } + + s := &fs.CpusetGroup{} + + return s.SetDir(path, c.CpusetCpus, pid) +} diff --git a/vendor/src/github.com/docker/libcontainer/cgroups/utils.go b/vendor/src/github.com/docker/libcontainer/cgroups/utils.go index ce5c4f3364..6688ff71e4 100644 --- a/vendor/src/github.com/docker/libcontainer/cgroups/utils.go +++ b/vendor/src/github.com/docker/libcontainer/cgroups/utils.go @@ -4,6 +4,7 @@ import ( "bufio" "fmt" "io" + "io/ioutil" "os" "path/filepath" "strconv" @@ -166,3 +167,23 @@ func parseCgroupFile(subsystem string, r io.Reader) (string, error) { } return "", ErrNotFound } + +func pathExists(path string) bool { + if _, err := os.Stat(path); err != nil { + return false + } + return true +} + +func EnterPid(cgroupPaths map[string]string, pid int) error { + for _, path := range cgroupPaths { + if pathExists(path) { + if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), + []byte(strconv.Itoa(pid)), 0700); err != nil { + return err + } + } + } + + return nil +} diff --git a/vendor/src/github.com/docker/libcontainer/console/console.go b/vendor/src/github.com/docker/libcontainer/console/console.go index c0d1fb0433..346f537d53 100644 --- a/vendor/src/github.com/docker/libcontainer/console/console.go +++ b/vendor/src/github.com/docker/libcontainer/console/console.go @@ -114,7 +114,7 @@ func OpenPtmx() (*os.File, error) { func OpenTerminal(name string, flag int) (*os.File, error) { r, e := syscall.Open(name, flag, 0) if e != nil { - return nil, &os.PathError{"open", name, e} + return nil, &os.PathError{Op: "open", Path: name, Err: e} } return os.NewFile(uintptr(r), name), nil } diff --git a/vendor/src/github.com/docker/libcontainer/mount/init.go b/vendor/src/github.com/docker/libcontainer/mount/init.go index 7edea49994..05ab334c36 100644 --- a/vendor/src/github.com/docker/libcontainer/mount/init.go +++ b/vendor/src/github.com/docker/libcontainer/mount/init.go @@ -236,7 +236,7 @@ func reOpenDevNull(rootfs string) error { if stat.Rdev == devNullStat.Rdev { // Close and re-open the fd. if err = syscall.Dup2(int(file.Fd()), fd); err != nil { - return fmt.Errorf("Failed to dup fd %d to fd %d - %s", file.Fd(), fd) + return fmt.Errorf("Failed to dup fd %d to fd %d - %s", file.Fd(), fd, err) } } } diff --git a/vendor/src/github.com/docker/libcontainer/namespaces/exec.go b/vendor/src/github.com/docker/libcontainer/namespaces/exec.go index c9b2037cc7..382abfbccc 100644 --- a/vendor/src/github.com/docker/libcontainer/namespaces/exec.go +++ b/vendor/src/github.com/docker/libcontainer/namespaces/exec.go @@ -56,14 +56,19 @@ func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Wri // Do this before syncing with child so that no children // can escape the cgroup - cleaner, err := SetupCgroups(container, command.Process.Pid) + cgroupRef, err := SetupCgroups(container, command.Process.Pid) if err != nil { command.Process.Kill() command.Wait() return -1, err } - if cleaner != nil { - defer cleaner.Cleanup() + defer cgroupRef.Cleanup() + + cgroupPaths, err := cgroupRef.Paths() + if err != nil { + command.Process.Kill() + command.Wait() + return -1, err } var networkState network.NetworkState @@ -77,6 +82,7 @@ func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Wri InitPid: command.Process.Pid, InitStartTime: started, NetworkState: networkState, + CgroupPaths: cgroupPaths, } if err := libcontainer.SaveState(dataPath, state); err != nil { diff --git a/vendor/src/github.com/docker/libcontainer/namespaces/execin.go b/vendor/src/github.com/docker/libcontainer/namespaces/execin.go index 2ac9dba72e..8b81edecb9 100644 --- a/vendor/src/github.com/docker/libcontainer/namespaces/execin.go +++ b/vendor/src/github.com/docker/libcontainer/namespaces/execin.go @@ -3,6 +3,7 @@ package namespaces import ( + "fmt" "io" "os" "os/exec" @@ -11,6 +12,7 @@ import ( "syscall" "github.com/docker/libcontainer" + "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/label" "github.com/docker/libcontainer/syncpipe" "github.com/docker/libcontainer/system" @@ -18,10 +20,10 @@ import ( // ExecIn reexec's the initPath with the argv 0 rewrite to "nsenter" so that it is able to run the // setns code in a single threaded environment joining the existing containers' namespaces. -func ExecIn(container *libcontainer.Config, state *libcontainer.State, userArgs []string, initPath string, +func ExecIn(container *libcontainer.Config, state *libcontainer.State, userArgs []string, initPath, action string, stdin io.Reader, stdout, stderr io.Writer, console string, startCallback func(*exec.Cmd)) (int, error) { - args := []string{"nsenter", "--nspid", strconv.Itoa(state.InitPid)} + args := []string{fmt.Sprintf("nsenter-%s", action), "--nspid", strconv.Itoa(state.InitPid)} if console != "" { args = append(args, "--console", console) @@ -58,6 +60,11 @@ func ExecIn(container *libcontainer.Config, state *libcontainer.State, userArgs } pipe.CloseChild() + // Enter cgroups. + if err := EnterCgroups(state, cmd.Process.Pid); err != nil { + return -1, err + } + if err := pipe.SendToChild(container); err != nil { cmd.Process.Kill() cmd.Wait() @@ -101,3 +108,7 @@ func FinalizeSetns(container *libcontainer.Config, args []string) error { panic("unreachable") } + +func EnterCgroups(state *libcontainer.State, pid int) error { + return cgroups.EnterPid(state.CgroupPaths, pid) +} diff --git a/vendor/src/github.com/docker/libcontainer/namespaces/nsenter/nsenter.c b/vendor/src/github.com/docker/libcontainer/namespaces/nsenter/nsenter.c index 3bc29e2bcf..2869dd14d6 100644 --- a/vendor/src/github.com/docker/libcontainer/namespaces/nsenter/nsenter.c +++ b/vendor/src/github.com/docker/libcontainer/namespaces/nsenter/nsenter.c @@ -31,8 +31,8 @@ void get_args(int *argc, char ***argv) contents_size += kBufSize; contents = (char *)realloc(contents, contents_size); bytes_read = - read(fd, contents + contents_offset, - contents_size - contents_offset); + read(fd, contents + contents_offset, + contents_size - contents_offset); contents_offset += bytes_read; } while (bytes_read > 0); @@ -80,20 +80,20 @@ void nsenter() char **argv; get_args(&argc, &argv); - // check argv 0 to ensure that we are supposed to setns - // we use strncmp to test for a value of "nsenter" but also allows alternate implmentations - // after the setns code path to continue to use the argv 0 to determine actions to be run - // resulting in the ability to specify "nsenter-mknod", "nsenter-exec", etc... - if (strncmp(argv[0], kNsEnter, strlen(kNsEnter)) != 0) { - return; - } + // check argv 0 to ensure that we are supposed to setns + // we use strncmp to test for a value of "nsenter" but also allows alternate implmentations + // after the setns code path to continue to use the argv 0 to determine actions to be run + // resulting in the ability to specify "nsenter-mknod", "nsenter-exec", etc... + if (strncmp(argv[0], kNsEnter, strlen(kNsEnter)) != 0) { + return; + } static const struct option longopts[] = { {"nspid", required_argument, NULL, 'n'}, {"console", required_argument, NULL, 't'}, {NULL, 0, NULL, 0} }; - + pid_t init_pid = -1; char *init_pid_str = NULL; char *console = NULL; diff --git a/vendor/src/github.com/docker/libcontainer/netlink/netlink.go b/vendor/src/github.com/docker/libcontainer/netlink/netlink.go index 5cc756256d..dd9b1c1643 100644 --- a/vendor/src/github.com/docker/libcontainer/netlink/netlink.go +++ b/vendor/src/github.com/docker/libcontainer/netlink/netlink.go @@ -21,3 +21,10 @@ type Route struct { Iface *net.Interface Default bool } + +// An IfAddr defines IP network settings for a given network interface +type IfAddr struct { + Iface *net.Interface + IP net.IP + IPNet *net.IPNet +} diff --git a/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux.go b/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux.go index 5fcc817aff..215fb178a0 100644 --- a/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux.go +++ b/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux.go @@ -651,30 +651,28 @@ func NetworkSetNsFd(iface *net.Interface, fd int) error { return s.HandleAck(wb.Seq) } -// Add an Ip address to an interface. This is identical to: -// ip addr add $ip/$ipNet dev $iface -func NetworkLinkAddIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error { +func networkLinkIpAction(action, flags int, ifa IfAddr) error { s, err := getNetlinkSocket() if err != nil { return err } defer s.Close() - family := getIpFamily(ip) + family := getIpFamily(ifa.IP) - wb := newNetlinkRequest(syscall.RTM_NEWADDR, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + wb := newNetlinkRequest(action, flags) msg := newIfAddrmsg(family) - msg.Index = uint32(iface.Index) - prefixLen, _ := ipNet.Mask.Size() + msg.Index = uint32(ifa.Iface.Index) + prefixLen, _ := ifa.IPNet.Mask.Size() msg.Prefixlen = uint8(prefixLen) wb.AddData(msg) var ipData []byte if family == syscall.AF_INET { - ipData = ip.To4() + ipData = ifa.IP.To4() } else { - ipData = ip.To16() + ipData = ifa.IP.To16() } localData := newRtAttr(syscall.IFA_LOCAL, ipData) @@ -690,6 +688,26 @@ func NetworkLinkAddIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error { return s.HandleAck(wb.Seq) } +// Delete an IP address from an interface. This is identical to: +// ip addr del $ip/$ipNet dev $iface +func NetworkLinkDelIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error { + return networkLinkIpAction( + syscall.RTM_DELADDR, + syscall.NLM_F_ACK, + IfAddr{iface, ip, ipNet}, + ) +} + +// Add an Ip address to an interface. This is identical to: +// ip addr add $ip/$ipNet dev $iface +func NetworkLinkAddIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error { + return networkLinkIpAction( + syscall.RTM_NEWADDR, + syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK, + IfAddr{iface, ip, ipNet}, + ) +} + func zeroTerminated(s string) []byte { return []byte(s + "\000") } diff --git a/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_test.go b/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_test.go index a25f286138..086aee7f0b 100644 --- a/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_test.go +++ b/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_test.go @@ -2,9 +2,55 @@ package netlink import ( "net" + "strings" "testing" ) +func ipAssigned(iface *net.Interface, ip net.IP) bool { + addrs, _ := iface.Addrs() + + for _, addr := range addrs { + args := strings.SplitN(addr.String(), "/", 2) + if args[0] == ip.String() { + return true + } + } + + return false +} + +func TestAddDelNetworkIp(t *testing.T) { + if testing.Short() { + return + } + + ifaceName := "lo" + ip := net.ParseIP("127.0.1.1") + mask := net.IPv4Mask(255, 255, 255, 255) + ipNet := &net.IPNet{IP: ip, Mask: mask} + + iface, err := net.InterfaceByName(ifaceName) + if err != nil { + t.Skip("No 'lo' interface; skipping tests") + } + + if err := NetworkLinkAddIp(iface, ip, ipNet); err != nil { + t.Fatal(err) + } + + if !ipAssigned(iface, ip) { + t.Fatalf("Could not locate address '%s' in lo address list.", ip.String()) + } + + if err := NetworkLinkDelIp(iface, ip, ipNet); err != nil { + t.Fatal(err) + } + + if ipAssigned(iface, ip) { + t.Fatalf("Located address '%s' in lo address list after removal.", ip.String()) + } +} + func TestCreateBridgeWithMac(t *testing.T) { if testing.Short() { return diff --git a/vendor/src/github.com/docker/libcontainer/netlink/netlink_unsupported.go b/vendor/src/github.com/docker/libcontainer/netlink/netlink_unsupported.go index 783e68cae5..f6e84adf7e 100644 --- a/vendor/src/github.com/docker/libcontainer/netlink/netlink_unsupported.go +++ b/vendor/src/github.com/docker/libcontainer/netlink/netlink_unsupported.go @@ -31,6 +31,10 @@ func NetworkLinkAddIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error { return ErrNotImplemented } +func NetworkLinkDelIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error { + return ErrNotImplemented +} + func AddRoute(destination, source, gateway, device string) error { return ErrNotImplemented } diff --git a/vendor/src/github.com/docker/libcontainer/network/network.go b/vendor/src/github.com/docker/libcontainer/network/network.go index 48eeec6047..c7560c04a9 100644 --- a/vendor/src/github.com/docker/libcontainer/network/network.go +++ b/vendor/src/github.com/docker/libcontainer/network/network.go @@ -44,6 +44,14 @@ func SetInterfaceInNamespacePid(name string, nsPid int) error { return netlink.NetworkSetNsPid(iface, nsPid) } +func SetInterfaceInNamespaceFd(name string, fd uintptr) error { + iface, err := net.InterfaceByName(name) + if err != nil { + return err + } + return netlink.NetworkSetNsFd(iface, int(fd)) +} + func SetInterfaceMaster(name, master string) error { iface, err := net.InterfaceByName(name) if err != nil { diff --git a/vendor/src/github.com/docker/libcontainer/nsinit/config.go b/vendor/src/github.com/docker/libcontainer/nsinit/config.go index 5beb04acb3..74c7b3c09f 100644 --- a/vendor/src/github.com/docker/libcontainer/nsinit/config.go +++ b/vendor/src/github.com/docker/libcontainer/nsinit/config.go @@ -1,4 +1,4 @@ -package nsinit +package main import ( "encoding/json" @@ -15,7 +15,7 @@ var configCommand = cli.Command{ } func configAction(context *cli.Context) { - container, err := loadContainer() + container, err := loadConfig() if err != nil { log.Fatal(err) } diff --git a/vendor/src/github.com/docker/libcontainer/nsinit/exec.go b/vendor/src/github.com/docker/libcontainer/nsinit/exec.go index 4a38a61b4f..c46b191782 100644 --- a/vendor/src/github.com/docker/libcontainer/nsinit/exec.go +++ b/vendor/src/github.com/docker/libcontainer/nsinit/exec.go @@ -1,4 +1,4 @@ -package nsinit +package main import ( "fmt" @@ -8,6 +8,7 @@ import ( "os/exec" "os/signal" "syscall" + "text/tabwriter" "github.com/codegangsta/cli" "github.com/docker/docker/pkg/term" @@ -20,12 +21,29 @@ var execCommand = cli.Command{ Name: "exec", Usage: "execute a new command inside a container", Action: execAction, + Flags: []cli.Flag{ + cli.BoolFlag{Name: "list", Usage: "list all registered exec functions"}, + cli.StringFlag{Name: "func", Value: "exec", Usage: "function name to exec inside a container"}, + }, } func execAction(context *cli.Context) { + if context.Bool("list") { + w := tabwriter.NewWriter(os.Stdout, 10, 1, 3, ' ', 0) + fmt.Fprint(w, "NAME\tUSAGE\n") + + for k, f := range argvs { + fmt.Fprintf(w, "%s\t%s\n", k, f.Usage) + } + + w.Flush() + + return + } + var exitCode int - container, err := loadContainer() + container, err := loadConfig() if err != nil { log.Fatal(err) } @@ -36,7 +54,7 @@ func execAction(context *cli.Context) { } if state != nil { - exitCode, err = startInExistingContainer(container, state, context) + exitCode, err = startInExistingContainer(container, state, context.String("func"), context) } else { exitCode, err = startContainer(container, dataPath, []string(context.Args())) } @@ -52,7 +70,7 @@ func execAction(context *cli.Context) { // with the nsenter argument so that the C code can setns an the namespaces that we require. Then that // code path will drop us into the path that we can do the final setup of the namespace and exec the users // application. -func startInExistingContainer(config *libcontainer.Config, state *libcontainer.State, context *cli.Context) (int, error) { +func startInExistingContainer(config *libcontainer.Config, state *libcontainer.State, action string, context *cli.Context) (int, error) { var ( master *os.File console string @@ -102,7 +120,7 @@ func startInExistingContainer(config *libcontainer.Config, state *libcontainer.S }() } - return namespaces.ExecIn(config, state, context.Args(), os.Args[0], stdin, stdout, stderr, console, startCallback) + return namespaces.ExecIn(config, state, context.Args(), os.Args[0], action, stdin, stdout, stderr, console, startCallback) } // startContainer starts the container. Returns the exit status or -1 and an diff --git a/vendor/src/github.com/docker/libcontainer/nsinit/init.go b/vendor/src/github.com/docker/libcontainer/nsinit/init.go index e7a96632d7..c091ee1099 100644 --- a/vendor/src/github.com/docker/libcontainer/nsinit/init.go +++ b/vendor/src/github.com/docker/libcontainer/nsinit/init.go @@ -1,4 +1,4 @@ -package nsinit +package main import ( "log" @@ -26,7 +26,7 @@ var ( func initAction(context *cli.Context) { runtime.LockOSThread() - container, err := loadContainer() + container, err := loadConfig() if err != nil { log.Fatal(err) } diff --git a/vendor/src/github.com/docker/libcontainer/nsinit/cli.go b/vendor/src/github.com/docker/libcontainer/nsinit/main.go similarity index 58% rename from vendor/src/github.com/docker/libcontainer/nsinit/cli.go rename to vendor/src/github.com/docker/libcontainer/nsinit/main.go index c8d153312f..d65c0140e8 100644 --- a/vendor/src/github.com/docker/libcontainer/nsinit/cli.go +++ b/vendor/src/github.com/docker/libcontainer/nsinit/main.go @@ -1,38 +1,41 @@ -package nsinit +package main import ( "log" "os" + "strings" "github.com/codegangsta/cli" ) var ( logPath = os.Getenv("log") - argvs = make(map[string]func()) + argvs = make(map[string]*rFunc) ) func init() { - argvs["nsenter"] = nsenter -} - -func preload(context *cli.Context) error { - if logPath != "" { - if err := openLog(logPath); err != nil { - return err - } + argvs["exec"] = &rFunc{ + Usage: "execute a process inside an existing container", + Action: nsenterExec, } - return nil + argvs["mknod"] = &rFunc{ + Usage: "mknod a device inside an existing container", + Action: nsenterMknod, + } + + argvs["ip"] = &rFunc{ + Usage: "display the container's network interfaces", + Action: nsenterIp, + } } -func NsInit() { +func main() { // we need to check our argv 0 for any registred functions to run instead of the // normal cli code path - - action, exists := argvs[os.Args[0]] + f, exists := argvs[strings.TrimPrefix(os.Args[0], "nsenter-")] if exists { - action() + runFunc(f) return } diff --git a/vendor/src/github.com/docker/libcontainer/nsinit/nsenter.go b/vendor/src/github.com/docker/libcontainer/nsinit/nsenter.go index fabd65e9b4..8dc149f4fb 100644 --- a/vendor/src/github.com/docker/libcontainer/nsinit/nsenter.go +++ b/vendor/src/github.com/docker/libcontainer/nsinit/nsenter.go @@ -1,42 +1,84 @@ -package nsinit +package main import ( + "fmt" "log" + "net" "os" + "strconv" + "strings" + "text/tabwriter" "github.com/docker/libcontainer" + "github.com/docker/libcontainer/devices" + "github.com/docker/libcontainer/mount/nodes" "github.com/docker/libcontainer/namespaces" _ "github.com/docker/libcontainer/namespaces/nsenter" - "github.com/docker/libcontainer/syncpipe" ) -func findUserArgs() []string { - i := 0 - for _, a := range os.Args { - i++ - - if a == "--" { - break - } - } - - return os.Args[i:] -} - -// this expects that we already have our namespaces setup by the C initializer -// we are expected to finalize the namespace and exec the user's application -func nsenter() { - syncPipe, err := syncpipe.NewSyncPipeFromFd(0, 3) - if err != nil { - log.Fatalf("unable to create sync pipe: %s", err) - } - - var config *libcontainer.Config - if err := syncPipe.ReadFromParent(&config); err != nil { - log.Fatalf("reading container config from parent: %s", err) - } - - if err := namespaces.FinalizeSetns(config, findUserArgs()); err != nil { +// nsenterExec exec's a process inside an existing container +func nsenterExec(config *libcontainer.Config, args []string) { + if err := namespaces.FinalizeSetns(config, args); err != nil { log.Fatalf("failed to nsenter: %s", err) } } + +// nsenterMknod runs mknod inside an existing container +// +// mknod +func nsenterMknod(config *libcontainer.Config, args []string) { + if len(args) != 4 { + log.Fatalf("expected mknod to have 4 arguments not %d", len(args)) + } + + t := rune(args[1][0]) + + major, err := strconv.Atoi(args[2]) + if err != nil { + log.Fatal(err) + } + + minor, err := strconv.Atoi(args[3]) + if err != nil { + log.Fatal(err) + } + + n := &devices.Device{ + Path: args[0], + Type: t, + MajorNumber: int64(major), + MinorNumber: int64(minor), + } + + if err := nodes.CreateDeviceNode("/", n); err != nil { + log.Fatal(err) + } +} + +// nsenterIp displays the network interfaces inside a container's net namespace +func nsenterIp(config *libcontainer.Config, args []string) { + interfaces, err := net.Interfaces() + if err != nil { + log.Fatal(err) + } + + w := tabwriter.NewWriter(os.Stdout, 10, 1, 3, ' ', 0) + fmt.Fprint(w, "NAME\tMTU\tMAC\tFLAG\tADDRS\n") + + for _, iface := range interfaces { + addrs, err := iface.Addrs() + if err != nil { + log.Fatal(err) + } + + o := []string{} + + for _, a := range addrs { + o = append(o, a.String()) + } + + fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%s\n", iface.Name, iface.MTU, iface.HardwareAddr, iface.Flags, strings.Join(o, ",")) + } + + w.Flush() +} diff --git a/vendor/src/github.com/docker/libcontainer/nsinit/nsinit/nsinit.go b/vendor/src/github.com/docker/libcontainer/nsinit/nsinit/nsinit.go deleted file mode 100644 index 816c4da5f9..0000000000 --- a/vendor/src/github.com/docker/libcontainer/nsinit/nsinit/nsinit.go +++ /dev/null @@ -1,7 +0,0 @@ -package main - -import "github.com/docker/libcontainer/nsinit" - -func main() { - nsinit.NsInit() -} diff --git a/vendor/src/github.com/docker/libcontainer/nsinit/pause.go b/vendor/src/github.com/docker/libcontainer/nsinit/pause.go index 492a0e858a..ada24250c1 100644 --- a/vendor/src/github.com/docker/libcontainer/nsinit/pause.go +++ b/vendor/src/github.com/docker/libcontainer/nsinit/pause.go @@ -1,4 +1,4 @@ -package nsinit +package main import ( "log" @@ -34,7 +34,7 @@ func unpauseAction(context *cli.Context) { } func toggle(state cgroups.FreezerState) error { - container, err := loadContainer() + container, err := loadConfig() if err != nil { return err } diff --git a/vendor/src/github.com/docker/libcontainer/nsinit/stats.go b/vendor/src/github.com/docker/libcontainer/nsinit/stats.go index 3e59305b03..612b4a4bae 100644 --- a/vendor/src/github.com/docker/libcontainer/nsinit/stats.go +++ b/vendor/src/github.com/docker/libcontainer/nsinit/stats.go @@ -1,4 +1,4 @@ -package nsinit +package main import ( "encoding/json" @@ -16,7 +16,7 @@ var statsCommand = cli.Command{ } func statsAction(context *cli.Context) { - container, err := loadContainer() + container, err := loadConfig() if err != nil { log.Fatal(err) } diff --git a/vendor/src/github.com/docker/libcontainer/nsinit/utils.go b/vendor/src/github.com/docker/libcontainer/nsinit/utils.go index 8525ba9a6d..7f5155942c 100644 --- a/vendor/src/github.com/docker/libcontainer/nsinit/utils.go +++ b/vendor/src/github.com/docker/libcontainer/nsinit/utils.go @@ -1,4 +1,4 @@ -package nsinit +package main import ( "encoding/json" @@ -6,10 +6,18 @@ import ( "os" "path/filepath" + "github.com/codegangsta/cli" "github.com/docker/libcontainer" + "github.com/docker/libcontainer/syncpipe" ) -func loadContainer() (*libcontainer.Config, error) { +// rFunc is a function registration for calling after an execin +type rFunc struct { + Usage string + Action func(*libcontainer.Config, []string) +} + +func loadConfig() (*libcontainer.Config, error) { f, err := os.Open(filepath.Join(dataPath, "container.json")) if err != nil { return nil, err @@ -35,12 +43,52 @@ func openLog(name string) error { return nil } -func loadContainerFromJson(rawData string) (*libcontainer.Config, error) { - var container *libcontainer.Config +func findUserArgs() []string { + i := 0 + for _, a := range os.Args { + i++ - if err := json.Unmarshal([]byte(rawData), &container); err != nil { + if a == "--" { + break + } + } + + return os.Args[i:] +} + +// loadConfigFromFd loads a container's config from the sync pipe that is provided by +// fd 3 when running a process +func loadConfigFromFd() (*libcontainer.Config, error) { + syncPipe, err := syncpipe.NewSyncPipeFromFd(0, 3) + if err != nil { return nil, err } - return container, nil + var config *libcontainer.Config + if err := syncPipe.ReadFromParent(&config); err != nil { + return nil, err + } + + return config, nil +} + +func preload(context *cli.Context) error { + if logPath != "" { + if err := openLog(logPath); err != nil { + return err + } + } + + return nil +} + +func runFunc(f *rFunc) { + userArgs := findUserArgs() + + config, err := loadConfigFromFd() + if err != nil { + log.Fatalf("unable to receive config from sync pipe: %s", err) + } + + f.Action(config, userArgs) } diff --git a/vendor/src/github.com/docker/libcontainer/state.go b/vendor/src/github.com/docker/libcontainer/state.go index ee5d14d2ec..208b4c6276 100644 --- a/vendor/src/github.com/docker/libcontainer/state.go +++ b/vendor/src/github.com/docker/libcontainer/state.go @@ -18,6 +18,9 @@ type State struct { // Network runtime state. NetworkState network.NetworkState `json:"network_state,omitempty"` + + // Path to all the cgroups setup for a container. Key is cgroup subsystem name. + CgroupPaths map[string]string `json:"cgroup_paths,omitempty"` } // The running state of the container.