diff --git a/daemon/container_windows.go b/daemon/container_windows.go index e56b7e2d1a..7d885c3c3d 100644 --- a/daemon/container_windows.go +++ b/daemon/container_windows.go @@ -183,3 +183,7 @@ func (container *Container) unmountIpcMounts() error { func (container *Container) ipcMounts() []execdriver.Mount { return nil } + +func getDefaultRouteMtu() (int, error) { + return -1, errSystemNotSupported +} diff --git a/daemon/daemon.go b/daemon/daemon.go index 65fbfbb2e9..185cc9aa16 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -52,7 +52,6 @@ import ( "github.com/docker/docker/volume/local" "github.com/docker/docker/volume/store" "github.com/docker/libnetwork" - "github.com/opencontainers/runc/libcontainer/netlink" ) var ( @@ -1076,20 +1075,6 @@ func setDefaultMtu(config *Config) { var errNoDefaultRoute = errors.New("no default route was found") -// getDefaultRouteMtu returns the MTU for the default route's interface. -func getDefaultRouteMtu() (int, error) { - routes, err := netlink.NetworkGetRoutes() - if err != nil { - return 0, err - } - for _, r := range routes { - if r.Default && r.Iface != nil { - return r.Iface.MTU, nil - } - } - return 0, errNoDefaultRoute -} - // verifyContainerSettings performs validation of the hostconfig and config // structures. func (daemon *Daemon) verifyContainerSettings(ctx context.Context, hostConfig *runconfig.HostConfig, config *runconfig.Config) ([]string, error) { diff --git a/daemon/daemon_unix.go b/daemon/daemon_unix.go index 0ae7d95d89..1fc3fd64b5 100644 --- a/daemon/daemon_unix.go +++ b/daemon/daemon_unix.go @@ -28,6 +28,7 @@ import ( "github.com/docker/libnetwork/netlabel" "github.com/docker/libnetwork/options" "github.com/opencontainers/runc/libcontainer/label" + "github.com/vishvananda/netlink" ) const ( @@ -554,3 +555,22 @@ func (daemon *Daemon) newBaseContainer(id string) Container { VolumesRW: make(map[string]bool), } } + +// getDefaultRouteMtu returns the MTU for the default route's interface. +func getDefaultRouteMtu() (int, error) { + routes, err := netlink.RouteList(nil, 0) + if err != nil { + return 0, err + } + for _, r := range routes { + // a nil Dst means that this is the default route. + if r.Dst == nil { + i, err := net.InterfaceByIndex(r.LinkIndex) + if err != nil { + continue + } + return i.MTU, nil + } + } + return 0, errNoDefaultRoute +} diff --git a/daemon/execdriver/native/create.go b/daemon/execdriver/native/create.go index da64c74012..d76dbd2585 100644 --- a/daemon/execdriver/native/create.go +++ b/daemon/execdriver/native/create.go @@ -65,7 +65,13 @@ func (d *Driver) createContainer(ctx context.Context, c *execdriver.Command, hoo return nil, err } } - + // add CAP_ prefix to all caps for new libcontainer update to match + // the spec format. + for i, s := range container.Capabilities { + if !strings.HasPrefix(s, "CAP_") { + container.Capabilities[i] = fmt.Sprintf("CAP_%s", s) + } + } container.AdditionalGroups = c.GroupAdd if c.AppArmorProfile != "" { diff --git a/daemon/execdriver/native/exec.go b/daemon/execdriver/native/exec.go index 8327cc7650..ac6c13fb29 100644 --- a/daemon/execdriver/native/exec.go +++ b/daemon/execdriver/native/exec.go @@ -6,6 +6,7 @@ import ( "fmt" "os" "os/exec" + "strings" "syscall" "github.com/docker/docker/context" @@ -36,6 +37,13 @@ func (d *Driver) Exec(ctx context.Context, c *execdriver.Command, processConfig if processConfig.Privileged { p.Capabilities = execdriver.GetAllCapabilities() } + // add CAP_ prefix to all caps for new libcontainer update to match + // the spec format. + for i, s := range p.Capabilities { + if !strings.HasPrefix(s, "CAP_") { + p.Capabilities[i] = fmt.Sprintf("CAP_%s", s) + } + } config := active.Config() if err := setupPipes(&config, processConfig, p, pipes); err != nil { diff --git a/daemon/execdriver/utils.go b/daemon/execdriver/utils.go index 6712dafd64..7860e04026 100644 --- a/daemon/execdriver/utils.go +++ b/daemon/execdriver/utils.go @@ -119,6 +119,5 @@ func TweakCapabilities(basics, adds, drops []string) ([]string, error) { newCaps = append(newCaps, strings.ToUpper(cap)) } } - return newCaps, nil } diff --git a/hack/vendor.sh b/hack/vendor.sh index ab09775f41..7550694b0e 100755 --- a/hack/vendor.sh +++ b/hack/vendor.sh @@ -42,7 +42,7 @@ clone git github.com/endophage/gotuf 9bcdad0308e34a49f38448b8ad436ad8860825ce clone git github.com/jfrazelle/go 6e461eb70cb4187b41a84e9a567d7137bdbe0f16 clone git github.com/agl/ed25519 d2b94fd789ea21d12fac1a4443dd3a3f79cda72c -clone git github.com/opencontainers/runc v0.0.4 # libcontainer +clone git github.com/opencontainers/runc aac9179bbadbf958054ce97ab368ac178140e5da # libcontainer # libcontainer deps (see src/github.com/docker/libcontainer/update-vendor.sh) clone git github.com/coreos/go-systemd v3 clone git github.com/godbus/dbus v2 diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/README.md b/vendor/src/github.com/opencontainers/runc/libcontainer/README.md index a0742fe02f..295edb4f7e 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/README.md +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/README.md @@ -32,20 +32,20 @@ struct describing how the container is to be created. A sample would look simil config := &configs.Config{ Rootfs: rootfs, Capabilities: []string{ - "CHOWN", - "DAC_OVERRIDE", - "FSETID", - "FOWNER", - "MKNOD", - "NET_RAW", - "SETGID", - "SETUID", - "SETFCAP", - "SETPCAP", - "NET_BIND_SERVICE", - "SYS_CHROOT", - "KILL", - "AUDIT_WRITE", + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", }, Namespaces: configs.Namespaces([]configs.Namespace{ {Type: configs.NEWNS}, diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/capabilities_linux.go b/vendor/src/github.com/opencontainers/runc/libcontainer/capabilities_linux.go index 6b8b465c56..13932db859 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/capabilities_linux.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/capabilities_linux.go @@ -12,44 +12,44 @@ import ( const allCapabilityTypes = capability.CAPS | capability.BOUNDS var capabilityList = map[string]capability.Cap{ - "SETPCAP": capability.CAP_SETPCAP, - "SYS_MODULE": capability.CAP_SYS_MODULE, - "SYS_RAWIO": capability.CAP_SYS_RAWIO, - "SYS_PACCT": capability.CAP_SYS_PACCT, - "SYS_ADMIN": capability.CAP_SYS_ADMIN, - "SYS_NICE": capability.CAP_SYS_NICE, - "SYS_RESOURCE": capability.CAP_SYS_RESOURCE, - "SYS_TIME": capability.CAP_SYS_TIME, - "SYS_TTY_CONFIG": capability.CAP_SYS_TTY_CONFIG, - "MKNOD": capability.CAP_MKNOD, - "AUDIT_WRITE": capability.CAP_AUDIT_WRITE, - "AUDIT_CONTROL": capability.CAP_AUDIT_CONTROL, - "MAC_OVERRIDE": capability.CAP_MAC_OVERRIDE, - "MAC_ADMIN": capability.CAP_MAC_ADMIN, - "NET_ADMIN": capability.CAP_NET_ADMIN, - "SYSLOG": capability.CAP_SYSLOG, - "CHOWN": capability.CAP_CHOWN, - "NET_RAW": capability.CAP_NET_RAW, - "DAC_OVERRIDE": capability.CAP_DAC_OVERRIDE, - "FOWNER": capability.CAP_FOWNER, - "DAC_READ_SEARCH": capability.CAP_DAC_READ_SEARCH, - "FSETID": capability.CAP_FSETID, - "KILL": capability.CAP_KILL, - "SETGID": capability.CAP_SETGID, - "SETUID": capability.CAP_SETUID, - "LINUX_IMMUTABLE": capability.CAP_LINUX_IMMUTABLE, - "NET_BIND_SERVICE": capability.CAP_NET_BIND_SERVICE, - "NET_BROADCAST": capability.CAP_NET_BROADCAST, - "IPC_LOCK": capability.CAP_IPC_LOCK, - "IPC_OWNER": capability.CAP_IPC_OWNER, - "SYS_CHROOT": capability.CAP_SYS_CHROOT, - "SYS_PTRACE": capability.CAP_SYS_PTRACE, - "SYS_BOOT": capability.CAP_SYS_BOOT, - "LEASE": capability.CAP_LEASE, - "SETFCAP": capability.CAP_SETFCAP, - "WAKE_ALARM": capability.CAP_WAKE_ALARM, - "BLOCK_SUSPEND": capability.CAP_BLOCK_SUSPEND, - "AUDIT_READ": capability.CAP_AUDIT_READ, + "CAP_SETPCAP": capability.CAP_SETPCAP, + "CAP_SYS_MODULE": capability.CAP_SYS_MODULE, + "CAP_SYS_RAWIO": capability.CAP_SYS_RAWIO, + "CAP_SYS_PACCT": capability.CAP_SYS_PACCT, + "CAP_SYS_ADMIN": capability.CAP_SYS_ADMIN, + "CAP_SYS_NICE": capability.CAP_SYS_NICE, + "CAP_SYS_RESOURCE": capability.CAP_SYS_RESOURCE, + "CAP_SYS_TIME": capability.CAP_SYS_TIME, + "CAP_SYS_TTY_CONFIG": capability.CAP_SYS_TTY_CONFIG, + "CAP_MKNOD": capability.CAP_MKNOD, + "CAP_AUDIT_WRITE": capability.CAP_AUDIT_WRITE, + "CAP_AUDIT_CONTROL": capability.CAP_AUDIT_CONTROL, + "CAP_MAC_OVERRIDE": capability.CAP_MAC_OVERRIDE, + "CAP_MAC_ADMIN": capability.CAP_MAC_ADMIN, + "CAP_NET_ADMIN": capability.CAP_NET_ADMIN, + "CAP_SYSLOG": capability.CAP_SYSLOG, + "CAP_CHOWN": capability.CAP_CHOWN, + "CAP_NET_RAW": capability.CAP_NET_RAW, + "CAP_DAC_OVERRIDE": capability.CAP_DAC_OVERRIDE, + "CAP_FOWNER": capability.CAP_FOWNER, + "CAP_DAC_READ_SEARCH": capability.CAP_DAC_READ_SEARCH, + "CAP_FSETID": capability.CAP_FSETID, + "CAP_KILL": capability.CAP_KILL, + "CAP_SETGID": capability.CAP_SETGID, + "CAP_SETUID": capability.CAP_SETUID, + "CAP_LINUX_IMMUTABLE": capability.CAP_LINUX_IMMUTABLE, + "CAP_NET_BIND_SERVICE": capability.CAP_NET_BIND_SERVICE, + "CAP_NET_BROADCAST": capability.CAP_NET_BROADCAST, + "CAP_IPC_LOCK": capability.CAP_IPC_LOCK, + "CAP_IPC_OWNER": capability.CAP_IPC_OWNER, + "CAP_SYS_CHROOT": capability.CAP_SYS_CHROOT, + "CAP_SYS_PTRACE": capability.CAP_SYS_PTRACE, + "CAP_SYS_BOOT": capability.CAP_SYS_BOOT, + "CAP_LEASE": capability.CAP_LEASE, + "CAP_SETFCAP": capability.CAP_SETFCAP, + "CAP_WAKE_ALARM": capability.CAP_WAKE_ALARM, + "CAP_BLOCK_SUSPEND": capability.CAP_BLOCK_SUSPEND, + "CAP_AUDIT_READ": capability.CAP_AUDIT_READ, } func newCapWhitelist(caps []string) (*whitelist, error) { diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/utils.go b/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/utils.go index 3a182684f1..8ff7e3b03d 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/utils.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/utils.go @@ -5,7 +5,6 @@ package cgroups import ( "bufio" "fmt" - "io" "io/ioutil" "os" "path/filepath" @@ -105,12 +104,12 @@ type Mount struct { Subsystems []string } -func (m Mount) GetThisCgroupDir() (string, error) { +func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) { if len(m.Subsystems) == 0 { return "", fmt.Errorf("no subsystem for mount") } - return GetThisCgroupDir(m.Subsystems[0]) + return getControllerPath(m.Subsystems[0], cgroups) } func GetCgroupMounts() ([]Mount, error) { @@ -176,23 +175,22 @@ func GetAllSubsystems() ([]string, error) { // Returns the relative path to the cgroup docker is running in. func GetThisCgroupDir(subsystem string) (string, error) { - f, err := os.Open("/proc/self/cgroup") + cgroups, err := ParseCgroupFile("/proc/self/cgroup") if err != nil { return "", err } - defer f.Close() - return ParseCgroupFile(subsystem, f) + return getControllerPath(subsystem, cgroups) } func GetInitCgroupDir(subsystem string) (string, error) { - f, err := os.Open("/proc/1/cgroup") + + cgroups, err := ParseCgroupFile("/proc/1/cgroup") if err != nil { return "", err } - defer f.Close() - return ParseCgroupFile(subsystem, f) + return getControllerPath(subsystem, cgroups) } func ReadProcsFile(dir string) ([]int, error) { @@ -219,23 +217,40 @@ func ReadProcsFile(dir string) ([]int, error) { return out, nil } -func ParseCgroupFile(subsystem string, r io.Reader) (string, error) { - s := bufio.NewScanner(r) +func ParseCgroupFile(path string) (map[string]string, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + + s := bufio.NewScanner(f) + cgroups := make(map[string]string) for s.Scan() { if err := s.Err(); err != nil { - return "", err + return nil, err } text := s.Text() parts := strings.Split(text, ":") for _, subs := range strings.Split(parts[1], ",") { - if subs == subsystem || subs == cgroupNamePrefix+subsystem { - return parts[2], nil - } + cgroups[subs] = parts[2] } } + return cgroups, nil +} + +func getControllerPath(subsystem string, cgroups map[string]string) (string, error) { + + if p, ok := cgroups[subsystem]; ok { + return p, nil + } + + if p, ok := cgroups[cgroupNamePrefix+subsystem]; ok { + return p, nil + } return "", NewNotFoundError(subsystem) } diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/configs/config.go b/vendor/src/github.com/opencontainers/runc/libcontainer/configs/config.go index f26287608a..4b298e1eae 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/configs/config.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/configs/config.go @@ -20,8 +20,12 @@ type IDMap struct { } // Seccomp represents syscall restrictions +// By default, only the native architecture of the kernel is allowed to be used +// for syscalls. Additional architectures can be added by specifying them in +// Architectures. type Seccomp struct { DefaultAction Action `json:"default_action"` + Architectures []string `json:"architectures"` Syscalls []*Syscall `json:"syscalls"` } @@ -169,6 +173,9 @@ type Config struct { // Hooks are a collection of actions to perform at various container lifecycle events. // Hooks are not able to be marshaled to json but they are also not needed to. Hooks *Hooks `json:"-"` + + // Version is the version of opencontainer specification that is supported. + Version string `json:"version"` } type Hooks struct { @@ -182,9 +189,10 @@ type Hooks struct { // HookState is the payload provided to a hook on execution. type HookState struct { - ID string `json:"id"` - Pid int `json:"pid"` - Root string `json:"root"` + Version string `json:"version"` + ID string `json:"id"` + Pid int `json:"pid"` + Root string `json:"root"` } type Hook interface { diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/configs/mount.go b/vendor/src/github.com/opencontainers/runc/libcontainer/configs/mount.go index 50668f04fc..cc770c916f 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/configs/mount.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/configs/mount.go @@ -13,6 +13,9 @@ type Mount struct { // Mount flags. Flags int `json:"flags"` + // Propagation Flags + PropagationFlags []int `json:"propagation_flags"` + // Mount data applied to the mount. Data string `json:"data"` diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/console_linux.go b/vendor/src/github.com/opencontainers/runc/libcontainer/console_linux.go index 3ac6bfec8c..f345f572b3 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/console_linux.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/console_linux.go @@ -75,7 +75,7 @@ func (c *linuxConsole) Close() error { // mount initializes the console inside the rootfs mounting with the specified mount label // and applying the correct ownership of the console. -func (c *linuxConsole) mount(rootfs, mountLabel string, uid, gid int) error { +func (c *linuxConsole) mount(rootfs, mountLabel string) error { oldMask := syscall.Umask(0000) defer syscall.Umask(oldMask) if err := label.SetFileLabel(c.slavePath, mountLabel); err != nil { diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/container_linux.go b/vendor/src/github.com/opencontainers/runc/libcontainer/container_linux.go index 574773bf51..853aedf4dd 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/container_linux.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/container_linux.go @@ -250,8 +250,9 @@ func (c *linuxContainer) Destroy() error { c.initProcess = nil if c.config.Hooks != nil { s := configs.HookState{ - ID: c.id, - Root: c.config.Rootfs, + Version: c.config.Version, + ID: c.id, + Root: c.config.Rootfs, } for _, hook := range c.config.Hooks.Poststop { if err := hook.Run(s); err != nil { @@ -423,7 +424,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error { return err } - err = c.criuSwrk(nil, req, criuOpts) + err = c.criuSwrk(nil, req, criuOpts, false) if err != nil { return err } @@ -516,6 +517,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { FileLocks: proto.Bool(criuOpts.FileLocks), }, } + for _, m := range c.config.Mounts { switch m.Device { case "bind": @@ -573,14 +575,36 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { } } - err = c.criuSwrk(process, req, criuOpts) + err = c.criuSwrk(process, req, criuOpts, true) if err != nil { return err } return nil } -func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts) error { +func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error { + if err := c.cgroupManager.Apply(pid); err != nil { + return err + } + + path := fmt.Sprintf("/proc/%d/cgroup", pid) + cgroupsPaths, err := cgroups.ParseCgroupFile(path) + if err != nil { + return err + } + + for c, p := range cgroupsPaths { + cgroupRoot := &criurpc.CgroupRoot{ + Ctrl: proto.String(c), + Path: proto.String(p), + } + req.Opts.CgRoot = append(req.Opts.CgRoot, cgroupRoot) + } + + return nil +} + +func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool) error { fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0) if err != nil { return err @@ -614,6 +638,13 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts * } }() + if applyCgroups { + err := c.criuApplyCgroups(cmd.Process.Pid, req) + if err != nil { + return err + } + } + var extFds []string if process != nil { extFds, err = getPipeFds(cmd.Process.Pid) diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/init_linux.go b/vendor/src/github.com/opencontainers/runc/libcontainer/init_linux.go index 6854a2d902..ddb1186595 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/init_linux.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/init_linux.go @@ -6,6 +6,7 @@ import ( "encoding/json" "fmt" "io/ioutil" + "net" "os" "strconv" "strings" @@ -14,10 +15,10 @@ import ( "github.com/Sirupsen/logrus" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/netlink" "github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/user" "github.com/opencontainers/runc/libcontainer/utils" + "github.com/vishvananda/netlink" ) type initType string @@ -186,7 +187,11 @@ func setupUser(config *initConfig) error { return err } } - + // before we change to the container's user make sure that the processes STDIO + // is correctly owned by the user that we are switching to. + if err := fixStdioPermissions(execUser); err != nil { + return err + } suppGroups := append(execUser.Sgids, addGroups...) if err := syscall.Setgroups(suppGroups); err != nil { return err @@ -207,6 +212,34 @@ func setupUser(config *initConfig) error { return nil } +// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user. +// The ownership needs to match because it is created outside of the container and needs to be +// localized. +func fixStdioPermissions(u *user.ExecUser) error { + var null syscall.Stat_t + if err := syscall.Stat("/dev/null", &null); err != nil { + return err + } + for _, fd := range []uintptr{ + os.Stdin.Fd(), + os.Stderr.Fd(), + os.Stdout.Fd(), + } { + var s syscall.Stat_t + if err := syscall.Fstat(int(fd), &s); err != nil { + return err + } + // skip chown of /dev/null if it was used as one of the STDIO fds. + if s.Rdev == null.Rdev { + continue + } + if err := syscall.Fchown(int(fd), u.Uid, u.Gid); err != nil { + return err + } + } + return nil +} + // setupNetwork sets up and initializes any network interface inside the container. func setupNetwork(config *initConfig) error { for _, config := range config.Networks { @@ -223,7 +256,30 @@ func setupNetwork(config *initConfig) error { func setupRoute(config *configs.Config) error { for _, config := range config.Routes { - if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil { + _, dst, err := net.ParseCIDR(config.Destination) + if err != nil { + return err + } + src := net.ParseIP(config.Source) + if src == nil { + return fmt.Errorf("Invalid source for route: %s", config.Source) + } + gw := net.ParseIP(config.Gateway) + if gw == nil { + return fmt.Errorf("Invalid gateway for route: %s", config.Gateway) + } + l, err := netlink.LinkByName(config.InterfaceName) + if err != nil { + return err + } + route := &netlink.Route{ + Scope: netlink.SCOPE_UNIVERSE, + Dst: dst, + Src: src, + Gw: gw, + LinkIndex: l.Attrs().Index, + } + if err := netlink.RouteAdd(route); err != nil { return err } } diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/MAINTAINERS b/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/MAINTAINERS deleted file mode 100644 index 1cb551364d..0000000000 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/MAINTAINERS +++ /dev/null @@ -1,2 +0,0 @@ -Michael Crosby (@crosbymichael) -Guillaume J. Charmes (@creack) diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink.go b/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink.go deleted file mode 100644 index 9088366061..0000000000 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink.go +++ /dev/null @@ -1,31 +0,0 @@ -// Packet netlink provide access to low level Netlink sockets and messages. -// -// Actual implementations are in: -// netlink_linux.go -// netlink_darwin.go -package netlink - -import ( - "errors" - "net" -) - -var ( - ErrWrongSockType = errors.New("Wrong socket type") - ErrShortResponse = errors.New("Got short response from netlink") - ErrInterfaceExists = errors.New("Network interface already exists") -) - -// A Route is a subnet associated with the interface to reach it. -type Route struct { - *net.IPNet - Iface *net.Interface - Default bool -} - -// An IfAddr defines IP network settings for a given network interface -type IfAddr struct { - Iface *net.Interface - IP net.IP - IPNet *net.IPNet -} diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux.go b/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux.go deleted file mode 100644 index c81c269e61..0000000000 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux.go +++ /dev/null @@ -1,1321 +0,0 @@ -package netlink - -import ( - "encoding/binary" - "fmt" - "io" - "math/rand" - "net" - "os" - "sync/atomic" - "syscall" - "time" - "unsafe" -) - -const ( - IFNAMSIZ = 16 - DEFAULT_CHANGE = 0xFFFFFFFF - IFLA_INFO_KIND = 1 - IFLA_INFO_DATA = 2 - VETH_INFO_PEER = 1 - IFLA_MACVLAN_MODE = 1 - IFLA_VLAN_ID = 1 - IFLA_NET_NS_FD = 28 - IFLA_ADDRESS = 1 - IFLA_BRPORT_MODE = 4 - SIOC_BRADDBR = 0x89a0 - SIOC_BRDELBR = 0x89a1 - SIOC_BRADDIF = 0x89a2 - SIOC_BRDELIF = 0x89a3 -) - -const ( - MACVLAN_MODE_PRIVATE = 1 << iota - MACVLAN_MODE_VEPA - MACVLAN_MODE_BRIDGE - MACVLAN_MODE_PASSTHRU -) - -var nextSeqNr uint32 - -type ifreqHwaddr struct { - IfrnName [IFNAMSIZ]byte - IfruHwaddr syscall.RawSockaddr -} - -type ifreqIndex struct { - IfrnName [IFNAMSIZ]byte - IfruIndex int32 -} - -type ifreqFlags struct { - IfrnName [IFNAMSIZ]byte - Ifruflags uint16 -} - -var native binary.ByteOrder - -var rnd = rand.New(rand.NewSource(time.Now().UnixNano())) - -func init() { - var x uint32 = 0x01020304 - if *(*byte)(unsafe.Pointer(&x)) == 0x01 { - native = binary.BigEndian - } else { - native = binary.LittleEndian - } -} - -func getIpFamily(ip net.IP) int { - if len(ip) <= net.IPv4len { - return syscall.AF_INET - } - if ip.To4() != nil { - return syscall.AF_INET - } - return syscall.AF_INET6 -} - -type NetlinkRequestData interface { - Len() int - ToWireFormat() []byte -} - -type IfInfomsg struct { - syscall.IfInfomsg -} - -func newIfInfomsg(family int) *IfInfomsg { - return &IfInfomsg{ - IfInfomsg: syscall.IfInfomsg{ - Family: uint8(family), - }, - } -} - -func newIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg { - msg := newIfInfomsg(family) - parent.children = append(parent.children, msg) - return msg -} - -func (msg *IfInfomsg) ToWireFormat() []byte { - length := syscall.SizeofIfInfomsg - b := make([]byte, length) - b[0] = msg.Family - b[1] = 0 - native.PutUint16(b[2:4], msg.Type) - native.PutUint32(b[4:8], uint32(msg.Index)) - native.PutUint32(b[8:12], msg.Flags) - native.PutUint32(b[12:16], msg.Change) - return b -} - -func (msg *IfInfomsg) Len() int { - return syscall.SizeofIfInfomsg -} - -type IfAddrmsg struct { - syscall.IfAddrmsg -} - -func newIfAddrmsg(family int) *IfAddrmsg { - return &IfAddrmsg{ - IfAddrmsg: syscall.IfAddrmsg{ - Family: uint8(family), - }, - } -} - -func (msg *IfAddrmsg) ToWireFormat() []byte { - length := syscall.SizeofIfAddrmsg - b := make([]byte, length) - b[0] = msg.Family - b[1] = msg.Prefixlen - b[2] = msg.Flags - b[3] = msg.Scope - native.PutUint32(b[4:8], msg.Index) - return b -} - -func (msg *IfAddrmsg) Len() int { - return syscall.SizeofIfAddrmsg -} - -type RtMsg struct { - syscall.RtMsg -} - -func newRtMsg() *RtMsg { - return &RtMsg{ - RtMsg: syscall.RtMsg{ - Table: syscall.RT_TABLE_MAIN, - Scope: syscall.RT_SCOPE_UNIVERSE, - Protocol: syscall.RTPROT_BOOT, - Type: syscall.RTN_UNICAST, - }, - } -} - -func (msg *RtMsg) ToWireFormat() []byte { - length := syscall.SizeofRtMsg - b := make([]byte, length) - b[0] = msg.Family - b[1] = msg.Dst_len - b[2] = msg.Src_len - b[3] = msg.Tos - b[4] = msg.Table - b[5] = msg.Protocol - b[6] = msg.Scope - b[7] = msg.Type - native.PutUint32(b[8:12], msg.Flags) - return b -} - -func (msg *RtMsg) Len() int { - return syscall.SizeofRtMsg -} - -func rtaAlignOf(attrlen int) int { - return (attrlen + syscall.RTA_ALIGNTO - 1) & ^(syscall.RTA_ALIGNTO - 1) -} - -type RtAttr struct { - syscall.RtAttr - Data []byte - children []NetlinkRequestData -} - -func newRtAttr(attrType int, data []byte) *RtAttr { - return &RtAttr{ - RtAttr: syscall.RtAttr{ - Type: uint16(attrType), - }, - children: []NetlinkRequestData{}, - Data: data, - } -} - -func newRtAttrChild(parent *RtAttr, attrType int, data []byte) *RtAttr { - attr := newRtAttr(attrType, data) - parent.children = append(parent.children, attr) - return attr -} - -func (a *RtAttr) Len() int { - if len(a.children) == 0 { - return (syscall.SizeofRtAttr + len(a.Data)) - } - - l := 0 - for _, child := range a.children { - l += child.Len() - } - l += syscall.SizeofRtAttr - return rtaAlignOf(l + len(a.Data)) -} - -func (a *RtAttr) ToWireFormat() []byte { - length := a.Len() - buf := make([]byte, rtaAlignOf(length)) - - if a.Data != nil { - copy(buf[4:], a.Data) - } else { - next := 4 - for _, child := range a.children { - childBuf := child.ToWireFormat() - copy(buf[next:], childBuf) - next += rtaAlignOf(len(childBuf)) - } - } - - if l := uint16(length); l != 0 { - native.PutUint16(buf[0:2], l) - } - native.PutUint16(buf[2:4], a.Type) - return buf -} - -func uint32Attr(t int, n uint32) *RtAttr { - buf := make([]byte, 4) - native.PutUint32(buf, n) - return newRtAttr(t, buf) -} - -type NetlinkRequest struct { - syscall.NlMsghdr - Data []NetlinkRequestData -} - -func (rr *NetlinkRequest) ToWireFormat() []byte { - length := rr.Len - dataBytes := make([][]byte, len(rr.Data)) - for i, data := range rr.Data { - dataBytes[i] = data.ToWireFormat() - length += uint32(len(dataBytes[i])) - } - b := make([]byte, length) - native.PutUint32(b[0:4], length) - native.PutUint16(b[4:6], rr.Type) - native.PutUint16(b[6:8], rr.Flags) - native.PutUint32(b[8:12], rr.Seq) - native.PutUint32(b[12:16], rr.Pid) - - next := 16 - for _, data := range dataBytes { - copy(b[next:], data) - next += len(data) - } - return b -} - -func (rr *NetlinkRequest) AddData(data NetlinkRequestData) { - if data != nil { - rr.Data = append(rr.Data, data) - } -} - -func newNetlinkRequest(proto, flags int) *NetlinkRequest { - return &NetlinkRequest{ - NlMsghdr: syscall.NlMsghdr{ - Len: uint32(syscall.NLMSG_HDRLEN), - Type: uint16(proto), - Flags: syscall.NLM_F_REQUEST | uint16(flags), - Seq: atomic.AddUint32(&nextSeqNr, 1), - }, - } -} - -type NetlinkSocket struct { - fd int - lsa syscall.SockaddrNetlink -} - -func getNetlinkSocket() (*NetlinkSocket, error) { - fd, err := syscall.Socket(syscall.AF_NETLINK, syscall.SOCK_RAW, syscall.NETLINK_ROUTE) - if err != nil { - return nil, err - } - s := &NetlinkSocket{ - fd: fd, - } - s.lsa.Family = syscall.AF_NETLINK - if err := syscall.Bind(fd, &s.lsa); err != nil { - syscall.Close(fd) - return nil, err - } - - return s, nil -} - -func (s *NetlinkSocket) Close() { - syscall.Close(s.fd) -} - -func (s *NetlinkSocket) Send(request *NetlinkRequest) error { - if err := syscall.Sendto(s.fd, request.ToWireFormat(), 0, &s.lsa); err != nil { - return err - } - return nil -} - -func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, error) { - rb := make([]byte, syscall.Getpagesize()) - nr, _, err := syscall.Recvfrom(s.fd, rb, 0) - if err != nil { - return nil, err - } - if nr < syscall.NLMSG_HDRLEN { - return nil, ErrShortResponse - } - rb = rb[:nr] - return syscall.ParseNetlinkMessage(rb) -} - -func (s *NetlinkSocket) GetPid() (uint32, error) { - lsa, err := syscall.Getsockname(s.fd) - if err != nil { - return 0, err - } - switch v := lsa.(type) { - case *syscall.SockaddrNetlink: - return v.Pid, nil - } - return 0, ErrWrongSockType -} - -func (s *NetlinkSocket) CheckMessage(m syscall.NetlinkMessage, seq, pid uint32) error { - if m.Header.Seq != seq { - return fmt.Errorf("netlink: invalid seq %d, expected %d", m.Header.Seq, seq) - } - if m.Header.Pid != pid { - return fmt.Errorf("netlink: wrong pid %d, expected %d", m.Header.Pid, pid) - } - if m.Header.Type == syscall.NLMSG_DONE { - return io.EOF - } - if m.Header.Type == syscall.NLMSG_ERROR { - e := int32(native.Uint32(m.Data[0:4])) - if e == 0 { - return io.EOF - } - return syscall.Errno(-e) - } - return nil -} - -func (s *NetlinkSocket) HandleAck(seq uint32) error { - pid, err := s.GetPid() - if err != nil { - return err - } - -outer: - for { - msgs, err := s.Receive() - if err != nil { - return err - } - for _, m := range msgs { - if err := s.CheckMessage(m, seq, pid); err != nil { - if err == io.EOF { - break outer - } - return err - } - } - } - - return nil -} - -func zeroTerminated(s string) []byte { - return []byte(s + "\000") -} - -func nonZeroTerminated(s string) []byte { - return []byte(s) -} - -// Add a new network link of a specified type. -// This is identical to running: ip link add $name type $linkType -func NetworkLinkAdd(name string, linkType string) error { - if name == "" || linkType == "" { - return fmt.Errorf("Neither link name nor link type can be empty!") - } - - s, err := getNetlinkSocket() - if err != nil { - return err - } - defer s.Close() - - wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) - - msg := newIfInfomsg(syscall.AF_UNSPEC) - wb.AddData(msg) - - linkInfo := newRtAttr(syscall.IFLA_LINKINFO, nil) - newRtAttrChild(linkInfo, IFLA_INFO_KIND, nonZeroTerminated(linkType)) - wb.AddData(linkInfo) - - nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(name)) - wb.AddData(nameData) - - if err := s.Send(wb); err != nil { - return err - } - - return s.HandleAck(wb.Seq) -} - -// Delete a network link. -// This is identical to running: ip link del $name -func NetworkLinkDel(name string) error { - if name == "" { - return fmt.Errorf("Network link name can not be empty!") - } - - s, err := getNetlinkSocket() - if err != nil { - return err - } - defer s.Close() - - iface, err := net.InterfaceByName(name) - if err != nil { - return err - } - - wb := newNetlinkRequest(syscall.RTM_DELLINK, syscall.NLM_F_ACK) - - msg := newIfInfomsg(syscall.AF_UNSPEC) - msg.Index = int32(iface.Index) - wb.AddData(msg) - - if err := s.Send(wb); err != nil { - return err - } - - return s.HandleAck(wb.Seq) -} - -// Bring up a particular network interface. -// This is identical to running: ip link set dev $name up -func NetworkLinkUp(iface *net.Interface) error { - s, err := getNetlinkSocket() - if err != nil { - return err - } - defer s.Close() - - wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK) - - msg := newIfInfomsg(syscall.AF_UNSPEC) - msg.Index = int32(iface.Index) - msg.Flags = syscall.IFF_UP - msg.Change = syscall.IFF_UP - wb.AddData(msg) - - if err := s.Send(wb); err != nil { - return err - } - - return s.HandleAck(wb.Seq) -} - -// Bring down a particular network interface. -// This is identical to running: ip link set $name down -func NetworkLinkDown(iface *net.Interface) error { - s, err := getNetlinkSocket() - if err != nil { - return err - } - defer s.Close() - - wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK) - - msg := newIfInfomsg(syscall.AF_UNSPEC) - msg.Index = int32(iface.Index) - msg.Flags = 0 & ^syscall.IFF_UP - msg.Change = DEFAULT_CHANGE - wb.AddData(msg) - - if err := s.Send(wb); err != nil { - return err - } - - return s.HandleAck(wb.Seq) -} - -// Set link layer address ie. MAC Address. -// This is identical to running: ip link set dev $name address $macaddress -func NetworkSetMacAddress(iface *net.Interface, macaddr string) error { - s, err := getNetlinkSocket() - if err != nil { - return err - } - defer s.Close() - - hwaddr, err := net.ParseMAC(macaddr) - if err != nil { - return err - } - - var ( - MULTICAST byte = 0x1 - ) - - if hwaddr[0]&0x1 == MULTICAST { - return fmt.Errorf("Multicast MAC Address is not supported: %s", macaddr) - } - - wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) - - msg := newIfInfomsg(syscall.AF_UNSPEC) - msg.Index = int32(iface.Index) - msg.Change = DEFAULT_CHANGE - wb.AddData(msg) - - macdata := make([]byte, 6) - copy(macdata, hwaddr) - data := newRtAttr(IFLA_ADDRESS, macdata) - wb.AddData(data) - - if err := s.Send(wb); err != nil { - return err - } - return s.HandleAck(wb.Seq) -} - -// Set link Maximum Transmission Unit -// This is identical to running: ip link set dev $name mtu $MTU -// bridge is a bitch here https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=292088 -// https://bugzilla.redhat.com/show_bug.cgi?id=697021 -// There is a discussion about how to deal with ifcs joining bridge with MTU > 1500 -// Regular network nterfaces do seem to work though! -func NetworkSetMTU(iface *net.Interface, mtu int) error { - s, err := getNetlinkSocket() - if err != nil { - return err - } - defer s.Close() - - wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) - - msg := newIfInfomsg(syscall.AF_UNSPEC) - msg.Type = syscall.RTM_SETLINK - msg.Flags = syscall.NLM_F_REQUEST - msg.Index = int32(iface.Index) - msg.Change = DEFAULT_CHANGE - wb.AddData(msg) - wb.AddData(uint32Attr(syscall.IFLA_MTU, uint32(mtu))) - - if err := s.Send(wb); err != nil { - return err - } - return s.HandleAck(wb.Seq) -} - -// Set link queue length -// This is identical to running: ip link set dev $name txqueuelen $QLEN -func NetworkSetTxQueueLen(iface *net.Interface, txQueueLen int) error { - s, err := getNetlinkSocket() - if err != nil { - return err - } - defer s.Close() - - wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) - - msg := newIfInfomsg(syscall.AF_UNSPEC) - msg.Type = syscall.RTM_SETLINK - msg.Flags = syscall.NLM_F_REQUEST - msg.Index = int32(iface.Index) - msg.Change = DEFAULT_CHANGE - wb.AddData(msg) - wb.AddData(uint32Attr(syscall.IFLA_TXQLEN, uint32(txQueueLen))) - - if err := s.Send(wb); err != nil { - return err - } - return s.HandleAck(wb.Seq) -} - -func networkMasterAction(iface *net.Interface, rtattr *RtAttr) error { - s, err := getNetlinkSocket() - if err != nil { - return err - } - defer s.Close() - - wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) - - msg := newIfInfomsg(syscall.AF_UNSPEC) - msg.Type = syscall.RTM_SETLINK - msg.Flags = syscall.NLM_F_REQUEST - msg.Index = int32(iface.Index) - msg.Change = DEFAULT_CHANGE - wb.AddData(msg) - wb.AddData(rtattr) - - if err := s.Send(wb); err != nil { - return err - } - - return s.HandleAck(wb.Seq) -} - -// Add an interface to bridge. -// This is identical to running: ip link set $name master $master -func NetworkSetMaster(iface, master *net.Interface) error { - data := uint32Attr(syscall.IFLA_MASTER, uint32(master.Index)) - return networkMasterAction(iface, data) -} - -// Remove an interface from the bridge -// This is is identical to to running: ip link $name set nomaster -func NetworkSetNoMaster(iface *net.Interface) error { - data := uint32Attr(syscall.IFLA_MASTER, 0) - return networkMasterAction(iface, data) -} - -func networkSetNsAction(iface *net.Interface, rtattr *RtAttr) error { - s, err := getNetlinkSocket() - if err != nil { - return err - } - defer s.Close() - - wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK) - msg := newIfInfomsg(syscall.AF_UNSPEC) - msg.Index = int32(iface.Index) - wb.AddData(msg) - wb.AddData(rtattr) - - if err := s.Send(wb); err != nil { - return err - } - - return s.HandleAck(wb.Seq) -} - -// Move a particular network interface to a particular network namespace -// specified by PID. This is identical to running: ip link set dev $name netns $pid -func NetworkSetNsPid(iface *net.Interface, nspid int) error { - data := uint32Attr(syscall.IFLA_NET_NS_PID, uint32(nspid)) - return networkSetNsAction(iface, data) -} - -// Move a particular network interface to a particular mounted -// network namespace specified by file descriptor. -// This is idential to running: ip link set dev $name netns $fd -func NetworkSetNsFd(iface *net.Interface, fd int) error { - data := uint32Attr(IFLA_NET_NS_FD, uint32(fd)) - return networkSetNsAction(iface, data) -} - -// Rename a particular interface to a different name -// !!! Note that you can't rename an active interface. You need to bring it down before renaming it. -// This is identical to running: ip link set dev ${oldName} name ${newName} -func NetworkChangeName(iface *net.Interface, newName string) error { - if len(newName) >= IFNAMSIZ { - return fmt.Errorf("Interface name %s too long", newName) - } - - s, err := getNetlinkSocket() - if err != nil { - return err - } - defer s.Close() - - wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) - - msg := newIfInfomsg(syscall.AF_UNSPEC) - msg.Index = int32(iface.Index) - msg.Change = DEFAULT_CHANGE - wb.AddData(msg) - - nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(newName)) - wb.AddData(nameData) - - if err := s.Send(wb); err != nil { - return err - } - - return s.HandleAck(wb.Seq) -} - -// Add a new VETH pair link on the host -// This is identical to running: ip link add name $name type veth peer name $peername -func NetworkCreateVethPair(name1, name2 string, txQueueLen int) error { - s, err := getNetlinkSocket() - if err != nil { - return err - } - defer s.Close() - - wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) - - msg := newIfInfomsg(syscall.AF_UNSPEC) - wb.AddData(msg) - - nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(name1)) - wb.AddData(nameData) - - txqLen := make([]byte, 4) - native.PutUint32(txqLen, uint32(txQueueLen)) - txqData := newRtAttr(syscall.IFLA_TXQLEN, txqLen) - wb.AddData(txqData) - - nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil) - newRtAttrChild(nest1, IFLA_INFO_KIND, zeroTerminated("veth")) - nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil) - nest3 := newRtAttrChild(nest2, VETH_INFO_PEER, nil) - - newIfInfomsgChild(nest3, syscall.AF_UNSPEC) - newRtAttrChild(nest3, syscall.IFLA_IFNAME, zeroTerminated(name2)) - - txqLen2 := make([]byte, 4) - native.PutUint32(txqLen2, uint32(txQueueLen)) - newRtAttrChild(nest3, syscall.IFLA_TXQLEN, txqLen2) - - wb.AddData(nest1) - - if err := s.Send(wb); err != nil { - return err - } - - if err := s.HandleAck(wb.Seq); err != nil { - if os.IsExist(err) { - return ErrInterfaceExists - } - - return err - } - - return nil -} - -// Add a new VLAN interface with masterDev as its upper device -// This is identical to running: -// ip link add name $name link $masterdev type vlan id $id -func NetworkLinkAddVlan(masterDev, vlanDev string, vlanId uint16) error { - s, err := getNetlinkSocket() - if err != nil { - return err - } - defer s.Close() - - wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) - - masterDevIfc, err := net.InterfaceByName(masterDev) - if err != nil { - return err - } - - msg := newIfInfomsg(syscall.AF_UNSPEC) - wb.AddData(msg) - - nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil) - newRtAttrChild(nest1, IFLA_INFO_KIND, nonZeroTerminated("vlan")) - - nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil) - vlanData := make([]byte, 2) - native.PutUint16(vlanData, vlanId) - newRtAttrChild(nest2, IFLA_VLAN_ID, vlanData) - wb.AddData(nest1) - - wb.AddData(uint32Attr(syscall.IFLA_LINK, uint32(masterDevIfc.Index))) - wb.AddData(newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(vlanDev))) - - if err := s.Send(wb); err != nil { - return err - } - return s.HandleAck(wb.Seq) -} - -// MacVlan link has LowerDev, UpperDev and operates in Mode mode -// This simplifies the code when creating MacVlan or MacVtap interface -type MacVlanLink struct { - MasterDev string - SlaveDev string - mode string -} - -func (m MacVlanLink) Mode() uint32 { - modeMap := map[string]uint32{ - "private": MACVLAN_MODE_PRIVATE, - "vepa": MACVLAN_MODE_VEPA, - "bridge": MACVLAN_MODE_BRIDGE, - "passthru": MACVLAN_MODE_PASSTHRU, - } - - return modeMap[m.mode] -} - -// Add MAC VLAN network interface with masterDev as its upper device -// This is identical to running: -// ip link add name $name link $masterdev type macvlan mode $mode -func networkLinkMacVlan(dev_type string, mcvln *MacVlanLink) error { - s, err := getNetlinkSocket() - if err != nil { - return err - } - defer s.Close() - - wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) - - masterDevIfc, err := net.InterfaceByName(mcvln.MasterDev) - if err != nil { - return err - } - - msg := newIfInfomsg(syscall.AF_UNSPEC) - wb.AddData(msg) - - nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil) - newRtAttrChild(nest1, IFLA_INFO_KIND, nonZeroTerminated(dev_type)) - - nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil) - macVlanData := make([]byte, 4) - native.PutUint32(macVlanData, mcvln.Mode()) - newRtAttrChild(nest2, IFLA_MACVLAN_MODE, macVlanData) - wb.AddData(nest1) - - wb.AddData(uint32Attr(syscall.IFLA_LINK, uint32(masterDevIfc.Index))) - wb.AddData(newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(mcvln.SlaveDev))) - - if err := s.Send(wb); err != nil { - return err - } - return s.HandleAck(wb.Seq) -} - -func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error { - return networkLinkMacVlan("macvlan", &MacVlanLink{ - MasterDev: masterDev, - SlaveDev: macVlanDev, - mode: mode, - }) -} - -func NetworkLinkAddMacVtap(masterDev, macVlanDev string, mode string) error { - return networkLinkMacVlan("macvtap", &MacVlanLink{ - MasterDev: masterDev, - SlaveDev: macVlanDev, - mode: mode, - }) -} - -func networkLinkIpAction(action, flags int, ifa IfAddr) error { - s, err := getNetlinkSocket() - if err != nil { - return err - } - defer s.Close() - - family := getIpFamily(ifa.IP) - - wb := newNetlinkRequest(action, flags) - - msg := newIfAddrmsg(family) - msg.Index = uint32(ifa.Iface.Index) - prefixLen, _ := ifa.IPNet.Mask.Size() - msg.Prefixlen = uint8(prefixLen) - wb.AddData(msg) - - var ipData []byte - if family == syscall.AF_INET { - ipData = ifa.IP.To4() - } else { - ipData = ifa.IP.To16() - } - - localData := newRtAttr(syscall.IFA_LOCAL, ipData) - wb.AddData(localData) - - addrData := newRtAttr(syscall.IFA_ADDRESS, ipData) - wb.AddData(addrData) - - if err := s.Send(wb); err != nil { - return err - } - - return s.HandleAck(wb.Seq) -} - -// Delete an IP address from an interface. This is identical to: -// ip addr del $ip/$ipNet dev $iface -func NetworkLinkDelIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error { - return networkLinkIpAction( - syscall.RTM_DELADDR, - syscall.NLM_F_ACK, - IfAddr{iface, ip, ipNet}, - ) -} - -// Add an Ip address to an interface. This is identical to: -// ip addr add $ip/$ipNet dev $iface -func NetworkLinkAddIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error { - return networkLinkIpAction( - syscall.RTM_NEWADDR, - syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK, - IfAddr{iface, ip, ipNet}, - ) -} - -// Returns an array of IPNet for all the currently routed subnets on ipv4 -// This is similar to the first column of "ip route" output -func NetworkGetRoutes() ([]Route, error) { - s, err := getNetlinkSocket() - if err != nil { - return nil, err - } - defer s.Close() - - wb := newNetlinkRequest(syscall.RTM_GETROUTE, syscall.NLM_F_DUMP) - - msg := newIfInfomsg(syscall.AF_UNSPEC) - wb.AddData(msg) - - if err := s.Send(wb); err != nil { - return nil, err - } - - pid, err := s.GetPid() - if err != nil { - return nil, err - } - - res := make([]Route, 0) - -outer: - for { - msgs, err := s.Receive() - if err != nil { - return nil, err - } - for _, m := range msgs { - if err := s.CheckMessage(m, wb.Seq, pid); err != nil { - if err == io.EOF { - break outer - } - return nil, err - } - if m.Header.Type != syscall.RTM_NEWROUTE { - continue - } - - var r Route - - msg := (*RtMsg)(unsafe.Pointer(&m.Data[0:syscall.SizeofRtMsg][0])) - - if msg.Flags&syscall.RTM_F_CLONED != 0 { - // Ignore cloned routes - continue - } - - if msg.Table != syscall.RT_TABLE_MAIN { - // Ignore non-main tables - continue - } - - if msg.Family != syscall.AF_INET { - // Ignore non-ipv4 routes - continue - } - - if msg.Dst_len == 0 { - // Default routes - r.Default = true - } - - attrs, err := syscall.ParseNetlinkRouteAttr(&m) - if err != nil { - return nil, err - } - for _, attr := range attrs { - switch attr.Attr.Type { - case syscall.RTA_DST: - ip := attr.Value - r.IPNet = &net.IPNet{ - IP: ip, - Mask: net.CIDRMask(int(msg.Dst_len), 8*len(ip)), - } - case syscall.RTA_OIF: - index := int(native.Uint32(attr.Value[0:4])) - r.Iface, _ = net.InterfaceByIndex(index) - } - } - if r.Default || r.IPNet != nil { - res = append(res, r) - } - } - } - - return res, nil -} - -// Add a new route table entry. -func AddRoute(destination, source, gateway, device string) error { - if destination == "" && source == "" && gateway == "" { - return fmt.Errorf("one of destination, source or gateway must not be blank") - } - - s, err := getNetlinkSocket() - if err != nil { - return err - } - defer s.Close() - - wb := newNetlinkRequest(syscall.RTM_NEWROUTE, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) - msg := newRtMsg() - currentFamily := -1 - var rtAttrs []*RtAttr - - if destination != "" { - destIP, destNet, err := net.ParseCIDR(destination) - if err != nil { - return fmt.Errorf("destination CIDR %s couldn't be parsed", destination) - } - destFamily := getIpFamily(destIP) - currentFamily = destFamily - destLen, bits := destNet.Mask.Size() - if destLen == 0 && bits == 0 { - return fmt.Errorf("destination CIDR %s generated a non-canonical Mask", destination) - } - msg.Family = uint8(destFamily) - msg.Dst_len = uint8(destLen) - var destData []byte - if destFamily == syscall.AF_INET { - destData = destIP.To4() - } else { - destData = destIP.To16() - } - rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_DST, destData)) - } - - if source != "" { - srcIP := net.ParseIP(source) - if srcIP == nil { - return fmt.Errorf("source IP %s couldn't be parsed", source) - } - srcFamily := getIpFamily(srcIP) - if currentFamily != -1 && currentFamily != srcFamily { - return fmt.Errorf("source and destination ip were not the same IP family") - } - currentFamily = srcFamily - msg.Family = uint8(srcFamily) - var srcData []byte - if srcFamily == syscall.AF_INET { - srcData = srcIP.To4() - } else { - srcData = srcIP.To16() - } - rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_PREFSRC, srcData)) - } - - if gateway != "" { - gwIP := net.ParseIP(gateway) - if gwIP == nil { - return fmt.Errorf("gateway IP %s couldn't be parsed", gateway) - } - gwFamily := getIpFamily(gwIP) - if currentFamily != -1 && currentFamily != gwFamily { - return fmt.Errorf("gateway, source, and destination ip were not the same IP family") - } - msg.Family = uint8(gwFamily) - var gwData []byte - if gwFamily == syscall.AF_INET { - gwData = gwIP.To4() - } else { - gwData = gwIP.To16() - } - rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_GATEWAY, gwData)) - } - - wb.AddData(msg) - for _, attr := range rtAttrs { - wb.AddData(attr) - } - - iface, err := net.InterfaceByName(device) - if err != nil { - return err - } - wb.AddData(uint32Attr(syscall.RTA_OIF, uint32(iface.Index))) - - if err := s.Send(wb); err != nil { - return err - } - return s.HandleAck(wb.Seq) -} - -// Add a new default gateway. Identical to: -// ip route add default via $ip -func AddDefaultGw(ip, device string) error { - return AddRoute("", "", ip, device) -} - -// THIS CODE DOES NOT COMMUNICATE WITH KERNEL VIA RTNETLINK INTERFACE -// IT IS HERE FOR BACKWARDS COMPATIBILITY WITH OLDER LINUX KERNELS -// WHICH SHIP WITH OLDER NOT ENTIRELY FUNCTIONAL VERSION OF NETLINK -func getIfSocket() (fd int, err error) { - for _, socket := range []int{ - syscall.AF_INET, - syscall.AF_PACKET, - syscall.AF_INET6, - } { - if fd, err = syscall.Socket(socket, syscall.SOCK_DGRAM, 0); err == nil { - break - } - } - if err == nil { - return fd, nil - } - return -1, err -} - -// Create the actual bridge device. This is more backward-compatible than -// netlink.NetworkLinkAdd and works on RHEL 6. -func CreateBridge(name string, setMacAddr bool) error { - if len(name) >= IFNAMSIZ { - return fmt.Errorf("Interface name %s too long", name) - } - - s, err := getIfSocket() - if err != nil { - return err - } - defer syscall.Close(s) - - nameBytePtr, err := syscall.BytePtrFromString(name) - if err != nil { - return err - } - if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), SIOC_BRADDBR, uintptr(unsafe.Pointer(nameBytePtr))); err != 0 { - return err - } - if setMacAddr { - return SetMacAddress(name, randMacAddr()) - } - return nil -} - -// Delete the actual bridge device. -func DeleteBridge(name string) error { - s, err := getIfSocket() - if err != nil { - return err - } - defer syscall.Close(s) - - nameBytePtr, err := syscall.BytePtrFromString(name) - if err != nil { - return err - } - - var ifr ifreqFlags - copy(ifr.IfrnName[:len(ifr.IfrnName)-1], []byte(name)) - if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), - syscall.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifr))); err != 0 { - return err - } - - if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), - SIOC_BRDELBR, uintptr(unsafe.Pointer(nameBytePtr))); err != 0 { - return err - } - return nil -} - -func ifIoctBridge(iface, master *net.Interface, op uintptr) error { - if len(master.Name) >= IFNAMSIZ { - return fmt.Errorf("Interface name %s too long", master.Name) - } - - s, err := getIfSocket() - if err != nil { - return err - } - defer syscall.Close(s) - - ifr := ifreqIndex{} - copy(ifr.IfrnName[:len(ifr.IfrnName)-1], master.Name) - ifr.IfruIndex = int32(iface.Index) - - if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), op, uintptr(unsafe.Pointer(&ifr))); err != 0 { - return err - } - - return nil -} - -// Add a slave to a bridge device. This is more backward-compatible than -// netlink.NetworkSetMaster and works on RHEL 6. -func AddToBridge(iface, master *net.Interface) error { - return ifIoctBridge(iface, master, SIOC_BRADDIF) -} - -// Detach a slave from a bridge device. This is more backward-compatible than -// netlink.NetworkSetMaster and works on RHEL 6. -func DelFromBridge(iface, master *net.Interface) error { - return ifIoctBridge(iface, master, SIOC_BRDELIF) -} - -func randMacAddr() string { - hw := make(net.HardwareAddr, 6) - for i := 0; i < 6; i++ { - hw[i] = byte(rnd.Intn(255)) - } - hw[0] &^= 0x1 // clear multicast bit - hw[0] |= 0x2 // set local assignment bit (IEEE802) - return hw.String() -} - -func SetMacAddress(name, addr string) error { - if len(name) >= IFNAMSIZ { - return fmt.Errorf("Interface name %s too long", name) - } - - hw, err := net.ParseMAC(addr) - if err != nil { - return err - } - - s, err := getIfSocket() - if err != nil { - return err - } - defer syscall.Close(s) - - ifr := ifreqHwaddr{} - ifr.IfruHwaddr.Family = syscall.ARPHRD_ETHER - copy(ifr.IfrnName[:len(ifr.IfrnName)-1], name) - - for i := 0; i < 6; i++ { - ifr.IfruHwaddr.Data[i] = ifrDataByte(hw[i]) - } - - if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), syscall.SIOCSIFHWADDR, uintptr(unsafe.Pointer(&ifr))); err != 0 { - return err - } - return nil -} - -func SetHairpinMode(iface *net.Interface, enabled bool) error { - s, err := getNetlinkSocket() - if err != nil { - return err - } - defer s.Close() - req := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) - - msg := newIfInfomsg(syscall.AF_BRIDGE) - msg.Type = syscall.RTM_SETLINK - msg.Flags = syscall.NLM_F_REQUEST - msg.Index = int32(iface.Index) - msg.Change = DEFAULT_CHANGE - req.AddData(msg) - - mode := []byte{0} - if enabled { - mode[0] = byte(1) - } - - br := newRtAttr(syscall.IFLA_PROTINFO|syscall.NLA_F_NESTED, nil) - newRtAttrChild(br, IFLA_BRPORT_MODE, mode) - req.AddData(br) - if err := s.Send(req); err != nil { - return err - } - - return s.HandleAck(req.Seq) -} - -func ChangeName(iface *net.Interface, newName string) error { - if len(newName) >= IFNAMSIZ { - return fmt.Errorf("Interface name %s too long", newName) - } - - fd, err := getIfSocket() - if err != nil { - return err - } - defer syscall.Close(fd) - - data := [IFNAMSIZ * 2]byte{} - // the "-1"s here are very important for ensuring we get proper null - // termination of our new C strings - copy(data[:IFNAMSIZ-1], iface.Name) - copy(data[IFNAMSIZ:IFNAMSIZ*2-1], newName) - - if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.SIOCSIFNAME, uintptr(unsafe.Pointer(&data[0]))); errno != 0 { - return errno - } - - return nil -} diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux_armppc64.go b/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux_armppc64.go deleted file mode 100644 index 965e0bfbc7..0000000000 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux_armppc64.go +++ /dev/null @@ -1,7 +0,0 @@ -// +build arm ppc64 ppc64le - -package netlink - -func ifrDataByte(b byte) uint8 { - return uint8(b) -} diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux_notarm.go b/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux_notarm.go deleted file mode 100644 index 7446279892..0000000000 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux_notarm.go +++ /dev/null @@ -1,7 +0,0 @@ -// +build !arm,!ppc64,!ppc64le - -package netlink - -func ifrDataByte(b byte) int8 { - return int8(b) -} diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_unsupported.go b/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_unsupported.go deleted file mode 100644 index 4b11bf8ba5..0000000000 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_unsupported.go +++ /dev/null @@ -1,88 +0,0 @@ -// +build !linux - -package netlink - -import ( - "errors" - "net" -) - -var ( - ErrNotImplemented = errors.New("not implemented") -) - -func NetworkGetRoutes() ([]Route, error) { - return nil, ErrNotImplemented -} - -func NetworkLinkAdd(name string, linkType string) error { - return ErrNotImplemented -} - -func NetworkLinkDel(name string) error { - return ErrNotImplemented -} - -func NetworkLinkUp(iface *net.Interface) error { - return ErrNotImplemented -} - -func NetworkLinkAddIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error { - return ErrNotImplemented -} - -func NetworkLinkDelIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error { - return ErrNotImplemented -} - -func AddRoute(destination, source, gateway, device string) error { - return ErrNotImplemented -} - -func AddDefaultGw(ip, device string) error { - return ErrNotImplemented -} - -func NetworkSetMTU(iface *net.Interface, mtu int) error { - return ErrNotImplemented -} - -func NetworkSetTxQueueLen(iface *net.Interface, txQueueLen int) error { - return ErrNotImplemented -} - -func NetworkCreateVethPair(name1, name2 string, txQueueLen int) error { - return ErrNotImplemented -} - -func NetworkChangeName(iface *net.Interface, newName string) error { - return ErrNotImplemented -} - -func NetworkSetNsFd(iface *net.Interface, fd int) error { - return ErrNotImplemented -} - -func NetworkSetNsPid(iface *net.Interface, nspid int) error { - return ErrNotImplemented -} - -func NetworkSetMaster(iface, master *net.Interface) error { - return ErrNotImplemented -} - -func NetworkLinkDown(iface *net.Interface) error { - return ErrNotImplemented -} - -func CreateBridge(name string, setMacAddr bool) error { - return ErrNotImplemented -} - -func DeleteBridge(name string) error { - return ErrNotImplemented -} - -func AddToBridge(iface, master *net.Interface) error { - return ErrNotImplemented -} diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/network_linux.go b/vendor/src/github.com/opencontainers/runc/libcontainer/network_linux.go index fc564a0f75..ce93277a52 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/network_linux.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/network_linux.go @@ -11,8 +11,8 @@ import ( "strings" "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/netlink" "github.com/opencontainers/runc/libcontainer/utils" + "github.com/vishvananda/netlink" ) var strategies = map[string]networkStrategy{ @@ -93,11 +93,7 @@ func (l *loopback) create(n *network, nspid int) error { } func (l *loopback) initialize(config *network) error { - iface, err := net.InterfaceByName("lo") - if err != nil { - return err - } - return netlink.NetworkLinkUp(iface) + return netlink.LinkSetUp(&netlink.Device{netlink.LinkAttrs{Name: "lo"}}) } func (l *loopback) attach(n *configs.Network) (err error) { @@ -115,42 +111,36 @@ type veth struct { } func (v *veth) detach(n *configs.Network) (err error) { - bridge, err := net.InterfaceByName(n.Bridge) - if err != nil { - return err - } - host, err := net.InterfaceByName(n.HostInterfaceName) - if err != nil { - return err - } - if err := netlink.DelFromBridge(host, bridge); err != nil { - return err - } - return nil + return netlink.LinkSetMaster(&netlink.Device{netlink.LinkAttrs{Name: n.HostInterfaceName}}, nil) } // attach a container network interface to an external network func (v *veth) attach(n *configs.Network) (err error) { - bridge, err := net.InterfaceByName(n.Bridge) + brl, err := netlink.LinkByName(n.Bridge) if err != nil { return err } - host, err := net.InterfaceByName(n.HostInterfaceName) + br, ok := brl.(*netlink.Bridge) + if !ok { + return fmt.Errorf("Wrong device type %T", brl) + } + host, err := netlink.LinkByName(n.HostInterfaceName) if err != nil { return err } - if err := netlink.AddToBridge(host, bridge); err != nil { + + if err := netlink.LinkSetMaster(host, br); err != nil { return err } - if err := netlink.NetworkSetMTU(host, n.Mtu); err != nil { + if err := netlink.LinkSetMTU(host, n.Mtu); err != nil { return err } if n.HairpinMode { - if err := netlink.SetHairpinMode(host, true); err != nil { + if err := netlink.LinkSetHairpin(host, true); err != nil { return err } } - if err := netlink.NetworkLinkUp(host); err != nil { + if err := netlink.LinkSetUp(host); err != nil { return err } @@ -163,26 +153,32 @@ func (v *veth) create(n *network, nspid int) (err error) { return err } n.TempVethPeerName = tmpName - defer func() { - if err != nil { - netlink.NetworkLinkDel(n.HostInterfaceName) - netlink.NetworkLinkDel(n.TempVethPeerName) - } - }() if n.Bridge == "" { return fmt.Errorf("bridge is not specified") } - if err := netlink.NetworkCreateVethPair(n.HostInterfaceName, n.TempVethPeerName, n.TxQueueLen); err != nil { + veth := &netlink.Veth{ + LinkAttrs: netlink.LinkAttrs{ + Name: n.HostInterfaceName, + TxQLen: n.TxQueueLen, + }, + PeerName: n.TempVethPeerName, + } + if err := netlink.LinkAdd(veth); err != nil { return err } + defer func() { + if err != nil { + netlink.LinkDel(veth) + } + }() if err := v.attach(&n.Network); err != nil { return err } - child, err := net.InterfaceByName(n.TempVethPeerName) + child, err := netlink.LinkByName(n.TempVethPeerName) if err != nil { return err } - return netlink.NetworkSetNsPid(child, nspid) + return netlink.LinkSetNsPid(child, nspid) } func (v *veth) generateTempPeerName() (string, error) { @@ -194,53 +190,68 @@ func (v *veth) initialize(config *network) error { if peer == "" { return fmt.Errorf("peer is not specified") } - child, err := net.InterfaceByName(peer) + child, err := netlink.LinkByName(peer) if err != nil { return err } - if err := netlink.NetworkLinkDown(child); err != nil { + if err := netlink.LinkSetDown(child); err != nil { return err } - if err := netlink.NetworkChangeName(child, config.Name); err != nil { + if err := netlink.LinkSetName(child, config.Name); err != nil { return err } // get the interface again after we changed the name as the index also changes. - if child, err = net.InterfaceByName(config.Name); err != nil { + if child, err = netlink.LinkByName(config.Name); err != nil { return err } if config.MacAddress != "" { - if err := netlink.NetworkSetMacAddress(child, config.MacAddress); err != nil { + mac, err := net.ParseMAC(config.MacAddress) + if err != nil { + return err + } + if err := netlink.LinkSetHardwareAddr(child, mac); err != nil { return err } } - ip, ipNet, err := net.ParseCIDR(config.Address) + ip, err := netlink.ParseAddr(config.Address) if err != nil { return err } - if err := netlink.NetworkLinkAddIp(child, ip, ipNet); err != nil { + if err := netlink.AddrAdd(child, ip); err != nil { return err } if config.IPv6Address != "" { - if ip, ipNet, err = net.ParseCIDR(config.IPv6Address); err != nil { + ip6, err := netlink.ParseAddr(config.IPv6Address) + if err != nil { return err } - if err := netlink.NetworkLinkAddIp(child, ip, ipNet); err != nil { + if err := netlink.AddrAdd(child, ip6); err != nil { return err } } - if err := netlink.NetworkSetMTU(child, config.Mtu); err != nil { + if err := netlink.LinkSetMTU(child, config.Mtu); err != nil { return err } - if err := netlink.NetworkLinkUp(child); err != nil { + if err := netlink.LinkSetUp(child); err != nil { return err } if config.Gateway != "" { - if err := netlink.AddDefaultGw(config.Gateway, config.Name); err != nil { + gw := net.ParseIP(config.Gateway) + if err := netlink.RouteAdd(&netlink.Route{ + Scope: netlink.SCOPE_UNIVERSE, + LinkIndex: child.Attrs().Index, + Gw: gw, + }); err != nil { return err } } if config.IPv6Gateway != "" { - if err := netlink.AddDefaultGw(config.IPv6Gateway, config.Name); err != nil { + gw := net.ParseIP(config.IPv6Gateway) + if err := netlink.RouteAdd(&netlink.Route{ + Scope: netlink.SCOPE_UNIVERSE, + LinkIndex: child.Attrs().Index, + Gw: gw, + }); err != nil { return err } } diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c b/vendor/src/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c index cd02d00a0a..69b077bfb5 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c @@ -65,11 +65,11 @@ static int clone_parent(jmp_buf * env) void nsexec() { - char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt" }; + char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt", "user" }; const int num = sizeof(namespaces) / sizeof(char *); jmp_buf env; char buf[PATH_MAX], *val; - int i, tfd, child, len, pipenum, consolefd = -1; + int i, tfd, self_tfd, child, len, pipenum, consolefd = -1; pid_t pid; char *console; @@ -114,17 +114,30 @@ void nsexec() exit(1); } + self_tfd = open("/proc/self/ns", O_DIRECTORY | O_RDONLY); + if (self_tfd == -1) { + pr_perror("Failed to open /proc/self/ns"); + exit(1); + } + for (i = 0; i < num; i++) { struct stat st; + struct stat self_st; int fd; /* Symlinks on all namespaces exist for dead processes, but they can't be opened */ - if (fstatat(tfd, namespaces[i], &st, AT_SYMLINK_NOFOLLOW) == -1) { + if (fstatat(tfd, namespaces[i], &st, 0) == -1) { // Ignore nonexistent namespaces. if (errno == ENOENT) continue; } + /* Skip namespaces we're already part of */ + if (fstatat(self_tfd, namespaces[i], &self_st, 0) != -1 && + st.st_ino == self_st.st_ino) { + continue; + } + fd = openat(tfd, namespaces[i], O_RDONLY); if (fd == -1) { pr_perror("Failed to open ns file %s for ns %s", buf, @@ -139,6 +152,9 @@ void nsexec() close(fd); } + close(self_tfd); + close(tfd); + if (setjmp(env) == 1) { // Child diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/process_linux.go b/vendor/src/github.com/opencontainers/runc/libcontainer/process_linux.go index f191c16ee2..0072e19483 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/process_linux.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/process_linux.go @@ -203,9 +203,10 @@ func (p *initProcess) start() (err error) { }() if p.config.Config.Hooks != nil { s := configs.HookState{ - ID: p.container.id, - Pid: p.pid(), - Root: p.config.Config.Rootfs, + Version: p.container.config.Version, + ID: p.container.id, + Pid: p.pid(), + Root: p.config.Config.Rootfs, } for _, hook := range p.config.Config.Hooks.Prestart { if err := hook.Run(s); err != nil { diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/rootfs_linux.go b/vendor/src/github.com/opencontainers/runc/libcontainer/rootfs_linux.go index ecdc7ca64f..3e3a7d2e02 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/rootfs_linux.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/rootfs_linux.go @@ -68,7 +68,7 @@ func setupRootfs(config *configs.Config, console *linuxConsole) (err error) { return newSystemError(err) } if !setupDev { - if err := reOpenDevNull(config.Rootfs); err != nil { + if err := reOpenDevNull(); err != nil { return newSystemError(err) } } @@ -96,7 +96,6 @@ func mountCmd(cmd configs.Command) error { func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { var ( dest = m.Destination - data = label.FormatMountLabel(m.Data, mountLabel) ) if !strings.HasPrefix(dest, rootfs) { dest = filepath.Join(rootfs, dest) @@ -107,12 +106,12 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { if err := os.MkdirAll(dest, 0755); err != nil { return err } - return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), "") + return mountPropagate(m, rootfs, mountLabel) case "mqueue": if err := os.MkdirAll(dest, 0755); err != nil { return err } - if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), ""); err != nil { + if err := mountPropagate(m, rootfs, mountLabel); err != nil { return err } return label.SetFileLabel(dest, mountLabel) @@ -123,7 +122,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { return err } } - if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data); err != nil { + if err := mountPropagate(m, rootfs, mountLabel); err != nil { return err } if stat != nil { @@ -136,12 +135,12 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { if err := os.MkdirAll(dest, 0755); err != nil { return err } - return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data) + return mountPropagate(m, rootfs, mountLabel) case "securityfs": if err := os.MkdirAll(dest, 0755); err != nil { return err } - return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data) + return mountPropagate(m, rootfs, mountLabel) case "bind": stat, err := os.Stat(m.Source) if err != nil { @@ -159,16 +158,17 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { if err := checkMountDestination(rootfs, dest); err != nil { return err } + // update the mount with the correct dest after symlinks are resolved. + m.Destination = dest if err := createIfNotExists(dest, stat.IsDir()); err != nil { return err } - if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data); err != nil { + if err := mountPropagate(m, rootfs, mountLabel); err != nil { return err } - if m.Flags&syscall.MS_RDONLY != 0 { - if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags|syscall.MS_REMOUNT), ""); err != nil { - return err - } + // bind mount won't change mount options, we need remount to make mount options effective. + if err := remount(m, rootfs); err != nil { + return err } if m.Relabel != "" { if err := label.Validate(m.Relabel); err != nil { @@ -179,11 +179,6 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { return err } } - if m.Flags&syscall.MS_PRIVATE != 0 { - if err := syscall.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil { - return err - } - } case "cgroup": binds, err := getCgroupMounts(m) if err != nil { @@ -197,11 +192,12 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { } } tmpfs := &configs.Mount{ - Source: "tmpfs", - Device: "tmpfs", - Destination: m.Destination, - Flags: defaultMountFlags, - Data: "mode=755", + Source: "tmpfs", + Device: "tmpfs", + Destination: m.Destination, + Flags: defaultMountFlags, + Data: "mode=755", + PropagationFlags: m.PropagationFlags, } if err := mountToRootfs(tmpfs, rootfs, mountLabel); err != nil { return err @@ -236,8 +232,11 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { } if m.Flags&syscall.MS_RDONLY != 0 { // remount cgroup root as readonly - rootfsCgroup := filepath.Join(rootfs, m.Destination) - if err := syscall.Mount("", rootfsCgroup, "", defaultMountFlags|syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil { + mcgrouproot := &configs.Mount{ + Destination: m.Destination, + Flags: defaultMountFlags | syscall.MS_RDONLY, + } + if err := remount(mcgrouproot, rootfs); err != nil { return err } } @@ -253,10 +252,15 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) { return nil, err } + cgroupPaths, err := cgroups.ParseCgroupFile("/proc/self/cgroup") + if err != nil { + return nil, err + } + var binds []*configs.Mount for _, mm := range mounts { - dir, err := mm.GetThisCgroupDir() + dir, err := mm.GetThisCgroupDir(cgroupPaths) if err != nil { return nil, err } @@ -265,10 +269,11 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) { return nil, err } binds = append(binds, &configs.Mount{ - Device: "bind", - Source: filepath.Join(mm.Mountpoint, relDir), - Destination: filepath.Join(m.Destination, strings.Join(mm.Subsystems, ",")), - Flags: syscall.MS_BIND | syscall.MS_REC | m.Flags, + Device: "bind", + Source: filepath.Join(mm.Mountpoint, relDir), + Destination: filepath.Join(m.Destination, strings.Join(mm.Subsystems, ",")), + Flags: syscall.MS_BIND | syscall.MS_REC | m.Flags, + PropagationFlags: m.PropagationFlags, }) } @@ -325,7 +330,7 @@ func setupDevSymlinks(rootfs string) error { // this method will make them point to `/dev/null` in this container's rootfs. This // needs to be called after we chroot/pivot into the container's rootfs so that any // symlinks are resolved locally. -func reOpenDevNull(rootfs string) error { +func reOpenDevNull() error { var stat, devNullStat syscall.Stat_t file, err := os.Open("/dev/null") if err != nil { @@ -430,7 +435,7 @@ func setupPtmx(config *configs.Config, console *linuxConsole) error { return fmt.Errorf("symlink dev ptmx %s", err) } if console != nil { - return console.mount(config.Rootfs, config.MountLabel, 0, 0) + return console.mount(config.Rootfs, config.MountLabel) } return nil } @@ -529,3 +534,40 @@ func writeSystemProperty(key, value string) error { keyPath := strings.Replace(key, ".", "/", -1) return ioutil.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0644) } + +func remount(m *configs.Mount, rootfs string) error { + var ( + dest = m.Destination + ) + if !strings.HasPrefix(dest, rootfs) { + dest = filepath.Join(rootfs, dest) + } + + if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags|syscall.MS_REMOUNT), ""); err != nil { + return err + } + return nil +} + +// Do the mount operation followed by additional mounts required to take care +// of propagation flags. +func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error { + var ( + dest = m.Destination + data = label.FormatMountLabel(m.Data, mountLabel) + ) + if !strings.HasPrefix(dest, rootfs) { + dest = filepath.Join(rootfs, dest) + } + + if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data); err != nil { + return err + } + + for _, pflag := range m.PropagationFlags { + if err := syscall.Mount("", dest, "", uintptr(pflag), ""); err != nil { + return err + } + } + return nil +} diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go b/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go index 58bdbf6d63..1e9ccf8f2d 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go @@ -37,6 +37,18 @@ func InitSeccomp(config *configs.Seccomp) error { return fmt.Errorf("error creating filter: %s", err) } + // Add extra architectures + for _, arch := range config.Architectures { + scmpArch, err := libseccomp.GetArchFromString(arch) + if err != nil { + return err + } + + if err := filter.AddArch(scmpArch); err != nil { + return err + } + } + // Unset no new privs bit if err := filter.SetNoNewPrivsBit(false); err != nil { return fmt.Errorf("error setting no new privileges: %s", err)