Browse Source

Bump libcontainer to 08b5415ffa3769ff7c1d2f673f613

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
Michael Crosby 9 years ago
parent
commit
6bd3e6357c

+ 1 - 1
hack/vendor.sh

@@ -42,7 +42,7 @@ clone git github.com/endophage/gotuf 9bcdad0308e34a49f38448b8ad436ad8860825ce
 clone git github.com/jfrazelle/go 6e461eb70cb4187b41a84e9a567d7137bdbe0f16
 clone git github.com/agl/ed25519 d2b94fd789ea21d12fac1a4443dd3a3f79cda72c
 
-clone git github.com/opencontainers/runc v0.0.4 # libcontainer
+clone git github.com/opencontainers/runc 08b5415ffa3769ff7c1d2f673f61382d69aabb7d # libcontainer
 # libcontainer deps (see src/github.com/docker/libcontainer/update-vendor.sh)
 clone git github.com/coreos/go-systemd v3
 clone git github.com/godbus/dbus v2

+ 14 - 14
vendor/src/github.com/opencontainers/runc/libcontainer/README.md

@@ -32,20 +32,20 @@ struct describing how the container is to be created.  A sample would look simil
 config := &configs.Config{
     Rootfs: rootfs,
     Capabilities: []string{
-        "CHOWN",
-        "DAC_OVERRIDE",
-        "FSETID",
-        "FOWNER",
-        "MKNOD",
-        "NET_RAW",
-        "SETGID",
-        "SETUID",
-        "SETFCAP",
-        "SETPCAP",
-        "NET_BIND_SERVICE",
-        "SYS_CHROOT",
-        "KILL",
-        "AUDIT_WRITE",
+        "CAP_CHOWN",
+        "CAP_DAC_OVERRIDE",
+        "CAP_FSETID",
+        "CAP_FOWNER",
+        "CAP_MKNOD",
+        "CAP_NET_RAW",
+        "CAP_SETGID",
+        "CAP_SETUID",
+        "CAP_SETFCAP",
+        "CAP_SETPCAP",
+        "CAP_NET_BIND_SERVICE",
+        "CAP_SYS_CHROOT",
+        "CAP_KILL",
+        "CAP_AUDIT_WRITE",
     },
     Namespaces: configs.Namespaces([]configs.Namespace{
         {Type: configs.NEWNS},

+ 38 - 38
vendor/src/github.com/opencontainers/runc/libcontainer/capabilities_linux.go

@@ -12,44 +12,44 @@ import (
 const allCapabilityTypes = capability.CAPS | capability.BOUNDS
 
 var capabilityList = map[string]capability.Cap{
-	"SETPCAP":          capability.CAP_SETPCAP,
-	"SYS_MODULE":       capability.CAP_SYS_MODULE,
-	"SYS_RAWIO":        capability.CAP_SYS_RAWIO,
-	"SYS_PACCT":        capability.CAP_SYS_PACCT,
-	"SYS_ADMIN":        capability.CAP_SYS_ADMIN,
-	"SYS_NICE":         capability.CAP_SYS_NICE,
-	"SYS_RESOURCE":     capability.CAP_SYS_RESOURCE,
-	"SYS_TIME":         capability.CAP_SYS_TIME,
-	"SYS_TTY_CONFIG":   capability.CAP_SYS_TTY_CONFIG,
-	"MKNOD":            capability.CAP_MKNOD,
-	"AUDIT_WRITE":      capability.CAP_AUDIT_WRITE,
-	"AUDIT_CONTROL":    capability.CAP_AUDIT_CONTROL,
-	"MAC_OVERRIDE":     capability.CAP_MAC_OVERRIDE,
-	"MAC_ADMIN":        capability.CAP_MAC_ADMIN,
-	"NET_ADMIN":        capability.CAP_NET_ADMIN,
-	"SYSLOG":           capability.CAP_SYSLOG,
-	"CHOWN":            capability.CAP_CHOWN,
-	"NET_RAW":          capability.CAP_NET_RAW,
-	"DAC_OVERRIDE":     capability.CAP_DAC_OVERRIDE,
-	"FOWNER":           capability.CAP_FOWNER,
-	"DAC_READ_SEARCH":  capability.CAP_DAC_READ_SEARCH,
-	"FSETID":           capability.CAP_FSETID,
-	"KILL":             capability.CAP_KILL,
-	"SETGID":           capability.CAP_SETGID,
-	"SETUID":           capability.CAP_SETUID,
-	"LINUX_IMMUTABLE":  capability.CAP_LINUX_IMMUTABLE,
-	"NET_BIND_SERVICE": capability.CAP_NET_BIND_SERVICE,
-	"NET_BROADCAST":    capability.CAP_NET_BROADCAST,
-	"IPC_LOCK":         capability.CAP_IPC_LOCK,
-	"IPC_OWNER":        capability.CAP_IPC_OWNER,
-	"SYS_CHROOT":       capability.CAP_SYS_CHROOT,
-	"SYS_PTRACE":       capability.CAP_SYS_PTRACE,
-	"SYS_BOOT":         capability.CAP_SYS_BOOT,
-	"LEASE":            capability.CAP_LEASE,
-	"SETFCAP":          capability.CAP_SETFCAP,
-	"WAKE_ALARM":       capability.CAP_WAKE_ALARM,
-	"BLOCK_SUSPEND":    capability.CAP_BLOCK_SUSPEND,
-	"AUDIT_READ":       capability.CAP_AUDIT_READ,
+	"CAP_SETPCAP":          capability.CAP_SETPCAP,
+	"CAP_SYS_MODULE":       capability.CAP_SYS_MODULE,
+	"CAP_SYS_RAWIO":        capability.CAP_SYS_RAWIO,
+	"CAP_SYS_PACCT":        capability.CAP_SYS_PACCT,
+	"CAP_SYS_ADMIN":        capability.CAP_SYS_ADMIN,
+	"CAP_SYS_NICE":         capability.CAP_SYS_NICE,
+	"CAP_SYS_RESOURCE":     capability.CAP_SYS_RESOURCE,
+	"CAP_SYS_TIME":         capability.CAP_SYS_TIME,
+	"CAP_SYS_TTY_CONFIG":   capability.CAP_SYS_TTY_CONFIG,
+	"CAP_MKNOD":            capability.CAP_MKNOD,
+	"CAP_AUDIT_WRITE":      capability.CAP_AUDIT_WRITE,
+	"CAP_AUDIT_CONTROL":    capability.CAP_AUDIT_CONTROL,
+	"CAP_MAC_OVERRIDE":     capability.CAP_MAC_OVERRIDE,
+	"CAP_MAC_ADMIN":        capability.CAP_MAC_ADMIN,
+	"CAP_NET_ADMIN":        capability.CAP_NET_ADMIN,
+	"CAP_SYSLOG":           capability.CAP_SYSLOG,
+	"CAP_CHOWN":            capability.CAP_CHOWN,
+	"CAP_NET_RAW":          capability.CAP_NET_RAW,
+	"CAP_DAC_OVERRIDE":     capability.CAP_DAC_OVERRIDE,
+	"CAP_FOWNER":           capability.CAP_FOWNER,
+	"CAP_DAC_READ_SEARCH":  capability.CAP_DAC_READ_SEARCH,
+	"CAP_FSETID":           capability.CAP_FSETID,
+	"CAP_KILL":             capability.CAP_KILL,
+	"CAP_SETGID":           capability.CAP_SETGID,
+	"CAP_SETUID":           capability.CAP_SETUID,
+	"CAP_LINUX_IMMUTABLE":  capability.CAP_LINUX_IMMUTABLE,
+	"CAP_NET_BIND_SERVICE": capability.CAP_NET_BIND_SERVICE,
+	"CAP_NET_BROADCAST":    capability.CAP_NET_BROADCAST,
+	"CAP_IPC_LOCK":         capability.CAP_IPC_LOCK,
+	"CAP_IPC_OWNER":        capability.CAP_IPC_OWNER,
+	"CAP_SYS_CHROOT":       capability.CAP_SYS_CHROOT,
+	"CAP_SYS_PTRACE":       capability.CAP_SYS_PTRACE,
+	"CAP_SYS_BOOT":         capability.CAP_SYS_BOOT,
+	"CAP_LEASE":            capability.CAP_LEASE,
+	"CAP_SETFCAP":          capability.CAP_SETFCAP,
+	"CAP_WAKE_ALARM":       capability.CAP_WAKE_ALARM,
+	"CAP_BLOCK_SUSPEND":    capability.CAP_BLOCK_SUSPEND,
+	"CAP_AUDIT_READ":       capability.CAP_AUDIT_READ,
 }
 
 func newCapWhitelist(caps []string) (*whitelist, error) {

+ 30 - 15
vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/utils.go

@@ -5,7 +5,6 @@ package cgroups
 import (
 	"bufio"
 	"fmt"
-	"io"
 	"io/ioutil"
 	"os"
 	"path/filepath"
@@ -105,12 +104,12 @@ type Mount struct {
 	Subsystems []string
 }
 
-func (m Mount) GetThisCgroupDir() (string, error) {
+func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
 	if len(m.Subsystems) == 0 {
 		return "", fmt.Errorf("no subsystem for mount")
 	}
 
-	return GetThisCgroupDir(m.Subsystems[0])
+	return getControllerPath(m.Subsystems[0], cgroups)
 }
 
 func GetCgroupMounts() ([]Mount, error) {
@@ -176,23 +175,22 @@ func GetAllSubsystems() ([]string, error) {
 
 // Returns the relative path to the cgroup docker is running in.
 func GetThisCgroupDir(subsystem string) (string, error) {
-	f, err := os.Open("/proc/self/cgroup")
+	cgroups, err := ParseCgroupFile("/proc/self/cgroup")
 	if err != nil {
 		return "", err
 	}
-	defer f.Close()
 
-	return ParseCgroupFile(subsystem, f)
+	return getControllerPath(subsystem, cgroups)
 }
 
 func GetInitCgroupDir(subsystem string) (string, error) {
-	f, err := os.Open("/proc/1/cgroup")
+
+	cgroups, err := ParseCgroupFile("/proc/1/cgroup")
 	if err != nil {
 		return "", err
 	}
-	defer f.Close()
 
-	return ParseCgroupFile(subsystem, f)
+	return getControllerPath(subsystem, cgroups)
 }
 
 func ReadProcsFile(dir string) ([]int, error) {
@@ -219,23 +217,40 @@ func ReadProcsFile(dir string) ([]int, error) {
 	return out, nil
 }
 
-func ParseCgroupFile(subsystem string, r io.Reader) (string, error) {
-	s := bufio.NewScanner(r)
+func ParseCgroupFile(path string) (map[string]string, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	s := bufio.NewScanner(f)
+	cgroups := make(map[string]string)
 
 	for s.Scan() {
 		if err := s.Err(); err != nil {
-			return "", err
+			return nil, err
 		}
 
 		text := s.Text()
 		parts := strings.Split(text, ":")
 
 		for _, subs := range strings.Split(parts[1], ",") {
-			if subs == subsystem || subs == cgroupNamePrefix+subsystem {
-				return parts[2], nil
-			}
+			cgroups[subs] = parts[2]
 		}
 	}
+	return cgroups, nil
+}
+
+func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
+
+	if p, ok := cgroups[subsystem]; ok {
+		return p, nil
+	}
+
+	if p, ok := cgroups[cgroupNamePrefix+subsystem]; ok {
+		return p, nil
+	}
 
 	return "", NewNotFoundError(subsystem)
 }

+ 48 - 0
vendor/src/github.com/opencontainers/runc/libcontainer/configs/mount.go

@@ -1,5 +1,13 @@
 package configs
 
+import (
+	"path/filepath"
+	"strings"
+	"syscall"
+
+	"github.com/opencontainers/runc/libcontainer/label"
+)
+
 type Mount struct {
 	// Source path for the mount.
 	Source string `json:"source"`
@@ -13,6 +21,9 @@ type Mount struct {
 	// Mount flags.
 	Flags int `json:"flags"`
 
+	// Propagation flags; MountPropagate applies each one with its own mount call.
+	PropagationFlags []int `json:"propagation_flags"`
+
 	// Mount data applied to the mount.
 	Data string `json:"data"`
 
@@ -25,3 +36,40 @@ type Mount struct {
 	// Optional Command to be run after Source is mounted.
 	PostmountCmds []Command `json:"postmount_cmds"`
 }
+
+func (m *Mount) Remount(rootfs string) error {
+	var (
+		dest = m.Destination
+	)
+	if !strings.HasPrefix(dest, rootfs) {
+		dest = filepath.Join(rootfs, dest)
+	}
+
+	if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags|syscall.MS_REMOUNT), ""); err != nil {
+		return err
+	}
+	return nil
+}
+
+// MountPropagate performs the mount operation, followed by the additional
+// mounts required to apply the propagation flags.
+func (m *Mount) MountPropagate(rootfs string, mountLabel string) error {
+	var (
+		dest = m.Destination
+		data = label.FormatMountLabel(m.Data, mountLabel)
+	)
+	if !strings.HasPrefix(dest, rootfs) {
+		dest = filepath.Join(rootfs, dest)
+	}
+
+	if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data); err != nil {
+		return err
+	}
+
+	for _, pflag := range m.PropagationFlags {
+		if err := syscall.Mount("", dest, "", uintptr(pflag), ""); err != nil {
+			return err
+		}
+	}
+	return nil
+}

+ 33 - 3
vendor/src/github.com/opencontainers/runc/libcontainer/container_linux.go

@@ -423,7 +423,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
 		return err
 	}
 
-	err = c.criuSwrk(nil, req, criuOpts)
+	err = c.criuSwrk(nil, req, criuOpts, false)
 	if err != nil {
 		return err
 	}
@@ -516,6 +516,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
 			FileLocks:      proto.Bool(criuOpts.FileLocks),
 		},
 	}
+
 	for _, m := range c.config.Mounts {
 		switch m.Device {
 		case "bind":
@@ -573,14 +574,36 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
 		}
 	}
 
-	err = c.criuSwrk(process, req, criuOpts)
+	err = c.criuSwrk(process, req, criuOpts, true)
 	if err != nil {
 		return err
 	}
 	return nil
 }
 
-func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts) error {
+func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
+	if err := c.cgroupManager.Apply(pid); err != nil {
+		return err
+	}
+
+	path := fmt.Sprintf("/proc/%d/cgroup", pid)
+	cgroupsPaths, err := cgroups.ParseCgroupFile(path)
+	if err != nil {
+		return err
+	}
+
+	for c, p := range cgroupsPaths {
+		cgroupRoot := &criurpc.CgroupRoot{
+			Ctrl: proto.String(c),
+			Path: proto.String(p),
+		}
+		req.Opts.CgRoot = append(req.Opts.CgRoot, cgroupRoot)
+	}
+
+	return nil
+}
+
+func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool) error {
 	fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
 	if err != nil {
 		return err
@@ -614,6 +637,13 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
 		}
 	}()
 
+	if applyCgroups {
+		err := c.criuApplyCgroups(cmd.Process.Pid, req)
+		if err != nil {
+			return err
+		}
+	}
+
 	var extFds []string
 	if process != nil {
 		extFds, err = getPipeFds(cmd.Process.Pid)

+ 37 - 3
vendor/src/github.com/opencontainers/runc/libcontainer/init_linux.go

@@ -6,6 +6,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"io/ioutil"
+	"net"
 	"os"
 	"strconv"
 	"strings"
@@ -14,10 +15,10 @@ import (
 	"github.com/Sirupsen/logrus"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
-	"github.com/opencontainers/runc/libcontainer/netlink"
 	"github.com/opencontainers/runc/libcontainer/system"
 	"github.com/opencontainers/runc/libcontainer/user"
 	"github.com/opencontainers/runc/libcontainer/utils"
+	"github.com/vishvananda/netlink"
 )
 
 type initType string
@@ -186,7 +187,17 @@ func setupUser(config *initConfig) error {
 			return err
 		}
 	}
-
+	// Change the ownership of the current process's STDIO so that, when the
+	// container's user is changed, the STDIO of the process matches that user.
+	for _, fd := range []uintptr{
+		os.Stdin.Fd(),
+		os.Stderr.Fd(),
+		os.Stdout.Fd(),
+	} {
+		if err := syscall.Fchown(int(fd), execUser.Uid, execUser.Gid); err != nil {
+			return err
+		}
+	}
 	suppGroups := append(execUser.Sgids, addGroups...)
 	if err := syscall.Setgroups(suppGroups); err != nil {
 		return err
@@ -223,7 +234,30 @@ func setupNetwork(config *initConfig) error {
 
 func setupRoute(config *configs.Config) error {
 	for _, config := range config.Routes {
-		if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil {
+		_, dst, err := net.ParseCIDR(config.Destination)
+		if err != nil {
+			return err
+		}
+		src := net.ParseIP(config.Source)
+		if src == nil {
+			return fmt.Errorf("Invalid source for route: %s", config.Source)
+		}
+		gw := net.ParseIP(config.Gateway)
+		if gw == nil {
+			return fmt.Errorf("Invalid gateway for route: %s", config.Gateway)
+		}
+		l, err := netlink.LinkByName(config.InterfaceName)
+		if err != nil {
+			return err
+		}
+		route := &netlink.Route{
+			Scope:     netlink.SCOPE_UNIVERSE,
+			Dst:       dst,
+			Src:       src,
+			Gw:        gw,
+			LinkIndex: l.Attrs().Index,
+		}
+		if err := netlink.RouteAdd(route); err != nil {
 			return err
 		}
 	}

+ 0 - 2
vendor/src/github.com/opencontainers/runc/libcontainer/netlink/MAINTAINERS

@@ -1,2 +0,0 @@
-Michael Crosby <michael@crosbymichael.com> (@crosbymichael)
-Guillaume J. Charmes <guillaume@docker.com> (@creack)

+ 0 - 31
vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink.go

@@ -1,31 +0,0 @@
-// Package netlink provides access to low-level Netlink sockets and messages.
-//
-// Actual implementations are in:
-// netlink_linux.go
-// netlink_darwin.go
-package netlink
-
-import (
-	"errors"
-	"net"
-)
-
-var (
-	ErrWrongSockType   = errors.New("Wrong socket type")
-	ErrShortResponse   = errors.New("Got short response from netlink")
-	ErrInterfaceExists = errors.New("Network interface already exists")
-)
-
-// A Route is a subnet associated with the interface to reach it.
-type Route struct {
-	*net.IPNet
-	Iface   *net.Interface
-	Default bool
-}
-
-// An IfAddr defines IP network settings for a given network interface
-type IfAddr struct {
-	Iface *net.Interface
-	IP    net.IP
-	IPNet *net.IPNet
-}

+ 0 - 1321
vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux.go

@@ -1,1321 +0,0 @@
-package netlink
-
-import (
-	"encoding/binary"
-	"fmt"
-	"io"
-	"math/rand"
-	"net"
-	"os"
-	"sync/atomic"
-	"syscall"
-	"time"
-	"unsafe"
-)
-
-const (
-	IFNAMSIZ          = 16
-	DEFAULT_CHANGE    = 0xFFFFFFFF
-	IFLA_INFO_KIND    = 1
-	IFLA_INFO_DATA    = 2
-	VETH_INFO_PEER    = 1
-	IFLA_MACVLAN_MODE = 1
-	IFLA_VLAN_ID      = 1
-	IFLA_NET_NS_FD    = 28
-	IFLA_ADDRESS      = 1
-	IFLA_BRPORT_MODE  = 4
-	SIOC_BRADDBR      = 0x89a0
-	SIOC_BRDELBR      = 0x89a1
-	SIOC_BRADDIF      = 0x89a2
-	SIOC_BRDELIF      = 0x89a3
-)
-
-const (
-	MACVLAN_MODE_PRIVATE = 1 << iota
-	MACVLAN_MODE_VEPA
-	MACVLAN_MODE_BRIDGE
-	MACVLAN_MODE_PASSTHRU
-)
-
-var nextSeqNr uint32
-
-type ifreqHwaddr struct {
-	IfrnName   [IFNAMSIZ]byte
-	IfruHwaddr syscall.RawSockaddr
-}
-
-type ifreqIndex struct {
-	IfrnName  [IFNAMSIZ]byte
-	IfruIndex int32
-}
-
-type ifreqFlags struct {
-	IfrnName  [IFNAMSIZ]byte
-	Ifruflags uint16
-}
-
-var native binary.ByteOrder
-
-var rnd = rand.New(rand.NewSource(time.Now().UnixNano()))
-
-func init() {
-	var x uint32 = 0x01020304
-	if *(*byte)(unsafe.Pointer(&x)) == 0x01 {
-		native = binary.BigEndian
-	} else {
-		native = binary.LittleEndian
-	}
-}
-
-func getIpFamily(ip net.IP) int {
-	if len(ip) <= net.IPv4len {
-		return syscall.AF_INET
-	}
-	if ip.To4() != nil {
-		return syscall.AF_INET
-	}
-	return syscall.AF_INET6
-}
-
-type NetlinkRequestData interface {
-	Len() int
-	ToWireFormat() []byte
-}
-
-type IfInfomsg struct {
-	syscall.IfInfomsg
-}
-
-func newIfInfomsg(family int) *IfInfomsg {
-	return &IfInfomsg{
-		IfInfomsg: syscall.IfInfomsg{
-			Family: uint8(family),
-		},
-	}
-}
-
-func newIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg {
-	msg := newIfInfomsg(family)
-	parent.children = append(parent.children, msg)
-	return msg
-}
-
-func (msg *IfInfomsg) ToWireFormat() []byte {
-	length := syscall.SizeofIfInfomsg
-	b := make([]byte, length)
-	b[0] = msg.Family
-	b[1] = 0
-	native.PutUint16(b[2:4], msg.Type)
-	native.PutUint32(b[4:8], uint32(msg.Index))
-	native.PutUint32(b[8:12], msg.Flags)
-	native.PutUint32(b[12:16], msg.Change)
-	return b
-}
-
-func (msg *IfInfomsg) Len() int {
-	return syscall.SizeofIfInfomsg
-}
-
-type IfAddrmsg struct {
-	syscall.IfAddrmsg
-}
-
-func newIfAddrmsg(family int) *IfAddrmsg {
-	return &IfAddrmsg{
-		IfAddrmsg: syscall.IfAddrmsg{
-			Family: uint8(family),
-		},
-	}
-}
-
-func (msg *IfAddrmsg) ToWireFormat() []byte {
-	length := syscall.SizeofIfAddrmsg
-	b := make([]byte, length)
-	b[0] = msg.Family
-	b[1] = msg.Prefixlen
-	b[2] = msg.Flags
-	b[3] = msg.Scope
-	native.PutUint32(b[4:8], msg.Index)
-	return b
-}
-
-func (msg *IfAddrmsg) Len() int {
-	return syscall.SizeofIfAddrmsg
-}
-
-type RtMsg struct {
-	syscall.RtMsg
-}
-
-func newRtMsg() *RtMsg {
-	return &RtMsg{
-		RtMsg: syscall.RtMsg{
-			Table:    syscall.RT_TABLE_MAIN,
-			Scope:    syscall.RT_SCOPE_UNIVERSE,
-			Protocol: syscall.RTPROT_BOOT,
-			Type:     syscall.RTN_UNICAST,
-		},
-	}
-}
-
-func (msg *RtMsg) ToWireFormat() []byte {
-	length := syscall.SizeofRtMsg
-	b := make([]byte, length)
-	b[0] = msg.Family
-	b[1] = msg.Dst_len
-	b[2] = msg.Src_len
-	b[3] = msg.Tos
-	b[4] = msg.Table
-	b[5] = msg.Protocol
-	b[6] = msg.Scope
-	b[7] = msg.Type
-	native.PutUint32(b[8:12], msg.Flags)
-	return b
-}
-
-func (msg *RtMsg) Len() int {
-	return syscall.SizeofRtMsg
-}
-
-func rtaAlignOf(attrlen int) int {
-	return (attrlen + syscall.RTA_ALIGNTO - 1) & ^(syscall.RTA_ALIGNTO - 1)
-}
-
-type RtAttr struct {
-	syscall.RtAttr
-	Data     []byte
-	children []NetlinkRequestData
-}
-
-func newRtAttr(attrType int, data []byte) *RtAttr {
-	return &RtAttr{
-		RtAttr: syscall.RtAttr{
-			Type: uint16(attrType),
-		},
-		children: []NetlinkRequestData{},
-		Data:     data,
-	}
-}
-
-func newRtAttrChild(parent *RtAttr, attrType int, data []byte) *RtAttr {
-	attr := newRtAttr(attrType, data)
-	parent.children = append(parent.children, attr)
-	return attr
-}
-
-func (a *RtAttr) Len() int {
-	if len(a.children) == 0 {
-		return (syscall.SizeofRtAttr + len(a.Data))
-	}
-
-	l := 0
-	for _, child := range a.children {
-		l += child.Len()
-	}
-	l += syscall.SizeofRtAttr
-	return rtaAlignOf(l + len(a.Data))
-}
-
-func (a *RtAttr) ToWireFormat() []byte {
-	length := a.Len()
-	buf := make([]byte, rtaAlignOf(length))
-
-	if a.Data != nil {
-		copy(buf[4:], a.Data)
-	} else {
-		next := 4
-		for _, child := range a.children {
-			childBuf := child.ToWireFormat()
-			copy(buf[next:], childBuf)
-			next += rtaAlignOf(len(childBuf))
-		}
-	}
-
-	if l := uint16(length); l != 0 {
-		native.PutUint16(buf[0:2], l)
-	}
-	native.PutUint16(buf[2:4], a.Type)
-	return buf
-}
-
-func uint32Attr(t int, n uint32) *RtAttr {
-	buf := make([]byte, 4)
-	native.PutUint32(buf, n)
-	return newRtAttr(t, buf)
-}
-
-type NetlinkRequest struct {
-	syscall.NlMsghdr
-	Data []NetlinkRequestData
-}
-
-func (rr *NetlinkRequest) ToWireFormat() []byte {
-	length := rr.Len
-	dataBytes := make([][]byte, len(rr.Data))
-	for i, data := range rr.Data {
-		dataBytes[i] = data.ToWireFormat()
-		length += uint32(len(dataBytes[i]))
-	}
-	b := make([]byte, length)
-	native.PutUint32(b[0:4], length)
-	native.PutUint16(b[4:6], rr.Type)
-	native.PutUint16(b[6:8], rr.Flags)
-	native.PutUint32(b[8:12], rr.Seq)
-	native.PutUint32(b[12:16], rr.Pid)
-
-	next := 16
-	for _, data := range dataBytes {
-		copy(b[next:], data)
-		next += len(data)
-	}
-	return b
-}
-
-func (rr *NetlinkRequest) AddData(data NetlinkRequestData) {
-	if data != nil {
-		rr.Data = append(rr.Data, data)
-	}
-}
-
-func newNetlinkRequest(proto, flags int) *NetlinkRequest {
-	return &NetlinkRequest{
-		NlMsghdr: syscall.NlMsghdr{
-			Len:   uint32(syscall.NLMSG_HDRLEN),
-			Type:  uint16(proto),
-			Flags: syscall.NLM_F_REQUEST | uint16(flags),
-			Seq:   atomic.AddUint32(&nextSeqNr, 1),
-		},
-	}
-}
-
-type NetlinkSocket struct {
-	fd  int
-	lsa syscall.SockaddrNetlink
-}
-
-func getNetlinkSocket() (*NetlinkSocket, error) {
-	fd, err := syscall.Socket(syscall.AF_NETLINK, syscall.SOCK_RAW, syscall.NETLINK_ROUTE)
-	if err != nil {
-		return nil, err
-	}
-	s := &NetlinkSocket{
-		fd: fd,
-	}
-	s.lsa.Family = syscall.AF_NETLINK
-	if err := syscall.Bind(fd, &s.lsa); err != nil {
-		syscall.Close(fd)
-		return nil, err
-	}
-
-	return s, nil
-}
-
-func (s *NetlinkSocket) Close() {
-	syscall.Close(s.fd)
-}
-
-func (s *NetlinkSocket) Send(request *NetlinkRequest) error {
-	if err := syscall.Sendto(s.fd, request.ToWireFormat(), 0, &s.lsa); err != nil {
-		return err
-	}
-	return nil
-}
-
-func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, error) {
-	rb := make([]byte, syscall.Getpagesize())
-	nr, _, err := syscall.Recvfrom(s.fd, rb, 0)
-	if err != nil {
-		return nil, err
-	}
-	if nr < syscall.NLMSG_HDRLEN {
-		return nil, ErrShortResponse
-	}
-	rb = rb[:nr]
-	return syscall.ParseNetlinkMessage(rb)
-}
-
-func (s *NetlinkSocket) GetPid() (uint32, error) {
-	lsa, err := syscall.Getsockname(s.fd)
-	if err != nil {
-		return 0, err
-	}
-	switch v := lsa.(type) {
-	case *syscall.SockaddrNetlink:
-		return v.Pid, nil
-	}
-	return 0, ErrWrongSockType
-}
-
-func (s *NetlinkSocket) CheckMessage(m syscall.NetlinkMessage, seq, pid uint32) error {
-	if m.Header.Seq != seq {
-		return fmt.Errorf("netlink: invalid seq %d, expected %d", m.Header.Seq, seq)
-	}
-	if m.Header.Pid != pid {
-		return fmt.Errorf("netlink: wrong pid %d, expected %d", m.Header.Pid, pid)
-	}
-	if m.Header.Type == syscall.NLMSG_DONE {
-		return io.EOF
-	}
-	if m.Header.Type == syscall.NLMSG_ERROR {
-		e := int32(native.Uint32(m.Data[0:4]))
-		if e == 0 {
-			return io.EOF
-		}
-		return syscall.Errno(-e)
-	}
-	return nil
-}
-
-func (s *NetlinkSocket) HandleAck(seq uint32) error {
-	pid, err := s.GetPid()
-	if err != nil {
-		return err
-	}
-
-outer:
-	for {
-		msgs, err := s.Receive()
-		if err != nil {
-			return err
-		}
-		for _, m := range msgs {
-			if err := s.CheckMessage(m, seq, pid); err != nil {
-				if err == io.EOF {
-					break outer
-				}
-				return err
-			}
-		}
-	}
-
-	return nil
-}
-
-func zeroTerminated(s string) []byte {
-	return []byte(s + "\000")
-}
-
-func nonZeroTerminated(s string) []byte {
-	return []byte(s)
-}
-
-// Add a new network link of a specified type.
-// This is identical to running: ip link add $name type $linkType
-func NetworkLinkAdd(name string, linkType string) error {
-	if name == "" || linkType == "" {
-		return fmt.Errorf("Neither link name nor link type can be empty!")
-	}
-
-	s, err := getNetlinkSocket()
-	if err != nil {
-		return err
-	}
-	defer s.Close()
-
-	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
-
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
-	wb.AddData(msg)
-
-	linkInfo := newRtAttr(syscall.IFLA_LINKINFO, nil)
-	newRtAttrChild(linkInfo, IFLA_INFO_KIND, nonZeroTerminated(linkType))
-	wb.AddData(linkInfo)
-
-	nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(name))
-	wb.AddData(nameData)
-
-	if err := s.Send(wb); err != nil {
-		return err
-	}
-
-	return s.HandleAck(wb.Seq)
-}
-
-// Delete a network link.
-// This is identical to running: ip link del $name
-func NetworkLinkDel(name string) error {
-	if name == "" {
-		return fmt.Errorf("Network link name can not be empty!")
-	}
-
-	s, err := getNetlinkSocket()
-	if err != nil {
-		return err
-	}
-	defer s.Close()
-
-	iface, err := net.InterfaceByName(name)
-	if err != nil {
-		return err
-	}
-
-	wb := newNetlinkRequest(syscall.RTM_DELLINK, syscall.NLM_F_ACK)
-
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
-	msg.Index = int32(iface.Index)
-	wb.AddData(msg)
-
-	if err := s.Send(wb); err != nil {
-		return err
-	}
-
-	return s.HandleAck(wb.Seq)
-}
-
-// Bring up a particular network interface.
-// This is identical to running: ip link set dev $name up
-func NetworkLinkUp(iface *net.Interface) error {
-	s, err := getNetlinkSocket()
-	if err != nil {
-		return err
-	}
-	defer s.Close()
-
-	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK)
-
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
-	msg.Index = int32(iface.Index)
-	msg.Flags = syscall.IFF_UP
-	msg.Change = syscall.IFF_UP
-	wb.AddData(msg)
-
-	if err := s.Send(wb); err != nil {
-		return err
-	}
-
-	return s.HandleAck(wb.Seq)
-}
-
-// Bring down a particular network interface.
-// This is identical to running: ip link set $name down
-func NetworkLinkDown(iface *net.Interface) error {
-	s, err := getNetlinkSocket()
-	if err != nil {
-		return err
-	}
-	defer s.Close()
-
-	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK)
-
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
-	msg.Index = int32(iface.Index)
-	msg.Flags = 0 & ^syscall.IFF_UP
-	msg.Change = DEFAULT_CHANGE
-	wb.AddData(msg)
-
-	if err := s.Send(wb); err != nil {
-		return err
-	}
-
-	return s.HandleAck(wb.Seq)
-}
-
-// Set link layer address ie. MAC Address.
-// This is identical to running: ip link set dev $name address $macaddress
-func NetworkSetMacAddress(iface *net.Interface, macaddr string) error {
-	s, err := getNetlinkSocket()
-	if err != nil {
-		return err
-	}
-	defer s.Close()
-
-	hwaddr, err := net.ParseMAC(macaddr)
-	if err != nil {
-		return err
-	}
-
-	var (
-		MULTICAST byte = 0x1
-	)
-
-	if hwaddr[0]&0x1 == MULTICAST {
-		return fmt.Errorf("Multicast MAC Address is not supported: %s", macaddr)
-	}
-
-	wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
-
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
-	msg.Index = int32(iface.Index)
-	msg.Change = DEFAULT_CHANGE
-	wb.AddData(msg)
-
-	macdata := make([]byte, 6)
-	copy(macdata, hwaddr)
-	data := newRtAttr(IFLA_ADDRESS, macdata)
-	wb.AddData(data)
-
-	if err := s.Send(wb); err != nil {
-		return err
-	}
-	return s.HandleAck(wb.Seq)
-}
-
-// Set link Maximum Transmission Unit
-// This is identical to running: ip link set dev $name mtu $MTU
-// Bridges are troublesome here; see https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=292088
-// and https://bugzilla.redhat.com/show_bug.cgi?id=697021.
-// There is a discussion about how to deal with interfaces joining a bridge with MTU > 1500.
-// Regular network interfaces do seem to work though!
-func NetworkSetMTU(iface *net.Interface, mtu int) error {
-	s, err := getNetlinkSocket()
-	if err != nil {
-		return err
-	}
-	defer s.Close()
-
-	wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
-
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
-	msg.Type = syscall.RTM_SETLINK
-	msg.Flags = syscall.NLM_F_REQUEST
-	msg.Index = int32(iface.Index)
-	msg.Change = DEFAULT_CHANGE
-	wb.AddData(msg)
-	wb.AddData(uint32Attr(syscall.IFLA_MTU, uint32(mtu)))
-
-	if err := s.Send(wb); err != nil {
-		return err
-	}
-	return s.HandleAck(wb.Seq)
-}
-
-// Set link queue length
-// This is identical to running: ip link set dev $name txqueuelen $QLEN
-func NetworkSetTxQueueLen(iface *net.Interface, txQueueLen int) error {
-	s, err := getNetlinkSocket()
-	if err != nil {
-		return err
-	}
-	defer s.Close()
-
-	wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
-
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
-	msg.Type = syscall.RTM_SETLINK
-	msg.Flags = syscall.NLM_F_REQUEST
-	msg.Index = int32(iface.Index)
-	msg.Change = DEFAULT_CHANGE
-	wb.AddData(msg)
-	wb.AddData(uint32Attr(syscall.IFLA_TXQLEN, uint32(txQueueLen)))
-
-	if err := s.Send(wb); err != nil {
-		return err
-	}
-	return s.HandleAck(wb.Seq)
-}
-
-func networkMasterAction(iface *net.Interface, rtattr *RtAttr) error {
-	s, err := getNetlinkSocket()
-	if err != nil {
-		return err
-	}
-	defer s.Close()
-
-	wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
-
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
-	msg.Type = syscall.RTM_SETLINK
-	msg.Flags = syscall.NLM_F_REQUEST
-	msg.Index = int32(iface.Index)
-	msg.Change = DEFAULT_CHANGE
-	wb.AddData(msg)
-	wb.AddData(rtattr)
-
-	if err := s.Send(wb); err != nil {
-		return err
-	}
-
-	return s.HandleAck(wb.Seq)
-}
-
-// Add an interface to bridge.
-// This is identical to running: ip link set $name master $master
-func NetworkSetMaster(iface, master *net.Interface) error {
-	data := uint32Attr(syscall.IFLA_MASTER, uint32(master.Index))
-	return networkMasterAction(iface, data)
-}
-
-// Remove an interface from the bridge
-// This is identical to running: ip link set $name nomaster
-func NetworkSetNoMaster(iface *net.Interface) error {
-	data := uint32Attr(syscall.IFLA_MASTER, 0)
-	return networkMasterAction(iface, data)
-}
-
-func networkSetNsAction(iface *net.Interface, rtattr *RtAttr) error {
-	s, err := getNetlinkSocket()
-	if err != nil {
-		return err
-	}
-	defer s.Close()
-
-	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK)
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
-	msg.Index = int32(iface.Index)
-	wb.AddData(msg)
-	wb.AddData(rtattr)
-
-	if err := s.Send(wb); err != nil {
-		return err
-	}
-
-	return s.HandleAck(wb.Seq)
-}
-
-// Move a particular network interface to a particular network namespace
-// specified by PID. This is identical to running: ip link set dev $name netns $pid
-func NetworkSetNsPid(iface *net.Interface, nspid int) error {
-	data := uint32Attr(syscall.IFLA_NET_NS_PID, uint32(nspid))
-	return networkSetNsAction(iface, data)
-}
-
-// Move a particular network interface to a particular mounted
-// network namespace specified by file descriptor.
-// This is identical to running: ip link set dev $name netns $fd
-func NetworkSetNsFd(iface *net.Interface, fd int) error {
-	data := uint32Attr(IFLA_NET_NS_FD, uint32(fd))
-	return networkSetNsAction(iface, data)
-}
-
-// Rename a particular interface to a different name
-// !!! Note that you can't rename an active interface. You need to bring it down before renaming it.
-// This is identical to running: ip link set dev ${oldName} name ${newName}
-func NetworkChangeName(iface *net.Interface, newName string) error {
-	if len(newName) >= IFNAMSIZ {
-		return fmt.Errorf("Interface name %s too long", newName)
-	}
-
-	s, err := getNetlinkSocket()
-	if err != nil {
-		return err
-	}
-	defer s.Close()
-
-	wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
-
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
-	msg.Index = int32(iface.Index)
-	msg.Change = DEFAULT_CHANGE
-	wb.AddData(msg)
-
-	nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(newName))
-	wb.AddData(nameData)
-
-	if err := s.Send(wb); err != nil {
-		return err
-	}
-
-	return s.HandleAck(wb.Seq)
-}
-
-// Add a new VETH pair link on the host
-// This is identical to running: ip link add name $name type veth peer name $peername
-func NetworkCreateVethPair(name1, name2 string, txQueueLen int) error {
-	s, err := getNetlinkSocket()
-	if err != nil {
-		return err
-	}
-	defer s.Close()
-
-	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
-
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
-	wb.AddData(msg)
-
-	nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(name1))
-	wb.AddData(nameData)
-
-	txqLen := make([]byte, 4)
-	native.PutUint32(txqLen, uint32(txQueueLen))
-	txqData := newRtAttr(syscall.IFLA_TXQLEN, txqLen)
-	wb.AddData(txqData)
-
-	nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil)
-	newRtAttrChild(nest1, IFLA_INFO_KIND, zeroTerminated("veth"))
-	nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil)
-	nest3 := newRtAttrChild(nest2, VETH_INFO_PEER, nil)
-
-	newIfInfomsgChild(nest3, syscall.AF_UNSPEC)
-	newRtAttrChild(nest3, syscall.IFLA_IFNAME, zeroTerminated(name2))
-
-	txqLen2 := make([]byte, 4)
-	native.PutUint32(txqLen2, uint32(txQueueLen))
-	newRtAttrChild(nest3, syscall.IFLA_TXQLEN, txqLen2)
-
-	wb.AddData(nest1)
-
-	if err := s.Send(wb); err != nil {
-		return err
-	}
-
-	if err := s.HandleAck(wb.Seq); err != nil {
-		if os.IsExist(err) {
-			return ErrInterfaceExists
-		}
-
-		return err
-	}
-
-	return nil
-}
-
-// Add a new VLAN interface with masterDev as its upper device
-// This is identical to running:
-// ip link add name $name link $masterdev type vlan id $id
-func NetworkLinkAddVlan(masterDev, vlanDev string, vlanId uint16) error {
-	s, err := getNetlinkSocket()
-	if err != nil {
-		return err
-	}
-	defer s.Close()
-
-	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
-
-	masterDevIfc, err := net.InterfaceByName(masterDev)
-	if err != nil {
-		return err
-	}
-
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
-	wb.AddData(msg)
-
-	nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil)
-	newRtAttrChild(nest1, IFLA_INFO_KIND, nonZeroTerminated("vlan"))
-
-	nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil)
-	vlanData := make([]byte, 2)
-	native.PutUint16(vlanData, vlanId)
-	newRtAttrChild(nest2, IFLA_VLAN_ID, vlanData)
-	wb.AddData(nest1)
-
-	wb.AddData(uint32Attr(syscall.IFLA_LINK, uint32(masterDevIfc.Index)))
-	wb.AddData(newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(vlanDev)))
-
-	if err := s.Send(wb); err != nil {
-		return err
-	}
-	return s.HandleAck(wb.Seq)
-}
-
-// MacVlanLink has a MasterDev, a SlaveDev and operates in the mode named by its mode field.
-// This simplifies the code when creating a MacVlan or MacVtap interface.
-type MacVlanLink struct {
-	MasterDev string
-	SlaveDev  string
-	mode      string
-}
-
-func (m MacVlanLink) Mode() uint32 {
-	modeMap := map[string]uint32{
-		"private":  MACVLAN_MODE_PRIVATE,
-		"vepa":     MACVLAN_MODE_VEPA,
-		"bridge":   MACVLAN_MODE_BRIDGE,
-		"passthru": MACVLAN_MODE_PASSTHRU,
-	}
-
-	return modeMap[m.mode]
-}
-
-// Add a MAC VLAN or MAC VTAP network interface (selected by dev_type) with mcvln.MasterDev as its upper device
-// This is identical to running:
-// ip link add name $name link $masterdev type $dev_type mode $mode
-func networkLinkMacVlan(dev_type string, mcvln *MacVlanLink) error {
-	s, err := getNetlinkSocket()
-	if err != nil {
-		return err
-	}
-	defer s.Close()
-
-	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
-
-	masterDevIfc, err := net.InterfaceByName(mcvln.MasterDev)
-	if err != nil {
-		return err
-	}
-
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
-	wb.AddData(msg)
-
-	nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil)
-	newRtAttrChild(nest1, IFLA_INFO_KIND, nonZeroTerminated(dev_type))
-
-	nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil)
-	macVlanData := make([]byte, 4)
-	native.PutUint32(macVlanData, mcvln.Mode())
-	newRtAttrChild(nest2, IFLA_MACVLAN_MODE, macVlanData)
-	wb.AddData(nest1)
-
-	wb.AddData(uint32Attr(syscall.IFLA_LINK, uint32(masterDevIfc.Index)))
-	wb.AddData(newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(mcvln.SlaveDev)))
-
-	if err := s.Send(wb); err != nil {
-		return err
-	}
-	return s.HandleAck(wb.Seq)
-}
-
-func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error {
-	return networkLinkMacVlan("macvlan", &MacVlanLink{
-		MasterDev: masterDev,
-		SlaveDev:  macVlanDev,
-		mode:      mode,
-	})
-}
-
-func NetworkLinkAddMacVtap(masterDev, macVlanDev string, mode string) error {
-	return networkLinkMacVlan("macvtap", &MacVlanLink{
-		MasterDev: masterDev,
-		SlaveDev:  macVlanDev,
-		mode:      mode,
-	})
-}
-
-func networkLinkIpAction(action, flags int, ifa IfAddr) error {
-	s, err := getNetlinkSocket()
-	if err != nil {
-		return err
-	}
-	defer s.Close()
-
-	family := getIpFamily(ifa.IP)
-
-	wb := newNetlinkRequest(action, flags)
-
-	msg := newIfAddrmsg(family)
-	msg.Index = uint32(ifa.Iface.Index)
-	prefixLen, _ := ifa.IPNet.Mask.Size()
-	msg.Prefixlen = uint8(prefixLen)
-	wb.AddData(msg)
-
-	var ipData []byte
-	if family == syscall.AF_INET {
-		ipData = ifa.IP.To4()
-	} else {
-		ipData = ifa.IP.To16()
-	}
-
-	localData := newRtAttr(syscall.IFA_LOCAL, ipData)
-	wb.AddData(localData)
-
-	addrData := newRtAttr(syscall.IFA_ADDRESS, ipData)
-	wb.AddData(addrData)
-
-	if err := s.Send(wb); err != nil {
-		return err
-	}
-
-	return s.HandleAck(wb.Seq)
-}
-
-// Delete an IP address from an interface. This is identical to:
-// ip addr del $ip/$ipNet dev $iface
-func NetworkLinkDelIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error {
-	return networkLinkIpAction(
-		syscall.RTM_DELADDR,
-		syscall.NLM_F_ACK,
-		IfAddr{iface, ip, ipNet},
-	)
-}
-
-// Add an IP address to an interface. This is identical to:
-// ip addr add $ip/$ipNet dev $iface
-func NetworkLinkAddIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error {
-	return networkLinkIpAction(
-		syscall.RTM_NEWADDR,
-		syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK,
-		IfAddr{iface, ip, ipNet},
-	)
-}
-
-// Returns a slice of Route entries for all the currently routed subnets on ipv4
-// This is similar to the first column of "ip route" output
-func NetworkGetRoutes() ([]Route, error) {
-	s, err := getNetlinkSocket()
-	if err != nil {
-		return nil, err
-	}
-	defer s.Close()
-
-	wb := newNetlinkRequest(syscall.RTM_GETROUTE, syscall.NLM_F_DUMP)
-
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
-	wb.AddData(msg)
-
-	if err := s.Send(wb); err != nil {
-		return nil, err
-	}
-
-	pid, err := s.GetPid()
-	if err != nil {
-		return nil, err
-	}
-
-	res := make([]Route, 0)
-
-outer:
-	for {
-		msgs, err := s.Receive()
-		if err != nil {
-			return nil, err
-		}
-		for _, m := range msgs {
-			if err := s.CheckMessage(m, wb.Seq, pid); err != nil {
-				if err == io.EOF {
-					break outer
-				}
-				return nil, err
-			}
-			if m.Header.Type != syscall.RTM_NEWROUTE {
-				continue
-			}
-
-			var r Route
-
-			msg := (*RtMsg)(unsafe.Pointer(&m.Data[0:syscall.SizeofRtMsg][0]))
-
-			if msg.Flags&syscall.RTM_F_CLONED != 0 {
-				// Ignore cloned routes
-				continue
-			}
-
-			if msg.Table != syscall.RT_TABLE_MAIN {
-				// Ignore non-main tables
-				continue
-			}
-
-			if msg.Family != syscall.AF_INET {
-				// Ignore non-ipv4 routes
-				continue
-			}
-
-			if msg.Dst_len == 0 {
-				// Default routes
-				r.Default = true
-			}
-
-			attrs, err := syscall.ParseNetlinkRouteAttr(&m)
-			if err != nil {
-				return nil, err
-			}
-			for _, attr := range attrs {
-				switch attr.Attr.Type {
-				case syscall.RTA_DST:
-					ip := attr.Value
-					r.IPNet = &net.IPNet{
-						IP:   ip,
-						Mask: net.CIDRMask(int(msg.Dst_len), 8*len(ip)),
-					}
-				case syscall.RTA_OIF:
-					index := int(native.Uint32(attr.Value[0:4]))
-					r.Iface, _ = net.InterfaceByIndex(index)
-				}
-			}
-			if r.Default || r.IPNet != nil {
-				res = append(res, r)
-			}
-		}
-	}
-
-	return res, nil
-}
-
-// Add a new route table entry.
-func AddRoute(destination, source, gateway, device string) error {
-	if destination == "" && source == "" && gateway == "" {
-		return fmt.Errorf("one of destination, source or gateway must not be blank")
-	}
-
-	s, err := getNetlinkSocket()
-	if err != nil {
-		return err
-	}
-	defer s.Close()
-
-	wb := newNetlinkRequest(syscall.RTM_NEWROUTE, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
-	msg := newRtMsg()
-	currentFamily := -1
-	var rtAttrs []*RtAttr
-
-	if destination != "" {
-		destIP, destNet, err := net.ParseCIDR(destination)
-		if err != nil {
-			return fmt.Errorf("destination CIDR %s couldn't be parsed", destination)
-		}
-		destFamily := getIpFamily(destIP)
-		currentFamily = destFamily
-		destLen, bits := destNet.Mask.Size()
-		if destLen == 0 && bits == 0 {
-			return fmt.Errorf("destination CIDR %s generated a non-canonical Mask", destination)
-		}
-		msg.Family = uint8(destFamily)
-		msg.Dst_len = uint8(destLen)
-		var destData []byte
-		if destFamily == syscall.AF_INET {
-			destData = destIP.To4()
-		} else {
-			destData = destIP.To16()
-		}
-		rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_DST, destData))
-	}
-
-	if source != "" {
-		srcIP := net.ParseIP(source)
-		if srcIP == nil {
-			return fmt.Errorf("source IP %s couldn't be parsed", source)
-		}
-		srcFamily := getIpFamily(srcIP)
-		if currentFamily != -1 && currentFamily != srcFamily {
-			return fmt.Errorf("source and destination ip were not the same IP family")
-		}
-		currentFamily = srcFamily
-		msg.Family = uint8(srcFamily)
-		var srcData []byte
-		if srcFamily == syscall.AF_INET {
-			srcData = srcIP.To4()
-		} else {
-			srcData = srcIP.To16()
-		}
-		rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_PREFSRC, srcData))
-	}
-
-	if gateway != "" {
-		gwIP := net.ParseIP(gateway)
-		if gwIP == nil {
-			return fmt.Errorf("gateway IP %s couldn't be parsed", gateway)
-		}
-		gwFamily := getIpFamily(gwIP)
-		if currentFamily != -1 && currentFamily != gwFamily {
-			return fmt.Errorf("gateway, source, and destination ip were not the same IP family")
-		}
-		msg.Family = uint8(gwFamily)
-		var gwData []byte
-		if gwFamily == syscall.AF_INET {
-			gwData = gwIP.To4()
-		} else {
-			gwData = gwIP.To16()
-		}
-		rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_GATEWAY, gwData))
-	}
-
-	wb.AddData(msg)
-	for _, attr := range rtAttrs {
-		wb.AddData(attr)
-	}
-
-	iface, err := net.InterfaceByName(device)
-	if err != nil {
-		return err
-	}
-	wb.AddData(uint32Attr(syscall.RTA_OIF, uint32(iface.Index)))
-
-	if err := s.Send(wb); err != nil {
-		return err
-	}
-	return s.HandleAck(wb.Seq)
-}
-
-// Add a new default gateway. Identical to:
-// ip route add default via $ip
-func AddDefaultGw(ip, device string) error {
-	return AddRoute("", "", ip, device)
-}
-
-// THIS CODE DOES NOT COMMUNICATE WITH THE KERNEL VIA THE RTNETLINK INTERFACE.
-// IT IS HERE FOR BACKWARDS COMPATIBILITY WITH OLDER LINUX KERNELS
-// WHICH SHIP WITH AN OLDER, NOT ENTIRELY FUNCTIONAL VERSION OF NETLINK.
-func getIfSocket() (fd int, err error) {
-	for _, socket := range []int{
-		syscall.AF_INET,
-		syscall.AF_PACKET,
-		syscall.AF_INET6,
-	} {
-		if fd, err = syscall.Socket(socket, syscall.SOCK_DGRAM, 0); err == nil {
-			break
-		}
-	}
-	if err == nil {
-		return fd, nil
-	}
-	return -1, err
-}
-
-// Create the actual bridge device.  This is more backward-compatible than
-// netlink.NetworkLinkAdd and works on RHEL 6.
-func CreateBridge(name string, setMacAddr bool) error {
-	if len(name) >= IFNAMSIZ {
-		return fmt.Errorf("Interface name %s too long", name)
-	}
-
-	s, err := getIfSocket()
-	if err != nil {
-		return err
-	}
-	defer syscall.Close(s)
-
-	nameBytePtr, err := syscall.BytePtrFromString(name)
-	if err != nil {
-		return err
-	}
-	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), SIOC_BRADDBR, uintptr(unsafe.Pointer(nameBytePtr))); err != 0 {
-		return err
-	}
-	if setMacAddr {
-		return SetMacAddress(name, randMacAddr())
-	}
-	return nil
-}
-
-// Delete the actual bridge device.
-func DeleteBridge(name string) error {
-	s, err := getIfSocket()
-	if err != nil {
-		return err
-	}
-	defer syscall.Close(s)
-
-	nameBytePtr, err := syscall.BytePtrFromString(name)
-	if err != nil {
-		return err
-	}
-
-	var ifr ifreqFlags
-	copy(ifr.IfrnName[:len(ifr.IfrnName)-1], []byte(name))
-	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s),
-		syscall.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifr))); err != 0 {
-		return err
-	}
-
-	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s),
-		SIOC_BRDELBR, uintptr(unsafe.Pointer(nameBytePtr))); err != 0 {
-		return err
-	}
-	return nil
-}
-
-func ifIoctBridge(iface, master *net.Interface, op uintptr) error {
-	if len(master.Name) >= IFNAMSIZ {
-		return fmt.Errorf("Interface name %s too long", master.Name)
-	}
-
-	s, err := getIfSocket()
-	if err != nil {
-		return err
-	}
-	defer syscall.Close(s)
-
-	ifr := ifreqIndex{}
-	copy(ifr.IfrnName[:len(ifr.IfrnName)-1], master.Name)
-	ifr.IfruIndex = int32(iface.Index)
-
-	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), op, uintptr(unsafe.Pointer(&ifr))); err != 0 {
-		return err
-	}
-
-	return nil
-}
-
-// Add a slave to a bridge device.  This is more backward-compatible than
-// netlink.NetworkSetMaster and works on RHEL 6.
-func AddToBridge(iface, master *net.Interface) error {
-	return ifIoctBridge(iface, master, SIOC_BRADDIF)
-}
-
-// Detach a slave from a bridge device.  This is more backward-compatible than
-// netlink.NetworkSetMaster and works on RHEL 6.
-func DelFromBridge(iface, master *net.Interface) error {
-	return ifIoctBridge(iface, master, SIOC_BRDELIF)
-}
-
-func randMacAddr() string {
-	hw := make(net.HardwareAddr, 6)
-	for i := 0; i < 6; i++ {
-		hw[i] = byte(rnd.Intn(255))
-	}
-	hw[0] &^= 0x1 // clear multicast bit
-	hw[0] |= 0x2  // set local assignment bit (IEEE802)
-	return hw.String()
-}
-
-func SetMacAddress(name, addr string) error {
-	if len(name) >= IFNAMSIZ {
-		return fmt.Errorf("Interface name %s too long", name)
-	}
-
-	hw, err := net.ParseMAC(addr)
-	if err != nil {
-		return err
-	}
-
-	s, err := getIfSocket()
-	if err != nil {
-		return err
-	}
-	defer syscall.Close(s)
-
-	ifr := ifreqHwaddr{}
-	ifr.IfruHwaddr.Family = syscall.ARPHRD_ETHER
-	copy(ifr.IfrnName[:len(ifr.IfrnName)-1], name)
-
-	for i := 0; i < 6; i++ {
-		ifr.IfruHwaddr.Data[i] = ifrDataByte(hw[i])
-	}
-
-	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), syscall.SIOCSIFHWADDR, uintptr(unsafe.Pointer(&ifr))); err != 0 {
-		return err
-	}
-	return nil
-}
-
-func SetHairpinMode(iface *net.Interface, enabled bool) error {
-	s, err := getNetlinkSocket()
-	if err != nil {
-		return err
-	}
-	defer s.Close()
-	req := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
-
-	msg := newIfInfomsg(syscall.AF_BRIDGE)
-	msg.Type = syscall.RTM_SETLINK
-	msg.Flags = syscall.NLM_F_REQUEST
-	msg.Index = int32(iface.Index)
-	msg.Change = DEFAULT_CHANGE
-	req.AddData(msg)
-
-	mode := []byte{0}
-	if enabled {
-		mode[0] = byte(1)
-	}
-
-	br := newRtAttr(syscall.IFLA_PROTINFO|syscall.NLA_F_NESTED, nil)
-	newRtAttrChild(br, IFLA_BRPORT_MODE, mode)
-	req.AddData(br)
-	if err := s.Send(req); err != nil {
-		return err
-	}
-
-	return s.HandleAck(req.Seq)
-}
-
-func ChangeName(iface *net.Interface, newName string) error {
-	if len(newName) >= IFNAMSIZ {
-		return fmt.Errorf("Interface name %s too long", newName)
-	}
-
-	fd, err := getIfSocket()
-	if err != nil {
-		return err
-	}
-	defer syscall.Close(fd)
-
-	data := [IFNAMSIZ * 2]byte{}
-	// the "-1"s here are very important for ensuring we get proper null
-	// termination of our new C strings
-	copy(data[:IFNAMSIZ-1], iface.Name)
-	copy(data[IFNAMSIZ:IFNAMSIZ*2-1], newName)
-
-	if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.SIOCSIFNAME, uintptr(unsafe.Pointer(&data[0]))); errno != 0 {
-		return errno
-	}
-
-	return nil
-}

+ 0 - 7
vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux_armppc64.go

@@ -1,7 +0,0 @@
-// +build arm ppc64 ppc64le
-
-package netlink
-
-func ifrDataByte(b byte) uint8 {
-	return uint8(b)
-}

+ 0 - 7
vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux_notarm.go

@@ -1,7 +0,0 @@
-// +build !arm,!ppc64,!ppc64le
-
-package netlink
-
-func ifrDataByte(b byte) int8 {
-	return int8(b)
-}

+ 0 - 88
vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_unsupported.go

@@ -1,88 +0,0 @@
-// +build !linux
-
-package netlink
-
-import (
-	"errors"
-	"net"
-)
-
-var (
-	ErrNotImplemented = errors.New("not implemented")
-)
-
-func NetworkGetRoutes() ([]Route, error) {
-	return nil, ErrNotImplemented
-}
-
-func NetworkLinkAdd(name string, linkType string) error {
-	return ErrNotImplemented
-}
-
-func NetworkLinkDel(name string) error {
-	return ErrNotImplemented
-}
-
-func NetworkLinkUp(iface *net.Interface) error {
-	return ErrNotImplemented
-}
-
-func NetworkLinkAddIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error {
-	return ErrNotImplemented
-}
-
-func NetworkLinkDelIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error {
-	return ErrNotImplemented
-}
-
-func AddRoute(destination, source, gateway, device string) error {
-	return ErrNotImplemented
-}
-
-func AddDefaultGw(ip, device string) error {
-	return ErrNotImplemented
-}
-
-func NetworkSetMTU(iface *net.Interface, mtu int) error {
-	return ErrNotImplemented
-}
-
-func NetworkSetTxQueueLen(iface *net.Interface, txQueueLen int) error {
-	return ErrNotImplemented
-}
-
-func NetworkCreateVethPair(name1, name2 string, txQueueLen int) error {
-	return ErrNotImplemented
-}
-
-func NetworkChangeName(iface *net.Interface, newName string) error {
-	return ErrNotImplemented
-}
-
-func NetworkSetNsFd(iface *net.Interface, fd int) error {
-	return ErrNotImplemented
-}
-
-func NetworkSetNsPid(iface *net.Interface, nspid int) error {
-	return ErrNotImplemented
-}
-
-func NetworkSetMaster(iface, master *net.Interface) error {
-	return ErrNotImplemented
-}
-
-func NetworkLinkDown(iface *net.Interface) error {
-	return ErrNotImplemented
-}
-
-func CreateBridge(name string, setMacAddr bool) error {
-	return ErrNotImplemented
-}
-
-func DeleteBridge(name string) error {
-	return ErrNotImplemented
-}
-
-func AddToBridge(iface, master *net.Interface) error {
-	return ErrNotImplemented
-}

+ 57 - 46
vendor/src/github.com/opencontainers/runc/libcontainer/network_linux.go

@@ -11,8 +11,8 @@ import (
 	"strings"
 
 	"github.com/opencontainers/runc/libcontainer/configs"
-	"github.com/opencontainers/runc/libcontainer/netlink"
 	"github.com/opencontainers/runc/libcontainer/utils"
+	"github.com/vishvananda/netlink"
 )
 
 var strategies = map[string]networkStrategy{
@@ -93,11 +93,7 @@ func (l *loopback) create(n *network, nspid int) error {
 }
 
 func (l *loopback) initialize(config *network) error {
-	iface, err := net.InterfaceByName("lo")
-	if err != nil {
-		return err
-	}
-	return netlink.NetworkLinkUp(iface)
+	return netlink.LinkSetUp(&netlink.Device{netlink.LinkAttrs{Name: "lo"}})
 }
 
 func (l *loopback) attach(n *configs.Network) (err error) {
@@ -115,42 +111,36 @@ type veth struct {
 }
 
 func (v *veth) detach(n *configs.Network) (err error) {
-	bridge, err := net.InterfaceByName(n.Bridge)
-	if err != nil {
-		return err
-	}
-	host, err := net.InterfaceByName(n.HostInterfaceName)
-	if err != nil {
-		return err
-	}
-	if err := netlink.DelFromBridge(host, bridge); err != nil {
-		return err
-	}
-	return nil
+	return netlink.LinkSetMaster(&netlink.Device{netlink.LinkAttrs{Name: n.HostInterfaceName}}, nil)
 }
 
 // attach a container network interface to an external network
 func (v *veth) attach(n *configs.Network) (err error) {
-	bridge, err := net.InterfaceByName(n.Bridge)
+	brl, err := netlink.LinkByName(n.Bridge)
 	if err != nil {
 		return err
 	}
-	host, err := net.InterfaceByName(n.HostInterfaceName)
+	br, ok := brl.(*netlink.Bridge)
+	if !ok {
+		return fmt.Errorf("Wrong device type %T", brl)
+	}
+	host, err := netlink.LinkByName(n.HostInterfaceName)
 	if err != nil {
 		return err
 	}
-	if err := netlink.AddToBridge(host, bridge); err != nil {
+
+	if err := netlink.LinkSetMaster(host, br); err != nil {
 		return err
 	}
-	if err := netlink.NetworkSetMTU(host, n.Mtu); err != nil {
+	if err := netlink.LinkSetMTU(host, n.Mtu); err != nil {
 		return err
 	}
 	if n.HairpinMode {
-		if err := netlink.SetHairpinMode(host, true); err != nil {
+		if err := netlink.LinkSetHairpin(host, true); err != nil {
 			return err
 		}
 	}
-	if err := netlink.NetworkLinkUp(host); err != nil {
+	if err := netlink.LinkSetUp(host); err != nil {
 		return err
 	}
 
@@ -163,26 +153,32 @@ func (v *veth) create(n *network, nspid int) (err error) {
 		return err
 	}
 	n.TempVethPeerName = tmpName
-	defer func() {
-		if err != nil {
-			netlink.NetworkLinkDel(n.HostInterfaceName)
-			netlink.NetworkLinkDel(n.TempVethPeerName)
-		}
-	}()
 	if n.Bridge == "" {
 		return fmt.Errorf("bridge is not specified")
 	}
-	if err := netlink.NetworkCreateVethPair(n.HostInterfaceName, n.TempVethPeerName, n.TxQueueLen); err != nil {
+	veth := &netlink.Veth{
+		LinkAttrs: netlink.LinkAttrs{
+			Name:   n.HostInterfaceName,
+			TxQLen: n.TxQueueLen,
+		},
+		PeerName: n.TempVethPeerName,
+	}
+	if err := netlink.LinkAdd(veth); err != nil {
 		return err
 	}
+	defer func() {
+		if err != nil {
+			netlink.LinkDel(veth)
+		}
+	}()
 	if err := v.attach(&n.Network); err != nil {
 		return err
 	}
-	child, err := net.InterfaceByName(n.TempVethPeerName)
+	child, err := netlink.LinkByName(n.TempVethPeerName)
 	if err != nil {
 		return err
 	}
-	return netlink.NetworkSetNsPid(child, nspid)
+	return netlink.LinkSetNsPid(child, nspid)
 }
 
 func (v *veth) generateTempPeerName() (string, error) {
@@ -194,53 +190,68 @@ func (v *veth) initialize(config *network) error {
 	if peer == "" {
 		return fmt.Errorf("peer is not specified")
 	}
-	child, err := net.InterfaceByName(peer)
+	child, err := netlink.LinkByName(peer)
 	if err != nil {
 		return err
 	}
-	if err := netlink.NetworkLinkDown(child); err != nil {
+	if err := netlink.LinkSetDown(child); err != nil {
 		return err
 	}
-	if err := netlink.NetworkChangeName(child, config.Name); err != nil {
+	if err := netlink.LinkSetName(child, config.Name); err != nil {
 		return err
 	}
 	// get the interface again after we changed the name as the index also changes.
-	if child, err = net.InterfaceByName(config.Name); err != nil {
+	if child, err = netlink.LinkByName(config.Name); err != nil {
 		return err
 	}
 	if config.MacAddress != "" {
-		if err := netlink.NetworkSetMacAddress(child, config.MacAddress); err != nil {
+		mac, err := net.ParseMAC(config.MacAddress)
+		if err != nil {
+			return err
+		}
+		if err := netlink.LinkSetHardwareAddr(child, mac); err != nil {
 			return err
 		}
 	}
-	ip, ipNet, err := net.ParseCIDR(config.Address)
+	ip, err := netlink.ParseAddr(config.Address)
 	if err != nil {
 		return err
 	}
-	if err := netlink.NetworkLinkAddIp(child, ip, ipNet); err != nil {
+	if err := netlink.AddrAdd(child, ip); err != nil {
 		return err
 	}
 	if config.IPv6Address != "" {
-		if ip, ipNet, err = net.ParseCIDR(config.IPv6Address); err != nil {
+		ip6, err := netlink.ParseAddr(config.IPv6Address)
+		if err != nil {
 			return err
 		}
-		if err := netlink.NetworkLinkAddIp(child, ip, ipNet); err != nil {
+		if err := netlink.AddrAdd(child, ip6); err != nil {
 			return err
 		}
 	}
-	if err := netlink.NetworkSetMTU(child, config.Mtu); err != nil {
+	if err := netlink.LinkSetMTU(child, config.Mtu); err != nil {
 		return err
 	}
-	if err := netlink.NetworkLinkUp(child); err != nil {
+	if err := netlink.LinkSetUp(child); err != nil {
 		return err
 	}
 	if config.Gateway != "" {
-		if err := netlink.AddDefaultGw(config.Gateway, config.Name); err != nil {
+		gw := net.ParseIP(config.Gateway)
+		if err := netlink.RouteAdd(&netlink.Route{
+			Scope:     netlink.SCOPE_UNIVERSE,
+			LinkIndex: child.Attrs().Index,
+			Gw:        gw,
+		}); err != nil {
 			return err
 		}
 	}
 	if config.IPv6Gateway != "" {
-		if err := netlink.AddDefaultGw(config.IPv6Gateway, config.Name); err != nil {
+		gw := net.ParseIP(config.IPv6Gateway)
+		if err := netlink.RouteAdd(&netlink.Route{
+			Scope:     netlink.SCOPE_UNIVERSE,
+			LinkIndex: child.Attrs().Index,
+			Gw:        gw,
+		}); err != nil {
 			return err
 		}
 	}

+ 31 - 28
vendor/src/github.com/opencontainers/runc/libcontainer/rootfs_linux.go

@@ -96,7 +96,6 @@ func mountCmd(cmd configs.Command) error {
 func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
 	var (
 		dest = m.Destination
-		data = label.FormatMountLabel(m.Data, mountLabel)
 	)
 	if !strings.HasPrefix(dest, rootfs) {
 		dest = filepath.Join(rootfs, dest)
@@ -107,12 +106,12 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
 		if err := os.MkdirAll(dest, 0755); err != nil {
 			return err
 		}
-		return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), "")
+		return m.MountPropagate(rootfs, mountLabel)
 	case "mqueue":
 		if err := os.MkdirAll(dest, 0755); err != nil {
 			return err
 		}
-		if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), ""); err != nil {
+		if err := m.MountPropagate(rootfs, mountLabel); err != nil {
 			return err
 		}
 		return label.SetFileLabel(dest, mountLabel)
@@ -123,7 +122,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
 				return err
 			}
 		}
-		if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data); err != nil {
+		if err := m.MountPropagate(rootfs, mountLabel); err != nil {
 			return err
 		}
 		if stat != nil {
@@ -136,12 +135,12 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
 		if err := os.MkdirAll(dest, 0755); err != nil {
 			return err
 		}
-		return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data)
+		return m.MountPropagate(rootfs, mountLabel)
 	case "securityfs":
 		if err := os.MkdirAll(dest, 0755); err != nil {
 			return err
 		}
-		return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data)
+		return m.MountPropagate(rootfs, mountLabel)
 	case "bind":
 		stat, err := os.Stat(m.Source)
 		if err != nil {
@@ -162,13 +161,12 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
 		if err := createIfNotExists(dest, stat.IsDir()); err != nil {
 			return err
 		}
-		if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data); err != nil {
+		if err := m.MountPropagate(rootfs, mountLabel); err != nil {
 			return err
 		}
-		if m.Flags&syscall.MS_RDONLY != 0 {
-			if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags|syscall.MS_REMOUNT), ""); err != nil {
-				return err
-			}
+		// A bind mount does not apply the mount options by itself; a remount is needed to make them effective.
+		if err := m.Remount(rootfs); err != nil {
+			return err
 		}
 		if m.Relabel != "" {
 			if err := label.Validate(m.Relabel); err != nil {
@@ -179,11 +177,6 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
 				return err
 			}
 		}
-		if m.Flags&syscall.MS_PRIVATE != 0 {
-			if err := syscall.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil {
-				return err
-			}
-		}
 	case "cgroup":
 		binds, err := getCgroupMounts(m)
 		if err != nil {
@@ -197,11 +190,12 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
 			}
 		}
 		tmpfs := &configs.Mount{
-			Source:      "tmpfs",
-			Device:      "tmpfs",
-			Destination: m.Destination,
-			Flags:       defaultMountFlags,
-			Data:        "mode=755",
+			Source:           "tmpfs",
+			Device:           "tmpfs",
+			Destination:      m.Destination,
+			Flags:            defaultMountFlags,
+			Data:             "mode=755",
+			PropagationFlags: m.PropagationFlags,
 		}
 		if err := mountToRootfs(tmpfs, rootfs, mountLabel); err != nil {
 			return err
@@ -236,8 +230,11 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
 		}
 		if m.Flags&syscall.MS_RDONLY != 0 {
 			// remount cgroup root as readonly
-			rootfsCgroup := filepath.Join(rootfs, m.Destination)
-			if err := syscall.Mount("", rootfsCgroup, "", defaultMountFlags|syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil {
+			mcgrouproot := &configs.Mount{
+				Destination: m.Destination,
+				Flags:       defaultMountFlags | syscall.MS_RDONLY,
+			}
+			if err := mcgrouproot.Remount(rootfs); err != nil {
 				return err
 			}
 		}
@@ -253,10 +250,15 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
 		return nil, err
 	}
 
+	cgroupPaths, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
+	if err != nil {
+		return nil, err
+	}
+
 	var binds []*configs.Mount
 
 	for _, mm := range mounts {
-		dir, err := mm.GetThisCgroupDir()
+		dir, err := mm.GetThisCgroupDir(cgroupPaths)
 		if err != nil {
 			return nil, err
 		}
@@ -265,10 +267,11 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
 			return nil, err
 		}
 		binds = append(binds, &configs.Mount{
-			Device:      "bind",
-			Source:      filepath.Join(mm.Mountpoint, relDir),
-			Destination: filepath.Join(m.Destination, strings.Join(mm.Subsystems, ",")),
-			Flags:       syscall.MS_BIND | syscall.MS_REC | m.Flags,
+			Device:           "bind",
+			Source:           filepath.Join(mm.Mountpoint, relDir),
+			Destination:      filepath.Join(m.Destination, strings.Join(mm.Subsystems, ",")),
+			Flags:            syscall.MS_BIND | syscall.MS_REC | m.Flags,
+			PropagationFlags: m.PropagationFlags,
 		})
 	}