Jelajahi Sumber

Vendoring the netlink changes

Signed-off-by: Flavio Crisciani <flavio.crisciani@docker.com>
Flavio Crisciani 8 tahun lalu
induk
melakukan
6f062c298a
36 mengubah file dengan 2443 tambahan dan 188 penghapusan
  1. 1 1
      libnetwork/vendor.conf
  2. 12 3
      libnetwork/vendor/github.com/vishvananda/netlink/addr.go
  3. 34 5
      libnetwork/vendor/github.com/vishvananda/netlink/addr_linux.go
  4. 344 0
      libnetwork/vendor/github.com/vishvananda/netlink/conntrack_linux.go
  5. 53 0
      libnetwork/vendor/github.com/vishvananda/netlink/conntrack_unspecified.go
  6. 40 1
      libnetwork/vendor/github.com/vishvananda/netlink/filter.go
  7. 40 5
      libnetwork/vendor/github.com/vishvananda/netlink/filter_linux.go
  8. 218 0
      libnetwork/vendor/github.com/vishvananda/netlink/handle_unspecified.go
  9. 147 5
      libnetwork/vendor/github.com/vishvananda/netlink/link.go
  10. 191 31
      libnetwork/vendor/github.com/vishvananda/netlink/link_linux.go
  11. 9 1
      libnetwork/vendor/github.com/vishvananda/netlink/netlink.go
  12. 4 3
      libnetwork/vendor/github.com/vishvananda/netlink/netlink_linux.go
  13. 96 18
      libnetwork/vendor/github.com/vishvananda/netlink/netlink_unspecified.go
  14. 189 0
      libnetwork/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go
  15. 34 0
      libnetwork/vendor/github.com/vishvananda/netlink/nl/link_linux.go
  16. 36 0
      libnetwork/vendor/github.com/vishvananda/netlink/nl/mpls_linux.go
  17. 27 5
      libnetwork/vendor/github.com/vishvananda/netlink/nl/nl_linux.go
  18. 11 0
      libnetwork/vendor/github.com/vishvananda/netlink/nl/nl_unspecified.go
  19. 27 1
      libnetwork/vendor/github.com/vishvananda/netlink/nl/route_linux.go
  20. 31 0
      libnetwork/vendor/github.com/vishvananda/netlink/nl/syscall.go
  21. 46 26
      libnetwork/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go
  22. 32 0
      libnetwork/vendor/github.com/vishvananda/netlink/nl/xfrm_monitor_linux.go
  23. 70 8
      libnetwork/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go
  24. 32 0
      libnetwork/vendor/github.com/vishvananda/netlink/order.go
  25. 1 4
      libnetwork/vendor/github.com/vishvananda/netlink/protinfo.go
  26. 23 17
      libnetwork/vendor/github.com/vishvananda/netlink/protinfo_linux.go
  27. 5 4
      libnetwork/vendor/github.com/vishvananda/netlink/qdisc.go
  28. 16 2
      libnetwork/vendor/github.com/vishvananda/netlink/qdisc_linux.go
  29. 55 5
      libnetwork/vendor/github.com/vishvananda/netlink/route.go
  30. 234 23
      libnetwork/vendor/github.com/vishvananda/netlink/route_linux.go
  31. 4 0
      libnetwork/vendor/github.com/vishvananda/netlink/route_unspecified.go
  32. 27 0
      libnetwork/vendor/github.com/vishvananda/netlink/socket.go
  33. 159 0
      libnetwork/vendor/github.com/vishvananda/netlink/socket_linux.go
  34. 98 0
      libnetwork/vendor/github.com/vishvananda/netlink/xfrm_monitor_linux.go
  35. 3 2
      libnetwork/vendor/github.com/vishvananda/netlink/xfrm_state.go
  36. 94 18
      libnetwork/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go

+ 1 - 1
libnetwork/vendor.conf

@@ -35,7 +35,7 @@ github.com/seccomp/libseccomp-golang 1b506fc7c24eec5a3693cdcbed40d9c226cfc6a1
 github.com/stretchr/testify dab07ac62d4905d3e48d17dc549c684ac3b7c15a
 github.com/syndtr/gocapability/capability 2c00daeb6c3b45114c80ac44119e7b8801fdd852
 github.com/ugorji/go f1f1a805ed361a0e078bb537e4ea78cd37dcf065
-github.com/vishvananda/netlink 17ea11b5a11c5614597c65a671105e8ee58c4d04
+github.com/vishvananda/netlink 1e86b2bee5b6a7d377e4c02bb7f98209d6a7297c
 github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25
 golang.org/x/net c427ad74c6d7a814201695e9ffde0c5d400a7674
 golang.org/x/sys 8f0908ab3b2457e2e15403d3697c9ef5cb4b57a9

+ 12 - 3
libnetwork/vendor/github.com/vishvananda/netlink/addr.go

@@ -10,9 +10,11 @@ import (
 // include a mask, so it stores the address as a net.IPNet.
 type Addr struct {
 	*net.IPNet
-	Label string
-	Flags int
-	Scope int
+	Label     string
+	Flags     int
+	Scope     int
+	Peer      *net.IPNet
+	Broadcast net.IP
 }
 
 // String returns $ip/$netmask $label
@@ -43,3 +45,10 @@ func (a Addr) Equal(x Addr) bool {
 	// ignore label for comparison
 	return a.IP.Equal(x.IP) && sizea == sizeb
 }
+
+func (a Addr) PeerEqual(x Addr) bool {
+	sizea, _ := a.Peer.Mask.Size()
+	sizeb, _ := x.Peer.Mask.Size()
+	// ignore label for comparison
+	return a.Peer.IP.Equal(x.Peer.IP) && sizea == sizeb
+}

+ 34 - 5
libnetwork/vendor/github.com/vishvananda/netlink/addr_linux.go

@@ -27,6 +27,19 @@ func (h *Handle) AddrAdd(link Link, addr *Addr) error {
 	return h.addrHandle(link, addr, req)
 }
 
+// AddrReplace will replace (or, if not present, add) an IP address on a link device.
+// Equivalent to: `ip addr replace $addr dev $link`
+func AddrReplace(link Link, addr *Addr) error {
+	return pkgHandle.AddrReplace(link, addr)
+}
+
+// AddrReplace will replace (or, if not present, add) an IP address on a link device.
+// Equivalent to: `ip addr replace $addr dev $link`
+func (h *Handle) AddrReplace(link Link, addr *Addr) error {
+	req := h.newNetlinkRequest(syscall.RTM_NEWADDR, syscall.NLM_F_CREATE|syscall.NLM_F_REPLACE|syscall.NLM_F_ACK)
+	return h.addrHandle(link, addr, req)
+}
+
 // AddrDel will delete an IP address from a link device.
 // Equivalent to: `ip addr del $addr dev $link`
 func AddrDel(link Link, addr *Addr) error {
@@ -56,17 +69,27 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error
 	msg.Prefixlen = uint8(prefixlen)
 	req.AddData(msg)
 
-	var addrData []byte
+	var localAddrData []byte
 	if family == FAMILY_V4 {
-		addrData = addr.IP.To4()
+		localAddrData = addr.IP.To4()
 	} else {
-		addrData = addr.IP.To16()
+		localAddrData = addr.IP.To16()
 	}
 
-	localData := nl.NewRtAttr(syscall.IFA_LOCAL, addrData)
+	localData := nl.NewRtAttr(syscall.IFA_LOCAL, localAddrData)
 	req.AddData(localData)
+	var peerAddrData []byte
+	if addr.Peer != nil {
+		if family == FAMILY_V4 {
+			peerAddrData = addr.Peer.IP.To4()
+		} else {
+			peerAddrData = addr.Peer.IP.To16()
+		}
+	} else {
+		peerAddrData = localAddrData
+	}
 
-	addressData := nl.NewRtAttr(syscall.IFA_ADDRESS, addrData)
+	addressData := nl.NewRtAttr(syscall.IFA_ADDRESS, peerAddrData)
 	req.AddData(addressData)
 
 	if addr.Flags != 0 {
@@ -80,6 +103,10 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error
 		}
 	}
 
+	if addr.Broadcast != nil {
+		req.AddData(nl.NewRtAttr(syscall.IFA_BROADCAST, addr.Broadcast))
+	}
+
 	if addr.Label != "" {
 		labelData := nl.NewRtAttr(syscall.IFA_LABEL, nl.ZeroTerminated(addr.Label))
 		req.AddData(labelData)
@@ -161,11 +188,13 @@ func parseAddr(m []byte) (addr Addr, family, index int, err error) {
 				IP:   attr.Value,
 				Mask: net.CIDRMask(int(msg.Prefixlen), 8*len(attr.Value)),
 			}
+			addr.Peer = dst
 		case syscall.IFA_LOCAL:
 			local = &net.IPNet{
 				IP:   attr.Value,
 				Mask: net.CIDRMask(int(msg.Prefixlen), 8*len(attr.Value)),
 			}
+			addr.IPNet = local
 		case syscall.IFA_LABEL:
 			addr.Label = string(attr.Value[:len(attr.Value)-1])
 		case IFA_FLAGS:

+ 344 - 0
libnetwork/vendor/github.com/vishvananda/netlink/conntrack_linux.go

@@ -0,0 +1,344 @@
+package netlink
+
+import (
+	"bytes"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"net"
+	"syscall"
+
+	"github.com/vishvananda/netlink/nl"
+)
+
+// ConntrackTableType Conntrack table for the netlink operation
+type ConntrackTableType uint8
+
+const (
+	// ConntrackTable Conntrack table
+	// https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink.h -> #define NFNL_SUBSYS_CTNETLINK		 1
+	ConntrackTable = 1
+	// ConntrackExpectTable Conntrack expect table
+	// https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink.h -> #define NFNL_SUBSYS_CTNETLINK_EXP 2
+	ConntrackExpectTable = 2
+)
+
+const (
+	// backward compatibility with golang 1.6 which does not have io.SeekCurrent
+	seekCurrent = 1
+)
+
+// InetFamily Family type
+type InetFamily uint8
+
+//  -L [table] [options]          List conntrack or expectation table
+//  -G [table] parameters         Get conntrack or expectation
+
+//  -I [table] parameters         Create a conntrack or expectation
+//  -U [table] parameters         Update a conntrack
+//  -E [table] [options]          Show events
+
+//  -C [table]                    Show counter
+//  -S                            Show statistics
+
+// ConntrackTableList returns the flow list of a table of a specific family
+// conntrack -L [table] [options]          List conntrack or expectation table
+func ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
+	return pkgHandle.ConntrackTableList(table, family)
+}
+
+// ConntrackTableFlush flushes all the flows of a specified table
+// conntrack -F [table]            Flush table
+// The flush operation applies to all the family types
+func ConntrackTableFlush(table ConntrackTableType) error {
+	return pkgHandle.ConntrackTableFlush(table)
+}
+
+// ConntrackDeleteFilter deletes entries from the specified table based on the filter
+// conntrack -D [table] parameters         Delete conntrack or expectation
+func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) {
+	return pkgHandle.ConntrackDeleteFilter(table, family, filter)
+}
+
+// ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed
+// conntrack -L [table] [options]          List conntrack or expectation table
+func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
+	res, err := h.dumpConntrackTable(table, family)
+	if err != nil {
+		return nil, err
+	}
+
+	// Deserialize all the flows
+	var result []*ConntrackFlow
+	for _, dataRaw := range res {
+		result = append(result, parseRawData(dataRaw))
+	}
+
+	return result, nil
+}
+
+// ConntrackTableFlush flushes all the flows of a specified table using the netlink handle passed
+// conntrack -F [table]            Flush table
+// The flush operation applies to all the family types
+func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error {
+	req := h.newConntrackRequest(table, syscall.AF_INET, nl.IPCTNL_MSG_CT_DELETE, syscall.NLM_F_ACK)
+	_, err := req.Execute(syscall.NETLINK_NETFILTER, 0)
+	return err
+}
+
+// ConntrackDeleteFilter deletes entries from the specified table based on the filter, using the netlink handle passed
+// conntrack -D [table] parameters         Delete conntrack or expectation
+func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) {
+	res, err := h.dumpConntrackTable(table, family)
+	if err != nil {
+		return 0, err
+	}
+
+	var matched uint
+	for _, dataRaw := range res {
+		flow := parseRawData(dataRaw)
+		if match := filter.MatchConntrackFlow(flow); match {
+			req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, syscall.NLM_F_ACK)
+			// skip the first 4 bytes, which are the netfilter header; newConntrackRequest already adds it
+			req2.AddRawData(dataRaw[4:])
+			req2.Execute(syscall.NETLINK_NETFILTER, 0)
+			matched++
+		}
+	}
+
+	return matched, nil
+}
+
+func (h *Handle) newConntrackRequest(table ConntrackTableType, family InetFamily, operation, flags int) *nl.NetlinkRequest {
+	// Create the Netlink request object
+	req := h.newNetlinkRequest((int(table)<<8)|operation, flags)
+	// Add the netfilter header
+	msg := &nl.Nfgenmsg{
+		NfgenFamily: uint8(family),
+		Version:     nl.NFNETLINK_V0,
+		ResId:       0,
+	}
+	req.AddData(msg)
+	return req
+}
+
+func (h *Handle) dumpConntrackTable(table ConntrackTableType, family InetFamily) ([][]byte, error) {
+	req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_GET, syscall.NLM_F_DUMP)
+	return req.Execute(syscall.NETLINK_NETFILTER, 0)
+}
+
+// The full conntrack flow structure is very complicated and can be found in the file:
+// http://git.netfilter.org/libnetfilter_conntrack/tree/include/internal/object.h
+// For the time being, the structure below allows parsing and extracting the base information of a flow
+type ipTuple struct {
+	SrcIP    net.IP
+	DstIP    net.IP
+	Protocol uint8
+	SrcPort  uint16
+	DstPort  uint16
+}
+
+type ConntrackFlow struct {
+	FamilyType uint8
+	Forward    ipTuple
+	Reverse    ipTuple
+}
+
+func (s *ConntrackFlow) String() string {
+	// conntrack cmd output:
+	// udp      17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001
+	return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d\tsrc=%s dst=%s sport=%d dport=%d",
+		nl.L4ProtoMap[s.Forward.Protocol], s.Forward.Protocol,
+		s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort,
+		s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort)
+}
+
+// This function parses the IP tuple structure
+// The message structure is the following:
+// <len, [CTA_IP_V4_SRC|CTA_IP_V6_SRC], 16 bytes for the IP>
+// <len, [CTA_IP_V4_DST|CTA_IP_V6_DST], 16 bytes for the IP>
+// <len, NLA_F_NESTED|nl.CTA_TUPLE_PROTO, 1 byte for the protocol, 3 bytes of padding>
+// <len, CTA_PROTO_SRC_PORT, 2 bytes for the source port, 2 bytes of padding>
+// <len, CTA_PROTO_DST_PORT, 2 bytes for the destination port, 2 bytes of padding>
+func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) {
+	for i := 0; i < 2; i++ {
+		_, t, _, v := parseNfAttrTLV(reader)
+		switch t {
+		case nl.CTA_IP_V4_SRC, nl.CTA_IP_V6_SRC:
+			tpl.SrcIP = v
+		case nl.CTA_IP_V4_DST, nl.CTA_IP_V6_DST:
+			tpl.DstIP = v
+		}
+	}
+	// Skip the next 4 bytes  nl.NLA_F_NESTED|nl.CTA_TUPLE_PROTO
+	reader.Seek(4, seekCurrent)
+	_, t, _, v := parseNfAttrTLV(reader)
+	if t == nl.CTA_PROTO_NUM {
+		tpl.Protocol = uint8(v[0])
+	}
+	// Skip some padding 3 bytes
+	reader.Seek(3, seekCurrent)
+	for i := 0; i < 2; i++ {
+		_, t, _ := parseNfAttrTL(reader)
+		switch t {
+		case nl.CTA_PROTO_SRC_PORT:
+			parseBERaw16(reader, &tpl.SrcPort)
+		case nl.CTA_PROTO_DST_PORT:
+			parseBERaw16(reader, &tpl.DstPort)
+		}
+		// Skip 2 bytes of padding
+		reader.Seek(2, seekCurrent)
+	}
+}
+
+func parseNfAttrTLV(r *bytes.Reader) (isNested bool, attrType, len uint16, value []byte) {
+	isNested, attrType, len = parseNfAttrTL(r)
+
+	value = make([]byte, len)
+	binary.Read(r, binary.BigEndian, &value)
+	return isNested, attrType, len, value
+}
+
+func parseNfAttrTL(r *bytes.Reader) (isNested bool, attrType, len uint16) {
+	binary.Read(r, nl.NativeEndian(), &len)
+	len -= nl.SizeofNfattr
+
+	binary.Read(r, nl.NativeEndian(), &attrType)
+	isNested = (attrType & nl.NLA_F_NESTED) == nl.NLA_F_NESTED
+	attrType = attrType & (nl.NLA_F_NESTED - 1)
+
+	return isNested, attrType, len
+}
+
+func parseBERaw16(r *bytes.Reader, v *uint16) {
+	binary.Read(r, binary.BigEndian, v)
+}
+
+func parseRawData(data []byte) *ConntrackFlow {
+	s := &ConntrackFlow{}
+	// First there is the Nfgenmsg header
+	// consume only the family field
+	reader := bytes.NewReader(data)
+	binary.Read(reader, nl.NativeEndian(), &s.FamilyType)
+
+	// skip rest of the Netfilter header
+	reader.Seek(3, seekCurrent)
+	// The message structure is the following:
+	// <len, NLA_F_NESTED|CTA_TUPLE_ORIG> 4 bytes
+	// <len, NLA_F_NESTED|CTA_TUPLE_IP> 4 bytes
+	// flow information of the forward flow
+	// <len, NLA_F_NESTED|CTA_TUPLE_REPLY> 4 bytes
+	// <len, NLA_F_NESTED|CTA_TUPLE_IP> 4 bytes
+	// flow information of the reverse flow
+	for reader.Len() > 0 {
+		nested, t, l := parseNfAttrTL(reader)
+		if nested && t == nl.CTA_TUPLE_ORIG {
+			if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
+				parseIpTuple(reader, &s.Forward)
+			}
+		} else if nested && t == nl.CTA_TUPLE_REPLY {
+			if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
+				parseIpTuple(reader, &s.Reverse)
+
+				// Got all the useful information stop parsing
+				break
+			} else {
+				// Header not recognized skip it
+				reader.Seek(int64(l), seekCurrent)
+			}
+		}
+	}
+
+	return s
+}
+
+// Conntrack parameters and options:
+//   -n, --src-nat ip                      source NAT ip
+//   -g, --dst-nat ip                      destination NAT ip
+//   -j, --any-nat ip                      source or destination NAT ip
+//   -m, --mark mark                       Set mark
+//   -c, --secmark secmark                 Set selinux secmark
+//   -e, --event-mask eventmask            Event mask, eg. NEW,DESTROY
+//   -z, --zero                            Zero counters while listing
+//   -o, --output type[,...]               Output format, eg. xml
+//   -l, --label label[,...]               conntrack labels
+
+// Common parameters and options:
+//   -s, --src, --orig-src ip              Source address from original direction
+//   -d, --dst, --orig-dst ip              Destination address from original direction
+//   -r, --reply-src ip            Source address from reply direction
+//   -q, --reply-dst ip            Destination address from reply direction
+//   -p, --protonum proto          Layer 4 Protocol, eg. 'tcp'
+//   -f, --family proto            Layer 3 Protocol, eg. 'ipv6'
+//   -t, --timeout timeout         Set timeout
+//   -u, --status status           Set status, eg. ASSURED
+//   -w, --zone value              Set conntrack zone
+//   --orig-zone value             Set zone for original direction
+//   --reply-zone value            Set zone for reply direction
+//   -b, --buffer-size             Netlink socket buffer size
+//   --mask-src ip                 Source mask address
+//   --mask-dst ip                 Destination mask address
+
+// Filter types
+type ConntrackFilterType uint8
+
+const (
+	ConntrackOrigSrcIP = iota // -orig-src ip   Source address from original direction
+	ConntrackOrigDstIP        // -orig-dst ip   Destination address from original direction
+	ConntrackNatSrcIP         // -src-nat ip    Source NAT ip
+	ConntrackNatDstIP         // -dst-nat ip    Destination NAT ip
+	ConntrackNatAnyIP         // -any-nat ip    Source or destination NAT ip
+)
+
+type ConntrackFilter struct {
+	ipFilter map[ConntrackFilterType]net.IP
+}
+
+// AddIP adds an IP to the conntrack filter
+func (f *ConntrackFilter) AddIP(tp ConntrackFilterType, ip net.IP) error {
+	if f.ipFilter == nil {
+		f.ipFilter = make(map[ConntrackFilterType]net.IP)
+	}
+	if _, ok := f.ipFilter[tp]; ok {
+		return errors.New("Filter attribute already present")
+	}
+	f.ipFilter[tp] = ip
+	return nil
+}
+
+// MatchConntrackFlow applies the filter to the flow and returns true if the flow matches the filter
+// false otherwise
+func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool {
+	if len(f.ipFilter) == 0 {
+		// an empty filter never matches
+		return false
+	}
+
+	match := true
+	// -orig-src ip   Source address from original direction
+	if elem, found := f.ipFilter[ConntrackOrigSrcIP]; found {
+		match = match && elem.Equal(flow.Forward.SrcIP)
+	}
+
+	// -orig-dst ip   Destination address from original direction
+	if elem, found := f.ipFilter[ConntrackOrigDstIP]; match && found {
+		match = match && elem.Equal(flow.Forward.DstIP)
+	}
+
+	// -src-nat ip    Source NAT ip
+	if elem, found := f.ipFilter[ConntrackNatSrcIP]; match && found {
+		match = match && elem.Equal(flow.Reverse.SrcIP)
+	}
+
+	// -dst-nat ip    Destination NAT ip
+	if elem, found := f.ipFilter[ConntrackNatDstIP]; match && found {
+		match = match && elem.Equal(flow.Reverse.DstIP)
+	}
+
+	// -any-nat ip    Source or destination NAT ip
+	if elem, found := f.ipFilter[ConntrackNatAnyIP]; match && found {
+		match = match && (elem.Equal(flow.Reverse.SrcIP) || elem.Equal(flow.Reverse.DstIP))
+	}
+
+	return match
+}

+ 53 - 0
libnetwork/vendor/github.com/vishvananda/netlink/conntrack_unspecified.go

@@ -0,0 +1,53 @@
+// +build !linux
+
+package netlink
+
+// ConntrackTableType Conntrack table for the netlink operation
+type ConntrackTableType uint8
+
+// InetFamily Family type
+type InetFamily uint8
+
+// ConntrackFlow placeholder
+type ConntrackFlow struct{}
+
+// ConntrackFilter placeholder
+type ConntrackFilter struct{}
+
+// ConntrackTableList returns the flow list of a table of a specific family
+// conntrack -L [table] [options]          List conntrack or expectation table
+func ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
+	return nil, ErrNotImplemented
+}
+
+// ConntrackTableFlush flushes all the flows of a specified table
+// conntrack -F [table]            Flush table
+// The flush operation applies to all the family types
+func ConntrackTableFlush(table ConntrackTableType) error {
+	return ErrNotImplemented
+}
+
+// ConntrackDeleteFilter deletes entries from the specified table based on the filter
+// conntrack -D [table] parameters         Delete conntrack or expectation
+func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) {
+	return 0, ErrNotImplemented
+}
+
+// ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed
+// conntrack -L [table] [options]          List conntrack or expectation table
+func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
+	return nil, ErrNotImplemented
+}
+
+// ConntrackTableFlush flushes all the flows of a specified table using the netlink handle passed
+// conntrack -F [table]            Flush table
+// The flush operation applies to all the family types
+func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error {
+	return ErrNotImplemented
+}
+
+// ConntrackDeleteFilter deletes entries from the specified table based on the filter, using the netlink handle passed
+// conntrack -D [table] parameters         Delete conntrack or expectation
+func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) {
+	return 0, ErrNotImplemented
+}

+ 40 - 1
libnetwork/vendor/github.com/vishvananda/netlink/filter.go

@@ -1,6 +1,10 @@
 package netlink
 
-import "fmt"
+import (
+	"fmt"
+
+	"github.com/vishvananda/netlink/nl"
+)
 
 type Filter interface {
 	Attrs() *FilterAttrs
@@ -180,11 +184,46 @@ func NewMirredAction(redirIndex int) *MirredAction {
 	}
 }
 
+// Constants used in TcU32Sel.Flags.
+const (
+	TC_U32_TERMINAL  = nl.TC_U32_TERMINAL
+	TC_U32_OFFSET    = nl.TC_U32_OFFSET
+	TC_U32_VAROFFSET = nl.TC_U32_VAROFFSET
+	TC_U32_EAT       = nl.TC_U32_EAT
+)
+
+// Sel of the U32 filters that contains multiple TcU32Key. This is the copy
+// and the frontend representation of nl.TcU32Sel. It is serialized into canonical
+// nl.TcU32Sel with the appropriate endianness.
+type TcU32Sel struct {
+	Flags    uint8
+	Offshift uint8
+	Nkeys    uint8
+	Pad      uint8
+	Offmask  uint16
+	Off      uint16
+	Offoff   int16
+	Hoff     int16
+	Hmask    uint32
+	Keys     []TcU32Key
+}
+
+// TcU32Key is a key contained in the Sel of a U32 filter. This is the copy and the
+// frontend representation of nl.TcU32Key. It is serialized into the canonical
+// nl.TcU32Sel with the appropriate endianness.
+type TcU32Key struct {
+	Mask    uint32
+	Val     uint32
+	Off     int32
+	OffMask int32
+}
+
 // U32 filters on many packet related properties
 type U32 struct {
 	FilterAttrs
 	ClassId    uint32
 	RedirIndex int
+	Sel        *TcU32Sel
 	Actions    []Action
 }
 

+ 40 - 5
libnetwork/vendor/github.com/vishvananda/netlink/filter_linux.go

@@ -6,6 +6,7 @@ import (
 	"errors"
 	"fmt"
 	"syscall"
+	"unsafe"
 
 	"github.com/vishvananda/netlink/nl"
 )
@@ -128,12 +129,34 @@ func (h *Handle) FilterAdd(filter Filter) error {
 
 	options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
 	if u32, ok := filter.(*U32); ok {
-		// match all
-		sel := nl.TcU32Sel{
-			Nkeys: 1,
-			Flags: nl.TC_U32_TERMINAL,
+		// Convert TcU32Sel into nl.TcU32Sel as it is without copy.
+		sel := (*nl.TcU32Sel)(unsafe.Pointer(u32.Sel))
+		if sel == nil {
+			// match all
+			sel = &nl.TcU32Sel{
+				Nkeys: 1,
+				Flags: nl.TC_U32_TERMINAL,
+			}
+			sel.Keys = append(sel.Keys, nl.TcU32Key{})
+		}
+
+		if native != networkOrder {
+			// Copy Tcu32Sel.
+			cSel := sel
+			keys := make([]nl.TcU32Key, cap(sel.Keys))
+			copy(keys, sel.Keys)
+			cSel.Keys = keys
+			sel = cSel
+
+			// Handle the endianness of attributes
+			sel.Offmask = native.Uint16(htons(sel.Offmask))
+			sel.Hmask = native.Uint32(htonl(sel.Hmask))
+			for _, key := range sel.Keys {
+				key.Mask = native.Uint32(htonl(key.Mask))
+				key.Val = native.Uint32(htonl(key.Val))
+			}
 		}
-		sel.Keys = append(sel.Keys, nl.TcU32Key{})
+		sel.Nkeys = uint8(len(sel.Keys))
 		nl.NewRtAttrChild(options, nl.TCA_U32_SEL, sel.Serialize())
 		if u32.ClassId != 0 {
 			nl.NewRtAttrChild(options, nl.TCA_U32_CLASSID, nl.Uint32Attr(u32.ClassId))
@@ -425,6 +448,16 @@ func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error)
 		case nl.TCA_U32_SEL:
 			detailed = true
 			sel := nl.DeserializeTcU32Sel(datum.Value)
+			u32.Sel = (*TcU32Sel)(unsafe.Pointer(sel))
+			if native != networkOrder {
+				// Handle the endianness of attributes
+				u32.Sel.Offmask = native.Uint16(htons(sel.Offmask))
+				u32.Sel.Hmask = native.Uint32(htonl(sel.Hmask))
+				for _, key := range u32.Sel.Keys {
+					key.Mask = native.Uint32(htonl(key.Mask))
+					key.Val = native.Uint32(htonl(key.Val))
+				}
+			}
 			// only parse if we have a very basic redirect
 			if sel.Flags&nl.TC_U32_TERMINAL == 0 || sel.Nkeys != 1 {
 				return detailed, nil
@@ -443,6 +476,8 @@ func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error)
 					u32.RedirIndex = int(action.Ifindex)
 				}
 			}
+		case nl.TCA_U32_CLASSID:
+			u32.ClassId = native.Uint32(datum.Value)
 		}
 	}
 	return detailed, nil

+ 218 - 0
libnetwork/vendor/github.com/vishvananda/netlink/handle_unspecified.go

@@ -0,0 +1,218 @@
+// +build !linux
+
+package netlink
+
+import (
+	"net"
+	"time"
+
+	"github.com/vishvananda/netns"
+)
+
+type Handle struct{}
+
+func NewHandle(nlFamilies ...int) (*Handle, error) {
+	return nil, ErrNotImplemented
+}
+
+func NewHandleAt(ns netns.NsHandle, nlFamilies ...int) (*Handle, error) {
+	return nil, ErrNotImplemented
+}
+
+func NewHandleAtFrom(newNs, curNs netns.NsHandle) (*Handle, error) {
+	return nil, ErrNotImplemented
+}
+
+func (h *Handle) Delete() {}
+
+func (h *Handle) SupportsNetlinkFamily(nlFamily int) bool {
+	return false
+}
+
+func (h *Handle) SetSocketTimeout(to time.Duration) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) SetPromiscOn(link Link) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) SetPromiscOff(link Link) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkSetUp(link Link) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkSetDown(link Link) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkSetMTU(link Link, mtu int) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkSetName(link Link, name string) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkSetAlias(link Link, name string) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkSetHardwareAddr(link Link, hwaddr net.HardwareAddr) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkSetVfHardwareAddr(link Link, vf int, hwaddr net.HardwareAddr) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkSetVfVlan(link Link, vf, vlan int) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkSetVfTxRate(link Link, vf, rate int) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkSetMaster(link Link, master *Bridge) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkSetNoMaster(link Link) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkSetMasterByIndex(link Link, masterIndex int) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkSetNsPid(link Link, nspid int) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkSetNsFd(link Link, fd int) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkAdd(link Link) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkDel(link Link) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkByName(name string) (Link, error) {
+	return nil, ErrNotImplemented
+}
+
+func (h *Handle) LinkByAlias(alias string) (Link, error) {
+	return nil, ErrNotImplemented
+}
+
+func (h *Handle) LinkByIndex(index int) (Link, error) {
+	return nil, ErrNotImplemented
+}
+
+func (h *Handle) LinkList() ([]Link, error) {
+	return nil, ErrNotImplemented
+}
+
+func (h *Handle) LinkSetHairpin(link Link, mode bool) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkSetGuard(link Link, mode bool) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkSetFastLeave(link Link, mode bool) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkSetLearning(link Link, mode bool) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkSetRootBlock(link Link, mode bool) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) LinkSetFlood(link Link, mode bool) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) AddrAdd(link Link, addr *Addr) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) AddrDel(link Link, addr *Addr) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) AddrList(link Link, family int) ([]Addr, error) {
+	return nil, ErrNotImplemented
+}
+
+func (h *Handle) ClassDel(class Class) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) ClassChange(class Class) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) ClassReplace(class Class) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) ClassAdd(class Class) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) ClassList(link Link, parent uint32) ([]Class, error) {
+	return nil, ErrNotImplemented
+}
+
+func (h *Handle) FilterDel(filter Filter) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) FilterAdd(filter Filter) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) {
+	return nil, ErrNotImplemented
+}
+
+func (h *Handle) NeighAdd(neigh *Neigh) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) NeighSet(neigh *Neigh) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) NeighAppend(neigh *Neigh) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) NeighDel(neigh *Neigh) error {
+	return ErrNotImplemented
+}
+
+func (h *Handle) NeighList(linkIndex, family int) ([]Neigh, error) {
+	return nil, ErrNotImplemented
+}
+
+func (h *Handle) NeighProxyList(linkIndex, family int) ([]Neigh, error) {
+	return nil, ErrNotImplemented
+}

+ 147 - 5
libnetwork/vendor/github.com/vishvananda/netlink/link.go

@@ -35,6 +35,41 @@ type LinkAttrs struct {
 	Promisc      int
 	Xdp          *LinkXdp
 	EncapType    string
+	Protinfo     *Protinfo
+	OperState    LinkOperState
+}
+
+// LinkOperState represents the values of the IFLA_OPERSTATE link
+// attribute, which contains the RFC2863 state of the interface.
+type LinkOperState uint8
+
+const (
+	OperUnknown        = iota // Status can't be determined.
+	OperNotPresent            // Some component is missing.
+	OperDown                  // Down.
+	OperLowerLayerDown        // Down due to state of lower layer.
+	OperTesting               // In some test mode.
+	OperDormant               // Not up but pending an external event.
+	OperUp                    // Up, ready to send packets.
+)
+
+func (s LinkOperState) String() string {
+	switch s {
+	case OperNotPresent:
+		return "not-present"
+	case OperDown:
+		return "down"
+	case OperLowerLayerDown:
+		return "lower-layer-down"
+	case OperTesting:
+		return "testing"
+	case OperDormant:
+		return "dormant"
+	case OperUp:
+		return "up"
+	default:
+		return "unknown"
+	}
 }
 
 // NewLinkAttrs returns LinkAttrs structure filled with default values
@@ -44,10 +79,12 @@ func NewLinkAttrs() LinkAttrs {
 	}
 }
 
+// LinkStatistics is the statistics type stored in LinkAttrs.Statistics. It is
+// defined in terms of LinkStatistics64, so both share the same field layout.
+type LinkStatistics LinkStatistics64
+
 /*
 Ref: struct rtnl_link_stats {...}
 */
-type LinkStatistics struct {
+type LinkStatistics32 struct {
 	RxPackets         uint32
 	TxPackets         uint32
 	RxBytes           uint32
@@ -73,6 +110,63 @@ type LinkStatistics struct {
 	TxCompressed      uint32
 }
 
+// to64 widens every 32-bit counter of s32 into a newly allocated
+// LinkStatistics64 and returns a pointer to it.
+func (s32 LinkStatistics32) to64() *LinkStatistics64 {
+	wide := new(LinkStatistics64)
+
+	// Packet and byte totals.
+	wide.RxPackets = uint64(s32.RxPackets)
+	wide.TxPackets = uint64(s32.TxPackets)
+	wide.RxBytes = uint64(s32.RxBytes)
+	wide.TxBytes = uint64(s32.TxBytes)
+
+	// Generic error, drop, multicast and collision counters.
+	wide.RxErrors = uint64(s32.RxErrors)
+	wide.TxErrors = uint64(s32.TxErrors)
+	wide.RxDropped = uint64(s32.RxDropped)
+	wide.TxDropped = uint64(s32.TxDropped)
+	wide.Multicast = uint64(s32.Multicast)
+	wide.Collisions = uint64(s32.Collisions)
+
+	// Detailed receive errors.
+	wide.RxLengthErrors = uint64(s32.RxLengthErrors)
+	wide.RxOverErrors = uint64(s32.RxOverErrors)
+	wide.RxCrcErrors = uint64(s32.RxCrcErrors)
+	wide.RxFrameErrors = uint64(s32.RxFrameErrors)
+	wide.RxFifoErrors = uint64(s32.RxFifoErrors)
+	wide.RxMissedErrors = uint64(s32.RxMissedErrors)
+
+	// Detailed transmit errors.
+	wide.TxAbortedErrors = uint64(s32.TxAbortedErrors)
+	wide.TxCarrierErrors = uint64(s32.TxCarrierErrors)
+	wide.TxFifoErrors = uint64(s32.TxFifoErrors)
+	wide.TxHeartbeatErrors = uint64(s32.TxHeartbeatErrors)
+	wide.TxWindowErrors = uint64(s32.TxWindowErrors)
+
+	// Compression counters.
+	wide.RxCompressed = uint64(s32.RxCompressed)
+	wide.TxCompressed = uint64(s32.TxCompressed)
+
+	return wide
+}
+
+/*
+LinkStatistics64 holds the per-link counters as 23 unsigned 64-bit fields.
+
+Ref: struct rtnl_link_stats64 {...}
+*/
+type LinkStatistics64 struct {
+	RxPackets         uint64
+	TxPackets         uint64
+	RxBytes           uint64
+	TxBytes           uint64
+	RxErrors          uint64
+	TxErrors          uint64
+	RxDropped         uint64
+	TxDropped         uint64
+	Multicast         uint64
+	Collisions        uint64
+	RxLengthErrors    uint64
+	RxOverErrors      uint64
+	RxCrcErrors       uint64
+	RxFrameErrors     uint64
+	RxFifoErrors      uint64
+	RxMissedErrors    uint64
+	TxAbortedErrors   uint64
+	TxCarrierErrors   uint64
+	TxFifoErrors      uint64
+	TxHeartbeatErrors uint64
+	TxWindowErrors    uint64
+	RxCompressed      uint64
+	TxCompressed      uint64
+}
+
 type LinkXdp struct {
 	Fd       int
 	Attached bool
@@ -301,31 +395,31 @@ func StringToBondMode(s string) BondMode {
 
 // Possible BondMode
 const (
-	BOND_MODE_802_3AD BondMode = iota
-	BOND_MODE_BALANCE_RR
+	BOND_MODE_BALANCE_RR BondMode = iota
 	BOND_MODE_ACTIVE_BACKUP
 	BOND_MODE_BALANCE_XOR
 	BOND_MODE_BROADCAST
+	BOND_MODE_802_3AD
 	BOND_MODE_BALANCE_TLB
 	BOND_MODE_BALANCE_ALB
 	BOND_MODE_UNKNOWN
 )
 
 var bondModeToString = map[BondMode]string{
-	BOND_MODE_802_3AD:       "802.3ad",
 	BOND_MODE_BALANCE_RR:    "balance-rr",
 	BOND_MODE_ACTIVE_BACKUP: "active-backup",
 	BOND_MODE_BALANCE_XOR:   "balance-xor",
 	BOND_MODE_BROADCAST:     "broadcast",
+	BOND_MODE_802_3AD:       "802.3ad",
 	BOND_MODE_BALANCE_TLB:   "balance-tlb",
 	BOND_MODE_BALANCE_ALB:   "balance-alb",
 }
 var StringToBondModeMap = map[string]BondMode{
-	"802.3ad":       BOND_MODE_802_3AD,
 	"balance-rr":    BOND_MODE_BALANCE_RR,
 	"active-backup": BOND_MODE_ACTIVE_BACKUP,
 	"balance-xor":   BOND_MODE_BALANCE_XOR,
 	"broadcast":     BOND_MODE_BROADCAST,
+	"802.3ad":       BOND_MODE_802_3AD,
 	"balance-tlb":   BOND_MODE_BALANCE_TLB,
 	"balance-alb":   BOND_MODE_BALANCE_ALB,
 }
@@ -589,6 +683,54 @@ func (gretap *Gretap) Type() string {
 	return "gretap"
 }
 
+// Iptun describes a link whose Type() is "ipip". Local and Remote are only
+// serialized when they have an IPv4 form (To4 != nil).
+type Iptun struct {
+	LinkAttrs
+	Ttl      uint8
+	Tos      uint8
+	PMtuDisc uint8
+	Link     uint32
+	Local    net.IP
+	Remote   net.IP
+}
+
+// Attrs returns a pointer to the embedded LinkAttrs.
+func (iptun *Iptun) Attrs() *LinkAttrs {
+	return &iptun.LinkAttrs
+}
+
+// Type returns the link kind string "ipip".
+func (iptun *Iptun) Type() string {
+	return "ipip"
+}
+
+// Vti describes a link whose Type() is "vti". IKey and OKey are converted
+// with htonl/ntohl when written to or read from netlink attributes.
+type Vti struct {
+	LinkAttrs
+	IKey   uint32
+	OKey   uint32
+	Link   uint32
+	Local  net.IP
+	Remote net.IP
+}
+
+// Attrs returns a pointer to the embedded LinkAttrs.
+func (vti *Vti) Attrs() *LinkAttrs {
+	return &vti.LinkAttrs
+}
+
+// Type returns the link kind string "vti".
+func (vti *Vti) Type() string {
+	return "vti"
+}
+
+// Vrf describes a link whose Type() is "vrf". Table is the value carried in
+// the IFLA_VRF_TABLE attribute.
+type Vrf struct {
+	LinkAttrs
+	Table uint32
+}
+
+// Attrs returns a pointer to the embedded LinkAttrs.
+func (vrf *Vrf) Attrs() *LinkAttrs {
+	return &vrf.LinkAttrs
+}
+
+// Type returns the link kind string "vrf".
+func (vrf *Vrf) Type() string {
+	return "vrf"
+}
+
 // iproute2 supported devices;
 // vlan | veth | vcan | dummy | ifb | macvlan | macvtap |
 // bridge | bond | ipoib | ip6tnl | ipip | sit | vxlan |

+ 191 - 31
libnetwork/vendor/github.com/vishvananda/netlink/link_linux.go

@@ -13,7 +13,11 @@ import (
 	"github.com/vishvananda/netns"
 )
 
-const SizeofLinkStats = 0x5c
+// Byte sizes of the link statistics payloads, plus an attribute type that the
+// syscall package lacks. Each size must equal the size of the Go struct the
+// payload is reinterpreted as: LinkStatistics32 has 23 uint32 counters (0x5c)
+// and LinkStatistics64 has 23 uint64 counters (0xb8). A larger value would
+// make parseLinkStats64 slice past the length of the attribute payload.
+const (
+	SizeofLinkStats32 = 0x5c
+	SizeofLinkStats64 = 0xb8
+	IFLA_STATS64      = 0x17 // syscall pkg does not contain this one
+)
 
 const (
 	TUNTAP_MODE_TUN  TuntapMode = syscall.IFF_TUN
@@ -25,7 +29,6 @@ const (
 	TUNTAP_ONE_QUEUE TuntapFlag = syscall.IFF_ONE_QUEUE
 )
 
-var native = nl.NativeEndian()
 var lookupByDump = false
 
 var macvlanModes = [...]uint32{
@@ -55,6 +58,44 @@ func (h *Handle) ensureIndex(link *LinkAttrs) {
 	}
 }
 
+// LinkSetARPOff sends an RTM_SETLINK request for link with IFF_NOARP present
+// in both the change mask and the flags, i.e. it turns the NOARP flag on.
+func (h *Handle) LinkSetARPOff(link Link) error {
+	attrs := link.Attrs()
+	h.ensureIndex(attrs)
+	request := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+
+	ifi := nl.NewIfInfomsg(syscall.AF_UNSPEC)
+	ifi.Index = int32(attrs.Index)
+	ifi.Change |= syscall.IFF_NOARP
+	ifi.Flags |= syscall.IFF_NOARP
+	request.AddData(ifi)
+
+	// Only the error matters; the ACK payload is discarded.
+	if _, err := request.Execute(syscall.NETLINK_ROUTE, 0); err != nil {
+		return err
+	}
+	return nil
+}
+
+// LinkSetARPOff calls LinkSetARPOff on the package-level handle (pkgHandle).
+func LinkSetARPOff(link Link) error {
+	return pkgHandle.LinkSetARPOff(link)
+}
+
+// LinkSetARPOn sends an RTM_SETLINK request for link with IFF_NOARP present
+// in the change mask but cleared in the flags, i.e. it turns the NOARP flag
+// off.
+func (h *Handle) LinkSetARPOn(link Link) error {
+	attrs := link.Attrs()
+	h.ensureIndex(attrs)
+	request := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+
+	ifi := nl.NewIfInfomsg(syscall.AF_UNSPEC)
+	ifi.Index = int32(attrs.Index)
+	ifi.Change |= syscall.IFF_NOARP
+	ifi.Flags &^= syscall.IFF_NOARP
+	request.AddData(ifi)
+
+	// Only the error matters; the ACK payload is discarded.
+	if _, err := request.Execute(syscall.NETLINK_ROUTE, 0); err != nil {
+		return err
+	}
+	return nil
+}
+
+// LinkSetARPOn calls LinkSetARPOn on the package-level handle (pkgHandle).
+func LinkSetARPOn(link Link) error {
+	return pkgHandle.LinkSetARPOn(link)
+}
+
 func (h *Handle) SetPromiscOn(link Link) error {
 	base := link.Attrs()
 	h.ensureIndex(base)
@@ -783,6 +824,12 @@ func (h *Handle) LinkAdd(link Link) error {
 		}
 	} else if gretap, ok := link.(*Gretap); ok {
 		addGretapAttrs(gretap, linkInfo)
+	} else if iptun, ok := link.(*Iptun); ok {
+		addIptunAttrs(iptun, linkInfo)
+	} else if vti, ok := link.(*Vti); ok {
+		addVtiAttrs(vti, linkInfo)
+	} else if vrf, ok := link.(*Vrf); ok {
+		addVrfAttrs(vrf, linkInfo)
 	}
 
 	req.AddData(linkInfo)
@@ -949,7 +996,7 @@ func execGetLink(req *nl.NetlinkRequest) (Link, error) {
 		return nil, fmt.Errorf("Link not found")
 
 	case len(msgs) == 1:
-		return linkDeserialize(msgs[0])
+		return LinkDeserialize(nil, msgs[0])
 
 	default:
 		return nil, fmt.Errorf("More than one link found")
@@ -958,7 +1005,7 @@ func execGetLink(req *nl.NetlinkRequest) (Link, error) {
 
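-// linkDeserialize deserializes a raw message received from netlink into
-// a link object.
+// LinkDeserialize deserializes a raw message received from netlink into
+// a link object. hdr may be nil; bridge protinfo is only parsed when hdr is
+// non-nil, its type is RTM_NEWLINK and the message family is AF_BRIDGE.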
-func linkDeserialize(m []byte) (Link, error) {
+func LinkDeserialize(hdr *syscall.NlMsghdr, m []byte) (Link, error) {
 	msg := nl.DeserializeIfInfomsg(m)
 
 	attrs, err := nl.ParseRouteAttr(m[msg.Len():])
@@ -970,8 +1017,12 @@ func linkDeserialize(m []byte) (Link, error) {
 	if msg.Flags&syscall.IFF_PROMISC != 0 {
 		base.Promisc = 1
 	}
-	var link Link
-	linkType := ""
+	var (
+		link     Link
+		stats32  []byte
+		stats64  []byte
+		linkType string
+	)
 	for _, attr := range attrs {
 		switch attr.Attr.Type {
 		case syscall.IFLA_LINKINFO:
@@ -1006,6 +1057,12 @@ func linkDeserialize(m []byte) (Link, error) {
 						link = &Macvtap{}
 					case "gretap":
 						link = &Gretap{}
+					case "ipip":
+						link = &Iptun{}
+					case "vti":
+						link = &Vti{}
+					case "vrf":
+						link = &Vrf{}
 					default:
 						link = &GenericLink{LinkType: linkType}
 					}
@@ -1029,6 +1086,12 @@ func linkDeserialize(m []byte) (Link, error) {
 						parseMacvtapData(link, data)
 					case "gretap":
 						parseGretapData(link, data)
+					case "ipip":
+						parseIptunData(link, data)
+					case "vti":
+						parseVtiData(link, data)
+					case "vrf":
+						parseVrfData(link, data)
 					}
 				}
 			}
@@ -1055,15 +1118,35 @@ func linkDeserialize(m []byte) (Link, error) {
 		case syscall.IFLA_IFALIAS:
 			base.Alias = string(attr.Value[:len(attr.Value)-1])
 		case syscall.IFLA_STATS:
-			base.Statistics = parseLinkStats(attr.Value[:])
+			stats32 = attr.Value[:]
+		case IFLA_STATS64:
+			stats64 = attr.Value[:]
 		case nl.IFLA_XDP:
 			xdp, err := parseLinkXdp(attr.Value[:])
 			if err != nil {
 				return nil, err
 			}
 			base.Xdp = xdp
+		case syscall.IFLA_PROTINFO | syscall.NLA_F_NESTED:
+			if hdr != nil && hdr.Type == syscall.RTM_NEWLINK &&
+				msg.Family == syscall.AF_BRIDGE {
+				attrs, err := nl.ParseRouteAttr(attr.Value[:])
+				if err != nil {
+					return nil, err
+				}
+				base.Protinfo = parseProtinfo(attrs)
+			}
+		case syscall.IFLA_OPERSTATE:
+			base.OperState = LinkOperState(uint8(attr.Value[0]))
 		}
 	}
+
+	if stats64 != nil {
+		base.Statistics = parseLinkStats64(stats64)
+	} else if stats32 != nil {
+		base.Statistics = parseLinkStats32(stats32)
+	}
+
 	// Links that don't have IFLA_INFO_KIND are hardware devices
 	if link == nil {
 		link = &Device{}
@@ -1096,7 +1179,7 @@ func (h *Handle) LinkList() ([]Link, error) {
 
 	var res []Link
 	for _, m := range msgs {
-		link, err := linkDeserialize(m)
+		link, err := LinkDeserialize(nil, m)
 		if err != nil {
 			return nil, err
 		}
@@ -1145,7 +1228,7 @@ func linkSubscribe(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-cha
 			}
 			for _, m := range msgs {
 				ifmsg := nl.DeserializeIfInfomsg(m.Data)
-				link, err := linkDeserialize(m.Data)
+				link, err := LinkDeserialize(&m.Header, m.Data)
 				if err != nil {
 					return
 				}
@@ -1397,26 +1480,6 @@ func linkFlags(rawFlags uint32) net.Flags {
 	return f
 }
 
-func htonl(val uint32) []byte {
-	bytes := make([]byte, 4)
-	binary.BigEndian.PutUint32(bytes, val)
-	return bytes
-}
-
-func htons(val uint16) []byte {
-	bytes := make([]byte, 2)
-	binary.BigEndian.PutUint16(bytes, val)
-	return bytes
-}
-
-func ntohl(buf []byte) uint32 {
-	return binary.BigEndian.Uint32(buf)
-}
-
-func ntohs(buf []byte) uint16 {
-	return binary.BigEndian.Uint16(buf)
-}
-
 func addGretapAttrs(gretap *Gretap, linkInfo *nl.RtAttr) {
 	data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
 
@@ -1490,8 +1553,12 @@ func parseGretapData(link Link, data []syscall.NetlinkRouteAttr) {
 	}
 }
 
-func parseLinkStats(data []byte) *LinkStatistics {
-	return (*LinkStatistics)(unsafe.Pointer(&data[0:SizeofLinkStats][0]))
+// parseLinkStats32 reinterprets the first SizeofLinkStats32 bytes of data as a
+// LinkStatistics32 and returns its counters widened to 64 bits.
+func parseLinkStats32(data []byte) *LinkStatistics {
+	narrow := (*LinkStatistics32)(unsafe.Pointer(&data[0:SizeofLinkStats32][0]))
+	return (*LinkStatistics)(narrow.to64())
+}
+
+// parseLinkStats64 reinterprets the first SizeofLinkStats64 bytes of data as a
+// LinkStatistics64. No copy is made: the result points into data.
+func parseLinkStats64(data []byte) *LinkStatistics {
+	wide := (*LinkStatistics64)(unsafe.Pointer(&data[0:SizeofLinkStats64][0]))
+	return (*LinkStatistics)(wide)
 }
 
 func addXdpAttrs(xdp *LinkXdp, req *nl.NetlinkRequest) {
@@ -1518,3 +1585,96 @@ func parseLinkXdp(data []byte) (*LinkXdp, error) {
 	}
 	return xdp, nil
 }
+
+// addIptunAttrs appends an IFLA_INFO_DATA child to linkInfo and fills it with
+// the tunnel settings of iptun. Local and Remote are emitted only when they
+// have an IPv4 form, Link only when non-zero; PMTUDISC, TTL and TOS are always
+// emitted.
+func addIptunAttrs(iptun *Iptun, linkInfo *nl.RtAttr) {
+	info := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
+
+	if local := iptun.Local.To4(); local != nil {
+		nl.NewRtAttrChild(info, nl.IFLA_IPTUN_LOCAL, []byte(local))
+	}
+	if remote := iptun.Remote.To4(); remote != nil {
+		nl.NewRtAttrChild(info, nl.IFLA_IPTUN_REMOTE, []byte(remote))
+	}
+	if iptun.Link != 0 {
+		nl.NewRtAttrChild(info, nl.IFLA_IPTUN_LINK, nl.Uint32Attr(iptun.Link))
+	}
+
+	nl.NewRtAttrChild(info, nl.IFLA_IPTUN_PMTUDISC, nl.Uint8Attr(iptun.PMtuDisc))
+	nl.NewRtAttrChild(info, nl.IFLA_IPTUN_TTL, nl.Uint8Attr(iptun.Ttl))
+	nl.NewRtAttrChild(info, nl.IFLA_IPTUN_TOS, nl.Uint8Attr(iptun.Tos))
+}
+
+// parseIptunData copies the IFLA_IPTUN_* attributes found in data into link,
+// which must be an *Iptun. Attributes whose payload is shorter than the field
+// they describe are skipped instead of causing an out-of-range panic.
+func parseIptunData(link Link, data []syscall.NetlinkRouteAttr) {
+	iptun := link.(*Iptun)
+	for _, datum := range data {
+		val := datum.Value
+		switch datum.Attr.Type {
+		case nl.IFLA_IPTUN_LINK:
+			// Written by addIptunAttrs with nl.Uint32Attr, i.e. native order.
+			if len(val) >= 4 {
+				iptun.Link = native.Uint32(val[0:4])
+			}
+		case nl.IFLA_IPTUN_LOCAL:
+			if len(val) >= 4 {
+				iptun.Local = net.IP(val[0:4])
+			}
+		case nl.IFLA_IPTUN_REMOTE:
+			if len(val) >= 4 {
+				iptun.Remote = net.IP(val[0:4])
+			}
+		case nl.IFLA_IPTUN_TTL:
+			if len(val) >= 1 {
+				iptun.Ttl = uint8(val[0])
+			}
+		case nl.IFLA_IPTUN_TOS:
+			if len(val) >= 1 {
+				iptun.Tos = uint8(val[0])
+			}
+		case nl.IFLA_IPTUN_PMTUDISC:
+			if len(val) >= 1 {
+				iptun.PMtuDisc = uint8(val[0])
+			}
+		}
+	}
+}
+
+// addVtiAttrs appends an IFLA_INFO_DATA child to linkInfo and fills it with
+// the settings of vti. Local and Remote are emitted only when they have an
+// IPv4 form, Link only when non-zero; both keys are always emitted, converted
+// with htonl.
+func addVtiAttrs(vti *Vti, linkInfo *nl.RtAttr) {
+	info := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
+
+	if local := vti.Local.To4(); local != nil {
+		nl.NewRtAttrChild(info, nl.IFLA_VTI_LOCAL, []byte(local))
+	}
+	if remote := vti.Remote.To4(); remote != nil {
+		nl.NewRtAttrChild(info, nl.IFLA_VTI_REMOTE, []byte(remote))
+	}
+	if vti.Link != 0 {
+		nl.NewRtAttrChild(info, nl.IFLA_VTI_LINK, nl.Uint32Attr(vti.Link))
+	}
+
+	inKey, outKey := htonl(vti.IKey), htonl(vti.OKey)
+	nl.NewRtAttrChild(info, nl.IFLA_VTI_IKEY, inKey)
+	nl.NewRtAttrChild(info, nl.IFLA_VTI_OKEY, outKey)
+}
+
+// parseVtiData copies the IFLA_VTI_* attributes found in data into link, which
+// must be a *Vti. Every handled attribute is four bytes wide; shorter payloads
+// are skipped instead of causing an out-of-range panic.
+func parseVtiData(link Link, data []syscall.NetlinkRouteAttr) {
+	vti := link.(*Vti)
+	for _, datum := range data {
+		if len(datum.Value) < 4 {
+			continue
+		}
+		word := datum.Value[0:4]
+		switch datum.Attr.Type {
+		case nl.IFLA_VTI_LINK:
+			// Written by addVtiAttrs with nl.Uint32Attr, i.e. native order.
+			vti.Link = native.Uint32(word)
+		case nl.IFLA_VTI_LOCAL:
+			vti.Local = net.IP(word)
+		case nl.IFLA_VTI_REMOTE:
+			vti.Remote = net.IP(word)
+		case nl.IFLA_VTI_IKEY:
+			vti.IKey = ntohl(word)
+		case nl.IFLA_VTI_OKEY:
+			vti.OKey = ntohl(word)
+		}
+	}
+}
+
+// addVrfAttrs appends an IFLA_INFO_DATA child to linkInfo carrying the VRF
+// table id as a four-byte IFLA_VRF_TABLE attribute in native byte order.
+func addVrfAttrs(vrf *Vrf, linkInfo *nl.RtAttr) {
+	info := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
+
+	table := make([]byte, 4)
+	native.PutUint32(table, vrf.Table)
+	nl.NewRtAttrChild(info, nl.IFLA_VRF_TABLE, table)
+}
+
+// parseVrfData reads the IFLA_VRF_TABLE attribute from data into link, which
+// must be a *Vrf. A payload shorter than four bytes is skipped instead of
+// causing an out-of-range panic.
+func parseVrfData(link Link, data []syscall.NetlinkRouteAttr) {
+	vrf := link.(*Vrf)
+	for _, datum := range data {
+		switch datum.Attr.Type {
+		case nl.IFLA_VRF_TABLE:
+			if len(datum.Value) >= 4 {
+				vrf.Table = native.Uint32(datum.Value[0:4])
+			}
+		}
+	}
+}

+ 9 - 1
libnetwork/vendor/github.com/vishvananda/netlink/netlink.go

@@ -8,7 +8,15 @@
 // interface that is loosly modeled on the iproute2 cli.
 package netlink
 
-import "net"
+import (
+	"errors"
+	"net"
+)
+
+var (
+	// ErrNotImplemented is returned when a requested feature is not implemented.
+	ErrNotImplemented = errors.New("not implemented")
+)
 
 // ParseIPNet parses a string in ip/net format and returns a net.IPNet.
 // This is valuable because addresses in netlink are often IPNets and

+ 4 - 3
libnetwork/vendor/github.com/vishvananda/netlink/netlink_linux.go

@@ -4,7 +4,8 @@ import "github.com/vishvananda/netlink/nl"
 
 // Family type definitions
 const (
-	FAMILY_ALL = nl.FAMILY_ALL
-	FAMILY_V4  = nl.FAMILY_V4
-	FAMILY_V6  = nl.FAMILY_V6
+	FAMILY_ALL  = nl.FAMILY_ALL
+	FAMILY_V4   = nl.FAMILY_V4
+	FAMILY_V6   = nl.FAMILY_V6
+	FAMILY_MPLS = nl.FAMILY_MPLS
 )

+ 96 - 18
libnetwork/vendor/github.com/vishvananda/netlink/netlink_unspecified.go

@@ -2,43 +2,117 @@
 
 package netlink
 
-import (
-	"errors"
-)
+import "net"
 
-var (
-	ErrNotImplemented = errors.New("not implemented")
-)
+// LinkSetUp is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetUp(link Link) error {
+	return ErrNotImplemented
+}
+
+// LinkSetDown is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetDown(link Link) error {
+	return ErrNotImplemented
+}
+
+// LinkSetMTU is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetMTU(link Link, mtu int) error {
+	return ErrNotImplemented
+}
+
+// LinkSetMaster is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetMaster(link Link, master *Bridge) error {
+	return ErrNotImplemented
+}
+
+// LinkSetNsPid is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetNsPid(link Link, nspid int) error {
+	return ErrNotImplemented
+}
+
+// LinkSetNsFd is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetNsFd(link Link, fd int) error {
+	return ErrNotImplemented
+}
+
+// LinkSetName is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetName(link Link, name string) error {
+	return ErrNotImplemented
+}
+
+// LinkSetAlias is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetAlias(link Link, name string) error {
+	return ErrNotImplemented
+}
+
+// LinkSetHardwareAddr is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetHardwareAddr(link Link, hwaddr net.HardwareAddr) error {
+	return ErrNotImplemented
+}
+
+// LinkSetVfHardwareAddr is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetVfHardwareAddr(link Link, vf int, hwaddr net.HardwareAddr) error {
+	return ErrNotImplemented
+}
+
+// LinkSetVfVlan is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetVfVlan(link Link, vf, vlan int) error {
+	return ErrNotImplemented
+}
+
+// LinkSetVfTxRate is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetVfTxRate(link Link, vf, rate int) error {
+	return ErrNotImplemented
+}
 
-func LinkSetUp(link *Link) error {
+// LinkSetNoMaster is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetNoMaster(link Link) error {
 	return ErrNotImplemented
 }
 
-func LinkSetDown(link *Link) error {
+// LinkSetMasterByIndex is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetMasterByIndex(link Link, masterIndex int) error {
 	return ErrNotImplemented
 }
 
-func LinkSetMTU(link *Link, mtu int) error {
+// LinkSetXdpFd is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetXdpFd(link Link, fd int) error {
 	return ErrNotImplemented
 }
 
-func LinkSetMaster(link *Link, master *Link) error {
+// LinkSetARPOff is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetARPOff(link Link) error {
 	return ErrNotImplemented
 }
 
-func LinkSetNsPid(link *Link, nspid int) error {
+// LinkSetARPOn is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetARPOn(link Link) error {
+	return ErrNotImplemented
+}
+
+// LinkByName is not available in this build; it always returns a nil Link
+// and ErrNotImplemented.
+func LinkByName(name string) (Link, error) {
+	return nil, ErrNotImplemented
+}
+
+// LinkByAlias is not available in this build; it always returns a nil Link
+// and ErrNotImplemented.
+func LinkByAlias(alias string) (Link, error) {
+	return nil, ErrNotImplemented
+}
+
+// LinkByIndex is not available in this build; it always returns a nil Link
+// and ErrNotImplemented.
+func LinkByIndex(index int) (Link, error) {
+	return nil, ErrNotImplemented
+}
+
+// LinkSetHairpin is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetHairpin(link Link, mode bool) error {
 	return ErrNotImplemented
 }
 
-func LinkSetNsFd(link *Link, fd int) error {
+// LinkSetGuard is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetGuard(link Link, mode bool) error {
 	return ErrNotImplemented
 }
 
-func LinkAdd(link *Link) error {
+// LinkSetFastLeave is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetFastLeave(link Link, mode bool) error {
 	return ErrNotImplemented
 }
 
-func LinkDel(link *Link) error {
+// LinkSetLearning is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetLearning(link Link, mode bool) error {
+	return ErrNotImplemented
+}
+
+// LinkSetRootBlock is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetRootBlock(link Link, mode bool) error {
+	return ErrNotImplemented
+}
+
+// LinkSetFlood is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkSetFlood(link Link, mode bool) error {
+	return ErrNotImplemented
+}
+
+// LinkAdd is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkAdd(link Link) error {
+	return ErrNotImplemented
+}
+
+// LinkDel is not available in this build; it always returns
+// ErrNotImplemented.
+func LinkDel(link Link) error {
 	return ErrNotImplemented
 }
 
@@ -70,15 +144,15 @@ func LinkList() ([]Link, error) {
 	return nil, ErrNotImplemented
 }
 
-func AddrAdd(link *Link, addr *Addr) error {
+// AddrAdd is not available in this build; it always returns
+// ErrNotImplemented.
+func AddrAdd(link Link, addr *Addr) error {
 	return ErrNotImplemented
 }
 
-func AddrDel(link *Link, addr *Addr) error {
+// AddrDel is not available in this build; it always returns
+// ErrNotImplemented.
+func AddrDel(link Link, addr *Addr) error {
 	return ErrNotImplemented
 }
 
-func AddrList(link *Link, family int) ([]Addr, error) {
+// AddrList is not available in this build; it always returns a nil slice
+// and ErrNotImplemented.
+func AddrList(link Link, family int) ([]Addr, error) {
 	return nil, ErrNotImplemented
 }
 
@@ -90,7 +164,7 @@ func RouteDel(route *Route) error {
 	return ErrNotImplemented
 }
 
-func RouteList(link *Link, family int) ([]Route, error) {
+// RouteList is not available in this build; it always returns a nil slice
+// and ErrNotImplemented.
+func RouteList(link Link, family int) ([]Route, error) {
 	return nil, ErrNotImplemented
 }
 
@@ -141,3 +215,7 @@ func NeighList(linkIndex, family int) ([]Neigh, error) {
+// NeighDeserialize is not available in this build; it always returns a nil
+// *Neigh and ErrNotImplemented.
 func NeighDeserialize(m []byte) (*Neigh, error) {
 	return nil, ErrNotImplemented
 }
+
+// SocketGet is not available in this build; it always returns a nil *Socket
+// and ErrNotImplemented.
+func SocketGet(local, remote net.Addr) (*Socket, error) {
+	return nil, ErrNotImplemented
+}

+ 189 - 0
libnetwork/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go

@@ -0,0 +1,189 @@
+package nl
+
+import "unsafe"
+
+// Track the message sizes for the correct serialization/deserialization
+const (
+	SizeofNfgenmsg      = 4
+	SizeofNfattr        = 4
+	SizeofNfConntrack   = 376
+	SizeofNfctTupleHead = 52
+)
+
+// L4ProtoMap maps a layer-4 protocol number to its name. Only TCP (6) and
+// UDP (17) are listed; lookups for other numbers yield the empty string.
+var L4ProtoMap = map[uint8]string{
+	6:  "tcp",
+	17: "udp",
+}
+
+// All the following constants are coming from:
+// https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+
+// enum cntl_msg_types {
+// 	IPCTNL_MSG_CT_NEW,
+// 	IPCTNL_MSG_CT_GET,
+// 	IPCTNL_MSG_CT_DELETE,
+// 	IPCTNL_MSG_CT_GET_CTRZERO,
+// 	IPCTNL_MSG_CT_GET_STATS_CPU,
+// 	IPCTNL_MSG_CT_GET_STATS,
+// 	IPCTNL_MSG_CT_GET_DYING,
+// 	IPCTNL_MSG_CT_GET_UNCONFIRMED,
+//
+// 	IPCTNL_MSG_MAX
+// };
+const (
+	IPCTNL_MSG_CT_GET    = 1
+	IPCTNL_MSG_CT_DELETE = 2
+)
+
+// #define NFNETLINK_V0	0
+const (
+	NFNETLINK_V0 = 0
+)
+
+// #define NLA_F_NESTED (1 << 15)
+const (
+	NLA_F_NESTED = (1 << 15)
+)
+
+// enum ctattr_type {
+// 	CTA_UNSPEC,
+// 	CTA_TUPLE_ORIG,
+// 	CTA_TUPLE_REPLY,
+// 	CTA_STATUS,
+// 	CTA_PROTOINFO,
+// 	CTA_HELP,
+// 	CTA_NAT_SRC,
+// #define CTA_NAT	CTA_NAT_SRC	/* backwards compatibility */
+// 	CTA_TIMEOUT,
+// 	CTA_MARK,
+// 	CTA_COUNTERS_ORIG,
+// 	CTA_COUNTERS_REPLY,
+// 	CTA_USE,
+// 	CTA_ID,
+// 	CTA_NAT_DST,
+// 	CTA_TUPLE_MASTER,
+// 	CTA_SEQ_ADJ_ORIG,
+// 	CTA_NAT_SEQ_ADJ_ORIG	= CTA_SEQ_ADJ_ORIG,
+// 	CTA_SEQ_ADJ_REPLY,
+// 	CTA_NAT_SEQ_ADJ_REPLY	= CTA_SEQ_ADJ_REPLY,
+// 	CTA_SECMARK,		/* obsolete */
+// 	CTA_ZONE,
+// 	CTA_SECCTX,
+// 	CTA_TIMESTAMP,
+// 	CTA_MARK_MASK,
+// 	CTA_LABELS,
+// 	CTA_LABELS_MASK,
+// 	__CTA_MAX
+// };
+const (
+	CTA_TUPLE_ORIG  = 1
+	CTA_TUPLE_REPLY = 2
+	CTA_STATUS      = 3
+	CTA_TIMEOUT     = 8
+	CTA_MARK        = 9
+	CTA_PROTOINFO   = 4
+)
+
+// enum ctattr_tuple {
+// 	CTA_TUPLE_UNSPEC,
+// 	CTA_TUPLE_IP,
+// 	CTA_TUPLE_PROTO,
+// 	CTA_TUPLE_ZONE,
+// 	__CTA_TUPLE_MAX
+// };
+// #define CTA_TUPLE_MAX (__CTA_TUPLE_MAX - 1)
+const (
+	CTA_TUPLE_IP    = 1
+	CTA_TUPLE_PROTO = 2
+)
+
+// enum ctattr_ip {
+// 	CTA_IP_UNSPEC,
+// 	CTA_IP_V4_SRC,
+// 	CTA_IP_V4_DST,
+// 	CTA_IP_V6_SRC,
+// 	CTA_IP_V6_DST,
+// 	__CTA_IP_MAX
+// };
+// #define CTA_IP_MAX (__CTA_IP_MAX - 1)
+const (
+	CTA_IP_V4_SRC = 1
+	CTA_IP_V4_DST = 2
+	CTA_IP_V6_SRC = 3
+	CTA_IP_V6_DST = 4
+)
+
+// enum ctattr_l4proto {
+// 	CTA_PROTO_UNSPEC,
+// 	CTA_PROTO_NUM,
+// 	CTA_PROTO_SRC_PORT,
+// 	CTA_PROTO_DST_PORT,
+// 	CTA_PROTO_ICMP_ID,
+// 	CTA_PROTO_ICMP_TYPE,
+// 	CTA_PROTO_ICMP_CODE,
+// 	CTA_PROTO_ICMPV6_ID,
+// 	CTA_PROTO_ICMPV6_TYPE,
+// 	CTA_PROTO_ICMPV6_CODE,
+// 	__CTA_PROTO_MAX
+// };
+// #define CTA_PROTO_MAX (__CTA_PROTO_MAX - 1)
+const (
+	CTA_PROTO_NUM      = 1
+	CTA_PROTO_SRC_PORT = 2
+	CTA_PROTO_DST_PORT = 3
+)
+
+// enum ctattr_protoinfo {
+// 	CTA_PROTOINFO_UNSPEC,
+// 	CTA_PROTOINFO_TCP,
+// 	CTA_PROTOINFO_DCCP,
+// 	CTA_PROTOINFO_SCTP,
+// 	__CTA_PROTOINFO_MAX
+// };
+// #define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1)
+const (
+	CTA_PROTOINFO_TCP = 1
+)
+
+// enum ctattr_protoinfo_tcp {
+// 	CTA_PROTOINFO_TCP_UNSPEC,
+// 	CTA_PROTOINFO_TCP_STATE,
+// 	CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
+// 	CTA_PROTOINFO_TCP_WSCALE_REPLY,
+// 	CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
+// 	CTA_PROTOINFO_TCP_FLAGS_REPLY,
+// 	__CTA_PROTOINFO_TCP_MAX
+// };
+// #define CTA_PROTOINFO_TCP_MAX (__CTA_PROTOINFO_TCP_MAX - 1)
+const (
+	CTA_PROTOINFO_TCP_STATE           = 1
+	CTA_PROTOINFO_TCP_WSCALE_ORIGINAL = 2
+	CTA_PROTOINFO_TCP_WSCALE_REPLY    = 3
+	CTA_PROTOINFO_TCP_FLAGS_ORIGINAL  = 4
+	CTA_PROTOINFO_TCP_FLAGS_REPLY     = 5
+)
+
+// /* General form of address family dependent message.
+//  */
+// struct nfgenmsg {
+// 	__u8  nfgen_family;		/* AF_xxx */
+// 	__u8  version;		/* nfnetlink version */
+// 	__be16    res_id;		/* resource id */
+// };
+// Nfgenmsg is the Go counterpart of the kernel's struct nfgenmsg quoted
+// above; its three fields occupy SizeofNfgenmsg (4) bytes.
+type Nfgenmsg struct {
+	NfgenFamily uint8
+	Version     uint8
+	ResId       uint16 // big endian
+}
+
+// Len returns the fixed serialized size of the message, SizeofNfgenmsg.
+func (msg *Nfgenmsg) Len() int {
+	return SizeofNfgenmsg
+}
+
+// DeserializeNfgenmsg reinterprets the first SizeofNfgenmsg bytes of b as an
+// Nfgenmsg. No copy is made: the returned pointer refers to b's memory, so b
+// must hold at least SizeofNfgenmsg bytes.
+func DeserializeNfgenmsg(b []byte) *Nfgenmsg {
+	return (*Nfgenmsg)(unsafe.Pointer(&b[0:SizeofNfgenmsg][0]))
+}
+
+// Serialize returns a SizeofNfgenmsg-byte slice that views msg's own memory
+// (no copy is made).
+func (msg *Nfgenmsg) Serialize() []byte {
+	return (*(*[SizeofNfgenmsg]byte)(unsafe.Pointer(msg)))[:]
+}

+ 34 - 0
libnetwork/vendor/github.com/vishvananda/netlink/nl/link_linux.go

@@ -418,3 +418,37 @@ const (
 	IFLA_XDP_ATTACHED /* read-only bool indicating if prog is attached */
 	IFLA_XDP_MAX      = IFLA_XDP_ATTACHED
 )
+
+// IFLA_IPTUN_* attribute types, numbered consecutively from 0
+// (IFLA_IPTUN_UNSPEC); IFLA_IPTUN_MAX aliases the last entry.
+const (
+	IFLA_IPTUN_UNSPEC = iota
+	IFLA_IPTUN_LINK
+	IFLA_IPTUN_LOCAL
+	IFLA_IPTUN_REMOTE
+	IFLA_IPTUN_TTL
+	IFLA_IPTUN_TOS
+	IFLA_IPTUN_ENCAP_LIMIT
+	IFLA_IPTUN_FLOWINFO
+	IFLA_IPTUN_FLAGS
+	IFLA_IPTUN_PROTO
+	IFLA_IPTUN_PMTUDISC
+	IFLA_IPTUN_6RD_PREFIX
+	IFLA_IPTUN_6RD_RELAY_PREFIX
+	IFLA_IPTUN_6RD_PREFIXLEN
+	IFLA_IPTUN_6RD_RELAY_PREFIXLEN
+	IFLA_IPTUN_MAX = IFLA_IPTUN_6RD_RELAY_PREFIXLEN
+)
+
+// IFLA_VTI_* attribute types, numbered consecutively from 0
+// (IFLA_VTI_UNSPEC); IFLA_VTI_MAX aliases the last entry.
+const (
+	IFLA_VTI_UNSPEC = iota
+	IFLA_VTI_LINK
+	IFLA_VTI_IKEY
+	IFLA_VTI_OKEY
+	IFLA_VTI_LOCAL
+	IFLA_VTI_REMOTE
+	IFLA_VTI_MAX = IFLA_VTI_REMOTE
+)
+
+// IFLA_VRF_* attribute types: IFLA_VRF_UNSPEC is 0 and IFLA_VRF_TABLE is 1.
+const (
+	IFLA_VRF_UNSPEC = iota
+	IFLA_VRF_TABLE
+)

+ 36 - 0
libnetwork/vendor/github.com/vishvananda/netlink/nl/mpls_linux.go

@@ -0,0 +1,36 @@
+package nl
+
+import "encoding/binary"
+
+// Bit positions inside a 32-bit MPLS label stack entry as used by
+// EncodeMPLSStack and DecodeMPLSStack: the label value starts at bit 12 and
+// the bottom-of-stack flag is bit 8.
+const (
+	MPLS_LS_LABEL_SHIFT = 12
+	MPLS_LS_S_SHIFT     = 8
+)
+
+// EncodeMPLSStack serializes labels into consecutive big-endian 32-bit stack
+// entries. Each label is shifted into the label field and the final entry
+// additionally gets the bottom-of-stack bit. With no labels the result is an
+// empty, non-nil slice.
+func EncodeMPLSStack(labels ...int) []byte {
+	last := len(labels) - 1
+	out := make([]byte, 4*len(labels))
+	for i := range labels {
+		entry := labels[i] << MPLS_LS_LABEL_SHIFT
+		if i == last {
+			entry |= 1 << MPLS_LS_S_SHIFT
+		}
+		binary.BigEndian.PutUint32(out[4*i:], uint32(entry))
+	}
+	return out
+}
+
+// DecodeMPLSStack parses buf as a sequence of big-endian 32-bit MPLS stack
+// entries and returns the label of each one. Parsing stops after the first
+// entry that has the bottom-of-stack bit set. It returns nil when len(buf) is
+// not a multiple of four.
+func DecodeMPLSStack(buf []byte) []int {
+	if len(buf)%4 != 0 {
+		return nil
+	}
+	stack := make([]int, 0, len(buf)/4)
+	for len(buf) > 0 {
+		l := binary.BigEndian.Uint32(buf[:4])
+		buf = buf[4:]
+		// Shift while the value is still unsigned: converting to int first
+		// would yield a negative label on platforms where int is 32 bits wide
+		// and the entry's top bit is set.
+		stack = append(stack, int(l>>MPLS_LS_LABEL_SHIFT))
+		if (l>>MPLS_LS_S_SHIFT)&1 > 0 {
+			break
+		}
+	}
+	return stack
+}

+ 27 - 5
libnetwork/vendor/github.com/vishvananda/netlink/nl/nl_linux.go

@@ -17,13 +17,14 @@ import (
 
 const (
 	// Family type definitions
-	FAMILY_ALL = syscall.AF_UNSPEC
-	FAMILY_V4  = syscall.AF_INET
-	FAMILY_V6  = syscall.AF_INET6
+	FAMILY_ALL  = syscall.AF_UNSPEC
+	FAMILY_V4   = syscall.AF_INET
+	FAMILY_V6   = syscall.AF_INET6
+	FAMILY_MPLS = AF_MPLS
 )
 
 // SupportedNlFamilies contains the list of netlink families this netlink package supports
-var SupportedNlFamilies = []int{syscall.NETLINK_ROUTE, syscall.NETLINK_XFRM}
+var SupportedNlFamilies = []int{syscall.NETLINK_ROUTE, syscall.NETLINK_XFRM, syscall.NETLINK_NETFILTER}
 
 var nextSeqNr uint32
 
@@ -320,6 +321,7 @@ func (a *RtAttr) Serialize() []byte {
 type NetlinkRequest struct {
 	syscall.NlMsghdr
 	Data    []NetlinkRequestData
+	RawData []byte // copied verbatim after the serialized Data entries
 	Sockets map[int]*SocketHandle
 }
 
@@ -331,6 +333,8 @@ func (req *NetlinkRequest) Serialize() []byte {
 		dataBytes[i] = data.Serialize()
 		length = length + len(dataBytes[i])
 	}
+	length += len(req.RawData)
+
 	req.Len = uint32(length)
 	b := make([]byte, length)
 	hdr := (*(*[syscall.SizeofNlMsghdr]byte)(unsafe.Pointer(req)))[:]
@@ -342,6 +346,10 @@ func (req *NetlinkRequest) Serialize() []byte {
 			next = next + 1
 		}
 	}
+	// Add the raw data if any
+	if len(req.RawData) > 0 {
+		copy(b[next:length], req.RawData)
+	}
 	return b
 }
 
@@ -351,6 +359,13 @@ func (req *NetlinkRequest) AddData(data NetlinkRequestData) {
 	}
 }
 
+// AddRawData queues data to be emitted verbatim at the end of the
+// NetlinkRequest when it is serialized. A nil slice is ignored.
+func (req *NetlinkRequest) AddRawData(data []byte) {
+	if data == nil {
+		return
+	}
+	req.RawData = append(req.RawData, data...)
+}
+
 // Execute the request against a the given sockType.
 // Returns a list of netlink messages in serialized format, optionally filtered
 // by resType.
@@ -450,7 +465,7 @@ type NetlinkSocket struct {
 }
 
 func getNetlinkSocket(protocol int) (*NetlinkSocket, error) {
-	fd, err := syscall.Socket(syscall.AF_NETLINK, syscall.SOCK_RAW, protocol)
+	fd, err := syscall.Socket(syscall.AF_NETLINK, syscall.SOCK_RAW|syscall.SOCK_CLOEXEC, protocol)
 	if err != nil {
 		return nil, err
 	}
@@ -656,6 +671,13 @@ func Uint32Attr(v uint32) []byte {
 	return bytes
 }
 
+// Uint64Attr encodes v as an eight-byte slice using the byte order returned
+// by NativeEndian.
+func Uint64Attr(v uint64) []byte {
+	order := NativeEndian()
+	encoded := make([]byte, 8)
+	order.PutUint64(encoded, v)
+	return encoded
+}
+
 func ParseRouteAttr(b []byte) ([]syscall.NetlinkRouteAttr, error) {
 	var attrs []syscall.NetlinkRouteAttr
 	for len(b) >= syscall.SizeofRtAttr {

+ 11 - 0
libnetwork/vendor/github.com/vishvananda/netlink/nl/nl_unspecified.go

@@ -0,0 +1,11 @@
+// +build !linux
+
+package nl
+
+import "encoding/binary"
+
+// SupportedNlFamilies is empty in non-Linux builds (see the !linux build tag
+// on this file).
+var SupportedNlFamilies = []int{}
+
+// NativeEndian is the non-Linux placeholder: it always returns a nil
+// binary.ByteOrder, so the result must not be used to encode or decode.
+func NativeEndian() binary.ByteOrder {
+	return nil
+}

+ 27 - 1
libnetwork/vendor/github.com/vishvananda/netlink/nl/route_linux.go

@@ -43,12 +43,38 @@ func (msg *RtMsg) Serialize() []byte {
 
 type RtNexthop struct {
 	syscall.RtNexthop
+	// Children are serialized, each aligned with rtaAlignOf, right after the
+	// fixed-size header.
+	Children []NetlinkRequestData
 }
 
 func DeserializeRtNexthop(b []byte) *RtNexthop {
 	return (*RtNexthop)(unsafe.Pointer(&b[0:syscall.SizeofRtNexthop][0]))
 }
 
+// Len returns the serialized size of the nexthop: the bare header size when
+// there are no children, otherwise the header plus every child's aligned
+// length, with the total aligned once more.
+func (msg *RtNexthop) Len() int {
+	if len(msg.Children) == 0 {
+		return syscall.SizeofRtNexthop
+	}
+
+	total := syscall.SizeofRtNexthop
+	for i := range msg.Children {
+		total += rtaAlignOf(msg.Children[i].Len())
+	}
+	return rtaAlignOf(total)
+}
+
 func (msg *RtNexthop) Serialize() []byte {
-	return (*(*[syscall.SizeofRtNexthop]byte)(unsafe.Pointer(msg)))[:]
+	length := msg.Len()
+	msg.RtNexthop.Len = uint16(length)
+	buf := make([]byte, length)
+	copy(buf, (*(*[syscall.SizeofRtNexthop]byte)(unsafe.Pointer(msg)))[:])
+	next := rtaAlignOf(syscall.SizeofRtNexthop)
+	if len(msg.Children) > 0 {
+		for _, child := range msg.Children {
+			childBuf := child.Serialize()
+			copy(buf[next:], childBuf)
+			next += rtaAlignOf(len(childBuf))
+		}
+	}
+	return buf
 }

+ 31 - 0
libnetwork/vendor/github.com/vishvananda/netlink/nl/syscall.go

@@ -35,3 +35,34 @@ const (
 	FR_ACT_UNREACHABLE /* Drop with ENETUNREACH */
 	FR_ACT_PROHIBIT    /* Drop with EACCES */
 )
+
+// socket diags related
+const (
+	SOCK_DIAG_BY_FAMILY = 20         /* linux.sock_diag.h */
+	TCPDIAG_NOCOOKIE    = 0xFFFFFFFF /* TCPDIAG_NOCOOKIE in net/ipv4/tcp_diag.h*/
+)
+
+const (
+	AF_MPLS = 28
+)
+
+const (
+	RTA_NEWDST     = 0x13
+	RTA_ENCAP_TYPE = 0x15
+	RTA_ENCAP      = 0x16
+)
+
+// RTA_ENCAP subtype
+const (
+	MPLS_IPTUNNEL_UNSPEC = iota
+	MPLS_IPTUNNEL_DST
+)
+
+// light weight tunnel encap types
+const (
+	LWTUNNEL_ENCAP_NONE = iota
+	LWTUNNEL_ENCAP_MPLS
+	LWTUNNEL_ENCAP_IP
+	LWTUNNEL_ENCAP_ILA
+	LWTUNNEL_ENCAP_IP6
+)

+ 46 - 26
libnetwork/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go

@@ -11,34 +11,40 @@ const (
 	XFRM_INF = ^uint64(0)
 )
 
+// XfrmMsgType is the numeric type used for XFRM message type identifiers.
+type XfrmMsgType uint8
+
+// XfrmMsg is implemented by values that can report their XfrmMsgType.
+type XfrmMsg interface {
+	Type() XfrmMsgType
+}
+
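-// Message Types
+// Message Types. Only XFRM_MSG_BASE is declared with the XfrmMsgType type;
+// every other constant below has its own "= value" and is therefore an
+// untyped integer constant.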
 const (
-	XFRM_MSG_BASE        = 0x10
-	XFRM_MSG_NEWSA       = 0x10
-	XFRM_MSG_DELSA       = 0x11
-	XFRM_MSG_GETSA       = 0x12
-	XFRM_MSG_NEWPOLICY   = 0x13
-	XFRM_MSG_DELPOLICY   = 0x14
-	XFRM_MSG_GETPOLICY   = 0x15
-	XFRM_MSG_ALLOCSPI    = 0x16
-	XFRM_MSG_ACQUIRE     = 0x17
-	XFRM_MSG_EXPIRE      = 0x18
-	XFRM_MSG_UPDPOLICY   = 0x19
-	XFRM_MSG_UPDSA       = 0x1a
-	XFRM_MSG_POLEXPIRE   = 0x1b
-	XFRM_MSG_FLUSHSA     = 0x1c
-	XFRM_MSG_FLUSHPOLICY = 0x1d
-	XFRM_MSG_NEWAE       = 0x1e
-	XFRM_MSG_GETAE       = 0x1f
-	XFRM_MSG_REPORT      = 0x20
-	XFRM_MSG_MIGRATE     = 0x21
-	XFRM_MSG_NEWSADINFO  = 0x22
-	XFRM_MSG_GETSADINFO  = 0x23
-	XFRM_MSG_NEWSPDINFO  = 0x24
-	XFRM_MSG_GETSPDINFO  = 0x25
-	XFRM_MSG_MAPPING     = 0x26
-	XFRM_MSG_MAX         = 0x26
-	XFRM_NR_MSGTYPES     = 0x17
+	XFRM_MSG_BASE        XfrmMsgType = 0x10
+	XFRM_MSG_NEWSA                   = 0x10
+	XFRM_MSG_DELSA                   = 0x11
+	XFRM_MSG_GETSA                   = 0x12
+	XFRM_MSG_NEWPOLICY               = 0x13
+	XFRM_MSG_DELPOLICY               = 0x14
+	XFRM_MSG_GETPOLICY               = 0x15
+	XFRM_MSG_ALLOCSPI                = 0x16
+	XFRM_MSG_ACQUIRE                 = 0x17
+	XFRM_MSG_EXPIRE                  = 0x18
+	XFRM_MSG_UPDPOLICY               = 0x19
+	XFRM_MSG_UPDSA                   = 0x1a
+	XFRM_MSG_POLEXPIRE               = 0x1b
+	XFRM_MSG_FLUSHSA                 = 0x1c
+	XFRM_MSG_FLUSHPOLICY             = 0x1d
+	XFRM_MSG_NEWAE                   = 0x1e
+	XFRM_MSG_GETAE                   = 0x1f
+	XFRM_MSG_REPORT                  = 0x20
+	XFRM_MSG_MIGRATE                 = 0x21
+	XFRM_MSG_NEWSADINFO              = 0x22
+	XFRM_MSG_GETSADINFO              = 0x23
+	XFRM_MSG_NEWSPDINFO              = 0x24
+	XFRM_MSG_GETSPDINFO              = 0x25
+	XFRM_MSG_MAPPING                 = 0x26
+	XFRM_MSG_MAX                     = 0x26
+	XFRM_NR_MSGTYPES                 = 0x17
 )
 
 // Attribute types
@@ -81,6 +87,20 @@ const (
 	SizeofXfrmMark        = 0x08
 )
 
+// Netlink groups
+const (
+	XFRMNLGRP_NONE    = 0x0
+	XFRMNLGRP_ACQUIRE = 0x1
+	XFRMNLGRP_EXPIRE  = 0x2
+	XFRMNLGRP_SA      = 0x3
+	XFRMNLGRP_POLICY  = 0x4
+	XFRMNLGRP_AEVENTS = 0x5
+	XFRMNLGRP_REPORT  = 0x6
+	XFRMNLGRP_MIGRATE = 0x7
+	XFRMNLGRP_MAPPING = 0x8
+	__XFRMNLGRP_MAX   = 0x9
+)
+
 // typedef union {
 //   __be32    a4;
 //   __be32    a6[4];

+ 32 - 0
libnetwork/vendor/github.com/vishvananda/netlink/nl/xfrm_monitor_linux.go

@@ -0,0 +1,32 @@
+package nl
+
+import (
+	"unsafe"
+)
+
+const (
+	SizeofXfrmUserExpire = 0xe8
+)
+
+// struct xfrm_user_expire {
+// 	struct xfrm_usersa_info		state;
+// 	__u8				hard;
+// };
+
+// XfrmUserExpire is the Go counterpart of struct xfrm_user_expire quoted
+// above. Pad fills the struct up to SizeofXfrmUserExpire bytes.
+type XfrmUserExpire struct {
+	XfrmUsersaInfo XfrmUsersaInfo
+	Hard           uint8
+	Pad            [7]byte
+}
+
+// Len returns the fixed serialized size of the message, SizeofXfrmUserExpire.
+func (msg *XfrmUserExpire) Len() int {
+	return SizeofXfrmUserExpire
+}
+
+// DeserializeXfrmUserExpire reinterprets the first SizeofXfrmUserExpire bytes
+// of b as an XfrmUserExpire. No copy is made: the returned pointer refers to
+// b's memory, so b must hold at least SizeofXfrmUserExpire bytes.
+func DeserializeXfrmUserExpire(b []byte) *XfrmUserExpire {
+	return (*XfrmUserExpire)(unsafe.Pointer(&b[0:SizeofXfrmUserExpire][0]))
+}
+
+// Serialize returns a SizeofXfrmUserExpire-byte slice that views msg's own
+// memory (no copy is made).
+func (msg *XfrmUserExpire) Serialize() []byte {
+	return (*(*[SizeofXfrmUserExpire]byte)(unsafe.Pointer(msg)))[:]
+}

+ 70 - 8
libnetwork/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go

@@ -5,14 +5,27 @@ import (
 )
 
 const (
-	SizeofXfrmUsersaId    = 0x18
-	SizeofXfrmStats       = 0x0c
-	SizeofXfrmUsersaInfo  = 0xe0
-	SizeofXfrmAlgo        = 0x44
-	SizeofXfrmAlgoAuth    = 0x48
-	SizeofXfrmAlgoAEAD    = 0x48
-	SizeofXfrmEncapTmpl   = 0x18
-	SizeofXfrmUsersaFlush = 0x8
+	SizeofXfrmUsersaId       = 0x18
+	SizeofXfrmStats          = 0x0c
+	SizeofXfrmUsersaInfo     = 0xe0
+	SizeofXfrmUserSpiInfo    = 0xe8 // SizeofXfrmUsersaInfo plus the two uint32 fields Min and Max
+	SizeofXfrmAlgo           = 0x44
+	SizeofXfrmAlgoAuth       = 0x48
+	SizeofXfrmAlgoAEAD       = 0x48
+	SizeofXfrmEncapTmpl      = 0x18
+	SizeofXfrmUsersaFlush    = 0x8
+	SizeofXfrmReplayStateEsn = 0x18 // the six uint32 fields preceding Bmp; the bitmap itself is not counted
+)
+
+const ( // xfrm state flags, one bit each; netlink.xfrmStateAddOrUpdate ORs XFRM_STATE_ESN into XfrmUsersaInfo.Flags
+	XFRM_STATE_NOECN      = 1
+	XFRM_STATE_DECAP_DSCP = 2
+	XFRM_STATE_NOPMTUDISC = 4
+	XFRM_STATE_WILDRECV   = 8
+	XFRM_STATE_ICMP       = 16
+	XFRM_STATE_AF_UNSPEC  = 32
+	XFRM_STATE_ALIGN4     = 64
+	XFRM_STATE_ESN        = 128 // set when XfrmState.ESN is true
 )
 
 // struct xfrm_usersa_id {
@@ -120,6 +133,30 @@ func (msg *XfrmUsersaInfo) Serialize() []byte {
 	return (*(*[SizeofXfrmUsersaInfo]byte)(unsafe.Pointer(msg)))[:]
 }
 
+// struct xfrm_userspi_info {
+// 	struct xfrm_usersa_info		info;
+// 	__u32				min;
+// 	__u32				max;
+// };
+
+type XfrmUserSpiInfo struct { // Go mirror of the C struct quoted above; payload of XFRM_MSG_ALLOCSPI requests
+	XfrmUsersaInfo XfrmUsersaInfo
+	Min            uint32 // presumably the lower bound of the SPI range; netlink.xfrmStateAllocSpi sets 0x100
+	Max            uint32 // presumably the upper bound of the SPI range; netlink.xfrmStateAllocSpi sets 0xffffffff
+}
+
+func (msg *XfrmUserSpiInfo) Len() int { // Len reports the fixed serialized size, SizeofXfrmUserSpiInfo.
+	return SizeofXfrmUserSpiInfo
+}
+
+func DeserializeXfrmUserSpiInfo(b []byte) *XfrmUserSpiInfo { // reinterprets b's memory in place (no copy); panics if b cannot be sliced to SizeofXfrmUserSpiInfo bytes
+	return (*XfrmUserSpiInfo)(unsafe.Pointer(&b[0:SizeofXfrmUserSpiInfo][0]))
+}
+
+func (msg *XfrmUserSpiInfo) Serialize() []byte { // returns a SizeofXfrmUserSpiInfo-byte slice aliasing msg's memory, not a copy
+	return (*(*[SizeofXfrmUserSpiInfo]byte)(unsafe.Pointer(msg)))[:]
+}
+
 // struct xfrm_algo {
 //   char    alg_name[64];
 //   unsigned int  alg_key_len;    /* in bits */
@@ -270,3 +307,28 @@ func DeserializeXfrmUsersaFlush(b []byte) *XfrmUsersaFlush {
 func (msg *XfrmUsersaFlush) Serialize() []byte {
 	return (*(*[SizeofXfrmUsersaFlush]byte)(unsafe.Pointer(msg)))[:]
 }
+
+// struct xfrm_replay_state_esn {
+//     unsigned int    bmp_len;
+//     __u32           oseq;
+//     __u32           seq;
+//     __u32           oseq_hi;
+//     __u32           seq_hi;
+//     __u32           replay_window;
+//     __u32           bmp[0];
+// };
+
+type XfrmReplayStateEsn struct { // Go counterpart of the C struct quoted above
+	BmpLen       uint32 // netlink.writeReplayEsn sets this to the number of 32-bit words covering the replay window
+	OSeq         uint32
+	Seq          uint32
+	OSeqHi       uint32
+	SeqHi        uint32
+	ReplayWindow uint32
+	Bmp          []uint32 // Go slice standing in for the trailing C array; never part of the serialized bytes
+}
+
+func (msg *XfrmReplayStateEsn) Serialize() []byte { // returns the first SizeofXfrmReplayStateEsn bytes of msg's memory (aliased, not copied)
+	// We deliberately do not pass Bmp, as it gets set by the kernel; only the six uint32 fields before it are exposed.
+	return (*(*[SizeofXfrmReplayStateEsn]byte)(unsafe.Pointer(msg)))[:]
+}

+ 32 - 0
libnetwork/vendor/github.com/vishvananda/netlink/order.go

@@ -0,0 +1,32 @@
+package netlink
+
+import (
+	"encoding/binary"
+
+	"github.com/vishvananda/netlink/nl"
+)
+
+var (
+	native       = nl.NativeEndian() // byte order supplied by nl.NativeEndian; used for the non-port integer fields of netlink payloads
+	networkOrder = binary.BigEndian  // big-endian; used for the port numbers in socket_linux.go
+)
+
+func htonl(val uint32) []byte { // htonl encodes val as 4 bytes in network (big-endian) order.
+	var out [4]byte
+	binary.BigEndian.PutUint32(out[:], val)
+	return out[:]
+}
+
+func htons(val uint16) []byte { // htons encodes val as 2 bytes in network (big-endian) order.
+	var out [2]byte
+	binary.BigEndian.PutUint16(out[:], val)
+	return out[:]
+}
+
+func ntohl(buf []byte) uint32 { // ntohl decodes the first 4 bytes of buf as a big-endian uint32; panics if buf is shorter
+	return binary.BigEndian.Uint32(buf)
+}
+
+func ntohs(buf []byte) uint16 { // ntohs decodes the first 2 bytes of buf as a big-endian uint16; panics if buf is shorter
+	return binary.BigEndian.Uint16(buf)
+}

+ 1 - 4
libnetwork/vendor/github.com/vishvananda/netlink/protinfo.go

@@ -46,8 +46,5 @@ func boolToByte(x bool) []byte {
 }
 
 func byteToBool(x byte) bool { // byteToBool reports whether x is non-zero.
-	if uint8(x) != 0 {
-		return true
-	}
-	return false
+	return uint8(x) != 0
 }

+ 23 - 17
libnetwork/vendor/github.com/vishvananda/netlink/protinfo_linux.go

@@ -40,25 +40,31 @@ func (h *Handle) LinkGetProtinfo(link Link) (Protinfo, error) {
 			if err != nil {
 				return pi, err
 			}
-			var pi Protinfo
-			for _, info := range infos {
-				switch info.Attr.Type {
-				case nl.IFLA_BRPORT_MODE:
-					pi.Hairpin = byteToBool(info.Value[0])
-				case nl.IFLA_BRPORT_GUARD:
-					pi.Guard = byteToBool(info.Value[0])
-				case nl.IFLA_BRPORT_FAST_LEAVE:
-					pi.FastLeave = byteToBool(info.Value[0])
-				case nl.IFLA_BRPORT_PROTECT:
-					pi.RootBlock = byteToBool(info.Value[0])
-				case nl.IFLA_BRPORT_LEARNING:
-					pi.Learning = byteToBool(info.Value[0])
-				case nl.IFLA_BRPORT_UNICAST_FLOOD:
-					pi.Flood = byteToBool(info.Value[0])
-				}
-			}
+			pi = *parseProtinfo(infos)
+
 			return pi, nil
 		}
 	}
 	return pi, fmt.Errorf("Device with index %d not found", base.Index)
 }
+
+func parseProtinfo(infos []syscall.NetlinkRouteAttr) *Protinfo { // builds a Protinfo from IFLA_BRPORT_* attributes; any other attribute type is ignored
+	var pi Protinfo
+	for _, info := range infos {
+		switch info.Attr.Type { // only Value[0] is read below, so an attribute with an empty Value would panic
+		case nl.IFLA_BRPORT_MODE:
+			pi.Hairpin = byteToBool(info.Value[0])
+		case nl.IFLA_BRPORT_GUARD:
+			pi.Guard = byteToBool(info.Value[0])
+		case nl.IFLA_BRPORT_FAST_LEAVE:
+			pi.FastLeave = byteToBool(info.Value[0])
+		case nl.IFLA_BRPORT_PROTECT:
+			pi.RootBlock = byteToBool(info.Value[0])
+		case nl.IFLA_BRPORT_LEARNING:
+			pi.Learning = byteToBool(info.Value[0])
+		case nl.IFLA_BRPORT_UNICAST_FLOOD:
+			pi.Flood = byteToBool(info.Value[0])
+		}
+	}
+	return &pi // fields with no matching attribute keep their zero value (false)
+}

+ 5 - 4
libnetwork/vendor/github.com/vishvananda/netlink/qdisc.go

@@ -187,10 +187,11 @@ func (qdisc *Netem) Type() string {
 // Tbf is a classless qdisc that rate limits based on tokens
 type Tbf struct {
 	QdiscAttrs
-	// TODO: handle 64bit rate properly
-	Rate   uint64
-	Limit  uint32
-	Buffer uint32
+	Rate     uint64 // sent truncated to 32 bits in TCA_TBF_PARMS, plus in full as TCA_TBF_RATE64 when >= 1<<32
+	Limit    uint32
+	Buffer   uint32
+	Peakrate uint64 // sent truncated to 32 bits in TCA_TBF_PARMS, plus in full as TCA_TBF_PRATE64 when >= 1<<32
+	Minburst uint32 // sent as TCA_TBF_PBURST, and only when Peakrate > 0
 	// TODO: handle other settings
 }
 

+ 16 - 2
libnetwork/vendor/github.com/vishvananda/netlink/qdisc_linux.go

@@ -168,11 +168,20 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
 		options = nl.NewRtAttr(nl.TCA_OPTIONS, tcmap.Serialize())
 	} else if tbf, ok := qdisc.(*Tbf); ok {
 		opt := nl.TcTbfQopt{}
-		// TODO: handle rate > uint32
 		opt.Rate.Rate = uint32(tbf.Rate)
+		opt.Peakrate.Rate = uint32(tbf.Peakrate)
 		opt.Limit = tbf.Limit
 		opt.Buffer = tbf.Buffer
 		nl.NewRtAttrChild(options, nl.TCA_TBF_PARMS, opt.Serialize())
+		if tbf.Rate >= uint64(1<<32) {
+			nl.NewRtAttrChild(options, nl.TCA_TBF_RATE64, nl.Uint64Attr(tbf.Rate))
+		}
+		if tbf.Peakrate >= uint64(1<<32) {
+			nl.NewRtAttrChild(options, nl.TCA_TBF_PRATE64, nl.Uint64Attr(tbf.Peakrate))
+		}
+		if tbf.Peakrate > 0 {
+			nl.NewRtAttrChild(options, nl.TCA_TBF_PBURST, nl.Uint32Attr(tbf.Minburst))
+		}
 	} else if htb, ok := qdisc.(*Htb); ok {
 		opt := nl.TcHtbGlob{}
 		opt.Version = htb.Version
@@ -418,10 +427,15 @@ func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
 		case nl.TCA_TBF_PARMS:
 			opt := nl.DeserializeTcTbfQopt(datum.Value)
 			tbf.Rate = uint64(opt.Rate.Rate)
+			tbf.Peakrate = uint64(opt.Peakrate.Rate)
 			tbf.Limit = opt.Limit
 			tbf.Buffer = opt.Buffer
 		case nl.TCA_TBF_RATE64:
-			tbf.Rate = native.Uint64(datum.Value[0:4])
+			tbf.Rate = native.Uint64(datum.Value[0:8])
+		case nl.TCA_TBF_PRATE64:
+			tbf.Peakrate = native.Uint64(datum.Value[0:8])
+		case nl.TCA_TBF_PBURST:
+			tbf.Minburst = native.Uint32(datum.Value[0:4])
 		}
 	}
 	return nil

+ 55 - 5
libnetwork/vendor/github.com/vishvananda/netlink/route.go

@@ -3,6 +3,7 @@ package netlink
 import (
 	"fmt"
 	"net"
+	"strings"
 )
 
 // Scope is an enum representing a route scope.
@@ -10,6 +11,20 @@ type Scope uint8
 
 type NextHopFlag int
 
+type Destination interface { // value carried in the RTA_NEWDST attribute; MPLSDestination is the implementation in this package
+	Family() int             // address family; routeHandle rejects a value that differs from the route's family
+	Decode([]byte) error     // fills the receiver from an attribute payload
+	Encode() ([]byte, error) // produces the attribute payload
+	String() string
+}
+
+type Encap interface { // route encapsulation carried in RTA_ENCAP_TYPE / RTA_ENCAP; MPLSEncap is the implementation in this package
+	Type() int               // encapsulation type, written by routeHandle as a 16-bit RTA_ENCAP_TYPE value
+	Decode([]byte) error     // fills the receiver from an RTA_ENCAP payload
+	Encode() ([]byte, error) // produces the RTA_ENCAP payload
+	String() string
+}
+
 // Route represents a netlink route.
 type Route struct {
 	LinkIndex  int
@@ -25,15 +40,36 @@ type Route struct {
 	Type       int
 	Tos        int
 	Flags      int
+	MPLSDst    *int
+	NewDst     Destination
+	Encap      Encap
 }
 
 func (r Route) String() string { // String renders the route as "{Key: value ...}"; Ifindex is shown only without MultiPath, NewDst/Encap only when set
+	elems := []string{}
+	if len(r.MultiPath) == 0 {
+		elems = append(elems, fmt.Sprintf("Ifindex: %d", r.LinkIndex))
+	}
+	if r.MPLSDst != nil {
+		elems = append(elems, fmt.Sprintf("Dst: %d", *r.MPLSDst)) // dereference: MPLSDst is *int, and %d on the pointer prints its address instead of the label
+	} else {
+		elems = append(elems, fmt.Sprintf("Dst: %s", r.Dst))
+	}
+	if r.NewDst != nil {
+		elems = append(elems, fmt.Sprintf("NewDst: %s", r.NewDst))
+	}
+	if r.Encap != nil {
+		elems = append(elems, fmt.Sprintf("Encap: %s", r.Encap))
+	}
+	elems = append(elems, fmt.Sprintf("Src: %s", r.Src))
 	if len(r.MultiPath) > 0 {
-		return fmt.Sprintf("{Dst: %s Src: %s Gw: %s Flags: %s Table: %d}", r.Dst,
-			r.Src, r.MultiPath, r.ListFlags(), r.Table)
+		elems = append(elems, fmt.Sprintf("Gw: %s", r.MultiPath))
+	} else {
+		elems = append(elems, fmt.Sprintf("Gw: %s", r.Gw))
 	}
-	return fmt.Sprintf("{Ifindex: %d Dst: %s Src: %s Gw: %s Flags: %s Table: %d}", r.LinkIndex, r.Dst,
-		r.Src, r.Gw, r.ListFlags(), r.Table)
+	elems = append(elems, fmt.Sprintf("Flags: %s", r.ListFlags()))
+	elems = append(elems, fmt.Sprintf("Table: %d", r.Table))
+	return fmt.Sprintf("{%s}", strings.Join(elems, " "))
 }
 
 func (r *Route) SetFlag(flag NextHopFlag) {
@@ -59,8 +95,22 @@ type NexthopInfo struct {
 	LinkIndex int
 	Hops      int
 	Gw        net.IP
+	Flags     int
+	NewDst    Destination
+	Encap     Encap
 }
 
 func (n *NexthopInfo) String() string { // String renders the nexthop as "{Key: value ...}"; NewDst and Encap appear only when set, Weight is Hops+1
-	return fmt.Sprintf("{Ifindex: %d Weight: %d, Gw: %s}", n.LinkIndex, n.Hops+1, n.Gw)
+	elems := []string{}
+	elems = append(elems, fmt.Sprintf("Ifindex: %d", n.LinkIndex))
+	if n.NewDst != nil {
+		elems = append(elems, fmt.Sprintf("NewDst: %s", n.NewDst))
+	}
+	if n.Encap != nil {
+		elems = append(elems, fmt.Sprintf("Encap: %s", n.Encap))
+	}
+	elems = append(elems, fmt.Sprintf("Weight: %d", n.Hops+1))
+	elems = append(elems, fmt.Sprintf("Gw: %s", n.Gw)) // %s so the net.IP prints as an address (as Route.String does); %d would print its raw bytes
+	elems = append(elems, fmt.Sprintf("Flags: %s", n.ListFlags()))
+	return fmt.Sprintf("{%s}", strings.Join(elems, " "))
 }

+ 234 - 23
libnetwork/vendor/github.com/vishvananda/netlink/route_linux.go

@@ -3,6 +3,7 @@ package netlink
 import (
 	"fmt"
 	"net"
+	"strings"
 	"syscall"
 
 	"github.com/vishvananda/netlink/nl"
@@ -42,16 +43,92 @@ var testFlags = []flagString{
 	{f: FLAG_PERVASIVE, s: "pervasive"},
 }
 
-func (r *Route) ListFlags() []string {
+func listFlags(flag int) []string { // returns, in testFlags order, the name of every testFlags entry whose bit is set in flag
 	var flags []string
 	for _, tf := range testFlags {
-		if r.Flags&int(tf.f) != 0 {
+		if flag&int(tf.f) != 0 {
 			flags = append(flags, tf.s)
 		}
 	}
 	return flags
 }
 
+func (r *Route) ListFlags() []string { // ListFlags returns the names of the flags set in r.Flags.
+	return listFlags(r.Flags)
+}
+
+func (n *NexthopInfo) ListFlags() []string { // ListFlags returns the names of the flags set in n.Flags.
+	return listFlags(n.Flags)
+}
+
+type MPLSDestination struct { // Destination implementation holding an MPLS label stack
+	Labels []int
+}
+
+func (d *MPLSDestination) Family() int { // Family always reports nl.FAMILY_MPLS.
+	return nl.FAMILY_MPLS
+}
+
+func (d *MPLSDestination) Decode(buf []byte) error { // Decode replaces Labels with nl.DecodeMPLSStack(buf); it never returns an error
+	d.Labels = nl.DecodeMPLSStack(buf)
+	return nil
+}
+
+func (d *MPLSDestination) Encode() ([]byte, error) { // Encode returns nl.EncodeMPLSStack of Labels; the error is always nil
+	return nl.EncodeMPLSStack(d.Labels...), nil
+}
+
+func (d *MPLSDestination) String() string { // String joins the labels in decimal with "/", e.g. "100/200"
+	s := make([]string, 0, len(d.Labels))
+	for _, l := range d.Labels {
+		s = append(s, fmt.Sprintf("%d", l))
+	}
+	return strings.Join(s, "/")
+}
+
+type MPLSEncap struct { // Encap implementation holding an MPLS label stack
+	Labels []int
+}
+
+func (e *MPLSEncap) Type() int { // Type always reports nl.LWTUNNEL_ENCAP_MPLS.
+	return nl.LWTUNNEL_ENCAP_MPLS
+}
+
+func (e *MPLSEncap) Decode(buf []byte) error { // Decode parses a 4-byte header (native-endian uint16 length, uint16 type) followed by the label stack
+	if len(buf) < 4 {
+		return fmt.Errorf("Lack of bytes")
+	}
+	native := nl.NativeEndian()
+	l := native.Uint16(buf)
+	if l < 4 || len(buf) < int(l) { // l counts the 4-byte header too; a smaller value would make the slicing below panic
+		return fmt.Errorf("Lack of bytes")
+	}
+	buf = buf[:l]
+	typ := native.Uint16(buf[2:])
+	if typ != nl.MPLS_IPTUNNEL_DST {
+		return fmt.Errorf("Unknown MPLS Encap Type: %d", typ)
+	}
+	e.Labels = nl.DecodeMPLSStack(buf[4:])
+	return nil
+}
+
+func (e *MPLSEncap) Encode() ([]byte, error) { // Encode emits the same layout Decode reads: length (stack + 4), MPLS_IPTUNNEL_DST, then the encoded stack
+	s := nl.EncodeMPLSStack(e.Labels...)
+	native := nl.NativeEndian()
+	hdr := make([]byte, 4)
+	native.PutUint16(hdr, uint16(len(s)+4))
+	native.PutUint16(hdr[2:], nl.MPLS_IPTUNNEL_DST)
+	return append(hdr, s...), nil
+}
+
+func (e *MPLSEncap) String() string { // String joins the labels in decimal with "/", e.g. "100/200"
+	s := make([]string, 0, len(e.Labels))
+	for _, l := range e.Labels {
+		s = append(s, fmt.Sprintf("%d", l))
+	}
+	return strings.Join(s, "/")
+}
+
 // RouteAdd will add a route to the system.
 // Equivalent to: `ip route add $route`
 func RouteAdd(route *Route) error {
@@ -61,7 +138,22 @@ func RouteAdd(route *Route) error {
 // RouteAdd will add a route to the system (request flags: NLM_F_CREATE|NLM_F_EXCL|NLM_F_ACK).
 // Equivalent to: `ip route add $route`
 func (h *Handle) RouteAdd(route *Route) error {
-	req := h.newNetlinkRequest(syscall.RTM_NEWROUTE, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
+	flags := syscall.NLM_F_CREATE | syscall.NLM_F_EXCL | syscall.NLM_F_ACK // differs from RouteReplace only in NLM_F_EXCL vs NLM_F_REPLACE
+	req := h.newNetlinkRequest(syscall.RTM_NEWROUTE, flags)
+	return h.routeHandle(route, req, nl.NewRtMsg())
+}
+
+// RouteReplace will add a route to the system or replace an existing one, using the package-level handle.
+// Equivalent to: `ip route replace $route`
+func RouteReplace(route *Route) error {
+	return pkgHandle.RouteReplace(route)
+}
+
+// RouteReplace will add a route to the system or replace an existing one (request flags: NLM_F_CREATE|NLM_F_REPLACE|NLM_F_ACK).
+// Equivalent to: `ip route replace $route`
+func (h *Handle) RouteReplace(route *Route) error {
+	flags := syscall.NLM_F_CREATE | syscall.NLM_F_REPLACE | syscall.NLM_F_ACK // differs from RouteAdd only in NLM_F_REPLACE vs NLM_F_EXCL
+	req := h.newNetlinkRequest(syscall.RTM_NEWROUTE, flags)
 	return h.routeHandle(route, req, nl.NewRtMsg())
 }
 
@@ -79,7 +171,7 @@ func (h *Handle) RouteDel(route *Route) error {
 }
 
 func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) error {
-	if (route.Dst == nil || route.Dst.IP == nil) && route.Src == nil && route.Gw == nil {
+	if (route.Dst == nil || route.Dst.IP == nil) && route.Src == nil && route.Gw == nil && route.MPLSDst == nil {
 		return fmt.Errorf("one of Dst.IP, Src, or Gw must not be nil")
 	}
 
@@ -98,6 +190,33 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
 			dstData = route.Dst.IP.To16()
 		}
 		rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_DST, dstData))
+	} else if route.MPLSDst != nil {
+		family = nl.FAMILY_MPLS
+		msg.Dst_len = uint8(20)
+		msg.Type = syscall.RTN_UNICAST
+		rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_DST, nl.EncodeMPLSStack(*route.MPLSDst)))
+	}
+
+	if route.NewDst != nil {
+		if family != -1 && family != route.NewDst.Family() {
+			return fmt.Errorf("new destination and destination are not the same address family")
+		}
+		buf, err := route.NewDst.Encode()
+		if err != nil {
+			return err
+		}
+		rtAttrs = append(rtAttrs, nl.NewRtAttr(nl.RTA_NEWDST, buf))
+	}
+
+	if route.Encap != nil {
+		buf := make([]byte, 2)
+		native.PutUint16(buf, uint16(route.Encap.Type()))
+		rtAttrs = append(rtAttrs, nl.NewRtAttr(nl.RTA_ENCAP_TYPE, buf))
+		buf, err := route.Encap.Encode()
+		if err != nil {
+			return err
+		}
+		rtAttrs = append(rtAttrs, nl.NewRtAttr(nl.RTA_ENCAP, buf))
 	}
 
 	if route.Src != nil {
@@ -138,26 +257,43 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
 				RtNexthop: syscall.RtNexthop{
 					Hops:    uint8(nh.Hops),
 					Ifindex: int32(nh.LinkIndex),
-					Len:     uint16(syscall.SizeofRtNexthop),
+					Flags:   uint8(nh.Flags),
 				},
 			}
-			var gwData []byte
+			children := []nl.NetlinkRequestData{}
 			if nh.Gw != nil {
 				gwFamily := nl.GetIPFamily(nh.Gw)
 				if family != -1 && family != gwFamily {
 					return fmt.Errorf("gateway, source, and destination ip are not the same IP family")
 				}
-				var gw *nl.RtAttr
 				if gwFamily == FAMILY_V4 {
-					gw = nl.NewRtAttr(syscall.RTA_GATEWAY, []byte(nh.Gw.To4()))
+					children = append(children, nl.NewRtAttr(syscall.RTA_GATEWAY, []byte(nh.Gw.To4())))
 				} else {
-					gw = nl.NewRtAttr(syscall.RTA_GATEWAY, []byte(nh.Gw.To16()))
+					children = append(children, nl.NewRtAttr(syscall.RTA_GATEWAY, []byte(nh.Gw.To16())))
 				}
-				gwData = gw.Serialize()
-				rtnh.Len += uint16(len(gwData))
 			}
+			if nh.NewDst != nil {
+				if family != -1 && family != nh.NewDst.Family() {
+					return fmt.Errorf("new destination and destination are not the same address family")
+				}
+				buf, err := nh.NewDst.Encode()
+				if err != nil {
+					return err
+				}
+				children = append(children, nl.NewRtAttr(nl.RTA_NEWDST, buf))
+			}
+			if nh.Encap != nil { // per-nexthop encapsulation: both attributes are nested under this hop
+				buf := make([]byte, 2)
+				native.PutUint16(buf, uint16(nh.Encap.Type()))
+				children = append(children, nl.NewRtAttr(nl.RTA_ENCAP_TYPE, buf)) // must sit beside this hop's RTA_ENCAP; at route level it would be detached from it
+				buf, err := nh.Encap.Encode()
+				if err != nil {
+					return err
+				}
+				children = append(children, nl.NewRtAttr(nl.RTA_ENCAP, buf))
+			}
+			rtnh.Children = children
 			buf = append(buf, rtnh.Serialize()...)
-			buf = append(buf, gwData...)
 		}
 		rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_MULTIPATH, buf))
 	}
@@ -283,14 +419,22 @@ func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64)
 				continue
 			case filterMask&RT_FILTER_SRC != 0 && !route.Src.Equal(filter.Src):
 				continue
-			case filterMask&RT_FILTER_DST != 0 && filter.Dst != nil:
-				if route.Dst == nil {
-					continue
-				}
-				aMaskLen, aMaskBits := route.Dst.Mask.Size()
-				bMaskLen, bMaskBits := filter.Dst.Mask.Size()
-				if !(route.Dst.IP.Equal(filter.Dst.IP) && aMaskLen == bMaskLen && aMaskBits == bMaskBits) {
-					continue
+			case filterMask&RT_FILTER_DST != 0:
+				if filter.MPLSDst == nil || route.MPLSDst == nil || (*filter.MPLSDst) != (*route.MPLSDst) {
+					if filter.Dst == nil {
+						if route.Dst != nil {
+							continue
+						}
+					} else {
+						if route.Dst == nil {
+							continue
+						}
+						aMaskLen, aMaskBits := route.Dst.Mask.Size()
+						bMaskLen, bMaskBits := filter.Dst.Mask.Size()
+						if !(route.Dst.IP.Equal(filter.Dst.IP) && aMaskLen == bMaskLen && aMaskBits == bMaskBits) {
+							continue
+						}
+					}
 				}
 			}
 		}
@@ -316,6 +460,7 @@ func deserializeRoute(m []byte) (Route, error) {
 	}
 
 	native := nl.NativeEndian()
+	var encap, encapType syscall.NetlinkRouteAttr
 	for _, attr := range attrs {
 		switch attr.Attr.Type {
 		case syscall.RTA_GATEWAY:
@@ -323,9 +468,17 @@ func deserializeRoute(m []byte) (Route, error) {
 		case syscall.RTA_PREFSRC:
 			route.Src = net.IP(attr.Value)
 		case syscall.RTA_DST:
-			route.Dst = &net.IPNet{
-				IP:   attr.Value,
-				Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
+			if msg.Family == nl.FAMILY_MPLS {
+				stack := nl.DecodeMPLSStack(attr.Value)
+				if len(stack) == 0 || len(stack) > 1 {
+					return route, fmt.Errorf("invalid MPLS RTA_DST")
+				}
+				route.MPLSDst = &stack[0]
+			} else {
+				route.Dst = &net.IPNet{
+					IP:   attr.Value,
+					Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
+				}
 			}
 		case syscall.RTA_OIF:
 			route.LinkIndex = int(native.Uint32(attr.Value[0:4]))
@@ -347,17 +500,47 @@ func deserializeRoute(m []byte) (Route, error) {
 				info := &NexthopInfo{
 					LinkIndex: int(nh.RtNexthop.Ifindex),
 					Hops:      int(nh.RtNexthop.Hops),
+					Flags:     int(nh.RtNexthop.Flags),
 				}
 				attrs, err := nl.ParseRouteAttr(value[syscall.SizeofRtNexthop:int(nh.RtNexthop.Len)])
 				if err != nil {
 					return nil, nil, err
 				}
+				var encap, encapType syscall.NetlinkRouteAttr
 				for _, attr := range attrs {
 					switch attr.Attr.Type {
 					case syscall.RTA_GATEWAY:
 						info.Gw = net.IP(attr.Value)
+					case nl.RTA_NEWDST:
+						var d Destination
+						switch msg.Family {
+						case nl.FAMILY_MPLS:
+							d = &MPLSDestination{}
+						}
+						if err := d.Decode(attr.Value); err != nil {
+							return nil, nil, err
+						}
+						info.NewDst = d
+					case nl.RTA_ENCAP_TYPE:
+						encapType = attr
+					case nl.RTA_ENCAP:
+						encap = attr
+					}
+				}
+
+				if len(encap.Value) != 0 && len(encapType.Value) != 0 {
+					typ := int(native.Uint16(encapType.Value[0:2]))
+					var e Encap
+					switch typ {
+					case nl.LWTUNNEL_ENCAP_MPLS:
+						e = &MPLSEncap{}
+						if err := e.Decode(encap.Value); err != nil {
+							return nil, nil, err
+						}
 					}
+					info.Encap = e
 				}
+
 				return info, value[int(nh.RtNexthop.Len):], nil
 			}
 			rest := attr.Value
@@ -369,8 +552,36 @@ func deserializeRoute(m []byte) (Route, error) {
 				route.MultiPath = append(route.MultiPath, info)
 				rest = buf
 			}
+		case nl.RTA_NEWDST:
+			var d Destination
+			switch msg.Family {
+			case nl.FAMILY_MPLS:
+				d = &MPLSDestination{}
+			}
+			if err := d.Decode(attr.Value); err != nil {
+				return route, err
+			}
+			route.NewDst = d
+		case nl.RTA_ENCAP_TYPE:
+			encapType = attr
+		case nl.RTA_ENCAP:
+			encap = attr
+		}
+	}
+
+	if len(encap.Value) != 0 && len(encapType.Value) != 0 {
+		typ := int(native.Uint16(encapType.Value[0:2]))
+		var e Encap
+		switch typ {
+		case nl.LWTUNNEL_ENCAP_MPLS:
+			e = &MPLSEncap{}
+			if err := e.Decode(encap.Value); err != nil {
+				return route, err
+			}
 		}
+		route.Encap = e
 	}
+
 	return route, nil
 }
 

+ 4 - 0
libnetwork/vendor/github.com/vishvananda/netlink/route_unspecified.go

@@ -5,3 +5,7 @@ package netlink
 func (r *Route) ListFlags() []string { // stub for this build: always returns an empty, non-nil slice
 	return []string{}
 }
+
+func (n *NexthopInfo) ListFlags() []string { // stub for this build: always returns an empty, non-nil slice
+	return []string{}
+}

+ 27 - 0
libnetwork/vendor/github.com/vishvananda/netlink/socket.go

@@ -0,0 +1,27 @@
+package netlink
+
+import "net"
+
+// SocketID identifies a single socket by its address pair, ports, interface and cookie.
+type SocketID struct {
+	SourcePort      uint16 // encoded in network byte order on the wire
+	DestinationPort uint16 // encoded in network byte order on the wire
+	Source          net.IP // the linux (de)serializers only carry the IPv4 form
+	Destination     net.IP // the linux (de)serializers only carry the IPv4 form
+	Interface       uint32
+	Cookie          [2]uint32
+}
+
+// Socket holds the information decoded from a netlink reply about one socket (see SocketGet); it is not the netlink socket used for the query.
+type Socket struct {
+	Family  uint8
+	State   uint8
+	Timer   uint8
+	Retrans uint8
+	ID      SocketID
+	Expires uint32
+	RQueue  uint32
+	WQueue  uint32
+	UID     uint32
+	INode   uint32
+}

+ 159 - 0
libnetwork/vendor/github.com/vishvananda/netlink/socket_linux.go

@@ -0,0 +1,159 @@
+package netlink
+
+import (
+	"errors"
+	"fmt"
+	"net"
+	"syscall"
+
+	"github.com/vishvananda/netlink/nl"
+)
+
+const (
+	sizeofSocketID      = 0x30                  // 2+2 port bytes, 16+16 address bytes, 4 interface bytes, 8 cookie bytes
+	sizeofSocketRequest = sizeofSocketID + 0x8  // four single-byte fields and the 32-bit States precede the ID
+	sizeofSocket        = sizeofSocketID + 0x18 // four single-byte fields before the ID and five 32-bit fields after it
+)
+
+type socketRequest struct { // payload SocketGet attaches to its SOCK_DIAG_BY_FAMILY request
+	Family   uint8
+	Protocol uint8
+	Ext      uint8
+	pad      uint8 // serialized as-is; SocketGet leaves it zero
+	States   uint32
+	ID       SocketID
+}
+
+type writeBuffer struct { // cursor over a pre-sized byte slice, used while serializing
+	Bytes []byte
+	pos   int // index of the next byte to be written
+}
+
+func (b *writeBuffer) Write(c byte) { // stores c at the cursor and advances by one; panics past the end of Bytes
+	b.Bytes[b.pos] = c
+	b.pos++
+}
+
+func (b *writeBuffer) Next(n int) []byte { // returns the next n bytes as a writable sub-slice of Bytes and advances past them
+	s := b.Bytes[b.pos : b.pos+n]
+	b.pos += n
+	return s
+}
+
+func (r *socketRequest) Serialize() []byte { // Serialize lays the request out field by field into a fresh sizeofSocketRequest-byte slice
+	b := writeBuffer{Bytes: make([]byte, sizeofSocketRequest)}
+	b.Write(r.Family)
+	b.Write(r.Protocol)
+	b.Write(r.Ext)
+	b.Write(r.pad)
+	native.PutUint32(b.Next(4), r.States)
+	networkOrder.PutUint16(b.Next(2), r.ID.SourcePort)
+	networkOrder.PutUint16(b.Next(2), r.ID.DestinationPort)
+	copy(b.Next(4), r.ID.Source.To4()) // IPv4 only; To4 yields nil for other addresses and nothing is copied
+	b.Next(12)                         // skip the rest of the 16-byte address slot, left zero
+	copy(b.Next(4), r.ID.Destination.To4())
+	b.Next(12) // skip the rest of the 16-byte address slot, left zero
+	native.PutUint32(b.Next(4), r.ID.Interface)
+	native.PutUint32(b.Next(4), r.ID.Cookie[0])
+	native.PutUint32(b.Next(4), r.ID.Cookie[1])
+	return b.Bytes
+}
+
+func (r *socketRequest) Len() int { return sizeofSocketRequest } // Len reports the fixed serialized size
+
+type readBuffer struct { // cursor over a received byte slice, used while deserializing
+	Bytes []byte
+	pos   int // index of the next unread byte
+}
+
+func (b *readBuffer) Read() byte { // returns the byte at the cursor and advances by one; panics past the end of Bytes
+	c := b.Bytes[b.pos]
+	b.pos++
+	return c
+}
+
+func (b *readBuffer) Next(n int) []byte { // returns the next n bytes as a sub-slice of Bytes (not a copy) and advances past them
+	s := b.Bytes[b.pos : b.pos+n]
+	b.pos += n
+	return s
+}
+
+func (s *Socket) deserialize(b []byte) error { // fills s from the first sizeofSocket bytes of b; errors if b is shorter
+	if len(b) < sizeofSocket {
+		return fmt.Errorf("socket data short read (%d); want %d", len(b), sizeofSocket)
+	}
+	rb := readBuffer{Bytes: b}
+	s.Family = rb.Read()
+	s.State = rb.Read()
+	s.Timer = rb.Read()
+	s.Retrans = rb.Read()
+	s.ID.SourcePort = networkOrder.Uint16(rb.Next(2))
+	s.ID.DestinationPort = networkOrder.Uint16(rb.Next(2))
+	s.ID.Source = net.IPv4(rb.Read(), rb.Read(), rb.Read(), rb.Read()) // only the first 4 bytes of the address slot are interpreted (IPv4)
+	rb.Next(12)                                                        // skip the rest of the 16-byte address slot
+	s.ID.Destination = net.IPv4(rb.Read(), rb.Read(), rb.Read(), rb.Read())
+	rb.Next(12) // skip the rest of the 16-byte address slot
+	s.ID.Interface = native.Uint32(rb.Next(4))
+	s.ID.Cookie[0] = native.Uint32(rb.Next(4))
+	s.ID.Cookie[1] = native.Uint32(rb.Next(4))
+	s.Expires = native.Uint32(rb.Next(4))
+	s.RQueue = native.Uint32(rb.Next(4))
+	s.WQueue = native.Uint32(rb.Next(4))
+	s.UID = native.Uint32(rb.Next(4))
+	s.INode = native.Uint32(rb.Next(4))
+	return nil
+}
+
+// SocketGet returns the Socket identified by its local and remote addresses; both must be IPv4 *net.TCPAddr, otherwise ErrNotImplemented is returned.
+func SocketGet(local, remote net.Addr) (*Socket, error) {
+	localTCP, ok := local.(*net.TCPAddr)
+	if !ok {
+		return nil, ErrNotImplemented
+	}
+	remoteTCP, ok := remote.(*net.TCPAddr)
+	if !ok {
+		return nil, ErrNotImplemented
+	}
+	localIP := localTCP.IP.To4()
+	if localIP == nil {
+		return nil, ErrNotImplemented
+	}
+	remoteIP := remoteTCP.IP.To4()
+	if remoteIP == nil {
+		return nil, ErrNotImplemented
+	}
+
+	s, err := nl.Subscribe(syscall.NETLINK_INET_DIAG)
+	if err != nil {
+		return nil, err
+	}
+	defer s.Close()
+	req := nl.NewNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, 0)
+	req.AddData(&socketRequest{
+		Family:   syscall.AF_INET,
+		Protocol: syscall.IPPROTO_TCP,
+		ID: SocketID{
+			SourcePort:      uint16(localTCP.Port),
+			DestinationPort: uint16(remoteTCP.Port),
+			Source:          localIP,
+			Destination:     remoteIP,
+			Cookie:          [2]uint32{nl.TCPDIAG_NOCOOKIE, nl.TCPDIAG_NOCOOKIE},
+		},
+	})
+	s.Send(req) // NOTE(review): whatever Send returns is discarded here — confirm whether a send failure should be reported
+	msgs, err := s.Receive()
+	if err != nil {
+		return nil, err
+	}
+	if len(msgs) == 0 {
+		return nil, errors.New("no message nor error from netlink")
+	}
+	if len(msgs) > 2 { // NOTE(review): two messages are still accepted and only msgs[0] is decoded — confirm the intended bound
+		return nil, fmt.Errorf("multiple (%d) matching sockets", len(msgs))
+	}
+	sock := &Socket{}
+	if err := sock.deserialize(msgs[0].Data); err != nil {
+		return nil, err
+	}
+	return sock, nil
+}

+ 98 - 0
libnetwork/vendor/github.com/vishvananda/netlink/xfrm_monitor_linux.go

@@ -0,0 +1,98 @@
+package netlink
+
+import (
+	"fmt"
+	"syscall"
+
+	"github.com/vishvananda/netns"
+
+	"github.com/vishvananda/netlink/nl"
+)
+
+type XfrmMsg interface { // a message delivered on the channel passed to XfrmMonitor
+	Type() nl.XfrmMsgType // the XFRM message type this value represents
+}
+
+type XfrmMsgExpire struct { // XfrmMsg produced for XFRM_MSG_EXPIRE notifications
+	XfrmState *XfrmState // state converted from the notification's embedded XfrmUsersaInfo
+	Hard      bool       // true when the notification's Hard byte equals 1
+}
+
+func (ue *XfrmMsgExpire) Type() nl.XfrmMsgType { // Type always reports nl.XFRM_MSG_EXPIRE.
+	return nl.XFRM_MSG_EXPIRE
+}
+
+func parseXfrmMsgExpire(b []byte) *XfrmMsgExpire { // converts a raw XfrmUserExpire payload into an XfrmMsgExpire
+	var e XfrmMsgExpire
+
+	msg := nl.DeserializeXfrmUserExpire(b) // b must hold at least nl.SizeofXfrmUserExpire bytes
+	e.XfrmState = xfrmStateFromXfrmUsersaInfo(&msg.XfrmUsersaInfo)
+	e.Hard = msg.Hard == 1
+
+	return &e
+}
+
+func XfrmMonitor(ch chan<- XfrmMsg, done <-chan struct{}, errorChan chan<- error,
+	types ...nl.XfrmMsgType) error {
+	// XfrmMonitor subscribes to the groups for types, then forwards parsed messages on ch and problems on errorChan.
+	groups, err := xfrmMcastGroups(types)
+	if err != nil {
+		return err // report an empty or unsupported type list; returning nil here would signal success with no monitor running
+	}
+	s, err := nl.SubscribeAt(netns.None(), netns.None(), syscall.NETLINK_XFRM, groups...)
+	if err != nil {
+		return err
+	}
+
+	if done != nil {
+		go func() {
+			<-done
+			s.Close() // closing the socket is what stops the receive loop below: its next Receive error ends it
+		}()
+
+	}
+
+	go func() {
+		defer close(ch) // ch is closed once the receive loop exits
+		for {
+			msgs, err := s.Receive()
+			if err != nil {
+				errorChan <- err
+				return
+			}
+			for _, m := range msgs {
+				switch m.Header.Type {
+				case nl.XFRM_MSG_EXPIRE:
+					ch <- parseXfrmMsgExpire(m.Data)
+				default:
+					errorChan <- fmt.Errorf("unsupported msg type: %x", m.Header.Type) // reported, but the loop keeps running
+				}
+			}
+		}
+	}()
+
+	return nil
+}
+
+func xfrmMcastGroups(types []nl.XfrmMsgType) ([]uint, error) { // maps each message type to its multicast group; errors on an empty list or an unmapped type
+	groups := make([]uint, 0)
+
+	if len(types) == 0 {
+		return nil, fmt.Errorf("no xfrm msg type specified")
+	}
+
+	for _, t := range types {
+		var group uint
+
+		switch t { // XFRM_MSG_EXPIRE is the only type with a mapping
+		case nl.XFRM_MSG_EXPIRE:
+			group = nl.XFRMNLGRP_EXPIRE
+		default:
+			return nil, fmt.Errorf("unsupported group: %x", t)
+		}
+
+		groups = append(groups, group) // one entry per input type; duplicates are not removed
+	}
+
+	return groups, nil
+}

+ 3 - 2
libnetwork/vendor/github.com/vishvananda/netlink/xfrm_state.go

@@ -83,11 +83,12 @@ type XfrmState struct {
 	Crypt        *XfrmStateAlgo
 	Aead         *XfrmStateAlgo
 	Encap        *XfrmStateEncap
+	ESN          bool
 }
 
 func (sa XfrmState) String() string { // String returns a one-line, comma-separated summary of the state's fields
-	return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, Auth: %v, Crypt: %v, Aead: %v,Encap: %v",
-		sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.Auth, sa.Crypt, sa.Aead, sa.Encap)
+	return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, Auth: %v, Crypt: %v, Aead: %v, Encap: %v, ESN: %t",
+		sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.Auth, sa.Crypt, sa.Aead, sa.Encap, sa.ESN)
 }
 func (sa XfrmState) Print(stats bool) string {
 	if !stats {

+ 94 - 18
libnetwork/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go

@@ -60,6 +60,21 @@ func writeMark(m *XfrmMark) []byte {
 	return mark.Serialize()
 }
 
+func writeReplayEsn(replayWindow int) []byte { // builds the serialized XfrmReplayStateEsn header for the given window, with all sequence counters zero
+	replayEsn := &nl.XfrmReplayStateEsn{
+		OSeq:         0,
+		Seq:          0,
+		OSeqHi:       0,
+		SeqHi:        0,
+		ReplayWindow: uint32(replayWindow),
+	}
+
+	// taken from iproute2/ip/xfrm_state.c: replayWindow divided by 32 and rounded up, i.e. the 32-bit words needed for that many bits
+	replayEsn.BmpLen = uint32((replayWindow + (4 * 8) - 1) / (4 * 8))
+
+	return replayEsn.Serialize()
+}
+
 // XfrmStateAdd will add an xfrm state to the system.
 // Equivalent to: `ip xfrm state add $state`
 func XfrmStateAdd(state *XfrmState) error {
@@ -72,6 +87,12 @@ func (h *Handle) XfrmStateAdd(state *XfrmState) error {
 	return h.xfrmStateAddOrUpdate(state, nl.XFRM_MSG_NEWSA)
 }
 
+// XfrmStateAllocSpi sends an XFRM_MSG_ALLOCSPI request for state via the package-level handle and returns the state parsed from the reply.
+// Equivalent to: `ip xfrm state allocspi`
+func XfrmStateAllocSpi(state *XfrmState) (*XfrmState, error) {
+	return pkgHandle.xfrmStateAllocSpi(state)
+}
+
 // XfrmStateUpdate will update an xfrm state to the system.
 // Equivalent to: `ip xfrm state update $state`
 func XfrmStateUpdate(state *XfrmState) error {
@@ -85,21 +106,23 @@ func (h *Handle) XfrmStateUpdate(state *XfrmState) error {
 }
 
 func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error {
+
 	// A state with spi 0 can't be deleted so don't allow it to be set
 	if state.Spi == 0 {
 		return fmt.Errorf("Spi must be set when adding xfrm state.")
 	}
 	req := h.newNetlinkRequest(nlProto, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
 
-	msg := &nl.XfrmUsersaInfo{}
-	msg.Family = uint16(nl.GetIPFamily(state.Dst))
-	msg.Id.Daddr.FromIP(state.Dst)
-	msg.Saddr.FromIP(state.Src)
-	msg.Id.Proto = uint8(state.Proto)
-	msg.Mode = uint8(state.Mode)
-	msg.Id.Spi = nl.Swap32(uint32(state.Spi))
-	msg.Reqid = uint32(state.Reqid)
-	msg.ReplayWindow = uint8(state.ReplayWindow)
+	msg := xfrmUsersaInfoFromXfrmState(state)
+
+	if state.ESN {
+		if state.ReplayWindow == 0 {
+			return fmt.Errorf("ESN flag set without ReplayWindow")
+		}
+		msg.Flags |= nl.XFRM_STATE_ESN
+		msg.ReplayWindow = 0
+	}
+
 	limitsToLft(state.Limits, &msg.Lft)
 	req.AddData(msg)
 
@@ -129,11 +152,44 @@ func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error {
 		out := nl.NewRtAttr(nl.XFRMA_MARK, writeMark(state.Mark))
 		req.AddData(out)
 	}
+	if state.ESN {
+		out := nl.NewRtAttr(nl.XFRMA_REPLAY_ESN_VAL, writeReplayEsn(state.ReplayWindow))
+		req.AddData(out)
+	}
 
 	_, err := req.Execute(syscall.NETLINK_XFRM, 0)
 	return err
 }
 
+func (h *Handle) xfrmStateAllocSpi(state *XfrmState) (*XfrmState, error) { // sends XFRM_MSG_ALLOCSPI built from state and parses the first reply message
+	req := h.newNetlinkRequest(nl.XFRM_MSG_ALLOCSPI,
+		syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
+
+	msg := &nl.XfrmUserSpiInfo{}
+	msg.XfrmUsersaInfo = *(xfrmUsersaInfoFromXfrmState(state))
+	// 1-255 is reserved by IANA for future use, so the requested range starts at 0x100
+	msg.Min = 0x100
+	msg.Max = 0xffffffff
+	req.AddData(msg)
+
+	if state.Mark != nil {
+		out := nl.NewRtAttr(nl.XFRMA_MARK, writeMark(state.Mark))
+		req.AddData(out)
+	}
+
+	msgs, err := req.Execute(syscall.NETLINK_XFRM, 0)
+	if err != nil {
+		return nil, err
+	}
+
+	s, err := parseXfrmState(msgs[0], FAMILY_ALL) // assumes Execute returned at least one message — TODO confirm
+	if err != nil {
+		return nil, err
+	}
+
+	return s, err // err is necessarily nil at this point
+}
+
 // XfrmStateDel will delete an xfrm state from the system. Note that
 // the Algos are ignored when matching the state to delete.
 // Equivalent to: `ip xfrm state del $state`
@@ -241,14 +297,7 @@ func (h *Handle) xfrmStateGetOrDelete(state *XfrmState, nlProto int) (*XfrmState
 
 var familyError = fmt.Errorf("family error")
 
-func parseXfrmState(m []byte, family int) (*XfrmState, error) {
-	msg := nl.DeserializeXfrmUsersaInfo(m)
-
-	// This is mainly for the state dump
-	if family != FAMILY_ALL && family != int(msg.Family) {
-		return nil, familyError
-	}
-
+func xfrmStateFromXfrmUsersaInfo(msg *nl.XfrmUsersaInfo) *XfrmState {
 	var state XfrmState
 
 	state.Dst = msg.Id.Daddr.ToIP()
@@ -260,6 +309,19 @@ func parseXfrmState(m []byte, family int) (*XfrmState, error) {
 	state.ReplayWindow = int(msg.ReplayWindow)
 	lftToLimits(&msg.Lft, &state.Limits)
 
+	return &state
+}
+
+func parseXfrmState(m []byte, family int) (*XfrmState, error) {
+	msg := nl.DeserializeXfrmUsersaInfo(m)
+
+	// This is mainly for the state dump
+	if family != FAMILY_ALL && family != int(msg.Family) {
+		return nil, familyError
+	}
+
+	state := xfrmStateFromXfrmUsersaInfo(msg)
+
 	attrs, err := nl.ParseRouteAttr(m[nl.SizeofXfrmUsersaInfo:])
 	if err != nil {
 		return nil, err
@@ -310,7 +372,7 @@ func parseXfrmState(m []byte, family int) (*XfrmState, error) {
 		}
 	}
 
-	return &state, nil
+	return state, nil
 }
 
 // XfrmStateFlush will flush the xfrm state on the system.
@@ -366,3 +428,17 @@ func limitsToLft(lmts XfrmStateLimits, lft *nl.XfrmLifetimeCfg) {
 func lftToLimits(lft *nl.XfrmLifetimeCfg, lmts *XfrmStateLimits) {
 	*lmts = *(*XfrmStateLimits)(unsafe.Pointer(lft))
 }
+
+func xfrmUsersaInfoFromXfrmState(state *XfrmState) *nl.XfrmUsersaInfo { // copies the addressing and identity fields of state into a new nl.XfrmUsersaInfo
+	msg := &nl.XfrmUsersaInfo{}
+	msg.Family = uint16(nl.GetIPFamily(state.Dst)) // family is derived from Dst only
+	msg.Id.Daddr.FromIP(state.Dst)
+	msg.Saddr.FromIP(state.Src)
+	msg.Id.Proto = uint8(state.Proto)
+	msg.Mode = uint8(state.Mode)
+	msg.Id.Spi = nl.Swap32(uint32(state.Spi)) // SPI is passed through nl.Swap32 before being stored
+	msg.Reqid = uint32(state.Reqid)
+	msg.ReplayWindow = uint8(state.ReplayWindow) // narrowed to 8 bits; xfrmStateAddOrUpdate zeroes it again when ESN is set
+
+	return msg // Lft, Flags and any attributes are left for the caller to fill in
+}