浏览代码

Vendoring vishvananda/netlink @c682914

Signed-off-by: Alessandro Boch <aboch@docker.com>
Alessandro Boch 8 年之前
父节点
当前提交
694c787123
共有 26 个文件被更改，包括 1054 次插入和 134 次删除
  1. 1 1
      vendor.conf
  2. 5 4
      vendor/github.com/vishvananda/netlink/addr.go
  3. 4 0
      vendor/github.com/vishvananda/netlink/addr_linux.go
  4. 38 21
      vendor/github.com/vishvananda/netlink/link_linux.go
  5. 9 1
      vendor/github.com/vishvananda/netlink/netlink.go
  6. 4 3
      vendor/github.com/vishvananda/netlink/netlink_linux.go
  7. 14 9
      vendor/github.com/vishvananda/netlink/netlink_unspecified.go
  8. 36 0
      vendor/github.com/vishvananda/netlink/nl/mpls_linux.go
  9. 5 4
      vendor/github.com/vishvananda/netlink/nl/nl_linux.go
  10. 11 0
      vendor/github.com/vishvananda/netlink/nl/nl_unspecified.go
  11. 27 1
      vendor/github.com/vishvananda/netlink/nl/route_linux.go
  12. 31 0
      vendor/github.com/vishvananda/netlink/nl/syscall.go
  13. 46 26
      vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go
  14. 32 0
      vendor/github.com/vishvananda/netlink/nl/xfrm_monitor_linux.go
  15. 70 8
      vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go
  16. 32 0
      vendor/github.com/vishvananda/netlink/order.go
  17. 5 4
      vendor/github.com/vishvananda/netlink/qdisc.go
  18. 12 0
      vendor/github.com/vishvananda/netlink/qdisc_linux.go
  19. 55 5
      vendor/github.com/vishvananda/netlink/route.go
  20. 232 27
      vendor/github.com/vishvananda/netlink/route_linux.go
  21. 4 0
      vendor/github.com/vishvananda/netlink/route_unspecified.go
  22. 27 0
      vendor/github.com/vishvananda/netlink/socket.go
  23. 159 0
      vendor/github.com/vishvananda/netlink/socket_linux.go
  24. 98 0
      vendor/github.com/vishvananda/netlink/xfrm_monitor_linux.go
  25. 3 2
      vendor/github.com/vishvananda/netlink/xfrm_state.go
  26. 94 18
      vendor/github.com/vishvananda/netlink/xfrm_state_linux.go

+ 1 - 1
vendor.conf

@@ -34,7 +34,7 @@ github.com/hashicorp/go-multierror fcdddc395df1ddf4247c69bd436e84cfa0733f7e
 github.com/hashicorp/serf 598c54895cc5a7b1a24a398d635e8c0ea0959870
 github.com/docker/libkv 1d8431073ae03cdaedb198a89722f3aab6d418ef
 github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25
-github.com/vishvananda/netlink ebdfb7402004b397e6573c71132160d8e23cc12a
+github.com/vishvananda/netlink c682914b0b231f6cad204a86e565551e51d387c0
 github.com/BurntSushi/toml f706d00e3de6abe700c994cdd545a1a4915af060
 github.com/samuel/go-zookeeper d0e0d8e11f318e000a8cc434616d69e329edc374
 github.com/deckarep/golang-set ef32fa3046d9f249d399f98ebaf9be944430fd1d

+ 5 - 4
vendor/github.com/vishvananda/netlink/addr.go

@@ -10,10 +10,11 @@ import (
 // include a mask, so it stores the address as a net.IPNet.
 type Addr struct {
 	*net.IPNet
-	Label string
-	Flags int
-	Scope int
-	Peer  *net.IPNet
+	Label     string
+	Flags     int
+	Scope     int
+	Peer      *net.IPNet
+	Broadcast net.IP
 }
 
 // String returns $ip/$netmask $label

+ 4 - 0
vendor/github.com/vishvananda/netlink/addr_linux.go

@@ -90,6 +90,10 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error
 		}
 	}
 
+	if addr.Broadcast != nil {
+		req.AddData(nl.NewRtAttr(syscall.IFA_BROADCAST, addr.Broadcast))
+	}
+
 	if addr.Label != "" {
 		labelData := nl.NewRtAttr(syscall.IFA_LABEL, nl.ZeroTerminated(addr.Label))
 		req.AddData(labelData)

+ 38 - 21
vendor/github.com/vishvananda/netlink/link_linux.go

@@ -29,7 +29,6 @@ const (
 	TUNTAP_ONE_QUEUE TuntapFlag = syscall.IFF_ONE_QUEUE
 )
 
-var native = nl.NativeEndian()
 var lookupByDump = false
 
 var macvlanModes = [...]uint32{
@@ -59,6 +58,44 @@ func (h *Handle) ensureIndex(link *LinkAttrs) {
 	}
 }
 
+func (h *Handle) LinkSetARPOff(link Link) error {
+	base := link.Attrs()
+	h.ensureIndex(base)
+	req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+
+	msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
+	msg.Change |= syscall.IFF_NOARP
+	msg.Flags |= syscall.IFF_NOARP
+	msg.Index = int32(base.Index)
+	req.AddData(msg)
+
+	_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+	return err
+}
+
+func LinkSetARPOff(link Link) error {
+	return pkgHandle.LinkSetARPOff(link)
+}
+
+func (h *Handle) LinkSetARPOn(link Link) error {
+	base := link.Attrs()
+	h.ensureIndex(base)
+	req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+
+	msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
+	msg.Change |= syscall.IFF_NOARP
+	msg.Flags &= ^uint32(syscall.IFF_NOARP)
+	msg.Index = int32(base.Index)
+	req.AddData(msg)
+
+	_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+	return err
+}
+
+func LinkSetARPOn(link Link) error {
+	return pkgHandle.LinkSetARPOn(link)
+}
+
 func (h *Handle) SetPromiscOn(link Link) error {
 	base := link.Attrs()
 	h.ensureIndex(base)
@@ -1443,26 +1480,6 @@ func linkFlags(rawFlags uint32) net.Flags {
 	return f
 }
 
-func htonl(val uint32) []byte {
-	bytes := make([]byte, 4)
-	binary.BigEndian.PutUint32(bytes, val)
-	return bytes
-}
-
-func htons(val uint16) []byte {
-	bytes := make([]byte, 2)
-	binary.BigEndian.PutUint16(bytes, val)
-	return bytes
-}
-
-func ntohl(buf []byte) uint32 {
-	return binary.BigEndian.Uint32(buf)
-}
-
-func ntohs(buf []byte) uint16 {
-	return binary.BigEndian.Uint16(buf)
-}
-
 func addGretapAttrs(gretap *Gretap, linkInfo *nl.RtAttr) {
 	data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
 

+ 9 - 1
vendor/github.com/vishvananda/netlink/netlink.go

@@ -8,7 +8,15 @@
 // interface that is loosely modeled on the iproute2 cli.
 package netlink
 
-import "net"
+import (
+	"errors"
+	"net"
+)
+
+var (
+	// ErrNotImplemented is returned when a requested feature is not implemented.
+	ErrNotImplemented = errors.New("not implemented")
+)
 
 // ParseIPNet parses a string in ip/net format and returns a net.IPNet.
 // This is valuable because addresses in netlink are often IPNets and

+ 4 - 3
vendor/github.com/vishvananda/netlink/netlink_linux.go

@@ -4,7 +4,8 @@ import "github.com/vishvananda/netlink/nl"
 
 // Family type definitions
 const (
-	FAMILY_ALL = nl.FAMILY_ALL
-	FAMILY_V4  = nl.FAMILY_V4
-	FAMILY_V6  = nl.FAMILY_V6
+	FAMILY_ALL  = nl.FAMILY_ALL
+	FAMILY_V4   = nl.FAMILY_V4
+	FAMILY_V6   = nl.FAMILY_V6
+	FAMILY_MPLS = nl.FAMILY_MPLS
 )

+ 14 - 9
vendor/github.com/vishvananda/netlink/netlink_unspecified.go

@@ -2,14 +2,7 @@
 
 package netlink
 
-import (
-	"errors"
-	"net"
-)
-
-var (
-	ErrNotImplemented = errors.New("not implemented")
-)
+import "net"
 
 func LinkSetUp(link Link) error {
 	return ErrNotImplemented
@@ -23,7 +16,7 @@ func LinkSetMTU(link Link, mtu int) error {
 	return ErrNotImplemented
 }
 
-func LinkSetMaster(link Link, master *Link) error {
+func LinkSetMaster(link Link, master *Bridge) error {
 	return ErrNotImplemented
 }
 
@@ -71,6 +64,14 @@ func LinkSetXdpFd(link Link, fd int) error {
 	return ErrNotImplemented
 }
 
+func LinkSetARPOff(link Link) error {
+	return ErrNotImplemented
+}
+
+func LinkSetARPOn(link Link) error {
+	return ErrNotImplemented
+}
+
 func LinkByName(name string) (Link, error) {
 	return nil, ErrNotImplemented
 }
@@ -214,3 +215,7 @@ func NeighList(linkIndex, family int) ([]Neigh, error) {
 func NeighDeserialize(m []byte) (*Neigh, error) {
 	return nil, ErrNotImplemented
 }
+
+func SocketGet(local, remote net.Addr) (*Socket, error) {
+	return nil, ErrNotImplemented
+}

+ 36 - 0
vendor/github.com/vishvananda/netlink/nl/mpls_linux.go

@@ -0,0 +1,36 @@
+package nl
+
+import "encoding/binary"
+
+const (
+	MPLS_LS_LABEL_SHIFT = 12
+	MPLS_LS_S_SHIFT     = 8
+)
+
+func EncodeMPLSStack(labels ...int) []byte {
+	b := make([]byte, 4*len(labels))
+	for idx, label := range labels {
+		l := label << MPLS_LS_LABEL_SHIFT
+		if idx == len(labels)-1 {
+			l |= 1 << MPLS_LS_S_SHIFT
+		}
+		binary.BigEndian.PutUint32(b[idx*4:], uint32(l))
+	}
+	return b
+}
+
+func DecodeMPLSStack(buf []byte) []int {
+	if len(buf)%4 != 0 {
+		return nil
+	}
+	stack := make([]int, 0, len(buf)/4)
+	for len(buf) > 0 {
+		l := binary.BigEndian.Uint32(buf[:4])
+		buf = buf[4:]
+		stack = append(stack, int(l)>>MPLS_LS_LABEL_SHIFT)
+		if (l>>MPLS_LS_S_SHIFT)&1 > 0 {
+			break
+		}
+	}
+	return stack
+}

+ 5 - 4
vendor/github.com/vishvananda/netlink/nl/nl_linux.go

@@ -17,9 +17,10 @@ import (
 
 const (
 	// Family type definitions
-	FAMILY_ALL = syscall.AF_UNSPEC
-	FAMILY_V4  = syscall.AF_INET
-	FAMILY_V6  = syscall.AF_INET6
+	FAMILY_ALL  = syscall.AF_UNSPEC
+	FAMILY_V4   = syscall.AF_INET
+	FAMILY_V6   = syscall.AF_INET6
+	FAMILY_MPLS = AF_MPLS
 )
 
 // SupportedNlFamilies contains the list of netlink families this netlink package supports
@@ -450,7 +451,7 @@ type NetlinkSocket struct {
 }
 
 func getNetlinkSocket(protocol int) (*NetlinkSocket, error) {
-	fd, err := syscall.Socket(syscall.AF_NETLINK, syscall.SOCK_RAW, protocol)
+	fd, err := syscall.Socket(syscall.AF_NETLINK, syscall.SOCK_RAW|syscall.SOCK_CLOEXEC, protocol)
 	if err != nil {
 		return nil, err
 	}

+ 11 - 0
vendor/github.com/vishvananda/netlink/nl/nl_unspecified.go

@@ -0,0 +1,11 @@
+// +build !linux
+
+package nl
+
+import "encoding/binary"
+
+var SupportedNlFamilies = []int{}
+
+func NativeEndian() binary.ByteOrder {
+	return nil
+}

+ 27 - 1
vendor/github.com/vishvananda/netlink/nl/route_linux.go

@@ -43,12 +43,38 @@ func (msg *RtMsg) Serialize() []byte {
 
 type RtNexthop struct {
 	syscall.RtNexthop
+	Children []NetlinkRequestData
 }
 
 func DeserializeRtNexthop(b []byte) *RtNexthop {
 	return (*RtNexthop)(unsafe.Pointer(&b[0:syscall.SizeofRtNexthop][0]))
 }
 
+func (msg *RtNexthop) Len() int {
+	if len(msg.Children) == 0 {
+		return syscall.SizeofRtNexthop
+	}
+
+	l := 0
+	for _, child := range msg.Children {
+		l += rtaAlignOf(child.Len())
+	}
+	l += syscall.SizeofRtNexthop
+	return rtaAlignOf(l)
+}
+
 func (msg *RtNexthop) Serialize() []byte {
-	return (*(*[syscall.SizeofRtNexthop]byte)(unsafe.Pointer(msg)))[:]
+	length := msg.Len()
+	msg.RtNexthop.Len = uint16(length)
+	buf := make([]byte, length)
+	copy(buf, (*(*[syscall.SizeofRtNexthop]byte)(unsafe.Pointer(msg)))[:])
+	next := rtaAlignOf(syscall.SizeofRtNexthop)
+	if len(msg.Children) > 0 {
+		for _, child := range msg.Children {
+			childBuf := child.Serialize()
+			copy(buf[next:], childBuf)
+			next += rtaAlignOf(len(childBuf))
+		}
+	}
+	return buf
 }

+ 31 - 0
vendor/github.com/vishvananda/netlink/nl/syscall.go

@@ -35,3 +35,34 @@ const (
 	FR_ACT_UNREACHABLE /* Drop with ENETUNREACH */
 	FR_ACT_PROHIBIT    /* Drop with EACCES */
 )
+
+// socket diags related
+const (
+	SOCK_DIAG_BY_FAMILY = 20         /* linux.sock_diag.h */
+	TCPDIAG_NOCOOKIE    = 0xFFFFFFFF /* TCPDIAG_NOCOOKIE in net/ipv4/tcp_diag.h*/
+)
+
+const (
+	AF_MPLS = 28
+)
+
+const (
+	RTA_NEWDST     = 0x13
+	RTA_ENCAP_TYPE = 0x15
+	RTA_ENCAP      = 0x16
+)
+
+// RTA_ENCAP subtype
+const (
+	MPLS_IPTUNNEL_UNSPEC = iota
+	MPLS_IPTUNNEL_DST
+)
+
+// light weight tunnel encap types
+const (
+	LWTUNNEL_ENCAP_NONE = iota
+	LWTUNNEL_ENCAP_MPLS
+	LWTUNNEL_ENCAP_IP
+	LWTUNNEL_ENCAP_ILA
+	LWTUNNEL_ENCAP_IP6
+)

+ 46 - 26
vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go

@@ -11,34 +11,40 @@ const (
 	XFRM_INF = ^uint64(0)
 )
 
+type XfrmMsgType uint8
+
+type XfrmMsg interface {
+	Type() XfrmMsgType
+}
+
 // Message Types
 const (
-	XFRM_MSG_BASE        = 0x10
-	XFRM_MSG_NEWSA       = 0x10
-	XFRM_MSG_DELSA       = 0x11
-	XFRM_MSG_GETSA       = 0x12
-	XFRM_MSG_NEWPOLICY   = 0x13
-	XFRM_MSG_DELPOLICY   = 0x14
-	XFRM_MSG_GETPOLICY   = 0x15
-	XFRM_MSG_ALLOCSPI    = 0x16
-	XFRM_MSG_ACQUIRE     = 0x17
-	XFRM_MSG_EXPIRE      = 0x18
-	XFRM_MSG_UPDPOLICY   = 0x19
-	XFRM_MSG_UPDSA       = 0x1a
-	XFRM_MSG_POLEXPIRE   = 0x1b
-	XFRM_MSG_FLUSHSA     = 0x1c
-	XFRM_MSG_FLUSHPOLICY = 0x1d
-	XFRM_MSG_NEWAE       = 0x1e
-	XFRM_MSG_GETAE       = 0x1f
-	XFRM_MSG_REPORT      = 0x20
-	XFRM_MSG_MIGRATE     = 0x21
-	XFRM_MSG_NEWSADINFO  = 0x22
-	XFRM_MSG_GETSADINFO  = 0x23
-	XFRM_MSG_NEWSPDINFO  = 0x24
-	XFRM_MSG_GETSPDINFO  = 0x25
-	XFRM_MSG_MAPPING     = 0x26
-	XFRM_MSG_MAX         = 0x26
-	XFRM_NR_MSGTYPES     = 0x17
+	XFRM_MSG_BASE        XfrmMsgType = 0x10
+	XFRM_MSG_NEWSA                   = 0x10
+	XFRM_MSG_DELSA                   = 0x11
+	XFRM_MSG_GETSA                   = 0x12
+	XFRM_MSG_NEWPOLICY               = 0x13
+	XFRM_MSG_DELPOLICY               = 0x14
+	XFRM_MSG_GETPOLICY               = 0x15
+	XFRM_MSG_ALLOCSPI                = 0x16
+	XFRM_MSG_ACQUIRE                 = 0x17
+	XFRM_MSG_EXPIRE                  = 0x18
+	XFRM_MSG_UPDPOLICY               = 0x19
+	XFRM_MSG_UPDSA                   = 0x1a
+	XFRM_MSG_POLEXPIRE               = 0x1b
+	XFRM_MSG_FLUSHSA                 = 0x1c
+	XFRM_MSG_FLUSHPOLICY             = 0x1d
+	XFRM_MSG_NEWAE                   = 0x1e
+	XFRM_MSG_GETAE                   = 0x1f
+	XFRM_MSG_REPORT                  = 0x20
+	XFRM_MSG_MIGRATE                 = 0x21
+	XFRM_MSG_NEWSADINFO              = 0x22
+	XFRM_MSG_GETSADINFO              = 0x23
+	XFRM_MSG_NEWSPDINFO              = 0x24
+	XFRM_MSG_GETSPDINFO              = 0x25
+	XFRM_MSG_MAPPING                 = 0x26
+	XFRM_MSG_MAX                     = 0x26
+	XFRM_NR_MSGTYPES                 = 0x17
 )
 
 // Attribute types
@@ -81,6 +87,20 @@ const (
 	SizeofXfrmMark        = 0x08
 )
 
+// Netlink groups
+const (
+	XFRMNLGRP_NONE    = 0x0
+	XFRMNLGRP_ACQUIRE = 0x1
+	XFRMNLGRP_EXPIRE  = 0x2
+	XFRMNLGRP_SA      = 0x3
+	XFRMNLGRP_POLICY  = 0x4
+	XFRMNLGRP_AEVENTS = 0x5
+	XFRMNLGRP_REPORT  = 0x6
+	XFRMNLGRP_MIGRATE = 0x7
+	XFRMNLGRP_MAPPING = 0x8
+	__XFRMNLGRP_MAX   = 0x9
+)
+
 // typedef union {
 //   __be32    a4;
 //   __be32    a6[4];

+ 32 - 0
vendor/github.com/vishvananda/netlink/nl/xfrm_monitor_linux.go

@@ -0,0 +1,32 @@
+package nl
+
+import (
+	"unsafe"
+)
+
+const (
+	SizeofXfrmUserExpire = 0xe8
+)
+
+// struct xfrm_user_expire {
+// 	struct xfrm_usersa_info		state;
+// 	__u8				hard;
+// };
+
+type XfrmUserExpire struct {
+	XfrmUsersaInfo XfrmUsersaInfo
+	Hard           uint8
+	Pad            [7]byte
+}
+
+func (msg *XfrmUserExpire) Len() int {
+	return SizeofXfrmUserExpire
+}
+
+func DeserializeXfrmUserExpire(b []byte) *XfrmUserExpire {
+	return (*XfrmUserExpire)(unsafe.Pointer(&b[0:SizeofXfrmUserExpire][0]))
+}
+
+func (msg *XfrmUserExpire) Serialize() []byte {
+	return (*(*[SizeofXfrmUserExpire]byte)(unsafe.Pointer(msg)))[:]
+}

+ 70 - 8
vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go

@@ -5,14 +5,27 @@ import (
 )
 
 const (
-	SizeofXfrmUsersaId    = 0x18
-	SizeofXfrmStats       = 0x0c
-	SizeofXfrmUsersaInfo  = 0xe0
-	SizeofXfrmAlgo        = 0x44
-	SizeofXfrmAlgoAuth    = 0x48
-	SizeofXfrmAlgoAEAD    = 0x48
-	SizeofXfrmEncapTmpl   = 0x18
-	SizeofXfrmUsersaFlush = 0x8
+	SizeofXfrmUsersaId       = 0x18
+	SizeofXfrmStats          = 0x0c
+	SizeofXfrmUsersaInfo     = 0xe0
+	SizeofXfrmUserSpiInfo    = 0xe8
+	SizeofXfrmAlgo           = 0x44
+	SizeofXfrmAlgoAuth       = 0x48
+	SizeofXfrmAlgoAEAD       = 0x48
+	SizeofXfrmEncapTmpl      = 0x18
+	SizeofXfrmUsersaFlush    = 0x8
+	SizeofXfrmReplayStateEsn = 0x18
+)
+
+const (
+	XFRM_STATE_NOECN      = 1
+	XFRM_STATE_DECAP_DSCP = 2
+	XFRM_STATE_NOPMTUDISC = 4
+	XFRM_STATE_WILDRECV   = 8
+	XFRM_STATE_ICMP       = 16
+	XFRM_STATE_AF_UNSPEC  = 32
+	XFRM_STATE_ALIGN4     = 64
+	XFRM_STATE_ESN        = 128
 )
 
 // struct xfrm_usersa_id {
@@ -120,6 +133,30 @@ func (msg *XfrmUsersaInfo) Serialize() []byte {
 	return (*(*[SizeofXfrmUsersaInfo]byte)(unsafe.Pointer(msg)))[:]
 }
 
+// struct xfrm_userspi_info {
+// 	struct xfrm_usersa_info		info;
+// 	__u32				min;
+// 	__u32				max;
+// };
+
+type XfrmUserSpiInfo struct {
+	XfrmUsersaInfo XfrmUsersaInfo
+	Min            uint32
+	Max            uint32
+}
+
+func (msg *XfrmUserSpiInfo) Len() int {
+	return SizeofXfrmUserSpiInfo
+}
+
+func DeserializeXfrmUserSpiInfo(b []byte) *XfrmUserSpiInfo {
+	return (*XfrmUserSpiInfo)(unsafe.Pointer(&b[0:SizeofXfrmUserSpiInfo][0]))
+}
+
+func (msg *XfrmUserSpiInfo) Serialize() []byte {
+	return (*(*[SizeofXfrmUserSpiInfo]byte)(unsafe.Pointer(msg)))[:]
+}
+
 // struct xfrm_algo {
 //   char    alg_name[64];
 //   unsigned int  alg_key_len;    /* in bits */
@@ -270,3 +307,28 @@ func DeserializeXfrmUsersaFlush(b []byte) *XfrmUsersaFlush {
 func (msg *XfrmUsersaFlush) Serialize() []byte {
 	return (*(*[SizeofXfrmUsersaFlush]byte)(unsafe.Pointer(msg)))[:]
 }
+
+// struct xfrm_replay_state_esn {
+//     unsigned int    bmp_len;
+//     __u32           oseq;
+//     __u32           seq;
+//     __u32           oseq_hi;
+//     __u32           seq_hi;
+//     __u32           replay_window;
+//     __u32           bmp[0];
+// };
+
+type XfrmReplayStateEsn struct {
+	BmpLen       uint32
+	OSeq         uint32
+	Seq          uint32
+	OSeqHi       uint32
+	SeqHi        uint32
+	ReplayWindow uint32
+	Bmp          []uint32
+}
+
+func (msg *XfrmReplayStateEsn) Serialize() []byte {
+	// We deliberately do not pass Bmp, as it gets set by the kernel.
+	return (*(*[SizeofXfrmReplayStateEsn]byte)(unsafe.Pointer(msg)))[:]
+}

+ 32 - 0
vendor/github.com/vishvananda/netlink/order.go

@@ -0,0 +1,32 @@
+package netlink
+
+import (
+	"encoding/binary"
+
+	"github.com/vishvananda/netlink/nl"
+)
+
+var (
+	native       = nl.NativeEndian()
+	networkOrder = binary.BigEndian
+)
+
+func htonl(val uint32) []byte {
+	bytes := make([]byte, 4)
+	binary.BigEndian.PutUint32(bytes, val)
+	return bytes
+}
+
+func htons(val uint16) []byte {
+	bytes := make([]byte, 2)
+	binary.BigEndian.PutUint16(bytes, val)
+	return bytes
+}
+
+func ntohl(buf []byte) uint32 {
+	return binary.BigEndian.Uint32(buf)
+}
+
+func ntohs(buf []byte) uint16 {
+	return binary.BigEndian.Uint16(buf)
+}

+ 5 - 4
vendor/github.com/vishvananda/netlink/qdisc.go

@@ -187,10 +187,11 @@ func (qdisc *Netem) Type() string {
 // Tbf is a classless qdisc that rate limits based on tokens
 type Tbf struct {
 	QdiscAttrs
-	// TODO: handle 64bit rate properly
-	Rate   uint64
-	Limit  uint32
-	Buffer uint32
+	Rate     uint64
+	Limit    uint32
+	Buffer   uint32
+	Peakrate uint64
+	Minburst uint32
 	// TODO: handle other settings
 }
 

+ 12 - 0
vendor/github.com/vishvananda/netlink/qdisc_linux.go

@@ -169,12 +169,19 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
 	} else if tbf, ok := qdisc.(*Tbf); ok {
 		opt := nl.TcTbfQopt{}
 		opt.Rate.Rate = uint32(tbf.Rate)
+		opt.Peakrate.Rate = uint32(tbf.Peakrate)
 		opt.Limit = tbf.Limit
 		opt.Buffer = tbf.Buffer
 		nl.NewRtAttrChild(options, nl.TCA_TBF_PARMS, opt.Serialize())
 		if tbf.Rate >= uint64(1<<32) {
 			nl.NewRtAttrChild(options, nl.TCA_TBF_RATE64, nl.Uint64Attr(tbf.Rate))
 		}
+		if tbf.Peakrate >= uint64(1<<32) {
+			nl.NewRtAttrChild(options, nl.TCA_TBF_PRATE64, nl.Uint64Attr(tbf.Peakrate))
+		}
+		if tbf.Peakrate > 0 {
+			nl.NewRtAttrChild(options, nl.TCA_TBF_PBURST, nl.Uint32Attr(tbf.Minburst))
+		}
 	} else if htb, ok := qdisc.(*Htb); ok {
 		opt := nl.TcHtbGlob{}
 		opt.Version = htb.Version
@@ -420,10 +427,15 @@ func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
 		case nl.TCA_TBF_PARMS:
 			opt := nl.DeserializeTcTbfQopt(datum.Value)
 			tbf.Rate = uint64(opt.Rate.Rate)
+			tbf.Peakrate = uint64(opt.Peakrate.Rate)
 			tbf.Limit = opt.Limit
 			tbf.Buffer = opt.Buffer
 		case nl.TCA_TBF_RATE64:
 			tbf.Rate = native.Uint64(datum.Value[0:8])
+		case nl.TCA_TBF_PRATE64:
+			tbf.Peakrate = native.Uint64(datum.Value[0:8])
+		case nl.TCA_TBF_PBURST:
+			tbf.Minburst = native.Uint32(datum.Value[0:4])
 		}
 	}
 	return nil

+ 55 - 5
vendor/github.com/vishvananda/netlink/route.go

@@ -3,6 +3,7 @@ package netlink
 import (
 	"fmt"
 	"net"
+	"strings"
 )
 
 // Scope is an enum representing a route scope.
@@ -10,6 +11,20 @@ type Scope uint8
 
 type NextHopFlag int
 
+type Destination interface {
+	Family() int
+	Decode([]byte) error
+	Encode() ([]byte, error)
+	String() string
+}
+
+type Encap interface {
+	Type() int
+	Decode([]byte) error
+	Encode() ([]byte, error)
+	String() string
+}
+
 // Route represents a netlink route.
 type Route struct {
 	LinkIndex  int
@@ -25,15 +40,36 @@ type Route struct {
 	Type       int
 	Tos        int
 	Flags      int
+	MPLSDst    *int
+	NewDst     Destination
+	Encap      Encap
 }
 
 func (r Route) String() string {
+	elems := []string{}
+	if len(r.MultiPath) == 0 {
+		elems = append(elems, fmt.Sprintf("Ifindex: %d", r.LinkIndex))
+	}
+	if r.MPLSDst != nil {
+		elems = append(elems, fmt.Sprintf("Dst: %d", r.MPLSDst))
+	} else {
+		elems = append(elems, fmt.Sprintf("Dst: %s", r.Dst))
+	}
+	if r.NewDst != nil {
+		elems = append(elems, fmt.Sprintf("NewDst: %s", r.NewDst))
+	}
+	if r.Encap != nil {
+		elems = append(elems, fmt.Sprintf("Encap: %s", r.Encap))
+	}
+	elems = append(elems, fmt.Sprintf("Src: %s", r.Src))
 	if len(r.MultiPath) > 0 {
-		return fmt.Sprintf("{Dst: %s Src: %s Gw: %s Flags: %s Table: %d}", r.Dst,
-			r.Src, r.MultiPath, r.ListFlags(), r.Table)
+		elems = append(elems, fmt.Sprintf("Gw: %s", r.MultiPath))
+	} else {
+		elems = append(elems, fmt.Sprintf("Gw: %s", r.Gw))
 	}
-	return fmt.Sprintf("{Ifindex: %d Dst: %s Src: %s Gw: %s Flags: %s Table: %d}", r.LinkIndex, r.Dst,
-		r.Src, r.Gw, r.ListFlags(), r.Table)
+	elems = append(elems, fmt.Sprintf("Flags: %s", r.ListFlags()))
+	elems = append(elems, fmt.Sprintf("Table: %d", r.Table))
+	return fmt.Sprintf("{%s}", strings.Join(elems, " "))
 }
 
 func (r *Route) SetFlag(flag NextHopFlag) {
@@ -59,8 +95,22 @@ type NexthopInfo struct {
 	LinkIndex int
 	Hops      int
 	Gw        net.IP
+	Flags     int
+	NewDst    Destination
+	Encap     Encap
 }
 
 func (n *NexthopInfo) String() string {
-	return fmt.Sprintf("{Ifindex: %d Weight: %d, Gw: %s}", n.LinkIndex, n.Hops+1, n.Gw)
+	elems := []string{}
+	elems = append(elems, fmt.Sprintf("Ifindex: %d", n.LinkIndex))
+	if n.NewDst != nil {
+		elems = append(elems, fmt.Sprintf("NewDst: %s", n.NewDst))
+	}
+	if n.Encap != nil {
+		elems = append(elems, fmt.Sprintf("Encap: %s", n.Encap))
+	}
+	elems = append(elems, fmt.Sprintf("Weight: %d", n.Hops+1))
+	elems = append(elems, fmt.Sprintf("Gw: %d", n.Gw))
+	elems = append(elems, fmt.Sprintf("Flags: %s", n.ListFlags()))
+	return fmt.Sprintf("{%s}", strings.Join(elems, " "))
 }

+ 232 - 27
vendor/github.com/vishvananda/netlink/route_linux.go

@@ -3,6 +3,7 @@ package netlink
 import (
 	"fmt"
 	"net"
+	"strings"
 	"syscall"
 
 	"github.com/vishvananda/netlink/nl"
@@ -42,16 +43,92 @@ var testFlags = []flagString{
 	{f: FLAG_PERVASIVE, s: "pervasive"},
 }
 
-func (r *Route) ListFlags() []string {
+func listFlags(flag int) []string {
 	var flags []string
 	for _, tf := range testFlags {
-		if r.Flags&int(tf.f) != 0 {
+		if flag&int(tf.f) != 0 {
 			flags = append(flags, tf.s)
 		}
 	}
 	return flags
 }
 
+func (r *Route) ListFlags() []string {
+	return listFlags(r.Flags)
+}
+
+func (n *NexthopInfo) ListFlags() []string {
+	return listFlags(n.Flags)
+}
+
+type MPLSDestination struct {
+	Labels []int
+}
+
+func (d *MPLSDestination) Family() int {
+	return nl.FAMILY_MPLS
+}
+
+func (d *MPLSDestination) Decode(buf []byte) error {
+	d.Labels = nl.DecodeMPLSStack(buf)
+	return nil
+}
+
+func (d *MPLSDestination) Encode() ([]byte, error) {
+	return nl.EncodeMPLSStack(d.Labels...), nil
+}
+
+func (d *MPLSDestination) String() string {
+	s := make([]string, 0, len(d.Labels))
+	for _, l := range d.Labels {
+		s = append(s, fmt.Sprintf("%d", l))
+	}
+	return strings.Join(s, "/")
+}
+
+type MPLSEncap struct {
+	Labels []int
+}
+
+func (e *MPLSEncap) Type() int {
+	return nl.LWTUNNEL_ENCAP_MPLS
+}
+
+// Decode parses a nested attribute from buf: a native-endian uint16 length,
+// a native-endian uint16 type that must be nl.MPLS_IPTUNNEL_DST, then the
+// MPLS label stack, which is stored in e.Labels.
+// It returns an error when buf is shorter than the 4-byte header or than the
+// declared length, when the declared length is smaller than the header, or
+// when the type is not nl.MPLS_IPTUNNEL_DST.
+func (e *MPLSEncap) Decode(buf []byte) error {
+	if len(buf) < 4 {
+		return fmt.Errorf("Lack of bytes")
+	}
+	native := nl.NativeEndian()
+	l := native.Uint16(buf)
+	// The declared length includes the 4-byte header itself (Encode writes
+	// len(stack)+4); anything smaller would make the slicing below go out of
+	// range and panic.
+	if l < 4 || len(buf) < int(l) {
+		return fmt.Errorf("Lack of bytes")
+	}
+	buf = buf[:l]
+	typ := native.Uint16(buf[2:])
+	if typ != nl.MPLS_IPTUNNEL_DST {
+		return fmt.Errorf("Unknown MPLS Encap Type: %d", typ)
+	}
+	e.Labels = nl.DecodeMPLSStack(buf[4:])
+	return nil
+}
+
+func (e *MPLSEncap) Encode() ([]byte, error) {
+	s := nl.EncodeMPLSStack(e.Labels...)
+	native := nl.NativeEndian()
+	hdr := make([]byte, 4)
+	native.PutUint16(hdr, uint16(len(s)+4))
+	native.PutUint16(hdr[2:], nl.MPLS_IPTUNNEL_DST)
+	return append(hdr, s...), nil
+}
+
+func (e *MPLSEncap) String() string {
+	s := make([]string, 0, len(e.Labels))
+	for _, l := range e.Labels {
+		s = append(s, fmt.Sprintf("%d", l))
+	}
+	return strings.Join(s, "/")
+}
+
 // RouteAdd will add a route to the system.
 // Equivalent to: `ip route add $route`
 func RouteAdd(route *Route) error {
@@ -61,7 +138,22 @@ func RouteAdd(route *Route) error {
 // RouteAdd will add a route to the system.
 // Equivalent to: `ip route add $route`
 func (h *Handle) RouteAdd(route *Route) error {
-	req := h.newNetlinkRequest(syscall.RTM_NEWROUTE, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
+	flags := syscall.NLM_F_CREATE | syscall.NLM_F_EXCL | syscall.NLM_F_ACK
+	req := h.newNetlinkRequest(syscall.RTM_NEWROUTE, flags)
+	return h.routeHandle(route, req, nl.NewRtMsg())
+}
+
+// RouteReplace will add a route to the system, replacing an existing
+// matching route if there is one.
+// Equivalent to: `ip route replace $route`
+func RouteReplace(route *Route) error {
+	return pkgHandle.RouteReplace(route)
+}
+
+// RouteReplace will add a route to the system, replacing an existing
+// matching route if there is one.
+// Equivalent to: `ip route replace $route`
+func (h *Handle) RouteReplace(route *Route) error {
+	flags := syscall.NLM_F_CREATE | syscall.NLM_F_REPLACE | syscall.NLM_F_ACK
+	req := h.newNetlinkRequest(syscall.RTM_NEWROUTE, flags)
 	return h.routeHandle(route, req, nl.NewRtMsg())
 }
 
@@ -79,7 +171,7 @@ func (h *Handle) RouteDel(route *Route) error {
 }
 
 func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) error {
-	if (route.Dst == nil || route.Dst.IP == nil) && route.Src == nil && route.Gw == nil {
+	if (route.Dst == nil || route.Dst.IP == nil) && route.Src == nil && route.Gw == nil && route.MPLSDst == nil {
 		return fmt.Errorf("one of Dst.IP, Src, or Gw must not be nil")
 	}
 
@@ -98,6 +190,33 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
 			dstData = route.Dst.IP.To16()
 		}
 		rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_DST, dstData))
+	} else if route.MPLSDst != nil {
+		family = nl.FAMILY_MPLS
+		msg.Dst_len = uint8(20)
+		msg.Type = syscall.RTN_UNICAST
+		rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_DST, nl.EncodeMPLSStack(*route.MPLSDst)))
+	}
+
+	if route.NewDst != nil {
+		if family != -1 && family != route.NewDst.Family() {
+			return fmt.Errorf("new destination and destination are not the same address family")
+		}
+		buf, err := route.NewDst.Encode()
+		if err != nil {
+			return err
+		}
+		rtAttrs = append(rtAttrs, nl.NewRtAttr(nl.RTA_NEWDST, buf))
+	}
+
+	if route.Encap != nil {
+		buf := make([]byte, 2)
+		native.PutUint16(buf, uint16(route.Encap.Type()))
+		rtAttrs = append(rtAttrs, nl.NewRtAttr(nl.RTA_ENCAP_TYPE, buf))
+		buf, err := route.Encap.Encode()
+		if err != nil {
+			return err
+		}
+		rtAttrs = append(rtAttrs, nl.NewRtAttr(nl.RTA_ENCAP, buf))
 	}
 
 	if route.Src != nil {
@@ -138,26 +257,43 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
 				RtNexthop: syscall.RtNexthop{
 					Hops:    uint8(nh.Hops),
 					Ifindex: int32(nh.LinkIndex),
-					Len:     uint16(syscall.SizeofRtNexthop),
+					Flags:   uint8(nh.Flags),
 				},
 			}
-			var gwData []byte
+			children := []nl.NetlinkRequestData{}
 			if nh.Gw != nil {
 				gwFamily := nl.GetIPFamily(nh.Gw)
 				if family != -1 && family != gwFamily {
 					return fmt.Errorf("gateway, source, and destination ip are not the same IP family")
 				}
-				var gw *nl.RtAttr
 				if gwFamily == FAMILY_V4 {
-					gw = nl.NewRtAttr(syscall.RTA_GATEWAY, []byte(nh.Gw.To4()))
+					children = append(children, nl.NewRtAttr(syscall.RTA_GATEWAY, []byte(nh.Gw.To4())))
 				} else {
-					gw = nl.NewRtAttr(syscall.RTA_GATEWAY, []byte(nh.Gw.To16()))
+					children = append(children, nl.NewRtAttr(syscall.RTA_GATEWAY, []byte(nh.Gw.To16())))
+				}
+			}
+			if nh.NewDst != nil {
+				if family != -1 && family != nh.NewDst.Family() {
+					return fmt.Errorf("new destination and destination are not the same address family")
+				}
+				buf, err := nh.NewDst.Encode()
+				if err != nil {
+					return err
+				}
+				children = append(children, nl.NewRtAttr(nl.RTA_NEWDST, buf))
+			}
+			if nh.Encap != nil {
+				buf := make([]byte, 2)
+				native.PutUint16(buf, uint16(nh.Encap.Type()))
+				// The encap type describes this nexthop, so nest it with the
+				// nexthop's other attributes (where the route parser looks for
+				// it) instead of adding it to the route-level attributes.
+				children = append(children, nl.NewRtAttr(nl.RTA_ENCAP_TYPE, buf))
+				buf, err := nh.Encap.Encode()
+				if err != nil {
+					return err
 				}
-				gwData = gw.Serialize()
-				rtnh.Len += uint16(len(gwData))
+				children = append(children, nl.NewRtAttr(nl.RTA_ENCAP, buf))
 			}
+			rtnh.Children = children
 			buf = append(buf, rtnh.Serialize()...)
-			buf = append(buf, gwData...)
 		}
 		rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_MULTIPATH, buf))
 	}
@@ -284,18 +420,20 @@ func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64)
 			case filterMask&RT_FILTER_SRC != 0 && !route.Src.Equal(filter.Src):
 				continue
 			case filterMask&RT_FILTER_DST != 0:
-				if filter.Dst == nil {
-					if route.Dst != nil {
-						continue
-					}
-				} else {
-					if route.Dst == nil {
-						continue
-					}
-					aMaskLen, aMaskBits := route.Dst.Mask.Size()
-					bMaskLen, bMaskBits := filter.Dst.Mask.Size()
-					if !(route.Dst.IP.Equal(filter.Dst.IP) && aMaskLen == bMaskLen && aMaskBits == bMaskBits) {
-						continue
+				if filter.MPLSDst == nil || route.MPLSDst == nil || (*filter.MPLSDst) != (*route.MPLSDst) {
+					if filter.Dst == nil {
+						if route.Dst != nil {
+							continue
+						}
+					} else {
+						if route.Dst == nil {
+							continue
+						}
+						aMaskLen, aMaskBits := route.Dst.Mask.Size()
+						bMaskLen, bMaskBits := filter.Dst.Mask.Size()
+						if !(route.Dst.IP.Equal(filter.Dst.IP) && aMaskLen == bMaskLen && aMaskBits == bMaskBits) {
+							continue
+						}
 					}
 				}
 			}
@@ -322,6 +460,7 @@ func deserializeRoute(m []byte) (Route, error) {
 	}
 
 	native := nl.NativeEndian()
+	var encap, encapType syscall.NetlinkRouteAttr
 	for _, attr := range attrs {
 		switch attr.Attr.Type {
 		case syscall.RTA_GATEWAY:
@@ -329,9 +468,17 @@ func deserializeRoute(m []byte) (Route, error) {
 		case syscall.RTA_PREFSRC:
 			route.Src = net.IP(attr.Value)
 		case syscall.RTA_DST:
-			route.Dst = &net.IPNet{
-				IP:   attr.Value,
-				Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
+			if msg.Family == nl.FAMILY_MPLS {
+				stack := nl.DecodeMPLSStack(attr.Value)
+				if len(stack) == 0 || len(stack) > 1 {
+					return route, fmt.Errorf("invalid MPLS RTA_DST")
+				}
+				route.MPLSDst = &stack[0]
+			} else {
+				route.Dst = &net.IPNet{
+					IP:   attr.Value,
+					Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
+				}
 			}
 		case syscall.RTA_OIF:
 			route.LinkIndex = int(native.Uint32(attr.Value[0:4]))
@@ -353,17 +500,47 @@ func deserializeRoute(m []byte) (Route, error) {
 				info := &NexthopInfo{
 					LinkIndex: int(nh.RtNexthop.Ifindex),
 					Hops:      int(nh.RtNexthop.Hops),
+					Flags:     int(nh.RtNexthop.Flags),
 				}
 				attrs, err := nl.ParseRouteAttr(value[syscall.SizeofRtNexthop:int(nh.RtNexthop.Len)])
 				if err != nil {
 					return nil, nil, err
 				}
+				var encap, encapType syscall.NetlinkRouteAttr
 				for _, attr := range attrs {
 					switch attr.Attr.Type {
 					case syscall.RTA_GATEWAY:
 						info.Gw = net.IP(attr.Value)
+					case nl.RTA_NEWDST:
+						var d Destination
+						switch msg.Family {
+						case nl.FAMILY_MPLS:
+							d = &MPLSDestination{}
+						}
+						if err := d.Decode(attr.Value); err != nil {
+							return nil, nil, err
+						}
+						info.NewDst = d
+					case nl.RTA_ENCAP_TYPE:
+						encapType = attr
+					case nl.RTA_ENCAP:
+						encap = attr
+					}
+				}
+
+				if len(encap.Value) != 0 && len(encapType.Value) != 0 {
+					typ := int(native.Uint16(encapType.Value[0:2]))
+					var e Encap
+					switch typ {
+					case nl.LWTUNNEL_ENCAP_MPLS:
+						e = &MPLSEncap{}
+						if err := e.Decode(encap.Value); err != nil {
+							return nil, nil, err
+						}
 					}
+					info.Encap = e
 				}
+
 				return info, value[int(nh.RtNexthop.Len):], nil
 			}
 			rest := attr.Value
@@ -375,8 +552,36 @@ func deserializeRoute(m []byte) (Route, error) {
 				route.MultiPath = append(route.MultiPath, info)
 				rest = buf
 			}
+		case nl.RTA_NEWDST:
+			var d Destination
+			switch msg.Family {
+			case nl.FAMILY_MPLS:
+				d = &MPLSDestination{}
+			}
+			if err := d.Decode(attr.Value); err != nil {
+				return route, err
+			}
+			route.NewDst = d
+		case nl.RTA_ENCAP_TYPE:
+			encapType = attr
+		case nl.RTA_ENCAP:
+			encap = attr
 		}
 	}
+
+	if len(encap.Value) != 0 && len(encapType.Value) != 0 {
+		typ := int(native.Uint16(encapType.Value[0:2]))
+		var e Encap
+		switch typ {
+		case nl.LWTUNNEL_ENCAP_MPLS:
+			e = &MPLSEncap{}
+			if err := e.Decode(encap.Value); err != nil {
+				return route, err
+			}
+		}
+		route.Encap = e
+	}
+
 	return route, nil
 }
 

+ 4 - 0
vendor/github.com/vishvananda/netlink/route_unspecified.go

@@ -5,3 +5,7 @@ package netlink
 // ListFlags always returns an empty, non-nil slice in this implementation.
 func (r *Route) ListFlags() []string {
 	return make([]string, 0)
 }
+
+// ListFlags always returns an empty, non-nil slice in this implementation.
+func (n *NexthopInfo) ListFlags() []string {
+	return make([]string, 0)
+}

+ 27 - 0
vendor/github.com/vishvananda/netlink/socket.go

@@ -0,0 +1,27 @@
+package netlink
+
+import "net"
+
+// SocketID is the set of fields that together identify one socket.
+type SocketID struct {
+	// Ports of the source and destination endpoints.
+	SourcePort, DestinationPort uint16
+
+	// IP addresses of the source and destination endpoints.
+	Source, Destination net.IP
+
+	// Interface is presumably the index of the bound interface — TODO confirm.
+	Interface uint32
+
+	// Cookie is a two-word value carried alongside the addresses.
+	Cookie [2]uint32
+}
+
+// Socket holds the per-socket information exposed by this package: a small
+// byte-sized header, the identifying SocketID and five 32-bit counters.
+type Socket struct {
+	// Header bytes; their exact semantics are defined by the data source —
+	// presumably address family, connection state, timer kind and
+	// retransmission count (TODO confirm).
+	Family, State, Timer, Retrans uint8
+
+	// ID identifies the socket by its endpoints, interface and cookie.
+	ID SocketID
+
+	// 32-bit values that follow the ID; presumably timer expiry, receive and
+	// send queue sizes, owning user ID and inode number (TODO confirm).
+	Expires, RQueue, WQueue, UID, INode uint32
+}

+ 159 - 0
vendor/github.com/vishvananda/netlink/socket_linux.go

@@ -0,0 +1,159 @@
+package netlink
+
+import (
+	"errors"
+	"fmt"
+	"net"
+	"syscall"
+
+	"github.com/vishvananda/netlink/nl"
+)
+
+// Sizes, in bytes, of the fixed-layout buffers serialized and parsed in this
+// file, spelled out as the sum of their field widths.
+const (
+	sizeofSocketID      = 2 + 2 + 16 + 16 + 4 + 2*4 // ports, address slots, interface, cookie
+	sizeofSocketRequest = 4 + 4 + sizeofSocketID    // four header bytes, state mask, socket ID
+	sizeofSocket        = 4 + sizeofSocketID + 5*4  // four header bytes, socket ID, five 32-bit words
+)
+
+// socketRequest is the payload sent by SocketGet to look up one socket; its
+// fields are written out in declaration order by Serialize.
+type socketRequest struct {
+	// Four single-byte header fields; pad is serialized like the others.
+	Family, Protocol, Ext, pad uint8
+
+	// States is serialized as one 32-bit word; presumably a bitmask of the
+	// socket states to match — TODO confirm.
+	States uint32
+
+	// ID selects the socket being queried.
+	ID SocketID
+}
+
+// writeBuffer is a write cursor over a pre-sized byte slice.
+type writeBuffer struct {
+	Bytes []byte
+	pos   int
+}
+
+// Write stores c at the cursor and moves the cursor one byte forward.
+func (b *writeBuffer) Write(c byte) {
+	at := b.pos
+	b.Bytes[at] = c
+	b.pos = at + 1
+}
+
+// Next returns the n bytes starting at the cursor, for the caller to fill in,
+// and moves the cursor past them.
+func (b *writeBuffer) Next(n int) []byte {
+	start, end := b.pos, b.pos+n
+	chunk := b.Bytes[start:end]
+	b.pos = end
+	return chunk
+}
+
+// Serialize encodes the request into a freshly allocated buffer of
+// sizeofSocketRequest bytes: the four header bytes, States, then the socket
+// ID (ports via networkOrder, remaining words via native).
+func (r *socketRequest) Serialize() []byte {
+	out := writeBuffer{Bytes: make([]byte, sizeofSocketRequest)}
+	for _, c := range [...]byte{r.Family, r.Protocol, r.Ext, r.pad} {
+		out.Write(c)
+	}
+	native.PutUint32(out.Next(4), r.States)
+
+	id := &r.ID
+	networkOrder.PutUint16(out.Next(2), id.SourcePort)
+	networkOrder.PutUint16(out.Next(2), id.DestinationPort)
+	// Each address occupies a 16-byte slot; only the leading 4 bytes are
+	// filled (with the IPv4 form), the rest stays zero.
+	copy(out.Next(16), id.Source.To4())
+	copy(out.Next(16), id.Destination.To4())
+	native.PutUint32(out.Next(4), id.Interface)
+	for _, word := range id.Cookie {
+		native.PutUint32(out.Next(4), word)
+	}
+	return out.Bytes
+}
+
+// Len reports the size in bytes of the buffer produced by Serialize.
+func (r *socketRequest) Len() int {
+	return sizeofSocketRequest
+}
+
+// readBuffer is a read cursor over a byte slice.
+type readBuffer struct {
+	Bytes []byte
+	pos   int
+}
+
+// Read returns the byte at the cursor and moves the cursor one byte forward.
+func (b *readBuffer) Read() byte {
+	at := b.pos
+	c := b.Bytes[at]
+	b.pos = at + 1
+	return c
+}
+
+// Next returns the n bytes starting at the cursor and moves the cursor past
+// them.
+func (b *readBuffer) Next(n int) []byte {
+	start, end := b.pos, b.pos+n
+	chunk := b.Bytes[start:end]
+	b.pos = end
+	return chunk
+}
+
+// deserialize fills s from b, which must hold at least sizeofSocket bytes;
+// a shorter buffer yields an error and leaves s untouched.
+func (s *Socket) deserialize(b []byte) error {
+	if got := len(b); got < sizeofSocket {
+		return fmt.Errorf("socket data short read (%d); want %d", got, sizeofSocket)
+	}
+	in := readBuffer{Bytes: b}
+
+	hdr := in.Next(4)
+	s.Family, s.State, s.Timer, s.Retrans = hdr[0], hdr[1], hdr[2], hdr[3]
+
+	s.ID.SourcePort = networkOrder.Uint16(in.Next(2))
+	s.ID.DestinationPort = networkOrder.Uint16(in.Next(2))
+
+	// Each address sits in a 16-byte slot of which only the first four
+	// bytes are interpreted, as an IPv4 address.
+	src := in.Next(16)
+	s.ID.Source = net.IPv4(src[0], src[1], src[2], src[3])
+	dst := in.Next(16)
+	s.ID.Destination = net.IPv4(dst[0], dst[1], dst[2], dst[3])
+
+	// The remaining fields are consecutive 32-bit words.
+	words := []*uint32{
+		&s.ID.Interface, &s.ID.Cookie[0], &s.ID.Cookie[1],
+		&s.Expires, &s.RQueue, &s.WQueue, &s.UID, &s.INode,
+	}
+	for _, w := range words {
+		*w = native.Uint32(in.Next(4))
+	}
+	return nil
+}
+
+// SocketGet returns the Socket identified by its local and remote addresses.
+//
+// Only TCP over IPv4 is handled: ErrNotImplemented is returned when either
+// address is not a *net.TCPAddr or does not hold an IPv4 address. The lookup
+// is a SOCK_DIAG_BY_FAMILY request sent over a NETLINK_INET_DIAG socket, and
+// the first message of the reply is decoded into the result.
+func SocketGet(local, remote net.Addr) (*Socket, error) {
+	localTCP, ok := local.(*net.TCPAddr)
+	if !ok {
+		return nil, ErrNotImplemented
+	}
+	remoteTCP, ok := remote.(*net.TCPAddr)
+	if !ok {
+		return nil, ErrNotImplemented
+	}
+	localIP := localTCP.IP.To4()
+	if localIP == nil {
+		return nil, ErrNotImplemented
+	}
+	remoteIP := remoteTCP.IP.To4()
+	if remoteIP == nil {
+		return nil, ErrNotImplemented
+	}
+
+	s, err := nl.Subscribe(syscall.NETLINK_INET_DIAG)
+	if err != nil {
+		return nil, err
+	}
+	defer s.Close()
+	req := nl.NewNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, 0)
+	req.AddData(&socketRequest{
+		Family:   syscall.AF_INET,
+		Protocol: syscall.IPPROTO_TCP,
+		ID: SocketID{
+			SourcePort:      uint16(localTCP.Port),
+			DestinationPort: uint16(remoteTCP.Port),
+			Source:          localIP,
+			Destination:     remoteIP,
+			Cookie:          [2]uint32{nl.TCPDIAG_NOCOOKIE, nl.TCPDIAG_NOCOOKIE},
+		},
+	})
+	// Do not wait for a reply to a request that was never sent.
+	if err := s.Send(req); err != nil {
+		return nil, err
+	}
+	msgs, err := s.Receive()
+	if err != nil {
+		return nil, err
+	}
+	if len(msgs) == 0 {
+		return nil, errors.New("no message nor error from netlink")
+	}
+	// NOTE(review): exactly two messages are accepted and only the first is
+	// decoded — confirm whether the second is expected to be a trailer.
+	if len(msgs) > 2 {
+		return nil, fmt.Errorf("multiple (%d) matching sockets", len(msgs))
+	}
+	sock := &Socket{}
+	if err := sock.deserialize(msgs[0].Data); err != nil {
+		return nil, err
+	}
+	return sock, nil
+}

+ 98 - 0
vendor/github.com/vishvananda/netlink/xfrm_monitor_linux.go

@@ -0,0 +1,98 @@
+package netlink
+
+import (
+	"fmt"
+	"syscall"
+
+	"github.com/vishvananda/netns"
+
+	"github.com/vishvananda/netlink/nl"
+)
+
+// XfrmMsg is implemented by the xfrm notifications that XfrmMonitor delivers
+// on its message channel.
+type XfrmMsg interface {
+	// Type reports which kind of xfrm message the value carries.
+	Type() nl.XfrmMsgType
+}
+
+// XfrmMsgExpire is the XfrmMsg produced for an XFRM_MSG_EXPIRE notification.
+type XfrmMsgExpire struct {
+	// XfrmState is the state decoded from the notification.
+	XfrmState *XfrmState
+	// Hard is true when the notification's Hard field equals 1.
+	Hard      bool
+}
+
+// Type returns nl.XFRM_MSG_EXPIRE.
+func (ue *XfrmMsgExpire) Type() nl.XfrmMsgType {
+	return nl.XFRM_MSG_EXPIRE
+}
+
+// parseXfrmMsgExpire decodes the payload of an expire notification into an
+// XfrmMsgExpire.
+func parseXfrmMsgExpire(b []byte) *XfrmMsgExpire {
+	raw := nl.DeserializeXfrmUserExpire(b)
+	return &XfrmMsgExpire{
+		XfrmState: xfrmStateFromXfrmUsersaInfo(&raw.XfrmUsersaInfo),
+		Hard:      raw.Hard == 1,
+	}
+}
+
+// XfrmMonitor subscribes to the xfrm multicast groups matching types and
+// forwards every decoded message to ch from a background goroutine.
+//
+// Receive errors and messages of an unsupported type are sent on errorChan;
+// a receive error also ends the goroutine, which closes ch on exit. When
+// done is non-nil, receiving from it closes the netlink socket. An error is
+// returned when types is empty or contains an unsupported type, or when the
+// subscription fails.
+func XfrmMonitor(ch chan<- XfrmMsg, done <-chan struct{}, errorChan chan<- error,
+	types ...nl.XfrmMsgType) error {
+
+	groups, err := xfrmMcastGroups(types)
+	if err != nil {
+		// Report the failure instead of claiming success while monitoring
+		// nothing.
+		return err
+	}
+	s, err := nl.SubscribeAt(netns.None(), netns.None(), syscall.NETLINK_XFRM, groups...)
+	if err != nil {
+		return err
+	}
+
+	if done != nil {
+		go func() {
+			<-done
+			s.Close()
+		}()
+
+	}
+
+	go func() {
+		defer close(ch)
+		for {
+			msgs, err := s.Receive()
+			if err != nil {
+				errorChan <- err
+				return
+			}
+			for _, m := range msgs {
+				switch m.Header.Type {
+				case nl.XFRM_MSG_EXPIRE:
+					ch <- parseXfrmMsgExpire(m.Data)
+				default:
+					errorChan <- fmt.Errorf("unsupported msg type: %x", m.Header.Type)
+				}
+			}
+		}
+	}()
+
+	return nil
+}
+
+// xfrmMcastGroups maps each requested message type to its multicast group,
+// in order. It fails when types is empty or contains a type other than
+// nl.XFRM_MSG_EXPIRE.
+func xfrmMcastGroups(types []nl.XfrmMsgType) ([]uint, error) {
+	if len(types) == 0 {
+		return nil, fmt.Errorf("no xfrm msg type specified")
+	}
+
+	groups := []uint{}
+	for _, t := range types {
+		if t != nl.XFRM_MSG_EXPIRE {
+			return nil, fmt.Errorf("unsupported group: %x", t)
+		}
+		groups = append(groups, nl.XFRMNLGRP_EXPIRE)
+	}
+
+	return groups, nil
+}

+ 3 - 2
vendor/github.com/vishvananda/netlink/xfrm_state.go

@@ -83,11 +83,12 @@ type XfrmState struct {
 	Crypt        *XfrmStateAlgo
 	Aead         *XfrmStateAlgo
 	Encap        *XfrmStateEncap
+	ESN          bool
 }
 
 func (sa XfrmState) String() string {
-	return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, Auth: %v, Crypt: %v, Aead: %v,Encap: %v",
-		sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.Auth, sa.Crypt, sa.Aead, sa.Encap)
+	return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, Auth: %v, Crypt: %v, Aead: %v, Encap: %v, ESN: %t",
+		sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.Auth, sa.Crypt, sa.Aead, sa.Encap, sa.ESN)
 }
 func (sa XfrmState) Print(stats bool) string {
 	if !stats {

+ 94 - 18
vendor/github.com/vishvananda/netlink/xfrm_state_linux.go

@@ -60,6 +60,21 @@ func writeMark(m *XfrmMark) []byte {
 	return mark.Serialize()
 }
 
+// writeReplayEsn serializes an ESN replay state whose sequence counters are
+// all zero and whose window is replayWindow.
+func writeReplayEsn(replayWindow int) []byte {
+	const bitsPerWord = 4 * 8
+
+	esn := &nl.XfrmReplayStateEsn{ReplayWindow: uint32(replayWindow)}
+	// Number of 32-bit words needed to hold replayWindow bits, rounded up;
+	// taken from iproute2/ip/xfrm_state.c.
+	esn.BmpLen = uint32((replayWindow + bitsPerWord - 1) / bitsPerWord)
+
+	return esn.Serialize()
+}
+
 // XfrmStateAdd will add an xfrm state to the system.
 // Equivalent to: `ip xfrm state add $state`
 func XfrmStateAdd(state *XfrmState) error {
@@ -72,6 +87,12 @@ func (h *Handle) XfrmStateAdd(state *XfrmState) error {
 	return h.xfrmStateAddOrUpdate(state, nl.XFRM_MSG_NEWSA)
 }
 
+// XfrmStateAllocSpi requests an SPI allocation for the given xfrm state and
+// returns the state parsed from the reply.
+// Equivalent to: `ip xfrm state allocspi`
+func XfrmStateAllocSpi(state *XfrmState) (*XfrmState, error) {
+	return pkgHandle.xfrmStateAllocSpi(state)
+}
+
 // XfrmStateUpdate will update an xfrm state to the system.
 // Equivalent to: `ip xfrm state update $state`
 func XfrmStateUpdate(state *XfrmState) error {
@@ -85,21 +106,23 @@ func (h *Handle) XfrmStateUpdate(state *XfrmState) error {
 }
 
 func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error {
+
 	// A state with spi 0 can't be deleted so don't allow it to be set
 	if state.Spi == 0 {
 		return fmt.Errorf("Spi must be set when adding xfrm state.")
 	}
 	req := h.newNetlinkRequest(nlProto, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
 
-	msg := &nl.XfrmUsersaInfo{}
-	msg.Family = uint16(nl.GetIPFamily(state.Dst))
-	msg.Id.Daddr.FromIP(state.Dst)
-	msg.Saddr.FromIP(state.Src)
-	msg.Id.Proto = uint8(state.Proto)
-	msg.Mode = uint8(state.Mode)
-	msg.Id.Spi = nl.Swap32(uint32(state.Spi))
-	msg.Reqid = uint32(state.Reqid)
-	msg.ReplayWindow = uint8(state.ReplayWindow)
+	msg := xfrmUsersaInfoFromXfrmState(state)
+
+	if state.ESN {
+		if state.ReplayWindow == 0 {
+			return fmt.Errorf("ESN flag set without ReplayWindow")
+		}
+		msg.Flags |= nl.XFRM_STATE_ESN
+		msg.ReplayWindow = 0
+	}
+
 	limitsToLft(state.Limits, &msg.Lft)
 	req.AddData(msg)
 
@@ -129,11 +152,44 @@ func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error {
 		out := nl.NewRtAttr(nl.XFRMA_MARK, writeMark(state.Mark))
 		req.AddData(out)
 	}
+	if state.ESN {
+		out := nl.NewRtAttr(nl.XFRMA_REPLAY_ESN_VAL, writeReplayEsn(state.ReplayWindow))
+		req.AddData(out)
+	}
 
 	_, err := req.Execute(syscall.NETLINK_XFRM, 0)
 	return err
 }
 
+// xfrmStateAllocSpi sends an XFRM_MSG_ALLOCSPI request built from state,
+// asking for an SPI in the range 0x100-0xffffffff, and returns the state
+// parsed from the first reply message. state.Mark, when set, is attached as
+// an XFRMA_MARK attribute.
+func (h *Handle) xfrmStateAllocSpi(state *XfrmState) (*XfrmState, error) {
+	req := h.newNetlinkRequest(nl.XFRM_MSG_ALLOCSPI,
+		syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
+
+	msg := &nl.XfrmUserSpiInfo{}
+	msg.XfrmUsersaInfo = *(xfrmUsersaInfoFromXfrmState(state))
+	// 1-255 is reserved by IANA for future use
+	msg.Min = 0x100
+	msg.Max = 0xffffffff
+	req.AddData(msg)
+
+	if state.Mark != nil {
+		out := nl.NewRtAttr(nl.XFRMA_MARK, writeMark(state.Mark))
+		req.AddData(out)
+	}
+
+	msgs, err := req.Execute(syscall.NETLINK_XFRM, 0)
+	if err != nil {
+		return nil, err
+	}
+	// Indexing an empty reply would panic; report it as an error instead.
+	if len(msgs) == 0 {
+		return nil, fmt.Errorf("no xfrm state returned for allocspi request")
+	}
+
+	return parseXfrmState(msgs[0], FAMILY_ALL)
+}
+
 // XfrmStateDel will delete an xfrm state from the system. Note that
 // the Algos are ignored when matching the state to delete.
 // Equivalent to: `ip xfrm state del $state`
@@ -241,14 +297,7 @@ func (h *Handle) xfrmStateGetOrDelete(state *XfrmState, nlProto int) (*XfrmState
 
 var familyError = fmt.Errorf("family error")
 
-func parseXfrmState(m []byte, family int) (*XfrmState, error) {
-	msg := nl.DeserializeXfrmUsersaInfo(m)
-
-	// This is mainly for the state dump
-	if family != FAMILY_ALL && family != int(msg.Family) {
-		return nil, familyError
-	}
-
+func xfrmStateFromXfrmUsersaInfo(msg *nl.XfrmUsersaInfo) *XfrmState {
 	var state XfrmState
 
 	state.Dst = msg.Id.Daddr.ToIP()
@@ -260,6 +309,19 @@ func parseXfrmState(m []byte, family int) (*XfrmState, error) {
 	state.ReplayWindow = int(msg.ReplayWindow)
 	lftToLimits(&msg.Lft, &state.Limits)
 
+	return &state
+}
+
+func parseXfrmState(m []byte, family int) (*XfrmState, error) {
+	msg := nl.DeserializeXfrmUsersaInfo(m)
+
+	// This is mainly for the state dump
+	if family != FAMILY_ALL && family != int(msg.Family) {
+		return nil, familyError
+	}
+
+	state := xfrmStateFromXfrmUsersaInfo(msg)
+
 	attrs, err := nl.ParseRouteAttr(m[nl.SizeofXfrmUsersaInfo:])
 	if err != nil {
 		return nil, err
@@ -310,7 +372,7 @@ func parseXfrmState(m []byte, family int) (*XfrmState, error) {
 		}
 	}
 
-	return &state, nil
+	return state, nil
 }
 
 // XfrmStateFlush will flush the xfrm state on the system.
@@ -366,3 +428,17 @@ func limitsToLft(lmts XfrmStateLimits, lft *nl.XfrmLifetimeCfg) {
 // lftToLimits copies *lft into *lmts by reinterpreting the lifetime
 // configuration's memory as an XfrmStateLimits value; this assumes the two
 // struct layouts match — TODO confirm.
 func lftToLimits(lft *nl.XfrmLifetimeCfg, lmts *XfrmStateLimits) {
 	view := (*XfrmStateLimits)(unsafe.Pointer(lft))
 	*lmts = *view
 }
+
+// xfrmUsersaInfoFromXfrmState builds the netlink representation of state:
+// family (derived from Dst), addresses, protocol, mode, SPI, request ID and
+// replay window. All other fields are left at their zero value.
+func xfrmUsersaInfoFromXfrmState(state *XfrmState) *nl.XfrmUsersaInfo {
+	info := &nl.XfrmUsersaInfo{
+		Family:       uint16(nl.GetIPFamily(state.Dst)),
+		Mode:         uint8(state.Mode),
+		Reqid:        uint32(state.Reqid),
+		ReplayWindow: uint8(state.ReplayWindow),
+	}
+
+	// Source address, then the identifying triple (destination, protocol,
+	// SPI); the SPI is passed through nl.Swap32 before being stored.
+	info.Saddr.FromIP(state.Src)
+	info.Id.Daddr.FromIP(state.Dst)
+	info.Id.Proto = uint8(state.Proto)
+	info.Id.Spi = nl.Swap32(uint32(state.Spi))
+
+	return info
+}