Jelajahi Sumber

Merge pull request #43718 from s4ke/feature/bump-netlink#main

Feature/bump netlink#main
Sebastiaan van Stijn 3 tahun lalu
induk
komit
a347f79711
60 file diubah dengan 5178 penambahan dan 479 penghapusan
  1. 3 3
      libnetwork/drivers/bridge/setup_device_test.go
  2. 2 2
      libnetwork/drivers/bridge/setup_ipv4_test.go
  3. 2 2
      libnetwork/drivers/bridge/setup_ipv6_test.go
  4. 1 1
      libnetwork/drivers/overlay/ov_network.go
  5. 1 1
      libnetwork/drivers/overlay/ov_utils.go
  6. 3 3
      libnetwork/osl/namespace_linux.go
  7. 1 1
      libnetwork/osl/sandbox_linux_test.go
  8. 2 3
      vendor.mod
  9. 5 1
      vendor.sum
  10. 0 19
      vendor/github.com/vishvananda/netlink/.travis.yml
  11. 1 1
      vendor/github.com/vishvananda/netlink/README.md
  12. 1 0
      vendor/github.com/vishvananda/netlink/addr.go
  13. 31 27
      vendor/github.com/vishvananda/netlink/addr_linux.go
  14. 24 0
      vendor/github.com/vishvananda/netlink/bpf_linux.go
  15. 42 14
      vendor/github.com/vishvananda/netlink/class.go
  16. 23 12
      vendor/github.com/vishvananda/netlink/class_linux.go
  17. 186 43
      vendor/github.com/vishvananda/netlink/conntrack_linux.go
  18. 461 5
      vendor/github.com/vishvananda/netlink/devlink_linux.go
  19. 86 18
      vendor/github.com/vishvananda/netlink/filter.go
  20. 255 93
      vendor/github.com/vishvananda/netlink/filter_linux.go
  21. 42 3
      vendor/github.com/vishvananda/netlink/handle_linux.go
  22. 7 1
      vendor/github.com/vishvananda/netlink/handle_unspecified.go
  23. 31 0
      vendor/github.com/vishvananda/netlink/inet_diag.go
  24. 504 0
      vendor/github.com/vishvananda/netlink/ipset_linux.go
  25. 301 18
      vendor/github.com/vishvananda/netlink/link.go
  26. 483 36
      vendor/github.com/vishvananda/netlink/link_linux.go
  27. 1 0
      vendor/github.com/vishvananda/netlink/neigh.go
  28. 38 8
      vendor/github.com/vishvananda/netlink/neigh_linux.go
  29. 21 1
      vendor/github.com/vishvananda/netlink/netlink_unspecified.go
  30. 3 3
      vendor/github.com/vishvananda/netlink/netns_linux.go
  31. 4 10
      vendor/github.com/vishvananda/netlink/nl/addr_linux.go
  32. 4 2
      vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go
  33. 61 5
      vendor/github.com/vishvananda/netlink/nl/devlink_linux.go
  34. 222 0
      vendor/github.com/vishvananda/netlink/nl/ipset_linux.go
  35. 111 1
      vendor/github.com/vishvananda/netlink/nl/link_linux.go
  36. 29 0
      vendor/github.com/vishvananda/netlink/nl/lwt_linux.go
  37. 148 8
      vendor/github.com/vishvananda/netlink/nl/nl_linux.go
  38. 79 0
      vendor/github.com/vishvananda/netlink/nl/parse_attr_linux.go
  39. 4 0
      vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go
  40. 2 2
      vendor/github.com/vishvananda/netlink/nl/seg6_linux.go
  41. 8 1
      vendor/github.com/vishvananda/netlink/nl/syscall.go
  42. 246 1
      vendor/github.com/vishvananda/netlink/nl/tc_linux.go
  43. 1 1
      vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go
  44. 217 0
      vendor/github.com/vishvananda/netlink/proc_event_linux.go
  45. 33 7
      vendor/github.com/vishvananda/netlink/qdisc.go
  46. 55 11
      vendor/github.com/vishvananda/netlink/qdisc_linux.go
  47. 82 15
      vendor/github.com/vishvananda/netlink/rdma_link_linux.go
  48. 66 19
      vendor/github.com/vishvananda/netlink/route.go
  49. 513 39
      vendor/github.com/vishvananda/netlink/route_linux.go
  50. 10 0
      vendor/github.com/vishvananda/netlink/route_unspecified.go
  51. 27 1
      vendor/github.com/vishvananda/netlink/rule.go
  52. 73 6
      vendor/github.com/vishvananda/netlink/rule_linux.go
  53. 137 8
      vendor/github.com/vishvananda/netlink/socket_linux.go
  54. 84 0
      vendor/github.com/vishvananda/netlink/tcp.go
  55. 353 0
      vendor/github.com/vishvananda/netlink/tcp_linux.go
  56. 7 6
      vendor/github.com/vishvananda/netlink/xfrm_policy.go
  57. 10 4
      vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go
  58. 2 2
      vendor/github.com/vishvananda/netlink/xfrm_state.go
  59. 28 9
      vendor/github.com/vishvananda/netlink/xfrm_state_linux.go
  60. 1 2
      vendor/modules.txt

+ 3 - 3
libnetwork/drivers/bridge/setup_device_test.go

@@ -20,7 +20,7 @@ func TestSetupNewBridge(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	defer nh.Delete()
+	defer nh.Close()
 
 	config := &networkConfiguration{BridgeName: DefaultBridgeName}
 	br := &bridgeInterface{nlh: nh}
@@ -46,7 +46,7 @@ func TestSetupNewNonDefaultBridge(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	defer nh.Delete()
+	defer nh.Close()
 
 	config := &networkConfiguration{BridgeName: "test0", DefaultBridge: true}
 	br := &bridgeInterface{nlh: nh}
@@ -68,7 +68,7 @@ func TestSetupDeviceUp(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	defer nh.Delete()
+	defer nh.Close()
 
 	config := &networkConfiguration{BridgeName: DefaultBridgeName}
 	br := &bridgeInterface{nlh: nh}

+ 2 - 2
libnetwork/drivers/bridge/setup_ipv4_test.go

@@ -34,7 +34,7 @@ func TestSetupBridgeIPv4Fixed(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	defer nh.Delete()
+	defer nh.Close()
 
 	config, br := setupTestInterface(t, nh)
 	config.AddressIPv4 = &net.IPNet{IP: ip, Mask: netw.Mask}
@@ -67,7 +67,7 @@ func TestSetupGatewayIPv4(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	defer nh.Delete()
+	defer nh.Close()
 
 	ip, nw, _ := net.ParseCIDR("192.168.0.24/16")
 	nw.IP = ip

+ 2 - 2
libnetwork/drivers/bridge/setup_ipv6_test.go

@@ -21,7 +21,7 @@ func TestSetupIPv6(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	defer nh.Delete()
+	defer nh.Close()
 
 	config, br := setupTestInterface(t, nh)
 	if err := setupBridgeIPv6(config, br); err != nil {
@@ -71,7 +71,7 @@ func TestSetupGatewayIPv6(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	defer nh.Delete()
+	defer nh.Close()
 
 	br := &bridgeInterface{nlh: nh}
 

+ 1 - 1
libnetwork/drivers/overlay/ov_network.go

@@ -436,7 +436,7 @@ func populateVNITbl() {
 				logrus.Errorf("Could not open netlink handle during vni population for ns %s: %v", path, err)
 				return nil
 			}
-			defer nlh.Delete()
+			defer nlh.Close()
 
 			err = nlh.SetSocketTimeout(soTimeout)
 			if err != nil {

+ 1 - 1
libnetwork/drivers/overlay/ov_utils.go

@@ -139,7 +139,7 @@ func deleteVxlanByVNI(path string, vni uint32) error {
 		if err != nil {
 			return fmt.Errorf("failed to get netlink handle for ns %s: %v", path, err)
 		}
-		defer nlh.Delete()
+		defer nlh.Close()
 		err = nlh.SetSocketTimeout(soTimeout)
 		if err != nil {
 			logrus.Warnf("Failed to set the timeout on the netlink handle sockets for vxlan deletion: %v", err)

+ 3 - 3
libnetwork/osl/namespace_linux.go

@@ -234,7 +234,7 @@ func NewSandbox(key string, osCreate, isRestore bool) (Sandbox, error) {
 	}
 
 	if err = n.loopbackUp(); err != nil {
-		n.nlHandle.Delete()
+		n.nlHandle.Close()
 		return nil, err
 	}
 
@@ -287,7 +287,7 @@ func GetSandboxForExternalKey(basePath string, key string) (Sandbox, error) {
 	}
 
 	if err = n.loopbackUp(); err != nil {
-		n.nlHandle.Delete()
+		n.nlHandle.Close()
 		return nil, err
 	}
 
@@ -469,7 +469,7 @@ func (n *networkNamespace) Key() string {
 
 func (n *networkNamespace) Destroy() error {
 	if n.nlHandle != nil {
-		n.nlHandle.Delete()
+		n.nlHandle.Close()
 	}
 	// Assuming no running process is executing in this network namespace,
 	// unmounting is sufficient to destroy it.

+ 1 - 1
libnetwork/osl/sandbox_linux_test.go

@@ -134,7 +134,7 @@ func verifySandbox(t *testing.T, s Sandbox, ifaceSuffixes []string) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	defer nh.Delete()
+	defer nh.Close()
 
 	for _, suffix := range ifaceSuffixes {
 		_, err = nh.LinkByName(sboxIfaceName + suffix)

+ 2 - 3
vendor.mod

@@ -9,6 +9,7 @@ go 1.17
 require (
 	cloud.google.com/go v0.93.3
 	cloud.google.com/go/logging v1.4.2
+	github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1
 	github.com/Graylog2/go-gelf v0.0.0-20191017102106-1550ee647df0
 	github.com/Microsoft/go-winio v0.5.2
 	github.com/Microsoft/hcsshim v0.9.3
@@ -74,7 +75,7 @@ require (
 	github.com/tonistiigi/fsutil v0.0.0-20220115021204-b19f7f9cb274
 	github.com/tonistiigi/go-archvariant v1.0.0
 	github.com/vbatts/tar-split v0.11.2
-	github.com/vishvananda/netlink v1.1.1-0.20210330154013-f5de75959ad5
+	github.com/vishvananda/netlink v1.2.1-beta.2
 	github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f
 	go.etcd.io/bbolt v1.3.6
 	golang.org/x/net v0.0.0-20211216030914-fe4d6282115f
@@ -88,7 +89,6 @@ require (
 
 require (
 	code.cloudfoundry.org/clock v1.0.0 // indirect
-	github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect
 	github.com/agext/levenshtein v1.2.3 // indirect
 	github.com/armon/circbuf v0.0.0-20190214190532-5111143e8da2 // indirect
 	github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da // indirect
@@ -177,7 +177,6 @@ replace (
 	github.com/matttproud/golang_protobuf_extensions => github.com/matttproud/golang_protobuf_extensions v1.0.1
 	github.com/prometheus/client_golang => github.com/prometheus/client_golang v1.6.0
 	github.com/prometheus/procfs => github.com/prometheus/procfs v0.0.11
-	github.com/vishvananda/netlink => github.com/vishvananda/netlink v1.1.0
 	go.opencensus.io => go.opencensus.io v0.22.3
 )
 

+ 5 - 1
vendor.sum

@@ -967,8 +967,12 @@ github.com/urfave/cli v1.22.4/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtX
 github.com/urfave/cli/v2 v2.4.0/go.mod h1:NX9W0zmTvedE5oDoOMs2RTC8RvdK98NTYZE5LbaEYPg=
 github.com/vbatts/tar-split v0.11.2 h1:Via6XqJr0hceW4wff3QRzD5gAk/tatMw/4ZA7cTlIME=
 github.com/vbatts/tar-split v0.11.2/go.mod h1:vV3ZuO2yWSVsz+pfFzDG/upWH1JhjOiEaWq6kXyQ3VI=
-github.com/vishvananda/netlink v1.1.0 h1:1iyaYNBLmP6L0220aDnYQpo1QEV4t4hJ+xEEhhJH8j0=
+github.com/vishvananda/netlink v0.0.0-20181108222139-023a6dafdcdf/go.mod h1:+SR5DhBJrl6ZM7CoCKvpw5BKroDKQ+PJqOg65H/2ktk=
 github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
+github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho=
+github.com/vishvananda/netlink v1.1.1-0.20210330154013-f5de75959ad5/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho=
+github.com/vishvananda/netlink v1.2.1-beta.2 h1:Llsql0lnQEbHj0I1OuKyp8otXp0r3q0mPkuhwHfStVs=
+github.com/vishvananda/netlink v1.2.1-beta.2/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho=
 github.com/vishvananda/netns v0.0.0-20180720170159-13995c7128cc/go.mod h1:ZjcWmFBXmLKZu9Nxj3WKYEafiSqer2rnvPr0en9UNpI=
 github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU=
 github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=

+ 0 - 19
vendor/github.com/vishvananda/netlink/.travis.yml

@@ -1,19 +0,0 @@
-language: go
-go:
-  - "1.10.x"
-  - "1.11.x"
-  - "1.12.x"
-before_script:
-  # make sure we keep path in tact when we sudo
-  - sudo sed -i -e 's/^Defaults\tsecure_path.*$//' /etc/sudoers
-  # modprobe ip_gre or else the first gre device can't be deleted
-  - sudo modprobe ip_gre
-  # modprobe nf_conntrack for the conntrack testing
-  - sudo modprobe nf_conntrack
-  - sudo modprobe nf_conntrack_netlink
-  - sudo modprobe nf_conntrack_ipv4
-  - sudo modprobe nf_conntrack_ipv6
-  - sudo modprobe sch_hfsc
-install:
-  - go get github.com/vishvananda/netns
-go_import_path: github.com/vishvananda/netlink

+ 1 - 1
vendor/github.com/vishvananda/netlink/README.md

@@ -1,6 +1,6 @@
 # netlink - netlink library for go #
 
-[![Build Status](https://travis-ci.org/vishvananda/netlink.png?branch=master)](https://travis-ci.org/vishvananda/netlink) [![GoDoc](https://godoc.org/github.com/vishvananda/netlink?status.svg)](https://godoc.org/github.com/vishvananda/netlink)
+![Build Status](https://github.com/vishvananda/netlink/actions/workflows/main.yml/badge.svg) [![GoDoc](https://godoc.org/github.com/vishvananda/netlink?status.svg)](https://godoc.org/github.com/vishvananda/netlink)
 
 The netlink package provides a simple netlink library for go. Netlink
 is the interface a user-space program in linux uses to communicate with

+ 1 - 0
vendor/github.com/vishvananda/netlink/addr.go

@@ -17,6 +17,7 @@ type Addr struct {
 	Broadcast   net.IP
 	PreferedLft int
 	ValidLft    int
+	LinkIndex   int
 }
 
 // String returns $ip/$netmask $label

+ 31 - 27
vendor/github.com/vishvananda/netlink/addr_linux.go

@@ -11,9 +11,6 @@ import (
 	"golang.org/x/sys/unix"
 )
 
-// IFA_FLAGS is a u32 attribute.
-const IFA_FLAGS = 0x8
-
 // AddrAdd will add an IP address to a link device.
 //
 // Equivalent to: `ip addr add $addr dev $link`
@@ -125,7 +122,7 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error
 		} else {
 			b := make([]byte, 4)
 			native.PutUint32(b, uint32(addr.Flags))
-			flagsData := nl.NewRtAttr(IFA_FLAGS, b)
+			flagsData := nl.NewRtAttr(unix.IFA_FLAGS, b)
 			req.AddData(flagsData)
 		}
 	}
@@ -156,10 +153,10 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error
 	// value should be "forever". To compensate for that, only add the attributes if at least one of the values is
 	// non-zero, which means the caller has explicitly set them
 	if addr.ValidLft > 0 || addr.PreferedLft > 0 {
-		cachedata := nl.IfaCacheInfo{
-			IfaValid:    uint32(addr.ValidLft),
-			IfaPrefered: uint32(addr.PreferedLft),
-		}
+		cachedata := nl.IfaCacheInfo{unix.IfaCacheinfo{
+			Valid:    uint32(addr.ValidLft),
+			Prefered: uint32(addr.PreferedLft),
+		}}
 		req.AddData(nl.NewRtAttr(unix.IFA_CACHEINFO, cachedata.Serialize()))
 	}
 
@@ -179,7 +176,7 @@ func AddrList(link Link, family int) ([]Addr, error) {
 // The list can be filtered by link and ip family.
 func (h *Handle) AddrList(link Link, family int) ([]Addr, error) {
 	req := h.newNetlinkRequest(unix.RTM_GETADDR, unix.NLM_F_DUMP)
-	msg := nl.NewIfInfomsg(family)
+	msg := nl.NewIfAddrmsg(family)
 	req.AddData(msg)
 
 	msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWADDR)
@@ -196,12 +193,12 @@ func (h *Handle) AddrList(link Link, family int) ([]Addr, error) {
 
 	var res []Addr
 	for _, m := range msgs {
-		addr, msgFamily, ifindex, err := parseAddr(m)
+		addr, msgFamily, err := parseAddr(m)
 		if err != nil {
 			return res, err
 		}
 
-		if link != nil && ifindex != indexFilter {
+		if link != nil && addr.LinkIndex != indexFilter {
 			// Ignore messages from other interfaces
 			continue
 		}
@@ -216,11 +213,11 @@ func (h *Handle) AddrList(link Link, family int) ([]Addr, error) {
 	return res, nil
 }
 
-func parseAddr(m []byte) (addr Addr, family, index int, err error) {
+func parseAddr(m []byte) (addr Addr, family int, err error) {
 	msg := nl.DeserializeIfAddrmsg(m)
 
 	family = -1
-	index = -1
+	addr.LinkIndex = -1
 
 	attrs, err1 := nl.ParseRouteAttr(m[msg.Len():])
 	if err1 != nil {
@@ -229,7 +226,7 @@ func parseAddr(m []byte) (addr Addr, family, index int, err error) {
 	}
 
 	family = int(msg.Family)
-	index = int(msg.Index)
+	addr.LinkIndex = int(msg.Index)
 
 	var local, dst *net.IPNet
 	for _, attr := range attrs {
@@ -254,12 +251,12 @@ func parseAddr(m []byte) (addr Addr, family, index int, err error) {
 			addr.Broadcast = attr.Value
 		case unix.IFA_LABEL:
 			addr.Label = string(attr.Value[:len(attr.Value)-1])
-		case IFA_FLAGS:
+		case unix.IFA_FLAGS:
 			addr.Flags = int(native.Uint32(attr.Value[0:4]))
-		case nl.IFA_CACHEINFO:
+		case unix.IFA_CACHEINFO:
 			ci := nl.DeserializeIfaCacheInfo(attr.Value)
-			addr.PreferedLft = int(ci.IfaPrefered)
-			addr.ValidLft = int(ci.IfaValid)
+			addr.PreferedLft = int(ci.Prefered)
+			addr.ValidLft = int(ci.Valid)
 		}
 	}
 
@@ -271,7 +268,7 @@ func parseAddr(m []byte) (addr Addr, family, index int, err error) {
 	// But obviously, as there are IPv6 PtP addresses, too,
 	// IFA_LOCAL should also be handled for IPv6.
 	if local != nil {
-		if family == FAMILY_V4 && local.IP.Equal(dst.IP) {
+		if family == FAMILY_V4 && dst != nil && local.IP.Equal(dst.IP) {
 			addr.IPNet = dst
 		} else {
 			addr.IPNet = local
@@ -299,13 +296,13 @@ type AddrUpdate struct {
 // AddrSubscribe takes a chan down which notifications will be sent
 // when addresses change.  Close the 'done' chan to stop subscription.
 func AddrSubscribe(ch chan<- AddrUpdate, done <-chan struct{}) error {
-	return addrSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0)
+	return addrSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0, nil)
 }
 
 // AddrSubscribeAt works like AddrSubscribe plus it allows the caller
 // to choose the network namespace in which to subscribe (ns).
 func AddrSubscribeAt(ns netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error {
-	return addrSubscribeAt(ns, netns.None(), ch, done, nil, false, 0)
+	return addrSubscribeAt(ns, netns.None(), ch, done, nil, false, 0, nil)
 }
 
 // AddrSubscribeOptions contains a set of options to use with
@@ -315,6 +312,7 @@ type AddrSubscribeOptions struct {
 	ErrorCallback     func(error)
 	ListExisting      bool
 	ReceiveBufferSize int
+	ReceiveTimeout    *unix.Timeval
 }
 
 // AddrSubscribeWithOptions work like AddrSubscribe but enable to
@@ -325,14 +323,20 @@ func AddrSubscribeWithOptions(ch chan<- AddrUpdate, done <-chan struct{}, option
 		none := netns.None()
 		options.Namespace = &none
 	}
-	return addrSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, options.ReceiveBufferSize)
+	return addrSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, options.ReceiveBufferSize, options.ReceiveTimeout)
 }
 
-func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}, cberr func(error), listExisting bool, rcvbuf int) error {
+func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}, cberr func(error), listExisting bool, rcvbuf int, rcvTimeout *unix.Timeval) error {
 	s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_IPV4_IFADDR, unix.RTNLGRP_IPV6_IFADDR)
 	if err != nil {
 		return err
 	}
+	if rcvTimeout != nil {
+		if err := s.SetReceiveTimeout(rcvTimeout); err != nil {
+			return err
+		}
+	}
+
 	if done != nil {
 		go func() {
 			<-done
@@ -360,7 +364,8 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c
 			msgs, from, err := s.Receive()
 			if err != nil {
 				if cberr != nil {
-					cberr(err)
+					cberr(fmt.Errorf("Receive failed: %v",
+						err))
 				}
 				return
 			}
@@ -375,7 +380,6 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c
 					continue
 				}
 				if m.Header.Type == unix.NLMSG_ERROR {
-					native := nl.NativeEndian()
 					error := int32(native.Uint32(m.Data[0:4]))
 					if error == 0 {
 						continue
@@ -394,7 +398,7 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c
 					continue
 				}
 
-				addr, _, ifindex, err := parseAddr(m.Data)
+				addr, _, err := parseAddr(m.Data)
 				if err != nil {
 					if cberr != nil {
 						cberr(fmt.Errorf("could not parse address: %v", err))
@@ -403,7 +407,7 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c
 				}
 
 				ch <- AddrUpdate{LinkAddress: *addr.IPNet,
-					LinkIndex:   ifindex,
+					LinkIndex:   addr.LinkIndex,
 					NewAddr:     msgType == unix.RTM_NEWADDR,
 					Flags:       addr.Flags,
 					Scope:       addr.Scope,

+ 24 - 0
vendor/github.com/vishvananda/netlink/bpf_linux.go

@@ -16,6 +16,30 @@ const (
 	BPF_PROG_TYPE_SCHED_ACT
 	BPF_PROG_TYPE_TRACEPOINT
 	BPF_PROG_TYPE_XDP
+	BPF_PROG_TYPE_PERF_EVENT
+	BPF_PROG_TYPE_CGROUP_SKB
+	BPF_PROG_TYPE_CGROUP_SOCK
+	BPF_PROG_TYPE_LWT_IN
+	BPF_PROG_TYPE_LWT_OUT
+	BPF_PROG_TYPE_LWT_XMIT
+	BPF_PROG_TYPE_SOCK_OPS
+	BPF_PROG_TYPE_SK_SKB
+	BPF_PROG_TYPE_CGROUP_DEVICE
+	BPF_PROG_TYPE_SK_MSG
+	BPF_PROG_TYPE_RAW_TRACEPOINT
+	BPF_PROG_TYPE_CGROUP_SOCK_ADDR
+	BPF_PROG_TYPE_LWT_SEG6LOCAL
+	BPF_PROG_TYPE_LIRC_MODE2
+	BPF_PROG_TYPE_SK_REUSEPORT
+	BPF_PROG_TYPE_FLOW_DISSECTOR
+	BPF_PROG_TYPE_CGROUP_SYSCTL
+	BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE
+	BPF_PROG_TYPE_CGROUP_SOCKOPT
+	BPF_PROG_TYPE_TRACING
+	BPF_PROG_TYPE_STRUCT_OPS
+	BPF_PROG_TYPE_EXT
+	BPF_PROG_TYPE_LSM
+	BPF_PROG_TYPE_SK_LOOKUP
 )
 
 type BPFAttr struct {

+ 42 - 14
vendor/github.com/vishvananda/netlink/class.go

@@ -132,7 +132,10 @@ func (class *GenericClass) Type() string {
 	return class.ClassType
 }
 
-// ServiceCurve is the way the HFSC curve are represented
+// ServiceCurve is a nondecreasing function of some time unit, returning the amount of service
+// (an allowed or allocated amount of bandwidth) at some specific point in time. The purpose of it
+// should be subconsciously obvious: if a class was allowed to transfer not less than the amount
+// specified by its service curve, then the service curve is not violated.
 type ServiceCurve struct {
 	m1 uint32
 	d  uint32
@@ -144,6 +147,21 @@ func (c *ServiceCurve) Attrs() (uint32, uint32, uint32) {
 	return c.m1, c.d, c.m2
 }
 
+// Burst returns the burst rate (m1) of the curve
+func (c *ServiceCurve) Burst() uint32 {
+	return c.m1
+}
+
+// Delay return the delay (d) of the curve
+func (c *ServiceCurve) Delay() uint32 {
+	return c.d
+}
+
+// Rate returns the rate (m2) of the curve
+func (c *ServiceCurve) Rate() uint32 {
+	return c.m2
+}
+
 // HfscClass is a representation of the HFSC class
 type HfscClass struct {
 	ClassAttrs
@@ -152,35 +170,44 @@ type HfscClass struct {
 	Usc ServiceCurve
 }
 
-// SetUsc sets the Usc curve
+// SetUsc sets the USC curve. The bandwidth (m1 and m2) is specified in bits and the delay in
+// seconds.
 func (hfsc *HfscClass) SetUsc(m1 uint32, d uint32, m2 uint32) {
-	hfsc.Usc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8}
+	hfsc.Usc = ServiceCurve{m1: m1, d: d, m2: m2}
 }
 
-// SetFsc sets the Fsc curve
+// SetFsc sets the Fsc curve. The bandwidth (m1 and m2) is specified in bits and the delay in
+// seconds.
 func (hfsc *HfscClass) SetFsc(m1 uint32, d uint32, m2 uint32) {
-	hfsc.Fsc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8}
+	hfsc.Fsc = ServiceCurve{m1: m1, d: d, m2: m2}
 }
 
-// SetRsc sets the Rsc curve
+// SetRsc sets the Rsc curve. The bandwidth (m1 and m2) is specified in bits and the delay in
+// seconds.
 func (hfsc *HfscClass) SetRsc(m1 uint32, d uint32, m2 uint32) {
-	hfsc.Rsc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8}
+	hfsc.Rsc = ServiceCurve{m1: m1, d: d, m2: m2}
 }
 
-// SetSC implements the SC from the tc CLI
+// SetSC implements the SC from the `tc` CLI. This function behaves the same as if one would set the
+// USC through the `tc` command-line tool. This means bandwidth (m1 and m2) is specified in bits and
+// the delay in ms.
 func (hfsc *HfscClass) SetSC(m1 uint32, d uint32, m2 uint32) {
-	hfsc.Rsc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8}
-	hfsc.Fsc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8}
+	hfsc.SetRsc(m1, d, m2)
+	hfsc.SetFsc(m1, d, m2)
 }
 
-// SetUL implements the UL from the tc CLI
+// SetUL implements the UL from the `tc` CLI. This function behaves the same as if one would set the
+// USC through the `tc` command-line tool. This means bandwidth (m1 and m2) is specified in bits and
+// the delay in ms.
 func (hfsc *HfscClass) SetUL(m1 uint32, d uint32, m2 uint32) {
-	hfsc.Usc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8}
+	hfsc.SetUsc(m1, d, m2)
 }
 
-// SetLS implements the LS from the tc CLI
+// SetLS implements the LS from the `tc` CLI. This function behaves the same as if one would set the
+// USC through the `tc` command-line tool. This means bandwidth (m1 and m2) is specified in bits and
+// the delay in ms.
 func (hfsc *HfscClass) SetLS(m1 uint32, d uint32, m2 uint32) {
-	hfsc.Fsc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8}
+	hfsc.SetFsc(m1, d, m2)
 }
 
 // NewHfscClass returns a new HFSC struct with the set parameters
@@ -193,6 +220,7 @@ func NewHfscClass(attrs ClassAttrs) *HfscClass {
 	}
 }
 
+// String() returns a string that contains the information and attributes of the HFSC class
 func (hfsc *HfscClass) String() string {
 	return fmt.Sprintf(
 		"{%s -- {RSC: {m1=%d d=%d m2=%d}} {FSC: {m1=%d d=%d m2=%d}} {USC: {m1=%d d=%d m2=%d}}}",

+ 23 - 12
vendor/github.com/vishvananda/netlink/class_linux.go

@@ -43,12 +43,12 @@ func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass {
 	if buffer == 0 {
 		buffer = uint32(float64(rate)/Hz() + float64(mtu))
 	}
-	buffer = uint32(Xmittime(rate, buffer))
+	buffer = Xmittime(rate, buffer)
 
 	if cbuffer == 0 {
 		cbuffer = uint32(float64(ceil)/Hz() + float64(mtu))
 	}
-	cbuffer = uint32(Xmittime(ceil, cbuffer))
+	cbuffer = Xmittime(ceil, cbuffer)
 
 	return &HtbClass{
 		ClassAttrs: attrs,
@@ -56,9 +56,9 @@ func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass {
 		Ceil:       ceil,
 		Buffer:     buffer,
 		Cbuffer:    cbuffer,
-		Quantum:    10,
 		Level:      0,
-		Prio:       0,
+		Prio:       cattrs.Prio,
+		Quantum:    cattrs.Quantum,
 	}
 }
 
@@ -176,12 +176,21 @@ func classPayload(req *nl.NetlinkRequest, class Class) error {
 		options.AddRtAttr(nl.TCA_HTB_PARMS, opt.Serialize())
 		options.AddRtAttr(nl.TCA_HTB_RTAB, SerializeRtab(rtab))
 		options.AddRtAttr(nl.TCA_HTB_CTAB, SerializeRtab(ctab))
+		if htb.Rate >= uint64(1<<32) {
+			options.AddRtAttr(nl.TCA_HTB_RATE64, nl.Uint64Attr(htb.Rate))
+		}
+		if htb.Ceil >= uint64(1<<32) {
+			options.AddRtAttr(nl.TCA_HTB_CEIL64, nl.Uint64Attr(htb.Ceil))
+		}
 	case "hfsc":
 		hfsc := class.(*HfscClass)
 		opt := nl.HfscCopt{}
-		opt.Rsc.Set(hfsc.Rsc.Attrs())
-		opt.Fsc.Set(hfsc.Fsc.Attrs())
-		opt.Usc.Set(hfsc.Usc.Attrs())
+		rm1, rd, rm2 := hfsc.Rsc.Attrs()
+		opt.Rsc.Set(rm1/8, rd, rm2/8)
+		fm1, fd, fm2 := hfsc.Fsc.Attrs()
+		opt.Fsc.Set(fm1/8, fd, fm2/8)
+		um1, ud, um2 := hfsc.Usc.Attrs()
+		opt.Usc.Set(um1/8, ud, um2/8)
 		options.AddRtAttr(nl.TCA_HFSC_RSC, nl.SerializeHfscCurve(&opt.Rsc))
 		options.AddRtAttr(nl.TCA_HFSC_FSC, nl.SerializeHfscCurve(&opt.Fsc))
 		options.AddRtAttr(nl.TCA_HFSC_USC, nl.SerializeHfscCurve(&opt.Usc))
@@ -303,6 +312,10 @@ func parseHtbClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, erro
 			htb.Quantum = opt.Quantum
 			htb.Level = opt.Level
 			htb.Prio = opt.Prio
+		case nl.TCA_HTB_RATE64:
+			htb.Rate = native.Uint64(datum.Value[0:8])
+		case nl.TCA_HTB_CEIL64:
+			htb.Ceil = native.Uint64(datum.Value[0:8])
 		}
 	}
 	return detailed, nil
@@ -315,11 +328,11 @@ func parseHfscClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, err
 		m1, d, m2 := nl.DeserializeHfscCurve(datum.Value).Attrs()
 		switch datum.Attr.Type {
 		case nl.TCA_HFSC_RSC:
-			hfsc.Rsc = ServiceCurve{m1: m1, d: d, m2: m2}
+			hfsc.Rsc = ServiceCurve{m1: m1 * 8, d: d, m2: m2 * 8}
 		case nl.TCA_HFSC_FSC:
-			hfsc.Fsc = ServiceCurve{m1: m1, d: d, m2: m2}
+			hfsc.Fsc = ServiceCurve{m1: m1 * 8, d: d, m2: m2 * 8}
 		case nl.TCA_HFSC_USC:
-			hfsc.Usc = ServiceCurve{m1: m1, d: d, m2: m2}
+			hfsc.Usc = ServiceCurve{m1: m1 * 8, d: d, m2: m2 * 8}
 		}
 	}
 	return detailed, nil
@@ -328,7 +341,6 @@ func parseHfscClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, err
 func parseTcStats(data []byte) (*ClassStatistics, error) {
 	buf := &bytes.Buffer{}
 	buf.Write(data)
-	native := nl.NativeEndian()
 	tcStats := &tcStats{}
 	if err := binary.Read(buf, native, tcStats); err != nil {
 		return nil, err
@@ -350,7 +362,6 @@ func parseTcStats(data []byte) (*ClassStatistics, error) {
 func parseGnetStats(data []byte, gnetStats interface{}) error {
 	buf := &bytes.Buffer{}
 	buf.Write(data)
-	native := nl.NativeEndian()
 	return binary.Read(buf, native, gnetStats)
 }
 

+ 186 - 43
vendor/github.com/vishvananda/netlink/conntrack_linux.go

@@ -6,6 +6,7 @@ import (
 	"errors"
 	"fmt"
 	"net"
+	"time"
 
 	"github.com/vishvananda/netlink/nl"
 	"golang.org/x/sys/unix"
@@ -145,16 +146,23 @@ type ConntrackFlow struct {
 	Forward    ipTuple
 	Reverse    ipTuple
 	Mark       uint32
+	TimeStart  uint64
+	TimeStop   uint64
+	TimeOut    uint32
 }
 
 func (s *ConntrackFlow) String() string {
 	// conntrack cmd output:
 	// udp      17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 packets=5 bytes=532 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 packets=10 bytes=1078 mark=0
-	return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d packets=%d bytes=%d\tsrc=%s dst=%s sport=%d dport=%d packets=%d bytes=%d mark=%d",
+	//             start=2019-07-26 01:26:21.557800506 +0000 UTC stop=1970-01-01 00:00:00 +0000 UTC timeout=30(sec)
+	start := time.Unix(0, int64(s.TimeStart))
+	stop := time.Unix(0, int64(s.TimeStop))
+	timeout := int32(s.TimeOut)
+	return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d packets=%d bytes=%d\tsrc=%s dst=%s sport=%d dport=%d packets=%d bytes=%d mark=0x%x start=%v stop=%v timeout=%d(sec)",
 		nl.L4ProtoMap[s.Forward.Protocol], s.Forward.Protocol,
 		s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort, s.Forward.Packets, s.Forward.Bytes,
 		s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort, s.Reverse.Packets, s.Reverse.Bytes,
-		s.Mark)
+		s.Mark, start, stop, timeout)
 }
 
 // This method parse the ip tuple structure
@@ -174,25 +182,43 @@ func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) uint8 {
 			tpl.DstIP = v
 		}
 	}
-	// Skip the next 4 bytes  nl.NLA_F_NESTED|nl.CTA_TUPLE_PROTO
-	reader.Seek(4, seekCurrent)
-	_, t, _, v := parseNfAttrTLV(reader)
+	// Get total length of nested protocol-specific info.
+	_, _, protoInfoTotalLen := parseNfAttrTL(reader)
+	_, t, l, v := parseNfAttrTLV(reader)
+	// Track the number of bytes read.
+	protoInfoBytesRead := uint16(nl.SizeofNfattr) + l
 	if t == nl.CTA_PROTO_NUM {
 		tpl.Protocol = uint8(v[0])
 	}
-	// Skip some padding 3 bytes
+	// We only parse TCP & UDP headers. Skip the others.
+	if tpl.Protocol != 6 && tpl.Protocol != 17 {
+		// skip the rest
+		bytesRemaining := protoInfoTotalLen - protoInfoBytesRead
+		reader.Seek(int64(bytesRemaining), seekCurrent)
+		return tpl.Protocol
+	}
+	// Skip 3 bytes of padding
 	reader.Seek(3, seekCurrent)
+	protoInfoBytesRead += 3
 	for i := 0; i < 2; i++ {
 		_, t, _ := parseNfAttrTL(reader)
+		protoInfoBytesRead += uint16(nl.SizeofNfattr)
 		switch t {
 		case nl.CTA_PROTO_SRC_PORT:
 			parseBERaw16(reader, &tpl.SrcPort)
+			protoInfoBytesRead += 2
 		case nl.CTA_PROTO_DST_PORT:
 			parseBERaw16(reader, &tpl.DstPort)
+			protoInfoBytesRead += 2
 		}
-		// Skip some padding 2 byte
+		// Skip 2 bytes of padding
 		reader.Seek(2, seekCurrent)
+		protoInfoBytesRead += 2
 	}
+	// Skip any remaining/unknown parts of the message
+	bytesRemaining := protoInfoTotalLen - protoInfoBytesRead
+	reader.Seek(int64(bytesRemaining), seekCurrent)
+
 	return tpl.Protocol
 }
 
@@ -211,10 +237,14 @@ func parseNfAttrTL(r *bytes.Reader) (isNested bool, attrType, len uint16) {
 	binary.Read(r, nl.NativeEndian(), &attrType)
 	isNested = (attrType & nl.NLA_F_NESTED) == nl.NLA_F_NESTED
 	attrType = attrType & (nl.NLA_F_NESTED - 1)
-
 	return isNested, attrType, len
 }
 
+func skipNfAttrValue(r *bytes.Reader, len uint16) {
+	len = (len + nl.NLA_ALIGNTO - 1) & ^(nl.NLA_ALIGNTO - 1)
+	r.Seek(int64(len), seekCurrent)
+}
+
 func parseBERaw16(r *bytes.Reader, v *uint16) {
 	binary.Read(r, binary.BigEndian, v)
 }
@@ -241,6 +271,36 @@ func parseByteAndPacketCounters(r *bytes.Reader) (bytes, packets uint64) {
 	return
 }
 
+// parseTimeStamp reads one or two timestamp attributes from r; when the flow is alive, only the timestamp_start is returned in structure
+func parseTimeStamp(r *bytes.Reader, readSize uint16) (tstart, tstop uint64) {
+	var numTimeStamps int
+	oneItem := nl.SizeofNfattr + 8 // 4 bytes attr header + 8 bytes timestamp
+	if readSize == uint16(oneItem) {
+		numTimeStamps = 1
+	} else if readSize == 2*uint16(oneItem) {
+		numTimeStamps = 2
+	} else {
+		return // unexpected size: zero values are returned and nothing is consumed from r
+	}
+	for i := 0; i < numTimeStamps; i++ {
+		switch _, t, _ := parseNfAttrTL(r); t {
+		case nl.CTA_TIMESTAMP_START:
+			parseBERaw64(r, &tstart)
+		case nl.CTA_TIMESTAMP_STOP:
+			parseBERaw64(r, &tstop)
+		default:
+			return // unknown attribute type: stop here, its value is left unread in r
+		}
+	}
+	return
+
+}
+
+func parseTimeOut(r *bytes.Reader) (ttimeout uint32) {
+	parseBERaw32(r, &ttimeout)
+	return
+}
+
 func parseConnectionMark(r *bytes.Reader) (mark uint32) {
 	parseBERaw32(r, &mark)
 	return
@@ -266,25 +326,37 @@ func parseRawData(data []byte) *ConntrackFlow {
 		if nested, t, l := parseNfAttrTL(reader); nested {
 			switch t {
 			case nl.CTA_TUPLE_ORIG:
-				if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
+				if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
 					parseIpTuple(reader, &s.Forward)
 				}
 			case nl.CTA_TUPLE_REPLY:
-				if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
+				if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
 					parseIpTuple(reader, &s.Reverse)
 				} else {
 					// Header not recognized skip it
-					reader.Seek(int64(l), seekCurrent)
+					skipNfAttrValue(reader, l)
 				}
 			case nl.CTA_COUNTERS_ORIG:
 				s.Forward.Bytes, s.Forward.Packets = parseByteAndPacketCounters(reader)
 			case nl.CTA_COUNTERS_REPLY:
 				s.Reverse.Bytes, s.Reverse.Packets = parseByteAndPacketCounters(reader)
+			case nl.CTA_TIMESTAMP:
+				s.TimeStart, s.TimeStop = parseTimeStamp(reader, l)
+			case nl.CTA_PROTOINFO:
+				skipNfAttrValue(reader, l)
+			default:
+				skipNfAttrValue(reader, l)
 			}
 		} else {
 			switch t {
 			case nl.CTA_MARK:
 				s.Mark = parseConnectionMark(reader)
+			case nl.CTA_TIMEOUT:
+				s.TimeOut = parseTimeOut(reader)
+			case nl.CTA_STATUS, nl.CTA_USE, nl.CTA_ID:
+				skipNfAttrValue(reader, l)
+			default:
+				skipNfAttrValue(reader, l)
 			}
 		}
 	}
@@ -318,18 +390,25 @@ func parseRawData(data []byte) *ConntrackFlow {
 //   --mask-src ip                 Source mask address
 //   --mask-dst ip                 Destination mask address
 
+// Layer 4 Protocol common parameters and options:
+// TCP, UDP, SCTP, UDPLite and DCCP
+//    --sport, --orig-port-src port    Source port in original direction
+//    --dport, --orig-port-dst port    Destination port in original direction
+
 // Filter types
 type ConntrackFilterType uint8
 
 const (
-	ConntrackOrigSrcIP  = iota                // -orig-src ip    Source address from original direction
-	ConntrackOrigDstIP                        // -orig-dst ip    Destination address from original direction
-	ConntrackReplySrcIP                       // --reply-src ip  Reply Source IP
-	ConntrackReplyDstIP                       // --reply-dst ip  Reply Destination IP
-	ConntrackReplyAnyIP                       // Match source or destination reply IP
-	ConntrackNatSrcIP   = ConntrackReplySrcIP // deprecated use instead ConntrackReplySrcIP
-	ConntrackNatDstIP   = ConntrackReplyDstIP // deprecated use instead ConntrackReplyDstIP
-	ConntrackNatAnyIP   = ConntrackReplyAnyIP // deprecated use instaed ConntrackReplyAnyIP
+	ConntrackOrigSrcIP   = iota                // -orig-src ip    Source address from original direction
+	ConntrackOrigDstIP                         // -orig-dst ip    Destination address from original direction
+	ConntrackReplySrcIP                        // --reply-src ip  Reply Source IP
+	ConntrackReplyDstIP                        // --reply-dst ip  Reply Destination IP
+	ConntrackReplyAnyIP                        // Match source or destination reply IP
+	ConntrackOrigSrcPort                       // --orig-port-src port    Source port in original direction
+	ConntrackOrigDstPort                       // --orig-port-dst port    Destination port in original direction
+	ConntrackNatSrcIP    = ConntrackReplySrcIP // deprecated use instead ConntrackReplySrcIP
+	ConntrackNatDstIP    = ConntrackReplyDstIP // deprecated use instead ConntrackReplyDstIP
+	ConntrackNatAnyIP    = ConntrackReplyAnyIP // deprecated use instead ConntrackReplyAnyIP
 )
 
 type CustomConntrackFilter interface {
@@ -339,53 +418,117 @@ type CustomConntrackFilter interface {
 }
 
 type ConntrackFilter struct {
-	ipFilter map[ConntrackFilterType]net.IP
+	ipNetFilter map[ConntrackFilterType]*net.IPNet
+	portFilter  map[ConntrackFilterType]uint16
+	protoFilter uint8
+}
+
+// AddIPNet adds an IP subnet to the conntrack filter
+func (f *ConntrackFilter) AddIPNet(tp ConntrackFilterType, ipNet *net.IPNet) error {
+	if ipNet == nil {
+		return fmt.Errorf("Filter attribute empty")
+	}
+	if f.ipNetFilter == nil {
+		f.ipNetFilter = make(map[ConntrackFilterType]*net.IPNet) // lazily create the map on first use
+	}
+	if _, ok := f.ipNetFilter[tp]; ok {
+		return errors.New("Filter attribute already present") // each filter type may be set only once
+	}
+	f.ipNetFilter[tp] = ipNet
+	return nil
 }
 
 // AddIP adds an IP to the conntrack filter
 func (f *ConntrackFilter) AddIP(tp ConntrackFilterType, ip net.IP) error {
-	if f.ipFilter == nil {
-		f.ipFilter = make(map[ConntrackFilterType]net.IP)
+	if ip == nil {
+		return fmt.Errorf("Filter attribute empty")
 	}
-	if _, ok := f.ipFilter[tp]; ok {
+	return f.AddIPNet(tp, NewIPNet(ip))
+}
+
+// AddPort adds a Port to the conntrack filter if the Layer 4 protocol allows it
+func (f *ConntrackFilter) AddPort(tp ConntrackFilterType, port uint16) error {
+	switch f.protoFilter {
+	// TCP, UDP, DCCP, SCTP, UDPLite
+	case 6, 17, 33, 132, 136:
+	default:
+		return fmt.Errorf("Filter attribute not available without a valid Layer 4 protocol: %d", f.protoFilter)
+	}
+
+	if f.portFilter == nil {
+		f.portFilter = make(map[ConntrackFilterType]uint16)
+	}
+	if _, ok := f.portFilter[tp]; ok {
 		return errors.New("Filter attribute already present")
 	}
-	f.ipFilter[tp] = ip
+	f.portFilter[tp] = port
+	return nil
+}
+
+// AddProtocol adds the Layer 4 protocol to the conntrack filter
+func (f *ConntrackFilter) AddProtocol(proto uint8) error {
+	if f.protoFilter != 0 {
+		return errors.New("Filter attribute already present")
+	}
+	f.protoFilter = proto
 	return nil
 }
 
 // MatchConntrackFlow applies the filter to the flow and returns true if the flow matches the filter
 // false otherwise
 func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool {
-	if len(f.ipFilter) == 0 {
+	if len(f.ipNetFilter) == 0 && len(f.portFilter) == 0 && f.protoFilter == 0 {
 		// empty filter always not match
 		return false
 	}
 
-	match := true
-	// -orig-src ip   Source address from original direction
-	if elem, found := f.ipFilter[ConntrackOrigSrcIP]; found {
-		match = match && elem.Equal(flow.Forward.SrcIP)
+	// -p, --protonum proto          Layer 4 Protocol, eg. 'tcp'
+	if f.protoFilter != 0 && flow.Forward.Protocol != f.protoFilter {
+		// different Layer 4 protocol always not match
+		return false
 	}
 
-	// -orig-dst ip   Destination address from original direction
-	if elem, found := f.ipFilter[ConntrackOrigDstIP]; match && found {
-		match = match && elem.Equal(flow.Forward.DstIP)
-	}
+	match := true
 
-	// -src-nat ip    Source NAT ip
-	if elem, found := f.ipFilter[ConntrackReplySrcIP]; match && found {
-		match = match && elem.Equal(flow.Reverse.SrcIP)
-	}
+	// IP conntrack filter
+	if len(f.ipNetFilter) > 0 {
+		// -orig-src ip   Source address from original direction
+		if elem, found := f.ipNetFilter[ConntrackOrigSrcIP]; found {
+			match = match && elem.Contains(flow.Forward.SrcIP)
+		}
+
+		// -orig-dst ip   Destination address from original direction
+		if elem, found := f.ipNetFilter[ConntrackOrigDstIP]; match && found {
+			match = match && elem.Contains(flow.Forward.DstIP)
+		}
+
+		// -src-nat ip    Source NAT ip
+		if elem, found := f.ipNetFilter[ConntrackReplySrcIP]; match && found {
+			match = match && elem.Contains(flow.Reverse.SrcIP)
+		}
 
-	// -dst-nat ip    Destination NAT ip
-	if elem, found := f.ipFilter[ConntrackReplyDstIP]; match && found {
-		match = match && elem.Equal(flow.Reverse.DstIP)
+		// -dst-nat ip    Destination NAT ip
+		if elem, found := f.ipNetFilter[ConntrackReplyDstIP]; match && found {
+			match = match && elem.Contains(flow.Reverse.DstIP)
+		}
+
+		// Match source or destination reply IP
+		if elem, found := f.ipNetFilter[ConntrackReplyAnyIP]; match && found {
+			match = match && (elem.Contains(flow.Reverse.SrcIP) || elem.Contains(flow.Reverse.DstIP))
+		}
 	}
 
-	// Match source or destination reply IP
-	if elem, found := f.ipFilter[ConntrackReplyAnyIP]; match && found {
-		match = match && (elem.Equal(flow.Reverse.SrcIP) || elem.Equal(flow.Reverse.DstIP))
+	// Layer 4 Port filter
+	if len(f.portFilter) > 0 {
+		// -orig-port-src port	Source port from original direction
+		if elem, found := f.portFilter[ConntrackOrigSrcPort]; match && found {
+			match = match && elem == flow.Forward.SrcPort
+		}
+
+		// -orig-port-dst port	Destination port from original direction
+		if elem, found := f.portFilter[ConntrackOrigDstPort]; match && found {
+			match = match && elem == flow.Forward.DstPort
+		}
 	}
 
 	return match

+ 461 - 5
vendor/github.com/vishvananda/netlink/devlink_linux.go

@@ -1,9 +1,11 @@
 package netlink
 
 import (
+	"fmt"
+	"net"
+	"strings"
 	"syscall"
 
-	"fmt"
 	"github.com/vishvananda/netlink/nl"
 	"golang.org/x/sys/unix"
 )
@@ -27,6 +29,61 @@ type DevlinkDevice struct {
 	Attrs      DevlinkDevAttrs
 }
 
+// DevlinkPortFn represents port function and its attributes
+type DevlinkPortFn struct {
+	HwAddr  net.HardwareAddr
+	State   uint8
+	OpState uint8
+}
+
+// DevlinkPortFnSetAttrs represents attributes to set
+type DevlinkPortFnSetAttrs struct {
+	FnAttrs     DevlinkPortFn
+	HwAddrValid bool
+	StateValid  bool
+}
+
+// DevlinkPort represents port and its attributes
+type DevlinkPort struct {
+	BusName        string
+	DeviceName     string
+	PortIndex      uint32
+	PortType       uint16
+	NetdeviceName  string
+	NetdevIfIndex  uint32
+	RdmaDeviceName string
+	PortFlavour    uint16
+	Fn             *DevlinkPortFn
+}
+
+type DevLinkPortAddAttrs struct {
+	Controller      uint32
+	SfNumber        uint32
+	PortIndex       uint32
+	PfNumber        uint16
+	SfNumberValid   bool
+	PortIndexValid  bool
+	ControllerValid bool
+}
+
+// DevlinkDeviceInfo represents devlink info
+type DevlinkDeviceInfo struct {
+	Driver         string
+	SerialNumber   string
+	BoardID        string
+	FwApp          string
+	FwAppBoundleID string
+	FwAppName      string
+	FwBoundleID    string
+	FwMgmt         string
+	FwMgmtAPI      string
+	FwMgmtBuild    string
+	FwNetlist      string
+	FwNetlistBuild string
+	FwPsidAPI      string
+	FwUndi         string
+}
+
 func parseDevLinkDeviceList(msgs [][]byte) ([]*DevlinkDevice, error) {
 	devices := make([]*DevlinkDevice, 0, len(msgs))
 	for _, m := range msgs {
@@ -95,9 +152,9 @@ func (d *DevlinkDevice) parseAttributes(attrs []syscall.NetlinkRouteAttr) error
 	for _, a := range attrs {
 		switch a.Attr.Type {
 		case nl.DEVLINK_ATTR_BUS_NAME:
-			d.BusName = string(a.Value)
+			d.BusName = string(a.Value[:len(a.Value)-1])
 		case nl.DEVLINK_ATTR_DEV_NAME:
-			d.DeviceName = string(a.Value)
+			d.DeviceName = string(a.Value[:len(a.Value)-1])
 		case nl.DEVLINK_ATTR_ESWITCH_MODE:
 			d.Attrs.Eswitch.Mode = parseEswitchMode(native.Uint16(a.Value))
 		case nl.DEVLINK_ATTR_ESWITCH_INLINE_MODE:
@@ -126,12 +183,12 @@ func (h *Handle) getEswitchAttrs(family *GenlFamily, dev *DevlinkDevice) {
 	req := h.newNetlinkRequest(int(family.ID), unix.NLM_F_REQUEST|unix.NLM_F_ACK)
 	req.AddData(msg)
 
-	b := make([]byte, len(dev.BusName))
+	b := make([]byte, len(dev.BusName)+1)
 	copy(b, dev.BusName)
 	data := nl.NewRtAttr(nl.DEVLINK_ATTR_BUS_NAME, b)
 	req.AddData(data)
 
-	b = make([]byte, len(dev.DeviceName))
+	b = make([]byte, len(dev.DeviceName)+1)
 	copy(b, dev.DeviceName)
 	data = nl.NewRtAttr(nl.DEVLINK_ATTR_DEV_NAME, b)
 	req.AddData(data)
@@ -270,3 +327,402 @@ func (h *Handle) DevLinkSetEswitchMode(Dev *DevlinkDevice, NewMode string) error
 func DevLinkSetEswitchMode(Dev *DevlinkDevice, NewMode string) error {
 	return pkgHandle.DevLinkSetEswitchMode(Dev, NewMode)
 }
+
+func (port *DevlinkPort) parseAttributes(attrs []syscall.NetlinkRouteAttr) error {
+	for _, a := range attrs {
+		switch a.Attr.Type {
+		case nl.DEVLINK_ATTR_BUS_NAME:
+			port.BusName = string(a.Value[:len(a.Value)-1]) // drops the last byte (presumably the NUL terminator); panics if a.Value is empty
+		case nl.DEVLINK_ATTR_DEV_NAME:
+			port.DeviceName = string(a.Value[:len(a.Value)-1])
+		case nl.DEVLINK_ATTR_PORT_INDEX:
+			port.PortIndex = native.Uint32(a.Value)
+		case nl.DEVLINK_ATTR_PORT_TYPE:
+			port.PortType = native.Uint16(a.Value)
+		case nl.DEVLINK_ATTR_PORT_NETDEV_NAME:
+			port.NetdeviceName = string(a.Value[:len(a.Value)-1])
+		case nl.DEVLINK_ATTR_PORT_NETDEV_IFINDEX:
+			port.NetdevIfIndex = native.Uint32(a.Value)
+		case nl.DEVLINK_ATTR_PORT_IBDEV_NAME:
+			port.RdmaDeviceName = string(a.Value[:len(a.Value)-1])
+		case nl.DEVLINK_ATTR_PORT_FLAVOUR:
+			port.PortFlavour = native.Uint16(a.Value)
+		case nl.DEVLINK_ATTR_PORT_FUNCTION:
+			port.Fn = &DevlinkPortFn{} // Fn stays nil unless the port reports a function attribute
+			for nested := range nl.ParseAttributes(a.Value) {
+				switch nested.Type {
+				case nl.DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR:
+					port.Fn.HwAddr = nested.Value[:]
+				case nl.DEVLINK_PORT_FN_ATTR_STATE:
+					port.Fn.State = uint8(nested.Value[0]) // NOTE(review): no length check; an empty value would panic
+				case nl.DEVLINK_PORT_FN_ATTR_OPSTATE:
+					port.Fn.OpState = uint8(nested.Value[0])
+				}
+			}
+		}
+	}
+	return nil // always nil: unknown attribute types are ignored
+}
+
+func parseDevLinkAllPortList(msgs [][]byte) ([]*DevlinkPort, error) {
+	ports := make([]*DevlinkPort, 0, len(msgs))
+	for _, m := range msgs {
+		attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:])
+		if err != nil {
+			return nil, err
+		}
+		port := &DevlinkPort{}
+		if err = port.parseAttributes(attrs); err != nil {
+			return nil, err
+		}
+		ports = append(ports, port)
+	}
+	return ports, nil
+}
+
+// DevLinkGetAllPortList returns the devlink ports reported by a port dump and a nil error,
+// otherwise returns an error.
+func (h *Handle) DevLinkGetAllPortList() ([]*DevlinkPort, error) {
+	f, err := h.GenlFamilyGet(nl.GENL_DEVLINK_NAME)
+	if err != nil {
+		return nil, err
+	}
+	msg := &nl.Genlmsg{
+		Command: nl.DEVLINK_CMD_PORT_GET,
+		Version: nl.GENL_DEVLINK_VERSION,
+	}
+	req := h.newNetlinkRequest(int(f.ID),
+		unix.NLM_F_REQUEST|unix.NLM_F_ACK|unix.NLM_F_DUMP)
+	req.AddData(msg)
+	msgs, err := req.Execute(unix.NETLINK_GENERIC, 0)
+	if err != nil {
+		return nil, err
+	}
+	ports, err := parseDevLinkAllPortList(msgs)
+	if err != nil {
+		return nil, err
+	}
+	return ports, nil
+}
+
+// DevLinkGetAllPortList returns the devlink ports and a nil error using the package handle,
+// otherwise returns an error.
+func DevLinkGetAllPortList() ([]*DevlinkPort, error) {
+	return pkgHandle.DevLinkGetAllPortList()
+}
+
+// parseDevlinkPortMsg decodes the first message of a devlink port response
+// into a DevlinkPort. It returns an error, rather than panicking, when the
+// response holds no messages or the first message is shorter than the
+// generic netlink header. Any further messages are ignored.
+func parseDevlinkPortMsg(msgs [][]byte) (*DevlinkPort, error) {
+	if len(msgs) < 1 {
+		return nil, fmt.Errorf("parseDevlinkPortMsg: no messages in response")
+	}
+	m := msgs[0]
+	if len(m) < nl.SizeofGenlmsg {
+		return nil, fmt.Errorf("parseDevlinkPortMsg: message too short")
+	}
+	// The attributes start right after the generic netlink header.
+	attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:])
+	if err != nil {
+		return nil, err
+	}
+	port := &DevlinkPort{}
+	if err = port.parseAttributes(attrs); err != nil {
+		return nil, err
+	}
+	return port, nil
+}
+
+// DevLinkGetPortByIndex provides a pointer to the devlink port and nil error,
+// otherwise returns an error.
+func (h *Handle) DevLinkGetPortByIndex(Bus string, Device string, PortIndex uint32) (*DevlinkPort, error) {
+
+	_, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_GET, Bus, Device)
+	if err != nil {
+		return nil, err
+	}
+
+	req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(PortIndex)))
+
+	respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0)
+	if err != nil {
+		return nil, err
+	}
+	port, err := parseDevlinkPortMsg(respmsg)
+	return port, err
+}
+
+// DevLinkGetPortByIndex provides a pointer to the devlink port and nil error,
+// otherwise returns an error.
+func DevLinkGetPortByIndex(Bus string, Device string, PortIndex uint32) (*DevlinkPort, error) {
+	return pkgHandle.DevLinkGetPortByIndex(Bus, Device, PortIndex)
+}
+
+// DevLinkPortAdd adds a devlink port and returns a port on success
+// otherwise returns nil port and an error code.
+func (h *Handle) DevLinkPortAdd(Bus string, Device string, Flavour uint16, Attrs DevLinkPortAddAttrs) (*DevlinkPort, error) {
+	_, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_NEW, Bus, Device)
+	if err != nil {
+		return nil, err
+	}
+
+	req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_FLAVOUR, nl.Uint16Attr(Flavour)))
+
+	req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_PCI_PF_NUMBER, nl.Uint16Attr(Attrs.PfNumber)))
+	if Flavour == nl.DEVLINK_PORT_FLAVOUR_PCI_SF && Attrs.SfNumberValid {
+		req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_PCI_SF_NUMBER, nl.Uint32Attr(Attrs.SfNumber)))
+	}
+	if Attrs.PortIndexValid {
+		req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(Attrs.PortIndex)))
+	}
+	if Attrs.ControllerValid {
+		req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_CONTROLLER_NUMBER, nl.Uint32Attr(Attrs.Controller)))
+	}
+	respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0)
+	if err != nil {
+		return nil, err
+	}
+	port, err := parseDevlinkPortMsg(respmsg)
+	return port, err
+}
+
+// DevLinkPortAdd adds a devlink port and returns a port on success
+// otherwise returns nil port and an error code.
+func DevLinkPortAdd(Bus string, Device string, Flavour uint16, Attrs DevLinkPortAddAttrs) (*DevlinkPort, error) {
+	return pkgHandle.DevLinkPortAdd(Bus, Device, Flavour, Attrs)
+}
+
+// DevLinkPortDel deletes a devlink port and returns success or error code.
+func (h *Handle) DevLinkPortDel(Bus string, Device string, PortIndex uint32) error {
+	_, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_DEL, Bus, Device)
+	if err != nil {
+		return err
+	}
+
+	req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(PortIndex)))
+	_, err = req.Execute(unix.NETLINK_GENERIC, 0)
+	return err
+}
+
+// DevLinkPortDel deletes a devlink port and returns success or error code.
+func DevLinkPortDel(Bus string, Device string, PortIndex uint32) error {
+	return pkgHandle.DevLinkPortDel(Bus, Device, PortIndex)
+}
+
+// DevlinkPortFnSet sets the port function attributes whose Valid flag is set in FnAttrs.
+// It returns nil on success or an error.
+func (h *Handle) DevlinkPortFnSet(Bus string, Device string, PortIndex uint32, FnAttrs DevlinkPortFnSetAttrs) error {
+	_, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_SET, Bus, Device)
+	if err != nil {
+		return err
+	}
+
+	req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(PortIndex)))
+
+	fnAttr := nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_FUNCTION|unix.NLA_F_NESTED, nil)
+
+	if FnAttrs.HwAddrValid {
+		fnAttr.AddRtAttr(nl.DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, []byte(FnAttrs.FnAttrs.HwAddr))
+	}
+
+	if FnAttrs.StateValid {
+		fnAttr.AddRtAttr(nl.DEVLINK_PORT_FN_ATTR_STATE, nl.Uint8Attr(FnAttrs.FnAttrs.State))
+	}
+	req.AddData(fnAttr) // the nested attribute is sent even when neither Valid flag is set
+
+	_, err = req.Execute(unix.NETLINK_GENERIC, 0)
+	return err
+}
+
+// DevlinkPortFnSet sets the port function attributes whose Valid flag is set in FnAttrs.
+// It returns nil on success or an error.
+func DevlinkPortFnSet(Bus string, Device string, PortIndex uint32, FnAttrs DevlinkPortFnSetAttrs) error {
+	return pkgHandle.DevlinkPortFnSet(Bus, Device, PortIndex, FnAttrs)
+}
+
+// devlinkInfoGetter is function that is responsible for getting devlink info message
+// this is introduced for test purpose
+type devlinkInfoGetter func(bus, device string) ([]byte, error)
+
+// DevlinkGetDeviceInfoByName returns devlink info for selected device,
+// otherwise returns an error code.
+// Equivalent to: `devlink dev info $dev`
+func (h *Handle) DevlinkGetDeviceInfoByName(Bus string, Device string, getInfoMsg devlinkInfoGetter) (*DevlinkDeviceInfo, error) {
+	info, err := h.DevlinkGetDeviceInfoByNameAsMap(Bus, Device, getInfoMsg)
+	if err != nil {
+		return nil, err
+	}
+
+	return parseInfoData(info), nil
+}
+
+// DevlinkGetDeviceInfoByName returns devlink info for selected device,
+// otherwise returns an error code.
+// Equivalent to: `devlink dev info $dev`
+func DevlinkGetDeviceInfoByName(Bus string, Device string) (*DevlinkDeviceInfo, error) {
+	return pkgHandle.DevlinkGetDeviceInfoByName(Bus, Device, pkgHandle.getDevlinkInfoMsg)
+}
+
+// DevlinkGetDeviceInfoByNameAsMap returns devlink info for selected device as a map,
+// otherwise returns an error code.
+// Equivalent to: `devlink dev info $dev`
+func (h *Handle) DevlinkGetDeviceInfoByNameAsMap(Bus string, Device string, getInfoMsg devlinkInfoGetter) (map[string]string, error) {
+	response, err := getInfoMsg(Bus, Device)
+	if err != nil {
+		return nil, err
+	}
+
+	info, err := parseInfoMsg(response)
+	if err != nil {
+		return nil, err
+	}
+
+	return info, nil
+}
+
+// DevlinkGetDeviceInfoByNameAsMap returns devlink info for selected device as a map,
+// otherwise returns an error code.
+// Equivalent to: `devlink dev info $dev`
+func DevlinkGetDeviceInfoByNameAsMap(Bus string, Device string) (map[string]string, error) {
+	return pkgHandle.DevlinkGetDeviceInfoByNameAsMap(Bus, Device, pkgHandle.getDevlinkInfoMsg)
+}
+
+// GetDevlinkInfo returns devlink info for target device,
+// otherwise returns an error code.
+func (d *DevlinkDevice) GetDevlinkInfo() (*DevlinkDeviceInfo, error) {
+	return pkgHandle.DevlinkGetDeviceInfoByName(d.BusName, d.DeviceName, pkgHandle.getDevlinkInfoMsg)
+}
+
+// GetDevlinkInfoAsMap returns devlink info for target device as a map,
+// otherwise returns an error code.
+func (d *DevlinkDevice) GetDevlinkInfoAsMap() (map[string]string, error) {
+	return pkgHandle.DevlinkGetDeviceInfoByNameAsMap(d.BusName, d.DeviceName, pkgHandle.getDevlinkInfoMsg)
+}
+
+func (h *Handle) getDevlinkInfoMsg(bus, device string) ([]byte, error) {
+	_, req, err := h.createCmdReq(nl.DEVLINK_CMD_INFO_GET, bus, device)
+	if err != nil {
+		return nil, err
+	}
+
+	response, err := req.Execute(unix.NETLINK_GENERIC, 0)
+	if err != nil {
+		return nil, err
+	}
+
+	if len(response) < 1 {
+		return nil, fmt.Errorf("getDevlinkInfoMsg: message too short")
+	}
+
+	return response[0], nil
+}
+
+func parseInfoMsg(msg []byte) (map[string]string, error) {
+	if len(msg) < nl.SizeofGenlmsg {
+		return nil, fmt.Errorf("parseInfoMsg: message too short")
+	}
+
+	info := make(map[string]string)
+	err := collectInfoData(msg[nl.SizeofGenlmsg:], info)
+
+	if err != nil {
+		return nil, err
+	}
+
+	return info, nil
+}
+
+func collectInfoData(msg []byte, data map[string]string) error {
+	attrs, err := nl.ParseRouteAttr(msg)
+	if err != nil {
+		return err
+	}
+
+	for _, attr := range attrs {
+		switch attr.Attr.Type {
+		case nl.DEVLINK_ATTR_INFO_DRIVER_NAME:
+			data["driver"] = parseInfoValue(attr.Value)
+		case nl.DEVLINK_ATTR_INFO_SERIAL_NUMBER:
+			data["serialNumber"] = parseInfoValue(attr.Value)
+		case nl.DEVLINK_ATTR_INFO_VERSION_RUNNING, nl.DEVLINK_ATTR_INFO_VERSION_FIXED,
+			nl.DEVLINK_ATTR_INFO_VERSION_STORED:
+			key, value, err := getNestedInfoData(attr.Value)
+			if err != nil {
+				return err
+			}
+			data[key] = value
+		}
+	}
+
+	if len(data) == 0 {
+		return fmt.Errorf("collectInfoData: could not read attributes")
+	}
+
+	return nil
+}
+
+func getNestedInfoData(msg []byte) (string, string, error) {
+	nestedAttrs, err := nl.ParseRouteAttr(msg)
+
+	var key, value string
+
+	if err != nil {
+		return "", "", err
+	}
+
+	if len(nestedAttrs) != 2 { // NOTE(review): also rejects more than two attributes, although the message says "too few"
+		return "", "", fmt.Errorf("getNestedInfoData: too few attributes in nested structure")
+	}
+
+	for _, nestedAttr := range nestedAttrs {
+		switch nestedAttr.Attr.Type {
+		case nl.DEVLINK_ATTR_INFO_VERSION_NAME:
+			key = parseInfoValue(nestedAttr.Value)
+		case nl.DEVLINK_ATTR_INFO_VERSION_VALUE:
+			value = parseInfoValue(nestedAttr.Value)
+		}
+	}
+
+	if key == "" { // the name attribute was missing or empty after cleanup
+		return "", "", fmt.Errorf("getNestedInfoData: key not found")
+	}
+
+	if value == "" { // the value attribute was missing or empty after cleanup
+		return "", "", fmt.Errorf("getNestedInfoData: value not found")
+	}
+
+	return key, value, nil
+}
+
+func parseInfoData(data map[string]string) *DevlinkDeviceInfo {
+	info := new(DevlinkDeviceInfo)
+	for key, value := range data {
+		switch key {
+		case "driver":
+			info.Driver = value
+		case "serialNumber":
+			info.SerialNumber = value
+		case "board.id":
+			info.BoardID = value
+		case "fw.app":
+			info.FwApp = value
+		case "fw.app.bundle_id":
+			info.FwAppBoundleID = value
+		case "fw.app.name":
+			info.FwAppName = value
+		case "fw.bundle_id":
+			info.FwBoundleID = value
+		case "fw.mgmt":
+			info.FwMgmt = value
+		case "fw.mgmt.api":
+			info.FwMgmtAPI = value
+		case "fw.mgmt.build":
+			info.FwMgmtBuild = value
+		case "fw.netlist":
+			info.FwNetlist = value
+		case "fw.netlist.build":
+			info.FwNetlistBuild = value
+		case "fw.psid.api":
+			info.FwPsidAPI = value
+		case "fw.undi":
+			info.FwUndi = value
+		}
+	}
+	return info
+}
+
+func parseInfoValue(value []byte) string {
+	v := strings.ReplaceAll(string(value), "\x00", "") // remove every NUL byte, not only a trailing one
+	return strings.TrimSpace(v) // then trim leading and trailing whitespace
+}

+ 86 - 18
vendor/github.com/vishvananda/netlink/filter.go

@@ -157,6 +157,39 @@ func NewConnmarkAction() *ConnmarkAction {
 	}
 }
 
+type CsumUpdateFlags uint32
+
+const (
+	TCA_CSUM_UPDATE_FLAG_IPV4HDR CsumUpdateFlags = 1
+	TCA_CSUM_UPDATE_FLAG_ICMP    CsumUpdateFlags = 2
+	TCA_CSUM_UPDATE_FLAG_IGMP    CsumUpdateFlags = 4
+	TCA_CSUM_UPDATE_FLAG_TCP     CsumUpdateFlags = 8
+	TCA_CSUM_UPDATE_FLAG_UDP     CsumUpdateFlags = 16
+	TCA_CSUM_UPDATE_FLAG_UDPLITE CsumUpdateFlags = 32
+	TCA_CSUM_UPDATE_FLAG_SCTP    CsumUpdateFlags = 64
+)
+
+type CsumAction struct {
+	ActionAttrs
+	UpdateFlags CsumUpdateFlags
+}
+
+func (action *CsumAction) Type() string {
+	return "csum"
+}
+
+func (action *CsumAction) Attrs() *ActionAttrs {
+	return &action.ActionAttrs
+}
+
+func NewCsumAction() *CsumAction {
+	return &CsumAction{
+		ActionAttrs: ActionAttrs{
+			Action: TC_ACT_PIPE,
+		},
+	}
+}
+
 type MirredAct uint8
 
 func (a MirredAct) String() string {
@@ -213,10 +246,11 @@ const (
 
 type TunnelKeyAction struct {
 	ActionAttrs
-	Action  TunnelKeyAct
-	SrcAddr net.IP
-	DstAddr net.IP
-	KeyID   uint32
+	Action   TunnelKeyAct
+	SrcAddr  net.IP
+	DstAddr  net.IP
+	KeyID    uint32
+	DestPort uint16
 }
 
 func (action *TunnelKeyAction) Type() string {
@@ -259,6 +293,40 @@ func NewSkbEditAction() *SkbEditAction {
 	}
 }
 
+type PoliceAction struct {
+	ActionAttrs
+	Rate            uint32 // in byte per second
+	Burst           uint32 // in byte
+	RCellLog        int
+	Mtu             uint32
+	Mpu             uint16 // in byte
+	PeakRate        uint32 // in byte per second
+	PCellLog        int
+	AvRate          uint32 // in byte per second
+	Overhead        uint16
+	LinkLayer       int
+	ExceedAction    TcPolAct
+	NotExceedAction TcPolAct
+}
+
+func (action *PoliceAction) Type() string {
+	return "police"
+}
+
+func (action *PoliceAction) Attrs() *ActionAttrs {
+	return &action.ActionAttrs
+}
+
+func NewPoliceAction() *PoliceAction {
+	return &PoliceAction{
+		RCellLog:        -1,
+		PCellLog:        -1,
+		LinkLayer:       1, // ETHERNET
+		ExceedAction:    TC_POLICE_RECLASSIFY,
+		NotExceedAction: TC_POLICE_OK,
+	}
+}
+
 // MatchAll filters match all packets
 type MatchAll struct {
 	FilterAttrs
@@ -274,20 +342,20 @@ func (filter *MatchAll) Type() string {
 	return "matchall"
 }
 
-type FilterFwAttrs struct {
-	ClassId   uint32
-	InDev     string
-	Mask      uint32
-	Index     uint32
-	Buffer    uint32
-	Mtu       uint32
-	Mpu       uint16
-	Rate      uint32
-	AvRate    uint32
-	PeakRate  uint32
-	Action    TcPolAct
-	Overhead  uint16
-	LinkLayer int
+type FwFilter struct {
+	FilterAttrs
+	ClassId uint32
+	InDev   string
+	Mask    uint32
+	Police  *PoliceAction
+}
+
+func (filter *FwFilter) Attrs() *FilterAttrs {
+	return &filter.FilterAttrs
+}
+
+func (filter *FwFilter) Type() string {
+	return "fw"
 }
 
 type BpfFilter struct {

+ 255 - 93
vendor/github.com/vishvananda/netlink/filter_linux.go

@@ -37,6 +37,7 @@ type U32 struct {
 	ClassId    uint32
 	Divisor    uint32 // Divisor MUST be power of 2.
 	Hash       uint32
+	Link       uint32
 	RedirIndex int
 	Sel        *TcU32Sel
 	Actions    []Action
@@ -50,74 +51,129 @@ func (filter *U32) Type() string {
 	return "u32"
 }
 
-// Fw filter filters on firewall marks
-// NOTE: this is in filter_linux because it refers to nl.TcPolice which
-//       is defined in nl/tc_linux.go
-type Fw struct {
+type Flower struct {
 	FilterAttrs
-	ClassId uint32
-	// TODO remove nl type from interface
-	Police nl.TcPolice
-	InDev  string
-	// TODO Action
-	Mask   uint32
-	AvRate uint32
-	Rtab   [256]uint32
-	Ptab   [256]uint32
-}
-
-func NewFw(attrs FilterAttrs, fattrs FilterFwAttrs) (*Fw, error) {
-	var rtab [256]uint32
-	var ptab [256]uint32
-	rcellLog := -1
-	pcellLog := -1
-	avrate := fattrs.AvRate / 8
-	police := nl.TcPolice{}
-	police.Rate.Rate = fattrs.Rate / 8
-	police.PeakRate.Rate = fattrs.PeakRate / 8
-	buffer := fattrs.Buffer
-	linklayer := nl.LINKLAYER_ETHERNET
+	DestIP        net.IP
+	DestIPMask    net.IPMask
+	SrcIP         net.IP
+	SrcIPMask     net.IPMask
+	EthType       uint16
+	EncDestIP     net.IP
+	EncDestIPMask net.IPMask
+	EncSrcIP      net.IP
+	EncSrcIPMask  net.IPMask
+	EncDestPort   uint16
+	EncKeyId      uint32
+
+	Actions []Action
+}
 
-	if fattrs.LinkLayer != nl.LINKLAYER_UNSPEC {
-		linklayer = fattrs.LinkLayer
-	}
+func (filter *Flower) Attrs() *FilterAttrs {
+	return &filter.FilterAttrs
+}
 
-	police.Action = int32(fattrs.Action)
-	if police.Rate.Rate != 0 {
-		police.Rate.Mpu = fattrs.Mpu
-		police.Rate.Overhead = fattrs.Overhead
-		if CalcRtable(&police.Rate, rtab[:], rcellLog, fattrs.Mtu, linklayer) < 0 {
-			return nil, errors.New("TBF: failed to calculate rate table")
-		}
-		police.Burst = uint32(Xmittime(uint64(police.Rate.Rate), uint32(buffer)))
+func (filter *Flower) Type() string {
+	return "flower"
+}
+
+func (filter *Flower) encodeIP(parent *nl.RtAttr, ip net.IP, mask net.IPMask, v4Type, v6Type int, v4MaskType, v6MaskType int) {
+	ipType := v4Type
+	maskType := v4MaskType
+
+	encodeMask := mask
+	if mask == nil {
+		encodeMask = net.CIDRMask(32, 32)
 	}
-	police.Mtu = fattrs.Mtu
-	if police.PeakRate.Rate != 0 {
-		police.PeakRate.Mpu = fattrs.Mpu
-		police.PeakRate.Overhead = fattrs.Overhead
-		if CalcRtable(&police.PeakRate, ptab[:], pcellLog, fattrs.Mtu, linklayer) < 0 {
-			return nil, errors.New("POLICE: failed to calculate peak rate table")
+	v4IP := ip.To4()
+	if v4IP == nil {
+		ipType = v6Type
+		maskType = v6MaskType
+		if mask == nil {
+			encodeMask = net.CIDRMask(128, 128)
 		}
+	} else {
+		ip = v4IP
 	}
 
-	return &Fw{
-		FilterAttrs: attrs,
-		ClassId:     fattrs.ClassId,
-		InDev:       fattrs.InDev,
-		Mask:        fattrs.Mask,
-		Police:      police,
-		AvRate:      avrate,
-		Rtab:        rtab,
-		Ptab:        ptab,
-	}, nil
+	parent.AddRtAttr(ipType, ip)
+	parent.AddRtAttr(maskType, encodeMask)
 }
 
-func (filter *Fw) Attrs() *FilterAttrs {
-	return &filter.FilterAttrs
+// encode serializes the flower filter as netlink attributes under parent.
+// Match keys are emitted only when set (non-zero or non-nil); the action
+// list attribute is always emitted. It returns the error from
+// EncodeActions, if any.
+func (filter *Flower) encode(parent *nl.RtAttr) error {
+	if filter.EthType != 0 {
+		parent.AddRtAttr(nl.TCA_FLOWER_KEY_ETH_TYPE, htons(filter.EthType))
+	}
+	if filter.SrcIP != nil {
+		filter.encodeIP(parent, filter.SrcIP, filter.SrcIPMask,
+			nl.TCA_FLOWER_KEY_IPV4_SRC, nl.TCA_FLOWER_KEY_IPV6_SRC,
+			nl.TCA_FLOWER_KEY_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_IPV6_SRC_MASK)
+	}
+	if filter.DestIP != nil {
+		filter.encodeIP(parent, filter.DestIP, filter.DestIPMask,
+			nl.TCA_FLOWER_KEY_IPV4_DST, nl.TCA_FLOWER_KEY_IPV6_DST,
+			nl.TCA_FLOWER_KEY_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_IPV6_DST_MASK)
+	}
+	if filter.EncSrcIP != nil {
+		filter.encodeIP(parent, filter.EncSrcIP, filter.EncSrcIPMask,
+			nl.TCA_FLOWER_KEY_ENC_IPV4_SRC, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC,
+			nl.TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK)
+	}
+	if filter.EncDestIP != nil {
+		// The destination address must be paired with the destination mask, not EncSrcIPMask.
+		filter.encodeIP(parent, filter.EncDestIP, filter.EncDestIPMask,
+			nl.TCA_FLOWER_KEY_ENC_IPV4_DST, nl.TCA_FLOWER_KEY_ENC_IPV6_DST,
+			nl.TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_DST_MASK)
+	}
+	if filter.EncDestPort != 0 {
+		parent.AddRtAttr(nl.TCA_FLOWER_KEY_ENC_UDP_DST_PORT, htons(filter.EncDestPort))
+	}
+	if filter.EncKeyId != 0 {
+		parent.AddRtAttr(nl.TCA_FLOWER_KEY_ENC_KEY_ID, htonl(filter.EncKeyId))
+	}
+
+	actionsAttr := parent.AddRtAttr(nl.TCA_FLOWER_ACT, nil)
+	if err := EncodeActions(actionsAttr, filter.Actions); err != nil {
+		return err
+	}
+	return nil
 }
 
-func (filter *Fw) Type() string {
-	return "fw"
+func (filter *Flower) decode(data []syscall.NetlinkRouteAttr) error {
+	for _, datum := range data {
+		switch datum.Attr.Type {
+		case nl.TCA_FLOWER_KEY_ETH_TYPE:
+			filter.EthType = ntohs(datum.Value)
+		case nl.TCA_FLOWER_KEY_IPV4_SRC, nl.TCA_FLOWER_KEY_IPV6_SRC:
+			filter.SrcIP = datum.Value
+		case nl.TCA_FLOWER_KEY_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_IPV6_SRC_MASK:
+			filter.SrcIPMask = datum.Value
+		case nl.TCA_FLOWER_KEY_IPV4_DST, nl.TCA_FLOWER_KEY_IPV6_DST:
+			filter.DestIP = datum.Value
+		case nl.TCA_FLOWER_KEY_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_IPV6_DST_MASK:
+			filter.DestIPMask = datum.Value
+		case nl.TCA_FLOWER_KEY_ENC_IPV4_SRC, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC:
+			filter.EncSrcIP = datum.Value
+		case nl.TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK:
+			filter.EncSrcIPMask = datum.Value
+		case nl.TCA_FLOWER_KEY_ENC_IPV4_DST, nl.TCA_FLOWER_KEY_ENC_IPV6_DST:
+			filter.EncDestIP = datum.Value
+		case nl.TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_DST_MASK:
+			filter.EncDestIPMask = datum.Value
+		case nl.TCA_FLOWER_KEY_ENC_UDP_DST_PORT:
+			filter.EncDestPort = ntohs(datum.Value)
+		case nl.TCA_FLOWER_KEY_ENC_KEY_ID:
+			filter.EncKeyId = ntohl(datum.Value)
+		case nl.TCA_FLOWER_ACT:
+			tables, err := nl.ParseRouteAttr(datum.Value)
+			if err != nil {
+				return err
+			}
+			filter.Actions, err = parseActions(tables)
+			if err != nil {
+				return err
+			}
+		}
+	}
+	return nil
 }
 
 // FilterDel will delete a filter from the system.
@@ -169,7 +225,6 @@ func (h *Handle) FilterReplace(filter Filter) error {
 }
 
 func (h *Handle) filterModify(filter Filter, flags int) error {
-	native = nl.NativeEndian()
 	req := h.newNetlinkRequest(unix.RTM_NEWTFILTER, flags|unix.NLM_F_ACK)
 	base := filter.Attrs()
 	msg := &nl.TcMsg{
@@ -226,6 +281,9 @@ func (h *Handle) filterModify(filter Filter, flags int) error {
 		if filter.Hash != 0 {
 			options.AddRtAttr(nl.TCA_U32_HASH, nl.Uint32Attr(filter.Hash))
 		}
+		if filter.Link != 0 {
+			options.AddRtAttr(nl.TCA_U32_LINK, nl.Uint32Attr(filter.Link))
+		}
 		actionsAttr := options.AddRtAttr(nl.TCA_U32_ACT, nil)
 		// backwards compatibility
 		if filter.RedirIndex != 0 {
@@ -234,7 +292,7 @@ func (h *Handle) filterModify(filter Filter, flags int) error {
 		if err := EncodeActions(actionsAttr, filter.Actions); err != nil {
 			return err
 		}
-	case *Fw:
+	case *FwFilter:
 		if filter.Mask != 0 {
 			b := make([]byte, 4)
 			native.PutUint32(b, filter.Mask)
@@ -243,17 +301,10 @@ func (h *Handle) filterModify(filter Filter, flags int) error {
 		if filter.InDev != "" {
 			options.AddRtAttr(nl.TCA_FW_INDEV, nl.ZeroTerminated(filter.InDev))
 		}
-		if (filter.Police != nl.TcPolice{}) {
-
+		if filter.Police != nil {
 			police := options.AddRtAttr(nl.TCA_FW_POLICE, nil)
-			police.AddRtAttr(nl.TCA_POLICE_TBF, filter.Police.Serialize())
-			if (filter.Police.Rate != nl.TcRateSpec{}) {
-				payload := SerializeRtab(filter.Rtab)
-				police.AddRtAttr(nl.TCA_POLICE_RATE, payload)
-			}
-			if (filter.Police.PeakRate != nl.TcRateSpec{}) {
-				payload := SerializeRtab(filter.Ptab)
-				police.AddRtAttr(nl.TCA_POLICE_PEAKRATE, payload)
+			if err := encodePolice(police, filter.Police); err != nil {
+				return err
 			}
 		}
 		if filter.ClassId != 0 {
@@ -284,6 +335,10 @@ func (h *Handle) filterModify(filter Filter, flags int) error {
 		if filter.ClassId != 0 {
 			options.AddRtAttr(nl.TCA_MATCHALL_CLASSID, nl.Uint32Attr(filter.ClassId))
 		}
+	case *Flower:
+		if err := filter.encode(options); err != nil {
+			return err
+		}
 	}
 
 	req.AddData(options)
@@ -347,11 +402,13 @@ func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) {
 				case "u32":
 					filter = &U32{}
 				case "fw":
-					filter = &Fw{}
+					filter = &FwFilter{}
 				case "bpf":
 					filter = &BpfFilter{}
 				case "matchall":
 					filter = &MatchAll{}
+				case "flower":
+					filter = &Flower{}
 				default:
 					filter = &GenericFilter{FilterType: filterType}
 				}
@@ -381,6 +438,11 @@ func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) {
 					if err != nil {
 						return nil, err
 					}
+				case "flower":
+					detailed, err = parseFlowerData(filter, data)
+					if err != nil {
+						return nil, err
+					}
 				default:
 					detailed = true
 				}
@@ -412,6 +474,53 @@ func toAttrs(tcgen *nl.TcGen, attrs *ActionAttrs) {
 	attrs.Bindcnt = int(tcgen.Bindcnt)
 }
 
+func encodePolice(attr *nl.RtAttr, action *PoliceAction) error { // encodePolice serializes action as TCA_POLICE_* attributes added under attr; it fails only when a rate table cannot be calculated
+	var rtab [256]uint32 // rate tables filled by CalcRtable; each is sent only when the matching rate is non-zero
+	var ptab [256]uint32
+	police := nl.TcPolice{}
+	police.Index = uint32(action.Attrs().Index)
+	police.Bindcnt = int32(action.Attrs().Bindcnt)
+	police.Capab = uint32(action.Attrs().Capab)
+	police.Refcnt = int32(action.Attrs().Refcnt)
+	police.Rate.Rate = action.Rate
+	police.PeakRate.Rate = action.PeakRate
+	police.Action = int32(action.ExceedAction)
+
+	if police.Rate.Rate != 0 {
+		police.Rate.Mpu = action.Mpu
+		police.Rate.Overhead = action.Overhead
+		if CalcRtable(&police.Rate, rtab[:], action.RCellLog, action.Mtu, action.LinkLayer) < 0 { // a negative result is treated as failure
+			return errors.New("TBF: failed to calculate rate table")
+		}
+		police.Burst = Xmittime(uint64(police.Rate.Rate), action.Burst) // Burst is left at zero when no rate is configured
+	}
+
+	police.Mtu = action.Mtu
+	if police.PeakRate.Rate != 0 {
+		police.PeakRate.Mpu = action.Mpu
+		police.PeakRate.Overhead = action.Overhead
+		if CalcRtable(&police.PeakRate, ptab[:], action.PCellLog, action.Mtu, action.LinkLayer) < 0 {
+			return errors.New("POLICE: failed to calculate peak rate table")
+		}
+	}
+
+	attr.AddRtAttr(nl.TCA_POLICE_TBF, police.Serialize()) // the TcPolice struct is always sent; the attributes below are conditional
+	if police.Rate.Rate != 0 {
+		attr.AddRtAttr(nl.TCA_POLICE_RATE, SerializeRtab(rtab))
+	}
+	if police.PeakRate.Rate != 0 {
+		attr.AddRtAttr(nl.TCA_POLICE_PEAKRATE, SerializeRtab(ptab))
+	}
+	if action.AvRate != 0 {
+		attr.AddRtAttr(nl.TCA_POLICE_AVRATE, nl.Uint32Attr(action.AvRate))
+	}
+	if action.NotExceedAction != 0 {
+		attr.AddRtAttr(nl.TCA_POLICE_RESULT, nl.Uint32Attr(uint32(action.NotExceedAction)))
+	}
+
+	return nil
+}
+
 func EncodeActions(attr *nl.RtAttr, actions []Action) error {
 	tabIndex := int(nl.TCA_ACT_TAB)
 
@@ -419,6 +528,14 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error {
 		switch action := action.(type) {
 		default:
 			return fmt.Errorf("unknown action type %s", action.Type())
+		case *PoliceAction:
+			table := attr.AddRtAttr(tabIndex, nil)
+			tabIndex++
+			table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("police"))
+			aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil)
+			if err := encodePolice(aopts, action); err != nil {
+				return err
+			}
 		case *MirredAction:
 			table := attr.AddRtAttr(tabIndex, nil)
 			tabIndex++
@@ -456,6 +573,9 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error {
 				} else {
 					return fmt.Errorf("invalid dst addr %s for tunnel_key action", action.DstAddr)
 				}
+				if action.DestPort != 0 {
+					aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_ENC_DST_PORT, htons(action.DestPort))
+				}
 			}
 		case *SkbEditAction:
 			table := attr.AddRtAttr(tabIndex, nil)
@@ -487,6 +607,16 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error {
 			}
 			toTcGen(action.Attrs(), &connmark.TcGen)
 			aopts.AddRtAttr(nl.TCA_CONNMARK_PARMS, connmark.Serialize())
+		case *CsumAction:
+			table := attr.AddRtAttr(tabIndex, nil)
+			tabIndex++
+			table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("csum"))
+			aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil)
+			csum := nl.TcCsum{
+				UpdateFlags: uint32(action.UpdateFlags),
+			}
+			toTcGen(action.Attrs(), &csum.TcGen)
+			aopts.AddRtAttr(nl.TCA_CSUM_PARMS, csum.Serialize())
 		case *BpfAction:
 			table := attr.AddRtAttr(tabIndex, nil)
 			tabIndex++
@@ -510,6 +640,29 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error {
 	return nil
 }
 
+func parsePolice(data syscall.NetlinkRouteAttr, police *PoliceAction) { // parsePolice applies a single police attribute to police; attribute types without a case below are ignored
+	switch data.Attr.Type {
+	case nl.TCA_POLICE_RESULT:
+		police.NotExceedAction = TcPolAct(native.Uint32(data.Value[0:4])) // assumes a payload of at least 4 bytes; no length check is done here
+	case nl.TCA_POLICE_AVRATE:
+		police.AvRate = native.Uint32(data.Value[0:4])
+	case nl.TCA_POLICE_TBF:
+		p := *nl.DeserializeTcPolice(data.Value)
+		police.ActionAttrs = ActionAttrs{} // reset before copying the generic fields from the deserialized struct
+		police.Attrs().Index = int(p.Index)
+		police.Attrs().Bindcnt = int(p.Bindcnt)
+		police.Attrs().Capab = int(p.Capab)
+		police.Attrs().Refcnt = int(p.Refcnt)
+		police.ExceedAction = TcPolAct(p.Action)
+		police.Rate = p.Rate.Rate
+		police.PeakRate = p.PeakRate.Rate
+		police.Burst = Xmitsize(uint64(p.Rate.Rate), p.Burst) // presumably the inverse of the Xmittime conversion used when encoding (confirm)
+		police.Mtu = p.Mtu
+		police.LinkLayer = int(p.Rate.Linklayer) & nl.TC_LINKLAYER_MASK
+		police.Overhead = p.Rate.Overhead // Mpu, RCellLog and PCellLog are not restored by this function
+	}
+}
+
 func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) {
 	var actions []Action
 	for _, table := range tables {
@@ -532,12 +685,16 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) {
 					action = &BpfAction{}
 				case "connmark":
 					action = &ConnmarkAction{}
+				case "csum":
+					action = &CsumAction{}
 				case "gact":
 					action = &GenericAction{}
 				case "tunnel_key":
 					action = &TunnelKeyAction{}
 				case "skbedit":
 					action = &SkbEditAction{}
+				case "police":
+					action = &PoliceAction{}
 				default:
 					break nextattr
 				}
@@ -566,12 +723,12 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) {
 							action.(*TunnelKeyAction).Action = TunnelKeyAct(tun.Action)
 						case nl.TCA_TUNNEL_KEY_ENC_KEY_ID:
 							action.(*TunnelKeyAction).KeyID = networkOrder.Uint32(adatum.Value[0:4])
-						case nl.TCA_TUNNEL_KEY_ENC_IPV6_SRC:
-						case nl.TCA_TUNNEL_KEY_ENC_IPV4_SRC:
-							action.(*TunnelKeyAction).SrcAddr = net.IP(adatum.Value[:])
-						case nl.TCA_TUNNEL_KEY_ENC_IPV6_DST:
-						case nl.TCA_TUNNEL_KEY_ENC_IPV4_DST:
-							action.(*TunnelKeyAction).DstAddr = net.IP(adatum.Value[:])
+						case nl.TCA_TUNNEL_KEY_ENC_IPV6_SRC, nl.TCA_TUNNEL_KEY_ENC_IPV4_SRC:
+							action.(*TunnelKeyAction).SrcAddr = adatum.Value[:]
+						case nl.TCA_TUNNEL_KEY_ENC_IPV6_DST, nl.TCA_TUNNEL_KEY_ENC_IPV4_DST:
+							action.(*TunnelKeyAction).DstAddr = adatum.Value[:]
+						case nl.TCA_TUNNEL_KEY_ENC_DST_PORT:
+							action.(*TunnelKeyAction).DestPort = ntohs(adatum.Value)
 						}
 					case "skbedit":
 						switch adatum.Attr.Type {
@@ -610,12 +767,22 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) {
 							toAttrs(&connmark.TcGen, action.Attrs())
 							action.(*ConnmarkAction).Zone = connmark.Zone
 						}
+					case "csum":
+						switch adatum.Attr.Type {
+						case nl.TCA_CSUM_PARMS:
+							csum := *nl.DeserializeTcCsum(adatum.Value)
+							action.(*CsumAction).ActionAttrs = ActionAttrs{}
+							toAttrs(&csum.TcGen, action.Attrs())
+							action.(*CsumAction).UpdateFlags = CsumUpdateFlags(csum.UpdateFlags)
+						}
 					case "gact":
 						switch adatum.Attr.Type {
 						case nl.TCA_GACT_PARMS:
 							gen := *nl.DeserializeTcGen(adatum.Value)
 							toAttrs(&gen, action.Attrs())
 						}
+					case "police":
+						parsePolice(adatum, action.(*PoliceAction))
 					}
 				}
 			}
@@ -626,7 +793,6 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) {
 }
 
 func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
-	native = nl.NativeEndian()
 	u32 := filter.(*U32)
 	detailed := false
 	for _, datum := range data {
@@ -664,14 +830,15 @@ func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error)
 			u32.Divisor = native.Uint32(datum.Value)
 		case nl.TCA_U32_HASH:
 			u32.Hash = native.Uint32(datum.Value)
+		case nl.TCA_U32_LINK:
+			u32.Link = native.Uint32(datum.Value)
 		}
 	}
 	return detailed, nil
 }
 
 func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
-	native = nl.NativeEndian()
-	fw := filter.(*Fw)
+	fw := filter.(*FwFilter)
 	detailed := true
 	for _, datum := range data {
 		switch datum.Attr.Type {
@@ -682,24 +849,18 @@ func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
 		case nl.TCA_FW_INDEV:
 			fw.InDev = string(datum.Value[:len(datum.Value)-1])
 		case nl.TCA_FW_POLICE:
+			var police PoliceAction
 			adata, _ := nl.ParseRouteAttr(datum.Value)
 			for _, aattr := range adata {
-				switch aattr.Attr.Type {
-				case nl.TCA_POLICE_TBF:
-					fw.Police = *nl.DeserializeTcPolice(aattr.Value)
-				case nl.TCA_POLICE_RATE:
-					fw.Rtab = DeserializeRtab(aattr.Value)
-				case nl.TCA_POLICE_PEAKRATE:
-					fw.Ptab = DeserializeRtab(aattr.Value)
-				}
+				parsePolice(aattr, &police)
 			}
+			fw.Police = &police
 		}
 	}
 	return detailed, nil
 }
 
 func parseBpfData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
-	native = nl.NativeEndian()
 	bpf := filter.(*BpfFilter)
 	detailed := true
 	for _, datum := range data {
@@ -718,14 +879,13 @@ func parseBpfData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error)
 		case nl.TCA_BPF_ID:
 			bpf.Id = int(native.Uint32(datum.Value[0:4]))
 		case nl.TCA_BPF_TAG:
-			bpf.Tag = hex.EncodeToString(datum.Value[:len(datum.Value)-1])
+			bpf.Tag = hex.EncodeToString(datum.Value)
 		}
 	}
 	return detailed, nil
 }
 
 func parseMatchAllData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
-	native = nl.NativeEndian()
 	matchall := filter.(*MatchAll)
 	detailed := true
 	for _, datum := range data {
@@ -746,6 +906,10 @@ func parseMatchAllData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, er
 	return detailed, nil
 }
 
+func parseFlowerData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { // adapter so FilterList can call Flower.decode through the same shape as the other parseXxxData helpers
+	return true, filter.(*Flower).decode(data) // always reports detailed=true; the unchecked assertion panics if filter is not a *Flower
+}
+
 func AlignToAtm(size uint) uint {
 	var linksize, cells int
 	cells = int(size / nl.ATM_CELL_PAYLOAD)
@@ -783,7 +947,7 @@ func CalcRtable(rate *nl.TcRateSpec, rtab []uint32, cellLog int, mtu uint32, lin
 	}
 	for i := 0; i < 256; i++ {
 		sz = AdjustSize(uint((i+1)<<uint32(cellLog)), uint(mpu), linklayer)
-		rtab[i] = uint32(Xmittime(uint64(bps), uint32(sz)))
+		rtab[i] = Xmittime(uint64(bps), uint32(sz))
 	}
 	rate.CellAlign = -1
 	rate.CellLog = uint8(cellLog)
@@ -793,14 +957,12 @@ func CalcRtable(rate *nl.TcRateSpec, rtab []uint32, cellLog int, mtu uint32, lin
 
 func DeserializeRtab(b []byte) [256]uint32 {
 	var rtab [256]uint32
-	native := nl.NativeEndian()
 	r := bytes.NewReader(b)
 	_ = binary.Read(r, native, &rtab)
 	return rtab
 }
 
 func SerializeRtab(rtab [256]uint32) []byte {
-	native := nl.NativeEndian()
 	var w bytes.Buffer
 	_ = binary.Write(&w, native, rtab)
 	return w.Bytes()

+ 42 - 3
vendor/github.com/vishvananda/netlink/handle_linux.go

@@ -15,12 +15,28 @@ var pkgHandle = &Handle{}
 // Handle is a handle for the netlink requests on a
 // specific network namespace. All the requests on the
 // same netlink family share the same netlink socket,
-// which gets released when the handle is deleted.
+// which gets released when the handle is Close'd.
 type Handle struct {
 	sockets      map[int]*nl.SocketHandle
 	lookupByDump bool
 }
 
+// SetSocketTimeout stores to in nl.SocketTimeoutTv, the timeout for default netlink sockets; values below one microsecond are rejected with an error
+func SetSocketTimeout(to time.Duration) error {
+	if to < time.Microsecond {
+		return fmt.Errorf("invalid timeout, minimum value is %s", time.Microsecond)
+	}
+
+	nl.SocketTimeoutTv = unix.NsecToTimeval(to.Nanoseconds())
+	return nil
+}
+
+// GetSocketTimeout returns nl.SocketTimeoutTv, the timeout value used by default netlink sockets, as a time.Duration
+func GetSocketTimeout() time.Duration {
+	nsec := unix.TimevalToNsec(nl.SocketTimeoutTv)
+	return time.Duration(nsec) * time.Nanosecond // time.Nanosecond is 1, so the multiplication only states the unit
+}
+
 // SupportsNetlinkFamily reports whether the passed netlink family is supported by this Handle
 func (h *Handle) SupportsNetlinkFamily(nlFamily int) bool {
 	_, ok := h.sockets[nlFamily]
@@ -91,6 +107,21 @@ func (h *Handle) GetSocketReceiveBufferSize() ([]int, error) {
 	return results, nil
 }
 
+// SetStrictCheck sets the strict check socket option (NETLINK_GET_STRICT_CHK) to state on each socket in the netlink handle. Returns early with the error if any set operation fails
+func (h *Handle) SetStrictCheck(state bool) error {
+	// The option value is identical for every socket, so it is computed once before the loop.
+	stateInt := 0
+	if state {
+		stateInt = 1
+	}
+	for _, sh := range h.sockets {
+		if err := unix.SetsockoptInt(sh.Socket.GetFd(), unix.SOL_NETLINK, unix.NETLINK_GET_STRICT_CHK, stateInt); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
 // NewHandleAt returns a netlink handle on the network namespace
 // specified by ns. If ns=netns.None(), current network namespace
 // will be assumed
@@ -120,14 +151,22 @@ func newHandle(newNs, curNs netns.NsHandle, nlFamilies ...int) (*Handle, error)
 	return h, nil
 }
 
-// Delete releases the resources allocated to this handle
-func (h *Handle) Delete() {
+// Close releases the resources allocated to this handle
+func (h *Handle) Close() {
 	for _, sh := range h.sockets {
 		sh.Close()
 	}
 	h.sockets = nil
 }
 
+// Delete releases the resources allocated to this handle
+//
+// Deprecated: use Close instead which is in line with typical resource release
+// patterns for files and other resources.
+func (h *Handle) Delete() {
+	h.Close()
+}
+
 func (h *Handle) newNetlinkRequest(proto, flags int) *nl.NetlinkRequest {
 	// Do this so that package API still use nl package variable nextSeqNr
 	if h.sockets == nil {

+ 7 - 1
vendor/github.com/vishvananda/netlink/handle_unspecified.go

@@ -23,6 +23,8 @@ func NewHandleAtFrom(newNs, curNs netns.NsHandle) (*Handle, error) {
 	return nil, ErrNotImplemented
 }
 
+func (h *Handle) Close() {}
+
 func (h *Handle) Delete() {}
 
 func (h *Handle) SupportsNetlinkFamily(nlFamily int) bool {
@@ -85,7 +87,7 @@ func (h *Handle) LinkSetVfRate(link Link, vf, minRate, maxRate int) error {
 	return ErrNotImplemented
 }
 
-func (h *Handle) LinkSetMaster(link Link, master *Bridge) error {
+func (h *Handle) LinkSetMaster(link Link, master Link) error {
 	return ErrNotImplemented
 }
 
@@ -237,6 +239,10 @@ func (h *Handle) RouteAdd(route *Route) error {
 	return ErrNotImplemented
 }
 
+func (h *Handle) RouteAppend(route *Route) error {
+	return ErrNotImplemented
+}
+
 func (h *Handle) RouteDel(route *Route) error {
 	return ErrNotImplemented
 }

+ 31 - 0
vendor/github.com/vishvananda/netlink/inet_diag.go

@@ -0,0 +1,31 @@
+package netlink
+
+// INET_DIAG constants
+const (
+	INET_DIAG_NONE = iota
+	INET_DIAG_MEMINFO
+	INET_DIAG_INFO
+	INET_DIAG_VEGASINFO
+	INET_DIAG_CONG
+	INET_DIAG_TOS
+	INET_DIAG_TCLASS
+	INET_DIAG_SKMEMINFO
+	INET_DIAG_SHUTDOWN
+	INET_DIAG_DCTCPINFO
+	INET_DIAG_PROTOCOL
+	INET_DIAG_SKV6ONLY
+	INET_DIAG_LOCALS
+	INET_DIAG_PEERS
+	INET_DIAG_PAD
+	INET_DIAG_MARK
+	INET_DIAG_BBRINFO
+	INET_DIAG_CLASS_ID
+	INET_DIAG_MD5SIG
+	INET_DIAG_MAX
+)
+
+type InetDiagTCPInfoResp struct { // InetDiagTCPInfoResp groups pointers to a Socket, a TCPInfo and a TCPBBRInfo (all declared elsewhere in the package)
+	InetDiagMsg *Socket
+	TCPInfo     *TCPInfo
+	TCPBBRInfo  *TCPBBRInfo
+}

+ 504 - 0
vendor/github.com/vishvananda/netlink/ipset_linux.go

@@ -0,0 +1,504 @@
+package netlink
+
+import (
+	"encoding/binary"
+	"log"
+	"net"
+	"syscall"
+
+	"github.com/vishvananda/netlink/nl"
+	"golang.org/x/sys/unix"
+)
+
+// IPSetEntry is used for adding, updating, retrieving and deleting entries
+type IPSetEntry struct {
+	Comment  string
+	MAC      net.HardwareAddr
+	IP       net.IP
+	CIDR     uint8
+	Timeout  *uint32 // nil: no timeout attribute is sent on add/del, and none was present when parsed
+	Packets  *uint64
+	Bytes    *uint64
+	Protocol *uint8 // when Port is set and Protocol is nil, ipsetAddDel uses TCP
+	Port     *uint16
+	IP2      net.IP
+	CIDR2    uint8
+	IFace    string
+	Mark     *uint32
+
+	Replace bool // replace existing entry
+}
+
+// IPSetResult is the result of a dump request for a set
+type IPSetResult struct {
+	Nfgenmsg           *nl.Nfgenmsg
+	Protocol           uint8
+	ProtocolMinVersion uint8
+	Revision           uint8
+	Family             uint8
+	Flags              uint8
+	SetName            string
+	TypeName           string
+	Comment            string
+	MarkMask           uint32
+
+	IPFrom   net.IP
+	IPTo     net.IP
+	PortFrom uint16
+	PortTo   uint16
+
+	HashSize     uint32
+	NumEntries   uint32
+	MaxElements  uint32
+	References   uint32
+	SizeInMemory uint32
+	CadtFlags    uint32
+	Timeout      *uint32 // nil unless a timeout attribute was present in the set data
+	LineNo       uint32
+
+	Entries []IPSetEntry // appended to while parsing; one element per member found in the dump
+}
+
+// IpsetCreateOptions is the options struct for creating a new ipset
+type IpsetCreateOptions struct {
+	Replace  bool // replace existing ipset
+	Timeout  *uint32
+	Counters bool
+	Comments bool
+	Skbinfo  bool
+
+	Revision uint8  // 0 selects the per-type default from getIpsetDefaultWithTypeName
+	IPFrom   net.IP // NOTE(review): IPFrom and IPTo are not read by Handle.IpsetCreate as shown in this file
+	IPTo     net.IP
+	PortFrom uint16 // PortFrom and PortTo are only sent for the "bitmap:port" type
+	PortTo   uint16
+}
+
+// IpsetProtocol returns the ipset protocol version from the kernel
+func IpsetProtocol() (uint8, uint8, error) {
+	return pkgHandle.IpsetProtocol()
+}
+
+// IpsetCreate creates a new ipset
+func IpsetCreate(setname, typename string, options IpsetCreateOptions) error {
+	return pkgHandle.IpsetCreate(setname, typename, options)
+}
+
+// IpsetDestroy destroys an existing ipset
+func IpsetDestroy(setname string) error {
+	return pkgHandle.IpsetDestroy(setname)
+}
+
+// IpsetFlush flushes an existing ipset
+func IpsetFlush(setname string) error {
+	return pkgHandle.IpsetFlush(setname)
+}
+
+// IpsetList dumps a specific ipset.
+func IpsetList(setname string) (*IPSetResult, error) {
+	return pkgHandle.IpsetList(setname)
+}
+
+// IpsetListAll dumps all ipsets.
+func IpsetListAll() ([]IPSetResult, error) {
+	return pkgHandle.IpsetListAll()
+}
+
+// IpsetAdd adds an entry to an existing ipset.
+func IpsetAdd(setname string, entry *IPSetEntry) error {
+	return pkgHandle.IpsetAdd(setname, entry)
+}
+
+// IpsetDel deletes an entry from an existing ipset.
+func IpsetDel(setname string, entry *IPSetEntry) error {
+	return pkgHandle.IpsetDel(setname, entry)
+}
+
+func (h *Handle) IpsetProtocol() (protocol uint8, minVersion uint8, err error) { // IpsetProtocol sends IPSET_CMD_PROTOCOL and returns the Protocol and ProtocolMinVersion values parsed from the reply
+	req := h.newIpsetRequest(nl.IPSET_CMD_PROTOCOL)
+	msgs, err := req.Execute(unix.NETLINK_NETFILTER, 0) // executed directly, so errors are not mapped to nl.IPSetError the way ipsetExecute does
+
+	if err != nil {
+		return 0, 0, err
+	}
+	response := ipsetUnserialize(msgs)
+	return response.Protocol, response.ProtocolMinVersion, nil
+}
+
+func (h *Handle) IpsetCreate(setname, typename string, options IpsetCreateOptions) error { // IpsetCreate sends IPSET_CMD_CREATE for a set named setname of type typename, configured from options
+	req := h.newIpsetRequest(nl.IPSET_CMD_CREATE)
+
+	if !options.Replace { // without Replace, NLM_F_EXCL is set on the request
+		req.Flags |= unix.NLM_F_EXCL
+	}
+
+	req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(setname)))
+	req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_TYPENAME, nl.ZeroTerminated(typename)))
+
+	revision := options.Revision
+	if revision == 0 { // a zero Revision means "use the default for this type name"
+		revision = getIpsetDefaultWithTypeName(typename)
+	}
+	req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_REVISION, nl.Uint8Attr(revision)))
+
+	data := nl.NewRtAttr(nl.IPSET_ATTR_DATA|int(nl.NLA_F_NESTED), nil)
+
+	var family uint8
+	switch typename {
+	case "hash:mac": // nothing type-specific is added; family stays 0
+	case "bitmap:port": // the port range is sent as two big-endian uint16 values; family stays 0
+		buf := make([]byte, 4)
+		binary.BigEndian.PutUint16(buf, options.PortFrom)
+		binary.BigEndian.PutUint16(buf[2:], options.PortTo)
+		data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_PORT_FROM|int(nl.NLA_F_NET_BYTEORDER), buf[:2]))
+		data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_PORT_TO|int(nl.NLA_F_NET_BYTEORDER), buf[2:]))
+	default: // every other type name is created with family AF_INET
+		family = unix.AF_INET
+	}
+
+	req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_FAMILY, nl.Uint8Attr(family)))
+
+	if timeout := options.Timeout; timeout != nil {
+		data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_TIMEOUT | nl.NLA_F_NET_BYTEORDER, Value: *timeout})
+	}
+
+	var cadtFlags uint32
+
+	if options.Comments {
+		cadtFlags |= nl.IPSET_FLAG_WITH_COMMENT
+	}
+	if options.Counters {
+		cadtFlags |= nl.IPSET_FLAG_WITH_COUNTERS
+	}
+	if options.Skbinfo {
+		cadtFlags |= nl.IPSET_FLAG_WITH_SKBINFO
+	}
+
+	if cadtFlags != 0 { // the flags attribute is omitted entirely when no extension was requested
+		data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_CADT_FLAGS | nl.NLA_F_NET_BYTEORDER, Value: cadtFlags})
+	}
+
+	req.AddData(data)
+	_, err := ipsetExecute(req)
+	return err
+}
+
+func (h *Handle) IpsetDestroy(setname string) error { // IpsetDestroy sends IPSET_CMD_DESTROY for the set named setname
+	req := h.newIpsetRequest(nl.IPSET_CMD_DESTROY)
+	req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(setname)))
+	_, err := ipsetExecute(req) // the reply payload is discarded; only the error is reported
+	return err
+}
+
+func (h *Handle) IpsetFlush(setname string) error { // IpsetFlush sends IPSET_CMD_FLUSH for the set named setname
+	req := h.newIpsetRequest(nl.IPSET_CMD_FLUSH)
+	req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(setname)))
+	_, err := ipsetExecute(req) // the reply payload is discarded; only the error is reported
+	return err
+}
+
+func (h *Handle) IpsetList(name string) (*IPSetResult, error) { // IpsetList sends IPSET_CMD_LIST for the set called name and returns the parsed result
+	req := h.newIpsetRequest(nl.IPSET_CMD_LIST)
+	req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(name)))
+
+	msgs, err := ipsetExecute(req)
+	if err != nil {
+		return nil, err
+	}
+
+	result := ipsetUnserialize(msgs) // every reply message is folded into this one result
+	return &result, nil
+}
+
+func (h *Handle) IpsetListAll() ([]IPSetResult, error) { // IpsetListAll sends IPSET_CMD_LIST without a set name and parses each reply message separately
+	req := h.newIpsetRequest(nl.IPSET_CMD_LIST)
+
+	msgs, err := ipsetExecute(req)
+	if err != nil {
+		return nil, err
+	}
+
+	result := make([]IPSetResult, len(msgs)) // one result per returned message; NOTE(review): assumes each set arrives in exactly one message — confirm
+	for i, msg := range msgs {
+		result[i].unserialize(msg)
+	}
+
+	return result, nil
+}
+
+// IpsetAdd adds an entry to an existing ipset.
+func (h *Handle) IpsetAdd(setname string, entry *IPSetEntry) error {
+	return h.ipsetAddDel(nl.IPSET_CMD_ADD, setname, entry)
+}
+
+// IpsetDel deletes an entry from an existing ipset.
+func (h *Handle) IpsetDel(setname string, entry *IPSetEntry) error {
+	return h.ipsetAddDel(nl.IPSET_CMD_DEL, setname, entry)
+}
+
+func (h *Handle) ipsetAddDel(nlCmd int, setname string, entry *IPSetEntry) error { // ipsetAddDel sends nlCmd (add or del) for entry on the set named setname; entry is only read, never modified
+	req := h.newIpsetRequest(nlCmd)
+	req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(setname)))
+
+	if entry.Comment != "" {
+		req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_COMMENT, nl.ZeroTerminated(entry.Comment)))
+	}
+
+	data := nl.NewRtAttr(nl.IPSET_ATTR_DATA|int(nl.NLA_F_NESTED), nil)
+
+	if !entry.Replace { // without Replace, NLM_F_EXCL is set on the request
+		req.Flags |= unix.NLM_F_EXCL
+	}
+
+	if entry.Timeout != nil {
+		data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_TIMEOUT | nl.NLA_F_NET_BYTEORDER, Value: *entry.Timeout})
+	}
+
+	if entry.IP != nil { // addresses are wrapped in a nested attribute carrying a single IPSET_ATTR_IP child
+		nestedData := nl.NewRtAttr(nl.IPSET_ATTR_IP|int(nl.NLA_F_NET_BYTEORDER), entry.IP)
+		data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_IP|int(nl.NLA_F_NESTED), nestedData.Serialize()))
+	}
+
+	if entry.MAC != nil {
+		data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_ETHER, entry.MAC))
+	}
+
+	if entry.CIDR != 0 {
+		data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_CIDR, nl.Uint8Attr(entry.CIDR)))
+	}
+
+	if entry.IP2 != nil {
+		nestedData := nl.NewRtAttr(nl.IPSET_ATTR_IP|int(nl.NLA_F_NET_BYTEORDER), entry.IP2)
+		data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_IP2|int(nl.NLA_F_NESTED), nestedData.Serialize()))
+	}
+
+	if entry.CIDR2 != 0 {
+		data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_CIDR2, nl.Uint8Attr(entry.CIDR2)))
+	}
+
+	if entry.Port != nil {
+		// use tcp protocol as default, kept in a local so the caller's entry is not written to
+		proto := uint8(unix.IPPROTO_TCP)
+		if entry.Protocol != nil {
+			proto = *entry.Protocol
+		}
+		data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_PROTO, nl.Uint8Attr(proto)))
+		buf := make([]byte, 2)
+		binary.BigEndian.PutUint16(buf, *entry.Port)
+		data.AddChild(nl.NewRtAttr(int(nl.IPSET_ATTR_PORT|nl.NLA_F_NET_BYTEORDER), buf))
+	}
+
+	if entry.IFace != "" {
+		data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_IFACE, nl.ZeroTerminated(entry.IFace)))
+	}
+
+	if entry.Mark != nil {
+		data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_MARK | nl.NLA_F_NET_BYTEORDER, Value: *entry.Mark})
+	}
+
+	data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_LINENO | nl.NLA_F_NET_BYTEORDER, Value: 0}) // a line number of 0 is always sent
+	req.AddData(data)
+
+	_, err := ipsetExecute(req)
+	return err
+}
+
+func (h *Handle) newIpsetRequest(cmd int) *nl.NetlinkRequest { // newIpsetRequest builds a request for ipset command cmd, pre-filled with the nfgenmsg header and the protocol attribute
+	req := h.newNetlinkRequest(cmd|(unix.NFNL_SUBSYS_IPSET<<8), nl.GetIpsetFlags(cmd)) // message type = ipset subsystem id shifted left by 8, OR'ed with cmd
+
+	// Add the netfilter header
+	msg := &nl.Nfgenmsg{
+		NfgenFamily: uint8(unix.AF_NETLINK),
+		Version:     nl.NFNETLINK_V0,
+		ResId:       0,
+	}
+	req.AddData(msg)
+	req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_PROTOCOL, nl.Uint8Attr(nl.IPSET_PROTOCOL)))
+
+	return req
+}
+
+func getIpsetDefaultWithTypeName(typename string) uint8 { // default revision used by IpsetCreate when options.Revision is 0
+	switch typename {
+	case "hash:ip,port",
+		"hash:ip,port,ip",
+		"hash:ip,port,net",
+		"hash:net,port":
+		return 1
+	}
+	return 0 // every other type name
+}
+
+func ipsetExecute(req *nl.NetlinkRequest) (msgs [][]byte, err error) { // ipsetExecute runs req on NETLINK_NETFILTER and maps errno values >= nl.IPSET_ERR_PRIVATE to nl.IPSetError
+	msgs, err = req.Execute(unix.NETLINK_NETFILTER, 0)
+
+	// Checked assertion: an error that is not a syscall.Errno is returned unchanged instead of panicking; a nil err also yields ok == false.
+	errno, ok := err.(syscall.Errno)
+	if ok && int(errno) >= nl.IPSET_ERR_PRIVATE {
+		err = nl.IPSetError(uintptr(errno))
+	}
+	return
+}
+
+func ipsetUnserialize(msgs [][]byte) (result IPSetResult) { // ipsetUnserialize folds every message in msgs into a single IPSetResult
+	for _, msg := range msgs {
+		result.unserialize(msg) // later messages overwrite scalar fields and append to Entries
+	}
+	return result
+}
+
+func (result *IPSetResult) unserialize(msg []byte) { // unserialize decodes one netlink message into result, field by field
+	result.Nfgenmsg = nl.DeserializeNfgenmsg(msg)
+
+	for attr := range nl.ParseAttributes(msg[4:]) { // attributes start after the first 4 bytes (presumably the nfgenmsg header — confirm); a msg shorter than 4 bytes panics here
+		switch attr.Type {
+		case nl.IPSET_ATTR_PROTOCOL:
+			result.Protocol = attr.Value[0] // single-byte attributes are read without a length check
+		case nl.IPSET_ATTR_SETNAME:
+			result.SetName = nl.BytesToString(attr.Value)
+		case nl.IPSET_ATTR_COMMENT:
+			result.Comment = nl.BytesToString(attr.Value)
+		case nl.IPSET_ATTR_TYPENAME:
+			result.TypeName = nl.BytesToString(attr.Value)
+		case nl.IPSET_ATTR_REVISION:
+			result.Revision = attr.Value[0]
+		case nl.IPSET_ATTR_FAMILY:
+			result.Family = attr.Value[0]
+		case nl.IPSET_ATTR_FLAGS:
+			result.Flags = attr.Value[0]
+		case nl.IPSET_ATTR_DATA | nl.NLA_F_NESTED: // set-level data is handled by parseAttrData
+			result.parseAttrData(attr.Value)
+		case nl.IPSET_ATTR_ADT | nl.NLA_F_NESTED: // the member list is handled by parseAttrADT
+			result.parseAttrADT(attr.Value)
+		case nl.IPSET_ATTR_PROTOCOL_MIN:
+			result.ProtocolMinVersion = attr.Value[0]
+		case nl.IPSET_ATTR_MARKMASK:
+			result.MarkMask = attr.Uint32()
+		default: // unrecognized attributes are logged, not treated as errors
+			log.Printf("unknown ipset attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK)
+		}
+	}
+}
+
+func (result *IPSetResult) parseAttrData(data []byte) { // parseAttrData decodes the nested IPSET_ATTR_DATA payload into the set-level fields of result
+	for attr := range nl.ParseAttributes(data) {
+		switch attr.Type {
+		case nl.IPSET_ATTR_HASHSIZE | nl.NLA_F_NET_BYTEORDER:
+			result.HashSize = attr.Uint32()
+		case nl.IPSET_ATTR_MAXELEM | nl.NLA_F_NET_BYTEORDER:
+			result.MaxElements = attr.Uint32()
+		case nl.IPSET_ATTR_TIMEOUT | nl.NLA_F_NET_BYTEORDER:
+			val := attr.Uint32() // copied into a local so Timeout can point at it; Timeout stays nil when the attribute is absent
+			result.Timeout = &val
+		case nl.IPSET_ATTR_ELEMENTS | nl.NLA_F_NET_BYTEORDER:
+			result.NumEntries = attr.Uint32()
+		case nl.IPSET_ATTR_REFERENCES | nl.NLA_F_NET_BYTEORDER:
+			result.References = attr.Uint32()
+		case nl.IPSET_ATTR_MEMSIZE | nl.NLA_F_NET_BYTEORDER:
+			result.SizeInMemory = attr.Uint32()
+		case nl.IPSET_ATTR_CADT_FLAGS | nl.NLA_F_NET_BYTEORDER:
+			result.CadtFlags = attr.Uint32()
+		case nl.IPSET_ATTR_IP | nl.NLA_F_NESTED:
+			for nested := range nl.ParseAttributes(attr.Value) {
+				switch nested.Type {
+				case nl.IPSET_ATTR_IP | nl.NLA_F_NET_BYTEORDER: // with the byte-order flag the address is recorded as a new entry
+					result.Entries = append(result.Entries, IPSetEntry{IP: nested.Value})
+				case nl.IPSET_ATTR_IP: // without the flag it is stored as IPFrom
+					result.IPFrom = nested.Value
+				default:
+					log.Printf("unknown nested ipset data attribute from kernel: %+v %v", nested, nested.Type&nl.NLA_TYPE_MASK)
+				}
+			}
+		case nl.IPSET_ATTR_IP_TO | nl.NLA_F_NESTED:
+			for nested := range nl.ParseAttributes(attr.Value) {
+				switch nested.Type {
+				case nl.IPSET_ATTR_IP:
+					result.IPTo = nested.Value
+				default:
+					log.Printf("unknown nested ipset data attribute from kernel: %+v %v", nested, nested.Type&nl.NLA_TYPE_MASK)
+				}
+			}
+		case nl.IPSET_ATTR_PORT_FROM | nl.NLA_F_NET_BYTEORDER:
+			result.PortFrom = networkOrder.Uint16(attr.Value)
+		case nl.IPSET_ATTR_PORT_TO | nl.NLA_F_NET_BYTEORDER:
+			result.PortTo = networkOrder.Uint16(attr.Value)
+		case nl.IPSET_ATTR_CADT_LINENO | nl.NLA_F_NET_BYTEORDER:
+			result.LineNo = attr.Uint32()
+		case nl.IPSET_ATTR_COMMENT:
+			result.Comment = nl.BytesToString(attr.Value)
+		case nl.IPSET_ATTR_MARKMASK:
+			result.MarkMask = attr.Uint32()
+		default: // unrecognized attributes are logged, not treated as errors
+			log.Printf("unknown ipset data attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK)
+		}
+	}
+}
+
+func (result *IPSetResult) parseAttrADT(data []byte) { // parseAttrADT appends one IPSetEntry to result.Entries for each nested IPSET_ATTR_DATA attribute in data
+	for attr := range nl.ParseAttributes(data) {
+		switch attr.Type {
+		case nl.IPSET_ATTR_DATA | nl.NLA_F_NESTED:
+			result.Entries = append(result.Entries, parseIPSetEntry(attr.Value))
+		default: // anything else is logged and skipped
+			log.Printf("unknown ADT attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK)
+		}
+	}
+}
+
+func parseIPSetEntry(data []byte) (entry IPSetEntry) { // parseIPSetEntry decodes the attributes of one set member into an IPSetEntry
+	for attr := range nl.ParseAttributes(data) {
+		switch attr.Type {
+		case nl.IPSET_ATTR_TIMEOUT | nl.NLA_F_NET_BYTEORDER:
+			val := attr.Uint32() // optional values are copied into a local and stored by pointer; the field stays nil when the attribute is absent
+			entry.Timeout = &val
+		case nl.IPSET_ATTR_BYTES | nl.NLA_F_NET_BYTEORDER:
+			val := attr.Uint64()
+			entry.Bytes = &val
+		case nl.IPSET_ATTR_PACKETS | nl.NLA_F_NET_BYTEORDER:
+			val := attr.Uint64()
+			entry.Packets = &val
+		case nl.IPSET_ATTR_ETHER:
+			entry.MAC = net.HardwareAddr(attr.Value)
+		case nl.IPSET_ATTR_IP:
+			entry.IP = net.IP(attr.Value)
+		case nl.IPSET_ATTR_COMMENT:
+			entry.Comment = nl.BytesToString(attr.Value)
+		case nl.IPSET_ATTR_IP | nl.NLA_F_NESTED:
+			for attr := range nl.ParseAttributes(attr.Value) { // the inner attr shadows the outer one for the duration of this loop
+				switch attr.Type {
+				case nl.IPSET_ATTR_IP:
+					entry.IP = net.IP(attr.Value)
+				default:
+					log.Printf("unknown nested ADT attribute from kernel: %+v", attr)
+				}
+			}
+		case nl.IPSET_ATTR_IP2 | nl.NLA_F_NESTED:
+			for attr := range nl.ParseAttributes(attr.Value) {
+				switch attr.Type {
+				case nl.IPSET_ATTR_IP:
+					entry.IP2 = net.IP(attr.Value)
+				default:
+					log.Printf("unknown nested ADT attribute from kernel: %+v", attr)
+				}
+			}
+		case nl.IPSET_ATTR_CIDR:
+			entry.CIDR = attr.Value[0] // single-byte attributes are read without a length check
+		case nl.IPSET_ATTR_CIDR2:
+			entry.CIDR2 = attr.Value[0]
+		case nl.IPSET_ATTR_PORT | nl.NLA_F_NET_BYTEORDER:
+			val := networkOrder.Uint16(attr.Value)
+			entry.Port = &val
+		case nl.IPSET_ATTR_PROTO:
+			val := attr.Value[0]
+			entry.Protocol = &val
+		case nl.IPSET_ATTR_IFACE:
+			entry.IFace = nl.BytesToString(attr.Value)
+		case nl.IPSET_ATTR_MARK | nl.NLA_F_NET_BYTEORDER:
+			val := attr.Uint32()
+			entry.Mark = &val
+		default: // unrecognized attributes are logged, not treated as errors
+			log.Printf("unknown ADT attribute from kernel: %+v", attr)
+		}
+	}
+	return
+}

+ 301 - 18
vendor/github.com/vishvananda/netlink/link.go

@@ -35,10 +35,13 @@ type LinkAttrs struct {
 	Alias        string
 	Statistics   *LinkStatistics
 	Promisc      int
+	Allmulti     int
+	Multi        int
 	Xdp          *LinkXdp
 	EncapType    string
 	Protinfo     *Protinfo
 	OperState    LinkOperState
+	PhysSwitchID int
 	NetNsID      int
 	NumTxQueues  int
 	NumRxQueues  int
@@ -65,6 +68,17 @@ type VfInfo struct {
 	LinkState uint32
 	MaxTxRate uint32 // IFLA_VF_RATE Max TxRate
 	MinTxRate uint32 // IFLA_VF_RATE Min TxRate
+	RxPackets uint64
+	TxPackets uint64
+	RxBytes   uint64
+	TxBytes   uint64
+	Multicast uint64
+	Broadcast uint64
+	RxDropped uint64
+	TxDropped uint64
+
+	RssQuery uint32
+	Trust    uint32
 }
 
 // LinkOperState represents the values of the IFLA_OPERSTATE link
@@ -103,7 +117,8 @@ func (s LinkOperState) String() string {
 // NewLinkAttrs returns LinkAttrs structure filled with default values
 func NewLinkAttrs() LinkAttrs {
 	return LinkAttrs{
-		TxQLen: -1,
+		NetNsID: -1,
+		TxQLen:  -1,
 	}
 }
 
@@ -196,10 +211,11 @@ type LinkStatistics64 struct {
 }
 
 type LinkXdp struct {
-	Fd       int
-	Attached bool
-	Flags    uint32
-	ProgId   uint32
+	Fd         int
+	Attached   bool
+	AttachMode uint32
+	Flags      uint32
+	ProgId     uint32
 }
 
 // Device links cannot be created via netlink. These links
@@ -246,6 +262,7 @@ func (ifb *Ifb) Type() string {
 type Bridge struct {
 	LinkAttrs
 	MulticastSnooping *bool
+	AgeingTime        *uint32
 	HelloTime         *uint32
 	VlanFiltering     *bool
 }
@@ -338,6 +355,7 @@ type Veth struct {
 	LinkAttrs
 	PeerName         string // veth on create only
 	PeerHardwareAddr net.HardwareAddr
+	PeerNamespace    interface{}
 }
 
 func (veth *Veth) Attrs() *LinkAttrs {
@@ -348,6 +366,19 @@ func (veth *Veth) Type() string {
 	return "veth"
 }
 
+// Wireguard represents a link of type "wireguard", see https://www.wireguard.com/
+type Wireguard struct {
+	LinkAttrs
+}
+
+// Attrs returns a pointer to the embedded LinkAttrs.
+func (w *Wireguard) Attrs() *LinkAttrs {
+	return &w.LinkAttrs
+}
+
+// Type returns the link kind string, "wireguard".
+func (w *Wireguard) Type() string {
+	return "wireguard"
+}
+
 // GenericLink links represent types that are not currently understood
 // by this netlink library.
 type GenericLink struct {
@@ -428,6 +459,19 @@ func (ipvlan *IPVlan) Type() string {
 	return "ipvlan"
 }
 
+// IPVtap is a virtual interface based on ipvlan; it embeds IPVlan and
+// therefore carries the same fields.
+type IPVtap struct {
+	IPVlan
+}
+
+// Attrs returns a pointer to the LinkAttrs reached through the embedded IPVlan.
+func (tap *IPVtap) Attrs() *LinkAttrs {
+	return &tap.LinkAttrs
+}
+
+// Type returns the link kind string, "ipvtap". It is declared on the value
+// receiver, unlike Attrs.
+func (tap IPVtap) Type() string {
+	return "ipvtap"
+}
+
 // VlanProtocol type
 type VlanProtocol int
 
@@ -527,6 +571,27 @@ const (
 	BOND_ARP_VALIDATE_ALL
 )
 
+// bondArpValidateToString maps every BondArpValidate value to the name
+// returned by BondArpValidate.String. It is the inverse of
+// StringToBondArpValidateMap.
+var bondArpValidateToString = map[BondArpValidate]string{
+	BOND_ARP_VALIDATE_NONE:   "none",
+	BOND_ARP_VALIDATE_ACTIVE: "active",
+	BOND_ARP_VALIDATE_BACKUP: "backup",
+	// Was "none", which made String() misreport ALL and broke the round trip
+	// through StringToBondArpValidateMap.
+	BOND_ARP_VALIDATE_ALL: "all",
+}
+
+// StringToBondArpValidateMap maps the textual names to BondArpValidate values.
+var StringToBondArpValidateMap = map[string]BondArpValidate{
+	"none":   BOND_ARP_VALIDATE_NONE,
+	"active": BOND_ARP_VALIDATE_ACTIVE,
+	"backup": BOND_ARP_VALIDATE_BACKUP,
+	"all":    BOND_ARP_VALIDATE_ALL,
+}
+
+// String returns the textual name of b, or "BondArpValidate(<n>)" when b has
+// no entry in bondArpValidateToString.
+func (b BondArpValidate) String() string {
+	s, ok := bondArpValidateToString[b]
+	if !ok {
+		return fmt.Sprintf("BondArpValidate(%d)", b)
+	}
+	return s
+}
+
 // BondPrimaryReselect type
 type BondPrimaryReselect int
 
@@ -537,6 +602,25 @@ const (
 	BOND_PRIMARY_RESELECT_FAILURE
 )
 
+// bondPrimaryReselectToString holds the names returned by
+// BondPrimaryReselect.String.
+var bondPrimaryReselectToString = map[BondPrimaryReselect]string{
+	BOND_PRIMARY_RESELECT_ALWAYS:  "always",
+	BOND_PRIMARY_RESELECT_BETTER:  "better",
+	BOND_PRIMARY_RESELECT_FAILURE: "failure",
+}
+
+// StringToBondPrimaryReselectMap maps textual names to BondPrimaryReselect values.
+var StringToBondPrimaryReselectMap = map[string]BondPrimaryReselect{
+	"always":  BOND_PRIMARY_RESELECT_ALWAYS,
+	"better":  BOND_PRIMARY_RESELECT_BETTER,
+	"failure": BOND_PRIMARY_RESELECT_FAILURE,
+}
+
+// String returns the textual name of b, or "BondPrimaryReselect(<n>)" for a
+// value without a table entry.
+func (b BondPrimaryReselect) String() string {
+	if name, known := bondPrimaryReselectToString[b]; known {
+		return name
+	}
+	return fmt.Sprintf("BondPrimaryReselect(%d)", b)
+}
+
 // BondArpAllTargets type
 type BondArpAllTargets int
 
@@ -546,6 +630,23 @@ const (
 	BOND_ARP_ALL_TARGETS_ALL
 )
 
+// bondArpAllTargetsToString holds the names returned by BondArpAllTargets.String.
+var bondArpAllTargetsToString = map[BondArpAllTargets]string{
+	BOND_ARP_ALL_TARGETS_ANY: "any",
+	BOND_ARP_ALL_TARGETS_ALL: "all",
+}
+
+// StringToBondArpAllTargetsMap maps textual names to BondArpAllTargets values.
+var StringToBondArpAllTargetsMap = map[string]BondArpAllTargets{
+	"any": BOND_ARP_ALL_TARGETS_ANY,
+	"all": BOND_ARP_ALL_TARGETS_ALL,
+}
+
+// String returns the textual name of b, or "BondArpAllTargets(<n>)" for a
+// value without a table entry.
+func (b BondArpAllTargets) String() string {
+	if name, known := bondArpAllTargetsToString[b]; known {
+		return name
+	}
+	return fmt.Sprintf("BondArpAllTargets(%d)", b)
+}
+
 // BondFailOverMac type
 type BondFailOverMac int
 
@@ -556,6 +657,25 @@ const (
 	BOND_FAIL_OVER_MAC_FOLLOW
 )
 
+// bondFailOverMacToString holds the names returned by BondFailOverMac.String.
+var bondFailOverMacToString = map[BondFailOverMac]string{
+	BOND_FAIL_OVER_MAC_NONE:   "none",
+	BOND_FAIL_OVER_MAC_ACTIVE: "active",
+	BOND_FAIL_OVER_MAC_FOLLOW: "follow",
+}
+
+// StringToBondFailOverMacMap maps textual names to BondFailOverMac values.
+var StringToBondFailOverMacMap = map[string]BondFailOverMac{
+	"none":   BOND_FAIL_OVER_MAC_NONE,
+	"active": BOND_FAIL_OVER_MAC_ACTIVE,
+	"follow": BOND_FAIL_OVER_MAC_FOLLOW,
+}
+
+// String returns the textual name of b, or "BondFailOverMac(<n>)" for a value
+// without a table entry.
+func (b BondFailOverMac) String() string {
+	if name, known := bondFailOverMacToString[b]; known {
+		return name
+	}
+	return fmt.Sprintf("BondFailOverMac(%d)", b)
+}
+
 // BondXmitHashPolicy type
 type BondXmitHashPolicy int
 
@@ -647,6 +767,25 @@ const (
 	BOND_AD_SELECT_COUNT
 )
 
+// bondAdSelectToString holds the names returned by BondAdSelect.String.
+var bondAdSelectToString = map[BondAdSelect]string{
+	BOND_AD_SELECT_STABLE:    "stable",
+	BOND_AD_SELECT_BANDWIDTH: "bandwidth",
+	BOND_AD_SELECT_COUNT:     "count",
+}
+
+// StringToBondAdSelectMap maps textual names to BondAdSelect values.
+var StringToBondAdSelectMap = map[string]BondAdSelect{
+	"stable":    BOND_AD_SELECT_STABLE,
+	"bandwidth": BOND_AD_SELECT_BANDWIDTH,
+	"count":     BOND_AD_SELECT_COUNT,
+}
+
+// String returns the textual name of b, or "BondAdSelect(<n>)" for a value
+// without a table entry.
+func (b BondAdSelect) String() string {
+	if name, known := bondAdSelectToString[b]; known {
+		return name
+	}
+	return fmt.Sprintf("BondAdSelect(%d)", b)
+}
+
 // BondAdInfo represents ad info for bond
 type BondAdInfo struct {
 	AggregatorId int
@@ -678,7 +817,7 @@ type Bond struct {
 	AllSlavesActive int
 	MinLinks        int
 	LpInterval      int
-	PackersPerSlave int
+	PacketsPerSlave int
 	LacpRate        BondLacpRate
 	AdSelect        BondAdSelect
 	// looking at iproute tool AdInfo can only be retrived. It can't be set.
@@ -711,7 +850,7 @@ func NewLinkBond(atr LinkAttrs) *Bond {
 		AllSlavesActive: -1,
 		MinLinks:        -1,
 		LpInterval:      -1,
-		PackersPerSlave: -1,
+		PacketsPerSlave: -1,
 		LacpRate:        -1,
 		AdSelect:        -1,
 		AdActorSysPrio:  -1,
@@ -761,8 +900,10 @@ func (bond *Bond) Type() string {
 type BondSlaveState uint8
 
 const (
-	BondStateActive = iota // Link is active.
-	BondStateBackup        // Link is backup.
+	//BondStateActive Link is active.
+	BondStateActive BondSlaveState = iota
+	//BondStateBackup Link is backup.
+	BondStateBackup
 )
 
 func (s BondSlaveState) String() string {
@@ -776,15 +917,19 @@ func (s BondSlaveState) String() string {
 	}
 }
 
-// BondSlaveState represents the values of the IFLA_BOND_SLAVE_MII_STATUS bond slave
+// BondSlaveMiiStatus represents the values of the IFLA_BOND_SLAVE_MII_STATUS bond slave
 // attribute, which contains the status of MII link monitoring
 type BondSlaveMiiStatus uint8
 
 const (
-	BondLinkUp   = iota // link is up and running.
-	BondLinkFail        // link has just gone down.
-	BondLinkDown        // link has been down for too long time.
-	BondLinkBack        // link is going back.
+	//BondLinkUp link is up and running.
+	BondLinkUp BondSlaveMiiStatus = iota
+	//BondLinkFail link has just gone down.
+	BondLinkFail
+	//BondLinkDown link has been down for too long time.
+	BondLinkDown
+	//BondLinkBack link is going back.
+	BondLinkBack
 )
 
 func (s BondSlaveMiiStatus) String() string {
@@ -817,6 +962,38 @@ func (b *BondSlave) SlaveType() string {
 	return "bond"
 }
 
+// VrfSlave carries the slave-side data of a link whose slave kind is "vrf".
+type VrfSlave struct {
+	Table uint32
+}
+
+// SlaveType returns the slave kind string, "vrf".
+func (slave *VrfSlave) SlaveType() string {
+	return "vrf"
+}
+
+// Geneve is a link of type "geneve". On create, RemoteIP and ID (VNI) must
+// be specified, see
+// https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/drivers/net/geneve.c#L1209-L1223
+type Geneve struct {
+	LinkAttrs
+	ID             uint32 // vni
+	Remote         net.IP
+	Ttl            uint8
+	Tos            uint8
+	Dport          uint16
+	UdpCsum        uint8
+	UdpZeroCsum6Tx uint8
+	UdpZeroCsum6Rx uint8
+	Link           uint32
+	FlowBased      bool
+}
+
+// Attrs returns a pointer to the embedded LinkAttrs.
+func (g *Geneve) Attrs() *LinkAttrs {
+	return &g.LinkAttrs
+}
+
+// Type returns the link kind string, "geneve".
+func (g *Geneve) Type() string {
+	return "geneve"
+}
+
 // Gretap devices must specify LocalIP and RemoteIP on create
 type Gretap struct {
 	LinkAttrs
@@ -861,6 +1038,7 @@ type Iptun struct {
 	EncapType  uint16
 	EncapFlags uint16
 	FlowBased  bool
+	Proto      uint8
 }
 
 func (iptun *Iptun) Attrs() *LinkAttrs {
@@ -878,10 +1056,14 @@ type Ip6tnl struct {
 	Remote     net.IP
 	Ttl        uint8
 	Tos        uint8
-	EncapLimit uint8
 	Flags      uint32
 	Proto      uint8
 	FlowInfo   uint32
+	EncapLimit uint8
+	EncapType  uint16
+	EncapFlags uint16
+	EncapSport uint16
+	EncapDport uint16
 }
 
 func (ip6tnl *Ip6tnl) Attrs() *LinkAttrs {
@@ -892,14 +1074,47 @@ func (ip6tnl *Ip6tnl) Type() string {
 	return "ip6tnl"
 }
 
+// TunnelEncapType is the tunnel encapsulation type, from
+// https://elixir.bootlin.com/linux/v5.15.4/source/include/uapi/linux/if_tunnel.h#L84
+type TunnelEncapType uint16
+
+const (
+	None TunnelEncapType = iota
+	FOU
+	GUE
+)
+
+// TunnelEncapFlag is a bit mask of tunnel encapsulation flags, from
+// https://elixir.bootlin.com/linux/v5.15.4/source/include/uapi/linux/if_tunnel.h#L91
+//
+// Only CSum carries the named type; CSum6 and RemCSum are untyped constants
+// and are kept that way so existing callers keep compiling.
+type TunnelEncapFlag uint16
+
+const (
+	CSum    TunnelEncapFlag = 1 << 0
+	CSum6                   = 1 << 1
+	RemCSum                 = 1 << 2
+)
+
+// IP6TunnelFlag is a bit mask of ip6 tunnel flags, from
+// https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/ip6_tunnel.h#L12
+//
+// The kernel header defines these values in hexadecimal. The last three were
+// previously written as decimal 10, 20 and 40, which are not single bits and
+// alias combinations of other flags. Only the first constant carries the
+// named type; the rest are untyped constants, kept for compatibility.
+type IP6TunnelFlag uint16
+
+const (
+	IP6_TNL_F_IGN_ENCAP_LIMIT    IP6TunnelFlag = 0x1  // don't add encapsulation limit if one isn't present in inner packet
+	IP6_TNL_F_USE_ORIG_TCLASS                  = 0x2  // copy the traffic class field from the inner packet
+	IP6_TNL_F_USE_ORIG_FLOWLABEL               = 0x4  // copy the flowlabel from the inner packet
+	IP6_TNL_F_MIP6_DEV                         = 0x8  // being used for Mobile IPv6
+	IP6_TNL_F_RCV_DSCP_COPY                    = 0x10 // copy DSCP from the outer packet
+	IP6_TNL_F_USE_ORIG_FWMARK                  = 0x20 // copy fwmark from inner packet
+	IP6_TNL_F_ALLOW_LOCAL_REMOTE               = 0x40 // allow remote endpoint on the local node
+)
+
 type Sittun struct {
 	LinkAttrs
 	Link       uint32
-	Local      net.IP
-	Remote     net.IP
 	Ttl        uint8
 	Tos        uint8
 	PMtuDisc   uint8
+	Proto      uint8
+	Local      net.IP
+	Remote     net.IP
+	EncapLimit uint8
 	EncapType  uint16
 	EncapFlags uint16
 	EncapSport uint16
@@ -1034,6 +1249,58 @@ var StringToIPoIBMode = map[string]IPoIBMode{
 	"connected": IPOIB_MODE_CONNECTED,
 }
 
+// CAN_STATE_* are consecutive untyped integer constants starting at 0.
+// NOTE(review): presumably these are the values stored in Can.State and
+// mirror the kernel's CAN controller states — confirm against the parser.
+const (
+	CAN_STATE_ERROR_ACTIVE = iota
+	CAN_STATE_ERROR_WARNING
+	CAN_STATE_ERROR_PASSIVE
+	CAN_STATE_BUS_OFF
+	CAN_STATE_STOPPED
+	CAN_STATE_SLEEPING
+)
+
+// Can is a link of type "can". The fields are grouped by blank lines.
+// NOTE(review): each group looks like one netlink attribute decoded by the
+// CAN data parser (not visible here) — confirm grouping and units there.
+type Can struct {
+	LinkAttrs
+
+	BitRate            uint32
+	SamplePoint        uint32
+	TimeQuanta         uint32
+	PropagationSegment uint32
+	PhaseSegment1      uint32
+	PhaseSegment2      uint32
+	SyncJumpWidth      uint32
+	BitRatePreScaler   uint32
+
+	Name                string
+	TimeSegment1Min     uint32
+	TimeSegment1Max     uint32
+	TimeSegment2Min     uint32
+	TimeSegment2Max     uint32
+	SyncJumpWidthMax    uint32
+	BitRatePreScalerMin uint32
+	BitRatePreScalerMax uint32
+	BitRatePreScalerInc uint32
+
+	ClockFrequency uint32
+
+	State uint32
+
+	Mask  uint32
+	Flags uint32
+
+	TxError uint16
+	RxError uint16
+
+	RestartMs uint32
+}
+
+// Attrs returns a pointer to the embedded LinkAttrs.
+func (can *Can) Attrs() *LinkAttrs {
+	return &can.LinkAttrs
+}
+
+// Type returns the link kind string, "can".
+func (can *Can) Type() string {
+	return "can"
+}
+
 type IPoIB struct {
 	LinkAttrs
 	Pkey   uint16
@@ -1049,11 +1316,27 @@ func (ipoib *IPoIB) Type() string {
 	return "ipoib"
 }
 
+// BareUDP is a link of type "bareudp".
+type BareUDP struct {
+	LinkAttrs
+	Port       uint16
+	EtherType  uint16
+	SrcPortMin uint16
+	MultiProto bool
+}
+
+// Attrs returns a pointer to the embedded LinkAttrs.
+func (b *BareUDP) Attrs() *LinkAttrs {
+	return &b.LinkAttrs
+}
+
+// Type returns the link kind string, "bareudp".
+func (b *BareUDP) Type() string {
+	return "bareudp"
+}
+
 // iproute2 supported devices;
 // vlan | veth | vcan | dummy | ifb | macvlan | macvtap |
 // bridge | bond | ipoib | ip6tnl | ipip | sit | vxlan |
 // gre | gretap | ip6gre | ip6gretap | vti | vti6 | nlmon |
-// bond_slave | ipvlan | xfrm
+// bond_slave | ipvlan | xfrm | bareudp
 
 // LinkNotFoundError wraps the various not found errors when
 // getting/reading links. This is intended for better error

+ 483 - 36
vendor/github.com/vishvananda/netlink/link_linux.go

@@ -34,14 +34,27 @@ const (
 	TUNTAP_MULTI_QUEUE_DEFAULTS TuntapFlag = TUNTAP_MULTI_QUEUE | TUNTAP_NO_PI
 )
 
+// StringToTuntapModeMap maps the textual mode names to TuntapMode values.
+var StringToTuntapModeMap = map[string]TuntapMode{
+	"tun": TUNTAP_MODE_TUN,
+	"tap": TUNTAP_MODE_TAP,
+}
+
+// String returns "tun" or "tap" for the two known modes and "unknown" for
+// any other value.
+func (ttm TuntapMode) String() string {
+	if ttm == TUNTAP_MODE_TUN {
+		return "tun"
+	}
+	if ttm == TUNTAP_MODE_TAP {
+		return "tap"
+	}
+	return "unknown"
+}
+
 const (
 	VF_LINK_STATE_AUTO    uint32 = 0
 	VF_LINK_STATE_ENABLE  uint32 = 1
 	VF_LINK_STATE_DISABLE uint32 = 2
 )
 
-var lookupByDump = false
-
 var macvlanModes = [...]uint32{
 	0,
 	nl.MACVLAN_MODE_PRIVATE,
@@ -138,7 +151,6 @@ func (h *Handle) LinkSetAllmulticastOn(link Link) error {
 	msg := nl.NewIfInfomsg(unix.AF_UNSPEC)
 	msg.Change = unix.IFF_ALLMULTI
 	msg.Flags = unix.IFF_ALLMULTI
-
 	msg.Index = int32(base.Index)
 	req.AddData(msg)
 
@@ -168,6 +180,51 @@ func (h *Handle) LinkSetAllmulticastOff(link Link) error {
 	return err
 }
 
+// LinkSetMulticastOn enables the reception of multicast packets for the link device.
+// Equivalent to: `ip link set $link multicast on`
+//
+// It delegates to the method of the same name on the package-level handle.
+func LinkSetMulticastOn(link Link) error {
+	return pkgHandle.LinkSetMulticastOn(link)
+}
+
+// LinkSetMulticastOn turns on the IFF_MULTICAST flag of the link device by
+// sending an RTM_NEWLINK request and waiting for the acknowledgement.
+// Equivalent to: `ip link set $link multicast on`
+func (h *Handle) LinkSetMulticastOn(link Link) error {
+	attrs := link.Attrs()
+	h.ensureIndex(attrs)
+	request := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK)
+
+	// Change selects the flag bit to touch, Flags carries its new value.
+	ifInfo := nl.NewIfInfomsg(unix.AF_UNSPEC)
+	ifInfo.Index = int32(attrs.Index)
+	ifInfo.Flags = unix.IFF_MULTICAST
+	ifInfo.Change = unix.IFF_MULTICAST
+	request.AddData(ifInfo)
+
+	_, err := request.Execute(unix.NETLINK_ROUTE, 0)
+	return err
+}
+
+// LinkSetMulticastOff disables the reception of multicast packets for the link device.
+// Equivalent to: `ip link set $link multicast off`
+//
+// It delegates to the method of the same name on the package-level handle.
+func LinkSetMulticastOff(link Link) error {
+	return pkgHandle.LinkSetMulticastOff(link)
+}
+
+// LinkSetMulticastOff clears the IFF_MULTICAST flag of the link device by
+// sending an RTM_NEWLINK request and waiting for the acknowledgement.
+// Equivalent to: `ip link set $link multicast off`
+func (h *Handle) LinkSetMulticastOff(link Link) error {
+	attrs := link.Attrs()
+	h.ensureIndex(attrs)
+	request := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK)
+
+	// Only Change is set: the flag bit is selected while Flags stays zero,
+	// which clears it.
+	ifInfo := nl.NewIfInfomsg(unix.AF_UNSPEC)
+	ifInfo.Index = int32(attrs.Index)
+	ifInfo.Change = unix.IFF_MULTICAST
+	request.AddData(ifInfo)
+
+	_, err := request.Execute(unix.NETLINK_ROUTE, 0)
+	return err
+}
+
 func MacvlanMACAddrAdd(link Link, addr net.HardwareAddr) error {
 	return pkgHandle.MacvlanMACAddrAdd(link, addr)
 }
@@ -237,6 +294,37 @@ func (h *Handle) macvlanMACAddrChange(link Link, addrs []net.HardwareAddr, mode
 	return err
 }
 
+// LinkSetMacvlanMode sets the mode of a macvlan or macvtap link device.
+// Note that a link cannot be switched into or out of passthrough mode; such
+// a request will fail.
+// Equivalent to: `ip link set $link type (macvlan|macvtap) mode $mode`
+//
+// It delegates to the method of the same name on the package-level handle.
+func LinkSetMacvlanMode(link Link, mode MacvlanMode) error {
+	return pkgHandle.LinkSetMacvlanMode(link, mode)
+}
+
+// LinkSetMacvlanMode sets the mode of the macvlan or macvtap link device.
+// Note that a link cannot be switched into or out of passthrough mode; such
+// a request will fail.
+// Equivalent to: `ip link set $link type (macvlan|macvtap) mode $mode`
+//
+// An error is returned, without sending anything, when mode has no entry in
+// the macvlanModes table. Otherwise the result of executing the RTM_NEWLINK
+// request is returned.
+func (h *Handle) LinkSetMacvlanMode(link Link, mode MacvlanMode) error {
+	// Indexing macvlanModes with an out-of-range mode would panic, so
+	// validate before building the request.
+	modeIdx := int(mode)
+	if modeIdx < 0 || modeIdx >= len(macvlanModes) {
+		return fmt.Errorf("unknown macvlan mode %d", modeIdx)
+	}
+
+	base := link.Attrs()
+	h.ensureIndex(base)
+	req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK)
+
+	msg := nl.NewIfInfomsg(unix.AF_UNSPEC)
+	msg.Index = int32(base.Index)
+	req.AddData(msg)
+
+	// IFLA_LINKINFO { IFLA_INFO_KIND, IFLA_INFO_DATA { IFLA_MACVLAN_MODE } }
+	linkInfo := nl.NewRtAttr(unix.IFLA_LINKINFO, nil)
+	linkInfo.AddRtAttr(nl.IFLA_INFO_KIND, nl.NonZeroTerminated(link.Type()))
+
+	data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
+	data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[modeIdx]))
+
+	req.AddData(linkInfo)
+
+	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
+	return err
+}
+
 func BridgeSetMcastSnoop(link Link, on bool) error {
 	return pkgHandle.BridgeSetMcastSnoop(link, on)
 }
@@ -247,6 +335,16 @@ func (h *Handle) BridgeSetMcastSnoop(link Link, on bool) error {
 	return h.linkModify(bridge, unix.NLM_F_ACK)
 }
 
+// BridgeSetVlanFiltering sets the VlanFiltering option of the bridge link to
+// on, using the package-level handle.
+func BridgeSetVlanFiltering(link Link, on bool) error {
+	return pkgHandle.BridgeSetVlanFiltering(link, on)
+}
+
+// BridgeSetVlanFiltering stores on in the VlanFiltering field of link and
+// pushes the bridge through linkModify with NLM_F_ACK. link must be a
+// *Bridge; any other type makes the type assertion panic. The passed-in
+// bridge value itself is modified.
+func (h *Handle) BridgeSetVlanFiltering(link Link, on bool) error {
+	br := link.(*Bridge)
+	enabled := on
+	br.VlanFiltering = &enabled
+	return h.linkModify(br, unix.NLM_F_ACK)
+}
+
 func SetPromiscOn(link Link) error {
 	return pkgHandle.SetPromiscOn(link)
 }
@@ -491,13 +589,13 @@ func (h *Handle) LinkSetVfVlanQos(link Link, vf, vlan, qos int) error {
 	req.AddData(msg)
 
 	data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil)
-	info := nl.NewRtAttrChild(data, nl.IFLA_VF_INFO, nil)
+	info := data.AddRtAttr(nl.IFLA_VF_INFO, nil)
 	vfmsg := nl.VfVlan{
 		Vf:   uint32(vf),
 		Vlan: uint32(vlan),
 		Qos:  uint32(qos),
 	}
-	nl.NewRtAttrChild(info, nl.IFLA_VF_VLAN, vfmsg.Serialize())
+	info.AddRtAttr(nl.IFLA_VF_VLAN, vfmsg.Serialize())
 	req.AddData(data)
 
 	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
@@ -1005,8 +1103,8 @@ func addBondAttrs(bond *Bond, linkInfo *nl.RtAttr) {
 	if bond.LpInterval >= 0 {
 		data.AddRtAttr(nl.IFLA_BOND_LP_INTERVAL, nl.Uint32Attr(uint32(bond.LpInterval)))
 	}
-	if bond.PackersPerSlave >= 0 {
-		data.AddRtAttr(nl.IFLA_BOND_PACKETS_PER_SLAVE, nl.Uint32Attr(uint32(bond.PackersPerSlave)))
+	if bond.PacketsPerSlave >= 0 {
+		data.AddRtAttr(nl.IFLA_BOND_PACKETS_PER_SLAVE, nl.Uint32Attr(uint32(bond.PacketsPerSlave)))
 	}
 	if bond.LacpRate >= 0 {
 		data.AddRtAttr(nl.IFLA_BOND_AD_LACP_RATE, nl.Uint8Attr(uint8(bond.LacpRate)))
@@ -1048,6 +1146,14 @@ func (h *Handle) LinkAdd(link Link) error {
 	return h.linkModify(link, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK)
 }
 
+// LinkModify calls LinkModify on the package-level handle for link.
+func LinkModify(link Link) error {
+	return pkgHandle.LinkModify(link)
+}
+
+// LinkModify runs linkModify for link with NLM_F_REQUEST|NLM_F_ACK. Unlike
+// LinkAdd, it sets neither NLM_F_CREATE nor NLM_F_EXCL.
+func (h *Handle) LinkModify(link Link) error {
+	return h.linkModify(link, unix.NLM_F_REQUEST|unix.NLM_F_ACK)
+}
+
 func (h *Handle) linkModify(link Link, flags int) error {
 	// TODO: support extra data for macvlan
 	base := link.Attrs()
@@ -1060,8 +1166,6 @@ func (h *Handle) linkModify(link Link, flags int) error {
 	}
 
 	if isTuntap {
-		// TODO: support user
-		// TODO: support group
 		if tuntap.Mode < unix.IFF_TUN || tuntap.Mode > unix.IFF_TAP {
 			return fmt.Errorf("Tuntap.Mode %v unknown", tuntap.Mode)
 		}
@@ -1089,21 +1193,64 @@ func (h *Handle) linkModify(link Link, flags int) error {
 		}
 
 		req.Flags |= uint16(tuntap.Mode)
-
+		const TUN = "/dev/net/tun"
 		for i := 0; i < queues; i++ {
 			localReq := req
-			file, err := os.OpenFile("/dev/net/tun", os.O_RDWR, 0)
+			fd, err := unix.Open(TUN, os.O_RDWR|syscall.O_CLOEXEC, 0)
 			if err != nil {
 				cleanupFds(fds)
 				return err
 			}
 
-			fds = append(fds, file)
-			_, _, errno := unix.Syscall(unix.SYS_IOCTL, file.Fd(), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&localReq)))
+			_, _, errno := unix.Syscall(unix.SYS_IOCTL, uintptr(fd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&localReq)))
 			if errno != 0 {
+				// close the new fd
+				unix.Close(fd)
+				// and the already opened ones
 				cleanupFds(fds)
 				return fmt.Errorf("Tuntap IOCTL TUNSETIFF failed [%d], errno %v", i, errno)
 			}
+
+			_, _, errno = syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.TUNSETOWNER, uintptr(tuntap.Owner))
+			if errno != 0 {
+				cleanupFds(fds)
+				return fmt.Errorf("Tuntap IOCTL TUNSETOWNER failed [%d], errno %v", i, errno)
+			}
+
+			_, _, errno = syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.TUNSETGROUP, uintptr(tuntap.Group))
+			if errno != 0 {
+				cleanupFds(fds)
+				return fmt.Errorf("Tuntap IOCTL TUNSETGROUP failed [%d], errno %v", i, errno)
+			}
+
+			// Set the tun device to non-blocking before use. The below comment
+			// taken from:
+			//
+			// https://github.com/mistsys/tuntap/commit/161418c25003bbee77d085a34af64d189df62bea
+			//
+			// Note there is a complication because in go, if a device node is
+			// opened, go sets it to use nonblocking I/O. However a /dev/net/tun
+			// doesn't work with epoll until after the TUNSETIFF ioctl has been
+			// done. So we open the unix fd directly, do the ioctl, then put the
+			// fd in nonblocking mode, an then finally wrap it in a os.File,
+			// which will see the nonblocking mode and add the fd to the
+			// pollable set, so later on when we Read() from it blocked the
+			// calling thread in the kernel.
+			//
+			// See
+			//   https://github.com/golang/go/issues/30426
+			// which got exposed in go 1.13 by the fix to
+			//   https://github.com/golang/go/issues/30624
+			err = unix.SetNonblock(fd, true)
+			if err != nil {
+				cleanupFds(fds)
+				return fmt.Errorf("Tuntap set to non-blocking failed [%d], err %v", i, err)
+			}
+
+			// create the file from the file descriptor and store it
+			file := os.NewFile(uintptr(fd), TUN)
+			fds = append(fds, file)
+
 			// 1) we only care for the name of the first tap in the multi queue set
 			// 2) if the original name was empty, the localReq has now the actual name
 			//
@@ -1114,11 +1261,29 @@ func (h *Handle) linkModify(link Link, flags int) error {
 			if i == 0 {
 				link.Attrs().Name = strings.Trim(string(localReq.Name[:]), "\x00")
 			}
+
+		}
+
+		control := func(file *os.File, f func(fd uintptr)) error {
+			name := file.Name()
+			conn, err := file.SyscallConn()
+			if err != nil {
+				return fmt.Errorf("SyscallConn() failed on %s: %v", name, err)
+			}
+			if err := conn.Control(f); err != nil {
+				return fmt.Errorf("Failed to get file descriptor for %s: %v", name, err)
+			}
+			return nil
 		}
 
 		// only persist interface if NonPersist is NOT set
 		if !tuntap.NonPersist {
-			_, _, errno := unix.Syscall(unix.SYS_IOCTL, fds[0].Fd(), uintptr(unix.TUNSETPERSIST), 1)
+			var errno syscall.Errno
+			if err := control(fds[0], func(fd uintptr) {
+				_, _, errno = unix.Syscall(unix.SYS_IOCTL, fd, uintptr(unix.TUNSETPERSIST), 1)
+			}); err != nil {
+				return err
+			}
 			if errno != 0 {
 				cleanupFds(fds)
 				return fmt.Errorf("Tuntap IOCTL TUNSETPERSIST failed, errno %v", errno)
@@ -1135,7 +1300,10 @@ func (h *Handle) linkModify(link Link, flags int) error {
 				// un-persist (e.g. allow the interface to be removed) the tuntap
 				// should not hurt if not set prior, condition might be not needed
 				if !tuntap.NonPersist {
-					_, _, _ = unix.Syscall(unix.SYS_IOCTL, fds[0].Fd(), uintptr(unix.TUNSETPERSIST), 0)
+					// ignore error
+					_ = control(fds[0], func(fd uintptr) {
+						_, _, _ = unix.Syscall(unix.SYS_IOCTL, fd, uintptr(unix.TUNSETPERSIST), 0)
+					})
 				}
 				cleanupFds(fds)
 				return err
@@ -1193,6 +1361,11 @@ func (h *Handle) linkModify(link Link, flags int) error {
 	nameData := nl.NewRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(base.Name))
 	req.AddData(nameData)
 
+	if base.Alias != "" {
+		alias := nl.NewRtAttr(unix.IFLA_IFALIAS, []byte(base.Alias))
+		req.AddData(alias)
+	}
+
 	if base.MTU > 0 {
 		mtu := nl.NewRtAttr(unix.IFLA_MTU, nl.Uint32Attr(uint32(base.MTU)))
 		req.AddData(mtu)
@@ -1272,12 +1445,28 @@ func (h *Handle) linkModify(link Link, flags int) error {
 		if base.TxQLen >= 0 {
 			peer.AddRtAttr(unix.IFLA_TXQLEN, nl.Uint32Attr(uint32(base.TxQLen)))
 		}
+		if base.NumTxQueues > 0 {
+			peer.AddRtAttr(unix.IFLA_NUM_TX_QUEUES, nl.Uint32Attr(uint32(base.NumTxQueues)))
+		}
+		if base.NumRxQueues > 0 {
+			peer.AddRtAttr(unix.IFLA_NUM_RX_QUEUES, nl.Uint32Attr(uint32(base.NumRxQueues)))
+		}
 		if base.MTU > 0 {
 			peer.AddRtAttr(unix.IFLA_MTU, nl.Uint32Attr(uint32(base.MTU)))
 		}
 		if link.PeerHardwareAddr != nil {
 			peer.AddRtAttr(unix.IFLA_ADDRESS, []byte(link.PeerHardwareAddr))
 		}
+		if link.PeerNamespace != nil {
+			switch ns := link.PeerNamespace.(type) {
+			case NsPid:
+				val := nl.Uint32Attr(uint32(ns))
+				peer.AddRtAttr(unix.IFLA_NET_NS_PID, val)
+			case NsFd:
+				val := nl.Uint32Attr(uint32(ns))
+				peer.AddRtAttr(unix.IFLA_NET_NS_FD, val)
+			}
+		}
 	case *Vxlan:
 		addVxlanAttrs(link, linkInfo)
 	case *Bond:
@@ -1286,6 +1475,10 @@ func (h *Handle) linkModify(link Link, flags int) error {
 		data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
 		data.AddRtAttr(nl.IFLA_IPVLAN_MODE, nl.Uint16Attr(uint16(link.Mode)))
 		data.AddRtAttr(nl.IFLA_IPVLAN_FLAG, nl.Uint16Attr(uint16(link.Flag)))
+	case *IPVtap:
+		data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
+		data.AddRtAttr(nl.IFLA_IPVLAN_MODE, nl.Uint16Attr(uint16(link.Mode)))
+		data.AddRtAttr(nl.IFLA_IPVLAN_FLAG, nl.Uint16Attr(uint16(link.Flag)))
 	case *Macvlan:
 		if link.Mode != MACVLAN_MODE_DEFAULT {
 			data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
@@ -1296,6 +1489,8 @@ func (h *Handle) linkModify(link Link, flags int) error {
 			data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
 			data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[link.Mode]))
 		}
+	case *Geneve:
+		addGeneveAttrs(link, linkInfo)
 	case *Gretap:
 		addGretapAttrs(link, linkInfo)
 	case *Iptun:
@@ -1318,6 +1513,8 @@ func (h *Handle) linkModify(link Link, flags int) error {
 		addXfrmiAttrs(link, linkInfo)
 	case *IPoIB:
 		addIPoIBAttrs(link, linkInfo)
+	case *BareUDP:
+		addBareUDPAttrs(link, linkInfo)
 	}
 
 	req.AddData(linkInfo)
@@ -1499,7 +1696,7 @@ func execGetLink(req *nl.NetlinkRequest) (Link, error) {
 	}
 }
 
-// linkDeserialize deserializes a raw message received from netlink into
+// LinkDeserialize deserializes a raw message received from netlink into
 // a link object.
 func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
 	msg := nl.DeserializeIfInfomsg(m)
@@ -1509,10 +1706,22 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
 		return nil, err
 	}
 
-	base := LinkAttrs{Index: int(msg.Index), RawFlags: msg.Flags, Flags: linkFlags(msg.Flags), EncapType: msg.EncapType()}
+	base := NewLinkAttrs()
+	base.Index = int(msg.Index)
+	base.RawFlags = msg.Flags
+	base.Flags = linkFlags(msg.Flags)
+	base.EncapType = msg.EncapType()
+	base.NetNsID = -1
 	if msg.Flags&unix.IFF_PROMISC != 0 {
 		base.Promisc = 1
 	}
+	if msg.Flags&unix.IFF_ALLMULTI != 0 {
+		base.Allmulti = 1
+	}
+	if msg.Flags&unix.IFF_MULTICAST != 0 {
+		base.Multi = 1
+	}
+
 	var (
 		link      Link
 		stats32   *LinkStatistics32
@@ -1543,16 +1752,22 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
 						link = &Vlan{}
 					case "veth":
 						link = &Veth{}
+					case "wireguard":
+						link = &Wireguard{}
 					case "vxlan":
 						link = &Vxlan{}
 					case "bond":
 						link = &Bond{}
 					case "ipvlan":
 						link = &IPVlan{}
+					case "ipvtap":
+						link = &IPVtap{}
 					case "macvlan":
 						link = &Macvlan{}
 					case "macvtap":
 						link = &Macvtap{}
+					case "geneve":
+						link = &Geneve{}
 					case "gretap":
 						link = &Gretap{}
 					case "ip6gretap":
@@ -1579,6 +1794,10 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
 						link = &Tuntap{}
 					case "ipoib":
 						link = &IPoIB{}
+					case "can":
+						link = &Can{}
+					case "bareudp":
+						link = &BareUDP{}
 					default:
 						link = &GenericLink{LinkType: linkType}
 					}
@@ -1596,10 +1815,14 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
 						parseBondData(link, data)
 					case "ipvlan":
 						parseIPVlanData(link, data)
+					case "ipvtap":
+						parseIPVtapData(link, data)
 					case "macvlan":
 						parseMacvlanData(link, data)
 					case "macvtap":
 						parseMacvtapData(link, data)
+					case "geneve":
+						parseGeneveData(link, data)
 					case "gretap":
 						parseGretapData(link, data)
 					case "ip6gretap":
@@ -1628,13 +1851,21 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
 						parseTuntapData(link, data)
 					case "ipoib":
 						parseIPoIBData(link, data)
+					case "can":
+						parseCanData(link, data)
+					case "bareudp":
+						parseBareUDPData(link, data)
 					}
+
 				case nl.IFLA_INFO_SLAVE_KIND:
 					slaveType = string(info.Value[:len(info.Value)-1])
 					switch slaveType {
 					case "bond":
 						linkSlave = &BondSlave{}
+					case "vrf":
+						linkSlave = &VrfSlave{}
 					}
+
 				case nl.IFLA_INFO_SLAVE_DATA:
 					switch slaveType {
 					case "bond":
@@ -1643,6 +1874,12 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
 							return nil, err
 						}
 						parseBondSlaveData(linkSlave, data)
+					case "vrf":
+						data, err := nl.ParseRouteAttr(info.Value)
+						if err != nil {
+							return nil, err
+						}
+						parseVrfSlaveData(linkSlave, data)
 					}
 				}
 			}
@@ -1696,6 +1933,8 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
 			}
 		case unix.IFLA_OPERSTATE:
 			base.OperState = LinkOperState(uint8(attr.Value[0]))
+		case unix.IFLA_PHYS_SWITCH_ID:
+			base.PhysSwitchID = int(native.Uint32(attr.Value[0:4]))
 		case unix.IFLA_LINK_NETNSID:
 			base.NetNsID = int(native.Uint32(attr.Value[0:4]))
 		case unix.IFLA_GSO_MAX_SIZE:
@@ -1884,7 +2123,8 @@ func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-c
 			msgs, from, err := s.Receive()
 			if err != nil {
 				if cberr != nil {
-					cberr(err)
+					cberr(fmt.Errorf("Receive failed: %v",
+						err))
 				}
 				return
 			}
@@ -1899,15 +2139,15 @@ func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-c
 					continue
 				}
 				if m.Header.Type == unix.NLMSG_ERROR {
-					native := nl.NativeEndian()
 					error := int32(native.Uint32(m.Data[0:4]))
 					if error == 0 {
 						continue
 					}
 					if cberr != nil {
-						cberr(syscall.Errno(-error))
+						cberr(fmt.Errorf("error message: %v",
+							syscall.Errno(-error)))
 					}
-					return
+					continue
 				}
 				ifmsg := nl.DeserializeIfInfomsg(m.Data)
 				header := unix.NlMsghdr(m.Header)
@@ -1916,7 +2156,7 @@ func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-c
 					if cberr != nil {
 						cberr(err)
 					}
-					return
+					continue
 				}
 				ch <- LinkUpdate{IfInfomsg: *ifmsg, Header: header, Link: link}
 			}
@@ -2080,6 +2320,13 @@ func parseVlanData(link Link, data []syscall.NetlinkRouteAttr) {
 func parseVxlanData(link Link, data []syscall.NetlinkRouteAttr) {
 	vxlan := link.(*Vxlan)
 	for _, datum := range data {
+		// NOTE(vish): Apparently some messages can be sent with no value.
+		//             We special case GBP here to not change existing
+		//             functionality. It appears that GBP sends a datum.Value
+		//             of null.
+		if len(datum.Value) == 0 && datum.Attr.Type != nl.IFLA_VXLAN_GBP {
+			continue
+		}
 		switch datum.Attr.Type {
 		case nl.IFLA_VXLAN_ID:
 			vxlan.VxlanId = int(native.Uint32(datum.Value[0:4]))
@@ -2178,7 +2425,7 @@ func parseBondData(link Link, data []syscall.NetlinkRouteAttr) {
 		case nl.IFLA_BOND_LP_INTERVAL:
 			bond.LpInterval = int(native.Uint32(data[i].Value[0:4]))
 		case nl.IFLA_BOND_PACKETS_PER_SLAVE:
-			bond.PackersPerSlave = int(native.Uint32(data[i].Value[0:4]))
+			bond.PacketsPerSlave = int(native.Uint32(data[i].Value[0:4]))
 		case nl.IFLA_BOND_AD_LACP_RATE:
 			bond.LacpRate = BondLacpRate(data[i].Value[0])
 		case nl.IFLA_BOND_AD_SELECT:
@@ -2258,6 +2505,16 @@ func parseBondSlaveData(slave LinkSlave, data []syscall.NetlinkRouteAttr) {
 	}
 }
 
+// parseVrfSlaveData copies the table id found in data into slave, which must
+// be a *VrfSlave (any other type makes the type assertion panic).
+func parseVrfSlaveData(slave LinkSlave, data []syscall.NetlinkRouteAttr) {
+	vrf := slave.(*VrfSlave)
+	for _, datum := range data {
+		// NOTE(review): the attribute id compared here is a bond-slave
+		// constant; presumably it happens to share its numeric value with
+		// the VRF port table attribute — confirm and switch to a VRF name.
+		if datum.Attr.Type == nl.IFLA_BOND_SLAVE_STATE {
+			vrf.Table = native.Uint32(datum.Value[0:4])
+		}
+	}
+}
+
 func parseIPVlanData(link Link, data []syscall.NetlinkRouteAttr) {
 	ipv := link.(*IPVlan)
 	for _, datum := range data {
@@ -2270,6 +2527,18 @@ func parseIPVlanData(link Link, data []syscall.NetlinkRouteAttr) {
 	}
 }
 
+func parseIPVtapData(link Link, data []syscall.NetlinkRouteAttr) {
+	ipv := link.(*IPVtap)
+	for _, datum := range data {
+		switch datum.Attr.Type {
+		case nl.IFLA_IPVLAN_MODE:
+			ipv.Mode = IPVlanMode(native.Uint32(datum.Value[0:4]))
+		case nl.IFLA_IPVLAN_FLAG:
+			ipv.Flag = IPVlanFlag(native.Uint32(datum.Value[0:4]))
+		}
+	}
+}
+
 func parseMacvtapData(link Link, data []syscall.NetlinkRouteAttr) {
 	macv := link.(*Macvtap)
 	parseMacvlanData(&macv.Macvlan, data)
@@ -2327,6 +2596,58 @@ func linkFlags(rawFlags uint32) net.Flags {
 	return f
 }
 
+// addGeneveAttrs appends an IFLA_INFO_DATA attribute to linkInfo and nests the
+// Geneve settings from geneve inside it.
+//
+// In flow-based mode only IFLA_GENEVE_COLLECT_METADATA is emitted. Otherwise
+// the remote address is written when set (as IFLA_GENEVE_REMOTE for IPv4,
+// IFLA_GENEVE_REMOTE6 for anything else), and ID, Dport, Ttl and Tos are
+// written only when they are non-zero.
+func addGeneveAttrs(geneve *Geneve, linkInfo *nl.RtAttr) {
+	data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
+
+	if geneve.FlowBased {
+		// In flow based mode, no other attributes need to be configured.
+		// The attribute is nested under IFLA_INFO_DATA like every other
+		// IFLA_GENEVE_* attribute below; adding it to linkInfo directly would
+		// place a Geneve type number among the IFLA_INFO_* attributes.
+		// NOTE(review): the payload is kept as boolAttr(...) as before; confirm
+		// against the kernel's geneve netlink policy whether an empty payload
+		// is required here instead.
+		data.AddRtAttr(nl.IFLA_GENEVE_COLLECT_METADATA, boolAttr(geneve.FlowBased))
+		return
+	}
+
+	if ip := geneve.Remote; ip != nil {
+		if ip4 := ip.To4(); ip4 != nil {
+			data.AddRtAttr(nl.IFLA_GENEVE_REMOTE, ip4)
+		} else {
+			data.AddRtAttr(nl.IFLA_GENEVE_REMOTE6, []byte(ip))
+		}
+	}
+
+	if geneve.ID != 0 {
+		data.AddRtAttr(nl.IFLA_GENEVE_ID, nl.Uint32Attr(geneve.ID))
+	}
+
+	if geneve.Dport != 0 {
+		// htons: the port is sent in network byte order.
+		data.AddRtAttr(nl.IFLA_GENEVE_PORT, htons(geneve.Dport))
+	}
+
+	if geneve.Ttl != 0 {
+		data.AddRtAttr(nl.IFLA_GENEVE_TTL, nl.Uint8Attr(geneve.Ttl))
+	}
+
+	if geneve.Tos != 0 {
+		data.AddRtAttr(nl.IFLA_GENEVE_TOS, nl.Uint8Attr(geneve.Tos))
+	}
+}
+
+// parseGeneveData fills the *Geneve behind link from the attributes in data.
+// Attribute types that are not handled below are ignored.
+func parseGeneveData(link Link, data []syscall.NetlinkRouteAttr) {
+	geneve := link.(*Geneve)
+	for _, datum := range data {
+		switch datum.Attr.Type {
+		case nl.IFLA_GENEVE_ID:
+			geneve.ID = native.Uint32(datum.Value[0:4])
+		case nl.IFLA_GENEVE_REMOTE, nl.IFLA_GENEVE_REMOTE6:
+			// The raw address bytes are stored as-is for both families.
+			geneve.Remote = datum.Value
+		case nl.IFLA_GENEVE_PORT:
+			// ntohs: the port arrives in network byte order.
+			geneve.Dport = ntohs(datum.Value[0:2])
+		case nl.IFLA_GENEVE_TTL:
+			geneve.Ttl = datum.Value[0]
+		case nl.IFLA_GENEVE_TOS:
+			geneve.Tos = datum.Value[0]
+		case nl.IFLA_GENEVE_COLLECT_METADATA:
+			// Presence of the attribute marks flow-based mode, so the value
+			// is not inspected. This lets a link created with FlowBased set
+			// read back with FlowBased set.
+			geneve.FlowBased = true
+		}
+	}
+}
+
 func addGretapAttrs(gretap *Gretap, linkInfo *nl.RtAttr) {
 	data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
 
@@ -2513,7 +2834,8 @@ func parseLinkXdp(data []byte) (*LinkXdp, error) {
 		case nl.IFLA_XDP_FD:
 			xdp.Fd = int(native.Uint32(attr.Value[0:4]))
 		case nl.IFLA_XDP_ATTACHED:
-			xdp.Attached = attr.Value[0] != 0
+			xdp.AttachMode = uint32(attr.Value[0])
+			xdp.Attached = xdp.AttachMode != 0
 		case nl.IFLA_XDP_FLAGS:
 			xdp.Flags = native.Uint32(attr.Value[0:4])
 		case nl.IFLA_XDP_PROG_ID:
@@ -2552,11 +2874,16 @@ func addIptunAttrs(iptun *Iptun, linkInfo *nl.RtAttr) {
 	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_FLAGS, nl.Uint16Attr(iptun.EncapFlags))
 	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_SPORT, htons(iptun.EncapSport))
 	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_DPORT, htons(iptun.EncapDport))
+	data.AddRtAttr(nl.IFLA_IPTUN_PROTO, nl.Uint8Attr(iptun.Proto))
 }
 
 func parseIptunData(link Link, data []syscall.NetlinkRouteAttr) {
 	iptun := link.(*Iptun)
 	for _, datum := range data {
+		// NOTE: as with vxlan, ip tunnel attributes may also have a null datum.Value
+		if len(datum.Value) == 0 {
+			continue
+		}
 		switch datum.Attr.Type {
 		case nl.IFLA_IPTUN_LOCAL:
 			iptun.Local = net.IP(datum.Value[0:4])
@@ -2577,7 +2904,9 @@ func parseIptunData(link Link, data []syscall.NetlinkRouteAttr) {
 		case nl.IFLA_IPTUN_ENCAP_FLAGS:
 			iptun.EncapFlags = native.Uint16(datum.Value[0:2])
 		case nl.IFLA_IPTUN_COLLECT_METADATA:
-			iptun.FlowBased = int8(datum.Value[0]) != 0
+			iptun.FlowBased = true
+		case nl.IFLA_IPTUN_PROTO:
+			iptun.Proto = datum.Value[0]
 		}
 	}
 }
@@ -2601,10 +2930,14 @@ func addIp6tnlAttrs(ip6tnl *Ip6tnl, linkInfo *nl.RtAttr) {
 
 	data.AddRtAttr(nl.IFLA_IPTUN_TTL, nl.Uint8Attr(ip6tnl.Ttl))
 	data.AddRtAttr(nl.IFLA_IPTUN_TOS, nl.Uint8Attr(ip6tnl.Tos))
-	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_LIMIT, nl.Uint8Attr(ip6tnl.EncapLimit))
 	data.AddRtAttr(nl.IFLA_IPTUN_FLAGS, nl.Uint32Attr(ip6tnl.Flags))
 	data.AddRtAttr(nl.IFLA_IPTUN_PROTO, nl.Uint8Attr(ip6tnl.Proto))
 	data.AddRtAttr(nl.IFLA_IPTUN_FLOWINFO, nl.Uint32Attr(ip6tnl.FlowInfo))
+	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_LIMIT, nl.Uint8Attr(ip6tnl.EncapLimit))
+	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_TYPE, nl.Uint16Attr(ip6tnl.EncapType))
+	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_FLAGS, nl.Uint16Attr(ip6tnl.EncapFlags))
+	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_SPORT, htons(ip6tnl.EncapSport))
+	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_DPORT, htons(ip6tnl.EncapDport))
 }
 
 func parseIp6tnlData(link Link, data []syscall.NetlinkRouteAttr) {
@@ -2616,17 +2949,25 @@ func parseIp6tnlData(link Link, data []syscall.NetlinkRouteAttr) {
 		case nl.IFLA_IPTUN_REMOTE:
 			ip6tnl.Remote = net.IP(datum.Value[:16])
 		case nl.IFLA_IPTUN_TTL:
-			ip6tnl.Ttl = uint8(datum.Value[0])
+			ip6tnl.Ttl = datum.Value[0]
 		case nl.IFLA_IPTUN_TOS:
-			ip6tnl.Tos = uint8(datum.Value[0])
-		case nl.IFLA_IPTUN_ENCAP_LIMIT:
-			ip6tnl.EncapLimit = uint8(datum.Value[0])
+			ip6tnl.Tos = datum.Value[0]
 		case nl.IFLA_IPTUN_FLAGS:
 			ip6tnl.Flags = native.Uint32(datum.Value[:4])
 		case nl.IFLA_IPTUN_PROTO:
-			ip6tnl.Proto = uint8(datum.Value[0])
+			ip6tnl.Proto = datum.Value[0]
 		case nl.IFLA_IPTUN_FLOWINFO:
 			ip6tnl.FlowInfo = native.Uint32(datum.Value[:4])
+		case nl.IFLA_IPTUN_ENCAP_LIMIT:
+			ip6tnl.EncapLimit = datum.Value[0]
+		case nl.IFLA_IPTUN_ENCAP_TYPE:
+			ip6tnl.EncapType = native.Uint16(datum.Value[0:2])
+		case nl.IFLA_IPTUN_ENCAP_FLAGS:
+			ip6tnl.EncapFlags = native.Uint16(datum.Value[0:2])
+		case nl.IFLA_IPTUN_ENCAP_SPORT:
+			ip6tnl.EncapSport = ntohs(datum.Value[0:2])
+		case nl.IFLA_IPTUN_ENCAP_DPORT:
+			ip6tnl.EncapDport = ntohs(datum.Value[0:2])
 		}
 	}
 }
@@ -2653,8 +2994,10 @@ func addSittunAttrs(sittun *Sittun, linkInfo *nl.RtAttr) {
 		data.AddRtAttr(nl.IFLA_IPTUN_TTL, nl.Uint8Attr(sittun.Ttl))
 	}
 
+	data.AddRtAttr(nl.IFLA_IPTUN_PROTO, nl.Uint8Attr(sittun.Proto))
 	data.AddRtAttr(nl.IFLA_IPTUN_TOS, nl.Uint8Attr(sittun.Tos))
 	data.AddRtAttr(nl.IFLA_IPTUN_PMTUDISC, nl.Uint8Attr(sittun.PMtuDisc))
+	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_LIMIT, nl.Uint8Attr(sittun.EncapLimit))
 	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_TYPE, nl.Uint16Attr(sittun.EncapType))
 	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_FLAGS, nl.Uint16Attr(sittun.EncapFlags))
 	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_SPORT, htons(sittun.EncapSport))
@@ -2670,11 +3013,13 @@ func parseSittunData(link Link, data []syscall.NetlinkRouteAttr) {
 		case nl.IFLA_IPTUN_REMOTE:
 			sittun.Remote = net.IP(datum.Value[0:4])
 		case nl.IFLA_IPTUN_TTL:
-			sittun.Ttl = uint8(datum.Value[0])
+			sittun.Ttl = datum.Value[0]
 		case nl.IFLA_IPTUN_TOS:
-			sittun.Tos = uint8(datum.Value[0])
+			sittun.Tos = datum.Value[0]
 		case nl.IFLA_IPTUN_PMTUDISC:
-			sittun.PMtuDisc = uint8(datum.Value[0])
+			sittun.PMtuDisc = datum.Value[0]
+		case nl.IFLA_IPTUN_PROTO:
+			sittun.Proto = datum.Value[0]
 		case nl.IFLA_IPTUN_ENCAP_TYPE:
 			sittun.EncapType = native.Uint16(datum.Value[0:2])
 		case nl.IFLA_IPTUN_ENCAP_FLAGS:
@@ -2761,6 +3106,9 @@ func addBridgeAttrs(bridge *Bridge, linkInfo *nl.RtAttr) {
 	if bridge.MulticastSnooping != nil {
 		data.AddRtAttr(nl.IFLA_BR_MCAST_SNOOPING, boolToByte(*bridge.MulticastSnooping))
 	}
+	if bridge.AgeingTime != nil {
+		data.AddRtAttr(nl.IFLA_BR_AGEING_TIME, nl.Uint32Attr(*bridge.AgeingTime))
+	}
 	if bridge.HelloTime != nil {
 		data.AddRtAttr(nl.IFLA_BR_HELLO_TIME, nl.Uint32Attr(*bridge.HelloTime))
 	}
@@ -2773,6 +3121,9 @@ func parseBridgeData(bridge Link, data []syscall.NetlinkRouteAttr) {
 	br := bridge.(*Bridge)
 	for _, datum := range data {
 		switch datum.Attr.Type {
+		case nl.IFLA_BR_AGEING_TIME:
+			ageingTime := native.Uint32(datum.Value[0:4])
+			br.AgeingTime = &ageingTime
 		case nl.IFLA_BR_HELLO_TIME:
 			helloTime := native.Uint32(datum.Value[0:4])
 			br.HelloTime = &helloTime
@@ -2852,6 +3203,24 @@ func parseVfInfo(data []syscall.NetlinkRouteAttr, id int) VfInfo {
 			vfr := nl.DeserializeVfRate(element.Value[:])
 			vf.MaxTxRate = vfr.MaxTxRate
 			vf.MinTxRate = vfr.MinTxRate
+		case nl.IFLA_VF_STATS:
+			vfstats := nl.DeserializeVfStats(element.Value[:])
+			vf.RxPackets = vfstats.RxPackets
+			vf.TxPackets = vfstats.TxPackets
+			vf.RxBytes = vfstats.RxBytes
+			vf.TxBytes = vfstats.TxBytes
+			vf.Multicast = vfstats.Multicast
+			vf.Broadcast = vfstats.Broadcast
+			vf.RxDropped = vfstats.RxDropped
+			vf.TxDropped = vfstats.TxDropped
+
+		case nl.IFLA_VF_RSS_QUERY_EN:
+			result := nl.DeserializeVfRssQueryEn(element.Value)
+			vf.RssQuery = result.Setting
+
+		case nl.IFLA_VF_TRUST:
+			result := nl.DeserializeVfTrust(element.Value)
+			vf.Trust = result.Setting
 		}
 	}
 	return vf
@@ -2860,8 +3229,9 @@ func parseVfInfo(data []syscall.NetlinkRouteAttr, id int) VfInfo {
 func addXfrmiAttrs(xfrmi *Xfrmi, linkInfo *nl.RtAttr) {
 	data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
 	data.AddRtAttr(nl.IFLA_XFRM_LINK, nl.Uint32Attr(uint32(xfrmi.ParentIndex)))
-	data.AddRtAttr(nl.IFLA_XFRM_IF_ID, nl.Uint32Attr(xfrmi.Ifid))
-
+	if xfrmi.Ifid != 0 {
+		data.AddRtAttr(nl.IFLA_XFRM_IF_ID, nl.Uint32Attr(xfrmi.Ifid))
+	}
 }
 
 func parseXfrmiData(link Link, data []syscall.NetlinkRouteAttr) {
@@ -3010,9 +3380,86 @@ func parseIPoIBData(link Link, data []syscall.NetlinkRouteAttr) {
 	}
 }
 
+func parseCanData(link Link, data []syscall.NetlinkRouteAttr) {
+	can := link.(*Can)
+	for _, datum := range data {
+
+		switch datum.Attr.Type {
+		case nl.IFLA_CAN_BITTIMING:
+			can.BitRate = native.Uint32(datum.Value)
+			can.SamplePoint = native.Uint32(datum.Value[4:])
+			can.TimeQuanta = native.Uint32(datum.Value[8:])
+			can.PropagationSegment = native.Uint32(datum.Value[12:])
+			can.PhaseSegment1 = native.Uint32(datum.Value[16:])
+			can.PhaseSegment2 = native.Uint32(datum.Value[20:])
+			can.SyncJumpWidth = native.Uint32(datum.Value[24:])
+			can.BitRatePreScaler = native.Uint32(datum.Value[28:])
+		case nl.IFLA_CAN_BITTIMING_CONST:
+			can.Name = string(datum.Value[:16])
+			can.TimeSegment1Min = native.Uint32(datum.Value[16:])
+			can.TimeSegment1Max = native.Uint32(datum.Value[20:])
+			can.TimeSegment2Min = native.Uint32(datum.Value[24:])
+			can.TimeSegment2Max = native.Uint32(datum.Value[28:])
+			can.SyncJumpWidthMax = native.Uint32(datum.Value[32:])
+			can.BitRatePreScalerMin = native.Uint32(datum.Value[36:])
+			can.BitRatePreScalerMax = native.Uint32(datum.Value[40:])
+			can.BitRatePreScalerInc = native.Uint32(datum.Value[44:])
+		case nl.IFLA_CAN_CLOCK:
+			can.ClockFrequency = native.Uint32(datum.Value)
+		case nl.IFLA_CAN_STATE:
+			can.State = native.Uint32(datum.Value)
+		case nl.IFLA_CAN_CTRLMODE:
+			can.Mask = native.Uint32(datum.Value)
+			can.Flags = native.Uint32(datum.Value[4:])
+		case nl.IFLA_CAN_BERR_COUNTER:
+			can.TxError = native.Uint16(datum.Value)
+			can.RxError = native.Uint16(datum.Value[2:])
+		case nl.IFLA_CAN_RESTART_MS:
+			can.RestartMs = native.Uint32(datum.Value)
+		case nl.IFLA_CAN_DATA_BITTIMING_CONST:
+		case nl.IFLA_CAN_RESTART:
+		case nl.IFLA_CAN_DATA_BITTIMING:
+		case nl.IFLA_CAN_TERMINATION:
+		case nl.IFLA_CAN_TERMINATION_CONST:
+		case nl.IFLA_CAN_BITRATE_CONST:
+		case nl.IFLA_CAN_DATA_BITRATE_CONST:
+		case nl.IFLA_CAN_BITRATE_MAX:
+		}
+	}
+}
+
 func addIPoIBAttrs(ipoib *IPoIB, linkInfo *nl.RtAttr) {
 	data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
 	data.AddRtAttr(nl.IFLA_IPOIB_PKEY, nl.Uint16Attr(uint16(ipoib.Pkey)))
 	data.AddRtAttr(nl.IFLA_IPOIB_MODE, nl.Uint16Attr(uint16(ipoib.Mode)))
 	data.AddRtAttr(nl.IFLA_IPOIB_UMCAST, nl.Uint16Attr(uint16(ipoib.Umcast)))
 }
+
+func addBareUDPAttrs(bareudp *BareUDP, linkInfo *nl.RtAttr) {
+	data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
+
+	data.AddRtAttr(nl.IFLA_BAREUDP_PORT, nl.Uint16Attr(nl.Swap16(bareudp.Port)))
+	data.AddRtAttr(nl.IFLA_BAREUDP_ETHERTYPE, nl.Uint16Attr(nl.Swap16(bareudp.EtherType)))
+	if bareudp.SrcPortMin != 0 {
+		data.AddRtAttr(nl.IFLA_BAREUDP_SRCPORT_MIN, nl.Uint16Attr(bareudp.SrcPortMin))
+	}
+	if bareudp.MultiProto {
+		data.AddRtAttr(nl.IFLA_BAREUDP_MULTIPROTO_MODE, []byte{})
+	}
+}
+
+func parseBareUDPData(link Link, data []syscall.NetlinkRouteAttr) {
+	bareudp := link.(*BareUDP)
+	for _, attr := range data {
+		switch attr.Attr.Type {
+		case nl.IFLA_BAREUDP_PORT:
+			bareudp.Port = binary.BigEndian.Uint16(attr.Value)
+		case nl.IFLA_BAREUDP_ETHERTYPE:
+			bareudp.EtherType = binary.BigEndian.Uint16(attr.Value)
+		case nl.IFLA_BAREUDP_SRCPORT_MIN:
+			bareudp.SrcPortMin = native.Uint16(attr.Value)
+		case nl.IFLA_BAREUDP_MULTIPROTO_MODE:
+			bareudp.MultiProto = true
+		}
+	}
+}

+ 1 - 0
vendor/github.com/vishvananda/netlink/neigh.go

@@ -12,6 +12,7 @@ type Neigh struct {
 	State        int
 	Type         int
 	Flags        int
+	FlagsExt     int
 	IP           net.IP
 	HardwareAddr net.HardwareAddr
 	LLIPAddr     net.IP //Used in the case of NHRP

+ 38 - 8
vendor/github.com/vishvananda/netlink/neigh_linux.go

@@ -24,7 +24,11 @@ const (
 	NDA_MASTER
 	NDA_LINK_NETNSID
 	NDA_SRC_VNI
-	NDA_MAX = NDA_SRC_VNI
+	NDA_PROTOCOL
+	NDA_NH_ID
+	NDA_FDB_EXT_ATTRS
+	NDA_FLAGS_EXT
+	NDA_MAX = NDA_FLAGS_EXT
 )
 
 // Neighbor Cache Entry States.
@@ -42,11 +46,19 @@ const (
 
 // Neighbor Flags
 const (
-	NTF_USE    = 0x01
-	NTF_SELF   = 0x02
-	NTF_MASTER = 0x04
-	NTF_PROXY  = 0x08
-	NTF_ROUTER = 0x80
+	NTF_USE         = 0x01
+	NTF_SELF        = 0x02
+	NTF_MASTER      = 0x04
+	NTF_PROXY       = 0x08
+	NTF_EXT_LEARNED = 0x10
+	NTF_OFFLOADED   = 0x20
+	NTF_STICKY      = 0x40
+	NTF_ROUTER      = 0x80
+)
+
+// Extended Neighbor Flags
+const (
+	NTF_EXT_MANAGED = 0x00000001
 )
 
 // Ndmsg is for adding, removing or receiving information about a neighbor table entry
@@ -162,11 +174,16 @@ func neighHandle(neigh *Neigh, req *nl.NetlinkRequest) error {
 	if neigh.LLIPAddr != nil {
 		llIPData := nl.NewRtAttr(NDA_LLADDR, neigh.LLIPAddr.To4())
 		req.AddData(llIPData)
-	} else if neigh.Flags != NTF_PROXY || neigh.HardwareAddr != nil {
+	} else if neigh.HardwareAddr != nil {
 		hwData := nl.NewRtAttr(NDA_LLADDR, []byte(neigh.HardwareAddr))
 		req.AddData(hwData)
 	}
 
+	if neigh.FlagsExt != 0 {
+		flagsExtData := nl.NewRtAttr(NDA_FLAGS_EXT, nl.Uint32Attr(uint32(neigh.FlagsExt)))
+		req.AddData(flagsExtData)
+	}
+
 	if neigh.Vlan != 0 {
 		vlanData := nl.NewRtAttr(NDA_VLAN, nl.Uint16Attr(uint16(neigh.Vlan)))
 		req.AddData(vlanData)
@@ -243,6 +260,18 @@ func (h *Handle) NeighListExecute(msg Ndmsg) ([]Neigh, error) {
 			// Ignore messages from other interfaces
 			continue
 		}
+		if msg.Family != 0 && ndm.Family != msg.Family {
+			continue
+		}
+		if msg.State != 0 && ndm.State != msg.State {
+			continue
+		}
+		if msg.Type != 0 && ndm.Type != msg.Type {
+			continue
+		}
+		if msg.Flags != 0 && ndm.Flags != msg.Flags {
+			continue
+		}
 
 		neigh, err := NeighDeserialize(m)
 		if err != nil {
@@ -293,6 +322,8 @@ func NeighDeserialize(m []byte) (*Neigh, error) {
 			} else {
 				neigh.HardwareAddr = net.HardwareAddr(attr.Value)
 			}
+		case NDA_FLAGS_EXT:
+			neigh.FlagsExt = int(native.Uint32(attr.Value[0:4]))
 		case NDA_VLAN:
 			neigh.Vlan = int(native.Uint16(attr.Value[0:2]))
 		case NDA_VNI:
@@ -396,7 +427,6 @@ func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done <
 					continue
 				}
 				if m.Header.Type == unix.NLMSG_ERROR {
-					native := nl.NativeEndian()
 					error := int32(native.Uint32(m.Data[0:4]))
 					if error == 0 {
 						continue

+ 21 - 1
vendor/github.com/vishvananda/netlink/netlink_unspecified.go

@@ -16,7 +16,7 @@ func LinkSetMTU(link Link, mtu int) error {
 	return ErrNotImplemented
 }
 
-func LinkSetMaster(link Link, master *Bridge) error {
+func LinkSetMaster(link Link, master Link) error {
 	return ErrNotImplemented
 }
 
@@ -72,6 +72,10 @@ func LinkSetXdpFd(link Link, fd int) error {
 	return ErrNotImplemented
 }
 
+func LinkSetXdpFdWithFlags(link Link, fd, flags int) error {
+	return ErrNotImplemented
+}
+
 func LinkSetARPOff(link Link) error {
 	return ErrNotImplemented
 }
@@ -176,14 +180,30 @@ func RouteAdd(route *Route) error {
 	return ErrNotImplemented
 }
 
+func RouteAppend(route *Route) error {
+	return ErrNotImplemented
+}
+
 func RouteDel(route *Route) error {
 	return ErrNotImplemented
 }
 
+func RouteGet(destination net.IP) ([]Route, error) {
+	return nil, ErrNotImplemented
+}
+
 func RouteList(link Link, family int) ([]Route, error) {
 	return nil, ErrNotImplemented
 }
 
+func RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, error) {
+	return nil, ErrNotImplemented
+}
+
+func RouteReplace(route *Route) error {
+	return ErrNotImplemented
+}
+
 func XfrmPolicyAdd(policy *XfrmPolicy) error {
 	return ErrNotImplemented
 }

+ 3 - 3
vendor/github.com/vishvananda/netlink/netns_linux.go

@@ -87,7 +87,7 @@ func (h *Handle) getNetNsId(attrType int, val uint32) (int, error) {
 	rtgen := nl.NewRtGenMsg()
 	req.AddData(rtgen)
 
-	b := make([]byte, 4, 4)
+	b := make([]byte, 4)
 	native.PutUint32(b, val)
 	attr := nl.NewRtAttr(attrType, b)
 	req.AddData(attr)
@@ -126,12 +126,12 @@ func (h *Handle) setNetNsId(attrType int, val uint32, newnsid uint32) error {
 	rtgen := nl.NewRtGenMsg()
 	req.AddData(rtgen)
 
-	b := make([]byte, 4, 4)
+	b := make([]byte, 4)
 	native.PutUint32(b, val)
 	attr := nl.NewRtAttr(attrType, b)
 	req.AddData(attr)
 
-	b1 := make([]byte, 4, 4)
+	b1 := make([]byte, 4)
 	native.PutUint32(b1, newnsid)
 	attr1 := nl.NewRtAttr(NETNSA_NSID, b1)
 	req.AddData(attr1)

+ 4 - 10
vendor/github.com/vishvananda/netlink/nl/addr_linux.go

@@ -54,24 +54,18 @@ func (msg *IfAddrmsg) Len() int {
 // 	__u32	tstamp; /* updated timestamp, hundredths of seconds */
 // };
 
-const IFA_CACHEINFO = 6
-const SizeofIfaCacheInfo = 0x10
-
 type IfaCacheInfo struct {
-	IfaPrefered uint32
-	IfaValid    uint32
-	Cstamp      uint32
-	Tstamp      uint32
+	unix.IfaCacheinfo
 }
 
 func (msg *IfaCacheInfo) Len() int {
-	return SizeofIfaCacheInfo
+	return unix.SizeofIfaCacheinfo
 }
 
 func DeserializeIfaCacheInfo(b []byte) *IfaCacheInfo {
-	return (*IfaCacheInfo)(unsafe.Pointer(&b[0:SizeofIfaCacheInfo][0]))
+	return (*IfaCacheInfo)(unsafe.Pointer(&b[0:unix.SizeofIfaCacheinfo][0]))
 }
 
 func (msg *IfaCacheInfo) Serialize() []byte {
-	return (*(*[SizeofIfaCacheInfo]byte)(unsafe.Pointer(msg)))[:]
+	return (*(*[unix.SizeofIfaCacheinfo]byte)(unsafe.Pointer(msg)))[:]
 }

+ 4 - 2
vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go

@@ -40,9 +40,11 @@ const (
 	NFNETLINK_V0 = 0
 )
 
-// #define NLA_F_NESTED (1 << 15)
 const (
-	NLA_F_NESTED = (1 << 15)
+	NLA_F_NESTED        uint16 = (1 << 15) // #define NLA_F_NESTED (1 << 15)
+	NLA_F_NET_BYTEORDER uint16 = (1 << 14) // #define NLA_F_NET_BYTEORDER (1 << 14)
+	NLA_TYPE_MASK              = ^(NLA_F_NESTED | NLA_F_NET_BYTEORDER)
+	NLA_ALIGNTO         uint16 = 4 // #define NLA_ALIGNTO 4
 )
 
 // enum ctattr_type {

+ 61 - 5
vendor/github.com/vishvananda/netlink/nl/devlink_linux.go

@@ -10,16 +10,38 @@ const (
 
 const (
 	DEVLINK_CMD_GET         = 1
+	DEVLINK_CMD_PORT_GET    = 5
+	DEVLINK_CMD_PORT_SET    = 6
+	DEVLINK_CMD_PORT_NEW    = 7
+	DEVLINK_CMD_PORT_DEL    = 8
 	DEVLINK_CMD_ESWITCH_GET = 29
 	DEVLINK_CMD_ESWITCH_SET = 30
+	DEVLINK_CMD_INFO_GET    = 51
 )
 
 const (
-	DEVLINK_ATTR_BUS_NAME            = 1
-	DEVLINK_ATTR_DEV_NAME            = 2
-	DEVLINK_ATTR_ESWITCH_MODE        = 25
-	DEVLINK_ATTR_ESWITCH_INLINE_MODE = 26
-	DEVLINK_ATTR_ESWITCH_ENCAP_MODE  = 62
+	DEVLINK_ATTR_BUS_NAME               = 1
+	DEVLINK_ATTR_DEV_NAME               = 2
+	DEVLINK_ATTR_PORT_INDEX             = 3
+	DEVLINK_ATTR_PORT_TYPE              = 4
+	DEVLINK_ATTR_PORT_NETDEV_IFINDEX    = 6
+	DEVLINK_ATTR_PORT_NETDEV_NAME       = 7
+	DEVLINK_ATTR_PORT_IBDEV_NAME        = 8
+	DEVLINK_ATTR_ESWITCH_MODE           = 25
+	DEVLINK_ATTR_ESWITCH_INLINE_MODE    = 26
+	DEVLINK_ATTR_ESWITCH_ENCAP_MODE     = 62
+	DEVLINK_ATTR_PORT_FLAVOUR           = 77
+	DEVLINK_ATTR_INFO_DRIVER_NAME       = 98
+	DEVLINK_ATTR_INFO_SERIAL_NUMBER     = 99
+	DEVLINK_ATTR_INFO_VERSION_FIXED     = 100
+	DEVLINK_ATTR_INFO_VERSION_RUNNING   = 101
+	DEVLINK_ATTR_INFO_VERSION_STORED    = 102
+	DEVLINK_ATTR_INFO_VERSION_NAME      = 103
+	DEVLINK_ATTR_INFO_VERSION_VALUE     = 104
+	DEVLINK_ATTR_PORT_PCI_PF_NUMBER     = 127
+	DEVLINK_ATTR_PORT_FUNCTION          = 145
+	DEVLINK_ATTR_PORT_CONTROLLER_NUMBER = 150
+	DEVLINK_ATTR_PORT_PCI_SF_NUMBER     = 164
 )
 
 const (
@@ -38,3 +60,37 @@ const (
 	DEVLINK_ESWITCH_ENCAP_MODE_NONE  = 0
 	DEVLINK_ESWITCH_ENCAP_MODE_BASIC = 1
 )
+
+const (
+	DEVLINK_PORT_FLAVOUR_PHYSICAL = 0
+	DEVLINK_PORT_FLAVOUR_CPU      = 1
+	DEVLINK_PORT_FLAVOUR_DSA      = 2
+	DEVLINK_PORT_FLAVOUR_PCI_PF   = 3
+	DEVLINK_PORT_FLAVOUR_PCI_VF   = 4
+	DEVLINK_PORT_FLAVOUR_VIRTUAL  = 5
+	DEVLINK_PORT_FLAVOUR_UNUSED   = 6
+	DEVLINK_PORT_FLAVOUR_PCI_SF   = 7
+)
+
+const (
+	DEVLINK_PORT_TYPE_NOTSET = 0
+	DEVLINK_PORT_TYPE_AUTO   = 1
+	DEVLINK_PORT_TYPE_ETH    = 2
+	DEVLINK_PORT_TYPE_IB     = 3
+)
+
+const (
+	DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR = 1
+	DEVLINK_PORT_FN_ATTR_STATE         = 2
+	DEVLINK_PORT_FN_ATTR_OPSTATE       = 3
+)
+
+const (
+	DEVLINK_PORT_FN_STATE_INACTIVE = 0
+	DEVLINK_PORT_FN_STATE_ACTIVE   = 1
+)
+
+const (
+	DEVLINK_PORT_FN_OPSTATE_DETACHED = 0
+	DEVLINK_PORT_FN_OPSTATE_ATTACHED = 1
+)

+ 222 - 0
vendor/github.com/vishvananda/netlink/nl/ipset_linux.go

@@ -0,0 +1,222 @@
+package nl
+
+import (
+	"strconv"
+
+	"golang.org/x/sys/unix"
+)
+
+const (
+	/* The protocol version */
+	IPSET_PROTOCOL = 6
+
+	/* The max length of strings including NUL: set and type identifiers */
+	IPSET_MAXNAMELEN = 32
+
+	/* The maximum permissible comment length we will accept over netlink */
+	IPSET_MAX_COMMENT_SIZE = 255
+)
+
+const (
+	_                  = iota
+	IPSET_CMD_PROTOCOL /* 1: Return protocol version */
+	IPSET_CMD_CREATE   /* 2: Create a new (empty) set */
+	IPSET_CMD_DESTROY  /* 3: Destroy a (empty) set */
+	IPSET_CMD_FLUSH    /* 4: Remove all elements from a set */
+	IPSET_CMD_RENAME   /* 5: Rename a set */
+	IPSET_CMD_SWAP     /* 6: Swap two sets */
+	IPSET_CMD_LIST     /* 7: List sets */
+	IPSET_CMD_SAVE     /* 8: Save sets */
+	IPSET_CMD_ADD      /* 9: Add an element to a set */
+	IPSET_CMD_DEL      /* 10: Delete an element from a set */
+	IPSET_CMD_TEST     /* 11: Test an element in a set */
+	IPSET_CMD_HEADER   /* 12: Get set header data only */
+	IPSET_CMD_TYPE     /* 13: Get set type */
+)
+
+/* Attributes at command level */
+const (
+	_                       = iota
+	IPSET_ATTR_PROTOCOL     /* 1: Protocol version */
+	IPSET_ATTR_SETNAME      /* 2: Name of the set */
+	IPSET_ATTR_TYPENAME     /* 3: Typename */
+	IPSET_ATTR_REVISION     /* 4: Settype revision */
+	IPSET_ATTR_FAMILY       /* 5: Settype family */
+	IPSET_ATTR_FLAGS        /* 6: Flags at command level */
+	IPSET_ATTR_DATA         /* 7: Nested attributes */
+	IPSET_ATTR_ADT          /* 8: Multiple data containers */
+	IPSET_ATTR_LINENO       /* 9: Restore lineno */
+	IPSET_ATTR_PROTOCOL_MIN /* 10: Minimal supported version number */
+
+	IPSET_ATTR_SETNAME2     = IPSET_ATTR_TYPENAME     /* Setname at rename/swap */
+	IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN /* type rev min */
+)
+
+/* CADT specific attributes */
+const (
+	IPSET_ATTR_IP          = 1
+	IPSET_ATTR_IP_FROM     = 1
+	IPSET_ATTR_IP_TO       = 2
+	IPSET_ATTR_CIDR        = 3
+	IPSET_ATTR_PORT        = 4
+	IPSET_ATTR_PORT_FROM   = 4
+	IPSET_ATTR_PORT_TO     = 5
+	IPSET_ATTR_TIMEOUT     = 6
+	IPSET_ATTR_PROTO       = 7
+	IPSET_ATTR_CADT_FLAGS  = 8
+	IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO /* 9 */
+	IPSET_ATTR_MARK        = 10
+	IPSET_ATTR_MARKMASK    = 11
+
+	/* Reserve empty slots */
+	IPSET_ATTR_CADT_MAX = 16
+
+	/* Create-only specific attributes */
+	IPSET_ATTR_GC = 3 + iota
+	IPSET_ATTR_HASHSIZE
+	IPSET_ATTR_MAXELEM
+	IPSET_ATTR_NETMASK
+	IPSET_ATTR_PROBES
+	IPSET_ATTR_RESIZE
+	IPSET_ATTR_SIZE
+
+	/* Kernel-only */
+	IPSET_ATTR_ELEMENTS
+	IPSET_ATTR_REFERENCES
+	IPSET_ATTR_MEMSIZE
+
+	SET_ATTR_CREATE_MAX
+)
+
+/* ADT specific attributes */
+const (
+	IPSET_ATTR_ETHER = IPSET_ATTR_CADT_MAX + iota + 1
+	IPSET_ATTR_NAME
+	IPSET_ATTR_NAMEREF
+	IPSET_ATTR_IP2
+	IPSET_ATTR_CIDR2
+	IPSET_ATTR_IP2_TO
+	IPSET_ATTR_IFACE
+	IPSET_ATTR_BYTES
+	IPSET_ATTR_PACKETS
+	IPSET_ATTR_COMMENT
+	IPSET_ATTR_SKBMARK
+	IPSET_ATTR_SKBPRIO
+	IPSET_ATTR_SKBQUEUE
+)
+
+/* Flags at CADT attribute level, upper half of cmdattrs */
+const (
+	IPSET_FLAG_BIT_BEFORE        = 0
+	IPSET_FLAG_BEFORE            = (1 << IPSET_FLAG_BIT_BEFORE)
+	IPSET_FLAG_BIT_PHYSDEV       = 1
+	IPSET_FLAG_PHYSDEV           = (1 << IPSET_FLAG_BIT_PHYSDEV)
+	IPSET_FLAG_BIT_NOMATCH       = 2
+	IPSET_FLAG_NOMATCH           = (1 << IPSET_FLAG_BIT_NOMATCH)
+	IPSET_FLAG_BIT_WITH_COUNTERS = 3
+	IPSET_FLAG_WITH_COUNTERS     = (1 << IPSET_FLAG_BIT_WITH_COUNTERS)
+	IPSET_FLAG_BIT_WITH_COMMENT  = 4
+	IPSET_FLAG_WITH_COMMENT      = (1 << IPSET_FLAG_BIT_WITH_COMMENT)
+	IPSET_FLAG_BIT_WITH_FORCEADD = 5
+	IPSET_FLAG_WITH_FORCEADD     = (1 << IPSET_FLAG_BIT_WITH_FORCEADD)
+	IPSET_FLAG_BIT_WITH_SKBINFO  = 6
+	IPSET_FLAG_WITH_SKBINFO      = (1 << IPSET_FLAG_BIT_WITH_SKBINFO)
+	IPSET_FLAG_CADT_MAX          = 15
+)
+
+const (
+	IPSET_ERR_PRIVATE = 4096 + iota
+	IPSET_ERR_PROTOCOL
+	IPSET_ERR_FIND_TYPE
+	IPSET_ERR_MAX_SETS
+	IPSET_ERR_BUSY
+	IPSET_ERR_EXIST_SETNAME2
+	IPSET_ERR_TYPE_MISMATCH
+	IPSET_ERR_EXIST
+	IPSET_ERR_INVALID_CIDR
+	IPSET_ERR_INVALID_NETMASK
+	IPSET_ERR_INVALID_FAMILY
+	IPSET_ERR_TIMEOUT
+	IPSET_ERR_REFERENCED
+	IPSET_ERR_IPADDR_IPV4
+	IPSET_ERR_IPADDR_IPV6
+	IPSET_ERR_COUNTER
+	IPSET_ERR_COMMENT
+	IPSET_ERR_INVALID_MARKMASK
+	IPSET_ERR_SKBINFO
+
+	/* Type specific error codes */
+	IPSET_ERR_TYPE_SPECIFIC = 4352
+)
+
+type IPSetError uintptr
+
+func (e IPSetError) Error() string {
+	switch int(e) {
+	case IPSET_ERR_PRIVATE:
+		return "private"
+	case IPSET_ERR_PROTOCOL:
+		return "invalid protocol"
+	case IPSET_ERR_FIND_TYPE:
+		return "invalid type"
+	case IPSET_ERR_MAX_SETS:
+		return "max sets reached"
+	case IPSET_ERR_BUSY:
+		return "busy"
+	case IPSET_ERR_EXIST_SETNAME2:
+		return "exist_setname2"
+	case IPSET_ERR_TYPE_MISMATCH:
+		return "type mismatch"
+	case IPSET_ERR_EXIST:
+		return "exist"
+	case IPSET_ERR_INVALID_CIDR:
+		return "invalid cidr"
+	case IPSET_ERR_INVALID_NETMASK:
+		return "invalid netmask"
+	case IPSET_ERR_INVALID_FAMILY:
+		return "invalid family"
+	case IPSET_ERR_TIMEOUT:
+		return "timeout"
+	case IPSET_ERR_REFERENCED:
+		return "referenced"
+	case IPSET_ERR_IPADDR_IPV4:
+		return "invalid ipv4 address"
+	case IPSET_ERR_IPADDR_IPV6:
+		return "invalid ipv6 address"
+	case IPSET_ERR_COUNTER:
+		return "invalid counter"
+	case IPSET_ERR_COMMENT:
+		return "invalid comment"
+	case IPSET_ERR_INVALID_MARKMASK:
+		return "invalid markmask"
+	case IPSET_ERR_SKBINFO:
+		return "skbinfo"
+	default:
+		return "errno " + strconv.Itoa(int(e))
+	}
+}
+
+func GetIpsetFlags(cmd int) int {
+	switch cmd {
+	case IPSET_CMD_CREATE:
+		return unix.NLM_F_REQUEST | unix.NLM_F_ACK | unix.NLM_F_CREATE
+	case IPSET_CMD_DESTROY,
+		IPSET_CMD_FLUSH,
+		IPSET_CMD_RENAME,
+		IPSET_CMD_SWAP,
+		IPSET_CMD_TEST:
+		return unix.NLM_F_REQUEST | unix.NLM_F_ACK
+	case IPSET_CMD_LIST,
+		IPSET_CMD_SAVE:
+		return unix.NLM_F_REQUEST | unix.NLM_F_ACK | unix.NLM_F_ROOT | unix.NLM_F_MATCH | unix.NLM_F_DUMP
+	case IPSET_CMD_ADD,
+		IPSET_CMD_DEL:
+		return unix.NLM_F_REQUEST | unix.NLM_F_ACK
+	case IPSET_CMD_HEADER,
+		IPSET_CMD_TYPE,
+		IPSET_CMD_PROTOCOL:
+		return unix.NLM_F_REQUEST
+	default:
+		return 0
+	}
+}

+ 111 - 1
vendor/github.com/vishvananda/netlink/nl/link_linux.go

@@ -1,6 +1,8 @@
 package nl
 
 import (
+	"bytes"
+	"encoding/binary"
 	"unsafe"
 )
 
@@ -171,6 +173,22 @@ const (
 	IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE
 )
 
+const (
+	IFLA_GENEVE_UNSPEC = iota
+	IFLA_GENEVE_ID     // vni
+	IFLA_GENEVE_REMOTE
+	IFLA_GENEVE_TTL
+	IFLA_GENEVE_TOS
+	IFLA_GENEVE_PORT // destination port
+	IFLA_GENEVE_COLLECT_METADATA
+	IFLA_GENEVE_REMOTE6
+	IFLA_GENEVE_UDP_CSUM
+	IFLA_GENEVE_UDP_ZERO_CSUM6_TX
+	IFLA_GENEVE_UDP_ZERO_CSUM6_RX
+	IFLA_GENEVE_LABEL
+	IFLA_GENEVE_MAX = IFLA_GENEVE_LABEL
+)
+
 const (
 	IFLA_GRE_UNSPEC = iota
 	IFLA_GRE_LINK
@@ -243,7 +261,9 @@ const (
 	IFLA_VF_STATS_TX_BYTES
 	IFLA_VF_STATS_BROADCAST
 	IFLA_VF_STATS_MULTICAST
-	IFLA_VF_STATS_MAX = IFLA_VF_STATS_MULTICAST
+	IFLA_VF_STATS_RX_DROPPED
+	IFLA_VF_STATS_TX_DROPPED
+	IFLA_VF_STATS_MAX = IFLA_VF_STATS_TX_DROPPED
 )
 
 const (
@@ -326,6 +346,59 @@ func (msg *VfTxRate) Serialize() []byte {
 	return (*(*[SizeofVfTxRate]byte)(unsafe.Pointer(msg)))[:]
 }
 
+//struct ifla_vf_stats {
+//	__u64 rx_packets;
+//	__u64 tx_packets;
+//	__u64 rx_bytes;
+//	__u64 tx_bytes;
+//	__u64 broadcast;
+//	__u64 multicast;
+//	__u64 rx_dropped;
+//	__u64 tx_dropped;
+//};
+
+type VfStats struct {
+	RxPackets uint64
+	TxPackets uint64
+	RxBytes   uint64
+	TxBytes   uint64
+	Multicast uint64
+	Broadcast uint64
+	RxDropped uint64
+	TxDropped uint64
+}
+
+func DeserializeVfStats(b []byte) VfStats {
+	var vfstat VfStats
+	stats, err := ParseRouteAttr(b)
+	if err != nil {
+		return vfstat
+	}
+	var valueVar uint64
+	for _, stat := range stats {
+		if err := binary.Read(bytes.NewBuffer(stat.Value), NativeEndian(), &valueVar); err != nil {
+			break
+		}
+		switch stat.Attr.Type {
+		case IFLA_VF_STATS_RX_PACKETS:
+			vfstat.RxPackets = valueVar
+		case IFLA_VF_STATS_TX_PACKETS:
+			vfstat.TxPackets = valueVar
+		case IFLA_VF_STATS_RX_BYTES:
+			vfstat.RxBytes = valueVar
+		case IFLA_VF_STATS_TX_BYTES:
+			vfstat.TxBytes = valueVar
+		case IFLA_VF_STATS_MULTICAST:
+			vfstat.Multicast = valueVar
+		case IFLA_VF_STATS_BROADCAST:
+			vfstat.Broadcast = valueVar
+		case IFLA_VF_STATS_RX_DROPPED:
+			vfstat.RxDropped = valueVar
+		case IFLA_VF_STATS_TX_DROPPED:
+			vfstat.TxDropped = valueVar
+		}
+	}
+	return vfstat
+}
+
 // struct ifla_vf_rate {
 //   __u32 vf;
 //   __u32 min_tx_rate; /* Min Bandwidth in Mbps */
@@ -478,6 +551,14 @@ const (
 	IFLA_XDP_MAX      = IFLA_XDP_PROG_ID
 )
 
+// XDP program attach mode (used as dump value for IFLA_XDP_ATTACHED)
+const (
+	XDP_ATTACHED_NONE = iota
+	XDP_ATTACHED_DRV
+	XDP_ATTACHED_SKB
+	XDP_ATTACHED_HW
+)
+
 const (
 	IFLA_IPTUN_UNSPEC = iota
 	IFLA_IPTUN_LINK
@@ -608,3 +689,32 @@ const (
 	IFLA_IPOIB_UMCAST
 	IFLA_IPOIB_MAX = IFLA_IPOIB_UMCAST
 )
+
+const (
+	IFLA_CAN_UNSPEC = iota
+	IFLA_CAN_BITTIMING
+	IFLA_CAN_BITTIMING_CONST
+	IFLA_CAN_CLOCK
+	IFLA_CAN_STATE
+	IFLA_CAN_CTRLMODE
+	IFLA_CAN_RESTART_MS
+	IFLA_CAN_RESTART
+	IFLA_CAN_BERR_COUNTER
+	IFLA_CAN_DATA_BITTIMING
+	IFLA_CAN_DATA_BITTIMING_CONST
+	IFLA_CAN_TERMINATION
+	IFLA_CAN_TERMINATION_CONST
+	IFLA_CAN_BITRATE_CONST
+	IFLA_CAN_DATA_BITRATE_CONST
+	IFLA_CAN_BITRATE_MAX
+	IFLA_CAN_MAX = IFLA_CAN_BITRATE_MAX
+)
+
+const (
+	IFLA_BAREUDP_UNSPEC = iota
+	IFLA_BAREUDP_PORT
+	IFLA_BAREUDP_ETHERTYPE
+	IFLA_BAREUDP_SRCPORT_MIN
+	IFLA_BAREUDP_MULTIPROTO_MODE
+	IFLA_BAREUDP_MAX = IFLA_BAREUDP_MULTIPROTO_MODE
+)

+ 29 - 0
vendor/github.com/vishvananda/netlink/nl/lwt_linux.go

@@ -0,0 +1,29 @@
+package nl
+
+const (
+	LWT_BPF_PROG_UNSPEC = iota
+	LWT_BPF_PROG_FD
+	LWT_BPF_PROG_NAME
+	__LWT_BPF_PROG_MAX
+)
+
+const (
+	LWT_BPF_PROG_MAX = __LWT_BPF_PROG_MAX - 1
+)
+
+const (
+	LWT_BPF_UNSPEC = iota
+	LWT_BPF_IN
+	LWT_BPF_OUT
+	LWT_BPF_XMIT
+	LWT_BPF_XMIT_HEADROOM
+	__LWT_BPF_MAX
+)
+
+const (
+	LWT_BPF_MAX = __LWT_BPF_MAX - 1
+)
+
+const (
+	LWT_BPF_MAX_HEADROOM = 256
+)

+ 148 - 8
vendor/github.com/vishvananda/netlink/nl/nl_linux.go

@@ -27,7 +27,8 @@ const (
 	// tc rules or filters, or other more memory requiring data.
 	RECEIVE_BUFFER_SIZE = 65536
 	// Kernel netlink pid
-	PidKernel uint32 = 0
+	PidKernel     uint32 = 0
+	SizeofCnMsgOp        = 0x18
 )
 
 // SupportedNlFamilies contains the list of netlink families this netlink package supports
@@ -35,6 +36,12 @@ var SupportedNlFamilies = []int{unix.NETLINK_ROUTE, unix.NETLINK_XFRM, unix.NETL
 
 var nextSeqNr uint32
 
+// Default netlink socket timeout, 60s
+var SocketTimeoutTv = unix.Timeval{Sec: 60, Usec: 0}
+
+// EnableErrorMessageReporting is the default error message reporting configuration for new netlink sockets
+var EnableErrorMessageReporting bool = false
+
 // GetIPFamily returns the family type of a net.IP.
 func GetIPFamily(ip net.IP) int {
 	if len(ip) <= net.IPv4len {
@@ -77,11 +84,69 @@ func Swap32(i uint32) uint32 {
 	return (i&0xff000000)>>24 | (i&0xff0000)>>8 | (i&0xff00)<<8 | (i&0xff)<<24
 }
 
+// NLMSGERR_ATTR_* are the attribute types looked up in the trailing
+// attributes of a netlink error message. Only NLMSGERR_ATTR_MSG is
+// interpreted by Execute in this file.
+const (
+	NLMSGERR_ATTR_UNUSED = 0
+	NLMSGERR_ATTR_MSG    = 1
+	NLMSGERR_ATTR_OFFS   = 2
+	NLMSGERR_ATTR_COOKIE = 3
+	NLMSGERR_ATTR_POLICY = 4
+)
+
 type NetlinkRequestData interface {
 	Len() int
 	Serialize() []byte
 }
 
+// Operation codes for proc connector control messages (the op argument of
+// NewCnMsg).
+//
+// Both values are written out explicitly: this block does not use iota, and
+// a bare identifier in a const block repeats the previous expression, which
+// would make PROC_CN_MCAST_IGNORE equal to PROC_CN_MCAST_LISTEN (1).
+const (
+	PROC_CN_MCAST_LISTEN = 1
+	PROC_CN_MCAST_IGNORE = 2
+)
+
+// CbID is the (index, value) pair that identifies a connector callback.
+type CbID struct {
+	Idx uint32
+	Val uint32
+}
+
+// CnMsg is the fixed-size header of a connector message.
+type CnMsg struct {
+	ID     CbID
+	Seq    uint32
+	Ack    uint32
+	Length uint16
+	Flags  uint16
+}
+
+// CnMsgOp is a connector header immediately followed by one 32-bit
+// operation code.
+type CnMsgOp struct {
+	CnMsg
+	// here we differ from the C header
+	Op uint32
+}
+
+// NewCnMsg returns a CnMsgOp addressed to the callback (idx, val) whose
+// payload is op. Seq is set to 1, Ack to 0 and Length to the encoded size
+// of op.
+func NewCnMsg(idx, val, op uint32) *CnMsgOp {
+	return &CnMsgOp{
+		CnMsg: CnMsg{
+			ID:     CbID{Idx: idx, Val: val},
+			Seq:    1,
+			Ack:    0,
+			Length: uint16(binary.Size(op)),
+		},
+		Op: op,
+	}
+}
+
+// Serialize exposes the in-memory representation of msg as a byte slice of
+// SizeofCnMsgOp bytes. The slice aliases msg; it is not a copy.
+func (msg *CnMsgOp) Serialize() []byte {
+	raw := (*[SizeofCnMsgOp]byte)(unsafe.Pointer(msg))
+	return raw[:]
+}
+
+// DeserializeCnMsgOp reinterprets the first SizeofCnMsgOp bytes of b as a
+// CnMsgOp. The result aliases b; it is not a copy.
+func DeserializeCnMsgOp(b []byte) *CnMsgOp {
+	head := b[0:SizeofCnMsgOp]
+	return (*CnMsgOp)(unsafe.Pointer(&head[0]))
+}
+
+// Len reports the serialized size of the message, SizeofCnMsgOp.
+func (msg *CnMsgOp) Len() int {
+	return SizeofCnMsgOp
+}
+
 // IfInfomsg is related to links, but it is used for list requests as well
 type IfInfomsg struct {
 	unix.IfInfomsg
@@ -249,6 +314,12 @@ func (msg *IfInfomsg) EncapType() string {
 	return fmt.Sprintf("unknown%d", msg.Type)
 }
 
+// Round the length of a netlink message up to align it properly.
+// Taken from syscall/netlink_linux.go by The Go Authors under BSD-style license.
+func nlmAlignOf(msglen int) int {
+	return (msglen + syscall.NLMSG_ALIGNTO - 1) & ^(syscall.NLMSG_ALIGNTO - 1)
+}
+
 func rtaAlignOf(attrlen int) int {
 	return (attrlen + unix.RTA_ALIGNTO - 1) & ^(unix.RTA_ALIGNTO - 1)
 }
@@ -259,6 +330,29 @@ func NewIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg {
 	return msg
 }
 
+// Uint32Attribute is a netlink attribute whose payload is a single uint32.
+type Uint32Attribute struct {
+	Type  uint16
+	Value uint32
+}
+
+// Serialize encodes the attribute as a 4-byte header (length, type) in
+// native byte order followed by the value. The value is written big-endian
+// when Type carries NLA_F_NET_BYTEORDER and in native order otherwise.
+func (a *Uint32Attribute) Serialize() []byte {
+	const attrLen = 8 // 4-byte header + 4-byte payload
+
+	order := NativeEndian()
+	out := make([]byte, rtaAlignOf(attrLen))
+	order.PutUint16(out[0:2], attrLen)
+	order.PutUint16(out[2:4], a.Type)
+
+	if a.Type&NLA_F_NET_BYTEORDER == 0 {
+		order.PutUint32(out[4:], a.Value)
+		return out
+	}
+	binary.BigEndian.PutUint32(out[4:], a.Value)
+	return out
+}
+
+// Len reports the attribute length written into the header by Serialize.
+func (a *Uint32Attribute) Len() int {
+	return 8
+}
+
 // Extend RtAttr to handle data and children
 type RtAttr struct {
 	unix.RtAttr
@@ -403,6 +497,19 @@ func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, erro
 		if err != nil {
 			return nil, err
 		}
+
+		if err := s.SetSendTimeout(&SocketTimeoutTv); err != nil {
+			return nil, err
+		}
+		if err := s.SetReceiveTimeout(&SocketTimeoutTv); err != nil {
+			return nil, err
+		}
+		if EnableErrorMessageReporting {
+			if err := s.SetExtAck(true); err != nil {
+				return nil, err
+			}
+		}
+
 		defer s.Close()
 	} else {
 		s.Lock()
@@ -439,16 +546,39 @@ done:
 			if m.Header.Pid != pid {
 				continue
 			}
-			if m.Header.Type == unix.NLMSG_DONE {
-				break done
-			}
-			if m.Header.Type == unix.NLMSG_ERROR {
+			if m.Header.Type == unix.NLMSG_DONE || m.Header.Type == unix.NLMSG_ERROR {
 				native := NativeEndian()
-				error := int32(native.Uint32(m.Data[0:4]))
-				if error == 0 {
+				errno := int32(native.Uint32(m.Data[0:4]))
+				if errno == 0 {
 					break done
 				}
-				return nil, syscall.Errno(-error)
+				var err error
+				err = syscall.Errno(-errno)
+
+				unreadData := m.Data[4:]
+				if m.Header.Flags|unix.NLM_F_ACK_TLVS != 0 && len(unreadData) > syscall.SizeofNlMsghdr {
+					// Skip the echoed request message.
+					echoReqH := (*syscall.NlMsghdr)(unsafe.Pointer(&unreadData[0]))
+					unreadData = unreadData[nlmAlignOf(int(echoReqH.Len)):]
+
+					// Annotate `err` using nlmsgerr attributes.
+					for len(unreadData) >= syscall.SizeofRtAttr {
+						attr := (*syscall.RtAttr)(unsafe.Pointer(&unreadData[0]))
+						attrData := unreadData[syscall.SizeofRtAttr:attr.Len]
+
+						switch attr.Type {
+						case NLMSGERR_ATTR_MSG:
+							err = fmt.Errorf("%w: %s", err, string(attrData))
+
+						default:
+							// TODO: handle other NLMSGERR_ATTR types
+						}
+
+						unreadData = unreadData[rtaAlignOf(int(attr.Len)):]
+					}
+				}
+
+				return nil, err
 			}
 			if resType != 0 && m.Header.Type != resType {
 				continue
@@ -663,6 +793,16 @@ func (s *NetlinkSocket) SetReceiveTimeout(timeout *unix.Timeval) error {
 	return unix.SetsockoptTimeval(int(s.fd), unix.SOL_SOCKET, unix.SO_RCVTIMEO, timeout)
 }
 
+// SetExtAck requests error messages to be reported on the socket
+func (s *NetlinkSocket) SetExtAck(enable bool) error {
+	var enableN int
+	if enable {
+		enableN = 1
+	}
+
+	return unix.SetsockoptInt(int(s.fd), unix.SOL_NETLINK, unix.NETLINK_EXT_ACK, enableN)
+}
+
 func (s *NetlinkSocket) GetPid() (uint32, error) {
 	fd := int(atomic.LoadInt32(&s.fd))
 	lsa, err := unix.Getsockname(fd)

+ 79 - 0
vendor/github.com/vishvananda/netlink/nl/parse_attr_linux.go

@@ -0,0 +1,79 @@
+package nl
+
+import (
+	"encoding/binary"
+	"fmt"
+	"log"
+)
+
+// Attribute is one decoded netlink attribute as produced by ParseAttributes.
+type Attribute struct {
+	// Type is the raw 16-bit type field, including any NLA_F_* flag bits.
+	Type uint16
+	// Value is the payload; it aliases the buffer handed to ParseAttributes.
+	Value []byte
+}
+
+// ParseAttributes walks the netlink attributes packed in data and streams
+// them, in order, over the returned channel. The channel is closed after the
+// last attribute, or as soon as a malformed attribute (length below the
+// 4-byte header, or extending past the end of data) is met; the problem is
+// reported through the log package.
+//
+// The channel is unbuffered and fed by a goroutine, so the caller must drain
+// it until it is closed; otherwise that goroutine stays blocked forever.
+// Each Attribute.Value aliases data.
+func ParseAttributes(data []byte) <-chan Attribute {
+	native := NativeEndian()
+	result := make(chan Attribute)
+
+	go func() {
+		defer close(result)
+
+		i := 0
+		// "<=" rather than "<": an attribute made of a bare header (length 4,
+		// empty payload) sitting at the very end of data is still valid and
+		// must be reported.
+		for i+4 <= len(data) {
+			length := int(native.Uint16(data[i : i+2]))
+			attrType := native.Uint16(data[i+2 : i+4])
+
+			if length < 4 {
+				log.Printf("attribute 0x%02x has invalid length of %d bytes", attrType, length)
+				return
+			}
+
+			if len(data) < i+length {
+				log.Printf("attribute 0x%02x of length %d is truncated, only %d bytes remaining", attrType, length, len(data)-i)
+				return
+			}
+
+			result <- Attribute{
+				Type:  attrType,
+				Value: data[i+4 : i+length],
+			}
+			// Attributes are padded up to the rtattr alignment.
+			i += rtaAlignOf(length)
+		}
+	}()
+
+	return result
+}
+
+func PrintAttributes(data []byte) {
+	printAttributes(data, 0)
+}
+
+// printAttributes prints one line per attribute in data, prefixed with one
+// "> " per nesting level, and recurses into attributes flagged NLA_F_NESTED.
+func printAttributes(data []byte, level int) {
+	for attr := range ParseAttributes(data) {
+		// Use fmt.Print for the prefix: the builtin print writes to standard
+		// error, which would split each line across two streams.
+		for i := 0; i < level; i++ {
+			fmt.Print("> ")
+		}
+		nested := attr.Type&NLA_F_NESTED != 0
+		fmt.Printf("type=%d nested=%v len=%v %v\n", attr.Type&NLA_TYPE_MASK, nested, len(attr.Value), attr.Value)
+		if nested {
+			printAttributes(attr.Value, level+1)
+		}
+	}
+}
+
+// Uint32 decodes the payload as a uint32: native byte order by default,
+// big-endian when the NLA_F_NET_BYTEORDER flag is set in Type.
+func (attr *Attribute) Uint32() uint32 {
+	if attr.Type&NLA_F_NET_BYTEORDER == 0 {
+		return NativeEndian().Uint32(attr.Value)
+	}
+	return binary.BigEndian.Uint32(attr.Value)
+}
+
+// Uint64 decodes the payload as a uint64: native byte order by default,
+// big-endian when the NLA_F_NET_BYTEORDER flag is set in Type.
+func (attr *Attribute) Uint64() uint64 {
+	if attr.Type&NLA_F_NET_BYTEORDER == 0 {
+		return NativeEndian().Uint64(attr.Value)
+	}
+	return binary.BigEndian.Uint64(attr.Value)
+}

+ 4 - 0
vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go

@@ -11,6 +11,8 @@ const (
 const (
 	RDMA_NLDEV_CMD_GET     = 1
 	RDMA_NLDEV_CMD_SET     = 2
+	RDMA_NLDEV_CMD_NEWLINK = 3
+	RDMA_NLDEV_CMD_DELLINK = 4
 	RDMA_NLDEV_CMD_SYS_GET = 6
 	RDMA_NLDEV_CMD_SYS_SET = 7
 )
@@ -30,6 +32,8 @@ const (
 	RDMA_NLDEV_ATTR_PORT_STATE      = 12
 	RDMA_NLDEV_ATTR_PORT_PHYS_STATE = 13
 	RDMA_NLDEV_ATTR_DEV_NODE_TYPE   = 14
+	RDMA_NLDEV_ATTR_NDEV_NAME       = 51
+	RDMA_NLDEV_ATTR_LINK_TYPE       = 65
 	RDMA_NLDEV_SYS_ATTR_NETNS_MODE  = 66
 	RDMA_NLDEV_NET_NS_FD            = 68
 )

+ 2 - 2
vendor/github.com/vishvananda/netlink/nl/seg6_linux.go

@@ -23,7 +23,7 @@ func (s1 *IPv6SrHdr) Equal(s2 IPv6SrHdr) bool {
 		return false
 	}
 	for i := range s1.Segments {
-		if s1.Segments[i].Equal(s2.Segments[i]) != true {
+		if !s1.Segments[i].Equal(s2.Segments[i]) {
 			return false
 		}
 	}
@@ -89,7 +89,7 @@ func DecodeSEG6Encap(buf []byte) (int, []net.IP, error) {
 	}
 	buf = buf[12:]
 	if len(buf)%16 != 0 {
-		err := fmt.Errorf("DecodeSEG6Encap: error parsing Segment List (buf len: %d)\n", len(buf))
+		err := fmt.Errorf("DecodeSEG6Encap: error parsing Segment List (buf len: %d)", len(buf))
 		return mode, nil, err
 	}
 	for len(buf) > 0 {

+ 8 - 1
vendor/github.com/vishvananda/netlink/nl/syscall.go

@@ -1,6 +1,6 @@
 package nl
 
-// syscall package lack of rule atributes type.
+// The syscall package lacks the rule attribute types.
 // Thus they are defined below
 const (
 	FRA_UNSPEC  = iota
@@ -21,6 +21,13 @@ const (
 	FRA_TABLE  /* Extended table id */
 	FRA_FWMASK /* mask for netfilter mark */
 	FRA_OIFNAME
+	FRA_PAD
+	FRA_L3MDEV      /* iif or oif is l3mdev goto its table */
+	FRA_UID_RANGE   /* UID range */
+	FRA_PROTOCOL    /* Originator of the rule */
+	FRA_IP_PROTO    /* ip proto */
+	FRA_SPORT_RANGE /* sport */
+	FRA_DPORT_RANGE /* dport */
 )
 
 // ip rule netlink request types

+ 246 - 1
vendor/github.com/vishvananda/netlink/nl/tc_linux.go

@@ -90,10 +90,14 @@ const (
 	SizeofTcU32Sel       = 0x10 // without keys
 	SizeofTcGen          = 0x14
 	SizeofTcConnmark     = SizeofTcGen + 0x04
+	SizeofTcCsum         = SizeofTcGen + 0x04
 	SizeofTcMirred       = SizeofTcGen + 0x08
 	SizeofTcTunnelKey    = SizeofTcGen + 0x04
 	SizeofTcSkbEdit      = SizeofTcGen
 	SizeofTcPolice       = 2*SizeofTcRateSpec + 0x20
+	SizeofTcSfqQopt      = 0x0b
+	SizeofTcSfqRedStats  = 0x18
+	SizeofTcSfqQoptV1    = SizeofTcSfqQopt + SizeofTcSfqRedStats + 0x1c
 )
 
 // struct tcmsg {
@@ -691,6 +695,36 @@ func (x *TcConnmark) Serialize() []byte {
 	return (*(*[SizeofTcConnmark]byte)(unsafe.Pointer(x)))[:]
 }
 
+// TCA_CSUM_* attribute identifiers, numbered consecutively from zero by iota.
+const (
+	TCA_CSUM_UNSPEC = iota
+	TCA_CSUM_PARMS
+	TCA_CSUM_TM
+	TCA_CSUM_PAD
+	// TCA_CSUM_MAX aliases the last identifier declared above.
+	TCA_CSUM_MAX = TCA_CSUM_PAD
+)
+
+// TcCsum mirrors the C declaration
+//
+//	struct tc_csum {
+//	  tc_gen;
+//	  __u32 update_flags;
+//	}
+//
+// i.e. the generic action header followed by the update flags.
+type TcCsum struct {
+	TcGen
+	UpdateFlags uint32
+}
+
+// Len reports the serialized size, SizeofTcCsum.
+func (x *TcCsum) Len() int {
+	return SizeofTcCsum
+}
+
+// DeserializeTcCsum reinterprets the first SizeofTcCsum bytes of b as a
+// TcCsum. The result aliases b; it is not a copy.
+func DeserializeTcCsum(b []byte) *TcCsum {
+	head := b[0:SizeofTcCsum]
+	return (*TcCsum)(unsafe.Pointer(&head[0]))
+}
+
+// Serialize exposes the in-memory representation of x as a byte slice of
+// SizeofTcCsum bytes. The slice aliases x; it is not a copy.
+func (x *TcCsum) Serialize() []byte {
+	raw := (*[SizeofTcCsum]byte)(unsafe.Pointer(x))
+	return raw[:]
+}
+
 const (
 	TCA_ACT_MIRRED = 8
 )
@@ -735,7 +769,13 @@ const (
 	TCA_TUNNEL_KEY_ENC_IPV6_SRC
 	TCA_TUNNEL_KEY_ENC_IPV6_DST
 	TCA_TUNNEL_KEY_ENC_KEY_ID
-	TCA_TUNNEL_KEY_MAX = TCA_TUNNEL_KEY_ENC_KEY_ID
+	TCA_TUNNEL_KEY_PAD
+	TCA_TUNNEL_KEY_ENC_DST_PORT
+	TCA_TUNNEL_KEY_NO_CSUM
+	TCA_TUNNEL_KEY_ENC_OPTS
+	TCA_TUNNEL_KEY_ENC_TOS
+	TCA_TUNNEL_KEY_ENC_TTL
+	TCA_TUNNEL_KEY_MAX
 )
 
 type TcTunnelKey struct {
@@ -872,3 +912,208 @@ const (
 	TCA_HFSC_FSC
 	TCA_HFSC_USC
 )
+
+// TCA_FLOWER_* attribute identifiers, numbered consecutively from zero by
+// iota. The trailing comments give the payload type of each key attribute.
+// Because the values come from iota, entries must only ever be appended
+// (before __TCA_FLOWER_MAX), never inserted or reordered.
+const (
+	TCA_FLOWER_UNSPEC = iota
+	TCA_FLOWER_CLASSID
+	TCA_FLOWER_INDEV
+	TCA_FLOWER_ACT
+	TCA_FLOWER_KEY_ETH_DST       /* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_DST_MASK  /* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_SRC       /* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_SRC_MASK  /* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_TYPE      /* be16 */
+	TCA_FLOWER_KEY_IP_PROTO      /* u8 */
+	TCA_FLOWER_KEY_IPV4_SRC      /* be32 */
+	TCA_FLOWER_KEY_IPV4_SRC_MASK /* be32 */
+	TCA_FLOWER_KEY_IPV4_DST      /* be32 */
+	TCA_FLOWER_KEY_IPV4_DST_MASK /* be32 */
+	TCA_FLOWER_KEY_IPV6_SRC      /* struct in6_addr */
+	TCA_FLOWER_KEY_IPV6_SRC_MASK /* struct in6_addr */
+	TCA_FLOWER_KEY_IPV6_DST      /* struct in6_addr */
+	TCA_FLOWER_KEY_IPV6_DST_MASK /* struct in6_addr */
+	TCA_FLOWER_KEY_TCP_SRC       /* be16 */
+	TCA_FLOWER_KEY_TCP_DST       /* be16 */
+	TCA_FLOWER_KEY_UDP_SRC       /* be16 */
+	TCA_FLOWER_KEY_UDP_DST       /* be16 */
+
+	TCA_FLOWER_FLAGS
+	TCA_FLOWER_KEY_VLAN_ID       /* be16 */
+	TCA_FLOWER_KEY_VLAN_PRIO     /* u8   */
+	TCA_FLOWER_KEY_VLAN_ETH_TYPE /* be16 */
+
+	TCA_FLOWER_KEY_ENC_KEY_ID        /* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_SRC      /* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK /* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_DST      /* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_DST_MASK /* be32 */
+	TCA_FLOWER_KEY_ENC_IPV6_SRC      /* struct in6_addr */
+	TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK /* struct in6_addr */
+	TCA_FLOWER_KEY_ENC_IPV6_DST      /* struct in6_addr */
+	TCA_FLOWER_KEY_ENC_IPV6_DST_MASK /* struct in6_addr */
+
+	TCA_FLOWER_KEY_TCP_SRC_MASK  /* be16 */
+	TCA_FLOWER_KEY_TCP_DST_MASK  /* be16 */
+	TCA_FLOWER_KEY_UDP_SRC_MASK  /* be16 */
+	TCA_FLOWER_KEY_UDP_DST_MASK  /* be16 */
+	TCA_FLOWER_KEY_SCTP_SRC_MASK /* be16 */
+	TCA_FLOWER_KEY_SCTP_DST_MASK /* be16 */
+
+	TCA_FLOWER_KEY_SCTP_SRC /* be16 */
+	TCA_FLOWER_KEY_SCTP_DST /* be16 */
+
+	TCA_FLOWER_KEY_ENC_UDP_SRC_PORT      /* be16 */
+	TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK /* be16 */
+	TCA_FLOWER_KEY_ENC_UDP_DST_PORT      /* be16 */
+	TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK /* be16 */
+
+	TCA_FLOWER_KEY_FLAGS      /* be32 */
+	TCA_FLOWER_KEY_FLAGS_MASK /* be32 */
+
+	TCA_FLOWER_KEY_ICMPV4_CODE      /* u8 */
+	TCA_FLOWER_KEY_ICMPV4_CODE_MASK /* u8 */
+	TCA_FLOWER_KEY_ICMPV4_TYPE      /* u8 */
+	TCA_FLOWER_KEY_ICMPV4_TYPE_MASK /* u8 */
+	TCA_FLOWER_KEY_ICMPV6_CODE      /* u8 */
+	TCA_FLOWER_KEY_ICMPV6_CODE_MASK /* u8 */
+	TCA_FLOWER_KEY_ICMPV6_TYPE      /* u8 */
+	TCA_FLOWER_KEY_ICMPV6_TYPE_MASK /* u8 */
+
+	TCA_FLOWER_KEY_ARP_SIP      /* be32 */
+	TCA_FLOWER_KEY_ARP_SIP_MASK /* be32 */
+	TCA_FLOWER_KEY_ARP_TIP      /* be32 */
+	TCA_FLOWER_KEY_ARP_TIP_MASK /* be32 */
+	TCA_FLOWER_KEY_ARP_OP       /* u8 */
+	TCA_FLOWER_KEY_ARP_OP_MASK  /* u8 */
+	TCA_FLOWER_KEY_ARP_SHA      /* ETH_ALEN */
+	TCA_FLOWER_KEY_ARP_SHA_MASK /* ETH_ALEN */
+	TCA_FLOWER_KEY_ARP_THA      /* ETH_ALEN */
+	TCA_FLOWER_KEY_ARP_THA_MASK /* ETH_ALEN */
+
+	TCA_FLOWER_KEY_MPLS_TTL   /* u8 - 8 bits */
+	TCA_FLOWER_KEY_MPLS_BOS   /* u8 - 1 bit */
+	TCA_FLOWER_KEY_MPLS_TC    /* u8 - 3 bits */
+	TCA_FLOWER_KEY_MPLS_LABEL /* be32 - 20 bits */
+
+	TCA_FLOWER_KEY_TCP_FLAGS      /* be16 */
+	TCA_FLOWER_KEY_TCP_FLAGS_MASK /* be16 */
+
+	TCA_FLOWER_KEY_IP_TOS      /* u8 */
+	TCA_FLOWER_KEY_IP_TOS_MASK /* u8 */
+	TCA_FLOWER_KEY_IP_TTL      /* u8 */
+	TCA_FLOWER_KEY_IP_TTL_MASK /* u8 */
+
+	TCA_FLOWER_KEY_CVLAN_ID       /* be16 */
+	TCA_FLOWER_KEY_CVLAN_PRIO     /* u8   */
+	TCA_FLOWER_KEY_CVLAN_ETH_TYPE /* be16 */
+
+	TCA_FLOWER_KEY_ENC_IP_TOS      /* u8 */
+	TCA_FLOWER_KEY_ENC_IP_TOS_MASK /* u8 */
+	TCA_FLOWER_KEY_ENC_IP_TTL      /* u8 */
+	TCA_FLOWER_KEY_ENC_IP_TTL_MASK /* u8 */
+
+	TCA_FLOWER_KEY_ENC_OPTS
+	TCA_FLOWER_KEY_ENC_OPTS_MASK
+
+	// __TCA_FLOWER_MAX is a sentinel one past the last identifier above.
+	__TCA_FLOWER_MAX
+)
+
+// TcSfqQopt holds the basic SFQ qdisc options. The C declaration it is
+// modelled on is:
+//
+//	struct tc_sfq_qopt {
+//		unsigned	quantum;	/* Bytes per round allocated to flow */
+//		int		perturb_period;	/* Period of hash perturbation */
+//		__u32		limit;		/* Maximal packets in queue */
+//		unsigned	divisor;	/* Hash divisor  */
+//		unsigned	flows;		/* Maximal number of flows  */
+//	};
+//
+// NOTE(review): the C struct above declares quantum, divisor and flows as
+// `unsigned`, whereas the Go fields are uint8 — verify that the Go layout
+// and SizeofTcSfqQopt agree with what the kernel exchanges.
+type TcSfqQopt struct {
+	Quantum uint8
+	Perturb int32
+	Limit   uint32
+	Divisor uint8
+	Flows   uint8
+}
+
+// Len reports the serialized size, SizeofTcSfqQopt.
+func (x *TcSfqQopt) Len() int {
+	return SizeofTcSfqQopt
+}
+
+// DeserializeTcSfqQopt reinterprets the first SizeofTcSfqQopt bytes of b as
+// a TcSfqQopt. The result aliases b; it is not a copy.
+func DeserializeTcSfqQopt(b []byte) *TcSfqQopt {
+	head := b[0:SizeofTcSfqQopt]
+	return (*TcSfqQopt)(unsafe.Pointer(&head[0]))
+}
+
+// Serialize exposes the in-memory representation of x as a byte slice of
+// SizeofTcSfqQopt bytes. The slice aliases x; it is not a copy.
+func (x *TcSfqQopt) Serialize() []byte {
+	raw := (*[SizeofTcSfqQopt]byte)(unsafe.Pointer(x))
+	return raw[:]
+}
+
+// TcSfqRedStats holds the SFQRED drop/mark counters. It mirrors:
+//
+//	struct tc_sfqred_stats {
+//		__u32           prob_drop;      /* Early drops, below max threshold */
+//		__u32           forced_drop;	/* Early drops, after max threshold */
+//		__u32           prob_mark;      /* Marked packets, below max threshold */
+//		__u32           forced_mark;    /* Marked packets, after max threshold */
+//		__u32           prob_mark_head; /* Marked packets, below max threshold */
+//		__u32           forced_mark_head;/* Marked packets, after max threshold */
+//	};
+type TcSfqRedStats struct {
+	ProbDrop       uint32
+	ForcedDrop     uint32
+	ProbMark       uint32
+	ForcedMark     uint32
+	ProbMarkHead   uint32
+	ForcedMarkHead uint32
+}
+
+// Len reports the serialized size, SizeofTcSfqRedStats.
+func (x *TcSfqRedStats) Len() int {
+	return SizeofTcSfqRedStats
+}
+
+// DeserializeTcSfqRedStats reinterprets the first SizeofTcSfqRedStats bytes
+// of b as a TcSfqRedStats. The result aliases b; it is not a copy.
+func DeserializeTcSfqRedStats(b []byte) *TcSfqRedStats {
+	head := b[0:SizeofTcSfqRedStats]
+	return (*TcSfqRedStats)(unsafe.Pointer(&head[0]))
+}
+
+// Serialize exposes the in-memory representation of x as a byte slice of
+// SizeofTcSfqRedStats bytes. The slice aliases x; it is not a copy.
+func (x *TcSfqRedStats) Serialize() []byte {
+	raw := (*[SizeofTcSfqRedStats]byte)(unsafe.Pointer(x))
+	return raw[:]
+}
+
+// TcSfqQoptV1 is the extended SFQ option block: the basic options, the
+// per-flow and SFQRED parameters, then the SFQRED statistics. It mirrors:
+//
+//	struct tc_sfq_qopt_v1 {
+//		struct tc_sfq_qopt v0;
+//		unsigned int	depth;		/* max number of packets per flow */
+//		unsigned int	headdrop;
+//	/* SFQRED parameters */
+//		__u32		limit;		/* HARD maximal flow queue length (bytes) */
+//		__u32		qth_min;	/* Min average length threshold (bytes) */
+//		__u32		qth_max;	/* Max average length threshold (bytes) */
+//		unsigned char   Wlog;		/* log(W)		*/
+//		unsigned char   Plog;		/* log(P_max/(qth_max-qth_min))	*/
+//		unsigned char   Scell_log;	/* cell size for idle damping */
+//		unsigned char	flags;
+//		__u32		max_P;		/* probability, high resolution */
+//	/* SFQRED stats */
+//		struct tc_sfqred_stats stats;
+//	};
+type TcSfqQoptV1 struct {
+	TcSfqQopt
+	Depth    uint32
+	HeadDrop uint32
+	Limit    uint32
+	QthMin   uint32
+	QthMax   uint32
+	Wlog     byte
+	Plog     byte
+	ScellLog byte
+	Flags    byte
+	MaxP     uint32
+	TcSfqRedStats
+}
+
+// Len reports the serialized size, SizeofTcSfqQoptV1.
+func (x *TcSfqQoptV1) Len() int {
+	return SizeofTcSfqQoptV1
+}
+
+// DeserializeTcSfqQoptV1 reinterprets the first SizeofTcSfqQoptV1 bytes of b
+// as a TcSfqQoptV1. The result aliases b; it is not a copy.
+func DeserializeTcSfqQoptV1(b []byte) *TcSfqQoptV1 {
+	head := b[0:SizeofTcSfqQoptV1]
+	return (*TcSfqQoptV1)(unsafe.Pointer(&head[0]))
+}
+
+// Serialize exposes the in-memory representation of x as a byte slice of
+// SizeofTcSfqQoptV1 bytes. The slice aliases x; it is not a copy.
+func (x *TcSfqQoptV1) Serialize() []byte {
+	raw := (*[SizeofTcSfqQoptV1]byte)(unsafe.Pointer(x))
+	return raw[:]
+}

+ 1 - 1
vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go

@@ -13,7 +13,7 @@ const (
 	SizeofXfrmAlgoAuth       = 0x48
 	SizeofXfrmAlgoAEAD       = 0x48
 	SizeofXfrmEncapTmpl      = 0x18
-	SizeofXfrmUsersaFlush    = 0x8
+	SizeofXfrmUsersaFlush    = 0x1
 	SizeofXfrmReplayStateEsn = 0x18
 )
 

+ 217 - 0
vendor/github.com/vishvananda/netlink/proc_event_linux.go

@@ -0,0 +1,217 @@
+package netlink
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"os"
+	"syscall"
+
+	"github.com/vishvananda/netlink/nl"
+	"github.com/vishvananda/netns"
+	"golang.org/x/sys/unix"
+)
+
+// CN_IDX_PROC is the connector index used by ProcEventMonitor, both as the
+// multicast group to subscribe to and as the callback index of the control
+// message.
+const CN_IDX_PROC = 0x1
+
+// PROC_EVENT_* are the values compared against ProcEventHeader.What. Only
+// EXIT, FORK, EXEC and COMM are decoded by parseNetlinkMessage; the others
+// are dropped.
+const (
+	PROC_EVENT_NONE     = 0x00000000
+	PROC_EVENT_FORK     = 0x00000001
+	PROC_EVENT_EXEC     = 0x00000002
+	PROC_EVENT_UID      = 0x00000004
+	PROC_EVENT_GID      = 0x00000040
+	PROC_EVENT_SID      = 0x00000080
+	PROC_EVENT_PTRACE   = 0x00000100
+	PROC_EVENT_COMM     = 0x00000200
+	PROC_EVENT_COREDUMP = 0x40000000
+	PROC_EVENT_EXIT     = 0x80000000
+)
+
+// CN_VAL_PROC is the callback value and PROC_CN_MCAST_LISTEN the operation
+// code that ProcEventMonitor places in its control message.
+const (
+	CN_VAL_PROC          = 0x1
+	PROC_CN_MCAST_LISTEN = 0x1
+)
+
+// ProcEventMsg is implemented by every decoded process-event payload.
+type ProcEventMsg interface {
+	// Pid returns the process ID the event is reported for.
+	Pid() uint32
+	// Tgid returns the thread-group ID the event is reported for.
+	Tgid() uint32
+}
+
+// ProcEventHeader is the fixed header that precedes the event-specific
+// payload in a process-event message.
+type ProcEventHeader struct {
+	// What selects the payload type; see the PROC_EVENT_* constants.
+	What uint32
+	CPU  uint32
+	// NOTE(review): unit not visible here; presumably nanoseconds — confirm
+	// against the kernel's struct proc_event.
+	Timestamp uint64
+}
+
+// ProcEvent is one decoded process event: the common header plus the
+// event-specific payload.
+type ProcEvent struct {
+	ProcEventHeader
+	Msg ProcEventMsg
+}
+
+// setHeader copies every field of h into the embedded header.
+func (pe *ProcEvent) setHeader(h ProcEventHeader) {
+	pe.ProcEventHeader = h
+}
+
+// ExitProcEvent is the payload decoded for PROC_EVENT_EXIT.
+type ExitProcEvent struct {
+	ProcessPid  uint32
+	ProcessTgid uint32
+	ExitCode    uint32
+	ExitSignal  uint32
+	ParentPid   uint32
+	ParentTgid  uint32
+}
+
+// ExitProcEvent2 has the same fields as ExitProcEvent.
+// NOTE(review): nothing in this file references it; confirm whether it is
+// still needed.
+type ExitProcEvent2 struct {
+	ProcessPid  uint32
+	ProcessTgid uint32
+	ExitCode    uint32
+	ExitSignal  uint32
+	ParentPid   uint32
+	ParentTgid  uint32
+}
+
+// Pid returns the ID of the exiting process.
+func (ev *ExitProcEvent) Pid() uint32 {
+	return ev.ProcessPid
+}
+
+// Tgid returns the thread-group ID of the exiting process.
+func (ev *ExitProcEvent) Tgid() uint32 {
+	return ev.ProcessTgid
+}
+
+// ExecProcEvent is the payload decoded for PROC_EVENT_EXEC.
+type ExecProcEvent struct {
+	ProcessPid  uint32
+	ProcessTgid uint32
+}
+
+// Pid returns the ID of the process the event refers to.
+func (ev *ExecProcEvent) Pid() uint32 {
+	return ev.ProcessPid
+}
+
+// Tgid returns the thread-group ID of the process the event refers to.
+func (ev *ExecProcEvent) Tgid() uint32 {
+	return ev.ProcessTgid
+}
+
+// ForkProcEvent is the payload decoded for PROC_EVENT_FORK.
+type ForkProcEvent struct {
+	ParentPid  uint32
+	ParentTgid uint32
+	ChildPid   uint32
+	ChildTgid  uint32
+}
+
+// Pid returns the parent's process ID (not the child's).
+func (ev *ForkProcEvent) Pid() uint32 {
+	return ev.ParentPid
+}
+
+// Tgid returns the parent's thread-group ID (not the child's).
+func (ev *ForkProcEvent) Tgid() uint32 {
+	return ev.ParentTgid
+}
+
+// CommProcEvent is the payload decoded for PROC_EVENT_COMM.
+type CommProcEvent struct {
+	ProcessPid  uint32
+	ProcessTgid uint32
+	// Comm is a fixed 16-byte field.
+	// NOTE(review): presumably the NUL-padded command name — confirm.
+	Comm [16]byte
+}
+
+// Pid returns the ID of the process the event refers to.
+func (ev *CommProcEvent) Pid() uint32 {
+	return ev.ProcessPid
+}
+
+// Tgid returns the thread-group ID of the process the event refers to.
+func (ev *CommProcEvent) Tgid() uint32 {
+	return ev.ProcessTgid
+}
+
+// ProcEventMonitor subscribes to the proc connector and forwards every
+// decoded process event to ch.
+//
+// It returns once the subscription is set up; events are delivered by a
+// background goroutine which closes ch when it stops. That goroutine stops on
+// the first receive error or on a message whose sender is not the kernel; in
+// both cases the error is sent on errorChan first, so errorChan must be read
+// (or buffered) or the goroutine blocks. If done is non-nil, closing it
+// closes the socket, which makes the pending receive fail and so ends the
+// goroutine the same way.
+//
+// The returned error reports a failure to create the handle, to subscribe,
+// or to send the listen request.
+func ProcEventMonitor(ch chan<- ProcEvent, done <-chan struct{}, errorChan chan<- error) error {
+	h, err := NewHandle()
+	if err != nil {
+		return err
+	}
+	defer h.Delete()
+
+	s, err := nl.SubscribeAt(netns.None(), netns.None(), unix.NETLINK_CONNECTOR, CN_IDX_PROC)
+	if err != nil {
+		return err
+	}
+
+	var nlmsg nl.NetlinkRequest
+
+	nlmsg.Pid = uint32(os.Getpid())
+	nlmsg.Type = unix.NLMSG_DONE
+	nlmsg.Len = uint32(unix.SizeofNlMsghdr)
+
+	cm := nl.NewCnMsg(CN_IDX_PROC, CN_VAL_PROC, PROC_CN_MCAST_LISTEN)
+	nlmsg.AddData(cm)
+
+	// Without a successful listen request no event will ever arrive, so
+	// report the failure instead of starting a reader that waits forever, and
+	// release the socket nobody else owns yet.
+	if err := s.Send(&nlmsg); err != nil {
+		s.Close()
+		return err
+	}
+
+	if done != nil {
+		go func() {
+			<-done
+			s.Close()
+		}()
+	}
+
+	go func() {
+		defer close(ch)
+		for {
+			msgs, from, err := s.Receive()
+			if err != nil {
+				errorChan <- err
+				return
+			}
+			if from.Pid != nl.PidKernel {
+				errorChan <- fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel)
+				return
+			}
+
+			for _, m := range msgs {
+				e := parseNetlinkMessage(m)
+				if e != nil {
+					ch <- *e
+				}
+			}
+
+		}
+	}()
+
+	return nil
+}
+
+func parseNetlinkMessage(m syscall.NetlinkMessage) *ProcEvent {
+	if m.Header.Type == unix.NLMSG_DONE {
+		buf := bytes.NewBuffer(m.Data)
+		msg := &nl.CnMsg{}
+		hdr := &ProcEventHeader{}
+		binary.Read(buf, nl.NativeEndian(), msg)
+		binary.Read(buf, nl.NativeEndian(), hdr)
+
+		pe := &ProcEvent{}
+		pe.setHeader(*hdr)
+		switch hdr.What {
+		case PROC_EVENT_EXIT:
+			event := &ExitProcEvent{}
+			binary.Read(buf, nl.NativeEndian(), event)
+			pe.Msg = event
+			return pe
+		case PROC_EVENT_FORK:
+			event := &ForkProcEvent{}
+			binary.Read(buf, nl.NativeEndian(), event)
+			pe.Msg = event
+			return pe
+		case PROC_EVENT_EXEC:
+			event := &ExecProcEvent{}
+			binary.Read(buf, nl.NativeEndian(), event)
+			pe.Msg = event
+			return pe
+		case PROC_EVENT_COMM:
+			event := &CommProcEvent{}
+			binary.Read(buf, nl.NativeEndian(), event)
+			pe.Msg = event
+			return pe
+		}
+		return nil
+	}
+
+	return nil
+}

+ 33 - 7
vendor/github.com/vishvananda/netlink/qdisc.go

@@ -308,13 +308,15 @@ func (qdisc *Fq) Type() string {
 // FQ_Codel (Fair Queuing Controlled Delay) is queuing discipline that combines Fair Queuing with the CoDel AQM scheme.
 type FqCodel struct {
 	QdiscAttrs
-	Target   uint32
-	Limit    uint32
-	Interval uint32
-	ECN      uint32
-	Flows    uint32
-	Quantum  uint32
-	// There are some more attributes here, but support for them seems not ubiquitous
+	Target        uint32
+	Limit         uint32
+	Interval      uint32
+	ECN           uint32
+	Flows         uint32
+	Quantum       uint32
+	CEThreshold   uint32
+	DropBatchSize uint32
+	MemoryLimit   uint32
 }
 
 func (fqcodel *FqCodel) String() string {
@@ -338,3 +340,27 @@ func (qdisc *FqCodel) Attrs() *QdiscAttrs {
 func (qdisc *FqCodel) Type() string {
 	return "fq_codel"
 }
+
+// Sfq describes a qdisc of type "sfq".
+type Sfq struct {
+	QdiscAttrs
+	// TODO: Only the simplified options for SFQ are handled here. Support for the extended one can be added later.
+	Quantum uint8
+	Perturb uint8
+	Limit   uint32
+	Divisor uint8
+}
+
+// String renders the generic qdisc attributes followed by the SFQ options.
+func (sfq *Sfq) String() string {
+	const layout = "{%v -- Quantum: %v, Perturb: %v, Limit: %v, Divisor: %v}"
+	return fmt.Sprintf(layout, sfq.Attrs(), sfq.Quantum, sfq.Perturb, sfq.Limit, sfq.Divisor)
+}
+
+// Attrs returns a pointer to the embedded generic qdisc attributes.
+func (qdisc *Sfq) Attrs() *QdiscAttrs {
+	attrs := &qdisc.QdiscAttrs
+	return attrs
+}
+
+// Type returns the qdisc kind, "sfq".
+func (qdisc *Sfq) Type() string {
+	const kind = "sfq"
+	return kind
+}

+ 55 - 11
vendor/github.com/vishvananda/netlink/qdisc_linux.go

@@ -250,7 +250,15 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
 		if qdisc.Quantum > 0 {
 			options.AddRtAttr(nl.TCA_FQ_CODEL_QUANTUM, nl.Uint32Attr((uint32(qdisc.Quantum))))
 		}
-
+		if qdisc.CEThreshold > 0 {
+			options.AddRtAttr(nl.TCA_FQ_CODEL_CE_THRESHOLD, nl.Uint32Attr(qdisc.CEThreshold))
+		}
+		if qdisc.DropBatchSize > 0 {
+			options.AddRtAttr(nl.TCA_FQ_CODEL_DROP_BATCH_SIZE, nl.Uint32Attr(qdisc.DropBatchSize))
+		}
+		if qdisc.MemoryLimit > 0 {
+			options.AddRtAttr(nl.TCA_FQ_CODEL_MEMORY_LIMIT, nl.Uint32Attr(qdisc.MemoryLimit))
+		}
 	case *Fq:
 		options.AddRtAttr(nl.TCA_FQ_RATE_ENABLE, nl.Uint32Attr((uint32(qdisc.Pacing))))
 
@@ -278,6 +286,14 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
 		if qdisc.FlowDefaultRate > 0 {
 			options.AddRtAttr(nl.TCA_FQ_FLOW_DEFAULT_RATE, nl.Uint32Attr((uint32(qdisc.FlowDefaultRate))))
 		}
+	case *Sfq:
+		opt := nl.TcSfqQoptV1{}
+		opt.TcSfqQopt.Quantum = qdisc.Quantum
+		opt.TcSfqQopt.Perturb = int32(qdisc.Perturb)
+		opt.TcSfqQopt.Limit = qdisc.Limit
+		opt.TcSfqQopt.Divisor = qdisc.Divisor
+
+		options = nl.NewRtAttr(nl.TCA_OPTIONS, opt.Serialize())
 	default:
 		options = nil
 	}
@@ -362,6 +378,8 @@ func (h *Handle) QdiscList(link Link) ([]Qdisc, error) {
 					qdisc = &FqCodel{}
 				case "netem":
 					qdisc = &Netem{}
+				case "sfq":
+					qdisc = &Sfq{}
 				default:
 					qdisc = &GenericQdisc{QdiscType: qdiscType}
 				}
@@ -417,6 +435,10 @@ func (h *Handle) QdiscList(link Link) ([]Qdisc, error) {
 					if err := parseNetemData(qdisc, attr.Value); err != nil {
 						return nil, err
 					}
+				case "sfq":
+					if err := parseSfqData(qdisc, attr.Value); err != nil {
+						return nil, err
+					}
 
 					// no options for ingress
 				}
@@ -446,7 +468,6 @@ func parsePrioData(qdisc Qdisc, value []byte) error {
 }
 
 func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
-	native = nl.NativeEndian()
 	htb := qdisc.(*Htb)
 	for _, datum := range data {
 		switch datum.Attr.Type {
@@ -466,7 +487,6 @@ func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
 }
 
 func parseFqCodelData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
-	native = nl.NativeEndian()
 	fqCodel := qdisc.(*FqCodel)
 	for _, datum := range data {
 
@@ -483,6 +503,12 @@ func parseFqCodelData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
 			fqCodel.Flows = native.Uint32(datum.Value)
 		case nl.TCA_FQ_CODEL_QUANTUM:
 			fqCodel.Quantum = native.Uint32(datum.Value)
+		case nl.TCA_FQ_CODEL_CE_THRESHOLD:
+			fqCodel.CEThreshold = native.Uint32(datum.Value)
+		case nl.TCA_FQ_CODEL_DROP_BATCH_SIZE:
+			fqCodel.DropBatchSize = native.Uint32(datum.Value)
+		case nl.TCA_FQ_CODEL_MEMORY_LIMIT:
+			fqCodel.MemoryLimit = native.Uint32(datum.Value)
 		}
 	}
 	return nil
@@ -490,13 +516,11 @@ func parseFqCodelData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
 
 func parseHfscData(qdisc Qdisc, data []byte) error {
 	Hfsc := qdisc.(*Hfsc)
-	native = nl.NativeEndian()
 	Hfsc.Defcls = native.Uint16(data)
 	return nil
 }
 
 func parseFqData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
-	native = nl.NativeEndian()
 	fq := qdisc.(*Fq)
 	for _, datum := range data {
 		switch datum.Attr.Type {
@@ -561,7 +585,6 @@ func parseNetemData(qdisc Qdisc, value []byte) error {
 }
 
 func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
-	native = nl.NativeEndian()
 	tbf := qdisc.(*Tbf)
 	for _, datum := range data {
 		switch datum.Attr.Type {
@@ -582,6 +605,17 @@ func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
 	return nil
 }
 
+// parseSfqData fills the Sfq behind qdisc from the raw option payload in
+// value. Only the basic options (quantum, perturb, limit, divisor) are
+// copied; Perturb is narrowed to uint8. It always returns nil.
+//
+// NOTE(review): value is reinterpreted as a TcSfqQoptV1 without a length
+// check — confirm that callers always pass a payload of at least
+// nl.SizeofTcSfqQoptV1 bytes.
+func parseSfqData(qdisc Qdisc, value []byte) error {
+	sfq := qdisc.(*Sfq)
+	basic := &nl.DeserializeTcSfqQoptV1(value).TcSfqQopt
+
+	sfq.Quantum = basic.Quantum
+	sfq.Perturb = uint8(basic.Perturb)
+	sfq.Limit = basic.Limit
+	sfq.Divisor = basic.Divisor
+	return nil
+}
+
 const (
 	TIME_UNITS_PER_SEC = 1000000
 )
@@ -598,10 +632,10 @@ func initClock() {
 		return
 	}
 	parts := strings.Split(strings.TrimSpace(string(data)), " ")
-	if len(parts) < 3 {
+	if len(parts) < 4 {
 		return
 	}
-	var vals [3]uint64
+	var vals [4]uint64
 	for i := range vals {
 		val, err := strconv.ParseUint(parts[i], 16, 32)
 		if err != nil {
@@ -615,7 +649,12 @@ func initClock() {
 	}
 	clockFactor = float64(vals[2]) / TIME_UNITS_PER_SEC
 	tickInUsec = float64(vals[0]) / float64(vals[1]) * clockFactor
-	hz = float64(vals[0])
+	if vals[2] == 1000000 {
+		// ref https://git.kernel.org/pub/scm/network/iproute2/iproute2.git/tree/lib/utils.c#n963
+		hz = float64(vals[3])
+	} else {
+		hz = 100
+	}
 }
 
 func TickInUsec() float64 {
@@ -663,6 +702,11 @@ func latency(rate uint64, limit, buffer uint32) float64 {
 	return TIME_UNITS_PER_SEC*(float64(limit)/float64(rate)) - float64(tick2Time(buffer))
 }
 
-func Xmittime(rate uint64, size uint32) float64 {
-	return TickInUsec() * TIME_UNITS_PER_SEC * (float64(size) / float64(rate))
+// Xmittime returns, in ticks, the time needed to transmit size bytes at
+// rate: size/rate scaled by TIME_UNITS_PER_SEC and converted with time2Tick.
+func Xmittime(rate uint64, size uint32) uint32 {
+	// https://git.kernel.org/pub/scm/network/iproute2/iproute2.git/tree/tc/tc_core.c#n62
+	units := TIME_UNITS_PER_SEC * (float64(size) / float64(rate))
+	return time2Tick(uint32(units))
+}
+
+// Xmitsize is the inverse of Xmittime: it returns how many bytes can be
+// transmitted at rate during the given number of ticks.
+func Xmitsize(rate uint64, ticks uint32) uint32 {
+	units := float64(tick2Time(ticks))
+	return uint32(float64(rate) * units / TIME_UNITS_PER_SEC)
 }

+ 82 - 15
vendor/github.com/vishvananda/netlink/rdma_link_linux.go

@@ -77,28 +77,39 @@ func executeOneGetRdmaLink(data []byte) (*RdmaLink, error) {
 	return &link, nil
 }
 
-func execRdmaGetLink(req *nl.NetlinkRequest, name string) (*RdmaLink, error) {
+// execRdmaSetLink sends req on a NETLINK_RDMA socket and returns only the
+// resulting error; any reply payload is discarded.
+func execRdmaSetLink(req *nl.NetlinkRequest) error {
+	if _, err := req.Execute(unix.NETLINK_RDMA, 0); err != nil {
+		return err
+	}
+	return nil
+}
+
+// RdmaLinkList returns all RDMA link devices, using the package-level
+// handle.
+// Equivalent to: `rdma dev show`
+func RdmaLinkList() ([]*RdmaLink, error) {
+	return pkgHandle.RdmaLinkList()
+}
+
+// RdmaLinkList gets a list of RDMA link devices.
+// Equivalent to: `rdma dev show`
+func (h *Handle) RdmaLinkList() ([]*RdmaLink, error) {
+	proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_GET)
+	req := h.newNetlinkRequest(proto, unix.NLM_F_ACK|unix.NLM_F_DUMP)
 
 	msgs, err := req.Execute(unix.NETLINK_RDMA, 0)
 	if err != nil {
 		return nil, err
 	}
+
+	var res []*RdmaLink
 	for _, m := range msgs {
 		link, err := executeOneGetRdmaLink(m)
 		if err != nil {
 			return nil, err
 		}
-		if link.Attrs.Name == name {
-			return link, nil
-		}
+		res = append(res, link)
 	}
-	return nil, fmt.Errorf("Rdma device %v not found", name)
-}
-
-func execRdmaSetLink(req *nl.NetlinkRequest) error {
 
-	_, err := req.Execute(unix.NETLINK_RDMA, 0)
-	return err
+	return res, nil
 }
 
 // RdmaLinkByName finds a link by name and returns a pointer to the object if
@@ -110,11 +121,16 @@ func RdmaLinkByName(name string) (*RdmaLink, error) {
 // RdmaLinkByName finds a link by name and returns a pointer to the object if
 // found and nil error, otherwise returns error code.
 func (h *Handle) RdmaLinkByName(name string) (*RdmaLink, error) {
-
-	proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_GET)
-	req := h.newNetlinkRequest(proto, unix.NLM_F_ACK|unix.NLM_F_DUMP)
-
-	return execRdmaGetLink(req, name)
+	links, err := h.RdmaLinkList()
+	if err != nil {
+		return nil, err
+	}
+	for _, link := range links {
+		if link.Attrs.Name == name {
+			return link, nil
+		}
+	}
+	return nil, fmt.Errorf("Rdma device %v not found", name)
 }
 
 // RdmaLinkSetName sets the name of the rdma link device. Return nil on success
@@ -262,3 +278,54 @@ func (h *Handle) RdmaLinkSetNsFd(link *RdmaLink, fd uint32) error {
 
 	return execRdmaSetLink(req)
 }
+
+// RdmaLinkDel removes the rdma link called name, using the package-level
+// handle.
+//
+// Similar to: rdma link delete NAME
+// REF: https://man7.org/linux/man-pages/man8/rdma-link.8.html
+func RdmaLinkDel(name string) error {
+	return pkgHandle.RdmaLinkDel(name)
+}
+
+// RdmaLinkDel removes the rdma link called name. The link is first looked up
+// by name to obtain its device index, which is what the delete request
+// carries; a failed lookup is returned as is.
+func (h *Handle) RdmaLinkDel(name string) error {
+	link, err := h.RdmaLinkByName(name)
+	if err != nil {
+		return err
+	}
+
+	req := h.newNetlinkRequest(
+		getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_DELLINK),
+		unix.NLM_F_ACK,
+	)
+
+	index := make([]byte, 4)
+	native.PutUint32(index, link.Attrs.Index)
+	req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_INDEX, index))
+
+	_, err = req.Execute(unix.NETLINK_RDMA, 0)
+	return err
+}
+
+// RdmaLinkAdd creates an rdma link of the given type on top of a network
+// device, using the package-level handle.
+// Similar to: rdma link add NAME type TYPE netdev NETDEV
+//	NAME - specifies the new name of the rdma link to add
+//	TYPE - specifies which rdma type to use.  Link types:
+//		rxe - Soft RoCE driver
+//		siw - Soft iWARP driver
+//	NETDEV - specifies the network device to which the link is bound
+//
+// REF: https://man7.org/linux/man-pages/man8/rdma-link.8.html
+func RdmaLinkAdd(linkName, linkType, netdev string) error {
+	return pkgHandle.RdmaLinkAdd(linkName, linkType, netdev)
+}
+
+// RdmaLinkAdd creates an rdma link named linkName of type linkType bound to
+// the network device netdev. The three strings are sent zero-terminated as
+// the DEV_NAME, LINK_TYPE and NDEV_NAME attributes, in that order.
+func (h *Handle) RdmaLinkAdd(linkName string, linkType string, netdev string) error {
+	req := h.newNetlinkRequest(
+		getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_NEWLINK),
+		unix.NLM_F_ACK,
+	)
+
+	nameAttr := nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_NAME, nl.ZeroTerminated(linkName))
+	req.AddData(nameAttr)
+	typeAttr := nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_LINK_TYPE, nl.ZeroTerminated(linkType))
+	req.AddData(typeAttr)
+	netdevAttr := nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_NDEV_NAME, nl.ZeroTerminated(netdev))
+	req.AddData(netdevAttr)
+
+	_, err := req.Execute(unix.NETLINK_RDMA, 0)
+	return err
+}

+ 66 - 19
vendor/github.com/vishvananda/netlink/route.go

@@ -11,6 +11,24 @@ type Scope uint8
 
 type NextHopFlag int
 
+// RT_FILTER_* are bit flags that are OR-ed together into the filterMask
+// argument of RouteListFiltered. Each flag that is set makes the matching
+// field of the filter route take part in the comparison. The first flag has
+// the value 1<<1; bit 0 is not assigned.
+const (
+	RT_FILTER_PROTOCOL uint64 = 1 << (1 + iota)
+	RT_FILTER_SCOPE
+	RT_FILTER_TYPE
+	RT_FILTER_TOS
+	RT_FILTER_IIF
+	RT_FILTER_OIF
+	RT_FILTER_DST
+	RT_FILTER_SRC
+	RT_FILTER_GW
+	RT_FILTER_TABLE
+	RT_FILTER_HOPLIMIT
+	RT_FILTER_PRIORITY
+	RT_FILTER_MARK
+	RT_FILTER_MASK
+	RT_FILTER_REALM
+)
+
 type Destination interface {
 	Family() int
 	Decode([]byte) error
@@ -27,27 +45,46 @@ type Encap interface {
 	Equal(Encap) bool
 }
 
+// RouteProtocol describes the originator of a route. It is the type of the
+// Route.Protocol field.
+type RouteProtocol int
+
 // Route represents a netlink route.
+//
+// Protocol identifies the originator of the route and Family is filled from
+// the address family of the received route message. When a route is sent,
+// Via is encoded as RTA_VIA and a positive Realm as RTA_FLOW. The fields
+// MTU, Window, Rtt, RttVar, Ssthresh, Cwnd, AdvMSS, Reordering, Hoplimit,
+// InitCwnd, Features, RtoMin, InitRwnd, QuickACK and FastOpenNoCookie are
+// sent as the corresponding RTAX_* entries when they are greater than zero,
+// and Congctl is sent as RTAX_CC_ALGO when it is not empty.
 type Route struct {
-	LinkIndex  int
-	ILinkIndex int
-	Scope      Scope
-	Dst        *net.IPNet
-	Src        net.IP
-	Gw         net.IP
-	MultiPath  []*NexthopInfo
-	Protocol   int
-	Priority   int
-	Table      int
-	Type       int
-	Tos        int
-	Flags      int
-	MPLSDst    *int
-	NewDst     Destination
-	Encap      Encap
-	MTU        int
-	AdvMSS     int
-	Hoplimit   int
+	LinkIndex        int
+	ILinkIndex       int
+	Scope            Scope
+	Dst              *net.IPNet
+	Src              net.IP
+	Gw               net.IP
+	MultiPath        []*NexthopInfo
+	Protocol         RouteProtocol
+	Priority         int
+	Family           int
+	Table            int
+	Type             int
+	Tos              int
+	Flags            int
+	MPLSDst          *int
+	NewDst           Destination
+	Encap            Encap
+	Via              Destination
+	Realm            int
+	MTU              int
+	Window           int
+	Rtt              int
+	RttVar           int
+	Ssthresh         int
+	Cwnd             int
+	AdvMSS           int
+	Reordering       int
+	Hoplimit         int
+	InitCwnd         int
+	Features         int
+	RtoMin           int
+	InitRwnd         int
+	QuickACK         int
+	Congctl          string
+	FastOpenNoCookie int
 }
 
 func (r Route) String() string {
@@ -66,6 +103,9 @@ func (r Route) String() string {
 	if r.Encap != nil {
 		elems = append(elems, fmt.Sprintf("Encap: %s", r.Encap))
 	}
+	if r.Via != nil {
+		elems = append(elems, fmt.Sprintf("Via: %s", r.Via))
+	}
 	elems = append(elems, fmt.Sprintf("Src: %s", r.Src))
 	if len(r.MultiPath) > 0 {
 		elems = append(elems, fmt.Sprintf("Gw: %s", r.MultiPath))
@@ -74,6 +114,7 @@ func (r Route) String() string {
 	}
 	elems = append(elems, fmt.Sprintf("Flags: %s", r.ListFlags()))
 	elems = append(elems, fmt.Sprintf("Table: %d", r.Table))
+	elems = append(elems, fmt.Sprintf("Realm: %d", r.Realm))
 	return fmt.Sprintf("{%s}", strings.Join(elems, " "))
 }
 
@@ -87,6 +128,7 @@ func (r Route) Equal(x Route) bool {
 		nexthopInfoSlice(r.MultiPath).Equal(x.MultiPath) &&
 		r.Protocol == x.Protocol &&
 		r.Priority == x.Priority &&
+		r.Realm == x.Realm &&
 		r.Table == x.Table &&
 		r.Type == x.Type &&
 		r.Tos == x.Tos &&
@@ -94,6 +136,7 @@ func (r Route) Equal(x Route) bool {
 		r.Flags == x.Flags &&
 		(r.MPLSDst == x.MPLSDst || (r.MPLSDst != nil && x.MPLSDst != nil && *r.MPLSDst == *x.MPLSDst)) &&
 		(r.NewDst == x.NewDst || (r.NewDst != nil && r.NewDst.Equal(x.NewDst))) &&
+		(r.Via == x.Via || (r.Via != nil && r.Via.Equal(x.Via))) &&
 		(r.Encap == x.Encap || (r.Encap != nil && r.Encap.Equal(x.Encap)))
 }
 
@@ -123,6 +166,7 @@ type NexthopInfo struct {
 	Flags     int
 	NewDst    Destination
 	Encap     Encap
+	Via       Destination
 }
 
 func (n *NexthopInfo) String() string {
@@ -134,6 +178,9 @@ func (n *NexthopInfo) String() string {
 	if n.Encap != nil {
 		elems = append(elems, fmt.Sprintf("Encap: %s", n.Encap))
 	}
+	if n.Via != nil {
+		elems = append(elems, fmt.Sprintf("Via: %s", n.Via))
+	}
 	elems = append(elems, fmt.Sprintf("Weight: %d", n.Hops+1))
 	elems = append(elems, fmt.Sprintf("Gw: %s", n.Gw))
 	elems = append(elems, fmt.Sprintf("Flags: %s", n.ListFlags()))

+ 513 - 39
vendor/github.com/vishvananda/netlink/route_linux.go

@@ -1,8 +1,11 @@
 package netlink
 
 import (
+	"bytes"
+	"encoding/binary"
 	"fmt"
 	"net"
+	"strconv"
 	"strings"
 	"syscall"
 
@@ -21,19 +24,23 @@ const (
 	SCOPE_NOWHERE  Scope = unix.RT_SCOPE_NOWHERE
 )
 
-const (
-	RT_FILTER_PROTOCOL uint64 = 1 << (1 + iota)
-	RT_FILTER_SCOPE
-	RT_FILTER_TYPE
-	RT_FILTER_TOS
-	RT_FILTER_IIF
-	RT_FILTER_OIF
-	RT_FILTER_DST
-	RT_FILTER_SRC
-	RT_FILTER_GW
-	RT_FILTER_TABLE
-	RT_FILTER_HOPLIMIT
-)
+// String returns the lowercase name of the scope ("universe", "site",
+// "link", "host" or "nowhere"), or "unknown" for any other value.
+func (s Scope) String() string {
+	name := "unknown"
+	switch s {
+	case SCOPE_UNIVERSE:
+		name = "universe"
+	case SCOPE_SITE:
+		name = "site"
+	case SCOPE_LINK:
+		name = "link"
+	case SCOPE_HOST:
+		name = "host"
+	case SCOPE_NOWHERE:
+		name = "nowhere"
+	}
+	return name
+}
+
 
 const (
 	FLAG_ONLINK    NextHopFlag = unix.RTNH_F_ONLINK
@@ -128,7 +135,6 @@ func (e *MPLSEncap) Decode(buf []byte) error {
 	if len(buf) < 4 {
 		return fmt.Errorf("lack of bytes")
 	}
-	native := nl.NativeEndian()
 	l := native.Uint16(buf)
 	if len(buf) < int(l) {
 		return fmt.Errorf("lack of bytes")
@@ -144,7 +150,6 @@ func (e *MPLSEncap) Decode(buf []byte) error {
 
 func (e *MPLSEncap) Encode() ([]byte, error) {
 	s := nl.EncodeMPLSStack(e.Labels...)
-	native := nl.NativeEndian()
 	hdr := make([]byte, 4)
 	native.PutUint16(hdr, uint16(len(s)+4))
 	native.PutUint16(hdr[2:], nl.MPLS_IPTUNNEL_DST)
@@ -200,7 +205,6 @@ func (e *SEG6Encap) Decode(buf []byte) error {
 	if len(buf) < 4 {
 		return fmt.Errorf("lack of bytes")
 	}
-	native := nl.NativeEndian()
 	// Get Length(l) & Type(typ) : 2 + 2 bytes
 	l := native.Uint16(buf)
 	if len(buf) < int(l) {
@@ -220,7 +224,6 @@ func (e *SEG6Encap) Decode(buf []byte) error {
 }
 func (e *SEG6Encap) Encode() ([]byte, error) {
 	s, err := nl.EncodeSEG6Encap(e.Mode, e.Segments)
-	native := nl.NativeEndian()
 	hdr := make([]byte, 4)
 	native.PutUint16(hdr, uint16(len(s)+4))
 	native.PutUint16(hdr[2:], nl.SEG6_IPTUNNEL_SRH)
@@ -230,7 +233,7 @@ func (e *SEG6Encap) String() string {
 	segs := make([]string, 0, len(e.Segments))
 	// append segment backwards (from n to 0) since seg#0 is the last segment.
 	for i := len(e.Segments); i > 0; i-- {
-		segs = append(segs, fmt.Sprintf("%s", e.Segments[i-1]))
+		segs = append(segs, e.Segments[i-1].String())
 	}
 	str := fmt.Sprintf("mode %s segs %d [ %s ]", nl.SEG6EncapModeString(e.Mode),
 		len(e.Segments), strings.Join(segs, " "))
@@ -281,7 +284,6 @@ func (e *SEG6LocalEncap) Decode(buf []byte) error {
 	if err != nil {
 		return err
 	}
-	native := nl.NativeEndian()
 	for _, attr := range attrs {
 		switch attr.Attr.Type {
 		case nl.SEG6_LOCAL_ACTION:
@@ -311,7 +313,6 @@ func (e *SEG6LocalEncap) Decode(buf []byte) error {
 }
 func (e *SEG6LocalEncap) Encode() ([]byte, error) {
 	var err error
-	native := nl.NativeEndian()
 	res := make([]byte, 8)
 	native.PutUint16(res, 8) // length
 	native.PutUint16(res[2:], nl.SEG6_LOCAL_ACTION)
@@ -402,7 +403,7 @@ func (e *SEG6LocalEncap) String() string {
 		segs := make([]string, 0, len(e.Segments))
 		//append segment backwards (from n to 0) since seg#0 is the last segment.
 		for i := len(e.Segments); i > 0; i-- {
-			segs = append(segs, fmt.Sprintf("%s", e.Segments[i-1]))
+			segs = append(segs, e.Segments[i-1].String())
 		}
 		strs = append(strs, fmt.Sprintf("segs %d [ %s ]", len(e.Segments), strings.Join(segs, " ")))
 	}
@@ -443,6 +444,207 @@ func (e *SEG6LocalEncap) Equal(x Encap) bool {
 	return true
 }
 
+// Encap BPF definitions.
+//
+// bpfObj holds the file descriptor and the display name of one lwt bpf
+// program.
+type bpfObj struct {
+	progFd   int
+	progName string
+}
+// BpfEncap is the Encap implementation whose Type is LWTUNNEL_ENCAP_BPF.
+// progs is indexed by the LWT_BPF_* mode passed to SetProg; headroom is the
+// value stored by SetXmitHeadroom.
+type BpfEncap struct {
+	progs    [nl.LWT_BPF_MAX]bpfObj
+	headroom int
+}
+
+// SetProg registers a bpf program for one lwt hook so that it is sent with
+// the route via netlink RTA_ENCAP. progFd must be positive and should refer
+// to a bpf program loaded with bpf(type=BPF_PROG_TYPE_LWT_*) matching the
+// direction given by mode (LWT_BPF_IN, LWT_BPF_OUT, LWT_BPF_XMIT). mode must
+// lie strictly between LWT_BPF_UNSPEC and LWT_BPF_XMIT_HEADROOM. The name is
+// stored in the form "progName[fd:N]".
+func (e *BpfEncap) SetProg(mode, progFd int, progName string) error {
+	switch {
+	case progFd <= 0:
+		return fmt.Errorf("lwt bpf SetProg: invalid fd")
+	case mode <= nl.LWT_BPF_UNSPEC, mode >= nl.LWT_BPF_XMIT_HEADROOM:
+		return fmt.Errorf("lwt bpf SetProg:invalid mode")
+	}
+	prog := &e.progs[mode]
+	prog.progFd = progFd
+	prog.progName = fmt.Sprintf("%s[fd:%d]", progName, progFd)
+	return nil
+}
+
+// SetXmitHeadroom stores the xmit headroom, which Encode emits as the
+// LWT_BPF_XMIT_HEADROOM attribute when it is greater than zero.
+// The accepted range is 0 to nl.LWT_BPF_MAX_HEADROOM inclusive; any other
+// value is rejected and leaves the stored headroom untouched.
+func (e *BpfEncap) SetXmitHeadroom(headroom int) error {
+	if 0 <= headroom && headroom <= nl.LWT_BPF_MAX_HEADROOM {
+		e.headroom = headroom
+		return nil
+	}
+	return fmt.Errorf("invalid headroom size. range is 0 - %d", nl.LWT_BPF_MAX_HEADROOM)
+}
+
+// Type returns the encapsulation type of BpfEncap, nl.LWTUNNEL_ENCAP_BPF.
+func (e *BpfEncap) Type() int {
+	return nl.LWTUNNEL_ENCAP_BPF
+}
+// Decode parses buf, the payload of a bpf RTA_ENCAP attribute, into e.
+//
+// buf is parsed as a sequence of LWT_BPF_* attributes. The
+// LWT_BPF_XMIT_HEADROOM attribute carries a native-endian uint32 and is
+// stored as the headroom. Every other accepted attribute is parsed as a nest
+// of LWT_BPF_PROG_FD and LWT_BPF_PROG_NAME attributes and stored in the
+// program slot with the same index. Attributes of type 0 (LWT_BPF_UNSPEC)
+// are skipped. An error is returned when buf is shorter than 4 bytes, when
+// parsing fails, when an attribute type is unknown, or when a uint32 value
+// is truncated.
+func (e *BpfEncap) Decode(buf []byte) error {
+	if len(buf) < 4 {
+		return fmt.Errorf("lwt bpf decode: lack of bytes")
+	}
+	native := nl.NativeEndian()
+	attrs, err := nl.ParseRouteAttr(buf)
+	if err != nil {
+		return fmt.Errorf("lwt bpf decode: failed parsing attribute. err: %v", err)
+	}
+	for _, attr := range attrs {
+		attrType := int(attr.Attr.Type)
+		if attrType < 1 {
+			// nl.LWT_BPF_UNSPEC
+			continue
+		}
+		if attrType > nl.LWT_BPF_MAX {
+			return fmt.Errorf("lwt bpf decode: received unknown attribute type: %d", attr.Attr.Type)
+		}
+		switch {
+		case attrType == nl.LWT_BPF_XMIT_HEADROOM:
+			// The headroom is identified by its attribute type, not by
+			// the LWT_BPF_MAX_HEADROOM size limit.
+			if len(attr.Value) < 4 {
+				return fmt.Errorf("lwt bpf decode: lack of bytes")
+			}
+			e.headroom = int(native.Uint32(attr.Value))
+		case attrType >= len(e.progs):
+			// No program slot exists for this index; reject it instead
+			// of indexing past the end of the array.
+			return fmt.Errorf("lwt bpf decode: received unknown attribute type: %d", attr.Attr.Type)
+		default:
+			bpfO := bpfObj{}
+			parsedAttrs, err := nl.ParseRouteAttr(attr.Value)
+			if err != nil {
+				return fmt.Errorf("lwt bpf decode: failed parsing route attribute. err: %v", err)
+			}
+			for _, parsedAttr := range parsedAttrs {
+				switch int(parsedAttr.Attr.Type) {
+				case nl.LWT_BPF_PROG_FD:
+					if len(parsedAttr.Value) < 4 {
+						return fmt.Errorf("lwt bpf decode: lack of bytes")
+					}
+					bpfO.progFd = int(native.Uint32(parsedAttr.Value))
+				case nl.LWT_BPF_PROG_NAME:
+					bpfO.progName = string(parsedAttr.Value)
+				default:
+					return fmt.Errorf("lwt bpf decode: received unknown attribute: type: %d, len: %d", parsedAttr.Attr.Type, parsedAttr.Attr.Len)
+				}
+			}
+			e.progs[attrType] = bpfO
+		}
+	}
+	return nil
+}
+
+func (e *BpfEncap) Encode() ([]byte, error) {
+	buf := make([]byte, 0)
+	native = nl.NativeEndian()
+	for index, attr := range e.progs {
+		nlMsg := nl.NewRtAttr(index, []byte{})
+		if attr.progFd != 0 {
+			nlMsg.AddRtAttr(nl.LWT_BPF_PROG_FD, nl.Uint32Attr(uint32(attr.progFd)))
+		}
+		if attr.progName != "" {
+			nlMsg.AddRtAttr(nl.LWT_BPF_PROG_NAME, nl.ZeroTerminated(attr.progName))
+		}
+		if nlMsg.Len() > 4 {
+			buf = append(buf, nlMsg.Serialize()...)
+		}
+	}
+	if len(buf) <= 4 {
+		return nil, fmt.Errorf("lwt bpf encode: bpf obj definitions returned empty buffer")
+	}
+	if e.headroom > 0 {
+		hRoom := nl.NewRtAttr(nl.LWT_BPF_XMIT_HEADROOM, nl.Uint32Attr(uint32(e.headroom)))
+		buf = append(buf, hRoom.Serialize()...)
+	}
+	return buf, nil
+}
+
+func (e *BpfEncap) String() string {
+	progs := make([]string, 0)
+	for index, obj := range e.progs {
+		empty := bpfObj{}
+		switch index {
+		case nl.LWT_BPF_IN:
+			if obj != empty {
+				progs = append(progs, fmt.Sprintf("in: %s", obj.progName))
+			}
+		case nl.LWT_BPF_OUT:
+			if obj != empty {
+				progs = append(progs, fmt.Sprintf("out: %s", obj.progName))
+			}
+		case nl.LWT_BPF_XMIT:
+			if obj != empty {
+				progs = append(progs, fmt.Sprintf("xmit: %s", obj.progName))
+			}
+		}
+	}
+	if e.headroom > 0 {
+		progs = append(progs, fmt.Sprintf("xmit headroom: %d", e.headroom))
+	}
+	return strings.Join(progs, " ")
+}
+
+// Equal reports whether x is a *BpfEncap with the same headroom and the
+// same program (fd and name) in every slot.
+func (e *BpfEncap) Equal(x Encap) bool {
+	other, isBpf := x.(*BpfEncap)
+	if !isBpf {
+		return false
+	}
+	// Arrays of comparable structs compare element by element.
+	return e.headroom == other.headroom && e.progs == other.progs
+}
+
+// Via is a Destination made of an address family and an address. It is the
+// value encoded into and decoded from RTA_VIA attributes.
+type Via struct {
+	AddrFamily int
+	Addr       net.IP
+}
+
+// Equal reports whether x is a *Via with the same address family and an
+// equal address.
+func (v *Via) Equal(x Destination) bool {
+	other, isVia := x.(*Via)
+	return isVia && v.AddrFamily == x.Family() && v.Addr.Equal(other.Addr)
+}
+
+// String formats v as "Family: <family>, Address: <address>".
+func (v *Via) String() string {
+	return fmt.Sprintf("Family: %d, Address: %s", v.AddrFamily, v.Addr.String())
+}
+
+// Family returns the address family stored in v.
+func (v *Via) Family() int {
+	return v.AddrFamily
+}
+
+// Encode serializes v as a native-endian uint16 address family followed by
+// the raw address bytes.
+//
+// When the family is nl.FAMILY_V4 and the address is an IPv4 address held
+// in its 16-byte form (as net.ParseIP returns it), the 4-byte form is
+// written, so that the output matches what Decode reads for that family.
+// Any other address is written as stored.
+func (v *Via) Encode() ([]byte, error) {
+	addr := v.Addr
+	if v.AddrFamily == nl.FAMILY_V4 {
+		if ip4 := addr.To4(); ip4 != nil {
+			addr = ip4
+		}
+	}
+	buf := &bytes.Buffer{}
+	err := binary.Write(buf, native, uint16(v.AddrFamily))
+	if err != nil {
+		return nil, err
+	}
+	err = binary.Write(buf, native, addr)
+	if err != nil {
+		return nil, err
+	}
+	return buf.Bytes(), nil
+}
+
+// Decode parses b as a native-endian uint16 address family followed by an
+// address and stores both in v.
+//
+// nl.FAMILY_V4 needs at least 6 bytes and nl.FAMILY_V6 at least 18; exactly
+// 4 or 16 address bytes are taken and any trailing bytes are ignored. The
+// address is a sub-slice of b, not a copy. Any other family is an error;
+// AddrFamily is set even in that case.
+func (v *Via) Decode(b []byte) error {
+	if len(b) < 6 {
+		return fmt.Errorf("decoding failed: buffer too small (%d bytes)", len(b))
+	}
+	v.AddrFamily = int(native.Uint16(b[0:2]))
+	switch v.AddrFamily {
+	case nl.FAMILY_V4:
+		v.Addr = net.IP(b[2:6])
+		return nil
+	case nl.FAMILY_V6:
+		if len(b) < 18 {
+			return fmt.Errorf("decoding failed: buffer too small (%d bytes)", len(b))
+		}
+		// Bound the slice so that a longer buffer cannot produce an
+		// address that is not 16 bytes long.
+		v.Addr = net.IP(b[2:18])
+		return nil
+	}
+	return fmt.Errorf("decoding failed: address family %d unknown", v.AddrFamily)
+}
+
 // RouteAdd will add a route to the system.
 // Equivalent to: `ip route add $route`
 func RouteAdd(route *Route) error {
@@ -457,6 +659,32 @@ func (h *Handle) RouteAdd(route *Route) error {
 	return h.routeHandle(route, req, nl.NewRtMsg())
 }
 
+// RouteAppend will append a route to the system, using the package-level
+// handle.
+// Equivalent to: `ip route append $route`
+func RouteAppend(route *Route) error {
+	return pkgHandle.RouteAppend(route)
+}
+
+// RouteAppend appends a route to the system by sending an RTM_NEWROUTE
+// request flagged NLM_F_CREATE, NLM_F_APPEND and NLM_F_ACK.
+// Equivalent to: `ip route append $route`
+func (h *Handle) RouteAppend(route *Route) error {
+	req := h.newNetlinkRequest(
+		unix.RTM_NEWROUTE,
+		unix.NLM_F_CREATE|unix.NLM_F_APPEND|unix.NLM_F_ACK,
+	)
+	msg := nl.NewRtMsg()
+	return h.routeHandle(route, req, msg)
+}
+
+// RouteAddEcmp will add a route to the system, using the package-level
+// handle.
+func RouteAddEcmp(route *Route) error {
+	return pkgHandle.RouteAddEcmp(route)
+}
+
+// RouteAddEcmp adds a route to the system by sending an RTM_NEWROUTE
+// request flagged only NLM_F_CREATE and NLM_F_ACK; neither NLM_F_APPEND nor
+// any other request flag is set here.
+func (h *Handle) RouteAddEcmp(route *Route) error {
+	req := h.newNetlinkRequest(
+		unix.RTM_NEWROUTE,
+		unix.NLM_F_CREATE|unix.NLM_F_ACK,
+	)
+	msg := nl.NewRtMsg()
+	return h.routeHandle(route, req, msg)
+}
+
 // RouteReplace will add a route to the system.
 // Equivalent to: `ip route replace $route`
 func RouteReplace(route *Route) error {
@@ -530,7 +758,13 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
 		if err != nil {
 			return err
 		}
-		rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP, buf))
+		switch route.Encap.Type() {
+		case nl.LWTUNNEL_ENCAP_BPF:
+			rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP|unix.NLA_F_NESTED, buf))
+		default:
+			rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP, buf))
+		}
+
 	}
 
 	if route.Src != nil {
@@ -564,6 +798,14 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
 		rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_GATEWAY, gwData))
 	}
 
+	if route.Via != nil {
+		buf, err := route.Via.Encode()
+		if err != nil {
+			return fmt.Errorf("failed to encode RTA_VIA: %v", err)
+		}
+		rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_VIA, buf))
+	}
+
 	if len(route.MultiPath) > 0 {
 		buf := []byte{}
 		for _, nh := range route.MultiPath {
@@ -606,6 +848,13 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
 				}
 				children = append(children, nl.NewRtAttr(unix.RTA_ENCAP, buf))
 			}
+			if nh.Via != nil {
+				buf, err := nh.Via.Encode()
+				if err != nil {
+					return err
+				}
+				children = append(children, nl.NewRtAttr(unix.RTA_VIA, buf))
+			}
 			rtnh.Children = children
 			buf = append(buf, rtnh.Serialize()...)
 		}
@@ -628,6 +877,11 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
 		native.PutUint32(b, uint32(route.Priority))
 		rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_PRIORITY, b))
 	}
+	if route.Realm > 0 {
+		b := make([]byte, 4)
+		native.PutUint32(b, uint32(route.Realm))
+		rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_FLOW, b))
+	}
 	if route.Tos > 0 {
 		msg.Tos = uint8(route.Tos)
 	}
@@ -639,19 +893,70 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
 	}
 
 	var metrics []*nl.RtAttr
-	// TODO: support other rta_metric values
 	if route.MTU > 0 {
 		b := nl.Uint32Attr(uint32(route.MTU))
 		metrics = append(metrics, nl.NewRtAttr(unix.RTAX_MTU, b))
 	}
+	if route.Window > 0 {
+		b := nl.Uint32Attr(uint32(route.Window))
+		metrics = append(metrics, nl.NewRtAttr(unix.RTAX_WINDOW, b))
+	}
+	if route.Rtt > 0 {
+		b := nl.Uint32Attr(uint32(route.Rtt))
+		metrics = append(metrics, nl.NewRtAttr(unix.RTAX_RTT, b))
+	}
+	if route.RttVar > 0 {
+		b := nl.Uint32Attr(uint32(route.RttVar))
+		metrics = append(metrics, nl.NewRtAttr(unix.RTAX_RTTVAR, b))
+	}
+	if route.Ssthresh > 0 {
+		b := nl.Uint32Attr(uint32(route.Ssthresh))
+		metrics = append(metrics, nl.NewRtAttr(unix.RTAX_SSTHRESH, b))
+	}
+	if route.Cwnd > 0 {
+		b := nl.Uint32Attr(uint32(route.Cwnd))
+		metrics = append(metrics, nl.NewRtAttr(unix.RTAX_CWND, b))
+	}
 	if route.AdvMSS > 0 {
 		b := nl.Uint32Attr(uint32(route.AdvMSS))
 		metrics = append(metrics, nl.NewRtAttr(unix.RTAX_ADVMSS, b))
 	}
+	if route.Reordering > 0 {
+		b := nl.Uint32Attr(uint32(route.Reordering))
+		metrics = append(metrics, nl.NewRtAttr(unix.RTAX_REORDERING, b))
+	}
 	if route.Hoplimit > 0 {
 		b := nl.Uint32Attr(uint32(route.Hoplimit))
 		metrics = append(metrics, nl.NewRtAttr(unix.RTAX_HOPLIMIT, b))
 	}
+	if route.InitCwnd > 0 {
+		b := nl.Uint32Attr(uint32(route.InitCwnd))
+		metrics = append(metrics, nl.NewRtAttr(unix.RTAX_INITCWND, b))
+	}
+	if route.Features > 0 {
+		b := nl.Uint32Attr(uint32(route.Features))
+		metrics = append(metrics, nl.NewRtAttr(unix.RTAX_FEATURES, b))
+	}
+	if route.RtoMin > 0 {
+		b := nl.Uint32Attr(uint32(route.RtoMin))
+		metrics = append(metrics, nl.NewRtAttr(unix.RTAX_RTO_MIN, b))
+	}
+	if route.InitRwnd > 0 {
+		b := nl.Uint32Attr(uint32(route.InitRwnd))
+		metrics = append(metrics, nl.NewRtAttr(unix.RTAX_INITRWND, b))
+	}
+	if route.QuickACK > 0 {
+		b := nl.Uint32Attr(uint32(route.QuickACK))
+		metrics = append(metrics, nl.NewRtAttr(unix.RTAX_QUICKACK, b))
+	}
+	if route.Congctl != "" {
+		b := nl.ZeroTerminated(route.Congctl)
+		metrics = append(metrics, nl.NewRtAttr(unix.RTAX_CC_ALGO, b))
+	}
+	if route.FastOpenNoCookie > 0 {
+		b := nl.Uint32Attr(uint32(route.FastOpenNoCookie))
+		metrics = append(metrics, nl.NewRtAttr(unix.RTAX_FASTOPEN_NO_COOKIE, b))
+	}
 
 	if metrics != nil {
 		attr := nl.NewRtAttr(unix.RTA_METRICS, nil)
@@ -669,10 +974,7 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
 		req.AddData(attr)
 	}
 
-	var (
-		b      = make([]byte, 4)
-		native = nl.NativeEndian()
-	)
+	b := make([]byte, 4)
 	native.PutUint32(b, uint32(route.LinkIndex))
 
 	req.AddData(nl.NewRtAttr(unix.RTA_OIF, b))
@@ -711,8 +1013,9 @@ func RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, e
 // All rules must be defined in RouteFilter struct
 func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, error) {
 	req := h.newNetlinkRequest(unix.RTM_GETROUTE, unix.NLM_F_DUMP)
-	infmsg := nl.NewIfInfomsg(family)
-	req.AddData(infmsg)
+	rtmsg := nl.NewRtMsg()
+	rtmsg.Family = uint8(family)
+	req.AddData(rtmsg)
 
 	msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWROUTE)
 	if err != nil {
@@ -748,6 +1051,8 @@ func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64)
 				continue
 			case filterMask&RT_FILTER_TOS != 0 && route.Tos != filter.Tos:
 				continue
+			case filterMask&RT_FILTER_REALM != 0 && route.Realm != filter.Realm:
+				continue
 			case filterMask&RT_FILTER_OIF != 0 && route.LinkIndex != filter.LinkIndex:
 				continue
 			case filterMask&RT_FILTER_IIF != 0 && route.ILinkIndex != filter.ILinkIndex:
@@ -780,14 +1085,14 @@ func deserializeRoute(m []byte) (Route, error) {
 	}
 	route := Route{
 		Scope:    Scope(msg.Scope),
-		Protocol: int(msg.Protocol),
+		Protocol: RouteProtocol(int(msg.Protocol)),
 		Table:    int(msg.Table),
 		Type:     int(msg.Type),
 		Tos:      int(msg.Tos),
 		Flags:    int(msg.Flags),
+		Family:   int(msg.Family),
 	}
 
-	native := nl.NativeEndian()
 	var encap, encapType syscall.NetlinkRouteAttr
 	for _, attr := range attrs {
 		switch attr.Attr.Type {
@@ -814,6 +1119,8 @@ func deserializeRoute(m []byte) (Route, error) {
 			route.ILinkIndex = int(native.Uint32(attr.Value[0:4]))
 		case unix.RTA_PRIORITY:
 			route.Priority = int(native.Uint32(attr.Value[0:4]))
+		case unix.RTA_FLOW:
+			route.Realm = int(native.Uint32(attr.Value[0:4]))
 		case unix.RTA_TABLE:
 			route.Table = int(native.Uint32(attr.Value[0:4]))
 		case unix.RTA_MULTIPATH:
@@ -853,6 +1160,12 @@ func deserializeRoute(m []byte) (Route, error) {
 						encapType = attr
 					case unix.RTA_ENCAP:
 						encap = attr
+					case unix.RTA_VIA:
+						d := &Via{}
+						if err := d.Decode(attr.Value); err != nil {
+							return nil, nil, err
+						}
+						info.Via = d
 					}
 				}
 
@@ -890,6 +1203,12 @@ func deserializeRoute(m []byte) (Route, error) {
 				return route, err
 			}
 			route.NewDst = d
+		case unix.RTA_VIA:
+			v := &Via{}
+			if err := v.Decode(attr.Value); err != nil {
+				return route, err
+			}
+			route.Via = v
 		case unix.RTA_ENCAP_TYPE:
 			encapType = attr
 		case unix.RTA_ENCAP:
@@ -903,10 +1222,36 @@ func deserializeRoute(m []byte) (Route, error) {
 				switch metric.Attr.Type {
 				case unix.RTAX_MTU:
 					route.MTU = int(native.Uint32(metric.Value[0:4]))
+				case unix.RTAX_WINDOW:
+					route.Window = int(native.Uint32(metric.Value[0:4]))
+				case unix.RTAX_RTT:
+					route.Rtt = int(native.Uint32(metric.Value[0:4]))
+				case unix.RTAX_RTTVAR:
+					route.RttVar = int(native.Uint32(metric.Value[0:4]))
+				case unix.RTAX_SSTHRESH:
+					route.Ssthresh = int(native.Uint32(metric.Value[0:4]))
+				case unix.RTAX_CWND:
+					route.Cwnd = int(native.Uint32(metric.Value[0:4]))
 				case unix.RTAX_ADVMSS:
 					route.AdvMSS = int(native.Uint32(metric.Value[0:4]))
+				case unix.RTAX_REORDERING:
+					route.Reordering = int(native.Uint32(metric.Value[0:4]))
 				case unix.RTAX_HOPLIMIT:
 					route.Hoplimit = int(native.Uint32(metric.Value[0:4]))
+				case unix.RTAX_INITCWND:
+					route.InitCwnd = int(native.Uint32(metric.Value[0:4]))
+				case unix.RTAX_FEATURES:
+					route.Features = int(native.Uint32(metric.Value[0:4]))
+				case unix.RTAX_RTO_MIN:
+					route.RtoMin = int(native.Uint32(metric.Value[0:4]))
+				case unix.RTAX_INITRWND:
+					route.InitRwnd = int(native.Uint32(metric.Value[0:4]))
+				case unix.RTAX_QUICKACK:
+					route.QuickACK = int(native.Uint32(metric.Value[0:4]))
+				case unix.RTAX_CC_ALGO:
+					route.Congctl = nl.BytesToString(metric.Value)
+				case unix.RTAX_FASTOPEN_NO_COOKIE:
+					route.FastOpenNoCookie = int(native.Uint32(metric.Value[0:4]))
 				}
 			}
 		}
@@ -931,6 +1276,11 @@ func deserializeRoute(m []byte) (Route, error) {
 			if err := e.Decode(encap.Value); err != nil {
 				return route, err
 			}
+		case nl.LWTUNNEL_ENCAP_BPF:
+			e = &BpfEncap{}
+			if err := e.Decode(encap.Value); err != nil {
+				return route, err
+			}
 		}
 		route.Encap = e
 	}
@@ -938,15 +1288,30 @@ func deserializeRoute(m []byte) (Route, error) {
 	return route, nil
 }
 
+// RouteGetOptions contains a set of options to use with
+// RouteGetWithOptions.
+//
+// Iif, Oif and VrfName are link names; each non-empty name is resolved to a
+// link index. Iif is sent as RTA_IIF, while Oif and VrfName are both sent
+// as RTA_OIF. A non-nil SrcAddr is sent as RTA_SRC.
+type RouteGetOptions struct {
+	Iif     string
+	Oif     string
+	VrfName string
+	SrcAddr net.IP
+}
+
+// RouteGetWithOptions gets a route to a specific destination from the host
+// system, using the package-level handle. options may be nil.
+// Equivalent to: 'ip route get <> vrf <VrfName>'.
+func RouteGetWithOptions(destination net.IP, options *RouteGetOptions) ([]Route, error) {
+	return pkgHandle.RouteGetWithOptions(destination, options)
+}
+
 // RouteGet gets a route to a specific destination from the host system.
 // Equivalent to: 'ip route get'.
 func RouteGet(destination net.IP) ([]Route, error) {
 	return pkgHandle.RouteGet(destination)
 }
 
-// RouteGet gets a route to a specific destination from the host system.
-// Equivalent to: 'ip route get'.
-func (h *Handle) RouteGet(destination net.IP) ([]Route, error) {
+// RouteGetWithOptions gets a route to a specific destination from the host system.
+// Equivalent to: 'ip route get <> vrf <VrfName>'.
+func (h *Handle) RouteGetWithOptions(destination net.IP, options *RouteGetOptions) ([]Route, error) {
 	req := h.newNetlinkRequest(unix.RTM_GETROUTE, unix.NLM_F_REQUEST)
 	family := nl.GetIPFamily(destination)
 	var destinationData []byte
@@ -961,11 +1326,63 @@ func (h *Handle) RouteGet(destination net.IP) ([]Route, error) {
 	msg := &nl.RtMsg{}
 	msg.Family = uint8(family)
 	msg.Dst_len = bitlen
+	if options != nil && options.SrcAddr != nil {
+		msg.Src_len = bitlen
+	}
+	msg.Flags = unix.RTM_F_LOOKUP_TABLE
 	req.AddData(msg)
 
 	rtaDst := nl.NewRtAttr(unix.RTA_DST, destinationData)
 	req.AddData(rtaDst)
 
+	if options != nil {
+		if options.VrfName != "" {
+			link, err := LinkByName(options.VrfName)
+			if err != nil {
+				return nil, err
+			}
+			b := make([]byte, 4)
+			native.PutUint32(b, uint32(link.Attrs().Index))
+
+			req.AddData(nl.NewRtAttr(unix.RTA_OIF, b))
+		}
+
+		if len(options.Iif) > 0 {
+			link, err := LinkByName(options.Iif)
+			if err != nil {
+				return nil, err
+			}
+
+			b := make([]byte, 4)
+			native.PutUint32(b, uint32(link.Attrs().Index))
+
+			req.AddData(nl.NewRtAttr(unix.RTA_IIF, b))
+		}
+
+		if len(options.Oif) > 0 {
+			link, err := LinkByName(options.Oif)
+			if err != nil {
+				return nil, err
+			}
+
+			b := make([]byte, 4)
+			native.PutUint32(b, uint32(link.Attrs().Index))
+
+			req.AddData(nl.NewRtAttr(unix.RTA_OIF, b))
+		}
+
+		if options.SrcAddr != nil {
+			var srcAddr []byte
+			if family == FAMILY_V4 {
+				srcAddr = options.SrcAddr.To4()
+			} else {
+				srcAddr = options.SrcAddr.To16()
+			}
+
+			req.AddData(nl.NewRtAttr(unix.RTA_SRC, srcAddr))
+		}
+	}
+
 	msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWROUTE)
 	if err != nil {
 		return nil, err
@@ -980,7 +1397,12 @@ func (h *Handle) RouteGet(destination net.IP) ([]Route, error) {
 		res = append(res, route)
 	}
 	return res, nil
+}
 
+// RouteGet gets a route to a specific destination from the host system.
+// It calls RouteGetWithOptions with nil options.
+// Equivalent to: 'ip route get'.
+func (h *Handle) RouteGet(destination net.IP) ([]Route, error) {
+	return h.RouteGetWithOptions(destination, nil)
 }
 
 // RouteSubscribe takes a chan down which notifications will be sent
@@ -1040,7 +1462,8 @@ func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <
 			msgs, from, err := s.Receive()
 			if err != nil {
 				if cberr != nil {
-					cberr(err)
+					cberr(fmt.Errorf("Receive failed: %v",
+						err))
 				}
 				return
 			}
@@ -1055,22 +1478,22 @@ func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <
 					continue
 				}
 				if m.Header.Type == unix.NLMSG_ERROR {
-					native := nl.NativeEndian()
 					error := int32(native.Uint32(m.Data[0:4]))
 					if error == 0 {
 						continue
 					}
 					if cberr != nil {
-						cberr(syscall.Errno(-error))
+						cberr(fmt.Errorf("error message: %v",
+							syscall.Errno(-error)))
 					}
-					return
+					continue
 				}
 				route, err := deserializeRoute(m.Data)
 				if err != nil {
 					if cberr != nil {
 						cberr(err)
 					}
-					return
+					continue
 				}
 				ch <- RouteUpdate{Type: m.Header.Type, Route: route}
 			}
@@ -1079,3 +1502,54 @@ func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <
 
 	return nil
 }
+
+func (p RouteProtocol) String() string {
+	switch int(p) {
+	case unix.RTPROT_BABEL:
+		return "babel"
+	case unix.RTPROT_BGP:
+		return "bgp"
+	case unix.RTPROT_BIRD:
+		return "bird"
+	case unix.RTPROT_BOOT:
+		return "boot"
+	case unix.RTPROT_DHCP:
+		return "dhcp"
+	case unix.RTPROT_DNROUTED:
+		return "dnrouted"
+	case unix.RTPROT_EIGRP:
+		return "eigrp"
+	case unix.RTPROT_GATED:
+		return "gated"
+	case unix.RTPROT_ISIS:
+		return "isis"
+	//case unix.RTPROT_KEEPALIVED:
+	//	return "keepalived"
+	case unix.RTPROT_KERNEL:
+		return "kernel"
+	case unix.RTPROT_MROUTED:
+		return "mrouted"
+	case unix.RTPROT_MRT:
+		return "mrt"
+	case unix.RTPROT_NTK:
+		return "ntk"
+	case unix.RTPROT_OSPF:
+		return "ospf"
+	case unix.RTPROT_RA:
+		return "ra"
+	case unix.RTPROT_REDIRECT:
+		return "redirect"
+	case unix.RTPROT_RIP:
+		return "rip"
+	case unix.RTPROT_STATIC:
+		return "static"
+	case unix.RTPROT_UNSPEC:
+		return "unspec"
+	case unix.RTPROT_XORP:
+		return "xorp"
+	case unix.RTPROT_ZEBRA:
+		return "zebra"
+	default:
+		return strconv.Itoa(int(p))
+	}
+}

+ 10 - 0
vendor/github.com/vishvananda/netlink/route_unspecified.go

@@ -2,6 +2,8 @@
 
 package netlink
 
+import "strconv"
+
 func (r *Route) ListFlags() []string {
 	return []string{}
 }
@@ -9,3 +11,11 @@ func (r *Route) ListFlags() []string {
 func (n *NexthopInfo) ListFlags() []string {
 	return []string{}
 }
+
+// String always returns "unknown" in this build of the package.
+func (s Scope) String() string {
+	return "unknown"
+}
+
+// String returns the protocol as a decimal number in this build of the
+// package.
+func (p RouteProtocol) String() string {
+	return strconv.Itoa(int(p))
+}

+ 27 - 1
vendor/github.com/vishvananda/netlink/rule.go

@@ -12,6 +12,7 @@ type Rule struct {
 	Table             int
 	Mark              int
 	Mask              int
+	Tos               uint
 	TunID             uint
 	Goto              int
 	Src               *net.IPNet
@@ -22,10 +23,24 @@ type Rule struct {
 	SuppressIfgroup   int
 	SuppressPrefixlen int
 	Invert            bool
+	Dport             *RulePortRange
+	Sport             *RulePortRange
+	IPProto           int
 }
 
 func (r Rule) String() string {
-	return fmt.Sprintf("ip rule %d: from %s table %d", r.Priority, r.Src, r.Table)
+	// describe renders a selector network, falling back to "all" when the
+	// network is nil or formats as "<nil>".
+	describe := func(n *net.IPNet) string {
+		if n != nil {
+			if s := n.String(); s != "<nil>" {
+				return s
+			}
+		}
+		return "all"
+	}
+
+	return fmt.Sprintf("ip rule %d: from %s to %s table %d",
+		r.Priority, describe(r.Src), describe(r.Dst), r.Table)
 }
 
 // NewRule return empty rules.
@@ -40,3 +55,14 @@ func NewRule() *Rule {
 		Flow:              -1,
 	}
 }
+
+// NewRulePortRange returns a new sport/dport range covering start to end.
+func NewRulePortRange(start, end uint16) *RulePortRange {
+	portRange := new(RulePortRange)
+	portRange.Start = start
+	portRange.End = end
+	return portRange
+}
+
+// RulePortRange represents rule sport/dport range. It is the type of the
+// Rule.Sport and Rule.Dport fields.
+type RulePortRange struct {
+	Start uint16
+	End   uint16
+}

+ 73 - 6
vendor/github.com/vishvananda/netlink/rule_linux.go

@@ -1,6 +1,7 @@
 package netlink
 
 import (
+	"bytes"
 	"fmt"
 	"net"
 
@@ -55,6 +56,9 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error {
 	if rule.Table >= 0 && rule.Table < 256 {
 		msg.Table = uint8(rule.Table)
 	}
+	if rule.Tos != 0 {
+		msg.Tos = uint8(rule.Tos)
+	}
 
 	var dstFamily uint8
 	var rtAttrs []*nl.RtAttr
@@ -93,8 +97,6 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error {
 		req.AddData(rtAttrs[i])
 	}
 
-	native := nl.NativeEndian()
-
 	if rule.Priority >= 0 {
 		b := make([]byte, 4)
 		native.PutUint32(b, uint32(rule.Priority))
@@ -138,10 +140,10 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error {
 		}
 	}
 	if rule.IifName != "" {
-		req.AddData(nl.NewRtAttr(nl.FRA_IIFNAME, []byte(rule.IifName)))
+		req.AddData(nl.NewRtAttr(nl.FRA_IIFNAME, []byte(rule.IifName+"\x00")))
 	}
 	if rule.OifName != "" {
-		req.AddData(nl.NewRtAttr(nl.FRA_OIFNAME, []byte(rule.OifName)))
+		req.AddData(nl.NewRtAttr(nl.FRA_OIFNAME, []byte(rule.OifName+"\x00")))
 	}
 	if rule.Goto >= 0 {
 		msg.Type = nl.FR_ACT_GOTO
@@ -150,6 +152,22 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error {
 		req.AddData(nl.NewRtAttr(nl.FRA_GOTO, b))
 	}
 
+	if rule.IPProto > 0 {
+		b := make([]byte, 4)
+		native.PutUint32(b, uint32(rule.IPProto))
+		req.AddData(nl.NewRtAttr(nl.FRA_IP_PROTO, b))
+	}
+
+	if rule.Dport != nil {
+		b := rule.Dport.toRtAttrData()
+		req.AddData(nl.NewRtAttr(nl.FRA_DPORT_RANGE, b))
+	}
+
+	if rule.Sport != nil {
+		b := rule.Sport.toRtAttrData()
+		req.AddData(nl.NewRtAttr(nl.FRA_SPORT_RANGE, b))
+	}
+
 	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
 	return err
 }
@@ -163,6 +181,19 @@ func RuleList(family int) ([]Rule, error) {
 // RuleList lists rules in the system.
 // Equivalent to: ip rule list
 func (h *Handle) RuleList(family int) ([]Rule, error) {
+	return h.RuleListFiltered(family, nil, 0)
+}
+
+// RuleListFiltered gets a list of rules in the system filtered by the
+// specified rule template `filter`.
+// Equivalent to: ip rule list
+func RuleListFiltered(family int, filter *Rule, filterMask uint64) ([]Rule, error) {
+	return pkgHandle.RuleListFiltered(family, filter, filterMask)
+}
+
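+// RuleListFiltered lists rules in the system, keeping only those that match `filter` on the fields selected by `filterMask` (all rules when `filter` is nil).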
+// Equivalent to: ip rule list
+func (h *Handle) RuleListFiltered(family int, filter *Rule, filterMask uint64) ([]Rule, error) {
 	req := h.newNetlinkRequest(unix.RTM_GETRULE, unix.NLM_F_DUMP|unix.NLM_F_REQUEST)
 	msg := nl.NewIfInfomsg(family)
 	req.AddData(msg)
@@ -172,7 +203,6 @@ func (h *Handle) RuleList(family int) ([]Rule, error) {
 		return nil, err
 	}
 
-	native := nl.NativeEndian()
 	var res = make([]Rule, 0)
 	for i := range msgs {
 		msg := nl.DeserializeRtMsg(msgs[i])
@@ -184,6 +214,7 @@ func (h *Handle) RuleList(family int) ([]Rule, error) {
 		rule := NewRule()
 
 		rule.Invert = msg.Flags&FibRuleInvert > 0
+		rule.Tos = uint(msg.Tos)
 
 		for j := range attrs {
 			switch attrs[j].Attr.Type {
@@ -204,7 +235,7 @@ func (h *Handle) RuleList(family int) ([]Rule, error) {
 			case nl.FRA_FWMASK:
 				rule.Mask = int(native.Uint32(attrs[j].Value[0:4]))
 			case nl.FRA_TUN_ID:
-				rule.TunID = uint(native.Uint64(attrs[j].Value[0:4]))
+				rule.TunID = uint(native.Uint64(attrs[j].Value[0:8]))
 			case nl.FRA_IIFNAME:
 				rule.IifName = string(attrs[j].Value[:len(attrs[j].Value)-1])
 			case nl.FRA_OIFNAME:
@@ -225,10 +256,46 @@ func (h *Handle) RuleList(family int) ([]Rule, error) {
 				rule.Goto = int(native.Uint32(attrs[j].Value[0:4]))
 			case nl.FRA_PRIORITY:
 				rule.Priority = int(native.Uint32(attrs[j].Value[0:4]))
+			case nl.FRA_IP_PROTO:
+				rule.IPProto = int(native.Uint32(attrs[j].Value[0:4]))
+			case nl.FRA_DPORT_RANGE:
+				rule.Dport = NewRulePortRange(native.Uint16(attrs[j].Value[0:2]), native.Uint16(attrs[j].Value[2:4]))
+			case nl.FRA_SPORT_RANGE:
+				rule.Sport = NewRulePortRange(native.Uint16(attrs[j].Value[0:2]), native.Uint16(attrs[j].Value[2:4]))
+			}
+		}
+
+		if filter != nil {
+			switch {
+			case filterMask&RT_FILTER_SRC != 0 &&
+				(rule.Src == nil || rule.Src.String() != filter.Src.String()):
+				continue
+			case filterMask&RT_FILTER_DST != 0 &&
+				(rule.Dst == nil || rule.Dst.String() != filter.Dst.String()):
+				continue
+			case filterMask&RT_FILTER_TABLE != 0 &&
+				filter.Table != unix.RT_TABLE_UNSPEC && rule.Table != filter.Table:
+				continue
+			case filterMask&RT_FILTER_TOS != 0 && rule.Tos != filter.Tos:
+				continue
+			case filterMask&RT_FILTER_PRIORITY != 0 && rule.Priority != filter.Priority:
+				continue
+			case filterMask&RT_FILTER_MARK != 0 && rule.Mark != filter.Mark:
+				continue
+			case filterMask&RT_FILTER_MASK != 0 && rule.Mask != filter.Mask:
+				continue
 			}
 		}
+
 		res = append(res, *rule)
 	}
 
 	return res, nil
 }
+
+func (pr *RulePortRange) toRtAttrData() []byte {
+	b := [][]byte{make([]byte, 2), make([]byte, 2)}
+	native.PutUint16(b[0], pr.Start)
+	native.PutUint16(b[1], pr.End)
+	return bytes.Join(b, []byte{})
+}

+ 137 - 8
vendor/github.com/vishvananda/netlink/socket_linux.go

@@ -4,6 +4,7 @@ import (
 	"errors"
 	"fmt"
 	"net"
+	"syscall"
 
 	"github.com/vishvananda/netlink/nl"
 	"golang.org/x/sys/unix"
@@ -49,10 +50,15 @@ func (r *socketRequest) Serialize() []byte {
 	native.PutUint32(b.Next(4), r.States)
 	networkOrder.PutUint16(b.Next(2), r.ID.SourcePort)
 	networkOrder.PutUint16(b.Next(2), r.ID.DestinationPort)
-	copy(b.Next(4), r.ID.Source.To4())
-	b.Next(12)
-	copy(b.Next(4), r.ID.Destination.To4())
-	b.Next(12)
+	if r.Family == unix.AF_INET6 {
+		copy(b.Next(16), r.ID.Source)
+		copy(b.Next(16), r.ID.Destination)
+	} else {
+		copy(b.Next(4), r.ID.Source.To4())
+		b.Next(12)
+		copy(b.Next(4), r.ID.Destination.To4())
+		b.Next(12)
+	}
 	native.PutUint32(b.Next(4), r.ID.Interface)
 	native.PutUint32(b.Next(4), r.ID.Cookie[0])
 	native.PutUint32(b.Next(4), r.ID.Cookie[1])
@@ -89,10 +95,15 @@ func (s *Socket) deserialize(b []byte) error {
 	s.Retrans = rb.Read()
 	s.ID.SourcePort = networkOrder.Uint16(rb.Next(2))
 	s.ID.DestinationPort = networkOrder.Uint16(rb.Next(2))
-	s.ID.Source = net.IPv4(rb.Read(), rb.Read(), rb.Read(), rb.Read())
-	rb.Next(12)
-	s.ID.Destination = net.IPv4(rb.Read(), rb.Read(), rb.Read(), rb.Read())
-	rb.Next(12)
+	if s.Family == unix.AF_INET6 {
+		s.ID.Source = net.IP(rb.Next(16))
+		s.ID.Destination = net.IP(rb.Next(16))
+	} else {
+		s.ID.Source = net.IPv4(rb.Read(), rb.Read(), rb.Read(), rb.Read())
+		rb.Next(12)
+		s.ID.Destination = net.IPv4(rb.Read(), rb.Read(), rb.Read(), rb.Read())
+		rb.Next(12)
+	}
 	s.ID.Interface = native.Uint32(rb.Next(4))
 	s.ID.Cookie[0] = native.Uint32(rb.Next(4))
 	s.ID.Cookie[1] = native.Uint32(rb.Next(4))
@@ -160,3 +171,121 @@ func SocketGet(local, remote net.Addr) (*Socket, error) {
 	}
 	return sock, nil
 }
+
+// SocketDiagTCPInfo requests INET_DIAG_INFO for the TCP protocol for the specified family type and returns each socket together with its extended TCP info.
+func SocketDiagTCPInfo(family uint8) ([]*InetDiagTCPInfoResp, error) {
+	var result []*InetDiagTCPInfoResp
+	err := socketDiagTCPExecutor(family, func(m syscall.NetlinkMessage) error {
+		sockInfo := &Socket{}
+		if err := sockInfo.deserialize(m.Data); err != nil {
+			return err
+		}
+		attrs, err := nl.ParseRouteAttr(m.Data[sizeofSocket:])
+		if err != nil {
+			return err
+		}
+
+		res, err := attrsToInetDiagTCPInfoResp(attrs, sockInfo)
+		if err != nil {
+			return err
+		}
+
+		result = append(result, res)
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+	return result, nil
+}
+
+// SocketDiagTCP requests INET_DIAG_INFO for the TCP protocol for the specified family type and returns the related sockets.
+func SocketDiagTCP(family uint8) ([]*Socket, error) {
+	var result []*Socket
+	err := socketDiagTCPExecutor(family, func(m syscall.NetlinkMessage) error {
+		sockInfo := &Socket{}
+		if err := sockInfo.deserialize(m.Data); err != nil {
+			return err
+		}
+		result = append(result, sockInfo)
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+	return result, nil
+}
+
+// socketDiagTCPExecutor requests INET_DIAG_INFO for the TCP protocol for the specified family type and passes each received message to receiver.
+func socketDiagTCPExecutor(family uint8, receiver func(syscall.NetlinkMessage) error) error {
+	s, err := nl.Subscribe(unix.NETLINK_INET_DIAG)
+	if err != nil {
+		return err
+	}
+	defer s.Close()
+
+	req := nl.NewNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP)
+	req.AddData(&socketRequest{
+		Family:   family,
+		Protocol: unix.IPPROTO_TCP,
+		Ext:      (1 << (INET_DIAG_VEGASINFO - 1)) | (1 << (INET_DIAG_INFO - 1)),
+		States:   uint32(0xfff), // All TCP states
+	})
+	s.Send(req)
+
+loop:
+	for {
+		msgs, from, err := s.Receive()
+		if err != nil {
+			return err
+		}
+		if from.Pid != nl.PidKernel {
+			return fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel)
+		}
+		if len(msgs) == 0 {
+			return errors.New("no message nor error from netlink")
+		}
+
+		for _, m := range msgs {
+			switch m.Header.Type {
+			case unix.NLMSG_DONE:
+				break loop
+			case unix.NLMSG_ERROR:
+				error := int32(native.Uint32(m.Data[0:4]))
+				return syscall.Errno(-error)
+			}
+			if err := receiver(m); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+func attrsToInetDiagTCPInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *Socket) (*InetDiagTCPInfoResp, error) {
+	var tcpInfo *TCPInfo
+	var tcpBBRInfo *TCPBBRInfo
+	for _, a := range attrs {
+		if a.Attr.Type == INET_DIAG_INFO {
+			tcpInfo = &TCPInfo{}
+			if err := tcpInfo.deserialize(a.Value); err != nil {
+				return nil, err
+			}
+			continue
+		}
+
+		if a.Attr.Type == INET_DIAG_BBRINFO {
+			tcpBBRInfo = &TCPBBRInfo{}
+			if err := tcpBBRInfo.deserialize(a.Value); err != nil {
+				return nil, err
+			}
+			continue
+		}
+	}
+
+	return &InetDiagTCPInfoResp{
+		InetDiagMsg: sockInfo,
+		TCPInfo:     tcpInfo,
+		TCPBBRInfo:  tcpBBRInfo,
+	}, nil
+}

+ 84 - 0
vendor/github.com/vishvananda/netlink/tcp.go

@@ -0,0 +1,84 @@
+package netlink
+
+// TCP States
+const (
+	TCP_ESTABLISHED = iota + 0x01
+	TCP_SYN_SENT
+	TCP_SYN_RECV
+	TCP_FIN_WAIT1
+	TCP_FIN_WAIT2
+	TCP_TIME_WAIT
+	TCP_CLOSE
+	TCP_CLOSE_WAIT
+	TCP_LAST_ACK
+	TCP_LISTEN
+	TCP_CLOSING
+	TCP_NEW_SYN_REC
+	TCP_MAX_STATES
+)
+
+type TCPInfo struct {
+	State                     uint8
+	Ca_state                  uint8
+	Retransmits               uint8
+	Probes                    uint8
+	Backoff                   uint8
+	Options                   uint8
+	Snd_wscale                uint8 // no uint4
+	Rcv_wscale                uint8
+	Delivery_rate_app_limited uint8
+	Fastopen_client_fail      uint8
+	Rto                       uint32
+	Ato                       uint32
+	Snd_mss                   uint32
+	Rcv_mss                   uint32
+	Unacked                   uint32
+	Sacked                    uint32
+	Lost                      uint32
+	Retrans                   uint32
+	Fackets                   uint32
+	Last_data_sent            uint32
+	Last_ack_sent             uint32
+	Last_data_recv            uint32
+	Last_ack_recv             uint32
+	Pmtu                      uint32
+	Rcv_ssthresh              uint32
+	Rtt                       uint32
+	Rttvar                    uint32
+	Snd_ssthresh              uint32
+	Snd_cwnd                  uint32
+	Advmss                    uint32
+	Reordering                uint32
+	Rcv_rtt                   uint32
+	Rcv_space                 uint32
+	Total_retrans             uint32
+	Pacing_rate               uint64
+	Max_pacing_rate           uint64
+	Bytes_acked               uint64 /* RFC4898 tcpEStatsAppHCThruOctetsAcked */
+	Bytes_received            uint64 /* RFC4898 tcpEStatsAppHCThruOctetsReceived */
+	Segs_out                  uint32 /* RFC4898 tcpEStatsPerfSegsOut */
+	Segs_in                   uint32 /* RFC4898 tcpEStatsPerfSegsIn */
+	Notsent_bytes             uint32
+	Min_rtt                   uint32
+	Data_segs_in              uint32 /* RFC4898 tcpEStatsDataSegsIn */
+	Data_segs_out             uint32 /* RFC4898 tcpEStatsDataSegsOut */
+	Delivery_rate             uint64
+	Busy_time                 uint64 /* Time (usec) busy sending data */
+	Rwnd_limited              uint64 /* Time (usec) limited by receive window */
+	Sndbuf_limited            uint64 /* Time (usec) limited by send buffer */
+	Delivered                 uint32
+	Delivered_ce              uint32
+	Bytes_sent                uint64 /* RFC4898 tcpEStatsPerfHCDataOctetsOut */
+	Bytes_retrans             uint64 /* RFC4898 tcpEStatsPerfOctetsRetrans */
+	Dsack_dups                uint32 /* RFC4898 tcpEStatsStackDSACKDups */
+	Reord_seen                uint32 /* reordering events seen */
+	Rcv_ooopack               uint32 /* Out-of-order packets received */
+	Snd_wnd                   uint32 /* peer's advertised receive window after scaling (bytes) */
+}
+
+type TCPBBRInfo struct {
+	BBRBW         uint64
+	BBRMinRTT     uint32
+	BBRPacingGain uint32
+	BBRCwndGain   uint32
+}

+ 353 - 0
vendor/github.com/vishvananda/netlink/tcp_linux.go

@@ -0,0 +1,353 @@
+package netlink
+
+import (
+	"bytes"
+	"errors"
+	"io"
+)
+
+const (
+	tcpBBRInfoLen = 20
+)
+
+func checkDeserErr(err error) error {
+	if err == io.EOF {
+		return nil
+	}
+	return err
+}
+
+func (t *TCPInfo) deserialize(b []byte) error {
+	var err error
+	rb := bytes.NewBuffer(b)
+
+	t.State, err = rb.ReadByte()
+	if err != nil {
+		return checkDeserErr(err)
+	}
+
+	t.Ca_state, err = rb.ReadByte()
+	if err != nil {
+		return checkDeserErr(err)
+	}
+
+	t.Retransmits, err = rb.ReadByte()
+	if err != nil {
+		return checkDeserErr(err)
+	}
+
+	t.Probes, err = rb.ReadByte()
+	if err != nil {
+		return checkDeserErr(err)
+	}
+
+	t.Backoff, err = rb.ReadByte()
+	if err != nil {
+		return checkDeserErr(err)
+	}
+	t.Options, err = rb.ReadByte()
+	if err != nil {
+		return checkDeserErr(err)
+	}
+
+	scales, err := rb.ReadByte()
+	if err != nil {
+		return checkDeserErr(err)
+	}
+	t.Snd_wscale = scales >> 4  // first 4 bits
+	t.Rcv_wscale = scales & 0xf // last 4 bits
+
+	rateLimAndFastOpen, err := rb.ReadByte()
+	if err != nil {
+		return checkDeserErr(err)
+	}
+	t.Delivery_rate_app_limited = rateLimAndFastOpen >> 7 // get first bit
+	t.Fastopen_client_fail = rateLimAndFastOpen >> 5 & 3  // get next two bits
+
+	next := rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Rto = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Ato = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Snd_mss = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Rcv_mss = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Unacked = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Sacked = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Lost = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Retrans = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Fackets = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Last_data_sent = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Last_ack_sent = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Last_data_recv = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Last_ack_recv = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Pmtu = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Rcv_ssthresh = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Rtt = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Rttvar = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Snd_ssthresh = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Snd_cwnd = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Advmss = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Reordering = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Rcv_rtt = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Rcv_space = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Total_retrans = native.Uint32(next)
+
+	next = rb.Next(8)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Pacing_rate = native.Uint64(next)
+
+	next = rb.Next(8)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Max_pacing_rate = native.Uint64(next)
+
+	next = rb.Next(8)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Bytes_acked = native.Uint64(next)
+
+	next = rb.Next(8)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Bytes_received = native.Uint64(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Segs_out = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Segs_in = native.Uint32(next)
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Notsent_bytes = native.Uint32(next)
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Min_rtt = native.Uint32(next)
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Data_segs_in = native.Uint32(next)
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Data_segs_out = native.Uint32(next)
+
+	next = rb.Next(8)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Delivery_rate = native.Uint64(next)
+
+	next = rb.Next(8)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Busy_time = native.Uint64(next)
+
+	next = rb.Next(8)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Rwnd_limited = native.Uint64(next)
+
+	next = rb.Next(8)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Sndbuf_limited = native.Uint64(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Delivered = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Delivered_ce = native.Uint32(next)
+
+	next = rb.Next(8)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Bytes_sent = native.Uint64(next)
+
+	next = rb.Next(8)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Bytes_retrans = native.Uint64(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Dsack_dups = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Reord_seen = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Rcv_ooopack = native.Uint32(next)
+
+	next = rb.Next(4)
+	if len(next) == 0 {
+		return nil
+	}
+	t.Snd_wnd = native.Uint32(next)
+	return nil
+}
+
+func (t *TCPBBRInfo) deserialize(b []byte) error {
+	if len(b) != tcpBBRInfoLen {
+		return errors.New("Invalid length")
+	}
+
+	rb := bytes.NewBuffer(b)
+	t.BBRBW = native.Uint64(rb.Next(8))
+	t.BBRMinRTT = native.Uint32(rb.Next(4))
+	t.BBRPacingGain = native.Uint32(rb.Next(4))
+	t.BBRCwndGain = native.Uint32(rb.Next(4))
+
+	return nil
+}

+ 7 - 6
vendor/github.com/vishvananda/netlink/xfrm_policy.go

@@ -58,12 +58,13 @@ func (a PolicyAction) String() string {
 // policy. These rules are matched with XfrmState to determine encryption
 // and authentication algorithms.
 type XfrmPolicyTmpl struct {
-	Dst   net.IP
-	Src   net.IP
-	Proto Proto
-	Mode  Mode
-	Spi   int
-	Reqid int
+	Dst      net.IP
+	Src      net.IP
+	Proto    Proto
+	Mode     Mode
+	Spi      int
+	Reqid    int
+	Optional int
 }
 
 func (t XfrmPolicyTmpl) String() string {

+ 10 - 4
vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go

@@ -79,6 +79,7 @@ func (h *Handle) xfrmPolicyAddOrUpdate(policy *XfrmPolicy, nlProto int) error {
 		userTmpl.XfrmId.Spi = nl.Swap32(uint32(tmpl.Spi))
 		userTmpl.Mode = uint8(tmpl.Mode)
 		userTmpl.Reqid = uint32(tmpl.Reqid)
+		userTmpl.Optional = uint8(tmpl.Optional)
 		userTmpl.Aalgos = ^uint32(0)
 		userTmpl.Ealgos = ^uint32(0)
 		userTmpl.Calgos = ^uint32(0)
@@ -92,8 +93,10 @@ func (h *Handle) xfrmPolicyAddOrUpdate(policy *XfrmPolicy, nlProto int) error {
 		req.AddData(out)
 	}
 
-	ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid)))
-	req.AddData(ifId)
+	if policy.Ifid != 0 {
+		ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid)))
+		req.AddData(ifId)
+	}
 
 	_, err := req.Execute(unix.NETLINK_XFRM, 0)
 	return err
@@ -188,8 +191,10 @@ func (h *Handle) xfrmPolicyGetOrDelete(policy *XfrmPolicy, nlProto int) (*XfrmPo
 		req.AddData(out)
 	}
 
-	ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid)))
-	req.AddData(ifId)
+	if policy.Ifid != 0 {
+		ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid)))
+		req.AddData(ifId)
+	}
 
 	resType := nl.XFRM_MSG_NEWPOLICY
 	if nlProto == nl.XFRM_MSG_DELPOLICY {
@@ -247,6 +252,7 @@ func parseXfrmPolicy(m []byte, family int) (*XfrmPolicy, error) {
 				resTmpl.Mode = Mode(tmpl.Mode)
 				resTmpl.Spi = int(nl.Swap32(tmpl.XfrmId.Spi))
 				resTmpl.Reqid = int(tmpl.Reqid)
+				resTmpl.Optional = int(tmpl.Optional)
 				policy.Tmpls = append(policy.Tmpls, resTmpl)
 			}
 		case nl.XFRMA_MARK:

+ 2 - 2
vendor/github.com/vishvananda/netlink/xfrm_state.go

@@ -94,7 +94,7 @@ type XfrmState struct {
 	Limits       XfrmStateLimits
 	Statistics   XfrmStateStats
 	Mark         *XfrmMark
-	OutputMark   int
+	OutputMark   *XfrmMark
 	Ifid         int
 	Auth         *XfrmStateAlgo
 	Crypt        *XfrmStateAlgo
@@ -104,7 +104,7 @@ type XfrmState struct {
 }
 
 func (sa XfrmState) String() string {
-	return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, OutputMark: %d, Ifid: %d, Auth: %v, Crypt: %v, Aead: %v, Encap: %v, ESN: %t",
+	return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, OutputMark: %v, Ifid: %d, Auth: %v, Crypt: %v, Aead: %v, Encap: %v, ESN: %t",
 		sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.OutputMark, sa.Ifid, sa.Auth, sa.Crypt, sa.Aead, sa.Encap, sa.ESN)
 }
 func (sa XfrmState) Print(stats bool) string {

+ 28 - 9
vendor/github.com/vishvananda/netlink/xfrm_state_linux.go

@@ -111,7 +111,7 @@ func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error {
 
 	// A state with spi 0 can't be deleted so don't allow it to be set
 	if state.Spi == 0 {
-		return fmt.Errorf("Spi must be set when adding xfrm state.")
+		return fmt.Errorf("Spi must be set when adding xfrm state")
 	}
 	req := h.newNetlinkRequest(nlProto, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK)
 
@@ -158,13 +158,19 @@ func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error {
 		out := nl.NewRtAttr(nl.XFRMA_REPLAY_ESN_VAL, writeReplayEsn(state.ReplayWindow))
 		req.AddData(out)
 	}
-	if state.OutputMark != 0 {
-		out := nl.NewRtAttr(nl.XFRMA_OUTPUT_MARK, nl.Uint32Attr(uint32(state.OutputMark)))
+	if state.OutputMark != nil {
+		out := nl.NewRtAttr(nl.XFRMA_SET_MARK, nl.Uint32Attr(state.OutputMark.Value))
 		req.AddData(out)
+		if state.OutputMark.Mask != 0 {
+			out = nl.NewRtAttr(nl.XFRMA_SET_MARK_MASK, nl.Uint32Attr(state.OutputMark.Mask))
+			req.AddData(out)
+		}
 	}
 
-	ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid)))
-	req.AddData(ifId)
+	if state.Ifid != 0 {
+		ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid)))
+		req.AddData(ifId)
+	}
 
 	_, err := req.Execute(unix.NETLINK_XFRM, 0)
 	return err
@@ -277,8 +283,10 @@ func (h *Handle) xfrmStateGetOrDelete(state *XfrmState, nlProto int) (*XfrmState
 		req.AddData(out)
 	}
 
-	ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid)))
-	req.AddData(ifId)
+	if state.Ifid != 0 {
+		ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid)))
+		req.AddData(ifId)
+	}
 
 	resType := nl.XFRM_MSG_NEWSA
 	if nlProto == nl.XFRM_MSG_DELSA {
@@ -377,8 +385,19 @@ func parseXfrmState(m []byte, family int) (*XfrmState, error) {
 			state.Mark = new(XfrmMark)
 			state.Mark.Value = mark.Value
 			state.Mark.Mask = mark.Mask
-		case nl.XFRMA_OUTPUT_MARK:
-			state.OutputMark = int(native.Uint32(attr.Value))
+		case nl.XFRMA_SET_MARK:
+			if state.OutputMark == nil {
+				state.OutputMark = new(XfrmMark)
+			}
+			state.OutputMark.Value = native.Uint32(attr.Value)
+		case nl.XFRMA_SET_MARK_MASK:
+			if state.OutputMark == nil {
+				state.OutputMark = new(XfrmMark)
+			}
+			state.OutputMark.Mask = native.Uint32(attr.Value)
+			if state.OutputMark.Mask == 0xffffffff {
+				state.OutputMark.Mask = 0
+			}
 		case nl.XFRMA_IF_ID:
 			state.Ifid = int(native.Uint32(attr.Value))
 		}

+ 1 - 2
vendor/modules.txt

@@ -768,7 +768,7 @@ github.com/tonistiigi/units
 github.com/vbatts/tar-split/archive/tar
 github.com/vbatts/tar-split/tar/asm
 github.com/vbatts/tar-split/tar/storage
-# github.com/vishvananda/netlink v1.1.1-0.20210330154013-f5de75959ad5 => github.com/vishvananda/netlink v1.1.0
+# github.com/vishvananda/netlink v1.2.1-beta.2
 ## explicit; go 1.12
 github.com/vishvananda/netlink
 github.com/vishvananda/netlink/nl
@@ -1126,7 +1126,6 @@ gotest.tools/v3/skip
 # github.com/matttproud/golang_protobuf_extensions => github.com/matttproud/golang_protobuf_extensions v1.0.1
 # github.com/prometheus/client_golang => github.com/prometheus/client_golang v1.6.0
 # github.com/prometheus/procfs => github.com/prometheus/procfs v0.0.11
-# github.com/vishvananda/netlink => github.com/vishvananda/netlink v1.1.0
 # go.opencensus.io => go.opencensus.io v0.22.3
 # github.com/rexray/gocsi => github.com/dperny/gocsi v1.2.3-pre
 # github.com/google/certificate-transparency-go => github.com/google/certificate-transparency-go v1.0.20