Browse Source

Vendoring libnetwork bbd6e6d8ca1e7c9b42f6f53277b0bde72847ff90

Signed-off-by: Alessandro Boch <aboch@docker.com>
Alessandro Boch 9 years ago
parent
commit
8fe6b3835c
34 changed files with 2209 additions and 171 deletions
  1. 2 2
      hack/vendor.sh
  2. 6 0
      integration-cli/docker_cli_network_unix_test.go
  3. 52 5
      vendor/src/github.com/docker/libnetwork/MAINTAINERS
  4. 0 6
      vendor/src/github.com/docker/libnetwork/README.md
  5. 16 9
      vendor/src/github.com/docker/libnetwork/bitseq/sequence.go
  6. 1 1
      vendor/src/github.com/docker/libnetwork/circle.yml
  7. 4 4
      vendor/src/github.com/docker/libnetwork/controller.go
  8. 8 0
      vendor/src/github.com/docker/libnetwork/default_gateway.go
  9. 3 0
      vendor/src/github.com/docker/libnetwork/driverapi/driverapi.go
  10. 4 0
      vendor/src/github.com/docker/libnetwork/drivers/overlay/ov_serf.go
  11. 5 4
      vendor/src/github.com/docker/libnetwork/drivers/remote/api/api.go
  12. 3 0
      vendor/src/github.com/docker/libnetwork/drivers/remote/driver.go
  13. 15 5
      vendor/src/github.com/docker/libnetwork/endpoint.go
  14. 26 3
      vendor/src/github.com/docker/libnetwork/endpoint_info.go
  15. 26 8
      vendor/src/github.com/docker/libnetwork/etchosts/etchosts.go
  16. 20 15
      vendor/src/github.com/docker/libnetwork/ipam/allocator.go
  17. 4 5
      vendor/src/github.com/docker/libnetwork/ipam/store.go
  18. 3 3
      vendor/src/github.com/docker/libnetwork/ipam/structures.go
  19. 17 17
      vendor/src/github.com/docker/libnetwork/ipamapi/contract.go
  20. 7 3
      vendor/src/github.com/docker/libnetwork/network.go
  21. 8 29
      vendor/src/github.com/docker/libnetwork/sandbox.go
  22. 110 0
      vendor/src/github.com/vishvananda/netlink/class.go
  23. 144 0
      vendor/src/github.com/vishvananda/netlink/class_linux.go
  24. 140 0
      vendor/src/github.com/vishvananda/netlink/filter.go
  25. 322 0
      vendor/src/github.com/vishvananda/netlink/filter_linux.go
  26. 51 5
      vendor/src/github.com/vishvananda/netlink/link.go
  27. 92 1
      vendor/src/github.com/vishvananda/netlink/link_linux.go
  28. 14 0
      vendor/src/github.com/vishvananda/netlink/link_tuntap_linux.go
  29. 1 1
      vendor/src/github.com/vishvananda/netlink/nl/nl_linux.go
  30. 508 0
      vendor/src/github.com/vishvananda/netlink/nl/tc_linux.go
  31. 167 0
      vendor/src/github.com/vishvananda/netlink/qdisc.go
  32. 316 0
      vendor/src/github.com/vishvananda/netlink/qdisc_linux.go
  33. 44 2
      vendor/src/github.com/vishvananda/netlink/route.go
  34. 70 43
      vendor/src/github.com/vishvananda/netlink/route_linux.go

+ 2 - 2
hack/vendor.sh

@@ -22,14 +22,14 @@ clone git github.com/vdemeester/shakers 3c10293ce22b900c27acad7b28656196fcc2f73b
 clone git golang.org/x/net 47990a1ba55743e6ef1affd3a14e5bac8553615d https://github.com/golang/net.git
 
 #get libnetwork packages
-clone git github.com/docker/libnetwork 04cc1fa0a89f8c407b7be8cab883d4b17531ea7d
+clone git github.com/docker/libnetwork bbd6e6d8ca1e7c9b42f6f53277b0bde72847ff90
 clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec
 clone git github.com/hashicorp/go-msgpack 71c2886f5a673a35f909803f38ece5810165097b
 clone git github.com/hashicorp/memberlist 9a1e242e454d2443df330bdd51a436d5a9058fc4
 clone git github.com/hashicorp/serf 7151adcef72687bf95f451a2e0ba15cb19412bf2
 clone git github.com/docker/libkv c2aac5dbbaa5c872211edea7c0f32b3bd67e7410
 clone git github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25
-clone git github.com/vishvananda/netlink 4b5dce31de6d42af5bb9811c6d265472199e0fec
+clone git github.com/vishvananda/netlink 8e810149a2e531fed9b837c0c7d8a8922d2bedf7
 clone git github.com/BurntSushi/toml f706d00e3de6abe700c994cdd545a1a4915af060
 clone git github.com/samuel/go-zookeeper d0e0d8e11f318e000a8cc434616d69e329edc374
 clone git github.com/deckarep/golang-set ef32fa3046d9f249d399f98ebaf9be944430fd1d

+ 6 - 0
integration-cli/docker_cli_network_unix_test.go

@@ -569,6 +569,9 @@ func (s *DockerDaemonSuite) TestDockerNetworkNoDiscoveryDefaultBridgeNetwork(c *
 	out, err = s.d.Cmd("network", "connect", network, cid1)
 	c.Assert(err, check.IsNil, check.Commentf(out))
 
+	hosts, err = s.d.Cmd("exec", cid1, "cat", hostsFile)
+	c.Assert(err, checker.IsNil)
+
 	hostsPost, err = s.d.Cmd("exec", cid1, "cat", hostsFile)
 	c.Assert(err, checker.IsNil)
 	c.Assert(string(hosts), checker.Equals, string(hostsPost),
@@ -631,6 +634,9 @@ func (s *DockerNetworkSuite) TestDockerNetworkAnonymousEndpoint(c *check.C) {
 
 	dockerCmd(c, "network", "connect", cstmBridgeNw1, cid2)
 
+	hosts2, err = readContainerFileWithExec(cid2, hostsFile)
+	c.Assert(err, checker.IsNil)
+
 	hosts1post, err = readContainerFileWithExec(cid1, hostsFile)
 	c.Assert(err, checker.IsNil)
 	c.Assert(string(hosts1), checker.Equals, string(hosts1post),

+ 52 - 5
vendor/src/github.com/docker/libnetwork/MAINTAINERS

@@ -1,5 +1,52 @@
-Alessandro Boch <aboch@docker.com> (@aboch)
-Alexandr Morozov <lk4d4@docker.com> (@LK4D4)
-Arnaud Porterie <arnaud@docker.com> (@icecrime)
-Jana Radhakrishnan <mrjana@docker.com> (@mrjana)
-Madhu Venugopal <madhu@docker.com> (@mavenugo)
+# Libnetwork maintainers file
+#
+# This file describes who runs the docker/libnetwork project and how.
+# This is a living document - if you see something out of date or missing, speak up!
+#
+# It is structured to be consumable by both humans and programs.
+# To extract its contents programmatically, use any TOML-compliant parser.
+#
+# This file is compiled into the MAINTAINERS file in docker/opensource.
+#
+[Org]
+	[Org."Core maintainers"]
+		people = [
+			"aboch",
+			"LK4D4",
+			"icecrime",
+			"mrjana",
+			"mavenugo",
+		]
+
+[people]
+
+# A reference list of all people associated with the project.
+# All other sections should refer to people by their canonical key
+# in the people section.
+
+	# ADD YOURSELF HERE IN ALPHABETICAL ORDER
+
+	[people.aboch]
+	Name = "Alessandro Boch"
+	Email = "aboch@docker.com"
+	GitHub = "aboch"
+
+	[people.LK4D4]
+	Name = "Alexandr Morozov"
+	Email = "lk4d4@docker.com"
+	GitHub = "LK4D4"
+
+	[people.icecrime]
+	Name = "Arnaud Porterie"
+	Email = "arnaud@docker.com"
+	GitHub = "icecrime"
+
+	[people.mrjana]
+	Name = "Jana Radhakrishnan"
+	Email = "mrjana@docker.com"
+	GitHub = "mrjana"
+
+	[people.mavenugo]
+	Name = "Madhu Venugopal"
+	Email = "madhu@docker.com"
+	GitHub = "mavenugo"

+ 0 - 6
vendor/src/github.com/docker/libnetwork/README.md

@@ -6,8 +6,6 @@ Libnetwork provides a native Go implementation for connecting containers
 
 The goal of libnetwork is to deliver a robust Container Network Model that provides a consistent programming interface and the required network abstractions for applications.
 
-**NOTE**: libnetwork project is under heavy development and is not ready for general use.
-
 #### Design
 Please refer to the [design](docs/design.md) for more information.
 
@@ -67,10 +65,6 @@ There are many networking solutions available to suit a broad range of use-cases
 		}
 	}
 ```
-#### Current Status
-Please watch this space for updates on the progress.
-
-Currently libnetwork is nothing more than an attempt to modularize the Docker platform's networking subsystem by moving it into libnetwork as a library.
 
 ## Future
 Please refer to [roadmap](ROADMAP.md) for more information.

+ 16 - 9
vendor/src/github.com/docker/libnetwork/bitseq/sequence.go

@@ -24,7 +24,10 @@ const (
 )
 
 var (
-	errNoBitAvailable = fmt.Errorf("no bit available")
+	// ErrNoBitAvailable is returned when no more bits are available to set
+	ErrNoBitAvailable = fmt.Errorf("no bit available")
+	// ErrBitAllocated is returned when the specific bit requested is already set
+	ErrBitAllocated = fmt.Errorf("requested bit is already allocated")
 )
 
 // Handle contains the sequece representing the bitmask and its identifier
@@ -94,7 +97,7 @@ func (s *sequence) toString() string {
 // GetAvailableBit returns the position of the first unset bit in the bitmask represented by this sequence
 func (s *sequence) getAvailableBit(from uint64) (uint64, uint64, error) {
 	if s.block == blockMAX || s.count == 0 {
-		return invalidPos, invalidPos, errNoBitAvailable
+		return invalidPos, invalidPos, ErrNoBitAvailable
 	}
 	bits := from
 	bitSel := blockFirstBit >> from
@@ -197,7 +200,7 @@ func (h *Handle) SetAnyInRange(start, end uint64) (uint64, error) {
 		return invalidPos, fmt.Errorf("invalid bit range [%d, %d]", start, end)
 	}
 	if h.Unselected() == 0 {
-		return invalidPos, errNoBitAvailable
+		return invalidPos, ErrNoBitAvailable
 	}
 	return h.set(0, start, end, true, false)
 }
@@ -205,7 +208,7 @@ func (h *Handle) SetAnyInRange(start, end uint64) (uint64, error) {
 // SetAny atomically sets the first unset bit in the sequence and returns the corresponding ordinal
 func (h *Handle) SetAny() (uint64, error) {
 	if h.Unselected() == 0 {
-		return invalidPos, errNoBitAvailable
+		return invalidPos, ErrNoBitAvailable
 	}
 	return h.set(0, 0, h.bits-1, true, false)
 }
@@ -250,8 +253,12 @@ func (h *Handle) set(ordinal, start, end uint64, any bool, release bool) (uint64
 	)
 
 	for {
-		if h.store != nil {
-			if err := h.store.GetObject(datastore.Key(h.Key()...), h); err != nil && err != datastore.ErrKeyNotFound {
+		var store datastore.DataStore
+		h.Lock()
+		store = h.store
+		h.Unlock()
+		if store != nil {
+			if err := store.GetObject(datastore.Key(h.Key()...), h); err != nil && err != datastore.ErrKeyNotFound {
 				return ret, err
 			}
 		}
@@ -265,7 +272,7 @@ func (h *Handle) set(ordinal, start, end uint64, any bool, release bool) (uint64
 				bytePos, bitPos, err = getFirstAvailable(h.head, start)
 				ret = posToOrdinal(bytePos, bitPos)
 				if end < ret {
-					err = errNoBitAvailable
+					err = ErrNoBitAvailable
 				}
 			} else {
 				bytePos, bitPos, err = checkIfAvailable(h.head, ordinal)
@@ -445,7 +452,7 @@ func getFirstAvailable(head *sequence, start uint64) (uint64, uint64, error) {
 		byteOffset += current.count * blockBytes
 		current = current.next
 	}
-	return invalidPos, invalidPos, errNoBitAvailable
+	return invalidPos, invalidPos, ErrNoBitAvailable
 }
 
 // checkIfAvailable checks if the bit correspondent to the specified ordinal is unset
@@ -463,7 +470,7 @@ func checkIfAvailable(head *sequence, ordinal uint64) (uint64, uint64, error) {
 		}
 	}
 
-	return invalidPos, invalidPos, fmt.Errorf("requested bit is not available")
+	return invalidPos, invalidPos, ErrBitAllocated
 }
 
 // Given the byte position and the sequences list head, return the pointer to the

+ 1 - 1
vendor/src/github.com/docker/libnetwork/circle.yml

@@ -4,7 +4,7 @@ machine:
 
 dependencies:
   override:
-    - sudo apt-get update; sudo apt-get install -y iptables zookeeperd
+    - sudo apt-get update; sudo apt-get install -y iptables zookeeperd 
     - go get golang.org/x/tools/cmd/vet
     - go get golang.org/x/tools/cmd/goimports
     - go get golang.org/x/tools/cmd/cover

+ 4 - 4
vendor/src/github.com/docker/libnetwork/controller.go

@@ -315,17 +315,17 @@ func (c *controller) RegisterIpamDriver(name string, driver ipamapi.Ipam) error
 	_, ok := c.ipamDrivers[name]
 	c.Unlock()
 	if ok {
-		return driverapi.ErrActiveRegistration(name)
+		return types.ForbiddenErrorf("ipam driver %q already registered", name)
 	}
 	locAS, glbAS, err := driver.GetDefaultAddressSpaces()
 	if err != nil {
-		return fmt.Errorf("ipam driver %s failed to return default address spaces: %v", name, err)
+		return types.InternalErrorf("ipam driver %q failed to return default address spaces: %v", name, err)
 	}
 	c.Lock()
 	c.ipamDrivers[name] = &ipamData{driver: driver, defaultLocalAddressSpace: locAS, defaultGlobalAddressSpace: glbAS}
 	c.Unlock()
 
-	log.Debugf("Registering ipam provider: %s", name)
+	log.Debugf("Registering ipam driver: %q", name)
 
 	return nil
 }
@@ -667,7 +667,7 @@ func (c *controller) loadIpamDriver(name string) (*ipamData, error) {
 	id, ok := c.ipamDrivers[name]
 	c.Unlock()
 	if !ok {
-		return nil, ErrInvalidNetworkDriver(name)
+		return nil, types.BadRequestErrorf("invalid ipam driver: %q", name)
 	}
 	return id, nil
 }

+ 8 - 0
vendor/src/github.com/docker/libnetwork/default_gateway.go

@@ -103,10 +103,18 @@ func (sb *sandbox) needDefaultGW() bool {
 		if ep.getNetwork().Type() == "null" || ep.getNetwork().Type() == "host" {
 			continue
 		}
+		if ep.joinInfo.disableGatewayService {
+			return false
+		}
 		// TODO v6 needs to be handled.
 		if len(ep.Gateway()) > 0 {
 			return false
 		}
+		for _, r := range ep.StaticRoutes() {
+			if r.Destination.String() == "0.0.0.0/0" {
+				return false
+			}
+		}
 		needGW = true
 	}
 	return needGW

+ 3 - 0
vendor/src/github.com/docker/libnetwork/driverapi/driverapi.go

@@ -91,6 +91,9 @@ type JoinInfo interface {
 	// AddStaticRoute adds a routes to the sandbox.
 	// It may be used in addtion to or instead of a default gateway (as above).
 	AddStaticRoute(destination *net.IPNet, routeType int, nextHop net.IP) error
+
+	// DisableGatewayService tells libnetwork not to provide Default GW for the container
+	DisableGatewayService()
 }
 
 // DriverCallback provides a Callback interface for Drivers into LibNetwork

+ 4 - 0
vendor/src/github.com/docker/libnetwork/drivers/overlay/ov_serf.go

@@ -151,6 +151,10 @@ func (d *driver) processQuery(q *serf.Query) {
 }
 
 func (d *driver) resolvePeer(nid string, peerIP net.IP) (net.HardwareAddr, net.IPMask, net.IP, error) {
+	if d.serfInstance == nil {
+		return nil, nil, nil, fmt.Errorf("could not resolve peer: serf instance not initialized")
+	}
+
 	qPayload := fmt.Sprintf("%s %s", string(nid), peerIP.String())
 	resp, err := d.serfInstance.Query("peerlookup", []byte(qPayload), nil)
 	if err != nil {

+ 5 - 4
vendor/src/github.com/docker/libnetwork/drivers/remote/api/api.go

@@ -134,10 +134,11 @@ type StaticRoute struct {
 // JoinResponse is the response to a JoinRequest.
 type JoinResponse struct {
 	Response
-	InterfaceName *InterfaceName
-	Gateway       string
-	GatewayIPv6   string
-	StaticRoutes  []StaticRoute
+	InterfaceName         *InterfaceName
+	Gateway               string
+	GatewayIPv6           string
+	StaticRoutes          []StaticRoute
+	DisableGatewayService bool
 }
 
 // LeaveRequest describes the API for detaching an endpoint from a sandbox.

+ 3 - 0
vendor/src/github.com/docker/libnetwork/drivers/remote/driver.go

@@ -231,6 +231,9 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
 			}
 		}
 	}
+	if res.DisableGatewayService {
+		jinfo.DisableGatewayService()
+	}
 	return nil
 }
 

+ 15 - 5
vendor/src/github.com/docker/libnetwork/endpoint.go

@@ -60,6 +60,8 @@ type endpoint struct {
 	anonymous     bool
 	generic       map[string]interface{}
 	joinLeaveDone chan struct{}
+	prefAddress   net.IP
+	ipamOptions   map[string]string
 	dbIndex       uint64
 	dbExists      bool
 	sync.Mutex
@@ -386,6 +388,9 @@ func (ep *endpoint) sbJoin(sbox Sandbox, options ...EndpointOption) error {
 		}
 	}()
 
+	// Watch for service records
+	network.getController().watchSvcRecord(ep)
+
 	address := ""
 	if ip := ep.getFirstInterfaceAddress(); ip != nil {
 		address = ip.String()
@@ -394,9 +399,6 @@ func (ep *endpoint) sbJoin(sbox Sandbox, options ...EndpointOption) error {
 		return err
 	}
 
-	// Watch for service records
-	network.getController().watchSvcRecord(ep)
-
 	if err = sb.updateDNS(network.enableIPv6); err != nil {
 		return err
 	}
@@ -559,7 +561,7 @@ func (ep *endpoint) sbLeave(sbox Sandbox, options ...EndpointOption) error {
 
 	sb.deleteHostsEntries(n.getSvcRecords(ep))
 
-	if sb.needDefaultGW() {
+	if !sb.inDelete && sb.needDefaultGW() {
 		ep := sb.getEPwithoutGateway()
 		if ep == nil {
 			return fmt.Errorf("endpoint without GW expected, but not found")
@@ -685,6 +687,14 @@ func EndpointOptionGeneric(generic map[string]interface{}) EndpointOption {
 	}
 }
 
+// CreateOptionIpam function returns an option setter for the ipam configuration for this endpoint
+func CreateOptionIpam(prefAddress net.IP, ipamOptions map[string]string) EndpointOption {
+	return func(ep *endpoint) {
+		ep.prefAddress = prefAddress
+		ep.ipamOptions = ipamOptions
+	}
+}
+
 // CreateOptionExposedPorts function returns an option setter for the container exposed
 // ports option to be passed to network.CreateEndpoint() method.
 func CreateOptionExposedPorts(exposedPorts []types.TransportPort) EndpointOption {
@@ -799,7 +809,7 @@ func (ep *endpoint) assignAddressVersion(ipVer int, ipam ipamapi.Ipam) error {
 		if *address != nil {
 			prefIP = (*address).IP
 		}
-		addr, _, err := ipam.RequestAddress(d.PoolID, prefIP, nil)
+		addr, _, err := ipam.RequestAddress(d.PoolID, prefIP, ep.ipamOptions)
 		if err == nil {
 			ep.Lock()
 			*address = addr

+ 26 - 3
vendor/src/github.com/docker/libnetwork/endpoint_info.go

@@ -25,6 +25,10 @@ type EndpointInfo interface {
 	// This will only return a valid value if a container has joined the endpoint.
 	GatewayIPv6() net.IP
 
+	// StaticRoutes returns the list of static routes configured by the network
+	// driver when the container joins a network
+	StaticRoutes() []*types.StaticRoute
+
 	// Sandbox returns the attached sandbox if there, nil otherwise.
 	Sandbox() Sandbox
 }
@@ -136,9 +140,10 @@ func (epi *endpointInterface) CopyTo(dstEpi *endpointInterface) error {
 }
 
 type endpointJoinInfo struct {
-	gw           net.IP
-	gw6          net.IP
-	StaticRoutes []*types.StaticRoute
+	gw                    net.IP
+	gw6                   net.IP
+	StaticRoutes          []*types.StaticRoute
+	disableGatewayService bool
 }
 
 func (ep *endpoint) Info() EndpointInfo {
@@ -295,6 +300,17 @@ func (ep *endpoint) Sandbox() Sandbox {
 	return cnt
 }
 
+func (ep *endpoint) StaticRoutes() []*types.StaticRoute {
+	ep.Lock()
+	defer ep.Unlock()
+
+	if ep.joinInfo == nil {
+		return nil
+	}
+
+	return ep.joinInfo.StaticRoutes
+}
+
 func (ep *endpoint) Gateway() net.IP {
 	ep.Lock()
 	defer ep.Unlock()
@@ -340,3 +356,10 @@ func (ep *endpoint) retrieveFromStore() (*endpoint, error) {
 	}
 	return n.getEndpointFromStore(ep.ID())
 }
+
+func (ep *endpoint) DisableGatewayService() {
+	ep.Lock()
+	defer ep.Unlock()
+
+	ep.joinInfo.disableGatewayService = true
+}

+ 26 - 8
vendor/src/github.com/docker/libnetwork/etchosts/etchosts.go

@@ -1,6 +1,7 @@
 package etchosts
 
 import (
+	"bufio"
 	"bytes"
 	"fmt"
 	"io"
@@ -138,19 +139,36 @@ func Delete(path string, recs []Record) error {
 	if len(recs) == 0 {
 		return nil
 	}
-
-	old, err := ioutil.ReadFile(path)
+	old, err := os.Open(path)
 	if err != nil {
 		return err
 	}
 
-	regexpStr := fmt.Sprintf("\\S*\\t%s\\n", regexp.QuoteMeta(recs[0].Hosts))
-	for _, r := range recs[1:] {
-		regexpStr = regexpStr + "|" + fmt.Sprintf("\\S*\\t%s\\n", regexp.QuoteMeta(r.Hosts))
+	var buf bytes.Buffer
+
+	s := bufio.NewScanner(old)
+	eol := []byte{'\n'}
+loop:
+	for s.Scan() {
+		b := s.Bytes()
+		if b[0] == '#' {
+			buf.Write(b)
+			buf.Write(eol)
+			continue
+		}
+		for _, r := range recs {
+			if bytes.HasSuffix(b, []byte("\t"+r.Hosts)) {
+				continue loop
+			}
+		}
+		buf.Write(b)
+		buf.Write(eol)
 	}
-
-	var re = regexp.MustCompile(regexpStr)
-	return ioutil.WriteFile(path, re.ReplaceAll(old, []byte("")), 0644)
+	old.Close()
+	if err := s.Err(); err != nil {
+		return err
+	}
+	return ioutil.WriteFile(path, buf.Bytes(), 0644)
 }
 
 // Update all IP addresses where hostname matches.

+ 20 - 15
vendor/src/github.com/docker/libnetwork/ipam/allocator.go

@@ -76,8 +76,7 @@ func NewAllocator(lcDs, glDs datastore.DataStore) (*Allocator, error) {
 func (a *Allocator) refresh(as string) error {
 	aSpace, err := a.getAddressSpaceFromStore(as)
 	if err != nil {
-		return fmt.Errorf("error getting pools config from store during init: %v",
-			err)
+		return types.InternalErrorf("error getting pools config from store: %v", err)
 	}
 
 	if aSpace == nil {
@@ -239,7 +238,7 @@ func (a *Allocator) insertBitMask(key SubnetKey, pool *net.IPNet) error {
 
 	store := a.getStore(key.AddressSpace)
 	if store == nil {
-		return fmt.Errorf("could not find store for address space %s while inserting bit mask", key.AddressSpace)
+		return types.InternalErrorf("could not find store for address space %s while inserting bit mask", key.AddressSpace)
 	}
 
 	ipVer := getAddressVersion(pool.IP)
@@ -279,7 +278,7 @@ func (a *Allocator) retrieveBitmask(k SubnetKey, n *net.IPNet) (*bitseq.Handle,
 	if !ok {
 		log.Debugf("Retrieving bitmask (%s, %s)", k.String(), n.String())
 		if err := a.insertBitMask(k, n); err != nil {
-			return nil, fmt.Errorf("could not find bitmask in datastore for %s", k.String())
+			return nil, types.InternalErrorf("could not find bitmask in datastore for %s", k.String())
 		}
 		a.Lock()
 		bm = a.addresses[k]
@@ -306,7 +305,7 @@ func (a *Allocator) getPredefinedPool(as string, ipV6 bool) (*net.IPNet, error)
 	}
 
 	if as != localAddressSpace && as != globalAddressSpace {
-		return nil, fmt.Errorf("no default pool availbale for non-default addresss spaces")
+		return nil, types.NotImplementedErrorf("no default pool availbale for non-default addresss spaces")
 	}
 
 	aSpace, err := a.getAddrSpace(as)
@@ -378,7 +377,7 @@ func (a *Allocator) RequestAddress(poolID string, prefAddress net.IP, opts map[s
 
 	bm, err := a.retrieveBitmask(k, c.Pool)
 	if err != nil {
-		return nil, nil, fmt.Errorf("could not find bitmask in datastore for %s on address %v request from pool %s: %v",
+		return nil, nil, types.InternalErrorf("could not find bitmask in datastore for %s on address %v request from pool %s: %v",
 			k.String(), prefAddress, poolID, err)
 	}
 	ip, err := a.getAddress(p.Pool, bm, prefAddress, p.Range)
@@ -410,12 +409,12 @@ func (a *Allocator) ReleaseAddress(poolID string, address net.IP) error {
 	p, ok := aSpace.subnets[k]
 	if !ok {
 		aSpace.Unlock()
-		return ipamapi.ErrBadPool
+		return types.NotFoundErrorf("cannot find address pool for poolID:%s", poolID)
 	}
 
 	if address == nil {
 		aSpace.Unlock()
-		return ipamapi.ErrInvalidRequest
+		return types.BadRequestErrorf("invalid address: nil")
 	}
 
 	if !p.Pool.Contains(address) {
@@ -434,12 +433,12 @@ func (a *Allocator) ReleaseAddress(poolID string, address net.IP) error {
 
 	h, err := types.GetHostPartIP(address, mask)
 	if err != nil {
-		return fmt.Errorf("failed to release address %s: %v", address.String(), err)
+		return types.InternalErrorf("failed to release address %s: %v", address.String(), err)
 	}
 
 	bm, err := a.retrieveBitmask(k, c.Pool)
 	if err != nil {
-		return fmt.Errorf("could not find bitmask in datastore for %s on address %v release from pool %s: %v",
+		return types.InternalErrorf("could not find bitmask in datastore for %s on address %v release from pool %s: %v",
 			k.String(), address, poolID, err)
 	}
 
@@ -463,19 +462,25 @@ func (a *Allocator) getAddress(nw *net.IPNet, bitmask *bitseq.Handle, prefAddres
 	} else if prefAddress != nil {
 		hostPart, e := types.GetHostPartIP(prefAddress, base.Mask)
 		if e != nil {
-			return nil, fmt.Errorf("failed to allocate preferred address %s: %v", prefAddress.String(), e)
+			return nil, types.InternalErrorf("failed to allocate preferred address %s: %v", prefAddress.String(), e)
 		}
 		ordinal = ipToUint64(types.GetMinimalIP(hostPart))
 		err = bitmask.Set(ordinal)
 	} else {
 		ordinal, err = bitmask.SetAnyInRange(ipr.Start, ipr.End)
 	}
-	if err != nil {
+
+	switch err {
+	case nil:
+		// Convert IP ordinal for this subnet into IP address
+		return generateAddress(ordinal, base), nil
+	case bitseq.ErrBitAllocated:
+		return nil, ipamapi.ErrIPAlreadyAllocated
+	case bitseq.ErrNoBitAvailable:
 		return nil, ipamapi.ErrNoAvailableIPs
+	default:
+		return nil, err
 	}
-
-	// Convert IP ordinal for this subnet into IP address
-	return generateAddress(ordinal, base), nil
 }
 
 // DumpDatabase dumps the internal info

+ 4 - 5
vendor/src/github.com/docker/libnetwork/ipam/store.go

@@ -2,7 +2,6 @@ package ipam
 
 import (
 	"encoding/json"
-	"fmt"
 
 	log "github.com/Sirupsen/logrus"
 	"github.com/docker/libnetwork/datastore"
@@ -84,7 +83,7 @@ func (a *Allocator) getStore(as string) datastore.DataStore {
 func (a *Allocator) getAddressSpaceFromStore(as string) (*addrSpace, error) {
 	store := a.getStore(as)
 	if store == nil {
-		return nil, fmt.Errorf("store for address space %s not found", as)
+		return nil, types.InternalErrorf("store for address space %s not found", as)
 	}
 
 	pc := &addrSpace{id: dsConfigKey + "/" + as, ds: store, alloc: a}
@@ -93,7 +92,7 @@ func (a *Allocator) getAddressSpaceFromStore(as string) (*addrSpace, error) {
 			return nil, nil
 		}
 
-		return nil, fmt.Errorf("could not get pools config from store: %v", err)
+		return nil, types.InternalErrorf("could not get pools config from store: %v", err)
 	}
 
 	return pc, nil
@@ -102,7 +101,7 @@ func (a *Allocator) getAddressSpaceFromStore(as string) (*addrSpace, error) {
 func (a *Allocator) writeToStore(aSpace *addrSpace) error {
 	store := aSpace.store()
 	if store == nil {
-		return fmt.Errorf("invalid store while trying to write %s address space", aSpace.DataScope())
+		return types.InternalErrorf("invalid store while trying to write %s address space", aSpace.DataScope())
 	}
 
 	err := store.PutObjectAtomic(aSpace)
@@ -116,7 +115,7 @@ func (a *Allocator) writeToStore(aSpace *addrSpace) error {
 func (a *Allocator) deleteFromStore(aSpace *addrSpace) error {
 	store := aSpace.store()
 	if store == nil {
-		return fmt.Errorf("invalid store while trying to delete %s address space", aSpace.DataScope())
+		return types.InternalErrorf("invalid store while trying to delete %s address space", aSpace.DataScope())
 	}
 
 	return store.DeleteObjectAtomic(aSpace)

+ 3 - 3
vendor/src/github.com/docker/libnetwork/ipam/structures.go

@@ -88,12 +88,12 @@ func (s *SubnetKey) String() string {
 // FromString populate the SubnetKey object reading it from string
 func (s *SubnetKey) FromString(str string) error {
 	if str == "" || !strings.Contains(str, "/") {
-		return fmt.Errorf("invalid string form for subnetkey: %s", str)
+		return types.BadRequestErrorf("invalid string form for subnetkey: %s", str)
 	}
 
 	p := strings.Split(str, "/")
 	if len(p) != 3 && len(p) != 5 {
-		return fmt.Errorf("invalid string form for subnetkey: %s", str)
+		return types.BadRequestErrorf("invalid string form for subnetkey: %s", str)
 	}
 	s.AddressSpace = p[0]
 	s.Subnet = fmt.Sprintf("%s/%s", p[1], p[2])
@@ -317,7 +317,7 @@ func (aSpace *addrSpace) updatePoolDBOnRemoval(k SubnetKey) (func() error, error
 				return func() error {
 					bm, err := aSpace.alloc.retrieveBitmask(k, c.Pool)
 					if err != nil {
-						return fmt.Errorf("could not find bitmask in datastore for pool %s removal: %v", k.String(), err)
+						return types.InternalErrorf("could not find bitmask in datastore for pool %s removal: %v", k.String(), err)
 					}
 					return bm.Destroy()
 				}, nil

+ 17 - 17
vendor/src/github.com/docker/libnetwork/ipamapi/contract.go

@@ -2,8 +2,9 @@
 package ipamapi
 
 import (
-	"errors"
 	"net"
+
+	"github.com/docker/libnetwork/types"
 )
 
 /********************
@@ -15,6 +16,8 @@ const (
 	DefaultIPAM = "default"
 	// PluginEndpointType represents the Endpoint Type used by Plugin system
 	PluginEndpointType = "IpamDriver"
+	// RequestAddressType represents the Address Type used when requesting an address
+	RequestAddressType = "RequestAddressType"
 )
 
 // Callback provides a Callback interface for registering an IPAM instance into LibNetwork
@@ -29,22 +32,19 @@ type Callback interface {
 
 // Weel-known errors returned by IPAM
 var (
-	ErrInvalidIpamService       = errors.New("Invalid IPAM Service")
-	ErrInvalidIpamConfigService = errors.New("Invalid IPAM Config Service")
-	ErrIpamNotAvailable         = errors.New("IPAM Service not available")
-	ErrIpamInternalError        = errors.New("IPAM Internal Error")
-	ErrInvalidAddressSpace      = errors.New("Invalid Address Space")
-	ErrInvalidPool              = errors.New("Invalid Address Pool")
-	ErrInvalidSubPool           = errors.New("Invalid Address SubPool")
-	ErrInvalidRequest           = errors.New("Invalid Request")
-	ErrPoolNotFound             = errors.New("Address Pool not found")
-	ErrOverlapPool              = errors.New("Address pool overlaps with existing pool on this address space")
-	ErrNoAvailablePool          = errors.New("No available pool")
-	ErrNoAvailableIPs           = errors.New("No available addresses on this pool")
-	ErrIPAlreadyAllocated       = errors.New("Address already in use")
-	ErrIPOutOfRange             = errors.New("Requested address is out of range")
-	ErrPoolOverlap              = errors.New("Pool overlaps with other one on this address space")
-	ErrBadPool                  = errors.New("Address space does not contain specified address pool")
+	ErrIpamInternalError   = types.InternalErrorf("IPAM Internal Error")
+	ErrInvalidAddressSpace = types.BadRequestErrorf("Invalid Address Space")
+	ErrInvalidPool         = types.BadRequestErrorf("Invalid Address Pool")
+	ErrInvalidSubPool      = types.BadRequestErrorf("Invalid Address SubPool")
+	ErrInvalidRequest      = types.BadRequestErrorf("Invalid Request")
+	ErrPoolNotFound        = types.BadRequestErrorf("Address Pool not found")
+	ErrOverlapPool         = types.ForbiddenErrorf("Address pool overlaps with existing pool on this address space")
+	ErrNoAvailablePool     = types.NoServiceErrorf("No available pool")
+	ErrNoAvailableIPs      = types.NoServiceErrorf("No available addresses on this pool")
+	ErrIPAlreadyAllocated  = types.ForbiddenErrorf("Address already in use")
+	ErrIPOutOfRange        = types.BadRequestErrorf("Requested address is out of range")
+	ErrPoolOverlap         = types.ForbiddenErrorf("Pool overlaps with other one on this address space")
+	ErrBadPool             = types.BadRequestErrorf("Address space does not contain specified address pool")
 )
 
 /*******************************

+ 7 - 3
vendor/src/github.com/docker/libnetwork/network.go

@@ -603,12 +603,13 @@ func (n *network) Delete() error {
 	if err = n.getController().deleteFromStore(n.getEpCnt()); err != nil {
 		return fmt.Errorf("error deleting network endpoint count from store: %v", err)
 	}
+
+	n.ipamRelease()
+
 	if err = n.getController().deleteFromStore(n); err != nil {
 		return fmt.Errorf("error deleting network from store: %v", err)
 	}
 
-	n.ipamRelease()
-
 	return nil
 }
 
@@ -970,7 +971,10 @@ func (n *network) ipamAllocateVersion(ipVer int, ipam ipamapi.Ipam) error {
 		// irrespective of whether ipam driver returned a gateway already.
 		// If none of the above is true, libnetwork will allocate one.
 		if cfg.Gateway != "" || d.Gateway == nil {
-			if d.Gateway, _, err = ipam.RequestAddress(d.PoolID, net.ParseIP(cfg.Gateway), nil); err != nil {
+			var gatewayOpts = map[string]string{
+				ipamapi.RequestAddressType: netlabel.Gateway,
+			}
+			if d.Gateway, _, err = ipam.RequestAddress(d.PoolID, net.ParseIP(cfg.Gateway), gatewayOpts); err != nil {
 				return types.InternalErrorf("failed to allocate gateway (%v): %v", cfg.Gateway, err)
 			}
 		}

+ 8 - 29
vendor/src/github.com/docker/libnetwork/sandbox.go

@@ -62,7 +62,6 @@ type sandbox struct {
 	osSbox        osl.Sandbox
 	controller    *controller
 	refCnt        int
-	hostsOnce     sync.Once
 	endpoints     epHeap
 	epPriority    map[string]int
 	joinLeaveDone chan struct{}
@@ -601,41 +600,21 @@ func (sb *sandbox) buildHostsFile() error {
 }
 
 func (sb *sandbox) updateHostsFile(ifaceIP string, svcRecords []etchosts.Record) error {
-	var err error
+	var mhost string
 
 	if sb.config.originHostsPath != "" {
 		return nil
 	}
 
-	max := func(a, b int) int {
-		if a < b {
-			return b
-		}
-
-		return a
-	}
-
-	extraContent := make([]etchosts.Record, 0,
-		max(len(sb.config.extraHosts), len(svcRecords)))
-
-	sb.hostsOnce.Do(func() {
-		// Rebuild the hosts file accounting for the passed
-		// interface IP and service records
-
-		for _, extraHost := range sb.config.extraHosts {
-			extraContent = append(extraContent,
-				etchosts.Record{Hosts: extraHost.name, IP: extraHost.IP})
-		}
-
-		err = etchosts.Build(sb.config.hostsPath, ifaceIP,
-			sb.config.hostName, sb.config.domainName, extraContent)
-	})
-
-	if err != nil {
-		return err
+	if sb.config.domainName != "" {
+		mhost = fmt.Sprintf("%s.%s %s", sb.config.hostName, sb.config.domainName,
+			sb.config.hostName)
+	} else {
+		mhost = sb.config.hostName
 	}
 
-	extraContent = extraContent[:0]
+	extraContent := make([]etchosts.Record, 0, len(svcRecords)+1)
+	extraContent = append(extraContent, etchosts.Record{Hosts: mhost, IP: ifaceIP})
 	for _, svc := range svcRecords {
 		extraContent = append(extraContent, svc)
 	}

+ 110 - 0
vendor/src/github.com/vishvananda/netlink/class.go

@@ -0,0 +1,110 @@
+package netlink
+
+import (
+	"fmt"
+)
+
+type Class interface {
+	Attrs() *ClassAttrs
+	Type() string
+}
+
+// Class represents a netlink class. A filter is associated with a link,
+// has a handle and a parent. The root filter of a device should have a
+// parent == HANDLE_ROOT.
+type ClassAttrs struct {
+	LinkIndex int
+	Handle    uint32
+	Parent    uint32
+	Leaf      uint32
+}
+
+func (q ClassAttrs) String() string {
+	return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Leaf: %s}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Leaf)
+}
+
+type HtbClassAttrs struct {
+	// TODO handle all attributes
+	Rate    uint64
+	Ceil    uint64
+	Buffer  uint32
+	Cbuffer uint32
+	Quantum uint32
+	Level   uint32
+	Prio    uint32
+}
+
+func (q HtbClassAttrs) String() string {
+	return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer)
+}
+
+// Htb class
+type HtbClass struct {
+	ClassAttrs
+	Rate    uint64
+	Ceil    uint64
+	Buffer  uint32
+	Cbuffer uint32
+	Quantum uint32
+	Level   uint32
+	Prio    uint32
+}
+
+func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass {
+	mtu := 1600
+	rate := cattrs.Rate / 8
+	ceil := cattrs.Ceil / 8
+	buffer := cattrs.Buffer
+	cbuffer := cattrs.Cbuffer
+	if ceil == 0 {
+		ceil = rate
+	}
+
+	if buffer == 0 {
+		buffer = uint32(float64(rate)/Hz() + float64(mtu))
+	}
+	buffer = uint32(Xmittime(rate, buffer))
+
+	if cbuffer == 0 {
+		cbuffer = uint32(float64(ceil)/Hz() + float64(mtu))
+	}
+	cbuffer = uint32(Xmittime(ceil, cbuffer))
+
+	return &HtbClass{
+		ClassAttrs: attrs,
+		Rate:       rate,
+		Ceil:       ceil,
+		Buffer:     buffer,
+		Cbuffer:    cbuffer,
+		Quantum:    10,
+		Level:      0,
+		Prio:       0,
+	}
+}
+
+func (q HtbClass) String() string {
+	return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer)
+}
+
+func (class *HtbClass) Attrs() *ClassAttrs {
+	return &class.ClassAttrs
+}
+
+func (class *HtbClass) Type() string {
+	return "htb"
+}
+
+// GenericClass classes represent types that are not currently understood
+// by this netlink library.
+type GenericClass struct {
+	ClassAttrs
+	ClassType string
+}
+
+func (class *GenericClass) Attrs() *ClassAttrs {
+	return &class.ClassAttrs
+}
+
+func (class *GenericClass) Type() string {
+	return class.ClassType
+}

+ 144 - 0
vendor/src/github.com/vishvananda/netlink/class_linux.go

@@ -0,0 +1,144 @@
+package netlink
+
+import (
+	"syscall"
+
+	"github.com/vishvananda/netlink/nl"
+)
+
+// ClassDel will delete a class from the system.
+// Equivalent to: `tc class del $class`
+func ClassDel(class Class) error {
+	req := nl.NewNetlinkRequest(syscall.RTM_DELTCLASS, syscall.NLM_F_ACK)
+	base := class.Attrs()
+	msg := &nl.TcMsg{
+		Family:  nl.FAMILY_ALL,
+		Ifindex: int32(base.LinkIndex),
+		Handle:  base.Handle,
+		Parent:  base.Parent,
+	}
+	req.AddData(msg)
+
+	_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+	return err
+}
+
+// ClassAdd will add a class to the system.
+// Equivalent to: `tc class add $class`
+func ClassAdd(class Class) error {
+	req := nl.NewNetlinkRequest(syscall.RTM_NEWTCLASS, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
+	base := class.Attrs()
+	msg := &nl.TcMsg{
+		Family:  nl.FAMILY_ALL,
+		Ifindex: int32(base.LinkIndex),
+		Handle:  base.Handle,
+		Parent:  base.Parent,
+	}
+	req.AddData(msg)
+	req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(class.Type())))
+
+	options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
+	if htb, ok := class.(*HtbClass); ok {
+		opt := nl.TcHtbCopt{}
+		opt.Rate.Rate = uint32(htb.Rate)
+		opt.Ceil.Rate = uint32(htb.Ceil)
+		opt.Buffer = htb.Buffer
+		opt.Cbuffer = htb.Cbuffer
+		opt.Quantum = htb.Quantum
+		opt.Level = htb.Level
+		opt.Prio = htb.Prio
+		// TODO: Handle Debug properly. For now default to 0
+		nl.NewRtAttrChild(options, nl.TCA_HTB_PARMS, opt.Serialize())
+	}
+	req.AddData(options)
+	_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+	return err
+}
+
+// ClassList gets a list of classes in the system.
+// Equivalent to: `tc class show`.
+// Generally retunrs nothing if link and parent are not specified.
+func ClassList(link Link, parent uint32) ([]Class, error) {
+	req := nl.NewNetlinkRequest(syscall.RTM_GETTCLASS, syscall.NLM_F_DUMP)
+	msg := &nl.TcMsg{
+		Family: nl.FAMILY_ALL,
+		Parent: parent,
+	}
+	if link != nil {
+		base := link.Attrs()
+		ensureIndex(base)
+		msg.Ifindex = int32(base.Index)
+	}
+	req.AddData(msg)
+
+	msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTCLASS)
+	if err != nil {
+		return nil, err
+	}
+
+	var res []Class
+	for _, m := range msgs {
+		msg := nl.DeserializeTcMsg(m)
+
+		attrs, err := nl.ParseRouteAttr(m[msg.Len():])
+		if err != nil {
+			return nil, err
+		}
+
+		base := ClassAttrs{
+			LinkIndex: int(msg.Ifindex),
+			Handle:    msg.Handle,
+			Parent:    msg.Parent,
+		}
+
+		var class Class
+		classType := ""
+		for _, attr := range attrs {
+			switch attr.Attr.Type {
+			case nl.TCA_KIND:
+				classType = string(attr.Value[:len(attr.Value)-1])
+				switch classType {
+				case "htb":
+					class = &HtbClass{}
+				default:
+					class = &GenericClass{ClassType: classType}
+				}
+			case nl.TCA_OPTIONS:
+				switch classType {
+				case "htb":
+					data, err := nl.ParseRouteAttr(attr.Value)
+					if err != nil {
+						return nil, err
+					}
+					_, err = parseHtbClassData(class, data)
+					if err != nil {
+						return nil, err
+					}
+				}
+			}
+		}
+		*class.Attrs() = base
+		res = append(res, class)
+	}
+
+	return res, nil
+}
+
+func parseHtbClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, error) {
+	htb := class.(*HtbClass)
+	detailed := false
+	for _, datum := range data {
+		switch datum.Attr.Type {
+		case nl.TCA_HTB_PARMS:
+			opt := nl.DeserializeTcHtbCopt(datum.Value)
+			htb.Rate = uint64(opt.Rate.Rate)
+			htb.Ceil = uint64(opt.Ceil.Rate)
+			htb.Buffer = opt.Buffer
+			htb.Cbuffer = opt.Cbuffer
+			htb.Quantum = opt.Quantum
+			htb.Level = opt.Level
+			htb.Prio = opt.Prio
+		}
+	}
+	return detailed, nil
+}

+ 140 - 0
vendor/src/github.com/vishvananda/netlink/filter.go

@@ -0,0 +1,140 @@
+package netlink
+
+import (
+	"errors"
+	"fmt"
+	"github.com/vishvananda/netlink/nl"
+)
+
+type Filter interface {
+	Attrs() *FilterAttrs
+	Type() string
+}
+
+// Filter represents a netlink filter. A filter is associated with a link,
+// has a handle and a parent. The root filter of a device should have a
+// parent == HANDLE_ROOT.
+type FilterAttrs struct {
+	LinkIndex int
+	Handle    uint32
+	Parent    uint32
+	Priority  uint16 // lower is higher priority
+	Protocol  uint16 // syscall.ETH_P_*
+}
+
+func (q FilterAttrs) String() string {
+	return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Priority: %d, Protocol: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Priority, q.Protocol)
+}
+
+// U32 filters on many packet related properties
+type U32 struct {
+	FilterAttrs
+	// Currently only supports redirecting to another interface
+	RedirIndex int
+}
+
+func (filter *U32) Attrs() *FilterAttrs {
+	return &filter.FilterAttrs
+}
+
+func (filter *U32) Type() string {
+	return "u32"
+}
+
+type FilterFwAttrs struct {
+	ClassId   uint32
+	InDev     string
+	Mask      uint32
+	Index     uint32
+	Buffer    uint32
+	Mtu       uint32
+	Mpu       uint16
+	Rate      uint32
+	AvRate    uint32
+	PeakRate  uint32
+	Action    int
+	Overhead  uint16
+	LinkLayer int
+}
+
+// FwFilter filters on firewall marks
+type Fw struct {
+	FilterAttrs
+	ClassId uint32
+	Police  nl.TcPolice
+	InDev   string
+	// TODO Action
+	Mask   uint32
+	AvRate uint32
+	Rtab   [256]uint32
+	Ptab   [256]uint32
+}
+
+func NewFw(attrs FilterAttrs, fattrs FilterFwAttrs) (*Fw, error) {
+	var rtab [256]uint32
+	var ptab [256]uint32
+	rcell_log := -1
+	pcell_log := -1
+	avrate := fattrs.AvRate / 8
+	police := nl.TcPolice{}
+	police.Rate.Rate = fattrs.Rate / 8
+	police.PeakRate.Rate = fattrs.PeakRate / 8
+	buffer := fattrs.Buffer
+	linklayer := nl.LINKLAYER_ETHERNET
+
+	if fattrs.LinkLayer != nl.LINKLAYER_UNSPEC {
+		linklayer = fattrs.LinkLayer
+	}
+
+	police.Action = int32(fattrs.Action)
+	if police.Rate.Rate != 0 {
+		police.Rate.Mpu = fattrs.Mpu
+		police.Rate.Overhead = fattrs.Overhead
+		if CalcRtable(&police.Rate, rtab, rcell_log, fattrs.Mtu, linklayer) < 0 {
+			return nil, errors.New("TBF: failed to calculate rate table.")
+		}
+		police.Burst = uint32(Xmittime(uint64(police.Rate.Rate), uint32(buffer)))
+	}
+	police.Mtu = fattrs.Mtu
+	if police.PeakRate.Rate != 0 {
+		police.PeakRate.Mpu = fattrs.Mpu
+		police.PeakRate.Overhead = fattrs.Overhead
+		if CalcRtable(&police.PeakRate, ptab, pcell_log, fattrs.Mtu, linklayer) < 0 {
+			return nil, errors.New("POLICE: failed to calculate peak rate table.")
+		}
+	}
+
+	return &Fw{
+		FilterAttrs: attrs,
+		ClassId:     fattrs.ClassId,
+		InDev:       fattrs.InDev,
+		Mask:        fattrs.Mask,
+		Police:      police,
+		AvRate:      avrate,
+		Rtab:        rtab,
+		Ptab:        ptab,
+	}, nil
+}
+
+func (filter *Fw) Attrs() *FilterAttrs {
+	return &filter.FilterAttrs
+}
+
+func (filter *Fw) Type() string {
+	return "fw"
+}
+
+// GenericFilter filters represent types that are not currently understood
+// by this netlink library.
+type GenericFilter struct {
+	FilterAttrs
+	FilterType string
+}
+
+func (filter *GenericFilter) Attrs() *FilterAttrs {
+	return &filter.FilterAttrs
+}
+
+func (filter *GenericFilter) Type() string {
+	return filter.FilterType
+}

+ 322 - 0
vendor/src/github.com/vishvananda/netlink/filter_linux.go

@@ -0,0 +1,322 @@
+package netlink
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"syscall"
+
+	"github.com/vishvananda/netlink/nl"
+)
+
+// FilterDel will delete a filter from the system.
+// Equivalent to: `tc filter del $filter`
+func FilterDel(filter Filter) error {
+	req := nl.NewNetlinkRequest(syscall.RTM_DELTFILTER, syscall.NLM_F_ACK)
+	base := filter.Attrs()
+	msg := &nl.TcMsg{
+		Family:  nl.FAMILY_ALL,
+		Ifindex: int32(base.LinkIndex),
+		Handle:  base.Handle,
+		Parent:  base.Parent,
+		Info:    MakeHandle(base.Priority, nl.Swap16(base.Protocol)),
+	}
+	req.AddData(msg)
+
+	_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+	return err
+}
+
+// FilterAdd will add a filter to the system.
+// Equivalent to: `tc filter add $filter`
+func FilterAdd(filter Filter) error {
+	native = nl.NativeEndian()
+	req := nl.NewNetlinkRequest(syscall.RTM_NEWTFILTER, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
+	base := filter.Attrs()
+	msg := &nl.TcMsg{
+		Family:  nl.FAMILY_ALL,
+		Ifindex: int32(base.LinkIndex),
+		Handle:  base.Handle,
+		Parent:  base.Parent,
+		Info:    MakeHandle(base.Priority, nl.Swap16(base.Protocol)),
+	}
+	req.AddData(msg)
+	req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(filter.Type())))
+
+	options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
+	if u32, ok := filter.(*U32); ok {
+		// match all
+		sel := nl.TcU32Sel{
+			Nkeys: 1,
+			Flags: nl.TC_U32_TERMINAL,
+		}
+		sel.Keys = append(sel.Keys, nl.TcU32Key{})
+		nl.NewRtAttrChild(options, nl.TCA_U32_SEL, sel.Serialize())
+		actions := nl.NewRtAttrChild(options, nl.TCA_U32_ACT, nil)
+		table := nl.NewRtAttrChild(actions, nl.TCA_ACT_TAB, nil)
+		nl.NewRtAttrChild(table, nl.TCA_KIND, nl.ZeroTerminated("mirred"))
+		// redirect to other interface
+		mir := nl.TcMirred{
+			Action:  nl.TC_ACT_STOLEN,
+			Eaction: nl.TCA_EGRESS_REDIR,
+			Ifindex: uint32(u32.RedirIndex),
+		}
+		aopts := nl.NewRtAttrChild(table, nl.TCA_OPTIONS, nil)
+		nl.NewRtAttrChild(aopts, nl.TCA_MIRRED_PARMS, mir.Serialize())
+	} else if fw, ok := filter.(*Fw); ok {
+		if fw.Mask != 0 {
+			b := make([]byte, 4)
+			native.PutUint32(b, fw.Mask)
+			nl.NewRtAttrChild(options, nl.TCA_FW_MASK, b)
+		}
+		if fw.InDev != "" {
+			nl.NewRtAttrChild(options, nl.TCA_FW_INDEV, nl.ZeroTerminated(fw.InDev))
+		}
+		if (fw.Police != nl.TcPolice{}) {
+
+			police := nl.NewRtAttrChild(options, nl.TCA_FW_POLICE, nil)
+			nl.NewRtAttrChild(police, nl.TCA_POLICE_TBF, fw.Police.Serialize())
+			if (fw.Police.Rate != nl.TcRateSpec{}) {
+				payload := SerializeRtab(fw.Rtab)
+				nl.NewRtAttrChild(police, nl.TCA_POLICE_RATE, payload)
+			}
+			if (fw.Police.PeakRate != nl.TcRateSpec{}) {
+				payload := SerializeRtab(fw.Ptab)
+				nl.NewRtAttrChild(police, nl.TCA_POLICE_PEAKRATE, payload)
+			}
+		}
+		if fw.ClassId != 0 {
+			b := make([]byte, 4)
+			native.PutUint32(b, fw.ClassId)
+			nl.NewRtAttrChild(options, nl.TCA_FW_CLASSID, b)
+		}
+	}
+
+	req.AddData(options)
+	_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+	return err
+}
+
+// FilterList gets a list of filters in the system.
+// Equivalent to: `tc filter show`.
+// Generally retunrs nothing if link and parent are not specified.
+func FilterList(link Link, parent uint32) ([]Filter, error) {
+	req := nl.NewNetlinkRequest(syscall.RTM_GETTFILTER, syscall.NLM_F_DUMP)
+	msg := &nl.TcMsg{
+		Family: nl.FAMILY_ALL,
+		Parent: parent,
+	}
+	if link != nil {
+		base := link.Attrs()
+		ensureIndex(base)
+		msg.Ifindex = int32(base.Index)
+	}
+	req.AddData(msg)
+
+	msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTFILTER)
+	if err != nil {
+		return nil, err
+	}
+
+	var res []Filter
+	for _, m := range msgs {
+		msg := nl.DeserializeTcMsg(m)
+
+		attrs, err := nl.ParseRouteAttr(m[msg.Len():])
+		if err != nil {
+			return nil, err
+		}
+
+		base := FilterAttrs{
+			LinkIndex: int(msg.Ifindex),
+			Handle:    msg.Handle,
+			Parent:    msg.Parent,
+		}
+		base.Priority, base.Protocol = MajorMinor(msg.Info)
+		base.Protocol = nl.Swap16(base.Protocol)
+
+		var filter Filter
+		filterType := ""
+		detailed := false
+		for _, attr := range attrs {
+			switch attr.Attr.Type {
+			case nl.TCA_KIND:
+				filterType = string(attr.Value[:len(attr.Value)-1])
+				switch filterType {
+				case "u32":
+					filter = &U32{}
+				case "fw":
+					filter = &Fw{}
+				default:
+					filter = &GenericFilter{FilterType: filterType}
+				}
+			case nl.TCA_OPTIONS:
+				switch filterType {
+				case "u32":
+					data, err := nl.ParseRouteAttr(attr.Value)
+					if err != nil {
+						return nil, err
+					}
+					detailed, err = parseU32Data(filter, data)
+					if err != nil {
+						return nil, err
+					}
+				case "fw":
+					data, err := nl.ParseRouteAttr(attr.Value)
+					if err != nil {
+						return nil, err
+					}
+					detailed, err = parseFwData(filter, data)
+					if err != nil {
+						return nil, err
+					}
+				}
+			}
+		}
+		// only return the detailed version of the filter
+		if detailed {
+			*filter.Attrs() = base
+			res = append(res, filter)
+		}
+	}
+
+	return res, nil
+}
+
+func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
+	native = nl.NativeEndian()
+	u32 := filter.(*U32)
+	detailed := false
+	for _, datum := range data {
+		switch datum.Attr.Type {
+		case nl.TCA_U32_SEL:
+			detailed = true
+			sel := nl.DeserializeTcU32Sel(datum.Value)
+			// only parse if we have a very basic redirect
+			if sel.Flags&nl.TC_U32_TERMINAL == 0 || sel.Nkeys != 1 {
+				return detailed, nil
+			}
+		case nl.TCA_U32_ACT:
+			table, err := nl.ParseRouteAttr(datum.Value)
+			if err != nil {
+				return detailed, err
+			}
+			if len(table) != 1 || table[0].Attr.Type != nl.TCA_ACT_TAB {
+				return detailed, fmt.Errorf("Action table not formed properly")
+			}
+			aattrs, err := nl.ParseRouteAttr(table[0].Value)
+			for _, aattr := range aattrs {
+				switch aattr.Attr.Type {
+				case nl.TCA_KIND:
+					actionType := string(aattr.Value[:len(aattr.Value)-1])
+					// only parse if the action is mirred
+					if actionType != "mirred" {
+						return detailed, nil
+					}
+				case nl.TCA_OPTIONS:
+					adata, err := nl.ParseRouteAttr(aattr.Value)
+					if err != nil {
+						return detailed, err
+					}
+					for _, adatum := range adata {
+						switch adatum.Attr.Type {
+						case nl.TCA_MIRRED_PARMS:
+							mir := nl.DeserializeTcMirred(adatum.Value)
+							u32.RedirIndex = int(mir.Ifindex)
+						}
+					}
+				}
+			}
+		}
+	}
+	return detailed, nil
+}
+
+func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
+	native = nl.NativeEndian()
+	fw := filter.(*Fw)
+	detailed := true
+	for _, datum := range data {
+		switch datum.Attr.Type {
+		case nl.TCA_FW_MASK:
+			fw.Mask = native.Uint32(datum.Value[0:4])
+		case nl.TCA_FW_CLASSID:
+			fw.ClassId = native.Uint32(datum.Value[0:4])
+		case nl.TCA_FW_INDEV:
+			fw.InDev = string(datum.Value[:len(datum.Value)-1])
+		case nl.TCA_FW_POLICE:
+			adata, _ := nl.ParseRouteAttr(datum.Value)
+			for _, aattr := range adata {
+				switch aattr.Attr.Type {
+				case nl.TCA_POLICE_TBF:
+					fw.Police = *nl.DeserializeTcPolice(aattr.Value)
+				case nl.TCA_POLICE_RATE:
+					fw.Rtab = DeserializeRtab(aattr.Value)
+				case nl.TCA_POLICE_PEAKRATE:
+					fw.Ptab = DeserializeRtab(aattr.Value)
+				}
+			}
+		}
+	}
+	return detailed, nil
+}
+
+func AlignToAtm(size uint) uint {
+	var linksize, cells int
+	cells = int(size / nl.ATM_CELL_PAYLOAD)
+	if (size % nl.ATM_CELL_PAYLOAD) > 0 {
+		cells++
+	}
+	linksize = cells * nl.ATM_CELL_SIZE
+	return uint(linksize)
+}
+
+func AdjustSize(sz uint, mpu uint, linklayer int) uint {
+	if sz < mpu {
+		sz = mpu
+	}
+	switch linklayer {
+	case nl.LINKLAYER_ATM:
+		return AlignToAtm(sz)
+	default:
+		return sz
+	}
+}
+
+func CalcRtable(rate *nl.TcRateSpec, rtab [256]uint32, cell_log int, mtu uint32, linklayer int) int {
+	bps := rate.Rate
+	mpu := rate.Mpu
+	var sz uint
+	if mtu == 0 {
+		mtu = 2047
+	}
+	if cell_log < 0 {
+		cell_log = 0
+		for (mtu >> uint(cell_log)) > 255 {
+			cell_log++
+		}
+	}
+	for i := 0; i < 256; i++ {
+		sz = AdjustSize(uint((i+1)<<uint32(cell_log)), uint(mpu), linklayer)
+		rtab[i] = uint32(Xmittime(uint64(bps), uint32(sz)))
+	}
+	rate.CellAlign = -1
+	rate.CellLog = uint8(cell_log)
+	rate.Linklayer = uint8(linklayer & nl.TC_LINKLAYER_MASK)
+	return cell_log
+}
+
+func DeserializeRtab(b []byte) [256]uint32 {
+	var rtab [256]uint32
+	native := nl.NativeEndian()
+	r := bytes.NewReader(b)
+	_ = binary.Read(r, native, &rtab)
+	return rtab
+}
+
+func SerializeRtab(rtab [256]uint32) []byte {
+	native := nl.NativeEndian()
+	var w bytes.Buffer
+	_ = binary.Write(&w, native, rtab)
+	return w.Bytes()
+}

+ 51 - 5
vendor/src/github.com/vishvananda/netlink/link.go

@@ -1,6 +1,9 @@
 package netlink
 
-import "net"
+import (
+	"net"
+	"syscall"
+)
 
 // Link represents a link device from netlink. Shared link attributes
 // like name may be retrieved using the Attrs() method. Unique data
@@ -62,6 +65,19 @@ func (dummy *Dummy) Type() string {
 	return "dummy"
 }
 
+// Ifb links are advanced dummy devices for packet filtering
+type Ifb struct {
+	LinkAttrs
+}
+
+func (ifb *Ifb) Attrs() *LinkAttrs {
+	return &ifb.LinkAttrs
+}
+
+func (ifb *Ifb) Type() string {
+	return "ifb"
+}
+
 // Bridge links are simple linux bridges
 type Bridge struct {
 	LinkAttrs
@@ -114,6 +130,36 @@ func (macvlan *Macvlan) Type() string {
 	return "macvlan"
 }
 
+// Macvtap - macvtap is a virtual interfaces based on macvlan
+type Macvtap struct {
+	Macvlan
+}
+
+func (macvtap Macvtap) Type() string {
+	return "macvtap"
+}
+
+type TuntapMode uint16
+
+const (
+	TUNTAP_MODE_TUN TuntapMode = syscall.IFF_TUN
+	TUNTAP_MODE_TAP TuntapMode = syscall.IFF_TAP
+)
+
+// Tuntap links created via /dev/tun/tap, but can be destroyed via netlink
+type Tuntap struct {
+	LinkAttrs
+	Mode TuntapMode
+}
+
+func (tuntap *Tuntap) Attrs() *LinkAttrs {
+	return &tuntap.LinkAttrs
+}
+
+func (tuntap *Tuntap) Type() string {
+	return "tuntap"
+}
+
 // Veth devices must specify PeerName on create
 type Veth struct {
 	LinkAttrs
@@ -128,18 +174,18 @@ func (veth *Veth) Type() string {
 	return "veth"
 }
 
-// Generic links represent types that are not currently understood
+// GenericLink links represent types that are not currently understood
 // by this netlink library.
-type Generic struct {
+type GenericLink struct {
 	LinkAttrs
 	LinkType string
 }
 
-func (generic *Generic) Attrs() *LinkAttrs {
+func (generic *GenericLink) Attrs() *LinkAttrs {
 	return &generic.LinkAttrs
 }
 
-func (generic *Generic) Type() string {
+func (generic *GenericLink) Type() string {
 	return generic.LinkType
 }
 

+ 92 - 1
vendor/src/github.com/vishvananda/netlink/link_linux.go

@@ -5,7 +5,9 @@ import (
 	"encoding/binary"
 	"fmt"
 	"net"
+	"os"
 	"syscall"
+	"unsafe"
 
 	"github.com/vishvananda/netlink/nl"
 )
@@ -285,6 +287,44 @@ func LinkAdd(link Link) error {
 		return fmt.Errorf("LinkAttrs.Name cannot be empty!")
 	}
 
+	if tuntap, ok := link.(*Tuntap); ok {
+		// TODO: support user
+		// TODO: support group
+		// TODO: support non- one_queue
+		// TODO: support pi | vnet_hdr | multi_queue
+		// TODO: support non- exclusive
+		// TODO: support non- persistent
+		if tuntap.Mode < syscall.IFF_TUN || tuntap.Mode > syscall.IFF_TAP {
+			return fmt.Errorf("Tuntap.Mode %v unknown!", tuntap.Mode)
+		}
+		file, err := os.OpenFile("/dev/net/tun", os.O_RDWR, 0)
+		if err != nil {
+			return err
+		}
+		defer file.Close()
+		var req ifReq
+		req.Flags |= syscall.IFF_ONE_QUEUE
+		req.Flags |= syscall.IFF_TUN_EXCL
+		copy(req.Name[:15], base.Name)
+		req.Flags |= uint16(tuntap.Mode)
+		_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), uintptr(syscall.TUNSETIFF), uintptr(unsafe.Pointer(&req)))
+		if errno != 0 {
+			return fmt.Errorf("Tuntap IOCTL TUNSETIFF failed, errno %v", errno)
+		}
+		_, _, errno = syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), uintptr(syscall.TUNSETPERSIST), 1)
+		if errno != 0 {
+			return fmt.Errorf("Tuntap IOCTL TUNSETPERSIST failed, errno %v", errno)
+		}
+		ensureIndex(base)
+
+		// can't set master during create, so set it afterwards
+		if base.MasterIndex != 0 {
+			// TODO: verify MasterIndex is actually a bridge?
+			return LinkSetMasterByIndex(link, base.MasterIndex)
+		}
+		return nil
+	}
+
 	req := nl.NewNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
 
 	msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
@@ -493,6 +533,8 @@ func linkDeserialize(m []byte) (Link, error) {
 					switch linkType {
 					case "dummy":
 						link = &Dummy{}
+					case "ifb":
+						link = &Ifb{}
 					case "bridge":
 						link = &Bridge{}
 					case "vlan":
@@ -505,8 +547,10 @@ func linkDeserialize(m []byte) (Link, error) {
 						link = &IPVlan{}
 					case "macvlan":
 						link = &Macvlan{}
+					case "macvtap":
+						link = &Macvtap{}
 					default:
-						link = &Generic{LinkType: linkType}
+						link = &GenericLink{LinkType: linkType}
 					}
 				case nl.IFLA_INFO_DATA:
 					data, err := nl.ParseRouteAttr(info.Value)
@@ -522,6 +566,8 @@ func linkDeserialize(m []byte) (Link, error) {
 						parseIPVlanData(link, data)
 					case "macvlan":
 						parseMacvlanData(link, data)
+					case "macvtap":
+						parseMacvtapData(link, data)
 					}
 				}
 			}
@@ -583,6 +629,46 @@ func LinkList() ([]Link, error) {
 	return res, nil
 }
 
+// LinkUpdate is used to pass information back from LinkSubscribe()
+type LinkUpdate struct {
+	nl.IfInfomsg
+	Link
+}
+
+// LinkSubscribe takes a chan down which notifications will be sent
+// when links change.  Close the 'done' chan to stop subscription.
+func LinkSubscribe(ch chan<- LinkUpdate, done <-chan struct{}) error {
+	s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_LINK)
+	if err != nil {
+		return err
+	}
+	if done != nil {
+		go func() {
+			<-done
+			s.Close()
+		}()
+	}
+	go func() {
+		defer close(ch)
+		for {
+			msgs, err := s.Receive()
+			if err != nil {
+				return
+			}
+			for _, m := range msgs {
+				ifmsg := nl.DeserializeIfInfomsg(m.Data)
+				link, err := linkDeserialize(m.Data)
+				if err != nil {
+					return
+				}
+				ch <- LinkUpdate{IfInfomsg: *ifmsg, Link: link}
+			}
+		}
+	}()
+
+	return nil
+}
+
 func LinkSetHairpin(link Link, mode bool) error {
 	return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_MODE)
 }
@@ -696,6 +782,11 @@ func parseIPVlanData(link Link, data []syscall.NetlinkRouteAttr) {
 	}
 }
 
+func parseMacvtapData(link Link, data []syscall.NetlinkRouteAttr) {
+	macv := link.(*Macvtap)
+	parseMacvlanData(&macv.Macvlan, data)
+}
+
 func parseMacvlanData(link Link, data []syscall.NetlinkRouteAttr) {
 	macv := link.(*Macvlan)
 	for _, datum := range data {

+ 14 - 0
vendor/src/github.com/vishvananda/netlink/link_tuntap_linux.go

@@ -0,0 +1,14 @@
+package netlink
+
+// ideally golang.org/x/sys/unix would define IfReq but it only has
+// IFNAMSIZ, hence this minimalistic implementation
+const (
+	SizeOfIfReq = 40
+	IFNAMSIZ    = 16
+)
+
+type ifReq struct {
+	Name  [IFNAMSIZ]byte
+	Flags uint16
+	pad   [SizeOfIfReq - IFNAMSIZ - 2]byte
+}

+ 1 - 1
vendor/src/github.com/vishvananda/netlink/nl/nl_linux.go

@@ -142,7 +142,7 @@ func (a *RtAttr) Len() int {
 }
 
 // Serialize the RtAttr into a byte array
-// This can't ust unsafe.cast because it must iterate through children.
+// This can't just unsafe.cast because it must iterate through children.
 func (a *RtAttr) Serialize() []byte {
 	native := NativeEndian()
 

+ 508 - 0
vendor/src/github.com/vishvananda/netlink/nl/tc_linux.go

@@ -0,0 +1,508 @@
+package nl
+
+import (
+	"unsafe"
+)
+
+// LinkLayer
+const (
+	LINKLAYER_UNSPEC = iota
+	LINKLAYER_ETHERNET
+	LINKLAYER_ATM
+)
+
+// ATM
+const (
+	ATM_CELL_PAYLOAD = 48
+	ATM_CELL_SIZE    = 53
+)
+
+const TC_LINKLAYER_MASK = 0x0F
+
+// Police
+const (
+	TCA_POLICE_UNSPEC = iota
+	TCA_POLICE_TBF
+	TCA_POLICE_RATE
+	TCA_POLICE_PEAKRATE
+	TCA_POLICE_AVRATE
+	TCA_POLICE_RESULT
+	TCA_POLICE_MAX = TCA_POLICE_RESULT
+)
+
+// Message types
+const (
+	TCA_UNSPEC = iota
+	TCA_KIND
+	TCA_OPTIONS
+	TCA_STATS
+	TCA_XSTATS
+	TCA_RATE
+	TCA_FCNT
+	TCA_STATS2
+	TCA_STAB
+	TCA_MAX = TCA_STAB
+)
+
+const (
+	TCA_ACT_TAB = 1
+	TCAA_MAX    = 1
+)
+
+const (
+	TCA_PRIO_UNSPEC = iota
+	TCA_PRIO_MQ
+	TCA_PRIO_MAX = TCA_PRIO_MQ
+)
+
+const (
+	SizeofTcMsg       = 0x14
+	SizeofTcActionMsg = 0x04
+	SizeofTcPrioMap   = 0x14
+	SizeofTcRateSpec  = 0x0c
+	SizeofTcTbfQopt   = 2*SizeofTcRateSpec + 0x0c
+	SizeofTcHtbCopt   = 2*SizeofTcRateSpec + 0x14
+	SizeofTcHtbGlob   = 0x14
+	SizeofTcU32Key    = 0x10
+	SizeofTcU32Sel    = 0x10 // without keys
+	SizeofTcMirred    = 0x1c
+	SizeofTcPolice    = 2*SizeofTcRateSpec + 0x20
+)
+
+// struct tcmsg {
+//   unsigned char tcm_family;
+//   unsigned char tcm__pad1;
+//   unsigned short  tcm__pad2;
+//   int   tcm_ifindex;
+//   __u32   tcm_handle;
+//   __u32   tcm_parent;
+//   __u32   tcm_info;
+// };
+
+type TcMsg struct {
+	Family  uint8
+	Pad     [3]byte
+	Ifindex int32
+	Handle  uint32
+	Parent  uint32
+	Info    uint32
+}
+
+func (msg *TcMsg) Len() int {
+	return SizeofTcMsg
+}
+
+func DeserializeTcMsg(b []byte) *TcMsg {
+	return (*TcMsg)(unsafe.Pointer(&b[0:SizeofTcMsg][0]))
+}
+
+func (x *TcMsg) Serialize() []byte {
+	return (*(*[SizeofTcMsg]byte)(unsafe.Pointer(x)))[:]
+}
+
+// struct tcamsg {
+//   unsigned char tca_family;
+//   unsigned char tca__pad1;
+//   unsigned short  tca__pad2;
+// };
+
+type TcActionMsg struct {
+	Family uint8
+	Pad    [3]byte
+}
+
+func (msg *TcActionMsg) Len() int {
+	return SizeofTcActionMsg
+}
+
+func DeserializeTcActionMsg(b []byte) *TcActionMsg {
+	return (*TcActionMsg)(unsafe.Pointer(&b[0:SizeofTcActionMsg][0]))
+}
+
+func (x *TcActionMsg) Serialize() []byte {
+	return (*(*[SizeofTcActionMsg]byte)(unsafe.Pointer(x)))[:]
+}
+
+const (
+	TC_PRIO_MAX = 15
+)
+
+// struct tc_prio_qopt {
+// 	int bands;      /* Number of bands */
+// 	__u8  priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */
+// };
+
+type TcPrioMap struct {
+	Bands   int32
+	Priomap [TC_PRIO_MAX + 1]uint8
+}
+
+func (msg *TcPrioMap) Len() int {
+	return SizeofTcPrioMap
+}
+
+func DeserializeTcPrioMap(b []byte) *TcPrioMap {
+	return (*TcPrioMap)(unsafe.Pointer(&b[0:SizeofTcPrioMap][0]))
+}
+
+func (x *TcPrioMap) Serialize() []byte {
+	return (*(*[SizeofTcPrioMap]byte)(unsafe.Pointer(x)))[:]
+}
+
+const (
+	TCA_TBF_UNSPEC = iota
+	TCA_TBF_PARMS
+	TCA_TBF_RTAB
+	TCA_TBF_PTAB
+	TCA_TBF_RATE64
+	TCA_TBF_PRATE64
+	TCA_TBF_BURST
+	TCA_TBF_PBURST
+	TCA_TBF_MAX = TCA_TBF_PBURST
+)
+
+// struct tc_ratespec {
+//   unsigned char cell_log;
+//   __u8    linklayer; /* lower 4 bits */
+//   unsigned short  overhead;
+//   short   cell_align;
+//   unsigned short  mpu;
+//   __u32   rate;
+// };
+
+type TcRateSpec struct {
+	CellLog   uint8
+	Linklayer uint8
+	Overhead  uint16
+	CellAlign int16
+	Mpu       uint16
+	Rate      uint32
+}
+
+func (msg *TcRateSpec) Len() int {
+	return SizeofTcRateSpec
+}
+
+func DeserializeTcRateSpec(b []byte) *TcRateSpec {
+	return (*TcRateSpec)(unsafe.Pointer(&b[0:SizeofTcRateSpec][0]))
+}
+
+func (x *TcRateSpec) Serialize() []byte {
+	return (*(*[SizeofTcRateSpec]byte)(unsafe.Pointer(x)))[:]
+}
+
+// struct tc_tbf_qopt {
+//   struct tc_ratespec rate;
+//   struct tc_ratespec peakrate;
+//   __u32   limit;
+//   __u32   buffer;
+//   __u32   mtu;
+// };
+
+type TcTbfQopt struct {
+	Rate     TcRateSpec
+	Peakrate TcRateSpec
+	Limit    uint32
+	Buffer   uint32
+	Mtu      uint32
+}
+
+func (msg *TcTbfQopt) Len() int {
+	return SizeofTcTbfQopt
+}
+
+func DeserializeTcTbfQopt(b []byte) *TcTbfQopt {
+	return (*TcTbfQopt)(unsafe.Pointer(&b[0:SizeofTcTbfQopt][0]))
+}
+
+func (x *TcTbfQopt) Serialize() []byte {
+	return (*(*[SizeofTcTbfQopt]byte)(unsafe.Pointer(x)))[:]
+}
+
+const (
+	TCA_HTB_UNSPEC = iota
+	TCA_HTB_PARMS
+	TCA_HTB_INIT
+	TCA_HTB_CTAB
+	TCA_HTB_RTAB
+	TCA_HTB_DIRECT_QLEN
+	TCA_HTB_RATE64
+	TCA_HTB_CEIL64
+	TCA_HTB_MAX = TCA_HTB_CEIL64
+)
+
+//struct tc_htb_opt {
+//	struct tc_ratespec	rate;
+//	struct tc_ratespec	ceil;
+//	__u32	buffer;
+//	__u32	cbuffer;
+//	__u32	quantum;
+//	__u32	level;		/* out only */
+//	__u32	prio;
+//};
+
+type TcHtbCopt struct {
+	Rate    TcRateSpec
+	Ceil    TcRateSpec
+	Buffer  uint32
+	Cbuffer uint32
+	Quantum uint32
+	Level   uint32
+	Prio    uint32
+}
+
+func (msg *TcHtbCopt) Len() int {
+	return SizeofTcHtbCopt
+}
+
+func DeserializeTcHtbCopt(b []byte) *TcHtbCopt {
+	return (*TcHtbCopt)(unsafe.Pointer(&b[0:SizeofTcHtbCopt][0]))
+}
+
+func (x *TcHtbCopt) Serialize() []byte {
+	return (*(*[SizeofTcHtbCopt]byte)(unsafe.Pointer(x)))[:]
+}
+
+type TcHtbGlob struct {
+	Version      uint32
+	Rate2Quantum uint32
+	Defcls       uint32
+	Debug        uint32
+	DirectPkts   uint32
+}
+
+func (msg *TcHtbGlob) Len() int {
+	return SizeofTcHtbGlob
+}
+
+func DeserializeTcHtbGlob(b []byte) *TcHtbGlob {
+	return (*TcHtbGlob)(unsafe.Pointer(&b[0:SizeofTcHtbGlob][0]))
+}
+
+func (x *TcHtbGlob) Serialize() []byte {
+	return (*(*[SizeofTcHtbGlob]byte)(unsafe.Pointer(x)))[:]
+}
+
+const (
+	TCA_U32_UNSPEC = iota
+	TCA_U32_CLASSID
+	TCA_U32_HASH
+	TCA_U32_LINK
+	TCA_U32_DIVISOR
+	TCA_U32_SEL
+	TCA_U32_POLICE
+	TCA_U32_ACT
+	TCA_U32_INDEV
+	TCA_U32_PCNT
+	TCA_U32_MARK
+	TCA_U32_MAX = TCA_U32_MARK
+)
+
+// struct tc_u32_key {
+//   __be32    mask;
+//   __be32    val;
+//   int   off;
+//   int   offmask;
+// };
+
+type TcU32Key struct {
+	Mask    uint32 // big endian
+	Val     uint32 // big endian
+	Off     int32
+	OffMask int32
+}
+
+func (msg *TcU32Key) Len() int {
+	return SizeofTcU32Key
+}
+
+func DeserializeTcU32Key(b []byte) *TcU32Key {
+	return (*TcU32Key)(unsafe.Pointer(&b[0:SizeofTcU32Key][0]))
+}
+
+func (x *TcU32Key) Serialize() []byte {
+	return (*(*[SizeofTcU32Key]byte)(unsafe.Pointer(x)))[:]
+}
+
+// struct tc_u32_sel {
+//   unsigned char   flags;
+//   unsigned char   offshift;
+//   unsigned char   nkeys;
+//
+//   __be16      offmask;
+//   __u16     off;
+//   short     offoff;
+//
+//   short     hoff;
+//   __be32      hmask;
+//   struct tc_u32_key keys[0];
+// };
+
+const (
+	TC_U32_TERMINAL  = 1 << iota
+	TC_U32_OFFSET    = 1 << iota
+	TC_U32_VAROFFSET = 1 << iota
+	TC_U32_EAT       = 1 << iota
+)
+
+type TcU32Sel struct {
+	Flags    uint8
+	Offshift uint8
+	Nkeys    uint8
+	Pad      uint8
+	Offmask  uint16 // big endian
+	Off      uint16
+	Offoff   int16
+	Hoff     int16
+	Hmask    uint32 // big endian
+	Keys     []TcU32Key
+}
+
+func (msg *TcU32Sel) Len() int {
+	return SizeofTcU32Sel + int(msg.Nkeys)*SizeofTcU32Key
+}
+
+func DeserializeTcU32Sel(b []byte) *TcU32Sel {
+	x := &TcU32Sel{}
+	copy((*(*[SizeofTcU32Sel]byte)(unsafe.Pointer(x)))[:], b)
+	next := SizeofTcU32Sel
+	var i uint8
+	for i = 0; i < x.Nkeys; i++ {
+		x.Keys = append(x.Keys, *DeserializeTcU32Key(b[next:]))
+		next += SizeofTcU32Key
+	}
+	return x
+}
+
+func (x *TcU32Sel) Serialize() []byte {
+	// This can't just unsafe.cast because it must iterate through keys.
+	buf := make([]byte, x.Len())
+	copy(buf, (*(*[SizeofTcU32Sel]byte)(unsafe.Pointer(x)))[:])
+	next := SizeofTcU32Sel
+	for _, key := range x.Keys {
+		keyBuf := key.Serialize()
+		copy(buf[next:], keyBuf)
+		next += SizeofTcU32Key
+	}
+	return buf
+}
+
+const (
+	TCA_ACT_MIRRED = 8
+)
+
+const (
+	TCA_MIRRED_UNSPEC = iota
+	TCA_MIRRED_TM
+	TCA_MIRRED_PARMS
+	TCA_MIRRED_MAX = TCA_MIRRED_PARMS
+)
+
+const (
+	TCA_EGRESS_REDIR   = 1 /* packet redirect to EGRESS*/
+	TCA_EGRESS_MIRROR  = 2 /* mirror packet to EGRESS */
+	TCA_INGRESS_REDIR  = 3 /* packet redirect to INGRESS*/
+	TCA_INGRESS_MIRROR = 4 /* mirror packet to INGRESS */
+)
+
+const (
+	TC_ACT_UNSPEC     = int32(-1)
+	TC_ACT_OK         = 0
+	TC_ACT_RECLASSIFY = 1
+	TC_ACT_SHOT       = 2
+	TC_ACT_PIPE       = 3
+	TC_ACT_STOLEN     = 4
+	TC_ACT_QUEUED     = 5
+	TC_ACT_REPEAT     = 6
+	TC_ACT_JUMP       = 0x10000000
+)
+
+// #define tc_gen \
+//   __u32                 index; \
+//   __u32                 capab; \
+//   int                   action; \
+//   int                   refcnt; \
+//   int                   bindcnt
+// struct tc_mirred {
+// 	tc_gen;
+// 	int                     eaction;   /* one of IN/EGRESS_MIRROR/REDIR */
+// 	__u32                   ifindex;  /* ifindex of egress port */
+// };
+
+type TcMirred struct {
+	Index   uint32
+	Capab   uint32
+	Action  int32
+	Refcnt  int32
+	Bindcnt int32
+	Eaction int32
+	Ifindex uint32
+}
+
+func (msg *TcMirred) Len() int {
+	return SizeofTcMirred
+}
+
+func DeserializeTcMirred(b []byte) *TcMirred {
+	return (*TcMirred)(unsafe.Pointer(&b[0:SizeofTcMirred][0]))
+}
+
+func (x *TcMirred) Serialize() []byte {
+	return (*(*[SizeofTcMirred]byte)(unsafe.Pointer(x)))[:]
+}
+
+const (
+	TC_POLICE_UNSPEC     = TC_ACT_UNSPEC
+	TC_POLICE_OK         = TC_ACT_OK
+	TC_POLICE_RECLASSIFY = TC_ACT_RECLASSIFY
+	TC_POLICE_SHOT       = TC_ACT_SHOT
+	TC_POLICE_PIPE       = TC_ACT_PIPE
+)
+
+// struct tc_police {
+// 	__u32			index;
+// 	int			action;
+// 	__u32			limit;
+// 	__u32			burst;
+// 	__u32			mtu;
+// 	struct tc_ratespec	rate;
+// 	struct tc_ratespec	peakrate;
+// 	int				refcnt;
+// 	int				bindcnt;
+// 	__u32			capab;
+// };
+
+type TcPolice struct {
+	Index    uint32
+	Action   int32
+	Limit    uint32
+	Burst    uint32
+	Mtu      uint32
+	Rate     TcRateSpec
+	PeakRate TcRateSpec
+	Refcnt   int32
+	Bindcnt  int32
+	Capab    uint32
+}
+
+func (msg *TcPolice) Len() int {
+	return SizeofTcPolice
+}
+
+func DeserializeTcPolice(b []byte) *TcPolice {
+	return (*TcPolice)(unsafe.Pointer(&b[0:SizeofTcPolice][0]))
+}
+
+func (x *TcPolice) Serialize() []byte {
+	return (*(*[SizeofTcPolice]byte)(unsafe.Pointer(x)))[:]
+}
+
+const (
+	TCA_FW_UNSPEC = iota
+	TCA_FW_CLASSID
+	TCA_FW_POLICE
+	TCA_FW_INDEV
+	TCA_FW_ACT
+	TCA_FW_MASK
+	TCA_FW_MAX = TCA_FW_MASK
+)

+ 167 - 0
vendor/src/github.com/vishvananda/netlink/qdisc.go

@@ -0,0 +1,167 @@
+package netlink
+
+import (
+	"fmt"
+)
+
+const (
+	HANDLE_NONE      = 0
+	HANDLE_INGRESS   = 0xFFFFFFF1
+	HANDLE_ROOT      = 0xFFFFFFFF
+	PRIORITY_MAP_LEN = 16
+)
+
+type Qdisc interface {
+	Attrs() *QdiscAttrs
+	Type() string
+}
+
+// Qdisc represents a netlink qdisc. A qdisc is associated with a link,
+// has a handle, a parent and a refcnt. The root qdisc of a device should
+// have parent == HANDLE_ROOT.
+type QdiscAttrs struct {
+	LinkIndex int
+	Handle    uint32
+	Parent    uint32
+	Refcnt    uint32 // read only
+}
+
+func (q QdiscAttrs) String() string {
+	return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Refcnt: %s}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Refcnt)
+}
+
+func MakeHandle(major, minor uint16) uint32 {
+	return (uint32(major) << 16) | uint32(minor)
+}
+
+func MajorMinor(handle uint32) (uint16, uint16) {
+	return uint16((handle & 0xFFFF0000) >> 16), uint16(handle & 0x0000FFFFF)
+}
+
+func HandleStr(handle uint32) string {
+	switch handle {
+	case HANDLE_NONE:
+		return "none"
+	case HANDLE_INGRESS:
+		return "ingress"
+	case HANDLE_ROOT:
+		return "root"
+	default:
+		major, minor := MajorMinor(handle)
+		return fmt.Sprintf("%x:%x", major, minor)
+	}
+}
+
+// PfifoFast is the default qdisc created by the kernel if one has not
+// been defined for the interface
+type PfifoFast struct {
+	QdiscAttrs
+	Bands       uint8
+	PriorityMap [PRIORITY_MAP_LEN]uint8
+}
+
+func (qdisc *PfifoFast) Attrs() *QdiscAttrs {
+	return &qdisc.QdiscAttrs
+}
+
+func (qdisc *PfifoFast) Type() string {
+	return "pfifo_fast"
+}
+
+// Prio is a basic qdisc that works just like PfifoFast
+type Prio struct {
+	QdiscAttrs
+	Bands       uint8
+	PriorityMap [PRIORITY_MAP_LEN]uint8
+}
+
+func NewPrio(attrs QdiscAttrs) *Prio {
+	return &Prio{
+		QdiscAttrs:  attrs,
+		Bands:       3,
+		PriorityMap: [PRIORITY_MAP_LEN]uint8{1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1},
+	}
+}
+
+func (qdisc *Prio) Attrs() *QdiscAttrs {
+	return &qdisc.QdiscAttrs
+}
+
+func (qdisc *Prio) Type() string {
+	return "prio"
+}
+
+// Htb is a classful qdisc that rate limits based on tokens
+type Htb struct {
+	QdiscAttrs
+	Version      uint32
+	Rate2Quantum uint32
+	Defcls       uint32
+	Debug        uint32
+	DirectPkts   uint32
+}
+
+func NewHtb(attrs QdiscAttrs) *Htb {
+	return &Htb{
+		QdiscAttrs:   attrs,
+		Version:      3,
+		Defcls:       0,
+		Rate2Quantum: 10,
+		Debug:        0,
+		DirectPkts:   0,
+	}
+}
+
+func (qdisc *Htb) Attrs() *QdiscAttrs {
+	return &qdisc.QdiscAttrs
+}
+
+func (qdisc *Htb) Type() string {
+	return "htb"
+}
+
+// Tbf is a classless qdisc that rate limits based on tokens
+type Tbf struct {
+	QdiscAttrs
+	// TODO: handle 64bit rate properly
+	Rate   uint64
+	Limit  uint32
+	Buffer uint32
+	// TODO: handle other settings
+}
+
+func (qdisc *Tbf) Attrs() *QdiscAttrs {
+	return &qdisc.QdiscAttrs
+}
+
+func (qdisc *Tbf) Type() string {
+	return "tbf"
+}
+
+// Ingress is a qdisc for adding ingress filters
+type Ingress struct {
+	QdiscAttrs
+}
+
+func (qdisc *Ingress) Attrs() *QdiscAttrs {
+	return &qdisc.QdiscAttrs
+}
+
+func (qdisc *Ingress) Type() string {
+	return "ingress"
+}
+
+// GenericQdisc qdiscs represent types that are not currently understood
+// by this netlink library.
+type GenericQdisc struct {
+	QdiscAttrs
+	QdiscType string
+}
+
+func (qdisc *GenericQdisc) Attrs() *QdiscAttrs {
+	return &qdisc.QdiscAttrs
+}
+
+func (qdisc *GenericQdisc) Type() string {
+	return qdisc.QdiscType
+}

+ 316 - 0
vendor/src/github.com/vishvananda/netlink/qdisc_linux.go

@@ -0,0 +1,316 @@
+package netlink
+
+import (
+	"fmt"
+	"io/ioutil"
+	"strconv"
+	"strings"
+	"syscall"
+
+	"github.com/vishvananda/netlink/nl"
+)
+
+// QdiscDel will delete a qdisc from the system.
+// Equivalent to: `tc qdisc del $qdisc`
+func QdiscDel(qdisc Qdisc) error {
+	req := nl.NewNetlinkRequest(syscall.RTM_DELQDISC, syscall.NLM_F_ACK)
+	base := qdisc.Attrs()
+	msg := &nl.TcMsg{
+		Family:  nl.FAMILY_ALL,
+		Ifindex: int32(base.LinkIndex),
+		Handle:  base.Handle,
+		Parent:  base.Parent,
+	}
+	req.AddData(msg)
+
+	_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+	return err
+}
+
+// QdiscAdd will add a qdisc to the system.
+// Equivalent to: `tc qdisc add $qdisc`
+func QdiscAdd(qdisc Qdisc) error {
+	req := nl.NewNetlinkRequest(syscall.RTM_NEWQDISC, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
+	base := qdisc.Attrs()
+	msg := &nl.TcMsg{
+		Family:  nl.FAMILY_ALL,
+		Ifindex: int32(base.LinkIndex),
+		Handle:  base.Handle,
+		Parent:  base.Parent,
+	}
+	req.AddData(msg)
+	req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type())))
+
+	options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
+	if prio, ok := qdisc.(*Prio); ok {
+		tcmap := nl.TcPrioMap{
+			Bands:   int32(prio.Bands),
+			Priomap: prio.PriorityMap,
+		}
+		options = nl.NewRtAttr(nl.TCA_OPTIONS, tcmap.Serialize())
+	} else if tbf, ok := qdisc.(*Tbf); ok {
+		opt := nl.TcTbfQopt{}
+		// TODO: handle rate > uint32
+		opt.Rate.Rate = uint32(tbf.Rate)
+		opt.Limit = tbf.Limit
+		opt.Buffer = tbf.Buffer
+		nl.NewRtAttrChild(options, nl.TCA_TBF_PARMS, opt.Serialize())
+	} else if htb, ok := qdisc.(*Htb); ok {
+		opt := nl.TcHtbGlob{}
+		opt.Version = htb.Version
+		opt.Rate2Quantum = htb.Rate2Quantum
+		opt.Defcls = htb.Defcls
+		// TODO: Handle Debug properly. For now default to 0
+		opt.Debug = htb.Debug
+		opt.DirectPkts = htb.DirectPkts
+		nl.NewRtAttrChild(options, nl.TCA_HTB_INIT, opt.Serialize())
+		// nl.NewRtAttrChild(options, nl.TCA_HTB_DIRECT_QLEN, opt.Serialize())
+	} else if _, ok := qdisc.(*Ingress); ok {
+		// ingress filters must use the proper handle
+		if msg.Parent != HANDLE_INGRESS {
+			return fmt.Errorf("Ingress filters must set Parent to HANDLE_INGRESS")
+		}
+	}
+	req.AddData(options)
+	_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
+	return err
+}
+
+// QdiscList gets a list of qdiscs in the system.
+// Equivalent to: `tc qdisc show`.
+// The list can be filtered by link.
+func QdiscList(link Link) ([]Qdisc, error) {
+	req := nl.NewNetlinkRequest(syscall.RTM_GETQDISC, syscall.NLM_F_DUMP)
+	index := int32(0)
+	if link != nil {
+		base := link.Attrs()
+		ensureIndex(base)
+		index = int32(base.Index)
+	}
+	msg := &nl.TcMsg{
+		Family:  nl.FAMILY_ALL,
+		Ifindex: index,
+	}
+	req.AddData(msg)
+
+	msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWQDISC)
+	if err != nil {
+		return nil, err
+	}
+
+	var res []Qdisc
+	for _, m := range msgs {
+		msg := nl.DeserializeTcMsg(m)
+
+		attrs, err := nl.ParseRouteAttr(m[msg.Len():])
+		if err != nil {
+			return nil, err
+		}
+
+		// skip qdiscs from other interfaces
+		if link != nil && msg.Ifindex != index {
+			continue
+		}
+
+		base := QdiscAttrs{
+			LinkIndex: int(msg.Ifindex),
+			Handle:    msg.Handle,
+			Parent:    msg.Parent,
+			Refcnt:    msg.Info,
+		}
+		var qdisc Qdisc
+		qdiscType := ""
+		for _, attr := range attrs {
+			switch attr.Attr.Type {
+			case nl.TCA_KIND:
+				qdiscType = string(attr.Value[:len(attr.Value)-1])
+				switch qdiscType {
+				case "pfifo_fast":
+					qdisc = &PfifoFast{}
+				case "prio":
+					qdisc = &Prio{}
+				case "tbf":
+					qdisc = &Tbf{}
+				case "ingress":
+					qdisc = &Ingress{}
+				case "htb":
+					qdisc = &Htb{}
+				default:
+					qdisc = &GenericQdisc{QdiscType: qdiscType}
+				}
+			case nl.TCA_OPTIONS:
+				switch qdiscType {
+				case "pfifo_fast":
+					// pfifo returns TcPrioMap directly without wrapping it in rtattr
+					if err := parsePfifoFastData(qdisc, attr.Value); err != nil {
+						return nil, err
+					}
+				case "prio":
+					// prio returns TcPrioMap directly without wrapping it in rtattr
+					if err := parsePrioData(qdisc, attr.Value); err != nil {
+						return nil, err
+					}
+				case "tbf":
+					data, err := nl.ParseRouteAttr(attr.Value)
+					if err != nil {
+						return nil, err
+					}
+					if err := parseTbfData(qdisc, data); err != nil {
+						return nil, err
+					}
+				case "htb":
+					data, err := nl.ParseRouteAttr(attr.Value)
+					if err != nil {
+						return nil, err
+					}
+					if err := parseHtbData(qdisc, data); err != nil {
+						return nil, err
+					}
+
+					// no options for ingress
+				}
+			}
+		}
+		*qdisc.Attrs() = base
+		res = append(res, qdisc)
+	}
+
+	return res, nil
+}
+
+func parsePfifoFastData(qdisc Qdisc, value []byte) error {
+	pfifo := qdisc.(*PfifoFast)
+	tcmap := nl.DeserializeTcPrioMap(value)
+	pfifo.PriorityMap = tcmap.Priomap
+	pfifo.Bands = uint8(tcmap.Bands)
+	return nil
+}
+
+func parsePrioData(qdisc Qdisc, value []byte) error {
+	prio := qdisc.(*Prio)
+	tcmap := nl.DeserializeTcPrioMap(value)
+	prio.PriorityMap = tcmap.Priomap
+	prio.Bands = uint8(tcmap.Bands)
+	return nil
+}
+
+func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
+	native = nl.NativeEndian()
+	htb := qdisc.(*Htb)
+	for _, datum := range data {
+		switch datum.Attr.Type {
+		case nl.TCA_HTB_INIT:
+			opt := nl.DeserializeTcHtbGlob(datum.Value)
+			htb.Version = opt.Version
+			htb.Rate2Quantum = opt.Rate2Quantum
+			htb.Defcls = opt.Defcls
+			htb.Debug = opt.Debug
+			htb.DirectPkts = opt.DirectPkts
+		case nl.TCA_HTB_DIRECT_QLEN:
+			// TODO
+			//htb.DirectQlen = native.uint32(datum.Value)
+		}
+	}
+	return nil
+}
+func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
+	native = nl.NativeEndian()
+	tbf := qdisc.(*Tbf)
+	for _, datum := range data {
+		switch datum.Attr.Type {
+		case nl.TCA_TBF_PARMS:
+			opt := nl.DeserializeTcTbfQopt(datum.Value)
+			tbf.Rate = uint64(opt.Rate.Rate)
+			tbf.Limit = opt.Limit
+			tbf.Buffer = opt.Buffer
+		case nl.TCA_TBF_RATE64:
+			tbf.Rate = native.Uint64(datum.Value[0:4])
+		}
+	}
+	return nil
+}
+
+const (
+	TIME_UNITS_PER_SEC = 1000000
+)
+
+var (
+	tickInUsec  float64 = 0.0
+	clockFactor float64 = 0.0
+	hz          float64 = 0.0
+)
+
+func initClock() {
+	data, err := ioutil.ReadFile("/proc/net/psched")
+	if err != nil {
+		return
+	}
+	parts := strings.Split(strings.TrimSpace(string(data)), " ")
+	if len(parts) < 3 {
+		return
+	}
+	var vals [3]uint64
+	for i := range vals {
+		val, err := strconv.ParseUint(parts[i], 16, 32)
+		if err != nil {
+			return
+		}
+		vals[i] = val
+	}
+	// compatibility
+	if vals[2] == 1000000000 {
+		vals[0] = vals[1]
+	}
+	clockFactor = float64(vals[2]) / TIME_UNITS_PER_SEC
+	tickInUsec = float64(vals[0]) / float64(vals[1]) * clockFactor
+	hz = float64(vals[0])
+}
+
+func TickInUsec() float64 {
+	if tickInUsec == 0.0 {
+		initClock()
+	}
+	return tickInUsec
+}
+
+func ClockFactor() float64 {
+	if clockFactor == 0.0 {
+		initClock()
+	}
+	return clockFactor
+}
+
+func Hz() float64 {
+	if hz == 0.0 {
+		initClock()
+	}
+	return hz
+}
+
+func time2Tick(time uint32) uint32 {
+	return uint32(float64(time) * TickInUsec())
+}
+
+func tick2Time(tick uint32) uint32 {
+	return uint32(float64(tick) / TickInUsec())
+}
+
+func time2Ktime(time uint32) uint32 {
+	return uint32(float64(time) * ClockFactor())
+}
+
+func ktime2Time(ktime uint32) uint32 {
+	return uint32(float64(ktime) / ClockFactor())
+}
+
+func burst(rate uint64, buffer uint32) uint32 {
+	return uint32(float64(rate) * float64(tick2Time(buffer)) / TIME_UNITS_PER_SEC)
+}
+
+func latency(rate uint64, limit, buffer uint32) float64 {
+	return TIME_UNITS_PER_SEC*(float64(limit)/float64(rate)) - float64(tick2Time(buffer))
+}
+
+func Xmittime(rate uint64, size uint32) float64 {
+	return TickInUsec() * TIME_UNITS_PER_SEC * (float64(size) / float64(rate))
+}

+ 44 - 2
vendor/src/github.com/vishvananda/netlink/route.go

@@ -17,6 +17,13 @@ const (
 	SCOPE_NOWHERE  Scope = syscall.RT_SCOPE_NOWHERE
 )
 
+type NextHopFlag int
+
+const (
+	FLAG_ONLINK    NextHopFlag = syscall.RTNH_F_ONLINK
+	FLAG_PERVASIVE NextHopFlag = syscall.RTNH_F_PERVASIVE
+)
+
 // Route represents a netlink route. A route is associated with a link,
 // has a destination network, an optional source ip, and optional
 // gateway. Advanced route parameters and non-main routing tables are
@@ -27,9 +34,44 @@ type Route struct {
 	Dst       *net.IPNet
 	Src       net.IP
 	Gw        net.IP
+	Flags     int
 }
 
 func (r Route) String() string {
-	return fmt.Sprintf("{Ifindex: %d Dst: %s Src: %s Gw: %s}", r.LinkIndex, r.Dst,
-		r.Src, r.Gw)
+	return fmt.Sprintf("{Ifindex: %d Dst: %s Src: %s Gw: %s Flags: %s}", r.LinkIndex, r.Dst,
+		r.Src, r.Gw, r.ListFlags())
+}
+
+func (r *Route) SetFlag(flag NextHopFlag) {
+	r.Flags |= int(flag)
+}
+
+func (r *Route) ClearFlag(flag NextHopFlag) {
+	r.Flags &^= int(flag)
+}
+
+type flagString struct {
+	f NextHopFlag
+	s string
+}
+
+var testFlags = []flagString{
+	flagString{f: FLAG_ONLINK, s: "onlink"},
+	flagString{f: FLAG_PERVASIVE, s: "pervasive"},
+}
+
+func (r *Route) ListFlags() []string {
+	var flags []string
+	for _, tf := range testFlags {
+		if r.Flags&int(tf.f) != 0 {
+			flags = append(flags, tf.s)
+		}
+	}
+	return flags
+}
+
+// RouteUpdate is sent when a route changes - type is RTM_NEWROUTE or RTM_DELROUTE
+type RouteUpdate struct {
+	Type uint16
+	Route
 }

+ 70 - 43
vendor/src/github.com/vishvananda/netlink/route_linux.go

@@ -17,7 +17,7 @@ func RouteAdd(route *Route) error {
 	return routeHandle(route, req, nl.NewRtMsg())
 }
 
-// RouteAdd will delete a route from the system.
+// RouteDel will delete a route from the system.
 // Equivalent to: `ip route del $route`
 func RouteDel(route *Route) error {
 	req := nl.NewNetlinkRequest(syscall.RTM_DELROUTE, syscall.NLM_F_ACK)
@@ -30,6 +30,7 @@ func routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) error {
 	}
 
 	msg.Scope = uint8(route.Scope)
+	msg.Flags = uint32(route.Flags)
 	family := -1
 	var rtAttrs []*nl.RtAttr
 
@@ -117,7 +118,6 @@ func RouteList(link Link, family int) ([]Route, error) {
 		index = base.Index
 	}
 
-	native := nl.NativeEndian()
 	var res []Route
 	for _, m := range msgs {
 		msg := nl.DeserializeRtMsg(m)
@@ -132,31 +132,14 @@ func RouteList(link Link, family int) ([]Route, error) {
 			continue
 		}
 
-		attrs, err := nl.ParseRouteAttr(m[msg.Len():])
+		route, err := deserializeRoute(m)
 		if err != nil {
 			return nil, err
 		}
 
-		route := Route{Scope: Scope(msg.Scope)}
-		for _, attr := range attrs {
-			switch attr.Attr.Type {
-			case syscall.RTA_GATEWAY:
-				route.Gw = net.IP(attr.Value)
-			case syscall.RTA_PREFSRC:
-				route.Src = net.IP(attr.Value)
-			case syscall.RTA_DST:
-				route.Dst = &net.IPNet{
-					IP:   attr.Value,
-					Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
-				}
-			case syscall.RTA_OIF:
-				routeIndex := int(native.Uint32(attr.Value[0:4]))
-				if link != nil && routeIndex != index {
-					// Ignore routes from other interfaces
-					continue
-				}
-				route.LinkIndex = routeIndex
-			}
+		if link != nil && route.LinkIndex != index {
+			// Ignore routes from other interfaces
+			continue
 		}
 		res = append(res, route)
 	}
@@ -164,6 +147,37 @@ func RouteList(link Link, family int) ([]Route, error) {
 	return res, nil
 }
 
+// deserializeRoute decodes a binary netlink message into a Route struct
+func deserializeRoute(m []byte) (Route, error) {
+	route := Route{}
+	msg := nl.DeserializeRtMsg(m)
+	attrs, err := nl.ParseRouteAttr(m[msg.Len():])
+	if err != nil {
+		return route, err
+	}
+	route.Scope = Scope(msg.Scope)
+	route.Flags = int(msg.Flags)
+
+	native := nl.NativeEndian()
+	for _, attr := range attrs {
+		switch attr.Attr.Type {
+		case syscall.RTA_GATEWAY:
+			route.Gw = net.IP(attr.Value)
+		case syscall.RTA_PREFSRC:
+			route.Src = net.IP(attr.Value)
+		case syscall.RTA_DST:
+			route.Dst = &net.IPNet{
+				IP:   attr.Value,
+				Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
+			}
+		case syscall.RTA_OIF:
+			routeIndex := int(native.Uint32(attr.Value[0:4]))
+			route.LinkIndex = routeIndex
+		}
+	}
+	return route, nil
+}
+
 // RouteGet gets a route to a specific destination from the host system.
 // Equivalent to: 'ip route get'.
 func RouteGet(destination net.IP) ([]Route, error) {
@@ -191,34 +205,47 @@ func RouteGet(destination net.IP) ([]Route, error) {
 		return nil, err
 	}
 
-	native := nl.NativeEndian()
 	var res []Route
 	for _, m := range msgs {
-		msg := nl.DeserializeRtMsg(m)
-		attrs, err := nl.ParseRouteAttr(m[msg.Len():])
+		route, err := deserializeRoute(m)
 		if err != nil {
 			return nil, err
 		}
+		res = append(res, route)
+	}
+	return res, nil
+
+}
 
-		route := Route{}
-		for _, attr := range attrs {
-			switch attr.Attr.Type {
-			case syscall.RTA_GATEWAY:
-				route.Gw = net.IP(attr.Value)
-			case syscall.RTA_PREFSRC:
-				route.Src = net.IP(attr.Value)
-			case syscall.RTA_DST:
-				route.Dst = &net.IPNet{
-					IP:   attr.Value,
-					Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
+// RouteSubscribe takes a chan down which notifications will be sent
+// when routes are added or deleted. Close the 'done' chan to stop subscription.
+func RouteSubscribe(ch chan<- RouteUpdate, done <-chan struct{}) error {
+	s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_ROUTE, syscall.RTNLGRP_IPV6_ROUTE)
+	if err != nil {
+		return err
+	}
+	if done != nil {
+		go func() {
+			<-done
+			s.Close()
+		}()
+	}
+	go func() {
+		defer close(ch)
+		for {
+			msgs, err := s.Receive()
+			if err != nil {
+				return
+			}
+			for _, m := range msgs {
+				route, err := deserializeRoute(m.Data)
+				if err != nil {
+					return
 				}
-			case syscall.RTA_OIF:
-				routeIndex := int(native.Uint32(attr.Value[0:4]))
-				route.LinkIndex = routeIndex
+				ch <- RouteUpdate{Type: m.Header.Type, Route: route}
 			}
 		}
-		res = append(res, route)
-	}
-	return res, nil
+	}()
 
+	return nil
 }