Browse Source

Merge pull request #23859 from aboch/vnd

Update check-config.sh, netlink and libnetwork vendoring
Vincent Demeester 9 years ago
parent
commit
18398b2933
38 changed files with 752 additions and 422 deletions
  1. 2 0
      contrib/check-config.sh
  2. 2 2
      hack/vendor.sh
  3. 137 34
      vendor/src/github.com/docker/libnetwork/agent.go
  4. 3 0
      vendor/src/github.com/docker/libnetwork/controller.go
  5. 8 8
      vendor/src/github.com/docker/libnetwork/discoverapi/discoverapi.go
  6. 4 21
      vendor/src/github.com/docker/libnetwork/drivers/overlay/encryption.go
  7. 7 2
      vendor/src/github.com/docker/libnetwork/drivers/overlay/joinleave.go
  8. 1 1
      vendor/src/github.com/docker/libnetwork/drivers/overlay/ov_network.go
  9. 2 1
      vendor/src/github.com/docker/libnetwork/drivers/overlay/ov_utils.go
  10. 18 12
      vendor/src/github.com/docker/libnetwork/drivers/overlay/overlay.go
  11. 7 0
      vendor/src/github.com/docker/libnetwork/networkdb/cluster.go
  12. 23 1
      vendor/src/github.com/docker/libnetwork/ns/init_linux.go
  13. 7 1
      vendor/src/github.com/docker/libnetwork/osl/interface_linux.go
  14. 2 4
      vendor/src/github.com/docker/libnetwork/osl/namespace_linux.go
  15. 92 6
      vendor/src/github.com/docker/libnetwork/resolver.go
  16. 1 1
      vendor/src/github.com/vishvananda/netlink/README.md
  17. 0 33
      vendor/src/github.com/vishvananda/netlink/class.go
  18. 34 0
      vendor/src/github.com/vishvananda/netlink/class_linux.go
  19. 1 74
      vendor/src/github.com/vishvananda/netlink/filter.go
  20. 73 2
      vendor/src/github.com/vishvananda/netlink/filter_linux.go
  21. 0 86
      vendor/src/github.com/vishvananda/netlink/handle.go
  22. 86 0
      vendor/src/github.com/vishvananda/netlink/handle_linux.go
  23. 0 6
      vendor/src/github.com/vishvananda/netlink/link.go
  24. 10 0
      vendor/src/github.com/vishvananda/netlink/link_linux.go
  25. 1 12
      vendor/src/github.com/vishvananda/netlink/netlink.go
  26. 10 0
      vendor/src/github.com/vishvananda/netlink/netlink_linux.go
  27. 2 2
      vendor/src/github.com/vishvananda/netlink/netlink_unspecified.go
  28. 25 12
      vendor/src/github.com/vishvananda/netlink/nl/nl_linux.go
  29. 12 0
      vendor/src/github.com/vishvananda/netlink/nl/route_linux.go
  30. 0 64
      vendor/src/github.com/vishvananda/netlink/qdisc.go
  31. 65 0
      vendor/src/github.com/vishvananda/netlink/qdisc_linux.go
  32. 15 29
      vendor/src/github.com/vishvananda/netlink/route.go
  33. 93 0
      vendor/src/github.com/vishvananda/netlink/route_linux.go
  34. 7 0
      vendor/src/github.com/vishvananda/netlink/route_unspecified.go
  35. 0 3
      vendor/src/github.com/vishvananda/netlink/rule.go
  36. 0 1
      vendor/src/github.com/vishvananda/netlink/rule_linux.go
  37. 1 1
      vendor/src/github.com/vishvananda/netlink/xfrm.go
  38. 1 3
      vendor/src/github.com/vishvananda/netlink/xfrm_state.go

+ 2 - 0
contrib/check-config.sh

@@ -249,6 +249,8 @@ echo '- Network Drivers:'
 {
 	echo '- "'$(wrap_color 'overlay' blue)'":'
 	check_flags VXLAN | sed 's/^/  /'
+	echo '  Optional (for secure networks):'
+	check_flags XFRM_ALGO XFRM_USER | sed 's/^/  /'
 } | sed 's/^/  /'
 
 echo '- Storage Drivers:'

+ 2 - 2
hack/vendor.sh

@@ -65,7 +65,7 @@ clone git github.com/RackSec/srslog 259aed10dfa74ea2961eddd1d9847619f6e98837
 clone git github.com/imdario/mergo 0.2.1
 
 #get libnetwork packages
-clone git github.com/docker/libnetwork ab62dd6bf06bf0637d66d529931b69a5544468cb
+clone git github.com/docker/libnetwork  ed311d050fda7821f2e7c53a7e08a0205923aef5
 clone git github.com/docker/go-events 39718a26497694185f8fb58a7d6f31947f3dc42d
 clone git github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80
 clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec
@@ -75,7 +75,7 @@ clone git github.com/hashicorp/go-multierror fcdddc395df1ddf4247c69bd436e84cfa07
 clone git github.com/hashicorp/serf 598c54895cc5a7b1a24a398d635e8c0ea0959870
 clone git github.com/docker/libkv 7283ef27ed32fe267388510a91709b307bb9942c
 clone git github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25
-clone git github.com/vishvananda/netlink 7995ff5647a22cbf0dc41bf5c0e977bdb0d5c6b7
+clone git github.com/vishvananda/netlink 734d02c3e202f682c74b71314b2c61eec0170fd4
 clone git github.com/BurntSushi/toml f706d00e3de6abe700c994cdd545a1a4915af060
 clone git github.com/samuel/go-zookeeper d0e0d8e11f318e000a8cc434616d69e329edc374
 clone git github.com/deckarep/golang-set ef32fa3046d9f249d399f98ebaf9be944430fd1d

+ 137 - 34
vendor/src/github.com/docker/libnetwork/agent.go

@@ -3,12 +3,10 @@ package libnetwork
 //go:generate protoc -I.:Godeps/_workspace/src/github.com/gogo/protobuf  --gogo_out=import_path=github.com/docker/libnetwork,Mgogoproto/gogo.proto=github.com/gogo/protobuf/gogoproto:. agent.proto
 
 import (
-	"encoding/hex"
 	"fmt"
 	"net"
 	"os"
 	"sort"
-	"strconv"
 
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/go-events"
@@ -20,6 +18,12 @@ import (
 	"github.com/gogo/protobuf/proto"
 )
 
+const (
+	subsysGossip = "networking:gossip"
+	subsysIPSec  = "networking:ipsec"
+	keyringSize  = 3
+)
+
 // ByTime implements sort.Interface for []*types.EncryptionKey based on
 // the LamportTime field.
 type ByTime []*types.EncryptionKey
@@ -80,6 +84,82 @@ func resolveAddr(addrOrInterface string) (string, error) {
 func (c *controller) handleKeyChange(keys []*types.EncryptionKey) error {
 	drvEnc := discoverapi.DriverEncryptionUpdate{}
 
+	a := c.agent
+	// Find the deleted key. If the deleted key was the primary key,
+	// a new primary key should be set before removing if from keyring.
+	deleted := []byte{}
+	j := len(c.keys)
+	for i := 0; i < j; {
+		same := false
+		for _, key := range keys {
+			if same = key.LamportTime == c.keys[i].LamportTime; same {
+				break
+			}
+		}
+		if !same {
+			cKey := c.keys[i]
+			if cKey.Subsystem == subsysGossip {
+				deleted = cKey.Key
+			}
+
+			if cKey.Subsystem == subsysGossip /* subsysIPSec */ {
+				drvEnc.Prune = cKey.Key
+				drvEnc.PruneTag = cKey.LamportTime
+			}
+			c.keys[i], c.keys[j-1] = c.keys[j-1], c.keys[i]
+			c.keys[j-1] = nil
+			j--
+		}
+		i++
+	}
+	c.keys = c.keys[:j]
+
+	// Find the new key and add it to the key ring
+	for _, key := range keys {
+		same := false
+		for _, cKey := range c.keys {
+			if same = cKey.LamportTime == key.LamportTime; same {
+				break
+			}
+		}
+		if !same {
+			c.keys = append(c.keys, key)
+			if key.Subsystem == subsysGossip {
+				a.networkDB.SetKey(key.Key)
+			}
+
+			if key.Subsystem == subsysGossip /*subsysIPSec*/ {
+				drvEnc.Key = key.Key
+				drvEnc.Tag = key.LamportTime
+			}
+		}
+	}
+
+	key, tag := c.getPrimaryKeyTag(subsysGossip)
+	a.networkDB.SetPrimaryKey(key)
+
+	//key, tag = c.getPrimaryKeyTag(subsysIPSec)
+	drvEnc.Primary = key
+	drvEnc.PrimaryTag = tag
+
+	if len(deleted) > 0 {
+		a.networkDB.RemoveKey(deleted)
+	}
+
+	c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool {
+		err := driver.DiscoverNew(discoverapi.EncryptionKeysUpdate, drvEnc)
+		if err != nil {
+			logrus.Warnf("Failed to update datapath keys in driver %s: %v", name, err)
+		}
+		return false
+	})
+
+	return nil
+}
+
+func (c *controller) handleKeyChangeV1(keys []*types.EncryptionKey) error {
+	drvEnc := discoverapi.DriverEncryptionUpdate{}
+
 	// Find the new key and add it to the key ring
 	a := c.agent
 	for _, key := range keys {
@@ -91,12 +171,12 @@ func (c *controller) handleKeyChange(keys []*types.EncryptionKey) error {
 		}
 		if !same {
 			c.keys = append(c.keys, key)
-			if key.Subsystem == "networking:gossip" {
+			if key.Subsystem == subsysGossip {
 				a.networkDB.SetKey(key.Key)
 			}
-			if key.Subsystem == "networking:gossip" /*"networking:ipsec"*/ {
-				drvEnc.Key = hex.EncodeToString(key.Key)
-				drvEnc.Tag = strconv.FormatUint(key.LamportTime, 10)
+			if key.Subsystem == subsysGossip /*subsysIPSec*/ {
+				drvEnc.Key = key.Key
+				drvEnc.Tag = key.LamportTime
 			}
 			break
 		}
@@ -112,12 +192,12 @@ func (c *controller) handleKeyChange(keys []*types.EncryptionKey) error {
 			}
 		}
 		if !same {
-			if cKey.Subsystem == "networking:gossip" {
+			if cKey.Subsystem == subsysGossip {
 				deleted = cKey.Key
 			}
-			if cKey.Subsystem == "networking:gossip" /*"networking:ipsec"*/ {
-				drvEnc.Prune = hex.EncodeToString(cKey.Key)
-				drvEnc.PruneTag = strconv.FormatUint(cKey.LamportTime, 10)
+			if cKey.Subsystem == subsysGossip /*subsysIPSec*/ {
+				drvEnc.Prune = cKey.Key
+				drvEnc.PruneTag = cKey.LamportTime
 			}
 			c.keys = append(c.keys[:i], c.keys[i+1:]...)
 			break
@@ -126,15 +206,15 @@ func (c *controller) handleKeyChange(keys []*types.EncryptionKey) error {
 
 	sort.Sort(ByTime(c.keys))
 	for _, key := range c.keys {
-		if key.Subsystem == "networking:gossip" {
+		if key.Subsystem == subsysGossip {
 			a.networkDB.SetPrimaryKey(key.Key)
 			break
 		}
 	}
 	for _, key := range c.keys {
-		if key.Subsystem == "networking:gossip" /*"networking:ipsec"*/ {
-			drvEnc.Primary = hex.EncodeToString(key.Key)
-			drvEnc.PrimaryTag = strconv.FormatUint(key.LamportTime, 10)
+		if key.Subsystem == subsysGossip /*subsysIPSec*/ {
+			drvEnc.Primary = key.Key
+			drvEnc.PrimaryTag = key.LamportTime
 			break
 		}
 	}
@@ -197,26 +277,51 @@ func (c *controller) agentSetup() error {
 	return nil
 }
 
-func (c *controller) agentInit(bindAddrOrInterface string) error {
-	if !c.isAgent() {
-		return nil
+// For a given subsystem, getKeys sorts the keys by Lamport time and returns
+// the slice of keys and their Lamport times, which can be used as unique tags for the keys.
+func (c *controller) getKeys(subsys string) ([][]byte, []uint64) {
+	sort.Sort(ByTime(c.keys))
+
+	keys := [][]byte{}
+	tags := []uint64{}
+	for _, key := range c.keys {
+		if key.Subsystem == subsys {
+			keys = append(keys, key.Key)
+			tags = append(tags, key.LamportTime)
+		}
 	}
 
-	drvEnc := discoverapi.DriverEncryptionConfig{}
+	if len(keys) < keyringSize {
+		return keys, tags
+	}
+	keys[0], keys[1] = keys[1], keys[0]
+	tags[0], tags[1] = tags[1], tags[0]
+	return keys, tags
+}
 
-	// sort the keys by lamport time
+// getPrimaryKeyTag returns the primary key for the given subsystem together
+// with its tag (the key's Lamport time). c.keys is sorted with ByTime first,
+// and the second entry matching subsys (index 1) is the one returned.
+// NOTE(review): indexing keys[1] panics when fewer than two keys exist for
+// subsys; callers appear to rely on a full key ring — confirm.
+func (c *controller) getPrimaryKeyTag(subsys string) ([]byte, uint64) {
 	sort.Sort(ByTime(c.keys))
-
-	gossipkey := [][]byte{}
+	keys := []*types.EncryptionKey{}
 	for _, key := range c.keys {
-		if key.Subsystem == "networking:gossip" {
-			gossipkey = append(gossipkey, key.Key)
-		}
-		if key.Subsystem == "networking:gossip" /*"networking:ipsec"*/ {
-			drvEnc.Keys = append(drvEnc.Keys, hex.EncodeToString(key.Key))
-			drvEnc.Tags = append(drvEnc.Tags, strconv.FormatUint(key.LamportTime, 10))
+		if key.Subsystem == subsys {
+			keys = append(keys, key)
 		}
 	}
+	return keys[1].Key, keys[1].LamportTime
+}
+
+func (c *controller) agentInit(bindAddrOrInterface string) error {
+	if !c.isAgent() {
+		return nil
+	}
+
+	drvEnc := discoverapi.DriverEncryptionConfig{}
+
+	keys, tags := c.getKeys(subsysGossip) // getKeys(subsysIPSec)
+	drvEnc.Keys = keys
+	drvEnc.Tags = tags
 
 	bindAddr, err := resolveAddr(bindAddrOrInterface)
 	if err != nil {
@@ -227,7 +332,7 @@ func (c *controller) agentInit(bindAddrOrInterface string) error {
 	nDB, err := networkdb.New(&networkdb.Config{
 		BindAddr: bindAddr,
 		NodeName: hostname,
-		Keys:     gossipkey,
+		Keys:     keys,
 	})
 
 	if err != nil {
@@ -275,12 +380,10 @@ func (c *controller) agentDriverNotify(d driverapi.Driver) {
 	})
 
 	drvEnc := discoverapi.DriverEncryptionConfig{}
-	for _, key := range c.keys {
-		if key.Subsystem == "networking:gossip" /*"networking:ipsec"*/ {
-			drvEnc.Keys = append(drvEnc.Keys, hex.EncodeToString(key.Key))
-			drvEnc.Tags = append(drvEnc.Tags, strconv.FormatUint(key.LamportTime, 10))
-		}
-	}
+	keys, tags := c.getKeys(subsysGossip) // getKeys(subsysIPSec)
+	drvEnc.Keys = keys
+	drvEnc.Tags = tags
+
 	c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool {
 		err := driver.DiscoverNew(discoverapi.EncryptionKeysConfig, drvEnc)
 		if err != nil {

+ 3 - 0
vendor/src/github.com/docker/libnetwork/controller.go

@@ -264,6 +264,9 @@ func (c *controller) SetKeys(keys []*types.EncryptionKey) error {
 		c.Unlock()
 		return nil
 	}
+	if len(keys) < keyringSize {
+		return c.handleKeyChangeV1(keys)
+	}
 	return c.handleKeyChange(keys)
 }
 

+ 8 - 8
vendor/src/github.com/docker/libnetwork/discoverapi/discoverapi.go

@@ -42,18 +42,18 @@ type DatastoreConfigData struct {
 // Key in first position is the primary key, the one to be used in tx.
 // Original key and tag types are []byte and uint64
 type DriverEncryptionConfig struct {
-	Keys []string
-	Tags []string
+	Keys [][]byte
+	Tags []uint64
 }
 
 // DriverEncryptionUpdate carries an update to the encryption key(s) as:
 // a new key and/or set a primary key and/or a removal of an existing key.
 // Original key and tag types are []byte and uint64
 type DriverEncryptionUpdate struct {
-	Key        string
-	Tag        string
-	Primary    string
-	PrimaryTag string
-	Prune      string
-	PruneTag   string
+	Key        []byte
+	Tag        uint64
+	Primary    []byte
+	PrimaryTag uint64
+	Prune      []byte
+	PruneTag   uint64
 }

+ 4 - 21
vendor/src/github.com/docker/libnetwork/drivers/overlay/encryption.go

@@ -33,7 +33,10 @@ type key struct {
 }
 
 func (k *key) String() string {
-	return fmt.Sprintf("(key: %s, tag: 0x%x)", hex.EncodeToString(k.value)[0:5], k.tag)
+	if k != nil {
+		return fmt.Sprintf("(key: %s, tag: 0x%x)", hex.EncodeToString(k.value)[0:5], k.tag)
+	}
+	return ""
 }
 
 type spi struct {
@@ -557,23 +560,3 @@ func updateNodeKey(lIP, rIP net.IP, idxs []*spi, curKeys []*key, newIdx, priIdx,
 
 	return spis
 }
-
-func parseEncryptionKey(value, tag string) (*key, error) {
-	var (
-		k   *key
-		err error
-	)
-	if value == "" {
-		return nil, nil
-	}
-	k = &key{}
-	if k.value, err = hex.DecodeString(value); err != nil {
-		return nil, types.BadRequestErrorf("failed to decode key (%s): %v", value, err)
-	}
-	t, err := strconv.ParseUint(tag, 10, 64)
-	if err != nil {
-		return nil, types.BadRequestErrorf("failed to decode tag (%s): %v", tag, err)
-	}
-	k.tag = uint32(t)
-	return k, nil
-}

+ 7 - 2
vendor/src/github.com/docker/libnetwork/drivers/overlay/joinleave.go

@@ -3,6 +3,7 @@ package overlay
 import (
 	"fmt"
 	"net"
+	"syscall"
 
 	log "github.com/Sirupsen/logrus"
 	"github.com/docker/libnetwork/driverapi"
@@ -31,6 +32,12 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
 		return fmt.Errorf("cannot join secure network: encryption keys not present")
 	}
 
+	nlh := ns.NlHandle()
+
+	if n.secure && !nlh.SupportsNetlinkFamily(syscall.NETLINK_XFRM) {
+		return fmt.Errorf("cannot join secure network: required modules to install IPSEC rules are missing on host")
+	}
+
 	s := n.getSubnetforIP(ep.addr)
 	if s == nil {
 		return fmt.Errorf("could not find subnet for endpoint %s", eid)
@@ -65,8 +72,6 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
 		return fmt.Errorf("failed to update overlay endpoint %s to local data store: %v", ep.id[0:7], err)
 	}
 
-	nlh := ns.NlHandle()
-
 	// Set the container interface and its peer MTU to 1450 to allow
 	// for 50 bytes vxlan encap (inner eth header(14) + outer IP(20) +
 	// outer UDP(8) + vxlan header(8))

+ 1 - 1
vendor/src/github.com/docker/libnetwork/drivers/overlay/ov_network.go

@@ -284,7 +284,7 @@ func populateVNITbl() {
 			}
 			defer ns.Close()
 
-			nlh, err := netlink.NewHandleAt(ns)
+			nlh, err := netlink.NewHandleAt(ns, syscall.NETLINK_ROUTE)
 			if err != nil {
 				logrus.Errorf("Could not open netlink handle during vni population for ns %s: %v", path, err)
 				return nil

+ 2 - 1
vendor/src/github.com/docker/libnetwork/drivers/overlay/ov_utils.go

@@ -3,6 +3,7 @@ package overlay
 import (
 	"fmt"
 	"strings"
+	"syscall"
 
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/libnetwork/netutils"
@@ -128,7 +129,7 @@ func deleteVxlanByVNI(path string, vni uint32) error {
 		}
 		defer ns.Close()
 
-		nlh, err = netlink.NewHandleAt(ns)
+		nlh, err = netlink.NewHandleAt(ns, syscall.NETLINK_ROUTE)
 		if err != nil {
 			return fmt.Errorf("failed to get netlink handle for ns %s: %v", path, err)
 		}

+ 18 - 12
vendor/src/github.com/docker/libnetwork/drivers/overlay/overlay.go

@@ -306,9 +306,9 @@ func (d *driver) DiscoverNew(dType discoverapi.DiscoveryType, data interface{})
 		}
 		keys := make([]*key, 0, len(encrData.Keys))
 		for i := 0; i < len(encrData.Keys); i++ {
-			k, err := parseEncryptionKey(encrData.Keys[i], encrData.Tags[i])
-			if err != nil {
-				return err
+			k := &key{
+				value: encrData.Keys[i],
+				tag:   uint32(encrData.Tags[i]),
 			}
 			keys = append(keys, k)
 		}
@@ -319,17 +319,23 @@ func (d *driver) DiscoverNew(dType discoverapi.DiscoveryType, data interface{})
 		if !ok {
 			return fmt.Errorf("invalid encryption key notification data")
 		}
-		newKey, err = parseEncryptionKey(encrData.Key, encrData.Tag)
-		if err != nil {
-			return err
+		if encrData.Key != nil {
+			newKey = &key{
+				value: encrData.Key,
+				tag:   uint32(encrData.Tag),
+			}
 		}
-		priKey, err = parseEncryptionKey(encrData.Primary, encrData.PrimaryTag)
-		if err != nil {
-			return err
+		if encrData.Primary != nil {
+			priKey = &key{
+				value: encrData.Primary,
+				tag:   uint32(encrData.PrimaryTag),
+			}
 		}
-		delKey, err = parseEncryptionKey(encrData.Prune, encrData.PruneTag)
-		if err != nil {
-			return err
+		if encrData.Prune != nil {
+			delKey = &key{
+				value: encrData.Prune,
+				tag:   uint32(encrData.PruneTag),
+			}
 		}
 		d.updateKeys(newKey, priKey, delKey)
 	default:

+ 7 - 0
vendor/src/github.com/docker/libnetwork/networkdb/cluster.go

@@ -3,6 +3,7 @@ package networkdb
 import (
 	"bytes"
 	"crypto/rand"
+	"encoding/hex"
 	"fmt"
 	"math/big"
 	rnd "math/rand"
@@ -36,6 +37,7 @@ func (l *logWriter) Write(p []byte) (int, error) {
 
 // SetKey adds a new key to the key ring
 func (nDB *NetworkDB) SetKey(key []byte) {
+	logrus.Debugf("Adding key %s", hex.EncodeToString(key)[0:5])
 	for _, dbKey := range nDB.config.Keys {
 		if bytes.Equal(key, dbKey) {
 			return
@@ -50,6 +52,7 @@ func (nDB *NetworkDB) SetKey(key []byte) {
 // SetPrimaryKey sets the given key as the primary key. This should have
 // been added apriori through SetKey
 func (nDB *NetworkDB) SetPrimaryKey(key []byte) {
+	logrus.Debugf("Primary Key %s", hex.EncodeToString(key)[0:5])
 	for _, dbKey := range nDB.config.Keys {
 		if bytes.Equal(key, dbKey) {
 			if nDB.keyring != nil {
@@ -63,6 +66,7 @@ func (nDB *NetworkDB) SetPrimaryKey(key []byte) {
 // RemoveKey removes a key from the key ring. The key being removed
 // can't be the primary key
 func (nDB *NetworkDB) RemoveKey(key []byte) {
+	logrus.Debugf("Remove Key %s", hex.EncodeToString(key)[0:5])
 	for i, dbKey := range nDB.config.Keys {
 		if bytes.Equal(key, dbKey) {
 			nDB.config.Keys = append(nDB.config.Keys[:i], nDB.config.Keys[i+1:]...)
@@ -90,6 +94,9 @@ func (nDB *NetworkDB) clusterInit() error {
 
 	var err error
 	if len(nDB.config.Keys) > 0 {
+		for i, key := range nDB.config.Keys {
+			logrus.Debugf("Encryption key %d: %s", i+1, hex.EncodeToString(key)[0:5])
+		}
 		nDB.keyring, err = memberlist.NewKeyring(nDB.config.Keys, nDB.config.Keys[0])
 		if err != nil {
 			return err

+ 23 - 1
vendor/src/github.com/docker/libnetwork/ns/init_linux.go

@@ -3,6 +3,8 @@ package ns
 import (
 	"fmt"
 	"os"
+	"os/exec"
+	"strings"
 	"sync"
 	"syscall"
 
@@ -24,7 +26,7 @@ func Init() {
 	if err != nil {
 		log.Errorf("could not get initial namespace: %v", err)
 	}
-	initNl, err = netlink.NewHandle()
+	initNl, err = netlink.NewHandle(getSupportedNlFamilies()...)
 	if err != nil {
 		log.Errorf("could not create netlink handle on initial namespace: %v", err)
 	}
@@ -32,6 +34,7 @@ func Init() {
 
 // SetNamespace sets the initial namespace handler
 func SetNamespace() error {
+	initOnce.Do(Init)
 	if err := netns.Set(initNs); err != nil {
 		linkInfo, linkErr := getLink()
 		if linkErr != nil {
@@ -62,3 +65,22 @@ func NlHandle() *netlink.Handle {
 	initOnce.Do(Init)
 	return initNl
 }
+
+// getSupportedNlFamilies returns the netlink families passed to
+// netlink.NewHandle in Init. NETLINK_ROUTE is always included; NETLINK_XFRM
+// is appended only when loadXfrmModules succeeds. On failure a warning is
+// logged and only NETLINK_ROUTE is returned.
+func getSupportedNlFamilies() []int {
+	fams := []int{syscall.NETLINK_ROUTE}
+	if err := loadXfrmModules(); err != nil {
+		log.Warnf("Could not load necessary modules for IPSEC rules: %v", err)
+		return fams
+	}
+	return append(fams, syscall.NETLINK_XFRM)
+}
+
+// loadXfrmModules runs `modprobe -va` for xfrm_user and then xfrm_algo.
+// It returns an error carrying the command's trimmed combined output as soon
+// as either invocation fails, and nil when both succeed.
+func loadXfrmModules() error {
+	if out, err := exec.Command("modprobe", "-va", "xfrm_user").CombinedOutput(); err != nil {
+		return fmt.Errorf("Running modprobe xfrm_user failed with message: `%s`, error: %v", strings.TrimSpace(string(out)), err)
+	}
+	if out, err := exec.Command("modprobe", "-va", "xfrm_algo").CombinedOutput(); err != nil {
+		return fmt.Errorf("Running modprobe xfrm_algo failed with message: `%s`, error: %v", strings.TrimSpace(string(out)), err)
+	}
+	return nil
+}

+ 7 - 1
vendor/src/github.com/docker/libnetwork/osl/interface_linux.go

@@ -6,6 +6,7 @@ import (
 	"regexp"
 	"sync"
 	"syscall"
+	"time"
 
 	log "github.com/Sirupsen/logrus"
 	"github.com/docker/libnetwork/ns"
@@ -290,7 +291,12 @@ func (n *networkNamespace) AddInterface(srcName, dstPrefix string, options ...If
 	}
 
 	// Up the interface.
-	if err := nlh.LinkSetUp(iface); err != nil {
+	// Retry link setup up to three times with a 10ms pause between attempts.
+	// LinkSetUp has to be re-invoked inside the loop body: a for-loop's init
+	// statement runs only once, so without the re-invocation err would never
+	// change and the loop would merely delay reporting the first failure.
+	cnt := 0
+	for err = nlh.LinkSetUp(iface); err != nil && cnt < 3; cnt++ {
+		log.Debugf("retrying link setup because of: %v", err)
+		time.Sleep(10 * time.Millisecond)
+		err = nlh.LinkSetUp(iface)
+	}
+	if err != nil {
 		return fmt.Errorf("failed to set link up: %v", err)
 	}
 

+ 2 - 4
vendor/src/github.com/docker/libnetwork/osl/namespace_linux.go

@@ -30,7 +30,6 @@ var (
 	gpmWg            sync.WaitGroup
 	gpmCleanupPeriod = 60 * time.Second
 	gpmChan          = make(chan chan struct{})
-	nsOnce           sync.Once
 )
 
 // The networkNamespace type is the linux implementation of the Sandbox
@@ -196,7 +195,7 @@ func NewSandbox(key string, osCreate, isRestore bool) (Sandbox, error) {
 	}
 	defer sboxNs.Close()
 
-	n.nlHandle, err = netlink.NewHandleAt(sboxNs)
+	n.nlHandle, err = netlink.NewHandleAt(sboxNs, syscall.NETLINK_ROUTE)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create a netlink handle: %v", err)
 	}
@@ -238,7 +237,7 @@ func GetSandboxForExternalKey(basePath string, key string) (Sandbox, error) {
 	}
 	defer sboxNs.Close()
 
-	n.nlHandle, err = netlink.NewHandleAt(sboxNs)
+	n.nlHandle, err = netlink.NewHandleAt(sboxNs, syscall.NETLINK_ROUTE)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create a netlink handle: %v", err)
 	}
@@ -326,7 +325,6 @@ func (n *networkNamespace) InvokeFunc(f func()) error {
 
 // InitOSContext initializes OS context while configuring network resources
 func InitOSContext() func() {
-	nsOnce.Do(ns.Init)
 	runtime.LockOSThread()
 	if err := ns.SetNamespace(); err != nil {
 		log.Error(err)

+ 92 - 6
vendor/src/github.com/docker/libnetwork/resolver.go

@@ -62,6 +62,21 @@ type extDNSEntry struct {
 	extOnce sync.Once
 }
 
+// sboxQuery identifies a forwarded DNS query by sandbox ID and DNS message
+// ID. It is the key type of the queryGC map.
+type sboxQuery struct {
+	sboxID string
+	dnsID  uint16
+}
+
+// clientConnGC is the value type of the queryGC map: the client connection
+// recorded for a query whose upstream read timed out, plus the mark used by
+// the periodic sweep started in init.
+type clientConnGC struct {
+	// toDelete is set by the first sweep that sees the entry; the entry is
+	// deleted by the next sweep.
+	toDelete bool
+	client   clientConn
+}
+
+var (
+	queryGCMutex sync.Mutex
+	queryGC      map[sboxQuery]*clientConnGC
+)
+
 // resolver implements the Resolver interface
 type resolver struct {
 	sb         *sandbox
@@ -79,6 +94,21 @@ type resolver struct {
 
+// init seeds math/rand, allocates the queryGC map and starts the background
+// goroutine that sweeps it.
 func init() {
 	rand.Seed(time.Now().Unix())
+	queryGC = make(map[sboxQuery]*clientConnGC)
+	// Sweep queryGC once a minute. The ticker is never stopped, so this
+	// goroutine runs for the lifetime of the process.
+	go func() {
+		ticker := time.NewTicker(1 * time.Minute)
+		for range ticker.C {
+			queryGCMutex.Lock()
+			for query, conn := range queryGC {
+				// Two-phase expiry: an entry is only marked the first
+				// time a sweep sees it and is deleted on the following
+				// sweep, unless checkRespInGC removed it in between.
+				if !conn.toDelete {
+					conn.toDelete = true
+					continue
+				}
+				delete(queryGC, query)
+			}
+			queryGCMutex.Unlock()
+		}
+	}()
 }
 
 // NewResolver creates a new instance of the Resolver
@@ -370,6 +400,7 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) {
 		writer = w
 	} else {
 		queryID := query.Id
+	extQueryLoop:
 		for i := 0; i < maxExtDNS; i++ {
 			extDNS := &r.extDNSList[i]
 			if extDNS.ipStr == "" {
@@ -435,14 +466,26 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) {
 				log.Debugf("Send to DNS server failed, %s", err)
 				continue
 			}
+			for {
+				// If a reply arrives after a read timeout it remains in the socket buffer
+				// and will be read after sending the next query. To ignore such stale replies,
+				// save the query context in a GC queue when a read times out. On the next reply,
+				// if the context is present in the GC queue it is an old reply: ignore it and
+				// read again.
+				resp, err = co.ReadMsg()
+				if err != nil {
+					if nerr, ok := err.(net.Error); ok && nerr.Timeout() {
+						r.addQueryToGC(w, query)
+					}
+					r.forwardQueryEnd(w, query)
+					log.Debugf("Read from DNS server failed, %s", err)
+					continue extQueryLoop
+				}
 
-			resp, err = co.ReadMsg()
-			if err != nil {
-				r.forwardQueryEnd(w, query)
-				log.Debugf("Read from DNS server failed, %s", err)
-				continue
+				if !r.checkRespInGC(w, resp) {
+					break
+				}
 			}
-
 			// Retrieves the context for the forwarded query and returns the client connection
 			// to send the reply to
 			writer = r.forwardQueryEnd(w, resp)
@@ -501,6 +544,49 @@ func (r *resolver) forwardQueryStart(w dns.ResponseWriter, msg *dns.Msg, queryID
 	return true
 }
 
+// addQueryToGC records a timed-out forwarded query in queryGC so that a late
+// reply to it can be recognised by checkRespInGC. It does nothing unless the
+// local address of w is on the "udp" network, or when r.client has no entry
+// for msg.Id.
+func (r *resolver) addQueryToGC(w dns.ResponseWriter, msg *dns.Msg) {
+	if w.LocalAddr().Network() != "udp" {
+		return
+	}
+
+	// r.client is read under r.queryLock; the lock is released before the
+	// global queryGCMutex is taken.
+	r.queryLock.Lock()
+	cc, ok := r.client[msg.Id]
+	r.queryLock.Unlock()
+	if !ok {
+		return
+	}
+
+	// Entries are keyed by sandbox ID plus DNS message ID.
+	query := sboxQuery{
+		sboxID: r.sb.ID(),
+		dnsID:  msg.Id,
+	}
+	clientGC := &clientConnGC{
+		client: cc,
+	}
+	queryGCMutex.Lock()
+	queryGC[query] = clientGC
+	queryGCMutex.Unlock()
+}
+
+// checkRespInGC reports whether msg answers a query previously stored by
+// addQueryToGC (same sandbox ID and DNS message ID). A matching entry is
+// removed from queryGC before true is returned. It always returns false when
+// the local address of w is not on the "udp" network.
+func (r *resolver) checkRespInGC(w dns.ResponseWriter, msg *dns.Msg) bool {
+	if w.LocalAddr().Network() != "udp" {
+		return false
+	}
+
+	query := sboxQuery{
+		sboxID: r.sb.ID(),
+		dnsID:  msg.Id,
+	}
+
+	queryGCMutex.Lock()
+	defer queryGCMutex.Unlock()
+	if _, ok := queryGC[query]; ok {
+		delete(queryGC, query)
+		return true
+	}
+	return false
+}
+
 func (r *resolver) forwardQueryEnd(w dns.ResponseWriter, msg *dns.Msg) dns.ResponseWriter {
 	var (
 		cc clientConn

+ 1 - 1
vendor/src/github.com/vishvananda/netlink/README.md

@@ -8,7 +8,7 @@ the kernel. It can be used to add and remove interfaces, set ip addresses
 and routes, and configure ipsec. Netlink communication requires elevated
 privileges, so in most cases this code needs to be run as root. Since
 low-level netlink messages are inscrutable at best, the library attempts
-to provide an api that is loosely modeled on the CLI provied by iproute2.
+to provide an api that is loosely modeled on the CLI provided by iproute2.
 Actions like `ip link add` will be accomplished via a similarly named
 function like AddLink(). This library began its life as a fork of the
 netlink functionality in

+ 0 - 33
vendor/src/github.com/vishvananda/netlink/class.go

@@ -50,39 +50,6 @@ type HtbClass struct {
 	Prio    uint32
 }
 
-func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass {
-	mtu := 1600
-	rate := cattrs.Rate / 8
-	ceil := cattrs.Ceil / 8
-	buffer := cattrs.Buffer
-	cbuffer := cattrs.Cbuffer
-
-	if ceil == 0 {
-		ceil = rate
-	}
-
-	if buffer == 0 {
-		buffer = uint32(float64(rate)/Hz() + float64(mtu))
-	}
-	buffer = uint32(Xmittime(rate, buffer))
-
-	if cbuffer == 0 {
-		cbuffer = uint32(float64(ceil)/Hz() + float64(mtu))
-	}
-	cbuffer = uint32(Xmittime(ceil, cbuffer))
-
-	return &HtbClass{
-		ClassAttrs: attrs,
-		Rate:       rate,
-		Ceil:       ceil,
-		Buffer:     buffer,
-		Cbuffer:    cbuffer,
-		Quantum:    10,
-		Level:      0,
-		Prio:       0,
-	}
-}
-
 func (q HtbClass) String() string {
 	return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer)
 }

+ 34 - 0
vendor/src/github.com/vishvananda/netlink/class_linux.go

@@ -7,6 +7,40 @@ import (
 	"github.com/vishvananda/netlink/nl"
 )
 
+// NOTE: function is in here because it uses other linux functions
+//
+// NewHtbClass builds an HtbClass from attrs and cattrs. Rate and Ceil are
+// divided by 8 (presumably bits to bytes — confirm), and a zero Ceil falls
+// back to the rate. A zero Buffer or Cbuffer is derived from the rate (or
+// ceil) divided by Hz() plus a fixed mtu of 1600; both values are then passed
+// through Xmittime. Quantum is fixed at 10, Level and Prio at 0.
+func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass {
+	mtu := 1600
+	rate := cattrs.Rate / 8
+	ceil := cattrs.Ceil / 8
+	buffer := cattrs.Buffer
+	cbuffer := cattrs.Cbuffer
+
+	if ceil == 0 {
+		ceil = rate
+	}
+
+	if buffer == 0 {
+		buffer = uint32(float64(rate)/Hz() + float64(mtu))
+	}
+	buffer = uint32(Xmittime(rate, buffer))
+
+	if cbuffer == 0 {
+		cbuffer = uint32(float64(ceil)/Hz() + float64(mtu))
+	}
+	cbuffer = uint32(Xmittime(ceil, cbuffer))
+
+	return &HtbClass{
+		ClassAttrs: attrs,
+		Rate:       rate,
+		Ceil:       ceil,
+		Buffer:     buffer,
+		Cbuffer:    cbuffer,
+		Quantum:    10,
+		Level:      0,
+		Prio:       0,
+	}
+}
+
 // ClassDel will delete a class from the system.
 // Equivalent to: `tc class del $class`
 func ClassDel(class Class) error {

+ 1 - 74
vendor/src/github.com/vishvananda/netlink/filter.go

@@ -1,11 +1,6 @@
 package netlink
 
-import (
-	"errors"
-	"fmt"
-
-	"github.com/vishvananda/netlink/nl"
-)
+import "fmt"
 
 type Filter interface {
 	Attrs() *FilterAttrs
@@ -217,74 +212,6 @@ type FilterFwAttrs struct {
 	LinkLayer int
 }
 
-// Fw filter filters on firewall marks
-type Fw struct {
-	FilterAttrs
-	ClassId uint32
-	// TODO remove nl type from interface
-	Police nl.TcPolice
-	InDev  string
-	// TODO Action
-	Mask   uint32
-	AvRate uint32
-	Rtab   [256]uint32
-	Ptab   [256]uint32
-}
-
-func NewFw(attrs FilterAttrs, fattrs FilterFwAttrs) (*Fw, error) {
-	var rtab [256]uint32
-	var ptab [256]uint32
-	rcellLog := -1
-	pcellLog := -1
-	avrate := fattrs.AvRate / 8
-	police := nl.TcPolice{}
-	police.Rate.Rate = fattrs.Rate / 8
-	police.PeakRate.Rate = fattrs.PeakRate / 8
-	buffer := fattrs.Buffer
-	linklayer := nl.LINKLAYER_ETHERNET
-
-	if fattrs.LinkLayer != nl.LINKLAYER_UNSPEC {
-		linklayer = fattrs.LinkLayer
-	}
-
-	police.Action = int32(fattrs.Action)
-	if police.Rate.Rate != 0 {
-		police.Rate.Mpu = fattrs.Mpu
-		police.Rate.Overhead = fattrs.Overhead
-		if CalcRtable(&police.Rate, rtab, rcellLog, fattrs.Mtu, linklayer) < 0 {
-			return nil, errors.New("TBF: failed to calculate rate table")
-		}
-		police.Burst = uint32(Xmittime(uint64(police.Rate.Rate), uint32(buffer)))
-	}
-	police.Mtu = fattrs.Mtu
-	if police.PeakRate.Rate != 0 {
-		police.PeakRate.Mpu = fattrs.Mpu
-		police.PeakRate.Overhead = fattrs.Overhead
-		if CalcRtable(&police.PeakRate, ptab, pcellLog, fattrs.Mtu, linklayer) < 0 {
-			return nil, errors.New("POLICE: failed to calculate peak rate table")
-		}
-	}
-
-	return &Fw{
-		FilterAttrs: attrs,
-		ClassId:     fattrs.ClassId,
-		InDev:       fattrs.InDev,
-		Mask:        fattrs.Mask,
-		Police:      police,
-		AvRate:      avrate,
-		Rtab:        rtab,
-		Ptab:        ptab,
-	}, nil
-}
-
-func (filter *Fw) Attrs() *FilterAttrs {
-	return &filter.FilterAttrs
-}
-
-func (filter *Fw) Type() string {
-	return "fw"
-}
-
 type BpfFilter struct {
 	FilterAttrs
 	ClassId      uint32

+ 73 - 2
vendor/src/github.com/vishvananda/netlink/filter_linux.go

@@ -3,12 +3,83 @@ package netlink
 import (
 	"bytes"
 	"encoding/binary"
+	"errors"
 	"fmt"
 	"syscall"
 
 	"github.com/vishvananda/netlink/nl"
 )
 
+// Fw filter filters on firewall marks
+// NOTE: this is in filter_linux because it refers to nl.TcPolice which
+//       is defined in nl/tc_linux.go
+type Fw struct {
+	FilterAttrs
+	ClassId uint32
+	// TODO remove nl type from interface
+	Police nl.TcPolice
+	InDev  string
+	// TODO Action
+	Mask   uint32
+	AvRate uint32
+	Rtab   [256]uint32
+	Ptab   [256]uint32
+}
+
+func NewFw(attrs FilterAttrs, fattrs FilterFwAttrs) (*Fw, error) {
+	var rtab [256]uint32
+	var ptab [256]uint32
+	rcellLog := -1
+	pcellLog := -1
+	avrate := fattrs.AvRate / 8
+	police := nl.TcPolice{}
+	police.Rate.Rate = fattrs.Rate / 8
+	police.PeakRate.Rate = fattrs.PeakRate / 8
+	buffer := fattrs.Buffer
+	linklayer := nl.LINKLAYER_ETHERNET
+
+	if fattrs.LinkLayer != nl.LINKLAYER_UNSPEC {
+		linklayer = fattrs.LinkLayer
+	}
+
+	police.Action = int32(fattrs.Action)
+	if police.Rate.Rate != 0 {
+		police.Rate.Mpu = fattrs.Mpu
+		police.Rate.Overhead = fattrs.Overhead
+		if CalcRtable(&police.Rate, rtab, rcellLog, fattrs.Mtu, linklayer) < 0 {
+			return nil, errors.New("TBF: failed to calculate rate table")
+		}
+		police.Burst = uint32(Xmittime(uint64(police.Rate.Rate), uint32(buffer)))
+	}
+	police.Mtu = fattrs.Mtu
+	if police.PeakRate.Rate != 0 {
+		police.PeakRate.Mpu = fattrs.Mpu
+		police.PeakRate.Overhead = fattrs.Overhead
+		if CalcRtable(&police.PeakRate, ptab, pcellLog, fattrs.Mtu, linklayer) < 0 {
+			return nil, errors.New("POLICE: failed to calculate peak rate table")
+		}
+	}
+
+	return &Fw{
+		FilterAttrs: attrs,
+		ClassId:     fattrs.ClassId,
+		InDev:       fattrs.InDev,
+		Mask:        fattrs.Mask,
+		Police:      police,
+		AvRate:      avrate,
+		Rtab:        rtab,
+		Ptab:        ptab,
+	}, nil
+}
+
+func (filter *Fw) Attrs() *FilterAttrs {
+	return &filter.FilterAttrs
+}
+
+func (filter *Fw) Type() string {
+	return "fw"
+}
+
 // FilterDel will delete a filter from the system.
 // Equivalent to: `tc filter del $filter`
 func FilterDel(filter Filter) error {
@@ -126,14 +197,14 @@ func (h *Handle) FilterAdd(filter Filter) error {
 
 // FilterList gets a list of filters in the system.
 // Equivalent to: `tc filter show`.
-// Generally retunrs nothing if link and parent are not specified.
+// Generally returns nothing if link and parent are not specified.
 func FilterList(link Link, parent uint32) ([]Filter, error) {
 	return pkgHandle.FilterList(link, parent)
 }
 
 // FilterList gets a list of filters in the system.
 // Equivalent to: `tc filter show`.
-// Generally retunrs nothing if link and parent are not specified.
+// Generally returns nothing if link and parent are not specified.
 func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) {
 	req := h.newNetlinkRequest(syscall.RTM_GETTFILTER, syscall.NLM_F_DUMP)
 	msg := &nl.TcMsg{

+ 0 - 86
vendor/src/github.com/vishvananda/netlink/handle.go

@@ -1,86 +0,0 @@
-package netlink
-
-import (
-	"sync/atomic"
-	"syscall"
-
-	"github.com/vishvananda/netlink/nl"
-	"github.com/vishvananda/netns"
-)
-
-// Empty handle used by the netlink package methods
-var pkgHandle = &Handle{}
-
-// Handle is an handle for the netlink requests
-// on a specific network namespace. All the requests
-// share the same netlink socket, which gets released
-// when the handle is deleted.
-type Handle struct {
-	seq          uint32
-	routeSocket  *nl.NetlinkSocket
-	xfrmSocket   *nl.NetlinkSocket
-	lookupByDump bool
-}
-
-// NewHandle returns a netlink handle on the current network namespace.
-func NewHandle() (*Handle, error) {
-	return newHandle(netns.None(), netns.None())
-}
-
-// NewHandle returns a netlink handle on the network namespace
-// specified by ns. If ns=netns.None(), current network namespace
-// will be assumed
-func NewHandleAt(ns netns.NsHandle) (*Handle, error) {
-	return newHandle(ns, netns.None())
-}
-
-// NewHandleAtFrom works as NewHandle but allows client to specify the
-// new and the origin netns Handle.
-func NewHandleAtFrom(newNs, curNs netns.NsHandle) (*Handle, error) {
-	return newHandle(newNs, curNs)
-}
-
-func newHandle(newNs, curNs netns.NsHandle) (*Handle, error) {
-	var (
-		err     error
-		rSocket *nl.NetlinkSocket
-		xSocket *nl.NetlinkSocket
-	)
-	rSocket, err = nl.GetNetlinkSocketAt(newNs, curNs, syscall.NETLINK_ROUTE)
-	if err != nil {
-		return nil, err
-	}
-	xSocket, err = nl.GetNetlinkSocketAt(newNs, curNs, syscall.NETLINK_XFRM)
-	if err != nil {
-		return nil, err
-	}
-	return &Handle{routeSocket: rSocket, xfrmSocket: xSocket}, nil
-}
-
-// Delete releases the resources allocated to this handle
-func (h *Handle) Delete() {
-	if h.routeSocket != nil {
-		h.routeSocket.Close()
-	}
-	if h.xfrmSocket != nil {
-		h.xfrmSocket.Close()
-	}
-	h.routeSocket, h.xfrmSocket = nil, nil
-}
-
-func (h *Handle) newNetlinkRequest(proto, flags int) *nl.NetlinkRequest {
-	// Do this so that package API still use nl package variable nextSeqNr
-	if h.routeSocket == nil {
-		return nl.NewNetlinkRequest(proto, flags)
-	}
-	return &nl.NetlinkRequest{
-		NlMsghdr: syscall.NlMsghdr{
-			Len:   uint32(syscall.SizeofNlMsghdr),
-			Type:  uint16(proto),
-			Flags: syscall.NLM_F_REQUEST | uint16(flags),
-			Seq:   atomic.AddUint32(&h.seq, 1),
-		},
-		RouteSocket: h.routeSocket,
-		XfmrSocket:  h.xfrmSocket,
-	}
-}

+ 86 - 0
vendor/src/github.com/vishvananda/netlink/handle_linux.go

@@ -0,0 +1,86 @@
+package netlink
+
+import (
+	"syscall"
+
+	"github.com/vishvananda/netlink/nl"
+	"github.com/vishvananda/netns"
+)
+
+// Empty handle used by the netlink package methods
+var pkgHandle = &Handle{}
+
+// Handle is a handle for the netlink requests on a
+// specific network namespace. All the requests on the
+// same netlink family share the same netlink socket,
+// which gets released when the handle is deleted.
+type Handle struct {
+	// sockets maps a netlink family to the socket handle used for it.
+	sockets      map[int]*nl.SocketHandle
+	lookupByDump bool
+}
+
+// SupportsNetlinkFamily reports whether the passed netlink family is supported by this Handle
+func (h *Handle) SupportsNetlinkFamily(nlFamily int) bool {
+	_, ok := h.sockets[nlFamily]
+	return ok
+}
+
+// NewHandle returns a netlink handle on the current network namespace.
+// Caller may specify the netlink families the handle should support.
+// If no families are specified, all the families the netlink package
+// supports will be automatically added.
+func NewHandle(nlFamilies ...int) (*Handle, error) {
+	return newHandle(netns.None(), netns.None(), nlFamilies...)
+}
+
+// NewHandleAt returns a netlink handle on the network namespace
+// specified by ns. If ns=netns.None(), current network namespace
+// will be assumed. Caller may specify the netlink families the
+// handle should support; if none is specified, all the families in
+// nl.SupportedNlFamilies are used.
+func NewHandleAt(ns netns.NsHandle, nlFamilies ...int) (*Handle, error) {
+	return newHandle(ns, netns.None(), nlFamilies...)
+}
+
+// NewHandleAtFrom works as NewHandle but allows client to specify the
+// new and the origin netns Handle. No netlink family list is forwarded,
+// so the returned handle is created for all the families listed in
+// nl.SupportedNlFamilies.
+func NewHandleAtFrom(newNs, curNs netns.NsHandle) (*Handle, error) {
+	return newHandle(newNs, curNs)
+}
+
+// newHandle creates a Handle holding one netlink socket per requested
+// netlink family, obtained via nl.GetNetlinkSocketAt(newNs, curNs, family).
+// When no family is passed, nl.SupportedNlFamilies is used.
+// On failure no handle is returned and every socket opened so far is closed.
+func newHandle(newNs, curNs netns.NsHandle, nlFamilies ...int) (*Handle, error) {
+	h := &Handle{sockets: map[int]*nl.SocketHandle{}}
+	fams := nl.SupportedNlFamilies
+	if len(nlFamilies) != 0 {
+		fams = nlFamilies
+	}
+	for _, f := range fams {
+		// A family listed twice must not open a second socket: the map
+		// entry would be overwritten and the first socket never closed.
+		if _, ok := h.sockets[f]; ok {
+			continue
+		}
+		s, err := nl.GetNetlinkSocketAt(newNs, curNs, f)
+		if err != nil {
+			// Release the sockets already opened for the previous
+			// families so a partial failure does not leak them.
+			h.Delete()
+			return nil, err
+		}
+		h.sockets[f] = &nl.SocketHandle{Socket: s}
+	}
+	return h, nil
+}
+
+// Delete releases the resources allocated to this handle
+func (h *Handle) Delete() {
+	for _, sh := range h.sockets {
+		sh.Close()
+	}
+	h.sockets = nil
+}
+
+// newNetlinkRequest builds a netlink request whose header type is proto
+// and whose flags are the passed flags with NLM_F_REQUEST always set.
+// The request carries the handle's per-family sockets; no sequence
+// number is assigned to the header here.
+func (h *Handle) newNetlinkRequest(proto, flags int) *nl.NetlinkRequest {
+	// A handle without sockets (such as the empty pkgHandle used by the
+	// package level functions) delegates to nl.NewNetlinkRequest, so that
+	// the package API still uses the nl package variable nextSeqNr.
+	if h.sockets == nil {
+		return nl.NewNetlinkRequest(proto, flags)
+	}
+	return &nl.NetlinkRequest{
+		NlMsghdr: syscall.NlMsghdr{
+			Len:   uint32(syscall.SizeofNlMsghdr),
+			Type:  uint16(proto),
+			Flags: syscall.NLM_F_REQUEST | uint16(flags),
+		},
+		Sockets: h.sockets,
+	}
+}

+ 0 - 6
vendor/src/github.com/vishvananda/netlink/link.go

@@ -3,7 +3,6 @@ package netlink
 import (
 	"fmt"
 	"net"
-	"syscall"
 )
 
 // Link represents a link device from netlink. Shared link attributes
@@ -173,11 +172,6 @@ func (macvtap Macvtap) Type() string {
 
 type TuntapMode uint16
 
-const (
-	TUNTAP_MODE_TUN TuntapMode = syscall.IFF_TUN
-	TUNTAP_MODE_TAP TuntapMode = syscall.IFF_TAP
-)
-
 // Tuntap links created via /dev/tun/tap, but can be destroyed via netlink
 type Tuntap struct {
 	LinkAttrs

+ 10 - 0
vendor/src/github.com/vishvananda/netlink/link_linux.go

@@ -14,6 +14,11 @@ import (
 
 const SizeofLinkStats = 0x5c
 
+const (
+	TUNTAP_MODE_TUN TuntapMode = syscall.IFF_TUN
+	TUNTAP_MODE_TAP TuntapMode = syscall.IFF_TAP
+)
+
 var native = nl.NativeEndian()
 var lookupByDump = false
 
@@ -675,6 +680,11 @@ func (h *Handle) LinkAdd(link Link) error {
 			data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
 			nl.NewRtAttrChild(data, nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[macv.Mode]))
 		}
+	} else if macv, ok := link.(*Macvtap); ok {
+		if macv.Mode != MACVLAN_MODE_DEFAULT {
+			data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
+			nl.NewRtAttrChild(data, nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[macv.Mode]))
+		}
 	} else if gretap, ok := link.(*Gretap); ok {
 		addGretapAttrs(gretap, linkInfo)
 	}

+ 1 - 12
vendor/src/github.com/vishvananda/netlink/netlink.go

@@ -8,18 +8,7 @@
 // interface that is loosely modeled on the iproute2 cli.
 package netlink
 
-import (
-	"net"
-
-	"github.com/vishvananda/netlink/nl"
-)
-
-// Family type definitions
-const (
-	FAMILY_ALL = nl.FAMILY_ALL
-	FAMILY_V4  = nl.FAMILY_V4
-	FAMILY_V6  = nl.FAMILY_V6
-)
+import "net"
 
 // ParseIPNet parses a string in ip/net format and returns a net.IPNet.
 // This is valuable because addresses in netlink are often IPNets and

+ 10 - 0
vendor/src/github.com/vishvananda/netlink/netlink_linux.go

@@ -0,0 +1,10 @@
+package netlink
+
+import "github.com/vishvananda/netlink/nl"
+
+// Family type definitions
+const (
+	FAMILY_ALL = nl.FAMILY_ALL
+	FAMILY_V4  = nl.FAMILY_V4
+	FAMILY_V6  = nl.FAMILY_V6
+)

+ 2 - 2
vendor/src/github.com/vishvananda/netlink/netlink_unspecified.go

@@ -138,6 +138,6 @@ func NeighList(linkIndex, family int) ([]Neigh, error) {
 	return nil, ErrNotImplemented
 }
 
-func NeighDeserialize(m []byte) (*Ndmsg, *Neigh, error) {
-	return nil, nil, ErrNotImplemented
+func NeighDeserialize(m []byte) (*Neigh, error) {
+	return nil, ErrNotImplemented
 }

+ 25 - 12
vendor/src/github.com/vishvananda/netlink/nl/nl_linux.go

@@ -22,6 +22,9 @@ const (
 	FAMILY_V6  = syscall.AF_INET6
 )
 
+// SupportedNlFamilies contains the list of netlink families this netlink package supports
+var SupportedNlFamilies = []int{syscall.NETLINK_ROUTE, syscall.NETLINK_XFRM}
+
 var nextSeqNr uint32
 
 // GetIPFamily returns the family type of a net.IP.
@@ -175,9 +178,8 @@ func (a *RtAttr) Serialize() []byte {
 
 type NetlinkRequest struct {
 	syscall.NlMsghdr
-	Data        []NetlinkRequestData
-	RouteSocket *NetlinkSocket
-	XfmrSocket  *NetlinkSocket
+	Data    []NetlinkRequestData
+	Sockets map[int]*SocketHandle
 }
 
 // Serialize the Netlink Request into a byte array
@@ -209,7 +211,7 @@ func (req *NetlinkRequest) AddData(data NetlinkRequestData) {
 }
 
 // Execute the request against the given sockType.
-// Returns a list of netlink messages in seriaized format, optionally filtered
+// Returns a list of netlink messages in serialized format, optionally filtered
 // by resType.
 func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, error) {
 	var (
@@ -217,15 +219,12 @@ func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, erro
 		err error
 	)
 
-	switch sockType {
-	case syscall.NETLINK_XFRM:
-		s = req.XfmrSocket
-	case syscall.NETLINK_ROUTE:
-		s = req.RouteSocket
-	default:
-		return nil, fmt.Errorf("Socket type %d is not handled", sockType)
+	if req.Sockets != nil {
+		if sh, ok := req.Sockets[sockType]; ok {
+			s = sh.Socket
+			req.Seq = atomic.AddUint32(&sh.Seq, 1)
+		}
 	}
-
 	sharedSocket := s != nil
 
 	if s == nil {
@@ -486,3 +485,17 @@ func netlinkRouteAttrAndValue(b []byte) (*syscall.RtAttr, []byte, int, error) {
 	}
 	return a, b[syscall.SizeofRtAttr:], rtaAlignOf(int(a.Len)), nil
 }
+
+// SocketHandle contains the netlink socket and the associated
+// sequence counter for a specific netlink family
+type SocketHandle struct {
+	Seq    uint32
+	Socket *NetlinkSocket
+}
+
+// Close closes the netlink socket
+func (sh *SocketHandle) Close() {
+	if sh.Socket != nil {
+		sh.Socket.Close()
+	}
+}

+ 12 - 0
vendor/src/github.com/vishvananda/netlink/nl/route_linux.go

@@ -40,3 +40,15 @@ func DeserializeRtMsg(b []byte) *RtMsg {
 func (msg *RtMsg) Serialize() []byte {
 	return (*(*[syscall.SizeofRtMsg]byte)(unsafe.Pointer(msg)))[:]
 }
+
+type RtNexthop struct {
+	syscall.RtNexthop
+}
+
+func DeserializeRtNexthop(b []byte) *RtNexthop {
+	return (*RtNexthop)(unsafe.Pointer(&b[0:syscall.SizeofRtNexthop][0]))
+}
+
+func (msg *RtNexthop) Serialize() []byte {
+	return (*(*[syscall.SizeofRtNexthop]byte)(unsafe.Pointer(msg)))[:]
+}

+ 0 - 64
vendor/src/github.com/vishvananda/netlink/qdisc.go

@@ -176,70 +176,6 @@ type Netem struct {
 	CorruptCorr   uint32
 }
 
-func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem {
-	var limit uint32 = 1000
-	var lossCorr, delayCorr, duplicateCorr uint32
-	var reorderProb, reorderCorr uint32
-	var corruptProb, corruptCorr uint32
-
-	latency := nattrs.Latency
-	loss := Percentage2u32(nattrs.Loss)
-	gap := nattrs.Gap
-	duplicate := Percentage2u32(nattrs.Duplicate)
-	jitter := nattrs.Jitter
-
-	// Correlation
-	if latency > 0 && jitter > 0 {
-		delayCorr = Percentage2u32(nattrs.DelayCorr)
-	}
-	if loss > 0 {
-		lossCorr = Percentage2u32(nattrs.LossCorr)
-	}
-	if duplicate > 0 {
-		duplicateCorr = Percentage2u32(nattrs.DuplicateCorr)
-	}
-	// FIXME should validate values(like loss/duplicate are percentages...)
-	latency = time2Tick(latency)
-
-	if nattrs.Limit != 0 {
-		limit = nattrs.Limit
-	}
-	// Jitter is only value if latency is > 0
-	if latency > 0 {
-		jitter = time2Tick(jitter)
-	}
-
-	reorderProb = Percentage2u32(nattrs.ReorderProb)
-	reorderCorr = Percentage2u32(nattrs.ReorderCorr)
-
-	if reorderProb > 0 {
-		// ERROR if lantency == 0
-		if gap == 0 {
-			gap = 1
-		}
-	}
-
-	corruptProb = Percentage2u32(nattrs.CorruptProb)
-	corruptCorr = Percentage2u32(nattrs.CorruptCorr)
-
-	return &Netem{
-		QdiscAttrs:    attrs,
-		Latency:       latency,
-		DelayCorr:     delayCorr,
-		Limit:         limit,
-		Loss:          loss,
-		LossCorr:      lossCorr,
-		Gap:           gap,
-		Duplicate:     duplicate,
-		DuplicateCorr: duplicateCorr,
-		Jitter:        jitter,
-		ReorderProb:   reorderProb,
-		ReorderCorr:   reorderCorr,
-		CorruptProb:   corruptProb,
-		CorruptCorr:   corruptCorr,
-	}
-}
-
 func (qdisc *Netem) Attrs() *QdiscAttrs {
 	return &qdisc.QdiscAttrs
 }

+ 65 - 0
vendor/src/github.com/vishvananda/netlink/qdisc_linux.go

@@ -10,6 +10,71 @@ import (
 	"github.com/vishvananda/netlink/nl"
 )
 
+// NOTE function is here because it uses other linux functions
+func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem {
+	var limit uint32 = 1000
+	var lossCorr, delayCorr, duplicateCorr uint32
+	var reorderProb, reorderCorr uint32
+	var corruptProb, corruptCorr uint32
+
+	latency := nattrs.Latency
+	loss := Percentage2u32(nattrs.Loss)
+	gap := nattrs.Gap
+	duplicate := Percentage2u32(nattrs.Duplicate)
+	jitter := nattrs.Jitter
+
+	// Correlation
+	if latency > 0 && jitter > 0 {
+		delayCorr = Percentage2u32(nattrs.DelayCorr)
+	}
+	if loss > 0 {
+		lossCorr = Percentage2u32(nattrs.LossCorr)
+	}
+	if duplicate > 0 {
+		duplicateCorr = Percentage2u32(nattrs.DuplicateCorr)
+	}
+	// FIXME should validate values(like loss/duplicate are percentages...)
+	latency = time2Tick(latency)
+
+	if nattrs.Limit != 0 {
+		limit = nattrs.Limit
+	}
+	// Jitter is only valid if latency is > 0
+	if latency > 0 {
+		jitter = time2Tick(jitter)
+	}
+
+	reorderProb = Percentage2u32(nattrs.ReorderProb)
+	reorderCorr = Percentage2u32(nattrs.ReorderCorr)
+
+	if reorderProb > 0 {
+		// ERROR if latency == 0
+		if gap == 0 {
+			gap = 1
+		}
+	}
+
+	corruptProb = Percentage2u32(nattrs.CorruptProb)
+	corruptCorr = Percentage2u32(nattrs.CorruptCorr)
+
+	return &Netem{
+		QdiscAttrs:    attrs,
+		Latency:       latency,
+		DelayCorr:     delayCorr,
+		Limit:         limit,
+		Loss:          loss,
+		LossCorr:      lossCorr,
+		Gap:           gap,
+		Duplicate:     duplicate,
+		DuplicateCorr: duplicateCorr,
+		Jitter:        jitter,
+		ReorderProb:   reorderProb,
+		ReorderCorr:   reorderCorr,
+		CorruptProb:   corruptProb,
+		CorruptCorr:   corruptCorr,
+	}
+}
+
 // QdiscDel will delete a qdisc from the system.
 // Equivalent to: `tc qdisc del $qdisc`
 func QdiscDel(qdisc Qdisc) error {

+ 15 - 29
vendor/src/github.com/vishvananda/netlink/route.go

@@ -3,27 +3,13 @@ package netlink
 import (
 	"fmt"
 	"net"
-	"syscall"
 )
 
 // Scope is an enum representing a route scope.
 type Scope uint8
 
-const (
-	SCOPE_UNIVERSE Scope = syscall.RT_SCOPE_UNIVERSE
-	SCOPE_SITE     Scope = syscall.RT_SCOPE_SITE
-	SCOPE_LINK     Scope = syscall.RT_SCOPE_LINK
-	SCOPE_HOST     Scope = syscall.RT_SCOPE_HOST
-	SCOPE_NOWHERE  Scope = syscall.RT_SCOPE_NOWHERE
-)
-
 type NextHopFlag int
 
-const (
-	FLAG_ONLINK    NextHopFlag = syscall.RTNH_F_ONLINK
-	FLAG_PERVASIVE NextHopFlag = syscall.RTNH_F_PERVASIVE
-)
-
 // Route represents a netlink route.
 type Route struct {
 	LinkIndex  int
@@ -32,6 +18,7 @@ type Route struct {
 	Dst        *net.IPNet
 	Src        net.IP
 	Gw         net.IP
+	MultiPath  []*NexthopInfo
 	Protocol   int
 	Priority   int
 	Table      int
@@ -41,6 +28,10 @@ type Route struct {
 }
 
 func (r Route) String() string {
+	if len(r.MultiPath) > 0 {
+		return fmt.Sprintf("{Dst: %s Src: %s Gw: %s Flags: %s Table: %d}", r.Dst,
+			r.Src, r.MultiPath, r.ListFlags(), r.Table)
+	}
 	return fmt.Sprintf("{Ifindex: %d Dst: %s Src: %s Gw: %s Flags: %s Table: %d}", r.LinkIndex, r.Dst,
 		r.Src, r.Gw, r.ListFlags(), r.Table)
 }
@@ -58,23 +49,18 @@ type flagString struct {
 	s string
 }
 
-var testFlags = []flagString{
-	{f: FLAG_ONLINK, s: "onlink"},
-	{f: FLAG_PERVASIVE, s: "pervasive"},
-}
-
-func (r *Route) ListFlags() []string {
-	var flags []string
-	for _, tf := range testFlags {
-		if r.Flags&int(tf.f) != 0 {
-			flags = append(flags, tf.s)
-		}
-	}
-	return flags
-}
-
 // RouteUpdate is sent when a route changes - type is RTM_NEWROUTE or RTM_DELROUTE
 type RouteUpdate struct {
 	Type uint16
 	Route
 }
+
+type NexthopInfo struct {
+	LinkIndex int
+	Hops      int
+	Gw        net.IP
+}
+
+func (n *NexthopInfo) String() string {
+	return fmt.Sprintf("{Ifindex: %d Weight: %d, Gw: %s}", n.LinkIndex, n.Hops+1, n.Gw)
+}

+ 93 - 0
vendor/src/github.com/vishvananda/netlink/route_linux.go

@@ -10,6 +10,14 @@ import (
 
 // RtAttr is shared so it is in netlink_linux.go
 
+const (
+	SCOPE_UNIVERSE Scope = syscall.RT_SCOPE_UNIVERSE
+	SCOPE_SITE     Scope = syscall.RT_SCOPE_SITE
+	SCOPE_LINK     Scope = syscall.RT_SCOPE_LINK
+	SCOPE_HOST     Scope = syscall.RT_SCOPE_HOST
+	SCOPE_NOWHERE  Scope = syscall.RT_SCOPE_NOWHERE
+)
+
 const (
 	RT_FILTER_PROTOCOL uint64 = 1 << (1 + iota)
 	RT_FILTER_SCOPE
@@ -23,6 +31,26 @@ const (
 	RT_FILTER_TABLE
 )
 
+const (
+	FLAG_ONLINK    NextHopFlag = syscall.RTNH_F_ONLINK
+	FLAG_PERVASIVE NextHopFlag = syscall.RTNH_F_PERVASIVE
+)
+
+var testFlags = []flagString{
+	{f: FLAG_ONLINK, s: "onlink"},
+	{f: FLAG_PERVASIVE, s: "pervasive"},
+}
+
+func (r *Route) ListFlags() []string {
+	var flags []string
+	for _, tf := range testFlags {
+		if r.Flags&int(tf.f) != 0 {
+			flags = append(flags, tf.s)
+		}
+	}
+	return flags
+}
+
 // RouteAdd will add a route to the system.
 // Equivalent to: `ip route add $route`
 func RouteAdd(route *Route) error {
@@ -102,6 +130,37 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
 		rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_GATEWAY, gwData))
 	}
 
+	// Encode every next hop of a multipath route as an rtnexthop header
+	// optionally followed by its RTA_GATEWAY attribute, and send the
+	// concatenation as a single RTA_MULTIPATH attribute.
+	if len(route.MultiPath) > 0 {
+		buf := []byte{}
+		for _, nh := range route.MultiPath {
+			rtnh := &nl.RtNexthop{
+				RtNexthop: syscall.RtNexthop{
+					Hops:    uint8(nh.Hops),
+					Ifindex: int32(nh.LinkIndex),
+					Len:     uint16(syscall.SizeofRtNexthop),
+				},
+			}
+			var gwData []byte
+			if nh.Gw != nil {
+				gwFamily := nl.GetIPFamily(nh.Gw)
+				if family != -1 && family != gwFamily {
+					return fmt.Errorf("gateway, source, and destination ip are not the same IP family")
+				}
+				var gw *nl.RtAttr
+				if gwFamily == FAMILY_V4 {
+					gw = nl.NewRtAttr(syscall.RTA_GATEWAY, []byte(nh.Gw.To4()))
+				} else {
+					gw = nl.NewRtAttr(syscall.RTA_GATEWAY, []byte(nh.Gw.To16()))
+				}
+				// Assign to the outer gwData (plain "=", not ":="): a
+				// shadowing declaration here would leave the gateway
+				// attribute out of buf while rtnh.Len still counts it.
+				gwData = gw.Serialize()
+				rtnh.Len += uint16(len(gwData))
+			}
+			buf = append(buf, rtnh.Serialize()...)
+			buf = append(buf, gwData...)
+		}
+		rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_MULTIPATH, buf))
+	}
+
 	if route.Table > 0 {
 		if route.Table >= 256 {
 			msg.Table = syscall.RT_TABLE_UNSPEC
@@ -275,6 +334,40 @@ func deserializeRoute(m []byte) (Route, error) {
 			route.Priority = int(native.Uint32(attr.Value[0:4]))
 		case syscall.RTA_TABLE:
 			route.Table = int(native.Uint32(attr.Value[0:4]))
+		case syscall.RTA_MULTIPATH:
+			// parseRtNexthop decodes the next hop found at the start of
+			// value and returns it together with the bytes that follow it.
+			parseRtNexthop := func(value []byte) (*NexthopInfo, []byte, error) {
+				if len(value) < syscall.SizeofRtNexthop {
+					return nil, nil, fmt.Errorf("Lack of bytes")
+				}
+				nh := nl.DeserializeRtNexthop(value)
+				nhLen := int(nh.RtNexthop.Len)
+				// A declared length shorter than the header itself would
+				// make the slice expressions below panic, so reject it
+				// along with a length exceeding the available bytes.
+				if nhLen < syscall.SizeofRtNexthop || len(value) < nhLen {
+					return nil, nil, fmt.Errorf("Lack of bytes")
+				}
+				info := &NexthopInfo{
+					LinkIndex: int(nh.RtNexthop.Ifindex),
+					Hops:      int(nh.RtNexthop.Hops),
+				}
+				attrs, err := nl.ParseRouteAttr(value[syscall.SizeofRtNexthop:nhLen])
+				if err != nil {
+					return nil, nil, err
+				}
+				for _, attr := range attrs {
+					switch attr.Attr.Type {
+					case syscall.RTA_GATEWAY:
+						info.Gw = net.IP(attr.Value)
+					}
+				}
+				return info, value[nhLen:], nil
+			}
+			rest := attr.Value
+			for len(rest) > 0 {
+				info, buf, err := parseRtNexthop(rest)
+				if err != nil {
+					return route, err
+				}
+				route.MultiPath = append(route.MultiPath, info)
+				rest = buf
+			}
 		}
 	}
 	return route, nil

+ 7 - 0
vendor/src/github.com/vishvananda/netlink/route_unspecified.go

@@ -0,0 +1,7 @@
+// +build !linux
+
+package netlink
+
+func (r *Route) ListFlags() []string {
+	return []string{}
+}

+ 0 - 3
vendor/src/github.com/vishvananda/netlink/rule.go

@@ -3,13 +3,10 @@ package netlink
 import (
 	"fmt"
 	"net"
-
-	"github.com/vishvananda/netlink/nl"
 )
 
 // Rule represents a netlink rule.
 type Rule struct {
-	*nl.RtMsg
 	Priority          int
 	Table             int
 	Mark              int

+ 0 - 1
vendor/src/github.com/vishvananda/netlink/rule_linux.go

@@ -165,7 +165,6 @@ func (h *Handle) RuleList(family int) ([]Rule, error) {
 		}
 
 		rule := NewRule()
-		rule.RtMsg = msg
 
 		for j := range attrs {
 			switch attrs[j].Attr.Type {

+ 1 - 1
vendor/src/github.com/vishvananda/netlink/xfrm.go

@@ -13,7 +13,7 @@ const (
 	XFRM_PROTO_ESP       Proto = syscall.IPPROTO_ESP
 	XFRM_PROTO_AH        Proto = syscall.IPPROTO_AH
 	XFRM_PROTO_HAO       Proto = syscall.IPPROTO_DSTOPTS
-	XFRM_PROTO_COMP      Proto = syscall.IPPROTO_COMP
+	XFRM_PROTO_COMP      Proto = 0x6c // NOTE not defined on darwin
 	XFRM_PROTO_IPSEC_ANY Proto = syscall.IPPROTO_RAW
 )
 

+ 1 - 3
vendor/src/github.com/vishvananda/netlink/xfrm_state.go

@@ -3,8 +3,6 @@ package netlink
 import (
 	"fmt"
 	"net"
-
-	"github.com/vishvananda/netlink/nl"
 )
 
 // XfrmStateAlgo represents the algorithm to use for the ipsec encryption.
@@ -93,7 +91,7 @@ func (sa XfrmState) Print(stats bool) string {
 }
 
 func printLimit(lmt uint64) string {
-	if lmt == nl.XFRM_INF {
+	if lmt == ^uint64(0) {
 		return "(INF)"
 	}
 	return fmt.Sprintf("%d", lmt)