فهرست منبع

Vendoring libnetwork ed311d0

Signed-off-by: Alessandro Boch <aboch@docker.com>
Alessandro Boch 9 سال پیش
والد
کامیت
e26c513fa8

+ 1 - 1
hack/vendor.sh

@@ -65,7 +65,7 @@ clone git github.com/RackSec/srslog 259aed10dfa74ea2961eddd1d9847619f6e98837
 clone git github.com/imdario/mergo 0.2.1
 
 #get libnetwork packages
-clone git github.com/docker/libnetwork ab62dd6bf06bf0637d66d529931b69a5544468cb
+clone git github.com/docker/libnetwork  ed311d050fda7821f2e7c53a7e08a0205923aef5
 clone git github.com/docker/go-events 39718a26497694185f8fb58a7d6f31947f3dc42d
 clone git github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80
 clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec

+ 137 - 34
vendor/src/github.com/docker/libnetwork/agent.go

@@ -3,12 +3,10 @@ package libnetwork
 //go:generate protoc -I.:Godeps/_workspace/src/github.com/gogo/protobuf  --gogo_out=import_path=github.com/docker/libnetwork,Mgogoproto/gogo.proto=github.com/gogo/protobuf/gogoproto:. agent.proto
 
 import (
-	"encoding/hex"
 	"fmt"
 	"net"
 	"os"
 	"sort"
-	"strconv"
 
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/go-events"
@@ -20,6 +18,12 @@ import (
 	"github.com/gogo/protobuf/proto"
 )
 
+const (
+	subsysGossip = "networking:gossip"
+	subsysIPSec  = "networking:ipsec"
+	keyringSize  = 3
+)
+
 // ByTime implements sort.Interface for []*types.EncryptionKey based on
 // the LamportTime field.
 type ByTime []*types.EncryptionKey
@@ -80,6 +84,82 @@ func resolveAddr(addrOrInterface string) (string, error) {
 func (c *controller) handleKeyChange(keys []*types.EncryptionKey) error {
 	drvEnc := discoverapi.DriverEncryptionUpdate{}
 
+	a := c.agent
+	// Find the deleted key. If the deleted key was the primary key,
+	// a new primary key should be set before removing if from keyring.
+	deleted := []byte{}
+	j := len(c.keys)
+	for i := 0; i < j; {
+		same := false
+		for _, key := range keys {
+			if same = key.LamportTime == c.keys[i].LamportTime; same {
+				break
+			}
+		}
+		if !same {
+			cKey := c.keys[i]
+			if cKey.Subsystem == subsysGossip {
+				deleted = cKey.Key
+			}
+
+			if cKey.Subsystem == subsysGossip /* subsysIPSec */ {
+				drvEnc.Prune = cKey.Key
+				drvEnc.PruneTag = cKey.LamportTime
+			}
+			c.keys[i], c.keys[j-1] = c.keys[j-1], c.keys[i]
+			c.keys[j-1] = nil
+			j--
+		}
+		i++
+	}
+	c.keys = c.keys[:j]
+
+	// Find the new key and add it to the key ring
+	for _, key := range keys {
+		same := false
+		for _, cKey := range c.keys {
+			if same = cKey.LamportTime == key.LamportTime; same {
+				break
+			}
+		}
+		if !same {
+			c.keys = append(c.keys, key)
+			if key.Subsystem == subsysGossip {
+				a.networkDB.SetKey(key.Key)
+			}
+
+			if key.Subsystem == subsysGossip /*subsysIPSec*/ {
+				drvEnc.Key = key.Key
+				drvEnc.Tag = key.LamportTime
+			}
+		}
+	}
+
+	key, tag := c.getPrimaryKeyTag(subsysGossip)
+	a.networkDB.SetPrimaryKey(key)
+
+	//key, tag = c.getPrimaryKeyTag(subsysIPSec)
+	drvEnc.Primary = key
+	drvEnc.PrimaryTag = tag
+
+	if len(deleted) > 0 {
+		a.networkDB.RemoveKey(deleted)
+	}
+
+	c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool {
+		err := driver.DiscoverNew(discoverapi.EncryptionKeysUpdate, drvEnc)
+		if err != nil {
+			logrus.Warnf("Failed to update datapath keys in driver %s: %v", name, err)
+		}
+		return false
+	})
+
+	return nil
+}
+
+func (c *controller) handleKeyChangeV1(keys []*types.EncryptionKey) error {
+	drvEnc := discoverapi.DriverEncryptionUpdate{}
+
 	// Find the new key and add it to the key ring
 	a := c.agent
 	for _, key := range keys {
@@ -91,12 +171,12 @@ func (c *controller) handleKeyChange(keys []*types.EncryptionKey) error {
 		}
 		if !same {
 			c.keys = append(c.keys, key)
-			if key.Subsystem == "networking:gossip" {
+			if key.Subsystem == subsysGossip {
 				a.networkDB.SetKey(key.Key)
 			}
-			if key.Subsystem == "networking:gossip" /*"networking:ipsec"*/ {
-				drvEnc.Key = hex.EncodeToString(key.Key)
-				drvEnc.Tag = strconv.FormatUint(key.LamportTime, 10)
+			if key.Subsystem == subsysGossip /*subsysIPSec*/ {
+				drvEnc.Key = key.Key
+				drvEnc.Tag = key.LamportTime
 			}
 			break
 		}
@@ -112,12 +192,12 @@ func (c *controller) handleKeyChange(keys []*types.EncryptionKey) error {
 			}
 		}
 		if !same {
-			if cKey.Subsystem == "networking:gossip" {
+			if cKey.Subsystem == subsysGossip {
 				deleted = cKey.Key
 			}
-			if cKey.Subsystem == "networking:gossip" /*"networking:ipsec"*/ {
-				drvEnc.Prune = hex.EncodeToString(cKey.Key)
-				drvEnc.PruneTag = strconv.FormatUint(cKey.LamportTime, 10)
+			if cKey.Subsystem == subsysGossip /*subsysIPSec*/ {
+				drvEnc.Prune = cKey.Key
+				drvEnc.PruneTag = cKey.LamportTime
 			}
 			c.keys = append(c.keys[:i], c.keys[i+1:]...)
 			break
@@ -126,15 +206,15 @@ func (c *controller) handleKeyChange(keys []*types.EncryptionKey) error {
 
 	sort.Sort(ByTime(c.keys))
 	for _, key := range c.keys {
-		if key.Subsystem == "networking:gossip" {
+		if key.Subsystem == subsysGossip {
 			a.networkDB.SetPrimaryKey(key.Key)
 			break
 		}
 	}
 	for _, key := range c.keys {
-		if key.Subsystem == "networking:gossip" /*"networking:ipsec"*/ {
-			drvEnc.Primary = hex.EncodeToString(key.Key)
-			drvEnc.PrimaryTag = strconv.FormatUint(key.LamportTime, 10)
+		if key.Subsystem == subsysGossip /*subsysIPSec*/ {
+			drvEnc.Primary = key.Key
+			drvEnc.PrimaryTag = key.LamportTime
 			break
 		}
 	}
@@ -197,26 +277,51 @@ func (c *controller) agentSetup() error {
 	return nil
 }
 
-func (c *controller) agentInit(bindAddrOrInterface string) error {
-	if !c.isAgent() {
-		return nil
+// For a given subsystem getKeys sorts the keys by lamport time and returns
+// slice of keys and lamport time which can used as a unique tag for the keys
+func (c *controller) getKeys(subsys string) ([][]byte, []uint64) {
+	sort.Sort(ByTime(c.keys))
+
+	keys := [][]byte{}
+	tags := []uint64{}
+	for _, key := range c.keys {
+		if key.Subsystem == subsys {
+			keys = append(keys, key.Key)
+			tags = append(tags, key.LamportTime)
+		}
 	}
 
-	drvEnc := discoverapi.DriverEncryptionConfig{}
+	if len(keys) < keyringSize {
+		return keys, tags
+	}
+	keys[0], keys[1] = keys[1], keys[0]
+	tags[0], tags[1] = tags[1], tags[0]
+	return keys, tags
+}
 
-	// sort the keys by lamport time
+// getPrimaryKeyTag returns the primary key for a given subsytem from the
+// list of sorted key and the associated tag
+func (c *controller) getPrimaryKeyTag(subsys string) ([]byte, uint64) {
 	sort.Sort(ByTime(c.keys))
-
-	gossipkey := [][]byte{}
+	keys := []*types.EncryptionKey{}
 	for _, key := range c.keys {
-		if key.Subsystem == "networking:gossip" {
-			gossipkey = append(gossipkey, key.Key)
-		}
-		if key.Subsystem == "networking:gossip" /*"networking:ipsec"*/ {
-			drvEnc.Keys = append(drvEnc.Keys, hex.EncodeToString(key.Key))
-			drvEnc.Tags = append(drvEnc.Tags, strconv.FormatUint(key.LamportTime, 10))
+		if key.Subsystem == subsys {
+			keys = append(keys, key)
 		}
 	}
+	return keys[1].Key, keys[1].LamportTime
+}
+
+func (c *controller) agentInit(bindAddrOrInterface string) error {
+	if !c.isAgent() {
+		return nil
+	}
+
+	drvEnc := discoverapi.DriverEncryptionConfig{}
+
+	keys, tags := c.getKeys(subsysGossip) // getKeys(subsysIPSec)
+	drvEnc.Keys = keys
+	drvEnc.Tags = tags
 
 	bindAddr, err := resolveAddr(bindAddrOrInterface)
 	if err != nil {
@@ -227,7 +332,7 @@ func (c *controller) agentInit(bindAddrOrInterface string) error {
 	nDB, err := networkdb.New(&networkdb.Config{
 		BindAddr: bindAddr,
 		NodeName: hostname,
-		Keys:     gossipkey,
+		Keys:     keys,
 	})
 
 	if err != nil {
@@ -275,12 +380,10 @@ func (c *controller) agentDriverNotify(d driverapi.Driver) {
 	})
 
 	drvEnc := discoverapi.DriverEncryptionConfig{}
-	for _, key := range c.keys {
-		if key.Subsystem == "networking:gossip" /*"networking:ipsec"*/ {
-			drvEnc.Keys = append(drvEnc.Keys, hex.EncodeToString(key.Key))
-			drvEnc.Tags = append(drvEnc.Tags, strconv.FormatUint(key.LamportTime, 10))
-		}
-	}
+	keys, tags := c.getKeys(subsysGossip) // getKeys(subsysIPSec)
+	drvEnc.Keys = keys
+	drvEnc.Tags = tags
+
 	c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool {
 		err := driver.DiscoverNew(discoverapi.EncryptionKeysConfig, drvEnc)
 		if err != nil {

+ 3 - 0
vendor/src/github.com/docker/libnetwork/controller.go

@@ -264,6 +264,9 @@ func (c *controller) SetKeys(keys []*types.EncryptionKey) error {
 		c.Unlock()
 		return nil
 	}
+	if len(keys) < keyringSize {
+		return c.handleKeyChangeV1(keys)
+	}
 	return c.handleKeyChange(keys)
 }
 

+ 8 - 8
vendor/src/github.com/docker/libnetwork/discoverapi/discoverapi.go

@@ -42,18 +42,18 @@ type DatastoreConfigData struct {
 // Key in first position is the primary key, the one to be used in tx.
 // Original key and tag types are []byte and uint64
 type DriverEncryptionConfig struct {
-	Keys []string
-	Tags []string
+	Keys [][]byte
+	Tags []uint64
 }
 
 // DriverEncryptionUpdate carries an update to the encryption key(s) as:
 // a new key and/or set a primary key and/or a removal of an existing key.
 // Original key and tag types are []byte and uint64
 type DriverEncryptionUpdate struct {
-	Key        string
-	Tag        string
-	Primary    string
-	PrimaryTag string
-	Prune      string
-	PruneTag   string
+	Key        []byte
+	Tag        uint64
+	Primary    []byte
+	PrimaryTag uint64
+	Prune      []byte
+	PruneTag   uint64
 }

+ 4 - 21
vendor/src/github.com/docker/libnetwork/drivers/overlay/encryption.go

@@ -33,7 +33,10 @@ type key struct {
 }
 
 func (k *key) String() string {
-	return fmt.Sprintf("(key: %s, tag: 0x%x)", hex.EncodeToString(k.value)[0:5], k.tag)
+	if k != nil {
+		return fmt.Sprintf("(key: %s, tag: 0x%x)", hex.EncodeToString(k.value)[0:5], k.tag)
+	}
+	return ""
 }
 
 type spi struct {
@@ -557,23 +560,3 @@ func updateNodeKey(lIP, rIP net.IP, idxs []*spi, curKeys []*key, newIdx, priIdx,
 
 	return spis
 }
-
-func parseEncryptionKey(value, tag string) (*key, error) {
-	var (
-		k   *key
-		err error
-	)
-	if value == "" {
-		return nil, nil
-	}
-	k = &key{}
-	if k.value, err = hex.DecodeString(value); err != nil {
-		return nil, types.BadRequestErrorf("failed to decode key (%s): %v", value, err)
-	}
-	t, err := strconv.ParseUint(tag, 10, 64)
-	if err != nil {
-		return nil, types.BadRequestErrorf("failed to decode tag (%s): %v", tag, err)
-	}
-	k.tag = uint32(t)
-	return k, nil
-}

+ 7 - 2
vendor/src/github.com/docker/libnetwork/drivers/overlay/joinleave.go

@@ -3,6 +3,7 @@ package overlay
 import (
 	"fmt"
 	"net"
+	"syscall"
 
 	log "github.com/Sirupsen/logrus"
 	"github.com/docker/libnetwork/driverapi"
@@ -31,6 +32,12 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
 		return fmt.Errorf("cannot join secure network: encryption keys not present")
 	}
 
+	nlh := ns.NlHandle()
+
+	if n.secure && !nlh.SupportsNetlinkFamily(syscall.NETLINK_XFRM) {
+		return fmt.Errorf("cannot join secure network: required modules to install IPSEC rules are missing on host")
+	}
+
 	s := n.getSubnetforIP(ep.addr)
 	if s == nil {
 		return fmt.Errorf("could not find subnet for endpoint %s", eid)
@@ -65,8 +72,6 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
 		return fmt.Errorf("failed to update overlay endpoint %s to local data store: %v", ep.id[0:7], err)
 	}
 
-	nlh := ns.NlHandle()
-
 	// Set the container interface and its peer MTU to 1450 to allow
 	// for 50 bytes vxlan encap (inner eth header(14) + outer IP(20) +
 	// outer UDP(8) + vxlan header(8))

+ 1 - 1
vendor/src/github.com/docker/libnetwork/drivers/overlay/ov_network.go

@@ -284,7 +284,7 @@ func populateVNITbl() {
 			}
 			defer ns.Close()
 
-			nlh, err := netlink.NewHandleAt(ns)
+			nlh, err := netlink.NewHandleAt(ns, syscall.NETLINK_ROUTE)
 			if err != nil {
 				logrus.Errorf("Could not open netlink handle during vni population for ns %s: %v", path, err)
 				return nil

+ 2 - 1
vendor/src/github.com/docker/libnetwork/drivers/overlay/ov_utils.go

@@ -3,6 +3,7 @@ package overlay
 import (
 	"fmt"
 	"strings"
+	"syscall"
 
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/libnetwork/netutils"
@@ -128,7 +129,7 @@ func deleteVxlanByVNI(path string, vni uint32) error {
 		}
 		defer ns.Close()
 
-		nlh, err = netlink.NewHandleAt(ns)
+		nlh, err = netlink.NewHandleAt(ns, syscall.NETLINK_ROUTE)
 		if err != nil {
 			return fmt.Errorf("failed to get netlink handle for ns %s: %v", path, err)
 		}

+ 18 - 12
vendor/src/github.com/docker/libnetwork/drivers/overlay/overlay.go

@@ -306,9 +306,9 @@ func (d *driver) DiscoverNew(dType discoverapi.DiscoveryType, data interface{})
 		}
 		keys := make([]*key, 0, len(encrData.Keys))
 		for i := 0; i < len(encrData.Keys); i++ {
-			k, err := parseEncryptionKey(encrData.Keys[i], encrData.Tags[i])
-			if err != nil {
-				return err
+			k := &key{
+				value: encrData.Keys[i],
+				tag:   uint32(encrData.Tags[i]),
 			}
 			keys = append(keys, k)
 		}
@@ -319,17 +319,23 @@ func (d *driver) DiscoverNew(dType discoverapi.DiscoveryType, data interface{})
 		if !ok {
 			return fmt.Errorf("invalid encryption key notification data")
 		}
-		newKey, err = parseEncryptionKey(encrData.Key, encrData.Tag)
-		if err != nil {
-			return err
+		if encrData.Key != nil {
+			newKey = &key{
+				value: encrData.Key,
+				tag:   uint32(encrData.Tag),
+			}
 		}
-		priKey, err = parseEncryptionKey(encrData.Primary, encrData.PrimaryTag)
-		if err != nil {
-			return err
+		if encrData.Primary != nil {
+			priKey = &key{
+				value: encrData.Primary,
+				tag:   uint32(encrData.PrimaryTag),
+			}
 		}
-		delKey, err = parseEncryptionKey(encrData.Prune, encrData.PruneTag)
-		if err != nil {
-			return err
+		if encrData.Prune != nil {
+			delKey = &key{
+				value: encrData.Prune,
+				tag:   uint32(encrData.PruneTag),
+			}
 		}
 		d.updateKeys(newKey, priKey, delKey)
 	default:

+ 7 - 0
vendor/src/github.com/docker/libnetwork/networkdb/cluster.go

@@ -3,6 +3,7 @@ package networkdb
 import (
 	"bytes"
 	"crypto/rand"
+	"encoding/hex"
 	"fmt"
 	"math/big"
 	rnd "math/rand"
@@ -36,6 +37,7 @@ func (l *logWriter) Write(p []byte) (int, error) {
 
 // SetKey adds a new key to the key ring
 func (nDB *NetworkDB) SetKey(key []byte) {
+	logrus.Debugf("Adding key %s", hex.EncodeToString(key)[0:5])
 	for _, dbKey := range nDB.config.Keys {
 		if bytes.Equal(key, dbKey) {
 			return
@@ -50,6 +52,7 @@ func (nDB *NetworkDB) SetKey(key []byte) {
 // SetPrimaryKey sets the given key as the primary key. This should have
 // been added apriori through SetKey
 func (nDB *NetworkDB) SetPrimaryKey(key []byte) {
+	logrus.Debugf("Primary Key %s", hex.EncodeToString(key)[0:5])
 	for _, dbKey := range nDB.config.Keys {
 		if bytes.Equal(key, dbKey) {
 			if nDB.keyring != nil {
@@ -63,6 +66,7 @@ func (nDB *NetworkDB) SetPrimaryKey(key []byte) {
 // RemoveKey removes a key from the key ring. The key being removed
 // can't be the primary key
 func (nDB *NetworkDB) RemoveKey(key []byte) {
+	logrus.Debugf("Remove Key %s", hex.EncodeToString(key)[0:5])
 	for i, dbKey := range nDB.config.Keys {
 		if bytes.Equal(key, dbKey) {
 			nDB.config.Keys = append(nDB.config.Keys[:i], nDB.config.Keys[i+1:]...)
@@ -90,6 +94,9 @@ func (nDB *NetworkDB) clusterInit() error {
 
 	var err error
 	if len(nDB.config.Keys) > 0 {
+		for i, key := range nDB.config.Keys {
+			logrus.Debugf("Encryption key %d: %s", i+1, hex.EncodeToString(key)[0:5])
+		}
 		nDB.keyring, err = memberlist.NewKeyring(nDB.config.Keys, nDB.config.Keys[0])
 		if err != nil {
 			return err

+ 23 - 1
vendor/src/github.com/docker/libnetwork/ns/init_linux.go

@@ -3,6 +3,8 @@ package ns
 import (
 	"fmt"
 	"os"
+	"os/exec"
+	"strings"
 	"sync"
 	"syscall"
 
@@ -24,7 +26,7 @@ func Init() {
 	if err != nil {
 		log.Errorf("could not get initial namespace: %v", err)
 	}
-	initNl, err = netlink.NewHandle()
+	initNl, err = netlink.NewHandle(getSupportedNlFamilies()...)
 	if err != nil {
 		log.Errorf("could not create netlink handle on initial namespace: %v", err)
 	}
@@ -32,6 +34,7 @@ func Init() {
 
 // SetNamespace sets the initial namespace handler
 func SetNamespace() error {
+	initOnce.Do(Init)
 	if err := netns.Set(initNs); err != nil {
 		linkInfo, linkErr := getLink()
 		if linkErr != nil {
@@ -62,3 +65,22 @@ func NlHandle() *netlink.Handle {
 	initOnce.Do(Init)
 	return initNl
 }
+
+func getSupportedNlFamilies() []int {
+	fams := []int{syscall.NETLINK_ROUTE}
+	if err := loadXfrmModules(); err != nil {
+		log.Warnf("Could not load necessary modules for IPSEC rules: %v", err)
+		return fams
+	}
+	return append(fams, syscall.NETLINK_XFRM)
+}
+
+func loadXfrmModules() error {
+	if out, err := exec.Command("modprobe", "-va", "xfrm_user").CombinedOutput(); err != nil {
+		return fmt.Errorf("Running modprobe xfrm_user failed with message: `%s`, error: %v", strings.TrimSpace(string(out)), err)
+	}
+	if out, err := exec.Command("modprobe", "-va", "xfrm_algo").CombinedOutput(); err != nil {
+		return fmt.Errorf("Running modprobe xfrm_algo failed with message: `%s`, error: %v", strings.TrimSpace(string(out)), err)
+	}
+	return nil
+}

+ 7 - 1
vendor/src/github.com/docker/libnetwork/osl/interface_linux.go

@@ -6,6 +6,7 @@ import (
 	"regexp"
 	"sync"
 	"syscall"
+	"time"
 
 	log "github.com/Sirupsen/logrus"
 	"github.com/docker/libnetwork/ns"
@@ -290,7 +291,12 @@ func (n *networkNamespace) AddInterface(srcName, dstPrefix string, options ...If
 	}
 
 	// Up the interface.
-	if err := nlh.LinkSetUp(iface); err != nil {
+	cnt := 0
+	for err = nlh.LinkSetUp(iface); err != nil && cnt < 3; cnt++ {
+		log.Debugf("retrying link setup because of: %v", err)
+		time.Sleep(10 * time.Millisecond)
+	}
+	if err != nil {
 		return fmt.Errorf("failed to set link up: %v", err)
 	}
 

+ 2 - 4
vendor/src/github.com/docker/libnetwork/osl/namespace_linux.go

@@ -30,7 +30,6 @@ var (
 	gpmWg            sync.WaitGroup
 	gpmCleanupPeriod = 60 * time.Second
 	gpmChan          = make(chan chan struct{})
-	nsOnce           sync.Once
 )
 
 // The networkNamespace type is the linux implementation of the Sandbox
@@ -196,7 +195,7 @@ func NewSandbox(key string, osCreate, isRestore bool) (Sandbox, error) {
 	}
 	defer sboxNs.Close()
 
-	n.nlHandle, err = netlink.NewHandleAt(sboxNs)
+	n.nlHandle, err = netlink.NewHandleAt(sboxNs, syscall.NETLINK_ROUTE)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create a netlink handle: %v", err)
 	}
@@ -238,7 +237,7 @@ func GetSandboxForExternalKey(basePath string, key string) (Sandbox, error) {
 	}
 	defer sboxNs.Close()
 
-	n.nlHandle, err = netlink.NewHandleAt(sboxNs)
+	n.nlHandle, err = netlink.NewHandleAt(sboxNs, syscall.NETLINK_ROUTE)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create a netlink handle: %v", err)
 	}
@@ -326,7 +325,6 @@ func (n *networkNamespace) InvokeFunc(f func()) error {
 
 // InitOSContext initializes OS context while configuring network resources
 func InitOSContext() func() {
-	nsOnce.Do(ns.Init)
 	runtime.LockOSThread()
 	if err := ns.SetNamespace(); err != nil {
 		log.Error(err)

+ 92 - 6
vendor/src/github.com/docker/libnetwork/resolver.go

@@ -62,6 +62,21 @@ type extDNSEntry struct {
 	extOnce sync.Once
 }
 
+type sboxQuery struct {
+	sboxID string
+	dnsID  uint16
+}
+
+type clientConnGC struct {
+	toDelete bool
+	client   clientConn
+}
+
+var (
+	queryGCMutex sync.Mutex
+	queryGC      map[sboxQuery]*clientConnGC
+)
+
 // resolver implements the Resolver interface
 type resolver struct {
 	sb         *sandbox
@@ -79,6 +94,21 @@ type resolver struct {
 
 func init() {
 	rand.Seed(time.Now().Unix())
+	queryGC = make(map[sboxQuery]*clientConnGC)
+	go func() {
+		ticker := time.NewTicker(1 * time.Minute)
+		for range ticker.C {
+			queryGCMutex.Lock()
+			for query, conn := range queryGC {
+				if !conn.toDelete {
+					conn.toDelete = true
+					continue
+				}
+				delete(queryGC, query)
+			}
+			queryGCMutex.Unlock()
+		}
+	}()
 }
 
 // NewResolver creates a new instance of the Resolver
@@ -370,6 +400,7 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) {
 		writer = w
 	} else {
 		queryID := query.Id
+	extQueryLoop:
 		for i := 0; i < maxExtDNS; i++ {
 			extDNS := &r.extDNSList[i]
 			if extDNS.ipStr == "" {
@@ -435,14 +466,26 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) {
 				log.Debugf("Send to DNS server failed, %s", err)
 				continue
 			}
+			for {
+				// If a reply comes after a read timeout it will remain in the socket buffer
+				// and will be read after sending next query. To ignore such stale replies
+				// save the query context in a GC queue when read timesout. On the next reply
+				// if the context is present in the GC queue its a old reply. Ignore it and
+				// read again
+				resp, err = co.ReadMsg()
+				if err != nil {
+					if nerr, ok := err.(net.Error); ok && nerr.Timeout() {
+						r.addQueryToGC(w, query)
+					}
+					r.forwardQueryEnd(w, query)
+					log.Debugf("Read from DNS server failed, %s", err)
+					continue extQueryLoop
+				}
 
-			resp, err = co.ReadMsg()
-			if err != nil {
-				r.forwardQueryEnd(w, query)
-				log.Debugf("Read from DNS server failed, %s", err)
-				continue
+				if !r.checkRespInGC(w, resp) {
+					break
+				}
 			}
-
 			// Retrieves the context for the forwarded query and returns the client connection
 			// to send the reply to
 			writer = r.forwardQueryEnd(w, resp)
@@ -501,6 +544,49 @@ func (r *resolver) forwardQueryStart(w dns.ResponseWriter, msg *dns.Msg, queryID
 	return true
 }
 
+func (r *resolver) addQueryToGC(w dns.ResponseWriter, msg *dns.Msg) {
+	if w.LocalAddr().Network() != "udp" {
+		return
+	}
+
+	r.queryLock.Lock()
+	cc, ok := r.client[msg.Id]
+	r.queryLock.Unlock()
+	if !ok {
+		return
+	}
+
+	query := sboxQuery{
+		sboxID: r.sb.ID(),
+		dnsID:  msg.Id,
+	}
+	clientGC := &clientConnGC{
+		client: cc,
+	}
+	queryGCMutex.Lock()
+	queryGC[query] = clientGC
+	queryGCMutex.Unlock()
+}
+
+func (r *resolver) checkRespInGC(w dns.ResponseWriter, msg *dns.Msg) bool {
+	if w.LocalAddr().Network() != "udp" {
+		return false
+	}
+
+	query := sboxQuery{
+		sboxID: r.sb.ID(),
+		dnsID:  msg.Id,
+	}
+
+	queryGCMutex.Lock()
+	defer queryGCMutex.Unlock()
+	if _, ok := queryGC[query]; ok {
+		delete(queryGC, query)
+		return true
+	}
+	return false
+}
+
 func (r *resolver) forwardQueryEnd(w dns.ResponseWriter, msg *dns.Msg) dns.ResponseWriter {
 	var (
 		cc clientConn