Vendoring libnetwork ed311d0

Signed-off-by: Alessandro Boch <aboch@docker.com>
(cherry picked from commit e26c513fa8)
This commit is contained in:
Alessandro Boch 2016-06-22 09:40:20 -07:00 committed by Tibor Vass
parent e928358dfb
commit ada93c7182
14 changed files with 312 additions and 92 deletions

View file

@ -65,7 +65,7 @@ clone git github.com/RackSec/srslog 259aed10dfa74ea2961eddd1d9847619f6e98837
clone git github.com/imdario/mergo 0.2.1
#get libnetwork packages
clone git github.com/docker/libnetwork ab62dd6bf06bf0637d66d529931b69a5544468cb
clone git github.com/docker/libnetwork ed311d050fda7821f2e7c53a7e08a0205923aef5
clone git github.com/docker/go-events 39718a26497694185f8fb58a7d6f31947f3dc42d
clone git github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80
clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec

View file

@ -3,12 +3,10 @@ package libnetwork
//go:generate protoc -I.:Godeps/_workspace/src/github.com/gogo/protobuf --gogo_out=import_path=github.com/docker/libnetwork,Mgogoproto/gogo.proto=github.com/gogo/protobuf/gogoproto:. agent.proto
import (
"encoding/hex"
"fmt"
"net"
"os"
"sort"
"strconv"
"github.com/Sirupsen/logrus"
"github.com/docker/go-events"
@ -20,6 +18,12 @@ import (
"github.com/gogo/protobuf/proto"
)
const (
subsysGossip = "networking:gossip"
subsysIPSec = "networking:ipsec"
keyringSize = 3
)
// ByTime implements sort.Interface for []*types.EncryptionKey based on
// the LamportTime field.
type ByTime []*types.EncryptionKey
@ -80,6 +84,82 @@ func resolveAddr(addrOrInterface string) (string, error) {
func (c *controller) handleKeyChange(keys []*types.EncryptionKey) error {
drvEnc := discoverapi.DriverEncryptionUpdate{}
a := c.agent
// Find the deleted key. If the deleted key was the primary key,
// a new primary key should be set before removing if from keyring.
deleted := []byte{}
j := len(c.keys)
for i := 0; i < j; {
same := false
for _, key := range keys {
if same = key.LamportTime == c.keys[i].LamportTime; same {
break
}
}
if !same {
cKey := c.keys[i]
if cKey.Subsystem == subsysGossip {
deleted = cKey.Key
}
if cKey.Subsystem == subsysGossip /* subsysIPSec */ {
drvEnc.Prune = cKey.Key
drvEnc.PruneTag = cKey.LamportTime
}
c.keys[i], c.keys[j-1] = c.keys[j-1], c.keys[i]
c.keys[j-1] = nil
j--
}
i++
}
c.keys = c.keys[:j]
// Find the new key and add it to the key ring
for _, key := range keys {
same := false
for _, cKey := range c.keys {
if same = cKey.LamportTime == key.LamportTime; same {
break
}
}
if !same {
c.keys = append(c.keys, key)
if key.Subsystem == subsysGossip {
a.networkDB.SetKey(key.Key)
}
if key.Subsystem == subsysGossip /*subsysIPSec*/ {
drvEnc.Key = key.Key
drvEnc.Tag = key.LamportTime
}
}
}
key, tag := c.getPrimaryKeyTag(subsysGossip)
a.networkDB.SetPrimaryKey(key)
//key, tag = c.getPrimaryKeyTag(subsysIPSec)
drvEnc.Primary = key
drvEnc.PrimaryTag = tag
if len(deleted) > 0 {
a.networkDB.RemoveKey(deleted)
}
c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool {
err := driver.DiscoverNew(discoverapi.EncryptionKeysUpdate, drvEnc)
if err != nil {
logrus.Warnf("Failed to update datapath keys in driver %s: %v", name, err)
}
return false
})
return nil
}
func (c *controller) handleKeyChangeV1(keys []*types.EncryptionKey) error {
drvEnc := discoverapi.DriverEncryptionUpdate{}
// Find the new key and add it to the key ring
a := c.agent
for _, key := range keys {
@ -91,12 +171,12 @@ func (c *controller) handleKeyChange(keys []*types.EncryptionKey) error {
}
if !same {
c.keys = append(c.keys, key)
if key.Subsystem == "networking:gossip" {
if key.Subsystem == subsysGossip {
a.networkDB.SetKey(key.Key)
}
if key.Subsystem == "networking:gossip" /*"networking:ipsec"*/ {
drvEnc.Key = hex.EncodeToString(key.Key)
drvEnc.Tag = strconv.FormatUint(key.LamportTime, 10)
if key.Subsystem == subsysGossip /*subsysIPSec*/ {
drvEnc.Key = key.Key
drvEnc.Tag = key.LamportTime
}
break
}
@ -112,12 +192,12 @@ func (c *controller) handleKeyChange(keys []*types.EncryptionKey) error {
}
}
if !same {
if cKey.Subsystem == "networking:gossip" {
if cKey.Subsystem == subsysGossip {
deleted = cKey.Key
}
if cKey.Subsystem == "networking:gossip" /*"networking:ipsec"*/ {
drvEnc.Prune = hex.EncodeToString(cKey.Key)
drvEnc.PruneTag = strconv.FormatUint(cKey.LamportTime, 10)
if cKey.Subsystem == subsysGossip /*subsysIPSec*/ {
drvEnc.Prune = cKey.Key
drvEnc.PruneTag = cKey.LamportTime
}
c.keys = append(c.keys[:i], c.keys[i+1:]...)
break
@ -126,15 +206,15 @@ func (c *controller) handleKeyChange(keys []*types.EncryptionKey) error {
sort.Sort(ByTime(c.keys))
for _, key := range c.keys {
if key.Subsystem == "networking:gossip" {
if key.Subsystem == subsysGossip {
a.networkDB.SetPrimaryKey(key.Key)
break
}
}
for _, key := range c.keys {
if key.Subsystem == "networking:gossip" /*"networking:ipsec"*/ {
drvEnc.Primary = hex.EncodeToString(key.Key)
drvEnc.PrimaryTag = strconv.FormatUint(key.LamportTime, 10)
if key.Subsystem == subsysGossip /*subsysIPSec*/ {
drvEnc.Primary = key.Key
drvEnc.PrimaryTag = key.LamportTime
break
}
}
@ -197,6 +277,41 @@ func (c *controller) agentSetup() error {
return nil
}
// For a given subsystem getKeys sorts the keys by lamport time and returns
// slice of keys and lamport time which can used as a unique tag for the keys
func (c *controller) getKeys(subsys string) ([][]byte, []uint64) {
sort.Sort(ByTime(c.keys))
keys := [][]byte{}
tags := []uint64{}
for _, key := range c.keys {
if key.Subsystem == subsys {
keys = append(keys, key.Key)
tags = append(tags, key.LamportTime)
}
}
if len(keys) < keyringSize {
return keys, tags
}
keys[0], keys[1] = keys[1], keys[0]
tags[0], tags[1] = tags[1], tags[0]
return keys, tags
}
// getPrimaryKeyTag returns the primary key for a given subsytem from the
// list of sorted key and the associated tag
func (c *controller) getPrimaryKeyTag(subsys string) ([]byte, uint64) {
sort.Sort(ByTime(c.keys))
keys := []*types.EncryptionKey{}
for _, key := range c.keys {
if key.Subsystem == subsys {
keys = append(keys, key)
}
}
return keys[1].Key, keys[1].LamportTime
}
func (c *controller) agentInit(bindAddrOrInterface string) error {
if !c.isAgent() {
return nil
@ -204,19 +319,9 @@ func (c *controller) agentInit(bindAddrOrInterface string) error {
drvEnc := discoverapi.DriverEncryptionConfig{}
// sort the keys by lamport time
sort.Sort(ByTime(c.keys))
gossipkey := [][]byte{}
for _, key := range c.keys {
if key.Subsystem == "networking:gossip" {
gossipkey = append(gossipkey, key.Key)
}
if key.Subsystem == "networking:gossip" /*"networking:ipsec"*/ {
drvEnc.Keys = append(drvEnc.Keys, hex.EncodeToString(key.Key))
drvEnc.Tags = append(drvEnc.Tags, strconv.FormatUint(key.LamportTime, 10))
}
}
keys, tags := c.getKeys(subsysGossip) // getKeys(subsysIPSec)
drvEnc.Keys = keys
drvEnc.Tags = tags
bindAddr, err := resolveAddr(bindAddrOrInterface)
if err != nil {
@ -227,7 +332,7 @@ func (c *controller) agentInit(bindAddrOrInterface string) error {
nDB, err := networkdb.New(&networkdb.Config{
BindAddr: bindAddr,
NodeName: hostname,
Keys: gossipkey,
Keys: keys,
})
if err != nil {
@ -275,12 +380,10 @@ func (c *controller) agentDriverNotify(d driverapi.Driver) {
})
drvEnc := discoverapi.DriverEncryptionConfig{}
for _, key := range c.keys {
if key.Subsystem == "networking:gossip" /*"networking:ipsec"*/ {
drvEnc.Keys = append(drvEnc.Keys, hex.EncodeToString(key.Key))
drvEnc.Tags = append(drvEnc.Tags, strconv.FormatUint(key.LamportTime, 10))
}
}
keys, tags := c.getKeys(subsysGossip) // getKeys(subsysIPSec)
drvEnc.Keys = keys
drvEnc.Tags = tags
c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool {
err := driver.DiscoverNew(discoverapi.EncryptionKeysConfig, drvEnc)
if err != nil {

View file

@ -264,6 +264,9 @@ func (c *controller) SetKeys(keys []*types.EncryptionKey) error {
c.Unlock()
return nil
}
if len(keys) < keyringSize {
return c.handleKeyChangeV1(keys)
}
return c.handleKeyChange(keys)
}

View file

@ -42,18 +42,18 @@ type DatastoreConfigData struct {
// Key in first position is the primary key, the one to be used in tx.
// Original key and tag types are []byte and uint64
type DriverEncryptionConfig struct {
Keys []string
Tags []string
Keys [][]byte
Tags []uint64
}
// DriverEncryptionUpdate carries an update to the encryption key(s) as:
// a new key and/or set a primary key and/or a removal of an existing key.
// Original key and tag types are []byte and uint64
type DriverEncryptionUpdate struct {
Key string
Tag string
Primary string
PrimaryTag string
Prune string
PruneTag string
Key []byte
Tag uint64
Primary []byte
PrimaryTag uint64
Prune []byte
PruneTag uint64
}

View file

@ -33,7 +33,10 @@ type key struct {
}
func (k *key) String() string {
return fmt.Sprintf("(key: %s, tag: 0x%x)", hex.EncodeToString(k.value)[0:5], k.tag)
if k != nil {
return fmt.Sprintf("(key: %s, tag: 0x%x)", hex.EncodeToString(k.value)[0:5], k.tag)
}
return ""
}
type spi struct {
@ -557,23 +560,3 @@ func updateNodeKey(lIP, rIP net.IP, idxs []*spi, curKeys []*key, newIdx, priIdx,
return spis
}
func parseEncryptionKey(value, tag string) (*key, error) {
var (
k *key
err error
)
if value == "" {
return nil, nil
}
k = &key{}
if k.value, err = hex.DecodeString(value); err != nil {
return nil, types.BadRequestErrorf("failed to decode key (%s): %v", value, err)
}
t, err := strconv.ParseUint(tag, 10, 64)
if err != nil {
return nil, types.BadRequestErrorf("failed to decode tag (%s): %v", tag, err)
}
k.tag = uint32(t)
return k, nil
}

View file

@ -3,6 +3,7 @@ package overlay
import (
"fmt"
"net"
"syscall"
log "github.com/Sirupsen/logrus"
"github.com/docker/libnetwork/driverapi"
@ -31,6 +32,12 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
return fmt.Errorf("cannot join secure network: encryption keys not present")
}
nlh := ns.NlHandle()
if n.secure && !nlh.SupportsNetlinkFamily(syscall.NETLINK_XFRM) {
return fmt.Errorf("cannot join secure network: required modules to install IPSEC rules are missing on host")
}
s := n.getSubnetforIP(ep.addr)
if s == nil {
return fmt.Errorf("could not find subnet for endpoint %s", eid)
@ -65,8 +72,6 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
return fmt.Errorf("failed to update overlay endpoint %s to local data store: %v", ep.id[0:7], err)
}
nlh := ns.NlHandle()
// Set the container interface and its peer MTU to 1450 to allow
// for 50 bytes vxlan encap (inner eth header(14) + outer IP(20) +
// outer UDP(8) + vxlan header(8))

View file

@ -284,7 +284,7 @@ func populateVNITbl() {
}
defer ns.Close()
nlh, err := netlink.NewHandleAt(ns)
nlh, err := netlink.NewHandleAt(ns, syscall.NETLINK_ROUTE)
if err != nil {
logrus.Errorf("Could not open netlink handle during vni population for ns %s: %v", path, err)
return nil

View file

@ -3,6 +3,7 @@ package overlay
import (
"fmt"
"strings"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/docker/libnetwork/netutils"
@ -128,7 +129,7 @@ func deleteVxlanByVNI(path string, vni uint32) error {
}
defer ns.Close()
nlh, err = netlink.NewHandleAt(ns)
nlh, err = netlink.NewHandleAt(ns, syscall.NETLINK_ROUTE)
if err != nil {
return fmt.Errorf("failed to get netlink handle for ns %s: %v", path, err)
}

View file

@ -306,9 +306,9 @@ func (d *driver) DiscoverNew(dType discoverapi.DiscoveryType, data interface{})
}
keys := make([]*key, 0, len(encrData.Keys))
for i := 0; i < len(encrData.Keys); i++ {
k, err := parseEncryptionKey(encrData.Keys[i], encrData.Tags[i])
if err != nil {
return err
k := &key{
value: encrData.Keys[i],
tag: uint32(encrData.Tags[i]),
}
keys = append(keys, k)
}
@ -319,17 +319,23 @@ func (d *driver) DiscoverNew(dType discoverapi.DiscoveryType, data interface{})
if !ok {
return fmt.Errorf("invalid encryption key notification data")
}
newKey, err = parseEncryptionKey(encrData.Key, encrData.Tag)
if err != nil {
return err
if encrData.Key != nil {
newKey = &key{
value: encrData.Key,
tag: uint32(encrData.Tag),
}
}
priKey, err = parseEncryptionKey(encrData.Primary, encrData.PrimaryTag)
if err != nil {
return err
if encrData.Primary != nil {
priKey = &key{
value: encrData.Primary,
tag: uint32(encrData.PrimaryTag),
}
}
delKey, err = parseEncryptionKey(encrData.Prune, encrData.PruneTag)
if err != nil {
return err
if encrData.Prune != nil {
delKey = &key{
value: encrData.Prune,
tag: uint32(encrData.PruneTag),
}
}
d.updateKeys(newKey, priKey, delKey)
default:

View file

@ -3,6 +3,7 @@ package networkdb
import (
"bytes"
"crypto/rand"
"encoding/hex"
"fmt"
"math/big"
rnd "math/rand"
@ -36,6 +37,7 @@ func (l *logWriter) Write(p []byte) (int, error) {
// SetKey adds a new key to the key ring
func (nDB *NetworkDB) SetKey(key []byte) {
logrus.Debugf("Adding key %s", hex.EncodeToString(key)[0:5])
for _, dbKey := range nDB.config.Keys {
if bytes.Equal(key, dbKey) {
return
@ -50,6 +52,7 @@ func (nDB *NetworkDB) SetKey(key []byte) {
// SetPrimaryKey sets the given key as the primary key. This should have
// been added apriori through SetKey
func (nDB *NetworkDB) SetPrimaryKey(key []byte) {
logrus.Debugf("Primary Key %s", hex.EncodeToString(key)[0:5])
for _, dbKey := range nDB.config.Keys {
if bytes.Equal(key, dbKey) {
if nDB.keyring != nil {
@ -63,6 +66,7 @@ func (nDB *NetworkDB) SetPrimaryKey(key []byte) {
// RemoveKey removes a key from the key ring. The key being removed
// can't be the primary key
func (nDB *NetworkDB) RemoveKey(key []byte) {
logrus.Debugf("Remove Key %s", hex.EncodeToString(key)[0:5])
for i, dbKey := range nDB.config.Keys {
if bytes.Equal(key, dbKey) {
nDB.config.Keys = append(nDB.config.Keys[:i], nDB.config.Keys[i+1:]...)
@ -90,6 +94,9 @@ func (nDB *NetworkDB) clusterInit() error {
var err error
if len(nDB.config.Keys) > 0 {
for i, key := range nDB.config.Keys {
logrus.Debugf("Encryption key %d: %s", i+1, hex.EncodeToString(key)[0:5])
}
nDB.keyring, err = memberlist.NewKeyring(nDB.config.Keys, nDB.config.Keys[0])
if err != nil {
return err

View file

@ -3,6 +3,8 @@ package ns
import (
"fmt"
"os"
"os/exec"
"strings"
"sync"
"syscall"
@ -24,7 +26,7 @@ func Init() {
if err != nil {
log.Errorf("could not get initial namespace: %v", err)
}
initNl, err = netlink.NewHandle()
initNl, err = netlink.NewHandle(getSupportedNlFamilies()...)
if err != nil {
log.Errorf("could not create netlink handle on initial namespace: %v", err)
}
@ -32,6 +34,7 @@ func Init() {
// SetNamespace sets the initial namespace handler
func SetNamespace() error {
initOnce.Do(Init)
if err := netns.Set(initNs); err != nil {
linkInfo, linkErr := getLink()
if linkErr != nil {
@ -62,3 +65,22 @@ func NlHandle() *netlink.Handle {
initOnce.Do(Init)
return initNl
}
func getSupportedNlFamilies() []int {
fams := []int{syscall.NETLINK_ROUTE}
if err := loadXfrmModules(); err != nil {
log.Warnf("Could not load necessary modules for IPSEC rules: %v", err)
return fams
}
return append(fams, syscall.NETLINK_XFRM)
}
func loadXfrmModules() error {
if out, err := exec.Command("modprobe", "-va", "xfrm_user").CombinedOutput(); err != nil {
return fmt.Errorf("Running modprobe xfrm_user failed with message: `%s`, error: %v", strings.TrimSpace(string(out)), err)
}
if out, err := exec.Command("modprobe", "-va", "xfrm_algo").CombinedOutput(); err != nil {
return fmt.Errorf("Running modprobe xfrm_algo failed with message: `%s`, error: %v", strings.TrimSpace(string(out)), err)
}
return nil
}

View file

@ -6,6 +6,7 @@ import (
"regexp"
"sync"
"syscall"
"time"
log "github.com/Sirupsen/logrus"
"github.com/docker/libnetwork/ns"
@ -290,7 +291,12 @@ func (n *networkNamespace) AddInterface(srcName, dstPrefix string, options ...If
}
// Up the interface.
if err := nlh.LinkSetUp(iface); err != nil {
cnt := 0
for err = nlh.LinkSetUp(iface); err != nil && cnt < 3; cnt++ {
log.Debugf("retrying link setup because of: %v", err)
time.Sleep(10 * time.Millisecond)
}
if err != nil {
return fmt.Errorf("failed to set link up: %v", err)
}

View file

@ -30,7 +30,6 @@ var (
gpmWg sync.WaitGroup
gpmCleanupPeriod = 60 * time.Second
gpmChan = make(chan chan struct{})
nsOnce sync.Once
)
// The networkNamespace type is the linux implementation of the Sandbox
@ -196,7 +195,7 @@ func NewSandbox(key string, osCreate, isRestore bool) (Sandbox, error) {
}
defer sboxNs.Close()
n.nlHandle, err = netlink.NewHandleAt(sboxNs)
n.nlHandle, err = netlink.NewHandleAt(sboxNs, syscall.NETLINK_ROUTE)
if err != nil {
return nil, fmt.Errorf("failed to create a netlink handle: %v", err)
}
@ -238,7 +237,7 @@ func GetSandboxForExternalKey(basePath string, key string) (Sandbox, error) {
}
defer sboxNs.Close()
n.nlHandle, err = netlink.NewHandleAt(sboxNs)
n.nlHandle, err = netlink.NewHandleAt(sboxNs, syscall.NETLINK_ROUTE)
if err != nil {
return nil, fmt.Errorf("failed to create a netlink handle: %v", err)
}
@ -326,7 +325,6 @@ func (n *networkNamespace) InvokeFunc(f func()) error {
// InitOSContext initializes OS context while configuring network resources
func InitOSContext() func() {
nsOnce.Do(ns.Init)
runtime.LockOSThread()
if err := ns.SetNamespace(); err != nil {
log.Error(err)

View file

@ -62,6 +62,21 @@ type extDNSEntry struct {
extOnce sync.Once
}
type sboxQuery struct {
sboxID string
dnsID uint16
}
type clientConnGC struct {
toDelete bool
client clientConn
}
var (
queryGCMutex sync.Mutex
queryGC map[sboxQuery]*clientConnGC
)
// resolver implements the Resolver interface
type resolver struct {
sb *sandbox
@ -79,6 +94,21 @@ type resolver struct {
func init() {
rand.Seed(time.Now().Unix())
queryGC = make(map[sboxQuery]*clientConnGC)
go func() {
ticker := time.NewTicker(1 * time.Minute)
for range ticker.C {
queryGCMutex.Lock()
for query, conn := range queryGC {
if !conn.toDelete {
conn.toDelete = true
continue
}
delete(queryGC, query)
}
queryGCMutex.Unlock()
}
}()
}
// NewResolver creates a new instance of the Resolver
@ -370,6 +400,7 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) {
writer = w
} else {
queryID := query.Id
extQueryLoop:
for i := 0; i < maxExtDNS; i++ {
extDNS := &r.extDNSList[i]
if extDNS.ipStr == "" {
@ -435,14 +466,26 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) {
log.Debugf("Send to DNS server failed, %s", err)
continue
}
for {
// If a reply comes after a read timeout it will remain in the socket buffer
// and will be read after sending next query. To ignore such stale replies
// save the query context in a GC queue when read timesout. On the next reply
// if the context is present in the GC queue its a old reply. Ignore it and
// read again
resp, err = co.ReadMsg()
if err != nil {
if nerr, ok := err.(net.Error); ok && nerr.Timeout() {
r.addQueryToGC(w, query)
}
r.forwardQueryEnd(w, query)
log.Debugf("Read from DNS server failed, %s", err)
continue extQueryLoop
}
resp, err = co.ReadMsg()
if err != nil {
r.forwardQueryEnd(w, query)
log.Debugf("Read from DNS server failed, %s", err)
continue
if !r.checkRespInGC(w, resp) {
break
}
}
// Retrieves the context for the forwarded query and returns the client connection
// to send the reply to
writer = r.forwardQueryEnd(w, resp)
@ -501,6 +544,49 @@ func (r *resolver) forwardQueryStart(w dns.ResponseWriter, msg *dns.Msg, queryID
return true
}
func (r *resolver) addQueryToGC(w dns.ResponseWriter, msg *dns.Msg) {
if w.LocalAddr().Network() != "udp" {
return
}
r.queryLock.Lock()
cc, ok := r.client[msg.Id]
r.queryLock.Unlock()
if !ok {
return
}
query := sboxQuery{
sboxID: r.sb.ID(),
dnsID: msg.Id,
}
clientGC := &clientConnGC{
client: cc,
}
queryGCMutex.Lock()
queryGC[query] = clientGC
queryGCMutex.Unlock()
}
func (r *resolver) checkRespInGC(w dns.ResponseWriter, msg *dns.Msg) bool {
if w.LocalAddr().Network() != "udp" {
return false
}
query := sboxQuery{
sboxID: r.sb.ID(),
dnsID: msg.Id,
}
queryGCMutex.Lock()
defer queryGCMutex.Unlock()
if _, ok := queryGC[query]; ok {
delete(queryGC, query)
return true
}
return false
}
func (r *resolver) forwardQueryEnd(w dns.ResponseWriter, msg *dns.Msg) dns.ResponseWriter {
var (
cc clientConn