Vendor libnetwork and github.com/vishvananda/netlink

Signed-off-by: Aaron Lehmann <aaron.lehmann@docker.com>
This commit is contained in:
Aaron Lehmann 2016-07-21 14:27:47 -07:00
parent d626875a94
commit 24d2d53f5d
33 changed files with 351 additions and 111 deletions

View file

@ -65,7 +65,7 @@ clone git github.com/RackSec/srslog 259aed10dfa74ea2961eddd1d9847619f6e98837
clone git github.com/imdario/mergo 0.2.1
#get libnetwork packages
clone git github.com/docker/libnetwork 905d374c096ca1f3a9b75529e52518b7540179f3
clone git github.com/docker/libnetwork 83ab4deaa2da3deb32cb5e64ceec43801dc17370
clone git github.com/docker/go-events afb2b9f2c23f33ada1a22b03651775fdc65a5089
clone git github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80
clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec
@ -75,7 +75,7 @@ clone git github.com/hashicorp/go-multierror fcdddc395df1ddf4247c69bd436e84cfa07
clone git github.com/hashicorp/serf 598c54895cc5a7b1a24a398d635e8c0ea0959870
clone git github.com/docker/libkv v0.2.1
clone git github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25
clone git github.com/vishvananda/netlink 734d02c3e202f682c74b71314b2c61eec0170fd4
clone git github.com/vishvananda/netlink e73bad418fd727ed3a02830b1af1ad0283a1de6c
clone git github.com/BurntSushi/toml f706d00e3de6abe700c994cdd545a1a4915af060
clone git github.com/samuel/go-zookeeper d0e0d8e11f318e000a8cc434616d69e329edc374
clone git github.com/deckarep/golang-set ef32fa3046d9f249d399f98ebaf9be944430fd1d

View file

@ -35,6 +35,7 @@ func (b ByTime) Less(i, j int) bool { return b[i].LamportTime < b[j].LamportTime
type agent struct {
networkDB *networkdb.NetworkDB
bindAddr string
advertiseAddr string
epTblCancel func()
driverCancelFuncs map[string][]func()
}
@ -236,25 +237,14 @@ func (c *controller) handleKeyChangeV1(keys []*types.EncryptionKey) error {
func (c *controller) agentSetup() error {
clusterProvider := c.cfg.Daemon.ClusterProvider
bindAddr, _, _ := net.SplitHostPort(clusterProvider.GetListenAddress())
bindAddr := clusterProvider.GetLocalAddress()
advAddr := clusterProvider.GetAdvertiseAddress()
remote := clusterProvider.GetRemoteAddress()
remoteAddr, _, _ := net.SplitHostPort(remote)
// Determine the BindAddress from RemoteAddress or through best-effort routing
if !isValidClusteringIP(bindAddr) {
if !isValidClusteringIP(remoteAddr) {
remote = "8.8.8.8:53"
}
conn, err := net.Dial("udp", remote)
if err == nil {
bindHostPort := conn.LocalAddr().String()
bindAddr, _, _ = net.SplitHostPort(bindHostPort)
conn.Close()
}
}
if bindAddr != "" && c.agent == nil {
if err := c.agentInit(bindAddr); err != nil {
logrus.Infof("Initializing Libnetwork Agent Local-addr=%s Adv-addr=%s Remote-addr =%s", bindAddr, advAddr, remoteAddr)
if advAddr != "" && c.agent == nil {
if err := c.agentInit(bindAddr, advAddr); err != nil {
logrus.Errorf("Error in agentInit : %v", err)
} else {
c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool {
@ -312,7 +302,7 @@ func (c *controller) getPrimaryKeyTag(subsys string) ([]byte, uint64) {
return keys[1].Key, keys[1].LamportTime
}
func (c *controller) agentInit(bindAddrOrInterface string) error {
func (c *controller) agentInit(bindAddrOrInterface, advertiseAddr string) error {
if !c.isAgent() {
return nil
}
@ -325,9 +315,9 @@ func (c *controller) agentInit(bindAddrOrInterface string) error {
keys, tags := c.getKeys(subsysGossip)
hostname, _ := os.Hostname()
nDB, err := networkdb.New(&networkdb.Config{
BindAddr: bindAddr,
NodeName: hostname,
Keys: keys,
AdvertiseAddr: advertiseAddr,
NodeName: hostname,
Keys: keys,
})
if err != nil {
@ -339,6 +329,7 @@ func (c *controller) agentInit(bindAddrOrInterface string) error {
c.agent = &agent{
networkDB: nDB,
bindAddr: bindAddr,
advertiseAddr: advertiseAddr,
epTblCancel: cancel,
driverCancelFuncs: make(map[string][]func()),
}
@ -377,8 +368,9 @@ func (c *controller) agentDriverNotify(d driverapi.Driver) {
}
d.DiscoverNew(discoverapi.NodeDiscovery, discoverapi.NodeDiscoveryData{
Address: c.agent.bindAddr,
Self: true,
Address: c.agent.advertiseAddr,
BindAddress: c.agent.bindAddr,
Self: true,
})
drvEnc := discoverapi.DriverEncryptionConfig{}

View file

@ -4,7 +4,8 @@ package cluster
type Provider interface {
IsManager() bool
IsAgent() bool
GetListenAddress() string
GetLocalAddress() string
GetAdvertiseAddress() string
GetRemoteAddress() string
ListenClusterEvents() <-chan struct{}
}

View file

@ -11,6 +11,7 @@ import (
"github.com/docker/libnetwork/cluster"
"github.com/docker/libnetwork/datastore"
"github.com/docker/libnetwork/netlabel"
"github.com/docker/libnetwork/osl"
)
// Config encapsulates configurations of various Libnetwork components
@ -197,6 +198,13 @@ func OptionDataDir(dataDir string) Option {
}
}
// OptionExecRoot function returns an option setter for exec root folder
func OptionExecRoot(execRoot string) Option {
return func(c *Config) {
osl.SetBasePath(execRoot)
}
}
// ProcessOptions processes options and stores it in config
func (c *Config) ProcessOptions(options ...Option) {
for _, opt := range options {

View file

@ -378,6 +378,10 @@ func (c *controller) ReloadConfiguration(cfgOptions ...config.Option) error {
return nil
}
c.Lock()
c.cfg = cfg
c.Unlock()
var dsConfig *discoverapi.DatastoreConfigData
for scope, sCfg := range cfg.Scopes {
if scope == datastore.LocalScope || !sCfg.IsValid() {

View file

@ -26,8 +26,9 @@ const (
// NodeDiscoveryData represents the structure backing the node discovery data json string
type NodeDiscoveryData struct {
Address string
Self bool
Address string
BindAddress string
Self bool
}
// DatastoreConfigData is the data for the datastore update event message

View file

@ -83,9 +83,9 @@ func (d *driver) populateEndpoints() error {
n, ok := d.networks[ep.nid]
if !ok {
logrus.Debugf("Network (%s) not found for restored bridge endpoint (%s)", ep.nid[0:7], ep.id[0:7])
logrus.Debugf("Deleting stale bridge endpoint (%s) from store", ep.nid[0:7])
logrus.Debugf("Deleting stale bridge endpoint (%s) from store", ep.id[0:7])
if err := d.storeDelete(ep); err != nil {
logrus.Debugf("Failed to delete stale bridge endpoint (%s) from store", ep.nid[0:7])
logrus.Debugf("Failed to delete stale bridge endpoint (%s) from store", ep.id[0:7])
}
continue
}

View file

@ -82,6 +82,6 @@ func (d *driver) DeleteEndpoint(nid, eid string) error {
if err := d.storeDelete(ep); err != nil {
logrus.Warnf("Failed to remove ipvlan endpoint %s from store: %v", ep.id[0:7], err)
}
n.deleteEndpoint(ep.id)
return nil
}

View file

@ -96,9 +96,9 @@ func (d *driver) populateEndpoints() error {
n, ok := d.networks[ep.nid]
if !ok {
logrus.Debugf("Network (%s) not found for restored ipvlan endpoint (%s)", ep.nid[0:7], ep.id[0:7])
logrus.Debugf("Deleting stale ipvlan endpoint (%s) from store", ep.nid[0:7])
logrus.Debugf("Deleting stale ipvlan endpoint (%s) from store", ep.id[0:7])
if err := d.storeDelete(ep); err != nil {
logrus.Debugf("Failed to delete stale ipvlan endpoint (%s) from store", ep.nid[0:7])
logrus.Debugf("Failed to delete stale ipvlan endpoint (%s) from store", ep.id[0:7])
}
continue
}

View file

@ -96,9 +96,9 @@ func (d *driver) populateEndpoints() error {
n, ok := d.networks[ep.nid]
if !ok {
logrus.Debugf("Network (%s) not found for restored macvlan endpoint (%s)", ep.nid[0:7], ep.id[0:7])
logrus.Debugf("Deleting stale macvlan endpoint (%s) from store", ep.nid[0:7])
logrus.Debugf("Deleting stale macvlan endpoint (%s) from store", ep.id[0:7])
if err := d.storeDelete(ep); err != nil {
logrus.Debugf("Failed to delete stale macvlan endpoint (%s) from store", ep.nid[0:7])
logrus.Debugf("Failed to delete stale macvlan endpoint (%s) from store", ep.id[0:7])
}
continue
}

View file

@ -2,23 +2,27 @@ package overlay
import (
"bytes"
"encoding/binary"
"encoding/hex"
"fmt"
"hash/fnv"
"net"
"sync"
"syscall"
"strconv"
log "github.com/Sirupsen/logrus"
"github.com/docker/libnetwork/iptables"
"github.com/docker/libnetwork/ns"
"github.com/docker/libnetwork/types"
"github.com/vishvananda/netlink"
"strconv"
)
const (
mark = uint32(0xD0C4E3)
timeout = 30
mark = uint32(0xD0C4E3)
timeout = 30
pktExpansion = 26 // SPI(4) + SeqN(4) + IV(8) + PadLength(1) + NextHeader(1) + ICV(8)
)
const (
@ -85,6 +89,7 @@ func (d *driver) checkEncryption(nid string, rIP net.IP, vxlanID uint32, isLocal
}
lIP := types.GetMinimalIP(net.ParseIP(d.bindAddress))
aIP := types.GetMinimalIP(net.ParseIP(d.advertiseAddress))
nodes := map[string]net.IP{}
switch {
@ -107,7 +112,7 @@ func (d *driver) checkEncryption(nid string, rIP net.IP, vxlanID uint32, isLocal
if add {
for _, rIP := range nodes {
if err := setupEncryption(lIP, rIP, vxlanID, d.secMap, d.keys); err != nil {
if err := setupEncryption(lIP, aIP, rIP, vxlanID, d.secMap, d.keys); err != nil {
log.Warnf("Failed to program network encryption between %s and %s: %v", lIP, rIP, err)
}
}
@ -122,7 +127,7 @@ func (d *driver) checkEncryption(nid string, rIP net.IP, vxlanID uint32, isLocal
return nil
}
func setupEncryption(localIP, remoteIP net.IP, vni uint32, em *encrMap, keys []*key) error {
func setupEncryption(localIP, advIP, remoteIP net.IP, vni uint32, em *encrMap, keys []*key) error {
log.Debugf("Programming encryption for vxlan %d between %s and %s", vni, localIP, remoteIP)
rIPs := remoteIP.String()
@ -134,7 +139,7 @@ func setupEncryption(localIP, remoteIP net.IP, vni uint32, em *encrMap, keys []*
}
for i, k := range keys {
spis := &spi{buildSPI(localIP, remoteIP, k.tag), buildSPI(remoteIP, localIP, k.tag)}
spis := &spi{buildSPI(advIP, remoteIP, k.tag), buildSPI(remoteIP, advIP, k.tag)}
dir := reverse
if i == 0 {
dir = bidir
@ -216,7 +221,6 @@ func programMangle(vni uint32, add bool) (err error) {
func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (fSA *netlink.XfrmState, rSA *netlink.XfrmState, err error) {
var (
crypt *netlink.XfrmStateAlgo
action = "Removing"
xfrmProgram = ns.NlHandle().XfrmStateDel
)
@ -224,7 +228,6 @@ func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (f
if add {
action = "Adding"
xfrmProgram = ns.NlHandle().XfrmStateAdd
crypt = &netlink.XfrmStateAlgo{Name: "cbc(aes)", Key: k.value}
}
if dir&reverse > 0 {
@ -236,7 +239,7 @@ func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (f
Mode: netlink.XFRM_MODE_TRANSPORT,
}
if add {
rSA.Crypt = crypt
rSA.Aead = buildAeadAlgo(k, spi.reverse)
}
exists, err := saExists(rSA)
@ -261,7 +264,7 @@ func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (f
Mode: netlink.XFRM_MODE_TRANSPORT,
}
if add {
fSA.Crypt = crypt
fSA.Aead = buildAeadAlgo(k, spi.forward)
}
exists, err := saExists(fSA)
@ -354,13 +357,23 @@ func spExists(sp *netlink.XfrmPolicy) (bool, error) {
}
func buildSPI(src, dst net.IP, st uint32) int {
spi := int(st)
f := src[len(src)-4:]
t := dst[len(dst)-4:]
for i := 0; i < 4; i++ {
spi = spi ^ (int(f[i])^int(t[3-i]))<<uint32(8*i)
b := make([]byte, 4)
binary.BigEndian.PutUint32(b, st)
h := fnv.New32a()
h.Write(src)
h.Write(b)
h.Write(dst)
return int(binary.BigEndian.Uint32(h.Sum(nil)))
}
func buildAeadAlgo(k *key, s int) *netlink.XfrmStateAlgo {
salt := make([]byte, 4)
binary.BigEndian.PutUint32(salt, uint32(s))
return &netlink.XfrmStateAlgo{
Name: "rfc4106(gcm(aes))",
Key: append(k.value, salt...),
ICVLen: 64,
}
return spi
}
func (d *driver) secMapWalk(f func(string, []*spi) ([]*spi, bool)) error {
@ -560,3 +573,14 @@ func updateNodeKey(lIP, rIP net.IP, idxs []*spi, curKeys []*key, newIdx, priIdx,
return spis
}
func (n *network) maxMTU() int {
mtu := vxlanVethMTU
if n.secure {
// In case of encryption account for the
// esp packet espansion and padding
mtu -= pktExpansion
mtu -= (mtu % 4)
}
return mtu
}

View file

@ -75,11 +75,13 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
// Set the container interface and its peer MTU to 1450 to allow
// for 50 bytes vxlan encap (inner eth header(14) + outer IP(20) +
// outer UDP(8) + vxlan header(8))
mtu := n.maxMTU()
veth, err := nlh.LinkByName(overlayIfName)
if err != nil {
return fmt.Errorf("cound not find link by name %s: %v", overlayIfName, err)
}
err = nlh.LinkSetMTU(veth, vxlanVethMTU)
err = nlh.LinkSetMTU(veth, mtu)
if err != nil {
return err
}
@ -93,7 +95,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
if err != nil {
return fmt.Errorf("could not find link by name %s: %v", containerIfName, err)
}
err = nlh.LinkSetMTU(veth, vxlanVethMTU)
err = nlh.LinkSetMTU(veth, mtu)
if err != nil {
return err
}
@ -119,7 +121,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
}
d.peerDbAdd(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac,
net.ParseIP(d.bindAddress), true)
net.ParseIP(d.advertiseAddress), true)
if err := d.checkEncryption(nid, nil, n.vxlanID(s), true, true); err != nil {
log.Warn(err)
@ -128,7 +130,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
buf, err := proto.Marshal(&PeerRecord{
EndpointIP: ep.addr.String(),
EndpointMAC: ep.mac.String(),
TunnelEndpointIP: d.bindAddress,
TunnelEndpointIP: d.advertiseAddress,
})
if err != nil {
return err
@ -159,7 +161,7 @@ func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key stri
// Ignore local peers. We already know about them and they
// should not be added to vxlan fdb.
if peer.TunnelEndpointIP == d.bindAddress {
if peer.TunnelEndpointIP == d.advertiseAddress {
return
}

View file

@ -40,7 +40,7 @@ func (d *driver) serfInit() error {
config := serf.DefaultConfig()
config.Init()
config.MemberlistConfig.BindAddr = d.bindAddress
config.MemberlistConfig.BindAddr = d.advertiseAddress
d.eventCh = make(chan serf.Event, 4)
config.EventCh = d.eventCh

View file

@ -31,22 +31,23 @@ const (
var initVxlanIdm = make(chan (bool), 1)
type driver struct {
eventCh chan serf.Event
notifyCh chan ovNotify
exitCh chan chan struct{}
bindAddress string
neighIP string
config map[string]interface{}
peerDb peerNetworkMap
secMap *encrMap
serfInstance *serf.Serf
networks networkTable
store datastore.DataStore
localStore datastore.DataStore
vxlanIdm *idm.Idm
once sync.Once
joinOnce sync.Once
keys []*key
eventCh chan serf.Event
notifyCh chan ovNotify
exitCh chan chan struct{}
bindAddress string
advertiseAddress string
neighIP string
config map[string]interface{}
peerDb peerNetworkMap
secMap *encrMap
serfInstance *serf.Serf
networks networkTable
store datastore.DataStore
localStore datastore.DataStore
vxlanIdm *idm.Idm
once sync.Once
joinOnce sync.Once
keys []*key
sync.Mutex
}
@ -111,7 +112,11 @@ func (d *driver) restoreEndpoints() error {
ep := kvo.(*endpoint)
n := d.network(ep.nid)
if n == nil {
logrus.Debugf("Network (%s) not found for restored endpoint (%s)", ep.nid, ep.id)
logrus.Debugf("Network (%s) not found for restored endpoint (%s)", ep.nid[0:7], ep.id[0:7])
logrus.Debugf("Deleting stale overlay endpoint (%s) from store", ep.id[0:7])
if err := d.deleteEndpointFromStore(ep); err != nil {
logrus.Debugf("Failed to delete stale overlay endpoint (%s) from store", ep.id[0:7])
}
continue
}
n.addEndpoint(ep)
@ -140,7 +145,7 @@ func (d *driver) restoreEndpoints() error {
}
n.incEndpointCount()
d.peerDbAdd(ep.nid, ep.id, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.bindAddress), true)
d.peerDbAdd(ep.nid, ep.id, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), true)
}
return nil
}
@ -211,20 +216,25 @@ func validateSelf(node string) error {
return fmt.Errorf("Multi-Host overlay networking requires cluster-advertise(%s) to be configured with a local ip-address that is reachable within the cluster", advIP.String())
}
func (d *driver) nodeJoin(node string, self bool) {
func (d *driver) nodeJoin(advertiseAddress, bindAddress string, self bool) {
if self && !d.isSerfAlive() {
if err := validateSelf(node); err != nil {
logrus.Errorf("%s", err.Error())
}
d.Lock()
d.bindAddress = node
d.advertiseAddress = advertiseAddress
d.bindAddress = bindAddress
d.Unlock()
// If there is no cluster store there is no need to start serf.
if d.store != nil {
if err := validateSelf(advertiseAddress); err != nil {
logrus.Warnf("%s", err.Error())
}
err := d.serfInit()
if err != nil {
logrus.Errorf("initializing serf instance failed: %v", err)
d.Lock()
d.advertiseAddress = ""
d.bindAddress = ""
d.Unlock()
return
}
}
@ -232,7 +242,7 @@ func (d *driver) nodeJoin(node string, self bool) {
d.Lock()
if !self {
d.neighIP = node
d.neighIP = advertiseAddress
}
neighIP := d.neighIP
d.Unlock()
@ -246,7 +256,7 @@ func (d *driver) nodeJoin(node string, self bool) {
}
})
if err != nil {
logrus.Errorf("joining serf neighbor %s failed: %v", node, err)
logrus.Errorf("joining serf neighbor %s failed: %v", advertiseAddress, err)
d.Lock()
d.joinOnce = sync.Once{}
d.Unlock()
@ -286,7 +296,7 @@ func (d *driver) DiscoverNew(dType discoverapi.DiscoveryType, data interface{})
if !ok || nodeData.Address == "" {
return fmt.Errorf("invalid discovery data")
}
d.nodeJoin(nodeData.Address, nodeData.Self)
d.nodeJoin(nodeData.Address, nodeData.BindAddress, nodeData.Self)
case discoverapi.DatastoreConfig:
if d.store != nil {
return types.ForbiddenErrorf("cannot accept datastore configuration: Overlay driver has a datastore configured already")

View file

@ -113,6 +113,9 @@ func (ec *endpointCnt) updateStore() error {
if store == nil {
return fmt.Errorf("store not found for scope %s on endpoint count update", ec.DataScope())
}
// make a copy of count and n to avoid being overwritten by store.GetObject
count := ec.EndpointCnt()
n := ec.n
for {
if err := ec.n.getController().updateToStore(ec); err == nil || err != datastore.ErrKeyModified {
return err
@ -120,6 +123,10 @@ func (ec *endpointCnt) updateStore() error {
if err := store.GetObject(datastore.Key(ec.Key()...), ec); err != nil {
return fmt.Errorf("could not update the kvobject to latest on endpoint count update: %v", err)
}
ec.Lock()
ec.Count = count
ec.n = n
ec.Unlock()
}
}
@ -136,7 +143,9 @@ retry:
if inc {
ec.Count++
} else {
ec.Count--
if ec.Count > 0 {
ec.Count--
}
}
ec.Unlock()

View file

@ -1105,9 +1105,13 @@ func (n *network) getSvcRecords(ep *endpoint) []etchosts.Record {
}
var recs []etchosts.Record
sr, _ := n.ctrlr.svcRecords[n.id]
epName := ep.Name()
n.ctrlr.Lock()
sr, _ := n.ctrlr.svcRecords[n.id]
n.ctrlr.Unlock()
for h, ip := range sr.svcMap {
if strings.Split(h, ".")[0] == epName {
continue

View file

@ -81,7 +81,7 @@ func (nDB *NetworkDB) RemoveKey(key []byte) {
func (nDB *NetworkDB) clusterInit() error {
config := memberlist.DefaultLANConfig()
config.Name = nDB.config.NodeName
config.BindAddr = nDB.config.BindAddr
config.AdvertiseAddr = nDB.config.AdvertiseAddr
if nDB.config.BindPort != 0 {
config.BindPort = nDB.config.BindPort

View file

@ -107,9 +107,9 @@ type Config struct {
// NodeName is the cluster wide unique name for this node.
NodeName string
// BindAddr is the local node's IP address that we bind to for
// AdvertiseAddr is the node's IP address that we advertise for
// cluster communication.
BindAddr string
AdvertiseAddr string
// BindPort is the local node's port to which we bind to for
// cluster communication.

View file

@ -303,6 +303,7 @@ func (n *networkNamespace) AddInterface(srcName, dstPrefix string, options ...If
for err = nlh.LinkSetUp(iface); err != nil && cnt < 3; cnt++ {
log.Debugf("retrying link setup because of: %v", err)
time.Sleep(10 * time.Millisecond)
err = nlh.LinkSetUp(iface)
}
if err != nil {
return fmt.Errorf("failed to set link up: %v", err)

View file

@ -6,6 +6,7 @@ import (
"net"
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
@ -21,7 +22,7 @@ import (
"github.com/vishvananda/netns"
)
const prefix = "/var/run/docker/netns"
const defaultPrefix = "/var/run/docker"
var (
once sync.Once
@ -30,6 +31,7 @@ var (
gpmWg sync.WaitGroup
gpmCleanupPeriod = 60 * time.Second
gpmChan = make(chan chan struct{})
prefix = defaultPrefix
)
// The networkNamespace type is the linux implementation of the Sandbox
@ -48,12 +50,21 @@ type networkNamespace struct {
sync.Mutex
}
// SetBasePath sets the base url prefix for the ns path
func SetBasePath(path string) {
prefix = path
}
func init() {
reexec.Register("netns-create", reexecCreateNamespace)
}
func basePath() string {
return filepath.Join(prefix, "netns")
}
func createBasePath() {
err := os.MkdirAll(prefix, 0755)
err := os.MkdirAll(basePath(), 0755)
if err != nil {
panic("Could not create net namespace path directory")
}
@ -142,7 +153,7 @@ func GenerateKey(containerID string) string {
indexStr string
tmpkey string
)
dir, err := ioutil.ReadDir(prefix)
dir, err := ioutil.ReadDir(basePath())
if err != nil {
return ""
}
@ -172,7 +183,7 @@ func GenerateKey(containerID string) string {
maxLen = len(containerID)
}
return prefix + "/" + containerID[:maxLen]
return basePath() + "/" + containerID[:maxLen]
}
// NewSandbox provides a new sandbox instance created in an os specific way

View file

@ -10,3 +10,7 @@ func GC() {
func GetSandboxForExternalKey(path string, key string) (Sandbox, error) {
return nil, nil
}
// SetBasePath sets the base url prefix for the ns path
func SetBasePath(path string) {
}

View file

@ -37,3 +37,7 @@ func InitOSContext() func() {
func SetupTestOSContext(t *testing.T) func() {
return func() {}
}
// SetBasePath sets the base url prefix for the ns path
func SetBasePath(path string) {
}

View file

@ -38,3 +38,7 @@ func InitOSContext() func() {
func SetupTestOSContext(t *testing.T) func() {
return func() {}
}
// SetBasePath sets the base url prefix for the ns path
func SetBasePath(path string) {
}

View file

@ -413,7 +413,12 @@ func (sb *sandbox) ResolveIP(ip string) string {
for _, ep := range sb.getConnectedEndpoints() {
n := ep.getNetwork()
sr, ok := n.getController().svcRecords[n.ID()]
c := n.getController()
c.Lock()
sr, ok := c.svcRecords[n.ID()]
c.Unlock()
if !ok {
continue
}
@ -454,7 +459,12 @@ func (sb *sandbox) ResolveService(name string) ([]*net.SRV, []net.IP, error) {
for _, ep := range sb.getConnectedEndpoints() {
n := ep.getNetwork()
sr, ok := n.getController().svcRecords[n.ID()]
c := n.getController()
c.Lock()
sr, ok := c.svcRecords[n.ID()]
c.Unlock()
if !ok {
continue
}
@ -575,7 +585,11 @@ func (sb *sandbox) resolveName(req string, networkName string, epList []*endpoin
ep.Unlock()
}
sr, ok := n.getController().svcRecords[n.ID()]
c := n.getController()
c.Lock()
sr, ok := c.svcRecords[n.ID()]
c.Unlock()
if !ok {
continue
}

View file

@ -15,7 +15,7 @@ import (
"github.com/opencontainers/runc/libcontainer/configs"
)
const udsBase = "/var/lib/docker/network/files/"
const udsBase = "/run/docker/libnetwork/"
const success = "success"
// processSetKeyReexec is a private function that must be called only on an reexec path

View file

@ -8,6 +8,7 @@ import (
"syscall"
"github.com/vishvananda/netlink/nl"
"github.com/vishvananda/netns"
)
// IFA_FLAGS is a u32 attribute.
@ -192,7 +193,17 @@ type AddrUpdate struct {
// AddrSubscribe takes a chan down which notifications will be sent
// when addresses change. Close the 'done' chan to stop subscription.
func AddrSubscribe(ch chan<- AddrUpdate, done <-chan struct{}) error {
s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_IFADDR, syscall.RTNLGRP_IPV6_IFADDR)
return addrSubscribe(netns.None(), netns.None(), ch, done)
}
// AddrSubscribeAt works like AddrSubscribe plus it allows the caller
// to choose the network namespace in which to subscribe (ns).
func AddrSubscribeAt(ns netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error {
return addrSubscribe(ns, netns.None(), ch, done)
}
func addrSubscribe(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error {
s, err := nl.SubscribeAt(newNs, curNs, syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_IFADDR, syscall.RTNLGRP_IPV6_IFADDR)
if err != nil {
return err
}

View file

@ -143,7 +143,7 @@ func (h *Handle) FilterAdd(filter Filter) error {
if u32.RedirIndex != 0 {
u32.Actions = append([]Action{NewMirredAction(u32.RedirIndex)}, u32.Actions...)
}
if err := encodeActions(actionsAttr, u32.Actions); err != nil {
if err := EncodeActions(actionsAttr, u32.Actions); err != nil {
return err
}
} else if fw, ok := filter.(*Fw); ok {
@ -309,7 +309,7 @@ func toAttrs(tcgen *nl.TcGen, attrs *ActionAttrs) {
attrs.Bindcnt = int(tcgen.Bindcnt)
}
func encodeActions(attr *nl.RtAttr, actions []Action) error {
func EncodeActions(attr *nl.RtAttr, actions []Action) error {
tabIndex := int(nl.TCA_ACT_TAB)
for _, action := range actions {

View file

@ -10,6 +10,7 @@ import (
"unsafe"
"github.com/vishvananda/netlink/nl"
"github.com/vishvananda/netns"
)
const SizeofLinkStats = 0x5c
@ -425,7 +426,7 @@ func addVxlanAttrs(vxlan *Vxlan, linkInfo *nl.RtAttr) {
nl.NewRtAttrChild(data, nl.IFLA_VXLAN_UDP_CSUM, boolAttr(vxlan.UDPCSum))
}
if vxlan.GBP {
nl.NewRtAttrChild(data, nl.IFLA_VXLAN_GBP, boolAttr(vxlan.GBP))
nl.NewRtAttrChild(data, nl.IFLA_VXLAN_GBP, []byte{})
}
if vxlan.NoAge {
nl.NewRtAttrChild(data, nl.IFLA_VXLAN_AGEING, nl.Uint32Attr(0))
@ -1011,7 +1012,17 @@ type LinkUpdate struct {
// LinkSubscribe takes a chan down which notifications will be sent
// when links change. Close the 'done' chan to stop subscription.
func LinkSubscribe(ch chan<- LinkUpdate, done <-chan struct{}) error {
s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_LINK)
return linkSubscribe(netns.None(), netns.None(), ch, done)
}
// LinkSubscribeAt works like LinkSubscribe plus it allows the caller
// to choose the network namespace in which to subscribe (ns).
func LinkSubscribeAt(ns netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}) error {
return linkSubscribe(ns, netns.None(), ch, done)
}
func linkSubscribe(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}) error {
s, err := nl.SubscribeAt(newNs, curNs, syscall.NETLINK_ROUTE, syscall.RTNLGRP_LINK)
if err != nil {
return err
}
@ -1152,7 +1163,7 @@ func parseVxlanData(link Link, data []syscall.NetlinkRouteAttr) {
case nl.IFLA_VXLAN_UDP_CSUM:
vxlan.UDPCSum = int8(datum.Value[0]) != 0
case nl.IFLA_VXLAN_GBP:
vxlan.GBP = int8(datum.Value[0]) != 0
vxlan.GBP = true
case nl.IFLA_VXLAN_AGEING:
vxlan.Age = int(native.Uint32(datum.Value[0:4]))
vxlan.NoAge = vxlan.Age == 0

View file

@ -331,24 +331,63 @@ func getNetlinkSocket(protocol int) (*NetlinkSocket, error) {
// moves back into it when done. If newNs is close, the socket will be opened
// in the current network namespace.
func GetNetlinkSocketAt(newNs, curNs netns.NsHandle, protocol int) (*NetlinkSocket, error) {
var err error
c, err := executeInNetns(newNs, curNs)
if err != nil {
return nil, err
}
defer c()
return getNetlinkSocket(protocol)
}
// executeInNetns sets execution of the code following this call to the
// network namespace newNs, then moves the thread back to curNs if open,
// otherwise to the current netns at the time the function was invoked
// In case of success, the caller is expected to execute the returned function
// at the end of the code that needs to be executed in the network namespace.
// Example:
// func jobAt(...) error {
// d, err := executeInNetns(...)
// if err != nil { return err}
// defer d()
// < code which needs to be executed in specific netns>
// }
// TODO: his function probably belongs to netns pkg.
func executeInNetns(newNs, curNs netns.NsHandle) (func(), error) {
var (
err error
moveBack func(netns.NsHandle) error
closeNs func() error
unlockThd func()
)
restore := func() {
// order matters
if moveBack != nil {
moveBack(curNs)
}
if closeNs != nil {
closeNs()
}
if unlockThd != nil {
unlockThd()
}
}
if newNs.IsOpen() {
runtime.LockOSThread()
defer runtime.UnlockOSThread()
unlockThd = runtime.UnlockOSThread
if !curNs.IsOpen() {
if curNs, err = netns.Get(); err != nil {
restore()
return nil, fmt.Errorf("could not get current namespace while creating netlink socket: %v", err)
}
defer curNs.Close()
closeNs = curNs.Close
}
if err := netns.Set(newNs); err != nil {
restore()
return nil, fmt.Errorf("failed to set into network namespace %d while creating netlink socket: %v", newNs, err)
}
defer netns.Set(curNs)
moveBack = netns.Set
}
return getNetlinkSocket(protocol)
return restore, nil
}
// Create a netlink socket with a given protocol (e.g. NETLINK_ROUTE)
@ -377,6 +416,18 @@ func Subscribe(protocol int, groups ...uint) (*NetlinkSocket, error) {
return s, nil
}
// SubscribeAt works like Subscribe plus let's the caller choose the network
// namespace in which the socket would be opened (newNs). Then control goes back
// to curNs if open, otherwise to the netns at the time this function was called.
func SubscribeAt(newNs, curNs netns.NsHandle, protocol int, groups ...uint) (*NetlinkSocket, error) {
c, err := executeInNetns(newNs, curNs)
if err != nil {
return nil, err
}
defer c()
return Subscribe(protocol, groups...)
}
func (s *NetlinkSocket) Close() {
syscall.Close(s.fd)
s.fd = -1

View file

@ -10,6 +10,7 @@ const (
SizeofXfrmUsersaInfo = 0xe0
SizeofXfrmAlgo = 0x44
SizeofXfrmAlgoAuth = 0x48
SizeofXfrmAlgoAEAD = 0x48
SizeofXfrmEncapTmpl = 0x18
SizeofXfrmUsersaFlush = 0x8
)
@ -194,6 +195,35 @@ func (msg *XfrmAlgoAuth) Serialize() []byte {
// char alg_key[0];
// }
type XfrmAlgoAEAD struct {
AlgName [64]byte
AlgKeyLen uint32
AlgICVLen uint32
AlgKey []byte
}
func (msg *XfrmAlgoAEAD) Len() int {
return SizeofXfrmAlgoAEAD + int(msg.AlgKeyLen/8)
}
func DeserializeXfrmAlgoAEAD(b []byte) *XfrmAlgoAEAD {
ret := XfrmAlgoAEAD{}
copy(ret.AlgName[:], b[0:64])
ret.AlgKeyLen = *(*uint32)(unsafe.Pointer(&b[64]))
ret.AlgICVLen = *(*uint32)(unsafe.Pointer(&b[68]))
ret.AlgKey = b[72:ret.Len()]
return &ret
}
func (msg *XfrmAlgoAEAD) Serialize() []byte {
b := make([]byte, msg.Len())
copy(b[0:64], msg.AlgName[:])
copy(b[64:68], (*(*[4]byte)(unsafe.Pointer(&msg.AlgKeyLen)))[:])
copy(b[68:72], (*(*[4]byte)(unsafe.Pointer(&msg.AlgICVLen)))[:])
copy(b[72:msg.Len()], msg.AlgKey[:])
return b
}
// struct xfrm_encap_tmpl {
// __u16 encap_type;
// __be16 encap_sport;

View file

@ -6,6 +6,7 @@ import (
"syscall"
"github.com/vishvananda/netlink/nl"
"github.com/vishvananda/netns"
)
// RtAttr is shared so it is in netlink_linux.go
@ -421,7 +422,17 @@ func (h *Handle) RouteGet(destination net.IP) ([]Route, error) {
// RouteSubscribe takes a chan down which notifications will be sent
// when routes are added or deleted. Close the 'done' chan to stop subscription.
func RouteSubscribe(ch chan<- RouteUpdate, done <-chan struct{}) error {
s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_ROUTE, syscall.RTNLGRP_IPV6_ROUTE)
return routeSubscribeAt(netns.None(), netns.None(), ch, done)
}
// RouteSubscribeAt works like RouteSubscribe plus it allows the caller
// to choose the network namespace in which to subscribe (ns).
func RouteSubscribeAt(ns netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}) error {
return routeSubscribeAt(ns, netns.None(), ch, done)
}
func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}) error {
s, err := nl.SubscribeAt(newNs, curNs, syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_ROUTE, syscall.RTNLGRP_IPV6_ROUTE)
if err != nil {
return err
}

View file

@ -10,10 +10,18 @@ type XfrmStateAlgo struct {
Name string
Key []byte
TruncateLen int // Auth only
ICVLen int // AEAD only
}
func (a XfrmStateAlgo) String() string {
return fmt.Sprintf("{Name: %s, Key: 0x%x, TruncateLen: %d}", a.Name, a.Key, a.TruncateLen)
base := fmt.Sprintf("{Name: %s, Key: 0x%x", a.Name, a.Key)
if a.TruncateLen != 0 {
base = fmt.Sprintf("%s, Truncate length: %d", base, a.TruncateLen)
}
if a.ICVLen != 0 {
base = fmt.Sprintf("%s, ICV length: %d", base, a.ICVLen)
}
return fmt.Sprintf("%s}", base)
}
// EncapType is an enum representing the optional packet encapsulation.
@ -73,12 +81,13 @@ type XfrmState struct {
Mark *XfrmMark
Auth *XfrmStateAlgo
Crypt *XfrmStateAlgo
Aead *XfrmStateAlgo
Encap *XfrmStateEncap
}
func (sa XfrmState) String() string {
return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, Auth: %v, Crypt: %v, Encap: %v",
sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.Auth, sa.Crypt, sa.Encap)
return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, Auth: %v, Crypt: %v, Aead: %v,Encap: %v",
sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.Auth, sa.Crypt, sa.Aead, sa.Encap)
}
func (sa XfrmState) Print(stats bool) string {
if !stats {

View file

@ -35,6 +35,20 @@ func writeStateAlgoAuth(a *XfrmStateAlgo) []byte {
return algo.Serialize()
}
func writeStateAlgoAead(a *XfrmStateAlgo) []byte {
algo := nl.XfrmAlgoAEAD{
AlgKeyLen: uint32(len(a.Key) * 8),
AlgICVLen: uint32(a.ICVLen),
AlgKey: a.Key,
}
end := len(a.Name)
if end > 64 {
end = 64
}
copy(algo.AlgName[:end], a.Name)
return algo.Serialize()
}
func writeMark(m *XfrmMark) []byte {
mark := &nl.XfrmMark{
Value: m.Value,
@ -97,6 +111,10 @@ func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error {
out := nl.NewRtAttr(nl.XFRMA_ALG_CRYPT, writeStateAlgo(state.Crypt))
req.AddData(out)
}
if state.Aead != nil {
out := nl.NewRtAttr(nl.XFRMA_ALG_AEAD, writeStateAlgoAead(state.Aead))
req.AddData(out)
}
if state.Encap != nil {
encapData := make([]byte, nl.SizeofXfrmEncapTmpl)
encap := nl.DeserializeXfrmEncapTmpl(encapData)
@ -271,6 +289,12 @@ func parseXfrmState(m []byte, family int) (*XfrmState, error) {
state.Auth.Name = nl.BytesToString(algo.AlgName[:])
state.Auth.Key = algo.AlgKey
state.Auth.TruncateLen = int(algo.AlgTruncLen)
case nl.XFRMA_ALG_AEAD:
state.Aead = new(XfrmStateAlgo)
algo := nl.DeserializeXfrmAlgoAEAD(attr.Value[:])
state.Aead.Name = nl.BytesToString(algo.AlgName[:])
state.Aead.Key = algo.AlgKey
state.Aead.ICVLen = int(algo.AlgICVLen)
case nl.XFRMA_ENCAP:
encap := nl.DeserializeXfrmEncapTmpl(attr.Value[:])
state.Encap = new(XfrmStateEncap)