|
@@ -4,7 +4,7 @@
|
|
|
package overlay
|
|
|
|
|
|
import (
|
|
|
- "encoding/json"
|
|
|
+ "errors"
|
|
|
"fmt"
|
|
|
"net"
|
|
|
"os"
|
|
@@ -14,24 +14,19 @@ import (
|
|
|
"strings"
|
|
|
"sync"
|
|
|
|
|
|
- "github.com/docker/docker/libnetwork/datastore"
|
|
|
"github.com/docker/docker/libnetwork/driverapi"
|
|
|
"github.com/docker/docker/libnetwork/netlabel"
|
|
|
- "github.com/docker/docker/libnetwork/netutils"
|
|
|
"github.com/docker/docker/libnetwork/ns"
|
|
|
"github.com/docker/docker/libnetwork/osl"
|
|
|
- "github.com/docker/docker/libnetwork/resolvconf"
|
|
|
"github.com/docker/docker/libnetwork/types"
|
|
|
"github.com/hashicorp/go-multierror"
|
|
|
"github.com/sirupsen/logrus"
|
|
|
"github.com/vishvananda/netlink"
|
|
|
- "github.com/vishvananda/netlink/nl"
|
|
|
"github.com/vishvananda/netns"
|
|
|
"golang.org/x/sys/unix"
|
|
|
)
|
|
|
|
|
|
var (
|
|
|
- hostMode bool
|
|
|
networkOnce sync.Once
|
|
|
networkMu sync.Mutex
|
|
|
vniTbl = make(map[uint32]string)
|
|
@@ -49,18 +44,9 @@ type subnet struct {
|
|
|
gwIP *net.IPNet
|
|
|
}
|
|
|
|
|
|
-type subnetJSON struct {
|
|
|
- SubnetIP string
|
|
|
- GwIP string
|
|
|
- Vni uint32
|
|
|
-}
|
|
|
-
|
|
|
type network struct {
|
|
|
id string
|
|
|
- dbIndex uint64
|
|
|
- dbExists bool
|
|
|
sbox osl.Sandbox
|
|
|
- nlSocket *nl.NetlinkSocket
|
|
|
endpoints endpointTable
|
|
|
driver *driver
|
|
|
joinCnt int
|
|
@@ -75,11 +61,11 @@ type network struct {
|
|
|
|
|
|
func init() {
|
|
|
// Lock main() to the initial thread to exclude the goroutines executing
|
|
|
- // func (*network).watchMiss() or func setDefaultVLAN() from being
|
|
|
- // scheduled onto that thread. Changes to the network namespace of the
|
|
|
- // initial thread alter /proc/self/ns/net, which would break any code
|
|
|
- // which (incorrectly) assumes that that file is a handle to the network
|
|
|
- // namespace for the thread it is currently executing on.
|
|
|
+ // func setDefaultVLAN() from being scheduled onto that thread. Changes to
|
|
|
+ // the network namespace of the initial thread alter /proc/self/ns/net,
|
|
|
+ // which would break any code which (incorrectly) assumes that that file is
|
|
|
+ // a handle to the network namespace for the thread it is currently
|
|
|
+ // executing on.
|
|
|
runtime.LockOSThread()
|
|
|
}
|
|
|
|
|
@@ -113,37 +99,43 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo d
|
|
|
}
|
|
|
|
|
|
vnis := make([]uint32, 0, len(ipV4Data))
|
|
|
- if gval, ok := option[netlabel.GenericData]; ok {
|
|
|
- optMap := gval.(map[string]string)
|
|
|
- if val, ok := optMap[netlabel.OverlayVxlanIDList]; ok {
|
|
|
- logrus.Debugf("overlay: Received vxlan IDs: %s", val)
|
|
|
- vniStrings := strings.Split(val, ",")
|
|
|
- for _, vniStr := range vniStrings {
|
|
|
- vni, err := strconv.Atoi(vniStr)
|
|
|
- if err != nil {
|
|
|
- return fmt.Errorf("invalid vxlan id value %q passed", vniStr)
|
|
|
- }
|
|
|
+ gval, ok := option[netlabel.GenericData]
|
|
|
+ if !ok {
|
|
|
+ return fmt.Errorf("option %s is missing", netlabel.GenericData)
|
|
|
+ }
|
|
|
|
|
|
- vnis = append(vnis, uint32(vni))
|
|
|
- }
|
|
|
+ optMap := gval.(map[string]string)
|
|
|
+ vnisOpt, ok := optMap[netlabel.OverlayVxlanIDList]
|
|
|
+ if !ok {
|
|
|
+ return errors.New("no VNI provided")
|
|
|
+ }
|
|
|
+ logrus.Debugf("overlay: Received vxlan IDs: %s", vnisOpt)
|
|
|
+ vniStrings := strings.Split(vnisOpt, ",")
|
|
|
+ for _, vniStr := range vniStrings {
|
|
|
+ vni, err := strconv.Atoi(vniStr)
|
|
|
+ if err != nil {
|
|
|
+ return fmt.Errorf("invalid vxlan id value %q passed", vniStr)
|
|
|
}
|
|
|
- if _, ok := optMap[secureOption]; ok {
|
|
|
- n.secure = true
|
|
|
+
|
|
|
+ vnis = append(vnis, uint32(vni))
|
|
|
+ }
|
|
|
+
|
|
|
+ if _, ok := optMap[secureOption]; ok {
|
|
|
+ n.secure = true
|
|
|
+ }
|
|
|
+ if val, ok := optMap[netlabel.DriverMTU]; ok {
|
|
|
+ var err error
|
|
|
+ if n.mtu, err = strconv.Atoi(val); err != nil {
|
|
|
+ return fmt.Errorf("failed to parse %v: %v", val, err)
|
|
|
}
|
|
|
- if val, ok := optMap[netlabel.DriverMTU]; ok {
|
|
|
- var err error
|
|
|
- if n.mtu, err = strconv.Atoi(val); err != nil {
|
|
|
- return fmt.Errorf("failed to parse %v: %v", val, err)
|
|
|
- }
|
|
|
- if n.mtu < 0 {
|
|
|
- return fmt.Errorf("invalid MTU value: %v", n.mtu)
|
|
|
- }
|
|
|
+ if n.mtu < 0 {
|
|
|
+ return fmt.Errorf("invalid MTU value: %v", n.mtu)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- // If we are getting vnis from libnetwork, either we get for
|
|
|
- // all subnets or none.
|
|
|
- if len(vnis) != 0 && len(vnis) < len(ipV4Data) {
|
|
|
+ if len(vnis) == 0 {
|
|
|
+ return errors.New("no VNI provided")
|
|
|
+ } else if len(vnis) < len(ipV4Data) {
|
|
|
return fmt.Errorf("insufficient vnis(%d) passed to overlay", len(vnis))
|
|
|
}
|
|
|
|
|
@@ -151,10 +143,7 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo d
|
|
|
s := &subnet{
|
|
|
subnetIP: ipd.Pool,
|
|
|
gwIP: ipd.Gateway,
|
|
|
- }
|
|
|
-
|
|
|
- if len(vnis) != 0 {
|
|
|
- s.vni = vnis[i]
|
|
|
+ vni: vnis[i],
|
|
|
}
|
|
|
|
|
|
n.subnets = append(n.subnets, s)
|
|
@@ -166,10 +155,6 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo d
|
|
|
return fmt.Errorf("attempt to create overlay network %v that already exists", n.id)
|
|
|
}
|
|
|
|
|
|
- if err := n.writeToStore(); err != nil {
|
|
|
- return fmt.Errorf("failed to update data store for network %v: %v", n.id, err)
|
|
|
- }
|
|
|
-
|
|
|
// Make sure no rule is on the way from any stale secure network
|
|
|
if !n.secure {
|
|
|
for _, vni := range vnis {
|
|
@@ -213,10 +198,7 @@ func (d *driver) DeleteNetwork(nid string) error {
|
|
|
|
|
|
// This is similar to d.network(), but we need to keep holding the lock
|
|
|
// until we are done removing this network.
|
|
|
- n, ok := d.networks[nid]
|
|
|
- if !ok {
|
|
|
- n = d.restoreNetworkFromStore(nid)
|
|
|
- }
|
|
|
+ n := d.networks[nid]
|
|
|
if n == nil {
|
|
|
return fmt.Errorf("could not find network with id %s", nid)
|
|
|
}
|
|
@@ -229,27 +211,11 @@ func (d *driver) DeleteNetwork(nid string) error {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
- if err := d.deleteEndpointFromStore(ep); err != nil {
|
|
|
- logrus.Warnf("Failed to delete overlay endpoint %.7s from local store: %v", ep.id, err)
|
|
|
- }
|
|
|
}
|
|
|
|
|
|
doPeerFlush = true
|
|
|
delete(d.networks, nid)
|
|
|
|
|
|
- vnis, err := n.releaseVxlanID()
|
|
|
- if err != nil {
|
|
|
- return err
|
|
|
- }
|
|
|
-
|
|
|
- if n.secure {
|
|
|
- for _, vni := range vnis {
|
|
|
- programMangle(vni, false)
|
|
|
- programInput(vni, false)
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
return nil
|
|
|
}
|
|
|
|
|
@@ -261,16 +227,15 @@ func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
|
|
|
return nil
|
|
|
}
|
|
|
|
|
|
-func (n *network) joinSandbox(s *subnet, restore bool, incJoinCount bool) error {
|
|
|
+func (n *network) joinSandbox(s *subnet, incJoinCount bool) error {
|
|
|
// If there is a race between two go routines here only one will win
|
|
|
// the other will wait.
|
|
|
- networkOnce.Do(networkOnceInit)
|
|
|
+ networkOnce.Do(populateVNITbl)
|
|
|
|
|
|
n.Lock()
|
|
|
- // If non-restore initialization occurred and was successful then
|
|
|
- // tell the peerDB to initialize the sandbox with all the peers
|
|
|
- // previously received from networkdb. But only do this after
|
|
|
- // unlocking the network. Otherwise we could deadlock with
|
|
|
+ // If initialization was successful then tell the peerDB to initialize the
|
|
|
+ // sandbox with all the peers previously received from networkdb. But only
|
|
|
+ // do this after unlocking the network. Otherwise we could deadlock with
|
|
|
// on the peerDB channel while peerDB is waiting for the network lock.
|
|
|
var doInitPeerDB bool
|
|
|
defer func() {
|
|
@@ -281,8 +246,8 @@ func (n *network) joinSandbox(s *subnet, restore bool, incJoinCount bool) error
|
|
|
}()
|
|
|
|
|
|
if !n.sboxInit {
|
|
|
- n.initErr = n.initSandbox(restore)
|
|
|
- doInitPeerDB = n.initErr == nil && !restore
|
|
|
+ n.initErr = n.initSandbox()
|
|
|
+ doInitPeerDB = n.initErr == nil
|
|
|
// If there was an error, we cannot recover it
|
|
|
n.sboxInit = true
|
|
|
}
|
|
@@ -293,9 +258,9 @@ func (n *network) joinSandbox(s *subnet, restore bool, incJoinCount bool) error
|
|
|
|
|
|
subnetErr := s.initErr
|
|
|
if !s.sboxInit {
|
|
|
- subnetErr = n.initSubnetSandbox(s, restore)
|
|
|
- // We can recover from these errors, but not on restore
|
|
|
- if restore || subnetErr == nil {
|
|
|
+ subnetErr = n.initSubnetSandbox(s)
|
|
|
+ // We can recover from these errors
|
|
|
+ if subnetErr == nil {
|
|
|
s.initErr = subnetErr
|
|
|
s.sboxInit = true
|
|
|
}
|
|
@@ -339,12 +304,6 @@ func (n *network) destroySandbox() {
|
|
|
}
|
|
|
|
|
|
for _, s := range n.subnets {
|
|
|
- if hostMode {
|
|
|
- if err := removeFilters(n.id[:12], s.brName); err != nil {
|
|
|
- logrus.Warnf("Could not remove overlay filters: %v", err)
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
if s.vxlanName != "" {
|
|
|
err := deleteInterface(s.vxlanName)
|
|
|
if err != nil {
|
|
@@ -353,18 +312,6 @@ func (n *network) destroySandbox() {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- if hostMode {
|
|
|
- if err := removeNetworkChain(n.id[:12]); err != nil {
|
|
|
- logrus.Warnf("could not remove network chain: %v", err)
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- // Close the netlink socket, this will also release the watchMiss goroutine that is using it
|
|
|
- if n.nlSocket != nil {
|
|
|
- n.nlSocket.Close()
|
|
|
- n.nlSocket = nil
|
|
|
- }
|
|
|
-
|
|
|
n.sbox.Destroy()
|
|
|
n.sbox = nil
|
|
|
}
|
|
@@ -416,45 +363,6 @@ func populateVNITbl() {
|
|
|
})
|
|
|
}
|
|
|
|
|
|
-func networkOnceInit() {
|
|
|
- populateVNITbl()
|
|
|
-
|
|
|
- if os.Getenv("_OVERLAY_HOST_MODE") != "" {
|
|
|
- hostMode = true
|
|
|
- return
|
|
|
- }
|
|
|
-
|
|
|
- err := createVxlan("testvxlan", 1, 0)
|
|
|
- if err != nil {
|
|
|
- logrus.Errorf("Failed to create testvxlan interface: %v", err)
|
|
|
- return
|
|
|
- }
|
|
|
-
|
|
|
- defer deleteInterface("testvxlan")
|
|
|
-
|
|
|
- path := "/proc/self/ns/net"
|
|
|
- hNs, err := netns.GetFromPath(path)
|
|
|
- if err != nil {
|
|
|
- logrus.Errorf("Failed to get network namespace from path %s while setting host mode: %v", path, err)
|
|
|
- return
|
|
|
- }
|
|
|
- defer hNs.Close()
|
|
|
-
|
|
|
- nlh := ns.NlHandle()
|
|
|
-
|
|
|
- iface, err := nlh.LinkByName("testvxlan")
|
|
|
- if err != nil {
|
|
|
- logrus.Errorf("Failed to get link testvxlan while setting host mode: %v", err)
|
|
|
- return
|
|
|
- }
|
|
|
-
|
|
|
- // If we are not able to move the vxlan interface to a namespace
|
|
|
- // then fallback to host mode
|
|
|
- if err := nlh.LinkSetNsFd(iface, int(hNs)); err != nil {
|
|
|
- hostMode = true
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
func (n *network) generateVxlanName(s *subnet) string {
|
|
|
id := n.id
|
|
|
if len(n.id) > 5 {
|
|
@@ -477,79 +385,26 @@ func (n *network) getBridgeNamePrefix(s *subnet) string {
|
|
|
return fmt.Sprintf("ov-%06x", s.vni)
|
|
|
}
|
|
|
|
|
|
-func checkOverlap(nw *net.IPNet) error {
|
|
|
- var nameservers []string
|
|
|
-
|
|
|
- if rc, err := os.ReadFile(resolvconf.Path()); err == nil {
|
|
|
- nameservers = resolvconf.GetNameserversAsCIDR(rc)
|
|
|
- }
|
|
|
-
|
|
|
- if err := netutils.CheckNameserverOverlaps(nameservers, nw); err != nil {
|
|
|
- return fmt.Errorf("overlay subnet %s failed check with nameserver: %v: %v", nw.String(), nameservers, err)
|
|
|
- }
|
|
|
-
|
|
|
- if err := netutils.CheckRouteOverlaps(nw); err != nil {
|
|
|
- return fmt.Errorf("overlay subnet %s failed check with host route table: %v", nw.String(), err)
|
|
|
- }
|
|
|
-
|
|
|
- return nil
|
|
|
-}
|
|
|
-
|
|
|
-func (n *network) restoreSubnetSandbox(s *subnet, brName, vxlanName string) error {
|
|
|
- // restore overlay osl sandbox
|
|
|
- ifaces := map[string][]osl.IfaceOption{
|
|
|
- brName + "+br": {
|
|
|
- n.sbox.InterfaceOptions().Address(s.gwIP),
|
|
|
- n.sbox.InterfaceOptions().Bridge(true),
|
|
|
- },
|
|
|
- }
|
|
|
- if err := n.sbox.Restore(ifaces, nil, nil, nil); err != nil {
|
|
|
- return err
|
|
|
- }
|
|
|
-
|
|
|
- ifaces = map[string][]osl.IfaceOption{
|
|
|
- vxlanName + "+vxlan": {
|
|
|
- n.sbox.InterfaceOptions().Master(brName),
|
|
|
- },
|
|
|
- }
|
|
|
- return n.sbox.Restore(ifaces, nil, nil, nil)
|
|
|
-}
|
|
|
-
|
|
|
func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error {
|
|
|
- if hostMode {
|
|
|
- // Try to delete stale bridge interface if it exists
|
|
|
- if err := deleteInterface(brName); err != nil {
|
|
|
- deleteInterfaceBySubnet(n.getBridgeNamePrefix(s), s)
|
|
|
- }
|
|
|
- // Try to delete the vxlan interface by vni if already present
|
|
|
- deleteVxlanByVNI("", s.vni)
|
|
|
-
|
|
|
- if err := checkOverlap(s.subnetIP); err != nil {
|
|
|
- return err
|
|
|
- }
|
|
|
- }
|
|
|
+ // Try to find this subnet's vni is being used in some
|
|
|
+ // other namespace by looking at vniTbl that we just
|
|
|
+ // populated in the once init. If a hit is found then
|
|
|
+ // it must a stale namespace from previous
|
|
|
+ // life. Destroy it completely and reclaim resourced.
|
|
|
+ networkMu.Lock()
|
|
|
+ path, ok := vniTbl[s.vni]
|
|
|
+ networkMu.Unlock()
|
|
|
+
|
|
|
+ if ok {
|
|
|
+ deleteVxlanByVNI(path, s.vni)
|
|
|
+ if err := unix.Unmount(path, unix.MNT_FORCE); err != nil {
|
|
|
+ logrus.Errorf("unmount of %s failed: %v", path, err)
|
|
|
+ }
|
|
|
+ os.Remove(path)
|
|
|
|
|
|
- if !hostMode {
|
|
|
- // Try to find this subnet's vni is being used in some
|
|
|
- // other namespace by looking at vniTbl that we just
|
|
|
- // populated in the once init. If a hit is found then
|
|
|
- // it must a stale namespace from previous
|
|
|
- // life. Destroy it completely and reclaim resourced.
|
|
|
networkMu.Lock()
|
|
|
- path, ok := vniTbl[s.vni]
|
|
|
+ delete(vniTbl, s.vni)
|
|
|
networkMu.Unlock()
|
|
|
-
|
|
|
- if ok {
|
|
|
- deleteVxlanByVNI(path, s.vni)
|
|
|
- if err := unix.Unmount(path, unix.MNT_FORCE); err != nil {
|
|
|
- logrus.Errorf("unmount of %s failed: %v", path, err)
|
|
|
- }
|
|
|
- os.Remove(path)
|
|
|
-
|
|
|
- networkMu.Lock()
|
|
|
- delete(vniTbl, s.vni)
|
|
|
- networkMu.Unlock()
|
|
|
- }
|
|
|
}
|
|
|
|
|
|
// create a bridge and vxlan device for this subnet and move it to the sandbox
|
|
@@ -588,10 +443,6 @@ func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error
|
|
|
return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.String(), err)
|
|
|
}
|
|
|
|
|
|
- if hostMode {
|
|
|
- return addFilters(n.id[:12], brName)
|
|
|
- }
|
|
|
-
|
|
|
if err := setDefaultVLAN(sbox); err != nil {
|
|
|
// not a fatal error
|
|
|
logrus.WithError(err).Error("set bridge default vlan failed")
|
|
@@ -651,7 +502,7 @@ func setDefaultVLAN(sbox osl.Sandbox) error {
|
|
|
}
|
|
|
|
|
|
// Must be called with the network lock
|
|
|
-func (n *network) initSubnetSandbox(s *subnet, restore bool) error {
|
|
|
+func (n *network) initSubnetSandbox(s *subnet) error {
|
|
|
brName := n.generateBridgeName(s)
|
|
|
vxlanName := n.generateVxlanName(s)
|
|
|
|
|
@@ -667,14 +518,8 @@ func (n *network) initSubnetSandbox(s *subnet, restore bool) error {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- if restore {
|
|
|
- if err := n.restoreSubnetSandbox(s, brName, vxlanName); err != nil {
|
|
|
- return err
|
|
|
- }
|
|
|
- } else {
|
|
|
- if err := n.setupSubnetSandbox(s, brName, vxlanName); err != nil {
|
|
|
- return err
|
|
|
- }
|
|
|
+ if err := n.setupSubnetSandbox(s, brName, vxlanName); err != nil {
|
|
|
+ return err
|
|
|
}
|
|
|
|
|
|
s.vxlanName = vxlanName
|
|
@@ -718,427 +563,39 @@ func (n *network) cleanupStaleSandboxes() {
|
|
|
})
|
|
|
}
|
|
|
|
|
|
-func (n *network) initSandbox(restore bool) error {
|
|
|
+func (n *network) initSandbox() error {
|
|
|
n.initEpoch++
|
|
|
|
|
|
- if !restore {
|
|
|
- if hostMode {
|
|
|
- if err := addNetworkChain(n.id[:12]); err != nil {
|
|
|
- return err
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- // If there are any stale sandboxes related to this network
|
|
|
- // from previous daemon life clean it up here
|
|
|
- n.cleanupStaleSandboxes()
|
|
|
- }
|
|
|
-
|
|
|
- // In the restore case network sandbox already exist; but we don't know
|
|
|
- // what epoch number it was created with. It has to be retrieved by
|
|
|
- // searching the net namespaces.
|
|
|
- var key string
|
|
|
- if restore {
|
|
|
- key = osl.GenerateKey("-" + n.id)
|
|
|
- } else {
|
|
|
- key = osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch) + n.id)
|
|
|
- }
|
|
|
+ // If there are any stale sandboxes related to this network
|
|
|
+ // from previous daemon life clean it up here
|
|
|
+ n.cleanupStaleSandboxes()
|
|
|
|
|
|
- sbox, err := osl.NewSandbox(key, !hostMode, restore)
|
|
|
+ key := osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch) + n.id)
|
|
|
+ sbox, err := osl.NewSandbox(key, true, false)
|
|
|
if err != nil {
|
|
|
- return fmt.Errorf("could not get network sandbox (oper %t): %v", restore, err)
|
|
|
+ return fmt.Errorf("could not get network sandbox: %v", err)
|
|
|
}
|
|
|
|
|
|
// this is needed to let the peerAdd configure the sandbox
|
|
|
n.sbox = sbox
|
|
|
|
|
|
- // If we are in swarm mode, we don't need anymore the watchMiss routine.
|
|
|
- // This will save 1 thread and 1 netlink socket per network
|
|
|
- if !n.driver.isSerfAlive() {
|
|
|
- return nil
|
|
|
- }
|
|
|
-
|
|
|
- var nlSock *nl.NetlinkSocket
|
|
|
- sbox.InvokeFunc(func() {
|
|
|
- nlSock, err = nl.Subscribe(unix.NETLINK_ROUTE, unix.RTNLGRP_NEIGH)
|
|
|
- if err != nil {
|
|
|
- return
|
|
|
- }
|
|
|
- // set the receive timeout to not remain stuck on the RecvFrom if the fd gets closed
|
|
|
- tv := unix.NsecToTimeval(soTimeout.Nanoseconds())
|
|
|
- err = nlSock.SetReceiveTimeout(&tv)
|
|
|
- })
|
|
|
- n.nlSocket = nlSock
|
|
|
-
|
|
|
- if err == nil {
|
|
|
- go n.watchMiss(nlSock, key)
|
|
|
- } else {
|
|
|
- logrus.Errorf("failed to subscribe to neighbor group netlink messages for overlay network %s in sbox %s: %v",
|
|
|
- n.id, sbox.Key(), err)
|
|
|
- }
|
|
|
-
|
|
|
return nil
|
|
|
}
|
|
|
|
|
|
-func (n *network) watchMiss(nlSock *nl.NetlinkSocket, nsPath string) {
|
|
|
- // With the new version of the netlink library the deserialize function makes
|
|
|
- // requests about the interface of the netlink message. This can succeed only
|
|
|
- // if this go routine is in the target namespace.
|
|
|
- origNs, err := netns.Get()
|
|
|
- if err != nil {
|
|
|
- logrus.WithError(err).Error("failed to get the initial network namespace")
|
|
|
- return
|
|
|
- }
|
|
|
- defer origNs.Close()
|
|
|
- newNs, err := netns.GetFromPath(nsPath)
|
|
|
- if err != nil {
|
|
|
- logrus.WithError(err).Errorf("failed to get the namespace %s", nsPath)
|
|
|
- return
|
|
|
- }
|
|
|
- defer newNs.Close()
|
|
|
-
|
|
|
- runtime.LockOSThread()
|
|
|
- if err = netns.Set(newNs); err != nil {
|
|
|
- logrus.WithError(err).Errorf("failed to enter the namespace %s", nsPath)
|
|
|
- runtime.UnlockOSThread()
|
|
|
- return
|
|
|
- }
|
|
|
- defer func() {
|
|
|
- if err := netns.Set(origNs); err != nil {
|
|
|
- logrus.WithError(err).Error("failed to restore the thread's initial network namespace")
|
|
|
- // The error is only fatal for the current thread. Keep this
|
|
|
- // goroutine locked to the thread to make the runtime replace it
|
|
|
- // with a clean thread once this goroutine terminates.
|
|
|
- } else {
|
|
|
- runtime.UnlockOSThread()
|
|
|
- }
|
|
|
- }()
|
|
|
- for {
|
|
|
- msgs, _, err := nlSock.Receive()
|
|
|
- if err != nil {
|
|
|
- n.Lock()
|
|
|
- nlFd := nlSock.GetFd()
|
|
|
- n.Unlock()
|
|
|
- if nlFd == -1 {
|
|
|
- // The netlink socket got closed, simply exit to not leak this goroutine
|
|
|
- return
|
|
|
- }
|
|
|
- // When the receive timeout expires the receive will return EAGAIN
|
|
|
- if err == unix.EAGAIN {
|
|
|
- // we continue here to avoid spam for timeouts
|
|
|
- continue
|
|
|
- }
|
|
|
- logrus.Errorf("Failed to receive from netlink: %v ", err)
|
|
|
- continue
|
|
|
- }
|
|
|
-
|
|
|
- for _, msg := range msgs {
|
|
|
- if msg.Header.Type != unix.RTM_GETNEIGH && msg.Header.Type != unix.RTM_NEWNEIGH {
|
|
|
- continue
|
|
|
- }
|
|
|
-
|
|
|
- neigh, err := netlink.NeighDeserialize(msg.Data)
|
|
|
- if err != nil {
|
|
|
- logrus.Errorf("Failed to deserialize netlink ndmsg: %v", err)
|
|
|
- continue
|
|
|
- }
|
|
|
-
|
|
|
- var (
|
|
|
- ip net.IP
|
|
|
- mac net.HardwareAddr
|
|
|
- l2Miss, l3Miss bool
|
|
|
- )
|
|
|
- if neigh.IP.To4() != nil {
|
|
|
- ip = neigh.IP
|
|
|
- l3Miss = true
|
|
|
- } else if neigh.HardwareAddr != nil {
|
|
|
- mac = []byte(neigh.HardwareAddr)
|
|
|
- ip = net.IP(mac[2:])
|
|
|
- l2Miss = true
|
|
|
- } else {
|
|
|
- continue
|
|
|
- }
|
|
|
-
|
|
|
- // Not any of the network's subnets. Ignore.
|
|
|
- if !n.contains(ip) {
|
|
|
- continue
|
|
|
- }
|
|
|
-
|
|
|
- if neigh.State&(netlink.NUD_STALE|netlink.NUD_INCOMPLETE) == 0 {
|
|
|
- continue
|
|
|
- }
|
|
|
-
|
|
|
- logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac)
|
|
|
- mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, ip)
|
|
|
- if err != nil {
|
|
|
- logrus.Errorf("could not resolve peer %q: %v", ip, err)
|
|
|
- continue
|
|
|
- }
|
|
|
- n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, l2Miss, l3Miss, false)
|
|
|
- }
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-// Restore a network from the store to the driver if it is present.
|
|
|
-// Must be called with the driver locked!
|
|
|
-func (d *driver) restoreNetworkFromStore(nid string) *network {
|
|
|
- n := d.getNetworkFromStore(nid)
|
|
|
- if n != nil {
|
|
|
- n.driver = d
|
|
|
- n.endpoints = endpointTable{}
|
|
|
- d.networks[nid] = n
|
|
|
- }
|
|
|
- return n
|
|
|
-}
|
|
|
-
|
|
|
func (d *driver) network(nid string) *network {
|
|
|
d.Lock()
|
|
|
- n, ok := d.networks[nid]
|
|
|
- if !ok {
|
|
|
- n = d.restoreNetworkFromStore(nid)
|
|
|
- }
|
|
|
+ n := d.networks[nid]
|
|
|
d.Unlock()
|
|
|
|
|
|
return n
|
|
|
}
|
|
|
|
|
|
-func (d *driver) getNetworkFromStore(nid string) *network {
|
|
|
- if d.store == nil {
|
|
|
- return nil
|
|
|
- }
|
|
|
-
|
|
|
- n := &network{id: nid}
|
|
|
- if err := d.store.GetObject(datastore.Key(n.Key()...), n); err != nil {
|
|
|
- return nil
|
|
|
- }
|
|
|
-
|
|
|
- return n
|
|
|
-}
|
|
|
-
|
|
|
func (n *network) sandbox() osl.Sandbox {
|
|
|
n.Lock()
|
|
|
defer n.Unlock()
|
|
|
return n.sbox
|
|
|
}
|
|
|
|
|
|
-func (n *network) vxlanID(s *subnet) uint32 {
|
|
|
- n.Lock()
|
|
|
- defer n.Unlock()
|
|
|
- return s.vni
|
|
|
-}
|
|
|
-
|
|
|
-func (n *network) setVxlanID(s *subnet, vni uint32) {
|
|
|
- n.Lock()
|
|
|
- s.vni = vni
|
|
|
- n.Unlock()
|
|
|
-}
|
|
|
-
|
|
|
-func (n *network) Key() []string {
|
|
|
- return []string{"overlay", "network", n.id}
|
|
|
-}
|
|
|
-
|
|
|
-func (n *network) KeyPrefix() []string {
|
|
|
- return []string{"overlay", "network"}
|
|
|
-}
|
|
|
-
|
|
|
-func (n *network) Value() []byte {
|
|
|
- m := map[string]interface{}{}
|
|
|
-
|
|
|
- netJSON := []*subnetJSON{}
|
|
|
-
|
|
|
- for _, s := range n.subnets {
|
|
|
- sj := &subnetJSON{
|
|
|
- SubnetIP: s.subnetIP.String(),
|
|
|
- GwIP: s.gwIP.String(),
|
|
|
- Vni: s.vni,
|
|
|
- }
|
|
|
- netJSON = append(netJSON, sj)
|
|
|
- }
|
|
|
-
|
|
|
- m["secure"] = n.secure
|
|
|
- m["subnets"] = netJSON
|
|
|
- m["mtu"] = n.mtu
|
|
|
- b, err := json.Marshal(m)
|
|
|
- if err != nil {
|
|
|
- return []byte{}
|
|
|
- }
|
|
|
-
|
|
|
- return b
|
|
|
-}
|
|
|
-
|
|
|
-func (n *network) Index() uint64 {
|
|
|
- return n.dbIndex
|
|
|
-}
|
|
|
-
|
|
|
-func (n *network) SetIndex(index uint64) {
|
|
|
- n.dbIndex = index
|
|
|
- n.dbExists = true
|
|
|
-}
|
|
|
-
|
|
|
-func (n *network) Exists() bool {
|
|
|
- return n.dbExists
|
|
|
-}
|
|
|
-
|
|
|
-func (n *network) Skip() bool {
|
|
|
- return false
|
|
|
-}
|
|
|
-
|
|
|
-func (n *network) SetValue(value []byte) error {
|
|
|
- var (
|
|
|
- m map[string]interface{}
|
|
|
- newNet bool
|
|
|
- isMap = true
|
|
|
- netJSON = []*subnetJSON{}
|
|
|
- )
|
|
|
-
|
|
|
- if err := json.Unmarshal(value, &m); err != nil {
|
|
|
- err := json.Unmarshal(value, &netJSON)
|
|
|
- if err != nil {
|
|
|
- return err
|
|
|
- }
|
|
|
- isMap = false
|
|
|
- }
|
|
|
-
|
|
|
- if len(n.subnets) == 0 {
|
|
|
- newNet = true
|
|
|
- }
|
|
|
-
|
|
|
- if isMap {
|
|
|
- if val, ok := m["secure"]; ok {
|
|
|
- n.secure = val.(bool)
|
|
|
- }
|
|
|
- if val, ok := m["mtu"]; ok {
|
|
|
- n.mtu = int(val.(float64))
|
|
|
- }
|
|
|
- bytes, err := json.Marshal(m["subnets"])
|
|
|
- if err != nil {
|
|
|
- return err
|
|
|
- }
|
|
|
- if err := json.Unmarshal(bytes, &netJSON); err != nil {
|
|
|
- return err
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- for _, sj := range netJSON {
|
|
|
- subnetIPstr := sj.SubnetIP
|
|
|
- gwIPstr := sj.GwIP
|
|
|
- vni := sj.Vni
|
|
|
-
|
|
|
- subnetIP, _ := types.ParseCIDR(subnetIPstr)
|
|
|
- gwIP, _ := types.ParseCIDR(gwIPstr)
|
|
|
-
|
|
|
- if newNet {
|
|
|
- s := &subnet{
|
|
|
- subnetIP: subnetIP,
|
|
|
- gwIP: gwIP,
|
|
|
- vni: vni,
|
|
|
- }
|
|
|
- n.subnets = append(n.subnets, s)
|
|
|
- } else {
|
|
|
- sNet := n.getMatchingSubnet(subnetIP)
|
|
|
- if sNet != nil {
|
|
|
- sNet.vni = vni
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- return nil
|
|
|
-}
|
|
|
-
|
|
|
-func (n *network) DataScope() string {
|
|
|
- return datastore.GlobalScope
|
|
|
-}
|
|
|
-
|
|
|
-func (n *network) writeToStore() error {
|
|
|
- if n.driver.store == nil {
|
|
|
- return nil
|
|
|
- }
|
|
|
-
|
|
|
- return n.driver.store.PutObjectAtomic(n)
|
|
|
-}
|
|
|
-
|
|
|
-func (n *network) releaseVxlanID() ([]uint32, error) {
|
|
|
- n.Lock()
|
|
|
- nSubnets := len(n.subnets)
|
|
|
- n.Unlock()
|
|
|
- if nSubnets == 0 {
|
|
|
- return nil, nil
|
|
|
- }
|
|
|
-
|
|
|
- if n.driver.store != nil {
|
|
|
- if err := n.driver.store.DeleteObjectAtomic(n); err != nil {
|
|
|
- if err == datastore.ErrKeyModified || err == datastore.ErrKeyNotFound {
|
|
|
- // In both the above cases we can safely assume that the key has been removed by some other
|
|
|
- // instance and so simply get out of here
|
|
|
- return nil, nil
|
|
|
- }
|
|
|
-
|
|
|
- return nil, fmt.Errorf("failed to delete network to vxlan id map: %v", err)
|
|
|
- }
|
|
|
- }
|
|
|
- var vnis []uint32
|
|
|
- n.Lock()
|
|
|
- for _, s := range n.subnets {
|
|
|
- if n.driver.vxlanIdm != nil {
|
|
|
- vnis = append(vnis, s.vni)
|
|
|
- }
|
|
|
- s.vni = 0
|
|
|
- }
|
|
|
- n.Unlock()
|
|
|
-
|
|
|
- for _, vni := range vnis {
|
|
|
- n.driver.vxlanIdm.Release(uint64(vni))
|
|
|
- }
|
|
|
-
|
|
|
- return vnis, nil
|
|
|
-}
|
|
|
-
|
|
|
-func (n *network) obtainVxlanID(s *subnet) error {
|
|
|
- // return if the subnet already has a vxlan id assigned
|
|
|
- if n.vxlanID(s) != 0 {
|
|
|
- return nil
|
|
|
- }
|
|
|
-
|
|
|
- if n.driver.store == nil {
|
|
|
- return fmt.Errorf("no valid vxlan id and no datastore configured, cannot obtain vxlan id")
|
|
|
- }
|
|
|
-
|
|
|
- for {
|
|
|
- if err := n.driver.store.GetObject(datastore.Key(n.Key()...), n); err != nil {
|
|
|
- return fmt.Errorf("getting network %q from datastore failed %v", n.id, err)
|
|
|
- }
|
|
|
-
|
|
|
- if n.vxlanID(s) == 0 {
|
|
|
- vxlanID, err := n.driver.vxlanIdm.GetID(true)
|
|
|
- if err != nil {
|
|
|
- return fmt.Errorf("failed to allocate vxlan id: %v", err)
|
|
|
- }
|
|
|
-
|
|
|
- n.setVxlanID(s, uint32(vxlanID))
|
|
|
- if err := n.writeToStore(); err != nil {
|
|
|
- n.driver.vxlanIdm.Release(uint64(n.vxlanID(s)))
|
|
|
- n.setVxlanID(s, 0)
|
|
|
- if err == datastore.ErrKeyModified {
|
|
|
- continue
|
|
|
- }
|
|
|
- return fmt.Errorf("network %q failed to update data store: %v", n.id, err)
|
|
|
- }
|
|
|
- return nil
|
|
|
- }
|
|
|
- return nil
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-// contains return true if the passed ip belongs to one the network's
|
|
|
-// subnets
|
|
|
-func (n *network) contains(ip net.IP) bool {
|
|
|
- for _, s := range n.subnets {
|
|
|
- if s.subnetIP.Contains(ip) {
|
|
|
- return true
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- return false
|
|
|
-}
|
|
|
-
|
|
|
// getSubnetforIP returns the subnet to which the given IP belongs
|
|
|
func (n *network) getSubnetforIP(ip *net.IPNet) *subnet {
|
|
|
for _, s := range n.subnets {
|
|
@@ -1154,22 +611,3 @@ func (n *network) getSubnetforIP(ip *net.IPNet) *subnet {
|
|
|
}
|
|
|
return nil
|
|
|
}
|
|
|
-
|
|
|
-// getMatchingSubnet return the network's subnet that matches the input
|
|
|
-func (n *network) getMatchingSubnet(ip *net.IPNet) *subnet {
|
|
|
- if ip == nil {
|
|
|
- return nil
|
|
|
- }
|
|
|
- for _, s := range n.subnets {
|
|
|
- // first check if the mask lengths are the same
|
|
|
- i, _ := s.subnetIP.Mask.Size()
|
|
|
- j, _ := ip.Mask.Size()
|
|
|
- if i != j {
|
|
|
- continue
|
|
|
- }
|
|
|
- if s.subnetIP.IP.Equal(ip.IP) {
|
|
|
- return s
|
|
|
- }
|
|
|
- }
|
|
|
- return nil
|
|
|
-}
|