Prechádzať zdrojové kódy

Persist and restore overlay endpoints to handle daemon restart

Signed-off-by: Santhosh Manohar <santhosh@docker.com>
Santhosh Manohar 9 rokov pred
rodič
commit
2e9c30a4a1

+ 6 - 2
libnetwork/drivers/overlay/joinleave.go

@@ -40,11 +40,11 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
 		return fmt.Errorf("couldn't get vxlan id for %q: %v", s.subnetIP.String(), err)
 	}
 
-	if err := n.joinSandbox(); err != nil {
+	if err := n.joinSandbox(false); err != nil {
 		return fmt.Errorf("network sandbox join failed: %v", err)
 	}
 
-	if err := n.joinSubnetSandbox(s); err != nil {
+	if err := n.joinSubnetSandbox(s, false); err != nil {
 		return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), err)
 	}
 
@@ -61,6 +61,10 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
 
 	ep.ifName = containerIfName
 
+	if err := d.writeEndpointToStore(ep); err != nil {
+		return fmt.Errorf("failed to update overlay endpoint %s to local data store: %v", ep.id[0:7], err)
+	}
+
 	nlh := ns.NlHandle()
 
 	// Set the container interface and its peer MTU to 1450 to allow

+ 140 - 4
libnetwork/drivers/overlay/ov_endpoint.go

@@ -1,22 +1,30 @@
 package overlay
 
 import (
+	"encoding/json"
 	"fmt"
 	"net"
 
 	log "github.com/Sirupsen/logrus"
+	"github.com/docker/libnetwork/datastore"
 	"github.com/docker/libnetwork/driverapi"
 	"github.com/docker/libnetwork/netutils"
 	"github.com/docker/libnetwork/ns"
+	"github.com/docker/libnetwork/types"
 )
 
 type endpointTable map[string]*endpoint
 
+const overlayEndpointPrefix = "overlay/endpoint"
+
 type endpoint struct {
-	id     string
-	ifName string
-	mac    net.HardwareAddr
-	addr   *net.IPNet
+	id       string
+	nid      string
+	ifName   string
+	mac      net.HardwareAddr
+	addr     *net.IPNet
+	dbExists bool
+	dbIndex  uint64
 }
 
 func (n *network) endpoint(eid string) *endpoint {
@@ -60,6 +68,7 @@ func (d *driver) CreateEndpoint(nid, eid string, ifInfo driverapi.InterfaceInfo,
 
 	ep := &endpoint{
 		id:   eid,
+		nid:  n.id,
 		addr: ifInfo.Address(),
 		mac:  ifInfo.MacAddress(),
 	}
@@ -80,6 +89,10 @@ func (d *driver) CreateEndpoint(nid, eid string, ifInfo driverapi.InterfaceInfo,
 
 	n.addEndpoint(ep)
 
+	if err := d.writeEndpointToStore(ep); err != nil {
+		return fmt.Errorf("failed to update overlay endpoint %s to local store: %v", ep.id[0:7], err)
+	}
+
 	return nil
 }
 
@@ -102,6 +115,10 @@ func (d *driver) DeleteEndpoint(nid, eid string) error {
 
 	n.deleteEndpoint(eid)
 
+	if err := d.deleteEndpointFromStore(ep); err != nil {
+		log.Warnf("Failed to delete overlay endpoint %s from local store: %v", ep.id[0:7], err)
+	}
+
 	if ep.ifName == "" {
 		return nil
 	}
@@ -121,3 +138,122 @@ func (d *driver) DeleteEndpoint(nid, eid string) error {
 func (d *driver) EndpointOperInfo(nid, eid string) (map[string]interface{}, error) {
 	return make(map[string]interface{}, 0), nil
 }
+
+func (d *driver) deleteEndpointFromStore(e *endpoint) error {
+	if d.localStore == nil {
+		return fmt.Errorf("overlay local store not initialized, ep not deleted")
+	}
+
+	if err := d.localStore.DeleteObjectAtomic(e); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func (d *driver) writeEndpointToStore(e *endpoint) error {
+	if d.localStore == nil {
+		return fmt.Errorf("overlay local store not initialized, ep not added")
+	}
+
+	if err := d.localStore.PutObjectAtomic(e); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (ep *endpoint) DataScope() string {
+	return datastore.LocalScope
+}
+
+func (ep *endpoint) New() datastore.KVObject {
+	return &endpoint{}
+}
+
+func (ep *endpoint) CopyTo(o datastore.KVObject) error {
+	dstep := o.(*endpoint)
+	*dstep = *ep
+	return nil
+}
+
+func (ep *endpoint) Key() []string {
+	return []string{overlayEndpointPrefix, ep.id}
+}
+
+func (ep *endpoint) KeyPrefix() []string {
+	return []string{overlayEndpointPrefix}
+}
+
+func (ep *endpoint) Index() uint64 {
+	return ep.dbIndex
+}
+
+func (ep *endpoint) SetIndex(index uint64) {
+	ep.dbIndex = index
+	ep.dbExists = true
+}
+
+func (ep *endpoint) Exists() bool {
+	return ep.dbExists
+}
+
+func (ep *endpoint) Skip() bool {
+	return false
+}
+
+func (ep *endpoint) Value() []byte {
+	b, err := json.Marshal(ep)
+	if err != nil {
+		return nil
+	}
+	return b
+}
+
+func (ep *endpoint) SetValue(value []byte) error {
+	return json.Unmarshal(value, ep)
+}
+
+func (ep *endpoint) MarshalJSON() ([]byte, error) {
+	epMap := make(map[string]interface{})
+
+	epMap["id"] = ep.id
+	epMap["nid"] = ep.nid
+	if ep.ifName != "" {
+		epMap["ifName"] = ep.ifName
+	}
+	if ep.addr != nil {
+		epMap["addr"] = ep.addr.String()
+	}
+	if len(ep.mac) != 0 {
+		epMap["mac"] = ep.mac.String()
+	}
+
+	return json.Marshal(epMap)
+}
+
+func (ep *endpoint) UnmarshalJSON(value []byte) error {
+	var (
+		err   error
+		epMap map[string]interface{}
+	)
+
+	json.Unmarshal(value, &epMap)
+
+	ep.id = epMap["id"].(string)
+	ep.nid = epMap["nid"].(string)
+	if v, ok := epMap["mac"]; ok {
+		if ep.mac, err = net.ParseMAC(v.(string)); err != nil {
+			return types.InternalErrorf("failed to decode endpoint interface mac address after json unmarshal: %s", v.(string))
+		}
+	}
+	if v, ok := epMap["addr"]; ok {
+		if ep.addr, err = types.ParseCIDR(v.(string)); err != nil {
+			return types.InternalErrorf("failed to decode endpoint interface ipv4 address after json unmarshal: %v", err)
+		}
+	}
+	if v, ok := epMap["ifName"]; ok {
+		ep.ifName = v.(string)
+	}
+
+	return nil
+}

+ 68 - 18
libnetwork/drivers/overlay/ov_network.go

@@ -195,21 +195,21 @@ func (n *network) incEndpointCount() {
 	n.joinCnt++
 }
 
-func (n *network) joinSandbox() error {
+func (n *network) joinSandbox(restore bool) error {
 	// If there is a race between two go routines here only one will win
 	// the other will wait.
 	n.once.Do(func() {
 		// save the error status of initSandbox in n.initErr so that
 		// all the racing go routines are able to know the status.
-		n.initErr = n.initSandbox()
+		n.initErr = n.initSandbox(restore)
 	})
 
 	return n.initErr
 }
 
-func (n *network) joinSubnetSandbox(s *subnet) error {
+func (n *network) joinSubnetSandbox(s *subnet, restore bool) error {
 	s.once.Do(func() {
-		s.initErr = n.initSubnetSandbox(s)
+		s.initErr = n.initSubnetSandbox(s, restore)
 	})
 	return s.initErr
 }
@@ -386,9 +386,33 @@ func isOverlap(nw *net.IPNet) bool {
 	return false
 }
 
-func (n *network) initSubnetSandbox(s *subnet) error {
-	brName := n.generateBridgeName(s)
-	vxlanName := n.generateVxlanName(s)
+func (n *network) restoreSubnetSandbox(s *subnet, brName, vxlanName string) error {
+	sbox := n.sandbox()
+
+	// restore overlay osl sandbox
+	Ifaces := make(map[string][]osl.IfaceOption)
+	brIfaceOption := make([]osl.IfaceOption, 2)
+	brIfaceOption = append(brIfaceOption, sbox.InterfaceOptions().Address(s.gwIP))
+	brIfaceOption = append(brIfaceOption, sbox.InterfaceOptions().Bridge(true))
+	Ifaces[fmt.Sprintf("%s+%s", brName, "br")] = brIfaceOption
+
+	err := sbox.Restore(Ifaces, nil, nil, nil)
+	if err != nil {
+		return err
+	}
+
+	Ifaces = make(map[string][]osl.IfaceOption)
+	vxlanIfaceOption := make([]osl.IfaceOption, 1)
+	vxlanIfaceOption = append(vxlanIfaceOption, sbox.InterfaceOptions().Master(brName))
+	Ifaces[fmt.Sprintf("%s+%s", vxlanName, "vxlan")] = vxlanIfaceOption
+	err = sbox.Restore(Ifaces, nil, nil, nil)
+	if err != nil {
+		return err
+	}
+	return nil
+}
+
+func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error {
 
 	if hostMode {
 		// Try to delete stale bridge interface if it exists
@@ -451,6 +475,19 @@ func (n *network) initSubnetSandbox(s *subnet) error {
 		}
 	}
 
+	return nil
+}
+
+func (n *network) initSubnetSandbox(s *subnet, restore bool) error {
+	brName := n.generateBridgeName(s)
+	vxlanName := n.generateVxlanName(s)
+
+	if restore {
+		n.restoreSubnetSandbox(s, brName, vxlanName)
+	} else {
+		n.setupSubnetSandbox(s, brName, vxlanName)
+	}
+
 	n.Lock()
 	s.vxlanName = vxlanName
 	s.brName = brName
@@ -494,32 +531,45 @@ func (n *network) cleanupStaleSandboxes() {
 		})
 }
 
-func (n *network) initSandbox() error {
+func (n *network) initSandbox(restore bool) error {
 	n.Lock()
 	n.initEpoch++
 	n.Unlock()
 
 	networkOnce.Do(networkOnceInit)
 
-	if hostMode {
-		if err := addNetworkChain(n.id[:12]); err != nil {
-			return err
+	if !restore {
+		if hostMode {
+			if err := addNetworkChain(n.id[:12]); err != nil {
+				return err
+			}
 		}
+
+		// If there are any stale sandboxes related to this network
+		// from previous daemon life clean it up here
+		n.cleanupStaleSandboxes()
 	}
 
-	// If there are any stale sandboxes related to this network
-	// from previous daemon life clean it up here
-	n.cleanupStaleSandboxes()
+	// In the restore case network sandbox already exist; but we don't know
+	// what epoch number it was created with. It has to be retrieved by
+	// searching the net namespaces.
+	key := ""
+	if restore {
+		key = osl.GenerateKey("-" + n.id)
+	} else {
+		key = osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch) + n.id)
+	}
 
-	sbox, err := osl.NewSandbox(
-		osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch)+n.id), !hostMode, false)
+	sbox, err := osl.NewSandbox(key, !hostMode, restore)
 	if err != nil {
-		return fmt.Errorf("could not create network sandbox: %v", err)
+		return fmt.Errorf("could not get network sandbox (oper %t): %v", restore, err)
 	}
 
 	n.setSandbox(sbox)
 
-	n.driver.peerDbUpdateSandbox(n.id)
+	if !restore {
+		n.driver.peerDbUpdateSandbox(n.id)
+	}
 
 	var nlSock *nl.NetlinkSocket
 	sbox.InvokeFunc(func() {

+ 68 - 0
libnetwork/drivers/overlay/overlay.go

@@ -13,6 +13,7 @@ import (
 	"github.com/docker/libnetwork/driverapi"
 	"github.com/docker/libnetwork/idm"
 	"github.com/docker/libnetwork/netlabel"
+	"github.com/docker/libnetwork/osl"
 	"github.com/docker/libnetwork/types"
 	"github.com/hashicorp/serf/serf"
 )
@@ -41,6 +42,7 @@ type driver struct {
 	serfInstance *serf.Serf
 	networks     networkTable
 	store        datastore.DataStore
+	localStore   datastore.DataStore
 	vxlanIdm     *idm.Idm
 	once         sync.Once
 	joinOnce     sync.Once
@@ -74,9 +76,75 @@ func Init(dc driverapi.DriverCallback, config map[string]interface{}) error {
 		}
 	}
 
+	if data, ok := config[netlabel.LocalKVClient]; ok {
+		var err error
+		dsc, ok := data.(discoverapi.DatastoreConfigData)
+		if !ok {
+			return types.InternalErrorf("incorrect data in datastore configuration: %v", data)
+		}
+		d.localStore, err = datastore.NewDataStoreFromConfig(dsc)
+		if err != nil {
+			return types.InternalErrorf("failed to initialize local data store: %v", err)
+		}
+	}
+
+	d.restoreEndpoints()
+
 	return dc.RegisterDriver(networkType, d, c)
 }
 
+// Endpoints are stored in the local store. Restore them and reconstruct the overlay sandbox
+func (d *driver) restoreEndpoints() error {
+	if d.localStore == nil {
+		logrus.Warnf("Cannot restore overlay endpoints because local datastore is missing")
+		return nil
+	}
+	kvol, err := d.localStore.List(datastore.Key(overlayEndpointPrefix), &endpoint{})
+	if err != nil && err != datastore.ErrKeyNotFound {
+		return fmt.Errorf("failed to read overlay endpoint from store: %v", err)
+	}
+
+	if err == datastore.ErrKeyNotFound {
+		return nil
+	}
+	for _, kvo := range kvol {
+		ep := kvo.(*endpoint)
+		n := d.network(ep.nid)
+		if n == nil {
+			logrus.Debugf("Network (%s) not found for restored endpoint (%s)", ep.nid, ep.id)
+			continue
+		}
+		n.addEndpoint(ep)
+
+		s := n.getSubnetforIP(ep.addr)
+		if s == nil {
+			return fmt.Errorf("could not find subnet for endpoint %s", ep.id)
+		}
+
+		if err := n.joinSandbox(true); err != nil {
+			return fmt.Errorf("restore network sandbox failed: %v", err)
+		}
+
+		if err := n.joinSubnetSandbox(s, true); err != nil {
+			return fmt.Errorf("restore subnet sandbox failed for %q: %v", s.subnetIP.String(), err)
+		}
+
+		Ifaces := make(map[string][]osl.IfaceOption)
+		vethIfaceOption := make([]osl.IfaceOption, 1)
+		vethIfaceOption = append(vethIfaceOption, n.sbox.InterfaceOptions().Master(s.brName))
+		Ifaces[fmt.Sprintf("%s+%s", "veth", "veth")] = vethIfaceOption
+
+		err := n.sbox.Restore(Ifaces, nil, nil, nil)
+		if err != nil {
+			return fmt.Errorf("failed to restore overlay sandbox: %v", err)
+		}
+
+		n.incEndpointCount()
+		d.peerDbAdd(ep.nid, ep.id, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.bindAddress), true)
+	}
+	return nil
+}
+
 // Fini cleans up the driver resources
 func Fini(drv driverapi.Driver) {
 	d := drv.(*driver)

+ 1 - 1
libnetwork/drivers/overlay/peerdb.go

@@ -271,7 +271,7 @@ func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
 		return fmt.Errorf("couldn't get vxlan id for %q: %v", s.subnetIP.String(), err)
 	}
 
-	if err := n.joinSubnetSandbox(s); err != nil {
+	if err := n.joinSubnetSandbox(s, false); err != nil {
 		return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), err)
 	}