Przeglądaj źródła

Avoid network/endpoint count inconsistences

- ... on ungraceful shutdown during network create
- Allow forceful deletion of network
- On network delete, first mark the network for deletion
- On controller creation, first forcely remove any network
  that is marked for deletion.

Signed-off-by: Alessandro Boch <aboch@docker.com>
Alessandro Boch 9 lat temu
rodzic
commit
c92a4e9fd6
4 zmienionych plików z 72 dodań i 38 usunięć
  1. 13 5
      libnetwork/controller.go
  2. 0 9
      libnetwork/libnetwork_test.go
  3. 39 21
      libnetwork/network.go
  4. 20 3
      libnetwork/store.go

+ 13 - 5
libnetwork/controller.go

@@ -187,6 +187,7 @@ func New(cfgOptions ...config.Option) (NetworkController, error) {
 
 	c.sandboxCleanup()
 	c.cleanupLocalEndpoints()
+	c.networkCleanup()
 
 	if err := c.startExternalKeyListener(); err != nil {
 		return nil, err
@@ -479,19 +480,23 @@ func (c *controller) NewNetwork(networkType, name string, options ...NetworkOpti
 		}
 	}()
 
-	if err = c.updateToStore(network); err != nil {
+	// First store the endpoint count, then the network. To avoid to
+	// end up with a datastore containing a network and not an epCnt,
+	// in case of an ungraceful shutdown during this function call.
+	epCnt := &endpointCnt{n: network}
+	if err = c.updateToStore(epCnt); err != nil {
 		return nil, err
 	}
 	defer func() {
 		if err != nil {
-			if e := c.deleteFromStore(network); e != nil {
-				log.Warnf("couldnt rollback from store, network %s on failure (%v): %v", network.name, err, e)
+			if e := c.deleteFromStore(epCnt); e != nil {
+				log.Warnf("couldnt rollback from store, epCnt %v on failure (%v): %v", epCnt, err, e)
 			}
 		}
 	}()
 
-	network.epCnt = &endpointCnt{n: network}
-	if err = c.updateToStore(network.epCnt); err != nil {
+	network.epCnt = epCnt
+	if err = c.updateToStore(network); err != nil {
 		return nil, err
 	}
 
@@ -521,6 +526,9 @@ func (c *controller) Networks() []Network {
 	}
 
 	for _, n := range networks {
+		if n.inDelete {
+			continue
+		}
 		list = append(list, n)
 	}
 

+ 0 - 9
libnetwork/libnetwork_test.go

@@ -252,15 +252,6 @@ func TestHost(t *testing.T) {
 	if err := ep3.Delete(false); err != nil {
 		t.Fatal(err)
 	}
-
-	// host type is special network. Cannot be removed.
-	err = network.Delete()
-	if err == nil {
-		t.Fatal(err)
-	}
-	if _, ok := err.(types.ForbiddenError); !ok {
-		t.Fatalf("Unexpected error type")
-	}
 }
 
 func TestBridge(t *testing.T) {

+ 39 - 21
libnetwork/network.go

@@ -167,6 +167,7 @@ type network struct {
 	stopWatchCh  chan struct{}
 	drvOnce      *sync.Once
 	internal     bool
+	inDelete     bool
 	sync.Mutex
 }
 
@@ -306,6 +307,7 @@ func (n *network) CopyTo(o datastore.KVObject) error {
 	dstN.dbExists = n.dbExists
 	dstN.drvOnce = n.drvOnce
 	dstN.internal = n.internal
+	dstN.inDelete = n.inDelete
 
 	for _, v4conf := range n.ipamV4Config {
 		dstV4Conf := &IpamConf{}
@@ -394,6 +396,7 @@ func (n *network) MarshalJSON() ([]byte, error) {
 		netMap["ipamV6Info"] = string(iis)
 	}
 	netMap["internal"] = n.internal
+	netMap["inDelete"] = n.inDelete
 	return json.Marshal(netMap)
 }
 
@@ -463,6 +466,9 @@ func (n *network) UnmarshalJSON(b []byte) (err error) {
 	if s, ok := netMap["scope"]; ok {
 		n.scope = s.(string)
 	}
+	if v, ok := netMap["inDelete"]; ok {
+		n.inDelete = v.(bool)
+	}
 	return nil
 }
 
@@ -611,6 +617,10 @@ func (n *network) driver(load bool) (driverapi.Driver, error) {
 }
 
 func (n *network) Delete() error {
+	return n.delete(false)
+}
+
+func (n *network) delete(force bool) error {
 	n.Lock()
 	c := n.ctrlr
 	name := n.name
@@ -622,33 +632,39 @@ func (n *network) Delete() error {
 		return &UnknownNetworkError{name: name, id: id}
 	}
 
-	numEps := n.getEpCnt().EndpointCnt()
-	if numEps != 0 {
+	if !force && n.getEpCnt().EndpointCnt() != 0 {
 		return &ActiveEndpointsError{name: n.name, id: n.id}
 	}
 
-	if err = n.deleteNetwork(); err != nil {
-		return err
+	// Mark the network for deletion
+	n.inDelete = true
+	if err = c.updateToStore(n); err != nil {
+		return fmt.Errorf("error marking network %s (%s) for deletion: %v", n.Name(), n.ID(), err)
 	}
-	defer func() {
-		if err != nil {
-			if e := c.addNetwork(n); e != nil {
-				log.Warnf("failed to rollback deleteNetwork for network %s: %v",
-					n.Name(), err)
-			}
+
+	if err = n.deleteNetwork(); err != nil {
+		if !force {
+			return err
 		}
-	}()
+		log.Debugf("driver failed to delete stale network %s (%s): %v", n.Name(), n.ID(), err)
+	}
+
+	n.ipamRelease()
+	if err = c.updateToStore(n); err != nil {
+		log.Warnf("Failed to update store after ipam release for network %s (%s): %v", n.Name(), n.ID(), err)
+	}
 
 	// deleteFromStore performs an atomic delete operation and the
 	// network.epCnt will help prevent any possible
 	// race between endpoint join and network delete
-	if err = n.getController().deleteFromStore(n.getEpCnt()); err != nil {
-		return fmt.Errorf("error deleting network endpoint count from store: %v", err)
+	if err = c.deleteFromStore(n.getEpCnt()); err != nil {
+		if !force {
+			return fmt.Errorf("error deleting network endpoint count from store: %v", err)
+		}
+		log.Debugf("Error deleting endpoint count from store for stale network %s (%s) for deletion: %v", n.Name(), n.ID(), err)
 	}
 
-	n.ipamRelease()
-
-	if err = n.getController().deleteFromStore(n); err != nil {
+	if err = c.deleteFromStore(n); err != nil {
 		return fmt.Errorf("error deleting network from store: %v", err)
 	}
 
@@ -1098,25 +1114,25 @@ func (n *network) ipamRelease() {
 }
 
 func (n *network) ipamReleaseVersion(ipVer int, ipam ipamapi.Ipam) {
-	var infoList []*IpamInfo
+	var infoList *[]*IpamInfo
 
 	switch ipVer {
 	case 4:
-		infoList = n.ipamV4Info
+		infoList = &n.ipamV4Info
 	case 6:
-		infoList = n.ipamV6Info
+		infoList = &n.ipamV6Info
 	default:
 		log.Warnf("incorrect ip version passed to ipam release: %d", ipVer)
 		return
 	}
 
-	if infoList == nil {
+	if *infoList == nil {
 		return
 	}
 
 	log.Debugf("releasing IPv%d pools from network %s (%s)", ipVer, n.Name(), n.ID())
 
-	for _, d := range infoList {
+	for _, d := range *infoList {
 		if d.Gateway != nil {
 			if err := ipam.ReleaseAddress(d.PoolID, d.Gateway.IP); err != nil {
 				log.Warnf("Failed to release gateway ip address %s on delete of network %s (%s): %v", d.Gateway.IP, n.Name(), n.ID(), err)
@@ -1135,6 +1151,8 @@ func (n *network) ipamReleaseVersion(ipVer int, ipam ipamapi.Ipam) {
 			log.Warnf("Failed to release address pool %s on delete of network %s (%s): %v", d.PoolID, n.Name(), n.ID(), err)
 		}
 	}
+
+	*infoList = nil
 }
 
 func (n *network) getIPInfo(ipVer int) []*IpamInfo {

+ 20 - 3
libnetwork/store.go

@@ -71,7 +71,7 @@ func (c *controller) getNetworkFromStore(nid string) (*network, error) {
 
 		ec := &endpointCnt{n: n}
 		err = store.GetObject(datastore.Key(ec.Key()...), ec)
-		if err != nil {
+		if err != nil && !n.inDelete {
 			return nil, fmt.Errorf("could not find endpoint count for network %s: %v", n.Name(), err)
 		}
 
@@ -104,7 +104,7 @@ func (c *controller) getNetworksForScope(scope string) ([]*network, error) {
 
 		ec := &endpointCnt{n: n}
 		err = store.GetObject(datastore.Key(ec.Key()...), ec)
-		if err != nil {
+		if err != nil && !n.inDelete {
 			log.Warnf("Could not find endpoint count key %s for network %s while listing: %v", datastore.Key(ec.Key()...), n.Name(), err)
 			continue
 		}
@@ -139,7 +139,7 @@ func (c *controller) getNetworksFromStore() ([]*network, error) {
 
 			ec := &endpointCnt{n: n}
 			err = store.GetObject(datastore.Key(ec.Key()...), ec)
-			if err != nil {
+			if err != nil && !n.inDelete {
 				log.Warnf("could not find endpoint count key %s for network %s while listing: %v", datastore.Key(ec.Key()...), n.Name(), err)
 				continue
 			}
@@ -428,3 +428,20 @@ func (c *controller) startWatch() {
 
 	go c.watchLoop()
 }
+
+func (c *controller) networkCleanup() {
+	networks, err := c.getNetworksFromStore()
+	if err != nil {
+		log.Warnf("Could not retrieve networks from store(s) during network cleanup: %v", err)
+		return
+	}
+
+	for _, n := range networks {
+		if n.inDelete {
+			log.Infof("Removing stale network %s (%s)", n.Name(), n.ID())
+			if err := n.delete(true); err != nil {
+				log.Debugf("Error while removing stale network: %v", err)
+			}
+		}
+	}
+}