Avoid network/endpoint count inconsistences

- ... on ungraceful shutdown during network create
- Allow forceful deletion of network
- On network delete, first mark the network for deletion
- On controller creation, first forcely remove any network
  that is marked for deletion.

Signed-off-by: Alessandro Boch <aboch@docker.com>
This commit is contained in:
Alessandro Boch 2016-03-05 02:00:31 -08:00
parent 0946e1444f
commit c92a4e9fd6
4 changed files with 72 additions and 38 deletions

View file

@ -187,6 +187,7 @@ func New(cfgOptions ...config.Option) (NetworkController, error) {
c.sandboxCleanup()
c.cleanupLocalEndpoints()
c.networkCleanup()
if err := c.startExternalKeyListener(); err != nil {
return nil, err
@ -479,19 +480,23 @@ func (c *controller) NewNetwork(networkType, name string, options ...NetworkOpti
}
}()
if err = c.updateToStore(network); err != nil {
// First store the endpoint count, then the network. To avoid to
// end up with a datastore containing a network and not an epCnt,
// in case of an ungraceful shutdown during this function call.
epCnt := &endpointCnt{n: network}
if err = c.updateToStore(epCnt); err != nil {
return nil, err
}
defer func() {
if err != nil {
if e := c.deleteFromStore(network); e != nil {
log.Warnf("couldnt rollback from store, network %s on failure (%v): %v", network.name, err, e)
if e := c.deleteFromStore(epCnt); e != nil {
log.Warnf("couldnt rollback from store, epCnt %v on failure (%v): %v", epCnt, err, e)
}
}
}()
network.epCnt = &endpointCnt{n: network}
if err = c.updateToStore(network.epCnt); err != nil {
network.epCnt = epCnt
if err = c.updateToStore(network); err != nil {
return nil, err
}
@ -521,6 +526,9 @@ func (c *controller) Networks() []Network {
}
for _, n := range networks {
if n.inDelete {
continue
}
list = append(list, n)
}

View file

@ -252,15 +252,6 @@ func TestHost(t *testing.T) {
if err := ep3.Delete(false); err != nil {
t.Fatal(err)
}
// host type is special network. Cannot be removed.
err = network.Delete()
if err == nil {
t.Fatal(err)
}
if _, ok := err.(types.ForbiddenError); !ok {
t.Fatalf("Unexpected error type")
}
}
func TestBridge(t *testing.T) {

View file

@ -167,6 +167,7 @@ type network struct {
stopWatchCh chan struct{}
drvOnce *sync.Once
internal bool
inDelete bool
sync.Mutex
}
@ -306,6 +307,7 @@ func (n *network) CopyTo(o datastore.KVObject) error {
dstN.dbExists = n.dbExists
dstN.drvOnce = n.drvOnce
dstN.internal = n.internal
dstN.inDelete = n.inDelete
for _, v4conf := range n.ipamV4Config {
dstV4Conf := &IpamConf{}
@ -394,6 +396,7 @@ func (n *network) MarshalJSON() ([]byte, error) {
netMap["ipamV6Info"] = string(iis)
}
netMap["internal"] = n.internal
netMap["inDelete"] = n.inDelete
return json.Marshal(netMap)
}
@ -463,6 +466,9 @@ func (n *network) UnmarshalJSON(b []byte) (err error) {
if s, ok := netMap["scope"]; ok {
n.scope = s.(string)
}
if v, ok := netMap["inDelete"]; ok {
n.inDelete = v.(bool)
}
return nil
}
@ -611,6 +617,10 @@ func (n *network) driver(load bool) (driverapi.Driver, error) {
}
func (n *network) Delete() error {
return n.delete(false)
}
func (n *network) delete(force bool) error {
n.Lock()
c := n.ctrlr
name := n.name
@ -622,33 +632,39 @@ func (n *network) Delete() error {
return &UnknownNetworkError{name: name, id: id}
}
numEps := n.getEpCnt().EndpointCnt()
if numEps != 0 {
if !force && n.getEpCnt().EndpointCnt() != 0 {
return &ActiveEndpointsError{name: n.name, id: n.id}
}
if err = n.deleteNetwork(); err != nil {
return err
// Mark the network for deletion
n.inDelete = true
if err = c.updateToStore(n); err != nil {
return fmt.Errorf("error marking network %s (%s) for deletion: %v", n.Name(), n.ID(), err)
}
defer func() {
if err != nil {
if e := c.addNetwork(n); e != nil {
log.Warnf("failed to rollback deleteNetwork for network %s: %v",
n.Name(), err)
}
if err = n.deleteNetwork(); err != nil {
if !force {
return err
}
}()
log.Debugf("driver failed to delete stale network %s (%s): %v", n.Name(), n.ID(), err)
}
n.ipamRelease()
if err = c.updateToStore(n); err != nil {
log.Warnf("Failed to update store after ipam release for network %s (%s): %v", n.Name(), n.ID(), err)
}
// deleteFromStore performs an atomic delete operation and the
// network.epCnt will help prevent any possible
// race between endpoint join and network delete
if err = n.getController().deleteFromStore(n.getEpCnt()); err != nil {
return fmt.Errorf("error deleting network endpoint count from store: %v", err)
if err = c.deleteFromStore(n.getEpCnt()); err != nil {
if !force {
return fmt.Errorf("error deleting network endpoint count from store: %v", err)
}
log.Debugf("Error deleting endpoint count from store for stale network %s (%s) for deletion: %v", n.Name(), n.ID(), err)
}
n.ipamRelease()
if err = n.getController().deleteFromStore(n); err != nil {
if err = c.deleteFromStore(n); err != nil {
return fmt.Errorf("error deleting network from store: %v", err)
}
@ -1098,25 +1114,25 @@ func (n *network) ipamRelease() {
}
func (n *network) ipamReleaseVersion(ipVer int, ipam ipamapi.Ipam) {
var infoList []*IpamInfo
var infoList *[]*IpamInfo
switch ipVer {
case 4:
infoList = n.ipamV4Info
infoList = &n.ipamV4Info
case 6:
infoList = n.ipamV6Info
infoList = &n.ipamV6Info
default:
log.Warnf("incorrect ip version passed to ipam release: %d", ipVer)
return
}
if infoList == nil {
if *infoList == nil {
return
}
log.Debugf("releasing IPv%d pools from network %s (%s)", ipVer, n.Name(), n.ID())
for _, d := range infoList {
for _, d := range *infoList {
if d.Gateway != nil {
if err := ipam.ReleaseAddress(d.PoolID, d.Gateway.IP); err != nil {
log.Warnf("Failed to release gateway ip address %s on delete of network %s (%s): %v", d.Gateway.IP, n.Name(), n.ID(), err)
@ -1135,6 +1151,8 @@ func (n *network) ipamReleaseVersion(ipVer int, ipam ipamapi.Ipam) {
log.Warnf("Failed to release address pool %s on delete of network %s (%s): %v", d.PoolID, n.Name(), n.ID(), err)
}
}
*infoList = nil
}
func (n *network) getIPInfo(ipVer int) []*IpamInfo {

View file

@ -71,7 +71,7 @@ func (c *controller) getNetworkFromStore(nid string) (*network, error) {
ec := &endpointCnt{n: n}
err = store.GetObject(datastore.Key(ec.Key()...), ec)
if err != nil {
if err != nil && !n.inDelete {
return nil, fmt.Errorf("could not find endpoint count for network %s: %v", n.Name(), err)
}
@ -104,7 +104,7 @@ func (c *controller) getNetworksForScope(scope string) ([]*network, error) {
ec := &endpointCnt{n: n}
err = store.GetObject(datastore.Key(ec.Key()...), ec)
if err != nil {
if err != nil && !n.inDelete {
log.Warnf("Could not find endpoint count key %s for network %s while listing: %v", datastore.Key(ec.Key()...), n.Name(), err)
continue
}
@ -139,7 +139,7 @@ func (c *controller) getNetworksFromStore() ([]*network, error) {
ec := &endpointCnt{n: n}
err = store.GetObject(datastore.Key(ec.Key()...), ec)
if err != nil {
if err != nil && !n.inDelete {
log.Warnf("could not find endpoint count key %s for network %s while listing: %v", datastore.Key(ec.Key()...), n.Name(), err)
continue
}
@ -428,3 +428,20 @@ func (c *controller) startWatch() {
go c.watchLoop()
}
func (c *controller) networkCleanup() {
networks, err := c.getNetworksFromStore()
if err != nil {
log.Warnf("Could not retrieve networks from store(s) during network cleanup: %v", err)
return
}
for _, n := range networks {
if n.inDelete {
log.Infof("Removing stale network %s (%s)", n.Name(), n.ID())
if err := n.delete(true); err != nil {
log.Debugf("Error while removing stale network: %v", err)
}
}
}
}