Avoid network/endpoint count inconsistences
- ... on ungraceful shutdown during network create - Allow forceful deletion of network - On network delete, first mark the network for deletion - On controller creation, first forcely remove any network that is marked for deletion. Signed-off-by: Alessandro Boch <aboch@docker.com>
This commit is contained in:
parent
0946e1444f
commit
c92a4e9fd6
4 changed files with 72 additions and 38 deletions
|
@ -187,6 +187,7 @@ func New(cfgOptions ...config.Option) (NetworkController, error) {
|
|||
|
||||
c.sandboxCleanup()
|
||||
c.cleanupLocalEndpoints()
|
||||
c.networkCleanup()
|
||||
|
||||
if err := c.startExternalKeyListener(); err != nil {
|
||||
return nil, err
|
||||
|
@ -479,19 +480,23 @@ func (c *controller) NewNetwork(networkType, name string, options ...NetworkOpti
|
|||
}
|
||||
}()
|
||||
|
||||
if err = c.updateToStore(network); err != nil {
|
||||
// First store the endpoint count, then the network. To avoid to
|
||||
// end up with a datastore containing a network and not an epCnt,
|
||||
// in case of an ungraceful shutdown during this function call.
|
||||
epCnt := &endpointCnt{n: network}
|
||||
if err = c.updateToStore(epCnt); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
if e := c.deleteFromStore(network); e != nil {
|
||||
log.Warnf("couldnt rollback from store, network %s on failure (%v): %v", network.name, err, e)
|
||||
if e := c.deleteFromStore(epCnt); e != nil {
|
||||
log.Warnf("couldnt rollback from store, epCnt %v on failure (%v): %v", epCnt, err, e)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
network.epCnt = &endpointCnt{n: network}
|
||||
if err = c.updateToStore(network.epCnt); err != nil {
|
||||
network.epCnt = epCnt
|
||||
if err = c.updateToStore(network); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
|
@ -521,6 +526,9 @@ func (c *controller) Networks() []Network {
|
|||
}
|
||||
|
||||
for _, n := range networks {
|
||||
if n.inDelete {
|
||||
continue
|
||||
}
|
||||
list = append(list, n)
|
||||
}
|
||||
|
||||
|
|
|
@ -252,15 +252,6 @@ func TestHost(t *testing.T) {
|
|||
if err := ep3.Delete(false); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// host type is special network. Cannot be removed.
|
||||
err = network.Delete()
|
||||
if err == nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, ok := err.(types.ForbiddenError); !ok {
|
||||
t.Fatalf("Unexpected error type")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBridge(t *testing.T) {
|
||||
|
|
|
@ -167,6 +167,7 @@ type network struct {
|
|||
stopWatchCh chan struct{}
|
||||
drvOnce *sync.Once
|
||||
internal bool
|
||||
inDelete bool
|
||||
sync.Mutex
|
||||
}
|
||||
|
||||
|
@ -306,6 +307,7 @@ func (n *network) CopyTo(o datastore.KVObject) error {
|
|||
dstN.dbExists = n.dbExists
|
||||
dstN.drvOnce = n.drvOnce
|
||||
dstN.internal = n.internal
|
||||
dstN.inDelete = n.inDelete
|
||||
|
||||
for _, v4conf := range n.ipamV4Config {
|
||||
dstV4Conf := &IpamConf{}
|
||||
|
@ -394,6 +396,7 @@ func (n *network) MarshalJSON() ([]byte, error) {
|
|||
netMap["ipamV6Info"] = string(iis)
|
||||
}
|
||||
netMap["internal"] = n.internal
|
||||
netMap["inDelete"] = n.inDelete
|
||||
return json.Marshal(netMap)
|
||||
}
|
||||
|
||||
|
@ -463,6 +466,9 @@ func (n *network) UnmarshalJSON(b []byte) (err error) {
|
|||
if s, ok := netMap["scope"]; ok {
|
||||
n.scope = s.(string)
|
||||
}
|
||||
if v, ok := netMap["inDelete"]; ok {
|
||||
n.inDelete = v.(bool)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -611,6 +617,10 @@ func (n *network) driver(load bool) (driverapi.Driver, error) {
|
|||
}
|
||||
|
||||
func (n *network) Delete() error {
|
||||
return n.delete(false)
|
||||
}
|
||||
|
||||
func (n *network) delete(force bool) error {
|
||||
n.Lock()
|
||||
c := n.ctrlr
|
||||
name := n.name
|
||||
|
@ -622,33 +632,39 @@ func (n *network) Delete() error {
|
|||
return &UnknownNetworkError{name: name, id: id}
|
||||
}
|
||||
|
||||
numEps := n.getEpCnt().EndpointCnt()
|
||||
if numEps != 0 {
|
||||
if !force && n.getEpCnt().EndpointCnt() != 0 {
|
||||
return &ActiveEndpointsError{name: n.name, id: n.id}
|
||||
}
|
||||
|
||||
if err = n.deleteNetwork(); err != nil {
|
||||
return err
|
||||
// Mark the network for deletion
|
||||
n.inDelete = true
|
||||
if err = c.updateToStore(n); err != nil {
|
||||
return fmt.Errorf("error marking network %s (%s) for deletion: %v", n.Name(), n.ID(), err)
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
if e := c.addNetwork(n); e != nil {
|
||||
log.Warnf("failed to rollback deleteNetwork for network %s: %v",
|
||||
n.Name(), err)
|
||||
}
|
||||
|
||||
if err = n.deleteNetwork(); err != nil {
|
||||
if !force {
|
||||
return err
|
||||
}
|
||||
}()
|
||||
log.Debugf("driver failed to delete stale network %s (%s): %v", n.Name(), n.ID(), err)
|
||||
}
|
||||
|
||||
n.ipamRelease()
|
||||
if err = c.updateToStore(n); err != nil {
|
||||
log.Warnf("Failed to update store after ipam release for network %s (%s): %v", n.Name(), n.ID(), err)
|
||||
}
|
||||
|
||||
// deleteFromStore performs an atomic delete operation and the
|
||||
// network.epCnt will help prevent any possible
|
||||
// race between endpoint join and network delete
|
||||
if err = n.getController().deleteFromStore(n.getEpCnt()); err != nil {
|
||||
return fmt.Errorf("error deleting network endpoint count from store: %v", err)
|
||||
if err = c.deleteFromStore(n.getEpCnt()); err != nil {
|
||||
if !force {
|
||||
return fmt.Errorf("error deleting network endpoint count from store: %v", err)
|
||||
}
|
||||
log.Debugf("Error deleting endpoint count from store for stale network %s (%s) for deletion: %v", n.Name(), n.ID(), err)
|
||||
}
|
||||
|
||||
n.ipamRelease()
|
||||
|
||||
if err = n.getController().deleteFromStore(n); err != nil {
|
||||
if err = c.deleteFromStore(n); err != nil {
|
||||
return fmt.Errorf("error deleting network from store: %v", err)
|
||||
}
|
||||
|
||||
|
@ -1098,25 +1114,25 @@ func (n *network) ipamRelease() {
|
|||
}
|
||||
|
||||
func (n *network) ipamReleaseVersion(ipVer int, ipam ipamapi.Ipam) {
|
||||
var infoList []*IpamInfo
|
||||
var infoList *[]*IpamInfo
|
||||
|
||||
switch ipVer {
|
||||
case 4:
|
||||
infoList = n.ipamV4Info
|
||||
infoList = &n.ipamV4Info
|
||||
case 6:
|
||||
infoList = n.ipamV6Info
|
||||
infoList = &n.ipamV6Info
|
||||
default:
|
||||
log.Warnf("incorrect ip version passed to ipam release: %d", ipVer)
|
||||
return
|
||||
}
|
||||
|
||||
if infoList == nil {
|
||||
if *infoList == nil {
|
||||
return
|
||||
}
|
||||
|
||||
log.Debugf("releasing IPv%d pools from network %s (%s)", ipVer, n.Name(), n.ID())
|
||||
|
||||
for _, d := range infoList {
|
||||
for _, d := range *infoList {
|
||||
if d.Gateway != nil {
|
||||
if err := ipam.ReleaseAddress(d.PoolID, d.Gateway.IP); err != nil {
|
||||
log.Warnf("Failed to release gateway ip address %s on delete of network %s (%s): %v", d.Gateway.IP, n.Name(), n.ID(), err)
|
||||
|
@ -1135,6 +1151,8 @@ func (n *network) ipamReleaseVersion(ipVer int, ipam ipamapi.Ipam) {
|
|||
log.Warnf("Failed to release address pool %s on delete of network %s (%s): %v", d.PoolID, n.Name(), n.ID(), err)
|
||||
}
|
||||
}
|
||||
|
||||
*infoList = nil
|
||||
}
|
||||
|
||||
func (n *network) getIPInfo(ipVer int) []*IpamInfo {
|
||||
|
|
|
@ -71,7 +71,7 @@ func (c *controller) getNetworkFromStore(nid string) (*network, error) {
|
|||
|
||||
ec := &endpointCnt{n: n}
|
||||
err = store.GetObject(datastore.Key(ec.Key()...), ec)
|
||||
if err != nil {
|
||||
if err != nil && !n.inDelete {
|
||||
return nil, fmt.Errorf("could not find endpoint count for network %s: %v", n.Name(), err)
|
||||
}
|
||||
|
||||
|
@ -104,7 +104,7 @@ func (c *controller) getNetworksForScope(scope string) ([]*network, error) {
|
|||
|
||||
ec := &endpointCnt{n: n}
|
||||
err = store.GetObject(datastore.Key(ec.Key()...), ec)
|
||||
if err != nil {
|
||||
if err != nil && !n.inDelete {
|
||||
log.Warnf("Could not find endpoint count key %s for network %s while listing: %v", datastore.Key(ec.Key()...), n.Name(), err)
|
||||
continue
|
||||
}
|
||||
|
@ -139,7 +139,7 @@ func (c *controller) getNetworksFromStore() ([]*network, error) {
|
|||
|
||||
ec := &endpointCnt{n: n}
|
||||
err = store.GetObject(datastore.Key(ec.Key()...), ec)
|
||||
if err != nil {
|
||||
if err != nil && !n.inDelete {
|
||||
log.Warnf("could not find endpoint count key %s for network %s while listing: %v", datastore.Key(ec.Key()...), n.Name(), err)
|
||||
continue
|
||||
}
|
||||
|
@ -428,3 +428,20 @@ func (c *controller) startWatch() {
|
|||
|
||||
go c.watchLoop()
|
||||
}
|
||||
|
||||
func (c *controller) networkCleanup() {
|
||||
networks, err := c.getNetworksFromStore()
|
||||
if err != nil {
|
||||
log.Warnf("Could not retrieve networks from store(s) during network cleanup: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
for _, n := range networks {
|
||||
if n.inDelete {
|
||||
log.Infof("Removing stale network %s (%s)", n.Name(), n.ID())
|
||||
if err := n.delete(true); err != nil {
|
||||
log.Debugf("Error while removing stale network: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue