Ver Fonte

Do not allow overlay destroySandbox() to be interrupted

- A concurrent leave/join on a single-member overlay network can fail with the error:
  "subnet sandbox join failed for "A.B.C.D/MM": error creating vxlan interface: file exists"
  This happens when the join is processed while the leave has already started.
  Because the network has only one member, the leave brings n.joinCnt to 0; it then resets the
  once variable of each of the network's subnets and triggers the sandbox destroy for each subnet's vxlan interface.
  But since destroySandbox() is not atomic, the join thread can trigger the creation of the
  vxlan interface in between (because subnet.once has been re-initialized), before the leave thread
  removes the vxlan interface for this subnet.
- The fix is to not allow interruptions between the re-initialization of the subnet.once var and
  the subsequent removal of the vxlan interface.

Signed-off-by: Alessandro Boch <aboch@docker.com>
Alessandro Boch há 9 anos atrás
pai
commit
cea4dd457c
1 ficheiros alterados com 6 adições e 7 exclusões
  1. 6 7
      libnetwork/drivers/overlay/ov_network.go

+ 6 - 7
libnetwork/drivers/overlay/ov_network.go

@@ -149,9 +149,9 @@ func (n *network) joinSubnetSandbox(s *subnet) error {
 
 func (n *network) leaveSandbox() {
 	n.Lock()
+	defer n.Unlock()
 	n.joinCnt--
 	if n.joinCnt != 0 {
-		n.Unlock()
 		return
 	}
 
@@ -162,15 +162,14 @@ func (n *network) leaveSandbox() {
 	for _, s := range n.subnets {
 		s.once = &sync.Once{}
 	}
-	n.Unlock()
 
 	n.destroySandbox()
 }
 
+// to be called while holding network lock
 func (n *network) destroySandbox() {
-	sbox := n.sandbox()
-	if sbox != nil {
-		for _, iface := range sbox.Info().Interfaces() {
+	if n.sbox != nil {
+		for _, iface := range n.sbox.Info().Interfaces() {
 			if err := iface.Remove(); err != nil {
 				logrus.Debugf("Remove interface %s failed: %v", iface.SrcName(), err)
 			}
@@ -197,8 +196,8 @@ func (n *network) destroySandbox() {
 			}
 		}
 
-		sbox.Destroy()
-		n.setSandbox(nil)
+		n.sbox.Destroy()
+		n.sbox = nil
 	}
 }