瀏覽代碼

Merge pull request #17514 from mavenugo/ugr

Fixing a case of dangling endpoint during ungraceful daemon restart
Brian Goff 9 年之前
父節點
當前提交
17a8fbeaeb

+ 20 - 8
daemon/container_unix.go

@@ -893,6 +893,13 @@ func (container *Container) buildCreateEndpointOptions(n libnetwork.Network) ([]
 }
 }
 
 
 func (container *Container) allocateNetwork() error {
 func (container *Container) allocateNetwork() error {
+	sb := container.getNetworkSandbox()
+	if sb != nil {
+		// Cleanup any stale sandbox left over due to ungraceful daemon shutdown
+		if err := sb.Delete(); err != nil {
+			logrus.Errorf("failed to cleanup up stale network sandbox for container %s", container.ID)
+		}
+	}
 	updateSettings := false
 	updateSettings := false
 	if len(container.NetworkSettings.Networks) == 0 {
 	if len(container.NetworkSettings.Networks) == 0 {
 		mode := container.hostConfig.NetworkMode
 		mode := container.hostConfig.NetworkMode
@@ -919,6 +926,18 @@ func (container *Container) allocateNetwork() error {
 	return container.writeHostConfig()
 	return container.writeHostConfig()
 }
 }
 
 
+func (container *Container) getNetworkSandbox() libnetwork.Sandbox {
+	var sb libnetwork.Sandbox
+	container.daemon.netController.WalkSandboxes(func(s libnetwork.Sandbox) bool {
+		if s.ContainerID() == container.ID {
+			sb = s
+			return true
+		}
+		return false
+	})
+	return sb
+}
+
 // ConnectToNetwork connects a container to a network
 // ConnectToNetwork connects a container to a network
 func (container *Container) ConnectToNetwork(idOrName string) error {
 func (container *Container) ConnectToNetwork(idOrName string) error {
 	if !container.Running {
 	if !container.Running {
@@ -984,14 +1003,7 @@ func (container *Container) connectToNetwork(idOrName string, updateSettings boo
 		return err
 		return err
 	}
 	}
 
 
-	var sb libnetwork.Sandbox
-	controller.WalkSandboxes(func(s libnetwork.Sandbox) bool {
-		if s.ContainerID() == container.ID {
-			sb = s
-			return true
-		}
-		return false
-	})
+	sb := container.getNetworkSandbox()
 	if sb == nil {
 	if sb == nil {
 		options, err := container.buildSandboxOptions(n)
 		options, err := container.buildSandboxOptions(n)
 		if err != nil {
 		if err != nil {

+ 1 - 1
hack/vendor.sh

@@ -21,7 +21,7 @@ clone git github.com/vdemeester/shakers 3c10293ce22b900c27acad7b28656196fcc2f73b
 clone git golang.org/x/net 3cffabab72adf04f8e3b01c5baf775361837b5fe https://github.com/golang/net.git
 clone git golang.org/x/net 3cffabab72adf04f8e3b01c5baf775361837b5fe https://github.com/golang/net.git
 
 
 #get libnetwork packages
 #get libnetwork packages
-clone git github.com/docker/libnetwork 20351a84241aa1278493d74492db947336989be6
+clone git github.com/docker/libnetwork 5fc6ba506daa7914f4d58befb38480ec8e9c9f70
 clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec
 clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec
 clone git github.com/hashicorp/go-msgpack 71c2886f5a673a35f909803f38ece5810165097b
 clone git github.com/hashicorp/go-msgpack 71c2886f5a673a35f909803f38ece5810165097b
 clone git github.com/hashicorp/memberlist 9a1e242e454d2443df330bdd51a436d5a9058fc4
 clone git github.com/hashicorp/memberlist 9a1e242e454d2443df330bdd51a436d5a9058fc4

+ 6 - 0
vendor/src/github.com/docker/libnetwork/drivers/overlay/joinleave.go

@@ -118,6 +118,12 @@ func (d *driver) Leave(nid, eid string) error {
 		return fmt.Errorf("could not find network with id %s", nid)
 		return fmt.Errorf("could not find network with id %s", nid)
 	}
 	}
 
 
+	ep := n.endpoint(eid)
+
+	if ep == nil {
+		return types.InternalMaskableErrorf("could not find endpoint with id %s", eid)
+	}
+
 	if d.notifyCh != nil {
 	if d.notifyCh != nil {
 		d.notifyCh <- ovNotify{
 		d.notifyCh <- ovNotify{
 			action: "leave",
 			action: "leave",

+ 9 - 0
vendor/src/github.com/docker/libnetwork/sandbox.go

@@ -168,6 +168,7 @@ func (sb *sandbox) Delete() error {
 	c := sb.controller
 	c := sb.controller
 
 
 	// Detach from all endpoints
 	// Detach from all endpoints
+	retain := false
 	for _, ep := range sb.getConnectedEndpoints() {
 	for _, ep := range sb.getConnectedEndpoints() {
 		// endpoint in the Gateway network will be cleaned up
 		// endpoint in the Gateway network will be cleaned up
 		// when the sandbox no longer needs external connectivity
 		// when the sandbox no longer needs external connectivity
@@ -176,14 +177,22 @@ func (sb *sandbox) Delete() error {
 		}
 		}
 
 
 		if err := ep.Leave(sb); err != nil {
 		if err := ep.Leave(sb); err != nil {
+			retain = true
 			log.Warnf("Failed detaching sandbox %s from endpoint %s: %v\n", sb.ID(), ep.ID(), err)
 			log.Warnf("Failed detaching sandbox %s from endpoint %s: %v\n", sb.ID(), ep.ID(), err)
 		}
 		}
 
 
 		if err := ep.Delete(); err != nil {
 		if err := ep.Delete(); err != nil {
+			retain = true
 			log.Warnf("Failed deleting endpoint %s: %v\n", ep.ID(), err)
 			log.Warnf("Failed deleting endpoint %s: %v\n", ep.ID(), err)
 		}
 		}
 	}
 	}
 
 
+	if retain {
+		sb.Lock()
+		sb.inDelete = false
+		sb.Unlock()
+		return fmt.Errorf("could not cleanup all the endpoints in container %s / sandbox %s", sb.containerID, sb.id)
+	}
 	// Container is going away. Path cache in etchosts is most
 	// Container is going away. Path cache in etchosts is most
 	// likely not required any more. Drop it.
 	// likely not required any more. Drop it.
 	etchosts.Drop(sb.config.hostsPath)
 	etchosts.Drop(sb.config.hostsPath)

+ 13 - 9
vendor/src/github.com/docker/libnetwork/sandbox_store.go

@@ -3,6 +3,7 @@ package libnetwork
 import (
 import (
 	"container/heap"
 	"container/heap"
 	"encoding/json"
 	"encoding/json"
+	"sync"
 
 
 	"github.com/Sirupsen/logrus"
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/libnetwork/datastore"
 	"github.com/docker/libnetwork/datastore"
@@ -119,8 +120,9 @@ func (sbs *sbState) DataScope() string {
 
 
 func (sb *sandbox) storeUpdate() error {
 func (sb *sandbox) storeUpdate() error {
 	sbs := &sbState{
 	sbs := &sbState{
-		c:  sb.controller,
-		ID: sb.id,
+		c:   sb.controller,
+		ID:  sb.id,
+		Cid: sb.containerID,
 	}
 	}
 
 
 retry:
 retry:
@@ -197,15 +199,17 @@ func (c *controller) sandboxCleanup() {
 
 
 		for _, eps := range sbs.Eps {
 		for _, eps := range sbs.Eps {
 			n, err := c.getNetworkFromStore(eps.Nid)
 			n, err := c.getNetworkFromStore(eps.Nid)
+			var ep *endpoint
 			if err != nil {
 			if err != nil {
 				logrus.Errorf("getNetworkFromStore for nid %s failed while trying to build sandbox for cleanup: %v", eps.Nid, err)
 				logrus.Errorf("getNetworkFromStore for nid %s failed while trying to build sandbox for cleanup: %v", eps.Nid, err)
-				continue
-			}
-
-			ep, err := n.getEndpointFromStore(eps.Eid)
-			if err != nil {
-				logrus.Errorf("getEndpointFromStore for eid %s failed while trying to build sandbox for cleanup: %v", eps.Eid, err)
-				continue
+				n = &network{id: eps.Nid, ctrlr: c, drvOnce: &sync.Once{}}
+				ep = &endpoint{id: eps.Eid, network: n}
+			} else {
+				ep, err = n.getEndpointFromStore(eps.Eid)
+				if err != nil {
+					logrus.Errorf("getEndpointFromStore for eid %s failed while trying to build sandbox for cleanup: %v", eps.Eid, err)
+					ep = &endpoint{id: eps.Eid, network: n}
+				}
 			}
 			}
 
 
 			heap.Push(&sb.endpoints, ep)
 			heap.Push(&sb.endpoints, ep)