瀏覽代碼

Merge pull request #42432 from dperny/fix-ip-overlap

Akihiro Suda 4 年之前
父節點
當前提交
7729ebfa1b
共有 1 個文件被更改,包括 81 次插入0 次删除
  1. 81 0
      daemon/cluster/executor/container/executor.go

+ 81 - 0
daemon/cluster/executor/container/executor.go

@@ -15,12 +15,15 @@ import (
 	"github.com/docker/docker/daemon/cluster/convert"
 	executorpkg "github.com/docker/docker/daemon/cluster/executor"
 	clustertypes "github.com/docker/docker/daemon/cluster/provider"
+	"github.com/docker/docker/libnetwork"
 	networktypes "github.com/docker/docker/libnetwork/types"
 	"github.com/docker/swarmkit/agent"
 	"github.com/docker/swarmkit/agent/exec"
 	"github.com/docker/swarmkit/api"
 	"github.com/docker/swarmkit/api/naming"
+	"github.com/docker/swarmkit/log"
 	"github.com/docker/swarmkit/template"
+	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 )
 
@@ -32,6 +35,14 @@ type executor struct {
 	dependencies  exec.DependencyManager
 	mutex         sync.Mutex // This mutex protects the following node field
 	node          *api.NodeDescription
+
+	// nodeObj holds a copy of the swarmkit Node object from the time of the
+	// last call to executor.Configure. This allows us to discover which
+	// network attachments the node previously had, which further allows us to
+	// determine which, if any, need to be removed. nodeObj is not protected by
+	// a mutex, because it is only written to in the method (Configure) that it
+	// is read from. If that changes, it may need to be guarded.
+	nodeObj *api.Node
 }
 
 // NewExecutor returns an executor from the docker client.
@@ -157,6 +168,40 @@ func (e *executor) Configure(ctx context.Context, node *api.Node) error {
 		attachments[na.Network.ID] = na.Addresses[0]
 	}
 
+	// discover which, if any, attachments have been removed.
+	//
+	// we aren't responsible directly for creating these networks. that is
+	// handled indirectly when a container using that network is created.
+	// however, when it comes time to remove the network, none of the relevant
+	// tasks may exist anymore. this means we should go ahead and try to remove
+	// any network we know to no longer be in use.
+
+	// removeAttachments maps the network ID to a boolean. This boolean
+	// indicates whether the attachment in question is totally removed (true),
+	// or has just had its IP changed (false)
+	removeAttachments := make(map[string]bool)
+
+	// the first time we Configure, nodeObj wil be nil, because it will not be
+	// set yet. in that case, skip this check.
+	if e.nodeObj != nil {
+		for _, na := range e.nodeObj.Attachments {
+			// same thing as above, check sanity of the attachments so we don't
+			// get a panic.
+			if na == nil || na.Network == nil || len(na.Addresses) == 0 {
+				logrus.WithField("NetworkAttachment", fmt.Sprintf("%#v", na)).
+					Warnf("skipping nil or malformed node network attachment entry")
+				continue
+			}
+
+			// now, check if the attachment exists and shares the same IP address.
+			if ip, ok := attachments[na.Network.ID]; !ok || na.Addresses[0] != ip {
+				// if the map entry exists, then the network still exists, and the
+				// IP must be what has changed
+				removeAttachments[na.Network.ID] = !ok
+			}
+		}
+	}
+
 	if (ingressNA == nil) && (node.Attachment != nil) && (len(node.Attachment.Addresses) > 0) {
 		ingressNA = node.Attachment
 		attachments[ingressNA.Network.ID] = ingressNA.Addresses[0]
@@ -197,6 +242,42 @@ func (e *executor) Configure(ctx context.Context, node *api.Node) error {
 		return err
 	}
 
+	var (
+		activeEndpointsError *libnetwork.ActiveEndpointsError
+		errNoSuchNetwork     libnetwork.ErrNoSuchNetwork
+	)
+
+	// now, finally, remove any network LB attachments that we no longer have.
+	for nw, gone := range removeAttachments {
+		err := e.backend.DeleteManagedNetwork(nw)
+		switch {
+		case err == nil:
+			continue
+		case errors.As(err, &activeEndpointsError):
+			// this is the purpose of the boolean in the map. it's literally
+			// just to log an appropriate, informative error. i'm unsure if
+			// this can ever actually occur, but we need to know if it does.
+			if gone {
+				log.G(ctx).Warnf("network %s should be removed, but still has active attachments", nw)
+			} else {
+				log.G(ctx).Warnf(
+					"network %s should have its node LB IP changed, but cannot be removed because of active attachments",
+					nw,
+				)
+			}
+			continue
+		case errors.As(err, &errNoSuchNetwork):
+			// NoSuchNetworkError indicates the network is already gone.
+			continue
+		default:
+			log.G(ctx).Errorf("network %s remove failed: %v", nw, err)
+		}
+	}
+
+	// now update our copy of the node object, reset the attachment store, and
+	// return
+	e.nodeObj = node
+
 	return e.backend.GetAttachmentStore().ResetAttachments(attachments)
 }