From 5295e88ceb662387a55a4e1a171c895433961040 Mon Sep 17 00:00:00 2001 From: Albin Kerouanton Date: Tue, 23 Jan 2024 20:15:45 +0100 Subject: [PATCH] daemon: rename: don't reload endpoint from datastore Commit 8b7af1d0f added some code to update the DNSNames of all endpoints attached to a sandbox by loading a new instance of each affected endpoints from the datastore through a call to `Network.EndpointByID()`. This method then calls `Network.getEndpointFromStore()`, that in turn calls `store.GetObject()`, which then calls `cache.get()`, which calls `o.CopyTo(kvObject)`. This effectively creates a fresh new instance of an Endpoint. However, endpoints are already kept in memory by Sandbox, meaning we now have two in-memory instances of the same Endpoint. As it turns out, libnetwork is built around the idea that no two objects representing the same thing should leave in-memory, otherwise breaking mutex locking and optimistic locking (as both instances will have a drifting version tracking ID -- dbIndex in libnetwork parliance). In this specific case, this bug materializes by container rename failing when applied a second time for a given container. An integration test is added to make sure this won't happen again. Signed-off-by: Albin Kerouanton (cherry picked from commit 80c44b4b2e5028e003deac89fcaa4cbc2d172ee5) Signed-off-by: Albin Kerouanton --- daemon/rename.go | 7 ++++--- integration/container/rename_test.go | 22 ++++++++++++++++++++++ libnetwork/agent.go | 4 ++-- libnetwork/endpoint_info.go | 2 +- libnetwork/network.go | 4 ++-- libnetwork/sandbox.go | 4 ++-- libnetwork/service_linux.go | 2 +- 7 files changed, 34 insertions(+), 11 deletions(-) diff --git a/daemon/rename.go b/daemon/rename.go index 215ba919e1..4983e58d5e 100644 --- a/daemon/rename.go +++ b/daemon/rename.go @@ -2,6 +2,7 @@ package daemon // import "github.com/docker/docker/daemon" import ( "context" + "fmt" "strings" "github.com/containerd/log" @@ -127,9 +128,9 @@ func (daemon *Daemon) ContainerRename(oldName, newName string) (retErr error) { return err } - ep, err := nw.EndpointByID(epConfig.EndpointID) - if err != nil { - return err + ep := sb.GetEndpoint(epConfig.EndpointID) + if ep == nil { + return fmt.Errorf("no endpoint attached to network %s found", nw.Name()) } oldDNSNames := make([]string, len(epConfig.DNSNames)) diff --git a/integration/container/rename_test.go b/integration/container/rename_test.go index c84e73f751..4434b1a400 100644 --- a/integration/container/rename_test.go +++ b/integration/container/rename_test.go @@ -192,3 +192,25 @@ func TestRenameContainerWithLinkedContainer(t *testing.T) { assert.NilError(t, err) assert.Check(t, is.Equal(db1ID, inspect.ID)) } + +// Regression test for https://github.com/moby/moby/issues/47186 +func TestRenameContainerTwice(t *testing.T) { + ctx := setupTest(t) + apiClient := testEnv.APIClient() + + ctrName := "c0" + container.Run(ctx, t, apiClient, container.WithName("c0")) + defer func() { + container.Remove(ctx, t, apiClient, ctrName, containertypes.RemoveOptions{ + Force: true, + }) + }() + + err := apiClient.ContainerRename(ctx, "c0", "c1") + assert.NilError(t, err) + ctrName = "c1" + + err = apiClient.ContainerRename(ctx, "c1", "c2") + assert.NilError(t, err) + ctrName = "c2" +} diff --git a/libnetwork/agent.go b/libnetwork/agent.go index be3091fd0f..501889ab2e 100644 --- a/libnetwork/agent.go +++ b/libnetwork/agent.go @@ -623,7 +623,7 @@ func (ep *Endpoint) addServiceInfoToCluster(sb *Sandbox) error { // In case the deleteServiceInfoToCluster arrives first, this one is happening after the endpoint is // removed from the list, in this situation the delete will bail out not finding any data to cleanup // and the add will bail out not finding the endpoint on the sandbox. - if err := sb.getEndpoint(ep.ID()); err == nil { + if err := sb.GetEndpoint(ep.ID()); err == nil { log.G(context.TODO()).Warnf("addServiceInfoToCluster suppressing service resolution ep is not anymore in the sandbox %s", ep.ID()) return nil } @@ -692,7 +692,7 @@ func (ep *Endpoint) deleteServiceInfoFromCluster(sb *Sandbox, fullRemove bool, m // get caught in disableServceInNetworkDB, but we check here to make the // nature of the condition more clear. // See comment in addServiceInfoToCluster() - if err := sb.getEndpoint(ep.ID()); err == nil { + if err := sb.GetEndpoint(ep.ID()); err == nil { log.G(context.TODO()).Warnf("deleteServiceInfoFromCluster suppressing service resolution ep is not anymore in the sandbox %s", ep.ID()) return nil } diff --git a/libnetwork/endpoint_info.go b/libnetwork/endpoint_info.go index 0b160e9102..3938b41342 100644 --- a/libnetwork/endpoint_info.go +++ b/libnetwork/endpoint_info.go @@ -194,7 +194,7 @@ func (ep *Endpoint) Info() EndpointInfo { return ep } - return sb.getEndpoint(ep.ID()) + return sb.GetEndpoint(ep.ID()) } // Iface returns information about the interface which was assigned to diff --git a/libnetwork/network.go b/libnetwork/network.go index e3434ca29d..311f0e81cc 100644 --- a/libnetwork/network.go +++ b/libnetwork/network.go @@ -1276,8 +1276,8 @@ func (n *Network) EndpointByName(name string) (*Endpoint, error) { return e, nil } -// EndpointByID returns the Endpoint which has the passed id. If not found, -// the error ErrNoSuchEndpoint is returned. +// EndpointByID should *never* be called as it's going to create a 2nd instance of an Endpoint. The first one lives in +// the Sandbox the endpoint is attached to. Instead, the endpoint should be retrieved by calling [Sandbox.Endpoints()]. func (n *Network) EndpointByID(id string) (*Endpoint, error) { if id == "" { return nil, ErrInvalidID(id) diff --git a/libnetwork/sandbox.go b/libnetwork/sandbox.go index 81a0db1408..c7e625a6bd 100644 --- a/libnetwork/sandbox.go +++ b/libnetwork/sandbox.go @@ -334,7 +334,7 @@ func (sb *Sandbox) removeEndpointRaw(ep *Endpoint) { } } -func (sb *Sandbox) getEndpoint(id string) *Endpoint { +func (sb *Sandbox) GetEndpoint(id string) *Endpoint { sb.mu.Lock() defer sb.mu.Unlock() @@ -587,7 +587,7 @@ func (sb *Sandbox) DisableService() (err error) { } func (sb *Sandbox) clearNetworkResources(origEp *Endpoint) error { - ep := sb.getEndpoint(origEp.id) + ep := sb.GetEndpoint(origEp.id) if ep == nil { return fmt.Errorf("could not find the sandbox endpoint data for endpoint %s", origEp.id) diff --git a/libnetwork/service_linux.go b/libnetwork/service_linux.go index bc24728b8b..79e12255f8 100644 --- a/libnetwork/service_linux.go +++ b/libnetwork/service_linux.go @@ -57,7 +57,7 @@ func (n *Network) findLBEndpointSandbox() (*Endpoint, *Sandbox, error) { if !ok { return nil, nil, fmt.Errorf("Unable to get sandbox for %s(%s) in for %s", ep.Name(), ep.ID(), n.ID()) } - sep := sb.getEndpoint(ep.ID()) + sep := sb.GetEndpoint(ep.ID()) if sep == nil { return nil, nil, fmt.Errorf("Load balancing endpoint %s(%s) removed from %s", ep.Name(), ep.ID(), n.ID()) }