daemon: rename: don't reload endpoint from datastore

Commit 8b7af1d0f added some code to update the DNSNames of all
endpoints attached to a sandbox by loading a new instance of each
affected endpoints from the datastore through a call to
`Network.EndpointByID()`.

This method then calls `Network.getEndpointFromStore()`, that in
turn calls `store.GetObject()`, which then calls `cache.get()`,
which calls `o.CopyTo(kvObject)`. This effectively creates a fresh
new instance of an Endpoint. However, endpoints are already kept in
memory by Sandbox, meaning we now have two in-memory instances of
the same Endpoint.

As it turns out, libnetwork is built around the idea that no two objects
representing the same thing should leave in-memory, otherwise breaking
mutex locking and optimistic locking (as both instances will have a drifting
version tracking ID -- dbIndex in libnetwork parliance).

In this specific case, this bug materializes by container rename failing
when applied a second time for a given container. An integration test is
added to make sure this won't happen again.

Signed-off-by: Albin Kerouanton <albinker@gmail.com>
This commit is contained in:
Albin Kerouanton 2024-01-23 20:15:45 +01:00
parent f19f233ca5
commit 80c44b4b2e
7 changed files with 34 additions and 11 deletions

View file

@ -2,6 +2,7 @@ package daemon // import "github.com/docker/docker/daemon"
import (
"context"
"fmt"
"strings"
"github.com/containerd/log"
@ -127,9 +128,9 @@ func (daemon *Daemon) ContainerRename(oldName, newName string) (retErr error) {
return err
}
ep, err := nw.EndpointByID(epConfig.EndpointID)
if err != nil {
return err
ep := sb.GetEndpoint(epConfig.EndpointID)
if ep == nil {
return fmt.Errorf("no endpoint attached to network %s found", nw.Name())
}
oldDNSNames := make([]string, len(epConfig.DNSNames))

View file

@ -192,3 +192,25 @@ func TestRenameContainerWithLinkedContainer(t *testing.T) {
assert.NilError(t, err)
assert.Check(t, is.Equal(db1ID, inspect.ID))
}
// Regression test for https://github.com/moby/moby/issues/47186
func TestRenameContainerTwice(t *testing.T) {
ctx := setupTest(t)
apiClient := testEnv.APIClient()
ctrName := "c0"
container.Run(ctx, t, apiClient, container.WithName("c0"))
defer func() {
container.Remove(ctx, t, apiClient, ctrName, containertypes.RemoveOptions{
Force: true,
})
}()
err := apiClient.ContainerRename(ctx, "c0", "c1")
assert.NilError(t, err)
ctrName = "c1"
err = apiClient.ContainerRename(ctx, "c1", "c2")
assert.NilError(t, err)
ctrName = "c2"
}

View file

@ -623,7 +623,7 @@ func (ep *Endpoint) addServiceInfoToCluster(sb *Sandbox) error {
// In case the deleteServiceInfoToCluster arrives first, this one is happening after the endpoint is
// removed from the list, in this situation the delete will bail out not finding any data to cleanup
// and the add will bail out not finding the endpoint on the sandbox.
if err := sb.getEndpoint(ep.ID()); err == nil {
if err := sb.GetEndpoint(ep.ID()); err == nil {
log.G(context.TODO()).Warnf("addServiceInfoToCluster suppressing service resolution ep is not anymore in the sandbox %s", ep.ID())
return nil
}
@ -692,7 +692,7 @@ func (ep *Endpoint) deleteServiceInfoFromCluster(sb *Sandbox, fullRemove bool, m
// get caught in disableServceInNetworkDB, but we check here to make the
// nature of the condition more clear.
// See comment in addServiceInfoToCluster()
if err := sb.getEndpoint(ep.ID()); err == nil {
if err := sb.GetEndpoint(ep.ID()); err == nil {
log.G(context.TODO()).Warnf("deleteServiceInfoFromCluster suppressing service resolution ep is not anymore in the sandbox %s", ep.ID())
return nil
}

View file

@ -194,7 +194,7 @@ func (ep *Endpoint) Info() EndpointInfo {
return ep
}
return sb.getEndpoint(ep.ID())
return sb.GetEndpoint(ep.ID())
}
// Iface returns information about the interface which was assigned to

View file

@ -1276,8 +1276,8 @@ func (n *Network) EndpointByName(name string) (*Endpoint, error) {
return e, nil
}
// EndpointByID returns the Endpoint which has the passed id. If not found,
// the error ErrNoSuchEndpoint is returned.
// EndpointByID should *never* be called as it's going to create a 2nd instance of an Endpoint. The first one lives in
// the Sandbox the endpoint is attached to. Instead, the endpoint should be retrieved by calling [Sandbox.Endpoints()].
func (n *Network) EndpointByID(id string) (*Endpoint, error) {
if id == "" {
return nil, ErrInvalidID(id)

View file

@ -334,7 +334,7 @@ func (sb *Sandbox) removeEndpointRaw(ep *Endpoint) {
}
}
func (sb *Sandbox) getEndpoint(id string) *Endpoint {
func (sb *Sandbox) GetEndpoint(id string) *Endpoint {
sb.mu.Lock()
defer sb.mu.Unlock()
@ -554,7 +554,7 @@ func (sb *Sandbox) DisableService() (err error) {
}
func (sb *Sandbox) clearNetworkResources(origEp *Endpoint) error {
ep := sb.getEndpoint(origEp.id)
ep := sb.GetEndpoint(origEp.id)
if ep == nil {
return fmt.Errorf("could not find the sandbox endpoint data for endpoint %s",
origEp.id)

View file

@ -57,7 +57,7 @@ func (n *Network) findLBEndpointSandbox() (*Endpoint, *Sandbox, error) {
if !ok {
return nil, nil, fmt.Errorf("Unable to get sandbox for %s(%s) in for %s", ep.Name(), ep.ID(), n.ID())
}
sep := sb.getEndpoint(ep.ID())
sep := sb.GetEndpoint(ep.ID())
if sep == nil {
return nil, nil, fmt.Errorf("Load balancing endpoint %s(%s) removed from %s", ep.Name(), ep.ID(), n.ID())
}