daemon: daemon.containerRestart: don't cancel restart on context cancel
commitdef549c8f6
passed through the context to the daemon.ContainerStart function. As a result, restarting containers no longer is an atomic operation, because a context cancellation could interrupt the restart (between "stopping" and "(re)starting"), resulting in the container being stopped, but not restarted. Restarting a container, or more factually; making a successful request on the `/containers/{id]/restart` endpoint, should be an atomic operation. This patch uses a context.WithoutCancel for restart requests. It's worth noting that daemon.containerStop already uses context.WithoutCancel, so in that function, we'll be wrapping the context twice, but this should likely not cause issues (just redundant for this code-path). Before this patch, starting a container that bind-mounts the docker socket, then restarting itself from within the container would cancel the restart operation. The container would be stopped, but not started after that: docker run -dit --name myself -v /var/run/docker.sock:/var/run/docker.sock docker:cli sh docker exec myself sh -c 'docker restart myself' docker ps -a CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES 3a2a741c65ff docker:cli "docker-entrypoint.s…" 26 seconds ago Exited (128) 7 seconds ago myself With this patch: the stop still cancels the exec, but does not cancel the restart operation, and the container is started again: docker run -dit --name myself -v /var/run/docker.sock:/var/run/docker.sock docker:cli sh docker exec myself sh -c 'docker restart myself' docker ps CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES 4393a01f7c75 docker:cli "docker-entrypoint.s…" About a minute ago Up 4 seconds myself Signed-off-by: Sebastiaan van Stijn <github@gone.nl> (cherry picked from commitaeb8972281
) Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
This commit is contained in:
parent
9b20b1a5fe
commit
05d7386665
2 changed files with 77 additions and 0 deletions
|
@ -6,6 +6,7 @@ import (
|
|||
|
||||
containertypes "github.com/docker/docker/api/types/container"
|
||||
"github.com/docker/docker/container"
|
||||
"github.com/docker/docker/internal/compatcontext"
|
||||
)
|
||||
|
||||
// ContainerRestart stops and starts a container. It attempts to
|
||||
|
@ -31,6 +32,10 @@ func (daemon *Daemon) ContainerRestart(ctx context.Context, name string, options
|
|||
// gracefully stop, before forcefully terminating the container. If
|
||||
// given a negative duration, wait forever for a graceful stop.
|
||||
func (daemon *Daemon) containerRestart(ctx context.Context, container *container.Container, options containertypes.StopOptions) error {
|
||||
// Restarting is expected to be an atomic operation, and cancelling
|
||||
// the request should not cancel the stop -> start sequence.
|
||||
ctx = compatcontext.WithoutCancel(ctx)
|
||||
|
||||
// Determine isolation. If not specified in the hostconfig, use daemon default.
|
||||
actualIsolation := container.HostConfig.Isolation
|
||||
if containertypes.Isolation.IsDefault(actualIsolation) {
|
||||
|
|
|
@ -3,15 +3,18 @@ package container // import "github.com/docker/docker/integration/container"
|
|||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"runtime"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/docker/docker/api/types"
|
||||
"github.com/docker/docker/api/types/container"
|
||||
"github.com/docker/docker/api/types/filters"
|
||||
"github.com/docker/docker/client"
|
||||
testContainer "github.com/docker/docker/integration/internal/container"
|
||||
"github.com/docker/docker/testutil/daemon"
|
||||
"gotest.tools/v3/assert"
|
||||
is "gotest.tools/v3/assert/cmp"
|
||||
"gotest.tools/v3/poll"
|
||||
"gotest.tools/v3/skip"
|
||||
)
|
||||
|
@ -208,3 +211,72 @@ func TestContainerWithAutoRemoveCanBeRestarted(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestContainerRestartWithCancelledRequest verifies that cancelling a restart
|
||||
// request does not cancel the restart operation, and still starts the container
|
||||
// after it was stopped.
|
||||
//
|
||||
// Regression test for https://github.com/moby/moby/discussions/46682
|
||||
func TestContainerRestartWithCancelledRequest(t *testing.T) {
|
||||
ctx := context.TODO()
|
||||
defer setupTest(t)()
|
||||
apiClient := testEnv.APIClient()
|
||||
|
||||
// Create a container that ignores SIGTERM and doesn't stop immediately,
|
||||
// giving us time to cancel the request.
|
||||
//
|
||||
// Restarting a container is "stop" (and, if needed, "kill"), then "start"
|
||||
// the container. We're trying to create the scenario where the "stop" is
|
||||
// handled, but the request was cancelled and therefore the "start" not
|
||||
// taking place.
|
||||
cID := testContainer.Run(ctx, t, apiClient, testContainer.WithCmd("sh", "-c", "trap 'echo received TERM' TERM; while true; do usleep 10; done"))
|
||||
defer func() {
|
||||
err := apiClient.ContainerRemove(ctx, cID, types.ContainerRemoveOptions{Force: true})
|
||||
if t.Failed() && err != nil {
|
||||
t.Logf("Cleaning up test container failed with error: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
// Start listening for events.
|
||||
messages, errs := apiClient.Events(ctx, types.EventsOptions{
|
||||
Filters: filters.NewArgs(
|
||||
filters.Arg("container", cID),
|
||||
filters.Arg("event", "restart"),
|
||||
),
|
||||
})
|
||||
|
||||
// Make restart request, but cancel the request before the container
|
||||
// is (forcibly) killed.
|
||||
ctx2, cancel := context.WithTimeout(ctx, 100*time.Millisecond)
|
||||
stopTimeout := 1
|
||||
err := apiClient.ContainerRestart(ctx2, cID, container.StopOptions{
|
||||
Timeout: &stopTimeout,
|
||||
})
|
||||
assert.Check(t, is.ErrorIs(err, context.DeadlineExceeded))
|
||||
cancel()
|
||||
|
||||
// Validate that the restart event occurred, which is emitted
|
||||
// after the restart (stop (kill) start) finished.
|
||||
//
|
||||
// Note that we cannot use RestartCount for this, as that's only
|
||||
// used for restart-policies.
|
||||
restartTimeout := 2 * time.Second
|
||||
if runtime.GOOS == "windows" {
|
||||
// hcs can sometimes take a long time to stop container.
|
||||
restartTimeout = StopContainerWindowsPollTimeout
|
||||
}
|
||||
select {
|
||||
case m := <-messages:
|
||||
assert.Check(t, is.Equal(m.Actor.ID, cID))
|
||||
assert.Check(t, is.Equal(m.Action, "restart"))
|
||||
case err := <-errs:
|
||||
assert.NilError(t, err)
|
||||
case <-time.After(restartTimeout):
|
||||
t.Errorf("timeout waiting for restart event")
|
||||
}
|
||||
|
||||
// Container should be restarted (running).
|
||||
inspect, err := apiClient.ContainerInspect(ctx, cID)
|
||||
assert.NilError(t, err)
|
||||
assert.Check(t, is.Equal(inspect.State.Status, "running"))
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue