daemon: fix restoring container with missing task
Before commit 4bafaa00aa, if the daemon was
killed while a container was running and the container shim is killed
before the daemon is restarted, such as if the host system is
hard-rebooted, the daemon would restore the container to the stopped
state and set the exit code to 255. The aforementioned commit introduced
a regression where the container's exit code would instead be set to 0.
Fix the regression so that the exit code is once again set to 255 on
restore.
Signed-off-by: Cory Snider <csnider@mirantis.com>
This commit is contained in:
parent
8d070e30f5
commit
165dfd6c3e
3 changed files with 93 additions and 17 deletions
|
@ -447,6 +447,8 @@ func (daemon *Daemon) restore(cfg *configStore) error {
|
|||
if es != nil {
|
||||
ces.ExitCode = int(es.ExitCode())
|
||||
ces.ExitedAt = es.ExitTime()
|
||||
} else {
|
||||
ces.ExitCode = 255
|
||||
}
|
||||
c.SetStopped(&ces)
|
||||
daemon.Cleanup(c)
|
||||
|
|
|
@ -2,10 +2,8 @@ package container // import "github.com/docker/docker/integration/container"
|
|||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
|
@ -19,6 +17,7 @@ import (
|
|||
"golang.org/x/sys/unix"
|
||||
"gotest.tools/v3/assert"
|
||||
is "gotest.tools/v3/assert/cmp"
|
||||
"gotest.tools/v3/assert/opt"
|
||||
"gotest.tools/v3/skip"
|
||||
)
|
||||
|
||||
|
@ -204,21 +203,10 @@ func TestRestartDaemonWithRestartingContainer(t *testing.T) {
|
|||
|
||||
d.Stop(t)
|
||||
|
||||
configPath := filepath.Join(d.Root, "containers", id, "config.v2.json")
|
||||
configBytes, err := os.ReadFile(configPath)
|
||||
assert.NilError(t, err)
|
||||
|
||||
var c realcontainer.Container
|
||||
|
||||
assert.NilError(t, json.Unmarshal(configBytes, &c))
|
||||
|
||||
c.State = realcontainer.NewState()
|
||||
c.SetRestarting(&realcontainer.ExitStatus{ExitCode: 1})
|
||||
c.HasBeenStartedBefore = true
|
||||
|
||||
configBytes, err = json.Marshal(&c)
|
||||
assert.NilError(t, err)
|
||||
assert.NilError(t, os.WriteFile(configPath, configBytes, 0600))
|
||||
d.TamperWithContainerConfig(t, id, func(c *realcontainer.Container) {
|
||||
c.SetRestarting(&realcontainer.ExitStatus{ExitCode: 1})
|
||||
c.HasBeenStartedBefore = true
|
||||
})
|
||||
|
||||
d.Start(t)
|
||||
|
||||
|
@ -231,3 +219,71 @@ func TestRestartDaemonWithRestartingContainer(t *testing.T) {
|
|||
assert.NilError(t, err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestHardRestartWhenContainerIsRunning simulates a case where dockerd is
|
||||
// killed while a container is running, and the container's task no longer
|
||||
// exists when dockerd starts back up. This can happen if the system is
|
||||
// hard-rebooted, for example.
|
||||
//
|
||||
// Regression test for moby/moby#45788
|
||||
func TestHardRestartWhenContainerIsRunning(t *testing.T) {
|
||||
skip.If(t, testEnv.IsRemoteDaemon, "cannot start daemon on remote test run")
|
||||
skip.If(t, testEnv.DaemonInfo.OSType == "windows")
|
||||
|
||||
t.Parallel()
|
||||
|
||||
d := daemon.New(t)
|
||||
defer d.Cleanup(t)
|
||||
|
||||
d.StartWithBusybox(t, "--iptables=false")
|
||||
defer d.Stop(t)
|
||||
|
||||
ctx := context.Background()
|
||||
client := d.NewClientT(t)
|
||||
|
||||
// Just create the containers, no need to start them.
|
||||
// We really want to make sure there is no process running when docker starts back up.
|
||||
// We will manipulate the on disk state later.
|
||||
nopolicy := container.Create(ctx, t, client, container.WithCmd("/bin/sh", "-c", "exit 1"))
|
||||
onfailure := container.Create(ctx, t, client, container.WithRestartPolicy("on-failure"), container.WithCmd("/bin/sh", "-c", "sleep 60"))
|
||||
|
||||
d.Stop(t)
|
||||
|
||||
for _, id := range []string{nopolicy, onfailure} {
|
||||
d.TamperWithContainerConfig(t, id, func(c *realcontainer.Container) {
|
||||
c.SetRunning(nil, nil, true)
|
||||
c.HasBeenStartedBefore = true
|
||||
})
|
||||
}
|
||||
|
||||
d.Start(t)
|
||||
|
||||
t.Run("RestartPolicy=none", func(t *testing.T) {
|
||||
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||||
defer cancel()
|
||||
inspect, err := client.ContainerInspect(ctx, nopolicy)
|
||||
assert.NilError(t, err)
|
||||
assert.Check(t, is.Equal(inspect.State.Status, "exited"))
|
||||
assert.Check(t, is.Equal(inspect.State.ExitCode, 255))
|
||||
finishedAt, err := time.Parse(time.RFC3339Nano, inspect.State.FinishedAt)
|
||||
if assert.Check(t, err) {
|
||||
assert.Check(t, is.DeepEqual(finishedAt, time.Now(), opt.TimeWithThreshold(time.Minute)))
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("RestartPolicy=on-failure", func(t *testing.T) {
|
||||
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||||
defer cancel()
|
||||
inspect, err := client.ContainerInspect(ctx, onfailure)
|
||||
assert.NilError(t, err)
|
||||
assert.Check(t, is.Equal(inspect.State.Status, "running"))
|
||||
assert.Check(t, is.Equal(inspect.State.ExitCode, 0))
|
||||
finishedAt, err := time.Parse(time.RFC3339Nano, inspect.State.FinishedAt)
|
||||
if assert.Check(t, err) {
|
||||
assert.Check(t, is.DeepEqual(finishedAt, time.Now(), opt.TimeWithThreshold(time.Minute)))
|
||||
}
|
||||
|
||||
stopTimeout := 0
|
||||
assert.Assert(t, client.ContainerStop(ctx, onfailure, containerapi.StopOptions{Timeout: &stopTimeout}))
|
||||
})
|
||||
}
|
||||
|
|
|
@ -16,6 +16,7 @@ import (
|
|||
"github.com/docker/docker/api/types"
|
||||
"github.com/docker/docker/api/types/events"
|
||||
"github.com/docker/docker/client"
|
||||
"github.com/docker/docker/container"
|
||||
"github.com/docker/docker/pkg/ioutils"
|
||||
"github.com/docker/docker/pkg/stringid"
|
||||
"github.com/docker/docker/testutil/request"
|
||||
|
@ -825,6 +826,23 @@ func (d *Daemon) Info(t testing.TB) types.Info {
|
|||
return info
|
||||
}
|
||||
|
||||
// TamperWithContainerConfig modifies the on-disk config of a container.
|
||||
func (d *Daemon) TamperWithContainerConfig(t testing.TB, containerID string, tamper func(*container.Container)) {
|
||||
t.Helper()
|
||||
|
||||
configPath := filepath.Join(d.Root, "containers", containerID, "config.v2.json")
|
||||
configBytes, err := os.ReadFile(configPath)
|
||||
assert.NilError(t, err)
|
||||
|
||||
var c container.Container
|
||||
assert.NilError(t, json.Unmarshal(configBytes, &c))
|
||||
c.State = container.NewState()
|
||||
tamper(&c)
|
||||
configBytes, err = json.Marshal(&c)
|
||||
assert.NilError(t, err)
|
||||
assert.NilError(t, os.WriteFile(configPath, configBytes, 0600))
|
||||
}
|
||||
|
||||
// cleanupRaftDir removes swarmkit wal files if present
|
||||
func cleanupRaftDir(t testing.TB, d *Daemon) {
|
||||
t.Helper()
|
||||
|
|
Loading…
Reference in a new issue