daemon: fix restoring container with missing task

Before 4bafaa00aa, if the daemon was
killed while a container was running and the container's shim was
killed before the daemon was restarted, such as when the host system
is hard-rebooted, the daemon would restore the container to the
stopped state and set its exit code to 255. The aforementioned commit
introduced a regression where the container's exit code would instead
be set to 0. Fix the regression so that the exit code is once again
set to 255 on restore.

Signed-off-by: Cory Snider <csnider@mirantis.com>
(cherry picked from commit 165dfd6c3e)
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
3 changed files with 93 additions and 17 deletions

diff --git a/daemon/daemon.go b/daemon/daemon.go

@@ -420,6 +420,8 @@ func (daemon *Daemon) restore() error {
 				if es != nil {
 					ces.ExitCode = int(es.ExitCode())
 					ces.ExitedAt = es.ExitTime()
+				} else {
+					ces.ExitCode = 255
 				}
 				c.SetStopped(&ces)
 				daemon.Cleanup(c)
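The fix is the new else branch: when containerd has no exit status for the container's task (es is nil), there is no real status to restore, so the daemon falls back to 255 as it did before 4bafaa00aa, instead of leaving the zero value in place and reporting a clean exit. Below is a minimal sketch of how an API client observes the restored state, assuming a container named "app" with no restart policy that was running across a hard reboot; the name and the standalone program are illustrative, not part of this commit.

package main

import (
	"context"
	"fmt"

	"github.com/docker/docker/client"
)

func main() {
	// Connect to the local daemon using the standard environment variables.
	cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
	if err != nil {
		panic(err)
	}
	defer cli.Close()

	// "app" is a hypothetical container that was running when the host
	// was hard-rebooted, so its task no longer exists on restore.
	inspect, err := cli.ContainerInspect(context.Background(), "app")
	if err != nil {
		panic(err)
	}

	// With this fix: "exited 255". With the regression: "exited 0".
	fmt.Printf("%s %d\n", inspect.State.Status, inspect.State.ExitCode)
}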

diff --git a/integration/container/restart_test.go b/integration/container/restart_test.go

@@ -2,10 +2,8 @@ package container // import "github.com/docker/docker/integration/container"
 
 import (
 	"context"
-	"encoding/json"
 	"fmt"
-	"os"
 	"path/filepath"
 	"strconv"
 	"strings"
 	"testing"
@@ -19,6 +17,7 @@ import (
 	"golang.org/x/sys/unix"
 	"gotest.tools/v3/assert"
 	is "gotest.tools/v3/assert/cmp"
+	"gotest.tools/v3/assert/opt"
 	"gotest.tools/v3/skip"
 )
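The newly imported gotest.tools/v3/assert/opt package provides comparison options for assertions; the test added further down uses opt.TimeWithThreshold so the FinishedAt check tolerates up to a minute of difference from the current time. A minimal self-contained illustration of the pattern (the test name and timestamps are invented for the example):

package example

import (
	"testing"
	"time"

	"gotest.tools/v3/assert"
	is "gotest.tools/v3/assert/cmp"
	"gotest.tools/v3/assert/opt"
)

// TestFinishedRecently demonstrates the threshold comparison: the two
// timestamps compare as equal because they differ by less than a minute.
func TestFinishedRecently(t *testing.T) {
	finishedAt := time.Now().Add(-30 * time.Second)
	assert.Check(t, is.DeepEqual(finishedAt, time.Now(), opt.TimeWithThreshold(time.Minute)))
}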
@@ -204,21 +203,10 @@ func TestRestartDaemonWithRestartingContainer(t *testing.T) {
 	d.Stop(t)
 
-	configPath := filepath.Join(d.Root, "containers", id, "config.v2.json")
-	configBytes, err := os.ReadFile(configPath)
-	assert.NilError(t, err)
-
-	var c realcontainer.Container
-	assert.NilError(t, json.Unmarshal(configBytes, &c))
-
-	c.State = realcontainer.NewState()
-	c.SetRestarting(&realcontainer.ExitStatus{ExitCode: 1})
-	c.HasBeenStartedBefore = true
-
-	configBytes, err = json.Marshal(&c)
-	assert.NilError(t, err)
-	assert.NilError(t, os.WriteFile(configPath, configBytes, 0600))
+	d.TamperWithContainerConfig(t, id, func(c *realcontainer.Container) {
+		c.SetRestarting(&realcontainer.ExitStatus{ExitCode: 1})
+		c.HasBeenStartedBefore = true
+	})
 
 	d.Start(t)
@@ -231,3 +219,71 @@ func TestRestartDaemonWithRestartingContainer(t *testing.T) {
 		assert.NilError(t, err)
 	}
 }
+
+// TestHardRestartWhenContainerIsRunning simulates a case where dockerd is
+// killed while a container is running, and the container's task no longer
+// exists when dockerd starts back up. This can happen if the system is
+// hard-rebooted, for example.
+//
+// Regression test for moby/moby#45788
+func TestHardRestartWhenContainerIsRunning(t *testing.T) {
+	skip.If(t, testEnv.IsRemoteDaemon, "cannot start daemon on remote test run")
+	skip.If(t, testEnv.DaemonInfo.OSType == "windows")
+
+	t.Parallel()
+
+	d := daemon.New(t)
+	defer d.Cleanup(t)
+	d.StartWithBusybox(t, "--iptables=false")
+	defer d.Stop(t)
+
+	ctx := context.Background()
+	client := d.NewClientT(t)
+
+	// Just create the containers, no need to start them.
+	// We really want to make sure there is no process running when docker starts back up.
+	// We will manipulate the on disk state later.
+	nopolicy := container.Create(ctx, t, client, container.WithCmd("/bin/sh", "-c", "exit 1"))
+	onfailure := container.Create(ctx, t, client, container.WithRestartPolicy("on-failure"), container.WithCmd("/bin/sh", "-c", "sleep 60"))
+
+	d.Stop(t)
+
+	for _, id := range []string{nopolicy, onfailure} {
+		d.TamperWithContainerConfig(t, id, func(c *realcontainer.Container) {
+			c.SetRunning(nil, nil, true)
+			c.HasBeenStartedBefore = true
+		})
+	}
+
+	d.Start(t)
+
+	t.Run("RestartPolicy=none", func(t *testing.T) {
+		ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
+		defer cancel()
+
+		inspect, err := client.ContainerInspect(ctx, nopolicy)
+		assert.NilError(t, err)
+		assert.Check(t, is.Equal(inspect.State.Status, "exited"))
+		assert.Check(t, is.Equal(inspect.State.ExitCode, 255))
+		finishedAt, err := time.Parse(time.RFC3339Nano, inspect.State.FinishedAt)
+		if assert.Check(t, err) {
+			assert.Check(t, is.DeepEqual(finishedAt, time.Now(), opt.TimeWithThreshold(time.Minute)))
+		}
+	})
+
+	t.Run("RestartPolicy=on-failure", func(t *testing.T) {
+		ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
+		defer cancel()
+
+		inspect, err := client.ContainerInspect(ctx, onfailure)
+		assert.NilError(t, err)
+		assert.Check(t, is.Equal(inspect.State.Status, "running"))
+		assert.Check(t, is.Equal(inspect.State.ExitCode, 0))
+		finishedAt, err := time.Parse(time.RFC3339Nano, inspect.State.FinishedAt)
+		if assert.Check(t, err) {
+			assert.Check(t, is.DeepEqual(finishedAt, time.Now(), opt.TimeWithThreshold(time.Minute)))
+		}
+
+		stopTimeout := 0
+		assert.Assert(t, client.ContainerStop(ctx, onfailure, containerapi.StopOptions{Timeout: &stopTimeout}))
+	})
+}
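A note on why the two subtests expect different outcomes: the restored exit code 255 is non-zero, so the container without a restart policy is left in the exited state, while the on-failure policy treats it as a failed exit and restarts the container, which is why the second subtest expects status "running" and a freshly reset exit code of 0. A rough sketch of that decision as the test exercises it; the function and its names are illustrative, not the daemon's actual restart-manager code:

package example

// decideOnRestore sketches the behavior the two subtests assert for a
// container whose task vanished while the daemon was down.
func decideOnRestore(restartPolicy string, restoredExitCode int) string {
	// The missing task was recorded as exit code 255 by the fix above.
	if restartPolicy == "on-failure" && restoredExitCode != 0 {
		return "restart" // inspect later shows "running", exit code reset to 0
	}
	return "stay stopped" // inspect shows "exited" with the recorded code
}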

diff --git a/testutil/daemon/daemon.go b/testutil/daemon/daemon.go

@@ -16,6 +16,7 @@ import (
 	"github.com/docker/docker/api/types"
 	"github.com/docker/docker/api/types/events"
 	"github.com/docker/docker/client"
+	"github.com/docker/docker/container"
 	"github.com/docker/docker/pkg/ioutils"
 	"github.com/docker/docker/pkg/stringid"
 	"github.com/docker/docker/testutil/request"
@@ -825,6 +826,23 @@ func (d *Daemon) Info(t testing.TB) types.Info {
 	return info
 }
 
+// TamperWithContainerConfig modifies the on-disk config of a container.
+func (d *Daemon) TamperWithContainerConfig(t testing.TB, containerID string, tamper func(*container.Container)) {
+	t.Helper()
+
+	configPath := filepath.Join(d.Root, "containers", containerID, "config.v2.json")
+	configBytes, err := os.ReadFile(configPath)
+	assert.NilError(t, err)
+
+	var c container.Container
+	assert.NilError(t, json.Unmarshal(configBytes, &c))
+	c.State = container.NewState()
+	tamper(&c)
+	configBytes, err = json.Marshal(&c)
+	assert.NilError(t, err)
+	assert.NilError(t, os.WriteFile(configPath, configBytes, 0600))
+}
+
 // cleanupRaftDir removes swarmkit wal files if present
 func cleanupRaftDir(t testing.TB, d *Daemon) {
 	t.Helper()
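For reference, a usage sketch of the new helper, mirroring the call pattern in restart_test.go above. markRunningBeforeRestart is an invented name for the example, and realcontainer aliases github.com/docker/docker/container just as the tests do:

package example

import (
	"testing"

	realcontainer "github.com/docker/docker/container"
	"github.com/docker/docker/testutil/daemon"
)

// markRunningBeforeRestart stops the daemon, rewrites the container's
// persisted state so it looks like it was running with no live task, then
// starts the daemon so the restore path sees the tampered state.
func markRunningBeforeRestart(t *testing.T, d *daemon.Daemon, containerID string) {
	t.Helper()

	d.Stop(t)
	d.TamperWithContainerConfig(t, containerID, func(c *realcontainer.Container) {
		c.SetRunning(nil, nil, true)
		c.HasBeenStartedBefore = true
	})
	d.Start(t)
}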