Переглянути джерело

daemon: fix restoring container with missing task

Before 4bafaa00aa810dd17fde13e563def08f96fffc31, if the daemon was
killed while a container was running and the container shim is killed
before the daemon is restarted, such as if the host system is
hard-rebooted, the daemon would restore the container to the stopped
state and set the exit code to 255. The aforementioned commit introduced
a regression where the container's exit code would instead be set to 0.
Fix the regression so that the exit code is once again set to 255 on
restore.

Signed-off-by: Cory Snider <csnider@mirantis.com>
Cory Snider 2 роки тому
батько
коміт
165dfd6c3e
3 змінених файлів з 93 додано та 17 видалено
  1. 2 0
      daemon/daemon.go
  2. 73 17
      integration/container/daemon_linux_test.go
  3. 18 0
      testutil/daemon/daemon.go

+ 2 - 0
daemon/daemon.go

@@ -447,6 +447,8 @@ func (daemon *Daemon) restore(cfg *configStore) error {
 					if es != nil {
 						ces.ExitCode = int(es.ExitCode())
 						ces.ExitedAt = es.ExitTime()
+					} else {
+						ces.ExitCode = 255
 					}
 					c.SetStopped(&ces)
 					daemon.Cleanup(c)

+ 73 - 17
integration/container/daemon_linux_test.go

@@ -2,10 +2,8 @@ package container // import "github.com/docker/docker/integration/container"
 
 import (
 	"context"
-	"encoding/json"
 	"fmt"
 	"os"
-	"path/filepath"
 	"strconv"
 	"strings"
 	"testing"
@@ -19,6 +17,7 @@ import (
 	"golang.org/x/sys/unix"
 	"gotest.tools/v3/assert"
 	is "gotest.tools/v3/assert/cmp"
+	"gotest.tools/v3/assert/opt"
 	"gotest.tools/v3/skip"
 )
 
@@ -204,21 +203,10 @@ func TestRestartDaemonWithRestartingContainer(t *testing.T) {
 
 	d.Stop(t)
 
-	configPath := filepath.Join(d.Root, "containers", id, "config.v2.json")
-	configBytes, err := os.ReadFile(configPath)
-	assert.NilError(t, err)
-
-	var c realcontainer.Container
-
-	assert.NilError(t, json.Unmarshal(configBytes, &c))
-
-	c.State = realcontainer.NewState()
-	c.SetRestarting(&realcontainer.ExitStatus{ExitCode: 1})
-	c.HasBeenStartedBefore = true
-
-	configBytes, err = json.Marshal(&c)
-	assert.NilError(t, err)
-	assert.NilError(t, os.WriteFile(configPath, configBytes, 0600))
+	d.TamperWithContainerConfig(t, id, func(c *realcontainer.Container) {
+		c.SetRestarting(&realcontainer.ExitStatus{ExitCode: 1})
+		c.HasBeenStartedBefore = true
+	})
 
 	d.Start(t)
 
@@ -231,3 +219,71 @@ func TestRestartDaemonWithRestartingContainer(t *testing.T) {
 		assert.NilError(t, err)
 	}
 }
+
+// TestHardRestartWhenContainerIsRunning simulates the case where dockerd is
+// killed while a container is running, and the container's task no longer
+// exists when dockerd starts back up. This can happen if the system is
+// hard-rebooted, for example.
+//
+// Regression test for moby/moby#45788
+func TestHardRestartWhenContainerIsRunning(t *testing.T) {
+	skip.If(t, testEnv.IsRemoteDaemon, "cannot start daemon on remote test run")
+	skip.If(t, testEnv.DaemonInfo.OSType == "windows")
+
+	t.Parallel()
+
+	d := daemon.New(t)
+	defer d.Cleanup(t)
+
+	d.StartWithBusybox(t, "--iptables=false")
+	defer d.Stop(t)
+
+	ctx := context.Background()
+	client := d.NewClientT(t)
+
+	// Only create the containers; they are never started, which guarantees that
+	// no container process exists when the daemon comes back up. The "running"
+	// state is faked further down by rewriting the on-disk container config.
+	nopolicy := container.Create(ctx, t, client, container.WithCmd("/bin/sh", "-c", "exit 1"))
+	onfailure := container.Create(ctx, t, client, container.WithRestartPolicy("on-failure"), container.WithCmd("/bin/sh", "-c", "sleep 60"))
+
+	d.Stop(t) // the config files are edited while the daemon is down
+
+	for _, id := range []string{nopolicy, onfailure} {
+		d.TamperWithContainerConfig(t, id, func(c *realcontainer.Container) {
+			c.SetRunning(nil, nil, true) // record a running state although nothing was ever started
+			c.HasBeenStartedBefore = true
+		})
+	}
+
+	d.Start(t) // the daemon now restores containers whose tasks are missing
+
+	t.Run("RestartPolicy=none", func(t *testing.T) {
+		ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
+		defer cancel()
+		inspect, err := client.ContainerInspect(ctx, nopolicy)
+		assert.NilError(t, err)
+		assert.Check(t, is.Equal(inspect.State.Status, "exited"))
+		assert.Check(t, is.Equal(inspect.State.ExitCode, 255)) // the regression under test reported 0 here
+		finishedAt, err := time.Parse(time.RFC3339Nano, inspect.State.FinishedAt)
+		if assert.Check(t, err) {
+			assert.Check(t, is.DeepEqual(finishedAt, time.Now(), opt.TimeWithThreshold(time.Minute)))
+		}
+	})
+
+	t.Run("RestartPolicy=on-failure", func(t *testing.T) {
+		ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
+		defer cancel()
+		inspect, err := client.ContainerInspect(ctx, onfailure)
+		assert.NilError(t, err)
+		assert.Check(t, is.Equal(inspect.State.Status, "running")) // presumably restarted by the on-failure policy after the non-zero exit; confirm
+		assert.Check(t, is.Equal(inspect.State.ExitCode, 0))
+		finishedAt, err := time.Parse(time.RFC3339Nano, inspect.State.FinishedAt)
+		if assert.Check(t, err) {
+			assert.Check(t, is.DeepEqual(finishedAt, time.Now(), opt.TimeWithThreshold(time.Minute)))
+		}
+
+		stopTimeout := 0
+		assert.Assert(t, client.ContainerStop(ctx, onfailure, containerapi.StopOptions{Timeout: &stopTimeout}))
+	})
+}

+ 18 - 0
testutil/daemon/daemon.go

@@ -16,6 +16,7 @@ import (
 	"github.com/docker/docker/api/types"
 	"github.com/docker/docker/api/types/events"
 	"github.com/docker/docker/client"
+	"github.com/docker/docker/container"
 	"github.com/docker/docker/pkg/ioutils"
 	"github.com/docker/docker/pkg/stringid"
 	"github.com/docker/docker/testutil/request"
@@ -825,6 +826,23 @@ func (d *Daemon) Info(t testing.TB) types.Info {
 	return info
 }
 
+// TamperWithContainerConfig rewrites a container's on-disk config.v2.json after resetting its State and applying tamper.
+func (d *Daemon) TamperWithContainerConfig(t testing.TB, containerID string, tamper func(*container.Container)) {
+	t.Helper()
+
+	configPath := filepath.Join(d.Root, "containers", containerID, "config.v2.json")
+	configBytes, err := os.ReadFile(configPath)
+	assert.NilError(t, err)
+
+	var c container.Container
+	assert.NilError(t, json.Unmarshal(configBytes, &c))
+	c.State = container.NewState() // discard the persisted state so tamper starts from a fresh State
+	tamper(&c)
+	configBytes, err = json.Marshal(&c)
+	assert.NilError(t, err)
+	assert.NilError(t, os.WriteFile(configPath, configBytes, 0600)) // overwrite the original file in place
+}
+
 // cleanupRaftDir removes swarmkit wal files if present
 func cleanupRaftDir(t testing.TB, d *Daemon) {
 	t.Helper()