
Ensure containers are stopped on daemon startup

When the containerd 1.0 runtime changes were made, we inadvertently
removed the functionality where any running containers are killed on
startup when not using live-restore.
This change restores that behavior.

Signed-off-by: Brian Goff <cpuguy83@gmail.com>
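
For context, the behavior restored here only kicks in when live-restore is disabled. Live-restore is typically turned on either with the daemon flag

    dockerd --live-restore

or by setting "live-restore": true in the daemon configuration file (commonly /etc/docker/daemon.json). With it enabled, containers are left running across a daemon restart; with it disabled, the daemon once again kills any containers it finds still running on startup, as it did before the containerd 1.0 changes.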
Brian Goff 7 years ago
parent
commit
e69127bd5b
2 changed files with 135 additions and 18 deletions
  1. daemon/daemon.go (+23 -18)
  2. integration/container/restart_test.go (+112 -0)

+ 23 - 18
daemon/daemon.go

@@ -247,6 +247,11 @@ func (daemon *Daemon) restore() error {
 					logrus.WithError(err).Errorf("Failed to delete container %s from containerd", c.ID)
 					return
 				}
+			} else if !daemon.configStore.LiveRestoreEnabled {
+				if err := daemon.kill(c, c.StopSignal()); err != nil && !errdefs.IsNotFound(err) {
+					logrus.WithError(err).WithField("container", c.ID).Error("error shutting down container")
+					return
+				}
 			}
 
 			if c.IsRunning() || c.IsPaused() {
@@ -317,24 +322,24 @@ func (daemon *Daemon) restore() error {
 					activeSandboxes[c.NetworkSettings.SandboxID] = options
 					mapLock.Unlock()
 				}
-			} else {
-				// get list of containers we need to restart
-
-				// Do not autostart containers which
-				// has endpoints in a swarm scope
-				// network yet since the cluster is
-				// not initialized yet. We will start
-				// it after the cluster is
-				// initialized.
-				if daemon.configStore.AutoRestart && c.ShouldRestart() && !c.NetworkSettings.HasSwarmEndpoint {
-					mapLock.Lock()
-					restartContainers[c] = make(chan struct{})
-					mapLock.Unlock()
-				} else if c.HostConfig != nil && c.HostConfig.AutoRemove {
-					mapLock.Lock()
-					removeContainers[c.ID] = c
-					mapLock.Unlock()
-				}
+			}
+
+			// get list of containers we need to restart
+
+			// Do not autostart containers which
+			// has endpoints in a swarm scope
+			// network yet since the cluster is
+			// not initialized yet. We will start
+			// it after the cluster is
+			// initialized.
+			if daemon.configStore.AutoRestart && c.ShouldRestart() && !c.NetworkSettings.HasSwarmEndpoint {
+				mapLock.Lock()
+				restartContainers[c] = make(chan struct{})
+				mapLock.Unlock()
+			} else if c.HostConfig != nil && c.HostConfig.AutoRemove {
+				mapLock.Lock()
+				removeContainers[c.ID] = c
+				mapLock.Unlock()
 			}
 
 			c.Lock()

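Reading the second hunk: the restart/auto-remove bookkeeping used to sit in the else branch of the `if c.IsRunning() || c.IsPaused()` check, so it only ran for containers the daemon did not consider running. After this change it runs for every restored container, once the task-state handling (including the new kill when live-restore is disabled) is done. Below is a simplified, standalone sketch of the resulting per-container flow; the names (containerInfo, restoreActions) are invented for illustration and are not the daemon's real types or helpers.

// Illustrative sketch only: a standalone model of the per-container
// decisions restore() makes after this commit.
package main

import "fmt"

type containerInfo struct {
	taskAlive          bool // containerd still has a live task for it
	liveRestoreEnabled bool
	shouldRestart      bool // stand-in for the AutoRestart/ShouldRestart/swarm-endpoint checks
	autoRemove         bool
}

func restoreActions(c containerInfo) []string {
	var actions []string
	if c.taskAlive && !c.liveRestoreEnabled {
		// New in this commit: without live-restore, a still-running task is killed.
		actions = append(actions, "kill running task")
	}
	// Previously this bookkeeping only ran when the container was not
	// considered running or paused; it now runs for every container.
	switch {
	case c.shouldRestart:
		actions = append(actions, "queue for restart")
	case c.autoRemove:
		actions = append(actions, "queue for auto-remove")
	}
	return actions
}

func main() {
	fmt.Println(restoreActions(containerInfo{taskAlive: true, shouldRestart: true}))
	// prints: [kill running task queue for restart]
}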
+ 112 - 0
integration/container/restart_test.go

@@ -0,0 +1,112 @@
+package container
+
+import (
+	"context"
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/docker/docker/api/types"
+	"github.com/docker/docker/api/types/container"
+	"github.com/docker/docker/integration-cli/daemon"
+)
+
+func TestDaemonRestartKillContainers(t *testing.T) {
+	type testCase struct {
+		desc       string
+		config     *container.Config
+		hostConfig *container.HostConfig
+
+		xRunning            bool
+		xRunningLiveRestore bool
+	}
+
+	for _, c := range []testCase{
+		{
+			desc:                "container without restart policy",
+			config:              &container.Config{Image: "busybox", Cmd: []string{"top"}},
+			xRunningLiveRestore: true,
+		},
+		{
+			desc:                "container with restart=always",
+			config:              &container.Config{Image: "busybox", Cmd: []string{"top"}},
+			hostConfig:          &container.HostConfig{RestartPolicy: container.RestartPolicy{Name: "always"}},
+			xRunning:            true,
+			xRunningLiveRestore: true,
+		},
+	} {
+		for _, liveRestoreEnabled := range []bool{false, true} {
+			for fnName, stopDaemon := range map[string]func(*testing.T, *daemon.Daemon){
+				"kill-daemon": func(t *testing.T, d *daemon.Daemon) {
+					if err := d.Kill(); err != nil {
+						t.Fatal(err)
+					}
+				},
+				"stop-daemon": func(t *testing.T, d *daemon.Daemon) {
+					d.Stop(t)
+				},
+			} {
+				t.Run(fmt.Sprintf("live-restore=%v/%s/%s", liveRestoreEnabled, c.desc, fnName), func(t *testing.T) {
+					c := c
+					liveRestoreEnabled := liveRestoreEnabled
+					stopDaemon := stopDaemon
+
+					t.Parallel()
+
+					d := daemon.New(t, "", "dockerd", daemon.Config{})
+					client, err := d.NewClient()
+					if err != nil {
+						t.Fatal(err)
+					}
+
+					var args []string
+					if liveRestoreEnabled {
+						args = []string{"--live-restore"}
+					}
+
+					d.StartWithBusybox(t, args...)
+					defer d.Stop(t)
+					ctx := context.Background()
+
+					resp, err := client.ContainerCreate(ctx, c.config, c.hostConfig, nil, "")
+					if err != nil {
+						t.Fatal(err)
+					}
+					defer client.ContainerRemove(ctx, resp.ID, types.ContainerRemoveOptions{Force: true})
+
+					if err := client.ContainerStart(ctx, resp.ID, types.ContainerStartOptions{}); err != nil {
+						t.Fatal(err)
+					}
+
+					stopDaemon(t, d)
+					d.Start(t, args...)
+
+					expected := c.xRunning
+					if liveRestoreEnabled {
+						expected = c.xRunningLiveRestore
+					}
+
+					var running bool
+					for i := 0; i < 30; i++ {
+						inspect, err := client.ContainerInspect(ctx, resp.ID)
+						if err != nil {
+							t.Fatal(err)
+						}
+
+						running = inspect.State.Running
+						if running == expected {
+							break
+						}
+						time.Sleep(2 * time.Second)
+
+					}
+
+					if running != expected {
+						t.Fatalf("got unexpected running state, expected %v, got: %v", expected, running)
+					}
+					// TODO(cpuguy83): test pause states... this seems to be rather undefined currently
+				})
+			}
+		}
+	}
+}
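
The new test covers both the hard-kill (d.Kill) and graceful-stop (d.Stop) daemon shutdown paths, each with and without --live-restore. As a rough sketch, assuming a Moby checkout with the integration test environment set up (a dockerd binary and the busybox image available to the suite), it could be invoked with something like:

    go test -v -run TestDaemonRestartKillContainers ./integration/container/

The exact invocation depends on how the integration environment is provisioned.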