restart_test.go 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. package container // import "github.com/docker/docker/integration/container"
  2. import (
  3. "context"
  4. "fmt"
  5. "runtime"
  6. "testing"
  7. "time"
  8. "github.com/docker/docker/api/types"
  9. "github.com/docker/docker/api/types/container"
  10. "github.com/docker/docker/api/types/events"
  11. "github.com/docker/docker/api/types/filters"
  12. "github.com/docker/docker/client"
  13. testContainer "github.com/docker/docker/integration/internal/container"
  14. "github.com/docker/docker/testutil"
  15. "github.com/docker/docker/testutil/daemon"
  16. "gotest.tools/v3/assert"
  17. is "gotest.tools/v3/assert/cmp"
  18. "gotest.tools/v3/poll"
  19. "gotest.tools/v3/skip"
  20. )
  21. func TestDaemonRestartKillContainers(t *testing.T) {
  22. skip.If(t, testEnv.IsRemoteDaemon, "cannot start daemon on remote test run")
  23. skip.If(t, testEnv.DaemonInfo.OSType == "windows")
  24. skip.If(t, testEnv.IsRootless, "rootless mode doesn't support live-restore")
  25. ctx := testutil.StartSpan(baseContext, t)
  26. type testCase struct {
  27. desc string
  28. config *container.Config
  29. hostConfig *container.HostConfig
  30. xRunning bool
  31. xRunningLiveRestore bool
  32. xStart bool
  33. xHealthCheck bool
  34. }
  35. for _, tc := range []testCase{
  36. {
  37. desc: "container without restart policy",
  38. config: &container.Config{Image: "busybox", Cmd: []string{"top"}},
  39. xRunningLiveRestore: true,
  40. xStart: true,
  41. },
  42. {
  43. desc: "container with restart=always",
  44. config: &container.Config{Image: "busybox", Cmd: []string{"top"}},
  45. hostConfig: &container.HostConfig{RestartPolicy: container.RestartPolicy{Name: "always"}},
  46. xRunning: true,
  47. xRunningLiveRestore: true,
  48. xStart: true,
  49. },
  50. {
  51. desc: "container with restart=always and with healthcheck",
  52. config: &container.Config{
  53. Image: "busybox", Cmd: []string{"top"},
  54. Healthcheck: &container.HealthConfig{
  55. Test: []string{"CMD-SHELL", "sleep 1"},
  56. Interval: time.Second,
  57. },
  58. },
  59. hostConfig: &container.HostConfig{RestartPolicy: container.RestartPolicy{Name: "always"}},
  60. xRunning: true,
  61. xRunningLiveRestore: true,
  62. xStart: true,
  63. xHealthCheck: true,
  64. },
  65. {
  66. desc: "container created should not be restarted",
  67. config: &container.Config{Image: "busybox", Cmd: []string{"top"}},
  68. hostConfig: &container.HostConfig{RestartPolicy: container.RestartPolicy{Name: "always"}},
  69. },
  70. } {
  71. for _, liveRestoreEnabled := range []bool{false, true} {
  72. for fnName, stopDaemon := range map[string]func(*testing.T, *daemon.Daemon){
  73. "kill-daemon": func(t *testing.T, d *daemon.Daemon) {
  74. err := d.Kill()
  75. assert.NilError(t, err)
  76. },
  77. "stop-daemon": func(t *testing.T, d *daemon.Daemon) {
  78. d.Stop(t)
  79. },
  80. } {
  81. tc := tc
  82. liveRestoreEnabled := liveRestoreEnabled
  83. stopDaemon := stopDaemon
  84. t.Run(fmt.Sprintf("live-restore=%v/%s/%s", liveRestoreEnabled, tc.desc, fnName), func(t *testing.T) {
  85. t.Parallel()
  86. ctx := testutil.StartSpan(ctx, t)
  87. d := daemon.New(t)
  88. apiClient := d.NewClientT(t)
  89. args := []string{"--iptables=false"}
  90. if liveRestoreEnabled {
  91. args = append(args, "--live-restore")
  92. }
  93. d.StartWithBusybox(ctx, t, args...)
  94. defer d.Stop(t)
  95. resp, err := apiClient.ContainerCreate(ctx, tc.config, tc.hostConfig, nil, nil, "")
  96. assert.NilError(t, err)
  97. defer apiClient.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true})
  98. if tc.xStart {
  99. err = apiClient.ContainerStart(ctx, resp.ID, container.StartOptions{})
  100. assert.NilError(t, err)
  101. }
  102. stopDaemon(t, d)
  103. d.Start(t, args...)
  104. expected := tc.xRunning
  105. if liveRestoreEnabled {
  106. expected = tc.xRunningLiveRestore
  107. }
  108. var running bool
  109. for i := 0; i < 30; i++ {
  110. inspect, err := apiClient.ContainerInspect(ctx, resp.ID)
  111. assert.NilError(t, err)
  112. running = inspect.State.Running
  113. if running == expected {
  114. break
  115. }
  116. time.Sleep(2 * time.Second)
  117. }
  118. assert.Equal(t, expected, running, "got unexpected running state, expected %v, got: %v", expected, running)
  119. if tc.xHealthCheck {
  120. startTime := time.Now()
  121. ctxPoll, cancel := context.WithTimeout(ctx, 30*time.Second)
  122. defer cancel()
  123. poll.WaitOn(t, pollForNewHealthCheck(ctxPoll, apiClient, startTime, resp.ID), poll.WithDelay(100*time.Millisecond))
  124. }
  125. // TODO(cpuguy83): test pause states... this seems to be rather undefined currently
  126. })
  127. }
  128. }
  129. }
  130. }
  131. func pollForNewHealthCheck(ctx context.Context, client *client.Client, startTime time.Time, containerID string) func(log poll.LogT) poll.Result {
  132. return func(log poll.LogT) poll.Result {
  133. inspect, err := client.ContainerInspect(ctx, containerID)
  134. if err != nil {
  135. return poll.Error(err)
  136. }
  137. healthChecksTotal := len(inspect.State.Health.Log)
  138. if healthChecksTotal > 0 {
  139. if inspect.State.Health.Log[healthChecksTotal-1].Start.After(startTime) {
  140. return poll.Success()
  141. }
  142. }
  143. return poll.Continue("waiting for a new container healthcheck")
  144. }
  145. }
  146. // Container started with --rm should be able to be restarted.
  147. // It should be removed only if killed or stopped
  148. func TestContainerWithAutoRemoveCanBeRestarted(t *testing.T) {
  149. ctx := setupTest(t)
  150. apiClient := testEnv.APIClient()
  151. noWaitTimeout := 0
  152. for _, tc := range []struct {
  153. desc string
  154. doSth func(ctx context.Context, containerID string) error
  155. }{
  156. {
  157. desc: "kill",
  158. doSth: func(ctx context.Context, containerID string) error {
  159. return apiClient.ContainerKill(ctx, containerID, "SIGKILL")
  160. },
  161. },
  162. {
  163. desc: "stop",
  164. doSth: func(ctx context.Context, containerID string) error {
  165. return apiClient.ContainerStop(ctx, containerID, container.StopOptions{Timeout: &noWaitTimeout})
  166. },
  167. },
  168. } {
  169. tc := tc
  170. t.Run(tc.desc, func(t *testing.T) {
  171. testutil.StartSpan(ctx, t)
  172. cID := testContainer.Run(ctx, t, apiClient,
  173. testContainer.WithName("autoremove-restart-and-"+tc.desc),
  174. testContainer.WithAutoRemove,
  175. )
  176. defer func() {
  177. err := apiClient.ContainerRemove(ctx, cID, container.RemoveOptions{Force: true})
  178. if t.Failed() && err != nil {
  179. t.Logf("Cleaning up test container failed with error: %v", err)
  180. }
  181. }()
  182. err := apiClient.ContainerRestart(ctx, cID, container.StopOptions{Timeout: &noWaitTimeout})
  183. assert.NilError(t, err)
  184. inspect, err := apiClient.ContainerInspect(ctx, cID)
  185. assert.NilError(t, err)
  186. assert.Assert(t, inspect.State.Status != "removing", "Container should not be removing yet")
  187. poll.WaitOn(t, testContainer.IsInState(ctx, apiClient, cID, "running"))
  188. err = tc.doSth(ctx, cID)
  189. assert.NilError(t, err)
  190. poll.WaitOn(t, testContainer.IsRemoved(ctx, apiClient, cID))
  191. })
  192. }
  193. }
  194. // TestContainerRestartWithCancelledRequest verifies that cancelling a restart
  195. // request does not cancel the restart operation, and still starts the container
  196. // after it was stopped.
  197. //
  198. // Regression test for https://github.com/moby/moby/discussions/46682
  199. func TestContainerRestartWithCancelledRequest(t *testing.T) {
  200. ctx := setupTest(t)
  201. apiClient := testEnv.APIClient()
  202. testutil.StartSpan(ctx, t)
  203. // Create a container that ignores SIGTERM and doesn't stop immediately,
  204. // giving us time to cancel the request.
  205. //
  206. // Restarting a container is "stop" (and, if needed, "kill"), then "start"
  207. // the container. We're trying to create the scenario where the "stop" is
  208. // handled, but the request was cancelled and therefore the "start" not
  209. // taking place.
  210. cID := testContainer.Run(ctx, t, apiClient, testContainer.WithCmd("sh", "-c", "trap 'echo received TERM' TERM; while true; do usleep 10; done"))
  211. defer func() {
  212. err := apiClient.ContainerRemove(ctx, cID, container.RemoveOptions{Force: true})
  213. if t.Failed() && err != nil {
  214. t.Logf("Cleaning up test container failed with error: %v", err)
  215. }
  216. }()
  217. // Start listening for events.
  218. messages, errs := apiClient.Events(ctx, types.EventsOptions{
  219. Filters: filters.NewArgs(
  220. filters.Arg("container", cID),
  221. filters.Arg("event", string(events.ActionRestart)),
  222. ),
  223. })
  224. // Make restart request, but cancel the request before the container
  225. // is (forcibly) killed.
  226. ctx2, cancel := context.WithTimeout(ctx, 100*time.Millisecond)
  227. stopTimeout := 1
  228. err := apiClient.ContainerRestart(ctx2, cID, container.StopOptions{
  229. Timeout: &stopTimeout,
  230. })
  231. assert.Check(t, is.ErrorIs(err, context.DeadlineExceeded))
  232. cancel()
  233. // Validate that the restart event occurred, which is emitted
  234. // after the restart (stop (kill) start) finished.
  235. //
  236. // Note that we cannot use RestartCount for this, as that's only
  237. // used for restart-policies.
  238. restartTimeout := 2 * time.Second
  239. if runtime.GOOS == "windows" {
  240. // hcs can sometimes take a long time to stop container.
  241. restartTimeout = StopContainerWindowsPollTimeout
  242. }
  243. select {
  244. case m := <-messages:
  245. assert.Check(t, is.Equal(m.Actor.ID, cID))
  246. assert.Check(t, is.Equal(m.Action, events.ActionRestart))
  247. case err := <-errs:
  248. assert.NilError(t, err)
  249. case <-time.After(restartTimeout):
  250. t.Errorf("timeout waiting for restart event")
  251. }
  252. // Container should be restarted (running).
  253. inspect, err := apiClient.ContainerInspect(ctx, cID)
  254. assert.NilError(t, err)
  255. assert.Check(t, is.Equal(inspect.State.Status, "running"))
  256. }