restart_test.go 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. package container // import "github.com/docker/docker/integration/container"
  2. import (
  3. "context"
  4. "fmt"
  5. "runtime"
  6. "testing"
  7. "time"
  8. "github.com/docker/docker/api/types"
  9. "github.com/docker/docker/api/types/container"
  10. "github.com/docker/docker/api/types/events"
  11. "github.com/docker/docker/api/types/filters"
  12. "github.com/docker/docker/client"
  13. testContainer "github.com/docker/docker/integration/internal/container"
  14. "github.com/docker/docker/testutil"
  15. "github.com/docker/docker/testutil/daemon"
  16. "gotest.tools/v3/assert"
  17. is "gotest.tools/v3/assert/cmp"
  18. "gotest.tools/v3/poll"
  19. "gotest.tools/v3/skip"
  20. )
  21. func TestDaemonRestartKillContainers(t *testing.T) {
  22. skip.If(t, testEnv.IsRemoteDaemon, "cannot start daemon on remote test run")
  23. skip.If(t, testEnv.DaemonInfo.OSType == "windows")
  24. skip.If(t, testEnv.IsRootless, "rootless mode doesn't support live-restore")
  25. ctx := testutil.StartSpan(baseContext, t)
  26. type testCase struct {
  27. desc string
  28. restartPolicy container.RestartPolicy
  29. xRunning bool
  30. xRunningLiveRestore bool
  31. xStart bool
  32. xHealthCheck bool
  33. }
  34. for _, tc := range []testCase{
  35. {
  36. desc: "container without restart policy",
  37. xRunningLiveRestore: true,
  38. xStart: true,
  39. },
  40. {
  41. desc: "container with restart=always",
  42. restartPolicy: container.RestartPolicy{Name: "always"},
  43. xRunning: true,
  44. xRunningLiveRestore: true,
  45. xStart: true,
  46. },
  47. {
  48. desc: "container with restart=always and with healthcheck",
  49. restartPolicy: container.RestartPolicy{Name: "always"},
  50. xRunning: true,
  51. xRunningLiveRestore: true,
  52. xStart: true,
  53. xHealthCheck: true,
  54. },
  55. {
  56. desc: "container created should not be restarted",
  57. restartPolicy: container.RestartPolicy{Name: "always"},
  58. },
  59. } {
  60. for _, liveRestoreEnabled := range []bool{false, true} {
  61. for fnName, stopDaemon := range map[string]func(*testing.T, *daemon.Daemon){
  62. "kill-daemon": func(t *testing.T, d *daemon.Daemon) {
  63. err := d.Kill()
  64. assert.NilError(t, err)
  65. },
  66. "stop-daemon": func(t *testing.T, d *daemon.Daemon) {
  67. d.Stop(t)
  68. },
  69. } {
  70. tc := tc
  71. liveRestoreEnabled := liveRestoreEnabled
  72. stopDaemon := stopDaemon
  73. t.Run(fmt.Sprintf("live-restore=%v/%s/%s", liveRestoreEnabled, tc.desc, fnName), func(t *testing.T) {
  74. t.Parallel()
  75. ctx := testutil.StartSpan(ctx, t)
  76. d := daemon.New(t)
  77. apiClient := d.NewClientT(t)
  78. args := []string{"--iptables=false"}
  79. if liveRestoreEnabled {
  80. args = append(args, "--live-restore")
  81. }
  82. d.StartWithBusybox(ctx, t, args...)
  83. defer d.Stop(t)
  84. config := container.Config{Image: "busybox", Cmd: []string{"top"}}
  85. hostConfig := container.HostConfig{RestartPolicy: tc.restartPolicy}
  86. if tc.xHealthCheck {
  87. config.Healthcheck = &container.HealthConfig{
  88. Test: []string{"CMD-SHELL", "! test -f /tmp/unhealthy"},
  89. StartPeriod: 60 * time.Second,
  90. StartInterval: 1 * time.Second,
  91. Interval: 60 * time.Second,
  92. }
  93. }
  94. resp, err := apiClient.ContainerCreate(ctx, &config, &hostConfig, nil, nil, "")
  95. assert.NilError(t, err)
  96. defer apiClient.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true})
  97. if tc.xStart {
  98. err = apiClient.ContainerStart(ctx, resp.ID, container.StartOptions{})
  99. assert.NilError(t, err)
  100. if tc.xHealthCheck {
  101. poll.WaitOn(t, pollForHealthStatus(ctx, apiClient, resp.ID, types.Healthy), poll.WithDelay(100*time.Millisecond), poll.WithTimeout(30*time.Second))
  102. testContainer.ExecT(ctx, t, apiClient, resp.ID, []string{"touch", "/tmp/unhealthy"}).AssertSuccess(t)
  103. }
  104. }
  105. stopDaemon(t, d)
  106. startTime := time.Now()
  107. d.Start(t, args...)
  108. expected := tc.xRunning
  109. if liveRestoreEnabled {
  110. expected = tc.xRunningLiveRestore
  111. }
  112. poll.WaitOn(t, testContainer.RunningStateFlagIs(ctx, apiClient, resp.ID, expected), poll.WithDelay(100*time.Millisecond), poll.WithTimeout(30*time.Second))
  113. if tc.xHealthCheck {
  114. // We have arranged to have the container's health probes fail until we tell it
  115. // to become healthy, which gives us the entire StartPeriod (60s) to assert that
  116. // the container's health state is Starting before we have to worry about racing
  117. // the health monitor.
  118. assert.Equal(t, testContainer.Inspect(ctx, t, apiClient, resp.ID).State.Health.Status, types.Starting)
  119. poll.WaitOn(t, pollForNewHealthCheck(ctx, apiClient, startTime, resp.ID), poll.WithDelay(100*time.Millisecond), poll.WithTimeout(30*time.Second))
  120. testContainer.ExecT(ctx, t, apiClient, resp.ID, []string{"rm", "/tmp/unhealthy"}).AssertSuccess(t)
  121. poll.WaitOn(t, pollForHealthStatus(ctx, apiClient, resp.ID, types.Healthy), poll.WithDelay(100*time.Millisecond), poll.WithTimeout(30*time.Second))
  122. }
  123. // TODO(cpuguy83): test pause states... this seems to be rather undefined currently
  124. })
  125. }
  126. }
  127. }
  128. }
  129. func pollForNewHealthCheck(ctx context.Context, client *client.Client, startTime time.Time, containerID string) func(log poll.LogT) poll.Result {
  130. return func(log poll.LogT) poll.Result {
  131. inspect, err := client.ContainerInspect(ctx, containerID)
  132. if err != nil {
  133. return poll.Error(err)
  134. }
  135. healthChecksTotal := len(inspect.State.Health.Log)
  136. if healthChecksTotal > 0 {
  137. if inspect.State.Health.Log[healthChecksTotal-1].Start.After(startTime) {
  138. return poll.Success()
  139. }
  140. }
  141. return poll.Continue("waiting for a new container healthcheck")
  142. }
  143. }
  144. // Container started with --rm should be able to be restarted.
  145. // It should be removed only if killed or stopped
  146. func TestContainerWithAutoRemoveCanBeRestarted(t *testing.T) {
  147. ctx := setupTest(t)
  148. apiClient := testEnv.APIClient()
  149. noWaitTimeout := 0
  150. for _, tc := range []struct {
  151. desc string
  152. doSth func(ctx context.Context, containerID string) error
  153. }{
  154. {
  155. desc: "kill",
  156. doSth: func(ctx context.Context, containerID string) error {
  157. return apiClient.ContainerKill(ctx, containerID, "SIGKILL")
  158. },
  159. },
  160. {
  161. desc: "stop",
  162. doSth: func(ctx context.Context, containerID string) error {
  163. return apiClient.ContainerStop(ctx, containerID, container.StopOptions{Timeout: &noWaitTimeout})
  164. },
  165. },
  166. } {
  167. tc := tc
  168. t.Run(tc.desc, func(t *testing.T) {
  169. testutil.StartSpan(ctx, t)
  170. cID := testContainer.Run(ctx, t, apiClient,
  171. testContainer.WithName("autoremove-restart-and-"+tc.desc),
  172. testContainer.WithAutoRemove,
  173. )
  174. defer func() {
  175. err := apiClient.ContainerRemove(ctx, cID, container.RemoveOptions{Force: true})
  176. if t.Failed() && err != nil {
  177. t.Logf("Cleaning up test container failed with error: %v", err)
  178. }
  179. }()
  180. err := apiClient.ContainerRestart(ctx, cID, container.StopOptions{Timeout: &noWaitTimeout})
  181. assert.NilError(t, err)
  182. inspect, err := apiClient.ContainerInspect(ctx, cID)
  183. assert.NilError(t, err)
  184. assert.Assert(t, inspect.State.Status != "removing", "Container should not be removing yet")
  185. poll.WaitOn(t, testContainer.IsInState(ctx, apiClient, cID, "running"))
  186. err = tc.doSth(ctx, cID)
  187. assert.NilError(t, err)
  188. poll.WaitOn(t, testContainer.IsRemoved(ctx, apiClient, cID))
  189. })
  190. }
  191. }
  192. // TestContainerRestartWithCancelledRequest verifies that cancelling a restart
  193. // request does not cancel the restart operation, and still starts the container
  194. // after it was stopped.
  195. //
  196. // Regression test for https://github.com/moby/moby/discussions/46682
  197. func TestContainerRestartWithCancelledRequest(t *testing.T) {
  198. ctx := setupTest(t)
  199. apiClient := testEnv.APIClient()
  200. testutil.StartSpan(ctx, t)
  201. // Create a container that ignores SIGTERM and doesn't stop immediately,
  202. // giving us time to cancel the request.
  203. //
  204. // Restarting a container is "stop" (and, if needed, "kill"), then "start"
  205. // the container. We're trying to create the scenario where the "stop" is
  206. // handled, but the request was cancelled and therefore the "start" not
  207. // taking place.
  208. cID := testContainer.Run(ctx, t, apiClient, testContainer.WithCmd("sh", "-c", "trap 'echo received TERM' TERM; while true; do usleep 10; done"))
  209. defer func() {
  210. err := apiClient.ContainerRemove(ctx, cID, container.RemoveOptions{Force: true})
  211. if t.Failed() && err != nil {
  212. t.Logf("Cleaning up test container failed with error: %v", err)
  213. }
  214. }()
  215. // Start listening for events.
  216. messages, errs := apiClient.Events(ctx, types.EventsOptions{
  217. Filters: filters.NewArgs(
  218. filters.Arg("container", cID),
  219. filters.Arg("event", string(events.ActionRestart)),
  220. ),
  221. })
  222. // Make restart request, but cancel the request before the container
  223. // is (forcibly) killed.
  224. ctx2, cancel := context.WithTimeout(ctx, 100*time.Millisecond)
  225. stopTimeout := 1
  226. err := apiClient.ContainerRestart(ctx2, cID, container.StopOptions{
  227. Timeout: &stopTimeout,
  228. })
  229. assert.Check(t, is.ErrorIs(err, context.DeadlineExceeded))
  230. cancel()
  231. // Validate that the restart event occurred, which is emitted
  232. // after the restart (stop (kill) start) finished.
  233. //
  234. // Note that we cannot use RestartCount for this, as that's only
  235. // used for restart-policies.
  236. restartTimeout := 2 * time.Second
  237. if runtime.GOOS == "windows" {
  238. // hcs can sometimes take a long time to stop container.
  239. restartTimeout = StopContainerWindowsPollTimeout
  240. }
  241. select {
  242. case m := <-messages:
  243. assert.Check(t, is.Equal(m.Actor.ID, cID))
  244. assert.Check(t, is.Equal(m.Action, events.ActionRestart))
  245. case err := <-errs:
  246. assert.NilError(t, err)
  247. case <-time.After(restartTimeout):
  248. t.Errorf("timeout waiting for restart event")
  249. }
  250. // Container should be restarted (running).
  251. inspect, err := apiClient.ContainerInspect(ctx, cID)
  252. assert.NilError(t, err)
  253. assert.Check(t, is.Equal(inspect.State.Status, "running"))
  254. }