monitor.go 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. package daemon
  2. import (
  3. "errors"
  4. "fmt"
  5. "runtime"
  6. "strconv"
  7. "time"
  8. "github.com/docker/docker/api/types"
  9. "github.com/docker/docker/container"
  10. "github.com/docker/docker/libcontainerd"
  11. "github.com/docker/docker/restartmanager"
  12. "github.com/sirupsen/logrus"
  13. )
  14. func (daemon *Daemon) setStateCounter(c *container.Container) {
  15. switch c.StateString() {
  16. case "paused":
  17. stateCtr.set(c.ID, "paused")
  18. case "running":
  19. stateCtr.set(c.ID, "running")
  20. default:
  21. stateCtr.set(c.ID, "stopped")
  22. }
  23. }
  24. // StateChanged updates daemon state changes from containerd
  25. func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
  26. c := daemon.containers.Get(id)
  27. if c == nil {
  28. return fmt.Errorf("no such container: %s", id)
  29. }
  30. switch e.State {
  31. case libcontainerd.StateOOM:
  32. // StateOOM is Linux specific and should never be hit on Windows
  33. if runtime.GOOS == "windows" {
  34. return errors.New("received StateOOM from libcontainerd on Windows. This should never happen")
  35. }
  36. daemon.updateHealthMonitor(c)
  37. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  38. return err
  39. }
  40. daemon.LogContainerEvent(c, "oom")
  41. case libcontainerd.StateExit:
  42. c.Lock()
  43. c.StreamConfig.Wait()
  44. c.Reset(false)
  45. // If daemon is being shutdown, don't let the container restart
  46. restart, wait, err := c.RestartManager().ShouldRestart(e.ExitCode, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt))
  47. if err == nil && restart {
  48. c.RestartCount++
  49. c.SetRestarting(platformConstructExitStatus(e))
  50. } else {
  51. c.SetStopped(platformConstructExitStatus(e))
  52. defer daemon.autoRemove(c)
  53. }
  54. // cancel healthcheck here, they will be automatically
  55. // restarted if/when the container is started again
  56. daemon.stopHealthchecks(c)
  57. attributes := map[string]string{
  58. "exitCode": strconv.Itoa(int(e.ExitCode)),
  59. }
  60. daemon.LogContainerEventWithAttributes(c, "die", attributes)
  61. daemon.Cleanup(c)
  62. if err == nil && restart {
  63. go func() {
  64. err := <-wait
  65. if err == nil {
  66. // daemon.netController is initialized when daemon is restoring containers.
  67. // But containerStart will use daemon.netController segment.
  68. // So to avoid panic at startup process, here must wait util daemon restore done.
  69. daemon.waitForStartupDone()
  70. if err = daemon.containerStart(c, "", "", false); err != nil {
  71. logrus.Debugf("failed to restart container: %+v", err)
  72. }
  73. }
  74. if err != nil {
  75. c.SetStopped(platformConstructExitStatus(e))
  76. defer daemon.autoRemove(c)
  77. if err != restartmanager.ErrRestartCanceled {
  78. logrus.Errorf("restartmanger wait error: %+v", err)
  79. }
  80. }
  81. }()
  82. }
  83. daemon.setStateCounter(c)
  84. defer c.Unlock()
  85. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  86. return err
  87. }
  88. return daemon.postRunProcessing(c, e)
  89. case libcontainerd.StateExitProcess:
  90. if execConfig := c.ExecCommands.Get(e.ProcessID); execConfig != nil {
  91. ec := int(e.ExitCode)
  92. execConfig.Lock()
  93. defer execConfig.Unlock()
  94. execConfig.ExitCode = &ec
  95. execConfig.Running = false
  96. execConfig.StreamConfig.Wait()
  97. if err := execConfig.CloseStreams(); err != nil {
  98. logrus.Errorf("failed to cleanup exec %s streams: %s", c.ID, err)
  99. }
  100. // remove the exec command from the container's store only and not the
  101. // daemon's store so that the exec command can be inspected.
  102. c.ExecCommands.Delete(execConfig.ID)
  103. } else {
  104. logrus.Warnf("Ignoring StateExitProcess for %v but no exec command found", e)
  105. }
  106. case libcontainerd.StateStart, libcontainerd.StateRestore:
  107. // Container is already locked in this case
  108. c.SetRunning(int(e.Pid), e.State == libcontainerd.StateStart)
  109. c.HasBeenManuallyStopped = false
  110. c.HasBeenStartedBefore = true
  111. daemon.setStateCounter(c)
  112. daemon.initHealthMonitor(c)
  113. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  114. c.Reset(false)
  115. return err
  116. }
  117. daemon.LogContainerEvent(c, "start")
  118. case libcontainerd.StatePause:
  119. // Container is already locked in this case
  120. c.Paused = true
  121. daemon.setStateCounter(c)
  122. daemon.updateHealthMonitor(c)
  123. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  124. return err
  125. }
  126. daemon.LogContainerEvent(c, "pause")
  127. case libcontainerd.StateResume:
  128. // Container is already locked in this case
  129. c.Paused = false
  130. daemon.setStateCounter(c)
  131. daemon.updateHealthMonitor(c)
  132. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  133. return err
  134. }
  135. daemon.LogContainerEvent(c, "unpause")
  136. }
  137. return nil
  138. }
  139. func (daemon *Daemon) autoRemove(c *container.Container) {
  140. c.Lock()
  141. ar := c.HostConfig.AutoRemove
  142. c.Unlock()
  143. if !ar {
  144. return
  145. }
  146. var err error
  147. if err = daemon.ContainerRm(c.ID, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err == nil {
  148. return
  149. }
  150. if c := daemon.containers.Get(c.ID); c == nil {
  151. return
  152. }
  153. if err != nil {
  154. logrus.WithError(err).WithField("container", c.ID).Error("error removing container")
  155. }
  156. }