monitor.go 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. package daemon
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "runtime"
  7. "strconv"
  8. "time"
  9. "github.com/docker/docker/api/types"
  10. "github.com/docker/docker/container"
  11. "github.com/docker/docker/libcontainerd"
  12. "github.com/docker/docker/restartmanager"
  13. "github.com/sirupsen/logrus"
  14. )
  15. func (daemon *Daemon) setStateCounter(c *container.Container) {
  16. switch c.StateString() {
  17. case "paused":
  18. stateCtr.set(c.ID, "paused")
  19. case "running":
  20. stateCtr.set(c.ID, "running")
  21. default:
  22. stateCtr.set(c.ID, "stopped")
  23. }
  24. }
  25. // ProcessEvent is called by libcontainerd whenever an event occurs
  26. func (daemon *Daemon) ProcessEvent(id string, e libcontainerd.EventType, ei libcontainerd.EventInfo) error {
  27. c, err := daemon.GetContainer(id)
  28. if c == nil || err != nil {
  29. return fmt.Errorf("no such container: %s", id)
  30. }
  31. switch e {
  32. case libcontainerd.EventOOM:
  33. // StateOOM is Linux specific and should never be hit on Windows
  34. if runtime.GOOS == "windows" {
  35. return errors.New("received StateOOM from libcontainerd on Windows. This should never happen")
  36. }
  37. c.Lock()
  38. defer c.Unlock()
  39. daemon.updateHealthMonitor(c)
  40. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  41. return err
  42. }
  43. daemon.LogContainerEvent(c, "oom")
  44. case libcontainerd.EventExit:
  45. if int(ei.Pid) == c.Pid {
  46. c.Lock()
  47. _, _, err := daemon.containerd.DeleteTask(context.Background(), c.ID)
  48. if err != nil {
  49. logrus.WithError(err).Warnf("failed to delete container %s from containerd", c.ID)
  50. }
  51. c.StreamConfig.Wait()
  52. c.Reset(false)
  53. exitStatus := container.ExitStatus{
  54. ExitCode: int(ei.ExitCode),
  55. ExitedAt: ei.ExitedAt,
  56. OOMKilled: ei.OOMKilled,
  57. }
  58. restart, wait, err := c.RestartManager().ShouldRestart(ei.ExitCode, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt))
  59. if err == nil && restart {
  60. c.RestartCount++
  61. c.SetRestarting(&exitStatus)
  62. } else {
  63. c.SetStopped(&exitStatus)
  64. defer daemon.autoRemove(c)
  65. }
  66. defer c.Unlock() // needs to be called before autoRemove
  67. // cancel healthcheck here, they will be automatically
  68. // restarted if/when the container is started again
  69. daemon.stopHealthchecks(c)
  70. attributes := map[string]string{
  71. "exitCode": strconv.Itoa(int(ei.ExitCode)),
  72. }
  73. daemon.LogContainerEventWithAttributes(c, "die", attributes)
  74. daemon.Cleanup(c)
  75. if err == nil && restart {
  76. go func() {
  77. err := <-wait
  78. if err == nil {
  79. // daemon.netController is initialized when daemon is restoring containers.
  80. // But containerStart will use daemon.netController segment.
  81. // So to avoid panic at startup process, here must wait util daemon restore done.
  82. daemon.waitForStartupDone()
  83. if err = daemon.containerStart(c, "", "", false); err != nil {
  84. logrus.Debugf("failed to restart container: %+v", err)
  85. }
  86. }
  87. if err != nil {
  88. c.Lock()
  89. c.SetStopped(&exitStatus)
  90. c.Unlock()
  91. defer daemon.autoRemove(c)
  92. if err != restartmanager.ErrRestartCanceled {
  93. logrus.Errorf("restartmanger wait error: %+v", err)
  94. }
  95. }
  96. }()
  97. }
  98. daemon.setStateCounter(c)
  99. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  100. return err
  101. }
  102. return daemon.postRunProcessing(c, ei)
  103. }
  104. if execConfig := c.ExecCommands.Get(ei.ProcessID); execConfig != nil {
  105. ec := int(ei.ExitCode)
  106. execConfig.Lock()
  107. defer execConfig.Unlock()
  108. execConfig.ExitCode = &ec
  109. execConfig.Running = false
  110. execConfig.StreamConfig.Wait()
  111. if err := execConfig.CloseStreams(); err != nil {
  112. logrus.Errorf("failed to cleanup exec %s streams: %s", c.ID, err)
  113. }
  114. // remove the exec command from the container's store only and not the
  115. // daemon's store so that the exec command can be inspected.
  116. c.ExecCommands.Delete(execConfig.ID, execConfig.Pid)
  117. } else {
  118. logrus.WithFields(logrus.Fields{
  119. "container": c.ID,
  120. "exec-id": ei.ProcessID,
  121. "exec-pid": ei.Pid,
  122. }).Warnf("Ignoring Exit Event, no such exec command found")
  123. }
  124. case libcontainerd.EventStart:
  125. c.Lock()
  126. defer c.Unlock()
  127. // This is here to handle start not generated by docker
  128. if !c.Running {
  129. c.SetRunning(int(ei.Pid), false)
  130. c.HasBeenManuallyStopped = false
  131. c.HasBeenStartedBefore = true
  132. daemon.setStateCounter(c)
  133. daemon.initHealthMonitor(c)
  134. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  135. return err
  136. }
  137. daemon.LogContainerEvent(c, "start")
  138. }
  139. case libcontainerd.EventPaused:
  140. c.Lock()
  141. defer c.Unlock()
  142. if !c.Paused {
  143. c.Paused = true
  144. daemon.setStateCounter(c)
  145. daemon.updateHealthMonitor(c)
  146. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  147. return err
  148. }
  149. daemon.LogContainerEvent(c, "pause")
  150. }
  151. case libcontainerd.EventResumed:
  152. c.Lock()
  153. defer c.Unlock()
  154. if c.Paused {
  155. c.Paused = false
  156. daemon.setStateCounter(c)
  157. daemon.updateHealthMonitor(c)
  158. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  159. return err
  160. }
  161. daemon.LogContainerEvent(c, "unpause")
  162. }
  163. }
  164. return nil
  165. }
  166. func (daemon *Daemon) autoRemove(c *container.Container) {
  167. c.Lock()
  168. ar := c.HostConfig.AutoRemove
  169. c.Unlock()
  170. if !ar {
  171. return
  172. }
  173. var err error
  174. if err = daemon.ContainerRm(c.ID, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err == nil {
  175. return
  176. }
  177. if c := daemon.containers.Get(c.ID); c == nil {
  178. return
  179. }
  180. if err != nil {
  181. logrus.WithError(err).WithField("container", c.ID).Error("error removing container")
  182. }
  183. }