monitor.go 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. package daemon // import "github.com/docker/docker/daemon"
  2. import (
  3. "context"
  4. "strconv"
  5. "time"
  6. "github.com/docker/docker/api/types"
  7. "github.com/docker/docker/container"
  8. libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
  9. "github.com/docker/docker/restartmanager"
  10. "github.com/pkg/errors"
  11. "github.com/sirupsen/logrus"
  12. )
  13. func (daemon *Daemon) setStateCounter(c *container.Container) {
  14. switch c.StateString() {
  15. case "paused":
  16. stateCtr.set(c.ID, "paused")
  17. case "running":
  18. stateCtr.set(c.ID, "running")
  19. default:
  20. stateCtr.set(c.ID, "stopped")
  21. }
  22. }
  23. func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontainerdtypes.EventInfo) error {
  24. c.Lock()
  25. ec, et, err := daemon.containerd.DeleteTask(context.Background(), c.ID)
  26. if err != nil {
  27. logrus.WithError(err).Warnf("failed to delete container %s from containerd", c.ID)
  28. }
  29. ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
  30. c.StreamConfig.Wait(ctx)
  31. cancel()
  32. c.Reset(false)
  33. exitStatus := container.ExitStatus{
  34. ExitCode: int(ec),
  35. ExitedAt: et,
  36. }
  37. if e != nil {
  38. exitStatus.ExitCode = int(e.ExitCode)
  39. exitStatus.ExitedAt = e.ExitedAt
  40. exitStatus.OOMKilled = e.OOMKilled
  41. if e.Error != nil {
  42. c.SetError(e.Error)
  43. }
  44. }
  45. restart, wait, err := c.RestartManager().ShouldRestart(ec, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt))
  46. if err == nil && restart {
  47. c.RestartCount++
  48. c.SetRestarting(&exitStatus)
  49. } else {
  50. c.SetStopped(&exitStatus)
  51. defer daemon.autoRemove(c)
  52. }
  53. defer c.Unlock() // needs to be called before autoRemove
  54. // cancel healthcheck here, they will be automatically
  55. // restarted if/when the container is started again
  56. daemon.stopHealthchecks(c)
  57. attributes := map[string]string{
  58. "exitCode": strconv.Itoa(int(ec)),
  59. }
  60. daemon.LogContainerEventWithAttributes(c, "die", attributes)
  61. daemon.Cleanup(c)
  62. daemon.setStateCounter(c)
  63. cpErr := c.CheckpointTo(daemon.containersReplica)
  64. if err == nil && restart {
  65. go func() {
  66. err := <-wait
  67. if err == nil {
  68. // daemon.netController is initialized when daemon is restoring containers.
  69. // But containerStart will use daemon.netController segment.
  70. // So to avoid panic at startup process, here must wait util daemon restore done.
  71. daemon.waitForStartupDone()
  72. if err = daemon.containerStart(c, "", "", false); err != nil {
  73. logrus.Debugf("failed to restart container: %+v", err)
  74. }
  75. }
  76. if err != nil {
  77. c.Lock()
  78. c.SetStopped(&exitStatus)
  79. daemon.setStateCounter(c)
  80. c.CheckpointTo(daemon.containersReplica)
  81. c.Unlock()
  82. defer daemon.autoRemove(c)
  83. if err != restartmanager.ErrRestartCanceled {
  84. logrus.Errorf("restartmanger wait error: %+v", err)
  85. }
  86. }
  87. }()
  88. }
  89. return cpErr
  90. }
  91. // ProcessEvent is called by libcontainerd whenever an event occurs
  92. func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei libcontainerdtypes.EventInfo) error {
  93. c, err := daemon.GetContainer(id)
  94. if err != nil {
  95. return errors.Wrapf(err, "could not find container %s", id)
  96. }
  97. switch e {
  98. case libcontainerdtypes.EventOOM:
  99. // StateOOM is Linux specific and should never be hit on Windows
  100. if isWindows {
  101. return errors.New("received StateOOM from libcontainerd on Windows. This should never happen")
  102. }
  103. c.Lock()
  104. defer c.Unlock()
  105. daemon.updateHealthMonitor(c)
  106. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  107. return err
  108. }
  109. daemon.LogContainerEvent(c, "oom")
  110. case libcontainerdtypes.EventExit:
  111. if int(ei.Pid) == c.Pid {
  112. return daemon.handleContainerExit(c, &ei)
  113. }
  114. exitCode := 127
  115. if execConfig := c.ExecCommands.Get(ei.ProcessID); execConfig != nil {
  116. ec := int(ei.ExitCode)
  117. execConfig.Lock()
  118. defer execConfig.Unlock()
  119. execConfig.ExitCode = &ec
  120. execConfig.Running = false
  121. ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
  122. execConfig.StreamConfig.Wait(ctx)
  123. cancel()
  124. if err := execConfig.CloseStreams(); err != nil {
  125. logrus.Errorf("failed to cleanup exec %s streams: %s", c.ID, err)
  126. }
  127. // remove the exec command from the container's store only and not the
  128. // daemon's store so that the exec command can be inspected.
  129. c.ExecCommands.Delete(execConfig.ID, execConfig.Pid)
  130. exitCode = ec
  131. }
  132. attributes := map[string]string{
  133. "execID": ei.ProcessID,
  134. "exitCode": strconv.Itoa(exitCode),
  135. }
  136. daemon.LogContainerEventWithAttributes(c, "exec_die", attributes)
  137. case libcontainerdtypes.EventStart:
  138. c.Lock()
  139. defer c.Unlock()
  140. // This is here to handle start not generated by docker
  141. if !c.Running {
  142. c.SetRunning(int(ei.Pid), false)
  143. c.HasBeenManuallyStopped = false
  144. c.HasBeenStartedBefore = true
  145. daemon.setStateCounter(c)
  146. daemon.initHealthMonitor(c)
  147. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  148. return err
  149. }
  150. daemon.LogContainerEvent(c, "start")
  151. }
  152. case libcontainerdtypes.EventPaused:
  153. c.Lock()
  154. defer c.Unlock()
  155. if !c.Paused {
  156. c.Paused = true
  157. daemon.setStateCounter(c)
  158. daemon.updateHealthMonitor(c)
  159. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  160. return err
  161. }
  162. daemon.LogContainerEvent(c, "pause")
  163. }
  164. case libcontainerdtypes.EventResumed:
  165. c.Lock()
  166. defer c.Unlock()
  167. if c.Paused {
  168. c.Paused = false
  169. daemon.setStateCounter(c)
  170. daemon.updateHealthMonitor(c)
  171. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  172. return err
  173. }
  174. daemon.LogContainerEvent(c, "unpause")
  175. }
  176. }
  177. return nil
  178. }
  179. func (daemon *Daemon) autoRemove(c *container.Container) {
  180. c.Lock()
  181. ar := c.HostConfig.AutoRemove
  182. c.Unlock()
  183. if !ar {
  184. return
  185. }
  186. err := daemon.ContainerRm(c.ID, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true})
  187. if err == nil {
  188. return
  189. }
  190. if c := daemon.containers.Get(c.ID); c == nil {
  191. return
  192. }
  193. logrus.WithError(err).WithField("container", c.ID).Error("error removing container")
  194. }