monitor.go 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. package daemon // import "github.com/docker/docker/daemon"
  2. import (
  3. "context"
  4. "runtime"
  5. "strconv"
  6. "time"
  7. "github.com/docker/docker/api/types"
  8. "github.com/docker/docker/container"
  9. libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
  10. "github.com/docker/docker/restartmanager"
  11. "github.com/pkg/errors"
  12. "github.com/sirupsen/logrus"
  13. )
  14. func (daemon *Daemon) setStateCounter(c *container.Container) {
  15. switch c.StateString() {
  16. case "paused":
  17. stateCtr.set(c.ID, "paused")
  18. case "running":
  19. stateCtr.set(c.ID, "running")
  20. default:
  21. stateCtr.set(c.ID, "stopped")
  22. }
  23. }
  24. // ProcessEvent is called by libcontainerd whenever an event occurs
  25. func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei libcontainerdtypes.EventInfo) error {
  26. c, err := daemon.GetContainer(id)
  27. if err != nil {
  28. return errors.Wrapf(err, "could not find container %s", id)
  29. }
  30. switch e {
  31. case libcontainerdtypes.EventOOM:
  32. // StateOOM is Linux specific and should never be hit on Windows
  33. if runtime.GOOS == "windows" {
  34. return errors.New("received StateOOM from libcontainerd on Windows. This should never happen")
  35. }
  36. c.Lock()
  37. defer c.Unlock()
  38. daemon.updateHealthMonitor(c)
  39. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  40. return err
  41. }
  42. daemon.LogContainerEvent(c, "oom")
  43. case libcontainerdtypes.EventExit:
  44. if int(ei.Pid) == c.Pid {
  45. c.Lock()
  46. _, _, err := daemon.containerd.DeleteTask(context.Background(), c.ID)
  47. if err != nil {
  48. logrus.WithError(err).Warnf("failed to delete container %s from containerd", c.ID)
  49. }
  50. ctx, _ := context.WithTimeout(context.Background(), 2*time.Second)
  51. c.StreamConfig.Wait(ctx)
  52. c.Reset(false)
  53. exitStatus := container.ExitStatus{
  54. ExitCode: int(ei.ExitCode),
  55. ExitedAt: ei.ExitedAt,
  56. OOMKilled: ei.OOMKilled,
  57. }
  58. restart, wait, err := c.RestartManager().ShouldRestart(ei.ExitCode, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt))
  59. if err == nil && restart {
  60. c.RestartCount++
  61. c.SetRestarting(&exitStatus)
  62. } else {
  63. if ei.Error != nil {
  64. c.SetError(ei.Error)
  65. }
  66. c.SetStopped(&exitStatus)
  67. defer daemon.autoRemove(c)
  68. }
  69. defer c.Unlock() // needs to be called before autoRemove
  70. // cancel healthcheck here, they will be automatically
  71. // restarted if/when the container is started again
  72. daemon.stopHealthchecks(c)
  73. attributes := map[string]string{
  74. "exitCode": strconv.Itoa(int(ei.ExitCode)),
  75. }
  76. daemon.LogContainerEventWithAttributes(c, "die", attributes)
  77. daemon.Cleanup(c)
  78. daemon.setStateCounter(c)
  79. cpErr := c.CheckpointTo(daemon.containersReplica)
  80. if err == nil && restart {
  81. go func() {
  82. err := <-wait
  83. if err == nil {
  84. // daemon.netController is initialized when daemon is restoring containers.
  85. // But containerStart will use daemon.netController segment.
  86. // So to avoid panic at startup process, here must wait util daemon restore done.
  87. daemon.waitForStartupDone()
  88. if err = daemon.containerStart(c, "", "", false); err != nil {
  89. logrus.Debugf("failed to restart container: %+v", err)
  90. }
  91. }
  92. if err != nil {
  93. c.Lock()
  94. c.SetStopped(&exitStatus)
  95. daemon.setStateCounter(c)
  96. c.CheckpointTo(daemon.containersReplica)
  97. c.Unlock()
  98. defer daemon.autoRemove(c)
  99. if err != restartmanager.ErrRestartCanceled {
  100. logrus.Errorf("restartmanger wait error: %+v", err)
  101. }
  102. }
  103. }()
  104. }
  105. return cpErr
  106. }
  107. if execConfig := c.ExecCommands.Get(ei.ProcessID); execConfig != nil {
  108. ec := int(ei.ExitCode)
  109. execConfig.Lock()
  110. defer execConfig.Unlock()
  111. execConfig.ExitCode = &ec
  112. execConfig.Running = false
  113. ctx, _ := context.WithTimeout(context.Background(), 2*time.Second)
  114. execConfig.StreamConfig.Wait(ctx)
  115. if err := execConfig.CloseStreams(); err != nil {
  116. logrus.Errorf("failed to cleanup exec %s streams: %s", c.ID, err)
  117. }
  118. // remove the exec command from the container's store only and not the
  119. // daemon's store so that the exec command can be inspected.
  120. c.ExecCommands.Delete(execConfig.ID, execConfig.Pid)
  121. attributes := map[string]string{
  122. "execID": execConfig.ID,
  123. "exitCode": strconv.Itoa(ec),
  124. }
  125. daemon.LogContainerEventWithAttributes(c, "exec_die", attributes)
  126. } else {
  127. logrus.WithFields(logrus.Fields{
  128. "container": c.ID,
  129. "exec-id": ei.ProcessID,
  130. "exec-pid": ei.Pid,
  131. }).Warn("Ignoring Exit Event, no such exec command found")
  132. }
  133. case libcontainerdtypes.EventStart:
  134. c.Lock()
  135. defer c.Unlock()
  136. // This is here to handle start not generated by docker
  137. if !c.Running {
  138. c.SetRunning(int(ei.Pid), false)
  139. c.HasBeenManuallyStopped = false
  140. c.HasBeenStartedBefore = true
  141. daemon.setStateCounter(c)
  142. daemon.initHealthMonitor(c)
  143. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  144. return err
  145. }
  146. daemon.LogContainerEvent(c, "start")
  147. }
  148. case libcontainerdtypes.EventPaused:
  149. c.Lock()
  150. defer c.Unlock()
  151. if !c.Paused {
  152. c.Paused = true
  153. daemon.setStateCounter(c)
  154. daemon.updateHealthMonitor(c)
  155. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  156. return err
  157. }
  158. daemon.LogContainerEvent(c, "pause")
  159. }
  160. case libcontainerdtypes.EventResumed:
  161. c.Lock()
  162. defer c.Unlock()
  163. if c.Paused {
  164. c.Paused = false
  165. daemon.setStateCounter(c)
  166. daemon.updateHealthMonitor(c)
  167. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  168. return err
  169. }
  170. daemon.LogContainerEvent(c, "unpause")
  171. }
  172. }
  173. return nil
  174. }
  175. func (daemon *Daemon) autoRemove(c *container.Container) {
  176. c.Lock()
  177. ar := c.HostConfig.AutoRemove
  178. c.Unlock()
  179. if !ar {
  180. return
  181. }
  182. var err error
  183. if err = daemon.ContainerRm(c.ID, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err == nil {
  184. return
  185. }
  186. if c := daemon.containers.Get(c.ID); c == nil {
  187. return
  188. }
  189. if err != nil {
  190. logrus.WithError(err).WithField("container", c.ID).Error("error removing container")
  191. }
  192. }