monitor.go 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. package daemon // import "github.com/docker/docker/daemon"
  2. import (
  3. "context"
  4. "strconv"
  5. "time"
  6. "github.com/containerd/log"
  7. "github.com/docker/docker/api/types/backend"
  8. "github.com/docker/docker/api/types/events"
  9. "github.com/docker/docker/container"
  10. "github.com/docker/docker/daemon/config"
  11. "github.com/docker/docker/errdefs"
  12. libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
  13. "github.com/docker/docker/restartmanager"
  14. "github.com/pkg/errors"
  15. )
  16. func (daemon *Daemon) setStateCounter(c *container.Container) {
  17. switch c.StateString() {
  18. case "paused":
  19. stateCtr.set(c.ID, "paused")
  20. case "running":
  21. stateCtr.set(c.ID, "running")
  22. default:
  23. stateCtr.set(c.ID, "stopped")
  24. }
  25. }
  26. func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontainerdtypes.EventInfo) error {
  27. var exitStatus container.ExitStatus
  28. c.Lock()
  29. cfg := daemon.config()
  30. // Health checks will be automatically restarted if/when the
  31. // container is started again.
  32. daemon.stopHealthchecks(c)
  33. tsk, ok := c.Task()
  34. if ok {
  35. ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
  36. es, err := tsk.Delete(ctx)
  37. cancel()
  38. if err != nil {
  39. log.G(ctx).WithFields(log.Fields{
  40. "error": err,
  41. "container": c.ID,
  42. }).Warn("failed to delete container from containerd")
  43. } else {
  44. exitStatus = container.ExitStatus{
  45. ExitCode: int(es.ExitCode()),
  46. ExitedAt: es.ExitTime(),
  47. }
  48. }
  49. }
  50. ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
  51. c.StreamConfig.Wait(ctx)
  52. cancel()
  53. c.Reset(false)
  54. if e != nil {
  55. exitStatus.ExitCode = int(e.ExitCode)
  56. exitStatus.ExitedAt = e.ExitedAt
  57. if e.Error != nil {
  58. c.SetError(e.Error)
  59. }
  60. }
  61. daemonShutdown := daemon.IsShuttingDown()
  62. execDuration := time.Since(c.StartedAt)
  63. restart, wait, err := c.RestartManager().ShouldRestart(uint32(exitStatus.ExitCode), daemonShutdown || c.HasBeenManuallyStopped, execDuration)
  64. if err != nil {
  65. log.G(ctx).WithFields(log.Fields{
  66. "error": err,
  67. "container": c.ID,
  68. "restartCount": c.RestartCount,
  69. "exitStatus": exitStatus,
  70. "daemonShuttingDown": daemonShutdown,
  71. "hasBeenManuallyStopped": c.HasBeenManuallyStopped,
  72. "execDuration": execDuration,
  73. }).Warn("ShouldRestart failed, container will not be restarted")
  74. restart = false
  75. }
  76. attributes := map[string]string{
  77. "exitCode": strconv.Itoa(exitStatus.ExitCode),
  78. "execDuration": strconv.Itoa(int(execDuration.Seconds())),
  79. }
  80. daemon.Cleanup(c)
  81. if restart {
  82. c.RestartCount++
  83. log.G(ctx).WithFields(log.Fields{
  84. "container": c.ID,
  85. "restartCount": c.RestartCount,
  86. "exitStatus": exitStatus,
  87. "manualRestart": c.HasBeenManuallyRestarted,
  88. }).Debug("Restarting container")
  89. c.SetRestarting(&exitStatus)
  90. } else {
  91. c.SetStopped(&exitStatus)
  92. if !c.HasBeenManuallyRestarted {
  93. defer daemon.autoRemove(&cfg.Config, c)
  94. }
  95. }
  96. defer c.Unlock() // needs to be called before autoRemove
  97. daemon.setStateCounter(c)
  98. checkpointErr := c.CheckpointTo(daemon.containersReplica)
  99. daemon.LogContainerEventWithAttributes(c, events.ActionDie, attributes)
  100. if restart {
  101. go func() {
  102. err := <-wait
  103. if err == nil {
  104. // daemon.netController is initialized when daemon is restoring containers.
  105. // But containerStart will use daemon.netController segment.
  106. // So to avoid panic at startup process, here must wait util daemon restore done.
  107. daemon.waitForStartupDone()
  108. cfg := daemon.config() // Apply the most up-to-date daemon config to the restarted container.
  109. // update the error if we fail to start the container, so that the cleanup code
  110. // below can handle updating the container's status, and auto-remove (if set).
  111. err = daemon.containerStart(context.Background(), cfg, c, "", "", false)
  112. if err != nil {
  113. log.G(ctx).Debugf("failed to restart container: %+v", err)
  114. }
  115. }
  116. if err != nil {
  117. c.Lock()
  118. c.SetStopped(&exitStatus)
  119. daemon.setStateCounter(c)
  120. c.CheckpointTo(daemon.containersReplica)
  121. c.Unlock()
  122. defer daemon.autoRemove(&cfg.Config, c)
  123. if err != restartmanager.ErrRestartCanceled {
  124. log.G(ctx).Errorf("restartmanger wait error: %+v", err)
  125. }
  126. }
  127. }()
  128. }
  129. return checkpointErr
  130. }
  131. // ProcessEvent is called by libcontainerd whenever an event occurs
  132. func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei libcontainerdtypes.EventInfo) error {
  133. c, err := daemon.GetContainer(id)
  134. if err != nil {
  135. return errors.Wrapf(err, "could not find container %s", id)
  136. }
  137. switch e {
  138. case libcontainerdtypes.EventOOM:
  139. // StateOOM is Linux specific and should never be hit on Windows
  140. if isWindows {
  141. return errors.New("received StateOOM from libcontainerd on Windows. This should never happen")
  142. }
  143. c.Lock()
  144. defer c.Unlock()
  145. c.OOMKilled = true
  146. daemon.updateHealthMonitor(c)
  147. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  148. return err
  149. }
  150. daemon.LogContainerEvent(c, events.ActionOOM)
  151. case libcontainerdtypes.EventExit:
  152. if ei.ProcessID == ei.ContainerID {
  153. return daemon.handleContainerExit(c, &ei)
  154. }
  155. exitCode := 127
  156. if execConfig := c.ExecCommands.Get(ei.ProcessID); execConfig != nil {
  157. ec := int(ei.ExitCode)
  158. execConfig.Lock()
  159. defer execConfig.Unlock()
  160. // Remove the exec command from the container's store only and not the
  161. // daemon's store so that the exec command can be inspected. Remove it
  162. // before mutating execConfig to maintain the invariant that
  163. // c.ExecCommands only contains execs that have not exited.
  164. c.ExecCommands.Delete(execConfig.ID)
  165. execConfig.ExitCode = &ec
  166. execConfig.Running = false
  167. ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
  168. execConfig.StreamConfig.Wait(ctx)
  169. cancel()
  170. if err := execConfig.CloseStreams(); err != nil {
  171. log.G(ctx).Errorf("failed to cleanup exec %s streams: %s", c.ID, err)
  172. }
  173. exitCode = ec
  174. // If the exec failed at start in such a way that containerd
  175. // publishes an exit event for it, we will race processing the event
  176. // with daemon.ContainerExecStart() removing the exec from
  177. // c.ExecCommands. If we win the race, we will find that there is no
  178. // process to clean up. (And ContainerExecStart will clobber the
  179. // exit code we set.) Prevent a nil-dereferenc panic in that
  180. // situation to restore the status quo where this is merely a
  181. // logical race condition.
  182. if execConfig.Process != nil {
  183. go func() {
  184. if _, err := execConfig.Process.Delete(context.Background()); err != nil {
  185. log.G(ctx).WithFields(log.Fields{
  186. "error": err,
  187. "container": ei.ContainerID,
  188. "process": ei.ProcessID,
  189. }).Warn("failed to delete process")
  190. }
  191. }()
  192. }
  193. }
  194. daemon.LogContainerEventWithAttributes(c, events.ActionExecDie, map[string]string{
  195. "execID": ei.ProcessID,
  196. "exitCode": strconv.Itoa(exitCode),
  197. })
  198. case libcontainerdtypes.EventStart:
  199. c.Lock()
  200. defer c.Unlock()
  201. // This is here to handle start not generated by docker
  202. if !c.Running {
  203. ctr, err := daemon.containerd.LoadContainer(context.Background(), c.ID)
  204. if err != nil {
  205. if errdefs.IsNotFound(err) {
  206. // The container was started by not-docker and so could have been deleted by
  207. // not-docker before we got around to loading it from containerd.
  208. log.G(context.TODO()).WithFields(log.Fields{
  209. "error": err,
  210. "container": c.ID,
  211. }).Debug("could not load containerd container for start event")
  212. return nil
  213. }
  214. return err
  215. }
  216. tsk, err := ctr.Task(context.Background())
  217. if err != nil {
  218. if errdefs.IsNotFound(err) {
  219. log.G(context.TODO()).WithFields(log.Fields{
  220. "error": err,
  221. "container": c.ID,
  222. }).Debug("failed to load task for externally-started container")
  223. return nil
  224. }
  225. return err
  226. }
  227. c.SetRunning(ctr, tsk, false)
  228. c.HasBeenManuallyStopped = false
  229. c.HasBeenStartedBefore = true
  230. daemon.setStateCounter(c)
  231. daemon.initHealthMonitor(c)
  232. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  233. return err
  234. }
  235. daemon.LogContainerEvent(c, events.ActionStart)
  236. }
  237. case libcontainerdtypes.EventPaused:
  238. c.Lock()
  239. defer c.Unlock()
  240. if !c.Paused {
  241. c.Paused = true
  242. daemon.setStateCounter(c)
  243. daemon.updateHealthMonitor(c)
  244. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  245. return err
  246. }
  247. daemon.LogContainerEvent(c, events.ActionPause)
  248. }
  249. case libcontainerdtypes.EventResumed:
  250. c.Lock()
  251. defer c.Unlock()
  252. if c.Paused {
  253. c.Paused = false
  254. daemon.setStateCounter(c)
  255. daemon.updateHealthMonitor(c)
  256. if err := c.CheckpointTo(daemon.containersReplica); err != nil {
  257. return err
  258. }
  259. daemon.LogContainerEvent(c, events.ActionUnPause)
  260. }
  261. }
  262. return nil
  263. }
  264. func (daemon *Daemon) autoRemove(cfg *config.Config, c *container.Container) {
  265. c.Lock()
  266. ar := c.HostConfig.AutoRemove
  267. c.Unlock()
  268. if !ar {
  269. return
  270. }
  271. err := daemon.containerRm(cfg, c.ID, &backend.ContainerRmConfig{ForceRemove: true, RemoveVolume: true})
  272. if err == nil {
  273. return
  274. }
  275. if c := daemon.containers.Get(c.ID); c == nil {
  276. return
  277. }
  278. log.G(context.TODO()).WithFields(log.Fields{"error": err, "container": c.ID}).Error("error removing container")
  279. }