8c0ecb6387
When manually stopping a container with a restart-policy, the container would show as "restarting" in `docker ps` whereas its actual state is "exited".

Stopping a container with a restart policy shows the container as "restarting":

    docker run -d --name test --restart unless-stopped busybox false
    docker stop test
    docker ps

    CONTAINER ID        IMAGE               COMMAND             CREATED             STATUS                         PORTS               NAMES
    7e07409fa1d3        busybox             "false"             5 minutes ago       Restarting (1) 4 minutes ago                       test

However, inspecting the same container shows that it's exited:

    docker inspect test --format '{{ json .State }}'

    {
      "Status": "exited",
      "Running": false,
      "Paused": false,
      "Restarting": false,
      "OOMKilled": false,
      "Dead": false,
      "Pid": 0,
      "ExitCode": 1,
      "Error": "",
      "StartedAt": "2019-02-14T13:26:27.6091648Z",
      "FinishedAt": "2019-02-14T13:26:27.689427Z"
    }

And killing the container confirms this:

    docker kill test

    Error response from daemon: Cannot kill container: test: Container 7e07409fa1d36dc8d8cb8f25cf12ee1168ad9040183b85fafa73ee2c1fcf9361 is not running

Repeating the same steps:

    docker run -d --name test --restart unless-stopped busybox false
    docker stop test
    docker ps

    CONTAINER ID        IMAGE               COMMAND             CREATED             STATUS                         PORTS               NAMES
    d0595237054a        busybox             "false"             5 minutes ago       Restarting (1) 4 minutes ago                       exit

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
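For illustration, a minimal Go sketch of the same repro against the Engine API, assuming a running local daemon with the busybox image already pulled, and the Go client (`github.com/docker/docker/client`) as its API looked around this release (newer clients take extra arguments for ContainerCreate and ContainerStop); error handling is reduced to panics to keep the sketch short:

    package main

    import (
        "context"
        "fmt"
        "time"

        "github.com/docker/docker/api/types"
        "github.com/docker/docker/api/types/container"
        "github.com/docker/docker/client"
    )

    func main() {
        ctx := context.Background()

        // Assumes DOCKER_HOST etc. point at a running daemon.
        cli, err := client.NewClientWithOpts(client.FromEnv)
        if err != nil {
            panic(err)
        }

        // Equivalent of: docker run -d --name test --restart unless-stopped busybox false
        created, err := cli.ContainerCreate(ctx,
            &container.Config{Image: "busybox", Cmd: []string{"false"}},
            &container.HostConfig{RestartPolicy: container.RestartPolicy{Name: "unless-stopped"}},
            nil, "test")
        if err != nil {
            panic(err)
        }
        if err := cli.ContainerStart(ctx, created.ID, types.ContainerStartOptions{}); err != nil {
            panic(err)
        }

        // Equivalent of: docker stop test
        timeout := 10 * time.Second
        if err := cli.ContainerStop(ctx, created.ID, &timeout); err != nil {
            panic(err)
        }

        // With the bug, `docker ps` (ContainerList) showed "Restarting (1) ...",
        // while the inspected state below already reported "exited".
        inspected, err := cli.ContainerInspect(ctx, created.ID)
        if err != nil {
            panic(err)
        }
        fmt.Printf("status=%s restarting=%v\n", inspected.State.Status, inspected.State.Restarting)
    }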
package daemon // import "github.com/docker/docker/daemon"

import (
    "context"
    "errors"
    "fmt"
    "runtime"
    "strconv"
    "time"

    "github.com/docker/docker/api/types"
    "github.com/docker/docker/container"
    "github.com/docker/docker/libcontainerd"
    "github.com/docker/docker/restartmanager"
    "github.com/sirupsen/logrus"
)
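
// setStateCounter records c's current state in the stateCtr metrics
// counter; any state other than "paused" or "running" counts as "stopped".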
func (daemon *Daemon) setStateCounter(c *container.Container) {
    switch c.StateString() {
    case "paused":
        stateCtr.set(c.ID, "paused")
    case "running":
        stateCtr.set(c.ID, "running")
    default:
        stateCtr.set(c.ID, "stopped")
    }
}

// ProcessEvent is called by libcontainerd whenever an event occurs
func (daemon *Daemon) ProcessEvent(id string, e libcontainerd.EventType, ei libcontainerd.EventInfo) error {
    c, err := daemon.GetContainer(id)
    if c == nil || err != nil {
        return fmt.Errorf("no such container: %s", id)
    }

    switch e {
    case libcontainerd.EventOOM:
        // StateOOM is Linux specific and should never be hit on Windows
        if runtime.GOOS == "windows" {
            return errors.New("received StateOOM from libcontainerd on Windows. This should never happen")
        }

        c.Lock()
        defer c.Unlock()
        daemon.updateHealthMonitor(c)
        if err := c.CheckpointTo(daemon.containersReplica); err != nil {
            return err
        }

        daemon.LogContainerEvent(c, "oom")
    case libcontainerd.EventExit:
        if int(ei.Pid) == c.Pid {
            c.Lock()
            _, _, err := daemon.containerd.DeleteTask(context.Background(), c.ID)
            if err != nil {
                logrus.WithError(err).Warnf("failed to delete container %s from containerd", c.ID)
            }

            c.StreamConfig.Wait()
            c.Reset(false)

            exitStatus := container.ExitStatus{
                ExitCode:  int(ei.ExitCode),
                ExitedAt:  ei.ExitedAt,
                OOMKilled: ei.OOMKilled,
            }
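            // Decide whether the restart policy applies: a restart is not
            // attempted when the daemon is shutting down or when the container
            // was manually stopped, in which case the container is set to
            // "stopped" rather than "restarting".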
            restart, wait, err := c.RestartManager().ShouldRestart(ei.ExitCode, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt))
            if err == nil && restart {
                c.RestartCount++
                c.SetRestarting(&exitStatus)
            } else {
                if ei.Error != nil {
                    c.SetError(ei.Error)
                }
                c.SetStopped(&exitStatus)
                defer daemon.autoRemove(c)
            }
            defer c.Unlock() // needs to be called before autoRemove

            // cancel healthchecks here; they will be automatically
            // restarted if/when the container is started again
            daemon.stopHealthchecks(c)
            attributes := map[string]string{
                "exitCode": strconv.Itoa(int(ei.ExitCode)),
            }
            daemon.LogContainerEventWithAttributes(c, "die", attributes)
            daemon.Cleanup(c)
            daemon.setStateCounter(c)
            cpErr := c.CheckpointTo(daemon.containersReplica)

            if err == nil && restart {
                go func() {
                    err := <-wait
                    if err == nil {
                        // daemon.netController is initialized while the daemon is restoring
                        // containers, but containerStart uses daemon.netController, so to
                        // avoid a panic during daemon startup we must wait until the
                        // restore is done.
                        daemon.waitForStartupDone()
                        if err = daemon.containerStart(c, "", "", false); err != nil {
                            logrus.Debugf("failed to restart container: %+v", err)
                        }
                    }
                    if err != nil {
                        c.Lock()
                        c.SetStopped(&exitStatus)
                        daemon.setStateCounter(c)
                        c.CheckpointTo(daemon.containersReplica)
                        c.Unlock()
                        defer daemon.autoRemove(c)
                        if err != restartmanager.ErrRestartCanceled {
                            logrus.Errorf("restartmanager wait error: %+v", err)
                        }
                    }
                }()
            }

            return cpErr
        }

        if execConfig := c.ExecCommands.Get(ei.ProcessID); execConfig != nil {
            ec := int(ei.ExitCode)
            execConfig.Lock()
            defer execConfig.Unlock()
            execConfig.ExitCode = &ec
            execConfig.Running = false
            execConfig.StreamConfig.Wait()
            if err := execConfig.CloseStreams(); err != nil {
                logrus.Errorf("failed to cleanup exec %s streams: %s", c.ID, err)
            }

            // remove the exec command from the container's store only and not the
            // daemon's store so that the exec command can be inspected.
            c.ExecCommands.Delete(execConfig.ID, execConfig.Pid)
            attributes := map[string]string{
                "execID":   execConfig.ID,
                "exitCode": strconv.Itoa(ec),
            }
            daemon.LogContainerEventWithAttributes(c, "exec_die", attributes)
        } else {
            logrus.WithFields(logrus.Fields{
                "container": c.ID,
                "exec-id":   ei.ProcessID,
                "exec-pid":  ei.Pid,
            }).Warn("Ignoring Exit Event, no such exec command found")
        }
    case libcontainerd.EventStart:
        c.Lock()
        defer c.Unlock()

        // This is here to handle starts not generated by docker
        if !c.Running {
            c.SetRunning(int(ei.Pid), false)
            c.HasBeenManuallyStopped = false
            c.HasBeenStartedBefore = true
            daemon.setStateCounter(c)

            daemon.initHealthMonitor(c)

            if err := c.CheckpointTo(daemon.containersReplica); err != nil {
                return err
            }
            daemon.LogContainerEvent(c, "start")
        }

    case libcontainerd.EventPaused:
        c.Lock()
        defer c.Unlock()

        if !c.Paused {
            c.Paused = true
            daemon.setStateCounter(c)
            daemon.updateHealthMonitor(c)
            if err := c.CheckpointTo(daemon.containersReplica); err != nil {
                return err
            }
            daemon.LogContainerEvent(c, "pause")
        }
    case libcontainerd.EventResumed:
        c.Lock()
        defer c.Unlock()

        if c.Paused {
            c.Paused = false
            daemon.setStateCounter(c)
            daemon.updateHealthMonitor(c)

            if err := c.CheckpointTo(daemon.containersReplica); err != nil {
                return err
            }
            daemon.LogContainerEvent(c, "unpause")
        }
    }
    return nil
}
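
// autoRemove removes the container if its AutoRemove flag is set, forcing
// removal and deleting its associated volumes. A removal error is only
// logged while the container still exists in the daemon's store; if it is
// already gone, another actor removed it and the error is ignored.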
func (daemon *Daemon) autoRemove(c *container.Container) {
    c.Lock()
    ar := c.HostConfig.AutoRemove
    c.Unlock()
    if !ar {
        return
    }

    var err error
    if err = daemon.ContainerRm(c.ID, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err == nil {
        return
    }
    if c := daemon.containers.Get(c.ID); c == nil {
        return
    }

    if err != nil {
        logrus.WithError(err).WithField("container", c.ID).Error("error removing container")
    }
}