moby/daemon/monitor.go
Lei Jitang 7318eba5b2 Don't create source directory while the daemon is being shutdown, fix #30348
If a container mounts the socket that the daemon is listening on while
the daemon is being shut down, the socket will no longer exist on the
host. The daemon then assumes the mount source is a directory and
creates it on the host, which prevents the daemon from starting the
next time.

fix issue https://github.com/moby/moby/issues/30348

To reproduce this issue, you can add the following code:

```
--- a/daemon/oci_linux.go
+++ b/daemon/oci_linux.go
@@ -8,6 +8,7 @@ import (
        "sort"
        "strconv"
        "strings"
+       "time"

        "github.com/Sirupsen/logrus"
        "github.com/docker/docker/container"
@@ -666,7 +667,8 @@ func (daemon *Daemon) createSpec(c *container.Container) (*libcontainerd.Spec, e
        if err := daemon.setupIpcDirs(c); err != nil {
                return nil, err
        }
-
+       fmt.Printf("===please stop the daemon===\n")
+       time.Sleep(time.Second * 2)
        ms, err := daemon.setupMounts(c)
        if err != nil {
                return nil, err

```

Step 1: run a container with `--restart always` and `-v /var/run/docker.sock:/sock`
```
$ docker run -ti --restart always -v /var/run/docker.sock:/sock busybox
/ #

```
Step 2: exit the container
```
/ # exit
```
and kill the daemon when you see
```
===please stop the daemon===
```
in the daemon log

The daemon then cannot restart and fails with `can't create unix socket /var/run/docker.sock: is a directory`.

Signed-off-by: Lei Jitang <leijitang@huawei.com>
2017-05-30 22:59:51 -04:00

166 lines
4.4 KiB
Go

package daemon
import (
"errors"
"fmt"
"runtime"
"strconv"
"time"
"github.com/Sirupsen/logrus"
"github.com/docker/docker/api/types"
"github.com/docker/docker/container"
"github.com/docker/docker/libcontainerd"
"github.com/docker/docker/restartmanager"
)
// setStateCounter records the container's current state in stateCtr under
// c.ID. Only "paused" and "running" are tracked as themselves; every other
// value returned by c.StateString() is recorded as "stopped".
func (daemon *Daemon) setStateCounter(c *container.Container) {
	label := "stopped"
	if current := c.StateString(); current == "paused" || current == "running" {
		label = current
	}
	stateCtr.set(c.ID, label)
}
// StateChanged updates daemon state changes from containerd.
//
// It looks up the container identified by id and applies the transition
// described by e.State (OOM, exit, exec-process exit, start/restore, pause,
// resume). It returns an error when the container is unknown, when StateOOM
// is received on Windows, or when persisting the container with ToDisk
// fails; for StateExit it returns the result of postRunProcessing. States not
// listed in the switch are ignored and nil is returned.
func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
c := daemon.containers.Get(id)
if c == nil {
return fmt.Errorf("no such container: %s", id)
}
switch e.State {
case libcontainerd.StateOOM:
// StateOOM is Linux specific and should never be hit on Windows
if runtime.GOOS == "windows" {
return errors.New("Received StateOOM from libcontainerd on Windows. This should never happen.")
}
daemon.updateHealthMonitor(c)
daemon.LogContainerEvent(c, "oom")
case libcontainerd.StateExit:
// The container lock is taken here and released by the deferred
// c.Unlock registered further down in this case.
c.Lock()
c.StreamConfig.Wait()
c.Reset(false)
// If daemon is being shutdown, don't let the container restart
restart, wait, err := c.RestartManager().ShouldRestart(e.ExitCode, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt))
if err == nil && restart {
c.RestartCount++
c.SetRestarting(platformConstructExitStatus(e))
} else {
c.SetStopped(platformConstructExitStatus(e))
// Deferred calls run last-in-first-out: this defer is registered before
// the deferred c.Unlock below, so autoRemove (which acquires c.Lock
// itself) runs only after the lock has been released.
defer daemon.autoRemove(c)
}
// cancel healthcheck here, they will be automatically
// restarted if/when the container is started again
daemon.stopHealthchecks(c)
attributes := map[string]string{
"exitCode": strconv.Itoa(int(e.ExitCode)),
}
daemon.LogContainerEventWithAttributes(c, "die", attributes)
daemon.Cleanup(c)
if err == nil && restart {
// The restart happens in a separate goroutine that first blocks on the
// channel returned by ShouldRestart, so this handler is not held up.
go func() {
err := <-wait
if err == nil {
if err = daemon.containerStart(c, "", "", false); err != nil {
logrus.Debugf("failed to restart container: %+v", err)
}
}
// Reached when either the wait or the restart attempt above failed.
if err != nil {
// NOTE(review): this goroutine does not take c.Lock around SetStopped,
// unlike the synchronous path above — confirm that is safe.
c.SetStopped(platformConstructExitStatus(e))
defer daemon.autoRemove(c)
// ErrRestartCanceled is deliberately not logged as an error.
if err != restartmanager.ErrRestartCanceled {
logrus.Errorf("restartmanger wait error: %+v", err)
}
}
}()
}
daemon.setStateCounter(c)
// Registered after the autoRemove defer above so that it runs first.
defer c.Unlock()
if err := c.ToDisk(); err != nil {
return err
}
return daemon.postRunProcessing(c, e)
case libcontainerd.StateExitProcess:
// An exec'd process (identified by e.ProcessID) exited; record its exit
// code on the matching exec config, if one is still registered.
if execConfig := c.ExecCommands.Get(e.ProcessID); execConfig != nil {
ec := int(e.ExitCode)
// The exec config lock is held until StateChanged returns.
execConfig.Lock()
defer execConfig.Unlock()
execConfig.ExitCode = &ec
execConfig.Running = false
execConfig.StreamConfig.Wait()
if err := execConfig.CloseStreams(); err != nil {
logrus.Errorf("failed to cleanup exec %s streams: %s", c.ID, err)
}
// remove the exec command from the container's store only and not the
// daemon's store so that the exec command can be inspected.
c.ExecCommands.Delete(execConfig.ID)
} else {
logrus.Warnf("Ignoring StateExitProcess for %v but no exec command found", e)
}
case libcontainerd.StateStart, libcontainerd.StateRestore:
// Container is already locked in this case
// The second argument to SetRunning is true only for StateStart, false
// for StateRestore.
c.SetRunning(int(e.Pid), e.State == libcontainerd.StateStart)
c.HasBeenManuallyStopped = false
c.HasBeenStartedBefore = true
daemon.setStateCounter(c)
if err := c.ToDisk(); err != nil {
// Persisting failed: reset the container before reporting the error.
c.Reset(false)
return err
}
daemon.initHealthMonitor(c)
daemon.LogContainerEvent(c, "start")
case libcontainerd.StatePause:
// Container is already locked in this case
c.Paused = true
daemon.setStateCounter(c)
if err := c.ToDisk(); err != nil {
return err
}
daemon.updateHealthMonitor(c)
daemon.LogContainerEvent(c, "pause")
case libcontainerd.StateResume:
// Container is already locked in this case
c.Paused = false
daemon.setStateCounter(c)
if err := c.ToDisk(); err != nil {
return err
}
daemon.updateHealthMonitor(c)
daemon.LogContainerEvent(c, "unpause")
}
return nil
}
// autoRemove force-removes c together with its volumes when the container's
// HostConfig.AutoRemove flag is set. A removal failure is logged only if the
// container is still present in the daemon's container store.
func (daemon *Daemon) autoRemove(c *container.Container) {
	// Read the flag under the container lock and release the lock before
	// attempting the removal.
	c.Lock()
	enabled := c.HostConfig.AutoRemove
	c.Unlock()

	if !enabled {
		return
	}

	rmConfig := &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}
	rmErr := daemon.ContainerRm(c.ID, rmConfig)
	if rmErr == nil {
		return
	}

	// If the container is no longer in the store there is nothing to report.
	if daemon.containers.Get(c.ID) == nil {
		return
	}
	logrus.WithError(rmErr).WithField("container", c.ID).Error("error removing container")
}