From bd8c9dc2395032a893e959e48f9a8536858d67e0 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 4 Aug 2014 15:53:10 -0700 Subject: [PATCH 01/18] Restart containers based on restart policy Signed-off-by: Michael Crosby --- daemon/container.go | 140 ++++++++++++++++++++++++++++------------ runconfig/hostconfig.go | 2 + runconfig/parse.go | 2 + 3 files changed, 103 insertions(+), 41 deletions(-) diff --git a/daemon/container.go b/daemon/container.go index 50de4fc536..6786c2b2f1 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -7,6 +7,7 @@ import ( "io" "io/ioutil" "os" + "os/exec" "path" "path/filepath" "strings" @@ -75,6 +76,7 @@ type Container struct { daemon *Daemon MountLabel, ProcessLabel string + RestartCount int Volumes map[string]string // Store rw/ro in a separate structure to preserve reverse-compatibility on-disk. @@ -82,7 +84,8 @@ type Container struct { VolumesRW map[string]bool hostConfig *runconfig.HostConfig - activeLinks map[string]*links.Link + activeLinks map[string]*links.Link + requestedStop bool } func (container *Container) FromDisk() error { @@ -277,6 +280,7 @@ func (container *Container) Start() (err error) { if container.State.IsRunning() { return nil } + // if we encounter and error during start we need to ensure that any other // setup has been cleaned up properly defer func() { @@ -312,9 +316,6 @@ func (container *Container) Start() (err error) { if err := setupMountsForContainer(container); err != nil { return err } - if err := container.startLoggingToDisk(); err != nil { - return err - } return container.waitForStart() } @@ -497,35 +498,105 @@ func (container *Container) releaseNetwork() { func (container *Container) monitor(callback execdriver.StartCallback) error { var ( - err error - exitCode int + err error + exitCode int + failCount int ) - pipes := execdriver.NewPipes(container.stdin, container.stdout, container.stderr, container.Config.OpenStdin) - exitCode, err = container.daemon.Run(container, pipes, callback) - if err != nil { - log.Errorf("Error running container: %s", err) - } - container.State.SetStopped(exitCode) + // reset the restart count + container.RestartCount = -1 + container.requestedStop = false - // Cleanup - container.cleanup() + for { + container.RestartCount++ - // Re-create a brand new stdin pipe once the container exited - if container.Config.OpenStdin { - container.stdin, container.stdinPipe = io.Pipe() - } - container.LogEvent("die") - // If the engine is shutting down, don't save the container state as stopped. - // This will cause it to be restarted when the engine is restarted. 
- if container.daemon != nil && container.daemon.eng != nil && !container.daemon.eng.IsShutdown() { - if err := container.toDisk(); err != nil { - log.Errorf("Error dumping container %s state to disk: %s\n", container.ID, err) + pipes := execdriver.NewPipes(container.stdin, container.stdout, container.stderr, container.Config.OpenStdin) + if err := container.startLoggingToDisk(); err != nil { + return err + } + + if exitCode, err = container.daemon.Run(container, pipes, callback); err != nil { + failCount++ + + if failCount == 100 { + return err + } + + utils.Errorf("Error running container: %s", err) + } + + // We still wait to set the state as stopped and ensure that the locks were released + container.State.SetStopped(exitCode) + + if container.Config.OpenStdin { + if err := container.stdin.Close(); err != nil { + utils.Errorf("%s: Error close stdin: %s", container.ID, err) + } + } + + if err := container.stdout.Clean(); err != nil { + utils.Errorf("%s: Error close stdout: %s", container.ID, err) + } + + if err := container.stderr.Clean(); err != nil { + utils.Errorf("%s: Error close stderr: %s", container.ID, err) + } + + if container.command != nil && container.command.Terminal != nil { + if err := container.command.Terminal.Close(); err != nil { + utils.Errorf("%s: Error closing terminal: %s", container.ID, err) + } + } + + // Re-create a brand new stdin pipe once the container exited + if container.Config.OpenStdin { + container.stdin, container.stdinPipe = io.Pipe() + } + + if container.daemon != nil && container.daemon.srv != nil { + container.daemon.srv.LogEvent("die", container.ID, container.daemon.repositories.ImageName(container.Image)) + } + + policy := container.hostConfig.RestartPolicy + + if (policy == "always" || (policy == "on-failure" && exitCode != 0)) && !container.requestedStop { + container.command.Cmd = copyCmd(&container.command.Cmd) + time.Sleep(1 * time.Second) + } else { + // do not restart the container, let it die + // Cleanup networking and mounts + container.cleanup() + + if container.daemon != nil && container.daemon.srv != nil && container.daemon.srv.IsRunning() { + // FIXME: here is race condition between two RUN instructions in Dockerfile + // because they share same runconfig and change image. Must be fixed + // in builder/builder.go + if err := container.toDisk(); err != nil { + utils.Errorf("Error dumping container %s state to disk: %s\n", container.ID, err) + } + } + + return err } } - return err } +func copyCmd(c *exec.Cmd) exec.Cmd { + return exec.Cmd{ + Stdin: c.Stdin, + Stdout: c.Stdout, + Stderr: c.Stderr, + Path: c.Path, + Env: c.Env, + ExtraFiles: c.ExtraFiles, + Args: c.Args, + Dir: c.Dir, + SysProcAttr: c.SysProcAttr, + } +} + +// cleanup releases any network resources allocated to the container along with any rules +// around how containers are linked together. It also unmounts the container's root filesystem. 
func (container *Container) cleanup() { container.releaseNetwork() @@ -535,22 +606,6 @@ func (container *Container) cleanup() { link.Disable() } } - if container.Config.OpenStdin { - if err := container.stdin.Close(); err != nil { - log.Errorf("%s: Error close stdin: %s", container.ID, err) - } - } - if err := container.stdout.Clean(); err != nil { - log.Errorf("%s: Error close stdout: %s", container.ID, err) - } - if err := container.stderr.Clean(); err != nil { - log.Errorf("%s: Error close stderr: %s", container.ID, err) - } - if container.command != nil && container.command.Terminal != nil { - if err := container.command.Terminal.Close(); err != nil { - log.Errorf("%s: Error closing terminal: %s", container.ID, err) - } - } if err := container.Unmount(); err != nil { log.Errorf("%v: Failed to umount filesystem: %v", container.ID, err) @@ -570,6 +625,8 @@ func (container *Container) KillSig(sig int) error { if !container.State.IsRunning() { return nil } + container.requestedStop = true + return container.daemon.Kill(container, sig) } @@ -1122,6 +1179,7 @@ func (container *Container) waitForStart() error { c.Close() } } + container.State.SetRunning(command.Pid()) if err := container.toDisk(); err != nil { log.Debugf("%s", err) diff --git a/runconfig/hostconfig.go b/runconfig/hostconfig.go index 77e5fd8adb..f2a369842a 100644 --- a/runconfig/hostconfig.go +++ b/runconfig/hostconfig.go @@ -40,6 +40,7 @@ type HostConfig struct { NetworkMode NetworkMode CapAdd []string CapDrop []string + RestartPolicy string } func ContainerHostConfigFromJob(job *engine.Job) *HostConfig { @@ -48,6 +49,7 @@ func ContainerHostConfigFromJob(job *engine.Job) *HostConfig { Privileged: job.GetenvBool("Privileged"), PublishAllPorts: job.GetenvBool("PublishAllPorts"), NetworkMode: NetworkMode(job.Getenv("NetworkMode")), + RestartPolicy: job.Getenv("RestartPolicy"), } job.GetenvJson("LxcConf", &hostConfig.LxcConf) job.GetenvJson("PortBindings", &hostConfig.PortBindings) diff --git a/runconfig/parse.go b/runconfig/parse.go index 5805b6665b..3cab86234f 100644 --- a/runconfig/parse.go +++ b/runconfig/parse.go @@ -71,6 +71,7 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf flCpuShares = cmd.Int64([]string{"c", "-cpu-shares"}, 0, "CPU shares (relative weight)") flCpuset = cmd.String([]string{"-cpuset"}, "", "CPUs in which to allow execution (0-3, 0,1)") flNetMode = cmd.String([]string{"-net"}, "bridge", "Set the Network mode for the container\n'bridge': creates a new network stack for the container on the docker bridge\n'none': no networking for this container\n'container:': reuses another container network stack\n'host': use the host network stack inside the container. Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.") + flRestartPolicy = cmd.String([]string{"-restart"}, "", "Restart policy when the dies") // For documentation purpose _ = cmd.Bool([]string{"#sig-proxy", "-sig-proxy"}, true, "Proxy received signals to the process (even in non-TTY mode). 
SIGCHLD, SIGSTOP, and SIGKILL are not proxied.") _ = cmd.String([]string{"#name", "-name"}, "", "Assign a name to the container") @@ -271,6 +272,7 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf Devices: deviceMappings, CapAdd: flCapAdd.GetAll(), CapDrop: flCapDrop.GetAll(), + RestartPolicy: *flRestartPolicy, } if sysInfo != nil && flMemory > 0 && !sysInfo.SwapLimit { From e0a076d486ea49529dc3aeef2179eb0c406d0773 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 4 Aug 2014 16:02:29 -0700 Subject: [PATCH 02/18] Cleanup restart logic in monitor Signed-off-by: Michael Crosby --- daemon/container.go | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/daemon/container.go b/daemon/container.go index 6786c2b2f1..6bf6aca2a3 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -501,8 +501,15 @@ func (container *Container) monitor(callback execdriver.StartCallback) error { err error exitCode int failCount int + + policy = container.hostConfig.RestartPolicy ) + if err := container.startLoggingToDisk(); err != nil { + // TODO: crosbymichael cleanup IO, network, and mounts + return err + } + // reset the restart count container.RestartCount = -1 container.requestedStop = false @@ -511,15 +518,12 @@ func (container *Container) monitor(callback execdriver.StartCallback) error { container.RestartCount++ pipes := execdriver.NewPipes(container.stdin, container.stdout, container.stderr, container.Config.OpenStdin) - if err := container.startLoggingToDisk(); err != nil { - return err - } if exitCode, err = container.daemon.Run(container, pipes, callback); err != nil { failCount++ if failCount == 100 { - return err + container.requestedStop = true } utils.Errorf("Error running container: %s", err) @@ -557,27 +561,27 @@ func (container *Container) monitor(callback execdriver.StartCallback) error { container.daemon.srv.LogEvent("die", container.ID, container.daemon.repositories.ImageName(container.Image)) } - policy := container.hostConfig.RestartPolicy - if (policy == "always" || (policy == "on-failure" && exitCode != 0)) && !container.requestedStop { container.command.Cmd = copyCmd(&container.command.Cmd) + time.Sleep(1 * time.Second) - } else { - // do not restart the container, let it die - // Cleanup networking and mounts - container.cleanup() - if container.daemon != nil && container.daemon.srv != nil && container.daemon.srv.IsRunning() { - // FIXME: here is race condition between two RUN instructions in Dockerfile - // because they share same runconfig and change image. Must be fixed - // in builder/builder.go - if err := container.toDisk(); err != nil { - utils.Errorf("Error dumping container %s state to disk: %s\n", container.ID, err) - } - } - - return err + continue } + + // Cleanup networking and mounts + container.cleanup() + + if container.daemon != nil && container.daemon.srv != nil && container.daemon.srv.IsRunning() { + // FIXME: here is race condition between two RUN instructions in Dockerfile + // because they share same runconfig and change image. 
Must be fixed + // in builder/builder.go + if err := container.toDisk(); err != nil { + utils.Errorf("Error dumping container %s state to disk: %s\n", container.ID, err) + } + } + + return err } } From d9753ba20d5e602b0980687353d76c874b563042 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 4 Aug 2014 16:14:43 -0700 Subject: [PATCH 03/18] Add typed RestartPolicy Signed-off-by: Michael Crosby --- daemon/container.go | 7 ++++--- runconfig/hostconfig.go | 11 +++++++++-- runconfig/parse.go | 40 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 52 insertions(+), 6 deletions(-) diff --git a/daemon/container.go b/daemon/container.go index 6bf6aca2a3..3383276e6d 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -501,6 +501,7 @@ func (container *Container) monitor(callback execdriver.StartCallback) error { err error exitCode int failCount int + exit bool policy = container.hostConfig.RestartPolicy ) @@ -522,8 +523,8 @@ func (container *Container) monitor(callback execdriver.StartCallback) error { if exitCode, err = container.daemon.Run(container, pipes, callback); err != nil { failCount++ - if failCount == 100 { - container.requestedStop = true + if failCount == policy.MaximumRetryCount { + exit = true } utils.Errorf("Error running container: %s", err) @@ -561,7 +562,7 @@ func (container *Container) monitor(callback execdriver.StartCallback) error { container.daemon.srv.LogEvent("die", container.ID, container.daemon.repositories.ImageName(container.Image)) } - if (policy == "always" || (policy == "on-failure" && exitCode != 0)) && !container.requestedStop { + if (policy.Name == "always" || (policy.Name == "on-failure" && exitCode != 0)) && !container.requestedStop || !exit { container.command.Cmd = copyCmd(&container.command.Cmd) time.Sleep(1 * time.Second) diff --git a/runconfig/hostconfig.go b/runconfig/hostconfig.go index f2a369842a..4dd4766e5e 100644 --- a/runconfig/hostconfig.go +++ b/runconfig/hostconfig.go @@ -25,6 +25,11 @@ type DeviceMapping struct { CgroupPermissions string } +type RestartPolicy struct { + Name string + MaximumRetryCount int +} + type HostConfig struct { Binds []string ContainerIDFile string @@ -40,7 +45,7 @@ type HostConfig struct { NetworkMode NetworkMode CapAdd []string CapDrop []string - RestartPolicy string + RestartPolicy RestartPolicy } func ContainerHostConfigFromJob(job *engine.Job) *HostConfig { @@ -49,11 +54,12 @@ func ContainerHostConfigFromJob(job *engine.Job) *HostConfig { Privileged: job.GetenvBool("Privileged"), PublishAllPorts: job.GetenvBool("PublishAllPorts"), NetworkMode: NetworkMode(job.Getenv("NetworkMode")), - RestartPolicy: job.Getenv("RestartPolicy"), } + job.GetenvJson("LxcConf", &hostConfig.LxcConf) job.GetenvJson("PortBindings", &hostConfig.PortBindings) job.GetenvJson("Devices", &hostConfig.Devices) + job.GetenvJson("RestartPolicy", &hostConfig.RestartPolicy) if Binds := job.GetenvList("Binds"); Binds != nil { hostConfig.Binds = Binds } @@ -75,5 +81,6 @@ func ContainerHostConfigFromJob(job *engine.Job) *HostConfig { if CapDrop := job.GetenvList("CapDrop"); CapDrop != nil { hostConfig.CapDrop = CapDrop } + return hostConfig } diff --git a/runconfig/parse.go b/runconfig/parse.go index 3cab86234f..ea6e9ebca2 100644 --- a/runconfig/parse.go +++ b/runconfig/parse.go @@ -4,6 +4,7 @@ import ( "fmt" "io/ioutil" "path" + "strconv" "strings" "github.com/docker/docker/nat" @@ -234,6 +235,11 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf return nil, nil, cmd, fmt.Errorf("--net: invalid net 
mode: %v", err) } + restartPolicy, err := parseRestartPolicy(*flRestartPolicy) + if err != nil { + return nil, nil, cmd, err + } + config := &Config{ Hostname: hostname, Domainname: domainname, @@ -272,7 +278,7 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf Devices: deviceMappings, CapAdd: flCapAdd.GetAll(), CapDrop: flCapDrop.GetAll(), - RestartPolicy: *flRestartPolicy, + RestartPolicy: restartPolicy, } if sysInfo != nil && flMemory > 0 && !sysInfo.SwapLimit { @@ -287,6 +293,38 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf return config, hostConfig, cmd, nil } +// parseRestartPolicy returns the parsed policy or an error indicating what is incorrect +func parseRestartPolicy(policy string) (RestartPolicy, error) { + p := RestartPolicy{} + + if policy == "" { + return p, nil + } + + var ( + parts = strings.Split(policy, ":") + name = parts[0] + ) + + switch name { + case "no", "on-failure", "always": + p.Name = name + + if len(parts) == 2 { + count, err := strconv.Atoi(parts[1]) + if err != nil { + return p, err + } + + p.MaximumRetryCount = count + } + default: + return p, fmt.Errorf("invalid restart policy %s", name) + } + + return p, nil +} + // options will come in the format of name.key=value or name.option func parseDriverOpts(opts opts.ListOpts) (map[string][]string, error) { out := make(map[string][]string, len(opts.GetAll())) From 2b0776c883c41cda0b86808fe86e4df6e1aa1cd5 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 4 Aug 2014 17:05:56 -0700 Subject: [PATCH 04/18] Refactor container monitor into type Signed-off-by: Michael Crosby --- daemon/container.go | 142 ++++----------------------------- daemon/monitor.go | 189 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 204 insertions(+), 127 deletions(-) create mode 100644 daemon/monitor.go diff --git a/daemon/container.go b/daemon/container.go index 3383276e6d..681852a17d 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -7,7 +7,6 @@ import ( "io" "io/ioutil" "os" - "os/exec" "path" "path/filepath" "strings" @@ -84,8 +83,8 @@ type Container struct { VolumesRW map[string]bool hostConfig *runconfig.HostConfig - activeLinks map[string]*links.Link - requestedStop bool + activeLinks map[string]*links.Link + monitor *containerMonitor } func (container *Container) FromDisk() error { @@ -496,110 +495,6 @@ func (container *Container) releaseNetwork() { container.NetworkSettings = &NetworkSettings{} } -func (container *Container) monitor(callback execdriver.StartCallback) error { - var ( - err error - exitCode int - failCount int - exit bool - - policy = container.hostConfig.RestartPolicy - ) - - if err := container.startLoggingToDisk(); err != nil { - // TODO: crosbymichael cleanup IO, network, and mounts - return err - } - - // reset the restart count - container.RestartCount = -1 - container.requestedStop = false - - for { - container.RestartCount++ - - pipes := execdriver.NewPipes(container.stdin, container.stdout, container.stderr, container.Config.OpenStdin) - - if exitCode, err = container.daemon.Run(container, pipes, callback); err != nil { - failCount++ - - if failCount == policy.MaximumRetryCount { - exit = true - } - - utils.Errorf("Error running container: %s", err) - } - - // We still wait to set the state as stopped and ensure that the locks were released - container.State.SetStopped(exitCode) - - if container.Config.OpenStdin { - if err := container.stdin.Close(); err != nil { - utils.Errorf("%s: Error close stdin: %s", 
container.ID, err) - } - } - - if err := container.stdout.Clean(); err != nil { - utils.Errorf("%s: Error close stdout: %s", container.ID, err) - } - - if err := container.stderr.Clean(); err != nil { - utils.Errorf("%s: Error close stderr: %s", container.ID, err) - } - - if container.command != nil && container.command.Terminal != nil { - if err := container.command.Terminal.Close(); err != nil { - utils.Errorf("%s: Error closing terminal: %s", container.ID, err) - } - } - - // Re-create a brand new stdin pipe once the container exited - if container.Config.OpenStdin { - container.stdin, container.stdinPipe = io.Pipe() - } - - if container.daemon != nil && container.daemon.srv != nil { - container.daemon.srv.LogEvent("die", container.ID, container.daemon.repositories.ImageName(container.Image)) - } - - if (policy.Name == "always" || (policy.Name == "on-failure" && exitCode != 0)) && !container.requestedStop || !exit { - container.command.Cmd = copyCmd(&container.command.Cmd) - - time.Sleep(1 * time.Second) - - continue - } - - // Cleanup networking and mounts - container.cleanup() - - if container.daemon != nil && container.daemon.srv != nil && container.daemon.srv.IsRunning() { - // FIXME: here is race condition between two RUN instructions in Dockerfile - // because they share same runconfig and change image. Must be fixed - // in builder/builder.go - if err := container.toDisk(); err != nil { - utils.Errorf("Error dumping container %s state to disk: %s\n", container.ID, err) - } - } - - return err - } -} - -func copyCmd(c *exec.Cmd) exec.Cmd { - return exec.Cmd{ - Stdin: c.Stdin, - Stdout: c.Stdout, - Stderr: c.Stderr, - Path: c.Path, - Env: c.Env, - ExtraFiles: c.ExtraFiles, - Args: c.Args, - Dir: c.Dir, - SysProcAttr: c.SysProcAttr, - } -} - // cleanup releases any network resources allocated to the container along with any rules // around how containers are linked together. It also unmounts the container's root filesystem. func (container *Container) cleanup() { @@ -630,7 +525,10 @@ func (container *Container) KillSig(sig int) error { if !container.State.IsRunning() { return nil } - container.requestedStop = true + + // signal to the monitor that it should not restart the container + // after we send the kill signal + container.monitor.ExitOnNext() return container.daemon.Kill(container, sig) } @@ -1174,27 +1072,17 @@ func (container *Container) startLoggingToDisk() error { } func (container *Container) waitForStart() error { - waitStart := make(chan struct{}) - callback := func(command *execdriver.Command) { - if command.Tty { - // The callback is called after the process Start() - // so we are in the parent process. In TTY mode, stdin/out/err is the PtySlace - // which we close here. 
- if c, ok := command.Stdout.(io.Closer); ok { - c.Close() - } - } + container.monitor = newContainerMonitor(container, container.hostConfig.RestartPolicy) - container.State.SetRunning(command.Pid()) - if err := container.toDisk(); err != nil { - log.Debugf("%s", err) - } + var ( + cErr = utils.Go(container.monitor.Start) + waitStart = make(chan struct{}) + ) + + go func() { + container.State.WaitRunning(-1 * time.Second) close(waitStart) - } - - // We use a callback here instead of a goroutine and an chan for - // syncronization purposes - cErr := utils.Go(func() error { return container.monitor(callback) }) + }() // Start should not return until the process is actually running select { diff --git a/daemon/monitor.go b/daemon/monitor.go new file mode 100644 index 0000000000..94d7f2b004 --- /dev/null +++ b/daemon/monitor.go @@ -0,0 +1,189 @@ +package daemon + +import ( + "io" + "os/exec" + "sync" + "time" + + "github.com/docker/docker/daemon/execdriver" + "github.com/docker/docker/runconfig" + "github.com/docker/docker/utils" +) + +// containerMonitor monitors the execution of a container's main process. +// If a restart policy is specified for the cotnainer the monitor will ensure that the +// process is restarted based on the rules of the policy. When the container is finally stopped +// the monitor will reset and cleanup any of the container resources such as networking allocations +// and the rootfs +type containerMonitor struct { + mux sync.Mutex + + container *Container + restartPolicy runconfig.RestartPolicy + failureCount int + shouldStop bool +} + +func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) *containerMonitor { + return &containerMonitor{ + container: container, + restartPolicy: policy, + } +} + +// Stop signals to the container monitor that it should stop monitoring the container +// for exits the next time the process dies +func (m *containerMonitor) ExitOnNext() { + m.mux.Lock() + m.shouldStop = true + m.mux.Unlock() +} + +// Close closes the container's resources such as networking allocations and +// unmounts the contatiner's root filesystem +func (m *containerMonitor) Close() error { + // Cleanup networking and mounts + m.container.cleanup() + + if m.container.daemon != nil && m.container.daemon.srv != nil && m.container.daemon.srv.IsRunning() { + // FIXME: here is race condition between two RUN instructions in Dockerfile + // because they share same runconfig and change image. 
Must be fixed + // in builder/builder.go + if err := m.container.toDisk(); err != nil { + utils.Errorf("Error dumping container %s state to disk: %s\n", m.container.ID, err) + + return err + } + } + + return nil +} + +// reset resets the container's IO and ensures that the command is able to be executed again +// by copying the data into a new struct +func (m *containerMonitor) reset() { + container := m.container + + if container.Config.OpenStdin { + if err := container.stdin.Close(); err != nil { + utils.Errorf("%s: Error close stdin: %s", container.ID, err) + } + } + + if err := container.stdout.Clean(); err != nil { + utils.Errorf("%s: Error close stdout: %s", container.ID, err) + } + + if err := container.stderr.Clean(); err != nil { + utils.Errorf("%s: Error close stderr: %s", container.ID, err) + } + + if container.command != nil && container.command.Terminal != nil { + if err := container.command.Terminal.Close(); err != nil { + utils.Errorf("%s: Error closing terminal: %s", container.ID, err) + } + } + + // Re-create a brand new stdin pipe once the container exited + if container.Config.OpenStdin { + container.stdin, container.stdinPipe = io.Pipe() + } + + if container.daemon != nil && container.daemon.srv != nil { + container.daemon.srv.LogEvent("die", container.ID, container.daemon.repositories.ImageName(container.Image)) + } + + c := container.command.Cmd + + container.command.Cmd = exec.Cmd{ + Stdin: c.Stdin, + Stdout: c.Stdout, + Stderr: c.Stderr, + Path: c.Path, + Env: c.Env, + ExtraFiles: c.ExtraFiles, + Args: c.Args, + Dir: c.Dir, + SysProcAttr: c.SysProcAttr, + } +} + +// Start starts the containers process and monitors it according to the restart policy +func (m *containerMonitor) Start() error { + var ( + err error + exitCode int + ) + defer m.Close() + + // reset the restart count + m.container.RestartCount = -1 + + for !m.shouldStop { + m.container.RestartCount++ + if err := m.container.startLoggingToDisk(); err != nil { + m.reset() + + return err + } + + pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin) + + if exitCode, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil { + m.failureCount++ + + if m.failureCount == m.restartPolicy.MaximumRetryCount { + m.ExitOnNext() + } + + utils.Errorf("Error running container: %s", err) + } + + // We still wait to set the state as stopped and ensure that the locks were released + m.container.State.SetStopped(exitCode) + + m.reset() + + if m.shouldRestart(exitCode) { + time.Sleep(1 * time.Second) + + continue + } + + break + } + + return err +} + +func (m *containerMonitor) shouldRestart(exitCode int) bool { + m.mux.Lock() + + shouldRestart := (m.restartPolicy.Name == "always" || + (m.restartPolicy.Name == "on-failure" && exitCode != 0)) && + !m.shouldStop + + m.mux.Unlock() + + return shouldRestart +} + +// callback ensures that the container's state is properly updated after we +// received ack from the execution drivers +func (m *containerMonitor) callback(command *execdriver.Command) { + if command.Tty { + // The callback is called after the process Start() + // so we are in the parent process. In TTY mode, stdin/out/err is the PtySlace + // which we close here. 
+ if c, ok := command.Stdout.(io.Closer); ok { + c.Close() + } + } + + m.container.State.SetRunning(command.Pid()) + + if err := m.container.ToDisk(); err != nil { + utils.Debugf("%s", err) + } +} From 860c13b788944410a98a6ad5b5cfb74de0a8405b Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 4 Aug 2014 18:20:53 -0700 Subject: [PATCH 05/18] Add documentation and update restart rules. Implement time backed backoff for restarting and fix failure count when the maximum is 0 Signed-off-by: Michael Crosby --- daemon/monitor.go | 89 +++++++++++++++++------ docs/sources/reference/commandline/cli.md | 26 +++++++ runconfig/parse.go | 29 +++++--- 3 files changed, 111 insertions(+), 33 deletions(-) diff --git a/daemon/monitor.go b/daemon/monitor.go index 94d7f2b004..3eb68791d6 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -11,6 +11,8 @@ import ( "github.com/docker/docker/utils" ) +const defaultTimeIncrement = 100 + // containerMonitor monitors the execution of a container's main process. // If a restart policy is specified for the cotnainer the monitor will ensure that the // process is restarted based on the rules of the policy. When the container is finally stopped @@ -19,16 +21,30 @@ import ( type containerMonitor struct { mux sync.Mutex - container *Container + // container is the container being monitored + container *Container + + // restartPolicy is the being applied to the container monitor restartPolicy runconfig.RestartPolicy - failureCount int - shouldStop bool + + // failureCount is the number of times the container has failed to + // start in a row + failureCount int + + // shouldStop signals the monitor that the next time the container exits it is + // either because docker or the user asked for the container to be stopped + shouldStop bool + + // timeIncrement is the amount of time to wait between restarts + // this is in milliseconds + timeIncrement int } func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) *containerMonitor { return &containerMonitor{ container: container, restartPolicy: policy, + timeIncrement: defaultTimeIncrement, } } @@ -62,7 +78,7 @@ func (m *containerMonitor) Close() error { // reset resets the container's IO and ensures that the command is able to be executed again // by copying the data into a new struct -func (m *containerMonitor) reset() { +func (m *containerMonitor) reset(successful bool) { container := m.container if container.Config.OpenStdin { @@ -107,14 +123,29 @@ func (m *containerMonitor) reset() { Dir: c.Dir, SysProcAttr: c.SysProcAttr, } + + // the container exited successfully so we need to reset the failure counter + // and the timeIncrement back to the default values + if successful { + m.failureCount = 0 + m.timeIncrement = defaultTimeIncrement + } else { + // otherwise we need to increment the amount of time we wait before restarting + // the process. 
We will build up by multiplying the increment by 2 + + m.failureCount++ + m.timeIncrement *= 2 + } } // Start starts the containers process and monitors it according to the restart policy func (m *containerMonitor) Start() error { var ( - err error - exitCode int + err error + exitStatus int ) + + // ensure that when the monitor finally exits we release the networking and unmount the rootfs defer m.Close() // reset the restart count @@ -122,31 +153,26 @@ func (m *containerMonitor) Start() error { for !m.shouldStop { m.container.RestartCount++ + if err := m.container.startLoggingToDisk(); err != nil { - m.reset() + m.reset(false) return err } pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin) - if exitCode, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil { - m.failureCount++ - - if m.failureCount == m.restartPolicy.MaximumRetryCount { - m.ExitOnNext() - } - + if exitStatus, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil { utils.Errorf("Error running container: %s", err) } // We still wait to set the state as stopped and ensure that the locks were released - m.container.State.SetStopped(exitCode) + m.container.State.SetStopped(exitStatus) - m.reset() + m.reset(err == nil && exitStatus == 0) - if m.shouldRestart(exitCode) { - time.Sleep(1 * time.Second) + if m.shouldRestart(exitStatus) { + time.Sleep(time.Duration(m.timeIncrement) * time.Millisecond) continue } @@ -157,16 +183,31 @@ func (m *containerMonitor) Start() error { return err } -func (m *containerMonitor) shouldRestart(exitCode int) bool { +// shouldRestart checks the restart policy and applies the rules to determine if +// the container's process should be restarted +func (m *containerMonitor) shouldRestart(exitStatus int) bool { m.mux.Lock() + defer m.mux.Unlock() - shouldRestart := (m.restartPolicy.Name == "always" || - (m.restartPolicy.Name == "on-failure" && exitCode != 0)) && - !m.shouldStop + // do not restart if the user or docker has requested that this container be stopped + if m.shouldStop { + return false + } - m.mux.Unlock() + switch m.restartPolicy.Name { + case "always": + return true + case "on-failure": + // the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count + if max := m.restartPolicy.MaximumRetryCount; max != 0 && m.failureCount >= max { + utils.Debugf("stopping restart of container %s because maximum failure could of %d has been reached", max) + return false + } - return shouldRestart + return exitStatus != 0 + } + + return false } // callback ensures that the container's state is properly updated after we diff --git a/docs/sources/reference/commandline/cli.md b/docs/sources/reference/commandline/cli.md index 5e6107cfaa..8f4ed19a27 100644 --- a/docs/sources/reference/commandline/cli.md +++ b/docs/sources/reference/commandline/cli.md @@ -993,6 +993,7 @@ removed before the image is removed. format: ip:hostPort:containerPort | ip::containerPort | hostPort:containerPort (use 'docker port' to see the actual mapping) --privileged=false Give extended privileges to this container + --restart="" Restart policy to apply when a container exits (no, on-failure, always) --rm=false Automatically remove the container when it exits (incompatible with -d) --sig-proxy=true Proxy received signals to the process (even in non-TTY mode). SIGCHLD, SIGSTOP, and SIGKILL are not proxied. 
-t, --tty=false Allocate a pseudo-TTY @@ -1220,6 +1221,31 @@ application change: `--rm` option means that when the container exits, the container's layer is removed. +#### Restart Policies + +Using the `--restart` flag on docker run you can specify a restart policy for +how a container should or should not be restarted on exit. + +** no ** - Do not restart the container when it exits. + +** on-failure ** - Restart the container only if it exits with a non zero exit status. + +** always ** - Always restart the container reguardless of the exit status. + +You can also specify the maximum amount of times docker will try to restart the +container when using the ** on-failure ** policy. The default is that docker will try forever to restart the container. + + $ sudo docker run --restart=always redis + +This will run the redis container with a restart policy of ** always ** so that if +the container exits, docker will restart it. + + $ sudo docker run --restart=on-failure:10 redis + +This will run the redis container with a restart policy of ** on-failure ** and a +maximum restart count of 10. If the redis container exits with a non-zero exit +status more than 10 times in a row docker will abort trying to restart the container. + ## save Usage: docker save IMAGE diff --git a/runconfig/parse.go b/runconfig/parse.go index ea6e9ebca2..2b4dc632a0 100644 --- a/runconfig/parse.go +++ b/runconfig/parse.go @@ -17,11 +17,12 @@ import ( ) var ( - ErrInvalidWorkingDirectory = fmt.Errorf("The working directory is invalid. It needs to be an absolute path.") - ErrConflictAttachDetach = fmt.Errorf("Conflicting options: -a and -d") - ErrConflictDetachAutoRemove = fmt.Errorf("Conflicting options: --rm and -d") - ErrConflictNetworkHostname = fmt.Errorf("Conflicting options: -h and the network mode (--net)") - ErrConflictHostNetworkAndLinks = fmt.Errorf("Conflicting options: --net=host can't be used with links. This would result in undefined behavior.") + ErrInvalidWorkingDirectory = fmt.Errorf("The working directory is invalid. It needs to be an absolute path.") + ErrConflictAttachDetach = fmt.Errorf("Conflicting options: -a and -d") + ErrConflictDetachAutoRemove = fmt.Errorf("Conflicting options: --rm and -d") + ErrConflictNetworkHostname = fmt.Errorf("Conflicting options: -h and the network mode (--net)") + ErrConflictHostNetworkAndLinks = fmt.Errorf("Conflicting options: --net=host can't be used with links. This would result in undefined behavior.") + ErrConflictRestartPolicyAndAutoRemove = fmt.Errorf("Conflicting options: --restart and --rm") ) //FIXME Only used in tests @@ -72,7 +73,7 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf flCpuShares = cmd.Int64([]string{"c", "-cpu-shares"}, 0, "CPU shares (relative weight)") flCpuset = cmd.String([]string{"-cpuset"}, "", "CPUs in which to allow execution (0-3, 0,1)") flNetMode = cmd.String([]string{"-net"}, "bridge", "Set the Network mode for the container\n'bridge': creates a new network stack for the container on the docker bridge\n'none': no networking for this container\n'container:': reuses another container network stack\n'host': use the host network stack inside the container. 
Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.") - flRestartPolicy = cmd.String([]string{"-restart"}, "", "Restart policy when the dies") + flRestartPolicy = cmd.String([]string{"-restart"}, "", "Restart policy to apply when a container exits (no, on-failure, always)") // For documentation purpose _ = cmd.Bool([]string{"#sig-proxy", "-sig-proxy"}, true, "Proxy received signals to the process (even in non-TTY mode). SIGCHLD, SIGSTOP, and SIGKILL are not proxied.") _ = cmd.String([]string{"#name", "-name"}, "", "Assign a name to the container") @@ -227,8 +228,6 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf } // parse the '-e' and '--env' after, to allow override envVariables = append(envVariables, flEnv.GetAll()...) - // boo, there's no debug output for docker run - //log.Debugf("Environment variables for the container: %#v", envVariables) netMode, err := parseNetMode(*flNetMode) if err != nil { @@ -240,6 +239,10 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf return nil, nil, cmd, err } + if *flAutoRemove && (restartPolicy.Name == "always" || restartPolicy.Name == "on-failure") { + return nil, nil, cmd, ErrConflictRestartPolicyAndAutoRemove + } + config := &Config{ Hostname: hostname, Domainname: domainname, @@ -307,7 +310,15 @@ func parseRestartPolicy(policy string) (RestartPolicy, error) { ) switch name { - case "no", "on-failure", "always": + case "always": + p.Name = name + + if len(parts) == 2 { + return p, fmt.Errorf("maximum restart count not valid with restart policy of \"always\"") + } + case "no": + // do nothing + case "on-failure": p.Name = name if len(parts) == 2 { From 617edd89f48a7082bf4ec1d28ec12eefd1057a2f Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 5 Aug 2014 14:40:50 -0700 Subject: [PATCH 06/18] Update docs based on feedback from review for --restart Signed-off-by: Michael Crosby --- docs/man/docker-run.1.md | 1 + docs/sources/reference/commandline/cli.md | 16 ++++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/man/docker-run.1.md b/docs/man/docker-run.1.md index 4dee97f195..225fb78cb8 100644 --- a/docs/man/docker-run.1.md +++ b/docs/man/docker-run.1.md @@ -30,6 +30,7 @@ docker-run - Run a command in a new container [**-P**|**--publish-all**[=*false*]] [**-p**|**--publish**[=*[]*]] [**--privileged**[=*false*]] +[**--restart**[=*POLICY*]] [**--rm**[=*false*]] [**--sig-proxy**[=*true*]] [**-t**|**--tty**[=*false*]] diff --git a/docs/sources/reference/commandline/cli.md b/docs/sources/reference/commandline/cli.md index 8f4ed19a27..4a756cc16e 100644 --- a/docs/sources/reference/commandline/cli.md +++ b/docs/sources/reference/commandline/cli.md @@ -1223,7 +1223,7 @@ application change: #### Restart Policies -Using the `--restart` flag on docker run you can specify a restart policy for +Using the `--restart` flag on Docker run you can specify a restart policy for how a container should or should not be restarted on exit. ** no ** - Do not restart the container when it exits. @@ -1232,19 +1232,19 @@ how a container should or should not be restarted on exit. ** always ** - Always restart the container reguardless of the exit status. -You can also specify the maximum amount of times docker will try to restart the -container when using the ** on-failure ** policy. The default is that docker will try forever to restart the container. 
+You can also specify the maximum amount of times Docker will try to restart the +container when using the ** on-failure ** policy. The default is that Docker will try forever to restart the container. $ sudo docker run --restart=always redis -This will run the redis container with a restart policy of ** always ** so that if -the container exits, docker will restart it. +This will run the `redis` container with a restart policy of ** always ** so that if +the container exits, Docker will restart it. $ sudo docker run --restart=on-failure:10 redis -This will run the redis container with a restart policy of ** on-failure ** and a -maximum restart count of 10. If the redis container exits with a non-zero exit -status more than 10 times in a row docker will abort trying to restart the container. +This will run the `redis` container with a restart policy of ** on-failure ** and a +maximum restart count of 10. If the `redis` container exits with a non-zero exit +status more than 10 times in a row Docker will abort trying to restart the container. ## save From 41870a42bee9c394c61aba4bd81f782dbd38da12 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 6 Aug 2014 10:40:43 -0700 Subject: [PATCH 07/18] Only restart containers on daemon load with policy of always Signed-off-by: Michael Crosby --- daemon/daemon.go | 58 ++++++++++++++++++++++++++++++----------------- daemon/monitor.go | 14 +++++------- 2 files changed, 43 insertions(+), 29 deletions(-) diff --git a/daemon/daemon.go b/daemon/daemon.go index e777c4a1be..0569126e17 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -172,20 +172,24 @@ func (daemon *Daemon) load(id string) (*Container, error) { if err := container.FromDisk(); err != nil { return nil, err } + if container.ID != id { return container, fmt.Errorf("Container %s is stored at %s", container.ID, id) } + + container.readHostConfig() + return container, nil } // Register makes a container object usable by the daemon as // This is a wrapper for register func (daemon *Daemon) Register(container *Container) error { - return daemon.register(container, true, nil) + return daemon.register(container, true) } // register makes a container object usable by the daemon as -func (daemon *Daemon) register(container *Container, updateSuffixarray bool, containersToStart *[]*Container) error { +func (daemon *Daemon) register(container *Container, updateSuffixarray bool) error { if container.daemon != nil || daemon.Exists(container.ID) { return fmt.Errorf("Container is already loaded") } @@ -257,14 +261,6 @@ func (daemon *Daemon) register(container *Container, updateSuffixarray bool, con if err := container.ToDisk(); err != nil { return err } - - if daemon.config.AutoRestart { - log.Debugf("Marking as restarting") - - if containersToStart != nil { - *containersToStart = append(*containersToStart, container) - } - } } } return nil @@ -296,10 +292,9 @@ func (daemon *Daemon) LogToDisk(src *broadcastwriter.BroadcastWriter, dst, strea func (daemon *Daemon) restore() error { var ( - debug = (os.Getenv("DEBUG") != "" || os.Getenv("TEST") != "") - containers = make(map[string]*Container) - currentDriver = daemon.driver.String() - containersToStart = []*Container{} + debug = (os.Getenv("DEBUG") != "" || os.Getenv("TEST") != "") + containers = make(map[string]*Container) + currentDriver = daemon.driver.String() ) if !debug { @@ -322,24 +317,33 @@ func (daemon *Daemon) restore() error { } // Ignore the container if it does not support the current driver being used by the graph - if container.Driver 
== "" && currentDriver == "aufs" || container.Driver == currentDriver { + if (container.Driver == "" && currentDriver == "aufs") || container.Driver == currentDriver { log.Debugf("Loaded container %v", container.ID) + containers[container.ID] = container } else { log.Debugf("Cannot load container %s because it was created with another graph driver.", container.ID) } } + registeredContainers := []*Container{} + if entities := daemon.containerGraph.List("/", -1); entities != nil { for _, p := range entities.Paths() { if !debug { fmt.Print(".") } + e := entities[p] + if container, ok := containers[e.ID()]; ok { - if err := daemon.register(container, false, &containersToStart); err != nil { + if err := daemon.register(container, false); err != nil { log.Debugf("Failed to register container %s: %s", container.ID, err) } + + registeredContainers = append(registeredContainers, container) + + // delete from the map so that a new name is not automatically generated delete(containers, e.ID()) } } @@ -352,15 +356,27 @@ func (daemon *Daemon) restore() error { if err != nil { log.Debugf("Setting default id - %s", err) } - if err := daemon.register(container, false, &containersToStart); err != nil { + + if err := daemon.register(container, false); err != nil { log.Debugf("Failed to register container %s: %s", container.ID, err) } + + registeredContainers = append(registeredContainers, container) } - for _, container := range containersToStart { - log.Debugf("Starting container %d", container.ID) - if err := container.Start(); err != nil { - log.Debugf("Failed to start container %s: %s", container.ID, err) + // check the restart policy on the containers and restart any container with + // the restart policy of "always" + if daemon.config.AutoRestart { + log.Debugf("Restarting containers...") + + for _, container := range registeredContainers { + if container.hostConfig.RestartPolicy.Name == "always" { + utils.Debugf("Starting container %s", container.ID) + + if err := container.Start(); err != nil { + utils.Debugf("Failed to start container %s: %s", container.ID, err) + } + } } } diff --git a/daemon/monitor.go b/daemon/monitor.go index 3eb68791d6..66f2a14e3e 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -62,15 +62,13 @@ func (m *containerMonitor) Close() error { // Cleanup networking and mounts m.container.cleanup() - if m.container.daemon != nil && m.container.daemon.srv != nil && m.container.daemon.srv.IsRunning() { - // FIXME: here is race condition between two RUN instructions in Dockerfile - // because they share same runconfig and change image. Must be fixed - // in builder/builder.go - if err := m.container.toDisk(); err != nil { - utils.Errorf("Error dumping container %s state to disk: %s\n", m.container.ID, err) + // FIXME: here is race condition between two RUN instructions in Dockerfile + // because they share same runconfig and change image. 
Must be fixed + // in builder/builder.go + if err := m.container.toDisk(); err != nil { + utils.Errorf("Error dumping container %s state to disk: %s\n", m.container.ID, err) - return err - } + return err } return nil From feda8fbb21489f64aa3b7340c94473a08502bd6b Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 6 Aug 2014 21:13:06 -0700 Subject: [PATCH 08/18] Restart conatiner with on-failure policy if exit code != 0 Signed-off-by: Michael Crosby --- daemon/daemon.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/daemon/daemon.go b/daemon/daemon.go index 0569126e17..c88ac32e63 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -370,7 +370,8 @@ func (daemon *Daemon) restore() error { log.Debugf("Restarting containers...") for _, container := range registeredContainers { - if container.hostConfig.RestartPolicy.Name == "always" { + if container.hostConfig.RestartPolicy.Name == "always" || + (container.hostConfig.RestartPolicy.Name == "on-failure" && container.State.ExitCode != 0) { utils.Debugf("Starting container %s", container.ID) if err := container.Start(); err != nil { From b7550cd456e29874279499a546b07bb062a12096 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 6 Aug 2014 21:15:34 -0700 Subject: [PATCH 09/18] Fix rebase around log event Signed-off-by: Michael Crosby --- daemon/monitor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/daemon/monitor.go b/daemon/monitor.go index 66f2a14e3e..3517589a37 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -105,7 +105,7 @@ func (m *containerMonitor) reset(successful bool) { } if container.daemon != nil && container.daemon.srv != nil { - container.daemon.srv.LogEvent("die", container.ID, container.daemon.repositories.ImageName(container.Image)) + container.LogEvent("die") } c := container.command.Cmd From 6ae05936e1f343a2ecc51b8f642bdc8b93325d81 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Thu, 7 Aug 2014 10:50:25 -0700 Subject: [PATCH 10/18] Move container start event into monitor Signed-off-by: Michael Crosby --- daemon/monitor.go | 6 +++--- daemon/start.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/daemon/monitor.go b/daemon/monitor.go index 3517589a37..6f172dc155 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -104,9 +104,7 @@ func (m *containerMonitor) reset(successful bool) { container.stdin, container.stdinPipe = io.Pipe() } - if container.daemon != nil && container.daemon.srv != nil { - container.LogEvent("die") - } + container.LogEvent("die") c := container.command.Cmd @@ -160,6 +158,8 @@ func (m *containerMonitor) Start() error { pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin) + m.container.LogEvent("start") + if exitStatus, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil { utils.Errorf("Error running container: %s", err) } diff --git a/daemon/start.go b/daemon/start.go index cb6e9cb21f..30e015496f 100644 --- a/daemon/start.go +++ b/daemon/start.go @@ -36,7 +36,7 @@ func (daemon *Daemon) ContainerStart(job *engine.Job) engine.Status { if err := container.Start(); err != nil { return job.Errorf("Cannot start container %s: %s", name, err) } - container.LogEvent("start") + return engine.StatusOK } From be22d7ac2db6396becd63ec2dd28b9731011c9b9 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 11 Aug 2014 10:50:02 -0700 Subject: [PATCH 11/18] Add additional comments for vague lines Signed-off-by: Michael Crosby --- 
daemon/monitor.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/daemon/monitor.go b/daemon/monitor.go index 6f172dc155..97fbe6ab47 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -164,12 +164,15 @@ func (m *containerMonitor) Start() error { utils.Errorf("Error running container: %s", err) } - // We still wait to set the state as stopped and ensure that the locks were released + // we still wait to set the state as stopped and ensure that the locks were released m.container.State.SetStopped(exitStatus) + // pass if we exited successfully m.reset(err == nil && exitStatus == 0) if m.shouldRestart(exitStatus) { + // sleep with a small time increment between each restart to help avoid issues cased by quickly + // restarting the container because of some types of errors ( networking cut out, etc... ) time.Sleep(time.Duration(m.timeIncrement) * time.Millisecond) continue From a2afb2b1e37e72cab0d68cfe8dd0ec830b7a54e5 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 11 Aug 2014 11:07:37 -0700 Subject: [PATCH 12/18] Add Restarting state when docker is handling the restart of containers Signed-off-by: Michael Crosby --- daemon/monitor.go | 141 ++++++++++++++++++++++++---------------------- daemon/state.go | 30 ++++++++++ 2 files changed, 105 insertions(+), 66 deletions(-) diff --git a/daemon/monitor.go b/daemon/monitor.go index 97fbe6ab47..b733531dff 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -74,66 +74,6 @@ func (m *containerMonitor) Close() error { return nil } -// reset resets the container's IO and ensures that the command is able to be executed again -// by copying the data into a new struct -func (m *containerMonitor) reset(successful bool) { - container := m.container - - if container.Config.OpenStdin { - if err := container.stdin.Close(); err != nil { - utils.Errorf("%s: Error close stdin: %s", container.ID, err) - } - } - - if err := container.stdout.Clean(); err != nil { - utils.Errorf("%s: Error close stdout: %s", container.ID, err) - } - - if err := container.stderr.Clean(); err != nil { - utils.Errorf("%s: Error close stderr: %s", container.ID, err) - } - - if container.command != nil && container.command.Terminal != nil { - if err := container.command.Terminal.Close(); err != nil { - utils.Errorf("%s: Error closing terminal: %s", container.ID, err) - } - } - - // Re-create a brand new stdin pipe once the container exited - if container.Config.OpenStdin { - container.stdin, container.stdinPipe = io.Pipe() - } - - container.LogEvent("die") - - c := container.command.Cmd - - container.command.Cmd = exec.Cmd{ - Stdin: c.Stdin, - Stdout: c.Stdout, - Stderr: c.Stderr, - Path: c.Path, - Env: c.Env, - ExtraFiles: c.ExtraFiles, - Args: c.Args, - Dir: c.Dir, - SysProcAttr: c.SysProcAttr, - } - - // the container exited successfully so we need to reset the failure counter - // and the timeIncrement back to the default values - if successful { - m.failureCount = 0 - m.timeIncrement = defaultTimeIncrement - } else { - // otherwise we need to increment the amount of time we wait before restarting - // the process. 
We will build up by multiplying the increment by 2 - - m.failureCount++ - m.timeIncrement *= 2 - } -} - // Start starts the containers process and monitors it according to the restart policy func (m *containerMonitor) Start() error { var ( @@ -151,7 +91,7 @@ func (m *containerMonitor) Start() error { m.container.RestartCount++ if err := m.container.startLoggingToDisk(); err != nil { - m.reset(false) + m.resetContainer() return err } @@ -164,18 +104,23 @@ func (m *containerMonitor) Start() error { utils.Errorf("Error running container: %s", err) } - // we still wait to set the state as stopped and ensure that the locks were released - m.container.State.SetStopped(exitStatus) - - // pass if we exited successfully - m.reset(err == nil && exitStatus == 0) + m.resetMonitor(err == nil && exitStatus == 0) if m.shouldRestart(exitStatus) { + m.container.State.SetRestarting(exitStatus) + + m.resetContainer() + // sleep with a small time increment between each restart to help avoid issues cased by quickly // restarting the container because of some types of errors ( networking cut out, etc... ) time.Sleep(time.Duration(m.timeIncrement) * time.Millisecond) continue + } else { + // we still wait to set the state as stopped and ensure that the locks were released + m.container.State.SetStopped(exitStatus) + + m.resetContainer() } break @@ -184,6 +129,23 @@ func (m *containerMonitor) Start() error { return err } +// resetMonitor resets the stateful fields on the containerMonitor based on the +// previous runs success or failure +func (m *containerMonitor) resetMonitor(successful bool) { + // the container exited successfully so we need to reset the failure counter + // and the timeIncrement back to the default values + if successful { + m.failureCount = 0 + m.timeIncrement = defaultTimeIncrement + } else { + // otherwise we need to increment the amount of time we wait before restarting + // the process. 
We will build up by multiplying the increment by 2 + + m.failureCount++ + m.timeIncrement *= 2 + } +} + // shouldRestart checks the restart policy and applies the rules to determine if // the container's process should be restarted func (m *containerMonitor) shouldRestart(exitStatus int) bool { @@ -229,3 +191,50 @@ func (m *containerMonitor) callback(command *execdriver.Command) { utils.Debugf("%s", err) } } + +// resetContainer resets the container's IO and ensures that the command is able to be executed again +// by copying the data into a new struct +func (m *containerMonitor) resetContainer() { + container := m.container + + if container.Config.OpenStdin { + if err := container.stdin.Close(); err != nil { + utils.Errorf("%s: Error close stdin: %s", container.ID, err) + } + } + + if err := container.stdout.Clean(); err != nil { + utils.Errorf("%s: Error close stdout: %s", container.ID, err) + } + + if err := container.stderr.Clean(); err != nil { + utils.Errorf("%s: Error close stderr: %s", container.ID, err) + } + + if container.command != nil && container.command.Terminal != nil { + if err := container.command.Terminal.Close(); err != nil { + utils.Errorf("%s: Error closing terminal: %s", container.ID, err) + } + } + + // Re-create a brand new stdin pipe once the container exited + if container.Config.OpenStdin { + container.stdin, container.stdinPipe = io.Pipe() + } + + container.LogEvent("die") + + c := container.command.Cmd + + container.command.Cmd = exec.Cmd{ + Stdin: c.Stdin, + Stdout: c.Stdout, + Stderr: c.Stderr, + Path: c.Path, + Env: c.Env, + ExtraFiles: c.ExtraFiles, + Args: c.Args, + Dir: c.Dir, + SysProcAttr: c.SysProcAttr, + } +} diff --git a/daemon/state.go b/daemon/state.go index 9baa396843..00597d699a 100644 --- a/daemon/state.go +++ b/daemon/state.go @@ -12,6 +12,7 @@ type State struct { sync.RWMutex Running bool Paused bool + Restarting bool Pid int ExitCode int StartedAt time.Time @@ -30,15 +31,22 @@ func (s *State) String() string { s.RLock() defer s.RUnlock() + if s.Restarting { + return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt))) + } + if s.Running { if s.Paused { return fmt.Sprintf("Up %s (Paused)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt))) } + return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt))) } + if s.FinishedAt.IsZero() { return "" } + return fmt.Sprintf("Exited (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt))) } @@ -135,6 +143,28 @@ func (s *State) SetStopped(exitCode int) { s.Unlock() } +// SetRestarting is when docker hanldes the auto restart of containers when they are +// in the middle of a stop and being restarted again +func (s *State) SetRestarting(exitCode int) { + s.Lock() + if s.Running { + s.Running = false + s.Pid = 0 + s.FinishedAt = time.Now().UTC() + s.ExitCode = exitCode + close(s.waitChan) // fire waiters for stop + s.waitChan = make(chan struct{}) + } + s.Unlock() +} + +func (s *State) IsRestarting() bool { + s.RLock() + res := s.Restarting + s.RUnlock() + return res +} + func (s *State) SetPaused() { s.Lock() s.Paused = true From c4a00d549d0b895bb990491cbdf58aabe2891842 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 11 Aug 2014 11:35:18 -0700 Subject: [PATCH 13/18] Honor the restarting state in Stop Signed-off-by: Michael Crosby --- daemon/container.go | 7 +++++++ daemon/monitor.go | 9 ++++----- daemon/state.go | 12 +++++++----- 3 files changed, 18 insertions(+), 10 deletions(-) diff 
--git a/daemon/container.go b/daemon/container.go
index 681852a17d..e0da3c144a 100644
--- a/daemon/container.go
+++ b/daemon/container.go
@@ -530,6 +530,13 @@ func (container *Container) KillSig(sig int) error {
 	// after we send the kill signal
 	container.monitor.ExitOnNext()
 
+	// if the container is currently restarting we do not need to send the signal
+	// to the process. Telling the monitor that it should exit on its next event
+	// loop is enough
+	if container.State.IsRestarting() {
+		return nil
+	}
+
 	return container.daemon.Kill(container, sig)
 }
diff --git a/daemon/monitor.go b/daemon/monitor.go
index b733531dff..6c6c7b76db 100644
--- a/daemon/monitor.go
+++ b/daemon/monitor.go
@@ -116,16 +116,15 @@ func (m *containerMonitor) Start() error {
 			time.Sleep(time.Duration(m.timeIncrement) * time.Millisecond)
 
 			continue
-		} else {
-			// we still wait to set the state as stopped and ensure that the locks were released
-			m.container.State.SetStopped(exitStatus)
-
-			m.resetContainer()
 		}
 
 		break
 	}
 
+	m.container.State.SetStopped(exitStatus)
+
+	m.resetContainer()
+
 	return err
 }
diff --git a/daemon/state.go b/daemon/state.go
index 00597d699a..61b65f7d4e 100644
--- a/daemon/state.go
+++ b/daemon/state.go
@@ -31,14 +31,13 @@ func (s *State) String() string {
 	s.RLock()
 	defer s.RUnlock()
 
-	if s.Restarting {
-		return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
-	}
-
 	if s.Running {
 		if s.Paused {
 			return fmt.Sprintf("Up %s (Paused)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
 		}
+		if s.Restarting {
+			return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
+		}
 
 		return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
 	}
@@ -148,7 +147,10 @@ func (s *State) SetStopped(exitCode int) {
 func (s *State) SetRestarting(exitCode int) {
 	s.Lock()
 	if s.Running {
-		s.Running = false
+		// we should consider the container running when it is restarting because of
+		// all the checks in docker around rm/stop/etc
+		s.Running = true
+		s.Restarting = true
 		s.Pid = 0
 		s.FinishedAt = time.Now().UTC()
 		s.ExitCode = exitCode

From 972c89493148f930388097816663d453bf3e8c2c Mon Sep 17 00:00:00 2001
From: Michael Crosby
Date: Mon, 11 Aug 2014 15:00:55 -0700
Subject: [PATCH 14/18] Improve wait during restart

We need this so that when a user asks docker to stop a container that is
currently in the restart loop, we do not have to wait for the full
duration of the restart time increment before acknowledging the stop.
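The change below implements that by swapping the bare sleep for a select that also watches a stop channel. A rough standalone sketch of the pattern (names and types simplified, not the daemon's actual monitor):

package main

import (
	"fmt"
	"sync"
	"time"
)

// waiter sketches the stopChan/shouldStop pair the monitor uses so that a
// pending restart delay can be cut short when a stop is requested.
type waiter struct {
	mu         sync.Mutex
	shouldStop bool
	stopChan   chan struct{}
}

func newWaiter() *waiter {
	return &waiter{stopChan: make(chan struct{})}
}

// exitOnNext marks the waiter as stopping; the guard prevents a double close
// of the channel (and the resulting panic) when it is called more than once.
func (w *waiter) exitOnNext() {
	w.mu.Lock()
	if !w.shouldStop {
		w.shouldStop = true
		close(w.stopChan)
	}
	w.mu.Unlock()
}

// waitForNextRestart returns after the delay or as soon as a stop is
// requested, whichever comes first.
func (w *waiter) waitForNextRestart(delay time.Duration) {
	select {
	case <-time.After(delay):
	case <-w.stopChan:
	}
}

func main() {
	w := newWaiter()
	go func() {
		time.Sleep(50 * time.Millisecond)
		w.exitOnNext()
		w.exitOnNext() // calling it twice is safe
	}()

	start := time.Now()
	w.waitForNextRestart(10 * time.Second)
	fmt.Printf("wait interrupted after %s\n", time.Since(start).Round(time.Millisecond))
}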
Signed-off-by: Michael Crosby --- daemon/monitor.go | 50 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/daemon/monitor.go b/daemon/monitor.go index 6c6c7b76db..d9e67b0681 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -35,6 +35,11 @@ type containerMonitor struct { // either because docker or the user asked for the container to be stopped shouldStop bool + // stopChan is used to signal to the monitor whenever there is a wait for the + // next restart so that the timeIncrement is not honored and the user is not + // left waiting for nothing to happen during this time + stopChan chan struct{} + // timeIncrement is the amount of time to wait between restarts // this is in milliseconds timeIncrement int @@ -45,6 +50,7 @@ func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) * container: container, restartPolicy: policy, timeIncrement: defaultTimeIncrement, + stopChan: make(chan struct{}, 1), } } @@ -52,7 +58,14 @@ func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) * // for exits the next time the process dies func (m *containerMonitor) ExitOnNext() { m.mux.Lock() - m.shouldStop = true + + // we need to protect having a double close of the channel when stop is called + // twice or else we will get a panic + if !m.shouldStop { + m.shouldStop = true + close(m.stopChan) + } + m.mux.Unlock() } @@ -87,7 +100,7 @@ func (m *containerMonitor) Start() error { // reset the restart count m.container.RestartCount = -1 - for !m.shouldStop { + for { m.container.RestartCount++ if err := m.container.startLoggingToDisk(); err != nil { @@ -109,22 +122,34 @@ func (m *containerMonitor) Start() error { if m.shouldRestart(exitStatus) { m.container.State.SetRestarting(exitStatus) + m.container.LogEvent("die") + m.resetContainer() // sleep with a small time increment between each restart to help avoid issues cased by quickly // restarting the container because of some types of errors ( networking cut out, etc... 
)
-			time.Sleep(time.Duration(m.timeIncrement) * time.Millisecond)
+			m.waitForNextRestart()
+
+			// we need to check this before reentering the loop because the waitForNextRestart could have
+			// been terminated by a request from a user
+			if m.shouldStop {
+				m.container.State.SetStopped(exitStatus)
+
+				return err
+			}
 
 			continue
 		}
 
+		m.container.State.SetStopped(exitStatus)
+
+		m.container.LogEvent("die")
+
+		m.resetContainer()
+
 		break
 	}
 
-	m.container.State.SetStopped(exitStatus)
-
-	m.resetContainer()
-
 	return err
 }
@@ -145,6 +170,15 @@ func (m *containerMonitor) resetMonitor(successful bool) {
 }
 
+// waitForNextRestart waits with the default time increment to restart the container unless
+// a user or docker asks to container to be stopped
+func (m *containerMonitor) waitForNextRestart() {
+	select {
+	case <-time.After(time.Duration(m.timeIncrement) * time.Millisecond):
+	case <-m.stopChan:
+	}
+}
+
 // shouldRestart checks the restart policy and applies the rules to determine if
 // the container's process should be restarted
 func (m *containerMonitor) shouldRestart(exitStatus int) bool {
@@ -221,8 +255,6 @@ func (m *containerMonitor) resetContainer() {
 		container.stdin, container.stdinPipe = io.Pipe()
 	}
 
-	container.LogEvent("die")
-
 	c := container.command.Cmd
 
 	container.command.Cmd = exec.Cmd{

From ebf5d4657d14c7b57a5ca1c7b8689077b18b33dc Mon Sep 17 00:00:00 2001
From: Michael Crosby
Date: Tue, 12 Aug 2014 13:08:26 -0700
Subject: [PATCH 15/18] Regardless of success reset time increment

Reset the time increment if the container's execution time is greater
than 10s, or else as a container runs and is restarted the time will
grow over time.

Signed-off-by: Michael Crosby
---
 daemon/monitor.go | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/daemon/monitor.go b/daemon/monitor.go
index d9e67b0681..231b883a3a 100644
--- a/daemon/monitor.go
+++ b/daemon/monitor.go
@@ -43,6 +43,9 @@ type containerMonitor struct {
 	// timeIncrement is the amount of time to wait between restarts
 	// this is in milliseconds
 	timeIncrement int
+
+	// lastStartTime is the time which the monitor last exec'd the container's process
+	lastStartTime time.Time
 }
 
 func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) *containerMonitor {
@@ -113,6 +116,8 @@ func (m *containerMonitor) Start() error {
 		m.container.LogEvent("start")
+
+		m.lastStartTime = time.Now()
+
 		if exitStatus, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil {
 			utils.Errorf("Error running container: %s", err)
 		}
@@ -154,20 +159,25 @@ func (m *containerMonitor) Start() error {
 }
 
 // resetMonitor resets the stateful fields on the containerMonitor based on the
-// previous runs success or failure
+// previous runs success or failure. Regardless of success, if the container had
+// an execution time of more than 10s then reset the timer back to the default
 func (m *containerMonitor) resetMonitor(successful bool) {
-	// the container exited successfully so we need to reset the failure counter
-	// and the timeIncrement back to the default values
-	if successful {
-		m.failureCount = 0
+	executionTime := time.Now().Sub(m.lastStartTime).Seconds()
+
+	if executionTime > 10 {
 		m.timeIncrement = defaultTimeIncrement
 	} else {
 		// otherwise we need to increment the amount of time we wait before restarting
 		// the process.
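Taken together, the rewritten resetMonitor behaves roughly like this standalone sketch (simplified signature, assuming the 10-second threshold shown in the hunk; not the daemon's actual method):

package main

import (
	"fmt"
	"time"
)

const defaultDelay = 100 * time.Millisecond

// resetDelay sketches the new rule: if the previous run lasted more than 10
// seconds the delay snaps back to the default regardless of how it exited;
// otherwise it keeps doubling. The failure counter only tracks failed exits.
func resetDelay(delay time.Duration, failures int, executionTime time.Duration, successful bool) (time.Duration, int) {
	if executionTime > 10*time.Second {
		delay = defaultDelay
	} else {
		delay *= 2
	}

	if successful {
		failures = 0
	} else {
		failures++
	}
	return delay, failures
}

func main() {
	delay, failures := defaultDelay, 0

	// a crash after 2s doubles the delay; a crash after a 30s run resets it
	delay, failures = resetDelay(delay, failures, 2*time.Second, false)
	fmt.Println(delay, failures) // 200ms 1
	delay, failures = resetDelay(delay, failures, 30*time.Second, false)
	fmt.Println(delay, failures) // 100ms 2
}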
We will build up by multiplying the increment by 2 - - m.failureCount++ m.timeIncrement *= 2 } + + // the container exited successfully so we need to reset the failure counter + if successful { + m.failureCount = 0 + } else { + m.failureCount++ + } } // waitForNextRestart waits with the default time increment to restart the container unless From 2ec1b697c1a7c0ecc534b9ebffd7ba40ea285ff7 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 12 Aug 2014 18:03:11 -0700 Subject: [PATCH 16/18] Rebased changes to return on first start's error Signed-off-by: Michael Crosby --- daemon/container.go | 18 +++++------------- daemon/monitor.go | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/daemon/container.go b/daemon/container.go index e0da3c144a..3cb7af99e3 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -1081,22 +1081,14 @@ func (container *Container) startLoggingToDisk() error { func (container *Container) waitForStart() error { container.monitor = newContainerMonitor(container, container.hostConfig.RestartPolicy) - var ( - cErr = utils.Go(container.monitor.Start) - waitStart = make(chan struct{}) - ) - - go func() { - container.State.WaitRunning(-1 * time.Second) - close(waitStart) - }() - - // Start should not return until the process is actually running + // block until we either receive an error from the initial start of the container's + // process or until the process is running in the container select { - case <-waitStart: - case err := <-cErr: + case <-container.monitor.startSignal: + case err := <-utils.Go(container.monitor.Start): return err } + return nil } diff --git a/daemon/monitor.go b/daemon/monitor.go index 231b883a3a..801fe0e043 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -35,6 +35,10 @@ type containerMonitor struct { // either because docker or the user asked for the container to be stopped shouldStop bool + // startSignal signals with the initial process has launched after calling Start + // on the monitor + startSignal chan struct{} + // stopChan is used to signal to the monitor whenever there is a wait for the // next restart so that the timeIncrement is not honored and the user is not // left waiting for nothing to happen during this time @@ -48,12 +52,15 @@ type containerMonitor struct { lastStartTime time.Time } +// newContainerMonitor returns an initialized containerMonitor for the provided container +// honoring the provided restart policy func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) *containerMonitor { return &containerMonitor{ container: container, restartPolicy: policy, timeIncrement: defaultTimeIncrement, stopChan: make(chan struct{}, 1), + startSignal: make(chan struct{}, 1), } } @@ -119,6 +126,14 @@ func (m *containerMonitor) Start() error { m.lastStartTime = time.Now() if exitStatus, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil { + // if we receive an internal error from the initial start of a container then lets + // return it instead of entering the restart loop + if m.container.RestartCount == 1 { + m.resetContainer() + + return err + } + utils.Errorf("Error running container: %s", err) } @@ -230,6 +245,9 @@ func (m *containerMonitor) callback(command *execdriver.Command) { m.container.State.SetRunning(command.Pid()) + // signal that the process has started + close(m.startSignal) + if err := m.container.ToDisk(); err != nil { utils.Debugf("%s", err) } From 73ced636800c150640e2f0d307842b0f9259b611 Mon Sep 17 00:00:00 2001 From: 
Michael Crosby Date: Wed, 13 Aug 2014 14:56:35 -0700 Subject: [PATCH 17/18] Update based on comments from the code review Signed-off-by: Michael Crosby --- daemon/monitor.go | 14 ++++++++------ daemon/state.go | 22 +++++++++++----------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/daemon/monitor.go b/daemon/monitor.go index 801fe0e043..cb388e9139 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -24,7 +24,7 @@ type containerMonitor struct { // container is the container being monitored container *Container - // restartPolicy is the being applied to the container monitor + // restartPolicy is the current policy being applied to the container monitor restartPolicy runconfig.RestartPolicy // failureCount is the number of times the container has failed to @@ -35,8 +35,7 @@ type containerMonitor struct { // either because docker or the user asked for the container to be stopped shouldStop bool - // startSignal signals with the initial process has launched after calling Start - // on the monitor + // startSignal is a channel that is closes after the container initially starts startSignal chan struct{} // stopChan is used to signal to the monitor whenever there is a wait for the @@ -196,7 +195,7 @@ func (m *containerMonitor) resetMonitor(successful bool) { } // waitForNextRestart waits with the default time increment to restart the container unless -// a user or docker asks to container to be stopped +// a user or docker asks for the container to be stopped func (m *containerMonitor) waitForNextRestart() { select { case <-time.After(time.Duration(m.timeIncrement) * time.Millisecond): @@ -245,8 +244,11 @@ func (m *containerMonitor) callback(command *execdriver.Command) { m.container.State.SetRunning(command.Pid()) - // signal that the process has started - close(m.startSignal) + if m.startSignal != nil { + // signal that the process has started + close(m.startSignal) + m.startSignal = nil + } if err := m.container.ToDisk(); err != nil { utils.Debugf("%s", err) diff --git a/daemon/state.go b/daemon/state.go index 61b65f7d4e..ed137b0206 100644 --- a/daemon/state.go +++ b/daemon/state.go @@ -123,6 +123,7 @@ func (s *State) SetRunning(pid int) { s.Lock() s.Running = true s.Paused = false + s.Restarting = false s.ExitCode = 0 s.Pid = pid s.StartedAt = time.Now().UTC() @@ -134,6 +135,7 @@ func (s *State) SetRunning(pid int) { func (s *State) SetStopped(exitCode int) { s.Lock() s.Running = false + s.Restarting = false s.Pid = 0 s.FinishedAt = time.Now().UTC() s.ExitCode = exitCode @@ -146,17 +148,15 @@ func (s *State) SetStopped(exitCode int) { // in the middle of a stop and being restarted again func (s *State) SetRestarting(exitCode int) { s.Lock() - if s.Running { - // we should consider the container running when it is restarting because of - // all the checks in docker around rm/stop/etc - s.Running = true - s.Restarting = true - s.Pid = 0 - s.FinishedAt = time.Now().UTC() - s.ExitCode = exitCode - close(s.waitChan) // fire waiters for stop - s.waitChan = make(chan struct{}) - } + // we should consider the container running when it is restarting because of + // all the checks in docker around rm/stop/etc + s.Running = true + s.Restarting = true + s.Pid = 0 + s.FinishedAt = time.Now().UTC() + s.ExitCode = exitCode + close(s.waitChan) // fire waiters for stop + s.waitChan = make(chan struct{}) s.Unlock() } From 25c519e829640ebb23061b82c6ace88c5983b63d Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 13 Aug 2014 14:58:57 -0700 Subject: [PATCH 18/18] Deprecate 
--restart on the daemon

Signed-off-by: Michael Crosby
---
 daemon/config.go                          |  2 +-
 daemon/daemon.go                          |  4 ++--
 daemon/monitor.go                         | 20 ++++++++++----------
 docs/man/docker.1.md                      |  3 ---
 docs/sources/reference/commandline/cli.md |  1 -
 5 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/daemon/config.go b/daemon/config.go
index 5d0ea6ac12..a396bd0232 100644
--- a/daemon/config.go
+++ b/daemon/config.go
@@ -45,7 +45,7 @@ func (config *Config) InstallFlags() {
 	flag.StringVar(&config.Pidfile, []string{"p", "-pidfile"}, "/var/run/docker.pid", "Path to use for daemon PID file")
 	flag.StringVar(&config.Root, []string{"g", "-graph"}, "/var/lib/docker", "Path to use as the root of the Docker runtime")
-	flag.BoolVar(&config.AutoRestart, []string{"r", "-restart"}, true, "Restart previously running containers")
+	flag.BoolVar(&config.AutoRestart, []string{"#r", "#-restart"}, true, "--restart on the daemon has been deprecated in favor of --restart policies on docker run")
 	flag.BoolVar(&config.EnableIptables, []string{"#iptables", "-iptables"}, true, "Enable Docker's addition of iptables rules")
 	flag.BoolVar(&config.EnableIpForward, []string{"#ip-forward", "-ip-forward"}, true, "Enable net.ipv4.ip_forward")
 	flag.StringVar(&config.BridgeIP, []string{"#bip", "-bip"}, "", "Use this CIDR notation address for the network bridge's IP, not compatible with -b")
diff --git a/daemon/daemon.go b/daemon/daemon.go
index c88ac32e63..90cb6a89e1 100644
--- a/daemon/daemon.go
+++ b/daemon/daemon.go
@@ -372,10 +372,10 @@ func (daemon *Daemon) restore() error {
 	for _, container := range registeredContainers {
 		if container.hostConfig.RestartPolicy.Name == "always" ||
 			(container.hostConfig.RestartPolicy.Name == "on-failure" && container.State.ExitCode != 0) {
-			utils.Debugf("Starting container %s", container.ID)
+			log.Debugf("Starting container %s", container.ID)
 
 			if err := container.Start(); err != nil {
-				utils.Debugf("Failed to start container %s: %s", container.ID, err)
+				log.Debugf("Failed to start container %s: %s", container.ID, err)
 			}
 		}
 	}
diff --git a/daemon/monitor.go b/daemon/monitor.go
index cb388e9139..28dd424f3c 100644
--- a/daemon/monitor.go
+++ b/daemon/monitor.go
@@ -7,8 +7,8 @@ import (
 	"time"
 
 	"github.com/docker/docker/daemon/execdriver"
+	"github.com/docker/docker/pkg/log"
 	"github.com/docker/docker/runconfig"
-	"github.com/docker/docker/utils"
 )
 
 const defaultTimeIncrement = 100
@@ -88,7 +88,7 @@ func (m *containerMonitor) Close() error {
 		// because they share same runconfig and change image.
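As an aside, the restore path touched above only auto-starts containers whose policy calls for it; the check reduces to something like the sketch below (hypothetical helper name, simplified policy struct):

package main

import "fmt"

// RestartPolicy mirrors the shape used by the restart policies in this series:
// a name plus an optional retry limit.
type RestartPolicy struct {
	Name              string
	MaximumRetryCount int
}

// shouldRestoreOnBoot reproduces the restore() condition: containers with an
// "always" policy are restarted at daemon startup, "on-failure" only if their
// last exit code was non-zero.
func shouldRestoreOnBoot(policy RestartPolicy, exitCode int) bool {
	return policy.Name == "always" ||
		(policy.Name == "on-failure" && exitCode != 0)
}

func main() {
	fmt.Println(shouldRestoreOnBoot(RestartPolicy{Name: "always"}, 0))     // true
	fmt.Println(shouldRestoreOnBoot(RestartPolicy{Name: "on-failure"}, 0)) // false
	fmt.Println(shouldRestoreOnBoot(RestartPolicy{Name: "on-failure"}, 1)) // true
	fmt.Println(shouldRestoreOnBoot(RestartPolicy{Name: ""}, 1))           // false
}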
Must be fixed // in builder/builder.go if err := m.container.toDisk(); err != nil { - utils.Errorf("Error dumping container %s state to disk: %s\n", m.container.ID, err) + log.Errorf("Error dumping container %s state to disk: %s\n", m.container.ID, err) return err } @@ -127,13 +127,13 @@ func (m *containerMonitor) Start() error { if exitStatus, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil { // if we receive an internal error from the initial start of a container then lets // return it instead of entering the restart loop - if m.container.RestartCount == 1 { + if m.container.RestartCount == 0 { m.resetContainer() return err } - utils.Errorf("Error running container: %s", err) + log.Errorf("Error running container: %s", err) } m.resetMonitor(err == nil && exitStatus == 0) @@ -220,7 +220,7 @@ func (m *containerMonitor) shouldRestart(exitStatus int) bool { case "on-failure": // the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count if max := m.restartPolicy.MaximumRetryCount; max != 0 && m.failureCount >= max { - utils.Debugf("stopping restart of container %s because maximum failure could of %d has been reached", max) + log.Debugf("stopping restart of container %s because maximum failure could of %d has been reached", max) return false } @@ -251,7 +251,7 @@ func (m *containerMonitor) callback(command *execdriver.Command) { } if err := m.container.ToDisk(); err != nil { - utils.Debugf("%s", err) + log.Debugf("%s", err) } } @@ -262,21 +262,21 @@ func (m *containerMonitor) resetContainer() { if container.Config.OpenStdin { if err := container.stdin.Close(); err != nil { - utils.Errorf("%s: Error close stdin: %s", container.ID, err) + log.Errorf("%s: Error close stdin: %s", container.ID, err) } } if err := container.stdout.Clean(); err != nil { - utils.Errorf("%s: Error close stdout: %s", container.ID, err) + log.Errorf("%s: Error close stdout: %s", container.ID, err) } if err := container.stderr.Clean(); err != nil { - utils.Errorf("%s: Error close stderr: %s", container.ID, err) + log.Errorf("%s: Error close stderr: %s", container.ID, err) } if container.command != nil && container.command.Terminal != nil { if err := container.command.Terminal.Close(); err != nil { - utils.Errorf("%s: Error closing terminal: %s", container.ID, err) + log.Errorf("%s: Error closing terminal: %s", container.ID, err) } } diff --git a/docs/man/docker.1.md b/docs/man/docker.1.md index 18e8978af6..3932097255 100644 --- a/docs/man/docker.1.md +++ b/docs/man/docker.1.md @@ -64,9 +64,6 @@ unix://[/path/to/socket] to use. **-p**="" Path to use for daemon PID file. Default is `/var/run/docker.pid` -**-r**=*true*|*false* - Restart previously running containers. Default is true. - **-s**="" Force the Docker runtime to use a specific storage driver. diff --git a/docs/sources/reference/commandline/cli.md b/docs/sources/reference/commandline/cli.md index 4a756cc16e..9afa691b6b 100644 --- a/docs/sources/reference/commandline/cli.md +++ b/docs/sources/reference/commandline/cli.md @@ -71,7 +71,6 @@ expect an integer, and they can only be specified once. --mtu=0 Set the containers network MTU if no value is provided: default to the default route MTU or 1500 if no default route is available -p, --pidfile="/var/run/docker.pid" Path to use for daemon PID file - -r, --restart=true Restart previously running containers -s, --storage-driver="" Force the Docker runtime to use a specific storage driver --selinux-enabled=false Enable selinux support. 
SELinux does not presently support the BTRFS storage driver --storage-opt=[] Set storage driver options
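To round out the series, the per-exit decision the monitor makes can be summarized in the sketch below (illustrative only; the real shouldRestart runs under the monitor's lock, and the Start loop above additionally abandons a restart once a stop has been requested):

package main

import "fmt"

type RestartPolicy struct {
	Name              string
	MaximumRetryCount int
}

// shouldRestart applies the same rules as the monitor: "always" restarts
// unconditionally, "on-failure" restarts on a non-zero exit until the optional
// retry limit (0 means unlimited) is reached, and a user-requested stop always
// wins.
func shouldRestart(policy RestartPolicy, exitCode, failureCount int, userRequestedStop bool) bool {
	if userRequestedStop {
		return false
	}

	switch policy.Name {
	case "always":
		return true
	case "on-failure":
		if max := policy.MaximumRetryCount; max != 0 && failureCount >= max {
			return false
		}
		return exitCode != 0
	default:
		return false
	}
}

func main() {
	p := RestartPolicy{Name: "on-failure", MaximumRetryCount: 3}
	fmt.Println(shouldRestart(p, 1, 0, false)) // true: failed exit, under the limit
	fmt.Println(shouldRestart(p, 1, 3, false)) // false: retry limit reached
	fmt.Println(shouldRestart(p, 0, 0, false)) // false: clean exit
}

With the daemon-level -r/--restart flag deprecated in this final patch, the per-container policy passed to docker run (for example --restart=always, or --restart=on-failure with an optional maximum retry count) is what is expected to drive this logic.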