From bd8c9dc2395032a893e959e48f9a8536858d67e0 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 4 Aug 2014 15:53:10 -0700 Subject: [PATCH 01/18] Restart containers based on restart policy Signed-off-by: Michael Crosby --- daemon/container.go | 140 ++++++++++++++++++++++++++++------------ runconfig/hostconfig.go | 2 + runconfig/parse.go | 2 + 3 files changed, 103 insertions(+), 41 deletions(-) diff --git a/daemon/container.go b/daemon/container.go index 50de4fc536..6786c2b2f1 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -7,6 +7,7 @@ import ( "io" "io/ioutil" "os" + "os/exec" "path" "path/filepath" "strings" @@ -75,6 +76,7 @@ type Container struct { daemon *Daemon MountLabel, ProcessLabel string + RestartCount int Volumes map[string]string // Store rw/ro in a separate structure to preserve reverse-compatibility on-disk. @@ -82,7 +84,8 @@ type Container struct { VolumesRW map[string]bool hostConfig *runconfig.HostConfig - activeLinks map[string]*links.Link + activeLinks map[string]*links.Link + requestedStop bool } func (container *Container) FromDisk() error { @@ -277,6 +280,7 @@ func (container *Container) Start() (err error) { if container.State.IsRunning() { return nil } + // if we encounter and error during start we need to ensure that any other // setup has been cleaned up properly defer func() { @@ -312,9 +316,6 @@ func (container *Container) Start() (err error) { if err := setupMountsForContainer(container); err != nil { return err } - if err := container.startLoggingToDisk(); err != nil { - return err - } return container.waitForStart() } @@ -497,35 +498,105 @@ func (container *Container) releaseNetwork() { func (container *Container) monitor(callback execdriver.StartCallback) error { var ( - err error - exitCode int + err error + exitCode int + failCount int ) - pipes := execdriver.NewPipes(container.stdin, container.stdout, container.stderr, container.Config.OpenStdin) - exitCode, err = container.daemon.Run(container, pipes, callback) - if err != nil { - log.Errorf("Error running container: %s", err) - } - container.State.SetStopped(exitCode) + // reset the restart count + container.RestartCount = -1 + container.requestedStop = false - // Cleanup - container.cleanup() + for { + container.RestartCount++ - // Re-create a brand new stdin pipe once the container exited - if container.Config.OpenStdin { - container.stdin, container.stdinPipe = io.Pipe() - } - container.LogEvent("die") - // If the engine is shutting down, don't save the container state as stopped. - // This will cause it to be restarted when the engine is restarted. 
- if container.daemon != nil && container.daemon.eng != nil && !container.daemon.eng.IsShutdown() { - if err := container.toDisk(); err != nil { - log.Errorf("Error dumping container %s state to disk: %s\n", container.ID, err) + pipes := execdriver.NewPipes(container.stdin, container.stdout, container.stderr, container.Config.OpenStdin) + if err := container.startLoggingToDisk(); err != nil { + return err + } + + if exitCode, err = container.daemon.Run(container, pipes, callback); err != nil { + failCount++ + + if failCount == 100 { + return err + } + + utils.Errorf("Error running container: %s", err) + } + + // We still wait to set the state as stopped and ensure that the locks were released + container.State.SetStopped(exitCode) + + if container.Config.OpenStdin { + if err := container.stdin.Close(); err != nil { + utils.Errorf("%s: Error close stdin: %s", container.ID, err) + } + } + + if err := container.stdout.Clean(); err != nil { + utils.Errorf("%s: Error close stdout: %s", container.ID, err) + } + + if err := container.stderr.Clean(); err != nil { + utils.Errorf("%s: Error close stderr: %s", container.ID, err) + } + + if container.command != nil && container.command.Terminal != nil { + if err := container.command.Terminal.Close(); err != nil { + utils.Errorf("%s: Error closing terminal: %s", container.ID, err) + } + } + + // Re-create a brand new stdin pipe once the container exited + if container.Config.OpenStdin { + container.stdin, container.stdinPipe = io.Pipe() + } + + if container.daemon != nil && container.daemon.srv != nil { + container.daemon.srv.LogEvent("die", container.ID, container.daemon.repositories.ImageName(container.Image)) + } + + policy := container.hostConfig.RestartPolicy + + if (policy == "always" || (policy == "on-failure" && exitCode != 0)) && !container.requestedStop { + container.command.Cmd = copyCmd(&container.command.Cmd) + time.Sleep(1 * time.Second) + } else { + // do not restart the container, let it die + // Cleanup networking and mounts + container.cleanup() + + if container.daemon != nil && container.daemon.srv != nil && container.daemon.srv.IsRunning() { + // FIXME: here is race condition between two RUN instructions in Dockerfile + // because they share same runconfig and change image. Must be fixed + // in builder/builder.go + if err := container.toDisk(); err != nil { + utils.Errorf("Error dumping container %s state to disk: %s\n", container.ID, err) + } + } + + return err } } - return err } +func copyCmd(c *exec.Cmd) exec.Cmd { + return exec.Cmd{ + Stdin: c.Stdin, + Stdout: c.Stdout, + Stderr: c.Stderr, + Path: c.Path, + Env: c.Env, + ExtraFiles: c.ExtraFiles, + Args: c.Args, + Dir: c.Dir, + SysProcAttr: c.SysProcAttr, + } +} + +// cleanup releases any network resources allocated to the container along with any rules +// around how containers are linked together. It also unmounts the container's root filesystem. 
func (container *Container) cleanup() { container.releaseNetwork() @@ -535,22 +606,6 @@ func (container *Container) cleanup() { link.Disable() } } - if container.Config.OpenStdin { - if err := container.stdin.Close(); err != nil { - log.Errorf("%s: Error close stdin: %s", container.ID, err) - } - } - if err := container.stdout.Clean(); err != nil { - log.Errorf("%s: Error close stdout: %s", container.ID, err) - } - if err := container.stderr.Clean(); err != nil { - log.Errorf("%s: Error close stderr: %s", container.ID, err) - } - if container.command != nil && container.command.Terminal != nil { - if err := container.command.Terminal.Close(); err != nil { - log.Errorf("%s: Error closing terminal: %s", container.ID, err) - } - } if err := container.Unmount(); err != nil { log.Errorf("%v: Failed to umount filesystem: %v", container.ID, err) @@ -570,6 +625,8 @@ func (container *Container) KillSig(sig int) error { if !container.State.IsRunning() { return nil } + container.requestedStop = true + return container.daemon.Kill(container, sig) } @@ -1122,6 +1179,7 @@ func (container *Container) waitForStart() error { c.Close() } } + container.State.SetRunning(command.Pid()) if err := container.toDisk(); err != nil { log.Debugf("%s", err) diff --git a/runconfig/hostconfig.go b/runconfig/hostconfig.go index 77e5fd8adb..f2a369842a 100644 --- a/runconfig/hostconfig.go +++ b/runconfig/hostconfig.go @@ -40,6 +40,7 @@ type HostConfig struct { NetworkMode NetworkMode CapAdd []string CapDrop []string + RestartPolicy string } func ContainerHostConfigFromJob(job *engine.Job) *HostConfig { @@ -48,6 +49,7 @@ func ContainerHostConfigFromJob(job *engine.Job) *HostConfig { Privileged: job.GetenvBool("Privileged"), PublishAllPorts: job.GetenvBool("PublishAllPorts"), NetworkMode: NetworkMode(job.Getenv("NetworkMode")), + RestartPolicy: job.Getenv("RestartPolicy"), } job.GetenvJson("LxcConf", &hostConfig.LxcConf) job.GetenvJson("PortBindings", &hostConfig.PortBindings) diff --git a/runconfig/parse.go b/runconfig/parse.go index 5805b6665b..3cab86234f 100644 --- a/runconfig/parse.go +++ b/runconfig/parse.go @@ -71,6 +71,7 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf flCpuShares = cmd.Int64([]string{"c", "-cpu-shares"}, 0, "CPU shares (relative weight)") flCpuset = cmd.String([]string{"-cpuset"}, "", "CPUs in which to allow execution (0-3, 0,1)") flNetMode = cmd.String([]string{"-net"}, "bridge", "Set the Network mode for the container\n'bridge': creates a new network stack for the container on the docker bridge\n'none': no networking for this container\n'container:': reuses another container network stack\n'host': use the host network stack inside the container. Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.") + flRestartPolicy = cmd.String([]string{"-restart"}, "", "Restart policy when the dies") // For documentation purpose _ = cmd.Bool([]string{"#sig-proxy", "-sig-proxy"}, true, "Proxy received signals to the process (even in non-TTY mode). 
SIGCHLD, SIGSTOP, and SIGKILL are not proxied.") _ = cmd.String([]string{"#name", "-name"}, "", "Assign a name to the container") @@ -271,6 +272,7 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf Devices: deviceMappings, CapAdd: flCapAdd.GetAll(), CapDrop: flCapDrop.GetAll(), + RestartPolicy: *flRestartPolicy, } if sysInfo != nil && flMemory > 0 && !sysInfo.SwapLimit { From e0a076d486ea49529dc3aeef2179eb0c406d0773 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 4 Aug 2014 16:02:29 -0700 Subject: [PATCH 02/18] Cleanup restart logic in monitor Signed-off-by: Michael Crosby --- daemon/container.go | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/daemon/container.go b/daemon/container.go index 6786c2b2f1..6bf6aca2a3 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -501,8 +501,15 @@ func (container *Container) monitor(callback execdriver.StartCallback) error { err error exitCode int failCount int + + policy = container.hostConfig.RestartPolicy ) + if err := container.startLoggingToDisk(); err != nil { + // TODO: crosbymichael cleanup IO, network, and mounts + return err + } + // reset the restart count container.RestartCount = -1 container.requestedStop = false @@ -511,15 +518,12 @@ func (container *Container) monitor(callback execdriver.StartCallback) error { container.RestartCount++ pipes := execdriver.NewPipes(container.stdin, container.stdout, container.stderr, container.Config.OpenStdin) - if err := container.startLoggingToDisk(); err != nil { - return err - } if exitCode, err = container.daemon.Run(container, pipes, callback); err != nil { failCount++ if failCount == 100 { - return err + container.requestedStop = true } utils.Errorf("Error running container: %s", err) @@ -557,27 +561,27 @@ func (container *Container) monitor(callback execdriver.StartCallback) error { container.daemon.srv.LogEvent("die", container.ID, container.daemon.repositories.ImageName(container.Image)) } - policy := container.hostConfig.RestartPolicy - if (policy == "always" || (policy == "on-failure" && exitCode != 0)) && !container.requestedStop { container.command.Cmd = copyCmd(&container.command.Cmd) + time.Sleep(1 * time.Second) - } else { - // do not restart the container, let it die - // Cleanup networking and mounts - container.cleanup() - if container.daemon != nil && container.daemon.srv != nil && container.daemon.srv.IsRunning() { - // FIXME: here is race condition between two RUN instructions in Dockerfile - // because they share same runconfig and change image. Must be fixed - // in builder/builder.go - if err := container.toDisk(); err != nil { - utils.Errorf("Error dumping container %s state to disk: %s\n", container.ID, err) - } - } - - return err + continue } + + // Cleanup networking and mounts + container.cleanup() + + if container.daemon != nil && container.daemon.srv != nil && container.daemon.srv.IsRunning() { + // FIXME: here is race condition between two RUN instructions in Dockerfile + // because they share same runconfig and change image. 
Must be fixed + // in builder/builder.go + if err := container.toDisk(); err != nil { + utils.Errorf("Error dumping container %s state to disk: %s\n", container.ID, err) + } + } + + return err } } From d9753ba20d5e602b0980687353d76c874b563042 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 4 Aug 2014 16:14:43 -0700 Subject: [PATCH 03/18] Add typed RestartPolicy Signed-off-by: Michael Crosby --- daemon/container.go | 7 ++++--- runconfig/hostconfig.go | 11 +++++++++-- runconfig/parse.go | 40 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 52 insertions(+), 6 deletions(-) diff --git a/daemon/container.go b/daemon/container.go index 6bf6aca2a3..3383276e6d 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -501,6 +501,7 @@ func (container *Container) monitor(callback execdriver.StartCallback) error { err error exitCode int failCount int + exit bool policy = container.hostConfig.RestartPolicy ) @@ -522,8 +523,8 @@ func (container *Container) monitor(callback execdriver.StartCallback) error { if exitCode, err = container.daemon.Run(container, pipes, callback); err != nil { failCount++ - if failCount == 100 { - container.requestedStop = true + if failCount == policy.MaximumRetryCount { + exit = true } utils.Errorf("Error running container: %s", err) @@ -561,7 +562,7 @@ func (container *Container) monitor(callback execdriver.StartCallback) error { container.daemon.srv.LogEvent("die", container.ID, container.daemon.repositories.ImageName(container.Image)) } - if (policy == "always" || (policy == "on-failure" && exitCode != 0)) && !container.requestedStop { + if (policy.Name == "always" || (policy.Name == "on-failure" && exitCode != 0)) && !container.requestedStop || !exit { container.command.Cmd = copyCmd(&container.command.Cmd) time.Sleep(1 * time.Second) diff --git a/runconfig/hostconfig.go b/runconfig/hostconfig.go index f2a369842a..4dd4766e5e 100644 --- a/runconfig/hostconfig.go +++ b/runconfig/hostconfig.go @@ -25,6 +25,11 @@ type DeviceMapping struct { CgroupPermissions string } +type RestartPolicy struct { + Name string + MaximumRetryCount int +} + type HostConfig struct { Binds []string ContainerIDFile string @@ -40,7 +45,7 @@ type HostConfig struct { NetworkMode NetworkMode CapAdd []string CapDrop []string - RestartPolicy string + RestartPolicy RestartPolicy } func ContainerHostConfigFromJob(job *engine.Job) *HostConfig { @@ -49,11 +54,12 @@ func ContainerHostConfigFromJob(job *engine.Job) *HostConfig { Privileged: job.GetenvBool("Privileged"), PublishAllPorts: job.GetenvBool("PublishAllPorts"), NetworkMode: NetworkMode(job.Getenv("NetworkMode")), - RestartPolicy: job.Getenv("RestartPolicy"), } + job.GetenvJson("LxcConf", &hostConfig.LxcConf) job.GetenvJson("PortBindings", &hostConfig.PortBindings) job.GetenvJson("Devices", &hostConfig.Devices) + job.GetenvJson("RestartPolicy", &hostConfig.RestartPolicy) if Binds := job.GetenvList("Binds"); Binds != nil { hostConfig.Binds = Binds } @@ -75,5 +81,6 @@ func ContainerHostConfigFromJob(job *engine.Job) *HostConfig { if CapDrop := job.GetenvList("CapDrop"); CapDrop != nil { hostConfig.CapDrop = CapDrop } + return hostConfig } diff --git a/runconfig/parse.go b/runconfig/parse.go index 3cab86234f..ea6e9ebca2 100644 --- a/runconfig/parse.go +++ b/runconfig/parse.go @@ -4,6 +4,7 @@ import ( "fmt" "io/ioutil" "path" + "strconv" "strings" "github.com/docker/docker/nat" @@ -234,6 +235,11 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf return nil, nil, cmd, fmt.Errorf("--net: invalid net 
mode: %v", err) } + restartPolicy, err := parseRestartPolicy(*flRestartPolicy) + if err != nil { + return nil, nil, cmd, err + } + config := &Config{ Hostname: hostname, Domainname: domainname, @@ -272,7 +278,7 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf Devices: deviceMappings, CapAdd: flCapAdd.GetAll(), CapDrop: flCapDrop.GetAll(), - RestartPolicy: *flRestartPolicy, + RestartPolicy: restartPolicy, } if sysInfo != nil && flMemory > 0 && !sysInfo.SwapLimit { @@ -287,6 +293,38 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf return config, hostConfig, cmd, nil } +// parseRestartPolicy returns the parsed policy or an error indicating what is incorrect +func parseRestartPolicy(policy string) (RestartPolicy, error) { + p := RestartPolicy{} + + if policy == "" { + return p, nil + } + + var ( + parts = strings.Split(policy, ":") + name = parts[0] + ) + + switch name { + case "no", "on-failure", "always": + p.Name = name + + if len(parts) == 2 { + count, err := strconv.Atoi(parts[1]) + if err != nil { + return p, err + } + + p.MaximumRetryCount = count + } + default: + return p, fmt.Errorf("invalid restart policy %s", name) + } + + return p, nil +} + // options will come in the format of name.key=value or name.option func parseDriverOpts(opts opts.ListOpts) (map[string][]string, error) { out := make(map[string][]string, len(opts.GetAll())) From 2b0776c883c41cda0b86808fe86e4df6e1aa1cd5 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 4 Aug 2014 17:05:56 -0700 Subject: [PATCH 04/18] Refactor container monitor into type Signed-off-by: Michael Crosby --- daemon/container.go | 142 ++++----------------------------- daemon/monitor.go | 189 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 204 insertions(+), 127 deletions(-) create mode 100644 daemon/monitor.go diff --git a/daemon/container.go b/daemon/container.go index 3383276e6d..681852a17d 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -7,7 +7,6 @@ import ( "io" "io/ioutil" "os" - "os/exec" "path" "path/filepath" "strings" @@ -84,8 +83,8 @@ type Container struct { VolumesRW map[string]bool hostConfig *runconfig.HostConfig - activeLinks map[string]*links.Link - requestedStop bool + activeLinks map[string]*links.Link + monitor *containerMonitor } func (container *Container) FromDisk() error { @@ -496,110 +495,6 @@ func (container *Container) releaseNetwork() { container.NetworkSettings = &NetworkSettings{} } -func (container *Container) monitor(callback execdriver.StartCallback) error { - var ( - err error - exitCode int - failCount int - exit bool - - policy = container.hostConfig.RestartPolicy - ) - - if err := container.startLoggingToDisk(); err != nil { - // TODO: crosbymichael cleanup IO, network, and mounts - return err - } - - // reset the restart count - container.RestartCount = -1 - container.requestedStop = false - - for { - container.RestartCount++ - - pipes := execdriver.NewPipes(container.stdin, container.stdout, container.stderr, container.Config.OpenStdin) - - if exitCode, err = container.daemon.Run(container, pipes, callback); err != nil { - failCount++ - - if failCount == policy.MaximumRetryCount { - exit = true - } - - utils.Errorf("Error running container: %s", err) - } - - // We still wait to set the state as stopped and ensure that the locks were released - container.State.SetStopped(exitCode) - - if container.Config.OpenStdin { - if err := container.stdin.Close(); err != nil { - utils.Errorf("%s: Error close stdin: %s", 
container.ID, err) - } - } - - if err := container.stdout.Clean(); err != nil { - utils.Errorf("%s: Error close stdout: %s", container.ID, err) - } - - if err := container.stderr.Clean(); err != nil { - utils.Errorf("%s: Error close stderr: %s", container.ID, err) - } - - if container.command != nil && container.command.Terminal != nil { - if err := container.command.Terminal.Close(); err != nil { - utils.Errorf("%s: Error closing terminal: %s", container.ID, err) - } - } - - // Re-create a brand new stdin pipe once the container exited - if container.Config.OpenStdin { - container.stdin, container.stdinPipe = io.Pipe() - } - - if container.daemon != nil && container.daemon.srv != nil { - container.daemon.srv.LogEvent("die", container.ID, container.daemon.repositories.ImageName(container.Image)) - } - - if (policy.Name == "always" || (policy.Name == "on-failure" && exitCode != 0)) && !container.requestedStop || !exit { - container.command.Cmd = copyCmd(&container.command.Cmd) - - time.Sleep(1 * time.Second) - - continue - } - - // Cleanup networking and mounts - container.cleanup() - - if container.daemon != nil && container.daemon.srv != nil && container.daemon.srv.IsRunning() { - // FIXME: here is race condition between two RUN instructions in Dockerfile - // because they share same runconfig and change image. Must be fixed - // in builder/builder.go - if err := container.toDisk(); err != nil { - utils.Errorf("Error dumping container %s state to disk: %s\n", container.ID, err) - } - } - - return err - } -} - -func copyCmd(c *exec.Cmd) exec.Cmd { - return exec.Cmd{ - Stdin: c.Stdin, - Stdout: c.Stdout, - Stderr: c.Stderr, - Path: c.Path, - Env: c.Env, - ExtraFiles: c.ExtraFiles, - Args: c.Args, - Dir: c.Dir, - SysProcAttr: c.SysProcAttr, - } -} - // cleanup releases any network resources allocated to the container along with any rules // around how containers are linked together. It also unmounts the container's root filesystem. func (container *Container) cleanup() { @@ -630,7 +525,10 @@ func (container *Container) KillSig(sig int) error { if !container.State.IsRunning() { return nil } - container.requestedStop = true + + // signal to the monitor that it should not restart the container + // after we send the kill signal + container.monitor.ExitOnNext() return container.daemon.Kill(container, sig) } @@ -1174,27 +1072,17 @@ func (container *Container) startLoggingToDisk() error { } func (container *Container) waitForStart() error { - waitStart := make(chan struct{}) - callback := func(command *execdriver.Command) { - if command.Tty { - // The callback is called after the process Start() - // so we are in the parent process. In TTY mode, stdin/out/err is the PtySlace - // which we close here. 
- if c, ok := command.Stdout.(io.Closer); ok { - c.Close() - } - } + container.monitor = newContainerMonitor(container, container.hostConfig.RestartPolicy) - container.State.SetRunning(command.Pid()) - if err := container.toDisk(); err != nil { - log.Debugf("%s", err) - } + var ( + cErr = utils.Go(container.monitor.Start) + waitStart = make(chan struct{}) + ) + + go func() { + container.State.WaitRunning(-1 * time.Second) close(waitStart) - } - - // We use a callback here instead of a goroutine and an chan for - // syncronization purposes - cErr := utils.Go(func() error { return container.monitor(callback) }) + }() // Start should not return until the process is actually running select { diff --git a/daemon/monitor.go b/daemon/monitor.go new file mode 100644 index 0000000000..94d7f2b004 --- /dev/null +++ b/daemon/monitor.go @@ -0,0 +1,189 @@ +package daemon + +import ( + "io" + "os/exec" + "sync" + "time" + + "github.com/docker/docker/daemon/execdriver" + "github.com/docker/docker/runconfig" + "github.com/docker/docker/utils" +) + +// containerMonitor monitors the execution of a container's main process. +// If a restart policy is specified for the cotnainer the monitor will ensure that the +// process is restarted based on the rules of the policy. When the container is finally stopped +// the monitor will reset and cleanup any of the container resources such as networking allocations +// and the rootfs +type containerMonitor struct { + mux sync.Mutex + + container *Container + restartPolicy runconfig.RestartPolicy + failureCount int + shouldStop bool +} + +func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) *containerMonitor { + return &containerMonitor{ + container: container, + restartPolicy: policy, + } +} + +// Stop signals to the container monitor that it should stop monitoring the container +// for exits the next time the process dies +func (m *containerMonitor) ExitOnNext() { + m.mux.Lock() + m.shouldStop = true + m.mux.Unlock() +} + +// Close closes the container's resources such as networking allocations and +// unmounts the contatiner's root filesystem +func (m *containerMonitor) Close() error { + // Cleanup networking and mounts + m.container.cleanup() + + if m.container.daemon != nil && m.container.daemon.srv != nil && m.container.daemon.srv.IsRunning() { + // FIXME: here is race condition between two RUN instructions in Dockerfile + // because they share same runconfig and change image. 
Must be fixed + // in builder/builder.go + if err := m.container.toDisk(); err != nil { + utils.Errorf("Error dumping container %s state to disk: %s\n", m.container.ID, err) + + return err + } + } + + return nil +} + +// reset resets the container's IO and ensures that the command is able to be executed again +// by copying the data into a new struct +func (m *containerMonitor) reset() { + container := m.container + + if container.Config.OpenStdin { + if err := container.stdin.Close(); err != nil { + utils.Errorf("%s: Error close stdin: %s", container.ID, err) + } + } + + if err := container.stdout.Clean(); err != nil { + utils.Errorf("%s: Error close stdout: %s", container.ID, err) + } + + if err := container.stderr.Clean(); err != nil { + utils.Errorf("%s: Error close stderr: %s", container.ID, err) + } + + if container.command != nil && container.command.Terminal != nil { + if err := container.command.Terminal.Close(); err != nil { + utils.Errorf("%s: Error closing terminal: %s", container.ID, err) + } + } + + // Re-create a brand new stdin pipe once the container exited + if container.Config.OpenStdin { + container.stdin, container.stdinPipe = io.Pipe() + } + + if container.daemon != nil && container.daemon.srv != nil { + container.daemon.srv.LogEvent("die", container.ID, container.daemon.repositories.ImageName(container.Image)) + } + + c := container.command.Cmd + + container.command.Cmd = exec.Cmd{ + Stdin: c.Stdin, + Stdout: c.Stdout, + Stderr: c.Stderr, + Path: c.Path, + Env: c.Env, + ExtraFiles: c.ExtraFiles, + Args: c.Args, + Dir: c.Dir, + SysProcAttr: c.SysProcAttr, + } +} + +// Start starts the containers process and monitors it according to the restart policy +func (m *containerMonitor) Start() error { + var ( + err error + exitCode int + ) + defer m.Close() + + // reset the restart count + m.container.RestartCount = -1 + + for !m.shouldStop { + m.container.RestartCount++ + if err := m.container.startLoggingToDisk(); err != nil { + m.reset() + + return err + } + + pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin) + + if exitCode, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil { + m.failureCount++ + + if m.failureCount == m.restartPolicy.MaximumRetryCount { + m.ExitOnNext() + } + + utils.Errorf("Error running container: %s", err) + } + + // We still wait to set the state as stopped and ensure that the locks were released + m.container.State.SetStopped(exitCode) + + m.reset() + + if m.shouldRestart(exitCode) { + time.Sleep(1 * time.Second) + + continue + } + + break + } + + return err +} + +func (m *containerMonitor) shouldRestart(exitCode int) bool { + m.mux.Lock() + + shouldRestart := (m.restartPolicy.Name == "always" || + (m.restartPolicy.Name == "on-failure" && exitCode != 0)) && + !m.shouldStop + + m.mux.Unlock() + + return shouldRestart +} + +// callback ensures that the container's state is properly updated after we +// received ack from the execution drivers +func (m *containerMonitor) callback(command *execdriver.Command) { + if command.Tty { + // The callback is called after the process Start() + // so we are in the parent process. In TTY mode, stdin/out/err is the PtySlace + // which we close here. 
+ if c, ok := command.Stdout.(io.Closer); ok { + c.Close() + } + } + + m.container.State.SetRunning(command.Pid()) + + if err := m.container.ToDisk(); err != nil { + utils.Debugf("%s", err) + } +} From 860c13b788944410a98a6ad5b5cfb74de0a8405b Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 4 Aug 2014 18:20:53 -0700 Subject: [PATCH 05/18] Add documentation and update restart rules. Implement time backed backoff for restarting and fix failure count when the maximum is 0 Signed-off-by: Michael Crosby --- daemon/monitor.go | 89 +++++++++++++++++------ docs/sources/reference/commandline/cli.md | 26 +++++++ runconfig/parse.go | 29 +++++--- 3 files changed, 111 insertions(+), 33 deletions(-) diff --git a/daemon/monitor.go b/daemon/monitor.go index 94d7f2b004..3eb68791d6 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -11,6 +11,8 @@ import ( "github.com/docker/docker/utils" ) +const defaultTimeIncrement = 100 + // containerMonitor monitors the execution of a container's main process. // If a restart policy is specified for the cotnainer the monitor will ensure that the // process is restarted based on the rules of the policy. When the container is finally stopped @@ -19,16 +21,30 @@ import ( type containerMonitor struct { mux sync.Mutex - container *Container + // container is the container being monitored + container *Container + + // restartPolicy is the being applied to the container monitor restartPolicy runconfig.RestartPolicy - failureCount int - shouldStop bool + + // failureCount is the number of times the container has failed to + // start in a row + failureCount int + + // shouldStop signals the monitor that the next time the container exits it is + // either because docker or the user asked for the container to be stopped + shouldStop bool + + // timeIncrement is the amount of time to wait between restarts + // this is in milliseconds + timeIncrement int } func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) *containerMonitor { return &containerMonitor{ container: container, restartPolicy: policy, + timeIncrement: defaultTimeIncrement, } } @@ -62,7 +78,7 @@ func (m *containerMonitor) Close() error { // reset resets the container's IO and ensures that the command is able to be executed again // by copying the data into a new struct -func (m *containerMonitor) reset() { +func (m *containerMonitor) reset(successful bool) { container := m.container if container.Config.OpenStdin { @@ -107,14 +123,29 @@ func (m *containerMonitor) reset() { Dir: c.Dir, SysProcAttr: c.SysProcAttr, } + + // the container exited successfully so we need to reset the failure counter + // and the timeIncrement back to the default values + if successful { + m.failureCount = 0 + m.timeIncrement = defaultTimeIncrement + } else { + // otherwise we need to increment the amount of time we wait before restarting + // the process. 
We will build up by multiplying the increment by 2 + + m.failureCount++ + m.timeIncrement *= 2 + } } // Start starts the containers process and monitors it according to the restart policy func (m *containerMonitor) Start() error { var ( - err error - exitCode int + err error + exitStatus int ) + + // ensure that when the monitor finally exits we release the networking and unmount the rootfs defer m.Close() // reset the restart count @@ -122,31 +153,26 @@ func (m *containerMonitor) Start() error { for !m.shouldStop { m.container.RestartCount++ + if err := m.container.startLoggingToDisk(); err != nil { - m.reset() + m.reset(false) return err } pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin) - if exitCode, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil { - m.failureCount++ - - if m.failureCount == m.restartPolicy.MaximumRetryCount { - m.ExitOnNext() - } - + if exitStatus, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil { utils.Errorf("Error running container: %s", err) } // We still wait to set the state as stopped and ensure that the locks were released - m.container.State.SetStopped(exitCode) + m.container.State.SetStopped(exitStatus) - m.reset() + m.reset(err == nil && exitStatus == 0) - if m.shouldRestart(exitCode) { - time.Sleep(1 * time.Second) + if m.shouldRestart(exitStatus) { + time.Sleep(time.Duration(m.timeIncrement) * time.Millisecond) continue } @@ -157,16 +183,31 @@ func (m *containerMonitor) Start() error { return err } -func (m *containerMonitor) shouldRestart(exitCode int) bool { +// shouldRestart checks the restart policy and applies the rules to determine if +// the container's process should be restarted +func (m *containerMonitor) shouldRestart(exitStatus int) bool { m.mux.Lock() + defer m.mux.Unlock() - shouldRestart := (m.restartPolicy.Name == "always" || - (m.restartPolicy.Name == "on-failure" && exitCode != 0)) && - !m.shouldStop + // do not restart if the user or docker has requested that this container be stopped + if m.shouldStop { + return false + } - m.mux.Unlock() + switch m.restartPolicy.Name { + case "always": + return true + case "on-failure": + // the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count + if max := m.restartPolicy.MaximumRetryCount; max != 0 && m.failureCount >= max { + utils.Debugf("stopping restart of container %s because maximum failure could of %d has been reached", max) + return false + } - return shouldRestart + return exitStatus != 0 + } + + return false } // callback ensures that the container's state is properly updated after we diff --git a/docs/sources/reference/commandline/cli.md b/docs/sources/reference/commandline/cli.md index 5e6107cfaa..8f4ed19a27 100644 --- a/docs/sources/reference/commandline/cli.md +++ b/docs/sources/reference/commandline/cli.md @@ -993,6 +993,7 @@ removed before the image is removed. format: ip:hostPort:containerPort | ip::containerPort | hostPort:containerPort (use 'docker port' to see the actual mapping) --privileged=false Give extended privileges to this container + --restart="" Restart policy to apply when a container exits (no, on-failure, always) --rm=false Automatically remove the container when it exits (incompatible with -d) --sig-proxy=true Proxy received signals to the process (even in non-TTY mode). SIGCHLD, SIGSTOP, and SIGKILL are not proxied. 
-t, --tty=false Allocate a pseudo-TTY @@ -1220,6 +1221,31 @@ application change: `--rm` option means that when the container exits, the container's layer is removed. +#### Restart Policies + +Using the `--restart` flag on docker run you can specify a restart policy for +how a container should or should not be restarted on exit. + +** no ** - Do not restart the container when it exits. + +** on-failure ** - Restart the container only if it exits with a non zero exit status. + +** always ** - Always restart the container reguardless of the exit status. + +You can also specify the maximum amount of times docker will try to restart the +container when using the ** on-failure ** policy. The default is that docker will try forever to restart the container. + + $ sudo docker run --restart=always redis + +This will run the redis container with a restart policy of ** always ** so that if +the container exits, docker will restart it. + + $ sudo docker run --restart=on-failure:10 redis + +This will run the redis container with a restart policy of ** on-failure ** and a +maximum restart count of 10. If the redis container exits with a non-zero exit +status more than 10 times in a row docker will abort trying to restart the container. + ## save Usage: docker save IMAGE diff --git a/runconfig/parse.go b/runconfig/parse.go index ea6e9ebca2..2b4dc632a0 100644 --- a/runconfig/parse.go +++ b/runconfig/parse.go @@ -17,11 +17,12 @@ import ( ) var ( - ErrInvalidWorkingDirectory = fmt.Errorf("The working directory is invalid. It needs to be an absolute path.") - ErrConflictAttachDetach = fmt.Errorf("Conflicting options: -a and -d") - ErrConflictDetachAutoRemove = fmt.Errorf("Conflicting options: --rm and -d") - ErrConflictNetworkHostname = fmt.Errorf("Conflicting options: -h and the network mode (--net)") - ErrConflictHostNetworkAndLinks = fmt.Errorf("Conflicting options: --net=host can't be used with links. This would result in undefined behavior.") + ErrInvalidWorkingDirectory = fmt.Errorf("The working directory is invalid. It needs to be an absolute path.") + ErrConflictAttachDetach = fmt.Errorf("Conflicting options: -a and -d") + ErrConflictDetachAutoRemove = fmt.Errorf("Conflicting options: --rm and -d") + ErrConflictNetworkHostname = fmt.Errorf("Conflicting options: -h and the network mode (--net)") + ErrConflictHostNetworkAndLinks = fmt.Errorf("Conflicting options: --net=host can't be used with links. This would result in undefined behavior.") + ErrConflictRestartPolicyAndAutoRemove = fmt.Errorf("Conflicting options: --restart and --rm") ) //FIXME Only used in tests @@ -72,7 +73,7 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf flCpuShares = cmd.Int64([]string{"c", "-cpu-shares"}, 0, "CPU shares (relative weight)") flCpuset = cmd.String([]string{"-cpuset"}, "", "CPUs in which to allow execution (0-3, 0,1)") flNetMode = cmd.String([]string{"-net"}, "bridge", "Set the Network mode for the container\n'bridge': creates a new network stack for the container on the docker bridge\n'none': no networking for this container\n'container:': reuses another container network stack\n'host': use the host network stack inside the container. 
Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.") - flRestartPolicy = cmd.String([]string{"-restart"}, "", "Restart policy when the dies") + flRestartPolicy = cmd.String([]string{"-restart"}, "", "Restart policy to apply when a container exits (no, on-failure, always)") // For documentation purpose _ = cmd.Bool([]string{"#sig-proxy", "-sig-proxy"}, true, "Proxy received signals to the process (even in non-TTY mode). SIGCHLD, SIGSTOP, and SIGKILL are not proxied.") _ = cmd.String([]string{"#name", "-name"}, "", "Assign a name to the container") @@ -227,8 +228,6 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf } // parse the '-e' and '--env' after, to allow override envVariables = append(envVariables, flEnv.GetAll()...) - // boo, there's no debug output for docker run - //log.Debugf("Environment variables for the container: %#v", envVariables) netMode, err := parseNetMode(*flNetMode) if err != nil { @@ -240,6 +239,10 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf return nil, nil, cmd, err } + if *flAutoRemove && (restartPolicy.Name == "always" || restartPolicy.Name == "on-failure") { + return nil, nil, cmd, ErrConflictRestartPolicyAndAutoRemove + } + config := &Config{ Hostname: hostname, Domainname: domainname, @@ -307,7 +310,15 @@ func parseRestartPolicy(policy string) (RestartPolicy, error) { ) switch name { - case "no", "on-failure", "always": + case "always": + p.Name = name + + if len(parts) == 2 { + return p, fmt.Errorf("maximum restart count not valid with restart policy of \"always\"") + } + case "no": + // do nothing + case "on-failure": p.Name = name if len(parts) == 2 { From 617edd89f48a7082bf4ec1d28ec12eefd1057a2f Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 5 Aug 2014 14:40:50 -0700 Subject: [PATCH 06/18] Update docs based on feedback from review for --restart Signed-off-by: Michael Crosby --- docs/man/docker-run.1.md | 1 + docs/sources/reference/commandline/cli.md | 16 ++++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/man/docker-run.1.md b/docs/man/docker-run.1.md index 4dee97f195..225fb78cb8 100644 --- a/docs/man/docker-run.1.md +++ b/docs/man/docker-run.1.md @@ -30,6 +30,7 @@ docker-run - Run a command in a new container [**-P**|**--publish-all**[=*false*]] [**-p**|**--publish**[=*[]*]] [**--privileged**[=*false*]] +[**--restart**[=*POLICY*]] [**--rm**[=*false*]] [**--sig-proxy**[=*true*]] [**-t**|**--tty**[=*false*]] diff --git a/docs/sources/reference/commandline/cli.md b/docs/sources/reference/commandline/cli.md index 8f4ed19a27..4a756cc16e 100644 --- a/docs/sources/reference/commandline/cli.md +++ b/docs/sources/reference/commandline/cli.md @@ -1223,7 +1223,7 @@ application change: #### Restart Policies -Using the `--restart` flag on docker run you can specify a restart policy for +Using the `--restart` flag on Docker run you can specify a restart policy for how a container should or should not be restarted on exit. ** no ** - Do not restart the container when it exits. @@ -1232,19 +1232,19 @@ how a container should or should not be restarted on exit. ** always ** - Always restart the container reguardless of the exit status. -You can also specify the maximum amount of times docker will try to restart the -container when using the ** on-failure ** policy. The default is that docker will try forever to restart the container. 
+You can also specify the maximum amount of times Docker will try to restart the +container when using the ** on-failure ** policy. The default is that Docker will try forever to restart the container. $ sudo docker run --restart=always redis -This will run the redis container with a restart policy of ** always ** so that if -the container exits, docker will restart it. +This will run the `redis` container with a restart policy of ** always ** so that if +the container exits, Docker will restart it. $ sudo docker run --restart=on-failure:10 redis -This will run the redis container with a restart policy of ** on-failure ** and a -maximum restart count of 10. If the redis container exits with a non-zero exit -status more than 10 times in a row docker will abort trying to restart the container. +This will run the `redis` container with a restart policy of ** on-failure ** and a +maximum restart count of 10. If the `redis` container exits with a non-zero exit +status more than 10 times in a row Docker will abort trying to restart the container. ## save From 41870a42bee9c394c61aba4bd81f782dbd38da12 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 6 Aug 2014 10:40:43 -0700 Subject: [PATCH 07/18] Only restart containers on daemon load with policy of always Signed-off-by: Michael Crosby --- daemon/daemon.go | 58 ++++++++++++++++++++++++++++++----------------- daemon/monitor.go | 14 +++++------- 2 files changed, 43 insertions(+), 29 deletions(-) diff --git a/daemon/daemon.go b/daemon/daemon.go index e777c4a1be..0569126e17 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -172,20 +172,24 @@ func (daemon *Daemon) load(id string) (*Container, error) { if err := container.FromDisk(); err != nil { return nil, err } + if container.ID != id { return container, fmt.Errorf("Container %s is stored at %s", container.ID, id) } + + container.readHostConfig() + return container, nil } // Register makes a container object usable by the daemon as // This is a wrapper for register func (daemon *Daemon) Register(container *Container) error { - return daemon.register(container, true, nil) + return daemon.register(container, true) } // register makes a container object usable by the daemon as -func (daemon *Daemon) register(container *Container, updateSuffixarray bool, containersToStart *[]*Container) error { +func (daemon *Daemon) register(container *Container, updateSuffixarray bool) error { if container.daemon != nil || daemon.Exists(container.ID) { return fmt.Errorf("Container is already loaded") } @@ -257,14 +261,6 @@ func (daemon *Daemon) register(container *Container, updateSuffixarray bool, con if err := container.ToDisk(); err != nil { return err } - - if daemon.config.AutoRestart { - log.Debugf("Marking as restarting") - - if containersToStart != nil { - *containersToStart = append(*containersToStart, container) - } - } } } return nil @@ -296,10 +292,9 @@ func (daemon *Daemon) LogToDisk(src *broadcastwriter.BroadcastWriter, dst, strea func (daemon *Daemon) restore() error { var ( - debug = (os.Getenv("DEBUG") != "" || os.Getenv("TEST") != "") - containers = make(map[string]*Container) - currentDriver = daemon.driver.String() - containersToStart = []*Container{} + debug = (os.Getenv("DEBUG") != "" || os.Getenv("TEST") != "") + containers = make(map[string]*Container) + currentDriver = daemon.driver.String() ) if !debug { @@ -322,24 +317,33 @@ func (daemon *Daemon) restore() error { } // Ignore the container if it does not support the current driver being used by the graph - if container.Driver 
== "" && currentDriver == "aufs" || container.Driver == currentDriver { + if (container.Driver == "" && currentDriver == "aufs") || container.Driver == currentDriver { log.Debugf("Loaded container %v", container.ID) + containers[container.ID] = container } else { log.Debugf("Cannot load container %s because it was created with another graph driver.", container.ID) } } + registeredContainers := []*Container{} + if entities := daemon.containerGraph.List("/", -1); entities != nil { for _, p := range entities.Paths() { if !debug { fmt.Print(".") } + e := entities[p] + if container, ok := containers[e.ID()]; ok { - if err := daemon.register(container, false, &containersToStart); err != nil { + if err := daemon.register(container, false); err != nil { log.Debugf("Failed to register container %s: %s", container.ID, err) } + + registeredContainers = append(registeredContainers, container) + + // delete from the map so that a new name is not automatically generated delete(containers, e.ID()) } } @@ -352,15 +356,27 @@ func (daemon *Daemon) restore() error { if err != nil { log.Debugf("Setting default id - %s", err) } - if err := daemon.register(container, false, &containersToStart); err != nil { + + if err := daemon.register(container, false); err != nil { log.Debugf("Failed to register container %s: %s", container.ID, err) } + + registeredContainers = append(registeredContainers, container) } - for _, container := range containersToStart { - log.Debugf("Starting container %d", container.ID) - if err := container.Start(); err != nil { - log.Debugf("Failed to start container %s: %s", container.ID, err) + // check the restart policy on the containers and restart any container with + // the restart policy of "always" + if daemon.config.AutoRestart { + log.Debugf("Restarting containers...") + + for _, container := range registeredContainers { + if container.hostConfig.RestartPolicy.Name == "always" { + utils.Debugf("Starting container %s", container.ID) + + if err := container.Start(); err != nil { + utils.Debugf("Failed to start container %s: %s", container.ID, err) + } + } } } diff --git a/daemon/monitor.go b/daemon/monitor.go index 3eb68791d6..66f2a14e3e 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -62,15 +62,13 @@ func (m *containerMonitor) Close() error { // Cleanup networking and mounts m.container.cleanup() - if m.container.daemon != nil && m.container.daemon.srv != nil && m.container.daemon.srv.IsRunning() { - // FIXME: here is race condition between two RUN instructions in Dockerfile - // because they share same runconfig and change image. Must be fixed - // in builder/builder.go - if err := m.container.toDisk(); err != nil { - utils.Errorf("Error dumping container %s state to disk: %s\n", m.container.ID, err) + // FIXME: here is race condition between two RUN instructions in Dockerfile + // because they share same runconfig and change image. 
Must be fixed + // in builder/builder.go + if err := m.container.toDisk(); err != nil { + utils.Errorf("Error dumping container %s state to disk: %s\n", m.container.ID, err) - return err - } + return err } return nil From feda8fbb21489f64aa3b7340c94473a08502bd6b Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 6 Aug 2014 21:13:06 -0700 Subject: [PATCH 08/18] Restart conatiner with on-failure policy if exit code != 0 Signed-off-by: Michael Crosby --- daemon/daemon.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/daemon/daemon.go b/daemon/daemon.go index 0569126e17..c88ac32e63 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -370,7 +370,8 @@ func (daemon *Daemon) restore() error { log.Debugf("Restarting containers...") for _, container := range registeredContainers { - if container.hostConfig.RestartPolicy.Name == "always" { + if container.hostConfig.RestartPolicy.Name == "always" || + (container.hostConfig.RestartPolicy.Name == "on-failure" && container.State.ExitCode != 0) { utils.Debugf("Starting container %s", container.ID) if err := container.Start(); err != nil { From b7550cd456e29874279499a546b07bb062a12096 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 6 Aug 2014 21:15:34 -0700 Subject: [PATCH 09/18] Fix rebase around log event Signed-off-by: Michael Crosby --- daemon/monitor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/daemon/monitor.go b/daemon/monitor.go index 66f2a14e3e..3517589a37 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -105,7 +105,7 @@ func (m *containerMonitor) reset(successful bool) { } if container.daemon != nil && container.daemon.srv != nil { - container.daemon.srv.LogEvent("die", container.ID, container.daemon.repositories.ImageName(container.Image)) + container.LogEvent("die") } c := container.command.Cmd From 6ae05936e1f343a2ecc51b8f642bdc8b93325d81 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Thu, 7 Aug 2014 10:50:25 -0700 Subject: [PATCH 10/18] Move container start event into monitor Signed-off-by: Michael Crosby --- daemon/monitor.go | 6 +++--- daemon/start.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/daemon/monitor.go b/daemon/monitor.go index 3517589a37..6f172dc155 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -104,9 +104,7 @@ func (m *containerMonitor) reset(successful bool) { container.stdin, container.stdinPipe = io.Pipe() } - if container.daemon != nil && container.daemon.srv != nil { - container.LogEvent("die") - } + container.LogEvent("die") c := container.command.Cmd @@ -160,6 +158,8 @@ func (m *containerMonitor) Start() error { pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin) + m.container.LogEvent("start") + if exitStatus, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil { utils.Errorf("Error running container: %s", err) } diff --git a/daemon/start.go b/daemon/start.go index cb6e9cb21f..30e015496f 100644 --- a/daemon/start.go +++ b/daemon/start.go @@ -36,7 +36,7 @@ func (daemon *Daemon) ContainerStart(job *engine.Job) engine.Status { if err := container.Start(); err != nil { return job.Errorf("Cannot start container %s: %s", name, err) } - container.LogEvent("start") + return engine.StatusOK } From be22d7ac2db6396becd63ec2dd28b9731011c9b9 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 11 Aug 2014 10:50:02 -0700 Subject: [PATCH 11/18] Add additional comments for vague lines Signed-off-by: Michael Crosby --- 
daemon/monitor.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/daemon/monitor.go b/daemon/monitor.go index 6f172dc155..97fbe6ab47 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -164,12 +164,15 @@ func (m *containerMonitor) Start() error { utils.Errorf("Error running container: %s", err) } - // We still wait to set the state as stopped and ensure that the locks were released + // we still wait to set the state as stopped and ensure that the locks were released m.container.State.SetStopped(exitStatus) + // pass if we exited successfully m.reset(err == nil && exitStatus == 0) if m.shouldRestart(exitStatus) { + // sleep with a small time increment between each restart to help avoid issues cased by quickly + // restarting the container because of some types of errors ( networking cut out, etc... ) time.Sleep(time.Duration(m.timeIncrement) * time.Millisecond) continue From a2afb2b1e37e72cab0d68cfe8dd0ec830b7a54e5 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 11 Aug 2014 11:07:37 -0700 Subject: [PATCH 12/18] Add Restarting state when docker is handling the restart of containers Signed-off-by: Michael Crosby --- daemon/monitor.go | 141 ++++++++++++++++++++++++---------------------- daemon/state.go | 30 ++++++++++ 2 files changed, 105 insertions(+), 66 deletions(-) diff --git a/daemon/monitor.go b/daemon/monitor.go index 97fbe6ab47..b733531dff 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -74,66 +74,6 @@ func (m *containerMonitor) Close() error { return nil } -// reset resets the container's IO and ensures that the command is able to be executed again -// by copying the data into a new struct -func (m *containerMonitor) reset(successful bool) { - container := m.container - - if container.Config.OpenStdin { - if err := container.stdin.Close(); err != nil { - utils.Errorf("%s: Error close stdin: %s", container.ID, err) - } - } - - if err := container.stdout.Clean(); err != nil { - utils.Errorf("%s: Error close stdout: %s", container.ID, err) - } - - if err := container.stderr.Clean(); err != nil { - utils.Errorf("%s: Error close stderr: %s", container.ID, err) - } - - if container.command != nil && container.command.Terminal != nil { - if err := container.command.Terminal.Close(); err != nil { - utils.Errorf("%s: Error closing terminal: %s", container.ID, err) - } - } - - // Re-create a brand new stdin pipe once the container exited - if container.Config.OpenStdin { - container.stdin, container.stdinPipe = io.Pipe() - } - - container.LogEvent("die") - - c := container.command.Cmd - - container.command.Cmd = exec.Cmd{ - Stdin: c.Stdin, - Stdout: c.Stdout, - Stderr: c.Stderr, - Path: c.Path, - Env: c.Env, - ExtraFiles: c.ExtraFiles, - Args: c.Args, - Dir: c.Dir, - SysProcAttr: c.SysProcAttr, - } - - // the container exited successfully so we need to reset the failure counter - // and the timeIncrement back to the default values - if successful { - m.failureCount = 0 - m.timeIncrement = defaultTimeIncrement - } else { - // otherwise we need to increment the amount of time we wait before restarting - // the process. 
We will build up by multiplying the increment by 2 - - m.failureCount++ - m.timeIncrement *= 2 - } -} - // Start starts the containers process and monitors it according to the restart policy func (m *containerMonitor) Start() error { var ( @@ -151,7 +91,7 @@ func (m *containerMonitor) Start() error { m.container.RestartCount++ if err := m.container.startLoggingToDisk(); err != nil { - m.reset(false) + m.resetContainer() return err } @@ -164,18 +104,23 @@ func (m *containerMonitor) Start() error { utils.Errorf("Error running container: %s", err) } - // we still wait to set the state as stopped and ensure that the locks were released - m.container.State.SetStopped(exitStatus) - - // pass if we exited successfully - m.reset(err == nil && exitStatus == 0) + m.resetMonitor(err == nil && exitStatus == 0) if m.shouldRestart(exitStatus) { + m.container.State.SetRestarting(exitStatus) + + m.resetContainer() + // sleep with a small time increment between each restart to help avoid issues cased by quickly // restarting the container because of some types of errors ( networking cut out, etc... ) time.Sleep(time.Duration(m.timeIncrement) * time.Millisecond) continue + } else { + // we still wait to set the state as stopped and ensure that the locks were released + m.container.State.SetStopped(exitStatus) + + m.resetContainer() } break @@ -184,6 +129,23 @@ func (m *containerMonitor) Start() error { return err } +// resetMonitor resets the stateful fields on the containerMonitor based on the +// previous runs success or failure +func (m *containerMonitor) resetMonitor(successful bool) { + // the container exited successfully so we need to reset the failure counter + // and the timeIncrement back to the default values + if successful { + m.failureCount = 0 + m.timeIncrement = defaultTimeIncrement + } else { + // otherwise we need to increment the amount of time we wait before restarting + // the process. 
We will build up by multiplying the increment by 2 + + m.failureCount++ + m.timeIncrement *= 2 + } +} + // shouldRestart checks the restart policy and applies the rules to determine if // the container's process should be restarted func (m *containerMonitor) shouldRestart(exitStatus int) bool { @@ -229,3 +191,50 @@ func (m *containerMonitor) callback(command *execdriver.Command) { utils.Debugf("%s", err) } } + +// resetContainer resets the container's IO and ensures that the command is able to be executed again +// by copying the data into a new struct +func (m *containerMonitor) resetContainer() { + container := m.container + + if container.Config.OpenStdin { + if err := container.stdin.Close(); err != nil { + utils.Errorf("%s: Error close stdin: %s", container.ID, err) + } + } + + if err := container.stdout.Clean(); err != nil { + utils.Errorf("%s: Error close stdout: %s", container.ID, err) + } + + if err := container.stderr.Clean(); err != nil { + utils.Errorf("%s: Error close stderr: %s", container.ID, err) + } + + if container.command != nil && container.command.Terminal != nil { + if err := container.command.Terminal.Close(); err != nil { + utils.Errorf("%s: Error closing terminal: %s", container.ID, err) + } + } + + // Re-create a brand new stdin pipe once the container exited + if container.Config.OpenStdin { + container.stdin, container.stdinPipe = io.Pipe() + } + + container.LogEvent("die") + + c := container.command.Cmd + + container.command.Cmd = exec.Cmd{ + Stdin: c.Stdin, + Stdout: c.Stdout, + Stderr: c.Stderr, + Path: c.Path, + Env: c.Env, + ExtraFiles: c.ExtraFiles, + Args: c.Args, + Dir: c.Dir, + SysProcAttr: c.SysProcAttr, + } +} diff --git a/daemon/state.go b/daemon/state.go index 9baa396843..00597d699a 100644 --- a/daemon/state.go +++ b/daemon/state.go @@ -12,6 +12,7 @@ type State struct { sync.RWMutex Running bool Paused bool + Restarting bool Pid int ExitCode int StartedAt time.Time @@ -30,15 +31,22 @@ func (s *State) String() string { s.RLock() defer s.RUnlock() + if s.Restarting { + return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt))) + } + if s.Running { if s.Paused { return fmt.Sprintf("Up %s (Paused)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt))) } + return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt))) } + if s.FinishedAt.IsZero() { return "" } + return fmt.Sprintf("Exited (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt))) } @@ -135,6 +143,28 @@ func (s *State) SetStopped(exitCode int) { s.Unlock() } +// SetRestarting is when docker hanldes the auto restart of containers when they are +// in the middle of a stop and being restarted again +func (s *State) SetRestarting(exitCode int) { + s.Lock() + if s.Running { + s.Running = false + s.Pid = 0 + s.FinishedAt = time.Now().UTC() + s.ExitCode = exitCode + close(s.waitChan) // fire waiters for stop + s.waitChan = make(chan struct{}) + } + s.Unlock() +} + +func (s *State) IsRestarting() bool { + s.RLock() + res := s.Restarting + s.RUnlock() + return res +} + func (s *State) SetPaused() { s.Lock() s.Paused = true From c4a00d549d0b895bb990491cbdf58aabe2891842 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 11 Aug 2014 11:35:18 -0700 Subject: [PATCH 13/18] Honor the restarting state in Stop Signed-off-by: Michael Crosby --- daemon/container.go | 7 +++++++ daemon/monitor.go | 9 ++++----- daemon/state.go | 12 +++++++----- 3 files changed, 18 insertions(+), 10 deletions(-) diff 
--git a/daemon/container.go b/daemon/container.go
index 681852a17d..e0da3c144a 100644
--- a/daemon/container.go
+++ b/daemon/container.go
@@ -530,6 +530,13 @@ func (container *Container) KillSig(sig int) error {
 	// after we send the kill signal
 	container.monitor.ExitOnNext()
 
+	// if the container is currently restarting we do not need to send the signal
+	// to the process. Telling the monitor that it should exit on its next event
+	// loop is enough
+	if container.State.IsRestarting() {
+		return nil
+	}
+
 	return container.daemon.Kill(container, sig)
 }
diff --git a/daemon/monitor.go b/daemon/monitor.go
index b733531dff..6c6c7b76db 100644
--- a/daemon/monitor.go
+++ b/daemon/monitor.go
@@ -116,16 +116,15 @@ func (m *containerMonitor) Start() error {
 			time.Sleep(time.Duration(m.timeIncrement) * time.Millisecond)
 
 			continue
-		} else {
-			// we still wait to set the state as stopped and ensure that the locks were released
-			m.container.State.SetStopped(exitStatus)
-
-			m.resetContainer()
 		}
 
 		break
 	}
 
+	m.container.State.SetStopped(exitStatus)
+
+	m.resetContainer()
+
 	return err
 }
diff --git a/daemon/state.go b/daemon/state.go
index 00597d699a..61b65f7d4e 100644
--- a/daemon/state.go
+++ b/daemon/state.go
@@ -31,14 +31,13 @@ func (s *State) String() string {
 	s.RLock()
 	defer s.RUnlock()
 
-	if s.Restarting {
-		return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
-	}
-
 	if s.Running {
 		if s.Paused {
 			return fmt.Sprintf("Up %s (Paused)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
 		}
+		if s.Restarting {
+			return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
+		}
 
 		return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
 	}
@@ -148,7 +147,10 @@ func (s *State) SetStopped(exitCode int) {
 func (s *State) SetRestarting(exitCode int) {
 	s.Lock()
 	if s.Running {
-		s.Running = false
+		// we should consider the container running when it is restarting because of
+		// all the checks in docker around rm/stop/etc
+		s.Running = true
+		s.Restarting = true
 		s.Pid = 0
 		s.FinishedAt = time.Now().UTC()
 		s.ExitCode = exitCode

From 972c89493148f930388097816663d453bf3e8c2c Mon Sep 17 00:00:00 2001
From: Michael Crosby
Date: Mon, 11 Aug 2014 15:00:55 -0700
Subject: [PATCH 14/18] Improve wait during restart

We need this so that when a user asks docker to stop a container that is
currently in the restart loop, we do not have to wait for the full
duration of the restart time increment before acknowledging the stop.
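The change below implements that by swapping the bare sleep for a select that also watches a stop channel. A rough standalone sketch of the pattern (names and types simplified, not the daemon's actual monitor):

package main

import (
	"fmt"
	"sync"
	"time"
)

// waiter sketches the stopChan/shouldStop pair the monitor uses so that a
// pending restart delay can be cut short when a stop is requested.
type waiter struct {
	mu         sync.Mutex
	shouldStop bool
	stopChan   chan struct{}
}

func newWaiter() *waiter {
	return &waiter{stopChan: make(chan struct{})}
}

// exitOnNext marks the waiter as stopping; the guard prevents a double close
// of the channel (and the resulting panic) when it is called more than once.
func (w *waiter) exitOnNext() {
	w.mu.Lock()
	if !w.shouldStop {
		w.shouldStop = true
		close(w.stopChan)
	}
	w.mu.Unlock()
}

// waitForNextRestart returns after the delay or as soon as a stop is
// requested, whichever comes first.
func (w *waiter) waitForNextRestart(delay time.Duration) {
	select {
	case <-time.After(delay):
	case <-w.stopChan:
	}
}

func main() {
	w := newWaiter()
	go func() {
		time.Sleep(50 * time.Millisecond)
		w.exitOnNext()
		w.exitOnNext() // calling it twice is safe
	}()

	start := time.Now()
	w.waitForNextRestart(10 * time.Second)
	fmt.Printf("wait interrupted after %s\n", time.Since(start).Round(time.Millisecond))
}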
Signed-off-by: Michael Crosby --- daemon/monitor.go | 50 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/daemon/monitor.go b/daemon/monitor.go index 6c6c7b76db..d9e67b0681 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -35,6 +35,11 @@ type containerMonitor struct { // either because docker or the user asked for the container to be stopped shouldStop bool + // stopChan is used to signal to the monitor whenever there is a wait for the + // next restart so that the timeIncrement is not honored and the user is not + // left waiting for nothing to happen during this time + stopChan chan struct{} + // timeIncrement is the amount of time to wait between restarts // this is in milliseconds timeIncrement int @@ -45,6 +50,7 @@ func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) * container: container, restartPolicy: policy, timeIncrement: defaultTimeIncrement, + stopChan: make(chan struct{}, 1), } } @@ -52,7 +58,14 @@ func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) * // for exits the next time the process dies func (m *containerMonitor) ExitOnNext() { m.mux.Lock() - m.shouldStop = true + + // we need to protect having a double close of the channel when stop is called + // twice or else we will get a panic + if !m.shouldStop { + m.shouldStop = true + close(m.stopChan) + } + m.mux.Unlock() } @@ -87,7 +100,7 @@ func (m *containerMonitor) Start() error { // reset the restart count m.container.RestartCount = -1 - for !m.shouldStop { + for { m.container.RestartCount++ if err := m.container.startLoggingToDisk(); err != nil { @@ -109,22 +122,34 @@ func (m *containerMonitor) Start() error { if m.shouldRestart(exitStatus) { m.container.State.SetRestarting(exitStatus) + m.container.LogEvent("die") + m.resetContainer() // sleep with a small time increment between each restart to help avoid issues cased by quickly // restarting the container because of some types of errors ( networking cut out, etc... 
)
-			time.Sleep(time.Duration(m.timeIncrement) * time.Millisecond)
+			m.waitForNextRestart()
+
+			// we need to check this before reentering the loop because the waitForNextRestart could have
+			// been terminated by a request from a user
+			if m.shouldStop {
+				m.container.State.SetStopped(exitStatus)
+
+				return err
+			}
 
 			continue
 		}
 
+		m.container.State.SetStopped(exitStatus)
+
+		m.container.LogEvent("die")
+
+		m.resetContainer()
+
 		break
 	}
 
-	m.container.State.SetStopped(exitStatus)
-
-	m.resetContainer()
-
 	return err
 }
@@ -145,6 +170,15 @@ func (m *containerMonitor) resetMonitor(successful bool) {
 }
 
+// waitForNextRestart waits with the default time increment to restart the container unless
+// a user or docker asks to container to be stopped
+func (m *containerMonitor) waitForNextRestart() {
+	select {
+	case <-time.After(time.Duration(m.timeIncrement) * time.Millisecond):
+	case <-m.stopChan:
+	}
+}
+
 // shouldRestart checks the restart policy and applies the rules to determine if
 // the container's process should be restarted
 func (m *containerMonitor) shouldRestart(exitStatus int) bool {
@@ -221,8 +255,6 @@ func (m *containerMonitor) resetContainer() {
 		container.stdin, container.stdinPipe = io.Pipe()
 	}
 
-	container.LogEvent("die")
-
 	c := container.command.Cmd
 
 	container.command.Cmd = exec.Cmd{

From ebf5d4657d14c7b57a5ca1c7b8689077b18b33dc Mon Sep 17 00:00:00 2001
From: Michael Crosby
Date: Tue, 12 Aug 2014 13:08:26 -0700
Subject: [PATCH 15/18] Regardless of success reset time increment

Reset the time increment if the container's execution time is greater
than 10s, or else as a container runs and is restarted the time will
grow over time.

Signed-off-by: Michael Crosby
---
 daemon/monitor.go | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/daemon/monitor.go b/daemon/monitor.go
index d9e67b0681..231b883a3a 100644
--- a/daemon/monitor.go
+++ b/daemon/monitor.go
@@ -43,6 +43,9 @@ type containerMonitor struct {
 	// timeIncrement is the amount of time to wait between restarts
 	// this is in milliseconds
 	timeIncrement int
+
+	// lastStartTime is the time which the monitor last exec'd the container's process
+	lastStartTime time.Time
 }
 
 func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) *containerMonitor {
@@ -113,6 +116,8 @@ func (m *containerMonitor) Start() error {
 		m.container.LogEvent("start")
+
+		m.lastStartTime = time.Now()
+
 		if exitStatus, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil {
 			utils.Errorf("Error running container: %s", err)
 		}
@@ -154,20 +159,25 @@ func (m *containerMonitor) Start() error {
 }
 
 // resetMonitor resets the stateful fields on the containerMonitor based on the
-// previous runs success or failure
+// previous runs success or failure. Regardless of success, if the container had
+// an execution time of more than 10s then reset the timer back to the default
 func (m *containerMonitor) resetMonitor(successful bool) {
-	// the container exited successfully so we need to reset the failure counter
-	// and the timeIncrement back to the default values
-	if successful {
-		m.failureCount = 0
+	executionTime := time.Now().Sub(m.lastStartTime).Seconds()
+
+	if executionTime > 10 {
 		m.timeIncrement = defaultTimeIncrement
 	} else {
 		// otherwise we need to increment the amount of time we wait before restarting
 		// the process.
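Taken together, the rewritten resetMonitor behaves roughly like this standalone sketch (simplified signature, assuming the 10-second threshold shown in the hunk; not the daemon's actual method):

package main

import (
	"fmt"
	"time"
)

const defaultDelay = 100 * time.Millisecond

// resetDelay sketches the new rule: if the previous run lasted more than 10
// seconds the delay snaps back to the default regardless of how it exited;
// otherwise it keeps doubling. The failure counter only tracks failed exits.
func resetDelay(delay time.Duration, failures int, executionTime time.Duration, successful bool) (time.Duration, int) {
	if executionTime > 10*time.Second {
		delay = defaultDelay
	} else {
		delay *= 2
	}

	if successful {
		failures = 0
	} else {
		failures++
	}
	return delay, failures
}

func main() {
	delay, failures := defaultDelay, 0

	// a crash after 2s doubles the delay; a crash after a 30s run resets it
	delay, failures = resetDelay(delay, failures, 2*time.Second, false)
	fmt.Println(delay, failures) // 200ms 1
	delay, failures = resetDelay(delay, failures, 30*time.Second, false)
	fmt.Println(delay, failures) // 100ms 2
}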
We will build up by multiplying the increment by 2 - - m.failureCount++ m.timeIncrement *= 2 } + + // the container exited successfully so we need to reset the failure counter + if successful { + m.failureCount = 0 + } else { + m.failureCount++ + } } // waitForNextRestart waits with the default time increment to restart the container unless From 2ec1b697c1a7c0ecc534b9ebffd7ba40ea285ff7 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 12 Aug 2014 18:03:11 -0700 Subject: [PATCH 16/18] Rebased changes to return on first start's error Signed-off-by: Michael Crosby --- daemon/container.go | 18 +++++------------- daemon/monitor.go | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/daemon/container.go b/daemon/container.go index e0da3c144a..3cb7af99e3 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -1081,22 +1081,14 @@ func (container *Container) startLoggingToDisk() error { func (container *Container) waitForStart() error { container.monitor = newContainerMonitor(container, container.hostConfig.RestartPolicy) - var ( - cErr = utils.Go(container.monitor.Start) - waitStart = make(chan struct{}) - ) - - go func() { - container.State.WaitRunning(-1 * time.Second) - close(waitStart) - }() - - // Start should not return until the process is actually running + // block until we either receive an error from the initial start of the container's + // process or until the process is running in the container select { - case <-waitStart: - case err := <-cErr: + case <-container.monitor.startSignal: + case err := <-utils.Go(container.monitor.Start): return err } + return nil } diff --git a/daemon/monitor.go b/daemon/monitor.go index 231b883a3a..801fe0e043 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -35,6 +35,10 @@ type containerMonitor struct { // either because docker or the user asked for the container to be stopped shouldStop bool + // startSignal signals with the initial process has launched after calling Start + // on the monitor + startSignal chan struct{} + // stopChan is used to signal to the monitor whenever there is a wait for the // next restart so that the timeIncrement is not honored and the user is not // left waiting for nothing to happen during this time @@ -48,12 +52,15 @@ type containerMonitor struct { lastStartTime time.Time } +// newContainerMonitor returns an initialized containerMonitor for the provided container +// honoring the provided restart policy func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) *containerMonitor { return &containerMonitor{ container: container, restartPolicy: policy, timeIncrement: defaultTimeIncrement, stopChan: make(chan struct{}, 1), + startSignal: make(chan struct{}, 1), } } @@ -119,6 +126,14 @@ func (m *containerMonitor) Start() error { m.lastStartTime = time.Now() if exitStatus, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil { + // if we receive an internal error from the initial start of a container then lets + // return it instead of entering the restart loop + if m.container.RestartCount == 1 { + m.resetContainer() + + return err + } + utils.Errorf("Error running container: %s", err) } @@ -230,6 +245,9 @@ func (m *containerMonitor) callback(command *execdriver.Command) { m.container.State.SetRunning(command.Pid()) + // signal that the process has started + close(m.startSignal) + if err := m.container.ToDisk(); err != nil { utils.Debugf("%s", err) } From 73ced636800c150640e2f0d307842b0f9259b611 Mon Sep 17 00:00:00 2001 From: 
Michael Crosby Date: Wed, 13 Aug 2014 14:56:35 -0700 Subject: [PATCH 17/18] Update based on comments from the code review Signed-off-by: Michael Crosby --- daemon/monitor.go | 14 ++++++++------ daemon/state.go | 22 +++++++++++----------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/daemon/monitor.go b/daemon/monitor.go index 801fe0e043..cb388e9139 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -24,7 +24,7 @@ type containerMonitor struct { // container is the container being monitored container *Container - // restartPolicy is the being applied to the container monitor + // restartPolicy is the current policy being applied to the container monitor restartPolicy runconfig.RestartPolicy // failureCount is the number of times the container has failed to @@ -35,8 +35,7 @@ type containerMonitor struct { // either because docker or the user asked for the container to be stopped shouldStop bool - // startSignal signals with the initial process has launched after calling Start - // on the monitor + // startSignal is a channel that is closes after the container initially starts startSignal chan struct{} // stopChan is used to signal to the monitor whenever there is a wait for the @@ -196,7 +195,7 @@ func (m *containerMonitor) resetMonitor(successful bool) { } // waitForNextRestart waits with the default time increment to restart the container unless -// a user or docker asks to container to be stopped +// a user or docker asks for the container to be stopped func (m *containerMonitor) waitForNextRestart() { select { case <-time.After(time.Duration(m.timeIncrement) * time.Millisecond): @@ -245,8 +244,11 @@ func (m *containerMonitor) callback(command *execdriver.Command) { m.container.State.SetRunning(command.Pid()) - // signal that the process has started - close(m.startSignal) + if m.startSignal != nil { + // signal that the process has started + close(m.startSignal) + m.startSignal = nil + } if err := m.container.ToDisk(); err != nil { utils.Debugf("%s", err) diff --git a/daemon/state.go b/daemon/state.go index 61b65f7d4e..ed137b0206 100644 --- a/daemon/state.go +++ b/daemon/state.go @@ -123,6 +123,7 @@ func (s *State) SetRunning(pid int) { s.Lock() s.Running = true s.Paused = false + s.Restarting = false s.ExitCode = 0 s.Pid = pid s.StartedAt = time.Now().UTC() @@ -134,6 +135,7 @@ func (s *State) SetRunning(pid int) { func (s *State) SetStopped(exitCode int) { s.Lock() s.Running = false + s.Restarting = false s.Pid = 0 s.FinishedAt = time.Now().UTC() s.ExitCode = exitCode @@ -146,17 +148,15 @@ func (s *State) SetStopped(exitCode int) { // in the middle of a stop and being restarted again func (s *State) SetRestarting(exitCode int) { s.Lock() - if s.Running { - // we should consider the container running when it is restarting because of - // all the checks in docker around rm/stop/etc - s.Running = true - s.Restarting = true - s.Pid = 0 - s.FinishedAt = time.Now().UTC() - s.ExitCode = exitCode - close(s.waitChan) // fire waiters for stop - s.waitChan = make(chan struct{}) - } + // we should consider the container running when it is restarting because of + // all the checks in docker around rm/stop/etc + s.Running = true + s.Restarting = true + s.Pid = 0 + s.FinishedAt = time.Now().UTC() + s.ExitCode = exitCode + close(s.waitChan) // fire waiters for stop + s.waitChan = make(chan struct{}) s.Unlock() } From 25c519e829640ebb23061b82c6ace88c5983b63d Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 13 Aug 2014 14:58:57 -0700 Subject: [PATCH 18/18] Deprecate 
--restart on the daemon

Signed-off-by: Michael Crosby
---
 daemon/config.go                          |  2 +-
 daemon/daemon.go                          |  4 ++--
 daemon/monitor.go                         | 20 ++++++++++----------
 docs/man/docker.1.md                      |  3 ---
 docs/sources/reference/commandline/cli.md |  1 -
 5 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/daemon/config.go b/daemon/config.go
index 5d0ea6ac12..a396bd0232 100644
--- a/daemon/config.go
+++ b/daemon/config.go
@@ -45,7 +45,7 @@ func (config *Config) InstallFlags() {
 	flag.StringVar(&config.Pidfile, []string{"p", "-pidfile"}, "/var/run/docker.pid", "Path to use for daemon PID file")
 	flag.StringVar(&config.Root, []string{"g", "-graph"}, "/var/lib/docker", "Path to use as the root of the Docker runtime")
-	flag.BoolVar(&config.AutoRestart, []string{"r", "-restart"}, true, "Restart previously running containers")
+	flag.BoolVar(&config.AutoRestart, []string{"#r", "#-restart"}, true, "--restart on the daemon has been deprecated in favor of --restart policies on docker run")
 	flag.BoolVar(&config.EnableIptables, []string{"#iptables", "-iptables"}, true, "Enable Docker's addition of iptables rules")
 	flag.BoolVar(&config.EnableIpForward, []string{"#ip-forward", "-ip-forward"}, true, "Enable net.ipv4.ip_forward")
 	flag.StringVar(&config.BridgeIP, []string{"#bip", "-bip"}, "", "Use this CIDR notation address for the network bridge's IP, not compatible with -b")
diff --git a/daemon/daemon.go b/daemon/daemon.go
index c88ac32e63..90cb6a89e1 100644
--- a/daemon/daemon.go
+++ b/daemon/daemon.go
@@ -372,10 +372,10 @@ func (daemon *Daemon) restore() error {
 	for _, container := range registeredContainers {
 		if container.hostConfig.RestartPolicy.Name == "always" ||
 			(container.hostConfig.RestartPolicy.Name == "on-failure" && container.State.ExitCode != 0) {
-			utils.Debugf("Starting container %s", container.ID)
+			log.Debugf("Starting container %s", container.ID)
 
 			if err := container.Start(); err != nil {
-				utils.Debugf("Failed to start container %s: %s", container.ID, err)
+				log.Debugf("Failed to start container %s: %s", container.ID, err)
 			}
 		}
 	}
diff --git a/daemon/monitor.go b/daemon/monitor.go
index cb388e9139..28dd424f3c 100644
--- a/daemon/monitor.go
+++ b/daemon/monitor.go
@@ -7,8 +7,8 @@ import (
 	"time"
 
 	"github.com/docker/docker/daemon/execdriver"
+	"github.com/docker/docker/pkg/log"
 	"github.com/docker/docker/runconfig"
-	"github.com/docker/docker/utils"
 )
 
 const defaultTimeIncrement = 100
@@ -88,7 +88,7 @@ func (m *containerMonitor) Close() error {
 		// because they share same runconfig and change image.
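As an aside, the restore path touched above only auto-starts containers whose policy calls for it; the check reduces to something like the sketch below (hypothetical helper name, simplified policy struct):

package main

import "fmt"

// RestartPolicy mirrors the shape used by the restart policies in this series:
// a name plus an optional retry limit.
type RestartPolicy struct {
	Name              string
	MaximumRetryCount int
}

// shouldRestoreOnBoot reproduces the restore() condition: containers with an
// "always" policy are restarted at daemon startup, "on-failure" only if their
// last exit code was non-zero.
func shouldRestoreOnBoot(policy RestartPolicy, exitCode int) bool {
	return policy.Name == "always" ||
		(policy.Name == "on-failure" && exitCode != 0)
}

func main() {
	fmt.Println(shouldRestoreOnBoot(RestartPolicy{Name: "always"}, 0))     // true
	fmt.Println(shouldRestoreOnBoot(RestartPolicy{Name: "on-failure"}, 0)) // false
	fmt.Println(shouldRestoreOnBoot(RestartPolicy{Name: "on-failure"}, 1)) // true
	fmt.Println(shouldRestoreOnBoot(RestartPolicy{Name: ""}, 1))           // false
}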
Must be fixed // in builder/builder.go if err := m.container.toDisk(); err != nil { - utils.Errorf("Error dumping container %s state to disk: %s\n", m.container.ID, err) + log.Errorf("Error dumping container %s state to disk: %s\n", m.container.ID, err) return err } @@ -127,13 +127,13 @@ func (m *containerMonitor) Start() error { if exitStatus, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil { // if we receive an internal error from the initial start of a container then lets // return it instead of entering the restart loop - if m.container.RestartCount == 1 { + if m.container.RestartCount == 0 { m.resetContainer() return err } - utils.Errorf("Error running container: %s", err) + log.Errorf("Error running container: %s", err) } m.resetMonitor(err == nil && exitStatus == 0) @@ -220,7 +220,7 @@ func (m *containerMonitor) shouldRestart(exitStatus int) bool { case "on-failure": // the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count if max := m.restartPolicy.MaximumRetryCount; max != 0 && m.failureCount >= max { - utils.Debugf("stopping restart of container %s because maximum failure could of %d has been reached", max) + log.Debugf("stopping restart of container %s because maximum failure could of %d has been reached", max) return false } @@ -251,7 +251,7 @@ func (m *containerMonitor) callback(command *execdriver.Command) { } if err := m.container.ToDisk(); err != nil { - utils.Debugf("%s", err) + log.Debugf("%s", err) } } @@ -262,21 +262,21 @@ func (m *containerMonitor) resetContainer() { if container.Config.OpenStdin { if err := container.stdin.Close(); err != nil { - utils.Errorf("%s: Error close stdin: %s", container.ID, err) + log.Errorf("%s: Error close stdin: %s", container.ID, err) } } if err := container.stdout.Clean(); err != nil { - utils.Errorf("%s: Error close stdout: %s", container.ID, err) + log.Errorf("%s: Error close stdout: %s", container.ID, err) } if err := container.stderr.Clean(); err != nil { - utils.Errorf("%s: Error close stderr: %s", container.ID, err) + log.Errorf("%s: Error close stderr: %s", container.ID, err) } if container.command != nil && container.command.Terminal != nil { if err := container.command.Terminal.Close(); err != nil { - utils.Errorf("%s: Error closing terminal: %s", container.ID, err) + log.Errorf("%s: Error closing terminal: %s", container.ID, err) } } diff --git a/docs/man/docker.1.md b/docs/man/docker.1.md index 18e8978af6..3932097255 100644 --- a/docs/man/docker.1.md +++ b/docs/man/docker.1.md @@ -64,9 +64,6 @@ unix://[/path/to/socket] to use. **-p**="" Path to use for daemon PID file. Default is `/var/run/docker.pid` -**-r**=*true*|*false* - Restart previously running containers. Default is true. - **-s**="" Force the Docker runtime to use a specific storage driver. diff --git a/docs/sources/reference/commandline/cli.md b/docs/sources/reference/commandline/cli.md index 4a756cc16e..9afa691b6b 100644 --- a/docs/sources/reference/commandline/cli.md +++ b/docs/sources/reference/commandline/cli.md @@ -71,7 +71,6 @@ expect an integer, and they can only be specified once. --mtu=0 Set the containers network MTU if no value is provided: default to the default route MTU or 1500 if no default route is available -p, --pidfile="/var/run/docker.pid" Path to use for daemon PID file - -r, --restart=true Restart previously running containers -s, --storage-driver="" Force the Docker runtime to use a specific storage driver --selinux-enabled=false Enable selinux support. 
SELinux does not presently support the BTRFS storage driver --storage-opt=[] Set storage driver options
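To round out the series, the per-exit decision the monitor makes can be summarized in the sketch below (illustrative only; the real shouldRestart runs under the monitor's lock, and the Start loop above additionally abandons a restart once a stop has been requested):

package main

import "fmt"

type RestartPolicy struct {
	Name              string
	MaximumRetryCount int
}

// shouldRestart applies the same rules as the monitor: "always" restarts
// unconditionally, "on-failure" restarts on a non-zero exit until the optional
// retry limit (0 means unlimited) is reached, and a user-requested stop always
// wins.
func shouldRestart(policy RestartPolicy, exitCode, failureCount int, userRequestedStop bool) bool {
	if userRequestedStop {
		return false
	}

	switch policy.Name {
	case "always":
		return true
	case "on-failure":
		if max := policy.MaximumRetryCount; max != 0 && failureCount >= max {
			return false
		}
		return exitCode != 0
	default:
		return false
	}
}

func main() {
	p := RestartPolicy{Name: "on-failure", MaximumRetryCount: 3}
	fmt.Println(shouldRestart(p, 1, 0, false)) // true: failed exit, under the limit
	fmt.Println(shouldRestart(p, 1, 3, false)) // false: retry limit reached
	fmt.Println(shouldRestart(p, 0, 0, false)) // false: clean exit
}

With the daemon-level -r/--restart flag deprecated in this final patch, the per-container policy passed to docker run (for example --restart=always, or --restart=on-failure with an optional maximum retry count) is what is expected to drive this logic.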