
Add Restarting state when docker is handling the restart of containers

Signed-off-by: Michael Crosby <michael@docker.com>
Michael Crosby 11 years ago
Parent
Current commit
a2afb2b1e3
2 files changed, 105 insertions(+), 66 deletions(-)
  1. daemon/monitor.go (+75 -66)
  2. daemon/state.go (+30 -0)

+ 75 - 66
daemon/monitor.go

@@ -74,66 +74,6 @@ func (m *containerMonitor) Close() error {
 	return nil
 }

-// reset resets the container's IO and ensures that the command is able to be executed again
-// by copying the data into a new struct
-func (m *containerMonitor) reset(successful bool) {
-	container := m.container
-
-	if container.Config.OpenStdin {
-		if err := container.stdin.Close(); err != nil {
-			utils.Errorf("%s: Error close stdin: %s", container.ID, err)
-		}
-	}
-
-	if err := container.stdout.Clean(); err != nil {
-		utils.Errorf("%s: Error close stdout: %s", container.ID, err)
-	}
-
-	if err := container.stderr.Clean(); err != nil {
-		utils.Errorf("%s: Error close stderr: %s", container.ID, err)
-	}
-
-	if container.command != nil && container.command.Terminal != nil {
-		if err := container.command.Terminal.Close(); err != nil {
-			utils.Errorf("%s: Error closing terminal: %s", container.ID, err)
-		}
-	}
-
-	// Re-create a brand new stdin pipe once the container exited
-	if container.Config.OpenStdin {
-		container.stdin, container.stdinPipe = io.Pipe()
-	}
-
-	container.LogEvent("die")
-
-	c := container.command.Cmd
-
-	container.command.Cmd = exec.Cmd{
-		Stdin:       c.Stdin,
-		Stdout:      c.Stdout,
-		Stderr:      c.Stderr,
-		Path:        c.Path,
-		Env:         c.Env,
-		ExtraFiles:  c.ExtraFiles,
-		Args:        c.Args,
-		Dir:         c.Dir,
-		SysProcAttr: c.SysProcAttr,
-	}
-
-	// the container exited successfully so we need to reset the failure counter
-	// and the timeIncrement back to the default values
-	if successful {
-		m.failureCount = 0
-		m.timeIncrement = defaultTimeIncrement
-	} else {
-		// otherwise we need to increment the amount of time we wait before restarting
-		// the process.  We will build up by multiplying the increment by 2
-
-		m.failureCount++
-		m.timeIncrement *= 2
-	}
-}
-
 // Start starts the container's process and monitors it according to the restart policy
 func (m *containerMonitor) Start() error {
 	var (
@@ -151,7 +91,7 @@ func (m *containerMonitor) Start() error {
 		m.container.RestartCount++

 		if err := m.container.startLoggingToDisk(); err != nil {
-			m.reset(false)
+			m.resetContainer()

 			return err
 		}
@@ -164,18 +104,23 @@ func (m *containerMonitor) Start() error {
 			utils.Errorf("Error running container: %s", err)
 		}

-		// we still wait to set the state as stopped and ensure that the locks were released
-		m.container.State.SetStopped(exitStatus)
-
-		// pass if we exited successfully
-		m.reset(err == nil && exitStatus == 0)
+		m.resetMonitor(err == nil && exitStatus == 0)

 		if m.shouldRestart(exitStatus) {
+			m.container.State.SetRestarting(exitStatus)
+
+			m.resetContainer()
+
 			// sleep with a small time increment between each restart to help avoid issues caused by quickly
 			// restarting the container because of some types of errors ( networking cut out, etc... )
 			time.Sleep(time.Duration(m.timeIncrement) * time.Millisecond)

 			continue
+		} else {
+			// we still wait to set the state as stopped and ensure that the locks were released
+			m.container.State.SetStopped(exitStatus)
+
+			m.resetContainer()
 		}

 		break
@@ -184,6 +129,23 @@ func (m *containerMonitor) Start() error {
 	return err
 }

+// resetMonitor resets the stateful fields on the containerMonitor based on the
+// previous run's success or failure
+func (m *containerMonitor) resetMonitor(successful bool) {
+	// the container exited successfully so we need to reset the failure counter
+	// and the timeIncrement back to the default values
+	if successful {
+		m.failureCount = 0
+		m.timeIncrement = defaultTimeIncrement
+	} else {
+		// otherwise we need to increment the amount of time we wait before restarting
+		// the process.  We will build up by multiplying the increment by 2
+
+		m.failureCount++
+		m.timeIncrement *= 2
+	}
+}
+
 // shouldRestart checks the restart policy and applies the rules to determine if
 // the container's process should be restarted
 func (m *containerMonitor) shouldRestart(exitStatus int) bool {
@@ -229,3 +191,50 @@ func (m *containerMonitor) callback(command *execdriver.Command) {
 		utils.Debugf("%s", err)
 	}
 }
+
+// resetContainer resets the container's IO and ensures that the command is able to be executed again
+// by copying the data into a new struct
+func (m *containerMonitor) resetContainer() {
+	container := m.container
+
+	if container.Config.OpenStdin {
+		if err := container.stdin.Close(); err != nil {
+			utils.Errorf("%s: Error close stdin: %s", container.ID, err)
+		}
+	}
+
+	if err := container.stdout.Clean(); err != nil {
+		utils.Errorf("%s: Error close stdout: %s", container.ID, err)
+	}
+
+	if err := container.stderr.Clean(); err != nil {
+		utils.Errorf("%s: Error close stderr: %s", container.ID, err)
+	}
+
+	if container.command != nil && container.command.Terminal != nil {
+		if err := container.command.Terminal.Close(); err != nil {
+			utils.Errorf("%s: Error closing terminal: %s", container.ID, err)
+		}
+	}
+
+	// Re-create a brand new stdin pipe once the container exited
+	if container.Config.OpenStdin {
+		container.stdin, container.stdinPipe = io.Pipe()
+	}
+
+	container.LogEvent("die")
+
+	c := container.command.Cmd
+
+	container.command.Cmd = exec.Cmd{
+		Stdin:       c.Stdin,
+		Stdout:      c.Stdout,
+		Stderr:      c.Stderr,
+		Path:        c.Path,
+		Env:         c.Env,
+		ExtraFiles:  c.ExtraFiles,
+		Args:        c.Args,
+		Dir:         c.Dir,
+		SysProcAttr: c.SysProcAttr,
+	}
+}
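
For context, resetMonitor implements a simple exponential backoff between restarts: a clean exit resets the counters, while every failure doubles the wait before the next attempt. Below is a minimal standalone sketch of just that logic; the 100ms value for defaultTimeIncrement is an assumption, since the constant's definition is not part of this diff.

package main

import (
	"fmt"
	"time"
)

// Assumed default; the real constant lives elsewhere in daemon/monitor.go.
const defaultTimeIncrement = 100

type monitor struct {
	failureCount  int
	timeIncrement int
}

// resetMonitor mirrors the method added above.
func (m *monitor) resetMonitor(successful bool) {
	if successful {
		m.failureCount = 0
		m.timeIncrement = defaultTimeIncrement
	} else {
		m.failureCount++
		m.timeIncrement *= 2
	}
}

func main() {
	m := &monitor{timeIncrement: defaultTimeIncrement}
	for i := 0; i < 4; i++ {
		m.resetMonitor(false) // simulate a process that keeps dying
		fmt.Printf("failure %d: next restart in %v\n",
			m.failureCount, time.Duration(m.timeIncrement)*time.Millisecond)
	}
	// Prints 200ms, 400ms, 800ms, 1.6s: the doubling delay that Start
	// applies between its time.Sleep calls.
}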

+ 30 - 0
daemon/state.go

@@ -12,6 +12,7 @@ type State struct {
 	sync.RWMutex
 	Running    bool
 	Paused     bool
+	Restarting bool
 	Pid        int
 	ExitCode   int
 	StartedAt  time.Time
@@ -30,15 +31,22 @@ func (s *State) String() string {
 	s.RLock()
 	defer s.RUnlock()

+	if s.Restarting {
+		return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
+	}
+
 	if s.Running {
 		if s.Paused {
 			return fmt.Sprintf("Up %s (Paused)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
 		}
+
 		return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
 	}
+
 	if s.FinishedAt.IsZero() {
 		return ""
 	}
+
 	return fmt.Sprintf("Exited (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
 }

@@ -135,6 +143,28 @@ func (s *State) SetStopped(exitCode int) {
 	s.Unlock()
 }

+// SetRestarting is used when docker handles the auto restart of containers when they
+// are in the middle of a stop and being restarted again
+func (s *State) SetRestarting(exitCode int) {
+	s.Lock()
+	if s.Running {
+		s.Running = false
+		s.Restarting = true
+		s.Pid = 0
+		s.FinishedAt = time.Now().UTC()
+		s.ExitCode = exitCode
+		close(s.waitChan) // fire waiters for stop
+		s.waitChan = make(chan struct{})
+	}
+	s.Unlock()
+}
+
+// IsRestarting returns whether the container is currently being restarted
+func (s *State) IsRestarting() bool {
+	s.RLock()
+	res := s.Restarting
+	s.RUnlock()
+	return res
+}
+
 func (s *State) SetPaused() {
 	s.Lock()
 	s.Paused = true
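
The waitChan handling in SetRestarting is the subtle part: closing the old channel releases anything blocked waiting for the container to stop, and a fresh channel is installed immediately so the next run has waiters of its own. Below is a trimmed-down, self-contained sketch of the pattern; Pid, StartedAt and the paused branches are dropped, and units.HumanDuration is approximated with time.Since, so this is illustrative rather than Docker's actual type.

package main

import (
	"fmt"
	"sync"
	"time"
)

type State struct {
	sync.RWMutex
	Running    bool
	Restarting bool
	ExitCode   int
	FinishedAt time.Time
	waitChan   chan struct{}
}

func (s *State) String() string {
	s.RLock()
	defer s.RUnlock()

	// Restarting is checked first, as in the diff, so a container that is
	// mid-restart does not read as a plain "Exited".
	if s.Restarting {
		return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, time.Since(s.FinishedAt).Round(time.Second))
	}
	if s.FinishedAt.IsZero() {
		return ""
	}
	return fmt.Sprintf("Exited (%d) %s ago", s.ExitCode, time.Since(s.FinishedAt).Round(time.Second))
}

// SetRestarting mirrors the method above: record the exit, fire anyone
// blocked waiting for the stop, then re-arm the channel for the next run.
func (s *State) SetRestarting(exitCode int) {
	s.Lock()
	if s.Running {
		s.Running = false
		s.Restarting = true
		s.FinishedAt = time.Now().UTC()
		s.ExitCode = exitCode
		close(s.waitChan) // fire waiters for stop
		s.waitChan = make(chan struct{})
	}
	s.Unlock()
}

func main() {
	s := &State{Running: true, waitChan: make(chan struct{})}

	wait := s.waitChan // snapshot before SetRestarting swaps the channel
	done := make(chan struct{})
	go func() {
		<-wait // a waiter blocked until the container stops
		fmt.Println("waiter released:", s)
		close(done)
	}()

	s.SetRestarting(1)
	<-done // prints something like: waiter released: Restarting (1) 0s ago
}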