Browse Source

container: protect the health status with mutex

Adds a mutex to protect the status, as well. When running the race
detector with the unit test, we can see that the Status field is written
without holding this lock. Adding a mutex to read and set status
addresses the issue.

Signed-off-by: Stephen J Day <stephen.day@docker.com>
Stephen J Day 7 years ago
parent
commit
7db30ab0cd
4 changed files with 47 additions and 23 deletions
  1. 30 7
      container/health.go
  2. 9 8
      daemon/health.go
  3. 7 7
      daemon/health_test.go
  4. 1 1
      daemon/inspect.go

+ 30 - 7
container/health.go

@@ -16,19 +16,42 @@ type Health struct {
 
 
 // String returns a human-readable description of the health-check state
 // String returns a human-readable description of the health-check state
 func (s *Health) String() string {
 func (s *Health) String() string {
-	// This happens when the monitor has yet to be setup.
-	if s.Status == "" {
-		return types.Unhealthy
-	}
+	status := s.Status()
 
 
-	switch s.Status {
+	switch status {
 	case types.Starting:
 	case types.Starting:
 		return "health: starting"
 		return "health: starting"
 	default: // Healthy and Unhealthy are clear on their own
 	default: // Healthy and Unhealthy are clear on their own
-		return s.Status
+		return s.Health.Status
 	}
 	}
 }
 }
 
 
+// Status returns the current health status.
+//
+// Note that this takes a lock and the value may change after being read.
+func (s *Health) Status() string {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	// This happens when the monitor has yet to be setup.
+	if s.Health.Status == "" {
+		return types.Unhealthy
+	}
+
+	return s.Health.Status
+}
+
+// SetStatus writes the current status to the underlying health structure,
+// obeying the locking semantics.
+//
+// Status may be set directly if another lock is used.
+func (s *Health) SetStatus(new string) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	s.Health.Status = new
+}
+
 // OpenMonitorChannel creates and returns a new monitor channel. If there
 // OpenMonitorChannel creates and returns a new monitor channel. If there
 // already is one, it returns nil.
 // already is one, it returns nil.
 func (s *Health) OpenMonitorChannel() chan struct{} {
 func (s *Health) OpenMonitorChannel() chan struct{} {
@@ -53,7 +76,7 @@ func (s *Health) CloseMonitorChannel() {
 		close(s.stop)
 		close(s.stop)
 		s.stop = nil
 		s.stop = nil
 		// unhealthy when the monitor has stopped for compatibility reasons
 		// unhealthy when the monitor has stopped for compatibility reasons
-		s.Status = types.Unhealthy
+		s.Health.Status = types.Unhealthy
 		logrus.Debug("CloseMonitorChannel done")
 		logrus.Debug("CloseMonitorChannel done")
 	}
 	}
 }
 }

+ 9 - 8
daemon/health.go

@@ -129,7 +129,7 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch
 	}
 	}
 
 
 	h := c.State.Health
 	h := c.State.Health
-	oldStatus := h.Status
+	oldStatus := h.Status()
 
 
 	if len(h.Log) >= maxLogEntries {
 	if len(h.Log) >= maxLogEntries {
 		h.Log = append(h.Log[len(h.Log)+1-maxLogEntries:], result)
 		h.Log = append(h.Log[len(h.Log)+1-maxLogEntries:], result)
@@ -139,14 +139,14 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch
 
 
 	if result.ExitCode == exitStatusHealthy {
 	if result.ExitCode == exitStatusHealthy {
 		h.FailingStreak = 0
 		h.FailingStreak = 0
-		h.Status = types.Healthy
+		h.SetStatus(types.Healthy)
 	} else { // Failure (including invalid exit code)
 	} else { // Failure (including invalid exit code)
 		shouldIncrementStreak := true
 		shouldIncrementStreak := true
 
 
 		// If the container is starting (i.e. we never had a successful health check)
 		// If the container is starting (i.e. we never had a successful health check)
 		// then we check if we are within the start period of the container in which
 		// then we check if we are within the start period of the container in which
 		// case we do not increment the failure streak.
 		// case we do not increment the failure streak.
-		if h.Status == types.Starting {
+		if h.Status() == types.Starting {
 			startPeriod := timeoutWithDefault(c.Config.Healthcheck.StartPeriod, defaultStartPeriod)
 			startPeriod := timeoutWithDefault(c.Config.Healthcheck.StartPeriod, defaultStartPeriod)
 			timeSinceStart := result.Start.Sub(c.State.StartedAt)
 			timeSinceStart := result.Start.Sub(c.State.StartedAt)
 
 
@@ -160,7 +160,7 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch
 			h.FailingStreak++
 			h.FailingStreak++
 
 
 			if h.FailingStreak >= retries {
 			if h.FailingStreak >= retries {
-				h.Status = types.Unhealthy
+				h.SetStatus(types.Unhealthy)
 			}
 			}
 		}
 		}
 		// Else we're starting or healthy. Stay in that state.
 		// Else we're starting or healthy. Stay in that state.
@@ -173,8 +173,9 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch
 		logrus.Errorf("Error replicating health state for container %s: %v", c.ID, err)
 		logrus.Errorf("Error replicating health state for container %s: %v", c.ID, err)
 	}
 	}
 
 
-	if oldStatus != h.Status {
-		d.LogContainerEvent(c, "health_status: "+h.Status)
+	current := h.Status()
+	if oldStatus != current {
+		d.LogContainerEvent(c, "health_status: "+current)
 	}
 	}
 }
 }
 
 
@@ -293,11 +294,11 @@ func (d *Daemon) initHealthMonitor(c *container.Container) {
 	d.stopHealthchecks(c)
 	d.stopHealthchecks(c)
 
 
 	if h := c.State.Health; h != nil {
 	if h := c.State.Health; h != nil {
-		h.Status = types.Starting
+		h.SetStatus(types.Starting)
 		h.FailingStreak = 0
 		h.FailingStreak = 0
 	} else {
 	} else {
 		h := &container.Health{}
 		h := &container.Health{}
-		h.Status = types.Starting
+		h.SetStatus(types.Starting)
 		c.State.Health = h
 		c.State.Health = h
 	}
 	}
 
 

+ 7 - 7
daemon/health_test.go

@@ -14,7 +14,7 @@ import (
 func reset(c *container.Container) {
 func reset(c *container.Container) {
 	c.State = &container.State{}
 	c.State = &container.State{}
 	c.State.Health = &container.Health{}
 	c.State.Health = &container.Health{}
-	c.State.Health.Status = types.Starting
+	c.State.Health.SetStatus(types.Starting)
 }
 }
 
 
 func TestNoneHealthcheck(t *testing.T) {
 func TestNoneHealthcheck(t *testing.T) {
@@ -111,8 +111,8 @@ func TestHealthStates(t *testing.T) {
 
 
 	handleResult(c.State.StartedAt.Add(20*time.Second), 1)
 	handleResult(c.State.StartedAt.Add(20*time.Second), 1)
 	handleResult(c.State.StartedAt.Add(40*time.Second), 1)
 	handleResult(c.State.StartedAt.Add(40*time.Second), 1)
-	if c.State.Health.Status != types.Starting {
-		t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status)
+	if status := c.State.Health.Status(); status != types.Starting {
+		t.Errorf("Expecting starting, but got %#v\n", status)
 	}
 	}
 	if c.State.Health.FailingStreak != 2 {
 	if c.State.Health.FailingStreak != 2 {
 		t.Errorf("Expecting FailingStreak=2, but got %d\n", c.State.Health.FailingStreak)
 		t.Errorf("Expecting FailingStreak=2, but got %d\n", c.State.Health.FailingStreak)
@@ -133,15 +133,15 @@ func TestHealthStates(t *testing.T) {
 	c.Config.Healthcheck.StartPeriod = 30 * time.Second
 	c.Config.Healthcheck.StartPeriod = 30 * time.Second
 
 
 	handleResult(c.State.StartedAt.Add(20*time.Second), 1)
 	handleResult(c.State.StartedAt.Add(20*time.Second), 1)
-	if c.State.Health.Status != types.Starting {
-		t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status)
+	if status := c.State.Health.Status(); status != types.Starting {
+		t.Errorf("Expecting starting, but got %#v\n", status)
 	}
 	}
 	if c.State.Health.FailingStreak != 0 {
 	if c.State.Health.FailingStreak != 0 {
 		t.Errorf("Expecting FailingStreak=0, but got %d\n", c.State.Health.FailingStreak)
 		t.Errorf("Expecting FailingStreak=0, but got %d\n", c.State.Health.FailingStreak)
 	}
 	}
 	handleResult(c.State.StartedAt.Add(50*time.Second), 1)
 	handleResult(c.State.StartedAt.Add(50*time.Second), 1)
-	if c.State.Health.Status != types.Starting {
-		t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status)
+	if status := c.State.Health.Status(); status != types.Starting {
+		t.Errorf("Expecting starting, but got %#v\n", status)
 	}
 	}
 	if c.State.Health.FailingStreak != 1 {
 	if c.State.Health.FailingStreak != 1 {
 		t.Errorf("Expecting FailingStreak=1, but got %d\n", c.State.Health.FailingStreak)
 		t.Errorf("Expecting FailingStreak=1, but got %d\n", c.State.Health.FailingStreak)

+ 1 - 1
daemon/inspect.go

@@ -139,7 +139,7 @@ func (daemon *Daemon) getInspectData(container *container.Container) (*types.Con
 	var containerHealth *types.Health
 	var containerHealth *types.Health
 	if container.State.Health != nil {
 	if container.State.Health != nil {
 		containerHealth = &types.Health{
 		containerHealth = &types.Health{
-			Status:        container.State.Health.Status,
+			Status:        container.State.Health.Status(),
 			FailingStreak: container.State.Health.FailingStreak,
 			FailingStreak: container.State.Health.FailingStreak,
 			Log:           append([]*types.HealthcheckResult{}, container.State.Health.Log...),
 			Log:           append([]*types.HealthcheckResult{}, container.State.Health.Log...),
 		}
 		}