Fix deadlock on cancelling healthcheck

Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com>
This commit is contained in:
Tonis Tiigi 2016-11-15 18:02:26 -08:00
parent 27c20a7ea0
commit 89b1234737
3 changed files with 12 additions and 8 deletions

View file

@ -42,10 +42,7 @@ func (s *Health) OpenMonitorChannel() chan struct{} {
func (s *Health) CloseMonitorChannel() {
if s.stop != nil {
logrus.Debug("CloseMonitorChannel: waiting for probe to stop")
// This channel does not buffer. Once the write succeeds, the monitor
// has read the stop request and will not make any further updates
// to c.State.Health.
s.stop <- struct{}{}
close(s.stop)
s.stop = nil
logrus.Debug("CloseMonitorChannel done")
}

View file

@ -107,10 +107,17 @@ func (p *cmdProbe) run(ctx context.Context, d *Daemon, container *container.Cont
}
// Update the container's Status.Health struct based on the latest probe's result.
func handleProbeResult(d *Daemon, c *container.Container, result *types.HealthcheckResult) {
func handleProbeResult(d *Daemon, c *container.Container, result *types.HealthcheckResult, done chan struct{}) {
c.Lock()
defer c.Unlock()
// probe may have been cancelled while waiting on lock. Ignore result then
select {
case <-done:
return
default:
}
retries := c.Config.Healthcheck.Retries
if retries <= 0 {
retries = defaultProbeRetries
@ -183,7 +190,7 @@ func monitor(d *Daemon, c *container.Container, stop chan struct{}, probe probe)
cancelProbe()
return
case result := <-results:
handleProbeResult(d, c, result)
handleProbeResult(d, c, result, stop)
// Stop timeout
cancelProbe()
case <-ctx.Done():
@ -193,7 +200,7 @@ func monitor(d *Daemon, c *container.Container, stop chan struct{}, probe probe)
Output: fmt.Sprintf("Health check exceeded timeout (%v)", probeTimeout),
Start: startTime,
End: time.Now(),
})
}, stop)
cancelProbe()
// Wait for probe to exit (it might take a while to respond to the TERM
// signal and we don't want dying probes to pile up).

View file

@ -80,7 +80,7 @@ func TestHealthStates(t *testing.T) {
Start: startTime,
End: startTime,
ExitCode: exitCode,
})
}, nil)
}
// starting -> failed -> success -> failed