Fix deadlock on cancelling healthcheck
Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com>
This commit is contained in:
parent
27c20a7ea0
commit
89b1234737
3 changed files with 12 additions and 8 deletions
|
@ -42,10 +42,7 @@ func (s *Health) OpenMonitorChannel() chan struct{} {
|
|||
func (s *Health) CloseMonitorChannel() {
|
||||
if s.stop != nil {
|
||||
logrus.Debug("CloseMonitorChannel: waiting for probe to stop")
|
||||
// This channel does not buffer. Once the write succeeds, the monitor
|
||||
// has read the stop request and will not make any further updates
|
||||
// to c.State.Health.
|
||||
s.stop <- struct{}{}
|
||||
close(s.stop)
|
||||
s.stop = nil
|
||||
logrus.Debug("CloseMonitorChannel done")
|
||||
}
|
||||
|
|
|
@ -107,10 +107,17 @@ func (p *cmdProbe) run(ctx context.Context, d *Daemon, container *container.Cont
|
|||
}
|
||||
|
||||
// Update the container's State.Health struct based on the latest probe's result.
|
||||
func handleProbeResult(d *Daemon, c *container.Container, result *types.HealthcheckResult) {
|
||||
func handleProbeResult(d *Daemon, c *container.Container, result *types.HealthcheckResult, done chan struct{}) {
|
||||
c.Lock()
|
||||
defer c.Unlock()
|
||||
|
||||
// The probe may have been cancelled while we were waiting for the lock; if so, ignore its result.
|
||||
select {
|
||||
case <-done:
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
retries := c.Config.Healthcheck.Retries
|
||||
if retries <= 0 {
|
||||
retries = defaultProbeRetries
|
||||
|
@ -183,7 +190,7 @@ func monitor(d *Daemon, c *container.Container, stop chan struct{}, probe probe)
|
|||
cancelProbe()
|
||||
return
|
||||
case result := <-results:
|
||||
handleProbeResult(d, c, result)
|
||||
handleProbeResult(d, c, result, stop)
|
||||
// Stop timeout
|
||||
cancelProbe()
|
||||
case <-ctx.Done():
|
||||
|
@ -193,7 +200,7 @@ func monitor(d *Daemon, c *container.Container, stop chan struct{}, probe probe)
|
|||
Output: fmt.Sprintf("Health check exceeded timeout (%v)", probeTimeout),
|
||||
Start: startTime,
|
||||
End: time.Now(),
|
||||
})
|
||||
}, stop)
|
||||
cancelProbe()
|
||||
// Wait for probe to exit (it might take a while to respond to the TERM
|
||||
// signal and we don't want dying probes to pile up).
|
||||
|
|
|
@ -80,7 +80,7 @@ func TestHealthStates(t *testing.T) {
|
|||
Start: startTime,
|
||||
End: startTime,
|
||||
ExitCode: exitCode,
|
||||
})
|
||||
}, nil)
|
||||
}
|
||||
|
||||
// starting -> failed -> success -> failed
|
||||
|
|
Loading…
Reference in a new issue