Fix supervisor healthcheck throttling

Fix the `default` case in the healthcheck loop's `select`, which made the select return immediately so the throttling delay was never waited on.
Ensure that the nil client condition is handled.

Signed-off-by: Derek McGowan <derek@mcgstyle.net>
This commit is contained in:
Derek McGowan 2018-09-04 11:00:28 -07:00
parent 6ba1e91877
commit c3e3293843
No known key found for this signature in database
GPG key ID: F58C5D0A4405ACDB

View file

@ -245,20 +245,26 @@ func (r *remote) monitorDaemon(ctx context.Context) {
}() }()
for { for {
select { if delay != nil {
case <-ctx.Done(): select {
r.logger.Info("stopping healthcheck following graceful shutdown") case <-ctx.Done():
if client != nil { r.logger.Info("stopping healthcheck following graceful shutdown")
client.Close() if client != nil {
client.Close()
}
return
case <-delay:
} }
return
case <-delay:
default:
} }
if r.daemonPid == -1 { if r.daemonPid == -1 {
if r.daemonWaitCh != nil { if r.daemonWaitCh != nil {
<-r.daemonWaitCh select {
case <-ctx.Done():
r.logger.Info("stopping containerd startup following graceful shutdown")
return
case <-r.daemonWaitCh:
}
} }
os.RemoveAll(r.GRPC.Address) os.RemoveAll(r.GRPC.Address)
@ -276,26 +282,28 @@ func (r *remote) monitorDaemon(ctx context.Context) {
} }
} }
tctx, cancel := context.WithTimeout(ctx, healthCheckTimeout) if client != nil {
_, err := client.IsServing(tctx) tctx, cancel := context.WithTimeout(ctx, healthCheckTimeout)
cancel() _, err := client.IsServing(tctx)
if err == nil { cancel()
if !started { if err == nil {
close(r.daemonStartCh) if !started {
started = true close(r.daemonStartCh)
started = true
}
transientFailureCount = 0
delay = time.After(500 * time.Millisecond)
continue
} }
transientFailureCount = 0 r.logger.WithError(err).WithField("binary", binaryName).Debug("daemon is not responding")
delay = time.After(500 * time.Millisecond)
continue
}
r.logger.WithError(err).WithField("binary", binaryName).Debug("daemon is not responding") transientFailureCount++
if transientFailureCount < maxConnectionRetryCount || system.IsProcessAlive(r.daemonPid) {
transientFailureCount++ delay = time.After(time.Duration(transientFailureCount) * 200 * time.Millisecond)
if transientFailureCount < maxConnectionRetryCount || system.IsProcessAlive(r.daemonPid) { continue
delay = time.After(time.Duration(transientFailureCount) * 200 * time.Millisecond) }
continue
} }
if system.IsProcessAlive(r.daemonPid) { if system.IsProcessAlive(r.daemonPid) {
@ -304,6 +312,7 @@ func (r *remote) monitorDaemon(ctx context.Context) {
} }
client.Close() client.Close()
client = nil
r.daemonPid = -1 r.daemonPid = -1
delay = nil delay = nil
transientFailureCount = 0 transientFailureCount = 0