From 02ce73f62e73e78a4ec29b29fb2ba552221fe885 Mon Sep 17 00:00:00 2001 From: Wentao Zhang Date: Thu, 4 May 2017 00:45:35 +0800 Subject: [PATCH] Fix when containerd restarted, event handler may exit Description: Kill docker-containerd continuously, and use kill -SIGUSR1 to check docker callstacks. We will find that an event handler goroutine (startEventsMonitor or handleEventStream) has exited. This will only happen when the system is busy: containerd needs more time to start up, and the monitor goroutine may exit. Signed-off-by: Wentao Zhang --- libcontainerd/remote_unix.go | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/libcontainerd/remote_unix.go b/libcontainerd/remote_unix.go index 54550595ad..9b3af8020f 100644 --- a/libcontainerd/remote_unix.go +++ b/libcontainerd/remote_unix.go @@ -49,7 +49,7 @@ type remote struct { stateDir string rpcAddr string startDaemon bool - closeManually bool + closedManually bool debugLog bool rpcConn *grpc.ClientConn clients []*client @@ -154,7 +154,7 @@ func (r *remote) handleConnectionChange() { logrus.Debugf("libcontainerd: containerd health check returned error: %v", err) if r.daemonPid != -1 { - if r.closeManually { + if r.closedManually { // Well, we asked for it to stop, just return return } @@ -180,7 +180,7 @@ func (r *remote) Cleanup() { if r.daemonPid == -1 { return } - r.closeManually = true + r.closedManually = true r.rpcConn.Close() // Ask the daemon to quit syscall.Kill(r.daemonPid, syscall.SIGTERM) @@ -280,10 +280,23 @@ func (r *remote) startEventsMonitor() error { er := &containerd.EventsRequest{ Timestamp: tsp, } - events, err := r.apiClient.Events(context.Background(), er, grpc.FailFast(false)) - if err != nil { - return err + + var events containerd.API_EventsClient + for { + events, err = r.apiClient.Events(context.Background(), er, grpc.FailFast(false)) + if err == nil { + break + } + logrus.Warnf("libcontainerd: failed to get events from containerd: %q", err) + + if 
r.closedManually { + // ignore error if grpc remote connection is closed manually + return nil + } + + <-time.After(100 * time.Millisecond) } + go r.handleEventStream(events) return nil } @@ -293,7 +306,7 @@ func (r *remote) handleEventStream(events containerd.API_EventsClient) { e, err := events.Recv() if err != nil { if grpc.ErrorDesc(err) == transport.ErrConnClosing.Desc && - r.closeManually { + r.closedManually { // ignore error if grpc remote connection is closed manually return }