Fix event handler exiting when containerd is restarted

Description:
Kill docker-containerd repeatedly, then use kill -SIGUSR1 <dockerpid>
to dump the docker call stacks. The dump shows that the event handler
(startEventsMonitor or handleEventStream) has exited.

This only happens when the system is busy: containerd needs more time to
start up, and the monitor goroutine may exit.

Signed-off-by: Wentao Zhang <zhangwentao234@huawei.com>
This commit is contained in:
Wentao Zhang 2017-05-04 00:45:35 +08:00
parent 916e9ad754
commit 02ce73f62e

View file

@ -49,7 +49,7 @@ type remote struct {
stateDir string
rpcAddr string
startDaemon bool
closeManually bool
closedManually bool
debugLog bool
rpcConn *grpc.ClientConn
clients []*client
@ -154,7 +154,7 @@ func (r *remote) handleConnectionChange() {
logrus.Debugf("libcontainerd: containerd health check returned error: %v", err)
if r.daemonPid != -1 {
if r.closeManually {
if r.closedManually {
// Well, we asked for it to stop, just return
return
}
@ -180,7 +180,7 @@ func (r *remote) Cleanup() {
if r.daemonPid == -1 {
return
}
r.closeManually = true
r.closedManually = true
r.rpcConn.Close()
// Ask the daemon to quit
syscall.Kill(r.daemonPid, syscall.SIGTERM)
@ -280,10 +280,23 @@ func (r *remote) startEventsMonitor() error {
er := &containerd.EventsRequest{
Timestamp: tsp,
}
events, err := r.apiClient.Events(context.Background(), er, grpc.FailFast(false))
if err != nil {
return err
var events containerd.API_EventsClient
for {
events, err = r.apiClient.Events(context.Background(), er, grpc.FailFast(false))
if err == nil {
break
}
logrus.Warnf("libcontainerd: failed to get events from containerd: %q", err)
if r.closedManually {
// ignore error if grpc remote connection is closed manually
return nil
}
<-time.After(100 * time.Millisecond)
}
go r.handleEventStream(events)
return nil
}
@ -293,7 +306,7 @@ func (r *remote) handleEventStream(events containerd.API_EventsClient) {
e, err := events.Recv()
if err != nil {
if grpc.ErrorDesc(err) == transport.ErrConnClosing.Desc &&
r.closeManually {
r.closedManually {
// ignore error if grpc remote connection is closed manually
return
}