Merge pull request #40137 from fuweid/me-wait-for-remote-containerd-before-reload
daemon: add grpc.WithBlock option
This commit is contained in:
commit
62bd5a33f7
1 changed files with 18 additions and 0 deletions
|
@ -888,6 +888,24 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
|
|||
registerMetricsPluginCallback(d.PluginStore, metricsSockPath)
|
||||
|
||||
gopts := []grpc.DialOption{
|
||||
// WithBlock makes sure that the following containerd request
|
||||
// is reliable.
|
||||
//
|
||||
// NOTE: In one edge case with high load pressure, the kernel kills
|
||||
// dockerd, containerd and containerd-shims because of OOM.
|
||||
// When both dockerd and containerd restart, but containerd
|
||||
// will take time to recover all the existing containers. Before
|
||||
// containerd is serving, dockerd will fail with a gRPC error.
|
||||
// The bad thing is that the restore action will still ignore
|
||||
// any non-NotFound errors and return a running state for
|
||||
// an already stopped container. This is unexpected behavior, and
|
||||
// we need to restart dockerd to make sure that everything is OK.
|
||||
//
|
||||
// It is painful. Adding WithBlock prevents this edge case. And
|
||||
// in the common case, containerd will be serving shortly.
|
||||
// It does no harm to add WithBlock for the containerd connection.
|
||||
grpc.WithBlock(),
|
||||
|
||||
grpc.WithInsecure(),
|
||||
grpc.WithBackoffMaxDelay(3 * time.Second),
|
||||
grpc.WithContextDialer(dialer.ContextDialer),
|
||||
|
|
Loading…
Add table
Reference in a new issue