|
@@ -888,6 +888,24 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
|
|
|
registerMetricsPluginCallback(d.PluginStore, metricsSockPath)
|
|
|
|
|
|
gopts := []grpc.DialOption{
|
|
|
+ // WithBlock makes sure that the following containerd request
|
|
|
+ // is reliable.
|
|
|
+ //
|
|
|
+ // NOTE: In one edge case with high load pressure, kernel kills
|
|
|
+ // dockerd, containerd and containerd-shims caused by OOM.
|
|
|
+ // When both dockerd and containerd restart, but containerd
|
|
|
+ // will take time to recover all the existing containers. Before
|
|
|
+ // containerd serving, dockerd will failed with gRPC error.
|
|
|
+ // That bad thing is that restore action will still ignore the
|
|
|
+ // any non-NotFound errors and returns running state for
|
|
|
+ // already stopped container. It is unexpected behavior. And
|
|
|
+ // we need to restart dockerd to make sure that anything is OK.
|
|
|
+ //
|
|
|
+ // It is painful. Add WithBlock can prevent the edge case. And
|
|
|
+ // n common case, the containerd will be serving in shortly.
|
|
|
+ // It is not harm to add WithBlock for containerd connection.
|
|
|
+ grpc.WithBlock(),
|
|
|
+
|
|
|
grpc.WithInsecure(),
|
|
|
grpc.WithBackoffMaxDelay(3 * time.Second),
|
|
|
grpc.WithContextDialer(dialer.ContextDialer),
|