From fb7ec1555cca750f5d1d99bae5e99b4818712322 Mon Sep 17 00:00:00 2001
From: Cory Snider
Date: Thu, 25 May 2023 16:00:29 -0400
Subject: [PATCH] libcontainerd: work around exec start bug in c8d

It turns out that the unnecessary serialization removed in
b75246202ab9b1e5bb94c377f90db8ed38cfa0e0 happened to work around a bug
in containerd. When many exec processes are started concurrently in the
same containerd task, it takes seconds to minutes for them all to start.
Add the workaround back in, only deliberately this time.

Signed-off-by: Cory Snider
---
 libcontainerd/remote/client.go | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/libcontainerd/remote/client.go b/libcontainerd/remote/client.go
index 2635fa6305..6ea98b0c9c 100644
--- a/libcontainerd/remote/client.go
+++ b/libcontainerd/remote/client.go
@@ -60,6 +60,10 @@ type container struct {
 type task struct {
 	containerd.Task
 	ctr *container
+
+	// Workaround for https://github.com/containerd/containerd/issues/8557.
+	// See also https://github.com/moby/moby/issues/45595.
+	serializeExecStartsWorkaround sync.Mutex
 }
 
 type process struct {
@@ -296,7 +300,12 @@ func (t *task) Exec(ctx context.Context, processID string, spec *specs.Process,
 	// the stdin of exec process will be created after p.Start in containerd
 	defer func() { stdinCloseSync <- p }()
 
-	if err = p.Start(ctx); err != nil {
+	err = func() error {
+		t.serializeExecStartsWorkaround.Lock()
+		defer t.serializeExecStartsWorkaround.Unlock()
+		return p.Start(ctx)
+	}()
+	if err != nil {
 		// use new context for cleanup because old one may be cancelled by user, but leave a timeout to make sure
 		// we are not waiting forever if containerd is unresponsive or to work around fifo cancelling issues in
 		// older containerd-shim