From 0556ba23a42205cdc6dbf56d0c861257977a1aed Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Fri, 5 May 2023 17:49:28 +0200 Subject: [PATCH 1/2] daemon: handleContainerExit(): use logrus.WithFields Use `WithFields()` instead of chaining multiple `WithField()` calls. Signed-off-by: Sebastiaan van Stijn (cherry picked from commit de363f14049df35a9e9d357b1abf0544afa92b8f) Signed-off-by: Sebastiaan van Stijn --- daemon/monitor.go | 54 ++++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/daemon/monitor.go b/daemon/monitor.go index 90998d4643..77a14f4aa1 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -39,7 +39,10 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine es, err := tsk.Delete(ctx) cancel() if err != nil { - logrus.WithError(err).WithField("container", c.ID).Warnf("failed to delete container from containerd") + logrus.WithFields(logrus.Fields{ + logrus.ErrorKey: err, + "container": c.ID, + }).Warn("failed to delete container from containerd") } else { exitStatus = container.ExitStatus{ ExitCode: int(es.ExitCode()), @@ -66,14 +69,15 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine execDuration := time.Since(c.StartedAt) restart, wait, err := c.RestartManager().ShouldRestart(uint32(exitStatus.ExitCode), daemonShutdown || c.HasBeenManuallyStopped, execDuration) if err != nil { - logrus.WithError(err). - WithField("container", c.ID). - WithField("restartCount", c.RestartCount). - WithField("exitStatus", exitStatus). - WithField("daemonShuttingDown", daemonShutdown). - WithField("hasBeenManuallyStopped", c.HasBeenManuallyStopped). - WithField("execDuration", execDuration). - Warn("ShouldRestart failed, container will not be restarted") + logrus.WithFields(logrus.Fields{ + logrus.ErrorKey: err, + "container": c.ID, + "restartCount": c.RestartCount, + "exitStatus": exitStatus, + "daemonShuttingDown": daemonShutdown, + "hasBeenManuallyStopped": c.HasBeenManuallyStopped, + "execDuration": execDuration, + }).Warn("ShouldRestart failed, container will not be restarted") restart = false } @@ -85,11 +89,12 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine if restart { c.RestartCount++ - logrus.WithField("container", c.ID). - WithField("restartCount", c.RestartCount). - WithField("exitStatus", exitStatus). - WithField("manualRestart", c.HasBeenManuallyRestarted). - Debug("Restarting container") + logrus.WithFields(logrus.Fields{ + "container": c.ID, + "restartCount": c.RestartCount, + "exitStatus": exitStatus, + "manualRestart": c.HasBeenManuallyRestarted, + }).Debug("Restarting container") c.SetRestarting(&exitStatus) } else { c.SetStopped(&exitStatus) @@ -188,9 +193,10 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei go func() { if _, err := execConfig.Process.Delete(context.Background()); err != nil { - logrus.WithError(err).WithFields(logrus.Fields{ - "container": ei.ContainerID, - "process": ei.ProcessID, + logrus.WithFields(logrus.Fields{ + logrus.ErrorKey: err, + "container": ei.ContainerID, + "process": ei.ProcessID, }).Warn("failed to delete process") } }() @@ -211,8 +217,10 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei if errdefs.IsNotFound(err) { // The container was started by not-docker and so could have been deleted by // not-docker before we got around to loading it from containerd. - logrus.WithField("container", c.ID).WithError(err). - Debug("could not load containerd container for start event") + logrus.WithFields(logrus.Fields{ + logrus.ErrorKey: err, + "container": c.ID, + }).Debug("could not load containerd container for start event") return nil } return err @@ -220,8 +228,10 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei tsk, err := ctr.Task(context.Background()) if err != nil { if errdefs.IsNotFound(err) { - logrus.WithField("container", c.ID).WithError(err). - Debug("failed to load task for externally-started container") + logrus.WithFields(logrus.Fields{ + logrus.ErrorKey: err, + "container": c.ID, + }).Debug("failed to load task for externally-started container") return nil } return err @@ -286,5 +296,5 @@ func (daemon *Daemon) autoRemove(c *container.Container) { return } - logrus.WithError(err).WithField("container", c.ID).Error("error removing container") + logrus.WithFields(logrus.Fields{logrus.ErrorKey: err, "container": c.ID}).Error("error removing container") } From 290fc0440c7bfc090021c08341eb1aa6bec6847d Mon Sep 17 00:00:00 2001 From: Cory Snider Date: Thu, 22 Jun 2023 16:45:32 -0400 Subject: [PATCH 2/2] daemon: fix panic on failed exec start If an exec fails to start in such a way that containerd publishes an exit event for it, daemon.ProcessEvent will race daemon.ContainerExecStart in handling the failure. This race has been a long-standing bug, which was mostly harmless until 4bafaa00aa810dd17fde13e563def08f96fffc31. After that change, the daemon would dereference a nil pointer and crash if ProcessEvent won the race. Restore the status quo buggy behaviour by adding a check to skip the dereference if execConfig.Process is nil. Signed-off-by: Cory Snider (cherry picked from commit 3b28a24e97a3b4153b5c55bb6e757a1b6b326df4) Signed-off-by: Sebastiaan van Stijn --- daemon/monitor.go | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/daemon/monitor.go b/daemon/monitor.go index 77a14f4aa1..285a4890f3 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -175,7 +175,7 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei // Remove the exec command from the container's store only and not the // daemon's store so that the exec command can be inspected. Remove it // before mutating execConfig to maintain the invariant that - // c.ExecCommands only contain execs in the Running state. + // c.ExecCommands only contains execs that have not exited. c.ExecCommands.Delete(execConfig.ID) execConfig.ExitCode = &ec @@ -191,15 +191,25 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei exitCode = ec - go func() { - if _, err := execConfig.Process.Delete(context.Background()); err != nil { - logrus.WithFields(logrus.Fields{ - logrus.ErrorKey: err, - "container": ei.ContainerID, - "process": ei.ProcessID, - }).Warn("failed to delete process") - } - }() + // If the exec failed at start in such a way that containerd + // publishes an exit event for it, we will race processing the event + // with daemon.ContainerExecStart() removing the exec from + // c.ExecCommands. If we win the race, we will find that there is no + // process to clean up. (And ContainerExecStart will clobber the + // exit code we set.) Prevent a nil-dereferenc panic in that + // situation to restore the status quo where this is merely a + // logical race condition. + if execConfig.Process != nil { + go func() { + if _, err := execConfig.Process.Delete(context.Background()); err != nil { + logrus.WithFields(logrus.Fields{ + logrus.ErrorKey: err, + "container": ei.ContainerID, + "process": ei.ProcessID, + }).Warn("failed to delete process") + } + }() + } } attributes := map[string]string{ "execID": ei.ProcessID,