Merge pull request #45794 from thaJeztah/24.0_backport_fix_45770_processevent_nil_check

[24.0 backport] daemon: fix panic on failed exec start
This commit is contained in:
Bjorn Neergaard 2023-06-22 18:17:59 -06:00 committed by GitHub
commit a13eea29fb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -39,7 +39,10 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
es, err := tsk.Delete(ctx)
cancel()
if err != nil {
logrus.WithError(err).WithField("container", c.ID).Warnf("failed to delete container from containerd")
logrus.WithFields(logrus.Fields{
logrus.ErrorKey: err,
"container": c.ID,
}).Warn("failed to delete container from containerd")
} else {
exitStatus = container.ExitStatus{
ExitCode: int(es.ExitCode()),
@ -66,14 +69,15 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
execDuration := time.Since(c.StartedAt)
restart, wait, err := c.RestartManager().ShouldRestart(uint32(exitStatus.ExitCode), daemonShutdown || c.HasBeenManuallyStopped, execDuration)
if err != nil {
logrus.WithError(err).
WithField("container", c.ID).
WithField("restartCount", c.RestartCount).
WithField("exitStatus", exitStatus).
WithField("daemonShuttingDown", daemonShutdown).
WithField("hasBeenManuallyStopped", c.HasBeenManuallyStopped).
WithField("execDuration", execDuration).
Warn("ShouldRestart failed, container will not be restarted")
logrus.WithFields(logrus.Fields{
logrus.ErrorKey: err,
"container": c.ID,
"restartCount": c.RestartCount,
"exitStatus": exitStatus,
"daemonShuttingDown": daemonShutdown,
"hasBeenManuallyStopped": c.HasBeenManuallyStopped,
"execDuration": execDuration,
}).Warn("ShouldRestart failed, container will not be restarted")
restart = false
}
@ -85,11 +89,12 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
if restart {
c.RestartCount++
logrus.WithField("container", c.ID).
WithField("restartCount", c.RestartCount).
WithField("exitStatus", exitStatus).
WithField("manualRestart", c.HasBeenManuallyRestarted).
Debug("Restarting container")
logrus.WithFields(logrus.Fields{
"container": c.ID,
"restartCount": c.RestartCount,
"exitStatus": exitStatus,
"manualRestart": c.HasBeenManuallyRestarted,
}).Debug("Restarting container")
c.SetRestarting(&exitStatus)
} else {
c.SetStopped(&exitStatus)
@ -170,7 +175,7 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
// Remove the exec command from the container's store only and not the
// daemon's store so that the exec command can be inspected. Remove it
// before mutating execConfig to maintain the invariant that
// c.ExecCommands only contain execs in the Running state.
// c.ExecCommands only contains execs that have not exited.
c.ExecCommands.Delete(execConfig.ID)
execConfig.ExitCode = &ec
@ -186,14 +191,25 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
exitCode = ec
go func() {
if _, err := execConfig.Process.Delete(context.Background()); err != nil {
logrus.WithError(err).WithFields(logrus.Fields{
"container": ei.ContainerID,
"process": ei.ProcessID,
}).Warn("failed to delete process")
}
}()
// If the exec failed at start in such a way that containerd
// publishes an exit event for it, we will race processing the event
// with daemon.ContainerExecStart() removing the exec from
// c.ExecCommands. If we win the race, we will find that there is no
// process to clean up. (And ContainerExecStart will clobber the
// exit code we set.) Prevent a nil-dereferenc panic in that
// situation to restore the status quo where this is merely a
// logical race condition.
if execConfig.Process != nil {
go func() {
if _, err := execConfig.Process.Delete(context.Background()); err != nil {
logrus.WithFields(logrus.Fields{
logrus.ErrorKey: err,
"container": ei.ContainerID,
"process": ei.ProcessID,
}).Warn("failed to delete process")
}
}()
}
}
attributes := map[string]string{
"execID": ei.ProcessID,
@ -211,8 +227,10 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
if errdefs.IsNotFound(err) {
// The container was started by not-docker and so could have been deleted by
// not-docker before we got around to loading it from containerd.
logrus.WithField("container", c.ID).WithError(err).
Debug("could not load containerd container for start event")
logrus.WithFields(logrus.Fields{
logrus.ErrorKey: err,
"container": c.ID,
}).Debug("could not load containerd container for start event")
return nil
}
return err
@ -220,8 +238,10 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
tsk, err := ctr.Task(context.Background())
if err != nil {
if errdefs.IsNotFound(err) {
logrus.WithField("container", c.ID).WithError(err).
Debug("failed to load task for externally-started container")
logrus.WithFields(logrus.Fields{
logrus.ErrorKey: err,
"container": c.ID,
}).Debug("failed to load task for externally-started container")
return nil
}
return err
@ -286,5 +306,5 @@ func (daemon *Daemon) autoRemove(c *container.Container) {
return
}
logrus.WithError(err).WithField("container", c.ID).Error("error removing container")
logrus.WithFields(logrus.Fields{logrus.ErrorKey: err, "container": c.ID}).Error("error removing container")
}