Merge pull request #45794 from thaJeztah/24.0_backport_fix_45770_processevent_nil_check
[24.0 backport] daemon: fix panic on failed exec start
This commit is contained in:
commit
a13eea29fb
1 changed files with 48 additions and 28 deletions
|
@ -39,7 +39,10 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
|
|||
es, err := tsk.Delete(ctx)
|
||||
cancel()
|
||||
if err != nil {
|
||||
logrus.WithError(err).WithField("container", c.ID).Warnf("failed to delete container from containerd")
|
||||
logrus.WithFields(logrus.Fields{
|
||||
logrus.ErrorKey: err,
|
||||
"container": c.ID,
|
||||
}).Warn("failed to delete container from containerd")
|
||||
} else {
|
||||
exitStatus = container.ExitStatus{
|
||||
ExitCode: int(es.ExitCode()),
|
||||
|
@ -66,14 +69,15 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
|
|||
execDuration := time.Since(c.StartedAt)
|
||||
restart, wait, err := c.RestartManager().ShouldRestart(uint32(exitStatus.ExitCode), daemonShutdown || c.HasBeenManuallyStopped, execDuration)
|
||||
if err != nil {
|
||||
logrus.WithError(err).
|
||||
WithField("container", c.ID).
|
||||
WithField("restartCount", c.RestartCount).
|
||||
WithField("exitStatus", exitStatus).
|
||||
WithField("daemonShuttingDown", daemonShutdown).
|
||||
WithField("hasBeenManuallyStopped", c.HasBeenManuallyStopped).
|
||||
WithField("execDuration", execDuration).
|
||||
Warn("ShouldRestart failed, container will not be restarted")
|
||||
logrus.WithFields(logrus.Fields{
|
||||
logrus.ErrorKey: err,
|
||||
"container": c.ID,
|
||||
"restartCount": c.RestartCount,
|
||||
"exitStatus": exitStatus,
|
||||
"daemonShuttingDown": daemonShutdown,
|
||||
"hasBeenManuallyStopped": c.HasBeenManuallyStopped,
|
||||
"execDuration": execDuration,
|
||||
}).Warn("ShouldRestart failed, container will not be restarted")
|
||||
restart = false
|
||||
}
|
||||
|
||||
|
@ -85,11 +89,12 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
|
|||
|
||||
if restart {
|
||||
c.RestartCount++
|
||||
logrus.WithField("container", c.ID).
|
||||
WithField("restartCount", c.RestartCount).
|
||||
WithField("exitStatus", exitStatus).
|
||||
WithField("manualRestart", c.HasBeenManuallyRestarted).
|
||||
Debug("Restarting container")
|
||||
logrus.WithFields(logrus.Fields{
|
||||
"container": c.ID,
|
||||
"restartCount": c.RestartCount,
|
||||
"exitStatus": exitStatus,
|
||||
"manualRestart": c.HasBeenManuallyRestarted,
|
||||
}).Debug("Restarting container")
|
||||
c.SetRestarting(&exitStatus)
|
||||
} else {
|
||||
c.SetStopped(&exitStatus)
|
||||
|
@ -170,7 +175,7 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
|
|||
// Remove the exec command from the container's store only and not the
|
||||
// daemon's store so that the exec command can be inspected. Remove it
|
||||
// before mutating execConfig to maintain the invariant that
|
||||
// c.ExecCommands only contain execs in the Running state.
|
||||
// c.ExecCommands only contains execs that have not exited.
|
||||
c.ExecCommands.Delete(execConfig.ID)
|
||||
|
||||
execConfig.ExitCode = &ec
|
||||
|
@ -186,14 +191,25 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
|
|||
|
||||
exitCode = ec
|
||||
|
||||
go func() {
|
||||
if _, err := execConfig.Process.Delete(context.Background()); err != nil {
|
||||
logrus.WithError(err).WithFields(logrus.Fields{
|
||||
"container": ei.ContainerID,
|
||||
"process": ei.ProcessID,
|
||||
}).Warn("failed to delete process")
|
||||
}
|
||||
}()
|
||||
// If the exec failed at start in such a way that containerd
|
||||
// publishes an exit event for it, we will race processing the event
|
||||
// with daemon.ContainerExecStart() removing the exec from
|
||||
// c.ExecCommands. If we win the race, we will find that there is no
|
||||
// process to clean up. (And ContainerExecStart will clobber the
|
||||
// exit code we set.) Prevent a nil-dereferenc panic in that
|
||||
// situation to restore the status quo where this is merely a
|
||||
// logical race condition.
|
||||
if execConfig.Process != nil {
|
||||
go func() {
|
||||
if _, err := execConfig.Process.Delete(context.Background()); err != nil {
|
||||
logrus.WithFields(logrus.Fields{
|
||||
logrus.ErrorKey: err,
|
||||
"container": ei.ContainerID,
|
||||
"process": ei.ProcessID,
|
||||
}).Warn("failed to delete process")
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
attributes := map[string]string{
|
||||
"execID": ei.ProcessID,
|
||||
|
@ -211,8 +227,10 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
|
|||
if errdefs.IsNotFound(err) {
|
||||
// The container was started by not-docker and so could have been deleted by
|
||||
// not-docker before we got around to loading it from containerd.
|
||||
logrus.WithField("container", c.ID).WithError(err).
|
||||
Debug("could not load containerd container for start event")
|
||||
logrus.WithFields(logrus.Fields{
|
||||
logrus.ErrorKey: err,
|
||||
"container": c.ID,
|
||||
}).Debug("could not load containerd container for start event")
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
|
@ -220,8 +238,10 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
|
|||
tsk, err := ctr.Task(context.Background())
|
||||
if err != nil {
|
||||
if errdefs.IsNotFound(err) {
|
||||
logrus.WithField("container", c.ID).WithError(err).
|
||||
Debug("failed to load task for externally-started container")
|
||||
logrus.WithFields(logrus.Fields{
|
||||
logrus.ErrorKey: err,
|
||||
"container": c.ID,
|
||||
}).Debug("failed to load task for externally-started container")
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
|
@ -286,5 +306,5 @@ func (daemon *Daemon) autoRemove(c *container.Container) {
|
|||
return
|
||||
}
|
||||
|
||||
logrus.WithError(err).WithField("container", c.ID).Error("error removing container")
|
||||
logrus.WithFields(logrus.Fields{logrus.ErrorKey: err, "container": c.ID}).Error("error removing container")
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue