Update container OOMKilled flag immediately

The OOMKilled flag on a container's state has historically behaved
rather unintuitively: it is updated on container exit to reflect whether
or not any process within the container has been OOM-killed during the
preceding run of the container. The OOMKilled flag would be set to true
when the container exits if any process within the container---including
execs---was OOM-killed at any time while the container was running,
whether or not the OOM-kill was the cause of the container exiting. The
flag is "sticky," persisting through the next start of the container and
only being cleared once the container exits without any processes having
been OOM-killed during that run.

Alter the behavior of the OOMKilled flag such that it signals whether
any process in the container has been OOM-killed since the most recent
start of the container. Set the flag immediately upon any process being
OOM-killed, and clear it when the container transitions to the "running"
state.

There is an ulterior motive for this change. It reduces the amount of
state the libcontainerd client needs to keep track of and clean up on
container exit. It's one less place the client could leak memory if a
container were to be deleted without going through libcontainerd.

Signed-off-by: Cory Snider <csnider@mirantis.com>
This commit is contained in:
Cory Snider 2022-04-26 14:30:52 -04:00
parent b75246202a
commit 57d2d6ef62
6 changed files with 4 additions and 24 deletions

View file

@ -4650,7 +4650,8 @@ definitions:
example: false example: false
OOMKilled: OOMKilled:
description: | description: |
Whether this container has been killed because it ran out of memory. Whether a process within this container has been killed because it ran
out of memory since the container was last started.
type: "boolean" type: "boolean"
example: false example: false
Dead: Dead:

View file

@ -53,9 +53,6 @@ type ExitStatus struct {
// The exit code with which the container exited. // The exit code with which the container exited.
ExitCode int ExitCode int
// Whether the container encountered an OOM.
OOMKilled bool
// Time at which the container died // Time at which the container died
ExitedAt time.Time ExitedAt time.Time
} }

View file

@ -270,6 +270,7 @@ func (s *State) SetRunning(pid int, initial bool) {
} }
s.ExitCodeValue = 0 s.ExitCodeValue = 0
s.Pid = pid s.Pid = pid
s.OOMKilled = false
if initial { if initial {
s.StartedAt = time.Now().UTC() s.StartedAt = time.Now().UTC()
} }
@ -287,7 +288,6 @@ func (s *State) SetStopped(exitStatus *ExitStatus) {
s.FinishedAt = exitStatus.ExitedAt s.FinishedAt = exitStatus.ExitedAt
} }
s.ExitCodeValue = exitStatus.ExitCode s.ExitCodeValue = exitStatus.ExitCode
s.OOMKilled = exitStatus.OOMKilled
s.notifyAndClear(&s.stopWaiters) s.notifyAndClear(&s.stopWaiters)
} }
@ -303,7 +303,6 @@ func (s *State) SetRestarting(exitStatus *ExitStatus) {
s.Pid = 0 s.Pid = 0
s.FinishedAt = time.Now().UTC() s.FinishedAt = time.Now().UTC()
s.ExitCodeValue = exitStatus.ExitCode s.ExitCodeValue = exitStatus.ExitCode
s.OOMKilled = exitStatus.OOMKilled
s.notifyAndClear(&s.stopWaiters) s.notifyAndClear(&s.stopWaiters)
} }

View file

@ -46,7 +46,6 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
if e != nil { if e != nil {
exitStatus.ExitCode = int(e.ExitCode) exitStatus.ExitCode = int(e.ExitCode)
exitStatus.ExitedAt = e.ExitedAt exitStatus.ExitedAt = e.ExitedAt
exitStatus.OOMKilled = e.OOMKilled
if e.Error != nil { if e.Error != nil {
c.SetError(e.Error) c.SetError(e.Error)
} }
@ -141,6 +140,7 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
c.Lock() c.Lock()
defer c.Unlock() defer c.Unlock()
c.OOMKilled = true
daemon.updateHealthMonitor(c) daemon.updateHealthMonitor(c)
if err := c.CheckpointTo(daemon.containersReplica); err != nil { if err := c.CheckpointTo(daemon.containersReplica); err != nil {
return err return err

View file

@ -47,8 +47,6 @@ type client struct {
backend libcontainerdtypes.Backend backend libcontainerdtypes.Backend
eventQ queue.Queue eventQ queue.Queue
oomMu sync.Mutex
oom map[string]bool
v2runcoptionsMu sync.Mutex v2runcoptionsMu sync.Mutex
// v2runcoptions is used for copying options specified on Create() to Start() // v2runcoptions is used for copying options specified on Create() to Start()
v2runcoptions map[string]v2runcoptions.Options v2runcoptions map[string]v2runcoptions.Options
@ -62,7 +60,6 @@ func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string,
logger: logrus.WithField("module", "libcontainerd").WithField("namespace", ns), logger: logrus.WithField("module", "libcontainerd").WithField("namespace", ns),
ns: ns, ns: ns,
backend: b, backend: b,
oom: make(map[string]bool),
v2runcoptions: make(map[string]v2runcoptions.Options), v2runcoptions: make(map[string]v2runcoptions.Options),
} }
@ -475,9 +472,6 @@ func (c *client) Delete(ctx context.Context, containerID string) error {
if err := ctr.Delete(ctx); err != nil { if err := ctr.Delete(ctx); err != nil {
return wrapError(err) return wrapError(err)
} }
c.oomMu.Lock()
delete(c.oom, containerID)
c.oomMu.Unlock()
c.v2runcoptionsMu.Lock() c.v2runcoptionsMu.Lock()
delete(c.v2runcoptions, containerID) delete(c.v2runcoptions, containerID)
c.v2runcoptionsMu.Unlock() c.v2runcoptionsMu.Unlock()
@ -767,7 +761,6 @@ func (c *client) processEventStream(ctx context.Context, ns string) {
c.logger.Debug("processing event stream") c.logger.Debug("processing event stream")
for { for {
var oomKilled bool
select { select {
case err = <-errC: case err = <-errC:
if err != nil { if err != nil {
@ -825,9 +818,7 @@ func (c *client) processEventStream(ctx context.Context, ns string) {
et = libcontainerdtypes.EventOOM et = libcontainerdtypes.EventOOM
ei = libcontainerdtypes.EventInfo{ ei = libcontainerdtypes.EventInfo{
ContainerID: t.ContainerID, ContainerID: t.ContainerID,
OOMKilled: true,
} }
oomKilled = true
case *apievents.TaskExecAdded: case *apievents.TaskExecAdded:
et = libcontainerdtypes.EventExecAdded et = libcontainerdtypes.EventExecAdded
ei = libcontainerdtypes.EventInfo{ ei = libcontainerdtypes.EventInfo{
@ -866,13 +857,6 @@ func (c *client) processEventStream(ctx context.Context, ns string) {
continue continue
} }
c.oomMu.Lock()
if oomKilled {
c.oom[ei.ContainerID] = true
}
ei.OOMKilled = c.oom[ei.ContainerID]
c.oomMu.Unlock()
c.processEvent(ctx, et, ei) c.processEvent(ctx, et, ei)
} }
} }

View file

@ -33,7 +33,6 @@ type EventInfo struct {
Pid uint32 Pid uint32
ExitCode uint32 ExitCode uint32
ExitedAt time.Time ExitedAt time.Time
OOMKilled bool
Error error Error error
} }