Update container OOMKilled flag immediately
The OOMKilled flag on a container's state has historically behaved rather unintuitively: it is updated on container exit to reflect whether or not any process within the container has been OOM-killed during the preceding run of the container. The OOMKilled flag would be set to true when the container exits if any process within the container---including execs---was OOM-killed at any time while the container was running, whether or not the OOM-kill was the cause of the container exiting. The flag is "sticky," persisting through the next start of the container, and is only cleared once the container exits without any processes having been OOM-killed during that run. Alter the behavior of the OOMKilled flag such that it signals whether any process in the container had been OOM-killed since the most recent start of the container. Set the flag immediately upon any process being OOM-killed, and clear it when the container transitions to the "running" state. There is an ulterior motive for this change. It reduces the amount of state the libcontainerd client needs to keep track of and clean up on container exit. It's one less place the client could leak memory if a container were to be deleted without going through libcontainerd. Signed-off-by: Cory Snider <csnider@mirantis.com>
This commit is contained in:
parent
b75246202a
commit
57d2d6ef62
6 changed files with 4 additions and 24 deletions
|
@ -4650,7 +4650,8 @@ definitions:
|
||||||
example: false
|
example: false
|
||||||
OOMKilled:
|
OOMKilled:
|
||||||
description: |
|
description: |
|
||||||
Whether this container has been killed because it ran out of memory.
|
Whether a process within this container has been killed because it ran
|
||||||
|
out of memory since the container was last started.
|
||||||
type: "boolean"
|
type: "boolean"
|
||||||
example: false
|
example: false
|
||||||
Dead:
|
Dead:
|
||||||
|
|
|
@ -53,9 +53,6 @@ type ExitStatus struct {
|
||||||
// The exit code with which the container exited.
|
// The exit code with which the container exited.
|
||||||
ExitCode int
|
ExitCode int
|
||||||
|
|
||||||
// Whether the container encountered an OOM.
|
|
||||||
OOMKilled bool
|
|
||||||
|
|
||||||
// Time at which the container died
|
// Time at which the container died
|
||||||
ExitedAt time.Time
|
ExitedAt time.Time
|
||||||
}
|
}
|
||||||
|
|
|
@ -270,6 +270,7 @@ func (s *State) SetRunning(pid int, initial bool) {
|
||||||
}
|
}
|
||||||
s.ExitCodeValue = 0
|
s.ExitCodeValue = 0
|
||||||
s.Pid = pid
|
s.Pid = pid
|
||||||
|
s.OOMKilled = false
|
||||||
if initial {
|
if initial {
|
||||||
s.StartedAt = time.Now().UTC()
|
s.StartedAt = time.Now().UTC()
|
||||||
}
|
}
|
||||||
|
@ -287,7 +288,6 @@ func (s *State) SetStopped(exitStatus *ExitStatus) {
|
||||||
s.FinishedAt = exitStatus.ExitedAt
|
s.FinishedAt = exitStatus.ExitedAt
|
||||||
}
|
}
|
||||||
s.ExitCodeValue = exitStatus.ExitCode
|
s.ExitCodeValue = exitStatus.ExitCode
|
||||||
s.OOMKilled = exitStatus.OOMKilled
|
|
||||||
|
|
||||||
s.notifyAndClear(&s.stopWaiters)
|
s.notifyAndClear(&s.stopWaiters)
|
||||||
}
|
}
|
||||||
|
@ -303,7 +303,6 @@ func (s *State) SetRestarting(exitStatus *ExitStatus) {
|
||||||
s.Pid = 0
|
s.Pid = 0
|
||||||
s.FinishedAt = time.Now().UTC()
|
s.FinishedAt = time.Now().UTC()
|
||||||
s.ExitCodeValue = exitStatus.ExitCode
|
s.ExitCodeValue = exitStatus.ExitCode
|
||||||
s.OOMKilled = exitStatus.OOMKilled
|
|
||||||
|
|
||||||
s.notifyAndClear(&s.stopWaiters)
|
s.notifyAndClear(&s.stopWaiters)
|
||||||
}
|
}
|
||||||
|
|
|
@ -46,7 +46,6 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
|
||||||
if e != nil {
|
if e != nil {
|
||||||
exitStatus.ExitCode = int(e.ExitCode)
|
exitStatus.ExitCode = int(e.ExitCode)
|
||||||
exitStatus.ExitedAt = e.ExitedAt
|
exitStatus.ExitedAt = e.ExitedAt
|
||||||
exitStatus.OOMKilled = e.OOMKilled
|
|
||||||
if e.Error != nil {
|
if e.Error != nil {
|
||||||
c.SetError(e.Error)
|
c.SetError(e.Error)
|
||||||
}
|
}
|
||||||
|
@ -141,6 +140,7 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
|
||||||
|
|
||||||
c.Lock()
|
c.Lock()
|
||||||
defer c.Unlock()
|
defer c.Unlock()
|
||||||
|
c.OOMKilled = true
|
||||||
daemon.updateHealthMonitor(c)
|
daemon.updateHealthMonitor(c)
|
||||||
if err := c.CheckpointTo(daemon.containersReplica); err != nil {
|
if err := c.CheckpointTo(daemon.containersReplica); err != nil {
|
||||||
return err
|
return err
|
||||||
|
|
|
@ -47,8 +47,6 @@ type client struct {
|
||||||
|
|
||||||
backend libcontainerdtypes.Backend
|
backend libcontainerdtypes.Backend
|
||||||
eventQ queue.Queue
|
eventQ queue.Queue
|
||||||
oomMu sync.Mutex
|
|
||||||
oom map[string]bool
|
|
||||||
v2runcoptionsMu sync.Mutex
|
v2runcoptionsMu sync.Mutex
|
||||||
// v2runcoptions is used for copying options specified on Create() to Start()
|
// v2runcoptions is used for copying options specified on Create() to Start()
|
||||||
v2runcoptions map[string]v2runcoptions.Options
|
v2runcoptions map[string]v2runcoptions.Options
|
||||||
|
@ -62,7 +60,6 @@ func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string,
|
||||||
logger: logrus.WithField("module", "libcontainerd").WithField("namespace", ns),
|
logger: logrus.WithField("module", "libcontainerd").WithField("namespace", ns),
|
||||||
ns: ns,
|
ns: ns,
|
||||||
backend: b,
|
backend: b,
|
||||||
oom: make(map[string]bool),
|
|
||||||
v2runcoptions: make(map[string]v2runcoptions.Options),
|
v2runcoptions: make(map[string]v2runcoptions.Options),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -475,9 +472,6 @@ func (c *client) Delete(ctx context.Context, containerID string) error {
|
||||||
if err := ctr.Delete(ctx); err != nil {
|
if err := ctr.Delete(ctx); err != nil {
|
||||||
return wrapError(err)
|
return wrapError(err)
|
||||||
}
|
}
|
||||||
c.oomMu.Lock()
|
|
||||||
delete(c.oom, containerID)
|
|
||||||
c.oomMu.Unlock()
|
|
||||||
c.v2runcoptionsMu.Lock()
|
c.v2runcoptionsMu.Lock()
|
||||||
delete(c.v2runcoptions, containerID)
|
delete(c.v2runcoptions, containerID)
|
||||||
c.v2runcoptionsMu.Unlock()
|
c.v2runcoptionsMu.Unlock()
|
||||||
|
@ -767,7 +761,6 @@ func (c *client) processEventStream(ctx context.Context, ns string) {
|
||||||
c.logger.Debug("processing event stream")
|
c.logger.Debug("processing event stream")
|
||||||
|
|
||||||
for {
|
for {
|
||||||
var oomKilled bool
|
|
||||||
select {
|
select {
|
||||||
case err = <-errC:
|
case err = <-errC:
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -825,9 +818,7 @@ func (c *client) processEventStream(ctx context.Context, ns string) {
|
||||||
et = libcontainerdtypes.EventOOM
|
et = libcontainerdtypes.EventOOM
|
||||||
ei = libcontainerdtypes.EventInfo{
|
ei = libcontainerdtypes.EventInfo{
|
||||||
ContainerID: t.ContainerID,
|
ContainerID: t.ContainerID,
|
||||||
OOMKilled: true,
|
|
||||||
}
|
}
|
||||||
oomKilled = true
|
|
||||||
case *apievents.TaskExecAdded:
|
case *apievents.TaskExecAdded:
|
||||||
et = libcontainerdtypes.EventExecAdded
|
et = libcontainerdtypes.EventExecAdded
|
||||||
ei = libcontainerdtypes.EventInfo{
|
ei = libcontainerdtypes.EventInfo{
|
||||||
|
@ -866,13 +857,6 @@ func (c *client) processEventStream(ctx context.Context, ns string) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
c.oomMu.Lock()
|
|
||||||
if oomKilled {
|
|
||||||
c.oom[ei.ContainerID] = true
|
|
||||||
}
|
|
||||||
ei.OOMKilled = c.oom[ei.ContainerID]
|
|
||||||
c.oomMu.Unlock()
|
|
||||||
|
|
||||||
c.processEvent(ctx, et, ei)
|
c.processEvent(ctx, et, ei)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,7 +33,6 @@ type EventInfo struct {
|
||||||
Pid uint32
|
Pid uint32
|
||||||
ExitCode uint32
|
ExitCode uint32
|
||||||
ExitedAt time.Time
|
ExitedAt time.Time
|
||||||
OOMKilled bool
|
|
||||||
Error error
|
Error error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue