Selaa lähdekoodia

Update container OOMKilled flag immediately

The OOMKilled flag on a container's state has historically behaved
rather unintuitively: it is updated on container exit to reflect whether
or not any process within the container has been OOM-killed during the
preceding run of the container. The OOMKilled flag would be set to true
when the container exits if any process within the container---including
execs---was OOM-killed at any time while the container was running,
whether or not the OOM-kill was the cause of the container exiting. The
flag is "sticky," persisting through the next start of the container;
it is only cleared once the container exits without any process having
been OOM-killed during that run.

Alter the behavior of the OOMKilled flag such that it signals whether
any process in the container had been OOM-killed since the most recent
start of the container. Set the flag immediately upon any process being
OOM-killed, and clear it when the container transitions to the "running"
state.

There is an ulterior motive for this change. It reduces the amount of
state the libcontainerd client needs to keep track of and clean up on
container exit. It's one less place the client could leak memory if a
container was to be deleted without going through libcontainerd.

Signed-off-by: Cory Snider <csnider@mirantis.com>
Cory Snider 3 vuotta sitten
vanhempi
commit
57d2d6ef62

+ 2 - 1
api/swagger.yaml

@@ -4650,7 +4650,8 @@ definitions:
         example: false
       OOMKilled:
         description: |
-          Whether this container has been killed because it ran out of memory.
+          Whether a process within this container has been killed because it ran
+          out of memory since the container was last started.
         type: "boolean"
         example: false
       Dead:

+ 0 - 3
container/container.go

@@ -53,9 +53,6 @@ type ExitStatus struct {
 	// The exit code with which the container exited.
 	ExitCode int
 
-	// Whether the container encountered an OOM.
-	OOMKilled bool
-
 	// Time at which the container died
 	ExitedAt time.Time
 }

+ 1 - 2
container/state.go

@@ -270,6 +270,7 @@ func (s *State) SetRunning(pid int, initial bool) {
 	}
 	s.ExitCodeValue = 0
 	s.Pid = pid
+	s.OOMKilled = false
 	if initial {
 		s.StartedAt = time.Now().UTC()
 	}
@@ -287,7 +288,6 @@ func (s *State) SetStopped(exitStatus *ExitStatus) {
 		s.FinishedAt = exitStatus.ExitedAt
 	}
 	s.ExitCodeValue = exitStatus.ExitCode
-	s.OOMKilled = exitStatus.OOMKilled
 
 	s.notifyAndClear(&s.stopWaiters)
 }
@@ -303,7 +303,6 @@ func (s *State) SetRestarting(exitStatus *ExitStatus) {
 	s.Pid = 0
 	s.FinishedAt = time.Now().UTC()
 	s.ExitCodeValue = exitStatus.ExitCode
-	s.OOMKilled = exitStatus.OOMKilled
 
 	s.notifyAndClear(&s.stopWaiters)
 }

+ 1 - 1
daemon/monitor.go

@@ -46,7 +46,6 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
 	if e != nil {
 		exitStatus.ExitCode = int(e.ExitCode)
 		exitStatus.ExitedAt = e.ExitedAt
-		exitStatus.OOMKilled = e.OOMKilled
 		if e.Error != nil {
 			c.SetError(e.Error)
 		}
@@ -141,6 +140,7 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
 
 		c.Lock()
 		defer c.Unlock()
+		c.OOMKilled = true
 		daemon.updateHealthMonitor(c)
 		if err := c.CheckpointTo(daemon.containersReplica); err != nil {
 			return err

+ 0 - 16
libcontainerd/remote/client.go

@@ -47,8 +47,6 @@ type client struct {
 
 	backend         libcontainerdtypes.Backend
 	eventQ          queue.Queue
-	oomMu           sync.Mutex
-	oom             map[string]bool
 	v2runcoptionsMu sync.Mutex
 	// v2runcoptions is used for copying options specified on Create() to Start()
 	v2runcoptions map[string]v2runcoptions.Options
@@ -62,7 +60,6 @@ func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string,
 		logger:        logrus.WithField("module", "libcontainerd").WithField("namespace", ns),
 		ns:            ns,
 		backend:       b,
-		oom:           make(map[string]bool),
 		v2runcoptions: make(map[string]v2runcoptions.Options),
 	}
 
@@ -475,9 +472,6 @@ func (c *client) Delete(ctx context.Context, containerID string) error {
 	if err := ctr.Delete(ctx); err != nil {
 		return wrapError(err)
 	}
-	c.oomMu.Lock()
-	delete(c.oom, containerID)
-	c.oomMu.Unlock()
 	c.v2runcoptionsMu.Lock()
 	delete(c.v2runcoptions, containerID)
 	c.v2runcoptionsMu.Unlock()
@@ -767,7 +761,6 @@ func (c *client) processEventStream(ctx context.Context, ns string) {
 	c.logger.Debug("processing event stream")
 
 	for {
-		var oomKilled bool
 		select {
 		case err = <-errC:
 			if err != nil {
@@ -825,9 +818,7 @@ func (c *client) processEventStream(ctx context.Context, ns string) {
 				et = libcontainerdtypes.EventOOM
 				ei = libcontainerdtypes.EventInfo{
 					ContainerID: t.ContainerID,
-					OOMKilled:   true,
 				}
-				oomKilled = true
 			case *apievents.TaskExecAdded:
 				et = libcontainerdtypes.EventExecAdded
 				ei = libcontainerdtypes.EventInfo{
@@ -866,13 +857,6 @@ func (c *client) processEventStream(ctx context.Context, ns string) {
 				continue
 			}
 
-			c.oomMu.Lock()
-			if oomKilled {
-				c.oom[ei.ContainerID] = true
-			}
-			ei.OOMKilled = c.oom[ei.ContainerID]
-			c.oomMu.Unlock()
-
 			c.processEvent(ctx, et, ei)
 		}
 	}

+ 0 - 1
libcontainerd/types/types.go

@@ -33,7 +33,6 @@ type EventInfo struct {
 	Pid         uint32
 	ExitCode    uint32
 	ExitedAt    time.Time
-	OOMKilled   bool
 	Error       error
 }