소스 검색

This patch adds the ability for docker to detect out-of-memory conditions in containers.
Since the containers can handle out-of-memory kernel kills gracefully, docker
will only provide out-of-memory information as additional metadata as part of
the container status.
Docker-DCO-1.1-Signed-off-by: Vishnu Kannan <vishnuk@google.com> (github: vishh)

Vishnu Kannan 10 년 전
부모
커밋
f96e04ffc7
8개의 변경된 파일에 119개의 추가 그리고 54개의 삭제
  1. 3 3
      daemon/daemon.go
  2. 10 1
      daemon/execdriver/driver.go
  3. 7 7
      daemon/execdriver/lxc/driver.go
  4. 68 28
      daemon/execdriver/native/driver.go
  5. 5 5
      daemon/monitor.go
  6. 21 8
      daemon/state.go
  7. 3 1
      daemon/state_test.go
  8. 2 1
      integration/runtime_test.go

+ 3 - 3
daemon/daemon.go

@@ -231,7 +231,7 @@ func (daemon *Daemon) register(container *Container, updateSuffixarray bool) err
 		log.Debugf("killing old running container %s", container.ID)
 
 		existingPid := container.Pid
-		container.SetStopped(0)
+		container.SetStopped(&execdriver.ExitStatus{0, false})
 
 		// We only have to handle this for lxc because the other drivers will ensure that
 		// no processes are left when docker dies
@@ -263,7 +263,7 @@ func (daemon *Daemon) register(container *Container, updateSuffixarray bool) err
 
 			log.Debugf("Marking as stopped")
 
-			container.SetStopped(-127)
+			container.SetStopped(&execdriver.ExitStatus{-127, false})
 			if err := container.ToDisk(); err != nil {
 				return err
 			}
@@ -991,7 +991,7 @@ func (daemon *Daemon) Diff(container *Container) (archive.Archive, error) {
 	return daemon.driver.Diff(container.ID, initID)
 }
 
-func (daemon *Daemon) Run(c *Container, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
+func (daemon *Daemon) Run(c *Container, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (*execdriver.ExitStatus, error) {
 	return daemon.execDriver.Run(c.command, pipes, startCallback)
 }
 

+ 10 - 1
daemon/execdriver/driver.go

@@ -40,9 +40,18 @@ type TtyTerminal interface {
 	Master() *os.File
 }
 
+// ExitStatus provides exit reasons for a container.
+type ExitStatus struct {
+	// The exit code with which the container exited.
+	ExitCode int
+
+	// Whether the container encountered an OOM.
+	OOMKilled bool
+}
+
 type Driver interface {
 	Run(c *Command, pipes *Pipes, startCallback StartCallback) (int, error) // Run executes the process and blocks until the process exits and returns the exit code
-	// Exec executes the process in a running container, blocks until the process exits and returns the exit code
+	// Exec executes the process in an existing container, blocks until the process exits and returns the exit code
 	Exec(c *Command, processConfig *ProcessConfig, pipes *Pipes, startCallback StartCallback) (int, error)
 	Kill(c *Command, sig int) error
 	Pause(c *Command) error

+ 7 - 7
daemon/execdriver/lxc/driver.go

@@ -55,7 +55,7 @@ func (d *driver) Name() string {
 	return fmt.Sprintf("%s-%s", DriverName, version)
 }
 
-func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
+func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (*execdriver.ExitStatus, error) {
 	var (
 		term execdriver.Terminal
 		err  error
@@ -76,11 +76,11 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
 	})
 
 	if err := d.generateEnvConfig(c); err != nil {
-		return -1, err
+		return nil, err
 	}
 	configPath, err := d.generateLXCConfig(c)
 	if err != nil {
-		return -1, err
+		return nil, err
 	}
 	params := []string{
 		"lxc-start",
@@ -155,11 +155,11 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
 	c.ProcessConfig.Args = append([]string{name}, arg...)
 
 	if err := nodes.CreateDeviceNodes(c.Rootfs, c.AutoCreatedDevices); err != nil {
-		return -1, err
+		return nil, err
 	}
 
 	if err := c.ProcessConfig.Start(); err != nil {
-		return -1, err
+		return nil, err
 	}
 
 	var (
@@ -183,7 +183,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
 			c.ProcessConfig.Process.Kill()
 			c.ProcessConfig.Wait()
 		}
-		return -1, err
+		return nil, err
 	}
 
 	c.ContainerPid = pid
@@ -194,7 +194,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
 
 	<-waitLock
 
-	return getExitCode(c), waitErr
+	return &execdriver.ExitStatus{getExitCode(c), false}, waitErr
 }
 
 /// Return the exit code of the process

+ 68 - 28
daemon/execdriver/native/driver.go

@@ -14,6 +14,7 @@ import (
 	"sync"
 	"syscall"
 
+	log "github.com/Sirupsen/logrus"
 	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/pkg/term"
 	"github.com/docker/libcontainer"
@@ -60,11 +61,20 @@ func NewDriver(root, initPath string) (*driver, error) {
 	}, nil
 }
 
-func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
+func (d *driver) notifyOnOOM(config *libcontainer.Config) (<-chan struct{}, error) {
+	return fs.NotifyOnOOM(config.Cgroups)
+}
+
+type execOutput struct {
+	exitCode int
+	err      error
+}
+
+func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (*execdriver.ExitStatus, error) {
 	// take the Command and populate the libcontainer.Config from it
 	container, err := d.createContainer(c)
 	if err != nil {
-		return -1, err
+		return nil, err
 	}
 
 	var term execdriver.Terminal
@@ -75,7 +85,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
 		term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes)
 	}
 	if err != nil {
-		return -1, err
+		return nil, err
 	}
 	c.ProcessConfig.Terminal = term
 
@@ -92,40 +102,70 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
 	)
 
 	if err := d.createContainerRoot(c.ID); err != nil {
-		return -1, err
+		return nil, err
 	}
 	defer d.cleanContainer(c.ID)
 
 	if err := d.writeContainerFile(container, c.ID); err != nil {
-		return -1, err
+		return nil, err
 	}
 
-	return namespaces.Exec(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, dataPath, args, func(container *libcontainer.Config, console, dataPath, init string, child *os.File, args []string) *exec.Cmd {
-		c.ProcessConfig.Path = d.initPath
-		c.ProcessConfig.Args = append([]string{
-			DriverName,
-			"-console", console,
-			"-pipe", "3",
-			"-root", filepath.Join(d.root, c.ID),
-			"--",
-		}, args...)
-
-		// set this to nil so that when we set the clone flags anything else is reset
-		c.ProcessConfig.SysProcAttr = &syscall.SysProcAttr{
-			Cloneflags: uintptr(namespaces.GetNamespaceFlags(container.Namespaces)),
-		}
-		c.ProcessConfig.ExtraFiles = []*os.File{child}
+	execOutputChan := make(chan execOutput, 0)
+	waitForStart := make(chan struct{}, 0)
 
-		c.ProcessConfig.Env = container.Env
-		c.ProcessConfig.Dir = container.RootFs
+	go func() {
+		exitCode, err := namespaces.Exec(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, dataPath, args, func(container *libcontainer.Config, console, dataPath, init string, child *os.File, args []string) *exec.Cmd {
+			c.ProcessConfig.Path = d.initPath
+			c.ProcessConfig.Args = append([]string{
+				DriverName,
+				"-console", console,
+				"-pipe", "3",
+				"-root", filepath.Join(d.root, c.ID),
+				"--",
+			}, args...)
+
+			// set this to nil so that when we set the clone flags anything else is reset
+			c.ProcessConfig.SysProcAttr = &syscall.SysProcAttr{
+				Cloneflags: uintptr(namespaces.GetNamespaceFlags(container.Namespaces)),
+			}
+			c.ProcessConfig.ExtraFiles = []*os.File{child}
 
-		return &c.ProcessConfig.Cmd
-	}, func() {
-		if startCallback != nil {
-			c.ContainerPid = c.ProcessConfig.Process.Pid
-			startCallback(&c.ProcessConfig, c.ContainerPid)
+			c.ProcessConfig.Env = container.Env
+			c.ProcessConfig.Dir = container.RootFs
+
+			return &c.ProcessConfig.Cmd
+		}, func() {
+			close(waitForStart)
+			if startCallback != nil {
+				c.ContainerPid = c.ProcessConfig.Process.Pid
+				startCallback(&c.ProcessConfig, c.ContainerPid)
+			}
+		})
+		execOutputChan <- execOutput{exitCode, err}
+	}()
+
+	select {
+	case execOutput := <-execOutputChan:
+		return &execdriver.ExitStatus{execOutput.exitCode, false}, execOutput.err
+	case <-waitForStart:
+		break
+	}
+
+	oomKill := false
+	go func() {
+		oomKillNotification, err := d.notifyOnOOM(container)
+		if err == nil {
+			if _, ok := <-oomKillNotification; ok {
+				oomKill = true
+			}
+		} else {
+			log.Infof("WARNING: Your kernel does not support OOM notifications: %s", err)
 		}
-	})
+	}()
+	// wait for the container to exit.
+	execOutput := <-execOutputChan
+
+	return &execdriver.ExitStatus{execOutput.exitCode, oomKill}, execOutput.err
 }
 
 func (d *driver) Kill(p *execdriver.Command, sig int) error {

+ 5 - 5
daemon/monitor.go

@@ -100,7 +100,7 @@ func (m *containerMonitor) Close() error {
 func (m *containerMonitor) Start() error {
 	var (
 		err        error
-		exitStatus int
+		exitStatus *execdriver.ExitStatus
 		// this variable indicates where we in execution flow:
 		// before Run or after
 		afterRun bool
@@ -150,9 +150,9 @@ func (m *containerMonitor) Start() error {
 		// here container.Lock is already lost
 		afterRun = true
 
-		m.resetMonitor(err == nil && exitStatus == 0)
+		m.resetMonitor(err == nil && exitStatus.ExitCode == 0)
 
-		if m.shouldRestart(exitStatus) {
+		if m.shouldRestart(exitStatus.ExitCode) {
 			m.container.SetRestarting(exitStatus)
 			m.container.LogEvent("die")
 			m.resetContainer(true)
@@ -209,7 +209,7 @@ func (m *containerMonitor) waitForNextRestart() {
 
 // shouldRestart checks the restart policy and applies the rules to determine if
 // the container's process should be restarted
-func (m *containerMonitor) shouldRestart(exitStatus int) bool {
+func (m *containerMonitor) shouldRestart(exitCode int) bool {
 	m.mux.Lock()
 	defer m.mux.Unlock()
 
@@ -228,7 +228,7 @@ func (m *containerMonitor) shouldRestart(exitStatus int) bool {
 			return false
 		}
 
-		return exitStatus != 0
+		return exitCode != 0
 	}
 
 	return false

+ 21 - 8
daemon/state.go

@@ -5,6 +5,7 @@ import (
 	"sync"
 	"time"
 
+	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/pkg/units"
 )
 
@@ -13,6 +14,7 @@ type State struct {
 	Running    bool
 	Paused     bool
 	Restarting bool
+	OOMKilled  bool
 	Pid        int
 	ExitCode   int
 	Error      string // contains last known error when starting the container
@@ -29,12 +31,16 @@ func NewState() *State {
 
 // String returns a human-readable description of the state
 func (s *State) String() string {
+	oomInfo := ""
+	if s.OOMKilled {
+		oomInfo = "possibly due to lack of memory"
+	}
 	if s.Running {
 		if s.Paused {
 			return fmt.Sprintf("Up %s (Paused)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
 		}
 		if s.Restarting {
-			return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
+			return fmt.Sprintf("Restarting (%d) %s ago %s", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)), oomInfo)
 		}
 
 		return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
@@ -44,7 +50,7 @@ func (s *State) String() string {
 		return ""
 	}
 
-	return fmt.Sprintf("Exited (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
+	return fmt.Sprintf("Exited (%d) %s ago %s", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)), oomInfo)
 }
 
 // StateString returns a single string to describe state
@@ -149,25 +155,29 @@ func (s *State) setRunning(pid int) {
 	s.waitChan = make(chan struct{})
 }
 
-func (s *State) SetStopped(exitCode int) {
+func (s *State) SetStopped(exitStatus *execdriver.ExitStatus) {
 	s.Lock()
-	s.setStopped(exitCode)
+	s.setStopped(exitStatus)
 	s.Unlock()
 }
 
-func (s *State) setStopped(exitCode int) {
+func (s *State) setStopped(exitStatus *execdriver.ExitStatus) {
 	s.Running = false
 	s.Restarting = false
 	s.Pid = 0
 	s.FinishedAt = time.Now().UTC()
-	s.ExitCode = exitCode
+	s.ExitCode = exitStatus.ExitCode
+	s.OOMKilled = false
+	if exitStatus.OOMKilled {
+		s.OOMKilled = true
+	}
 	close(s.waitChan) // fire waiters for stop
 	s.waitChan = make(chan struct{})
 }
 
 // SetRestarting is when docker handles the auto restart of containers when they are
 // in the middle of a stop and being restarted again
-func (s *State) SetRestarting(exitCode int) {
+func (s *State) SetRestarting(exitStatus *execdriver.ExitStatus) {
 	s.Lock()
 	// we should consider the container running when it is restarting because of
 	// all the checks in docker around rm/stop/etc
@@ -175,7 +185,10 @@ func (s *State) SetRestarting(exitCode int) {
 	s.Restarting = true
 	s.Pid = 0
 	s.FinishedAt = time.Now().UTC()
-	s.ExitCode = exitCode
+	s.ExitCode = exitStatus.ExitCode
+	if exitStatus.OOMKilled {
+		s.OOMKilled = true
+	}
 	close(s.waitChan) // fire waiters for stop
 	s.waitChan = make(chan struct{})
 	s.Unlock()

+ 3 - 1
daemon/state_test.go

@@ -4,6 +4,8 @@ import (
 	"sync/atomic"
 	"testing"
 	"time"
+
+	"github.com/docker/docker/daemon/execdriver"
 )
 
 func TestStateRunStop(t *testing.T) {
@@ -47,7 +49,7 @@ func TestStateRunStop(t *testing.T) {
 			atomic.StoreInt64(&exit, int64(exitCode))
 			close(stopped)
 		}()
-		s.SetStopped(i)
+		s.SetStopped(&execdriver.ExitStatus{i, false})
 		if s.IsRunning() {
 			t.Fatal("State is running")
 		}

+ 2 - 1
integration/runtime_test.go

@@ -18,6 +18,7 @@ import (
 
 	log "github.com/Sirupsen/logrus"
 	"github.com/docker/docker/daemon"
+	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/engine"
 	"github.com/docker/docker/image"
 	"github.com/docker/docker/nat"
@@ -652,7 +653,7 @@ func TestRestore(t *testing.T) {
 	if err := container3.Run(); err != nil {
 		t.Fatal(err)
 	}
-	container2.SetStopped(0)
+	container2.SetStopped(&execdriver.ExitStatus{0, false})
 }
 
 func TestDefaultContainerName(t *testing.T) {