123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547 |
- // +build linux,cgo
- package native
- import (
- "fmt"
- "io"
- "os"
- "os/exec"
- "path/filepath"
- "strings"
- "sync"
- "syscall"
- "time"
- "github.com/Sirupsen/logrus"
- "github.com/docker/docker/daemon/execdriver"
- "github.com/docker/docker/pkg/parsers"
- "github.com/docker/docker/pkg/pools"
- "github.com/docker/docker/pkg/reexec"
- sysinfo "github.com/docker/docker/pkg/system"
- "github.com/docker/docker/pkg/term"
- "github.com/opencontainers/runc/libcontainer"
- "github.com/opencontainers/runc/libcontainer/apparmor"
- "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
- "github.com/opencontainers/runc/libcontainer/configs"
- "github.com/opencontainers/runc/libcontainer/system"
- "github.com/opencontainers/runc/libcontainer/utils"
- )
// Define constants for native driver
const (
	// DriverName is the name reported for this exec driver.
	DriverName = "native"
	// Version is the version of the native exec driver.
	Version = "0.2"
)
// Driver contains all information for native driver,
// it implements execdriver.Driver.
type Driver struct {
	root             string                            // state directory; one subdirectory per container ID
	initPath         string                            // path to the init binary (stored, not used in this file)
	activeContainers map[string]libcontainer.Container // running containers keyed by container ID
	machineMemory    int64                             // total machine memory; Stats fallback when no cgroup limit is set
	factory          libcontainer.Factory              // creates libcontainer containers under root
	sync.Mutex                                         // guards activeContainers
}
- // NewDriver returns a new native driver, called from NewDriver of execdriver.
- func NewDriver(root, initPath string, options []string) (*Driver, error) {
- meminfo, err := sysinfo.ReadMemInfo()
- if err != nil {
- return nil, err
- }
- if err := sysinfo.MkdirAll(root, 0700); err != nil {
- return nil, err
- }
- if apparmor.IsEnabled() {
- if err := installAppArmorProfile(); err != nil {
- apparmorProfiles := []string{"docker-default"}
- // Allow daemon to run if loading failed, but are active
- // (possibly through another run, manually, or via system startup)
- for _, policy := range apparmorProfiles {
- if err := hasAppArmorProfileLoaded(policy); err != nil {
- return nil, fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy)
- }
- }
- }
- }
- // choose cgroup manager
- // this makes sure there are no breaking changes to people
- // who upgrade from versions without native.cgroupdriver opt
- cgm := libcontainer.Cgroupfs
- if systemd.UseSystemd() {
- cgm = libcontainer.SystemdCgroups
- }
- // parse the options
- for _, option := range options {
- key, val, err := parsers.ParseKeyValueOpt(option)
- if err != nil {
- return nil, err
- }
- key = strings.ToLower(key)
- switch key {
- case "native.cgroupdriver":
- // override the default if they set options
- switch val {
- case "systemd":
- if systemd.UseSystemd() {
- cgm = libcontainer.SystemdCgroups
- } else {
- // warn them that they chose the wrong driver
- logrus.Warn("You cannot use systemd as native.cgroupdriver, using cgroupfs instead")
- }
- case "cgroupfs":
- cgm = libcontainer.Cgroupfs
- default:
- return nil, fmt.Errorf("Unknown native.cgroupdriver given %q. try cgroupfs or systemd", val)
- }
- default:
- return nil, fmt.Errorf("Unknown option %s\n", key)
- }
- }
- f, err := libcontainer.New(
- root,
- cgm,
- libcontainer.InitPath(reexec.Self(), DriverName),
- )
- if err != nil {
- return nil, err
- }
- return &Driver{
- root: root,
- initPath: initPath,
- activeContainers: make(map[string]libcontainer.Container),
- machineMemory: meminfo.MemTotal,
- factory: f,
- }, nil
- }
// execOutput pairs a process exit code with the error (if any) produced
// while running it.
// NOTE(review): not referenced anywhere in this chunk — confirm it is used
// elsewhere in the package before removing.
type execOutput struct {
	exitCode int
	err      error
}
// Run implements the exec driver Driver interface,
// it calls libcontainer APIs to run a container.
func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks) (execdriver.ExitStatus, error) {
	// destroyed tracks whether cont.Destroy() already ran, so the deferred
	// cleanup does not destroy twice.
	destroyed := false
	// take the Command and populate the libcontainer.Config from it
	container, err := d.createContainer(c, hooks)
	if err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}

	p := &libcontainer.Process{
		Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...),
		Env:  c.ProcessConfig.Env,
		Cwd:  c.WorkingDir,
		User: c.ProcessConfig.User,
	}

	// Wire up stdio (tty or pipes) before the container is created.
	if err := setupPipes(container, &c.ProcessConfig, p, pipes); err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}

	cont, err := d.factory.Create(c.ID, container)
	if err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}
	d.Lock()
	d.activeContainers[c.ID] = cont
	d.Unlock()
	defer func() {
		// Destroy only if the normal path below did not already do it,
		// then drop the container from the active map and remove its state dir.
		if !destroyed {
			cont.Destroy()
		}
		d.cleanContainer(c.ID)
	}()

	if err := cont.Start(p); err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}

	// Subscribe to OOM notifications before invoking the start hook so the
	// hook receives the channel.
	oom := notifyOnOOM(cont)
	if hooks.Start != nil {
		pid, err := p.Pid()
		if err != nil {
			// Could not learn the pid: kill the process and report failure.
			p.Signal(os.Kill)
			p.Wait()
			return execdriver.ExitStatus{ExitCode: -1}, err
		}
		hooks.Start(&c.ProcessConfig, pid, oom)
	}

	waitF := p.Wait
	if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) {
		// we need such hack for tracking processes with inherited fds,
		// because cmd.Wait() waiting for all streams to be copied
		waitF = waitInPIDHost(p, cont)
	}

	ps, err := waitF()
	if err != nil {
		// A non-zero exit is delivered as *exec.ExitError; recover the
		// ProcessState from it. Any other error is a real failure.
		execErr, ok := err.(*exec.ExitError)
		if !ok {
			return execdriver.ExitStatus{ExitCode: -1}, err
		}
		ps = execErr.ProcessState
	}

	cont.Destroy()
	destroyed = true
	// Drain the OOM channel: a received value means an OOM notification
	// fired; a closed channel yields oomKill == false.
	_, oomKill := <-oom
	return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
}
- // notifyOnOOM returns a channel that signals if the container received an OOM notification
- // for any process. If it is unable to subscribe to OOM notifications then a closed
- // channel is returned as it will be non-blocking and return the correct result when read.
- func notifyOnOOM(container libcontainer.Container) <-chan struct{} {
- oom, err := container.NotifyOOM()
- if err != nil {
- logrus.Warnf("Your kernel does not support OOM notifications: %s", err)
- c := make(chan struct{})
- close(c)
- return c
- }
- return oom
- }
// killCgroupProcs pauses the container's cgroup, sends SIGKILL to every
// process found in it, resumes the cgroup so the signals are delivered,
// and finally waits on the killed processes so they are reaped.
func killCgroupProcs(c libcontainer.Container) {
	var procs []*os.Process
	// Freeze first so no new children can appear while we enumerate pids.
	if err := c.Pause(); err != nil {
		logrus.Warn(err)
	}
	pids, err := c.Processes()
	if err != nil {
		// don't care about childs if we can't get them, this is mostly because cgroup already deleted
		logrus.Warnf("Failed to get processes from container %s: %v", c.ID(), err)
	}
	for _, pid := range pids {
		if p, err := os.FindProcess(pid); err == nil {
			procs = append(procs, p)
			if err := p.Kill(); err != nil {
				logrus.Warn(err)
			}
		}
	}
	// Thaw the cgroup: frozen tasks do not act on the SIGKILLs until resumed.
	if err := c.Resume(); err != nil {
		logrus.Warn(err)
	}
	// Reap the processes we just killed.
	for _, p := range procs {
		if _, err := p.Wait(); err != nil {
			logrus.Warn(err)
		}
	}
}
- func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*os.ProcessState, error) {
- return func() (*os.ProcessState, error) {
- pid, err := p.Pid()
- if err != nil {
- return nil, err
- }
- process, err := os.FindProcess(pid)
- s, err := process.Wait()
- if err != nil {
- execErr, ok := err.(*exec.ExitError)
- if !ok {
- return s, err
- }
- s = execErr.ProcessState
- }
- killCgroupProcs(c)
- p.Wait()
- return s, err
- }
- }
- // Kill implements the exec driver Driver interface.
- func (d *Driver) Kill(c *execdriver.Command, sig int) error {
- d.Lock()
- active := d.activeContainers[c.ID]
- d.Unlock()
- if active == nil {
- return fmt.Errorf("active container for %s does not exist", c.ID)
- }
- state, err := active.State()
- if err != nil {
- return err
- }
- return syscall.Kill(state.InitProcessPid, syscall.Signal(sig))
- }
- // Pause implements the exec driver Driver interface,
- // it calls libcontainer API to pause a container.
- func (d *Driver) Pause(c *execdriver.Command) error {
- d.Lock()
- active := d.activeContainers[c.ID]
- d.Unlock()
- if active == nil {
- return fmt.Errorf("active container for %s does not exist", c.ID)
- }
- return active.Pause()
- }
- // Unpause implements the exec driver Driver interface,
- // it calls libcontainer API to unpause a container.
- func (d *Driver) Unpause(c *execdriver.Command) error {
- d.Lock()
- active := d.activeContainers[c.ID]
- d.Unlock()
- if active == nil {
- return fmt.Errorf("active container for %s does not exist", c.ID)
- }
- return active.Resume()
- }
// Terminate implements the exec driver Driver interface.
// It force-kills the container's init process, guarding against pid reuse
// by comparing the recorded process start time with the current one.
func (d *Driver) Terminate(c *execdriver.Command) error {
	// Always drop the container from the active map and remove its state dir.
	defer d.cleanContainer(c.ID)
	container, err := d.factory.Load(c.ID)
	if err != nil {
		return err
	}
	defer container.Destroy()
	state, err := container.State()
	if err != nil {
		return err
	}
	pid := state.InitProcessPid
	currentStartTime, err := system.GetProcessStartTime(pid)
	if err != nil {
		return err
	}
	// Only SIGKILL if the pid still belongs to the init process we recorded;
	// a differing start time means the pid was recycled by another process.
	if state.InitProcessStartTime == currentStartTime {
		err = syscall.Kill(pid, 9)
		syscall.Wait4(pid, nil, 0, nil)
	}
	return err
}
- // Info implements the exec driver Driver interface.
- func (d *Driver) Info(id string) execdriver.Info {
- return &info{
- ID: id,
- driver: d,
- }
- }
- // Name implements the exec driver Driver interface.
- func (d *Driver) Name() string {
- return fmt.Sprintf("%s-%s", DriverName, Version)
- }
- // GetPidsForContainer implements the exec driver Driver interface.
- func (d *Driver) GetPidsForContainer(id string) ([]int, error) {
- d.Lock()
- active := d.activeContainers[id]
- d.Unlock()
- if active == nil {
- return nil, fmt.Errorf("active container for %s does not exist", id)
- }
- return active.Processes()
- }
- func (d *Driver) cleanContainer(id string) error {
- d.Lock()
- delete(d.activeContainers, id)
- d.Unlock()
- return os.RemoveAll(filepath.Join(d.root, id))
- }
// createContainerRoot creates the per-container state directory under the
// driver root.
// NOTE(review): mode 0655 is unusual (no owner-execute on a directory) —
// confirm this is intentional rather than a typo for 0755/0700.
func (d *Driver) createContainerRoot(id string) error {
	return os.MkdirAll(filepath.Join(d.root, id), 0655)
}
- // Clean implements the exec driver Driver interface.
- func (d *Driver) Clean(id string) error {
- return os.RemoveAll(filepath.Join(d.root, id))
- }
- // Stats implements the exec driver Driver interface.
- func (d *Driver) Stats(id string) (*execdriver.ResourceStats, error) {
- d.Lock()
- c := d.activeContainers[id]
- d.Unlock()
- if c == nil {
- return nil, execdriver.ErrNotRunning
- }
- now := time.Now()
- stats, err := c.Stats()
- if err != nil {
- return nil, err
- }
- memoryLimit := c.Config().Cgroups.Memory
- // if the container does not have any memory limit specified set the
- // limit to the machines memory
- if memoryLimit == 0 {
- memoryLimit = d.machineMemory
- }
- return &execdriver.ResourceStats{
- Stats: stats,
- Read: now,
- MemoryLimit: memoryLimit,
- }, nil
- }
// TtyConsole implements the exec driver Terminal interface.
// It wraps a libcontainer console and relays its I/O to the daemon's pipes.
type TtyConsole struct {
	console libcontainer.Console // master side of the container's pty
}
- // NewTtyConsole returns a new TtyConsole struct.
- func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes) (*TtyConsole, error) {
- tty := &TtyConsole{
- console: console,
- }
- if err := tty.AttachPipes(pipes); err != nil {
- tty.Close()
- return nil, err
- }
- return tty, nil
- }
- // Resize implements Resize method of Terminal interface
- func (t *TtyConsole) Resize(h, w int) error {
- return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
- }
// AttachPipes attaches given pipes to TtyConsole
func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes) error {
	go func() {
		// If the stdout sink knows how to close its downstream writers
		// (e.g. a broadcast writer), do so once the console stops producing.
		if wb, ok := pipes.Stdout.(interface {
			CloseWriters() error
		}); ok {
			defer wb.CloseWriters()
		}
		pools.Copy(pipes.Stdout, t.console)
	}()

	if pipes.Stdin != nil {
		go func() {
			// Relay stdin into the console; close stdin when the copy ends.
			pools.Copy(t.console, pipes.Stdin)
			pipes.Stdin.Close()
		}()
	}
	return nil
}
// Close implements Close method of Terminal interface
// It closes the underlying libcontainer console.
func (t *TtyConsole) Close() error {
	return t.console.Close()
}
- func setupPipes(container *configs.Config, processConfig *execdriver.ProcessConfig, p *libcontainer.Process, pipes *execdriver.Pipes) error {
- rootuid, err := container.HostUID()
- if err != nil {
- return err
- }
- if processConfig.Tty {
- cons, err := p.NewConsole(rootuid)
- if err != nil {
- return err
- }
- term, err := NewTtyConsole(cons, pipes)
- if err != nil {
- return err
- }
- processConfig.Terminal = term
- return nil
- }
- // not a tty--set up stdio pipes
- term := &execdriver.StdConsole{}
- processConfig.Terminal = term
- // if we are not in a user namespace, there is no reason to go through
- // the hassle of setting up os-level pipes with proper (remapped) ownership
- // so we will do the prior shortcut for non-userns containers
- if rootuid == 0 {
- p.Stdout = pipes.Stdout
- p.Stderr = pipes.Stderr
- r, w, err := os.Pipe()
- if err != nil {
- return err
- }
- if pipes.Stdin != nil {
- go func() {
- io.Copy(w, pipes.Stdin)
- w.Close()
- }()
- p.Stdin = r
- }
- return nil
- }
- // if we have user namespaces enabled (rootuid != 0), we will set
- // up os pipes for stderr, stdout, stdin so we can chown them to
- // the proper ownership to allow for proper access to the underlying
- // fds
- var fds []int
- //setup stdout
- r, w, err := os.Pipe()
- if err != nil {
- return err
- }
- fds = append(fds, int(r.Fd()), int(w.Fd()))
- if pipes.Stdout != nil {
- go io.Copy(pipes.Stdout, r)
- }
- term.Closers = append(term.Closers, r)
- p.Stdout = w
- //setup stderr
- r, w, err = os.Pipe()
- if err != nil {
- return err
- }
- fds = append(fds, int(r.Fd()), int(w.Fd()))
- if pipes.Stderr != nil {
- go io.Copy(pipes.Stderr, r)
- }
- term.Closers = append(term.Closers, r)
- p.Stderr = w
- //setup stdin
- r, w, err = os.Pipe()
- if err != nil {
- return err
- }
- fds = append(fds, int(r.Fd()), int(w.Fd()))
- if pipes.Stdin != nil {
- go func() {
- io.Copy(w, pipes.Stdin)
- w.Close()
- }()
- p.Stdin = r
- }
- for _, fd := range fds {
- if err := syscall.Fchown(fd, rootuid, rootuid); err != nil {
- return fmt.Errorf("Failed to chown pipes fd: %v", err)
- }
- }
- return nil
- }
// SupportsHooks implements the execdriver Driver interface.
// The libcontainer/runC-based native execdriver does exploit the hook mechanism
func (d *Driver) SupportsHooks() bool {
	return true
}
|