driver.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547
  1. // +build linux,cgo
  2. package native
  3. import (
  4. "fmt"
  5. "io"
  6. "os"
  7. "os/exec"
  8. "path/filepath"
  9. "strings"
  10. "sync"
  11. "syscall"
  12. "time"
  13. "github.com/Sirupsen/logrus"
  14. "github.com/docker/docker/daemon/execdriver"
  15. "github.com/docker/docker/pkg/parsers"
  16. "github.com/docker/docker/pkg/pools"
  17. "github.com/docker/docker/pkg/reexec"
  18. sysinfo "github.com/docker/docker/pkg/system"
  19. "github.com/docker/docker/pkg/term"
  20. "github.com/opencontainers/runc/libcontainer"
  21. "github.com/opencontainers/runc/libcontainer/apparmor"
  22. "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
  23. "github.com/opencontainers/runc/libcontainer/configs"
  24. "github.com/opencontainers/runc/libcontainer/system"
  25. "github.com/opencontainers/runc/libcontainer/utils"
  26. )
  // DriverName is the identifier of the native execution driver. It is the
  // first component of the string returned by (*Driver).Name and is also the
  // argument handed to libcontainer.InitPath when the factory is built.
  const DriverName = "native"

  // Version is the version of the native execution driver; it is the second
  // component of the string returned by (*Driver).Name.
  const Version = "0.2"
  32. // Driver contains all information for native driver,
  33. // it implements execdriver.Driver.
  34. type Driver struct {
  35. root string
  36. initPath string
  37. activeContainers map[string]libcontainer.Container
  38. machineMemory int64
  39. factory libcontainer.Factory
  40. sync.Mutex
  41. }
  42. // NewDriver returns a new native driver, called from NewDriver of execdriver.
  43. func NewDriver(root, initPath string, options []string) (*Driver, error) {
  44. meminfo, err := sysinfo.ReadMemInfo()
  45. if err != nil {
  46. return nil, err
  47. }
  48. if err := sysinfo.MkdirAll(root, 0700); err != nil {
  49. return nil, err
  50. }
  51. if apparmor.IsEnabled() {
  52. if err := installAppArmorProfile(); err != nil {
  53. apparmorProfiles := []string{"docker-default"}
  54. // Allow daemon to run if loading failed, but are active
  55. // (possibly through another run, manually, or via system startup)
  56. for _, policy := range apparmorProfiles {
  57. if err := hasAppArmorProfileLoaded(policy); err != nil {
  58. return nil, fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy)
  59. }
  60. }
  61. }
  62. }
  63. // choose cgroup manager
  64. // this makes sure there are no breaking changes to people
  65. // who upgrade from versions without native.cgroupdriver opt
  66. cgm := libcontainer.Cgroupfs
  67. if systemd.UseSystemd() {
  68. cgm = libcontainer.SystemdCgroups
  69. }
  70. // parse the options
  71. for _, option := range options {
  72. key, val, err := parsers.ParseKeyValueOpt(option)
  73. if err != nil {
  74. return nil, err
  75. }
  76. key = strings.ToLower(key)
  77. switch key {
  78. case "native.cgroupdriver":
  79. // override the default if they set options
  80. switch val {
  81. case "systemd":
  82. if systemd.UseSystemd() {
  83. cgm = libcontainer.SystemdCgroups
  84. } else {
  85. // warn them that they chose the wrong driver
  86. logrus.Warn("You cannot use systemd as native.cgroupdriver, using cgroupfs instead")
  87. }
  88. case "cgroupfs":
  89. cgm = libcontainer.Cgroupfs
  90. default:
  91. return nil, fmt.Errorf("Unknown native.cgroupdriver given %q. try cgroupfs or systemd", val)
  92. }
  93. default:
  94. return nil, fmt.Errorf("Unknown option %s\n", key)
  95. }
  96. }
  97. f, err := libcontainer.New(
  98. root,
  99. cgm,
  100. libcontainer.InitPath(reexec.Self(), DriverName),
  101. )
  102. if err != nil {
  103. return nil, err
  104. }
  105. return &Driver{
  106. root: root,
  107. initPath: initPath,
  108. activeContainers: make(map[string]libcontainer.Container),
  109. machineMemory: meminfo.MemTotal,
  110. factory: f,
  111. }, nil
  112. }
  // execOutput pairs the exit code of an executed process with the error, if
  // any, that was produced while running it.
  type execOutput struct {
  	// exitCode is the exit status of the process.
  	exitCode int
  	// err is the error associated with the execution; nil when there is none.
  	err error
  }
  117. // Run implements the exec driver Driver interface,
  118. // it calls libcontainer APIs to run a container.
  119. func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks) (execdriver.ExitStatus, error) {
  120. destroyed := false
  121. // take the Command and populate the libcontainer.Config from it
  122. container, err := d.createContainer(c, hooks)
  123. if err != nil {
  124. return execdriver.ExitStatus{ExitCode: -1}, err
  125. }
  126. p := &libcontainer.Process{
  127. Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...),
  128. Env: c.ProcessConfig.Env,
  129. Cwd: c.WorkingDir,
  130. User: c.ProcessConfig.User,
  131. }
  132. if err := setupPipes(container, &c.ProcessConfig, p, pipes); err != nil {
  133. return execdriver.ExitStatus{ExitCode: -1}, err
  134. }
  135. cont, err := d.factory.Create(c.ID, container)
  136. if err != nil {
  137. return execdriver.ExitStatus{ExitCode: -1}, err
  138. }
  139. d.Lock()
  140. d.activeContainers[c.ID] = cont
  141. d.Unlock()
  142. defer func() {
  143. if !destroyed {
  144. cont.Destroy()
  145. }
  146. d.cleanContainer(c.ID)
  147. }()
  148. if err := cont.Start(p); err != nil {
  149. return execdriver.ExitStatus{ExitCode: -1}, err
  150. }
  151. oom := notifyOnOOM(cont)
  152. if hooks.Start != nil {
  153. pid, err := p.Pid()
  154. if err != nil {
  155. p.Signal(os.Kill)
  156. p.Wait()
  157. return execdriver.ExitStatus{ExitCode: -1}, err
  158. }
  159. hooks.Start(&c.ProcessConfig, pid, oom)
  160. }
  161. waitF := p.Wait
  162. if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) {
  163. // we need such hack for tracking processes with inherited fds,
  164. // because cmd.Wait() waiting for all streams to be copied
  165. waitF = waitInPIDHost(p, cont)
  166. }
  167. ps, err := waitF()
  168. if err != nil {
  169. execErr, ok := err.(*exec.ExitError)
  170. if !ok {
  171. return execdriver.ExitStatus{ExitCode: -1}, err
  172. }
  173. ps = execErr.ProcessState
  174. }
  175. cont.Destroy()
  176. destroyed = true
  177. _, oomKill := <-oom
  178. return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
  179. }
  180. // notifyOnOOM returns a channel that signals if the container received an OOM notification
  181. // for any process. If it is unable to subscribe to OOM notifications then a closed
  182. // channel is returned as it will be non-blocking and return the correct result when read.
  183. func notifyOnOOM(container libcontainer.Container) <-chan struct{} {
  184. oom, err := container.NotifyOOM()
  185. if err != nil {
  186. logrus.Warnf("Your kernel does not support OOM notifications: %s", err)
  187. c := make(chan struct{})
  188. close(c)
  189. return c
  190. }
  191. return oom
  192. }
  193. func killCgroupProcs(c libcontainer.Container) {
  194. var procs []*os.Process
  195. if err := c.Pause(); err != nil {
  196. logrus.Warn(err)
  197. }
  198. pids, err := c.Processes()
  199. if err != nil {
  200. // don't care about childs if we can't get them, this is mostly because cgroup already deleted
  201. logrus.Warnf("Failed to get processes from container %s: %v", c.ID(), err)
  202. }
  203. for _, pid := range pids {
  204. if p, err := os.FindProcess(pid); err == nil {
  205. procs = append(procs, p)
  206. if err := p.Kill(); err != nil {
  207. logrus.Warn(err)
  208. }
  209. }
  210. }
  211. if err := c.Resume(); err != nil {
  212. logrus.Warn(err)
  213. }
  214. for _, p := range procs {
  215. if _, err := p.Wait(); err != nil {
  216. logrus.Warn(err)
  217. }
  218. }
  219. }
  220. func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*os.ProcessState, error) {
  221. return func() (*os.ProcessState, error) {
  222. pid, err := p.Pid()
  223. if err != nil {
  224. return nil, err
  225. }
  226. process, err := os.FindProcess(pid)
  227. s, err := process.Wait()
  228. if err != nil {
  229. execErr, ok := err.(*exec.ExitError)
  230. if !ok {
  231. return s, err
  232. }
  233. s = execErr.ProcessState
  234. }
  235. killCgroupProcs(c)
  236. p.Wait()
  237. return s, err
  238. }
  239. }
  240. // Kill implements the exec driver Driver interface.
  241. func (d *Driver) Kill(c *execdriver.Command, sig int) error {
  242. d.Lock()
  243. active := d.activeContainers[c.ID]
  244. d.Unlock()
  245. if active == nil {
  246. return fmt.Errorf("active container for %s does not exist", c.ID)
  247. }
  248. state, err := active.State()
  249. if err != nil {
  250. return err
  251. }
  252. return syscall.Kill(state.InitProcessPid, syscall.Signal(sig))
  253. }
  254. // Pause implements the exec driver Driver interface,
  255. // it calls libcontainer API to pause a container.
  256. func (d *Driver) Pause(c *execdriver.Command) error {
  257. d.Lock()
  258. active := d.activeContainers[c.ID]
  259. d.Unlock()
  260. if active == nil {
  261. return fmt.Errorf("active container for %s does not exist", c.ID)
  262. }
  263. return active.Pause()
  264. }
  265. // Unpause implements the exec driver Driver interface,
  266. // it calls libcontainer API to unpause a container.
  267. func (d *Driver) Unpause(c *execdriver.Command) error {
  268. d.Lock()
  269. active := d.activeContainers[c.ID]
  270. d.Unlock()
  271. if active == nil {
  272. return fmt.Errorf("active container for %s does not exist", c.ID)
  273. }
  274. return active.Resume()
  275. }
  276. // Terminate implements the exec driver Driver interface.
  277. func (d *Driver) Terminate(c *execdriver.Command) error {
  278. defer d.cleanContainer(c.ID)
  279. container, err := d.factory.Load(c.ID)
  280. if err != nil {
  281. return err
  282. }
  283. defer container.Destroy()
  284. state, err := container.State()
  285. if err != nil {
  286. return err
  287. }
  288. pid := state.InitProcessPid
  289. currentStartTime, err := system.GetProcessStartTime(pid)
  290. if err != nil {
  291. return err
  292. }
  293. if state.InitProcessStartTime == currentStartTime {
  294. err = syscall.Kill(pid, 9)
  295. syscall.Wait4(pid, nil, 0, nil)
  296. }
  297. return err
  298. }
  299. // Info implements the exec driver Driver interface.
  300. func (d *Driver) Info(id string) execdriver.Info {
  301. return &info{
  302. ID: id,
  303. driver: d,
  304. }
  305. }
  306. // Name implements the exec driver Driver interface.
  307. func (d *Driver) Name() string {
  308. return fmt.Sprintf("%s-%s", DriverName, Version)
  309. }
  310. // GetPidsForContainer implements the exec driver Driver interface.
  311. func (d *Driver) GetPidsForContainer(id string) ([]int, error) {
  312. d.Lock()
  313. active := d.activeContainers[id]
  314. d.Unlock()
  315. if active == nil {
  316. return nil, fmt.Errorf("active container for %s does not exist", id)
  317. }
  318. return active.Processes()
  319. }
  320. func (d *Driver) cleanContainer(id string) error {
  321. d.Lock()
  322. delete(d.activeContainers, id)
  323. d.Unlock()
  324. return os.RemoveAll(filepath.Join(d.root, id))
  325. }
  326. func (d *Driver) createContainerRoot(id string) error {
  327. return os.MkdirAll(filepath.Join(d.root, id), 0655)
  328. }
  329. // Clean implements the exec driver Driver interface.
  330. func (d *Driver) Clean(id string) error {
  331. return os.RemoveAll(filepath.Join(d.root, id))
  332. }
  333. // Stats implements the exec driver Driver interface.
  334. func (d *Driver) Stats(id string) (*execdriver.ResourceStats, error) {
  335. d.Lock()
  336. c := d.activeContainers[id]
  337. d.Unlock()
  338. if c == nil {
  339. return nil, execdriver.ErrNotRunning
  340. }
  341. now := time.Now()
  342. stats, err := c.Stats()
  343. if err != nil {
  344. return nil, err
  345. }
  346. memoryLimit := c.Config().Cgroups.Memory
  347. // if the container does not have any memory limit specified set the
  348. // limit to the machines memory
  349. if memoryLimit == 0 {
  350. memoryLimit = d.machineMemory
  351. }
  352. return &execdriver.ResourceStats{
  353. Stats: stats,
  354. Read: now,
  355. MemoryLimit: memoryLimit,
  356. }, nil
  357. }
  358. // TtyConsole implements the exec driver Terminal interface.
  359. type TtyConsole struct {
  360. console libcontainer.Console
  361. }
  362. // NewTtyConsole returns a new TtyConsole struct.
  363. func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes) (*TtyConsole, error) {
  364. tty := &TtyConsole{
  365. console: console,
  366. }
  367. if err := tty.AttachPipes(pipes); err != nil {
  368. tty.Close()
  369. return nil, err
  370. }
  371. return tty, nil
  372. }
  373. // Resize implements Resize method of Terminal interface
  374. func (t *TtyConsole) Resize(h, w int) error {
  375. return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
  376. }
  377. // AttachPipes attaches given pipes to TtyConsole
  378. func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes) error {
  379. go func() {
  380. if wb, ok := pipes.Stdout.(interface {
  381. CloseWriters() error
  382. }); ok {
  383. defer wb.CloseWriters()
  384. }
  385. pools.Copy(pipes.Stdout, t.console)
  386. }()
  387. if pipes.Stdin != nil {
  388. go func() {
  389. pools.Copy(t.console, pipes.Stdin)
  390. pipes.Stdin.Close()
  391. }()
  392. }
  393. return nil
  394. }
  395. // Close implements Close method of Terminal interface
  396. func (t *TtyConsole) Close() error {
  397. return t.console.Close()
  398. }
  399. func setupPipes(container *configs.Config, processConfig *execdriver.ProcessConfig, p *libcontainer.Process, pipes *execdriver.Pipes) error {
  400. rootuid, err := container.HostUID()
  401. if err != nil {
  402. return err
  403. }
  404. if processConfig.Tty {
  405. cons, err := p.NewConsole(rootuid)
  406. if err != nil {
  407. return err
  408. }
  409. term, err := NewTtyConsole(cons, pipes)
  410. if err != nil {
  411. return err
  412. }
  413. processConfig.Terminal = term
  414. return nil
  415. }
  416. // not a tty--set up stdio pipes
  417. term := &execdriver.StdConsole{}
  418. processConfig.Terminal = term
  419. // if we are not in a user namespace, there is no reason to go through
  420. // the hassle of setting up os-level pipes with proper (remapped) ownership
  421. // so we will do the prior shortcut for non-userns containers
  422. if rootuid == 0 {
  423. p.Stdout = pipes.Stdout
  424. p.Stderr = pipes.Stderr
  425. r, w, err := os.Pipe()
  426. if err != nil {
  427. return err
  428. }
  429. if pipes.Stdin != nil {
  430. go func() {
  431. io.Copy(w, pipes.Stdin)
  432. w.Close()
  433. }()
  434. p.Stdin = r
  435. }
  436. return nil
  437. }
  438. // if we have user namespaces enabled (rootuid != 0), we will set
  439. // up os pipes for stderr, stdout, stdin so we can chown them to
  440. // the proper ownership to allow for proper access to the underlying
  441. // fds
  442. var fds []int
  443. //setup stdout
  444. r, w, err := os.Pipe()
  445. if err != nil {
  446. return err
  447. }
  448. fds = append(fds, int(r.Fd()), int(w.Fd()))
  449. if pipes.Stdout != nil {
  450. go io.Copy(pipes.Stdout, r)
  451. }
  452. term.Closers = append(term.Closers, r)
  453. p.Stdout = w
  454. //setup stderr
  455. r, w, err = os.Pipe()
  456. if err != nil {
  457. return err
  458. }
  459. fds = append(fds, int(r.Fd()), int(w.Fd()))
  460. if pipes.Stderr != nil {
  461. go io.Copy(pipes.Stderr, r)
  462. }
  463. term.Closers = append(term.Closers, r)
  464. p.Stderr = w
  465. //setup stdin
  466. r, w, err = os.Pipe()
  467. if err != nil {
  468. return err
  469. }
  470. fds = append(fds, int(r.Fd()), int(w.Fd()))
  471. if pipes.Stdin != nil {
  472. go func() {
  473. io.Copy(w, pipes.Stdin)
  474. w.Close()
  475. }()
  476. p.Stdin = r
  477. }
  478. for _, fd := range fds {
  479. if err := syscall.Fchown(fd, rootuid, rootuid); err != nil {
  480. return fmt.Errorf("Failed to chown pipes fd: %v", err)
  481. }
  482. }
  483. return nil
  484. }
  485. // SupportsHooks implements the execdriver Driver interface.
  486. // The libcontainer/runC-based native execdriver does exploit the hook mechanism
  487. func (d *Driver) SupportsHooks() bool {
  488. return true
  489. }