driver.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
  1. // +build linux,cgo
  2. package native
  3. import (
  4. "fmt"
  5. "io"
  6. "os"
  7. "os/exec"
  8. "path/filepath"
  9. "strings"
  10. "sync"
  11. "syscall"
  12. "time"
  13. "github.com/Sirupsen/logrus"
  14. "github.com/docker/docker/context"
  15. "github.com/docker/docker/daemon/execdriver"
  16. "github.com/docker/docker/pkg/parsers"
  17. "github.com/docker/docker/pkg/pools"
  18. "github.com/docker/docker/pkg/reexec"
  19. sysinfo "github.com/docker/docker/pkg/system"
  20. "github.com/docker/docker/pkg/term"
  21. "github.com/opencontainers/runc/libcontainer"
  22. "github.com/opencontainers/runc/libcontainer/apparmor"
  23. "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
  24. "github.com/opencontainers/runc/libcontainer/configs"
  25. "github.com/opencontainers/runc/libcontainer/system"
  26. "github.com/opencontainers/runc/libcontainer/utils"
  27. )
  // Identification constants for the native execution driver.
  const (
  	// DriverName is the name of this driver. It is combined with Version
  	// by Name and is passed as the init argument to the libcontainer factory.
  	DriverName = "native"
  	// Version is the driver version string appended to DriverName by Name.
  	Version = "0.2"
  )
  33. // Driver contains all information for native driver,
  34. // it implements execdriver.Driver.
  35. type Driver struct {
  36. root string
  37. initPath string
  38. activeContainers map[string]libcontainer.Container
  39. machineMemory int64
  40. factory libcontainer.Factory
  41. sync.Mutex
  42. }
  43. // NewDriver returns a new native driver, called from NewDriver of execdriver.
  44. func NewDriver(root, initPath string, options []string) (*Driver, error) {
  45. meminfo, err := sysinfo.ReadMemInfo()
  46. if err != nil {
  47. return nil, err
  48. }
  49. if err := sysinfo.MkdirAll(root, 0700); err != nil {
  50. return nil, err
  51. }
  52. if apparmor.IsEnabled() {
  53. if err := installAppArmorProfile(); err != nil {
  54. apparmorProfiles := []string{"docker-default"}
  55. // Allow daemon to run if loading failed, but are active
  56. // (possibly through another run, manually, or via system startup)
  57. for _, policy := range apparmorProfiles {
  58. if err := hasAppArmorProfileLoaded(policy); err != nil {
  59. return nil, fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy)
  60. }
  61. }
  62. }
  63. }
  64. // choose cgroup manager
  65. // this makes sure there are no breaking changes to people
  66. // who upgrade from versions without native.cgroupdriver opt
  67. cgm := libcontainer.Cgroupfs
  68. if systemd.UseSystemd() {
  69. cgm = libcontainer.SystemdCgroups
  70. }
  71. // parse the options
  72. for _, option := range options {
  73. key, val, err := parsers.ParseKeyValueOpt(option)
  74. if err != nil {
  75. return nil, err
  76. }
  77. key = strings.ToLower(key)
  78. switch key {
  79. case "native.cgroupdriver":
  80. // override the default if they set options
  81. switch val {
  82. case "systemd":
  83. if systemd.UseSystemd() {
  84. cgm = libcontainer.SystemdCgroups
  85. } else {
  86. // warn them that they chose the wrong driver
  87. logrus.Warn("You cannot use systemd as native.cgroupdriver, using cgroupfs instead")
  88. }
  89. case "cgroupfs":
  90. cgm = libcontainer.Cgroupfs
  91. default:
  92. return nil, fmt.Errorf("Unknown native.cgroupdriver given %q. try cgroupfs or systemd", val)
  93. }
  94. default:
  95. return nil, fmt.Errorf("Unknown option %s\n", key)
  96. }
  97. }
  98. f, err := libcontainer.New(
  99. root,
  100. cgm,
  101. libcontainer.InitPath(reexec.Self(), DriverName),
  102. )
  103. if err != nil {
  104. return nil, err
  105. }
  106. return &Driver{
  107. root: root,
  108. initPath: initPath,
  109. activeContainers: make(map[string]libcontainer.Container),
  110. machineMemory: meminfo.MemTotal,
  111. factory: f,
  112. }, nil
  113. }
  // execOutput pairs the exit code of an executed process with the error,
  // if any, that was encountered while running it.
  type execOutput struct {
  	exitCode int   // exit code of the process
  	err      error // error associated with the execution, nil if none
  }
  118. // Run implements the exec driver Driver interface,
  119. // it calls libcontainer APIs to run a container.
  120. func (d *Driver) Run(ctx context.Context, c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks) (execdriver.ExitStatus, error) {
  121. // take the Command and populate the libcontainer.Config from it
  122. container, err := d.createContainer(ctx, c, hooks)
  123. if err != nil {
  124. return execdriver.ExitStatus{ExitCode: -1}, err
  125. }
  126. p := &libcontainer.Process{
  127. Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...),
  128. Env: c.ProcessConfig.Env,
  129. Cwd: c.WorkingDir,
  130. User: c.ProcessConfig.User,
  131. }
  132. if err := setupPipes(container, &c.ProcessConfig, p, pipes); err != nil {
  133. return execdriver.ExitStatus{ExitCode: -1}, err
  134. }
  135. cont, err := d.factory.Create(c.ID, container)
  136. if err != nil {
  137. return execdriver.ExitStatus{ExitCode: -1}, err
  138. }
  139. d.Lock()
  140. d.activeContainers[c.ID] = cont
  141. d.Unlock()
  142. defer func() {
  143. cont.Destroy()
  144. d.cleanContainer(c.ID)
  145. }()
  146. if err := cont.Start(p); err != nil {
  147. return execdriver.ExitStatus{ExitCode: -1}, err
  148. }
  149. oom := notifyOnOOM(cont)
  150. if hooks.Start != nil {
  151. pid, err := p.Pid()
  152. if err != nil {
  153. p.Signal(os.Kill)
  154. p.Wait()
  155. return execdriver.ExitStatus{ExitCode: -1}, err
  156. }
  157. hooks.Start(ctx, &c.ProcessConfig, pid, oom)
  158. }
  159. waitF := p.Wait
  160. if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) {
  161. // we need such hack for tracking processes with inherited fds,
  162. // because cmd.Wait() waiting for all streams to be copied
  163. waitF = waitInPIDHost(p, cont)
  164. }
  165. ps, err := waitF()
  166. if err != nil {
  167. execErr, ok := err.(*exec.ExitError)
  168. if !ok {
  169. return execdriver.ExitStatus{ExitCode: -1}, err
  170. }
  171. ps = execErr.ProcessState
  172. }
  173. cont.Destroy()
  174. _, oomKill := <-oom
  175. return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
  176. }
  177. // notifyOnOOM returns a channel that signals if the container received an OOM notification
  178. // for any process. If it is unable to subscribe to OOM notifications then a closed
  179. // channel is returned as it will be non-blocking and return the correct result when read.
  180. func notifyOnOOM(container libcontainer.Container) <-chan struct{} {
  181. oom, err := container.NotifyOOM()
  182. if err != nil {
  183. logrus.Warnf("Your kernel does not support OOM notifications: %s", err)
  184. c := make(chan struct{})
  185. close(c)
  186. return c
  187. }
  188. return oom
  189. }
  190. func killCgroupProcs(c libcontainer.Container) {
  191. var procs []*os.Process
  192. if err := c.Pause(); err != nil {
  193. logrus.Warn(err)
  194. }
  195. pids, err := c.Processes()
  196. if err != nil {
  197. // don't care about childs if we can't get them, this is mostly because cgroup already deleted
  198. logrus.Warnf("Failed to get processes from container %s: %v", c.ID(), err)
  199. }
  200. for _, pid := range pids {
  201. if p, err := os.FindProcess(pid); err == nil {
  202. procs = append(procs, p)
  203. if err := p.Kill(); err != nil {
  204. logrus.Warn(err)
  205. }
  206. }
  207. }
  208. if err := c.Resume(); err != nil {
  209. logrus.Warn(err)
  210. }
  211. for _, p := range procs {
  212. if _, err := p.Wait(); err != nil {
  213. logrus.Warn(err)
  214. }
  215. }
  216. }
  217. func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*os.ProcessState, error) {
  218. return func() (*os.ProcessState, error) {
  219. pid, err := p.Pid()
  220. if err != nil {
  221. return nil, err
  222. }
  223. process, err := os.FindProcess(pid)
  224. s, err := process.Wait()
  225. if err != nil {
  226. execErr, ok := err.(*exec.ExitError)
  227. if !ok {
  228. return s, err
  229. }
  230. s = execErr.ProcessState
  231. }
  232. killCgroupProcs(c)
  233. p.Wait()
  234. return s, err
  235. }
  236. }
  237. // Kill implements the exec driver Driver interface.
  238. func (d *Driver) Kill(c *execdriver.Command, sig int) error {
  239. d.Lock()
  240. active := d.activeContainers[c.ID]
  241. d.Unlock()
  242. if active == nil {
  243. return fmt.Errorf("active container for %s does not exist", c.ID)
  244. }
  245. state, err := active.State()
  246. if err != nil {
  247. return err
  248. }
  249. return syscall.Kill(state.InitProcessPid, syscall.Signal(sig))
  250. }
  251. // Pause implements the exec driver Driver interface,
  252. // it calls libcontainer API to pause a container.
  253. func (d *Driver) Pause(c *execdriver.Command) error {
  254. d.Lock()
  255. active := d.activeContainers[c.ID]
  256. d.Unlock()
  257. if active == nil {
  258. return fmt.Errorf("active container for %s does not exist", c.ID)
  259. }
  260. return active.Pause()
  261. }
  262. // Unpause implements the exec driver Driver interface,
  263. // it calls libcontainer API to unpause a container.
  264. func (d *Driver) Unpause(c *execdriver.Command) error {
  265. d.Lock()
  266. active := d.activeContainers[c.ID]
  267. d.Unlock()
  268. if active == nil {
  269. return fmt.Errorf("active container for %s does not exist", c.ID)
  270. }
  271. return active.Resume()
  272. }
  273. // Terminate implements the exec driver Driver interface.
  274. func (d *Driver) Terminate(c *execdriver.Command) error {
  275. defer d.cleanContainer(c.ID)
  276. container, err := d.factory.Load(c.ID)
  277. if err != nil {
  278. return err
  279. }
  280. defer container.Destroy()
  281. state, err := container.State()
  282. if err != nil {
  283. return err
  284. }
  285. pid := state.InitProcessPid
  286. currentStartTime, err := system.GetProcessStartTime(pid)
  287. if err != nil {
  288. return err
  289. }
  290. if state.InitProcessStartTime == currentStartTime {
  291. err = syscall.Kill(pid, 9)
  292. syscall.Wait4(pid, nil, 0, nil)
  293. }
  294. return err
  295. }
  296. // Info implements the exec driver Driver interface.
  297. func (d *Driver) Info(id string) execdriver.Info {
  298. return &info{
  299. ID: id,
  300. driver: d,
  301. }
  302. }
  303. // Name implements the exec driver Driver interface.
  304. func (d *Driver) Name() string {
  305. return fmt.Sprintf("%s-%s", DriverName, Version)
  306. }
  307. // GetPidsForContainer implements the exec driver Driver interface.
  308. func (d *Driver) GetPidsForContainer(id string) ([]int, error) {
  309. d.Lock()
  310. active := d.activeContainers[id]
  311. d.Unlock()
  312. if active == nil {
  313. return nil, fmt.Errorf("active container for %s does not exist", id)
  314. }
  315. return active.Processes()
  316. }
  317. func (d *Driver) cleanContainer(id string) error {
  318. d.Lock()
  319. delete(d.activeContainers, id)
  320. d.Unlock()
  321. return os.RemoveAll(filepath.Join(d.root, id))
  322. }
  323. func (d *Driver) createContainerRoot(id string) error {
  324. return os.MkdirAll(filepath.Join(d.root, id), 0655)
  325. }
  326. // Clean implements the exec driver Driver interface.
  327. func (d *Driver) Clean(id string) error {
  328. return os.RemoveAll(filepath.Join(d.root, id))
  329. }
  330. // Stats implements the exec driver Driver interface.
  331. func (d *Driver) Stats(id string) (*execdriver.ResourceStats, error) {
  332. d.Lock()
  333. c := d.activeContainers[id]
  334. d.Unlock()
  335. if c == nil {
  336. return nil, execdriver.ErrNotRunning
  337. }
  338. now := time.Now()
  339. stats, err := c.Stats()
  340. if err != nil {
  341. return nil, err
  342. }
  343. memoryLimit := c.Config().Cgroups.Memory
  344. // if the container does not have any memory limit specified set the
  345. // limit to the machines memory
  346. if memoryLimit == 0 {
  347. memoryLimit = d.machineMemory
  348. }
  349. return &execdriver.ResourceStats{
  350. Stats: stats,
  351. Read: now,
  352. MemoryLimit: memoryLimit,
  353. }, nil
  354. }
  355. // TtyConsole implements the exec driver Terminal interface.
  356. type TtyConsole struct {
  357. console libcontainer.Console
  358. }
  359. // NewTtyConsole returns a new TtyConsole struct.
  360. func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes) (*TtyConsole, error) {
  361. tty := &TtyConsole{
  362. console: console,
  363. }
  364. if err := tty.AttachPipes(pipes); err != nil {
  365. tty.Close()
  366. return nil, err
  367. }
  368. return tty, nil
  369. }
  370. // Resize implements Resize method of Terminal interface
  371. func (t *TtyConsole) Resize(h, w int) error {
  372. return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
  373. }
  374. // AttachPipes attaches given pipes to TtyConsole
  375. func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes) error {
  376. go func() {
  377. if wb, ok := pipes.Stdout.(interface {
  378. CloseWriters() error
  379. }); ok {
  380. defer wb.CloseWriters()
  381. }
  382. pools.Copy(pipes.Stdout, t.console)
  383. }()
  384. if pipes.Stdin != nil {
  385. go func() {
  386. pools.Copy(t.console, pipes.Stdin)
  387. pipes.Stdin.Close()
  388. }()
  389. }
  390. return nil
  391. }
  392. // Close implements Close method of Terminal interface
  393. func (t *TtyConsole) Close() error {
  394. return t.console.Close()
  395. }
  396. func setupPipes(container *configs.Config, processConfig *execdriver.ProcessConfig, p *libcontainer.Process, pipes *execdriver.Pipes) error {
  397. var term execdriver.Terminal
  398. var err error
  399. if processConfig.Tty {
  400. rootuid, err := container.HostUID()
  401. if err != nil {
  402. return err
  403. }
  404. cons, err := p.NewConsole(rootuid)
  405. if err != nil {
  406. return err
  407. }
  408. term, err = NewTtyConsole(cons, pipes)
  409. } else {
  410. p.Stdout = pipes.Stdout
  411. p.Stderr = pipes.Stderr
  412. r, w, err := os.Pipe()
  413. if err != nil {
  414. return err
  415. }
  416. if pipes.Stdin != nil {
  417. go func() {
  418. io.Copy(w, pipes.Stdin)
  419. w.Close()
  420. }()
  421. p.Stdin = r
  422. }
  423. term = &execdriver.StdConsole{}
  424. }
  425. if err != nil {
  426. return err
  427. }
  428. processConfig.Terminal = term
  429. return nil
  430. }
  431. // SupportsHooks implements the execdriver Driver interface.
  432. // The libcontainer/runC-based native execdriver does exploit the hook mechanism
  433. func (d *Driver) SupportsHooks() bool {
  434. return true
  435. }