driver.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450
  1. // +build linux,cgo
  2. package native
  3. import (
  4. "fmt"
  5. "io"
  6. "os"
  7. "os/exec"
  8. "path/filepath"
  9. "strings"
  10. "sync"
  11. "syscall"
  12. "time"
  13. "github.com/Sirupsen/logrus"
  14. "github.com/docker/docker/daemon/execdriver"
  15. "github.com/docker/docker/pkg/parsers"
  16. "github.com/docker/docker/pkg/pools"
  17. "github.com/docker/docker/pkg/reexec"
  18. sysinfo "github.com/docker/docker/pkg/system"
  19. "github.com/docker/docker/pkg/term"
  20. "github.com/opencontainers/runc/libcontainer"
  21. "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
  22. "github.com/opencontainers/runc/libcontainer/configs"
  23. "github.com/opencontainers/runc/libcontainer/system"
  24. "github.com/opencontainers/runc/libcontainer/utils"
  25. )
  26. const (
  27. DriverName = "native"
  28. Version = "0.2"
  29. )
  30. type driver struct {
  31. root string
  32. initPath string
  33. activeContainers map[string]libcontainer.Container
  34. machineMemory int64
  35. factory libcontainer.Factory
  36. sync.Mutex
  37. }
  38. func NewDriver(root, initPath string, options []string) (*driver, error) {
  39. meminfo, err := sysinfo.ReadMemInfo()
  40. if err != nil {
  41. return nil, err
  42. }
  43. if err := sysinfo.MkdirAll(root, 0700); err != nil {
  44. return nil, err
  45. }
  46. // native driver root is at docker_root/execdriver/native. Put apparmor at docker_root
  47. if err := installApparmorProfile(); err != nil {
  48. return nil, err
  49. }
  50. // choose cgroup manager
  51. // this makes sure there are no breaking changes to people
  52. // who upgrade from versions without native.cgroupdriver opt
  53. cgm := libcontainer.Cgroupfs
  54. if systemd.UseSystemd() {
  55. cgm = libcontainer.SystemdCgroups
  56. }
  57. // parse the options
  58. for _, option := range options {
  59. key, val, err := parsers.ParseKeyValueOpt(option)
  60. if err != nil {
  61. return nil, err
  62. }
  63. key = strings.ToLower(key)
  64. switch key {
  65. case "native.cgroupdriver":
  66. // override the default if they set options
  67. switch val {
  68. case "systemd":
  69. if systemd.UseSystemd() {
  70. cgm = libcontainer.SystemdCgroups
  71. } else {
  72. // warn them that they chose the wrong driver
  73. logrus.Warn("You cannot use systemd as native.cgroupdriver, using cgroupfs instead")
  74. }
  75. case "cgroupfs":
  76. cgm = libcontainer.Cgroupfs
  77. default:
  78. return nil, fmt.Errorf("Unknown native.cgroupdriver given %q. try cgroupfs or systemd", val)
  79. }
  80. default:
  81. return nil, fmt.Errorf("Unknown option %s\n", key)
  82. }
  83. }
  84. f, err := libcontainer.New(
  85. root,
  86. cgm,
  87. libcontainer.InitPath(reexec.Self(), DriverName),
  88. )
  89. if err != nil {
  90. return nil, err
  91. }
  92. return &driver{
  93. root: root,
  94. initPath: initPath,
  95. activeContainers: make(map[string]libcontainer.Container),
  96. machineMemory: meminfo.MemTotal,
  97. factory: f,
  98. }, nil
  99. }
  100. type execOutput struct {
  101. exitCode int
  102. err error
  103. }
  104. func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (execdriver.ExitStatus, error) {
  105. // take the Command and populate the libcontainer.Config from it
  106. container, err := d.createContainer(c)
  107. if err != nil {
  108. return execdriver.ExitStatus{ExitCode: -1}, err
  109. }
  110. p := &libcontainer.Process{
  111. Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...),
  112. Env: c.ProcessConfig.Env,
  113. Cwd: c.WorkingDir,
  114. User: c.ProcessConfig.User,
  115. }
  116. if err := setupPipes(container, &c.ProcessConfig, p, pipes); err != nil {
  117. return execdriver.ExitStatus{ExitCode: -1}, err
  118. }
  119. cont, err := d.factory.Create(c.ID, container)
  120. if err != nil {
  121. return execdriver.ExitStatus{ExitCode: -1}, err
  122. }
  123. d.Lock()
  124. d.activeContainers[c.ID] = cont
  125. d.Unlock()
  126. defer func() {
  127. cont.Destroy()
  128. d.cleanContainer(c.ID)
  129. }()
  130. if err := cont.Start(p); err != nil {
  131. return execdriver.ExitStatus{ExitCode: -1}, err
  132. }
  133. if startCallback != nil {
  134. pid, err := p.Pid()
  135. if err != nil {
  136. p.Signal(os.Kill)
  137. p.Wait()
  138. return execdriver.ExitStatus{ExitCode: -1}, err
  139. }
  140. startCallback(&c.ProcessConfig, pid)
  141. }
  142. oom := notifyOnOOM(cont)
  143. waitF := p.Wait
  144. if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) {
  145. // we need such hack for tracking processes with inherited fds,
  146. // because cmd.Wait() waiting for all streams to be copied
  147. waitF = waitInPIDHost(p, cont)
  148. }
  149. ps, err := waitF()
  150. if err != nil {
  151. execErr, ok := err.(*exec.ExitError)
  152. if !ok {
  153. return execdriver.ExitStatus{ExitCode: -1}, err
  154. }
  155. ps = execErr.ProcessState
  156. }
  157. cont.Destroy()
  158. _, oomKill := <-oom
  159. return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
  160. }
  161. // notifyOnOOM returns a channel that signals if the container received an OOM notification
  162. // for any process. If it is unable to subscribe to OOM notifications then a closed
  163. // channel is returned as it will be non-blocking and return the correct result when read.
  164. func notifyOnOOM(container libcontainer.Container) <-chan struct{} {
  165. oom, err := container.NotifyOOM()
  166. if err != nil {
  167. logrus.Warnf("Your kernel does not support OOM notifications: %s", err)
  168. c := make(chan struct{})
  169. close(c)
  170. return c
  171. }
  172. return oom
  173. }
  174. func killCgroupProcs(c libcontainer.Container) {
  175. var procs []*os.Process
  176. if err := c.Pause(); err != nil {
  177. logrus.Warn(err)
  178. }
  179. pids, err := c.Processes()
  180. if err != nil {
  181. // don't care about childs if we can't get them, this is mostly because cgroup already deleted
  182. logrus.Warnf("Failed to get processes from container %s: %v", c.ID(), err)
  183. }
  184. for _, pid := range pids {
  185. if p, err := os.FindProcess(pid); err == nil {
  186. procs = append(procs, p)
  187. if err := p.Kill(); err != nil {
  188. logrus.Warn(err)
  189. }
  190. }
  191. }
  192. if err := c.Resume(); err != nil {
  193. logrus.Warn(err)
  194. }
  195. for _, p := range procs {
  196. if _, err := p.Wait(); err != nil {
  197. logrus.Warn(err)
  198. }
  199. }
  200. }
  201. func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*os.ProcessState, error) {
  202. return func() (*os.ProcessState, error) {
  203. pid, err := p.Pid()
  204. if err != nil {
  205. return nil, err
  206. }
  207. process, err := os.FindProcess(pid)
  208. s, err := process.Wait()
  209. if err != nil {
  210. execErr, ok := err.(*exec.ExitError)
  211. if !ok {
  212. return s, err
  213. }
  214. s = execErr.ProcessState
  215. }
  216. killCgroupProcs(c)
  217. p.Wait()
  218. return s, err
  219. }
  220. }
  221. func (d *driver) Kill(c *execdriver.Command, sig int) error {
  222. d.Lock()
  223. active := d.activeContainers[c.ID]
  224. d.Unlock()
  225. if active == nil {
  226. return fmt.Errorf("active container for %s does not exist", c.ID)
  227. }
  228. state, err := active.State()
  229. if err != nil {
  230. return err
  231. }
  232. return syscall.Kill(state.InitProcessPid, syscall.Signal(sig))
  233. }
  234. func (d *driver) Pause(c *execdriver.Command) error {
  235. d.Lock()
  236. active := d.activeContainers[c.ID]
  237. d.Unlock()
  238. if active == nil {
  239. return fmt.Errorf("active container for %s does not exist", c.ID)
  240. }
  241. return active.Pause()
  242. }
  243. func (d *driver) Unpause(c *execdriver.Command) error {
  244. d.Lock()
  245. active := d.activeContainers[c.ID]
  246. d.Unlock()
  247. if active == nil {
  248. return fmt.Errorf("active container for %s does not exist", c.ID)
  249. }
  250. return active.Resume()
  251. }
  252. func (d *driver) Terminate(c *execdriver.Command) error {
  253. defer d.cleanContainer(c.ID)
  254. container, err := d.factory.Load(c.ID)
  255. if err != nil {
  256. return err
  257. }
  258. defer container.Destroy()
  259. state, err := container.State()
  260. if err != nil {
  261. return err
  262. }
  263. pid := state.InitProcessPid
  264. currentStartTime, err := system.GetProcessStartTime(pid)
  265. if err != nil {
  266. return err
  267. }
  268. if state.InitProcessStartTime == currentStartTime {
  269. err = syscall.Kill(pid, 9)
  270. syscall.Wait4(pid, nil, 0, nil)
  271. }
  272. return err
  273. }
  274. func (d *driver) Info(id string) execdriver.Info {
  275. return &info{
  276. ID: id,
  277. driver: d,
  278. }
  279. }
  280. func (d *driver) Name() string {
  281. return fmt.Sprintf("%s-%s", DriverName, Version)
  282. }
  283. func (d *driver) GetPidsForContainer(id string) ([]int, error) {
  284. d.Lock()
  285. active := d.activeContainers[id]
  286. d.Unlock()
  287. if active == nil {
  288. return nil, fmt.Errorf("active container for %s does not exist", id)
  289. }
  290. return active.Processes()
  291. }
  292. func (d *driver) cleanContainer(id string) error {
  293. d.Lock()
  294. delete(d.activeContainers, id)
  295. d.Unlock()
  296. return os.RemoveAll(filepath.Join(d.root, id))
  297. }
  298. func (d *driver) createContainerRoot(id string) error {
  299. return os.MkdirAll(filepath.Join(d.root, id), 0655)
  300. }
  301. func (d *driver) Clean(id string) error {
  302. return os.RemoveAll(filepath.Join(d.root, id))
  303. }
  304. func (d *driver) Stats(id string) (*execdriver.ResourceStats, error) {
  305. d.Lock()
  306. c := d.activeContainers[id]
  307. d.Unlock()
  308. if c == nil {
  309. return nil, execdriver.ErrNotRunning
  310. }
  311. now := time.Now()
  312. stats, err := c.Stats()
  313. if err != nil {
  314. return nil, err
  315. }
  316. memoryLimit := c.Config().Cgroups.Memory
  317. // if the container does not have any memory limit specified set the
  318. // limit to the machines memory
  319. if memoryLimit == 0 {
  320. memoryLimit = d.machineMemory
  321. }
  322. return &execdriver.ResourceStats{
  323. Stats: stats,
  324. Read: now,
  325. MemoryLimit: memoryLimit,
  326. }, nil
  327. }
  328. type TtyConsole struct {
  329. console libcontainer.Console
  330. }
  331. func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes, rootuid int) (*TtyConsole, error) {
  332. tty := &TtyConsole{
  333. console: console,
  334. }
  335. if err := tty.AttachPipes(pipes); err != nil {
  336. tty.Close()
  337. return nil, err
  338. }
  339. return tty, nil
  340. }
  341. func (t *TtyConsole) Master() libcontainer.Console {
  342. return t.console
  343. }
  344. func (t *TtyConsole) Resize(h, w int) error {
  345. return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
  346. }
  347. func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes) error {
  348. go func() {
  349. if wb, ok := pipes.Stdout.(interface {
  350. CloseWriters() error
  351. }); ok {
  352. defer wb.CloseWriters()
  353. }
  354. pools.Copy(pipes.Stdout, t.console)
  355. }()
  356. if pipes.Stdin != nil {
  357. go func() {
  358. pools.Copy(t.console, pipes.Stdin)
  359. pipes.Stdin.Close()
  360. }()
  361. }
  362. return nil
  363. }
  364. func (t *TtyConsole) Close() error {
  365. return t.console.Close()
  366. }
  367. func setupPipes(container *configs.Config, processConfig *execdriver.ProcessConfig, p *libcontainer.Process, pipes *execdriver.Pipes) error {
  368. var term execdriver.Terminal
  369. var err error
  370. if processConfig.Tty {
  371. rootuid, err := container.HostUID()
  372. if err != nil {
  373. return err
  374. }
  375. cons, err := p.NewConsole(rootuid)
  376. if err != nil {
  377. return err
  378. }
  379. term, err = NewTtyConsole(cons, pipes, rootuid)
  380. } else {
  381. p.Stdout = pipes.Stdout
  382. p.Stderr = pipes.Stderr
  383. r, w, err := os.Pipe()
  384. if err != nil {
  385. return err
  386. }
  387. if pipes.Stdin != nil {
  388. go func() {
  389. io.Copy(w, pipes.Stdin)
  390. w.Close()
  391. }()
  392. p.Stdin = r
  393. }
  394. term = &execdriver.StdConsole{}
  395. }
  396. if err != nil {
  397. return err
  398. }
  399. processConfig.Terminal = term
  400. return nil
  401. }