oci_linux.go

package daemon

import (
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"sort"
	"strconv"
	"strings"

	"github.com/Sirupsen/logrus"
	containertypes "github.com/docker/docker/api/types/container"
	"github.com/docker/docker/container"
	"github.com/docker/docker/daemon/caps"
	"github.com/docker/docker/oci"
	"github.com/docker/docker/pkg/idtools"
	"github.com/docker/docker/pkg/mount"
	"github.com/docker/docker/pkg/stringutils"
	"github.com/docker/docker/pkg/symlink"
	"github.com/docker/docker/volume"
	"github.com/opencontainers/runc/libcontainer/apparmor"
	"github.com/opencontainers/runc/libcontainer/cgroups"
	"github.com/opencontainers/runc/libcontainer/devices"
	"github.com/opencontainers/runc/libcontainer/user"
	specs "github.com/opencontainers/runtime-spec/specs-go"
)

var (
	deviceCgroupRuleRegex = regexp.MustCompile("^([acb]) ([0-9]+|\\*):([0-9]+|\\*) ([rwm]{1,3})$")
)

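// setResources applies the resource limits from the container's HostConfig
// (block IO, memory, CPU and pids) to the runtime spec.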
func setResources(s *specs.Spec, r containertypes.Resources) error {
	weightDevices, err := getBlkioWeightDevices(r)
	if err != nil {
		return err
	}
	readBpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceReadBps)
	if err != nil {
		return err
	}
	writeBpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceWriteBps)
	if err != nil {
		return err
	}
	readIOpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceReadIOps)
	if err != nil {
		return err
	}
	writeIOpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceWriteIOps)
	if err != nil {
		return err
	}

	memoryRes := getMemoryResources(r)
	cpuRes := getCPUResources(r)
	blkioWeight := r.BlkioWeight

	specResources := &specs.Resources{
		Memory: memoryRes,
		CPU:    cpuRes,
		BlockIO: &specs.BlockIO{
			Weight:                  &blkioWeight,
			WeightDevice:            weightDevices,
			ThrottleReadBpsDevice:   readBpsDevice,
			ThrottleWriteBpsDevice:  writeBpsDevice,
			ThrottleReadIOPSDevice:  readIOpsDevice,
			ThrottleWriteIOPSDevice: writeIOpsDevice,
		},
		DisableOOMKiller: r.OomKillDisable,
		Pids: &specs.Pids{
			Limit: &r.PidsLimit,
		},
	}

	if s.Linux.Resources != nil && len(s.Linux.Resources.Devices) > 0 {
		specResources.Devices = s.Linux.Resources.Devices
	}

	s.Linux.Resources = specResources
	return nil
}

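// setDevices builds the list of devices that are created inside the container
// and the corresponding device cgroup rules, honoring --privileged, --device
// and --device-cgroup-rule.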
func setDevices(s *specs.Spec, c *container.Container) error {
	// Build lists of devices allowed and created within the container.
	var devs []specs.Device
	devPermissions := s.Linux.Resources.Devices
	if c.HostConfig.Privileged {
		hostDevices, err := devices.HostDevices()
		if err != nil {
			return err
		}
		for _, d := range hostDevices {
			devs = append(devs, oci.Device(d))
		}
		rwm := "rwm"
		devPermissions = []specs.DeviceCgroup{
			{
				Allow:  true,
				Access: &rwm,
			},
		}
	} else {
		for _, deviceMapping := range c.HostConfig.Devices {
			d, dPermissions, err := oci.DevicesFromPath(deviceMapping.PathOnHost, deviceMapping.PathInContainer, deviceMapping.CgroupPermissions)
			if err != nil {
				return err
			}
			devs = append(devs, d...)
			devPermissions = append(devPermissions, dPermissions...)
		}

		for _, deviceCgroupRule := range c.HostConfig.DeviceCgroupRules {
			ss := deviceCgroupRuleRegex.FindAllStringSubmatch(deviceCgroupRule, -1)
			// Guard against rules that do not match the regexp at all, which
			// would otherwise panic when indexing ss[0].
			if len(ss) == 0 || len(ss[0]) != 5 {
				return fmt.Errorf("invalid device cgroup rule format: '%s'", deviceCgroupRule)
			}
			matches := ss[0]

			dPermissions := specs.DeviceCgroup{
				Allow:  true,
				Type:   &matches[1],
				Access: &matches[4],
			}
			if matches[2] == "*" {
				major := int64(-1)
				dPermissions.Major = &major
			} else {
				major, err := strconv.ParseInt(matches[2], 10, 64)
				if err != nil {
					return fmt.Errorf("invalid major value in device cgroup rule format: '%s'", deviceCgroupRule)
				}
				dPermissions.Major = &major
			}
			if matches[3] == "*" {
				minor := int64(-1)
				dPermissions.Minor = &minor
			} else {
				minor, err := strconv.ParseInt(matches[3], 10, 64)
				if err != nil {
					return fmt.Errorf("invalid minor value in device cgroup rule format: '%s'", deviceCgroupRule)
				}
				dPermissions.Minor = &minor
			}
			devPermissions = append(devPermissions, dPermissions)
		}
	}

	s.Linux.Devices = append(s.Linux.Devices, devs...)
	s.Linux.Resources.Devices = devPermissions
	return nil
}

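// setRlimits merges the container's ulimits with the daemon defaults and sets
// them as rlimits on the spec process.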
func setRlimits(daemon *Daemon, s *specs.Spec, c *container.Container) error {
	var rlimits []specs.Rlimit

	// We want to leave the original HostConfig alone so make a copy here
	hostConfig := *c.HostConfig
	// Merge with the daemon defaults
	daemon.mergeUlimits(&hostConfig)
	for _, ul := range hostConfig.Ulimits {
		rlimits = append(rlimits, specs.Rlimit{
			Type: "RLIMIT_" + strings.ToUpper(ul.Name),
			Soft: uint64(ul.Soft),
			Hard: uint64(ul.Hard),
		})
	}

	s.Process.Rlimits = rlimits
	return nil
}

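// setUser resolves the container's configured user and sets the UID, GID and
// additional GIDs on the spec process.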
func setUser(s *specs.Spec, c *container.Container) error {
	uid, gid, additionalGids, err := getUser(c, c.Config.User)
	if err != nil {
		return err
	}
	s.Process.User.UID = uid
	s.Process.User.GID = gid
	s.Process.User.AdditionalGids = additionalGids
	return nil
}

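// readUserFile opens a passwd- or group-style file from inside the container's
// root filesystem, resolving symlinks within that scope.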
func readUserFile(c *container.Container, p string) (io.ReadCloser, error) {
	fp, err := symlink.FollowSymlinkInScope(filepath.Join(c.BaseFS, p), c.BaseFS)
	if err != nil {
		return nil, err
	}
	return os.Open(fp)
}

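// getUser resolves username against the container's /etc/passwd and /etc/group
// files and returns the UID, GID and additional GIDs for the process.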
func getUser(c *container.Container, username string) (uint32, uint32, []uint32, error) {
	passwdPath, err := user.GetPasswdPath()
	if err != nil {
		return 0, 0, nil, err
	}
	groupPath, err := user.GetGroupPath()
	if err != nil {
		return 0, 0, nil, err
	}
	passwdFile, err := readUserFile(c, passwdPath)
	if err == nil {
		defer passwdFile.Close()
	}
	groupFile, err := readUserFile(c, groupPath)
	if err == nil {
		defer groupFile.Close()
	}

	execUser, err := user.GetExecUser(username, nil, passwdFile, groupFile)
	if err != nil {
		return 0, 0, nil, err
	}

	// todo: fix this double read by a change to libcontainer/user pkg
	groupFile, err = readUserFile(c, groupPath)
	if err == nil {
		defer groupFile.Close()
	}

	var addGroups []int
	if len(c.HostConfig.GroupAdd) > 0 {
		addGroups, err = user.GetAdditionalGroups(c.HostConfig.GroupAdd, groupFile)
		if err != nil {
			return 0, 0, nil, err
		}
	}
	uid := uint32(execUser.Uid)
	gid := uint32(execUser.Gid)
	sgids := append(execUser.Sgids, addGroups...)
	var additionalGids []uint32
	for _, g := range sgids {
		additionalGids = append(additionalGids, uint32(g))
	}
	return uid, gid, additionalGids, nil
}

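// setNamespace replaces an existing namespace of the same type in the spec, or
// appends the namespace if no entry of that type is present.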
func setNamespace(s *specs.Spec, ns specs.Namespace) {
	for i, n := range s.Linux.Namespaces {
		if n.Type == ns.Type {
			s.Linux.Namespaces[i] = ns
			return
		}
	}
	s.Linux.Namespaces = append(s.Linux.Namespaces, ns)
}

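// setCapabilities sets the process capabilities: privileged containers get the
// full capability set, otherwise CapAdd/CapDrop are applied to the defaults.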
func setCapabilities(s *specs.Spec, c *container.Container) error {
	var caplist []string
	var err error
	if c.HostConfig.Privileged {
		caplist = caps.GetAllCapabilities()
	} else {
		caplist, err = caps.TweakCapabilities(s.Process.Capabilities, c.HostConfig.CapAdd, c.HostConfig.CapDrop)
		if err != nil {
			return err
		}
	}
	s.Process.Capabilities = caplist
	return nil
}

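// setNamespaces configures the user, network, ipc, pid and uts namespaces of
// the spec according to the container's HostConfig.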
func setNamespaces(daemon *Daemon, s *specs.Spec, c *container.Container) error {
	userNS := false
	// user
	if c.HostConfig.UsernsMode.IsPrivate() {
		uidMap, gidMap := daemon.GetUIDGIDMaps()
		if uidMap != nil {
			userNS = true
			ns := specs.Namespace{Type: "user"}
			setNamespace(s, ns)
			s.Linux.UIDMappings = specMapping(uidMap)
			s.Linux.GIDMappings = specMapping(gidMap)
		}
	}

	// network
	if !c.Config.NetworkDisabled {
		ns := specs.Namespace{Type: "network"}
		parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2)
		if parts[0] == "container" {
			nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer())
			if err != nil {
				return err
			}
			ns.Path = fmt.Sprintf("/proc/%d/ns/net", nc.State.GetPID())
			if userNS {
				// to share a net namespace, they must also share a user namespace
				nsUser := specs.Namespace{Type: "user"}
				nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", nc.State.GetPID())
				setNamespace(s, nsUser)
			}
		} else if c.HostConfig.NetworkMode.IsHost() {
			ns.Path = c.NetworkSettings.SandboxKey
		}
		setNamespace(s, ns)
	}

	// ipc
	if c.HostConfig.IpcMode.IsContainer() {
		ns := specs.Namespace{Type: "ipc"}
		ic, err := daemon.getIpcContainer(c)
		if err != nil {
			return err
		}
		ns.Path = fmt.Sprintf("/proc/%d/ns/ipc", ic.State.GetPID())
		setNamespace(s, ns)
		if userNS {
			// to share an IPC namespace, they must also share a user namespace
			nsUser := specs.Namespace{Type: "user"}
			nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", ic.State.GetPID())
			setNamespace(s, nsUser)
		}
	} else if c.HostConfig.IpcMode.IsHost() {
		oci.RemoveNamespace(s, specs.NamespaceType("ipc"))
	} else {
		ns := specs.Namespace{Type: "ipc"}
		setNamespace(s, ns)
	}

	// pid
	if c.HostConfig.PidMode.IsContainer() {
		ns := specs.Namespace{Type: "pid"}
		pc, err := daemon.getPidContainer(c)
		if err != nil {
			return err
		}
		ns.Path = fmt.Sprintf("/proc/%d/ns/pid", pc.State.GetPID())
		setNamespace(s, ns)
		if userNS {
			// to share a PID namespace, they must also share a user namespace
			nsUser := specs.Namespace{Type: "user"}
			nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", pc.State.GetPID())
			setNamespace(s, nsUser)
		}
	} else if c.HostConfig.PidMode.IsHost() {
		oci.RemoveNamespace(s, specs.NamespaceType("pid"))
	} else {
		ns := specs.Namespace{Type: "pid"}
		setNamespace(s, ns)
	}

	// uts
	if c.HostConfig.UTSMode.IsHost() {
		oci.RemoveNamespace(s, specs.NamespaceType("uts"))
		s.Hostname = ""
	}

	return nil
}

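// specMapping converts idtools ID mappings into the OCI spec representation.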
func specMapping(s []idtools.IDMap) []specs.IDMapping {
	var ids []specs.IDMapping
	for _, item := range s {
		ids = append(ids, specs.IDMapping{
			HostID:      uint32(item.HostID),
			ContainerID: uint32(item.ContainerID),
			Size:        uint32(item.Size),
		})
	}
	return ids
}

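// getMountInfo returns the mount entry whose mountpoint matches dir, or nil if
// no entry matches.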
func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
	for _, m := range mountinfo {
		if m.Mountpoint == dir {
			return m
		}
	}
	return nil
}

// getSourceMount returns the source mount point of the directory passed in as
// an argument, along with that mount's optional fields.
func getSourceMount(source string) (string, string, error) {
	// Ensure any symlinks are resolved.
	sourcePath, err := filepath.EvalSymlinks(source)
	if err != nil {
		return "", "", err
	}

	mountinfos, err := mount.GetMounts()
	if err != nil {
		return "", "", err
	}

	mountinfo := getMountInfo(mountinfos, sourcePath)
	if mountinfo != nil {
		return sourcePath, mountinfo.Optional, nil
	}

	path := sourcePath
	for {
		path = filepath.Dir(path)

		mountinfo = getMountInfo(mountinfos, path)
		if mountinfo != nil {
			return path, mountinfo.Optional, nil
		}

		if path == "/" {
			break
		}
	}

	// If we are here, we did not find the parent mount. Something is wrong.
	return "", "", fmt.Errorf("Could not find source mount of %s", source)
}

// ensureShared checks that the mount point on which path is mounted is a
// shared mount.
func ensureShared(path string) error {
	sharedMount := false

	sourceMount, optionalOpts, err := getSourceMount(path)
	if err != nil {
		return err
	}
	// Make sure source mount point is shared.
	optsSplit := strings.Split(optionalOpts, " ")
	for _, opt := range optsSplit {
		if strings.HasPrefix(opt, "shared:") {
			sharedMount = true
			break
		}
	}

	if !sharedMount {
		return fmt.Errorf("Path %s is mounted on %s but it is not a shared mount.", path, sourceMount)
	}
	return nil
}

// ensureSharedOrSlave checks that the mount point on which path is mounted is
// either a shared or a slave mount.
func ensureSharedOrSlave(path string) error {
	sharedMount := false
	slaveMount := false

	sourceMount, optionalOpts, err := getSourceMount(path)
	if err != nil {
		return err
	}
	// Make sure source mount point is either shared or slave.
	optsSplit := strings.Split(optionalOpts, " ")
	for _, opt := range optsSplit {
		if strings.HasPrefix(opt, "shared:") {
			sharedMount = true
			break
		} else if strings.HasPrefix(opt, "master:") {
			slaveMount = true
			break
		}
	}

	if !sharedMount && !slaveMount {
		return fmt.Errorf("Path %s is mounted on %s but it is not a shared or slave mount.", path, sourceMount)
	}
	return nil
}

var (
	mountPropagationMap = map[string]int{
		"private":  mount.PRIVATE,
		"rprivate": mount.RPRIVATE,
		"shared":   mount.SHARED,
		"rshared":  mount.RSHARED,
		"slave":    mount.SLAVE,
		"rslave":   mount.RSLAVE,
	}

	mountPropagationReverseMap = map[int]string{
		mount.PRIVATE:  "private",
		mount.RPRIVATE: "rprivate",
		mount.SHARED:   "shared",
		mount.RSHARED:  "rshared",
		mount.SLAVE:    "slave",
		mount.RSLAVE:   "rslave",
	}
)

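// setMounts replaces the spec's default mounts with the user supplied mounts,
// building bind and tmpfs entries and adjusting mount and rootfs propagation.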
func setMounts(daemon *Daemon, s *specs.Spec, c *container.Container, mounts []container.Mount) error {
	userMounts := make(map[string]struct{})
	for _, m := range mounts {
		userMounts[m.Destination] = struct{}{}
	}

	// Filter out mounts that are overridden by user supplied mounts
	var defaultMounts []specs.Mount
	_, mountDev := userMounts["/dev"]
	for _, m := range s.Mounts {
		if _, ok := userMounts[m.Destination]; !ok {
			if mountDev && strings.HasPrefix(m.Destination, "/dev/") {
				continue
			}
			defaultMounts = append(defaultMounts, m)
		}
	}

	s.Mounts = defaultMounts
	for _, m := range mounts {
		for _, cm := range s.Mounts {
			if cm.Destination == m.Destination {
				return fmt.Errorf("Duplicate mount point '%s'", m.Destination)
			}
		}

		if m.Source == "tmpfs" {
			data := m.Data
			options := []string{"noexec", "nosuid", "nodev", string(volume.DefaultPropagationMode)}
			if data != "" {
				options = append(options, strings.Split(data, ",")...)
			}

			merged, err := mount.MergeTmpfsOptions(options)
			if err != nil {
				return err
			}

			s.Mounts = append(s.Mounts, specs.Mount{Destination: m.Destination, Source: m.Source, Type: "tmpfs", Options: merged})
			continue
		}

		mt := specs.Mount{Destination: m.Destination, Source: m.Source, Type: "bind"}

		// Determine property of RootPropagation based on volume
		// properties. If a volume is shared, then keep root propagation
		// shared. This should work for slave and private volumes too.
		//
		// For slave volumes, it can be either [r]shared/[r]slave.
		//
		// For private volumes any root propagation value should work.
		pFlag := mountPropagationMap[m.Propagation]
		if pFlag == mount.SHARED || pFlag == mount.RSHARED {
			if err := ensureShared(m.Source); err != nil {
				return err
			}
			rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
			if rootpg != mount.SHARED && rootpg != mount.RSHARED {
				s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.SHARED]
			}
		} else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE {
			if err := ensureSharedOrSlave(m.Source); err != nil {
				return err
			}
			rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
			if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
				s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.RSLAVE]
			}
		}

		opts := []string{"rbind"}
		if !m.Writable {
			opts = append(opts, "ro")
		}
		if pFlag != 0 {
			opts = append(opts, mountPropagationReverseMap[pFlag])
		}
		mt.Options = opts
		s.Mounts = append(s.Mounts, mt)
	}

	if s.Root.Readonly {
		for i, m := range s.Mounts {
			switch m.Destination {
			case "/proc", "/dev/pts", "/dev/mqueue": // /dev is remounted by runc
				continue
			}
			if _, ok := userMounts[m.Destination]; !ok {
				if !stringutils.InSlice(m.Options, "ro") {
					s.Mounts[i].Options = append(s.Mounts[i].Options, "ro")
				}
			}
		}
	}

	if c.HostConfig.Privileged {
		if !s.Root.Readonly {
			// clear readonly for /sys
			for i := range s.Mounts {
				if s.Mounts[i].Destination == "/sys" {
					clearReadOnly(&s.Mounts[i])
				}
			}
		}
		s.Linux.ReadonlyPaths = nil
		s.Linux.MaskedPaths = nil
	}

	// TODO: until a kernel/mount solution exists for handling remount in a user namespace,
	// we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
	if uidMap, _ := daemon.GetUIDGIDMaps(); uidMap != nil || c.HostConfig.Privileged {
		for i, m := range s.Mounts {
			if m.Type == "cgroup" {
				clearReadOnly(&s.Mounts[i])
			}
		}
	}

	return nil
}

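// populateCommonSpec sets up the root filesystem, process arguments, working
// directory, environment, hostname and (optionally) the docker-init binary on
// the spec.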
func (daemon *Daemon) populateCommonSpec(s *specs.Spec, c *container.Container) error {
	linkedEnv, err := daemon.setupLinkedContainers(c)
	if err != nil {
		return err
	}
	s.Root = specs.Root{
		Path:     c.BaseFS,
		Readonly: c.HostConfig.ReadonlyRootfs,
	}
	rootUID, rootGID := daemon.GetRemappedUIDGID()
	if err := c.SetupWorkingDirectory(rootUID, rootGID); err != nil {
		return err
	}
	cwd := c.Config.WorkingDir
	if len(cwd) == 0 {
		cwd = "/"
	}
	s.Process.Args = append([]string{c.Path}, c.Args...)

	// only add the custom init if it is specified and the container is running in its
	// own private pid namespace. It does not make sense to add one if it is running in
	// the host namespace or another container's pid namespace where we already have an init
	if c.HostConfig.PidMode.IsPrivate() {
		if (c.HostConfig.Init != nil && *c.HostConfig.Init) ||
			(c.HostConfig.Init == nil && daemon.configStore.Init) {
			s.Process.Args = append([]string{"/dev/init", "--", c.Path}, c.Args...)
			var path string
			if daemon.configStore.InitPath == "" && c.HostConfig.InitPath == "" {
				path, err = exec.LookPath(DefaultInitBinary)
				if err != nil {
					return err
				}
			}
			if daemon.configStore.InitPath != "" {
				path = daemon.configStore.InitPath
			}
			if c.HostConfig.InitPath != "" {
				path = c.HostConfig.InitPath
			}
			s.Mounts = append(s.Mounts, specs.Mount{
				Destination: "/dev/init",
				Type:        "bind",
				Source:      path,
				Options:     []string{"bind", "ro"},
			})
		}
	}
	s.Process.Cwd = cwd
	s.Process.Env = c.CreateDaemonEnvironment(c.Config.Tty, linkedEnv)
	s.Process.Terminal = c.Config.Tty
	s.Hostname = c.FullHostname()

	return nil
}

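// createSpec builds the complete OCI runtime spec for the container from its
// configuration and the daemon settings.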
func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) {
	s := oci.DefaultSpec()
	if err := daemon.populateCommonSpec(&s, c); err != nil {
		return nil, err
	}

	var cgroupsPath string
	scopePrefix := "docker"
	parent := "/docker"
	useSystemd := UsingSystemd(daemon.configStore)
	if useSystemd {
		parent = "system.slice"
	}

	if c.HostConfig.CgroupParent != "" {
		parent = c.HostConfig.CgroupParent
	} else if daemon.configStore.CgroupParent != "" {
		parent = daemon.configStore.CgroupParent
	}

	if useSystemd {
		cgroupsPath = parent + ":" + scopePrefix + ":" + c.ID
		logrus.Debugf("createSpec: cgroupsPath: %s", cgroupsPath)
	} else {
		cgroupsPath = filepath.Join(parent, c.ID)
	}
	s.Linux.CgroupsPath = &cgroupsPath

	if err := setResources(&s, c.HostConfig.Resources); err != nil {
		return nil, fmt.Errorf("linux runtime spec resources: %v", err)
	}
	s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj
	s.Linux.Sysctl = c.HostConfig.Sysctls

	p := *s.Linux.CgroupsPath
	if useSystemd {
		initPath, err := cgroups.GetInitCgroupDir("cpu")
		if err != nil {
			return nil, err
		}
		// Assign the error so the check below actually covers this call.
		p, err = cgroups.GetThisCgroupDir("cpu")
		if err != nil {
			return nil, err
		}
		p = filepath.Join(initPath, p)
	}

	// Clean path to guard against things like ../../../BAD
	parentPath := filepath.Dir(p)
	if !filepath.IsAbs(parentPath) {
		parentPath = filepath.Clean("/" + parentPath)
	}

	if err := daemon.initCgroupsPath(parentPath); err != nil {
		return nil, fmt.Errorf("linux init cgroups path: %v", err)
	}
	if err := setDevices(&s, c); err != nil {
		return nil, fmt.Errorf("linux runtime spec devices: %v", err)
	}
	if err := setRlimits(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux runtime spec rlimits: %v", err)
	}
	if err := setUser(&s, c); err != nil {
		return nil, fmt.Errorf("linux spec user: %v", err)
	}
	if err := setNamespaces(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux spec namespaces: %v", err)
	}
	if err := setCapabilities(&s, c); err != nil {
		return nil, fmt.Errorf("linux spec capabilities: %v", err)
	}
	if err := setSeccomp(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux seccomp: %v", err)
	}

	if err := daemon.setupIpcDirs(c); err != nil {
		return nil, err
	}

	if err := daemon.setupSecretDir(c); err != nil {
		return nil, err
	}

	ms, err := daemon.setupMounts(c)
	if err != nil {
		return nil, err
	}

	ms = append(ms, c.IpcMounts()...)

	tmpfsMounts, err := c.TmpfsMounts()
	if err != nil {
		return nil, err
	}
	ms = append(ms, tmpfsMounts...)

	if m := c.SecretMount(); m != nil {
		ms = append(ms, *m)
	}

	sort.Sort(mounts(ms))
	if err := setMounts(daemon, &s, c, ms); err != nil {
		return nil, fmt.Errorf("linux mounts: %v", err)
	}

	for _, ns := range s.Linux.Namespaces {
		if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled {
			target, err := os.Readlink(filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe"))
			if err != nil {
				return nil, err
			}

			s.Hooks = specs.Hooks{
				Prestart: []specs.Hook{{
					Path: target, // FIXME: cross-platform
					Args: []string{"libnetwork-setkey", c.ID, daemon.netController.ID()},
				}},
			}
		}
	}

	if apparmor.IsEnabled() {
		var appArmorProfile string
		if c.AppArmorProfile != "" {
			appArmorProfile = c.AppArmorProfile
		} else if c.HostConfig.Privileged {
			appArmorProfile = "unconfined"
		} else {
			appArmorProfile = "docker-default"
		}

		if appArmorProfile == "docker-default" {
			// Unattended upgrades and other fun services can unload AppArmor
			// profiles inadvertently. Since we cannot store our profile in
			// /etc/apparmor.d, nor can we practically add other ways of
			// telling the system to keep our profile loaded, in order to make
			// sure that we keep the default profile enabled we dynamically
			// reload it if necessary.
			if err := ensureDefaultAppArmorProfile(); err != nil {
				return nil, err
			}
		}

		s.Process.ApparmorProfile = appArmorProfile
	}
	s.Process.SelinuxLabel = c.GetProcessLabel()
	s.Process.NoNewPrivileges = c.NoNewPrivileges
	s.Linux.MountLabel = c.MountLabel

	return (*specs.Spec)(&s), nil
}

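// clearReadOnly removes the "ro" option from a mount.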
func clearReadOnly(m *specs.Mount) {
	var opt []string
	for _, o := range m.Options {
		if o != "ro" {
			opt = append(opt, o)
		}
	}
	m.Options = opt
}

// mergeUlimits merges the Ulimits from HostConfig with the daemon defaults,
// and updates HostConfig with the result.
func (daemon *Daemon) mergeUlimits(c *containertypes.HostConfig) {
	ulimits := c.Ulimits
	// Merge ulimits with daemon defaults
	ulIdx := make(map[string]struct{})
	for _, ul := range ulimits {
		ulIdx[ul.Name] = struct{}{}
	}
	for name, ul := range daemon.configStore.Ulimits {
		if _, exists := ulIdx[name]; !exists {
			ulimits = append(ulimits, ul)
		}
	}
	c.Ulimits = ulimits
}