// oci_linux.go

package daemon

import (
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"sort"
	"strconv"
	"strings"

	"github.com/Sirupsen/logrus"
	containertypes "github.com/docker/docker/api/types/container"
	"github.com/docker/docker/container"
	"github.com/docker/docker/daemon/caps"
	"github.com/docker/docker/oci"
	"github.com/docker/docker/pkg/idtools"
	"github.com/docker/docker/pkg/mount"
	"github.com/docker/docker/pkg/stringutils"
	"github.com/docker/docker/pkg/symlink"
	"github.com/docker/docker/volume"
	"github.com/opencontainers/runc/libcontainer/apparmor"
	"github.com/opencontainers/runc/libcontainer/cgroups"
	"github.com/opencontainers/runc/libcontainer/devices"
	"github.com/opencontainers/runc/libcontainer/user"
	specs "github.com/opencontainers/runtime-spec/specs-go"
)
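
// setResources translates the container's resource limits (block I/O
// weights and throttles, memory, CPU, and pids) from the HostConfig into
// the Linux resources section of the runtime spec.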
func setResources(s *specs.Spec, r containertypes.Resources) error {
	weightDevices, err := getBlkioWeightDevices(r)
	if err != nil {
		return err
	}
	readBpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceReadBps)
	if err != nil {
		return err
	}
	writeBpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceWriteBps)
	if err != nil {
		return err
	}
	readIOpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceReadIOps)
	if err != nil {
		return err
	}
	writeIOpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceWriteIOps)
	if err != nil {
		return err
	}

	memoryRes := getMemoryResources(r)
	cpuRes := getCPUResources(r)
	blkioWeight := r.BlkioWeight

	specResources := &specs.Resources{
		Memory: memoryRes,
		CPU:    cpuRes,
		BlockIO: &specs.BlockIO{
			Weight:                  &blkioWeight,
			WeightDevice:            weightDevices,
			ThrottleReadBpsDevice:   readBpsDevice,
			ThrottleWriteBpsDevice:  writeBpsDevice,
			ThrottleReadIOPSDevice:  readIOpsDevice,
			ThrottleWriteIOPSDevice: writeIOpsDevice,
		},
		DisableOOMKiller: r.OomKillDisable,
		Pids: &specs.Pids{
			Limit: &r.PidsLimit,
		},
	}

	if s.Linux.Resources != nil && len(s.Linux.Resources.Devices) > 0 {
		specResources.Devices = s.Linux.Resources.Devices
	}

	s.Linux.Resources = specResources
	return nil
}
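
// setDevices populates the list of devices exposed inside the container and
// the matching device cgroup rules. Privileged containers are given every
// host device with "rwm" access; otherwise only the explicitly requested
// device mappings are added.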
func setDevices(s *specs.Spec, c *container.Container) error {
	// Build lists of devices allowed and created within the container.
	var devs []specs.Device
	devPermissions := s.Linux.Resources.Devices
	if c.HostConfig.Privileged {
		hostDevices, err := devices.HostDevices()
		if err != nil {
			return err
		}
		for _, d := range hostDevices {
			devs = append(devs, oci.Device(d))
		}
		rwm := "rwm"
		devPermissions = []specs.DeviceCgroup{
			{
				Allow:  true,
				Access: &rwm,
			},
		}
	} else {
		for _, deviceMapping := range c.HostConfig.Devices {
			d, dPermissions, err := oci.DevicesFromPath(deviceMapping.PathOnHost, deviceMapping.PathInContainer, deviceMapping.CgroupPermissions)
			if err != nil {
				return err
			}
			devs = append(devs, d...)
			devPermissions = append(devPermissions, dPermissions...)
		}
	}

	s.Linux.Devices = append(s.Linux.Devices, devs...)
	s.Linux.Resources.Devices = devPermissions
	return nil
}
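
// setRlimits merges the daemon's default ulimits with the container's
// ulimits and records them on the spec process as RLIMIT_* entries.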
func setRlimits(daemon *Daemon, s *specs.Spec, c *container.Container) error {
	var rlimits []specs.Rlimit

	// We want to leave the original HostConfig alone so make a copy here
	hostConfig := *c.HostConfig
	// Merge with the daemon defaults
	daemon.mergeUlimits(&hostConfig)
	for _, ul := range hostConfig.Ulimits {
		rlimits = append(rlimits, specs.Rlimit{
			Type: "RLIMIT_" + strings.ToUpper(ul.Name),
			Soft: uint64(ul.Soft),
			Hard: uint64(ul.Hard),
		})
	}

	s.Process.Rlimits = rlimits
	return nil
}
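
// setUser resolves the container's configured user to numeric IDs and sets
// the UID, GID, and additional group IDs on the spec process.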
func setUser(s *specs.Spec, c *container.Container) error {
	uid, gid, additionalGids, err := getUser(c, c.Config.User)
	if err != nil {
		return err
	}
	s.Process.User.UID = uid
	s.Process.User.GID = gid
	s.Process.User.AdditionalGids = additionalGids
	return nil
}
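
// readUserFile opens a passwd- or group-style file from within the
// container's root filesystem, resolving symlinks only inside that scope.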
func readUserFile(c *container.Container, p string) (io.ReadCloser, error) {
	fp, err := symlink.FollowSymlinkInScope(filepath.Join(c.BaseFS, p), c.BaseFS)
	if err != nil {
		return nil, err
	}
	return os.Open(fp)
}
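
// getUser resolves username against the container's passwd and group files
// and returns the UID, GID, and any additional group IDs requested through
// HostConfig.GroupAdd.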
func getUser(c *container.Container, username string) (uint32, uint32, []uint32, error) {
	passwdPath, err := user.GetPasswdPath()
	if err != nil {
		return 0, 0, nil, err
	}
	groupPath, err := user.GetGroupPath()
	if err != nil {
		return 0, 0, nil, err
	}
	passwdFile, err := readUserFile(c, passwdPath)
	if err == nil {
		defer passwdFile.Close()
	}
	groupFile, err := readUserFile(c, groupPath)
	if err == nil {
		defer groupFile.Close()
	}

	execUser, err := user.GetExecUser(username, nil, passwdFile, groupFile)
	if err != nil {
		return 0, 0, nil, err
	}

	// todo: fix this double read by a change to libcontainer/user pkg
	groupFile, err = readUserFile(c, groupPath)
	if err == nil {
		defer groupFile.Close()
	}
	var addGroups []int
	if len(c.HostConfig.GroupAdd) > 0 {
		addGroups, err = user.GetAdditionalGroups(c.HostConfig.GroupAdd, groupFile)
		if err != nil {
			return 0, 0, nil, err
		}
	}
	uid := uint32(execUser.Uid)
	gid := uint32(execUser.Gid)
	sgids := append(execUser.Sgids, addGroups...)
	var additionalGids []uint32
	for _, g := range sgids {
		additionalGids = append(additionalGids, uint32(g))
	}
	return uid, gid, additionalGids, nil
}
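
// setNamespace replaces an existing namespace of the same type in the spec,
// or appends it if no namespace of that type is present.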
func setNamespace(s *specs.Spec, ns specs.Namespace) {
	for i, n := range s.Linux.Namespaces {
		if n.Type == ns.Type {
			s.Linux.Namespaces[i] = ns
			return
		}
	}
	s.Linux.Namespaces = append(s.Linux.Namespaces, ns)
}
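
// setCapabilities sets the process capabilities: all capabilities for
// privileged containers, otherwise the default set adjusted by the
// container's CapAdd and CapDrop settings.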
func setCapabilities(s *specs.Spec, c *container.Container) error {
	var caplist []string
	var err error
	if c.HostConfig.Privileged {
		caplist = caps.GetAllCapabilities()
	} else {
		caplist, err = caps.TweakCapabilities(s.Process.Capabilities, c.HostConfig.CapAdd, c.HostConfig.CapDrop)
		if err != nil {
			return err
		}
	}
	s.Process.Capabilities = caplist
	return nil
}
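
// setNamespaces configures the user, network, ipc, pid, and uts namespaces
// for the container, joining another container's or the host's namespaces
// where the HostConfig requests it.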
func setNamespaces(daemon *Daemon, s *specs.Spec, c *container.Container) error {
	userNS := false
	// user
	if c.HostConfig.UsernsMode.IsPrivate() {
		uidMap, gidMap := daemon.GetUIDGIDMaps()
		if uidMap != nil {
			userNS = true
			ns := specs.Namespace{Type: "user"}
			setNamespace(s, ns)
			s.Linux.UIDMappings = specMapping(uidMap)
			s.Linux.GIDMappings = specMapping(gidMap)
		}
	}
	// network
	if !c.Config.NetworkDisabled {
		ns := specs.Namespace{Type: "network"}
		parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2)
		if parts[0] == "container" {
			nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer())
			if err != nil {
				return err
			}
			ns.Path = fmt.Sprintf("/proc/%d/ns/net", nc.State.GetPID())
			if userNS {
				// to share a net namespace, they must also share a user namespace
				nsUser := specs.Namespace{Type: "user"}
				nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", nc.State.GetPID())
				setNamespace(s, nsUser)
			}
		} else if c.HostConfig.NetworkMode.IsHost() {
			ns.Path = c.NetworkSettings.SandboxKey
		}
		setNamespace(s, ns)
	}
	// ipc
	if c.HostConfig.IpcMode.IsContainer() {
		ns := specs.Namespace{Type: "ipc"}
		ic, err := daemon.getIpcContainer(c)
		if err != nil {
			return err
		}
		ns.Path = fmt.Sprintf("/proc/%d/ns/ipc", ic.State.GetPID())
		setNamespace(s, ns)
		if userNS {
			// to share an IPC namespace, they must also share a user namespace
			nsUser := specs.Namespace{Type: "user"}
			nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", ic.State.GetPID())
			setNamespace(s, nsUser)
		}
	} else if c.HostConfig.IpcMode.IsHost() {
		oci.RemoveNamespace(s, specs.NamespaceType("ipc"))
	} else {
		ns := specs.Namespace{Type: "ipc"}
		setNamespace(s, ns)
	}
	// pid
	if c.HostConfig.PidMode.IsContainer() {
		ns := specs.Namespace{Type: "pid"}
		pc, err := daemon.getPidContainer(c)
		if err != nil {
			return err
		}
		ns.Path = fmt.Sprintf("/proc/%d/ns/pid", pc.State.GetPID())
		setNamespace(s, ns)
		if userNS {
			// to share a PID namespace, they must also share a user namespace
			nsUser := specs.Namespace{Type: "user"}
			nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", pc.State.GetPID())
			setNamespace(s, nsUser)
		}
	} else if c.HostConfig.PidMode.IsHost() {
		oci.RemoveNamespace(s, specs.NamespaceType("pid"))
	} else {
		ns := specs.Namespace{Type: "pid"}
		setNamespace(s, ns)
	}
	// uts
	if c.HostConfig.UTSMode.IsHost() {
		oci.RemoveNamespace(s, specs.NamespaceType("uts"))
		s.Hostname = ""
	}

	return nil
}
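
// specMapping converts idtools ID maps into the OCI spec's IDMapping form.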
func specMapping(s []idtools.IDMap) []specs.IDMapping {
	var ids []specs.IDMapping
	for _, item := range s {
		ids = append(ids, specs.IDMapping{
			HostID:      uint32(item.HostID),
			ContainerID: uint32(item.ContainerID),
			Size:        uint32(item.Size),
		})
	}
	return ids
}
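
// getMountInfo returns the entry from mountinfo whose mountpoint matches
// dir, or nil if there is no match.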
func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
	for _, m := range mountinfo {
		if m.Mountpoint == dir {
			return m
		}
	}
	return nil
}

// getSourceMount returns the source mount point of the directory passed in
// as argument, along with its optional fields.
func getSourceMount(source string) (string, string, error) {
	// Ensure any symlinks are resolved.
	sourcePath, err := filepath.EvalSymlinks(source)
	if err != nil {
		return "", "", err
	}

	mountinfos, err := mount.GetMounts()
	if err != nil {
		return "", "", err
	}

	mountinfo := getMountInfo(mountinfos, sourcePath)
	if mountinfo != nil {
		return sourcePath, mountinfo.Optional, nil
	}

	path := sourcePath
	for {
		path = filepath.Dir(path)

		mountinfo = getMountInfo(mountinfos, path)
		if mountinfo != nil {
			return path, mountinfo.Optional, nil
		}

		if path == "/" {
			break
		}
	}

	// If we are here, we did not find parent mount. Something is wrong.
	return "", "", fmt.Errorf("Could not find source mount of %s", source)
}

// ensureShared checks that the mount point on which path is mounted is a
// shared mount.
func ensureShared(path string) error {
	sharedMount := false

	sourceMount, optionalOpts, err := getSourceMount(path)
	if err != nil {
		return err
	}
	// Make sure source mount point is shared.
	optsSplit := strings.Split(optionalOpts, " ")
	for _, opt := range optsSplit {
		if strings.HasPrefix(opt, "shared:") {
			sharedMount = true
			break
		}
	}

	if !sharedMount {
		return fmt.Errorf("Path %s is mounted on %s but it is not a shared mount.", path, sourceMount)
	}
	return nil
}

// ensureSharedOrSlave checks that the mount point on which path is mounted
// is either a shared or a slave mount.
func ensureSharedOrSlave(path string) error {
	sharedMount := false
	slaveMount := false

	sourceMount, optionalOpts, err := getSourceMount(path)
	if err != nil {
		return err
	}
	// Make sure source mount point is shared or slave.
	optsSplit := strings.Split(optionalOpts, " ")
	for _, opt := range optsSplit {
		if strings.HasPrefix(opt, "shared:") {
			sharedMount = true
			break
		} else if strings.HasPrefix(opt, "master:") {
			slaveMount = true
			break
		}
	}

	if !sharedMount && !slaveMount {
		return fmt.Errorf("Path %s is mounted on %s but it is not a shared or slave mount.", path, sourceMount)
	}
	return nil
}
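
// mountPropagationMap and mountPropagationReverseMap translate between the
// user-facing propagation mode names and the mount package's flag values.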
var (
	mountPropagationMap = map[string]int{
		"private":  mount.PRIVATE,
		"rprivate": mount.RPRIVATE,
		"shared":   mount.SHARED,
		"rshared":  mount.RSHARED,
		"slave":    mount.SLAVE,
		"rslave":   mount.RSLAVE,
	}

	mountPropagationReverseMap = map[int]string{
		mount.PRIVATE:  "private",
		mount.RPRIVATE: "rprivate",
		mount.SHARED:   "shared",
		mount.RSHARED:  "rshared",
		mount.SLAVE:    "slave",
		mount.RSLAVE:   "rslave",
	}
)
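
// setMounts adds the container's mounts to the spec: default mounts that the
// user overrides are dropped, tmpfs mounts get merged options, bind mounts
// have their propagation mode validated against the host, and read-only
// options are applied when the rootfs is read-only.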
func setMounts(daemon *Daemon, s *specs.Spec, c *container.Container, mounts []container.Mount) error {
	userMounts := make(map[string]struct{})
	for _, m := range mounts {
		userMounts[m.Destination] = struct{}{}
	}

	// Filter out mounts that are overridden by user supplied mounts
	var defaultMounts []specs.Mount
	_, mountDev := userMounts["/dev"]
	for _, m := range s.Mounts {
		if _, ok := userMounts[m.Destination]; !ok {
			if mountDev && strings.HasPrefix(m.Destination, "/dev/") {
				continue
			}
			defaultMounts = append(defaultMounts, m)
		}
	}

	s.Mounts = defaultMounts
	for _, m := range mounts {
		for _, cm := range s.Mounts {
			if cm.Destination == m.Destination {
				return fmt.Errorf("Duplicate mount point '%s'", m.Destination)
			}
		}

		if m.Source == "tmpfs" {
			data := m.Data
			options := []string{"noexec", "nosuid", "nodev", string(volume.DefaultPropagationMode)}
			if data != "" {
				options = append(options, strings.Split(data, ",")...)
			}

			merged, err := mount.MergeTmpfsOptions(options)
			if err != nil {
				return err
			}

			s.Mounts = append(s.Mounts, specs.Mount{Destination: m.Destination, Source: m.Source, Type: "tmpfs", Options: merged})
			continue
		}

		mt := specs.Mount{Destination: m.Destination, Source: m.Source, Type: "bind"}

		// Determine property of RootPropagation based on volume
		// properties. If a volume is shared, then keep root propagation
		// shared. This should work for slave and private volumes too.
		//
		// For slave volumes, it can be either [r]shared/[r]slave.
		//
		// For private volumes any root propagation value should work.
		pFlag := mountPropagationMap[m.Propagation]
		if pFlag == mount.SHARED || pFlag == mount.RSHARED {
			if err := ensureShared(m.Source); err != nil {
				return err
			}
			rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
			if rootpg != mount.SHARED && rootpg != mount.RSHARED {
				s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.SHARED]
			}
		} else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE {
			if err := ensureSharedOrSlave(m.Source); err != nil {
				return err
			}
			rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
			if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
				s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.RSLAVE]
			}
		}

		opts := []string{"rbind"}
		if !m.Writable {
			opts = append(opts, "ro")
		}
		if pFlag != 0 {
			opts = append(opts, mountPropagationReverseMap[pFlag])
		}
		mt.Options = opts
		s.Mounts = append(s.Mounts, mt)
	}

	if s.Root.Readonly {
		for i, m := range s.Mounts {
			switch m.Destination {
			case "/proc", "/dev/pts", "/dev/mqueue": // /dev is remounted by runc
				continue
			}
			if _, ok := userMounts[m.Destination]; !ok {
				if !stringutils.InSlice(m.Options, "ro") {
					s.Mounts[i].Options = append(s.Mounts[i].Options, "ro")
				}
			}
		}
	}

	if c.HostConfig.Privileged {
		if !s.Root.Readonly {
			// clear readonly for /sys
			for i := range s.Mounts {
				if s.Mounts[i].Destination == "/sys" {
					clearReadOnly(&s.Mounts[i])
				}
			}
		}
		s.Linux.ReadonlyPaths = nil
		s.Linux.MaskedPaths = nil
	}

	// TODO: until a kernel/mount solution exists for handling remount in a user namespace,
	// we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
	if uidMap, _ := daemon.GetUIDGIDMaps(); uidMap != nil || c.HostConfig.Privileged {
		for i, m := range s.Mounts {
			if m.Type == "cgroup" {
				clearReadOnly(&s.Mounts[i])
			}
		}
	}

	return nil
}
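
// populateCommonSpec fills in the parts of the spec shared by all
// containers: the root filesystem, process arguments and environment,
// working directory, the optional /dev/init mount, and the hostname.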
func (daemon *Daemon) populateCommonSpec(s *specs.Spec, c *container.Container) error {
	linkedEnv, err := daemon.setupLinkedContainers(c)
	if err != nil {
		return err
	}
	s.Root = specs.Root{
		Path:     c.BaseFS,
		Readonly: c.HostConfig.ReadonlyRootfs,
	}
	rootUID, rootGID := daemon.GetRemappedUIDGID()
	if err := c.SetupWorkingDirectory(rootUID, rootGID); err != nil {
		return err
	}
	cwd := c.Config.WorkingDir
	if len(cwd) == 0 {
		cwd = "/"
	}
	s.Process.Args = append([]string{c.Path}, c.Args...)

	// only add the custom init if it is specified and the container is running in its
	// own private pid namespace. It does not make sense to add if it is running in the
	// host namespace or another container's pid namespace where we already have an init
	if c.HostConfig.PidMode.IsPrivate() {
		if (c.HostConfig.Init != nil && *c.HostConfig.Init) ||
			(c.HostConfig.Init == nil && daemon.configStore.Init) {
			s.Process.Args = append([]string{"/dev/init", c.Path}, c.Args...)
			var path string
			if daemon.configStore.InitPath == "" && c.HostConfig.InitPath == "" {
				path, err = exec.LookPath(DefaultInitBinary)
				if err != nil {
					return err
				}
			}
			if daemon.configStore.InitPath != "" {
				path = daemon.configStore.InitPath
			}
			if c.HostConfig.InitPath != "" {
				path = c.HostConfig.InitPath
			}
			s.Mounts = append(s.Mounts, specs.Mount{
				Destination: "/dev/init",
				Type:        "bind",
				Source:      path,
				Options:     []string{"bind", "ro"},
			})
		}
	}
	s.Process.Cwd = cwd
	s.Process.Env = c.CreateDaemonEnvironment(c.Config.Tty, linkedEnv)
	s.Process.Terminal = c.Config.Tty
	s.Hostname = c.FullHostname()

	return nil
}
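
// createSpec assembles the complete OCI runtime spec for a container: the
// common spec, cgroups path, resources, devices, rlimits, user, namespaces,
// capabilities, seccomp profile, mounts, the libnetwork prestart hook, and
// the AppArmor/SELinux labels.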
func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) {
	s := oci.DefaultSpec()
	if err := daemon.populateCommonSpec(&s, c); err != nil {
		return nil, err
	}

	var cgroupsPath string
	scopePrefix := "docker"
	parent := "/docker"
	useSystemd := UsingSystemd(daemon.configStore)
	if useSystemd {
		parent = "system.slice"
	}

	if c.HostConfig.CgroupParent != "" {
		parent = c.HostConfig.CgroupParent
	} else if daemon.configStore.CgroupParent != "" {
		parent = daemon.configStore.CgroupParent
	}

	if useSystemd {
		cgroupsPath = parent + ":" + scopePrefix + ":" + c.ID
		logrus.Debugf("createSpec: cgroupsPath: %s", cgroupsPath)
	} else {
		cgroupsPath = filepath.Join(parent, c.ID)
	}
	s.Linux.CgroupsPath = &cgroupsPath

	if err := setResources(&s, c.HostConfig.Resources); err != nil {
		return nil, fmt.Errorf("linux runtime spec resources: %v", err)
	}
	s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj
	s.Linux.Sysctl = c.HostConfig.Sysctls

	p := *s.Linux.CgroupsPath
	if useSystemd {
		initPath, err := cgroups.GetInitCgroupDir("cpu")
		if err != nil {
			return nil, err
		}
		p, err = cgroups.GetThisCgroupDir("cpu")
		if err != nil {
			return nil, err
		}
		p = filepath.Join(initPath, p)
	}

	// Clean path to guard against things like ../../../BAD
	parentPath := filepath.Dir(p)
	if !filepath.IsAbs(parentPath) {
		parentPath = filepath.Clean("/" + parentPath)
	}

	if err := daemon.initCgroupsPath(parentPath); err != nil {
		return nil, fmt.Errorf("linux init cgroups path: %v", err)
	}
	if err := setDevices(&s, c); err != nil {
		return nil, fmt.Errorf("linux runtime spec devices: %v", err)
	}
	if err := setRlimits(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux runtime spec rlimits: %v", err)
	}
	if err := setUser(&s, c); err != nil {
		return nil, fmt.Errorf("linux spec user: %v", err)
	}
	if err := setNamespaces(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux spec namespaces: %v", err)
	}
	if err := setCapabilities(&s, c); err != nil {
		return nil, fmt.Errorf("linux spec capabilities: %v", err)
	}
	if err := setSeccomp(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux seccomp: %v", err)
	}

	if err := daemon.setupIpcDirs(c); err != nil {
		return nil, err
	}

	if err := daemon.setupSecretDir(c); err != nil {
		return nil, err
	}

	ms, err := daemon.setupMounts(c)
	if err != nil {
		return nil, err
	}

	ms = append(ms, c.IpcMounts()...)

	tmpfsMounts, err := c.TmpfsMounts()
	if err != nil {
		return nil, err
	}
	ms = append(ms, tmpfsMounts...)

	if m := c.SecretMount(); m != nil {
		ms = append(ms, *m)
	}

	sort.Sort(mounts(ms))
	if err := setMounts(daemon, &s, c, ms); err != nil {
		return nil, fmt.Errorf("linux mounts: %v", err)
	}

	for _, ns := range s.Linux.Namespaces {
		if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled {
			target, err := os.Readlink(filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe"))
			if err != nil {
				return nil, err
			}

			s.Hooks = specs.Hooks{
				Prestart: []specs.Hook{{
					Path: target, // FIXME: cross-platform
					Args: []string{"libnetwork-setkey", c.ID, daemon.netController.ID()},
				}},
			}
		}
	}

	if apparmor.IsEnabled() {
		appArmorProfile := "docker-default"
		if len(c.AppArmorProfile) > 0 {
			appArmorProfile = c.AppArmorProfile
		} else if c.HostConfig.Privileged {
			appArmorProfile = "unconfined"
		}
		s.Process.ApparmorProfile = appArmorProfile
	}
	s.Process.SelinuxLabel = c.GetProcessLabel()
	s.Process.NoNewPrivileges = c.NoNewPrivileges
	s.Linux.MountLabel = c.MountLabel

	return (*specs.Spec)(&s), nil
}
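
// clearReadOnly removes the "ro" option from a mount's options.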
func clearReadOnly(m *specs.Mount) {
	var opt []string
	for _, o := range m.Options {
		if o != "ro" {
			opt = append(opt, o)
		}
	}
	m.Options = opt
}

// mergeUlimits merges the Ulimits from HostConfig with the daemon defaults
// and updates the HostConfig.
func (daemon *Daemon) mergeUlimits(c *containertypes.HostConfig) {
	ulimits := c.Ulimits
	// Merge ulimits with daemon defaults
	ulIdx := make(map[string]struct{})
	for _, ul := range ulimits {
		ulIdx[ul.Name] = struct{}{}
	}
	for name, ul := range daemon.configStore.Ulimits {
		if _, exists := ulIdx[name]; !exists {
			ulimits = append(ulimits, ul)
		}
	}
	c.Ulimits = ulimits
}