oci_linux.go 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865
  1. package daemon
  2. import (
  3. "fmt"
  4. "io"
  5. "os"
  6. "os/exec"
  7. "path/filepath"
  8. "regexp"
  9. "sort"
  10. "strconv"
  11. "strings"
  12. containertypes "github.com/docker/docker/api/types/container"
  13. "github.com/docker/docker/container"
  14. "github.com/docker/docker/daemon/caps"
  15. daemonconfig "github.com/docker/docker/daemon/config"
  16. "github.com/docker/docker/oci"
  17. "github.com/docker/docker/pkg/idtools"
  18. "github.com/docker/docker/pkg/mount"
  19. "github.com/docker/docker/pkg/stringutils"
  20. "github.com/docker/docker/volume"
  21. "github.com/opencontainers/runc/libcontainer/apparmor"
  22. "github.com/opencontainers/runc/libcontainer/cgroups"
  23. "github.com/opencontainers/runc/libcontainer/devices"
  24. "github.com/opencontainers/runc/libcontainer/user"
  25. specs "github.com/opencontainers/runtime-spec/specs-go"
  26. "github.com/sirupsen/logrus"
  27. )
  // deviceCgroupRuleRegex matches a device cgroup rule written as
  // "<type> <major>:<minor> <access>": type is one of a, c or b; major and
  // minor are each a decimal number or "*"; access is one to three characters
  // drawn from r, w and m. Capture groups 1-4 hold those four fields.
  var deviceCgroupRuleRegex = regexp.MustCompile(`^([acb]) ([0-9]+|\*):([0-9]+|\*) ([rwm]{1,3})$`)
  32. func setResources(s *specs.Spec, r containertypes.Resources) error {
  33. weightDevices, err := getBlkioWeightDevices(r)
  34. if err != nil {
  35. return err
  36. }
  37. readBpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceReadBps)
  38. if err != nil {
  39. return err
  40. }
  41. writeBpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceWriteBps)
  42. if err != nil {
  43. return err
  44. }
  45. readIOpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceReadIOps)
  46. if err != nil {
  47. return err
  48. }
  49. writeIOpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceWriteIOps)
  50. if err != nil {
  51. return err
  52. }
  53. memoryRes := getMemoryResources(r)
  54. cpuRes, err := getCPUResources(r)
  55. if err != nil {
  56. return err
  57. }
  58. blkioWeight := r.BlkioWeight
  59. specResources := &specs.LinuxResources{
  60. Memory: memoryRes,
  61. CPU: cpuRes,
  62. BlockIO: &specs.LinuxBlockIO{
  63. Weight: &blkioWeight,
  64. WeightDevice: weightDevices,
  65. ThrottleReadBpsDevice: readBpsDevice,
  66. ThrottleWriteBpsDevice: writeBpsDevice,
  67. ThrottleReadIOPSDevice: readIOpsDevice,
  68. ThrottleWriteIOPSDevice: writeIOpsDevice,
  69. },
  70. Pids: &specs.LinuxPids{
  71. Limit: r.PidsLimit,
  72. },
  73. }
  74. if s.Linux.Resources != nil && len(s.Linux.Resources.Devices) > 0 {
  75. specResources.Devices = s.Linux.Resources.Devices
  76. }
  77. s.Linux.Resources = specResources
  78. return nil
  79. }
  80. func setDevices(s *specs.Spec, c *container.Container) error {
  81. // Build lists of devices allowed and created within the container.
  82. var devs []specs.LinuxDevice
  83. devPermissions := s.Linux.Resources.Devices
  84. if c.HostConfig.Privileged {
  85. hostDevices, err := devices.HostDevices()
  86. if err != nil {
  87. return err
  88. }
  89. for _, d := range hostDevices {
  90. devs = append(devs, oci.Device(d))
  91. }
  92. devPermissions = []specs.LinuxDeviceCgroup{
  93. {
  94. Allow: true,
  95. Access: "rwm",
  96. },
  97. }
  98. } else {
  99. for _, deviceMapping := range c.HostConfig.Devices {
  100. d, dPermissions, err := oci.DevicesFromPath(deviceMapping.PathOnHost, deviceMapping.PathInContainer, deviceMapping.CgroupPermissions)
  101. if err != nil {
  102. return err
  103. }
  104. devs = append(devs, d...)
  105. devPermissions = append(devPermissions, dPermissions...)
  106. }
  107. for _, deviceCgroupRule := range c.HostConfig.DeviceCgroupRules {
  108. ss := deviceCgroupRuleRegex.FindAllStringSubmatch(deviceCgroupRule, -1)
  109. if len(ss[0]) != 5 {
  110. return fmt.Errorf("invalid device cgroup rule format: '%s'", deviceCgroupRule)
  111. }
  112. matches := ss[0]
  113. dPermissions := specs.LinuxDeviceCgroup{
  114. Allow: true,
  115. Type: matches[1],
  116. Access: matches[4],
  117. }
  118. if matches[2] == "*" {
  119. major := int64(-1)
  120. dPermissions.Major = &major
  121. } else {
  122. major, err := strconv.ParseInt(matches[2], 10, 64)
  123. if err != nil {
  124. return fmt.Errorf("invalid major value in device cgroup rule format: '%s'", deviceCgroupRule)
  125. }
  126. dPermissions.Major = &major
  127. }
  128. if matches[3] == "*" {
  129. minor := int64(-1)
  130. dPermissions.Minor = &minor
  131. } else {
  132. minor, err := strconv.ParseInt(matches[3], 10, 64)
  133. if err != nil {
  134. return fmt.Errorf("invalid minor value in device cgroup rule format: '%s'", deviceCgroupRule)
  135. }
  136. dPermissions.Minor = &minor
  137. }
  138. devPermissions = append(devPermissions, dPermissions)
  139. }
  140. }
  141. s.Linux.Devices = append(s.Linux.Devices, devs...)
  142. s.Linux.Resources.Devices = devPermissions
  143. return nil
  144. }
  145. func setRlimits(daemon *Daemon, s *specs.Spec, c *container.Container) error {
  146. var rlimits []specs.POSIXRlimit
  147. // We want to leave the original HostConfig alone so make a copy here
  148. hostConfig := *c.HostConfig
  149. // Merge with the daemon defaults
  150. daemon.mergeUlimits(&hostConfig)
  151. for _, ul := range hostConfig.Ulimits {
  152. rlimits = append(rlimits, specs.POSIXRlimit{
  153. Type: "RLIMIT_" + strings.ToUpper(ul.Name),
  154. Soft: uint64(ul.Soft),
  155. Hard: uint64(ul.Hard),
  156. })
  157. }
  158. s.Process.Rlimits = rlimits
  159. return nil
  160. }
  161. func setUser(s *specs.Spec, c *container.Container) error {
  162. uid, gid, additionalGids, err := getUser(c, c.Config.User)
  163. if err != nil {
  164. return err
  165. }
  166. s.Process.User.UID = uid
  167. s.Process.User.GID = gid
  168. s.Process.User.AdditionalGids = additionalGids
  169. return nil
  170. }
  171. func readUserFile(c *container.Container, p string) (io.ReadCloser, error) {
  172. fp, err := c.GetResourcePath(p)
  173. if err != nil {
  174. return nil, err
  175. }
  176. return os.Open(fp)
  177. }
  178. func getUser(c *container.Container, username string) (uint32, uint32, []uint32, error) {
  179. passwdPath, err := user.GetPasswdPath()
  180. if err != nil {
  181. return 0, 0, nil, err
  182. }
  183. groupPath, err := user.GetGroupPath()
  184. if err != nil {
  185. return 0, 0, nil, err
  186. }
  187. passwdFile, err := readUserFile(c, passwdPath)
  188. if err == nil {
  189. defer passwdFile.Close()
  190. }
  191. groupFile, err := readUserFile(c, groupPath)
  192. if err == nil {
  193. defer groupFile.Close()
  194. }
  195. execUser, err := user.GetExecUser(username, nil, passwdFile, groupFile)
  196. if err != nil {
  197. return 0, 0, nil, err
  198. }
  199. // todo: fix this double read by a change to libcontainer/user pkg
  200. groupFile, err = readUserFile(c, groupPath)
  201. if err == nil {
  202. defer groupFile.Close()
  203. }
  204. var addGroups []int
  205. if len(c.HostConfig.GroupAdd) > 0 {
  206. addGroups, err = user.GetAdditionalGroups(c.HostConfig.GroupAdd, groupFile)
  207. if err != nil {
  208. return 0, 0, nil, err
  209. }
  210. }
  211. uid := uint32(execUser.Uid)
  212. gid := uint32(execUser.Gid)
  213. sgids := append(execUser.Sgids, addGroups...)
  214. var additionalGids []uint32
  215. for _, g := range sgids {
  216. additionalGids = append(additionalGids, uint32(g))
  217. }
  218. return uid, gid, additionalGids, nil
  219. }
  220. func setNamespace(s *specs.Spec, ns specs.LinuxNamespace) {
  221. for i, n := range s.Linux.Namespaces {
  222. if n.Type == ns.Type {
  223. s.Linux.Namespaces[i] = ns
  224. return
  225. }
  226. }
  227. s.Linux.Namespaces = append(s.Linux.Namespaces, ns)
  228. }
  229. func setCapabilities(s *specs.Spec, c *container.Container) error {
  230. var caplist []string
  231. var err error
  232. if c.HostConfig.Privileged {
  233. caplist = caps.GetAllCapabilities()
  234. } else {
  235. caplist, err = caps.TweakCapabilities(s.Process.Capabilities.Effective, c.HostConfig.CapAdd, c.HostConfig.CapDrop)
  236. if err != nil {
  237. return err
  238. }
  239. }
  240. s.Process.Capabilities.Effective = caplist
  241. s.Process.Capabilities.Bounding = caplist
  242. s.Process.Capabilities.Permitted = caplist
  243. s.Process.Capabilities.Inheritable = caplist
  244. return nil
  245. }
  246. func setNamespaces(daemon *Daemon, s *specs.Spec, c *container.Container) error {
  247. userNS := false
  248. // user
  249. if c.HostConfig.UsernsMode.IsPrivate() {
  250. uidMap := daemon.idMappings.UIDs()
  251. if uidMap != nil {
  252. userNS = true
  253. ns := specs.LinuxNamespace{Type: "user"}
  254. setNamespace(s, ns)
  255. s.Linux.UIDMappings = specMapping(uidMap)
  256. s.Linux.GIDMappings = specMapping(daemon.idMappings.GIDs())
  257. }
  258. }
  259. // network
  260. if !c.Config.NetworkDisabled {
  261. ns := specs.LinuxNamespace{Type: "network"}
  262. parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2)
  263. if parts[0] == "container" {
  264. nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer())
  265. if err != nil {
  266. return err
  267. }
  268. ns.Path = fmt.Sprintf("/proc/%d/ns/net", nc.State.GetPID())
  269. if userNS {
  270. // to share a net namespace, they must also share a user namespace
  271. nsUser := specs.LinuxNamespace{Type: "user"}
  272. nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", nc.State.GetPID())
  273. setNamespace(s, nsUser)
  274. }
  275. } else if c.HostConfig.NetworkMode.IsHost() {
  276. ns.Path = c.NetworkSettings.SandboxKey
  277. }
  278. setNamespace(s, ns)
  279. }
  280. // ipc
  281. ipcMode := c.HostConfig.IpcMode
  282. switch {
  283. case ipcMode.IsContainer():
  284. ns := specs.LinuxNamespace{Type: "ipc"}
  285. ic, err := daemon.getIpcContainer(ipcMode.Container())
  286. if err != nil {
  287. return err
  288. }
  289. ns.Path = fmt.Sprintf("/proc/%d/ns/ipc", ic.State.GetPID())
  290. setNamespace(s, ns)
  291. if userNS {
  292. // to share an IPC namespace, they must also share a user namespace
  293. nsUser := specs.LinuxNamespace{Type: "user"}
  294. nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", ic.State.GetPID())
  295. setNamespace(s, nsUser)
  296. }
  297. case ipcMode.IsHost():
  298. oci.RemoveNamespace(s, specs.LinuxNamespaceType("ipc"))
  299. case ipcMode.IsEmpty():
  300. // A container was created by an older version of the daemon.
  301. // The default behavior used to be what is now called "shareable".
  302. fallthrough
  303. case ipcMode.IsPrivate(), ipcMode.IsShareable(), ipcMode.IsNone():
  304. ns := specs.LinuxNamespace{Type: "ipc"}
  305. setNamespace(s, ns)
  306. default:
  307. return fmt.Errorf("Invalid IPC mode: %v", ipcMode)
  308. }
  309. // pid
  310. if c.HostConfig.PidMode.IsContainer() {
  311. ns := specs.LinuxNamespace{Type: "pid"}
  312. pc, err := daemon.getPidContainer(c)
  313. if err != nil {
  314. return err
  315. }
  316. ns.Path = fmt.Sprintf("/proc/%d/ns/pid", pc.State.GetPID())
  317. setNamespace(s, ns)
  318. if userNS {
  319. // to share a PID namespace, they must also share a user namespace
  320. nsUser := specs.LinuxNamespace{Type: "user"}
  321. nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", pc.State.GetPID())
  322. setNamespace(s, nsUser)
  323. }
  324. } else if c.HostConfig.PidMode.IsHost() {
  325. oci.RemoveNamespace(s, specs.LinuxNamespaceType("pid"))
  326. } else {
  327. ns := specs.LinuxNamespace{Type: "pid"}
  328. setNamespace(s, ns)
  329. }
  330. // uts
  331. if c.HostConfig.UTSMode.IsHost() {
  332. oci.RemoveNamespace(s, specs.LinuxNamespaceType("uts"))
  333. s.Hostname = ""
  334. }
  335. return nil
  336. }
  337. func specMapping(s []idtools.IDMap) []specs.LinuxIDMapping {
  338. var ids []specs.LinuxIDMapping
  339. for _, item := range s {
  340. ids = append(ids, specs.LinuxIDMapping{
  341. HostID: uint32(item.HostID),
  342. ContainerID: uint32(item.ContainerID),
  343. Size: uint32(item.Size),
  344. })
  345. }
  346. return ids
  347. }
  348. func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
  349. for _, m := range mountinfo {
  350. if m.Mountpoint == dir {
  351. return m
  352. }
  353. }
  354. return nil
  355. }
  356. // Get the source mount point of directory passed in as argument. Also return
  357. // optional fields.
  358. func getSourceMount(source string) (string, string, error) {
  359. // Ensure any symlinks are resolved.
  360. sourcePath, err := filepath.EvalSymlinks(source)
  361. if err != nil {
  362. return "", "", err
  363. }
  364. mountinfos, err := mount.GetMounts()
  365. if err != nil {
  366. return "", "", err
  367. }
  368. mountinfo := getMountInfo(mountinfos, sourcePath)
  369. if mountinfo != nil {
  370. return sourcePath, mountinfo.Optional, nil
  371. }
  372. path := sourcePath
  373. for {
  374. path = filepath.Dir(path)
  375. mountinfo = getMountInfo(mountinfos, path)
  376. if mountinfo != nil {
  377. return path, mountinfo.Optional, nil
  378. }
  379. if path == "/" {
  380. break
  381. }
  382. }
  383. // If we are here, we did not find parent mount. Something is wrong.
  384. return "", "", fmt.Errorf("Could not find source mount of %s", source)
  385. }
  386. // Ensure mount point on which path is mounted, is shared.
  387. func ensureShared(path string) error {
  388. sharedMount := false
  389. sourceMount, optionalOpts, err := getSourceMount(path)
  390. if err != nil {
  391. return err
  392. }
  393. // Make sure source mount point is shared.
  394. optsSplit := strings.Split(optionalOpts, " ")
  395. for _, opt := range optsSplit {
  396. if strings.HasPrefix(opt, "shared:") {
  397. sharedMount = true
  398. break
  399. }
  400. }
  401. if !sharedMount {
  402. return fmt.Errorf("path %s is mounted on %s but it is not a shared mount", path, sourceMount)
  403. }
  404. return nil
  405. }
  406. // Ensure mount point on which path is mounted, is either shared or slave.
  407. func ensureSharedOrSlave(path string) error {
  408. sharedMount := false
  409. slaveMount := false
  410. sourceMount, optionalOpts, err := getSourceMount(path)
  411. if err != nil {
  412. return err
  413. }
  414. // Make sure source mount point is shared.
  415. optsSplit := strings.Split(optionalOpts, " ")
  416. for _, opt := range optsSplit {
  417. if strings.HasPrefix(opt, "shared:") {
  418. sharedMount = true
  419. break
  420. } else if strings.HasPrefix(opt, "master:") {
  421. slaveMount = true
  422. break
  423. }
  424. }
  425. if !sharedMount && !slaveMount {
  426. return fmt.Errorf("path %s is mounted on %s but it is not a shared or slave mount", path, sourceMount)
  427. }
  428. return nil
  429. }
  430. var (
  431. mountPropagationMap = map[string]int{
  432. "private": mount.PRIVATE,
  433. "rprivate": mount.RPRIVATE,
  434. "shared": mount.SHARED,
  435. "rshared": mount.RSHARED,
  436. "slave": mount.SLAVE,
  437. "rslave": mount.RSLAVE,
  438. }
  439. mountPropagationReverseMap = map[int]string{
  440. mount.PRIVATE: "private",
  441. mount.RPRIVATE: "rprivate",
  442. mount.SHARED: "shared",
  443. mount.RSHARED: "rshared",
  444. mount.SLAVE: "slave",
  445. mount.RSLAVE: "rslave",
  446. }
  447. )
  448. func setMounts(daemon *Daemon, s *specs.Spec, c *container.Container, mounts []container.Mount) error {
  449. userMounts := make(map[string]struct{})
  450. for _, m := range mounts {
  451. userMounts[m.Destination] = struct{}{}
  452. }
  453. // Filter out mounts from spec
  454. noIpc := c.HostConfig.IpcMode.IsNone()
  455. // Filter out mounts that are overridden by user supplied mounts
  456. var defaultMounts []specs.Mount
  457. _, mountDev := userMounts["/dev"]
  458. for _, m := range s.Mounts {
  459. // filter out /dev/shm mount if case IpcMode is none
  460. if noIpc && m.Destination == "/dev/shm" {
  461. continue
  462. }
  463. // filter out mount overridden by a user supplied mount
  464. if _, ok := userMounts[m.Destination]; !ok {
  465. if mountDev && strings.HasPrefix(m.Destination, "/dev/") {
  466. continue
  467. }
  468. defaultMounts = append(defaultMounts, m)
  469. }
  470. }
  471. s.Mounts = defaultMounts
  472. for _, m := range mounts {
  473. for _, cm := range s.Mounts {
  474. if cm.Destination == m.Destination {
  475. return duplicateMountPointError(m.Destination)
  476. }
  477. }
  478. if m.Source == "tmpfs" {
  479. data := m.Data
  480. parser := volume.NewParser("linux")
  481. options := []string{"noexec", "nosuid", "nodev", string(parser.DefaultPropagationMode())}
  482. if data != "" {
  483. options = append(options, strings.Split(data, ",")...)
  484. }
  485. merged, err := mount.MergeTmpfsOptions(options)
  486. if err != nil {
  487. return err
  488. }
  489. s.Mounts = append(s.Mounts, specs.Mount{Destination: m.Destination, Source: m.Source, Type: "tmpfs", Options: merged})
  490. continue
  491. }
  492. mt := specs.Mount{Destination: m.Destination, Source: m.Source, Type: "bind"}
  493. // Determine property of RootPropagation based on volume
  494. // properties. If a volume is shared, then keep root propagation
  495. // shared. This should work for slave and private volumes too.
  496. //
  497. // For slave volumes, it can be either [r]shared/[r]slave.
  498. //
  499. // For private volumes any root propagation value should work.
  500. pFlag := mountPropagationMap[m.Propagation]
  501. if pFlag == mount.SHARED || pFlag == mount.RSHARED {
  502. if err := ensureShared(m.Source); err != nil {
  503. return err
  504. }
  505. rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
  506. if rootpg != mount.SHARED && rootpg != mount.RSHARED {
  507. s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.SHARED]
  508. }
  509. } else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE {
  510. if err := ensureSharedOrSlave(m.Source); err != nil {
  511. return err
  512. }
  513. rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
  514. if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
  515. s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.RSLAVE]
  516. }
  517. }
  518. opts := []string{"rbind"}
  519. if !m.Writable {
  520. opts = append(opts, "ro")
  521. }
  522. if pFlag != 0 {
  523. opts = append(opts, mountPropagationReverseMap[pFlag])
  524. }
  525. mt.Options = opts
  526. s.Mounts = append(s.Mounts, mt)
  527. }
  528. if s.Root.Readonly {
  529. for i, m := range s.Mounts {
  530. switch m.Destination {
  531. case "/proc", "/dev/pts", "/dev/mqueue": // /dev is remounted by runc
  532. continue
  533. }
  534. if _, ok := userMounts[m.Destination]; !ok {
  535. if !stringutils.InSlice(m.Options, "ro") {
  536. s.Mounts[i].Options = append(s.Mounts[i].Options, "ro")
  537. }
  538. }
  539. }
  540. }
  541. if c.HostConfig.Privileged {
  542. if !s.Root.Readonly {
  543. // clear readonly for /sys
  544. for i := range s.Mounts {
  545. if s.Mounts[i].Destination == "/sys" {
  546. clearReadOnly(&s.Mounts[i])
  547. }
  548. }
  549. }
  550. s.Linux.ReadonlyPaths = nil
  551. s.Linux.MaskedPaths = nil
  552. }
  553. // Set size for /dev/shm mount that comes from spec (IpcMode: private only)
  554. for i, m := range s.Mounts {
  555. if m.Destination == "/dev/shm" {
  556. sizeOpt := "size=" + strconv.FormatInt(c.HostConfig.ShmSize, 10)
  557. s.Mounts[i].Options = append(s.Mounts[i].Options, sizeOpt)
  558. }
  559. }
  560. // TODO: until a kernel/mount solution exists for handling remount in a user namespace,
  561. // we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
  562. if uidMap := daemon.idMappings.UIDs(); uidMap != nil || c.HostConfig.Privileged {
  563. for i, m := range s.Mounts {
  564. if m.Type == "cgroup" {
  565. clearReadOnly(&s.Mounts[i])
  566. }
  567. }
  568. }
  569. return nil
  570. }
  571. func (daemon *Daemon) populateCommonSpec(s *specs.Spec, c *container.Container) error {
  572. linkedEnv, err := daemon.setupLinkedContainers(c)
  573. if err != nil {
  574. return err
  575. }
  576. s.Root = &specs.Root{
  577. Path: c.BaseFS.Path(),
  578. Readonly: c.HostConfig.ReadonlyRootfs,
  579. }
  580. if err := c.SetupWorkingDirectory(daemon.idMappings.RootPair()); err != nil {
  581. return err
  582. }
  583. cwd := c.Config.WorkingDir
  584. if len(cwd) == 0 {
  585. cwd = "/"
  586. }
  587. s.Process.Args = append([]string{c.Path}, c.Args...)
  588. // only add the custom init if it is specified and the container is running in its
  589. // own private pid namespace. It does not make sense to add if it is running in the
  590. // host namespace or another container's pid namespace where we already have an init
  591. if c.HostConfig.PidMode.IsPrivate() {
  592. if (c.HostConfig.Init != nil && *c.HostConfig.Init) ||
  593. (c.HostConfig.Init == nil && daemon.configStore.Init) {
  594. s.Process.Args = append([]string{"/dev/init", "--", c.Path}, c.Args...)
  595. var path string
  596. if daemon.configStore.InitPath == "" {
  597. path, err = exec.LookPath(daemonconfig.DefaultInitBinary)
  598. if err != nil {
  599. return err
  600. }
  601. }
  602. if daemon.configStore.InitPath != "" {
  603. path = daemon.configStore.InitPath
  604. }
  605. s.Mounts = append(s.Mounts, specs.Mount{
  606. Destination: "/dev/init",
  607. Type: "bind",
  608. Source: path,
  609. Options: []string{"bind", "ro"},
  610. })
  611. }
  612. }
  613. s.Process.Cwd = cwd
  614. s.Process.Env = c.CreateDaemonEnvironment(c.Config.Tty, linkedEnv)
  615. s.Process.Terminal = c.Config.Tty
  616. s.Hostname = c.FullHostname()
  617. return nil
  618. }
  619. func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) {
  620. s := oci.DefaultSpec()
  621. if err := daemon.populateCommonSpec(&s, c); err != nil {
  622. return nil, err
  623. }
  624. var cgroupsPath string
  625. scopePrefix := "docker"
  626. parent := "/docker"
  627. useSystemd := UsingSystemd(daemon.configStore)
  628. if useSystemd {
  629. parent = "system.slice"
  630. }
  631. if c.HostConfig.CgroupParent != "" {
  632. parent = c.HostConfig.CgroupParent
  633. } else if daemon.configStore.CgroupParent != "" {
  634. parent = daemon.configStore.CgroupParent
  635. }
  636. if useSystemd {
  637. cgroupsPath = parent + ":" + scopePrefix + ":" + c.ID
  638. logrus.Debugf("createSpec: cgroupsPath: %s", cgroupsPath)
  639. } else {
  640. cgroupsPath = filepath.Join(parent, c.ID)
  641. }
  642. s.Linux.CgroupsPath = cgroupsPath
  643. if err := setResources(&s, c.HostConfig.Resources); err != nil {
  644. return nil, fmt.Errorf("linux runtime spec resources: %v", err)
  645. }
  646. s.Linux.Sysctl = c.HostConfig.Sysctls
  647. p := s.Linux.CgroupsPath
  648. if useSystemd {
  649. initPath, err := cgroups.GetInitCgroup("cpu")
  650. if err != nil {
  651. return nil, err
  652. }
  653. p, _ = cgroups.GetOwnCgroup("cpu")
  654. if err != nil {
  655. return nil, err
  656. }
  657. p = filepath.Join(initPath, p)
  658. }
  659. // Clean path to guard against things like ../../../BAD
  660. parentPath := filepath.Dir(p)
  661. if !filepath.IsAbs(parentPath) {
  662. parentPath = filepath.Clean("/" + parentPath)
  663. }
  664. if err := daemon.initCgroupsPath(parentPath); err != nil {
  665. return nil, fmt.Errorf("linux init cgroups path: %v", err)
  666. }
  667. if err := setDevices(&s, c); err != nil {
  668. return nil, fmt.Errorf("linux runtime spec devices: %v", err)
  669. }
  670. if err := setRlimits(daemon, &s, c); err != nil {
  671. return nil, fmt.Errorf("linux runtime spec rlimits: %v", err)
  672. }
  673. if err := setUser(&s, c); err != nil {
  674. return nil, fmt.Errorf("linux spec user: %v", err)
  675. }
  676. if err := setNamespaces(daemon, &s, c); err != nil {
  677. return nil, fmt.Errorf("linux spec namespaces: %v", err)
  678. }
  679. if err := setCapabilities(&s, c); err != nil {
  680. return nil, fmt.Errorf("linux spec capabilities: %v", err)
  681. }
  682. if err := setSeccomp(daemon, &s, c); err != nil {
  683. return nil, fmt.Errorf("linux seccomp: %v", err)
  684. }
  685. if err := daemon.setupIpcDirs(c); err != nil {
  686. return nil, err
  687. }
  688. if err := daemon.setupSecretDir(c); err != nil {
  689. return nil, err
  690. }
  691. if err := daemon.setupConfigDir(c); err != nil {
  692. return nil, err
  693. }
  694. ms, err := daemon.setupMounts(c)
  695. if err != nil {
  696. return nil, err
  697. }
  698. if !c.HostConfig.IpcMode.IsPrivate() && !c.HostConfig.IpcMode.IsEmpty() {
  699. ms = append(ms, c.IpcMounts()...)
  700. }
  701. tmpfsMounts, err := c.TmpfsMounts()
  702. if err != nil {
  703. return nil, err
  704. }
  705. ms = append(ms, tmpfsMounts...)
  706. if m := c.SecretMounts(); m != nil {
  707. ms = append(ms, m...)
  708. }
  709. ms = append(ms, c.ConfigMounts()...)
  710. sort.Sort(mounts(ms))
  711. if err := setMounts(daemon, &s, c, ms); err != nil {
  712. return nil, fmt.Errorf("linux mounts: %v", err)
  713. }
  714. for _, ns := range s.Linux.Namespaces {
  715. if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled {
  716. target, err := os.Readlink(filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe"))
  717. if err != nil {
  718. return nil, err
  719. }
  720. s.Hooks = &specs.Hooks{
  721. Prestart: []specs.Hook{{
  722. Path: target, // FIXME: cross-platform
  723. Args: []string{"libnetwork-setkey", c.ID, daemon.netController.ID()},
  724. }},
  725. }
  726. }
  727. }
  728. if apparmor.IsEnabled() {
  729. var appArmorProfile string
  730. if c.AppArmorProfile != "" {
  731. appArmorProfile = c.AppArmorProfile
  732. } else if c.HostConfig.Privileged {
  733. appArmorProfile = "unconfined"
  734. } else {
  735. appArmorProfile = "docker-default"
  736. }
  737. if appArmorProfile == "docker-default" {
  738. // Unattended upgrades and other fun services can unload AppArmor
  739. // profiles inadvertently. Since we cannot store our profile in
  740. // /etc/apparmor.d, nor can we practically add other ways of
  741. // telling the system to keep our profile loaded, in order to make
  742. // sure that we keep the default profile enabled we dynamically
  743. // reload it if necessary.
  744. if err := ensureDefaultAppArmorProfile(); err != nil {
  745. return nil, err
  746. }
  747. }
  748. s.Process.ApparmorProfile = appArmorProfile
  749. }
  750. s.Process.SelinuxLabel = c.GetProcessLabel()
  751. s.Process.NoNewPrivileges = c.NoNewPrivileges
  752. s.Process.OOMScoreAdj = &c.HostConfig.OomScoreAdj
  753. s.Linux.MountLabel = c.MountLabel
  754. return &s, nil
  755. }
  756. func clearReadOnly(m *specs.Mount) {
  757. var opt []string
  758. for _, o := range m.Options {
  759. if o != "ro" {
  760. opt = append(opt, o)
  761. }
  762. }
  763. m.Options = opt
  764. }
  765. // mergeUlimits merge the Ulimits from HostConfig with daemon defaults, and update HostConfig
  766. func (daemon *Daemon) mergeUlimits(c *containertypes.HostConfig) {
  767. ulimits := c.Ulimits
  768. // Merge ulimits with daemon defaults
  769. ulIdx := make(map[string]struct{})
  770. for _, ul := range ulimits {
  771. ulIdx[ul.Name] = struct{}{}
  772. }
  773. for name, ul := range daemon.configStore.Ulimits {
  774. if _, exists := ulIdx[name]; !exists {
  775. ulimits = append(ulimits, ul)
  776. }
  777. }
  778. c.Ulimits = ulimits
  779. }