oci_linux.go 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864
  1. package daemon
  2. import (
  3. "fmt"
  4. "io"
  5. "os"
  6. "os/exec"
  7. "path/filepath"
  8. "regexp"
  9. "sort"
  10. "strconv"
  11. "strings"
  12. containertypes "github.com/docker/docker/api/types/container"
  13. "github.com/docker/docker/container"
  14. "github.com/docker/docker/daemon/caps"
  15. daemonconfig "github.com/docker/docker/daemon/config"
  16. "github.com/docker/docker/oci"
  17. "github.com/docker/docker/pkg/idtools"
  18. "github.com/docker/docker/pkg/mount"
  19. "github.com/docker/docker/pkg/stringutils"
  20. "github.com/docker/docker/pkg/symlink"
  21. "github.com/docker/docker/volume"
  22. "github.com/opencontainers/runc/libcontainer/apparmor"
  23. "github.com/opencontainers/runc/libcontainer/cgroups"
  24. "github.com/opencontainers/runc/libcontainer/devices"
  25. "github.com/opencontainers/runc/libcontainer/user"
  26. specs "github.com/opencontainers/runtime-spec/specs-go"
  27. "github.com/sirupsen/logrus"
  28. )
  29. var (
  30. deviceCgroupRuleRegex = regexp.MustCompile("^([acb]) ([0-9]+|\\*):([0-9]+|\\*) ([rwm]{1,3})$")
  31. )
  32. func setResources(s *specs.Spec, r containertypes.Resources) error {
  33. weightDevices, err := getBlkioWeightDevices(r)
  34. if err != nil {
  35. return err
  36. }
  37. readBpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceReadBps)
  38. if err != nil {
  39. return err
  40. }
  41. writeBpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceWriteBps)
  42. if err != nil {
  43. return err
  44. }
  45. readIOpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceReadIOps)
  46. if err != nil {
  47. return err
  48. }
  49. writeIOpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceWriteIOps)
  50. if err != nil {
  51. return err
  52. }
  53. memoryRes := getMemoryResources(r)
  54. cpuRes, err := getCPUResources(r)
  55. if err != nil {
  56. return err
  57. }
  58. blkioWeight := r.BlkioWeight
  59. specResources := &specs.LinuxResources{
  60. Memory: memoryRes,
  61. CPU: cpuRes,
  62. BlockIO: &specs.LinuxBlockIO{
  63. Weight: &blkioWeight,
  64. WeightDevice: weightDevices,
  65. ThrottleReadBpsDevice: readBpsDevice,
  66. ThrottleWriteBpsDevice: writeBpsDevice,
  67. ThrottleReadIOPSDevice: readIOpsDevice,
  68. ThrottleWriteIOPSDevice: writeIOpsDevice,
  69. },
  70. DisableOOMKiller: r.OomKillDisable,
  71. Pids: &specs.LinuxPids{
  72. Limit: r.PidsLimit,
  73. },
  74. }
  75. if s.Linux.Resources != nil && len(s.Linux.Resources.Devices) > 0 {
  76. specResources.Devices = s.Linux.Resources.Devices
  77. }
  78. s.Linux.Resources = specResources
  79. return nil
  80. }
  81. func setDevices(s *specs.Spec, c *container.Container) error {
  82. // Build lists of devices allowed and created within the container.
  83. var devs []specs.LinuxDevice
  84. devPermissions := s.Linux.Resources.Devices
  85. if c.HostConfig.Privileged {
  86. hostDevices, err := devices.HostDevices()
  87. if err != nil {
  88. return err
  89. }
  90. for _, d := range hostDevices {
  91. devs = append(devs, oci.Device(d))
  92. }
  93. devPermissions = []specs.LinuxDeviceCgroup{
  94. {
  95. Allow: true,
  96. Access: "rwm",
  97. },
  98. }
  99. } else {
  100. for _, deviceMapping := range c.HostConfig.Devices {
  101. d, dPermissions, err := oci.DevicesFromPath(deviceMapping.PathOnHost, deviceMapping.PathInContainer, deviceMapping.CgroupPermissions)
  102. if err != nil {
  103. return err
  104. }
  105. devs = append(devs, d...)
  106. devPermissions = append(devPermissions, dPermissions...)
  107. }
  108. for _, deviceCgroupRule := range c.HostConfig.DeviceCgroupRules {
  109. ss := deviceCgroupRuleRegex.FindAllStringSubmatch(deviceCgroupRule, -1)
  110. if len(ss[0]) != 5 {
  111. return fmt.Errorf("invalid device cgroup rule format: '%s'", deviceCgroupRule)
  112. }
  113. matches := ss[0]
  114. dPermissions := specs.LinuxDeviceCgroup{
  115. Allow: true,
  116. Type: matches[1],
  117. Access: matches[4],
  118. }
  119. if matches[2] == "*" {
  120. major := int64(-1)
  121. dPermissions.Major = &major
  122. } else {
  123. major, err := strconv.ParseInt(matches[2], 10, 64)
  124. if err != nil {
  125. return fmt.Errorf("invalid major value in device cgroup rule format: '%s'", deviceCgroupRule)
  126. }
  127. dPermissions.Major = &major
  128. }
  129. if matches[3] == "*" {
  130. minor := int64(-1)
  131. dPermissions.Minor = &minor
  132. } else {
  133. minor, err := strconv.ParseInt(matches[3], 10, 64)
  134. if err != nil {
  135. return fmt.Errorf("invalid minor value in device cgroup rule format: '%s'", deviceCgroupRule)
  136. }
  137. dPermissions.Minor = &minor
  138. }
  139. devPermissions = append(devPermissions, dPermissions)
  140. }
  141. }
  142. s.Linux.Devices = append(s.Linux.Devices, devs...)
  143. s.Linux.Resources.Devices = devPermissions
  144. return nil
  145. }
  146. func setRlimits(daemon *Daemon, s *specs.Spec, c *container.Container) error {
  147. var rlimits []specs.LinuxRlimit
  148. // We want to leave the original HostConfig alone so make a copy here
  149. hostConfig := *c.HostConfig
  150. // Merge with the daemon defaults
  151. daemon.mergeUlimits(&hostConfig)
  152. for _, ul := range hostConfig.Ulimits {
  153. rlimits = append(rlimits, specs.LinuxRlimit{
  154. Type: "RLIMIT_" + strings.ToUpper(ul.Name),
  155. Soft: uint64(ul.Soft),
  156. Hard: uint64(ul.Hard),
  157. })
  158. }
  159. s.Process.Rlimits = rlimits
  160. return nil
  161. }
  162. func setUser(s *specs.Spec, c *container.Container) error {
  163. uid, gid, additionalGids, err := getUser(c, c.Config.User)
  164. if err != nil {
  165. return err
  166. }
  167. s.Process.User.UID = uid
  168. s.Process.User.GID = gid
  169. s.Process.User.AdditionalGids = additionalGids
  170. return nil
  171. }
  172. func readUserFile(c *container.Container, p string) (io.ReadCloser, error) {
  173. fp, err := symlink.FollowSymlinkInScope(filepath.Join(c.BaseFS, p), c.BaseFS)
  174. if err != nil {
  175. return nil, err
  176. }
  177. return os.Open(fp)
  178. }
  179. func getUser(c *container.Container, username string) (uint32, uint32, []uint32, error) {
  180. passwdPath, err := user.GetPasswdPath()
  181. if err != nil {
  182. return 0, 0, nil, err
  183. }
  184. groupPath, err := user.GetGroupPath()
  185. if err != nil {
  186. return 0, 0, nil, err
  187. }
  188. passwdFile, err := readUserFile(c, passwdPath)
  189. if err == nil {
  190. defer passwdFile.Close()
  191. }
  192. groupFile, err := readUserFile(c, groupPath)
  193. if err == nil {
  194. defer groupFile.Close()
  195. }
  196. execUser, err := user.GetExecUser(username, nil, passwdFile, groupFile)
  197. if err != nil {
  198. return 0, 0, nil, err
  199. }
  200. // todo: fix this double read by a change to libcontainer/user pkg
  201. groupFile, err = readUserFile(c, groupPath)
  202. if err == nil {
  203. defer groupFile.Close()
  204. }
  205. var addGroups []int
  206. if len(c.HostConfig.GroupAdd) > 0 {
  207. addGroups, err = user.GetAdditionalGroups(c.HostConfig.GroupAdd, groupFile)
  208. if err != nil {
  209. return 0, 0, nil, err
  210. }
  211. }
  212. uid := uint32(execUser.Uid)
  213. gid := uint32(execUser.Gid)
  214. sgids := append(execUser.Sgids, addGroups...)
  215. var additionalGids []uint32
  216. for _, g := range sgids {
  217. additionalGids = append(additionalGids, uint32(g))
  218. }
  219. return uid, gid, additionalGids, nil
  220. }
  221. func setNamespace(s *specs.Spec, ns specs.LinuxNamespace) {
  222. for i, n := range s.Linux.Namespaces {
  223. if n.Type == ns.Type {
  224. s.Linux.Namespaces[i] = ns
  225. return
  226. }
  227. }
  228. s.Linux.Namespaces = append(s.Linux.Namespaces, ns)
  229. }
  230. func setCapabilities(s *specs.Spec, c *container.Container) error {
  231. var caplist []string
  232. var err error
  233. if c.HostConfig.Privileged {
  234. caplist = caps.GetAllCapabilities()
  235. } else {
  236. caplist, err = caps.TweakCapabilities(s.Process.Capabilities.Effective, c.HostConfig.CapAdd, c.HostConfig.CapDrop)
  237. if err != nil {
  238. return err
  239. }
  240. }
  241. s.Process.Capabilities.Effective = caplist
  242. s.Process.Capabilities.Bounding = caplist
  243. s.Process.Capabilities.Permitted = caplist
  244. s.Process.Capabilities.Inheritable = caplist
  245. return nil
  246. }
  247. func setNamespaces(daemon *Daemon, s *specs.Spec, c *container.Container) error {
  248. userNS := false
  249. // user
  250. if c.HostConfig.UsernsMode.IsPrivate() {
  251. uidMap := daemon.idMappings.UIDs()
  252. if uidMap != nil {
  253. userNS = true
  254. ns := specs.LinuxNamespace{Type: "user"}
  255. setNamespace(s, ns)
  256. s.Linux.UIDMappings = specMapping(uidMap)
  257. s.Linux.GIDMappings = specMapping(daemon.idMappings.GIDs())
  258. }
  259. }
  260. // network
  261. if !c.Config.NetworkDisabled {
  262. ns := specs.LinuxNamespace{Type: "network"}
  263. parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2)
  264. if parts[0] == "container" {
  265. nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer())
  266. if err != nil {
  267. return err
  268. }
  269. ns.Path = fmt.Sprintf("/proc/%d/ns/net", nc.State.GetPID())
  270. if userNS {
  271. // to share a net namespace, they must also share a user namespace
  272. nsUser := specs.LinuxNamespace{Type: "user"}
  273. nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", nc.State.GetPID())
  274. setNamespace(s, nsUser)
  275. }
  276. } else if c.HostConfig.NetworkMode.IsHost() {
  277. ns.Path = c.NetworkSettings.SandboxKey
  278. }
  279. setNamespace(s, ns)
  280. }
  281. // ipc
  282. ipcMode := c.HostConfig.IpcMode
  283. switch {
  284. case ipcMode.IsContainer():
  285. ns := specs.LinuxNamespace{Type: "ipc"}
  286. ic, err := daemon.getIpcContainer(ipcMode.Container())
  287. if err != nil {
  288. return err
  289. }
  290. ns.Path = fmt.Sprintf("/proc/%d/ns/ipc", ic.State.GetPID())
  291. setNamespace(s, ns)
  292. if userNS {
  293. // to share an IPC namespace, they must also share a user namespace
  294. nsUser := specs.LinuxNamespace{Type: "user"}
  295. nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", ic.State.GetPID())
  296. setNamespace(s, nsUser)
  297. }
  298. case ipcMode.IsHost():
  299. oci.RemoveNamespace(s, specs.LinuxNamespaceType("ipc"))
  300. case ipcMode.IsEmpty():
  301. // A container was created by an older version of the daemon.
  302. // The default behavior used to be what is now called "shareable".
  303. fallthrough
  304. case ipcMode.IsPrivate(), ipcMode.IsShareable(), ipcMode.IsNone():
  305. ns := specs.LinuxNamespace{Type: "ipc"}
  306. setNamespace(s, ns)
  307. default:
  308. return fmt.Errorf("Invalid IPC mode: %v", ipcMode)
  309. }
  310. // pid
  311. if c.HostConfig.PidMode.IsContainer() {
  312. ns := specs.LinuxNamespace{Type: "pid"}
  313. pc, err := daemon.getPidContainer(c)
  314. if err != nil {
  315. return err
  316. }
  317. ns.Path = fmt.Sprintf("/proc/%d/ns/pid", pc.State.GetPID())
  318. setNamespace(s, ns)
  319. if userNS {
  320. // to share a PID namespace, they must also share a user namespace
  321. nsUser := specs.LinuxNamespace{Type: "user"}
  322. nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", pc.State.GetPID())
  323. setNamespace(s, nsUser)
  324. }
  325. } else if c.HostConfig.PidMode.IsHost() {
  326. oci.RemoveNamespace(s, specs.LinuxNamespaceType("pid"))
  327. } else {
  328. ns := specs.LinuxNamespace{Type: "pid"}
  329. setNamespace(s, ns)
  330. }
  331. // uts
  332. if c.HostConfig.UTSMode.IsHost() {
  333. oci.RemoveNamespace(s, specs.LinuxNamespaceType("uts"))
  334. s.Hostname = ""
  335. }
  336. return nil
  337. }
  338. func specMapping(s []idtools.IDMap) []specs.LinuxIDMapping {
  339. var ids []specs.LinuxIDMapping
  340. for _, item := range s {
  341. ids = append(ids, specs.LinuxIDMapping{
  342. HostID: uint32(item.HostID),
  343. ContainerID: uint32(item.ContainerID),
  344. Size: uint32(item.Size),
  345. })
  346. }
  347. return ids
  348. }
  349. func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
  350. for _, m := range mountinfo {
  351. if m.Mountpoint == dir {
  352. return m
  353. }
  354. }
  355. return nil
  356. }
  357. // Get the source mount point of directory passed in as argument. Also return
  358. // optional fields.
  359. func getSourceMount(source string) (string, string, error) {
  360. // Ensure any symlinks are resolved.
  361. sourcePath, err := filepath.EvalSymlinks(source)
  362. if err != nil {
  363. return "", "", err
  364. }
  365. mountinfos, err := mount.GetMounts()
  366. if err != nil {
  367. return "", "", err
  368. }
  369. mountinfo := getMountInfo(mountinfos, sourcePath)
  370. if mountinfo != nil {
  371. return sourcePath, mountinfo.Optional, nil
  372. }
  373. path := sourcePath
  374. for {
  375. path = filepath.Dir(path)
  376. mountinfo = getMountInfo(mountinfos, path)
  377. if mountinfo != nil {
  378. return path, mountinfo.Optional, nil
  379. }
  380. if path == "/" {
  381. break
  382. }
  383. }
  384. // If we are here, we did not find parent mount. Something is wrong.
  385. return "", "", fmt.Errorf("Could not find source mount of %s", source)
  386. }
  387. // Ensure mount point on which path is mounted, is shared.
  388. func ensureShared(path string) error {
  389. sharedMount := false
  390. sourceMount, optionalOpts, err := getSourceMount(path)
  391. if err != nil {
  392. return err
  393. }
  394. // Make sure source mount point is shared.
  395. optsSplit := strings.Split(optionalOpts, " ")
  396. for _, opt := range optsSplit {
  397. if strings.HasPrefix(opt, "shared:") {
  398. sharedMount = true
  399. break
  400. }
  401. }
  402. if !sharedMount {
  403. return fmt.Errorf("path %s is mounted on %s but it is not a shared mount", path, sourceMount)
  404. }
  405. return nil
  406. }
  407. // Ensure mount point on which path is mounted, is either shared or slave.
  408. func ensureSharedOrSlave(path string) error {
  409. sharedMount := false
  410. slaveMount := false
  411. sourceMount, optionalOpts, err := getSourceMount(path)
  412. if err != nil {
  413. return err
  414. }
  415. // Make sure source mount point is shared.
  416. optsSplit := strings.Split(optionalOpts, " ")
  417. for _, opt := range optsSplit {
  418. if strings.HasPrefix(opt, "shared:") {
  419. sharedMount = true
  420. break
  421. } else if strings.HasPrefix(opt, "master:") {
  422. slaveMount = true
  423. break
  424. }
  425. }
  426. if !sharedMount && !slaveMount {
  427. return fmt.Errorf("path %s is mounted on %s but it is not a shared or slave mount", path, sourceMount)
  428. }
  429. return nil
  430. }
  431. var (
  432. mountPropagationMap = map[string]int{
  433. "private": mount.PRIVATE,
  434. "rprivate": mount.RPRIVATE,
  435. "shared": mount.SHARED,
  436. "rshared": mount.RSHARED,
  437. "slave": mount.SLAVE,
  438. "rslave": mount.RSLAVE,
  439. }
  440. mountPropagationReverseMap = map[int]string{
  441. mount.PRIVATE: "private",
  442. mount.RPRIVATE: "rprivate",
  443. mount.SHARED: "shared",
  444. mount.RSHARED: "rshared",
  445. mount.SLAVE: "slave",
  446. mount.RSLAVE: "rslave",
  447. }
  448. )
  449. func setMounts(daemon *Daemon, s *specs.Spec, c *container.Container, mounts []container.Mount) error {
  450. userMounts := make(map[string]struct{})
  451. for _, m := range mounts {
  452. userMounts[m.Destination] = struct{}{}
  453. }
  454. // Filter out mounts from spec
  455. noIpc := c.HostConfig.IpcMode.IsNone()
  456. var defaultMounts []specs.Mount
  457. _, mountDev := userMounts["/dev"]
  458. for _, m := range s.Mounts {
  459. // filter out /dev/shm mount if case IpcMode is none
  460. if noIpc && m.Destination == "/dev/shm" {
  461. continue
  462. }
  463. // filter out mount overridden by a user supplied mount
  464. if _, ok := userMounts[m.Destination]; !ok {
  465. if mountDev && strings.HasPrefix(m.Destination, "/dev/") {
  466. continue
  467. }
  468. defaultMounts = append(defaultMounts, m)
  469. }
  470. }
  471. s.Mounts = defaultMounts
  472. for _, m := range mounts {
  473. for _, cm := range s.Mounts {
  474. if cm.Destination == m.Destination {
  475. return duplicateMountPointError(m.Destination)
  476. }
  477. }
  478. if m.Source == "tmpfs" {
  479. data := m.Data
  480. options := []string{"noexec", "nosuid", "nodev", string(volume.DefaultPropagationMode)}
  481. if data != "" {
  482. options = append(options, strings.Split(data, ",")...)
  483. }
  484. merged, err := mount.MergeTmpfsOptions(options)
  485. if err != nil {
  486. return err
  487. }
  488. s.Mounts = append(s.Mounts, specs.Mount{Destination: m.Destination, Source: m.Source, Type: "tmpfs", Options: merged})
  489. continue
  490. }
  491. mt := specs.Mount{Destination: m.Destination, Source: m.Source, Type: "bind"}
  492. // Determine property of RootPropagation based on volume
  493. // properties. If a volume is shared, then keep root propagation
  494. // shared. This should work for slave and private volumes too.
  495. //
  496. // For slave volumes, it can be either [r]shared/[r]slave.
  497. //
  498. // For private volumes any root propagation value should work.
  499. pFlag := mountPropagationMap[m.Propagation]
  500. if pFlag == mount.SHARED || pFlag == mount.RSHARED {
  501. if err := ensureShared(m.Source); err != nil {
  502. return err
  503. }
  504. rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
  505. if rootpg != mount.SHARED && rootpg != mount.RSHARED {
  506. s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.SHARED]
  507. }
  508. } else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE {
  509. if err := ensureSharedOrSlave(m.Source); err != nil {
  510. return err
  511. }
  512. rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
  513. if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
  514. s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.RSLAVE]
  515. }
  516. }
  517. opts := []string{"rbind"}
  518. if !m.Writable {
  519. opts = append(opts, "ro")
  520. }
  521. if pFlag != 0 {
  522. opts = append(opts, mountPropagationReverseMap[pFlag])
  523. }
  524. mt.Options = opts
  525. s.Mounts = append(s.Mounts, mt)
  526. }
  527. if s.Root.Readonly {
  528. for i, m := range s.Mounts {
  529. switch m.Destination {
  530. case "/proc", "/dev/pts", "/dev/mqueue": // /dev is remounted by runc
  531. continue
  532. }
  533. if _, ok := userMounts[m.Destination]; !ok {
  534. if !stringutils.InSlice(m.Options, "ro") {
  535. s.Mounts[i].Options = append(s.Mounts[i].Options, "ro")
  536. }
  537. }
  538. }
  539. }
  540. if c.HostConfig.Privileged {
  541. if !s.Root.Readonly {
  542. // clear readonly for /sys
  543. for i := range s.Mounts {
  544. if s.Mounts[i].Destination == "/sys" {
  545. clearReadOnly(&s.Mounts[i])
  546. }
  547. }
  548. }
  549. s.Linux.ReadonlyPaths = nil
  550. s.Linux.MaskedPaths = nil
  551. }
  552. // Set size for /dev/shm mount that comes from spec (IpcMode: private only)
  553. for i, m := range s.Mounts {
  554. if m.Destination == "/dev/shm" {
  555. sizeOpt := "size=" + strconv.FormatInt(c.HostConfig.ShmSize, 10)
  556. s.Mounts[i].Options = append(s.Mounts[i].Options, sizeOpt)
  557. }
  558. }
  559. // TODO: until a kernel/mount solution exists for handling remount in a user namespace,
  560. // we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
  561. if uidMap := daemon.idMappings.UIDs(); uidMap != nil || c.HostConfig.Privileged {
  562. for i, m := range s.Mounts {
  563. if m.Type == "cgroup" {
  564. clearReadOnly(&s.Mounts[i])
  565. }
  566. }
  567. }
  568. return nil
  569. }
  570. func (daemon *Daemon) populateCommonSpec(s *specs.Spec, c *container.Container) error {
  571. linkedEnv, err := daemon.setupLinkedContainers(c)
  572. if err != nil {
  573. return err
  574. }
  575. s.Root = specs.Root{
  576. Path: c.BaseFS,
  577. Readonly: c.HostConfig.ReadonlyRootfs,
  578. }
  579. if err := c.SetupWorkingDirectory(daemon.idMappings.RootPair()); err != nil {
  580. return err
  581. }
  582. cwd := c.Config.WorkingDir
  583. if len(cwd) == 0 {
  584. cwd = "/"
  585. }
  586. s.Process.Args = append([]string{c.Path}, c.Args...)
  587. // only add the custom init if it is specified and the container is running in its
  588. // own private pid namespace. It does not make sense to add if it is running in the
  589. // host namespace or another container's pid namespace where we already have an init
  590. if c.HostConfig.PidMode.IsPrivate() {
  591. if (c.HostConfig.Init != nil && *c.HostConfig.Init) ||
  592. (c.HostConfig.Init == nil && daemon.configStore.Init) {
  593. s.Process.Args = append([]string{"/dev/init", "--", c.Path}, c.Args...)
  594. var path string
  595. if daemon.configStore.InitPath == "" {
  596. path, err = exec.LookPath(daemonconfig.DefaultInitBinary)
  597. if err != nil {
  598. return err
  599. }
  600. }
  601. if daemon.configStore.InitPath != "" {
  602. path = daemon.configStore.InitPath
  603. }
  604. s.Mounts = append(s.Mounts, specs.Mount{
  605. Destination: "/dev/init",
  606. Type: "bind",
  607. Source: path,
  608. Options: []string{"bind", "ro"},
  609. })
  610. }
  611. }
  612. s.Process.Cwd = cwd
  613. s.Process.Env = c.CreateDaemonEnvironment(c.Config.Tty, linkedEnv)
  614. s.Process.Terminal = c.Config.Tty
  615. s.Hostname = c.FullHostname()
  616. return nil
  617. }
  618. func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) {
  619. s := oci.DefaultSpec()
  620. if err := daemon.populateCommonSpec(&s, c); err != nil {
  621. return nil, err
  622. }
  623. var cgroupsPath string
  624. scopePrefix := "docker"
  625. parent := "/docker"
  626. useSystemd := UsingSystemd(daemon.configStore)
  627. if useSystemd {
  628. parent = "system.slice"
  629. }
  630. if c.HostConfig.CgroupParent != "" {
  631. parent = c.HostConfig.CgroupParent
  632. } else if daemon.configStore.CgroupParent != "" {
  633. parent = daemon.configStore.CgroupParent
  634. }
  635. if useSystemd {
  636. cgroupsPath = parent + ":" + scopePrefix + ":" + c.ID
  637. logrus.Debugf("createSpec: cgroupsPath: %s", cgroupsPath)
  638. } else {
  639. cgroupsPath = filepath.Join(parent, c.ID)
  640. }
  641. s.Linux.CgroupsPath = cgroupsPath
  642. if err := setResources(&s, c.HostConfig.Resources); err != nil {
  643. return nil, fmt.Errorf("linux runtime spec resources: %v", err)
  644. }
  645. s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj
  646. s.Linux.Sysctl = c.HostConfig.Sysctls
  647. p := s.Linux.CgroupsPath
  648. if useSystemd {
  649. initPath, err := cgroups.GetInitCgroup("cpu")
  650. if err != nil {
  651. return nil, err
  652. }
  653. p, _ = cgroups.GetOwnCgroup("cpu")
  654. if err != nil {
  655. return nil, err
  656. }
  657. p = filepath.Join(initPath, p)
  658. }
  659. // Clean path to guard against things like ../../../BAD
  660. parentPath := filepath.Dir(p)
  661. if !filepath.IsAbs(parentPath) {
  662. parentPath = filepath.Clean("/" + parentPath)
  663. }
  664. if err := daemon.initCgroupsPath(parentPath); err != nil {
  665. return nil, fmt.Errorf("linux init cgroups path: %v", err)
  666. }
  667. if err := setDevices(&s, c); err != nil {
  668. return nil, fmt.Errorf("linux runtime spec devices: %v", err)
  669. }
  670. if err := setRlimits(daemon, &s, c); err != nil {
  671. return nil, fmt.Errorf("linux runtime spec rlimits: %v", err)
  672. }
  673. if err := setUser(&s, c); err != nil {
  674. return nil, fmt.Errorf("linux spec user: %v", err)
  675. }
  676. if err := setNamespaces(daemon, &s, c); err != nil {
  677. return nil, fmt.Errorf("linux spec namespaces: %v", err)
  678. }
  679. if err := setCapabilities(&s, c); err != nil {
  680. return nil, fmt.Errorf("linux spec capabilities: %v", err)
  681. }
  682. if err := setSeccomp(daemon, &s, c); err != nil {
  683. return nil, fmt.Errorf("linux seccomp: %v", err)
  684. }
  685. if err := daemon.setupIpcDirs(c); err != nil {
  686. return nil, err
  687. }
  688. if err := daemon.setupSecretDir(c); err != nil {
  689. return nil, err
  690. }
  691. if err := daemon.setupConfigDir(c); err != nil {
  692. return nil, err
  693. }
  694. ms, err := daemon.setupMounts(c)
  695. if err != nil {
  696. return nil, err
  697. }
  698. if !c.HostConfig.IpcMode.IsPrivate() && !c.HostConfig.IpcMode.IsEmpty() {
  699. ms = append(ms, c.IpcMounts()...)
  700. }
  701. tmpfsMounts, err := c.TmpfsMounts()
  702. if err != nil {
  703. return nil, err
  704. }
  705. ms = append(ms, tmpfsMounts...)
  706. if m := c.SecretMounts(); m != nil {
  707. ms = append(ms, m...)
  708. }
  709. ms = append(ms, c.ConfigMounts()...)
  710. sort.Sort(mounts(ms))
  711. if err := setMounts(daemon, &s, c, ms); err != nil {
  712. return nil, fmt.Errorf("linux mounts: %v", err)
  713. }
  714. for _, ns := range s.Linux.Namespaces {
  715. if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled {
  716. target, err := os.Readlink(filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe"))
  717. if err != nil {
  718. return nil, err
  719. }
  720. s.Hooks = &specs.Hooks{
  721. Prestart: []specs.Hook{{
  722. Path: target, // FIXME: cross-platform
  723. Args: []string{"libnetwork-setkey", c.ID, daemon.netController.ID()},
  724. }},
  725. }
  726. }
  727. }
  728. if apparmor.IsEnabled() {
  729. var appArmorProfile string
  730. if c.AppArmorProfile != "" {
  731. appArmorProfile = c.AppArmorProfile
  732. } else if c.HostConfig.Privileged {
  733. appArmorProfile = "unconfined"
  734. } else {
  735. appArmorProfile = "docker-default"
  736. }
  737. if appArmorProfile == "docker-default" {
  738. // Unattended upgrades and other fun services can unload AppArmor
  739. // profiles inadvertently. Since we cannot store our profile in
  740. // /etc/apparmor.d, nor can we practically add other ways of
  741. // telling the system to keep our profile loaded, in order to make
  742. // sure that we keep the default profile enabled we dynamically
  743. // reload it if necessary.
  744. if err := ensureDefaultAppArmorProfile(); err != nil {
  745. return nil, err
  746. }
  747. }
  748. s.Process.ApparmorProfile = appArmorProfile
  749. }
  750. s.Process.SelinuxLabel = c.GetProcessLabel()
  751. s.Process.NoNewPrivileges = c.NoNewPrivileges
  752. s.Linux.MountLabel = c.MountLabel
  753. return (*specs.Spec)(&s), nil
  754. }
  755. func clearReadOnly(m *specs.Mount) {
  756. var opt []string
  757. for _, o := range m.Options {
  758. if o != "ro" {
  759. opt = append(opt, o)
  760. }
  761. }
  762. m.Options = opt
  763. }
  764. // mergeUlimits merge the Ulimits from HostConfig with daemon defaults, and update HostConfig
  765. func (daemon *Daemon) mergeUlimits(c *containertypes.HostConfig) {
  766. ulimits := c.Ulimits
  767. // Merge ulimits with daemon defaults
  768. ulIdx := make(map[string]struct{})
  769. for _, ul := range ulimits {
  770. ulIdx[ul.Name] = struct{}{}
  771. }
  772. for name, ul := range daemon.configStore.Ulimits {
  773. if _, exists := ulIdx[name]; !exists {
  774. ulimits = append(ulimits, ul)
  775. }
  776. }
  777. c.Ulimits = ulimits
  778. }