oci_linux.go 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881
  1. package daemon // import "github.com/docker/docker/daemon"
  2. import (
  3. "fmt"
  4. "io"
  5. "os"
  6. "os/exec"
  7. "path/filepath"
  8. "sort"
  9. "strconv"
  10. "strings"
  11. containertypes "github.com/docker/docker/api/types/container"
  12. "github.com/docker/docker/container"
  13. daemonconfig "github.com/docker/docker/daemon/config"
  14. "github.com/docker/docker/oci"
  15. "github.com/docker/docker/pkg/idtools"
  16. "github.com/docker/docker/pkg/mount"
  17. volumemounts "github.com/docker/docker/volume/mounts"
  18. "github.com/opencontainers/runc/libcontainer/apparmor"
  19. "github.com/opencontainers/runc/libcontainer/cgroups"
  20. "github.com/opencontainers/runc/libcontainer/devices"
  21. "github.com/opencontainers/runc/libcontainer/user"
  22. "github.com/opencontainers/runtime-spec/specs-go"
  23. "github.com/pkg/errors"
  24. "github.com/sirupsen/logrus"
  25. "golang.org/x/sys/unix"
  26. )
  27. func setResources(s *specs.Spec, r containertypes.Resources) error {
  28. weightDevices, err := getBlkioWeightDevices(r)
  29. if err != nil {
  30. return err
  31. }
  32. readBpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceReadBps)
  33. if err != nil {
  34. return err
  35. }
  36. writeBpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceWriteBps)
  37. if err != nil {
  38. return err
  39. }
  40. readIOpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceReadIOps)
  41. if err != nil {
  42. return err
  43. }
  44. writeIOpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceWriteIOps)
  45. if err != nil {
  46. return err
  47. }
  48. memoryRes := getMemoryResources(r)
  49. cpuRes, err := getCPUResources(r)
  50. if err != nil {
  51. return err
  52. }
  53. blkioWeight := r.BlkioWeight
  54. specResources := &specs.LinuxResources{
  55. Memory: memoryRes,
  56. CPU: cpuRes,
  57. BlockIO: &specs.LinuxBlockIO{
  58. Weight: &blkioWeight,
  59. WeightDevice: weightDevices,
  60. ThrottleReadBpsDevice: readBpsDevice,
  61. ThrottleWriteBpsDevice: writeBpsDevice,
  62. ThrottleReadIOPSDevice: readIOpsDevice,
  63. ThrottleWriteIOPSDevice: writeIOpsDevice,
  64. },
  65. Pids: &specs.LinuxPids{
  66. Limit: r.PidsLimit,
  67. },
  68. }
  69. if s.Linux.Resources != nil && len(s.Linux.Resources.Devices) > 0 {
  70. specResources.Devices = s.Linux.Resources.Devices
  71. }
  72. s.Linux.Resources = specResources
  73. return nil
  74. }
  75. func setDevices(s *specs.Spec, c *container.Container) error {
  76. // Build lists of devices allowed and created within the container.
  77. var devs []specs.LinuxDevice
  78. devPermissions := s.Linux.Resources.Devices
  79. if c.HostConfig.Privileged {
  80. hostDevices, err := devices.HostDevices()
  81. if err != nil {
  82. return err
  83. }
  84. for _, d := range hostDevices {
  85. devs = append(devs, oci.Device(d))
  86. }
  87. devPermissions = []specs.LinuxDeviceCgroup{
  88. {
  89. Allow: true,
  90. Access: "rwm",
  91. },
  92. }
  93. } else {
  94. for _, deviceMapping := range c.HostConfig.Devices {
  95. d, dPermissions, err := oci.DevicesFromPath(deviceMapping.PathOnHost, deviceMapping.PathInContainer, deviceMapping.CgroupPermissions)
  96. if err != nil {
  97. return err
  98. }
  99. devs = append(devs, d...)
  100. devPermissions = append(devPermissions, dPermissions...)
  101. }
  102. var err error
  103. devPermissions, err = appendDevicePermissionsFromCgroupRules(devPermissions, c.HostConfig.DeviceCgroupRules)
  104. if err != nil {
  105. return err
  106. }
  107. }
  108. s.Linux.Devices = append(s.Linux.Devices, devs...)
  109. s.Linux.Resources.Devices = devPermissions
  110. return nil
  111. }
  112. func (daemon *Daemon) setRlimits(s *specs.Spec, c *container.Container) error {
  113. var rlimits []specs.POSIXRlimit
  114. // We want to leave the original HostConfig alone so make a copy here
  115. hostConfig := *c.HostConfig
  116. // Merge with the daemon defaults
  117. daemon.mergeUlimits(&hostConfig)
  118. for _, ul := range hostConfig.Ulimits {
  119. rlimits = append(rlimits, specs.POSIXRlimit{
  120. Type: "RLIMIT_" + strings.ToUpper(ul.Name),
  121. Soft: uint64(ul.Soft),
  122. Hard: uint64(ul.Hard),
  123. })
  124. }
  125. s.Process.Rlimits = rlimits
  126. return nil
  127. }
  128. func setUser(s *specs.Spec, c *container.Container) error {
  129. uid, gid, additionalGids, err := getUser(c, c.Config.User)
  130. if err != nil {
  131. return err
  132. }
  133. s.Process.User.UID = uid
  134. s.Process.User.GID = gid
  135. s.Process.User.AdditionalGids = additionalGids
  136. return nil
  137. }
  138. func readUserFile(c *container.Container, p string) (io.ReadCloser, error) {
  139. fp, err := c.GetResourcePath(p)
  140. if err != nil {
  141. return nil, err
  142. }
  143. return os.Open(fp)
  144. }
  145. func getUser(c *container.Container, username string) (uint32, uint32, []uint32, error) {
  146. passwdPath, err := user.GetPasswdPath()
  147. if err != nil {
  148. return 0, 0, nil, err
  149. }
  150. groupPath, err := user.GetGroupPath()
  151. if err != nil {
  152. return 0, 0, nil, err
  153. }
  154. passwdFile, err := readUserFile(c, passwdPath)
  155. if err == nil {
  156. defer passwdFile.Close()
  157. }
  158. groupFile, err := readUserFile(c, groupPath)
  159. if err == nil {
  160. defer groupFile.Close()
  161. }
  162. execUser, err := user.GetExecUser(username, nil, passwdFile, groupFile)
  163. if err != nil {
  164. return 0, 0, nil, err
  165. }
  166. // todo: fix this double read by a change to libcontainer/user pkg
  167. groupFile, err = readUserFile(c, groupPath)
  168. if err == nil {
  169. defer groupFile.Close()
  170. }
  171. var addGroups []int
  172. if len(c.HostConfig.GroupAdd) > 0 {
  173. addGroups, err = user.GetAdditionalGroups(c.HostConfig.GroupAdd, groupFile)
  174. if err != nil {
  175. return 0, 0, nil, err
  176. }
  177. }
  178. uid := uint32(execUser.Uid)
  179. gid := uint32(execUser.Gid)
  180. sgids := append(execUser.Sgids, addGroups...)
  181. var additionalGids []uint32
  182. for _, g := range sgids {
  183. additionalGids = append(additionalGids, uint32(g))
  184. }
  185. return uid, gid, additionalGids, nil
  186. }
  187. func setNamespace(s *specs.Spec, ns specs.LinuxNamespace) {
  188. for i, n := range s.Linux.Namespaces {
  189. if n.Type == ns.Type {
  190. s.Linux.Namespaces[i] = ns
  191. return
  192. }
  193. }
  194. s.Linux.Namespaces = append(s.Linux.Namespaces, ns)
  195. }
  196. func setNamespaces(daemon *Daemon, s *specs.Spec, c *container.Container) error {
  197. userNS := false
  198. // user
  199. if c.HostConfig.UsernsMode.IsPrivate() {
  200. uidMap := daemon.idMappings.UIDs()
  201. if uidMap != nil {
  202. userNS = true
  203. ns := specs.LinuxNamespace{Type: "user"}
  204. setNamespace(s, ns)
  205. s.Linux.UIDMappings = specMapping(uidMap)
  206. s.Linux.GIDMappings = specMapping(daemon.idMappings.GIDs())
  207. }
  208. }
  209. // network
  210. if !c.Config.NetworkDisabled {
  211. ns := specs.LinuxNamespace{Type: "network"}
  212. parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2)
  213. if parts[0] == "container" {
  214. nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer())
  215. if err != nil {
  216. return err
  217. }
  218. ns.Path = fmt.Sprintf("/proc/%d/ns/net", nc.State.GetPID())
  219. if userNS {
  220. // to share a net namespace, they must also share a user namespace
  221. nsUser := specs.LinuxNamespace{Type: "user"}
  222. nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", nc.State.GetPID())
  223. setNamespace(s, nsUser)
  224. }
  225. } else if c.HostConfig.NetworkMode.IsHost() {
  226. ns.Path = c.NetworkSettings.SandboxKey
  227. }
  228. setNamespace(s, ns)
  229. }
  230. // ipc
  231. ipcMode := c.HostConfig.IpcMode
  232. switch {
  233. case ipcMode.IsContainer():
  234. ns := specs.LinuxNamespace{Type: "ipc"}
  235. ic, err := daemon.getIpcContainer(ipcMode.Container())
  236. if err != nil {
  237. return err
  238. }
  239. ns.Path = fmt.Sprintf("/proc/%d/ns/ipc", ic.State.GetPID())
  240. setNamespace(s, ns)
  241. if userNS {
  242. // to share an IPC namespace, they must also share a user namespace
  243. nsUser := specs.LinuxNamespace{Type: "user"}
  244. nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", ic.State.GetPID())
  245. setNamespace(s, nsUser)
  246. }
  247. case ipcMode.IsHost():
  248. oci.RemoveNamespace(s, specs.LinuxNamespaceType("ipc"))
  249. case ipcMode.IsEmpty():
  250. // A container was created by an older version of the daemon.
  251. // The default behavior used to be what is now called "shareable".
  252. fallthrough
  253. case ipcMode.IsPrivate(), ipcMode.IsShareable(), ipcMode.IsNone():
  254. ns := specs.LinuxNamespace{Type: "ipc"}
  255. setNamespace(s, ns)
  256. default:
  257. return fmt.Errorf("Invalid IPC mode: %v", ipcMode)
  258. }
  259. // pid
  260. if c.HostConfig.PidMode.IsContainer() {
  261. ns := specs.LinuxNamespace{Type: "pid"}
  262. pc, err := daemon.getPidContainer(c)
  263. if err != nil {
  264. return err
  265. }
  266. ns.Path = fmt.Sprintf("/proc/%d/ns/pid", pc.State.GetPID())
  267. setNamespace(s, ns)
  268. if userNS {
  269. // to share a PID namespace, they must also share a user namespace
  270. nsUser := specs.LinuxNamespace{Type: "user"}
  271. nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", pc.State.GetPID())
  272. setNamespace(s, nsUser)
  273. }
  274. } else if c.HostConfig.PidMode.IsHost() {
  275. oci.RemoveNamespace(s, specs.LinuxNamespaceType("pid"))
  276. } else {
  277. ns := specs.LinuxNamespace{Type: "pid"}
  278. setNamespace(s, ns)
  279. }
  280. // uts
  281. if c.HostConfig.UTSMode.IsHost() {
  282. oci.RemoveNamespace(s, specs.LinuxNamespaceType("uts"))
  283. s.Hostname = ""
  284. }
  285. return nil
  286. }
  287. func specMapping(s []idtools.IDMap) []specs.LinuxIDMapping {
  288. var ids []specs.LinuxIDMapping
  289. for _, item := range s {
  290. ids = append(ids, specs.LinuxIDMapping{
  291. HostID: uint32(item.HostID),
  292. ContainerID: uint32(item.ContainerID),
  293. Size: uint32(item.Size),
  294. })
  295. }
  296. return ids
  297. }
  298. // Get the source mount point of directory passed in as argument. Also return
  299. // optional fields.
  300. func getSourceMount(source string) (string, string, error) {
  301. // Ensure any symlinks are resolved.
  302. sourcePath, err := filepath.EvalSymlinks(source)
  303. if err != nil {
  304. return "", "", err
  305. }
  306. mi, err := mount.GetMounts(mount.ParentsFilter(sourcePath))
  307. if err != nil {
  308. return "", "", err
  309. }
  310. if len(mi) < 1 {
  311. return "", "", fmt.Errorf("Can't find mount point of %s", source)
  312. }
  313. // find the longest mount point
  314. var idx, maxlen int
  315. for i := range mi {
  316. if len(mi[i].Mountpoint) > maxlen {
  317. maxlen = len(mi[i].Mountpoint)
  318. idx = i
  319. }
  320. }
  321. return mi[idx].Mountpoint, mi[idx].Optional, nil
  322. }
// Prefixes that hasMountinfoOption matches against the optional fields of a
// mount entry (as returned by getSourceMount) to detect its propagation type.
const (
	// sharedPropagationOption is the prefix checked by ensureShared and
	// ensureSharedOrSlave for a shared mount.
	sharedPropagationOption = "shared:"
	// slavePropagationOption is the prefix checked by ensureSharedOrSlave for
	// a slave mount.
	slavePropagationOption = "master:"
)
  327. // hasMountinfoOption checks if any of the passed any of the given option values
  328. // are set in the passed in option string.
  329. func hasMountinfoOption(opts string, vals ...string) bool {
  330. for _, opt := range strings.Split(opts, " ") {
  331. for _, val := range vals {
  332. if strings.HasPrefix(opt, val) {
  333. return true
  334. }
  335. }
  336. }
  337. return false
  338. }
  339. // Ensure mount point on which path is mounted, is shared.
  340. func ensureShared(path string) error {
  341. sourceMount, optionalOpts, err := getSourceMount(path)
  342. if err != nil {
  343. return err
  344. }
  345. // Make sure source mount point is shared.
  346. if !hasMountinfoOption(optionalOpts, sharedPropagationOption) {
  347. return errors.Errorf("path %s is mounted on %s but it is not a shared mount", path, sourceMount)
  348. }
  349. return nil
  350. }
  351. // Ensure mount point on which path is mounted, is either shared or slave.
  352. func ensureSharedOrSlave(path string) error {
  353. sourceMount, optionalOpts, err := getSourceMount(path)
  354. if err != nil {
  355. return err
  356. }
  357. if !hasMountinfoOption(optionalOpts, sharedPropagationOption, slavePropagationOption) {
  358. return errors.Errorf("path %s is mounted on %s but it is not a shared or slave mount", path, sourceMount)
  359. }
  360. return nil
  361. }
  362. // Get the set of mount flags that are set on the mount that contains the given
  363. // path and are locked by CL_UNPRIVILEGED. This is necessary to ensure that
  364. // bind-mounting "with options" will not fail with user namespaces, due to
  365. // kernel restrictions that require user namespace mounts to preserve
  366. // CL_UNPRIVILEGED locked flags.
  367. func getUnprivilegedMountFlags(path string) ([]string, error) {
  368. var statfs unix.Statfs_t
  369. if err := unix.Statfs(path, &statfs); err != nil {
  370. return nil, err
  371. }
  372. // The set of keys come from https://github.com/torvalds/linux/blob/v4.13/fs/namespace.c#L1034-L1048.
  373. unprivilegedFlags := map[uint64]string{
  374. unix.MS_RDONLY: "ro",
  375. unix.MS_NODEV: "nodev",
  376. unix.MS_NOEXEC: "noexec",
  377. unix.MS_NOSUID: "nosuid",
  378. unix.MS_NOATIME: "noatime",
  379. unix.MS_RELATIME: "relatime",
  380. unix.MS_NODIRATIME: "nodiratime",
  381. }
  382. var flags []string
  383. for mask, flag := range unprivilegedFlags {
  384. if uint64(statfs.Flags)&mask == mask {
  385. flags = append(flags, flag)
  386. }
  387. }
  388. return flags, nil
  389. }
var (
	// mountPropagationMap maps a propagation mode name to its flag from the
	// mount package. Looking up a name that is not listed (including the
	// empty string) yields 0.
	mountPropagationMap = map[string]int{
		"private":  mount.PRIVATE,
		"rprivate": mount.RPRIVATE,
		"shared":   mount.SHARED,
		"rshared":  mount.RSHARED,
		"slave":    mount.SLAVE,
		"rslave":   mount.RSLAVE,
	}

	// mountPropagationReverseMap is the inverse of mountPropagationMap: it
	// maps a propagation flag back to its mode name.
	mountPropagationReverseMap = map[int]string{
		mount.PRIVATE:  "private",
		mount.RPRIVATE: "rprivate",
		mount.SHARED:   "shared",
		mount.RSHARED:  "rshared",
		mount.SLAVE:    "slave",
		mount.RSLAVE:   "rslave",
	}
)
  408. // inSlice tests whether a string is contained in a slice of strings or not.
  409. // Comparison is case sensitive
  410. func inSlice(slice []string, s string) bool {
  411. for _, ss := range slice {
  412. if s == ss {
  413. return true
  414. }
  415. }
  416. return false
  417. }
// setMounts merges the given mounts into the mounts of the spec.
//
// It first filters the spec's existing (default) mounts, then appends one
// spec mount per entry of mounts (tmpfs or bind), and finally adjusts
// read-only options for a read-only rootfs, privileged containers and user
// namespaces. Along the way it may raise s.Linux.RootfsPropagation so that
// shared or slave bind mounts can propagate.
//
// It returns duplicateMountPointError when an entry of mounts has the same
// destination as a mount already present in s.Mounts at that point, and
// passes through errors from the propagation checks, the tmpfs option merge
// and getUnprivilegedMountFlags.
func setMounts(daemon *Daemon, s *specs.Spec, c *container.Container, mounts []container.Mount) error {
	// Destinations of all mounts passed in by the caller.
	userMounts := make(map[string]struct{})
	for _, m := range mounts {
		userMounts[m.Destination] = struct{}{}
	}

	// Copy all mounts from spec to defaultMounts, except for
	//  - mounts overridden by a user supplied mount;
	//  - all mounts under /dev if a user supplied /dev is present;
	//  - /dev/shm, in case IpcMode is none.
	// While at it, also
	//  - set size for /dev/shm from shmsize.
	//
	// defaultMounts shares the backing array of s.Mounts, so the filtering
	// is done in place.
	defaultMounts := s.Mounts[:0]
	_, mountDev := userMounts["/dev"]
	for _, m := range s.Mounts {
		if _, ok := userMounts[m.Destination]; ok {
			// filter out mount overridden by a user supplied mount
			continue
		}
		if mountDev && strings.HasPrefix(m.Destination, "/dev/") {
			// filter out everything under /dev if /dev is user-mounted
			continue
		}

		if m.Destination == "/dev/shm" {
			if c.HostConfig.IpcMode.IsNone() {
				// filter out /dev/shm for "none" IpcMode
				continue
			}
			// set size for /dev/shm mount from spec
			sizeOpt := "size=" + strconv.FormatInt(c.HostConfig.ShmSize, 10)
			m.Options = append(m.Options, sizeOpt)
		}

		defaultMounts = append(defaultMounts, m)
	}

	s.Mounts = defaultMounts
	for _, m := range mounts {
		// s.Mounts grows inside this loop, so this also rejects two entries
		// of mounts that share a destination.
		for _, cm := range s.Mounts {
			if cm.Destination == m.Destination {
				return duplicateMountPointError(m.Destination)
			}
		}

		if m.Source == "tmpfs" {
			data := m.Data
			parser := volumemounts.NewParser("linux")
			// Default tmpfs options, followed by the comma-separated options
			// from m.Data, if any.
			options := []string{"noexec", "nosuid", "nodev", string(parser.DefaultPropagationMode())}
			if data != "" {
				options = append(options, strings.Split(data, ",")...)
			}

			merged, err := mount.MergeTmpfsOptions(options)
			if err != nil {
				return err
			}

			s.Mounts = append(s.Mounts, specs.Mount{Destination: m.Destination, Source: m.Source, Type: "tmpfs", Options: merged})
			continue
		}

		mt := specs.Mount{Destination: m.Destination, Source: m.Source, Type: "bind"}

		// Determine property of RootPropagation based on volume
		// properties. If a volume is shared, then keep root propagation
		// shared. This should work for slave and private volumes too.
		//
		// For slave volumes, it can be either [r]shared/[r]slave.
		//
		// For private volumes any root propagation value should work.
		//
		// pFlag is 0 when m.Propagation is not a key of mountPropagationMap.
		pFlag := mountPropagationMap[m.Propagation]
		switch pFlag {
		case mount.SHARED, mount.RSHARED:
			if err := ensureShared(m.Source); err != nil {
				return err
			}
			rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
			if rootpg != mount.SHARED && rootpg != mount.RSHARED {
				s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.SHARED]
			}
		case mount.SLAVE, mount.RSLAVE:
			var fallback bool
			if err := ensureSharedOrSlave(m.Source); err != nil {
				// For backwards compatibility purposes, treat mounts from the daemon root
				// as special since we automatically add rslave propagation to these mounts
				// when the user did not set anything, so we should fallback to the old
				// behavior which is to use private propagation which is normally the
				// default.
				if !strings.HasPrefix(m.Source, daemon.root) && !strings.HasPrefix(daemon.root, m.Source) {
					return err
				}

				cm, ok := c.MountPoints[m.Destination]
				if !ok {
					return err
				}
				if cm.Spec.BindOptions != nil && cm.Spec.BindOptions.Propagation != "" {
					// This means the user explicitly set a propagation, do not fallback in that case.
					return err
				}
				// NOTE(review): pFlag is not reset on fallback, so the
				// propagation option is still appended to the mount options
				// below — confirm this is intended.
				fallback = true
				logrus.WithField("container", c.ID).WithField("source", m.Source).Warn("Falling back to default propagation for bind source in daemon root")
			}
			if !fallback {
				rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
				if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
					s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.RSLAVE]
				}
			}
		}

		opts := []string{"rbind"}
		if !m.Writable {
			opts = append(opts, "ro")
		}
		if pFlag != 0 {
			opts = append(opts, mountPropagationReverseMap[pFlag])
		}

		// If we are using user namespaces, then we must make sure that we
		// don't drop any of the CL_UNPRIVILEGED "locked" flags of the source
		// "mount" when we bind-mount. The reason for this is that at the point
		// when runc sets up the root filesystem, it is already inside a user
		// namespace, and thus cannot change any flags that are locked.
		if daemon.configStore.RemappedRoot != "" {
			unprivOpts, err := getUnprivilegedMountFlags(m.Source)
			if err != nil {
				return err
			}
			opts = append(opts, unprivOpts...)
		}

		mt.Options = opts
		s.Mounts = append(s.Mounts, mt)
	}

	// With a read-only rootfs, add "ro" to every mount that is neither one
	// of the listed destinations nor a caller-supplied mount.
	if s.Root.Readonly {
		for i, m := range s.Mounts {
			switch m.Destination {
			case "/proc", "/dev/pts", "/dev/shm", "/dev/mqueue", "/dev":
				continue
			}
			if _, ok := userMounts[m.Destination]; !ok {
				if !inSlice(m.Options, "ro") {
					s.Mounts[i].Options = append(s.Mounts[i].Options, "ro")
				}
			}
		}
	}

	if c.HostConfig.Privileged {
		// clear readonly for /sys
		for i := range s.Mounts {
			if s.Mounts[i].Destination == "/sys" {
				clearReadOnly(&s.Mounts[i])
			}
		}
		// Privileged containers get no read-only or masked paths.
		s.Linux.ReadonlyPaths = nil
		s.Linux.MaskedPaths = nil
	}

	// TODO: until a kernel/mount solution exists for handling remount in a user namespace,
	// we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
	if uidMap := daemon.idMappings.UIDs(); uidMap != nil || c.HostConfig.Privileged {
		for i, m := range s.Mounts {
			if m.Type == "cgroup" {
				clearReadOnly(&s.Mounts[i])
			}
		}
	}

	return nil
}
  575. func (daemon *Daemon) populateCommonSpec(s *specs.Spec, c *container.Container) error {
  576. if c.BaseFS == nil {
  577. return errors.New("populateCommonSpec: BaseFS of container " + c.ID + " is unexpectedly nil")
  578. }
  579. linkedEnv, err := daemon.setupLinkedContainers(c)
  580. if err != nil {
  581. return err
  582. }
  583. s.Root = &specs.Root{
  584. Path: c.BaseFS.Path(),
  585. Readonly: c.HostConfig.ReadonlyRootfs,
  586. }
  587. if err := c.SetupWorkingDirectory(daemon.idMappings.RootPair()); err != nil {
  588. return err
  589. }
  590. cwd := c.Config.WorkingDir
  591. if len(cwd) == 0 {
  592. cwd = "/"
  593. }
  594. s.Process.Args = append([]string{c.Path}, c.Args...)
  595. // only add the custom init if it is specified and the container is running in its
  596. // own private pid namespace. It does not make sense to add if it is running in the
  597. // host namespace or another container's pid namespace where we already have an init
  598. if c.HostConfig.PidMode.IsPrivate() {
  599. if (c.HostConfig.Init != nil && *c.HostConfig.Init) ||
  600. (c.HostConfig.Init == nil && daemon.configStore.Init) {
  601. s.Process.Args = append([]string{"/dev/init", "--", c.Path}, c.Args...)
  602. var path string
  603. if daemon.configStore.InitPath == "" {
  604. path, err = exec.LookPath(daemonconfig.DefaultInitBinary)
  605. if err != nil {
  606. return err
  607. }
  608. }
  609. if daemon.configStore.InitPath != "" {
  610. path = daemon.configStore.InitPath
  611. }
  612. s.Mounts = append(s.Mounts, specs.Mount{
  613. Destination: "/dev/init",
  614. Type: "bind",
  615. Source: path,
  616. Options: []string{"bind", "ro"},
  617. })
  618. }
  619. }
  620. s.Process.Cwd = cwd
  621. s.Process.Env = c.CreateDaemonEnvironment(c.Config.Tty, linkedEnv)
  622. s.Process.Terminal = c.Config.Tty
  623. s.Hostname = c.FullHostname()
  624. return nil
  625. }
// createSpec builds the runtime spec for container c, starting from
// oci.DefaultSpec() and applying, in order: the common settings, the cgroups
// path, resources, sysctls, devices, rlimits, user, namespaces, capabilities,
// seccomp, mounts, the libnetwork prestart hook, the AppArmor profile, the
// SELinux labels and the masked/read-only path overrides.
//
// It returns the finished spec, or nil and an error. Errors from several of
// the steps are prefixed with a short description of the failing step. The
// named err result is inspected by the deferred cleanup registered below.
func (daemon *Daemon) createSpec(c *container.Container) (retSpec *specs.Spec, err error) {
	s := oci.DefaultSpec()
	if err := daemon.populateCommonSpec(&s, c); err != nil {
		return nil, err
	}

	// Work out the cgroups path. The parent is, in increasing priority: the
	// built-in default ("/docker", or "system.slice" with systemd), the
	// daemon-wide CgroupParent, the container's CgroupParent.
	var cgroupsPath string
	scopePrefix := "docker"
	parent := "/docker"
	useSystemd := UsingSystemd(daemon.configStore)
	if useSystemd {
		parent = "system.slice"
	}

	if c.HostConfig.CgroupParent != "" {
		parent = c.HostConfig.CgroupParent
	} else if daemon.configStore.CgroupParent != "" {
		parent = daemon.configStore.CgroupParent
	}

	if useSystemd {
		// With systemd the path takes the form "parent:prefix:id".
		cgroupsPath = parent + ":" + scopePrefix + ":" + c.ID
		logrus.Debugf("createSpec: cgroupsPath: %s", cgroupsPath)
	} else {
		cgroupsPath = filepath.Join(parent, c.ID)
	}
	s.Linux.CgroupsPath = cgroupsPath

	if err := setResources(&s, c.HostConfig.Resources); err != nil {
		return nil, fmt.Errorf("linux runtime spec resources: %v", err)
	}
	s.Linux.Sysctl = c.HostConfig.Sysctls

	p := s.Linux.CgroupsPath
	if useSystemd {
		initPath, err := cgroups.GetInitCgroup("cpu")
		if err != nil {
			return nil, err
		}
		// The result is discarded; only the error is checked.
		_, err = cgroups.GetOwnCgroup("cpu")
		if err != nil {
			return nil, err
		}
		p = filepath.Join(initPath, s.Linux.CgroupsPath)
	}

	// Clean path to guard against things like ../../../BAD
	parentPath := filepath.Dir(p)
	if !filepath.IsAbs(parentPath) {
		parentPath = filepath.Clean("/" + parentPath)
	}

	if err := daemon.initCgroupsPath(parentPath); err != nil {
		return nil, fmt.Errorf("linux init cgroups path: %v", err)
	}
	if err := setDevices(&s, c); err != nil {
		return nil, fmt.Errorf("linux runtime spec devices: %v", err)
	}
	if err := daemon.setRlimits(&s, c); err != nil {
		return nil, fmt.Errorf("linux runtime spec rlimits: %v", err)
	}
	if err := setUser(&s, c); err != nil {
		return nil, fmt.Errorf("linux spec user: %v", err)
	}
	if err := setNamespaces(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux spec namespaces: %v", err)
	}
	if err := setCapabilities(&s, c); err != nil {
		return nil, fmt.Errorf("linux spec capabilities: %v", err)
	}
	if err := setSeccomp(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux seccomp: %v", err)
	}

	if err := daemon.setupContainerMountsRoot(c); err != nil {
		return nil, err
	}

	if err := daemon.setupIpcDirs(c); err != nil {
		return nil, err
	}

	// From here on, any error return (observed through the named err result)
	// triggers cleanup of the container's secret directory.
	defer func() {
		if err != nil {
			daemon.cleanupSecretDir(c)
		}
	}()

	if err := daemon.setupSecretDir(c); err != nil {
		return nil, err
	}

	// Collect every mount that goes into the spec: the container's mounts,
	// IPC mounts (unless the IPC mode is private or empty), tmpfs mounts and
	// secret mounts.
	ms, err := daemon.setupMounts(c)
	if err != nil {
		return nil, err
	}

	if !c.HostConfig.IpcMode.IsPrivate() && !c.HostConfig.IpcMode.IsEmpty() {
		ms = append(ms, c.IpcMounts()...)
	}

	tmpfsMounts, err := c.TmpfsMounts()
	if err != nil {
		return nil, err
	}
	ms = append(ms, tmpfsMounts...)

	secretMounts, err := c.SecretMounts()
	if err != nil {
		return nil, err
	}
	ms = append(ms, secretMounts...)

	sort.Sort(mounts(ms))
	if err := setMounts(daemon, &s, c, ms); err != nil {
		return nil, fmt.Errorf("linux mounts: %v", err)
	}

	// For a network namespace without a path (and networking enabled),
	// register a prestart hook that re-executes this process's own binary
	// (/proc/<pid>/exe) with the "libnetwork-setkey" arguments.
	for _, ns := range s.Linux.Namespaces {
		if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled {
			target := filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe")
			s.Hooks = &specs.Hooks{
				Prestart: []specs.Hook{{
					Path: target,
					Args: []string{"libnetwork-setkey", c.ID, daemon.netController.ID()},
				}},
			}
		}
	}

	if apparmor.IsEnabled() {
		// Profile selection: an explicit container profile wins, privileged
		// containers run unconfined, everything else uses "docker-default".
		var appArmorProfile string
		if c.AppArmorProfile != "" {
			appArmorProfile = c.AppArmorProfile
		} else if c.HostConfig.Privileged {
			appArmorProfile = "unconfined"
		} else {
			appArmorProfile = "docker-default"
		}

		if appArmorProfile == "docker-default" {
			// Unattended upgrades and other fun services can unload AppArmor
			// profiles inadvertently. Since we cannot store our profile in
			// /etc/apparmor.d, nor can we practically add other ways of
			// telling the system to keep our profile loaded, in order to make
			// sure that we keep the default profile enabled we dynamically
			// reload it if necessary.
			if err := ensureDefaultAppArmorProfile(); err != nil {
				return nil, err
			}
		}

		s.Process.ApparmorProfile = appArmorProfile
	}
	s.Process.SelinuxLabel = c.GetProcessLabel()
	s.Process.NoNewPrivileges = c.NoNewPrivileges
	s.Process.OOMScoreAdj = &c.HostConfig.OomScoreAdj
	s.Linux.MountLabel = c.MountLabel

	// Set the masked and readonly paths with regard to the host config options if they are set.
	if c.HostConfig.MaskedPaths != nil {
		s.Linux.MaskedPaths = c.HostConfig.MaskedPaths
	}
	if c.HostConfig.ReadonlyPaths != nil {
		s.Linux.ReadonlyPaths = c.HostConfig.ReadonlyPaths
	}

	return &s, nil
}
  773. func clearReadOnly(m *specs.Mount) {
  774. var opt []string
  775. for _, o := range m.Options {
  776. if o != "ro" {
  777. opt = append(opt, o)
  778. }
  779. }
  780. m.Options = opt
  781. }
  782. // mergeUlimits merge the Ulimits from HostConfig with daemon defaults, and update HostConfig
  783. func (daemon *Daemon) mergeUlimits(c *containertypes.HostConfig) {
  784. ulimits := c.Ulimits
  785. // Merge ulimits with daemon defaults
  786. ulIdx := make(map[string]struct{})
  787. for _, ul := range ulimits {
  788. ulIdx[ul.Name] = struct{}{}
  789. }
  790. for name, ul := range daemon.configStore.Ulimits {
  791. if _, exists := ulIdx[name]; !exists {
  792. ulimits = append(ulimits, ul)
  793. }
  794. }
  795. c.Ulimits = ulimits
  796. }