oci_linux.go

package daemon

import (
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"sort"
	"strconv"
	"strings"

	"github.com/Sirupsen/logrus"
	containertypes "github.com/docker/docker/api/types/container"
	"github.com/docker/docker/container"
	"github.com/docker/docker/daemon/caps"
	daemonconfig "github.com/docker/docker/daemon/config"
	"github.com/docker/docker/oci"
	"github.com/docker/docker/pkg/idtools"
	"github.com/docker/docker/pkg/mount"
	"github.com/docker/docker/pkg/stringutils"
	"github.com/docker/docker/pkg/symlink"
	"github.com/docker/docker/volume"
	"github.com/opencontainers/runc/libcontainer/apparmor"
	"github.com/opencontainers/runc/libcontainer/cgroups"
	"github.com/opencontainers/runc/libcontainer/devices"
	"github.com/opencontainers/runc/libcontainer/user"
	specs "github.com/opencontainers/runtime-spec/specs-go"
)
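
// deviceCgroupRuleRegex matches device cgroup rules of the form
// "<type> <major>:<minor> <access>", for example "c 1:3 mr" or "a *:* rwm".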
var (
	deviceCgroupRuleRegex = regexp.MustCompile("^([acb]) ([0-9]+|\\*):([0-9]+|\\*) ([rwm]{1,3})$")
)
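
// setResources translates the container's resource limits (block I/O, memory,
// CPU, and pids) into the Linux resources section of the OCI spec.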
func setResources(s *specs.Spec, r containertypes.Resources) error {
	weightDevices, err := getBlkioWeightDevices(r)
	if err != nil {
		return err
	}
	readBpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceReadBps)
	if err != nil {
		return err
	}
	writeBpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceWriteBps)
	if err != nil {
		return err
	}
	readIOpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceReadIOps)
	if err != nil {
		return err
	}
	writeIOpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceWriteIOps)
	if err != nil {
		return err
	}
	memoryRes := getMemoryResources(r)
	cpuRes, err := getCPUResources(r)
	if err != nil {
		return err
	}
	blkioWeight := r.BlkioWeight

	specResources := &specs.LinuxResources{
		Memory: memoryRes,
		CPU:    cpuRes,
		BlockIO: &specs.LinuxBlockIO{
			Weight:                  &blkioWeight,
			WeightDevice:            weightDevices,
			ThrottleReadBpsDevice:   readBpsDevice,
			ThrottleWriteBpsDevice:  writeBpsDevice,
			ThrottleReadIOPSDevice:  readIOpsDevice,
			ThrottleWriteIOPSDevice: writeIOpsDevice,
		},
		DisableOOMKiller: r.OomKillDisable,
		Pids: &specs.LinuxPids{
			Limit: r.PidsLimit,
		},
	}

	if s.Linux.Resources != nil && len(s.Linux.Resources.Devices) > 0 {
		specResources.Devices = s.Linux.Resources.Devices
	}

	s.Linux.Resources = specResources
	return nil
}
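
// setDevices builds the lists of devices to create inside the container and
// the matching device cgroup access rules, honouring privileged mode, explicit
// device mappings, and device cgroup rules from the HostConfig.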
func setDevices(s *specs.Spec, c *container.Container) error {
	// Build lists of devices allowed and created within the container.
	var devs []specs.LinuxDevice
	devPermissions := s.Linux.Resources.Devices
	if c.HostConfig.Privileged {
		hostDevices, err := devices.HostDevices()
		if err != nil {
			return err
		}
		for _, d := range hostDevices {
			devs = append(devs, oci.Device(d))
		}
		devPermissions = []specs.LinuxDeviceCgroup{
			{
				Allow:  true,
				Access: "rwm",
			},
		}
	} else {
		for _, deviceMapping := range c.HostConfig.Devices {
			d, dPermissions, err := oci.DevicesFromPath(deviceMapping.PathOnHost, deviceMapping.PathInContainer, deviceMapping.CgroupPermissions)
			if err != nil {
				return err
			}
			devs = append(devs, d...)
			devPermissions = append(devPermissions, dPermissions...)
		}

		for _, deviceCgroupRule := range c.HostConfig.DeviceCgroupRules {
			ss := deviceCgroupRuleRegex.FindAllStringSubmatch(deviceCgroupRule, -1)
			if len(ss) == 0 || len(ss[0]) != 5 {
				return fmt.Errorf("invalid device cgroup rule format: '%s'", deviceCgroupRule)
			}
			matches := ss[0]

			dPermissions := specs.LinuxDeviceCgroup{
				Allow:  true,
				Type:   matches[1],
				Access: matches[4],
			}
			if matches[2] == "*" {
				major := int64(-1)
				dPermissions.Major = &major
			} else {
				major, err := strconv.ParseInt(matches[2], 10, 64)
				if err != nil {
					return fmt.Errorf("invalid major value in device cgroup rule format: '%s'", deviceCgroupRule)
				}
				dPermissions.Major = &major
			}
			if matches[3] == "*" {
				minor := int64(-1)
				dPermissions.Minor = &minor
			} else {
				minor, err := strconv.ParseInt(matches[3], 10, 64)
				if err != nil {
					return fmt.Errorf("invalid minor value in device cgroup rule format: '%s'", deviceCgroupRule)
				}
				dPermissions.Minor = &minor
			}
			devPermissions = append(devPermissions, dPermissions)
		}
	}

	s.Linux.Devices = append(s.Linux.Devices, devs...)
	s.Linux.Resources.Devices = devPermissions
	return nil
}
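
// setRlimits copies the container's ulimits, merged with the daemon defaults,
// into the process rlimits of the OCI spec.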
func setRlimits(daemon *Daemon, s *specs.Spec, c *container.Container) error {
	var rlimits []specs.LinuxRlimit

	// We want to leave the original HostConfig alone so make a copy here
	hostConfig := *c.HostConfig
	// Merge with the daemon defaults
	daemon.mergeUlimits(&hostConfig)
	for _, ul := range hostConfig.Ulimits {
		rlimits = append(rlimits, specs.LinuxRlimit{
			Type: "RLIMIT_" + strings.ToUpper(ul.Name),
			Soft: uint64(ul.Soft),
			Hard: uint64(ul.Hard),
		})
	}

	s.Process.Rlimits = rlimits
	return nil
}
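
// setUser resolves the container's configured user to numeric IDs and sets
// them on the OCI spec process.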
func setUser(s *specs.Spec, c *container.Container) error {
	uid, gid, additionalGids, err := getUser(c, c.Config.User)
	if err != nil {
		return err
	}
	s.Process.User.UID = uid
	s.Process.User.GID = gid
	s.Process.User.AdditionalGids = additionalGids
	return nil
}
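
// readUserFile opens the file at path p, resolved inside the container's root
// filesystem so symlinks cannot escape that scope.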
func readUserFile(c *container.Container, p string) (io.ReadCloser, error) {
	fp, err := symlink.FollowSymlinkInScope(filepath.Join(c.BaseFS, p), c.BaseFS)
	if err != nil {
		return nil, err
	}
	return os.Open(fp)
}
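
// getUser resolves username against the container's passwd and group files and
// returns the UID, GID, and any additional group IDs (including GroupAdd).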
func getUser(c *container.Container, username string) (uint32, uint32, []uint32, error) {
	passwdPath, err := user.GetPasswdPath()
	if err != nil {
		return 0, 0, nil, err
	}
	groupPath, err := user.GetGroupPath()
	if err != nil {
		return 0, 0, nil, err
	}
	passwdFile, err := readUserFile(c, passwdPath)
	if err == nil {
		defer passwdFile.Close()
	}
	groupFile, err := readUserFile(c, groupPath)
	if err == nil {
		defer groupFile.Close()
	}

	execUser, err := user.GetExecUser(username, nil, passwdFile, groupFile)
	if err != nil {
		return 0, 0, nil, err
	}

	// todo: fix this double read by a change to libcontainer/user pkg
	groupFile, err = readUserFile(c, groupPath)
	if err == nil {
		defer groupFile.Close()
	}
	var addGroups []int
	if len(c.HostConfig.GroupAdd) > 0 {
		addGroups, err = user.GetAdditionalGroups(c.HostConfig.GroupAdd, groupFile)
		if err != nil {
			return 0, 0, nil, err
		}
	}
	uid := uint32(execUser.Uid)
	gid := uint32(execUser.Gid)
	sgids := append(execUser.Sgids, addGroups...)
	var additionalGids []uint32
	for _, g := range sgids {
		additionalGids = append(additionalGids, uint32(g))
	}
	return uid, gid, additionalGids, nil
}
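
// setNamespace adds ns to the spec, replacing any existing namespace of the
// same type.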
func setNamespace(s *specs.Spec, ns specs.LinuxNamespace) {
	for i, n := range s.Linux.Namespaces {
		if n.Type == ns.Type {
			s.Linux.Namespaces[i] = ns
			return
		}
	}
	s.Linux.Namespaces = append(s.Linux.Namespaces, ns)
}
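
// setCapabilities applies the container's capability set to the spec: every
// capability for privileged containers, otherwise the default set adjusted by
// CapAdd and CapDrop.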
func setCapabilities(s *specs.Spec, c *container.Container) error {
	var caplist []string
	var err error
	if c.HostConfig.Privileged {
		caplist = caps.GetAllCapabilities()
	} else {
		caplist, err = caps.TweakCapabilities(s.Process.Capabilities.Effective, c.HostConfig.CapAdd, c.HostConfig.CapDrop)
		if err != nil {
			return err
		}
	}
	s.Process.Capabilities.Effective = caplist
	s.Process.Capabilities.Bounding = caplist
	s.Process.Capabilities.Permitted = caplist
	s.Process.Capabilities.Inheritable = caplist
	return nil
}
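
// setNamespaces configures the user, network, ipc, pid, and uts namespaces of
// the spec according to the container's HostConfig, joining another
// container's namespaces or the host namespaces where requested.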
func setNamespaces(daemon *Daemon, s *specs.Spec, c *container.Container) error {
	userNS := false
	// user
	if c.HostConfig.UsernsMode.IsPrivate() {
		uidMap, gidMap := daemon.GetUIDGIDMaps()
		if uidMap != nil {
			userNS = true
			ns := specs.LinuxNamespace{Type: "user"}
			setNamespace(s, ns)
			s.Linux.UIDMappings = specMapping(uidMap)
			s.Linux.GIDMappings = specMapping(gidMap)
		}
	}
	// network
	if !c.Config.NetworkDisabled {
		ns := specs.LinuxNamespace{Type: "network"}
		parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2)
		if parts[0] == "container" {
			nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer())
			if err != nil {
				return err
			}
			ns.Path = fmt.Sprintf("/proc/%d/ns/net", nc.State.GetPID())
			if userNS {
				// to share a net namespace, they must also share a user namespace
				nsUser := specs.LinuxNamespace{Type: "user"}
				nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", nc.State.GetPID())
				setNamespace(s, nsUser)
			}
		} else if c.HostConfig.NetworkMode.IsHost() {
			ns.Path = c.NetworkSettings.SandboxKey
		}
		setNamespace(s, ns)
	}
	// ipc
	if c.HostConfig.IpcMode.IsContainer() {
		ns := specs.LinuxNamespace{Type: "ipc"}
		ic, err := daemon.getIpcContainer(c)
		if err != nil {
			return err
		}
		ns.Path = fmt.Sprintf("/proc/%d/ns/ipc", ic.State.GetPID())
		setNamespace(s, ns)
		if userNS {
			// to share an IPC namespace, they must also share a user namespace
			nsUser := specs.LinuxNamespace{Type: "user"}
			nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", ic.State.GetPID())
			setNamespace(s, nsUser)
		}
	} else if c.HostConfig.IpcMode.IsHost() {
		oci.RemoveNamespace(s, specs.LinuxNamespaceType("ipc"))
	} else {
		ns := specs.LinuxNamespace{Type: "ipc"}
		setNamespace(s, ns)
	}
	// pid
	if c.HostConfig.PidMode.IsContainer() {
		ns := specs.LinuxNamespace{Type: "pid"}
		pc, err := daemon.getPidContainer(c)
		if err != nil {
			return err
		}
		ns.Path = fmt.Sprintf("/proc/%d/ns/pid", pc.State.GetPID())
		setNamespace(s, ns)
		if userNS {
			// to share a PID namespace, they must also share a user namespace
			nsUser := specs.LinuxNamespace{Type: "user"}
			nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", pc.State.GetPID())
			setNamespace(s, nsUser)
		}
	} else if c.HostConfig.PidMode.IsHost() {
		oci.RemoveNamespace(s, specs.LinuxNamespaceType("pid"))
	} else {
		ns := specs.LinuxNamespace{Type: "pid"}
		setNamespace(s, ns)
	}
	// uts
	if c.HostConfig.UTSMode.IsHost() {
		oci.RemoveNamespace(s, specs.LinuxNamespaceType("uts"))
		s.Hostname = ""
	}

	return nil
}
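
// specMapping converts idtools ID mappings into the OCI spec representation.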
func specMapping(s []idtools.IDMap) []specs.LinuxIDMapping {
	var ids []specs.LinuxIDMapping
	for _, item := range s {
		ids = append(ids, specs.LinuxIDMapping{
			HostID:      uint32(item.HostID),
			ContainerID: uint32(item.ContainerID),
			Size:        uint32(item.Size),
		})
	}
	return ids
}
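
// getMountInfo returns the entry in mountinfo whose mountpoint is dir, or nil
// if there is none.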
func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
	for _, m := range mountinfo {
		if m.Mountpoint == dir {
			return m
		}
	}
	return nil
}

// Get the source mount point of the directory passed in as argument. Also
// return its optional fields.
func getSourceMount(source string) (string, string, error) {
	// Ensure any symlinks are resolved.
	sourcePath, err := filepath.EvalSymlinks(source)
	if err != nil {
		return "", "", err
	}

	mountinfos, err := mount.GetMounts()
	if err != nil {
		return "", "", err
	}

	mountinfo := getMountInfo(mountinfos, sourcePath)
	if mountinfo != nil {
		return sourcePath, mountinfo.Optional, nil
	}

	path := sourcePath
	for {
		path = filepath.Dir(path)

		mountinfo = getMountInfo(mountinfos, path)
		if mountinfo != nil {
			return path, mountinfo.Optional, nil
		}

		if path == "/" {
			break
		}
	}

	// If we are here, we did not find parent mount. Something is wrong.
	return "", "", fmt.Errorf("Could not find source mount of %s", source)
}

// Ensure that the mount point on which path is mounted is shared.
func ensureShared(path string) error {
	sharedMount := false

	sourceMount, optionalOpts, err := getSourceMount(path)
	if err != nil {
		return err
	}
	// Make sure source mount point is shared.
	optsSplit := strings.Split(optionalOpts, " ")
	for _, opt := range optsSplit {
		if strings.HasPrefix(opt, "shared:") {
			sharedMount = true
			break
		}
	}

	if !sharedMount {
		return fmt.Errorf("Path %s is mounted on %s but it is not a shared mount.", path, sourceMount)
	}
	return nil
}

// Ensure that the mount point on which path is mounted is either shared or slave.
func ensureSharedOrSlave(path string) error {
	sharedMount := false
	slaveMount := false

	sourceMount, optionalOpts, err := getSourceMount(path)
	if err != nil {
		return err
	}
	// Make sure source mount point is shared or slave.
	optsSplit := strings.Split(optionalOpts, " ")
	for _, opt := range optsSplit {
		if strings.HasPrefix(opt, "shared:") {
			sharedMount = true
			break
		} else if strings.HasPrefix(opt, "master:") {
			slaveMount = true
			break
		}
	}

	if !sharedMount && !slaveMount {
		return fmt.Errorf("Path %s is mounted on %s but it is not a shared or slave mount.", path, sourceMount)
	}
	return nil
}

var (
	mountPropagationMap = map[string]int{
		"private":  mount.PRIVATE,
		"rprivate": mount.RPRIVATE,
		"shared":   mount.SHARED,
		"rshared":  mount.RSHARED,
		"slave":    mount.SLAVE,
		"rslave":   mount.RSLAVE,
	}

	mountPropagationReverseMap = map[int]string{
		mount.PRIVATE:  "private",
		mount.RPRIVATE: "rprivate",
		mount.SHARED:   "shared",
		mount.RSHARED:  "rshared",
		mount.SLAVE:    "slave",
		mount.RSLAVE:   "rslave",
	}
)
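
// setMounts drops default mounts that are overridden by user-supplied mounts,
// appends the user mounts as bind or tmpfs mounts, and adjusts root filesystem
// propagation and read-only options as required.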
func setMounts(daemon *Daemon, s *specs.Spec, c *container.Container, mounts []container.Mount) error {
	userMounts := make(map[string]struct{})
	for _, m := range mounts {
		userMounts[m.Destination] = struct{}{}
	}

	// Filter out mounts that are overridden by user supplied mounts
	var defaultMounts []specs.Mount
	_, mountDev := userMounts["/dev"]
	for _, m := range s.Mounts {
		if _, ok := userMounts[m.Destination]; !ok {
			if mountDev && strings.HasPrefix(m.Destination, "/dev/") {
				continue
			}
			defaultMounts = append(defaultMounts, m)
		}
	}

	s.Mounts = defaultMounts
	for _, m := range mounts {
		for _, cm := range s.Mounts {
			if cm.Destination == m.Destination {
				return fmt.Errorf("Duplicate mount point '%s'", m.Destination)
			}
		}

		if m.Source == "tmpfs" {
			data := m.Data
			options := []string{"noexec", "nosuid", "nodev", string(volume.DefaultPropagationMode)}
			if data != "" {
				options = append(options, strings.Split(data, ",")...)
			}

			merged, err := mount.MergeTmpfsOptions(options)
			if err != nil {
				return err
			}

			s.Mounts = append(s.Mounts, specs.Mount{Destination: m.Destination, Source: m.Source, Type: "tmpfs", Options: merged})
			continue
		}

		mt := specs.Mount{Destination: m.Destination, Source: m.Source, Type: "bind"}

		// Determine property of RootPropagation based on volume
		// properties. If a volume is shared, then keep root propagation
		// shared. This should work for slave and private volumes too.
		//
		// For slave volumes, it can be either [r]shared/[r]slave.
		//
		// For private volumes any root propagation value should work.
		pFlag := mountPropagationMap[m.Propagation]
		if pFlag == mount.SHARED || pFlag == mount.RSHARED {
			if err := ensureShared(m.Source); err != nil {
				return err
			}
			rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
			if rootpg != mount.SHARED && rootpg != mount.RSHARED {
				s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.SHARED]
			}
		} else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE {
			if err := ensureSharedOrSlave(m.Source); err != nil {
				return err
			}
			rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
			if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
				s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.RSLAVE]
			}
		}

		opts := []string{"rbind"}
		if !m.Writable {
			opts = append(opts, "ro")
		}
		if pFlag != 0 {
			opts = append(opts, mountPropagationReverseMap[pFlag])
		}
		mt.Options = opts
		s.Mounts = append(s.Mounts, mt)
	}

	if s.Root.Readonly {
		for i, m := range s.Mounts {
			switch m.Destination {
			case "/proc", "/dev/pts", "/dev/mqueue": // /dev is remounted by runc
				continue
			}
			if _, ok := userMounts[m.Destination]; !ok {
				if !stringutils.InSlice(m.Options, "ro") {
					s.Mounts[i].Options = append(s.Mounts[i].Options, "ro")
				}
			}
		}
	}

	if c.HostConfig.Privileged {
		if !s.Root.Readonly {
			// clear readonly for /sys
			for i := range s.Mounts {
				if s.Mounts[i].Destination == "/sys" {
					clearReadOnly(&s.Mounts[i])
				}
			}
		}
		s.Linux.ReadonlyPaths = nil
		s.Linux.MaskedPaths = nil
	}

	// TODO: until a kernel/mount solution exists for handling remount in a user namespace,
	// we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
	if uidMap, _ := daemon.GetUIDGIDMaps(); uidMap != nil || c.HostConfig.Privileged {
		for i, m := range s.Mounts {
			if m.Type == "cgroup" {
				clearReadOnly(&s.Mounts[i])
			}
		}
	}

	return nil
}
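
// populateCommonSpec fills in the basic fields of the OCI spec: the root
// filesystem, working directory, process arguments and environment, hostname,
// and the /dev/init wrapper when an init process is requested.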
func (daemon *Daemon) populateCommonSpec(s *specs.Spec, c *container.Container) error {
	linkedEnv, err := daemon.setupLinkedContainers(c)
	if err != nil {
		return err
	}
	s.Root = specs.Root{
		Path:     c.BaseFS,
		Readonly: c.HostConfig.ReadonlyRootfs,
	}
	rootUID, rootGID := daemon.GetRemappedUIDGID()
	if err := c.SetupWorkingDirectory(rootUID, rootGID); err != nil {
		return err
	}
	cwd := c.Config.WorkingDir
	if len(cwd) == 0 {
		cwd = "/"
	}
	s.Process.Args = append([]string{c.Path}, c.Args...)

	// only add the custom init if it is specified and the container is running in its
	// own private pid namespace. It does not make sense to add if it is running in the
	// host namespace or another container's pid namespace where we already have an init
	if c.HostConfig.PidMode.IsPrivate() {
		if (c.HostConfig.Init != nil && *c.HostConfig.Init) ||
			(c.HostConfig.Init == nil && daemon.configStore.Init) {
			s.Process.Args = append([]string{"/dev/init", "--", c.Path}, c.Args...)
			var path string
			if daemon.configStore.InitPath == "" {
				path, err = exec.LookPath(daemonconfig.DefaultInitBinary)
				if err != nil {
					return err
				}
			}
			if daemon.configStore.InitPath != "" {
				path = daemon.configStore.InitPath
			}
			s.Mounts = append(s.Mounts, specs.Mount{
				Destination: "/dev/init",
				Type:        "bind",
				Source:      path,
				Options:     []string{"bind", "ro"},
			})
		}
	}
	s.Process.Cwd = cwd
	s.Process.Env = c.CreateDaemonEnvironment(c.Config.Tty, linkedEnv)
	s.Process.Terminal = c.Config.Tty
	s.Hostname = c.FullHostname()

	return nil
}
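
// createSpec assembles the complete OCI runtime spec for the container,
// combining the common spec with cgroups, resources, devices, rlimits, user,
// namespaces, capabilities, seccomp, mounts, and AppArmor/SELinux settings.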
func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) {
	s := oci.DefaultSpec()
	if err := daemon.populateCommonSpec(&s, c); err != nil {
		return nil, err
	}

	var cgroupsPath string
	scopePrefix := "docker"
	parent := "/docker"
	useSystemd := UsingSystemd(daemon.configStore)
	if useSystemd {
		parent = "system.slice"
	}

	if c.HostConfig.CgroupParent != "" {
		parent = c.HostConfig.CgroupParent
	} else if daemon.configStore.CgroupParent != "" {
		parent = daemon.configStore.CgroupParent
	}
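
	// With systemd the cgroups path takes the form "<parent>:<scope prefix>:<id>"
	// (for example "system.slice:docker:<container-id>"); with the cgroupfs
	// driver it is a plain path such as "/docker/<container-id>".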
	if useSystemd {
		cgroupsPath = parent + ":" + scopePrefix + ":" + c.ID
		logrus.Debugf("createSpec: cgroupsPath: %s", cgroupsPath)
	} else {
		cgroupsPath = filepath.Join(parent, c.ID)
	}
	s.Linux.CgroupsPath = cgroupsPath

	if err := setResources(&s, c.HostConfig.Resources); err != nil {
		return nil, fmt.Errorf("linux runtime spec resources: %v", err)
	}
	s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj
	s.Linux.Sysctl = c.HostConfig.Sysctls

	p := s.Linux.CgroupsPath
	if useSystemd {
		initPath, err := cgroups.GetInitCgroup("cpu")
		if err != nil {
			return nil, err
		}
		p, err = cgroups.GetOwnCgroup("cpu")
		if err != nil {
			return nil, err
		}
		p = filepath.Join(initPath, p)
	}

	// Clean path to guard against things like ../../../BAD
	parentPath := filepath.Dir(p)
	if !filepath.IsAbs(parentPath) {
		parentPath = filepath.Clean("/" + parentPath)
	}

	if err := daemon.initCgroupsPath(parentPath); err != nil {
		return nil, fmt.Errorf("linux init cgroups path: %v", err)
	}
	if err := setDevices(&s, c); err != nil {
		return nil, fmt.Errorf("linux runtime spec devices: %v", err)
	}
	if err := setRlimits(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux runtime spec rlimits: %v", err)
	}
	if err := setUser(&s, c); err != nil {
		return nil, fmt.Errorf("linux spec user: %v", err)
	}
	if err := setNamespaces(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux spec namespaces: %v", err)
	}
	if err := setCapabilities(&s, c); err != nil {
		return nil, fmt.Errorf("linux spec capabilities: %v", err)
	}
	if err := setSeccomp(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux seccomp: %v", err)
	}

	if err := daemon.setupIpcDirs(c); err != nil {
		return nil, err
	}

	if err := daemon.setupSecretDir(c); err != nil {
		return nil, err
	}

	if err := daemon.setupConfigDir(c); err != nil {
		return nil, err
	}

	ms, err := daemon.setupMounts(c)
	if err != nil {
		return nil, err
	}

	ms = append(ms, c.IpcMounts()...)

	tmpfsMounts, err := c.TmpfsMounts()
	if err != nil {
		return nil, err
	}
	ms = append(ms, tmpfsMounts...)

	if m := c.SecretMounts(); m != nil {
		ms = append(ms, m...)
	}

	ms = append(ms, c.ConfigMounts()...)

	sort.Sort(mounts(ms))
	if err := setMounts(daemon, &s, c, ms); err != nil {
		return nil, fmt.Errorf("linux mounts: %v", err)
	}

	for _, ns := range s.Linux.Namespaces {
		if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled {
			target, err := os.Readlink(filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe"))
			if err != nil {
				return nil, err
			}

			s.Hooks = &specs.Hooks{
				Prestart: []specs.Hook{{
					Path: target, // FIXME: cross-platform
					Args: []string{"libnetwork-setkey", c.ID, daemon.netController.ID()},
				}},
			}
		}
	}

	if apparmor.IsEnabled() {
		var appArmorProfile string
		if c.AppArmorProfile != "" {
			appArmorProfile = c.AppArmorProfile
		} else if c.HostConfig.Privileged {
			appArmorProfile = "unconfined"
		} else {
			appArmorProfile = "docker-default"
		}

		if appArmorProfile == "docker-default" {
			// Unattended upgrades and other fun services can unload AppArmor
			// profiles inadvertently. Since we cannot store our profile in
			// /etc/apparmor.d, nor can we practically add other ways of
			// telling the system to keep our profile loaded, in order to make
			// sure that we keep the default profile enabled we dynamically
			// reload it if necessary.
			if err := ensureDefaultAppArmorProfile(); err != nil {
				return nil, err
			}
		}

		s.Process.ApparmorProfile = appArmorProfile
	}
	s.Process.SelinuxLabel = c.GetProcessLabel()
	s.Process.NoNewPrivileges = c.NoNewPrivileges
	s.Linux.MountLabel = c.MountLabel

	return (*specs.Spec)(&s), nil
}
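
// clearReadOnly removes the "ro" option from the given mount.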
func clearReadOnly(m *specs.Mount) {
	var opt []string
	for _, o := range m.Options {
		if o != "ro" {
			opt = append(opt, o)
		}
	}
	m.Options = opt
}

// mergeUlimits merges the Ulimits from HostConfig with the daemon defaults and updates HostConfig
func (daemon *Daemon) mergeUlimits(c *containertypes.HostConfig) {
	ulimits := c.Ulimits
	// Merge ulimits with daemon defaults
	ulIdx := make(map[string]struct{})
	for _, ul := range ulimits {
		ulIdx[ul.Name] = struct{}{}
	}
	for name, ul := range daemon.configStore.Ulimits {
		if _, exists := ulIdx[name]; !exists {
			ulimits = append(ulimits, ul)
		}
	}
	c.Ulimits = ulimits
}