create.go 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353
  1. // +build linux,cgo
  2. package native
  3. import (
  4. "fmt"
  5. "path/filepath"
  6. "strings"
  7. "syscall"
  8. "github.com/docker/docker/daemon/execdriver"
  9. derr "github.com/docker/docker/errors"
  10. "github.com/docker/docker/pkg/mount"
  11. "github.com/opencontainers/runc/libcontainer/apparmor"
  12. "github.com/opencontainers/runc/libcontainer/configs"
  13. "github.com/opencontainers/runc/libcontainer/devices"
  14. )
  15. // createContainer populates and configures the container type with the
  16. // data provided by the execdriver.Command
  17. func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks) (container *configs.Config, err error) {
  18. container = execdriver.InitContainer(c)
  19. if err := d.createIpc(container, c); err != nil {
  20. return nil, err
  21. }
  22. if err := d.createPid(container, c); err != nil {
  23. return nil, err
  24. }
  25. if err := d.createUTS(container, c); err != nil {
  26. return nil, err
  27. }
  28. if err := d.setupRemappedRoot(container, c); err != nil {
  29. return nil, err
  30. }
  31. if err := d.createNetwork(container, c, hooks); err != nil {
  32. return nil, err
  33. }
  34. if c.ProcessConfig.Privileged {
  35. if !container.Readonlyfs {
  36. // clear readonly for /sys
  37. for i := range container.Mounts {
  38. if container.Mounts[i].Destination == "/sys" {
  39. container.Mounts[i].Flags &= ^syscall.MS_RDONLY
  40. }
  41. }
  42. container.ReadonlyPaths = nil
  43. }
  44. // clear readonly for cgroup
  45. for i := range container.Mounts {
  46. if container.Mounts[i].Device == "cgroup" {
  47. container.Mounts[i].Flags &= ^syscall.MS_RDONLY
  48. }
  49. }
  50. container.MaskPaths = nil
  51. if err := d.setPrivileged(container); err != nil {
  52. return nil, err
  53. }
  54. } else {
  55. if err := d.setCapabilities(container, c); err != nil {
  56. return nil, err
  57. }
  58. }
  59. // add CAP_ prefix to all caps for new libcontainer update to match
  60. // the spec format.
  61. for i, s := range container.Capabilities {
  62. if !strings.HasPrefix(s, "CAP_") {
  63. container.Capabilities[i] = fmt.Sprintf("CAP_%s", s)
  64. }
  65. }
  66. container.AdditionalGroups = c.GroupAdd
  67. if c.AppArmorProfile != "" {
  68. container.AppArmorProfile = c.AppArmorProfile
  69. }
  70. if c.SeccompProfile != "" {
  71. container.Seccomp, err = loadSeccompProfile(c.SeccompProfile)
  72. if err != nil {
  73. return nil, err
  74. }
  75. }
  76. if err := execdriver.SetupCgroups(container, c); err != nil {
  77. return nil, err
  78. }
  79. container.OomScoreAdj = c.OomScoreAdj
  80. if container.Readonlyfs {
  81. for i := range container.Mounts {
  82. switch container.Mounts[i].Destination {
  83. case "/proc", "/dev", "/dev/pts":
  84. continue
  85. }
  86. container.Mounts[i].Flags |= syscall.MS_RDONLY
  87. }
  88. /* These paths must be remounted as r/o */
  89. container.ReadonlyPaths = append(container.ReadonlyPaths, "/dev")
  90. }
  91. if err := d.setupMounts(container, c); err != nil {
  92. return nil, err
  93. }
  94. d.setupLabels(container, c)
  95. d.setupRlimits(container, c)
  96. return container, nil
  97. }
  98. func (d *Driver) createNetwork(container *configs.Config, c *execdriver.Command, hooks execdriver.Hooks) error {
  99. if c.Network == nil {
  100. return nil
  101. }
  102. if c.Network.ContainerID != "" {
  103. d.Lock()
  104. active := d.activeContainers[c.Network.ContainerID]
  105. d.Unlock()
  106. if active == nil {
  107. return fmt.Errorf("%s is not a valid running container to join", c.Network.ContainerID)
  108. }
  109. state, err := active.State()
  110. if err != nil {
  111. return err
  112. }
  113. container.Namespaces.Add(configs.NEWNET, state.NamespacePaths[configs.NEWNET])
  114. return nil
  115. }
  116. if c.Network.NamespacePath != "" {
  117. container.Namespaces.Add(configs.NEWNET, c.Network.NamespacePath)
  118. return nil
  119. }
  120. // only set up prestart hook if the namespace path is not set (this should be
  121. // all cases *except* for --net=host shared networking)
  122. container.Hooks = &configs.Hooks{
  123. Prestart: []configs.Hook{
  124. configs.NewFunctionHook(func(s configs.HookState) error {
  125. if len(hooks.PreStart) > 0 {
  126. for _, fnHook := range hooks.PreStart {
  127. // A closed channel for OOM is returned here as it will be
  128. // non-blocking and return the correct result when read.
  129. chOOM := make(chan struct{})
  130. close(chOOM)
  131. if err := fnHook(&c.ProcessConfig, s.Pid, chOOM); err != nil {
  132. return err
  133. }
  134. }
  135. }
  136. return nil
  137. }),
  138. },
  139. }
  140. return nil
  141. }
  142. func (d *Driver) createIpc(container *configs.Config, c *execdriver.Command) error {
  143. if c.Ipc.HostIpc {
  144. container.Namespaces.Remove(configs.NEWIPC)
  145. return nil
  146. }
  147. if c.Ipc.ContainerID != "" {
  148. d.Lock()
  149. active := d.activeContainers[c.Ipc.ContainerID]
  150. d.Unlock()
  151. if active == nil {
  152. return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID)
  153. }
  154. state, err := active.State()
  155. if err != nil {
  156. return err
  157. }
  158. container.Namespaces.Add(configs.NEWIPC, state.NamespacePaths[configs.NEWIPC])
  159. }
  160. return nil
  161. }
  162. func (d *Driver) createPid(container *configs.Config, c *execdriver.Command) error {
  163. if c.Pid.HostPid {
  164. container.Namespaces.Remove(configs.NEWPID)
  165. return nil
  166. }
  167. return nil
  168. }
  169. func (d *Driver) createUTS(container *configs.Config, c *execdriver.Command) error {
  170. if c.UTS.HostUTS {
  171. container.Namespaces.Remove(configs.NEWUTS)
  172. container.Hostname = ""
  173. return nil
  174. }
  175. return nil
  176. }
  177. func (d *Driver) setupRemappedRoot(container *configs.Config, c *execdriver.Command) error {
  178. if c.RemappedRoot.UID == 0 {
  179. container.Namespaces.Remove(configs.NEWUSER)
  180. return nil
  181. }
  182. // convert the Docker daemon id map to the libcontainer variant of the same struct
  183. // this keeps us from having to import libcontainer code across Docker client + daemon packages
  184. cuidMaps := []configs.IDMap{}
  185. cgidMaps := []configs.IDMap{}
  186. for _, idMap := range c.UIDMapping {
  187. cuidMaps = append(cuidMaps, configs.IDMap(idMap))
  188. }
  189. for _, idMap := range c.GIDMapping {
  190. cgidMaps = append(cgidMaps, configs.IDMap(idMap))
  191. }
  192. container.UidMappings = cuidMaps
  193. container.GidMappings = cgidMaps
  194. for _, node := range container.Devices {
  195. node.Uid = uint32(c.RemappedRoot.UID)
  196. node.Gid = uint32(c.RemappedRoot.GID)
  197. }
  198. // TODO: until a kernel/mount solution exists for handling remount in a user namespace,
  199. // we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
  200. for i := range container.Mounts {
  201. if container.Mounts[i].Device == "cgroup" {
  202. container.Mounts[i].Flags &= ^syscall.MS_RDONLY
  203. }
  204. }
  205. return nil
  206. }
  207. func (d *Driver) setPrivileged(container *configs.Config) (err error) {
  208. container.Capabilities = execdriver.GetAllCapabilities()
  209. container.Cgroups.AllowAllDevices = true
  210. hostDevices, err := devices.HostDevices()
  211. if err != nil {
  212. return err
  213. }
  214. container.Devices = hostDevices
  215. if apparmor.IsEnabled() {
  216. container.AppArmorProfile = "unconfined"
  217. }
  218. return nil
  219. }
  220. func (d *Driver) setCapabilities(container *configs.Config, c *execdriver.Command) (err error) {
  221. container.Capabilities, err = execdriver.TweakCapabilities(container.Capabilities, c.CapAdd, c.CapDrop)
  222. return err
  223. }
  224. func (d *Driver) setupRlimits(container *configs.Config, c *execdriver.Command) {
  225. if c.Resources == nil {
  226. return
  227. }
  228. for _, rlimit := range c.Resources.Rlimits {
  229. container.Rlimits = append(container.Rlimits, configs.Rlimit{
  230. Type: rlimit.Type,
  231. Hard: rlimit.Hard,
  232. Soft: rlimit.Soft,
  233. })
  234. }
  235. }
  236. func (d *Driver) setupMounts(container *configs.Config, c *execdriver.Command) error {
  237. userMounts := make(map[string]struct{})
  238. for _, m := range c.Mounts {
  239. userMounts[m.Destination] = struct{}{}
  240. }
  241. // Filter out mounts that are overridden by user supplied mounts
  242. var defaultMounts []*configs.Mount
  243. _, mountDev := userMounts["/dev"]
  244. for _, m := range container.Mounts {
  245. if _, ok := userMounts[m.Destination]; !ok {
  246. if mountDev && strings.HasPrefix(m.Destination, "/dev/") {
  247. container.Devices = nil
  248. continue
  249. }
  250. defaultMounts = append(defaultMounts, m)
  251. }
  252. }
  253. container.Mounts = defaultMounts
  254. for _, m := range c.Mounts {
  255. for _, cm := range container.Mounts {
  256. if cm.Destination == m.Destination {
  257. return derr.ErrorCodeMountDup.WithArgs(m.Destination)
  258. }
  259. }
  260. if m.Source == "tmpfs" {
  261. var (
  262. data = "size=65536k"
  263. flags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
  264. err error
  265. )
  266. fulldest := filepath.Join(c.Rootfs, m.Destination)
  267. if m.Data != "" {
  268. flags, data, err = mount.ParseTmpfsOptions(m.Data)
  269. if err != nil {
  270. return err
  271. }
  272. }
  273. container.Mounts = append(container.Mounts, &configs.Mount{
  274. Source: m.Source,
  275. Destination: m.Destination,
  276. Data: data,
  277. Device: "tmpfs",
  278. Flags: flags,
  279. PremountCmds: genTmpfsPremountCmd(c.TmpDir, fulldest, m.Destination),
  280. PostmountCmds: genTmpfsPostmountCmd(c.TmpDir, fulldest, m.Destination),
  281. })
  282. continue
  283. }
  284. flags := syscall.MS_BIND | syscall.MS_REC
  285. if !m.Writable {
  286. flags |= syscall.MS_RDONLY
  287. }
  288. if m.Slave {
  289. flags |= syscall.MS_SLAVE
  290. }
  291. container.Mounts = append(container.Mounts, &configs.Mount{
  292. Source: m.Source,
  293. Destination: m.Destination,
  294. Device: "bind",
  295. Flags: flags,
  296. })
  297. }
  298. return nil
  299. }
  300. func (d *Driver) setupLabels(container *configs.Config, c *execdriver.Command) {
  301. container.ProcessLabel = c.ProcessLabel
  302. container.MountLabel = c.MountLabel
  303. }