123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507 |
- // +build linux,cgo
- package native
- import (
- "fmt"
- "path/filepath"
- "strings"
- "syscall"
- "github.com/docker/docker/daemon/execdriver"
- derr "github.com/docker/docker/errors"
- "github.com/docker/docker/pkg/mount"
- "github.com/docker/docker/volume"
- "github.com/opencontainers/runc/libcontainer/apparmor"
- "github.com/opencontainers/runc/libcontainer/configs"
- "github.com/opencontainers/runc/libcontainer/devices"
- )
- // createContainer populates and configures the container type with the
- // data provided by the execdriver.Command
- func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks) (container *configs.Config, err error) {
- container = execdriver.InitContainer(c)
- if err := d.createIpc(container, c); err != nil {
- return nil, err
- }
- if err := d.createPid(container, c); err != nil {
- return nil, err
- }
- if err := d.createUTS(container, c); err != nil {
- return nil, err
- }
- if err := d.setupRemappedRoot(container, c); err != nil {
- return nil, err
- }
- if err := d.createNetwork(container, c, hooks); err != nil {
- return nil, err
- }
- if c.ProcessConfig.Privileged {
- if !container.Readonlyfs {
- // clear readonly for /sys
- for i := range container.Mounts {
- if container.Mounts[i].Destination == "/sys" {
- container.Mounts[i].Flags &= ^syscall.MS_RDONLY
- }
- }
- container.ReadonlyPaths = nil
- }
- // clear readonly for cgroup
- for i := range container.Mounts {
- if container.Mounts[i].Device == "cgroup" {
- container.Mounts[i].Flags &= ^syscall.MS_RDONLY
- }
- }
- container.MaskPaths = nil
- if err := d.setPrivileged(container); err != nil {
- return nil, err
- }
- } else {
- if err := d.setCapabilities(container, c); err != nil {
- return nil, err
- }
- }
- // add CAP_ prefix to all caps for new libcontainer update to match
- // the spec format.
- for i, s := range container.Capabilities {
- if !strings.HasPrefix(s, "CAP_") {
- container.Capabilities[i] = fmt.Sprintf("CAP_%s", s)
- }
- }
- container.AdditionalGroups = c.GroupAdd
- if c.AppArmorProfile != "" {
- container.AppArmorProfile = c.AppArmorProfile
- }
- if c.SeccompProfile != "" {
- container.Seccomp, err = loadSeccompProfile(c.SeccompProfile)
- if err != nil {
- return nil, err
- }
- }
- if err := execdriver.SetupCgroups(container, c); err != nil {
- return nil, err
- }
- container.OomScoreAdj = c.OomScoreAdj
- if container.Readonlyfs {
- for i := range container.Mounts {
- switch container.Mounts[i].Destination {
- case "/proc", "/dev", "/dev/pts":
- continue
- }
- container.Mounts[i].Flags |= syscall.MS_RDONLY
- }
- /* These paths must be remounted as r/o */
- container.ReadonlyPaths = append(container.ReadonlyPaths, "/dev")
- }
- if err := d.setupMounts(container, c); err != nil {
- return nil, err
- }
- d.setupLabels(container, c)
- d.setupRlimits(container, c)
- return container, nil
- }
- func (d *Driver) createNetwork(container *configs.Config, c *execdriver.Command, hooks execdriver.Hooks) error {
- if c.Network == nil {
- return nil
- }
- if c.Network.ContainerID != "" {
- d.Lock()
- active := d.activeContainers[c.Network.ContainerID]
- d.Unlock()
- if active == nil {
- return fmt.Errorf("%s is not a valid running container to join", c.Network.ContainerID)
- }
- state, err := active.State()
- if err != nil {
- return err
- }
- container.Namespaces.Add(configs.NEWNET, state.NamespacePaths[configs.NEWNET])
- return nil
- }
- if c.Network.NamespacePath != "" {
- container.Namespaces.Add(configs.NEWNET, c.Network.NamespacePath)
- return nil
- }
- // only set up prestart hook if the namespace path is not set (this should be
- // all cases *except* for --net=host shared networking)
- container.Hooks = &configs.Hooks{
- Prestart: []configs.Hook{
- configs.NewFunctionHook(func(s configs.HookState) error {
- if len(hooks.PreStart) > 0 {
- for _, fnHook := range hooks.PreStart {
- // A closed channel for OOM is returned here as it will be
- // non-blocking and return the correct result when read.
- chOOM := make(chan struct{})
- close(chOOM)
- if err := fnHook(&c.ProcessConfig, s.Pid, chOOM); err != nil {
- return err
- }
- }
- }
- return nil
- }),
- },
- }
- return nil
- }
- func (d *Driver) createIpc(container *configs.Config, c *execdriver.Command) error {
- if c.Ipc.HostIpc {
- container.Namespaces.Remove(configs.NEWIPC)
- return nil
- }
- if c.Ipc.ContainerID != "" {
- d.Lock()
- active := d.activeContainers[c.Ipc.ContainerID]
- d.Unlock()
- if active == nil {
- return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID)
- }
- state, err := active.State()
- if err != nil {
- return err
- }
- container.Namespaces.Add(configs.NEWIPC, state.NamespacePaths[configs.NEWIPC])
- }
- return nil
- }
- func (d *Driver) createPid(container *configs.Config, c *execdriver.Command) error {
- if c.Pid.HostPid {
- container.Namespaces.Remove(configs.NEWPID)
- return nil
- }
- return nil
- }
- func (d *Driver) createUTS(container *configs.Config, c *execdriver.Command) error {
- if c.UTS.HostUTS {
- container.Namespaces.Remove(configs.NEWUTS)
- container.Hostname = ""
- return nil
- }
- return nil
- }
- func (d *Driver) setupRemappedRoot(container *configs.Config, c *execdriver.Command) error {
- if c.RemappedRoot.UID == 0 {
- container.Namespaces.Remove(configs.NEWUSER)
- return nil
- }
- // convert the Docker daemon id map to the libcontainer variant of the same struct
- // this keeps us from having to import libcontainer code across Docker client + daemon packages
- cuidMaps := []configs.IDMap{}
- cgidMaps := []configs.IDMap{}
- for _, idMap := range c.UIDMapping {
- cuidMaps = append(cuidMaps, configs.IDMap(idMap))
- }
- for _, idMap := range c.GIDMapping {
- cgidMaps = append(cgidMaps, configs.IDMap(idMap))
- }
- container.UidMappings = cuidMaps
- container.GidMappings = cgidMaps
- for _, node := range container.Devices {
- node.Uid = uint32(c.RemappedRoot.UID)
- node.Gid = uint32(c.RemappedRoot.GID)
- }
- // TODO: until a kernel/mount solution exists for handling remount in a user namespace,
- // we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
- for i := range container.Mounts {
- if container.Mounts[i].Device == "cgroup" {
- container.Mounts[i].Flags &= ^syscall.MS_RDONLY
- }
- }
- return nil
- }
- func (d *Driver) setPrivileged(container *configs.Config) (err error) {
- container.Capabilities = execdriver.GetAllCapabilities()
- container.Cgroups.AllowAllDevices = true
- hostDevices, err := devices.HostDevices()
- if err != nil {
- return err
- }
- container.Devices = hostDevices
- if apparmor.IsEnabled() {
- container.AppArmorProfile = "unconfined"
- }
- return nil
- }
- func (d *Driver) setCapabilities(container *configs.Config, c *execdriver.Command) (err error) {
- container.Capabilities, err = execdriver.TweakCapabilities(container.Capabilities, c.CapAdd, c.CapDrop)
- return err
- }
- func (d *Driver) setupRlimits(container *configs.Config, c *execdriver.Command) {
- if c.Resources == nil {
- return
- }
- for _, rlimit := range c.Resources.Rlimits {
- container.Rlimits = append(container.Rlimits, configs.Rlimit{
- Type: rlimit.Type,
- Hard: rlimit.Hard,
- Soft: rlimit.Soft,
- })
- }
- }
- // If rootfs mount propagation is RPRIVATE, that means all the volumes are
- // going to be private anyway. There is no need to apply per volume
- // propagation on top. This is just an optimzation so that cost of per volume
- // propagation is paid only if user decides to make some volume non-private
- // which will force rootfs mount propagation to be non RPRIVATE.
- func checkResetVolumePropagation(container *configs.Config) {
- if container.RootPropagation != mount.RPRIVATE {
- return
- }
- for _, m := range container.Mounts {
- m.PropagationFlags = nil
- }
- }
- func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
- for _, m := range mountinfo {
- if m.Mountpoint == dir {
- return m
- }
- }
- return nil
- }
- // Get the source mount point of directory passed in as argument. Also return
- // optional fields.
- func getSourceMount(source string) (string, string, error) {
- // Ensure any symlinks are resolved.
- sourcePath, err := filepath.EvalSymlinks(source)
- if err != nil {
- return "", "", err
- }
- mountinfos, err := mount.GetMounts()
- if err != nil {
- return "", "", err
- }
- mountinfo := getMountInfo(mountinfos, sourcePath)
- if mountinfo != nil {
- return sourcePath, mountinfo.Optional, nil
- }
- path := sourcePath
- for {
- path = filepath.Dir(path)
- mountinfo = getMountInfo(mountinfos, path)
- if mountinfo != nil {
- return path, mountinfo.Optional, nil
- }
- if path == "/" {
- break
- }
- }
- // If we are here, we did not find parent mount. Something is wrong.
- return "", "", fmt.Errorf("Could not find source mount of %s", source)
- }
- // Ensure mount point on which path is mouted, is shared.
- func ensureShared(path string) error {
- sharedMount := false
- sourceMount, optionalOpts, err := getSourceMount(path)
- if err != nil {
- return err
- }
- // Make sure source mount point is shared.
- optsSplit := strings.Split(optionalOpts, " ")
- for _, opt := range optsSplit {
- if strings.HasPrefix(opt, "shared:") {
- sharedMount = true
- break
- }
- }
- if !sharedMount {
- return fmt.Errorf("Path %s is mounted on %s but it is not a shared mount.", path, sourceMount)
- }
- return nil
- }
- // Ensure mount point on which path is mounted, is either shared or slave.
- func ensureSharedOrSlave(path string) error {
- sharedMount := false
- slaveMount := false
- sourceMount, optionalOpts, err := getSourceMount(path)
- if err != nil {
- return err
- }
- // Make sure source mount point is shared.
- optsSplit := strings.Split(optionalOpts, " ")
- for _, opt := range optsSplit {
- if strings.HasPrefix(opt, "shared:") {
- sharedMount = true
- break
- } else if strings.HasPrefix(opt, "master:") {
- slaveMount = true
- break
- }
- }
- if !sharedMount && !slaveMount {
- return fmt.Errorf("Path %s is mounted on %s but it is not a shared or slave mount.", path, sourceMount)
- }
- return nil
- }
- func (d *Driver) setupMounts(container *configs.Config, c *execdriver.Command) error {
- userMounts := make(map[string]struct{})
- for _, m := range c.Mounts {
- userMounts[m.Destination] = struct{}{}
- }
- // Filter out mounts that are overriden by user supplied mounts
- var defaultMounts []*configs.Mount
- _, mountDev := userMounts["/dev"]
- for _, m := range container.Mounts {
- if _, ok := userMounts[m.Destination]; !ok {
- if mountDev && strings.HasPrefix(m.Destination, "/dev/") {
- container.Devices = nil
- continue
- }
- defaultMounts = append(defaultMounts, m)
- }
- }
- container.Mounts = defaultMounts
- mountPropagationMap := map[string]int{
- "private": mount.PRIVATE,
- "rprivate": mount.RPRIVATE,
- "shared": mount.SHARED,
- "rshared": mount.RSHARED,
- "slave": mount.SLAVE,
- "rslave": mount.RSLAVE,
- }
- for _, m := range c.Mounts {
- for _, cm := range container.Mounts {
- if cm.Destination == m.Destination {
- return derr.ErrorCodeMountDup.WithArgs(m.Destination)
- }
- }
- if m.Source == "tmpfs" {
- var (
- data = "size=65536k"
- flags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
- err error
- )
- fulldest := filepath.Join(c.Rootfs, m.Destination)
- if m.Data != "" {
- flags, data, err = mount.ParseTmpfsOptions(m.Data)
- if err != nil {
- return err
- }
- }
- container.Mounts = append(container.Mounts, &configs.Mount{
- Source: m.Source,
- Destination: m.Destination,
- Data: data,
- Device: "tmpfs",
- Flags: flags,
- PremountCmds: genTmpfsPremountCmd(c.TmpDir, fulldest, m.Destination),
- PostmountCmds: genTmpfsPostmountCmd(c.TmpDir, fulldest, m.Destination),
- PropagationFlags: []int{mountPropagationMap[volume.DefaultPropagationMode]},
- })
- continue
- }
- flags := syscall.MS_BIND | syscall.MS_REC
- var pFlag int
- if !m.Writable {
- flags |= syscall.MS_RDONLY
- }
- // Determine property of RootPropagation based on volume
- // properties. If a volume is shared, then keep root propagtion
- // shared. This should work for slave and private volumes too.
- //
- // For slave volumes, it can be either [r]shared/[r]slave.
- //
- // For private volumes any root propagation value should work.
- pFlag = mountPropagationMap[m.Propagation]
- if pFlag == mount.SHARED || pFlag == mount.RSHARED {
- if err := ensureShared(m.Source); err != nil {
- return err
- }
- rootpg := container.RootPropagation
- if rootpg != mount.SHARED && rootpg != mount.RSHARED {
- execdriver.SetRootPropagation(container, mount.SHARED)
- }
- } else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE {
- if err := ensureSharedOrSlave(m.Source); err != nil {
- return err
- }
- rootpg := container.RootPropagation
- if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
- execdriver.SetRootPropagation(container, mount.RSLAVE)
- }
- }
- mount := &configs.Mount{
- Source: m.Source,
- Destination: m.Destination,
- Device: "bind",
- Flags: flags,
- }
- if pFlag != 0 {
- mount.PropagationFlags = []int{pFlag}
- }
- container.Mounts = append(container.Mounts, mount)
- }
- checkResetVolumePropagation(container)
- return nil
- }
- func (d *Driver) setupLabels(container *configs.Config, c *execdriver.Command) {
- container.ProcessLabel = c.ProcessLabel
- container.MountLabel = c.MountLabel
- }
|