From 23feaaa240853c0e7f9817f8c2d272dd1c93ac3f Mon Sep 17 00:00:00 2001 From: Dan Walsh Date: Tue, 25 Nov 2014 15:10:53 -0500 Subject: [PATCH] Allow the container to share the PID namespace with the host We want to be able to use container without the PID namespace. We basically want containers that can manage the host os, which I call Super Privileged Containers. We eventually would like to get to the point where the only namespace we use is the MNT namespace to bring the Apps userspace with it. By eliminating the PID namespace we can get better communication between the host and the clients and potentially tools like strace and gdb become easier to use. We also see tools like libvirtd running within a container telling systemd to place a VM in a particular cgroup, we need to have communications of the PID. I don't see us needing to share PID namespaces between containers, since this is really what docker exec does. So currently I see us just needing docker run --pid=host Docker-DCO-1.1-Signed-off-by: Dan Walsh (github: rhatdan) --- daemon/container.go | 4 ++++ daemon/create.go | 6 +++--- daemon/execdriver/driver.go | 6 ++++++ daemon/execdriver/native/create.go | 13 +++++++++++++ docs/man/docker-create.1.md | 6 ++++++ docs/man/docker-run.1.md | 6 ++++++ docs/sources/reference/commandline/cli.md | 1 + docs/sources/reference/run.md | 22 +++++++++++++++++++++- runconfig/hostconfig.go | 23 +++++++++++++++++++++++ runconfig/parse.go | 9 ++++++++- 10 files changed, 91 insertions(+), 5 deletions(-) diff --git a/daemon/container.go b/daemon/container.go index 8bbfb07b27..86c0a7d84f 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -250,6 +250,9 @@ func populateCommand(c *Container, env []string) error { ipc.HostIpc = c.hostConfig.IpcMode.IsHost() } + pid := &execdriver.Pid{} + pid.HostPid = c.hostConfig.PidMode.IsHost() + // Build lists of devices allowed and created within the container. 
userSpecifiedDevices := make([]*devices.Device, len(c.hostConfig.Devices)) for i, deviceMapping := range c.hostConfig.Devices { @@ -295,6 +298,7 @@ func populateCommand(c *Container, env []string) error { WorkingDir: c.Config.WorkingDir, Network: en, Ipc: ipc, + Pid: pid, Resources: resources, AllowedDevices: allowedDevices, AutoCreatedDevices: autoCreatedDevices, diff --git a/daemon/create.go b/daemon/create.go index f53461a450..785b0cc345 100644 --- a/daemon/create.go +++ b/daemon/create.go @@ -92,7 +92,7 @@ func (daemon *Daemon) Create(config *runconfig.Config, hostConfig *runconfig.Hos return nil, nil, err } if hostConfig != nil && hostConfig.SecurityOpt == nil { - hostConfig.SecurityOpt, err = daemon.GenerateSecurityOpt(hostConfig.IpcMode) + hostConfig.SecurityOpt, err = daemon.GenerateSecurityOpt(hostConfig.IpcMode, hostConfig.PidMode) if err != nil { return nil, nil, err } @@ -124,8 +124,8 @@ func (daemon *Daemon) Create(config *runconfig.Config, hostConfig *runconfig.Hos return container, warnings, nil } -func (daemon *Daemon) GenerateSecurityOpt(ipcMode runconfig.IpcMode) ([]string, error) { - if ipcMode.IsHost() { +func (daemon *Daemon) GenerateSecurityOpt(ipcMode runconfig.IpcMode, pidMode runconfig.PidMode) ([]string, error) { + if ipcMode.IsHost() || pidMode.IsHost() { return label.DisableSecOpt(), nil } if ipcContainer := ipcMode.Container(); ipcContainer != "" { diff --git a/daemon/execdriver/driver.go b/daemon/execdriver/driver.go index 2a5eff5565..80aad44ff9 100644 --- a/daemon/execdriver/driver.go +++ b/daemon/execdriver/driver.go @@ -77,6 +77,11 @@ type Ipc struct { HostIpc bool `json:"host_ipc"` } +// PID settings of the container +type Pid struct { + HostPid bool `json:"host_pid"` +} + type NetworkInterface struct { Gateway string `json:"gateway"` IPAddress string `json:"ip"` @@ -126,6 +131,7 @@ type Command struct { ConfigPath string `json:"config_path"` // this should be able to be removed when the lxc template is moved into the driver 
Network *Network `json:"network"` Ipc *Ipc `json:"ipc"` + Pid *Pid `json:"pid"` Resources *Resources `json:"resources"` Mounts []Mount `json:"mounts"` AllowedDevices []*devices.Device `json:"allowed_devices"` diff --git a/daemon/execdriver/native/create.go b/daemon/execdriver/native/create.go index 99c21a20b8..7b764a50e2 100644 --- a/daemon/execdriver/native/create.go +++ b/daemon/execdriver/native/create.go @@ -40,6 +40,10 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e return nil, err } + if err := d.createPid(container, c); err != nil { + return nil, err + } + if err := d.createNetwork(container, c); err != nil { return nil, err } @@ -151,6 +155,15 @@ func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command return nil } +func (d *driver) createPid(container *libcontainer.Config, c *execdriver.Command) error { + if c.Pid.HostPid { + container.Namespaces.Remove(libcontainer.NEWPID) + return nil + } + + return nil +} + func (d *driver) setPrivileged(container *libcontainer.Config) (err error) { container.Capabilities = capabilities.GetAllCapabilities() container.Cgroups.AllowAllDevices = true diff --git a/docs/man/docker-create.1.md b/docs/man/docker-create.1.md index d4b6c44e8d..63fe20ed13 100644 --- a/docs/man/docker-create.1.md +++ b/docs/man/docker-create.1.md @@ -32,6 +32,7 @@ docker-create - Create a new container [**--net**[=*"bridge"*]] [**-P**|**--publish-all**[=*false*]] [**-p**|**--publish**[=*[]*]] +[**--pid**[=*[]*]] [**--privileged**[=*false*]] [**--restart**[=*RESTART*]] [**--security-opt**[=*[]*]] @@ -131,6 +132,11 @@ IMAGE [COMMAND] [ARG...] When specifying ranges for both, the number of container ports in the range must match the number of host ports in the range. (e.g., `-p 1234-1236:1234-1236/tcp`) (use 'docker port' to see the actual mapping) +**--pid**=host + Set the PID mode for the container + **host**: use the host's PID namespace inside the container. 
+ Note: the host mode gives the container full access to processes on the host and is therefore considered insecure. + **--privileged**=*true*|*false* Give extended privileges to this container. The default is *false*. diff --git a/docs/man/docker-run.1.md b/docs/man/docker-run.1.md index 2ba984a6c5..de035e9655 100644 --- a/docs/man/docker-run.1.md +++ b/docs/man/docker-run.1.md @@ -33,6 +33,7 @@ docker-run - Run a command in a new container [**--net**[=*"bridge"*]] [**-P**|**--publish-all**[=*false*]] [**-p**|**--publish**[=*[]*]] +[**--pid**[=*[]*]] [**--privileged**[=*false*]] [**--restart**[=*RESTART*]] [**--rm**[=*false*]] @@ -234,6 +235,11 @@ mapping between the host ports and the exposed ports, use **docker port**. When specifying ranges for both, the number of container ports in the range must match the number of host ports in the range. (e.g., `-p 1234-1236:1234-1236/tcp`) (use 'docker port' to see the actual mapping) +**--pid**=host + Set the PID mode for the container + **host**: use the host's PID namespace inside the container. + Note: the host mode gives the container full access to processes on the host and is therefore considered insecure. + **--privileged**=*true*|*false* Give extended privileges to this container. The default is *false*. diff --git a/docs/sources/reference/commandline/cli.md b/docs/sources/reference/commandline/cli.md index 877a19508c..0b7b0cda03 100644 --- a/docs/sources/reference/commandline/cli.md +++ b/docs/sources/reference/commandline/cli.md @@ -1604,6 +1604,7 @@ removed before the image is removed. Both hostPort and containerPort can be specified as a range of ports. When specifying ranges for both, the number of container ports in the range must match the number of host ports in the range. (e.g., `-p 1234-1236:1234-1236/tcp`) (use 'docker port' to see the actual mapping) + --pid=host 'host': use the host PID namespace inside the container. 
Note: the host mode gives the container full access to processes on the host and is therefore considered insecure. --privileged=false Give extended privileges to this container --restart="" Restart policy to apply when a container exits (no, on-failure[:max-retry], always) --rm=false Automatically remove the container when it exits (incompatible with -d) diff --git a/docs/sources/reference/run.md b/docs/sources/reference/run.md index 012a6e71ff..d594066ad0 100644 --- a/docs/sources/reference/run.md +++ b/docs/sources/reference/run.md @@ -133,11 +133,31 @@ While not strictly a means of identifying a container, you can specify a version image you'd like to run the container with by adding `image[:tag]` to the command. For example, `docker run ubuntu:14.04`. +## PID Settings + --pid="" : Set the PID (Process) Namespace mode for the container, + 'host': use the host's PID namespace inside the container +By default, all containers have the PID namespace enabled. + +PID namespace provides separation of processes. The PID Namespace removes the +view of the system processes, and allows process ids to be reused including +pid 1. + +In certain cases you want your container to share the host's process namespace, +basically allowing processes within the container to see all of the processes +on the system. For example, you could build a container with debugging tools +like `strace` or `gdb`, but want to use these tools when debugging processes +within the container. + + $ sudo docker run --pid=host rhel7 strace -p 1234 + +This command would allow you to use `strace` inside the container on pid 1234 on +the host. + ## IPC Settings --ipc="" : Set the IPC mode for the container, 'container:<name|id>': reuses another container's IPC namespace 'host': use the host's IPC namespace inside the container -By default, all containers have the IPC namespace enabled +By default, all containers have the IPC namespace enabled. 
IPC (POSIX/SysV IPC) namespace provides separation of named shared memory segments, semaphores and message queues. diff --git a/runconfig/hostconfig.go b/runconfig/hostconfig.go index b619e9c31c..054c683627 100644 --- a/runconfig/hostconfig.go +++ b/runconfig/hostconfig.go @@ -66,6 +66,27 @@ func (n IpcMode) Container() string { return "" } +type PidMode string + +// IsPrivate indicates whether the container uses its own private PID namespace +func (n PidMode) IsPrivate() bool { + return !(n.IsHost()) +} + +func (n PidMode) IsHost() bool { + return n == "host" +} + +func (n PidMode) Valid() bool { + parts := strings.Split(string(n), ":") + switch mode := parts[0]; mode { + case "", "host": + default: + return false + } + return true +} + type DeviceMapping struct { PathOnHost string PathInContainer string @@ -92,6 +113,7 @@ type HostConfig struct { Devices []DeviceMapping NetworkMode NetworkMode IpcMode IpcMode + PidMode PidMode CapAdd []string CapDrop []string RestartPolicy RestartPolicy @@ -125,6 +147,7 @@ func ContainerHostConfigFromJob(job *engine.Job) *HostConfig { PublishAllPorts: job.GetenvBool("PublishAllPorts"), NetworkMode: NetworkMode(job.Getenv("NetworkMode")), IpcMode: IpcMode(job.Getenv("IpcMode")), + PidMode: PidMode(job.Getenv("PidMode")), } job.GetenvJson("LxcConf", &hostConfig.LxcConf) diff --git a/runconfig/parse.go b/runconfig/parse.go index 3bab8ac765..781f721f65 100644 --- a/runconfig/parse.go +++ b/runconfig/parse.go @@ -46,6 +46,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*Config, *HostConfig, *flag.FlagSe flNetwork = cmd.Bool([]string{"#n", "#-networking"}, true, "Enable networking for this container") flPrivileged = cmd.Bool([]string{"#privileged", "-privileged"}, false, "Give extended privileges to this container") + flPidMode = cmd.String([]string{"-pid"}, "", "Default is to create a private PID namespace for the container\n'host': use the host PID namespace inside the container. 
Note: the host mode gives the container full access to processes on the system and is therefore considered insecure.") flPublishAll = cmd.Bool([]string{"P", "-publish-all"}, false, "Publish all exposed ports to random ports on the host interfaces") flStdin = cmd.Bool([]string{"i", "-interactive"}, false, "Keep STDIN open even if not attached") flTty = cmd.Bool([]string{"t", "-tty"}, false, "Allocate a pseudo-TTY") @@ -248,7 +249,12 @@ func Parse(cmd *flag.FlagSet, args []string) (*Config, *HostConfig, *flag.FlagSe ipcMode := IpcMode(*flIpcMode) if !ipcMode.Valid() { - return nil, nil, cmd, fmt.Errorf("--ipc: invalid IPC mode: %v", err) + return nil, nil, cmd, fmt.Errorf("--ipc: invalid IPC mode") + } + + pidMode := PidMode(*flPidMode) + if !pidMode.Valid() { + return nil, nil, cmd, fmt.Errorf("--pid: invalid PID mode") } netMode, err := parseNetMode(*flNetMode) @@ -300,6 +306,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*Config, *HostConfig, *flag.FlagSe VolumesFrom: flVolumesFrom.GetAll(), NetworkMode: netMode, IpcMode: ipcMode, + PidMode: pidMode, Devices: deviceMappings, CapAdd: flCapAdd.GetAll(), CapDrop: flCapDrop.GetAll(),