Allow the container to share the PID namespace with the host

We want to be able to run containers without a PID namespace.  We basically
want containers that can manage the host OS, which I call Super Privileged
Containers.  We eventually would like to get to the point where the only
namespace we use is the MNT namespace, to bring the app's userspace with it.

By eliminating the PID namespace we can get better communication between the
host and the clients, and tools like strace and gdb potentially become easier
to use.  We also see tools like libvirtd running within a container telling
systemd to place a VM in a particular cgroup, which requires being able to communicate PIDs between the container and the host.

I don't see us needing to share PID namespaces between containers, since this
is really what docker exec does.

So currently I see us just needing docker run --pid=host

Docker-DCO-1.1-Signed-off-by: Dan Walsh <dwalsh@redhat.com> (github: rhatdan)
This commit is contained in:
Dan Walsh 2014-11-25 15:10:53 -05:00 committed by Michael Crosby
parent 0eefae8e0d
commit 23feaaa240
10 changed files with 91 additions and 5 deletions

View file

@ -250,6 +250,9 @@ func populateCommand(c *Container, env []string) error {
ipc.HostIpc = c.hostConfig.IpcMode.IsHost()
}
pid := &execdriver.Pid{}
pid.HostPid = c.hostConfig.PidMode.IsHost()
// Build lists of devices allowed and created within the container.
userSpecifiedDevices := make([]*devices.Device, len(c.hostConfig.Devices))
for i, deviceMapping := range c.hostConfig.Devices {
@ -295,6 +298,7 @@ func populateCommand(c *Container, env []string) error {
WorkingDir: c.Config.WorkingDir,
Network: en,
Ipc: ipc,
Pid: pid,
Resources: resources,
AllowedDevices: allowedDevices,
AutoCreatedDevices: autoCreatedDevices,

View file

@ -92,7 +92,7 @@ func (daemon *Daemon) Create(config *runconfig.Config, hostConfig *runconfig.Hos
return nil, nil, err
}
if hostConfig != nil && hostConfig.SecurityOpt == nil {
hostConfig.SecurityOpt, err = daemon.GenerateSecurityOpt(hostConfig.IpcMode)
hostConfig.SecurityOpt, err = daemon.GenerateSecurityOpt(hostConfig.IpcMode, hostConfig.PidMode)
if err != nil {
return nil, nil, err
}
@ -124,8 +124,8 @@ func (daemon *Daemon) Create(config *runconfig.Config, hostConfig *runconfig.Hos
return container, warnings, nil
}
func (daemon *Daemon) GenerateSecurityOpt(ipcMode runconfig.IpcMode) ([]string, error) {
if ipcMode.IsHost() {
func (daemon *Daemon) GenerateSecurityOpt(ipcMode runconfig.IpcMode, pidMode runconfig.PidMode) ([]string, error) {
if ipcMode.IsHost() || pidMode.IsHost() {
return label.DisableSecOpt(), nil
}
if ipcContainer := ipcMode.Container(); ipcContainer != "" {

View file

@ -77,6 +77,11 @@ type Ipc struct {
HostIpc bool `json:"host_ipc"`
}
// Pid holds the PID namespace settings of the container.
type Pid struct {
// HostPid selects the host's PID namespace for the container. It is set
// from the host config's PidMode.IsHost(); when true, createPid removes
// the NEWPID namespace from the container configuration.
HostPid bool `json:"host_pid"`
}
type NetworkInterface struct {
Gateway string `json:"gateway"`
IPAddress string `json:"ip"`
@ -126,6 +131,7 @@ type Command struct {
ConfigPath string `json:"config_path"` // this should be able to be removed when the lxc template is moved into the driver
Network *Network `json:"network"`
Ipc *Ipc `json:"ipc"`
Pid *Pid `json:"pid"`
Resources *Resources `json:"resources"`
Mounts []Mount `json:"mounts"`
AllowedDevices []*devices.Device `json:"allowed_devices"`

View file

@ -40,6 +40,10 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e
return nil, err
}
if err := d.createPid(container, c); err != nil {
return nil, err
}
if err := d.createNetwork(container, c); err != nil {
return nil, err
}
@ -151,6 +155,15 @@ func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command
return nil
}
// createPid applies the command's PID settings to the container
// configuration. When the command asks for the host's PID namespace, the
// NEWPID namespace is removed from the container's namespace list; otherwise
// the configuration is left untouched. It always returns nil.
func (d *driver) createPid(container *libcontainer.Config, c *execdriver.Command) error {
	if useHostPid := c.Pid.HostPid; useHostPid {
		container.Namespaces.Remove(libcontainer.NEWPID)
	}
	return nil
}
func (d *driver) setPrivileged(container *libcontainer.Config) (err error) {
container.Capabilities = capabilities.GetAllCapabilities()
container.Cgroups.AllowAllDevices = true

View file

@ -32,6 +32,7 @@ docker-create - Create a new container
[**--net**[=*"bridge"*]]
[**-P**|**--publish-all**[=*false*]]
[**-p**|**--publish**[=*[]*]]
[**--pid**[=*[]*]]
[**--privileged**[=*false*]]
[**--restart**[=*RESTART*]]
[**--security-opt**[=*[]*]]
@ -131,6 +132,11 @@ IMAGE [COMMAND] [ARG...]
When specifying ranges for both, the number of container ports in the range must match the number of host ports in the range. (e.g., `-p 1234-1236:1234-1236/tcp`)
(use 'docker port' to see the actual mapping)
**--pid**=host
Set the PID mode for the container
**host**: use the host's PID namespace inside the container.
Note: the host mode gives the container full access to processes on the host and is therefore considered insecure.
**--privileged**=*true*|*false*
Give extended privileges to this container. The default is *false*.

View file

@ -33,6 +33,7 @@ docker-run - Run a command in a new container
[**--net**[=*"bridge"*]]
[**-P**|**--publish-all**[=*false*]]
[**-p**|**--publish**[=*[]*]]
[**--pid**[=*[]*]]
[**--privileged**[=*false*]]
[**--restart**[=*RESTART*]]
[**--rm**[=*false*]]
@ -234,6 +235,11 @@ mapping between the host ports and the exposed ports, use **docker port**.
When specifying ranges for both, the number of container ports in the range must match the number of host ports in the range. (e.g., `-p 1234-1236:1234-1236/tcp`)
(use 'docker port' to see the actual mapping)
**--pid**=host
Set the PID mode for the container
**host**: use the host's PID namespace inside the container.
Note: the host mode gives the container full access to processes on the host and is therefore considered insecure.
**--privileged**=*true*|*false*
Give extended privileges to this container. The default is *false*.

View file

@ -1604,6 +1604,7 @@ removed before the image is removed.
Both hostPort and containerPort can be specified as a range of ports.
When specifying ranges for both, the number of container ports in the range must match the number of host ports in the range. (e.g., `-p 1234-1236:1234-1236/tcp`)
(use 'docker port' to see the actual mapping)
--pid="" Default is to create a private PID namespace for the container. 'host': use the host PID namespace inside the container. Note: the host mode gives the container full access to processes on the system and is therefore considered insecure.
--privileged=false Give extended privileges to this container
--restart="" Restart policy to apply when a container exits (no, on-failure[:max-retry], always)
--rm=false Automatically remove the container when it exits (incompatible with -d)

View file

@ -133,11 +133,31 @@ While not strictly a means of identifying a container, you can specify a version
image you'd like to run the container with by adding `image[:tag]` to the command. For
example, `docker run ubuntu:14.04`.
## PID Settings
--pid="" : Set the PID (Process) Namespace mode for the container,
'host': use the host's PID namespace inside the container
By default, all containers have the PID namespace enabled.
The PID namespace provides separation of processes. It hides the host's
system processes from the container and allows process IDs, including
PID 1, to be reused inside the container.
In certain cases you want your container to share the host's process namespace,
basically allowing processes within the container to see all of the processes
on the system. For example, you could build a container with debugging tools
like `strace` or `gdb`, and then use these tools to debug processes
running on the host.
$ sudo docker run --pid=host rhel7 strace -p 1234
This command would allow you to use `strace` inside the container on pid 1234 on
the host.
## IPC Settings
--ipc="" : Set the IPC mode for the container,
'container:<name|id>': reuses another container's IPC namespace
'host': use the host's IPC namespace inside the container
By default, all containers have the IPC namespace enabled
By default, all containers have the IPC namespace enabled.
IPC (POSIX/SysV IPC) namespace provides separation of named shared memory segments, semaphores and message queues.

View file

@ -66,6 +66,27 @@ func (n IpcMode) Container() string {
return ""
}
// PidMode represents the PID namespace mode requested for a container.
// The empty string selects a private PID namespace and "host" selects the
// host's PID namespace.
type PidMode string

// IsPrivate indicates whether the container uses its own private PID
// namespace, i.e. the mode is anything other than "host".
func (n PidMode) IsPrivate() bool {
	return !n.IsHost()
}

// IsHost indicates whether the container shares the host's PID namespace.
func (n PidMode) IsHost() bool {
	return n == "host"
}

// Valid indicates whether the PID mode is one of the supported values,
// "" (private) or "host".
//
// The whole string is compared rather than only the part before the first
// ':'. Otherwise a value such as "host:foo" would pass validation while
// IsHost reports false for it, silently giving the container a private PID
// namespace instead of rejecting the input.
func (n PidMode) Valid() bool {
	switch n {
	case "", "host":
		return true
	default:
		return false
	}
}
type DeviceMapping struct {
PathOnHost string
PathInContainer string
@ -92,6 +113,7 @@ type HostConfig struct {
Devices []DeviceMapping
NetworkMode NetworkMode
IpcMode IpcMode
PidMode PidMode
CapAdd []string
CapDrop []string
RestartPolicy RestartPolicy
@ -125,6 +147,7 @@ func ContainerHostConfigFromJob(job *engine.Job) *HostConfig {
PublishAllPorts: job.GetenvBool("PublishAllPorts"),
NetworkMode: NetworkMode(job.Getenv("NetworkMode")),
IpcMode: IpcMode(job.Getenv("IpcMode")),
PidMode: PidMode(job.Getenv("PidMode")),
}
job.GetenvJson("LxcConf", &hostConfig.LxcConf)

View file

@ -46,6 +46,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*Config, *HostConfig, *flag.FlagSe
flNetwork = cmd.Bool([]string{"#n", "#-networking"}, true, "Enable networking for this container")
flPrivileged = cmd.Bool([]string{"#privileged", "-privileged"}, false, "Give extended privileges to this container")
flPidMode = cmd.String([]string{"-pid"}, "", "Default is to create a private PID namespace for the container\n'host': use the host PID namespace inside the container. Note: the host mode gives the container full access to processes on the system and is therefore considered insecure.")
flPublishAll = cmd.Bool([]string{"P", "-publish-all"}, false, "Publish all exposed ports to random ports on the host interfaces")
flStdin = cmd.Bool([]string{"i", "-interactive"}, false, "Keep STDIN open even if not attached")
flTty = cmd.Bool([]string{"t", "-tty"}, false, "Allocate a pseudo-TTY")
@ -248,7 +249,12 @@ func Parse(cmd *flag.FlagSet, args []string) (*Config, *HostConfig, *flag.FlagSe
ipcMode := IpcMode(*flIpcMode)
if !ipcMode.Valid() {
return nil, nil, cmd, fmt.Errorf("--ipc: invalid IPC mode: %v", err)
return nil, nil, cmd, fmt.Errorf("--ipc: invalid IPC mode")
}
pidMode := PidMode(*flPidMode)
if !pidMode.Valid() {
return nil, nil, cmd, fmt.Errorf("--pid: invalid PID mode")
}
netMode, err := parseNetMode(*flNetMode)
@ -300,6 +306,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*Config, *HostConfig, *flag.FlagSe
VolumesFrom: flVolumesFrom.GetAll(),
NetworkMode: netMode,
IpcMode: ipcMode,
PidMode: pidMode,
Devices: deviceMappings,
CapAdd: flCapAdd.GetAll(),
CapDrop: flCapDrop.GetAll(),