Allow the container to share the PID namespace with the host
We want to be able to run containers without the PID namespace. We basically want containers that can manage the host OS, which I call Super Privileged Containers. Eventually we would like to get to the point where the only namespace we use is the MNT namespace, to bring the app's userspace with it. By eliminating the PID namespace we get better communication between the host and the clients, and tools like strace and gdb potentially become easier to use. We also see tools like libvirtd running within a container and telling systemd to place a VM in a particular cgroup; for that to work, PIDs need to be communicated between the container and the host. I don't see us needing to share PID namespaces between containers, since this is really what docker exec does. So currently I see us just needing `docker run --pid=host`. Docker-DCO-1.1-Signed-off-by: Dan Walsh <dwalsh@redhat.com> (github: rhatdan)
This commit is contained in:
parent
0eefae8e0d
commit
23feaaa240
10 changed files with 91 additions and 5 deletions
|
@ -250,6 +250,9 @@ func populateCommand(c *Container, env []string) error {
|
|||
ipc.HostIpc = c.hostConfig.IpcMode.IsHost()
|
||||
}
|
||||
|
||||
pid := &execdriver.Pid{}
|
||||
pid.HostPid = c.hostConfig.PidMode.IsHost()
|
||||
|
||||
// Build lists of devices allowed and created within the container.
|
||||
userSpecifiedDevices := make([]*devices.Device, len(c.hostConfig.Devices))
|
||||
for i, deviceMapping := range c.hostConfig.Devices {
|
||||
|
@ -295,6 +298,7 @@ func populateCommand(c *Container, env []string) error {
|
|||
WorkingDir: c.Config.WorkingDir,
|
||||
Network: en,
|
||||
Ipc: ipc,
|
||||
Pid: pid,
|
||||
Resources: resources,
|
||||
AllowedDevices: allowedDevices,
|
||||
AutoCreatedDevices: autoCreatedDevices,
|
||||
|
|
|
@ -92,7 +92,7 @@ func (daemon *Daemon) Create(config *runconfig.Config, hostConfig *runconfig.Hos
|
|||
return nil, nil, err
|
||||
}
|
||||
if hostConfig != nil && hostConfig.SecurityOpt == nil {
|
||||
hostConfig.SecurityOpt, err = daemon.GenerateSecurityOpt(hostConfig.IpcMode)
|
||||
hostConfig.SecurityOpt, err = daemon.GenerateSecurityOpt(hostConfig.IpcMode, hostConfig.PidMode)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
@ -124,8 +124,8 @@ func (daemon *Daemon) Create(config *runconfig.Config, hostConfig *runconfig.Hos
|
|||
return container, warnings, nil
|
||||
}
|
||||
|
||||
func (daemon *Daemon) GenerateSecurityOpt(ipcMode runconfig.IpcMode) ([]string, error) {
|
||||
if ipcMode.IsHost() {
|
||||
func (daemon *Daemon) GenerateSecurityOpt(ipcMode runconfig.IpcMode, pidMode runconfig.PidMode) ([]string, error) {
|
||||
if ipcMode.IsHost() || pidMode.IsHost() {
|
||||
return label.DisableSecOpt(), nil
|
||||
}
|
||||
if ipcContainer := ipcMode.Container(); ipcContainer != "" {
|
||||
|
|
|
@ -77,6 +77,11 @@ type Ipc struct {
|
|||
HostIpc bool `json:"host_ipc"`
|
||||
}
|
||||
|
||||
// Pid holds the PID namespace settings of the container.
|
||||
type Pid struct {
|
||||
// HostPid is true when the container shares the host's PID namespace
// instead of getting a PID namespace of its own.
HostPid bool `json:"host_pid"`
|
||||
}
|
||||
|
||||
type NetworkInterface struct {
|
||||
Gateway string `json:"gateway"`
|
||||
IPAddress string `json:"ip"`
|
||||
|
@ -126,6 +131,7 @@ type Command struct {
|
|||
ConfigPath string `json:"config_path"` // this should be able to be removed when the lxc template is moved into the driver
|
||||
Network *Network `json:"network"`
|
||||
Ipc *Ipc `json:"ipc"`
|
||||
Pid *Pid `json:"pid"`
|
||||
Resources *Resources `json:"resources"`
|
||||
Mounts []Mount `json:"mounts"`
|
||||
AllowedDevices []*devices.Device `json:"allowed_devices"`
|
||||
|
|
|
@ -40,6 +40,10 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e
|
|||
return nil, err
|
||||
}
|
||||
|
||||
if err := d.createPid(container, c); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := d.createNetwork(container, c); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -151,6 +155,15 @@ func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command
|
|||
return nil
|
||||
}
|
||||
|
||||
func (d *driver) createPid(container *libcontainer.Config, c *execdriver.Command) error {
|
||||
if c.Pid.HostPid {
|
||||
container.Namespaces.Remove(libcontainer.NEWPID)
|
||||
return nil
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *driver) setPrivileged(container *libcontainer.Config) (err error) {
|
||||
container.Capabilities = capabilities.GetAllCapabilities()
|
||||
container.Cgroups.AllowAllDevices = true
|
||||
|
|
|
@ -32,6 +32,7 @@ docker-create - Create a new container
|
|||
[**--net**[=*"bridge"*]]
|
||||
[**-P**|**--publish-all**[=*false*]]
|
||||
[**-p**|**--publish**[=*[]*]]
|
||||
[**--pid**[=*""*]]
|
||||
[**--privileged**[=*false*]]
|
||||
[**--restart**[=*RESTART*]]
|
||||
[**--security-opt**[=*[]*]]
|
||||
|
@ -131,6 +132,11 @@ IMAGE [COMMAND] [ARG...]
|
|||
When specifying ranges for both, the number of container ports in the range must match the number of host ports in the range. (e.g., `-p 1234-1236:1234-1236/tcp`)
|
||||
(use 'docker port' to see the actual mapping)
|
||||
|
||||
**--pid**=host
|
||||
Set the PID mode for the container
|
||||
**host**: use the host's PID namespace inside the container.
|
||||
Note: the host mode gives the container full access to the processes on the host and is therefore considered insecure.
|
||||
|
||||
**--privileged**=*true*|*false*
|
||||
Give extended privileges to this container. The default is *false*.
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@ docker-run - Run a command in a new container
|
|||
[**--net**[=*"bridge"*]]
|
||||
[**-P**|**--publish-all**[=*false*]]
|
||||
[**-p**|**--publish**[=*[]*]]
|
||||
[**--pid**[=*""*]]
|
||||
[**--privileged**[=*false*]]
|
||||
[**--restart**[=*RESTART*]]
|
||||
[**--rm**[=*false*]]
|
||||
|
@ -234,6 +235,11 @@ mapping between the host ports and the exposed ports, use **docker port**.
|
|||
When specifying ranges for both, the number of container ports in the range must match the number of host ports in the range. (e.g., `-p 1234-1236:1234-1236/tcp`)
|
||||
(use 'docker port' to see the actual mapping)
|
||||
|
||||
**--pid**=host
|
||||
Set the PID mode for the container
|
||||
**host**: use the host's PID namespace inside the container.
|
||||
Note: the host mode gives the container full access to the processes on the host and is therefore considered insecure.
|
||||
|
||||
**--privileged**=*true*|*false*
|
||||
Give extended privileges to this container. The default is *false*.
|
||||
|
||||
|
|
|
@ -1604,6 +1604,7 @@ removed before the image is removed.
|
|||
Both hostPort and containerPort can be specified as a range of ports.
|
||||
When specifying ranges for both, the number of container ports in the range must match the number of host ports in the range. (e.g., `-p 1234-1236:1234-1236/tcp`)
|
||||
(use 'docker port' to see the actual mapping)
|
||||
--pid=host 'host': use the host PID namespace inside the container. Note: the host mode gives the container full access to processes on the system and is therefore considered insecure.
|
||||
--privileged=false Give extended privileges to this container
|
||||
--restart="" Restart policy to apply when a container exits (no, on-failure[:max-retry], always)
|
||||
--rm=false Automatically remove the container when it exits (incompatible with -d)
|
||||
|
|
|
@ -133,11 +133,31 @@ While not strictly a means of identifying a container, you can specify a version
|
|||
image you'd like to run the container with by adding `image[:tag]` to the command. For
|
||||
example, `docker run ubuntu:14.04`.
|
||||
|
||||
## PID Settings
|
||||
--pid="" : Set the PID (Process) Namespace mode for the container,
|
||||
'host': use the host's PID namespace inside the container
|
||||
By default, all containers have the PID namespace enabled.
|
||||
|
||||
PID namespace provides separation of processes. The PID Namespace removes the
|
||||
view of the system processes, and allows process ids to be reused including
|
||||
pid 1.
|
||||
|
||||
In certain cases you want your container to share the host's process namespace,
|
||||
basically allowing processes within the container to see all of the processes
|
||||
on the system. For example, you could build a container with debugging tools
|
||||
like `strace` or `gdb`, but want to use these tools when debugging processes
|
||||
running on the host.
|
||||
|
||||
$ sudo docker run --pid=host rhel7 strace -p 1234
|
||||
|
||||
This command would allow you to use `strace` inside the container on pid 1234 on
|
||||
the host.
|
||||
|
||||
## IPC Settings
|
||||
--ipc="" : Set the IPC mode for the container,
|
||||
'container:<name|id>': reuses another container's IPC namespace
|
||||
'host': use the host's IPC namespace inside the container
|
||||
By default, all containers have the IPC namespace enabled
|
||||
By default, all containers have the IPC namespace enabled.
|
||||
|
||||
IPC (POSIX/SysV IPC) namespace provides separation of named shared memory segments, semaphores and message queues.
|
||||
|
||||
|
|
|
@ -66,6 +66,27 @@ func (n IpcMode) Container() string {
|
|||
return ""
|
||||
}
|
||||
|
||||
// PidMode describes the PID namespace mode requested for a container.
// The empty string selects a private PID namespace; "host" shares the
// host's PID namespace.
type PidMode string

// IsPrivate indicates whether the container uses its own private PID namespace.
func (n PidMode) IsPrivate() bool {
	return !(n.IsHost())
}

// IsHost indicates whether the container uses the host's PID namespace.
func (n PidMode) IsHost() bool {
	return n == "host"
}

// Valid indicates whether the PID mode is one of the supported values:
// "" (private namespace) or "host".
//
// The whole string is compared rather than only the part before the first
// ":", so a value such as "host:foo" is rejected instead of being accepted
// here and then treated as private because IsHost does not match it.
func (n PidMode) Valid() bool {
	switch n {
	case "", "host":
		return true
	default:
		return false
	}
}
|
||||
|
||||
type DeviceMapping struct {
|
||||
PathOnHost string
|
||||
PathInContainer string
|
||||
|
@ -92,6 +113,7 @@ type HostConfig struct {
|
|||
Devices []DeviceMapping
|
||||
NetworkMode NetworkMode
|
||||
IpcMode IpcMode
|
||||
PidMode PidMode
|
||||
CapAdd []string
|
||||
CapDrop []string
|
||||
RestartPolicy RestartPolicy
|
||||
|
@ -125,6 +147,7 @@ func ContainerHostConfigFromJob(job *engine.Job) *HostConfig {
|
|||
PublishAllPorts: job.GetenvBool("PublishAllPorts"),
|
||||
NetworkMode: NetworkMode(job.Getenv("NetworkMode")),
|
||||
IpcMode: IpcMode(job.Getenv("IpcMode")),
|
||||
PidMode: PidMode(job.Getenv("PidMode")),
|
||||
}
|
||||
|
||||
job.GetenvJson("LxcConf", &hostConfig.LxcConf)
|
||||
|
|
|
@ -46,6 +46,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*Config, *HostConfig, *flag.FlagSe
|
|||
|
||||
flNetwork = cmd.Bool([]string{"#n", "#-networking"}, true, "Enable networking for this container")
|
||||
flPrivileged = cmd.Bool([]string{"#privileged", "-privileged"}, false, "Give extended privileges to this container")
|
||||
flPidMode = cmd.String([]string{"-pid"}, "", "Default is to create a private PID namespace for the container\n'host': use the host PID namespace inside the container. Note: the host mode gives the container full access to processes on the system and is therefore considered insecure.")
|
||||
flPublishAll = cmd.Bool([]string{"P", "-publish-all"}, false, "Publish all exposed ports to random ports on the host interfaces")
|
||||
flStdin = cmd.Bool([]string{"i", "-interactive"}, false, "Keep STDIN open even if not attached")
|
||||
flTty = cmd.Bool([]string{"t", "-tty"}, false, "Allocate a pseudo-TTY")
|
||||
|
@ -248,7 +249,12 @@ func Parse(cmd *flag.FlagSet, args []string) (*Config, *HostConfig, *flag.FlagSe
|
|||
|
||||
ipcMode := IpcMode(*flIpcMode)
|
||||
if !ipcMode.Valid() {
|
||||
return nil, nil, cmd, fmt.Errorf("--ipc: invalid IPC mode: %v", err)
|
||||
return nil, nil, cmd, fmt.Errorf("--ipc: invalid IPC mode")
|
||||
}
|
||||
|
||||
pidMode := PidMode(*flPidMode)
|
||||
if !pidMode.Valid() {
|
||||
return nil, nil, cmd, fmt.Errorf("--pid: invalid PID mode")
|
||||
}
|
||||
|
||||
netMode, err := parseNetMode(*flNetMode)
|
||||
|
@ -300,6 +306,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*Config, *HostConfig, *flag.FlagSe
|
|||
VolumesFrom: flVolumesFrom.GetAll(),
|
||||
NetworkMode: netMode,
|
||||
IpcMode: ipcMode,
|
||||
PidMode: pidMode,
|
||||
Devices: deviceMappings,
|
||||
CapAdd: flCapAdd.GetAll(),
|
||||
CapDrop: flCapDrop.GetAll(),
|
||||
|
|
Loading…
Reference in a new issue