Allow IPC namespace to be shared between containers or with the host

Some workloads rely on IPC for communications with other processes.  We
would like to split workloads between two containers but still allow them
to communicate through shared IPC.

This patch mimics the --net code: --ipc=host does not split off the IPC
namespace, and --ipc=container:CONTAINERID shares IPC between containers.

If you share IPC between containers, then you need to make sure SELinux labels
match.

Docker-DCO-1.1-Signed-off-by: Dan Walsh <dwalsh@redhat.com> (github: rhatdan)
This commit is contained in:
Dan Walsh 2014-11-10 16:14:17 -05:00
parent 6ad1cd5d0f
commit 497fc8876e
9 changed files with 298 additions and 4 deletions

View file

@ -233,6 +233,18 @@ func populateCommand(c *Container, env []string) error {
return fmt.Errorf("invalid network mode: %s", c.hostConfig.NetworkMode)
}
ipc := &execdriver.Ipc{}
if c.hostConfig.IpcMode.IsContainer() {
ic, err := c.getIpcContainer()
if err != nil {
return err
}
ipc.ContainerID = ic.ID
} else {
ipc.HostIpc = c.hostConfig.IpcMode.IsHost()
}
// Build lists of devices allowed and created within the container.
userSpecifiedDevices := make([]*devices.Device, len(c.hostConfig.Devices))
for i, deviceMapping := range c.hostConfig.Devices {
@ -274,6 +286,7 @@ func populateCommand(c *Container, env []string) error {
InitPath: "/.dockerinit",
WorkingDir: c.Config.WorkingDir,
Network: en,
Ipc: ipc,
Resources: resources,
AllowedDevices: allowedDevices,
AutoCreatedDevices: autoCreatedDevices,
@ -1250,10 +1263,25 @@ func (container *Container) GetMountLabel() string {
return container.MountLabel
}
// getIpcContainer looks up the container named by this container's IpcMode
// and returns it so that its IPC namespace can be joined. An error is
// returned when the daemon does not know the container or when it is not
// currently running.
func (container *Container) getIpcContainer() (*Container, error) {
	id := container.hostConfig.IpcMode.Container()
	target := container.daemon.Get(id)
	switch {
	case target == nil:
		return nil, fmt.Errorf("no such container to join IPC: %s", id)
	case !target.IsRunning():
		return nil, fmt.Errorf("cannot join IPC of a non running container: %s", id)
	}
	return target, nil
}
func (container *Container) getNetworkedContainer() (*Container, error) {
parts := strings.SplitN(string(container.hostConfig.NetworkMode), ":", 2)
switch parts[0] {
case "container":
if len(parts) != 2 {
return nil, fmt.Errorf("no container specified to join network")
}
nc := container.daemon.Get(parts[1])
if nc == nil {
return nil, fmt.Errorf("no such container to join network: %s", parts[1])

View file

@ -1,10 +1,13 @@
package daemon
import (
"fmt"
"github.com/docker/docker/engine"
"github.com/docker/docker/graph"
"github.com/docker/docker/pkg/parsers"
"github.com/docker/docker/runconfig"
"github.com/docker/libcontainer/label"
)
func (daemon *Daemon) ContainerCreate(job *engine.Job) engine.Status {
@ -80,6 +83,12 @@ func (daemon *Daemon) Create(config *runconfig.Config, hostConfig *runconfig.Hos
if warnings, err = daemon.mergeAndVerifyConfig(config, img); err != nil {
return nil, nil, err
}
if hostConfig != nil && config.SecurityOpt == nil {
config.SecurityOpt, err = daemon.GenerateSecurityOpt(hostConfig.IpcMode)
if err != nil {
return nil, nil, err
}
}
if container, err = daemon.newContainer(name, config, img); err != nil {
return nil, nil, err
}
@ -99,3 +108,20 @@ func (daemon *Daemon) Create(config *runconfig.Config, hostConfig *runconfig.Hos
}
return container, warnings, nil
}
// GenerateSecurityOpt derives the security options implied by the requested
// IPC mode. Host IPC yields label.DisableSecOpt(); joining another
// container's IPC yields a duplicate of that container's process label via
// label.DupSecOpt. Any other mode yields nil options. An error is returned
// when the referenced container is unknown to the daemon or is not running.
func (daemon *Daemon) GenerateSecurityOpt(ipcMode runconfig.IpcMode) ([]string, error) {
	if ipcMode.IsHost() {
		return label.DisableSecOpt(), nil
	}

	name := ipcMode.Container()
	if name == "" {
		// Neither host nor container mode: nothing to derive.
		return nil, nil
	}

	peer := daemon.Get(name)
	if peer == nil {
		return nil, fmt.Errorf("no such container to join IPC: %s", name)
	}
	if !peer.IsRunning() {
		return nil, fmt.Errorf("cannot join IPC of a non running container: %s", name)
	}
	return label.DupSecOpt(peer.ProcessLabel), nil
}

View file

@ -62,6 +62,12 @@ type Network struct {
HostNetworking bool `json:"host_networking"`
}
// Ipc holds the IPC namespace settings of the container. When HostIpc is
// set, the native driver's createIpc turns off the new IPC namespace for the
// container; otherwise a non-empty ContainerID makes it point the container
// at that running container's IPC namespace.
type Ipc struct {
ContainerID string `json:"container_id"` // id of the container to join ipc.
// HostIpc requests that no separate IPC namespace be created for the container.
HostIpc bool `json:"host_ipc"`
}
type NetworkInterface struct {
Gateway string `json:"gateway"`
IPAddress string `json:"ip"`
@ -106,6 +112,7 @@ type Command struct {
WorkingDir string `json:"working_dir"`
ConfigPath string `json:"config_path"` // this should be able to be removed when the lxc template is moved into the driver
Network *Network `json:"network"`
Ipc *Ipc `json:"ipc"`
Resources *Resources `json:"resources"`
Mounts []Mount `json:"mounts"`
AllowedDevices []*devices.Device `json:"allowed_devices"`

View file

@ -36,6 +36,10 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e
container.MountConfig.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
container.RestrictSys = true
if err := d.createIpc(container, c); err != nil {
return nil, err
}
if err := d.createNetwork(container, c); err != nil {
return nil, err
}
@ -124,6 +128,28 @@ func (d *driver) createNetwork(container *libcontainer.Config, c *execdriver.Com
return nil
}
// createIpc applies the command's IPC settings to the libcontainer config.
// With host IPC the NEWIPC namespace is switched off. With a container ID the
// config is pointed at the IPC namespace of that container's process under
// /proc; an error is returned if the driver has no active entry, or no
// started process, for that ID. With neither set the config is left alone.
func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command) error {
	switch {
	case c.Ipc.HostIpc:
		container.Namespaces["NEWIPC"] = false

	case c.Ipc.ContainerID != "":
		// Only the map lookup is done under the driver lock.
		d.Lock()
		peer := d.activeContainers[c.Ipc.ContainerID]
		d.Unlock()

		if peer == nil || peer.cmd.Process == nil {
			return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID)
		}

		peerCmd := peer.cmd
		pid := fmt.Sprint(peerCmd.Process.Pid)
		container.IpcNsPath = filepath.Join("/proc", pid, "ns", "ipc")
	}
	return nil
}
func (d *driver) setPrivileged(container *libcontainer.Config) (err error) {
container.Capabilities = capabilities.GetAllCapabilities()
container.Cgroups.AllowAllDevices = true

View file

@ -23,6 +23,7 @@ docker-run - Run a command in a new container
[**--expose**[=*[]*]]
[**-h**|**--hostname**[=*HOSTNAME*]]
[**-i**|**--interactive**[=*false*]]
[**--ipc**[=*[]*]]
[**--security-opt**[=*[]*]]
[**--link**[=*[]*]]
[**--lxc-conf**[=*[]*]]
@ -142,6 +143,12 @@ ENTRYPOINT.
**-i**, **--interactive**=*true*|*false*
When set to true, keep stdin open even if not attached. The default is false.
**--ipc**=[]
Set the IPC mode for the container
**container**:<*name*|*id*>: reuses another container's IPC stack
**host**: use the host's IPC stack inside the container.
Note: the host mode gives the container full access to local IPC and is therefore considered insecure.
**--security-opt**=*secdriver*:*name*:*value*
"label:user:USER" : Set the label user for the container
"label:role:ROLE" : Set the label role for the container
@ -183,10 +190,11 @@ and foreground Docker containers.
**--net**="bridge"
Set the Network mode for the container
'bridge': creates a new network stack for the container on the docker bridge
'none': no networking for this container
'container:<name|id>': reuses another container network stack
'host': use the host network stack inside the container. Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.
**bridge**: creates a new network stack for the container on the docker bridge
**none**: no networking for this container
**container**:<*name*|*id*>: reuses another container's network stack
**host**: use the host network stack inside the container.
Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.
**--mac-address**=*macaddress*
Set the MAC address for the container's Ethernet device:
@ -310,6 +318,71 @@ youd like to connect instead, as in:
# docker run -a stdin -a stdout -i -t fedora /bin/bash
## Sharing IPC between containers
Using shm_server.c available here: http://www.cs.cf.ac.uk/Dave/C/node27.html
Testing `--ipc=host` mode:
Host shows a shared memory segment with 7 pids attached, happens to be from httpd:
```
$ sudo ipcs -m
------ Shared Memory Segments --------
key shmid owner perms bytes nattch status
0x01128e25 0 root 600 1000 7
```
Now run a regular container, and it correctly does NOT see the shared memory segment from the host:
```
$ sudo docker run -it shm ipcs -m
------ Shared Memory Segments --------
key shmid owner perms bytes nattch status
```
Run a container with the new `--ipc=host` option, and it now sees the shared memory segment from the host httpd:
```
$ sudo docker run -it --ipc=host shm ipcs -m
------ Shared Memory Segments --------
key shmid owner perms bytes nattch status
0x01128e25 0 root 600 1000 7
```
Testing `--ipc=container:CONTAINERID` mode:
Start a container with a program to create a shared memory segment:
```
sudo docker run -it shm bash
$ sudo shm/shm_server &
$ sudo ipcs -m
------ Shared Memory Segments --------
key shmid owner perms bytes nattch status
0x0000162e 0 root 666 27 1
```
Create a 2nd container, which correctly shows no shared memory segment from the 1st container:
```
$ sudo docker run shm ipcs -m
------ Shared Memory Segments --------
key shmid owner perms bytes nattch status
```
Create a 3rd container using the new --ipc=container:CONTAINERID option, now it shows the shared memory segment from the first:
```
$ sudo docker run -it --ipc=container:ed735b2264ac shm ipcs -m
$ sudo ipcs -m
------ Shared Memory Segments --------
key shmid owner perms bytes nattch status
0x0000162e 0 root 666 27 1
```
## Linking Containers
The link feature allows multiple containers to communicate with each other. For

View file

@ -50,6 +50,7 @@ following options.
- [Container Identification](#container-identification)
- [Name (--name)](#name-name)
- [PID Equivalent](#pid-equivalent)
- [IPC Settings](#ipc-settings)
- [Network Settings](#network-settings)
- [Clean Up (--rm)](#clean-up-rm)
- [Runtime Constraints on CPU and Memory](#runtime-constraints-on-cpu-and-memory)
@ -131,6 +132,22 @@ While not strictly a means of identifying a container, you can specify a version
image you'd like to run the container with by adding `image[:tag]` to the command. For
example, `docker run ubuntu:14.04`.
## IPC Settings
--ipc="" : Set the IPC mode for the container,
'container:<name|id>': reuses another container's IPC namespace
'host': use the host's IPC namespace inside the container
By default, all containers have the IPC namespace enabled
IPC (POSIX/SysV IPC) namespace provides separation of named shared memory segments, semaphores and message queues.
Shared memory segments are used to accelerate inter-process communication at
memory speed, rather than through pipes or through the network stack. Shared
memory is commonly used by databases and custom-built (typically C/OpenMPI,
C++/using boost libraries) high performance applications for scientific
computing and financial services industries. If these types of applications
are broken into multiple containers, you might need to share the IPC mechanisms
of the containers.
## Network settings
--dns=[] : Set custom dns servers for the container

View file

@ -2568,3 +2568,73 @@ func TestRunUnknownCommand(t *testing.T) {
logDone("run - Unknown Command")
}
func TestRunModeIpcHost(t *testing.T) {
hostIpc, err := os.Readlink("/proc/1/ns/ipc")
if err != nil {
t.Fatal(err)
}
cmd := exec.Command(dockerBinary, "run", "--ipc=host", "busybox", "readlink", "/proc/self/ns/ipc")
out2, _, err := runCommandWithOutput(cmd)
if err != nil {
t.Fatal(err, out2)
}
out2 = strings.Trim(out2, "\n")
if hostIpc != out2 {
t.Fatalf("IPC different with --ipc=host %s != %s\n", hostIpc, out2)
}
cmd = exec.Command(dockerBinary, "run", "busybox", "readlink", "/proc/self/ns/ipc")
out2, _, err = runCommandWithOutput(cmd)
if err != nil {
t.Fatal(err, out2)
}
out2 = strings.Trim(out2, "\n")
if hostIpc == out2 {
t.Fatalf("IPC should be different without --ipc=host %s != %s\n", hostIpc, out2)
}
deleteAllContainers()
logDone("run - hostname and several network modes")
}
func TestRunModeIpcContainer(t *testing.T) {
cmd := exec.Command(dockerBinary, "run", "-d", "busybox", "top")
out, _, err := runCommandWithOutput(cmd)
if err != nil {
t.Fatal(err, out)
}
id := strings.TrimSpace(out)
state, err := inspectField(id, "State.Running")
if err != nil {
t.Fatal(err)
}
if state != "true" {
t.Fatal("Container state is 'not running'")
}
pid1, err := inspectField(id, "State.Pid")
if err != nil {
t.Fatal(err)
}
parentContainerIpc, err := os.Readlink(fmt.Sprintf("/proc/%s/ns/ipc", pid1))
if err != nil {
t.Fatal(err)
}
cmd = exec.Command(dockerBinary, "run", fmt.Sprintf("--ipc=container:%s", id), "busybox", "readlink", "/proc/self/ns/ipc")
out2, _, err := runCommandWithOutput(cmd)
if err != nil {
t.Fatal(err, out2)
}
out2 = strings.Trim(out2, "\n")
if parentContainerIpc != out2 {
t.Fatalf("IPC different with --ipc=container:%s %s != %s\n", id, parentContainerIpc, out2)
}
deleteAllContainers()
logDone("run - hostname and several network modes")
}

View file

@ -28,6 +28,44 @@ func (n NetworkMode) IsNone() bool {
return n == "none"
}
// IpcMode describes how a container's IPC namespace is set up. The accepted
// forms are "" (private namespace), "host" and "container:<name|id>".
type IpcMode string

// IsPrivate indicates whether the container uses its own private IPC stack,
// i.e. the mode is neither host nor container.
func (n IpcMode) IsPrivate() bool {
	return !(n.IsHost() || n.IsContainer())
}

// IsHost indicates whether the mode is exactly "host".
func (n IpcMode) IsHost() bool {
	return n == "host"
}

// IsContainer indicates whether the mode has the "container:<...>" form.
// It does not check that the part after the colon is non-empty; use Valid
// for that.
func (n IpcMode) IsContainer() bool {
	parts := strings.SplitN(string(n), ":", 2)
	return len(parts) > 1 && parts[0] == "container"
}

// Valid reports whether the mode is well formed: "", "host", or "container:"
// followed by exactly one non-empty name or id.
func (n IpcMode) Valid() bool {
	parts := strings.Split(string(n), ":")
	switch parts[0] {
	case "", "host":
		// These modes take no argument. Values such as "host:foo" or ":foo"
		// would otherwise pass validation and then be treated as private IPC,
		// because IsHost only matches the exact string "host".
		return len(parts) == 1
	case "container":
		return len(parts) == 2 && parts[1] != ""
	}
	return false
}

// Container returns the text after the first ':' in the mode, or "" when the
// mode contains no ':'. For a "container:<name|id>" mode this is the name or
// id of the container whose IPC namespace is to be joined.
func (n IpcMode) Container() string {
	parts := strings.SplitN(string(n), ":", 2)
	if len(parts) > 1 {
		return parts[1]
	}
	return ""
}
type DeviceMapping struct {
PathOnHost string
PathInContainer string
@ -53,6 +91,7 @@ type HostConfig struct {
VolumesFrom []string
Devices []DeviceMapping
NetworkMode NetworkMode
IpcMode IpcMode
CapAdd []string
CapDrop []string
RestartPolicy RestartPolicy
@ -84,6 +123,7 @@ func ContainerHostConfigFromJob(job *engine.Job) *HostConfig {
Privileged: job.GetenvBool("Privileged"),
PublishAllPorts: job.GetenvBool("PublishAllPorts"),
NetworkMode: NetworkMode(job.Getenv("NetworkMode")),
IpcMode: IpcMode(job.Getenv("IpcMode")),
}
job.GetenvJson("LxcConf", &hostConfig.LxcConf)

View file

@ -60,6 +60,7 @@ func Parse(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config,
flCpuset = cmd.String([]string{"-cpuset"}, "", "CPUs in which to allow execution (0-3, 0,1)")
flNetMode = cmd.String([]string{"-net"}, "bridge", "Set the Network mode for the container\n'bridge': creates a new network stack for the container on the docker bridge\n'none': no networking for this container\n'container:<name|id>': reuses another container network stack\n'host': use the host network stack inside the container. Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.")
flMacAddress = cmd.String([]string{"-mac-address"}, "", "Container MAC address (e.g. 92:d0:c6:0a:29:33)")
flIpcMode = cmd.String([]string{"-ipc"}, "", "Default is to create a private IPC namespace (POSIX SysV IPC) for the container\n'container:<name|id>': reuses another container shared memory, semaphores and message queues\n'host': use the host shared memory,semaphores and message queues inside the container. Note: the host mode gives the container full access to local shared memory and is therefore considered insecure.")
flRestartPolicy = cmd.String([]string{"-restart"}, "", "Restart policy to apply when a container exits (no, on-failure[:max-retry], always)")
)
@ -241,6 +242,11 @@ func Parse(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config,
// parse the '-e' and '--env' after, to allow override
envVariables = append(envVariables, flEnv.GetAll()...)
// Validate the --ipc value up front. The error reports the rejected mode
// itself; no error value is produced by Valid, so there is none to format.
ipcMode := IpcMode(*flIpcMode)
if !ipcMode.Valid() {
	return nil, nil, cmd, fmt.Errorf("--ipc: invalid IPC mode: %s", ipcMode)
}
netMode, err := parseNetMode(*flNetMode)
if err != nil {
return nil, nil, cmd, fmt.Errorf("--net: invalid net mode: %v", err)
@ -289,6 +295,7 @@ func Parse(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config,
ExtraHosts: flExtraHosts.GetAll(),
VolumesFrom: flVolumesFrom.GetAll(),
NetworkMode: netMode,
IpcMode: ipcMode,
Devices: deviceMappings,
CapAdd: flCapAdd.GetAll(),
CapDrop: flCapDrop.GetAll(),