Przeglądaj źródła

Allow IPC namespace to be shared between containers or with the host

Some workloads rely on IPC for communication with other processes.  We
would like to split workloads between two containers but still allow them
to communicate through shared IPC.

This patch mimics the --net code: --ipc=host keeps the container in the
host's IPC namespace instead of splitting off a new one, and
--ipc=container:CONTAINERID shares the IPC namespace of another container.

If you share IPC between containers, then you need to make sure SELinux labels
match.

Docker-DCO-1.1-Signed-off-by: Dan Walsh <dwalsh@redhat.com> (github: rhatdan)
Dan Walsh 10 lat temu
rodzic
commit
497fc8876e

+ 28 - 0
daemon/container.go

@@ -233,6 +233,18 @@ func populateCommand(c *Container, env []string) error {
 		return fmt.Errorf("invalid network mode: %s", c.hostConfig.NetworkMode)
 	}
 
+	ipc := &execdriver.Ipc{}
+
+	if c.hostConfig.IpcMode.IsContainer() {
+		ic, err := c.getIpcContainer()
+		if err != nil {
+			return err
+		}
+		ipc.ContainerID = ic.ID
+	} else {
+		ipc.HostIpc = c.hostConfig.IpcMode.IsHost()
+	}
+
 	// Build lists of devices allowed and created within the container.
 	userSpecifiedDevices := make([]*devices.Device, len(c.hostConfig.Devices))
 	for i, deviceMapping := range c.hostConfig.Devices {
@@ -274,6 +286,7 @@ func populateCommand(c *Container, env []string) error {
 		InitPath:           "/.dockerinit",
 		WorkingDir:         c.Config.WorkingDir,
 		Network:            en,
+		Ipc:                ipc,
 		Resources:          resources,
 		AllowedDevices:     allowedDevices,
 		AutoCreatedDevices: autoCreatedDevices,
@@ -1250,10 +1263,25 @@ func (container *Container) GetMountLabel() string {
 	return container.MountLabel
 }
 
+// getIpcContainer resolves the container referenced by this container's
+// IpcMode and returns it so that its IPC namespace can be joined.
+// An error is returned when the daemon lookup yields nil or when the
+// referenced container is not running.
+func (container *Container) getIpcContainer() (*Container, error) {
+	id := container.hostConfig.IpcMode.Container()
+	target := container.daemon.Get(id)
+	switch {
+	case target == nil:
+		return nil, fmt.Errorf("no such container to join IPC: %s", id)
+	case !target.IsRunning():
+		return nil, fmt.Errorf("cannot join IPC of a non running container: %s", id)
+	}
+	return target, nil
+}
+
 func (container *Container) getNetworkedContainer() (*Container, error) {
 	parts := strings.SplitN(string(container.hostConfig.NetworkMode), ":", 2)
 	switch parts[0] {
 	case "container":
+		if len(parts) != 2 {
+			return nil, fmt.Errorf("no container specified to join network")
+		}
 		nc := container.daemon.Get(parts[1])
 		if nc == nil {
 			return nil, fmt.Errorf("no such container to join network: %s", parts[1])

+ 26 - 0
daemon/create.go

@@ -1,10 +1,13 @@
 package daemon
 
 import (
+	"fmt"
+
 	"github.com/docker/docker/engine"
 	"github.com/docker/docker/graph"
 	"github.com/docker/docker/pkg/parsers"
 	"github.com/docker/docker/runconfig"
+	"github.com/docker/libcontainer/label"
 )
 
 func (daemon *Daemon) ContainerCreate(job *engine.Job) engine.Status {
@@ -80,6 +83,12 @@ func (daemon *Daemon) Create(config *runconfig.Config, hostConfig *runconfig.Hos
 	if warnings, err = daemon.mergeAndVerifyConfig(config, img); err != nil {
 		return nil, nil, err
 	}
+	if hostConfig != nil && config.SecurityOpt == nil {
+		config.SecurityOpt, err = daemon.GenerateSecurityOpt(hostConfig.IpcMode)
+		if err != nil {
+			return nil, nil, err
+		}
+	}
 	if container, err = daemon.newContainer(name, config, img); err != nil {
 		return nil, nil, err
 	}
@@ -99,3 +108,20 @@ func (daemon *Daemon) Create(config *runconfig.Config, hostConfig *runconfig.Hos
 	}
 	return container, warnings, nil
 }
+// GenerateSecurityOpt derives security options from the requested IPC mode.
+//
+// For host IPC it returns label.DisableSecOpt(). When the mode names another
+// container, that container must exist and be running, and the result is
+// label.DupSecOpt of its ProcessLabel. For any other mode it returns
+// (nil, nil).
+func (daemon *Daemon) GenerateSecurityOpt(ipcMode runconfig.IpcMode) ([]string, error) {
+	if ipcMode.IsHost() {
+		return label.DisableSecOpt(), nil
+	}
+
+	target := ipcMode.Container()
+	if target == "" {
+		// No container reference: nothing to derive.
+		return nil, nil
+	}
+
+	shared := daemon.Get(target)
+	if shared == nil {
+		return nil, fmt.Errorf("no such container to join IPC: %s", target)
+	}
+	if !shared.IsRunning() {
+		return nil, fmt.Errorf("cannot join IPC of a non running container: %s", target)
+	}
+	return label.DupSecOpt(shared.ProcessLabel), nil
+}

+ 7 - 0
daemon/execdriver/driver.go

@@ -62,6 +62,12 @@ type Network struct {
 	HostNetworking bool              `json:"host_networking"`
 }
 
+// Ipc holds the IPC namespace settings of the container.
+type Ipc struct {
+	ContainerID string `json:"container_id"` // id of the container whose IPC namespace is joined; empty when none.
+	HostIpc     bool   `json:"host_ipc"`     // true when the container uses the host's IPC namespace.
+}
+
 type NetworkInterface struct {
 	Gateway     string `json:"gateway"`
 	IPAddress   string `json:"ip"`
@@ -106,6 +112,7 @@ type Command struct {
 	WorkingDir         string            `json:"working_dir"`
 	ConfigPath         string            `json:"config_path"` // this should be able to be removed when the lxc template is moved into the driver
 	Network            *Network          `json:"network"`
+	Ipc                *Ipc              `json:"ipc"`
 	Resources          *Resources        `json:"resources"`
 	Mounts             []Mount           `json:"mounts"`
 	AllowedDevices     []*devices.Device `json:"allowed_devices"`

+ 26 - 0
daemon/execdriver/native/create.go

@@ -36,6 +36,10 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e
 	container.MountConfig.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
 	container.RestrictSys = true
 
+	if err := d.createIpc(container, c); err != nil {
+		return nil, err
+	}
+
 	if err := d.createNetwork(container, c); err != nil {
 		return nil, err
 	}
@@ -124,6 +128,28 @@ func (d *driver) createNetwork(container *libcontainer.Config, c *execdriver.Com
 	return nil
 }
 
+// createIpc applies the command's IPC settings to the libcontainer config.
+//
+// With HostIpc set, the NEWIPC namespace entry is turned off. Otherwise,
+// when a ContainerID is given, IpcNsPath is pointed at /proc/<pid>/ns/ipc
+// of that container's process; an error is returned if the driver has no
+// active entry (or no started process) for that ID. With neither set the
+// config is left untouched.
+func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command) error {
+	switch {
+	case c.Ipc.HostIpc:
+		container.Namespaces["NEWIPC"] = false
+
+	case c.Ipc.ContainerID != "":
+		// Hold the driver lock only for the map lookup.
+		d.Lock()
+		peer := d.activeContainers[c.Ipc.ContainerID]
+		d.Unlock()
+
+		if peer == nil || peer.cmd.Process == nil {
+			return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID)
+		}
+
+		container.IpcNsPath = filepath.Join("/proc", fmt.Sprint(peer.cmd.Process.Pid), "ns", "ipc")
+	}
+
+	return nil
+}
+
 func (d *driver) setPrivileged(container *libcontainer.Config) (err error) {
 	container.Capabilities = capabilities.GetAllCapabilities()
 	container.Cgroups.AllowAllDevices = true

+ 77 - 4
docs/man/docker-run.1.md

@@ -23,6 +23,7 @@ docker-run - Run a command in a new container
 [**--expose**[=*[]*]]
 [**-h**|**--hostname**[=*HOSTNAME*]]
 [**-i**|**--interactive**[=*false*]]
+[**--ipc**[=*[]*]]
 [**--security-opt**[=*[]*]]
 [**--link**[=*[]*]]
 [**--lxc-conf**[=*[]*]]
@@ -142,6 +143,12 @@ ENTRYPOINT.
 **-i**, **--interactive**=*true*|*false*
    When set to true, keep stdin open even if not attached. The default is false.
 
+**--ipc**=[]
+   Set the IPC mode for the container
+     **container**:<*name*|*id*>: reuses another container's IPC stack
+     **host**: use the host's IPC stack inside the container.  
+     Note: the host mode gives the container full access to local IPC and is therefore considered insecure.
+
 **--security-opt**=*secdriver*:*name*:*value*
     "label:user:USER"   : Set the label user for the container
     "label:role:ROLE"   : Set the label role for the container
@@ -183,10 +190,11 @@ and foreground Docker containers.
 
 **--net**="bridge"
    Set the Network mode for the container
-                               'bridge': creates a new network stack for the container on the docker bridge
-                               'none': no networking for this container
-                               'container:<name|id>': reuses another container network stack
-                               'host': use the host network stack inside the container.  Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.
+   **bridge**: creates a new network stack for the container on the docker bridge
+   **none**: no networking for this container
+   **container**:<*name*|*id*>: reuses another container's network stack
+   **host**: use the host network stack inside the container.  
+   Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.
 
 **--mac-address**=*macaddress*
    Set the MAC address for the container's Ethernet device:
@@ -310,6 +318,71 @@ you’d like to connect instead, as in:
 
     # docker run -a stdin -a stdout -i -t fedora /bin/bash
 
+## Sharing IPC between containers
+
+Using shm_server.c available here: http://www.cs.cf.ac.uk/Dave/C/node27.html
+
+Testing `--ipc=host` mode:
+
+The host shows a shared memory segment with 7 pids attached (it happens to belong to httpd):
+
+```
+ $ sudo ipcs -m
+
+ ------ Shared Memory Segments --------
+ key        shmid      owner      perms      bytes      nattch     status      
+ 0x01128e25 0          root       600        1000       7                       
+```
+
+Now run a regular container, and it correctly does NOT see the shared memory segment from the host:
+
+```
+ $ sudo docker run -it shm ipcs -m
+
+ ------ Shared Memory Segments --------	
+ key        shmid      owner      perms      bytes      nattch     status      
+```
+
+Run a container with the new `--ipc=host` option, and it now sees the shared memory segment from the host httpd:
+
+ ```
+ $ sudo docker run -it --ipc=host shm ipcs -m
+
+ ------ Shared Memory Segments --------
+ key        shmid      owner      perms      bytes      nattch     status      
+ 0x01128e25 0          root       600        1000       7                   
+```
+Testing `--ipc=container:CONTAINERID` mode:
+
+Start a container with a program to create a shared memory segment:
+```
+ sudo docker run -it shm bash
+ $ sudo shm/shm_server &
+ $ sudo ipcs -m
+
+ ------ Shared Memory Segments --------
+ key        shmid      owner      perms      bytes      nattch     status      
+ 0x0000162e 0          root       666        27         1                       
+```
+A 2nd container, started normally, correctly shows no shared memory segment from the 1st container:
+```
+ $ sudo docker run shm ipcs -m
+
+ ------ Shared Memory Segments --------
+ key        shmid      owner      perms      bytes      nattch     status      
+```
+
+Create a 3rd container using the new --ipc=container:CONTAINERID option, now it shows the shared memory segment from the first:
+
+```
+ $ sudo docker run -it --ipc=container:ed735b2264ac shm ipcs -m
+ $ sudo ipcs -m
+
+ ------ Shared Memory Segments --------
+ key        shmid      owner      perms      bytes      nattch     status      
+ 0x0000162e 0          root       666        27         1
+```
+
 ## Linking Containers
 
 The link feature allows multiple containers to communicate with each other. For

+ 17 - 0
docs/sources/reference/run.md

@@ -50,6 +50,7 @@ following options.
  - [Container Identification](#container-identification)
      - [Name (--name)](#name-name)
      - [PID Equivalent](#pid-equivalent)
+ - [IPC Settings](#ipc-settings)
  - [Network Settings](#network-settings)
  - [Clean Up (--rm)](#clean-up-rm)
  - [Runtime Constraints on CPU and Memory](#runtime-constraints-on-cpu-and-memory)
@@ -131,6 +132,22 @@ While not strictly a means of identifying a container, you can specify a version
 image you'd like to run the container with by adding `image[:tag]` to the command. For
 example, `docker run ubuntu:14.04`.
 
+## IPC Settings
+    --ipc=""  : Set the IPC mode for the container,
+                                 'container:<name|id>': reuses another container's IPC namespace
+                                 'host': use the host's IPC namespace inside the container
+By default, all containers have the IPC namespace enabled.
+
+The IPC (POSIX/SysV IPC) namespace provides separation of named shared memory segments, semaphores and message queues.  
+
+Shared memory segments are used to accelerate inter-process communication at
+memory speed, rather than through pipes or through the network stack. Shared
+memory is commonly used by databases and custom-built (typically C/OpenMPI, 
+C++/using boost libraries) high performance applications for scientific
+computing and financial services industries. If these types of applications
+are broken into multiple containers, you might need to share the IPC mechanisms
+of the containers.
+
 ## Network settings
 
     --dns=[]         : Set custom dns servers for the container

+ 70 - 0
integration-cli/docker_cli_run_test.go

@@ -2568,3 +2568,73 @@ func TestRunUnknownCommand(t *testing.T) {
 
 	logDone("run - Unknown Command")
 }
+
+// TestRunModeIpcHost checks that a container started with --ipc=host reports
+// the same IPC namespace link as PID 1 on the host, and that a container
+// started without the flag reports a different one.
+func TestRunModeIpcHost(t *testing.T) {
+	// Deferred so the containers are removed even when t.Fatal aborts the test.
+	defer deleteAllContainers()
+
+	hostIpc, err := os.Readlink("/proc/1/ns/ipc")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	cmd := exec.Command(dockerBinary, "run", "--ipc=host", "busybox", "readlink", "/proc/self/ns/ipc")
+	out2, _, err := runCommandWithOutput(cmd)
+	if err != nil {
+		t.Fatal(err, out2)
+	}
+
+	out2 = strings.Trim(out2, "\n")
+	if hostIpc != out2 {
+		t.Fatalf("IPC different with --ipc=host %s != %s\n", hostIpc, out2)
+	}
+
+	cmd = exec.Command(dockerBinary, "run", "busybox", "readlink", "/proc/self/ns/ipc")
+	out2, _, err = runCommandWithOutput(cmd)
+	if err != nil {
+		t.Fatal(err, out2)
+	}
+
+	out2 = strings.Trim(out2, "\n")
+	if hostIpc == out2 {
+		// Reaching here means the two namespaces matched, so report equality.
+		t.Fatalf("IPC should be different without --ipc=host %s == %s\n", hostIpc, out2)
+	}
+
+	logDone("run - ipc host mode")
+}
+
+// TestRunModeIpcContainer starts a long-running container, then checks that a
+// second container started with --ipc=container:<id> reports the same IPC
+// namespace link as the first container's process.
+func TestRunModeIpcContainer(t *testing.T) {
+	// Deferred so the background "top" container is removed even when
+	// t.Fatal aborts the test.
+	defer deleteAllContainers()
+
+	cmd := exec.Command(dockerBinary, "run", "-d", "busybox", "top")
+	out, _, err := runCommandWithOutput(cmd)
+	if err != nil {
+		t.Fatal(err, out)
+	}
+	id := strings.TrimSpace(out)
+	state, err := inspectField(id, "State.Running")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if state != "true" {
+		t.Fatal("Container state is 'not running'")
+	}
+	pid1, err := inspectField(id, "State.Pid")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// The namespace link of the first container's process is the reference value.
+	parentContainerIpc, err := os.Readlink(fmt.Sprintf("/proc/%s/ns/ipc", pid1))
+	if err != nil {
+		t.Fatal(err)
+	}
+	cmd = exec.Command(dockerBinary, "run", fmt.Sprintf("--ipc=container:%s", id), "busybox", "readlink", "/proc/self/ns/ipc")
+	out2, _, err := runCommandWithOutput(cmd)
+	if err != nil {
+		t.Fatal(err, out2)
+	}
+
+	out2 = strings.Trim(out2, "\n")
+	if parentContainerIpc != out2 {
+		t.Fatalf("IPC different with --ipc=container:%s %s != %s\n", id, parentContainerIpc, out2)
+	}
+
+	logDone("run - ipc container mode")
+}

+ 40 - 0
runconfig/hostconfig.go

@@ -28,6 +28,44 @@ func (n NetworkMode) IsNone() bool {
 	return n == "none"
 }
 
+// IpcMode is the string form of a container's IPC namespace setting:
+// "" (private), "host", or "container:<name|id>".
+type IpcMode string
+
+// IsPrivate indicates whether the container uses its own private IPC stack,
+// i.e. the mode is neither host nor container.
+func (n IpcMode) IsPrivate() bool {
+	return !(n.IsHost() || n.IsContainer())
+}
+
+// IsHost indicates whether the mode is exactly "host".
+func (n IpcMode) IsHost() bool {
+	return n == "host"
+}
+
+// IsContainer indicates whether the mode has the form "container:<rest>".
+// It does not check that <rest> is non-empty; Valid does that.
+func (n IpcMode) IsContainer() bool {
+	parts := strings.SplitN(string(n), ":", 2)
+	return len(parts) > 1 && parts[0] == "container"
+}
+
+// Valid indicates whether the mode is "", "host", or "container:<id>" with a
+// non-empty id. The string is split on every ':', so a container reference
+// containing a further ':' is rejected.
+func (n IpcMode) Valid() bool {
+	parts := strings.Split(string(n), ":")
+	switch mode := parts[0]; mode {
+	case "", "host":
+	case "container":
+		if len(parts) != 2 || parts[1] == "" {
+			return false
+		}
+	default:
+		return false
+	}
+	return true
+}
+
+// Container returns everything after the first ':' in the mode, or "" when
+// the mode contains no ':'. The prefix is not checked here; callers use
+// IsContainer or Valid for that.
+func (n IpcMode) Container() string {
+	parts := strings.SplitN(string(n), ":", 2)
+	if len(parts) > 1 {
+		return parts[1]
+	}
+	return ""
+}
+
 type DeviceMapping struct {
 	PathOnHost        string
 	PathInContainer   string
@@ -53,6 +91,7 @@ type HostConfig struct {
 	VolumesFrom     []string
 	Devices         []DeviceMapping
 	NetworkMode     NetworkMode
+	IpcMode         IpcMode
 	CapAdd          []string
 	CapDrop         []string
 	RestartPolicy   RestartPolicy
@@ -84,6 +123,7 @@ func ContainerHostConfigFromJob(job *engine.Job) *HostConfig {
 		Privileged:      job.GetenvBool("Privileged"),
 		PublishAllPorts: job.GetenvBool("PublishAllPorts"),
 		NetworkMode:     NetworkMode(job.Getenv("NetworkMode")),
+		IpcMode:         IpcMode(job.Getenv("IpcMode")),
 	}
 
 	job.GetenvJson("LxcConf", &hostConfig.LxcConf)

+ 7 - 0
runconfig/parse.go

@@ -60,6 +60,7 @@ func Parse(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config,
 		flCpuset          = cmd.String([]string{"-cpuset"}, "", "CPUs in which to allow execution (0-3, 0,1)")
 		flNetMode         = cmd.String([]string{"-net"}, "bridge", "Set the Network mode for the container\n'bridge': creates a new network stack for the container on the docker bridge\n'none': no networking for this container\n'container:<name|id>': reuses another container network stack\n'host': use the host network stack inside the container.  Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.")
 		flMacAddress      = cmd.String([]string{"-mac-address"}, "", "Container MAC address (e.g. 92:d0:c6:0a:29:33)")
+		flIpcMode         = cmd.String([]string{"-ipc"}, "", "Default is to create a private IPC namespace (POSIX SysV IPC) for the container\n'container:<name|id>': reuses another container shared memory, semaphores and message queues\n'host': use the host shared memory,semaphores and message queues inside the container.  Note: the host mode gives the container full access to local shared memory and is therefore considered insecure.")
 		flRestartPolicy   = cmd.String([]string{"-restart"}, "", "Restart policy to apply when a container exits (no, on-failure[:max-retry], always)")
 	)
 
@@ -241,6 +242,11 @@ func Parse(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config,
 	// parse the '-e' and '--env' after, to allow override
 	envVariables = append(envVariables, flEnv.GetAll()...)
 
+	ipcMode := IpcMode(*flIpcMode)
+	if !ipcMode.Valid() {
+		return nil, nil, cmd, fmt.Errorf("--ipc: invalid IPC mode: %v", err)
+	}
+
 	netMode, err := parseNetMode(*flNetMode)
 	if err != nil {
 		return nil, nil, cmd, fmt.Errorf("--net: invalid net mode: %v", err)
@@ -289,6 +295,7 @@ func Parse(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config,
 		ExtraHosts:      flExtraHosts.GetAll(),
 		VolumesFrom:     flVolumesFrom.GetAll(),
 		NetworkMode:     netMode,
+		IpcMode:         ipcMode,
 		Devices:         deviceMappings,
 		CapAdd:          flCapAdd.GetAll(),
 		CapDrop:         flCapDrop.GetAll(),