Bläddra i källkod

Merge pull request #9074 from rhatdan/shm

Allow IPC namespace to be shared between containers or with the host
Michael Crosby 10 år sedan
förälder
incheckning
07996d82c7

+ 28 - 0
daemon/container.go

@@ -233,6 +233,18 @@ func populateCommand(c *Container, env []string) error {
 		return fmt.Errorf("invalid network mode: %s", c.hostConfig.NetworkMode)
 	}
 
+	ipc := &execdriver.Ipc{}
+
+	if c.hostConfig.IpcMode.IsContainer() {
+		ic, err := c.getIpcContainer()
+		if err != nil {
+			return err
+		}
+		ipc.ContainerID = ic.ID
+	} else {
+		ipc.HostIpc = c.hostConfig.IpcMode.IsHost()
+	}
+
 	// Build lists of devices allowed and created within the container.
 	userSpecifiedDevices := make([]*devices.Device, len(c.hostConfig.Devices))
 	for i, deviceMapping := range c.hostConfig.Devices {
@@ -274,6 +286,7 @@ func populateCommand(c *Container, env []string) error {
 		InitPath:           "/.dockerinit",
 		WorkingDir:         c.Config.WorkingDir,
 		Network:            en,
+		Ipc:                ipc,
 		Resources:          resources,
 		AllowedDevices:     allowedDevices,
 		AutoCreatedDevices: autoCreatedDevices,
@@ -1250,10 +1263,25 @@ func (container *Container) GetMountLabel() string {
 	return container.MountLabel
 }
 
+func (container *Container) getIpcContainer() (*Container, error) {
+	containerID := container.hostConfig.IpcMode.Container()
+	c := container.daemon.Get(containerID)
+	if c == nil {
+		return nil, fmt.Errorf("no such container to join IPC: %s", containerID)
+	}
+	if !c.IsRunning() {
+		return nil, fmt.Errorf("cannot join IPC of a non running container: %s", containerID)
+	}
+	return c, nil
+}
+
 func (container *Container) getNetworkedContainer() (*Container, error) {
 	parts := strings.SplitN(string(container.hostConfig.NetworkMode), ":", 2)
 	switch parts[0] {
 	case "container":
+		if len(parts) != 2 {
+			return nil, fmt.Errorf("no container specified to join network")
+		}
 		nc := container.daemon.Get(parts[1])
 		if nc == nil {
 			return nil, fmt.Errorf("no such container to join network: %s", parts[1])

+ 26 - 0
daemon/create.go

@@ -1,10 +1,13 @@
 package daemon
 
 import (
+	"fmt"
+
 	"github.com/docker/docker/engine"
 	"github.com/docker/docker/graph"
 	"github.com/docker/docker/pkg/parsers"
 	"github.com/docker/docker/runconfig"
+	"github.com/docker/libcontainer/label"
 )
 
 func (daemon *Daemon) ContainerCreate(job *engine.Job) engine.Status {
@@ -80,6 +83,12 @@ func (daemon *Daemon) Create(config *runconfig.Config, hostConfig *runconfig.Hos
 	if warnings, err = daemon.mergeAndVerifyConfig(config, img); err != nil {
 		return nil, nil, err
 	}
+	if hostConfig != nil && config.SecurityOpt == nil {
+		config.SecurityOpt, err = daemon.GenerateSecurityOpt(hostConfig.IpcMode)
+		if err != nil {
+			return nil, nil, err
+		}
+	}
 	if container, err = daemon.newContainer(name, config, img); err != nil {
 		return nil, nil, err
 	}
@@ -99,3 +108,20 @@ func (daemon *Daemon) Create(config *runconfig.Config, hostConfig *runconfig.Hos
 	}
 	return container, warnings, nil
 }
+func (daemon *Daemon) GenerateSecurityOpt(ipcMode runconfig.IpcMode) ([]string, error) {
+	if ipcMode.IsHost() {
+		return label.DisableSecOpt(), nil
+	}
+	if ipcContainer := ipcMode.Container(); ipcContainer != "" {
+		c := daemon.Get(ipcContainer)
+		if c == nil {
+			return nil, fmt.Errorf("no such container to join IPC: %s", ipcContainer)
+		}
+		if !c.IsRunning() {
+			return nil, fmt.Errorf("cannot join IPC of a non running container: %s", ipcContainer)
+		}
+
+		return label.DupSecOpt(c.ProcessLabel), nil
+	}
+	return nil, nil
+}

+ 7 - 0
daemon/execdriver/driver.go

@@ -71,6 +71,12 @@ type Network struct {
 	HostNetworking bool              `json:"host_networking"`
 }
 
+// IPC settings of the container
+type Ipc struct {
+	ContainerID string `json:"container_id"` // id of the container to join ipc.
+	HostIpc     bool   `json:"host_ipc"`
+}
+
 type NetworkInterface struct {
 	Gateway     string `json:"gateway"`
 	IPAddress   string `json:"ip"`
@@ -115,6 +121,7 @@ type Command struct {
 	WorkingDir         string            `json:"working_dir"`
 	ConfigPath         string            `json:"config_path"` // this should be able to be removed when the lxc template is moved into the driver
 	Network            *Network          `json:"network"`
+	Ipc                *Ipc              `json:"ipc"`
 	Resources          *Resources        `json:"resources"`
 	Mounts             []Mount           `json:"mounts"`
 	AllowedDevices     []*devices.Device `json:"allowed_devices"`

+ 26 - 0
daemon/execdriver/native/create.go

@@ -36,6 +36,10 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e
 	container.MountConfig.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
 	container.RestrictSys = true
 
+	if err := d.createIpc(container, c); err != nil {
+		return nil, err
+	}
+
 	if err := d.createNetwork(container, c); err != nil {
 		return nil, err
 	}
@@ -124,6 +128,28 @@ func (d *driver) createNetwork(container *libcontainer.Config, c *execdriver.Com
 	return nil
 }
 
+func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command) error {
+	if c.Ipc.HostIpc {
+		container.Namespaces["NEWIPC"] = false
+		return nil
+	}
+
+	if c.Ipc.ContainerID != "" {
+		d.Lock()
+		active := d.activeContainers[c.Ipc.ContainerID]
+		d.Unlock()
+
+		if active == nil || active.cmd.Process == nil {
+			return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID)
+		}
+		cmd := active.cmd
+
+		container.IpcNsPath = filepath.Join("/proc", fmt.Sprint(cmd.Process.Pid), "ns", "ipc")
+	}
+
+	return nil
+}
+
 func (d *driver) setPrivileged(container *libcontainer.Config) (err error) {
 	container.Capabilities = capabilities.GetAllCapabilities()
 	container.Cgroups.AllowAllDevices = true

+ 77 - 4
docs/man/docker-run.1.md

@@ -23,6 +23,7 @@ docker-run - Run a command in a new container
 [**--expose**[=*[]*]]
 [**-h**|**--hostname**[=*HOSTNAME*]]
 [**-i**|**--interactive**[=*false*]]
+[**--ipc**[=*[]*]]
 [**--security-opt**[=*[]*]]
 [**--link**[=*[]*]]
 [**--lxc-conf**[=*[]*]]
@@ -142,6 +143,12 @@ ENTRYPOINT.
 **-i**, **--interactive**=*true*|*false*
    When set to true, keep stdin open even if not attached. The default is false.
 
+**--ipc**=[]
+   Set the IPC mode for the container
+     **container**:<*name*|*id*>: reuses another container's IPC stack
+     **host**: use the host's IPC stack inside the container.  
+     Note: the host mode gives the container full access to local IPC and is therefore considered insecure.
+
 **--security-opt**=*secdriver*:*name*:*value*
     "label:user:USER"   : Set the label user for the container
     "label:role:ROLE"   : Set the label role for the container
@@ -183,10 +190,11 @@ and foreground Docker containers.
 
 **--net**="bridge"
    Set the Network mode for the container
-                               'bridge': creates a new network stack for the container on the docker bridge
-                               'none': no networking for this container
-                               'container:<name|id>': reuses another container network stack
-                               'host': use the host network stack inside the container.  Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.
+   **bridge**: creates a new network stack for the container on the docker bridge
+   **none**: no networking for this container
+   **container**:<*name*|*id*>: reuses another container's network stack
+   **host**: use the host network stack inside the container.  
+   Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.
 
 **--mac-address**=*macaddress*
    Set the MAC address for the container's Ethernet device:
@@ -310,6 +318,71 @@ you’d like to connect instead, as in:
 
     # docker run -a stdin -a stdout -i -t fedora /bin/bash
 
+## Sharing IPC between containers
+
+Using shm_server.c available here: http://www.cs.cf.ac.uk/Dave/C/node27.html
+
+Testing `--ipc=host` mode:
+
+Host shows a shared memory segment with 7 pids attached, happens to be from httpd:
+
+```
+ $ sudo ipcs -m
+
+ ------ Shared Memory Segments --------
+ key        shmid      owner      perms      bytes      nattch     status      
+ 0x01128e25 0          root       600        1000       7                       
+```
+
+Now run a regular container, and it correctly does NOT see the shared memory segment from the host:
+
+```
+ $ sudo docker run -it shm ipcs -m
+
+ ------ Shared Memory Segments --------	
+ key        shmid      owner      perms      bytes      nattch     status      
+```
+
+Run a container with the new `--ipc=host` option, and it now sees the shared memory segment from the host httpd:
+
+ ```
+ $ sudo docker run -it --ipc=host shm ipcs -m
+
+ ------ Shared Memory Segments --------
+ key        shmid      owner      perms      bytes      nattch     status      
+ 0x01128e25 0          root       600        1000       7                   
+```
+Testing `--ipc=container:CONTAINERID` mode:
+
+Start a container with a program to create a shared memory segment:
+```
+ sudo docker run -it shm bash
+ $ sudo shm/shm_server &
+ $ sudo ipcs -m
+
+ ------ Shared Memory Segments --------
+ key        shmid      owner      perms      bytes      nattch     status      
+ 0x0000162e 0          root       666        27         1                       
+```
+Create a 2nd container correctly shows no shared memory segment from 1st container:
+```
+ $ sudo docker run shm ipcs -m
+
+ ------ Shared Memory Segments --------
+ key        shmid      owner      perms      bytes      nattch     status      
+```
+
+Create a 3rd container using the new --ipc=container:CONTAINERID option, now it shows the shared memory segment from the first:
+
+```
+ $ sudo docker run -it --ipc=container:ed735b2264ac shm ipcs -m
+ $ sudo ipcs -m
+
+ ------ Shared Memory Segments --------
+ key        shmid      owner      perms      bytes      nattch     status      
+ 0x0000162e 0          root       666        27         1
+```
+
 ## Linking Containers
 
 The link feature allows multiple containers to communicate with each other. For

+ 17 - 0
docs/sources/reference/run.md

@@ -50,6 +50,7 @@ following options.
  - [Container Identification](#container-identification)
      - [Name (--name)](#name-name)
      - [PID Equivalent](#pid-equivalent)
+ - [IPC Settings](#ipc-settings)
  - [Network Settings](#network-settings)
  - [Clean Up (--rm)](#clean-up-rm)
  - [Runtime Constraints on CPU and Memory](#runtime-constraints-on-cpu-and-memory)
@@ -131,6 +132,22 @@ While not strictly a means of identifying a container, you can specify a version
 image you'd like to run the container with by adding `image[:tag]` to the command. For
 example, `docker run ubuntu:14.04`.
 
+## IPC Settings
+    --ipc=""  : Set the IPC mode for the container,
+                                 'container:<name|id>': reuses another container's IPC namespace
+                                 'host': use the host's IPC namespace inside the container
+By default, all containers have the IPC namespace enabled 
+
+IPC (POSIX/SysV IPC) namespace provides separation of named shared memory segments, semaphores and message queues.  
+
+Shared memory segments are used to accelerate inter-process communication at
+memory speed, rather than through pipes or through the network stack. Shared
+memory is commonly used by databases and custom-built (typically C/OpenMPI, 
+C++/using boost libraries) high performance applications for scientific
+computing and financial services industries. If these types of applications
+are broken into multiple containers, you might need to share the IPC mechanisms
+of the containers.
+
 ## Network settings
 
     --dns=[]         : Set custom dns servers for the container

+ 70 - 0
integration-cli/docker_cli_run_test.go

@@ -2568,3 +2568,73 @@ func TestRunUnknownCommand(t *testing.T) {
 
 	logDone("run - Unknown Command")
 }
+
+func TestRunModeIpcHost(t *testing.T) {
+	hostIpc, err := os.Readlink("/proc/1/ns/ipc")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	cmd := exec.Command(dockerBinary, "run", "--ipc=host", "busybox", "readlink", "/proc/self/ns/ipc")
+	out2, _, err := runCommandWithOutput(cmd)
+	if err != nil {
+		t.Fatal(err, out2)
+	}
+
+	out2 = strings.Trim(out2, "\n")
+	if hostIpc != out2 {
+		t.Fatalf("IPC different with --ipc=host %s != %s\n", hostIpc, out2)
+	}
+
+	cmd = exec.Command(dockerBinary, "run", "busybox", "readlink", "/proc/self/ns/ipc")
+	out2, _, err = runCommandWithOutput(cmd)
+	if err != nil {
+		t.Fatal(err, out2)
+	}
+
+	out2 = strings.Trim(out2, "\n")
+	if hostIpc == out2 {
+		t.Fatalf("IPC should be different without --ipc=host %s != %s\n", hostIpc, out2)
+	}
+	deleteAllContainers()
+
+	logDone("run - hostname and several network modes")
+}
+
+func TestRunModeIpcContainer(t *testing.T) {
+	cmd := exec.Command(dockerBinary, "run", "-d", "busybox", "top")
+	out, _, err := runCommandWithOutput(cmd)
+	if err != nil {
+		t.Fatal(err, out)
+	}
+	id := strings.TrimSpace(out)
+	state, err := inspectField(id, "State.Running")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if state != "true" {
+		t.Fatal("Container state is 'not running'")
+	}
+	pid1, err := inspectField(id, "State.Pid")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	parentContainerIpc, err := os.Readlink(fmt.Sprintf("/proc/%s/ns/ipc", pid1))
+	if err != nil {
+		t.Fatal(err)
+	}
+	cmd = exec.Command(dockerBinary, "run", fmt.Sprintf("--ipc=container:%s", id), "busybox", "readlink", "/proc/self/ns/ipc")
+	out2, _, err := runCommandWithOutput(cmd)
+	if err != nil {
+		t.Fatal(err, out2)
+	}
+
+	out2 = strings.Trim(out2, "\n")
+	if parentContainerIpc != out2 {
+		t.Fatalf("IPC different with --ipc=container:%s %s != %s\n", id, parentContainerIpc, out2)
+	}
+	deleteAllContainers()
+
+	logDone("run - hostname and several network modes")
+}

+ 40 - 0
runconfig/hostconfig.go

@@ -28,6 +28,44 @@ func (n NetworkMode) IsNone() bool {
 	return n == "none"
 }
 
+type IpcMode string
+
+// IsPrivate indicates whether container use it's private ipc stack
+func (n IpcMode) IsPrivate() bool {
+	return !(n.IsHost() || n.IsContainer())
+}
+
+func (n IpcMode) IsHost() bool {
+	return n == "host"
+}
+
+func (n IpcMode) IsContainer() bool {
+	parts := strings.SplitN(string(n), ":", 2)
+	return len(parts) > 1 && parts[0] == "container"
+}
+
+func (n IpcMode) Valid() bool {
+	parts := strings.Split(string(n), ":")
+	switch mode := parts[0]; mode {
+	case "", "host":
+	case "container":
+		if len(parts) != 2 || parts[1] == "" {
+			return false
+		}
+	default:
+		return false
+	}
+	return true
+}
+
+func (n IpcMode) Container() string {
+	parts := strings.SplitN(string(n), ":", 2)
+	if len(parts) > 1 {
+		return parts[1]
+	}
+	return ""
+}
+
 type DeviceMapping struct {
 	PathOnHost        string
 	PathInContainer   string
@@ -53,6 +91,7 @@ type HostConfig struct {
 	VolumesFrom     []string
 	Devices         []DeviceMapping
 	NetworkMode     NetworkMode
+	IpcMode         IpcMode
 	CapAdd          []string
 	CapDrop         []string
 	RestartPolicy   RestartPolicy
@@ -84,6 +123,7 @@ func ContainerHostConfigFromJob(job *engine.Job) *HostConfig {
 		Privileged:      job.GetenvBool("Privileged"),
 		PublishAllPorts: job.GetenvBool("PublishAllPorts"),
 		NetworkMode:     NetworkMode(job.Getenv("NetworkMode")),
+		IpcMode:         IpcMode(job.Getenv("IpcMode")),
 	}
 
 	job.GetenvJson("LxcConf", &hostConfig.LxcConf)

+ 7 - 0
runconfig/parse.go

@@ -60,6 +60,7 @@ func Parse(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config,
 		flCpuset          = cmd.String([]string{"-cpuset"}, "", "CPUs in which to allow execution (0-3, 0,1)")
 		flNetMode         = cmd.String([]string{"-net"}, "bridge", "Set the Network mode for the container\n'bridge': creates a new network stack for the container on the docker bridge\n'none': no networking for this container\n'container:<name|id>': reuses another container network stack\n'host': use the host network stack inside the container.  Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.")
 		flMacAddress      = cmd.String([]string{"-mac-address"}, "", "Container MAC address (e.g. 92:d0:c6:0a:29:33)")
+		flIpcMode         = cmd.String([]string{"-ipc"}, "", "Default is to create a private IPC namespace (POSIX SysV IPC) for the container\n'container:<name|id>': reuses another container shared memory, semaphores and message queues\n'host': use the host shared memory,semaphores and message queues inside the container.  Note: the host mode gives the container full access to local shared memory and is therefore considered insecure.")
 		flRestartPolicy   = cmd.String([]string{"-restart"}, "", "Restart policy to apply when a container exits (no, on-failure[:max-retry], always)")
 	)
 
@@ -241,6 +242,11 @@ func Parse(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config,
 	// parse the '-e' and '--env' after, to allow override
 	envVariables = append(envVariables, flEnv.GetAll()...)
 
+	ipcMode := IpcMode(*flIpcMode)
+	if !ipcMode.Valid() {
+		return nil, nil, cmd, fmt.Errorf("--ipc: invalid IPC mode: %v", err)
+	}
+
 	netMode, err := parseNetMode(*flNetMode)
 	if err != nil {
 		return nil, nil, cmd, fmt.Errorf("--net: invalid net mode: %v", err)
@@ -289,6 +295,7 @@ func Parse(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config,
 		ExtraHosts:      flExtraHosts.GetAll(),
 		VolumesFrom:     flVolumesFrom.GetAll(),
 		NetworkMode:     netMode,
+		IpcMode:         ipcMode,
 		Devices:         deviceMappings,
 		CapAdd:          flCapAdd.GetAll(),
 		CapDrop:         flCapDrop.GetAll(),