Browse Source

Merge pull request #17034 from rhvgoyal/volume-propagation

Capability to specify per volume mount propagation mode
Brian Goff 9 years ago
parent
commit
ce0b1841c8

+ 1 - 0
api/types/types.go

@@ -338,6 +338,7 @@ type MountPoint struct {
 	Driver      string `json:",omitempty"`
 	Driver      string `json:",omitempty"`
 	Mode        string
 	Mode        string
 	RW          bool
 	RW          bool
+	Propagation string
 }
 }
 
 
 // Volume represents the configuration of a volume for the remote API
 // Volume represents the configuration of a volume for the remote API

+ 5 - 5
container/container_unix.go

@@ -403,7 +403,7 @@ func (container *Container) NetworkMounts() []execdriver.Mount {
 				Source:      container.ResolvConfPath,
 				Source:      container.ResolvConfPath,
 				Destination: "/etc/resolv.conf",
 				Destination: "/etc/resolv.conf",
 				Writable:    writable,
 				Writable:    writable,
-				Private:     true,
+				Propagation: volume.DefaultPropagationMode,
 			})
 			})
 		}
 		}
 	}
 	}
@@ -420,7 +420,7 @@ func (container *Container) NetworkMounts() []execdriver.Mount {
 				Source:      container.HostnamePath,
 				Source:      container.HostnamePath,
 				Destination: "/etc/hostname",
 				Destination: "/etc/hostname",
 				Writable:    writable,
 				Writable:    writable,
-				Private:     true,
+				Propagation: volume.DefaultPropagationMode,
 			})
 			})
 		}
 		}
 	}
 	}
@@ -437,7 +437,7 @@ func (container *Container) NetworkMounts() []execdriver.Mount {
 				Source:      container.HostsPath,
 				Source:      container.HostsPath,
 				Destination: "/etc/hosts",
 				Destination: "/etc/hosts",
 				Writable:    writable,
 				Writable:    writable,
-				Private:     true,
+				Propagation: volume.DefaultPropagationMode,
 			})
 			})
 		}
 		}
 	}
 	}
@@ -534,7 +534,7 @@ func (container *Container) IpcMounts() []execdriver.Mount {
 			Source:      container.ShmPath,
 			Source:      container.ShmPath,
 			Destination: "/dev/shm",
 			Destination: "/dev/shm",
 			Writable:    true,
 			Writable:    true,
-			Private:     true,
+			Propagation: volume.DefaultPropagationMode,
 		})
 		})
 	}
 	}
 
 
@@ -544,7 +544,7 @@ func (container *Container) IpcMounts() []execdriver.Mount {
 			Source:      container.MqueuePath,
 			Source:      container.MqueuePath,
 			Destination: "/dev/mqueue",
 			Destination: "/dev/mqueue",
 			Writable:    true,
 			Writable:    true,
-			Private:     true,
+			Propagation: volume.DefaultPropagationMode,
 		})
 		})
 	}
 	}
 	return mounts
 	return mounts

+ 9 - 3
daemon/execdriver/driver_unix.go

@@ -26,9 +26,8 @@ type Mount struct {
 	Source      string `json:"source"`
 	Source      string `json:"source"`
 	Destination string `json:"destination"`
 	Destination string `json:"destination"`
 	Writable    bool   `json:"writable"`
 	Writable    bool   `json:"writable"`
-	Private     bool   `json:"private"`
-	Slave       bool   `json:"slave"`
 	Data        string `json:"data"`
 	Data        string `json:"data"`
+	Propagation string `json:"mountpropagation"`
 }
 }
 
 
 // Resources contains all resource configs for a driver.
 // Resources contains all resource configs for a driver.
@@ -125,6 +124,11 @@ type Command struct {
 	UTS                *UTS              `json:"uts"`
 	UTS                *UTS              `json:"uts"`
 }
 }
 
 
+// SetRootPropagation sets the root mount propagation mode.
+func SetRootPropagation(config *configs.Config, propagation int) {
+	config.RootPropagation = propagation
+}
+
 // InitContainer is the initialization of a container config.
 // InitContainer is the initialization of a container config.
 // It returns the initial configs for a container. It's mostly
 // It returns the initial configs for a container. It's mostly
 // defined by the default template.
 // defined by the default template.
@@ -137,7 +141,9 @@ func InitContainer(c *Command) *configs.Config {
 	container.Devices = c.AutoCreatedDevices
 	container.Devices = c.AutoCreatedDevices
 	container.Rootfs = c.Rootfs
 	container.Rootfs = c.Rootfs
 	container.Readonlyfs = c.ReadonlyRootfs
 	container.Readonlyfs = c.ReadonlyRootfs
-	container.RootPropagation = mount.RPRIVATE
+	// This can be overridden later by driver during mount setup based
+	// on volume options
+	SetRootPropagation(container, mount.RPRIVATE)
 
 
 	// check to see if we are running in ramdisk to disable pivot root
 	// check to see if we are running in ramdisk to disable pivot root
 	container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
 	container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""

+ 165 - 11
daemon/execdriver/native/create.go

@@ -12,6 +12,7 @@ import (
 	derr "github.com/docker/docker/errors"
 	derr "github.com/docker/docker/errors"
 	"github.com/docker/docker/pkg/mount"
 	"github.com/docker/docker/pkg/mount"
 
 
+	"github.com/docker/docker/volume"
 	"github.com/opencontainers/runc/libcontainer/apparmor"
 	"github.com/opencontainers/runc/libcontainer/apparmor"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/devices"
 	"github.com/opencontainers/runc/libcontainer/devices"
@@ -278,6 +279,116 @@ func (d *Driver) setupRlimits(container *configs.Config, c *execdriver.Command)
 	}
 	}
 }
 }
 
 
+// If rootfs mount propagation is RPRIVATE, that means all the volumes are
+// going to be private anyway. There is no need to apply per volume
+// propagation on top. This is just an optimzation so that cost of per volume
+// propagation is paid only if user decides to make some volume non-private
+// which will force rootfs mount propagation to be non RPRIVATE.
+func checkResetVolumePropagation(container *configs.Config) {
+	if container.RootPropagation != mount.RPRIVATE {
+		return
+	}
+	for _, m := range container.Mounts {
+		m.PropagationFlags = nil
+	}
+}
+
+func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
+	for _, m := range mountinfo {
+		if m.Mountpoint == dir {
+			return m
+		}
+	}
+	return nil
+}
+
+// Get the source mount point of directory passed in as argument. Also return
+// optional fields.
+func getSourceMount(source string) (string, string, error) {
+	// Ensure any symlinks are resolved.
+	sourcePath, err := filepath.EvalSymlinks(source)
+	if err != nil {
+		return "", "", err
+	}
+
+	mountinfos, err := mount.GetMounts()
+	if err != nil {
+		return "", "", err
+	}
+
+	mountinfo := getMountInfo(mountinfos, sourcePath)
+	if mountinfo != nil {
+		return sourcePath, mountinfo.Optional, nil
+	}
+
+	path := sourcePath
+	for {
+		path = filepath.Dir(path)
+
+		mountinfo = getMountInfo(mountinfos, path)
+		if mountinfo != nil {
+			return path, mountinfo.Optional, nil
+		}
+
+		if path == "/" {
+			break
+		}
+	}
+
+	// If we are here, we did not find parent mount. Something is wrong.
+	return "", "", fmt.Errorf("Could not find source mount of %s", source)
+}
+
+// Ensure mount point on which path is mouted, is shared.
+func ensureShared(path string) error {
+	sharedMount := false
+
+	sourceMount, optionalOpts, err := getSourceMount(path)
+	if err != nil {
+		return err
+	}
+	// Make sure source mount point is shared.
+	optsSplit := strings.Split(optionalOpts, " ")
+	for _, opt := range optsSplit {
+		if strings.HasPrefix(opt, "shared:") {
+			sharedMount = true
+			break
+		}
+	}
+
+	if !sharedMount {
+		return fmt.Errorf("Path %s is mounted on %s but it is not a shared mount.", path, sourceMount)
+	}
+	return nil
+}
+
+// Ensure mount point on which path is mounted, is either shared or slave.
+func ensureSharedOrSlave(path string) error {
+	sharedMount := false
+	slaveMount := false
+
+	sourceMount, optionalOpts, err := getSourceMount(path)
+	if err != nil {
+		return err
+	}
+	// Make sure source mount point is shared.
+	optsSplit := strings.Split(optionalOpts, " ")
+	for _, opt := range optsSplit {
+		if strings.HasPrefix(opt, "shared:") {
+			sharedMount = true
+			break
+		} else if strings.HasPrefix(opt, "master:") {
+			slaveMount = true
+			break
+		}
+	}
+
+	if !sharedMount && !slaveMount {
+		return fmt.Errorf("Path %s is mounted on %s but it is not a shared or slave mount.", path, sourceMount)
+	}
+	return nil
+}
+
 func (d *Driver) setupMounts(container *configs.Config, c *execdriver.Command) error {
 func (d *Driver) setupMounts(container *configs.Config, c *execdriver.Command) error {
 	userMounts := make(map[string]struct{})
 	userMounts := make(map[string]struct{})
 	for _, m := range c.Mounts {
 	for _, m := range c.Mounts {
@@ -298,6 +409,15 @@ func (d *Driver) setupMounts(container *configs.Config, c *execdriver.Command) e
 	}
 	}
 	container.Mounts = defaultMounts
 	container.Mounts = defaultMounts
 
 
+	mountPropagationMap := map[string]int{
+		"private":  mount.PRIVATE,
+		"rprivate": mount.RPRIVATE,
+		"shared":   mount.SHARED,
+		"rshared":  mount.RSHARED,
+		"slave":    mount.SLAVE,
+		"rslave":   mount.RSLAVE,
+	}
+
 	for _, m := range c.Mounts {
 	for _, m := range c.Mounts {
 		for _, cm := range container.Mounts {
 		for _, cm := range container.Mounts {
 			if cm.Destination == m.Destination {
 			if cm.Destination == m.Destination {
@@ -319,31 +439,65 @@ func (d *Driver) setupMounts(container *configs.Config, c *execdriver.Command) e
 				}
 				}
 			}
 			}
 			container.Mounts = append(container.Mounts, &configs.Mount{
 			container.Mounts = append(container.Mounts, &configs.Mount{
-				Source:        m.Source,
-				Destination:   m.Destination,
-				Data:          data,
-				Device:        "tmpfs",
-				Flags:         flags,
-				PremountCmds:  genTmpfsPremountCmd(c.TmpDir, fulldest, m.Destination),
-				PostmountCmds: genTmpfsPostmountCmd(c.TmpDir, fulldest, m.Destination),
+				Source:           m.Source,
+				Destination:      m.Destination,
+				Data:             data,
+				Device:           "tmpfs",
+				Flags:            flags,
+				PremountCmds:     genTmpfsPremountCmd(c.TmpDir, fulldest, m.Destination),
+				PostmountCmds:    genTmpfsPostmountCmd(c.TmpDir, fulldest, m.Destination),
+				PropagationFlags: []int{mountPropagationMap[volume.DefaultPropagationMode]},
 			})
 			})
 			continue
 			continue
 		}
 		}
 		flags := syscall.MS_BIND | syscall.MS_REC
 		flags := syscall.MS_BIND | syscall.MS_REC
+		var pFlag int
 		if !m.Writable {
 		if !m.Writable {
 			flags |= syscall.MS_RDONLY
 			flags |= syscall.MS_RDONLY
 		}
 		}
-		if m.Slave {
-			flags |= syscall.MS_SLAVE
+
+		// Determine property of RootPropagation based on volume
+		// properties. If a volume is shared, then keep root propagtion
+		// shared. This should work for slave and private volumes too.
+		//
+		// For slave volumes, it can be either [r]shared/[r]slave.
+		//
+		// For private volumes any root propagation value should work.
+
+		pFlag = mountPropagationMap[m.Propagation]
+		if pFlag == mount.SHARED || pFlag == mount.RSHARED {
+			if err := ensureShared(m.Source); err != nil {
+				return err
+			}
+			rootpg := container.RootPropagation
+			if rootpg != mount.SHARED && rootpg != mount.RSHARED {
+				execdriver.SetRootPropagation(container, mount.SHARED)
+			}
+		} else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE {
+			if err := ensureSharedOrSlave(m.Source); err != nil {
+				return err
+			}
+			rootpg := container.RootPropagation
+			if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
+				execdriver.SetRootPropagation(container, mount.RSLAVE)
+			}
 		}
 		}
 
 
-		container.Mounts = append(container.Mounts, &configs.Mount{
+		mount := &configs.Mount{
 			Source:      m.Source,
 			Source:      m.Source,
 			Destination: m.Destination,
 			Destination: m.Destination,
 			Device:      "bind",
 			Device:      "bind",
 			Flags:       flags,
 			Flags:       flags,
-		})
+		}
+
+		if pFlag != 0 {
+			mount.PropagationFlags = []int{pFlag}
+		}
+
+		container.Mounts = append(container.Mounts, mount)
 	}
 	}
+
+	checkResetVolumePropagation(container)
 	return nil
 	return nil
 }
 }
 
 

+ 1 - 0
daemon/inspect_unix.go

@@ -72,6 +72,7 @@ func addMountPoints(container *container.Container) []types.MountPoint {
 			Driver:      m.Driver,
 			Driver:      m.Driver,
 			Mode:        m.Mode,
 			Mode:        m.Mode,
 			RW:          m.RW,
 			RW:          m.RW,
+			Propagation: m.Propagation,
 		})
 		})
 	}
 	}
 	return mountPoints
 	return mountPoints

+ 1 - 0
daemon/volumes.go

@@ -99,6 +99,7 @@ func (daemon *Daemon) registerMountPoints(container *container.Container, hostCo
 				RW:          m.RW && volume.ReadWrite(mode),
 				RW:          m.RW && volume.ReadWrite(mode),
 				Driver:      m.Driver,
 				Driver:      m.Driver,
 				Destination: m.Destination,
 				Destination: m.Destination,
+				Propagation: m.Propagation,
 			}
 			}
 
 
 			if len(cp.Source) == 0 {
 			if len(cp.Source) == 0 {

+ 4 - 2
daemon/volumes_unix.go

@@ -24,11 +24,13 @@ func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver.
 			return nil, err
 			return nil, err
 		}
 		}
 		if !container.TrySetNetworkMount(m.Destination, path) {
 		if !container.TrySetNetworkMount(m.Destination, path) {
-			mounts = append(mounts, execdriver.Mount{
+			mnt := execdriver.Mount{
 				Source:      path,
 				Source:      path,
 				Destination: m.Destination,
 				Destination: m.Destination,
 				Writable:    m.RW,
 				Writable:    m.RW,
-			})
+				Propagation: m.Propagation,
+			}
+			mounts = append(mounts, mnt)
 		}
 		}
 	}
 	}
 
 

+ 6 - 6
docs/reference/commandline/create.md

@@ -79,12 +79,12 @@ Creates a new container.
       -u, --user=""                 Username or UID
       -u, --user=""                 Username or UID
       --ulimit=[]                   Ulimit options
       --ulimit=[]                   Ulimit options
       --uts=""                      UTS namespace to use
       --uts=""                      UTS namespace to use
-      -v, --volume=[]               Bind mount a volume with: [host-src:]container-dest[:<options>], where
-                                    options are comma delimited and selected from [rw|ro] and [z|Z].
-                                    The 'host-src' can either be an absolute path or a name value.
-                                    If 'host-src' is missing, then docker creates a new volume.
-                                    If neither 'rw' or 'ro' is specified then the volume is mounted
-                                    in read-write mode.
+      -v, --volume=[host-src:]container-dest[:<options>]
+                                    Bind mount a volume. The comma-delimited
+                                    `options` are [rw|ro], [z|Z], or
+                                    [[r]shared|[r]slave|[r]private]. The
+                                    'host-src' is an absolute path or a name
+                                    value.
       --volume-driver=""            Container's volume driver
       --volume-driver=""            Container's volume driver
       --volumes-from=[]             Mount volumes from the specified container(s)
       --volumes-from=[]             Mount volumes from the specified container(s)
       -w, --workdir=""              Working directory inside the container
       -w, --workdir=""              Working directory inside the container

+ 6 - 6
docs/reference/commandline/run.md

@@ -80,12 +80,12 @@ parent = "smn_cli"
       -u, --user=""                 Username or UID (format: <name|uid>[:<group|gid>])
       -u, --user=""                 Username or UID (format: <name|uid>[:<group|gid>])
       --ulimit=[]                   Ulimit options
       --ulimit=[]                   Ulimit options
       --uts=""                      UTS namespace to use
       --uts=""                      UTS namespace to use
-      -v, --volume=[]               Bind mount a volume with: [host-src:]container-dest[:<options>], where
-                                    options are comma delimited and selected from [rw|ro] and [z|Z].
-                                    The 'host-src' can either be an absolute path or a name value.
-                                    If 'host-src' is missing, then docker creates a new volume.
-                                    If neither 'rw' or 'ro' is specified then the volume is mounted
-                                    in read-write mode.
+      -v, --volume=[host-src:]container-dest[:<options>]
+                                    Bind mount a volume. The comma-delimited
+                                    `options` are [rw|ro], [z|Z], or
+                                    [[r]shared|[r]slave|[r]private]. The
+                                    'host-src' is an absolute path or a name
+                                    value.
       --volume-driver=""            Container's volume driver
       --volume-driver=""            Container's volume driver
       --volumes-from=[]             Mount volumes from the specified container(s)
       --volumes-from=[]             Mount volumes from the specified container(s)
       -w, --workdir=""              Working directory inside the container
       -w, --workdir=""              Working directory inside the container

+ 8 - 5
docs/reference/run.md

@@ -1330,11 +1330,14 @@ Similarly the operator can set the **hostname** with `-h`.
 
 
 ### VOLUME (shared filesystems)
 ### VOLUME (shared filesystems)
 
 
-    -v=[]: Create a bind mount with: [host-src:]container-dest[:<options>], where
-    options are comma delimited and selected from [rw|ro] and [z|Z].
-           If 'host-src' is missing, then docker creates a new volume.
-		   If neither 'rw' or 'ro' is specified then the volume is mounted
-		   in read-write mode.
+    -v, --volume=[host-src:]container-dest[:<options>]: Bind mount a volume.
+    The comma-delimited `options` are [rw|ro], [z|Z], or
+    [[r]shared|[r]slave|[r]private]. The 'host-src' is an absolute path or a
+    name value.
+
+    If neither 'rw' or 'ro' is specified then the volume is mounted in
+    read-write mode.
+
     --volumes-from="": Mount all volumes from the given container(s)
     --volumes-from="": Mount all volumes from the given container(s)
 
 
 > **Note**:
 > **Note**:

+ 112 - 0
integration-cli/docker_cli_run_test.go

@@ -19,6 +19,7 @@ import (
 	"time"
 	"time"
 
 
 	"github.com/docker/docker/pkg/integration/checker"
 	"github.com/docker/docker/pkg/integration/checker"
+	"github.com/docker/docker/pkg/mount"
 	"github.com/docker/docker/pkg/nat"
 	"github.com/docker/docker/pkg/nat"
 	"github.com/docker/docker/runconfig"
 	"github.com/docker/docker/runconfig"
 	"github.com/docker/libnetwork/resolvconf"
 	"github.com/docker/libnetwork/resolvconf"
@@ -3812,3 +3813,114 @@ func (s *DockerSuite) TestRunWithOomScoreAdjInvalidRange(c *check.C) {
 		c.Fatalf("Expected output to contain %q, got %q instead", expected, out)
 		c.Fatalf("Expected output to contain %q, got %q instead", expected, out)
 	}
 	}
 }
 }
+
+func (s *DockerSuite) TestRunVolumesMountedAsShared(c *check.C) {
+	// Volume propagation is linux only. Also it creates directories for
+	// bind mounting, so needs to be same host.
+	testRequires(c, DaemonIsLinux, SameHostDaemon, NotUserNamespace)
+
+	// Prepare a source directory to bind mount
+	tmpDir, err := ioutil.TempDir("", "volume-source")
+	if err != nil {
+		c.Fatal(err)
+	}
+	defer os.RemoveAll(tmpDir)
+
+	if err := os.Mkdir(path.Join(tmpDir, "mnt1"), 0755); err != nil {
+		c.Fatal(err)
+	}
+
+	// Convert this directory into a shared mount point so that we do
+	// not rely on propagation properties of parent mount.
+	cmd := exec.Command("mount", "--bind", tmpDir, tmpDir)
+	if _, err = runCommand(cmd); err != nil {
+		c.Fatal(err)
+	}
+
+	cmd = exec.Command("mount", "--make-private", "--make-shared", tmpDir)
+	if _, err = runCommand(cmd); err != nil {
+		c.Fatal(err)
+	}
+
+	dockerCmd(c, "run", "--privileged", "-v", fmt.Sprintf("%s:/volume-dest:shared", tmpDir), "busybox", "mount", "--bind", "/volume-dest/mnt1", "/volume-dest/mnt1")
+
+	// Make sure a bind mount under a shared volume propagated to host.
+	if mounted, _ := mount.Mounted(path.Join(tmpDir, "mnt1")); !mounted {
+		c.Fatalf("Bind mount under shared volume did not propagate to host")
+	}
+
+	mount.Unmount(path.Join(tmpDir, "mnt1"))
+}
+
+func (s *DockerSuite) TestRunVolumesMountedAsSlave(c *check.C) {
+	// Volume propagation is linux only. Also it creates directories for
+	// bind mounting, so needs to be same host.
+	testRequires(c, DaemonIsLinux, SameHostDaemon, NotUserNamespace)
+
+	// Prepare a source directory to bind mount
+	tmpDir, err := ioutil.TempDir("", "volume-source")
+	if err != nil {
+		c.Fatal(err)
+	}
+	defer os.RemoveAll(tmpDir)
+
+	if err := os.Mkdir(path.Join(tmpDir, "mnt1"), 0755); err != nil {
+		c.Fatal(err)
+	}
+
+	// Prepare a source directory with file in it. We will bind mount this
+	// direcotry and see if file shows up.
+	tmpDir2, err := ioutil.TempDir("", "volume-source2")
+	if err != nil {
+		c.Fatal(err)
+	}
+	defer os.RemoveAll(tmpDir2)
+
+	if err := ioutil.WriteFile(path.Join(tmpDir2, "slave-testfile"), []byte("Test"), 0644); err != nil {
+		c.Fatal(err)
+	}
+
+	// Convert this directory into a shared mount point so that we do
+	// not rely on propagation properties of parent mount.
+	cmd := exec.Command("mount", "--bind", tmpDir, tmpDir)
+	if _, err = runCommand(cmd); err != nil {
+		c.Fatal(err)
+	}
+
+	cmd = exec.Command("mount", "--make-private", "--make-shared", tmpDir)
+	if _, err = runCommand(cmd); err != nil {
+		c.Fatal(err)
+	}
+
+	dockerCmd(c, "run", "-i", "-d", "--name", "parent", "-v", fmt.Sprintf("%s:/volume-dest:slave", tmpDir), "busybox", "top")
+
+	// Bind mount tmpDir2/ onto tmpDir/mnt1. If mount propagates inside
+	// container then contents of tmpDir2/slave-testfile should become
+	// visible at "/volume-dest/mnt1/slave-testfile"
+	cmd = exec.Command("mount", "--bind", tmpDir2, path.Join(tmpDir, "mnt1"))
+	if _, err = runCommand(cmd); err != nil {
+		c.Fatal(err)
+	}
+
+	out, _ := dockerCmd(c, "exec", "parent", "cat", "/volume-dest/mnt1/slave-testfile")
+
+	mount.Unmount(path.Join(tmpDir, "mnt1"))
+
+	if out != "Test" {
+		c.Fatalf("Bind mount under slave volume did not propagate to container")
+	}
+}
+
+func (s *DockerSuite) TestRunNamedVolumesMountedAsShared(c *check.C) {
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
+	out, exitcode, _ := dockerCmdWithError("run", "-v", "foo:/test:shared", "busybox", "touch", "/test/somefile")
+
+	if exitcode == 0 {
+		c.Fatalf("expected non-zero exit code; received %d", exitcode)
+	}
+
+	if expected := "Invalid volume specification"; !strings.Contains(out, expected) {
+		c.Fatalf(`Expected %q in output; got: %s`, expected, out)
+	}
+
+}

+ 73 - 3
man/docker-create.1.md

@@ -64,7 +64,7 @@ docker-create - Create a new container
 [**-u**|**--user**[=*USER*]]
 [**-u**|**--user**[=*USER*]]
 [**--ulimit**[=*[]*]]
 [**--ulimit**[=*[]*]]
 [**--uts**[=*[]*]]
 [**--uts**[=*[]*]]
-[**-v**|**--volume**[=*[]*]]
+[**-v**|**--volume**[=*[[HOST-DIR:]CONTAINER-DIR[:OPTIONS]]*]]
 [**--volume-driver**[=*DRIVER*]]
 [**--volume-driver**[=*DRIVER*]]
 [**--volumes-from**[=*[]*]]
 [**--volumes-from**[=*[]*]]
 [**-w**|**--workdir**[=*WORKDIR*]]
 [**-w**|**--workdir**[=*WORKDIR*]]
@@ -311,8 +311,78 @@ any options, the systems uses the following options:
      **host**: use the host's UTS namespace inside the container.
      **host**: use the host's UTS namespace inside the container.
      Note: the host mode gives the container access to changing the host's hostname and is therefore considered insecure.
      Note: the host mode gives the container access to changing the host's hostname and is therefore considered insecure.
 
 
-**-v**, **--volume**=[]
-   Bind mount a volume (e.g., from the host: -v /host:/container, from Docker: -v /container)
+**-v**|**--volume**[=*[[HOST-DIR:]CONTAINER-DIR[:OPTIONS]]*]
+   Create a bind mount. If you specify, ` -v /HOST-DIR:/CONTAINER-DIR`, Docker
+   bind mounts `/HOST-DIR` in the host to `/CONTAINER-DIR` in the Docker
+   container. If 'HOST-DIR' is omitted,  Docker automatically creates the new
+   volume on the host.  The `OPTIONS` are a comma delimited list and can be:
+
+   * [rw|ro]
+   * [z|Z]
+   * [`[r]shared`|`[r]slave`|`[r]private`]
+
+The `CONTAINER-DIR` must be an absolute path such as `/src/docs`. The `HOST-DIR`
+can be an absolute path or a `name` value. A `name` value must start with an
+alphanumeric character, followed by `a-z0-9`, `_` (underscore), `.` (period) or
+`-` (hyphen). An absolute path starts with a `/` (forward slash).
+
+If you supply a `HOST-DIR` that is an absolute path,  Docker bind-mounts to the
+path you specify. If you supply a `name`, Docker creates a named volume by that
+`name`. For example, you can specify either `/foo` or `foo` for a `HOST-DIR`
+value. If you supply the `/foo` value, Docker creates a bind-mount. If you
+supply the `foo` specification, Docker creates a named volume.
+
+You can specify multiple  **-v** options to mount one or more mounts to a
+container. To use these same mounts in other containers, specify the
+**--volumes-from** option also.
+
+You can add `:ro` or `:rw` suffix to a volume to mount it  read-only or
+read-write mode, respectively. By default, the volumes are mounted read-write.
+See examples.
+
+Labeling systems like SELinux require that proper labels are placed on volume
+content mounted into a container. Without a label, the security system might
+prevent the processes running inside the container from using the content. By
+default, Docker does not change the labels set by the OS.
+
+To change a label in the container context, you can add either of two suffixes
+`:z` or `:Z` to the volume mount. These suffixes tell Docker to relabel file
+objects on the shared volumes. The `z` option tells Docker that two containers
+share the volume content. As a result, Docker labels the content with a shared
+content label. Shared volume labels allow all containers to read/write content.
+The `Z` option tells Docker to label the content with a private unshared label.
+Only the current container can use a private volume.
+
+By default bind mounted volumes are `private`. That means any mounts done
+inside container will not be visible on host and vice-a-versa. One can change
+this behavior by specifying a volume mount propagation property. Making a
+volume `shared` mounts done under that volume inside container will be
+visible on host and vice-a-versa. Making a volume `slave` enables only one
+way mount propagation and that is mounts done on host under that volume
+will be visible inside container but not the other way around.
+
+To control mount propagation property of volume one can use `:[r]shared`,
+`:[r]slave` or `:[r]private` propagation flag. Propagation property can
+be specified only for bind mounted volumes and not for internal volumes or
+named volumes. For mount propagation to work source mount point (mount point
+where source dir is mounted on) has to have right propagation properties. For
+shared volumes, source mount point has to be shared. And for slave volumes,
+source mount has to be either shared or slave.
+
+Use `df <source-dir>` to figure out the source mount and then use
+`findmnt -o TARGET,PROPAGATION <source-mount-dir>` to figure out propagation
+properties of source mount. If `findmnt` utility is not available, then one
+can look at mount entry for source mount point in `/proc/self/mountinfo`. Look
+at `optional fields` and see if any propagaion properties are specified.
+`shared:X` means mount is `shared`, `master:X` means mount is `slave` and if
+nothing is there that means mount is `private`.
+
+To change propagation properties of a mount point use `mount` command. For
+example, if one wants to bind mount source directory `/foo` one can do
+`mount --bind /foo /foo` and `mount --make-private --make-shared /foo`. This
+will convert /foo into a `shared` mount point. Alternatively one can directly
+change propagation properties of source mount. Say `/` is source mount for
+`/foo`, then use `mount --make-shared /` to convert `/` into a `shared` mount.
 
 
 **--volume-driver**=""
 **--volume-driver**=""
    Container's volume driver. This driver creates volumes specified either from
    Container's volume driver. This driver creates volumes specified either from

+ 1 - 0
man/docker-inspect.1.md

@@ -104,6 +104,7 @@ To get information on a container use its ID or instance name:
         "Destination": "/data",
         "Destination": "/data",
         "Mode": "ro,Z",
         "Mode": "ro,Z",
         "RW": false
         "RW": false
+	"Propagation": ""
       }
       }
     ],
     ],
     "AppArmorProfile": "",
     "AppArmorProfile": "",

+ 55 - 27
man/docker-run.1.md

@@ -67,7 +67,7 @@ docker-run - Run a command in a new container
 [**-u**|**--user**[=*USER*]]
 [**-u**|**--user**[=*USER*]]
 [**--ulimit**[=*[]*]]
 [**--ulimit**[=*[]*]]
 [**--uts**[=*[]*]]
 [**--uts**[=*[]*]]
-[**-v**|**--volume**[=*[]*]]
+[**-v**|**--volume**[=*[[HOST-DIR:]CONTAINER-DIR[:OPTIONS]]*]]
 [**--volume-driver**[=*DRIVER*]]
 [**--volume-driver**[=*DRIVER*]]
 [**--volumes-from**[=*[]*]]
 [**--volumes-from**[=*[]*]]
 [**-w**|**--workdir**[=*WORKDIR*]]
 [**-w**|**--workdir**[=*WORKDIR*]]
@@ -476,24 +476,34 @@ any options, the systems uses the following options:
 **--ulimit**=[]
 **--ulimit**=[]
     Ulimit options
     Ulimit options
 
 
-**-v**, **--volume**=[] Create a bind mount
-   (format: `[host-dir:]container-dir[:<suffix options>]`, where suffix options
-are comma delimited and selected from [rw|ro] and [z|Z].)
+**-v**|**--volume**[=*[[HOST-DIR:]CONTAINER-DIR[:OPTIONS]]*]
+   Create a bind mount. If you specify, ` -v /HOST-DIR:/CONTAINER-DIR`, Docker
+   bind mounts `/HOST-DIR` in the host to `/CONTAINER-DIR` in the Docker
+   container. If 'HOST-DIR' is omitted,  Docker automatically creates the new
+   volume on the host.  The `OPTIONS` are a comma delimited list and can be:
 
 
-   (e.g., using -v /host-dir:/container-dir, bind mounts /host-dir in the
-host to /container-dir in the Docker container)
+   * [rw|ro]
+   * [z|Z]
+   * [`[r]shared`|`[r]slave`|`[r]private`]
 
 
-   If 'host-dir' is missing, then docker automatically creates the new volume
-on the host. **This auto-creation of the host path has been deprecated in
-Release: v1.9.**
+The `CONTAINER-DIR` must be an absolute path such as `/src/docs`. The `HOST-DIR`
+can be an absolute path or a `name` value. A `name` value must start with an
+alphanumeric character, followed by `a-z0-9`, `_` (underscore), `.` (period) or
+`-` (hyphen). An absolute path starts with a `/` (forward slash).
 
 
-   The **-v** option can be used one or
-more times to add one or more mounts to a container. These mounts can then be
-used in other containers using the **--volumes-from** option.
+If you supply a `HOST-DIR` that is an absolute path,  Docker bind-mounts to the
+path you specify. If you supply a `name`, Docker creates a named volume by that
+`name`. For example, you can specify either `/foo` or `foo` for a `HOST-DIR`
+value. If you supply the `/foo` value, Docker creates a bind-mount. If you
+supply the `foo` specification, Docker creates a named volume.
 
 
-   The volume may be optionally suffixed with :ro or :rw to mount the volumes in
-read-only or read-write mode, respectively. By default, the volumes are mounted
-read-write. See examples.
+You can specify multiple  **-v** options to mount one or more mounts to a
+container. To use these same mounts in other containers, specify the
+**--volumes-from** option also.
+
+You can add `:ro` or `:rw` suffix to a volume to mount it  read-only or
+read-write mode, respectively. By default, the volumes are mounted read-write.
+See examples.
 
 
 Labeling systems like SELinux require that proper labels are placed on volume
 Labeling systems like SELinux require that proper labels are placed on volume
 content mounted into a container. Without a label, the security system might
 content mounted into a container. Without a label, the security system might
@@ -508,18 +518,36 @@ content label. Shared volume labels allow all containers to read/write content.
 The `Z` option tells Docker to label the content with a private unshared label.
 The `Z` option tells Docker to label the content with a private unshared label.
 Only the current container can use a private volume.
 Only the current container can use a private volume.
 
 
-The `container-dir` must always be an absolute path such as `/src/docs`.
-The `host-dir` can either be an absolute path or a `name` value. If you
-supply an absolute path for the `host-dir`, Docker bind-mounts to the path
-you specify. If you supply a `name`, Docker creates a named volume by that `name`.
-
-A `name` value must start with start with an alphanumeric character,
-followed by `a-z0-9`, `_` (underscore), `.` (period) or `-` (hyphen).
-An absolute path starts with a `/` (forward slash).
-
-For example, you can specify either `/foo` or `foo` for a `host-dir` value.
-If you supply the `/foo` value, Docker creates a bind-mount. If you supply
-the `foo` specification, Docker creates a named volume.
+By default bind mounted volumes are `private`. That means any mounts done
+inside container will not be visible on host and vice-a-versa. One can change
+this behavior by specifying a volume mount propagation property. Making a
+volume `shared` mounts done under that volume inside container will be
+visible on host and vice-a-versa. Making a volume `slave` enables only one
+way mount propagation and that is mounts done on host under that volume
+will be visible inside container but not the other way around.
+
+To control mount propagation property of volume one can use `:[r]shared`,
+`:[r]slave` or `:[r]private` propagation flag. Propagation property can
+be specified only for bind mounted volumes and not for internal volumes or
+named volumes. For mount propagation to work source mount point (mount point
+where source dir is mounted on) has to have right propagation properties. For
+shared volumes, source mount point has to be shared. And for slave volumes,
+source mount has to be either shared or slave.
+
+Use `df <source-dir>` to figure out the source mount and then use
+`findmnt -o TARGET,PROPAGATION <source-mount-dir>` to figure out propagation
+properties of source mount. If `findmnt` utility is not available, then one
+can look at mount entry for source mount point in `/proc/self/mountinfo`. Look
+at `optional fields` and see if any propagaion properties are specified.
+`shared:X` means mount is `shared`, `master:X` means mount is `slave` and if
+nothing is there that means mount is `private`.
+
+To change propagation properties of a mount point use `mount` command. For
+example, if one wants to bind mount source directory `/foo` one can do
+`mount --bind /foo /foo` and `mount --make-private --make-shared /foo`. This
+will convert /foo into a `shared` mount point. Alternatively one can directly
+change propagation properties of source mount. Say `/` is source mount for
+`/foo`, then use `mount --make-shared /` to convert `/` into a `shared` mount.
 
 
 **--volume-driver**=""
 **--volume-driver**=""
    Container's volume driver. This driver creates volumes specified either from
    Container's volume driver. This driver creates volumes specified either from

+ 10 - 11
volume/volume.go

@@ -52,6 +52,9 @@ type MountPoint struct {
 
 
 	// Note Mode is not used on Windows
 	// Note Mode is not used on Windows
 	Mode string `json:"Relabel"` // Originally field was `Relabel`"
 	Mode string `json:"Relabel"` // Originally field was `Relabel`"
+
+	// Note Propagation is not used on Windows
+	Propagation string // Mount propagation string
 }
 }
 
 
 // Setup sets up a mount point by either mounting the volume if it is
 // Setup sets up a mount point by either mounting the volume if it is
@@ -85,17 +88,6 @@ func (m *MountPoint) Path() string {
 	return m.Source
 	return m.Source
 }
 }
 
 
-// ValidMountMode will make sure the mount mode is valid.
-// returns if it's a valid mount mode or not.
-func ValidMountMode(mode string) bool {
-	return roModes[strings.ToLower(mode)] || rwModes[strings.ToLower(mode)]
-}
-
-// ReadWrite tells you if a mode string is a valid read-write mode or not.
-func ReadWrite(mode string) bool {
-	return rwModes[strings.ToLower(mode)]
-}
-
 // ParseVolumesFrom ensure that the supplied volumes-from is valid.
 // ParseVolumesFrom ensure that the supplied volumes-from is valid.
 func ParseVolumesFrom(spec string) (string, string, error) {
 func ParseVolumesFrom(spec string) (string, string, error) {
 	if len(spec) == 0 {
 	if len(spec) == 0 {
@@ -111,6 +103,13 @@ func ParseVolumesFrom(spec string) (string, string, error) {
 		if !ValidMountMode(mode) {
 		if !ValidMountMode(mode) {
 			return "", "", derr.ErrorCodeVolumeInvalidMode.WithArgs(mode)
 			return "", "", derr.ErrorCodeVolumeInvalidMode.WithArgs(mode)
 		}
 		}
+		// For now don't allow propagation properties while importing
+		// volumes from data container. These volumes will inherit
+		// the same propagation property as of the original volume
+		// in data container. This probably can be relaxed in future.
+		if HasPropagation(mode) {
+			return "", "", derr.ErrorCodeVolumeInvalidMode.WithArgs(mode)
+		}
 	}
 	}
 	return id, mode, nil
 	return id, mode, nil
 }
 }

+ 44 - 0
volume/volume_propagation_linux.go

@@ -0,0 +1,44 @@
+// +build linux
+
+package volume
+
+import (
+	"strings"
+)
+
+// DefaultPropagationMode defines what propagation mode should be used by
+// default if user has not specified one explicitly.
+const DefaultPropagationMode string = "rprivate"
+
+// propagation modes
+var propagationModes = map[string]bool{
+	"private":  true,
+	"rprivate": true,
+	"slave":    true,
+	"rslave":   true,
+	"shared":   true,
+	"rshared":  true,
+}
+
+// GetPropagation extracts and returns the mount propagation mode. If there
+// are no specifications, then by default it is "private".
+func GetPropagation(mode string) string {
+	for _, o := range strings.Split(mode, ",") {
+		if propagationModes[o] {
+			return o
+		}
+	}
+	return DefaultPropagationMode
+}
+
+// HasPropagation checks if there is a valid propagation mode present in
+// passed string. Returns true if a valid propagatio mode specifier is
+// present, false otherwise.
+func HasPropagation(mode string) bool {
+	for _, o := range strings.Split(mode, ",") {
+		if propagationModes[o] {
+			return true
+		}
+	}
+	return false
+}

+ 65 - 0
volume/volume_propagation_linux_test.go

@@ -0,0 +1,65 @@
+// +build linux
+
+package volume
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestParseMountSpecPropagation(t *testing.T) {
+	var (
+		valid   []string
+		invalid map[string]string
+	)
+
+	valid = []string{
+		"/hostPath:/containerPath:shared",
+		"/hostPath:/containerPath:rshared",
+		"/hostPath:/containerPath:slave",
+		"/hostPath:/containerPath:rslave",
+		"/hostPath:/containerPath:private",
+		"/hostPath:/containerPath:rprivate",
+		"/hostPath:/containerPath:ro,shared",
+		"/hostPath:/containerPath:ro,slave",
+		"/hostPath:/containerPath:ro,private",
+		"/hostPath:/containerPath:ro,z,shared",
+		"/hostPath:/containerPath:ro,Z,slave",
+		"/hostPath:/containerPath:Z,ro,slave",
+		"/hostPath:/containerPath:slave,Z,ro",
+		"/hostPath:/containerPath:Z,slave,ro",
+		"/hostPath:/containerPath:slave,ro,Z",
+		"/hostPath:/containerPath:rslave,ro,Z",
+		"/hostPath:/containerPath:ro,rshared,Z",
+		"/hostPath:/containerPath:ro,Z,rprivate",
+	}
+	invalid = map[string]string{
+		"/path:/path:ro,rshared,rslave":   `invalid mode: "ro,rshared,rslave"`,
+		"/path:/path:ro,z,rshared,rslave": `invalid mode: "ro,z,rshared,rslave"`,
+		"/path:shared":                    "Invalid volume specification",
+		"/path:slave":                     "Invalid volume specification",
+		"/path:private":                   "Invalid volume specification",
+		"name:/absolute-path:shared":      "Invalid volume specification",
+		"name:/absolute-path:rshared":     "Invalid volume specification",
+		"name:/absolute-path:slave":       "Invalid volume specification",
+		"name:/absolute-path:rslave":      "Invalid volume specification",
+		"name:/absolute-path:private":     "Invalid volume specification",
+		"name:/absolute-path:rprivate":    "Invalid volume specification",
+	}
+
+	for _, path := range valid {
+		if _, err := ParseMountSpec(path, "local"); err != nil {
+			t.Fatalf("ParseMountSpec(`%q`) should succeed: error %q", path, err)
+		}
+	}
+
+	for path, expectedError := range invalid {
+		if _, err := ParseMountSpec(path, "local"); err == nil {
+			t.Fatalf("ParseMountSpec(`%q`) should have failed validation. Err %v", path, err)
+		} else {
+			if !strings.Contains(err.Error(), expectedError) {
+				t.Fatalf("ParseMountSpec(`%q`) error should contain %q, got %v", path, expectedError, err.Error())
+			}
+		}
+	}
+}

+ 22 - 0
volume/volume_propagation_unsupported.go

@@ -0,0 +1,22 @@
+// +build !linux
+
+package volume
+
+// DefaultPropagationMode is used only in linux. In other cases it returns
+// empty string.
+const DefaultPropagationMode string = ""
+
+// propagation modes not supported on this platform.
+var propagationModes = map[string]bool{}
+
+// GetPropagation is not supported. Return empty string.
+func GetPropagation(mode string) string {
+	return DefaultPropagationMode
+}
+
+// HasPropagation checks if there is a valid propagation mode present in
+// passed string. Returns true if a valid propagatio mode specifier is
+// present, false otherwise.
+func HasPropagation(mode string) bool {
+	return false
+}

+ 65 - 15
volume/volume_unix.go

@@ -12,22 +12,14 @@ import (
 
 
 // read-write modes
 // read-write modes
 var rwModes = map[string]bool{
 var rwModes = map[string]bool{
-	"rw":   true,
-	"rw,Z": true,
-	"rw,z": true,
-	"z,rw": true,
-	"Z,rw": true,
-	"Z":    true,
-	"z":    true,
+	"rw": true,
+	"ro": true,
 }
 }
 
 
-// read-only modes
-var roModes = map[string]bool{
-	"ro":   true,
-	"ro,Z": true,
-	"ro,z": true,
-	"z,ro": true,
-	"Z,ro": true,
+// label modes
+var labelModes = map[string]bool{
+	"Z": true,
+	"z": true,
 }
 }
 
 
 // BackwardsCompatible decides whether this mount point can be
 // BackwardsCompatible decides whether this mount point can be
@@ -51,7 +43,8 @@ func ParseMountSpec(spec, volumeDriver string) (*MountPoint, error) {
 	spec = filepath.ToSlash(spec)
 	spec = filepath.ToSlash(spec)
 
 
 	mp := &MountPoint{
 	mp := &MountPoint{
-		RW: true,
+		RW:          true,
+		Propagation: DefaultPropagationMode,
 	}
 	}
 	if strings.Count(spec, ":") > 2 {
 	if strings.Count(spec, ":") > 2 {
 		return nil, derr.ErrorCodeVolumeInvalid.WithArgs(spec)
 		return nil, derr.ErrorCodeVolumeInvalid.WithArgs(spec)
@@ -84,6 +77,7 @@ func ParseMountSpec(spec, volumeDriver string) (*MountPoint, error) {
 			return nil, derr.ErrorCodeVolumeInvalidMode.WithArgs(mp.Mode)
 			return nil, derr.ErrorCodeVolumeInvalidMode.WithArgs(mp.Mode)
 		}
 		}
 		mp.RW = ReadWrite(mp.Mode)
 		mp.RW = ReadWrite(mp.Mode)
+		mp.Propagation = GetPropagation(mp.Mode)
 	default:
 	default:
 		return nil, derr.ErrorCodeVolumeInvalid.WithArgs(spec)
 		return nil, derr.ErrorCodeVolumeInvalid.WithArgs(spec)
 	}
 	}
@@ -106,6 +100,17 @@ func ParseMountSpec(spec, volumeDriver string) (*MountPoint, error) {
 		if len(mp.Driver) == 0 {
 		if len(mp.Driver) == 0 {
 			mp.Driver = DefaultDriverName
 			mp.Driver = DefaultDriverName
 		}
 		}
+		// Named volumes can't have propagation properties specified.
+		// Their defaults will be decided by docker. This is just a
+		// safeguard. Don't want to get into situations where named
+		// volumes were mounted as '[r]shared' inside container and
+		// container does further mounts under that volume and these
+		// mounts become visible on  host and later original volume
+		// cleanup becomes an issue if container does not unmount
+		// submounts explicitly.
+		if HasPropagation(mp.Mode) {
+			return nil, derr.ErrorCodeVolumeInvalid.WithArgs(spec)
+		}
 	} else {
 	} else {
 		mp.Source = filepath.Clean(source)
 		mp.Source = filepath.Clean(source)
 	}
 	}
@@ -130,3 +135,48 @@ func ParseVolumeSource(spec string) (string, string) {
 func IsVolumeNameValid(name string) (bool, error) {
 func IsVolumeNameValid(name string) (bool, error) {
 	return true, nil
 	return true, nil
 }
 }
+
+// ValidMountMode will make sure the mount mode is valid.
+// returns if it's a valid mount mode or not.
+func ValidMountMode(mode string) bool {
+	rwModeCount := 0
+	labelModeCount := 0
+	propagationModeCount := 0
+
+	for _, o := range strings.Split(mode, ",") {
+		if rwModes[o] {
+			rwModeCount++
+			continue
+		} else if labelModes[o] {
+			labelModeCount++
+			continue
+		} else if propagationModes[o] {
+			propagationModeCount++
+			continue
+		}
+		return false
+	}
+
+	// Only one string for each mode is allowed.
+	if rwModeCount > 1 || labelModeCount > 1 || propagationModeCount > 1 {
+		return false
+	}
+	return true
+}
+
+// ReadWrite tells you if a mode string is a valid read-write mode or not.
+// If there are no specifications w.r.t read write mode, then by default
+// it returs true.
+func ReadWrite(mode string) bool {
+	if !ValidMountMode(mode) {
+		return false
+	}
+
+	for _, o := range strings.Split(mode, ",") {
+		if o == "ro" {
+			return false
+		}
+	}
+
+	return true
+}

+ 11 - 0
volume/volume_windows.go

@@ -186,3 +186,14 @@ func IsVolumeNameValid(name string) (bool, error) {
 	}
 	}
 	return true, nil
 	return true, nil
 }
 }
+
+// ValidMountMode will make sure the mount mode is valid.
+// returns if it's a valid mount mode or not.
+func ValidMountMode(mode string) bool {
+	return roModes[strings.ToLower(mode)] || rwModes[strings.ToLower(mode)]
+}
+
+// ReadWrite tells you if a mode string is a valid read-write mode or not.
+func ReadWrite(mode string) bool {
+	return rwModes[strings.ToLower(mode)]
+}