11 năm trước cách đây · 60a90970bc
--- a/daemon/execdriver/native/create.go
+++ b/daemon/execdriver/native/create.go
@@ -25,6 +25,7 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Container
 
				 	container.Cgroups.Name = c.ID
			
 
				 	// check to see if we are running in ramdisk to disable pivot root
			
 
				 	container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
			
 
				+	container.Context["restriction_path"] = d.restrictionPath
			
 
				 
			
 
				 	if err := d.createNetwork(container, c); err != nil {
			
 
				 		return nil, err
			
@@ -81,6 +82,8 @@ func (d *driver) setPrivileged(container *libcontainer.Container) error {
 
				 		c.Enabled = true
			
 
				 	}
			
 
				 	container.Cgroups.DeviceAccess = true
			
 
				+	delete(container.Context, "restriction_path")
			
 
				+
			
 
				 	if apparmor.IsEnabled() {
			
 
				 		container.Context["apparmor_profile"] = "unconfined"
			
 
				 	}
			
--- a/daemon/execdriver/native/driver.go
+++ b/daemon/execdriver/native/driver.go
@@ -62,6 +62,7 @@ type driver struct {
 
				 	root             string
			
 
				 	initPath         string
			
 
				 	activeContainers map[string]*exec.Cmd
			
 
				+	restrictionPath  string
			
 
				 }
			
 
				 
			
 
				 func NewDriver(root, initPath string) (*driver, error) {
			
@@ -72,8 +73,14 @@ func NewDriver(root, initPath string) (*driver, error) {
 
				 	if err := apparmor.InstallDefaultProfile(filepath.Join(root, "../..", BackupApparmorProfilePath)); err != nil {
			
 
				 		return nil, err
			
 
				 	}
			
 
				+	restrictionPath := filepath.Join(root, "empty")
			
 
				+	if err := os.MkdirAll(restrictionPath, 0700); err != nil {
			
 
				+		return nil, err
			
 
				+	}
			
 
				+
			
 
				 	return &driver{
			
 
				 		root:             root,
			
 
				+		restrictionPath:  restrictionPath,
			
 
				 		initPath:         initPath,
			
 
				 		activeContainers: make(map[string]*exec.Cmd),
			
 
				 	}, nil
			
--- a/pkg/libcontainer/nsinit/init.go
+++ b/pkg/libcontainer/nsinit/init.go
@@ -61,7 +61,7 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol
 
				 
			
 
				 	label.Init()
			
 
				 	ns.logger.Println("setup mount namespace")
			
 
				-	if err := setupNewMountNamespace(rootfs, container.Mounts, console, container.ReadonlyFs, container.NoPivotRoot, container.Context["mount_label"]); err != nil {
			
 
				+	if err := setupNewMountNamespace(rootfs, console, container); err != nil {
			
 
				 		return fmt.Errorf("setup mount namespace %s", err)
			
 
				 	}
			
 
				 	if err := system.Sethostname(container.Hostname); err != nil {
			
--- a/pkg/libcontainer/nsinit/mount.go
+++ b/pkg/libcontainer/nsinit/mount.go
@@ -6,6 +6,7 @@ import (
 
				 	"fmt"
			
 
				 	"github.com/dotcloud/docker/pkg/label"
			
 
				 	"github.com/dotcloud/docker/pkg/libcontainer"
			
 
				+	"github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
			
 
				 	"github.com/dotcloud/docker/pkg/system"
			
 
				 	"io/ioutil"
			
 
				 	"os"
			
@@ -21,9 +22,9 @@ const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NOD
 
				 //
			
 
				 // There is no need to unmount the new mounts because as soon as the mount namespace
			
 
				 // is no longer in use, the mounts will be removed automatically
			
 
				-func setupNewMountNamespace(rootfs string, bindMounts []libcontainer.Mount, console string, readonly, noPivotRoot bool, mountLabel string) error {
			
 
				+func setupNewMountNamespace(rootfs, console string, container *libcontainer.Container) error {
			
 
				 	flag := syscall.MS_PRIVATE
			
 
				-	if noPivotRoot {
			
 
				+	if container.NoPivotRoot {
			
 
				 		flag = syscall.MS_SLAVE
			
 
				 	}
			
 
				 	if err := system.Mount("", "/", "", uintptr(flag|syscall.MS_REC), ""); err != nil {
			
@@ -32,44 +33,28 @@ func setupNewMountNamespace(rootfs string, bindMounts []libcontainer.Mount, cons
 
				 	if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
			
 
				 		return fmt.Errorf("mouting %s as bind %s", rootfs, err)
			
 
				 	}
			
 
				-	if err := mountSystem(rootfs, mountLabel); err != nil {
			
 
				+	if err := mountSystem(rootfs, container.Context["mount_label"]); err != nil {
			
 
				 		return fmt.Errorf("mount system %s", err)
			
 
				 	}
			
 
				-
			
 
				-	for _, m := range bindMounts {
			
 
				-		var (
			
 
				-			flags = syscall.MS_BIND | syscall.MS_REC
			
 
				-			dest  = filepath.Join(rootfs, m.Destination)
			
 
				-		)
			
 
				-		if !m.Writable {
			
 
				-			flags = flags | syscall.MS_RDONLY
			
 
				-		}
			
 
				-		if err := system.Mount(m.Source, dest, "bind", uintptr(flags), ""); err != nil {
			
 
				-			return fmt.Errorf("mounting %s into %s %s", m.Source, dest, err)
			
 
				-		}
			
 
				-		if !m.Writable {
			
 
				-			if err := system.Mount(m.Source, dest, "bind", uintptr(flags|syscall.MS_REMOUNT), ""); err != nil {
			
 
				-				return fmt.Errorf("remounting %s into %s %s", m.Source, dest, err)
			
 
				-			}
			
 
				-		}
			
 
				-		if m.Private {
			
 
				-			if err := system.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil {
			
 
				-				return fmt.Errorf("mounting %s private %s", dest, err)
			
 
				-			}
			
 
				+	if err := setupBindmounts(rootfs, container.Mounts); err != nil {
			
 
				+		return fmt.Errorf("bind mounts %s", err)
			
 
				+	}
			
 
				+	if restrictionPath := container.Context["restriction_path"]; restrictionPath != "" {
			
 
				+		if err := restrict.Restrict(rootfs, restrictionPath); err != nil {
			
 
				+			return fmt.Errorf("restrict %s", err)
			
 
				 		}
			
 
				 	}
			
 
				-
			
 
				 	if err := copyDevNodes(rootfs); err != nil {
			
 
				 		return fmt.Errorf("copy dev nodes %s", err)
			
 
				 	}
			
 
				-	if err := setupPtmx(rootfs, console, mountLabel); err != nil {
			
 
				+	if err := setupPtmx(rootfs, console, container.Context["mount_label"]); err != nil {
			
 
				 		return err
			
 
				 	}
			
 
				 	if err := system.Chdir(rootfs); err != nil {
			
 
				 		return fmt.Errorf("chdir into %s %s", rootfs, err)
			
 
				 	}
			
 
				 
			
 
				-	if noPivotRoot {
			
 
				+	if container.NoPivotRoot {
			
 
				 		if err := rootMsMove(rootfs); err != nil {
			
 
				 			return err
			
 
				 		}
			
@@ -79,7 +64,7 @@ func setupNewMountNamespace(rootfs string, bindMounts []libcontainer.Mount, cons
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	if readonly {
			
 
				+	if container.ReadonlyFs {
			
 
				 		if err := system.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, ""); err != nil {
			
 
				 			return fmt.Errorf("mounting %s as readonly %s", rootfs, err)
			
 
				 		}
			
@@ -263,3 +248,29 @@ func remountSys() error {
 
				 	}
			
 
				 	return nil
			
 
				 }
			
 
				+
			
 
				+func setupBindmounts(rootfs string, bindMounts []libcontainer.Mount) error {
			
 
				+	for _, m := range bindMounts {
			
 
				+		var (
			
 
				+			flags = syscall.MS_BIND | syscall.MS_REC
			
 
				+			dest  = filepath.Join(rootfs, m.Destination)
			
 
				+		)
			
 
				+		if !m.Writable {
			
 
				+			flags = flags | syscall.MS_RDONLY
			
 
				+		}
			
 
				+		if err := system.Mount(m.Source, dest, "bind", uintptr(flags), ""); err != nil {
			
 
				+			return fmt.Errorf("mounting %s into %s %s", m.Source, dest, err)
			
 
				+		}
			
 
				+		if !m.Writable {
			
 
				+			if err := system.Mount(m.Source, dest, "bind", uintptr(flags|syscall.MS_REMOUNT), ""); err != nil {
			
 
				+				return fmt.Errorf("remounting %s into %s %s", m.Source, dest, err)
			
 
				+			}
			
 
				+		}
			
 
				+		if m.Private {
			
 
				+			if err := system.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil {
			
 
				+				return fmt.Errorf("mounting %s private %s", dest, err)
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+	return nil
			
 
				+}
			
--- a/pkg/libcontainer/security/restrict/restrict.go
+++ b/pkg/libcontainer/security/restrict/restrict.go
@@ -0,0 +1,46 @@
 
				+package restrict
			
 
				+
			
 
				+import (
			
 
				+	"fmt"
			
 
				+	"github.com/dotcloud/docker/pkg/system"
			
 
				+	"path/filepath"
			
 
				+	"syscall"
			
 
				+)
			
 
				+
			
 
				+const flags = syscall.MS_BIND | syscall.MS_REC | syscall.MS_RDONLY // mount flags Restrict passes for every covering bind mount
			
 
				+
			
 
				+var restrictions = map[string]string{ // path under rootfs -> source that Restrict bind mounts over it
			
 
				+	// dirs: an empty source makes Restrict substitute the empty directory supplied by its caller
			
 
				+	"/proc/sys":  "",
			
 
				+	"/proc/irq":  "",
			
 
				+	"/proc/acpi": "",
			
 
				+
			
 
				+	// files: a non-empty source is joined onto rootfs by Restrict before mounting
			
 
				+	"/proc/sysrq-trigger": "/dev/null",
			
 
				+	"/proc/kcore":         "/dev/null",
			
 
				+}
			
 
				+
			
 
				+// Restrict locks down access to many areas of proc inside rootfs by bind
			
 
				+// mounting a read-only source over every path listed in restrictions.
			
 
				+// An entry with an empty source is covered with the directory passed as
			
 
				+// empty; any other source is resolved relative to rootfs.
			
 
				+//
			
 
				+// It works on the assumption that the user does not have mount caps to
			
 
				+// revert the changes made here. The first failing mount or remount is
			
 
				+// returned as an error and the remaining paths are left untouched.
			
 
				+func Restrict(rootfs, empty string) error {
			
 
				+	for dest, source := range restrictions {
			
 
				+		dest = filepath.Join(rootfs, dest)
			
 
				+
			
 
				+		// we don't have a "/dev/null" for dirs so have the requester pass a dir
			
 
				+		// for us to bind mount
			
 
				+		switch source {
			
 
				+		case "":
			
 
				+			source = empty
			
 
				+		default:
			
 
				+			source = filepath.Join(rootfs, source)
			
 
				+		}
			
 
				+		if err := system.Mount(source, dest, "bind", flags, ""); err != nil {
			
 
				+			return fmt.Errorf("unable to mount %s over %s %s", source, dest, err)
			
 
				+		}
			
 
				+		// remount with MS_REMOUNT so the read-only flag takes effect on the
			
 
				+		// bind mount (see mount(2)); report it distinctly from the first mount
			
 
				+		if err := system.Mount("", dest, "bind", flags|syscall.MS_REMOUNT, ""); err != nil {
			
 
				+			return fmt.Errorf("unable to remount %s over %s %s", source, dest, err)
			
 
				+		}
			
 
				+	}
			
 
				+	return nil
			
 
				+}