Ver código fonte

Update restrictions for better handling of mounts

This also cleans up some of the leftover restriction-path code from
before.
Docker-DCO-1.1-Signed-off-by: Michael Crosby <michael@crosbymichael.com> (github: crosbymichael)
Michael Crosby 11 anos atrás
pai
commit
f5139233b9

+ 24 - 36
daemon/execdriver/lxc/driver.go

@@ -2,12 +2,6 @@ package lxc
 
 import (
 	"fmt"
-	"github.com/dotcloud/docker/daemon/execdriver"
-	"github.com/dotcloud/docker/pkg/cgroups"
-	"github.com/dotcloud/docker/pkg/label"
-	"github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
-	"github.com/dotcloud/docker/pkg/system"
-	"github.com/dotcloud/docker/utils"
 	"io/ioutil"
 	"log"
 	"os"
@@ -18,6 +12,13 @@ import (
 	"strings"
 	"syscall"
 	"time"
+
+	"github.com/dotcloud/docker/daemon/execdriver"
+	"github.com/dotcloud/docker/pkg/cgroups"
+	"github.com/dotcloud/docker/pkg/label"
+	"github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
+	"github.com/dotcloud/docker/pkg/system"
+	"github.com/dotcloud/docker/utils"
 )
 
 const DriverName = "lxc"
@@ -27,31 +28,26 @@ func init() {
 		if err := setupEnv(args); err != nil {
 			return err
 		}
-
 		if err := setupHostname(args); err != nil {
 			return err
 		}
-
 		if err := setupNetworking(args); err != nil {
 			return err
 		}
-
-		if err := restrict.Restrict("/", "/empty"); err != nil {
-			return err
+		if !args.Privileged {
+			if err := restrict.Restrict(); err != nil {
+				return err
+			}
 		}
-
 		if err := setupCapabilities(args); err != nil {
 			return err
 		}
-
 		if err := setupWorkingDirectory(args); err != nil {
 			return err
 		}
-
 		if err := system.CloseFdsFrom(3); err != nil {
 			return err
 		}
-
 		if err := changeUser(args); err != nil {
 			return err
 		}
@@ -69,10 +65,9 @@ func init() {
 }
 
 type driver struct {
-	root            string // root path for the driver to use
-	apparmor        bool
-	sharedRoot      bool
-	restrictionPath string
+	root       string // root path for the driver to use
+	apparmor   bool
+	sharedRoot bool
 }
 
 func NewDriver(root string, apparmor bool) (*driver, error) {
@@ -80,15 +75,10 @@ func NewDriver(root string, apparmor bool) (*driver, error) {
 	if err := linkLxcStart(root); err != nil {
 		return nil, err
 	}
-	restrictionPath := filepath.Join(root, "empty")
-	if err := os.MkdirAll(restrictionPath, 0700); err != nil {
-		return nil, err
-	}
 	return &driver{
-		apparmor:        apparmor,
-		root:            root,
-		sharedRoot:      rootIsShared(),
-		restrictionPath: restrictionPath,
+		apparmor:   apparmor,
+		root:       root,
+		sharedRoot: rootIsShared(),
 	}, nil
 }
 
@@ -419,16 +409,14 @@ func (d *driver) generateLXCConfig(c *execdriver.Command) (string, error) {
 
 	if err := LxcTemplateCompiled.Execute(fo, struct {
 		*execdriver.Command
-		AppArmor          bool
-		ProcessLabel      string
-		MountLabel        string
-		RestrictionSource string
+		AppArmor     bool
+		ProcessLabel string
+		MountLabel   string
 	}{
-		Command:           c,
-		AppArmor:          d.apparmor,
-		ProcessLabel:      process,
-		MountLabel:        mount,
-		RestrictionSource: d.restrictionPath,
+		Command:      c,
+		AppArmor:     d.apparmor,
+		ProcessLabel: process,
+		MountLabel:   mount,
 	}); err != nil {
 		return "", err
 	}

+ 3 - 9
daemon/execdriver/lxc/lxc_template.go

@@ -1,10 +1,11 @@
 package lxc
 
 import (
-	"github.com/dotcloud/docker/daemon/execdriver"
-	"github.com/dotcloud/docker/pkg/label"
 	"strings"
 	"text/template"
+
+	"github.com/dotcloud/docker/daemon/execdriver"
+	"github.com/dotcloud/docker/pkg/label"
 )
 
 const LxcTemplate = `
@@ -110,13 +111,6 @@ lxc.aa_profile = unconfined
 {{else}}
 # Let AppArmor normal confinement take place (i.e., not unconfined)
 {{end}}
-{{else}}
-# Restrict access to some stuff in /proc. Note that /proc is already mounted
-# read-only, so we don't need to bother about things that are just dangerous
-# to write to (like sysrq-trigger). Also, recent kernels won't let a container
-# peek into /proc/kcore, but let's cater for people who might run Docker on
-# older kernels. Just in case.
-lxc.mount.entry = {{escapeFstabSpaces $ROOTFS}}/dev/null {{escapeFstabSpaces $ROOTFS}}/proc/kcore none bind,ro 0 0
 {{end}}
 
 # limits

+ 2 - 2
daemon/execdriver/native/create.go

@@ -24,7 +24,7 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Container
 	container.Cgroups.Name = c.ID
 	// check to see if we are running in ramdisk to disable pivot root
 	container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
-	container.Context["restriction_path"] = d.restrictionPath
+	container.Context["restrictions"] = "true"
 
 	if err := d.createNetwork(container, c); err != nil {
 		return nil, err
@@ -84,7 +84,7 @@ func (d *driver) setPrivileged(container *libcontainer.Container) error {
 	}
 	container.Cgroups.DeviceAccess = true
 
-	delete(container.Context, "restriction_path")
+	delete(container.Context, "restrictions")
 
 	if apparmor.IsEnabled() {
 		container.Context["apparmor_profile"] = "unconfined"

+ 0 - 7
daemon/execdriver/native/driver.go

@@ -57,7 +57,6 @@ type driver struct {
 	root             string
 	initPath         string
 	activeContainers map[string]*exec.Cmd
-	restrictionPath  string
 }
 
 func NewDriver(root, initPath string) (*driver, error) {
@@ -68,14 +67,8 @@ func NewDriver(root, initPath string) (*driver, error) {
 	if err := apparmor.InstallDefaultProfile(filepath.Join(root, "../..", BackupApparmorProfilePath)); err != nil {
 		return nil, err
 	}
-	restrictionPath := filepath.Join(root, "empty")
-	if err := os.MkdirAll(restrictionPath, 0700); err != nil {
-		return nil, err
-	}
-
 	return &driver{
 		root:             root,
-		restrictionPath:  restrictionPath,
 		initPath:         initPath,
 		activeContainers: make(map[string]*exec.Cmd),
 	}, nil

+ 2 - 5
pkg/libcontainer/mount/init.go

@@ -123,15 +123,12 @@ func newSystemMounts(rootfs, mountLabel string, mounts libcontainer.Mounts) []mo
 	systemMounts := []mount{
 		{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
 		{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags},
+		{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
+		{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
 	}
 
 	if len(mounts.OfType("devtmpfs")) == 1 {
 		systemMounts = append(systemMounts, mount{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)})
 	}
-	systemMounts = append(systemMounts,
-		mount{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
-		mount{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
-	)
-
 	return systemMounts
 }

+ 2 - 2
pkg/libcontainer/nsinit/init.go

@@ -72,8 +72,8 @@ func Init(container *libcontainer.Container, uncleanRootfs, consolePath string,
 
 	runtime.LockOSThread()
 
-	if restrictionPath := container.Context["restriction_path"]; restrictionPath != "" {
-		if err := restrict.Restrict("/", restrictionPath); err != nil {
+	if container.Context["restrictions"] != "" {
+		if err := restrict.Restrict(); err != nil {
 			return err
 		}
 	}

+ 20 - 45
pkg/libcontainer/security/restrict/restrict.go

@@ -11,67 +11,42 @@ import (
 	"github.com/dotcloud/docker/pkg/system"
 )
 
-// "restrictions" are container paths (files, directories, whatever) that have to be masked.
-// maskPath is a "safe" path to be mounted over maskedPath. It can take two special values:
-// - if it is "", then nothing is mounted;
-// - if it is "EMPTY", then an empty directory is mounted instead.
-// If remountRO is true then the maskedPath is remounted read-only (regardless of whether a maskPath was used).
-type restriction struct {
-	maskedPath string
-	maskPath   string
-	remountRO  bool
-}
-
-var restrictions = []restriction{
-	{"/proc", "", true},
-	{"/sys", "", true},
-	{"/proc/kcore", "/dev/null", false},
-}
-
 // This has to be called while the container still has CAP_SYS_ADMIN (to be able to perform mounts).
 // However, afterwards, CAP_SYS_ADMIN should be dropped (otherwise the user will be able to revert those changes).
-// "empty" should be the path to an empty directory.
-func Restrict(rootfs, empty string) error {
-	for _, restriction := range restrictions {
-		dest := filepath.Join(rootfs, restriction.maskedPath)
-		if restriction.maskPath != "" {
-			var source string
-			if restriction.maskPath == "EMPTY" {
-				source = empty
-			} else {
-				source = filepath.Join(rootfs, restriction.maskPath)
-			}
-			if err := system.Mount(source, dest, "", syscall.MS_BIND, ""); err != nil {
-				return fmt.Errorf("unable to bind-mount %s over %s: %s", source, dest, err)
-			}
-		}
-		if restriction.remountRO {
-			if err := system.Mount("", dest, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil {
-				return fmt.Errorf("unable to remount %s readonly: %s", dest, err)
-			}
+func Restrict() error {
+	// remount proc and sys as readonly
+	for _, dest := range []string{"proc", "sys"} {
+		if err := system.Mount("", dest, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil {
+			return fmt.Errorf("unable to remount %s readonly: %s", dest, err)
 		}
 	}
 
+	if err := system.Mount("/dev/null", "/proc/kcore", "", syscall.MS_BIND, ""); err != nil { // args are (source, target): mask kcore with /dev/null
+		return fmt.Errorf("unable to bind-mount /dev/null over /proc/kcore: %s", err)
+	}
+
 	// This weird trick will allow us to mount /proc read-only, while being able to use AppArmor.
 	// This is because apparently, loading an AppArmor profile requires write access to /proc/1/attr.
 	// So we do another mount of procfs, ensure it's write-able, and bind-mount a subset of it.
-	tmpProcPath := filepath.Join(rootfs, ".proc")
-	if err := os.Mkdir(tmpProcPath, 0700); err != nil {
-		return fmt.Errorf("unable to create temporary proc mountpoint %s: %s", tmpProcPath, err)
+	var (
+		rwAttrPath = filepath.Join(".proc", "1", "attr")
+		roAttrPath = filepath.Join("proc", "1", "attr")
+	)
+
+	if err := os.Mkdir(".proc", 0700); err != nil {
+		return fmt.Errorf("unable to create temporary proc mountpoint .proc: %s", err)
 	}
-	if err := system.Mount("proc", tmpProcPath, "proc", 0, ""); err != nil {
+	if err := system.Mount("proc", ".proc", "proc", 0, ""); err != nil {
 		return fmt.Errorf("unable to mount proc on temporary proc mountpoint: %s", err)
 	}
-	if err := system.Mount("proc", tmpProcPath, "", syscall.MS_REMOUNT, ""); err != nil {
+	if err := system.Mount("proc", ".proc", "", syscall.MS_REMOUNT, ""); err != nil {
 		return fmt.Errorf("unable to remount proc read-write: %s", err)
 	}
-	rwAttrPath := filepath.Join(rootfs, ".proc", "1", "attr")
-	roAttrPath := filepath.Join(rootfs, "proc", "1", "attr")
 	if err := system.Mount(rwAttrPath, roAttrPath, "", syscall.MS_BIND, ""); err != nil {
 		return fmt.Errorf("unable to bind-mount %s on %s: %s", rwAttrPath, roAttrPath, err)
 	}
-	if err := system.Unmount(tmpProcPath, 0); err != nil {
+	if err := system.Unmount(".proc", 0); err != nil {
 		return fmt.Errorf("unable to unmount temporary proc filesystem: %s", err)
 	}
-	return nil
+	return os.RemoveAll(".proc")
 }

+ 1 - 1
pkg/libcontainer/security/restrict/unsupported.go

@@ -4,6 +4,6 @@ package restrict
 
 import "fmt"
 
-func Restrict(rootfs, empty string) error {
+func Restrict() error {
 	return fmt.Errorf("not supported")
 }