浏览代码

Add user namespace enable flag `--userns-remap` in experimental build

This adds the capability to turn on user namespace support when using an
experimental build Docker daemon binary using the `--userns-remap` flag.

Also documentation is added to the experimental docs.

Docker-DCO-1.1-Signed-off-by: Phil Estes <estesp@linux.vnet.ibm.com> (github: estesp)
Phil Estes 9 年之前
父节点
当前提交
44e1023a93

+ 119 - 0
daemon/config_experimental.go

@@ -0,0 +1,119 @@
+// +build experimental
+
+package daemon
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+
+	"github.com/docker/docker/pkg/idtools"
+	flag "github.com/docker/docker/pkg/mflag"
+	"github.com/opencontainers/runc/libcontainer/user"
+)
+
+// attachExperimentalFlags registers the daemon flags that are only available
+// in experimental builds. Currently that is the single --userns-remap option,
+// whose value is stored in config.RemappedRoot (empty by default).
+func (config *Config) attachExperimentalFlags(cmd *flag.FlagSet, usageFn func(string) string) {
+	names := []string{"-userns-remap"}
+	usage := usageFn("User/Group setting for user namespaces")
+	cmd.StringVar(&config.RemappedRoot, names, "", usage)
+}
+
+const ( // special names recognised by parseRemappedRoot
+	defaultIDSpecifier string = "default"   // --userns-remap value that selects the Docker-managed user/group
+	defaultRemappedID  string = "dockremap" // user/group name looked up (or created) when "default" is given
+)
+
+// parseRemappedRoot parses the remapped root (user namespace) option, which
+// can be one of:
+//   username            - valid username from /etc/passwd
+//   username:groupname  - valid username; valid groupname from /etc/group
+//   uid                 - numeric Linux UID value (base 10, fits in 32 bits)
+//   uid:gid             - uid value; numeric Linux GID value
+//   default             - use the "dockremap" user/group, creating it if missing
+//
+// If a username is specified without a groupname, a group with the same name
+// is looked up. If a uid is specified without a gid, the same number is used
+// as the gid.
+//
+// It returns the resolved user name and group name. Every name or ID given
+// must resolve to an existing entry; otherwise an error is returned and both
+// names are empty.
+func parseRemappedRoot(usergrp string) (string, string, error) {
+	var username, groupname string
+
+	idparts := strings.Split(usergrp, ":")
+	if len(idparts) > 2 {
+		return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp)
+	}
+
+	if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil {
+		// numeric, so it must be a uid; it still has to map back to a user name
+		userID := int(uid)
+		luser, err := user.LookupUid(userID)
+		if err != nil {
+			return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err)
+		}
+		username = luser.Name
+		if len(idparts) == 1 {
+			// if the uid was numeric and no gid was specified, take the uid as the gid
+			lgrp, err := user.LookupGid(userID)
+			if err != nil {
+				return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", userID, err)
+			}
+			groupname = lgrp.Name
+		}
+	} else {
+		lookupName := idparts[0]
+		// special case: if the user specified "default", they want Docker to create or
+		// use (after creation) the "dockremap" user/group for root remapping
+		if lookupName == defaultIDSpecifier {
+			lookupName = defaultRemappedID
+		}
+		luser, err := user.LookupUser(lookupName)
+		if err != nil && idparts[0] != defaultIDSpecifier {
+			// error if the name requested isn't the special "dockremap" ID
+			return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err)
+		} else if err != nil {
+			// special case-- if the username == "default", then we have been asked
+			// to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid}
+			// ranges will be used for the user and group mappings in user namespaced containers
+			_, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID)
+			if err == nil {
+				return defaultRemappedID, defaultRemappedID, nil
+			}
+			return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err)
+		}
+		username = luser.Name
+		if len(idparts) == 1 {
+			// we only have a string username, and no group specified; look up gid from username as group
+			group, err := user.LookupGroup(lookupName)
+			if err != nil {
+				return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err)
+			}
+			groupname = group.Name
+		}
+	}
+
+	if len(idparts) == 2 {
+		// groupname or gid is separately specified and must be resolved
+		// to an existing group
+		if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil {
+			// numeric, so it must be a gid; group entries live in /etc/group
+			groupID := int(gid)
+			lgrp, err := user.LookupGid(groupID)
+			if err != nil {
+				return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
+			}
+			groupname = lgrp.Name
+		} else {
+			// not a number; attempt a lookup purely to verify the group exists
+			if _, err := user.LookupGroup(idparts[1]); err != nil {
+				return "", "", fmt.Errorf("Error during gid lookup for %q: %v", idparts[1], err)
+			}
+			groupname = idparts[1]
+		}
+	}
+	return username, groupname, nil
+}

+ 8 - 0
daemon/config_stub.go

@@ -0,0 +1,8 @@
+// +build !experimental
+
+package daemon
+
+import flag "github.com/docker/docker/pkg/mflag"
+
+func (config *Config) attachExperimentalFlags(cmd *flag.FlagSet, usageFn func(string) string) { // no-op: non-experimental builds register no extra flags
+}

+ 110 - 0
daemon/daemon_experimental.go

@@ -0,0 +1,110 @@
+// +build experimental
+
+package daemon
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"runtime"
+
+	"github.com/Sirupsen/logrus"
+	"github.com/docker/docker/pkg/directory"
+	"github.com/docker/docker/pkg/idtools"
+	"github.com/docker/docker/runconfig"
+)
+
+func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
+	if config.ExecDriver != "native" && config.RemappedRoot != "" {
+		return nil, nil, fmt.Errorf("User namespace remapping is only supported with the native execdriver")
+	}
+	if runtime.GOOS == "windows" && config.RemappedRoot != "" {
+		return nil, nil, fmt.Errorf("User namespaces are not supported on Windows")
+	}
+
+	// if the daemon was started with remapped root option, parse
+	// the config option to the int uid,gid values
+	var (
+		uidMaps, gidMaps []idtools.IDMap
+	)
+	if config.RemappedRoot != "" {
+		username, groupname, err := parseRemappedRoot(config.RemappedRoot)
+		if err != nil {
+			return nil, nil, err
+		}
+		if username == "root" {
+			// Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op
+			// effectively
+			logrus.Warnf("User namespaces: root cannot be remapped with itself; user namespaces are OFF")
+			return uidMaps, gidMaps, nil
+		}
+		logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname)
+		// update remapped root setting now that we have resolved them to actual names
+		config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname)
+
+		uidMaps, gidMaps, err = idtools.CreateIDMappings(username, groupname)
+		if err != nil {
+			return nil, nil, fmt.Errorf("Can't create ID mappings: %v", err)
+		}
+	}
+	return uidMaps, gidMaps, nil
+}
+
+// setupDaemonRoot prepares the daemon root directory tree for user namespace
+// support and sets config.Root to the subdirectory the daemon should use.
+//
+// rootDir is the outer root (mode 0755). It always contains a "0.0" subroot
+// for the non-remapped daemon; a pre-existing rootDir without "0.0" has its
+// contents migrated into it. When config.RemappedRoot is set, config.Root is
+// "<rootDir>/<rootUID>.<rootGID>" instead, created with mode 0700 and owned by
+// rootUID:rootGID. Any failure to inspect, create, migrate or chmod a
+// directory is returned as an error.
+func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
+	// the main docker root needs to be accessible by all users, as user namespace support
+	// will create subdirectories owned by either a) the real system root (when no remapping
+	// is setup) or b) the remapped root host ID (when --userns-remap is used)
+	// for "first time" users of user namespaces, we need to migrate the current directory
+	// contents to the "0.0" (root == root "namespace" daemon root)
+	nsRoot := "0.0"
+	unmappedRoot := filepath.Join(rootDir, nsRoot)
+	if _, err := os.Stat(rootDir); err == nil {
+		// root currently exists; we need to check for a prior migration
+		_, statErr := os.Stat(unmappedRoot)
+		if statErr != nil && !os.IsNotExist(statErr) {
+			// neither "exists" nor "missing": don't guess, report it
+			return fmt.Errorf("Cannot access daemon root %q: %v", unmappedRoot, statErr)
+		}
+		if statErr != nil {
+			// need to migrate current root to "0.0" subroot
+			// 1. create non-usernamespaced root as "0.0"
+			if err := os.Mkdir(unmappedRoot, 0700); err != nil {
+				return fmt.Errorf("Cannot create daemon root %q: %v", unmappedRoot, err)
+			}
+			// 2. move current root content to "0.0" new subroot
+			if err := directory.MoveToSubdir(rootDir, nsRoot); err != nil {
+				return fmt.Errorf("Cannot migrate current daemon root %q for user namespaces: %v", rootDir, err)
+			}
+			// 3. chmod outer root to 755
+			if chmodErr := os.Chmod(rootDir, 0755); chmodErr != nil {
+				return chmodErr
+			}
+		}
+	} else if os.IsNotExist(err) {
+		// no root exists yet, create it 0755 with root:root ownership
+		if err := os.MkdirAll(rootDir, 0755); err != nil {
+			return err
+		}
+		// create the "0.0" subroot (so no future "migration" happens of the root)
+		if err := os.Mkdir(unmappedRoot, 0700); err != nil {
+			return err
+		}
+	} else {
+		// stat failed for a reason other than "does not exist"
+		return fmt.Errorf("Cannot access daemon root %q: %v", rootDir, err)
+	}
+
+	// for user namespaces we will create a subtree underneath the specified root
+	// with any/all specified remapped root uid/gid options on the daemon creating
+	// a new subdirectory with ownership set to the remapped uid/gid (so as to allow
+	// `chdir()` to work for containers namespaced to that uid/gid)
+	if config.RemappedRoot != "" {
+		nsRoot = fmt.Sprintf("%d.%d", rootUID, rootGID)
+	}
+	config.Root = filepath.Join(rootDir, nsRoot)
+	logrus.Debugf("Creating actual daemon root: %s", config.Root)
+
+	// Create the root directory if it doesn't exist
+	if err := idtools.MkdirAllAs(config.Root, 0700, rootUID, rootGID); err != nil {
+		return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err)
+	}
+	return nil
+}
+
+// verifyExperimentalContainerSettings rejects container settings that cannot
+// work with the experimental features enabled on this daemon. It currently
+// refuses privileged containers while a remapped root is configured and
+// never produces warnings.
+func (daemon *Daemon) verifyExperimentalContainerSettings(hostConfig *runconfig.HostConfig, config *runconfig.Config) ([]string, error) {
+	if !hostConfig.Privileged {
+		return nil, nil
+	}
+	if daemon.config().RemappedRoot == "" {
+		return nil, nil
+	}
+	return nil, fmt.Errorf("Privileged mode is incompatible with user namespace mappings")
+}

+ 28 - 0
daemon/daemon_stub.go

@@ -0,0 +1,28 @@
+// +build !experimental
+
+package daemon
+
+import (
+	"os"
+
+	"github.com/docker/docker/pkg/idtools"
+	"github.com/docker/docker/pkg/system"
+	"github.com/docker/docker/runconfig"
+)
+
+func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) { // stub for non-experimental builds: no ID mappings, no error
+	return nil, nil, nil
+}
+
+// setupDaemonRoot uses rootDir itself as the daemon root in non-experimental
+// builds and makes sure it exists (mode 0700). rootUID and rootGID are unused
+// here.
+func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
+	config.Root = rootDir
+	// an "already exists" failure is not an error for our purposes
+	err := system.MkdirAll(config.Root, 0700)
+	if err == nil || os.IsExist(err) {
+		return nil
+	}
+	return err
+}
+
+func (daemon *Daemon) verifyExperimentalContainerSettings(hostConfig *runconfig.HostConfig, config *runconfig.Config) ([]string, error) { // stub for non-experimental builds: nothing to verify
+	return nil, nil
+}

+ 3 - 0
experimental/README.md

@@ -71,6 +71,9 @@ to build a Docker binary with the experimental features enabled:
 
 ## Current experimental features
 
+ * [External graphdriver plugins](plugins_graphdriver.md)
+ * [User namespaces](userns.md)
+
 ## How to comment on an experimental feature
 
 Each feature's documentation includes a list of proposal pull requests or PRs associated with the feature. If you want to comment on or suggest a change to a feature, please add it to the existing feature PR.  

+ 120 - 0
experimental/userns.md

@@ -0,0 +1,120 @@
+# Experimental: User namespace support
+
+Linux kernel [user namespace support](http://man7.org/linux/man-pages/man7/user_namespaces.7.html) provides additional security by enabling
+a process--and therefore a container--to have a unique range of user and
+group IDs which are outside the traditional user and group range utilized by
+the host system. Potentially the most important security improvement is that,
+by default, container processes running as the `root` user will have expected
+administrative privilege (with some restrictions) inside the container but will
+effectively be mapped to an unprivileged `uid` on the host.
+
+In this experimental phase, the Docker daemon creates a single daemon-wide mapping
+for all containers running on the same engine instance. The mappings will
+utilize the existing subordinate user and group ID feature available on all modern
+Linux distributions.
+The [`/etc/subuid`](http://man7.org/linux/man-pages/man5/subuid.5.html) and 
+[`/etc/subgid`](http://man7.org/linux/man-pages/man5/subgid.5.html) files will be
+read for the user, and optional group, specified to the `--userns-remap` 
+parameter.  If you do not wish to specify your own user and/or group, you can 
+provide `default` as the value to this flag, and a user will be created on your behalf
+and provided subordinate uid and gid ranges. This default user will be named
+`dockremap`, and entries will be created for it in `/etc/passwd` and 
+`/etc/group` using your distro's standard user and group creation tools.
+
+> **Note**: The single mapping per-daemon restriction exists for this experimental
+> phase because Docker shares image layers from its local cache across all
+> containers running on the engine instance.  Since file ownership must be
+> the same for all containers sharing the same layer content, the decision
+> was made to map the file ownership on `docker pull` to the daemon's user and
+> group mappings so that there is no delay for running containers once the
+> content is downloaded--exactly the same performance characteristics as with
+> user namespaces disabled.
+
+## Starting the daemon with user namespaces enabled
+To enable this experimental user namespace support for a Docker daemon instance,
+start the daemon with the aforementioned `--userns-remap` flag, which accepts
+values in the following formats:
+
+ - uid
+ - uid:gid
+ - username
+ - username:groupname
+
+If numeric IDs are provided, translation back to valid user or group names
+will occur so that the subordinate uid and gid information can be read, given
+these resources are name-based, not id-based.  If the numeric ID information
+provided does not exist as entries in `/etc/passwd` or `/etc/group`, daemon
+startup will fail with an error message.
+
+*An example: starting with default Docker user management:*
+
+```
+     $ docker daemon --userns-remap=default
+```    
+In this case, Docker will create--or find the existing--user and group
+named `dockremap`. If the user is created, and the Linux distribution has
+appropriate support, the `/etc/subuid` and `/etc/subgid` files will be populated
+with a contiguous 65536 length range of subordinate user and group IDs, starting
+at an offset based on prior entries in those files.  For example, Ubuntu will
+create the following range, based on an existing user already having the first
+65536 range:
+
+```
+     $ cat /etc/subuid
+     user1:100000:65536
+     dockremap:165536:65536
+```
+
+> **Note:** On a fresh Fedora install, we found that we had to `touch` the
+> `/etc/subuid` and `/etc/subgid` files to have ranges assigned when users
+> were created.  Once these files existed, range assignment on user creation
+> worked properly.
+
+If you have a preferred/self-managed user with subordinate ID mappings already
+configured, you can provide that username or uid to the `--userns-remap` flag.
+If you have a group that doesn't match the username, you may provide the `gid`
+or group name as well; otherwise the username will be used as the group name
+when querying the system for the subordinate group ID range.
+
+## Detailed information on `subuid`/`subgid` ranges
+
+Given there may be advanced use of the subordinate ID ranges by power users, we will
+describe how the Docker daemon uses the range entries within these files under the
+current experimental user namespace support.
+
+The simplest case exists where only one contiguous range is defined for the
+provided user or group. In this case, Docker will use that entire contiguous
+range for the mapping of host uids and gids to the container process.  This 
+means that the first ID in the range will be the remapped root user, and the
+IDs above that initial ID will map host ID 1 through the end of the range.
+
+From the example `/etc/subuid` content shown above, that means the remapped root
+user would be uid 165536.
+
+If the system administrator has set up multiple ranges for a single user or
+group, the Docker daemon will read all the available ranges and use the
+following algorithm to create the mapping ranges:
+
+1. The ranges will be sorted by *start ID* ascending
+2. Maps will be created from each range, where the host ID will increment starting at 0 for the first range, 0+*range1* length for the second, and so on.  This means that the lowest range start ID will be the remapped root, and all further ranges will map IDs from 1 through the uid or gid that equals the sum of all range lengths.
+3. Range segments beyond the first five will be ignored, as the kernel ignores any ID maps after five (in `/proc/self/{u,g}id_map`)
+
+## User namespace known restrictions
+
+The following standard Docker features are currently incompatible when
+running a Docker daemon with experimental user namespaces enabled:
+
+ - sharing namespaces with the host (--pid=host, --net=host, etc.)
+ - sharing namespaces with other containers (--net=container:*other*)
+ - A `--read-only` container filesystem (a Linux kernel restriction on remount with new flags of a currently mounted filesystem when inside a user namespace)
+ - external (volume/graph) drivers which are unaware/incapable of using daemon user mappings
+ - Using `--privileged` mode containers
+ - Using the lxc execdriver (only the `native` execdriver is enabled to use user namespaces)
+ - volume use without pre-arranging proper file ownership in mounted volumes
+
+Additionally, while the `root` user inside a user namespaced container
+process has many of the privileges of the administrative root user, the
+following operations will fail:
+
+ - Use of `mknod` - permission is denied for device creation by the container root
+ - others will be listed here when fully tested

+ 61 - 0
integration-cli/docker_cli_experimental_test.go

@@ -3,8 +3,15 @@
 package main
 
 import (
+	"fmt"
+	"io/ioutil"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strconv"
 	"strings"
 
+	"github.com/docker/docker/pkg/system"
 	"github.com/go-check/check"
 )
 
@@ -21,3 +28,57 @@ func (s *DockerSuite) TestExperimentalVersion(c *check.C) {
 		c.Fatalf("docker version did not contain experimental: %s", out)
 	}
 }
+
+// user namespaces test: run daemon with remapped root setting
+// 1. validate uid/gid maps are set properly
+// 2. verify that files created are owned by remapped root
+//
+// The remapped root uid/gid are recovered from the daemon root directory name,
+// which is expected to have the form "<uid>.<gid>".
+func (s *DockerDaemonSuite) TestDaemonUserNamespaceRootSetting(c *check.C) {
+	testRequires(c, NativeExecDriver)
+	testRequires(c, SameHostDaemon)
+
+	c.Assert(s.d.StartWithBusybox("--userns-remap", "default"), check.IsNil)
+
+	tmpDir, err := ioutil.TempDir("", "userns")
+	if err != nil {
+		c.Fatal(err)
+	}
+	defer os.RemoveAll(tmpDir)
+
+	// we need to find the uid and gid of the remapped root from the daemon's root dir info
+	uidgid := strings.Split(filepath.Base(s.d.root), ".")
+	c.Assert(len(uidgid), check.Equals, 2, check.Commentf("Should have gotten uid/gid strings from root dirname: %s", filepath.Base(s.d.root)))
+	uid, err := strconv.Atoi(uidgid[0])
+	c.Assert(err, check.IsNil, check.Commentf("Can't parse uid: %v", err))
+	gid, err := strconv.Atoi(uidgid[1])
+	c.Assert(err, check.IsNil, check.Commentf("Can't parse gid: %v", err))
+
+	// make the bind-mounted directory writeable by the remapped root UID/GID pair
+	c.Assert(os.Chown(tmpDir, uid, gid), check.IsNil)
+
+	out, err := s.d.Cmd("run", "-d", "--name", "userns", "-v", tmpDir+":/goofy", "busybox", "sh", "-c", "touch /goofy/testfile; top")
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
+
+	pid, err := s.d.Cmd("inspect", "--format='{{.State.Pid}}'", "userns")
+	if err != nil {
+		c.Fatalf("Could not inspect running container: out: %q; err: %v", pid, err)
+	}
+	procDir := "/proc/" + strings.TrimSpace(pid)
+
+	// check the uid and gid maps for the PID to ensure root is remapped
+	// (cmd = cat /proc/<pid>/uid_map | grep -E '0[[:space:]]+<uid>[[:space:]]+')
+	out, rc1, err := runCommandPipelineWithOutput(
+		exec.Command("cat", procDir+"/uid_map"),
+		exec.Command("grep", "-E", fmt.Sprintf("0[[:space:]]+%d[[:space:]]+", uid)))
+	c.Assert(rc1, check.Equals, 0, check.Commentf("Didn't match uid_map: output: %s", out))
+	// the exit code alone does not cover every pipeline failure; check err too
+	c.Assert(err, check.IsNil, check.Commentf("uid_map pipeline failed: output: %s", out))
+
+	out, rc2, err := runCommandPipelineWithOutput(
+		exec.Command("cat", procDir+"/gid_map"),
+		exec.Command("grep", "-E", fmt.Sprintf("0[[:space:]]+%d[[:space:]]+", gid)))
+	c.Assert(rc2, check.Equals, 0, check.Commentf("Didn't match gid_map: output: %s", out))
+	c.Assert(err, check.IsNil, check.Commentf("gid_map pipeline failed: output: %s", out))
+
+	// check that the touched file is owned by remapped uid:gid
+	stat, err := system.Stat(filepath.Join(tmpDir, "testfile"))
+	if err != nil {
+		c.Fatal(err)
+	}
+	c.Assert(stat.UID(), check.Equals, uint32(uid), check.Commentf("Touched file not owned by remapped root UID"))
+	c.Assert(stat.Gid(), check.Equals, uint32(gid), check.Commentf("Touched file not owned by remapped root GID"))
+}