Parcourir la source

Merge pull request #12648 from estesp/userns-impl

Phase 1 implementation of user namespaces as a remapped container root
Arnaud Porterie il y a 9 ans
Parent
commit
ed9434c5bb
83 fichiers modifiés avec 1836 ajouts et 270 suppressions
  1. 1 0
      Makefile
  2. 9 1
      api/server/router/local/image.go
  3. 1 0
      daemon/config.go
  4. 119 0
      daemon/config_experimental.go
  5. 8 0
      daemon/config_stub.go
  6. 3 0
      daemon/config_unix.go
  7. 6 1
      daemon/container.go
  8. 21 2
      daemon/container_unix.go
  9. 47 15
      daemon/daemon.go
  10. 110 0
      daemon/daemon_experimental.go
  11. 28 0
      daemon/daemon_stub.go
  12. 1 1
      daemon/daemon_test.go
  13. 12 6
      daemon/daemon_unix.go
  14. 2 2
      daemon/daemon_windows.go
  15. 7 6
      daemon/daemonbuilder/builder.go
  16. 10 0
      daemon/execdriver/driver.go
  17. 40 0
      daemon/execdriver/native/create.go
  18. 66 8
      daemon/execdriver/native/driver.go
  19. 8 1
      daemon/execdriver/native/exec.go
  20. 1 0
      daemon/execdriver/native/template/default_template.go
  21. 5 1
      daemon/execdriver/termconsole.go
  22. 33 8
      daemon/graphdriver/aufs/aufs.go
  23. 1 1
      daemon/graphdriver/aufs/aufs_test.go
  24. 10 3
      daemon/graphdriver/aufs/migrate.go
  25. 19 6
      daemon/graphdriver/btrfs/btrfs.go
  26. 23 3
      daemon/graphdriver/devmapper/deviceset.go
  27. 1 1
      daemon/graphdriver/devmapper/devmapper_test.go
  28. 19 6
      daemon/graphdriver/devmapper/driver.go
  29. 12 11
      daemon/graphdriver/driver.go
  30. 12 4
      daemon/graphdriver/fsdiff.go
  31. 1 1
      daemon/graphdriver/graphtest/graphtest.go
  32. 36 13
      daemon/graphdriver/overlay/overlay.go
  33. 23 7
      daemon/graphdriver/vfs/driver.go
  34. 4 3
      daemon/graphdriver/windows/windows.go
  35. 13 4
      daemon/graphdriver/zfs/zfs.go
  36. 11 1
      daemon/volumes_unix.go
  37. 3 0
      experimental/README.md
  38. 120 0
      experimental/userns.md
  39. 17 4
      graph/graph.go
  40. 2 2
      graph/graph_test.go
  41. 2 2
      graph/tags_unit_test.go
  42. 2 1
      hack/make.sh
  43. 7 0
      hack/make/.integration-daemon-start
  44. 1 0
      integration-cli/docker_api_build_test.go
  45. 3 1
      integration-cli/docker_api_containers_test.go
  46. 22 5
      integration-cli/docker_cli_build_test.go
  47. 5 3
      integration-cli/docker_cli_cp_test.go
  48. 6 2
      integration-cli/docker_cli_cp_to_container_test.go
  49. 1 1
      integration-cli/docker_cli_create_test.go
  50. 4 4
      integration-cli/docker_cli_daemon_test.go
  51. 4 2
      integration-cli/docker_cli_exec_test.go
  52. 61 0
      integration-cli/docker_cli_experimental_test.go
  53. 35 25
      integration-cli/docker_cli_external_graphdriver_unix_test.go
  54. 4 3
      integration-cli/docker_cli_links_test.go
  55. 1 1
      integration-cli/docker_cli_links_unix_test.go
  56. 1 1
      integration-cli/docker_cli_nat_test.go
  57. 3 3
      integration-cli/docker_cli_netmode_test.go
  58. 2 2
      integration-cli/docker_cli_port_test.go
  59. 61 48
      integration-cli/docker_cli_run_test.go
  60. 3 1
      integration-cli/docker_cli_run_unix_test.go
  61. 1 1
      integration-cli/docker_cli_top_test.go
  62. 28 4
      integration-cli/docker_test_vars.go
  63. 56 5
      integration-cli/docker_utils.go
  64. 11 0
      integration-cli/requirements.go
  65. 99 5
      pkg/archive/archive.go
  66. 9 0
      pkg/archive/archive_unix.go
  67. 5 0
      pkg/archive/archive_windows.go
  68. 4 1
      pkg/archive/changes.go
  69. 2 2
      pkg/archive/changes_posix_test.go
  70. 1 1
      pkg/archive/changes_test.go
  71. 35 6
      pkg/archive/diff.go
  72. 3 3
      pkg/chrootarchive/diff.go
  73. 21 4
      pkg/chrootarchive/diff_unix.go
  74. 2 2
      pkg/chrootarchive/diff_windows.go
  75. 26 0
      pkg/directory/directory.go
  76. 45 0
      pkg/directory/directory_test.go
  77. 0 1
      pkg/directory/directory_windows.go
  78. 207 0
      pkg/idtools/idtools.go
  79. 155 0
      pkg/idtools/usergroupadd_linux.go
  80. 12 0
      pkg/idtools/usergroupadd_unsupported.go
  81. 10 2
      pkg/plugins/client.go
  82. 8 3
      volume/local/local.go
  83. 3 3
      volume/local/local_test.go

+ 1 - 0
Makefile

@@ -9,6 +9,7 @@ DOCKER_ENVS := \
 	-e DOCKER_DEBUG \
 	-e DOCKER_EXECDRIVER \
 	-e DOCKER_EXPERIMENTAL \
+	-e DOCKER_REMAP_ROOT \
 	-e DOCKER_GRAPHDRIVER \
 	-e DOCKER_STORAGE_OPTS \
 	-e DOCKER_USERLANDPROXY \

+ 9 - 1
api/server/router/local/image.go

@@ -18,6 +18,8 @@ import (
 	"github.com/docker/docker/daemon/daemonbuilder"
 	"github.com/docker/docker/graph"
 	"github.com/docker/docker/graph/tags"
+	"github.com/docker/docker/pkg/archive"
+	"github.com/docker/docker/pkg/chrootarchive"
 	"github.com/docker/docker/pkg/ioutils"
 	"github.com/docker/docker/pkg/parsers"
 	"github.com/docker/docker/pkg/progressreader"
@@ -393,7 +395,13 @@ func (s *router) postBuild(ctx context.Context, w http.ResponseWriter, r *http.R
 		}
 	}()
 
-	docker := daemonbuilder.Docker{s.daemon, output, authConfigs}
+	uidMaps, gidMaps := s.daemon.GetUIDGIDMaps()
+	defaultArchiver := &archive.Archiver{
+		Untar:   chrootarchive.Untar,
+		UIDMaps: uidMaps,
+		GIDMaps: gidMaps,
+	}
+	docker := daemonbuilder.Docker{s.daemon, output, authConfigs, defaultArchiver}
 
 	b, err := dockerfile.NewBuilder(buildConfig, docker, builder.DockerIgnoreContext{context}, nil)
 	if err != nil {

+ 1 - 0
daemon/config.go

@@ -30,6 +30,7 @@ type CommonConfig struct {
 	LogConfig      runconfig.LogConfig
 	Mtu            int
 	Pidfile        string
+	RemappedRoot   string
 	Root           string
 	TrustKeyPath   string
 	DefaultNetwork string

+ 119 - 0
daemon/config_experimental.go

@@ -0,0 +1,119 @@
+// +build experimental
+
+package daemon
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+
+	"github.com/docker/docker/pkg/idtools"
+	flag "github.com/docker/docker/pkg/mflag"
+	"github.com/opencontainers/runc/libcontainer/user"
+)
+
+func (config *Config) attachExperimentalFlags(cmd *flag.FlagSet, usageFn func(string) string) {
+	cmd.StringVar(&config.RemappedRoot, []string{"-userns-remap"}, "", usageFn("User/Group setting for user namespaces"))
+}
+
+const (
+	defaultIDSpecifier string = "default"   // remap value that selects the Docker-managed user/group below
+	defaultRemappedID  string = "dockremap" // user/group name looked up (and created if the lookup fails) for "default"
+)
+
+// parseRemappedRoot parses the --userns-remap option value, which can be one of:
+//   username            - valid username from /etc/passwd
+//   username:groupname  - valid username; valid groupname from /etc/group
+//   uid                 - numeric Linux UID value
+//   uid:gid             - uid value; numeric Linux GID value
+//
+// Numeric IDs are parsed as base-10 integers that must fit in a signed 32-bit
+// value; anything that does not parse as such is treated as a name.
+//
+// If no group is specified, the group is derived from the user: a numeric uid
+// is reused as the gid, and a username is looked up as a groupname.
+//
+// The special username "default" selects the "dockremap" user/group, which is
+// created through idtools.AddNamespaceRangesUser when the user lookup fails.
+//
+// All names and IDs are verified through the user package lookups. On success
+// the resolved user name and group name are returned; otherwise both strings
+// are empty and the error describes the failed parse or lookup. A value with
+// more than one ':' separator is rejected.
+func parseRemappedRoot(usergrp string) (string, string, error) {
+	var (
+		userID, groupID     int
+		username, groupname string
+	)
+
+	idparts := strings.Split(usergrp, ":")
+	if len(idparts) > 2 {
+		return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp)
+	}
+
+	if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil {
+		// must be a uid; take it as valid
+		userID = int(uid)
+		luser, err := user.LookupUid(userID)
+		if err != nil {
+			return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err)
+		}
+		username = luser.Name
+		if len(idparts) == 1 {
+			// if the uid was numeric and no gid was specified, take the uid as the gid
+			groupID = userID
+			lgrp, err := user.LookupGid(groupID)
+			if err != nil {
+				return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
+			}
+			groupname = lgrp.Name
+		}
+	} else {
+		lookupName := idparts[0]
+		// special case: if the user specified "default", they want Docker to create or
+		// use (after creation) the "dockremap" user/group for root remapping
+		if lookupName == defaultIDSpecifier {
+			lookupName = defaultRemappedID
+		}
+		luser, err := user.LookupUser(lookupName)
+		if err != nil && idparts[0] != defaultIDSpecifier {
+			// error if the name requested isn't the special "dockremap" ID
+			return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err)
+		} else if err != nil {
+			// special case-- if the username == "default", then we have been asked
+			// to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid}
+			// ranges will be used for the user and group mappings in user namespaced containers
+			_, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID)
+			if err == nil {
+				return defaultRemappedID, defaultRemappedID, nil
+			}
+			return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err)
+		}
+		userID = luser.Uid
+		username = luser.Name
+		if len(idparts) == 1 {
+			// we only have a string username, and no group specified; look up gid from username as group
+			group, err := user.LookupGroup(lookupName)
+			if err != nil {
+				return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err)
+			}
+			groupID = group.Gid
+			groupname = group.Name
+		}
+	}
+
+	if len(idparts) == 2 {
+		// groupname or gid is separately specified and must be resolved
+		// to a numeric gid that has a group entry
+		if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil {
+			// must be a gid, take it as valid
+			groupID = int(gid)
+			lgrp, err := user.LookupGid(groupID)
+			if err != nil {
+				// a gid is resolved against the group database, not passwd
+				return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
+			}
+			groupname = lgrp.Name
+		} else {
+			// not a number; attempt a lookup
+			group, err := user.LookupGroup(idparts[1])
+			if err != nil {
+				return "", "", fmt.Errorf("Error during gid lookup for %q: %v", idparts[1], err)
+			}
+			groupID = group.Gid
+			groupname = idparts[1]
+		}
+	}
+	return username, groupname, nil
+}

+ 8 - 0
daemon/config_stub.go

@@ -0,0 +1,8 @@
+// +build !experimental
+
+package daemon
+
+import flag "github.com/docker/docker/pkg/mflag"
+
+func (config *Config) attachExperimentalFlags(cmd *flag.FlagSet, usageFn func(string) string) { // no-op: this file builds only without the "experimental" tag, so no experimental flags are registered
+}

+ 3 - 0
daemon/config_unix.go

@@ -27,6 +27,7 @@ type Config struct {
 	CorsHeaders          string
 	EnableCors           bool
 	EnableSelinuxSupport bool
+	RemappedRoot         string
 	SocketGroup          string
 	Ulimits              map[string]*ulimit.Ulimit
 }
@@ -77,4 +78,6 @@ func (config *Config) InstallFlags(cmd *flag.FlagSet, usageFn func(string) strin
 	cmd.BoolVar(&config.Bridge.EnableUserlandProxy, []string{"-userland-proxy"}, true, usageFn("Use userland proxy for loopback traffic"))
 	cmd.BoolVar(&config.EnableCors, []string{"#api-enable-cors", "#-api-enable-cors"}, false, usageFn("Enable CORS headers in the remote API, this is deprecated by --api-cors-header"))
 	cmd.StringVar(&config.CorsHeaders, []string{"-api-cors-header"}, "", usageFn("Set CORS headers in the remote API"))
+
+	config.attachExperimentalFlags(cmd, usageFn)
 }

+ 6 - 1
daemon/container.go

@@ -553,7 +553,12 @@ func (container *Container) export() (archive.Archive, error) {
 		return nil, err
 	}
 
-	archive, err := archive.Tar(container.basefs, archive.Uncompressed)
+	uidMaps, gidMaps := container.daemon.GetUIDGIDMaps()
+	archive, err := archive.TarWithOptions(container.basefs, &archive.TarOptions{
+		Compression: archive.Uncompressed,
+		UIDMaps:     uidMaps,
+		GIDMaps:     gidMaps,
+	})
 	if err != nil {
 		container.Unmount()
 		return nil, err

+ 21 - 2
daemon/container_unix.go

@@ -20,6 +20,7 @@ import (
 	"github.com/docker/docker/daemon/network"
 	derr "github.com/docker/docker/errors"
 	"github.com/docker/docker/pkg/directory"
+	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/pkg/nat"
 	"github.com/docker/docker/pkg/stringid"
 	"github.com/docker/docker/pkg/system"
@@ -302,6 +303,14 @@ func populateCommand(c *Container, env []string) error {
 	processConfig.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
 	processConfig.Env = env
 
+	remappedRoot := &execdriver.User{}
+	rootUID, rootGID := c.daemon.GetRemappedUIDGID()
+	if rootUID != 0 {
+		remappedRoot.UID = rootUID
+		remappedRoot.GID = rootGID
+	}
+	uidMap, gidMap := c.daemon.GetUIDGIDMaps()
+
 	c.command = &execdriver.Command{
 		ID:                 c.ID,
 		Rootfs:             c.rootfsPath(),
@@ -310,6 +319,9 @@ func populateCommand(c *Container, env []string) error {
 		WorkingDir:         c.Config.WorkingDir,
 		Network:            en,
 		Ipc:                ipc,
+		UIDMapping:         uidMap,
+		GIDMapping:         gidMap,
+		RemappedRoot:       remappedRoot,
 		Pid:                pid,
 		UTS:                uts,
 		Resources:          resources,
@@ -1343,19 +1355,23 @@ func (container *Container) hasMountFor(path string) bool {
 }
 
 func (container *Container) setupIpcDirs() error {
+	rootUID, rootGID := container.daemon.GetRemappedUIDGID()
 	if !container.hasMountFor("/dev/shm") {
 		shmPath, err := container.shmPath()
 		if err != nil {
 			return err
 		}
 
-		if err := os.MkdirAll(shmPath, 0700); err != nil {
+		if err := idtools.MkdirAllAs(shmPath, 0700, rootUID, rootGID); err != nil {
 			return err
 		}
 
 		if err := syscall.Mount("shm", shmPath, "tmpfs", uintptr(syscall.MS_NOEXEC|syscall.MS_NOSUID|syscall.MS_NODEV), label.FormatMountLabel("mode=1777,size=65536k", container.getMountLabel())); err != nil {
 			return fmt.Errorf("mounting shm tmpfs: %s", err)
 		}
+		if err := os.Chown(shmPath, rootUID, rootGID); err != nil {
+			return err
+		}
 	}
 
 	if !container.hasMountFor("/dev/mqueue") {
@@ -1364,13 +1380,16 @@ func (container *Container) setupIpcDirs() error {
 			return err
 		}
 
-		if err := os.MkdirAll(mqueuePath, 0700); err != nil {
+		if err := idtools.MkdirAllAs(mqueuePath, 0700, rootUID, rootGID); err != nil {
 			return err
 		}
 
 		if err := syscall.Mount("mqueue", mqueuePath, "mqueue", uintptr(syscall.MS_NOEXEC|syscall.MS_NOSUID|syscall.MS_NODEV), ""); err != nil {
 			return fmt.Errorf("mounting mqueue mqueue : %s", err)
 		}
+		if err := os.Chown(mqueuePath, rootUID, rootGID); err != nil {
+			return err
+		}
 	}
 
 	return nil

+ 47 - 15
daemon/daemon.go

@@ -37,6 +37,7 @@ import (
 	"github.com/docker/docker/pkg/discovery"
 	"github.com/docker/docker/pkg/fileutils"
 	"github.com/docker/docker/pkg/graphdb"
+	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/pkg/ioutils"
 	"github.com/docker/docker/pkg/namesgenerator"
 	"github.com/docker/docker/pkg/nat"
@@ -121,6 +122,8 @@ type Daemon struct {
 	discoveryWatcher discovery.Watcher
 	root             string
 	shutdown         bool
+	uidMaps          []idtools.IDMap
+	gidMaps          []idtools.IDMap
 }
 
 // Get looks for a container using the provided information, which could be
@@ -632,6 +635,15 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
 	// on Windows to dump Go routine stacks
 	setupDumpStackTrap()
 
+	uidMaps, gidMaps, err := setupRemappedRoot(config)
+	if err != nil {
+		return nil, err
+	}
+	rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
+	if err != nil {
+		return nil, err
+	}
+
 	// get the canonical path to the Docker root directory
 	var realRoot string
 	if _, err := os.Stat(config.Root); err != nil && os.IsNotExist(err) {
@@ -642,14 +654,13 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
 			return nil, fmt.Errorf("Unable to get the full path to root (%s): %s", config.Root, err)
 		}
 	}
-	config.Root = realRoot
-	// Create the root directory if it doesn't exists
-	if err := system.MkdirAll(config.Root, 0700); err != nil {
+
+	if err = setupDaemonRoot(config, realRoot, rootUID, rootGID); err != nil {
 		return nil, err
 	}
 
 	// set up the tmpDir to use a canonical path
-	tmp, err := tempDir(config.Root)
+	tmp, err := tempDir(config.Root, rootUID, rootGID)
 	if err != nil {
 		return nil, fmt.Errorf("Unable to get the TempDir under %s: %s", config.Root, err)
 	}
@@ -663,7 +674,7 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
 	graphdriver.DefaultDriver = config.GraphDriver
 
 	// Load storage driver
-	driver, err := graphdriver.New(config.Root, config.GraphOptions)
+	driver, err := graphdriver.New(config.Root, config.GraphOptions, uidMaps, gidMaps)
 	if err != nil {
 		return nil, fmt.Errorf("error initializing graphdriver: %v", err)
 	}
@@ -696,7 +707,7 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
 
 	daemonRepo := filepath.Join(config.Root, "containers")
 
-	if err := system.MkdirAll(daemonRepo, 0700); err != nil {
+	if err := idtools.MkdirAllAs(daemonRepo, 0700, rootUID, rootGID); err != nil && !os.IsExist(err) {
 		return nil, err
 	}
 
@@ -706,13 +717,13 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
 	}
 
 	logrus.Debug("Creating images graph")
-	g, err := graph.NewGraph(filepath.Join(config.Root, "graph"), d.driver)
+	g, err := graph.NewGraph(filepath.Join(config.Root, "graph"), d.driver, uidMaps, gidMaps)
 	if err != nil {
 		return nil, err
 	}
 
 	// Configure the volumes driver
-	volStore, err := configureVolumes(config)
+	volStore, err := configureVolumes(config, rootUID, rootGID)
 	if err != nil {
 		return nil, err
 	}
@@ -777,7 +788,7 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
 
 	var sysInitPath string
 	if config.ExecDriver == "lxc" {
-		initPath, err := configureSysInit(config)
+		initPath, err := configureSysInit(config, rootUID, rootGID)
 		if err != nil {
 			return nil, err
 		}
@@ -812,6 +823,8 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
 	d.EventsService = eventsService
 	d.volumes = volStore
 	d.root = config.Root
+	d.uidMaps = uidMaps
+	d.gidMaps = gidMaps
 
 	if err := d.cleanupMounts(); err != nil {
 		return nil, err
@@ -974,7 +987,11 @@ func (daemon *Daemon) diff(container *Container) (archive.Archive, error) {
 func (daemon *Daemon) createRootfs(container *Container) error {
 	// Step 1: create the container directory.
 	// This doubles as a barrier to avoid race conditions.
-	if err := os.Mkdir(container.root, 0700); err != nil {
+	rootUID, rootGID, err := idtools.GetRootUIDGID(daemon.uidMaps, daemon.gidMaps)
+	if err != nil {
+		return err
+	}
+	if err := idtools.MkdirAs(container.root, 0700, rootUID, rootGID); err != nil {
 		return err
 	}
 	initID := fmt.Sprintf("%s-init", container.ID)
@@ -986,7 +1003,7 @@ func (daemon *Daemon) createRootfs(container *Container) error {
 		return err
 	}
 
-	if err := setupInitLayer(initPath); err != nil {
+	if err := setupInitLayer(initPath, rootUID, rootGID); err != nil {
 		daemon.driver.Put(initID)
 		return err
 	}
@@ -1105,6 +1122,21 @@ func (daemon *Daemon) containerGraph() *graphdb.Database {
 	return daemon.containerGraphDB
 }
 
+// GetUIDGIDMaps returns the uid map and the gid map (in that order) stored
+// on this daemon for use with user-namespaced containers. The slices are
+// the daemon's own fields, not copies.
+func (daemon *Daemon) GetUIDGIDMaps() ([]idtools.IDMap, []idtools.IDMap) {
+	return daemon.uidMaps, daemon.gidMaps
+}
+
+// GetRemappedUIDGID returns the uid and gid that idtools.GetRootUIDGID
+// derives from the daemon's uid/gid maps. Per the original note, these are
+// the "real" root values 0, 0 when user namespaces are not in use.
+func (daemon *Daemon) GetRemappedUIDGID() (int, int) {
+	uid, gid, _ := idtools.GetRootUIDGID(daemon.uidMaps, daemon.gidMaps) // error is dropped here; NOTE(review): confirm which uid/gid come back on failure
+	return uid, gid
+}
+
 // ImageGetCached returns the earliest created image that is a child
 // of the image with imgID, that had the same config when it was
 // created. nil is returned if a child cannot be found. An error is
@@ -1139,12 +1171,12 @@ func (daemon *Daemon) ImageGetCached(imgID string, config *runconfig.Config) (*i
 }
 
 // tempDir returns the default directory to use for temporary files.
-func tempDir(rootDir string) (string, error) {
+func tempDir(rootDir string, rootUID, rootGID int) (string, error) {
 	var tmpDir string
 	if tmpDir = os.Getenv("DOCKER_TMPDIR"); tmpDir == "" {
 		tmpDir = filepath.Join(rootDir, "tmp")
 	}
-	return tmpDir, system.MkdirAll(tmpDir, 0700)
+	return tmpDir, idtools.MkdirAllAs(tmpDir, 0700, rootUID, rootGID)
 }
 
 func (daemon *Daemon) setHostConfig(container *Container, hostConfig *runconfig.HostConfig) error {
@@ -1228,8 +1260,8 @@ func (daemon *Daemon) verifyContainerSettings(hostConfig *runconfig.HostConfig,
 	return verifyPlatformContainerSettings(daemon, hostConfig, config)
 }
 
-func configureVolumes(config *Config) (*store.VolumeStore, error) {
-	volumesDriver, err := local.New(config.Root)
+func configureVolumes(config *Config, rootUID, rootGID int) (*store.VolumeStore, error) {
+	volumesDriver, err := local.New(config.Root, rootUID, rootGID)
 	if err != nil {
 		return nil, err
 	}

+ 110 - 0
daemon/daemon_experimental.go

@@ -0,0 +1,110 @@
+// +build experimental
+
+package daemon
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"runtime"
+
+	"github.com/Sirupsen/logrus"
+	"github.com/docker/docker/pkg/directory"
+	"github.com/docker/docker/pkg/idtools"
+	"github.com/docker/docker/runconfig"
+)
+
+func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
+	if config.ExecDriver != "native" && config.RemappedRoot != "" {
+		return nil, nil, fmt.Errorf("User namespace remapping is only supported with the native execdriver")
+	}
+	if runtime.GOOS == "windows" && config.RemappedRoot != "" {
+		return nil, nil, fmt.Errorf("User namespaces are not supported on Windows")
+	}
+
+	// if the daemon was started with remapped root option, parse
+	// the config option to the int uid,gid values
+	var (
+		uidMaps, gidMaps []idtools.IDMap
+	)
+	if config.RemappedRoot != "" {
+		username, groupname, err := parseRemappedRoot(config.RemappedRoot)
+		if err != nil {
+			return nil, nil, err
+		}
+		if username == "root" {
+			// Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op
+			// effectively
+			logrus.Warnf("User namespaces: root cannot be remapped with itself; user namespaces are OFF")
+			return uidMaps, gidMaps, nil
+		}
+		logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname)
+		// update remapped root setting now that we have resolved them to actual names
+		config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname)
+
+		uidMaps, gidMaps, err = idtools.CreateIDMappings(username, groupname)
+		if err != nil {
+			return nil, nil, fmt.Errorf("Can't create ID mappings: %v", err)
+		}
+	}
+	return uidMaps, gidMaps, nil
+}
+
+// setupDaemonRoot prepares the daemon root layout used for user namespace
+// support and points config.Root at the subdirectory this daemon will use.
+//
+// rootDir holds one subdirectory per remapping, named "<uid>.<gid>": "0.0"
+// when no remapping is configured, or rootUID.rootGID when --userns-remap is
+// set. rootDir must therefore be accessible to all users; it is created with
+// mode 0755, or chmodded to 0755 when an existing root is migrated.
+//
+// An existing rootDir that has no "0.0" subdirectory yet is migrated by
+// moving its current content into a new "0.0" subdirectory. A rootDir that
+// does not exist is created together with an empty "0.0" subdirectory, so
+// that no migration is attempted later.
+//
+// The selected subdirectory is finally created through idtools.MkdirAllAs
+// with mode 0700 and rootUID/rootGID (so as to allow `chdir()` to work for
+// containers namespaced to that uid/gid).
+//
+// Any failure is returned as an error, including an os.Stat failure on
+// rootDir or on the "0.0" subdirectory that is not a plain "does not exist".
+func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
+	nsRoot := "0.0"
+	defaultRoot := filepath.Join(rootDir, nsRoot)
+
+	_, err := os.Stat(rootDir)
+	switch {
+	case err == nil:
+		// root currently exists; we need to check for a prior migration
+		_, statErr := os.Stat(defaultRoot)
+		if statErr != nil && !os.IsNotExist(statErr) {
+			// do not guess at the state of the root when it cannot be inspected
+			return fmt.Errorf("Cannot access daemon root %q: %v", defaultRoot, statErr)
+		}
+		if statErr != nil {
+			// need to migrate current root to "0.0" subroot
+			// 1. create non-usernamespaced root as "0.0"
+			if err := os.Mkdir(defaultRoot, 0700); err != nil {
+				return fmt.Errorf("Cannot create daemon root %q: %v", defaultRoot, err)
+			}
+			// 2. move current root content to "0.0" new subroot
+			if err := directory.MoveToSubdir(rootDir, nsRoot); err != nil {
+				return fmt.Errorf("Cannot migrate current daemon root %q for user namespaces: %v", rootDir, err)
+			}
+			// 3. chmod outer root to 755
+			if chmodErr := os.Chmod(rootDir, 0755); chmodErr != nil {
+				return chmodErr
+			}
+		}
+	case os.IsNotExist(err):
+		// no root exists yet, create it 0755 with root:root ownership
+		if err := os.MkdirAll(rootDir, 0755); err != nil {
+			return err
+		}
+		// create the "0.0" subroot (so no future "migration" happens of the root)
+		if err := os.Mkdir(defaultRoot, 0700); err != nil {
+			return err
+		}
+	default:
+		return fmt.Errorf("Cannot access daemon root %q: %v", rootDir, err)
+	}
+
+	// with a remapped root configured, the daemon works in a subtree named
+	// after the remapped uid/gid instead of the default "0.0"
+	if config.RemappedRoot != "" {
+		nsRoot = fmt.Sprintf("%d.%d", rootUID, rootGID)
+	}
+	config.Root = filepath.Join(rootDir, nsRoot)
+	logrus.Debugf("Creating actual daemon root: %s", config.Root)
+
+	// Create the root directory if it doesn't exist
+	if err := idtools.MkdirAllAs(config.Root, 0700, rootUID, rootGID); err != nil {
+		return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err)
+	}
+	return nil
+}
+
+// verifyExperimentalContainerSettings runs the container-settings checks that
+// exist only in experimental builds. It returns an error for a privileged
+// container when the daemon configuration has a non-empty RemappedRoot; the
+// warnings slice it returns is always nil.
+func (daemon *Daemon) verifyExperimentalContainerSettings(hostConfig *runconfig.HostConfig, config *runconfig.Config) ([]string, error) {
+	if !hostConfig.Privileged {
+		return nil, nil
+	}
+	if remap := daemon.config().RemappedRoot; remap == "" {
+		return nil, nil
+	}
+	return nil, fmt.Errorf("Privileged mode is incompatible with user namespace mappings")
+}

+ 28 - 0
daemon/daemon_stub.go

@@ -0,0 +1,28 @@
+// +build !experimental
+
+package daemon
+
+import (
+	"os"
+
+	"github.com/docker/docker/pkg/idtools"
+	"github.com/docker/docker/pkg/system"
+	"github.com/docker/docker/runconfig"
+)
+
+func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
+	return nil, nil, nil // non-experimental build: config is ignored; no ID maps and never an error
+}
+
+// setupDaemonRoot is the non-experimental variant: it uses rootDir directly as
+// config.Root and creates it with mode 0700. rootUID and rootGID are ignored.
+// An "already exists" error from the directory creation is not reported.
+func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
+	config.Root = rootDir
+
+	mkdirErr := system.MkdirAll(rootDir, 0700)
+	if mkdirErr == nil || os.IsExist(mkdirErr) {
+		return nil
+	}
+	return mkdirErr
+}
+
+func (daemon *Daemon) verifyExperimentalContainerSettings(hostConfig *runconfig.HostConfig, config *runconfig.Config) ([]string, error) {
+	return nil, nil // non-experimental build: nothing to check, so no warnings and no error
+}

+ 1 - 1
daemon/daemon_test.go

@@ -509,7 +509,7 @@ func initDaemonForVolumesTest(tmp string) (*Daemon, error) {
 		volumes:    store.New(),
 	}
 
-	volumesDriver, err := local.New(tmp)
+	volumesDriver, err := local.New(tmp, 0, 0)
 	if err != nil {
 		return nil, err
 	}

+ 12 - 6
daemon/daemon_unix.go

@@ -15,10 +15,10 @@ import (
 	"github.com/docker/docker/daemon/graphdriver"
 	derr "github.com/docker/docker/errors"
 	"github.com/docker/docker/pkg/fileutils"
+	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/pkg/parsers"
 	"github.com/docker/docker/pkg/parsers/kernel"
 	"github.com/docker/docker/pkg/sysinfo"
-	"github.com/docker/docker/pkg/system"
 	"github.com/docker/docker/runconfig"
 	"github.com/docker/docker/utils"
 	"github.com/docker/libnetwork"
@@ -121,6 +121,11 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *runconfig.HostC
 	warnings := []string{}
 	sysInfo := sysinfo.New(true)
 
+	warnings, err := daemon.verifyExperimentalContainerSettings(hostConfig, config)
+	if err != nil {
+		return warnings, err
+	}
+
 	if hostConfig.LxcConf.Len() > 0 && !strings.Contains(daemon.ExecutionDriver().Name(), "lxc") {
 		return warnings, fmt.Errorf("Cannot use --lxc-conf with execdriver: %s", daemon.ExecutionDriver().Name())
 	}
@@ -275,7 +280,7 @@ func migrateIfDownlevel(driver graphdriver.Driver, root string) error {
 	return migrateIfAufs(driver, root)
 }
 
-func configureSysInit(config *Config) (string, error) {
+func configureSysInit(config *Config, rootUID, rootGID int) (string, error) {
 	localCopy := filepath.Join(config.Root, "init", fmt.Sprintf("dockerinit-%s", dockerversion.VERSION))
 	sysInitPath := utils.DockerInitPath(localCopy)
 	if sysInitPath == "" {
@@ -284,7 +289,7 @@ func configureSysInit(config *Config) (string, error) {
 
 	if sysInitPath != localCopy {
 		// When we find a suitable dockerinit binary (even if it's our local binary), we copy it into config.Root at localCopy for future use (so that the original can go away without that being a problem, for example during a package upgrade).
-		if err := os.Mkdir(filepath.Dir(localCopy), 0700); err != nil && !os.IsExist(err) {
+		if err := idtools.MkdirAs(filepath.Dir(localCopy), 0700, rootUID, rootGID); err != nil && !os.IsExist(err) {
 			return "", err
 		}
 		if _, err := fileutils.CopyFile(sysInitPath, localCopy); err != nil {
@@ -455,7 +460,7 @@ func initBridgeDriver(controller libnetwork.NetworkController, config *Config) e
 //
 // This extra layer is used by all containers as the top-most ro layer. It protects
 // the container from unwanted side-effects on the rw layer.
-func setupInitLayer(initLayer string) error {
+func setupInitLayer(initLayer string, rootUID, rootGID int) error {
 	for pth, typ := range map[string]string{
 		"/dev/pts":         "dir",
 		"/dev/shm":         "dir",
@@ -478,12 +483,12 @@ func setupInitLayer(initLayer string) error {
 
 		if _, err := os.Stat(filepath.Join(initLayer, pth)); err != nil {
 			if os.IsNotExist(err) {
-				if err := system.MkdirAll(filepath.Join(initLayer, filepath.Dir(pth)), 0755); err != nil {
+				if err := idtools.MkdirAllAs(filepath.Join(initLayer, filepath.Dir(pth)), 0755, rootUID, rootGID); err != nil {
 					return err
 				}
 				switch typ {
 				case "dir":
-					if err := system.MkdirAll(filepath.Join(initLayer, pth), 0755); err != nil {
+					if err := idtools.MkdirAllAs(filepath.Join(initLayer, pth), 0755, rootUID, rootGID); err != nil {
 						return err
 					}
 				case "file":
@@ -492,6 +497,7 @@ func setupInitLayer(initLayer string) error {
 						return err
 					}
 					f.Close()
+					f.Chown(rootUID, rootGID)
 				default:
 					if err := os.Symlink(typ, filepath.Join(initLayer, pth)); err != nil {
 						return err

+ 2 - 2
daemon/daemon_windows.go

@@ -25,7 +25,7 @@ func parseSecurityOpt(container *Container, config *runconfig.HostConfig) error
 	return nil
 }
 
-func setupInitLayer(initLayer string) error {
+func setupInitLayer(initLayer string, rootUID, rootGID int) error {
 	return nil
 }
 
@@ -89,7 +89,7 @@ func migrateIfDownlevel(driver graphdriver.Driver, root string) error {
 	return nil
 }
 
-func configureSysInit(config *Config) (string, error) {
+func configureSysInit(config *Config, rootUID, rootGID int) (string, error) {
 	// TODO Windows.
 	return os.Getenv("TEMP"), nil
 }

+ 7 - 6
daemon/daemonbuilder/builder.go

@@ -16,7 +16,6 @@ import (
 	"github.com/docker/docker/graph"
 	"github.com/docker/docker/image"
 	"github.com/docker/docker/pkg/archive"
-	"github.com/docker/docker/pkg/chrootarchive"
 	"github.com/docker/docker/pkg/httputils"
 	"github.com/docker/docker/pkg/ioutils"
 	"github.com/docker/docker/pkg/parsers"
@@ -32,6 +31,7 @@ type Docker struct {
 	Daemon      *daemon.Daemon
 	OutOld      io.Writer
 	AuthConfigs map[string]cliconfig.AuthConfig
+	Archiver    *archive.Archiver
 }
 
 // ensure Docker implements builder.Docker
@@ -121,6 +121,7 @@ func (d Docker) Release(sessionID string, activeImages []string) {
 func (d Docker) Copy(c *daemon.Container, destPath string, src builder.FileInfo, decompress bool) error {
 	srcPath := src.Path()
 	destExists := true
+	rootUID, rootGID := d.Daemon.GetRemappedUIDGID()
 
 	// Work in daemon-local OS specific file paths
 	destPath = filepath.FromSlash(destPath)
@@ -149,10 +150,10 @@ func (d Docker) Copy(c *daemon.Container, destPath string, src builder.FileInfo,
 
 	if src.IsDir() {
 		// copy as directory
-		if err := chrootarchive.CopyWithTar(srcPath, destPath); err != nil {
+		if err := d.Archiver.CopyWithTar(srcPath, destPath); err != nil {
 			return err
 		}
-		return fixPermissions(srcPath, destPath, 0, 0, destExists)
+		return fixPermissions(srcPath, destPath, rootUID, rootGID, destExists)
 	}
 	if decompress {
 		// Only try to untar if it is a file and that we've been told to decompress (when ADD-ing a remote file)
@@ -167,7 +168,7 @@ func (d Docker) Copy(c *daemon.Container, destPath string, src builder.FileInfo,
 		}
 
 		// try to successfully untar the orig
-		if err := chrootarchive.UntarPath(srcPath, tarDest); err == nil {
+		if err := d.Archiver.UntarPath(srcPath, tarDest); err == nil {
 			return nil
 		} else if err != io.EOF {
 			logrus.Debugf("Couldn't untar to %s: %v", tarDest, err)
@@ -182,11 +183,11 @@ func (d Docker) Copy(c *daemon.Container, destPath string, src builder.FileInfo,
 	if err := system.MkdirAll(filepath.Dir(destPath), 0755); err != nil {
 		return err
 	}
-	if err := chrootarchive.CopyFileWithTar(srcPath, destPath); err != nil {
+	if err := d.Archiver.CopyFileWithTar(srcPath, destPath); err != nil {
 		return err
 	}
 
-	return fixPermissions(srcPath, destPath, 0, 0, destExists)
+	return fixPermissions(srcPath, destPath, rootUID, rootGID, destExists)
 }
 
 // GetCachedImage returns a reference to a cached image whose parent equals `parent`

+ 10 - 0
daemon/execdriver/driver.go

@@ -6,6 +6,7 @@ import (
 	"os/exec"
 	"time"
 
+	"github.com/docker/docker/pkg/idtools"
 	// TODO Windows: Factor out ulimit
 	"github.com/docker/docker/pkg/ulimit"
 	"github.com/opencontainers/runc/libcontainer"
@@ -173,6 +174,12 @@ type Mount struct {
 	Slave       bool   `json:"slave"`
 }
 
+// User contains the uid and gid representing a Unix user; Command.RemappedRoot holds a pointer to one.
+type User struct {
+	UID int `json:"root_uid"` // numeric user ID, serialized as "root_uid"
+	GID int `json:"root_gid"` // numeric group ID, serialized as "root_gid"
+}
+
 // ProcessConfig describes a process that will be run inside a container.
 type ProcessConfig struct {
 	exec.Cmd `json:"-"`
@@ -202,6 +209,9 @@ type Command struct {
 	Ipc                *Ipc              `json:"ipc"`
 	Pid                *Pid              `json:"pid"`
 	UTS                *UTS              `json:"uts"`
+	RemappedRoot       *User             `json:"remap_root"`
+	UIDMapping         []idtools.IDMap   `json:"uidmapping"`
+	GIDMapping         []idtools.IDMap   `json:"gidmapping"`
 	Resources          *Resources        `json:"resources"`
 	Mounts             []Mount           `json:"mounts"`
 	AllowedDevices     []*configs.Device `json:"allowed_devices"`

+ 40 - 0
daemon/execdriver/native/create.go

@@ -8,6 +8,7 @@ import (
 	"syscall"
 
 	"github.com/docker/docker/daemon/execdriver"
+
 	"github.com/opencontainers/runc/libcontainer/apparmor"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/devices"
@@ -30,6 +31,10 @@ func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks)
 		return nil, err
 	}
 
+	if err := d.setupRemappedRoot(container, c); err != nil {
+		return nil, err
+	}
+
 	if err := d.createNetwork(container, c, hooks); err != nil {
 		return nil, err
 	}
@@ -193,6 +198,40 @@ func (d *Driver) createUTS(container *configs.Config, c *execdriver.Command) err
 	return nil
 }
 
+// setupRemappedRoot applies the command's remapped root identity and ID
+// mappings to the libcontainer config.
+//
+// When no remapped root is supplied (nil) or its UID is 0, the NEWUSER
+// namespace is removed from the config and nothing else is touched.
+// Otherwise the UID/GID mappings are copied into the config, every device
+// node is assigned the remapped root's uid/gid, and the read-only flag is
+// cleared on cgroup mounts. The function currently always returns nil.
+func (d *Driver) setupRemappedRoot(container *configs.Config, c *execdriver.Command) error {
+	// RemappedRoot is a pointer; treat a missing value like an unmapped
+	// root (UID 0) instead of panicking on the dereference.
+	if c.RemappedRoot == nil || c.RemappedRoot.UID == 0 {
+		container.Namespaces.Remove(configs.NEWUSER)
+		return nil
+	}
+
+	// convert the Docker daemon id map to the libcontainer variant of the same struct
+	// this keeps us from having to import libcontainer code across Docker client + daemon packages
+	cuidMaps := make([]configs.IDMap, 0, len(c.UIDMapping))
+	for _, idMap := range c.UIDMapping {
+		cuidMaps = append(cuidMaps, configs.IDMap(idMap))
+	}
+	cgidMaps := make([]configs.IDMap, 0, len(c.GIDMapping))
+	for _, idMap := range c.GIDMapping {
+		cgidMaps = append(cgidMaps, configs.IDMap(idMap))
+	}
+	container.UidMappings = cuidMaps
+	container.GidMappings = cgidMaps
+
+	// hand ownership of every device node to the remapped root
+	for _, node := range container.Devices {
+		node.Uid = uint32(c.RemappedRoot.UID)
+		node.Gid = uint32(c.RemappedRoot.GID)
+	}
+	// TODO: until a kernel/mount solution exists for handling remount in a user namespace,
+	// we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
+	for i := range container.Mounts {
+		if container.Mounts[i].Device == "cgroup" {
+			container.Mounts[i].Flags &= ^syscall.MS_RDONLY
+		}
+	}
+
+	return nil
+}
+
+
 func (d *Driver) setPrivileged(container *configs.Config) (err error) {
 	container.Capabilities = execdriver.GetAllCapabilities()
 	container.Cgroups.AllowAllDevices = true
@@ -255,6 +294,7 @@ func (d *Driver) setupMounts(container *configs.Config, c *execdriver.Command) e
 		if m.Slave {
 			flags |= syscall.MS_SLAVE
 		}
+
 		container.Mounts = append(container.Mounts, &configs.Mount{
 			Source:      m.Source,
 			Destination: m.Destination,

+ 66 - 8
daemon/execdriver/native/driver.go

@@ -443,22 +443,35 @@ func (t *TtyConsole) Close() error {
 }
 
 func setupPipes(container *configs.Config, processConfig *execdriver.ProcessConfig, p *libcontainer.Process, pipes *execdriver.Pipes) error {
-	var term execdriver.Terminal
-	var err error
+
+	rootuid, err := container.HostUID()
+	if err != nil {
+		return err
+	}
 
 	if processConfig.Tty {
-		rootuid, err := container.HostUID()
+		cons, err := p.NewConsole(rootuid)
 		if err != nil {
 			return err
 		}
-		cons, err := p.NewConsole(rootuid)
+		term, err := NewTtyConsole(cons, pipes)
 		if err != nil {
 			return err
 		}
-		term, err = NewTtyConsole(cons, pipes)
-	} else {
+		processConfig.Terminal = term
+		return nil
+	}
+	// not a tty--set up stdio pipes
+	term := &execdriver.StdConsole{}
+	processConfig.Terminal = term
+
+	// if we are not in a user namespace, there is no reason to go through
+	// the hassle of setting up os-level pipes with proper (remapped) ownership
+	// so we will do the prior shortcut for non-userns containers
+	if rootuid == 0 {
 		p.Stdout = pipes.Stdout
 		p.Stderr = pipes.Stderr
+
 		r, w, err := os.Pipe()
 		if err != nil {
 			return err
@@ -470,12 +483,57 @@ func setupPipes(container *configs.Config, processConfig *execdriver.ProcessConf
 			}()
 			p.Stdin = r
 		}
-		term = &execdriver.StdConsole{}
+		return nil
 	}
+
+	// if we have user namespaces enabled (rootuid != 0), we will set
+	// up os pipes for stderr, stdout, stdin so we can chown them to
+	// the proper ownership to allow for proper access to the underlying
+	// fds
+	var fds []int
+
+	//setup stdout
+	r, w, err := os.Pipe()
 	if err != nil {
 		return err
 	}
-	processConfig.Terminal = term
+	fds = append(fds, int(r.Fd()), int(w.Fd()))
+	if pipes.Stdout != nil {
+		go io.Copy(pipes.Stdout, r)
+	}
+	term.Closers = append(term.Closers, r)
+	p.Stdout = w
+
+	//setup stderr
+	r, w, err = os.Pipe()
+	if err != nil {
+		return err
+	}
+	fds = append(fds, int(r.Fd()), int(w.Fd()))
+	if pipes.Stderr != nil {
+		go io.Copy(pipes.Stderr, r)
+	}
+	term.Closers = append(term.Closers, r)
+	p.Stderr = w
+
+	//setup stdin
+	r, w, err = os.Pipe()
+	if err != nil {
+		return err
+	}
+	fds = append(fds, int(r.Fd()), int(w.Fd()))
+	if pipes.Stdin != nil {
+		go func() {
+			io.Copy(w, pipes.Stdin)
+			w.Close()
+		}()
+		p.Stdin = r
+	}
+	for _, fd := range fds {
+		if err := syscall.Fchown(fd, rootuid, rootuid); err != nil {
+			return fmt.Errorf("Failed to chown pipes fd: %v", err)
+		}
+	}
 	return nil
 }
 

+ 8 - 1
daemon/execdriver/native/exec.go

@@ -26,11 +26,18 @@ func (d *Driver) Exec(c *execdriver.Command, processConfig *execdriver.ProcessCo
 		return -1, fmt.Errorf("No active container exists with ID %s", c.ID)
 	}
 
+	user := processConfig.User
+	if c.RemappedRoot.UID != 0 && user == "" {
+		//if user namespaces are enabled, set user explicitly so uid/gid is set to 0
+		//otherwise we end up with the overflow id and no permissions (65534)
+		user = "0"
+	}
+
 	p := &libcontainer.Process{
 		Args: append([]string{processConfig.Entrypoint}, processConfig.Arguments...),
 		Env:  c.ProcessConfig.Env,
 		Cwd:  c.WorkingDir,
-		User: processConfig.User,
+		User: user,
 	}
 
 	if processConfig.Privileged {

+ 1 - 0
daemon/execdriver/native/template/default_template.go

@@ -34,6 +34,7 @@ func New() *configs.Config {
 			{Type: "NEWIPC"},
 			{Type: "NEWPID"},
 			{Type: "NEWNET"},
+			{Type: "NEWUSER"},
 		}),
 		Cgroups: &configs.Cgroup{
 			Parent:           "docker",

+ 5 - 1
daemon/execdriver/termconsole.go

@@ -7,6 +7,8 @@ import (
 
 // StdConsole defines standard console operations for execdriver
 type StdConsole struct {
+	// Closers holds io.Closer references; Close calls Close on each of them, in slice order
+	Closers []io.Closer
 }
 
 // NewStdConsole returns a new StdConsole struct
@@ -46,6 +48,8 @@ func (s *StdConsole) Resize(h, w int) error {
 
 // Close implements Close method of Terminal interface
 func (s *StdConsole) Close() error {
-	// nothing to close here
+	for _, c := range s.Closers {
+		c.Close()
+	}
 	return nil
 }

+ 33 - 8
daemon/graphdriver/aufs/aufs.go

@@ -34,12 +34,15 @@ import (
 	"syscall"
 
 	"github.com/Sirupsen/logrus"
+
 	"github.com/docker/docker/daemon/graphdriver"
 	"github.com/docker/docker/pkg/archive"
 	"github.com/docker/docker/pkg/chrootarchive"
 	"github.com/docker/docker/pkg/directory"
+	"github.com/docker/docker/pkg/idtools"
 	mountpk "github.com/docker/docker/pkg/mount"
 	"github.com/docker/docker/pkg/stringid"
+
 	"github.com/opencontainers/runc/libcontainer/label"
 )
 
@@ -71,13 +74,15 @@ type data struct {
 // active maps mount id to the count
 type Driver struct {
 	root       string
+	uidMaps    []idtools.IDMap // UID mappings given to Init; used to derive the root UID for created dirs and passed to tar operations
+	gidMaps    []idtools.IDMap // GID mappings given to Init; used the same way for the root GID
 	sync.Mutex // Protects concurrent modification to active
 	active     map[string]*data
 }
 
 // Init returns a new AUFS driver.
 // An error is returned if AUFS is not supported.
-func Init(root string, options []string) (graphdriver.Driver, error) {
+func Init(root string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
 
 	// Try to load the aufs kernel module
 	if err := supportsAufs(); err != nil {
@@ -105,12 +110,23 @@ func Init(root string, options []string) (graphdriver.Driver, error) {
 	}
 
 	a := &Driver{
-		root:   root,
-		active: make(map[string]*data),
+		root:    root,
+		active:  make(map[string]*data),
+		uidMaps: uidMaps,
+		gidMaps: gidMaps,
 	}
 
-	// Create the root aufs driver dir
-	if err := os.MkdirAll(root, 0755); err != nil {
+	rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
+	if err != nil {
+		return nil, err
+	}
+	// Create the root aufs driver dir and return
+	// if it already exists
+	// If not populate the dir structure
+	if err := idtools.MkdirAllAs(root, 0755, rootUID, rootGID); err != nil {
+		if os.IsExist(err) {
+			return a, nil
+		}
 		return nil, err
 	}
 
@@ -120,7 +136,7 @@ func Init(root string, options []string) (graphdriver.Driver, error) {
 
 	// Populate the dir structure
 	for _, p := range paths {
-		if err := os.MkdirAll(path.Join(root, p), 0755); err != nil {
+		if err := idtools.MkdirAllAs(path.Join(root, p), 0755, rootUID, rootGID); err != nil {
 			return nil, err
 		}
 	}
@@ -221,8 +237,12 @@ func (a *Driver) createDirsFor(id string) error {
 		"diff",
 	}
 
+	rootUID, rootGID, err := idtools.GetRootUIDGID(a.uidMaps, a.gidMaps)
+	if err != nil {
+		return err
+	}
 	for _, p := range paths {
-		if err := os.MkdirAll(path.Join(a.rootPath(), p, id), 0755); err != nil {
+		if err := idtools.MkdirAllAs(path.Join(a.rootPath(), p, id), 0755, rootUID, rootGID); err != nil {
 			return err
 		}
 	}
@@ -334,11 +354,16 @@ func (a *Driver) Diff(id, parent string) (archive.Archive, error) {
 	return archive.TarWithOptions(path.Join(a.rootPath(), "diff", id), &archive.TarOptions{
 		Compression:     archive.Uncompressed,
 		ExcludePatterns: []string{archive.WhiteoutMetaPrefix + "*", "!" + archive.WhiteoutOpaqueDir},
+		UIDMaps:         a.uidMaps,
+		GIDMaps:         a.gidMaps,
 	})
 }
 
 func (a *Driver) applyDiff(id string, diff archive.Reader) error {
-	return chrootarchive.UntarUncompressed(diff, path.Join(a.rootPath(), "diff", id), nil)
+	// Unpack the uncompressed layer into this id's "diff" directory, handing
+	// the driver's ID maps to the archiver through TarOptions.
+	target := path.Join(a.rootPath(), "diff", id)
+	opts := &archive.TarOptions{UIDMaps: a.uidMaps, GIDMaps: a.gidMaps}
+	return chrootarchive.UntarUncompressed(diff, target, opts)
 }
 
 // DiffSize calculates the changes between the specified id

+ 1 - 1
daemon/graphdriver/aufs/aufs_test.go

@@ -26,7 +26,7 @@ func init() {
 }
 
 func testInit(dir string, t *testing.T) graphdriver.Driver {
-	d, err := Init(dir, nil)
+	d, err := Init(dir, nil, nil, nil)
 	if err != nil {
 		if err == graphdriver.ErrNotSupported {
 			t.Skip(err)

+ 10 - 3
daemon/graphdriver/aufs/migrate.go

@@ -8,6 +8,8 @@ import (
 	"io/ioutil"
 	"os"
 	"path"
+
+	"github.com/docker/docker/pkg/idtools"
 )
 
 type metadata struct {
@@ -38,7 +40,7 @@ func pathExists(pth string) bool {
 // For the migration we try to move the folder containing the layer files, if that
 // fails because the data is currently mounted we will fallback to creating a
 // symlink.
-func (a *Driver) Migrate(pth string, setupInit func(p string) error) error {
+func (a *Driver) Migrate(pth string, setupInit func(p string, rootUID, rootGID int) error) error {
 	if pathExists(path.Join(pth, "graph")) {
 		if err := a.migrateRepositories(pth); err != nil {
 			return err
@@ -59,12 +61,17 @@ func (a *Driver) migrateRepositories(pth string) error {
 	return nil
 }
 
-func (a *Driver) migrateContainers(pth string, setupInit func(p string) error) error {
+func (a *Driver) migrateContainers(pth string, setupInit func(p string, rootUID, rootGID int) error) error {
 	fis, err := ioutil.ReadDir(pth)
 	if err != nil {
 		return err
 	}
 
+	rootUID, rootGID, err := idtools.GetRootUIDGID(a.uidMaps, a.gidMaps)
+	if err != nil {
+		return err
+	}
+
 	for _, fi := range fis {
 		if id := fi.Name(); fi.IsDir() && pathExists(path.Join(pth, id, "rw")) {
 			if err := tryRelocate(path.Join(pth, id, "rw"), path.Join(a.rootPath(), "diff", id)); err != nil {
@@ -88,7 +95,7 @@ func (a *Driver) migrateContainers(pth string, setupInit func(p string) error) e
 					return err
 				}
 				// setup init layer
-				if err := setupInit(initPath); err != nil {
+				if err := setupInit(initPath, rootUID, rootGID); err != nil {
 					return err
 				}
 

+ 19 - 6
daemon/graphdriver/btrfs/btrfs.go

@@ -19,6 +19,7 @@ import (
 	"unsafe"
 
 	"github.com/docker/docker/daemon/graphdriver"
+	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/pkg/mount"
 )
 
@@ -28,7 +29,7 @@ func init() {
 
 // Init returns a new BTRFS driver.
 // An error is returned if BTRFS is not supported.
-func Init(home string, options []string) (graphdriver.Driver, error) {
+func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
 	rootdir := path.Dir(home)
 
 	var buf syscall.Statfs_t
@@ -40,7 +41,11 @@ func Init(home string, options []string) (graphdriver.Driver, error) {
 		return nil, graphdriver.ErrPrerequisites
 	}
 
-	if err := os.MkdirAll(home, 0700); err != nil {
+	rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
+	if err != nil {
+		return nil, err
+	}
+	if err := idtools.MkdirAllAs(home, 0700, rootUID, rootGID); err != nil {
 		return nil, err
 	}
 
@@ -49,16 +54,20 @@ func Init(home string, options []string) (graphdriver.Driver, error) {
 	}
 
 	driver := &Driver{
-		home: home,
+		home:    home,
+		uidMaps: uidMaps,
+		gidMaps: gidMaps,
 	}
 
-	return graphdriver.NewNaiveDiffDriver(driver), nil
+	return graphdriver.NewNaiveDiffDriver(driver, uidMaps, gidMaps), nil
 }
 
 // Driver contains information about the filesystem mounted.
 type Driver struct {
 	//root of the file system
-	home string
+	home    string
+	uidMaps []idtools.IDMap // UID mappings given to Init; Create derives the root UID from them
+	gidMaps []idtools.IDMap // GID mappings given to Init; Create derives the root GID from them
 }
 
 // String prints the name of the driver (btrfs).
@@ -226,7 +235,11 @@ func (d *Driver) subvolumesDirID(id string) string {
 // Create the filesystem with given id.
 func (d *Driver) Create(id string, parent string) error {
 	subvolumes := path.Join(d.home, "subvolumes")
-	if err := os.MkdirAll(subvolumes, 0700); err != nil {
+	rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
+	if err != nil {
+		return err
+	}
+	if err := idtools.MkdirAllAs(subvolumes, 0700, rootUID, rootGID); err != nil {
 		return err
 	}
 	if parent == "" {

+ 23 - 3
daemon/graphdriver/devmapper/deviceset.go

@@ -19,11 +19,14 @@ import (
 	"time"
 
 	"github.com/Sirupsen/logrus"
+
 	"github.com/docker/docker/daemon/graphdriver"
 	"github.com/docker/docker/pkg/devicemapper"
+	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/pkg/mount"
 	"github.com/docker/docker/pkg/parsers"
 	"github.com/docker/docker/pkg/units"
+
 	"github.com/opencontainers/runc/libcontainer/label"
 )
 
@@ -113,6 +116,8 @@ type DeviceSet struct {
 	BaseDeviceUUID        string //save UUID of base device
 	nrDeletedDevices      uint   //number of deleted devices
 	deletionWorkerTicker  *time.Ticker
+	uidMaps               []idtools.IDMap
+	gidMaps               []idtools.IDMap
 }
 
 // DiskUsage contains information about disk usage and is used when reporting Status of a device.
@@ -250,7 +255,11 @@ func (devices *DeviceSet) ensureImage(name string, size int64) (string, error) {
 	dirname := devices.loopbackDir()
 	filename := path.Join(dirname, name)
 
-	if err := os.MkdirAll(dirname, 0700); err != nil {
+	uid, gid, err := idtools.GetRootUIDGID(devices.uidMaps, devices.gidMaps)
+	if err != nil {
+		return "", err
+	}
+	if err := idtools.MkdirAllAs(dirname, 0700, uid, gid); err != nil && !os.IsExist(err) {
 		return "", err
 	}
 
@@ -1448,7 +1457,16 @@ func (devices *DeviceSet) initDevmapper(doInit bool) error {
 		logrus.Warn("Udev sync is not supported. This will lead to unexpected behavior, data loss and errors. For more information, see https://docs.docker.com/reference/commandline/daemon/#daemon-storage-driver-option")
 	}
 
-	if err := os.MkdirAll(devices.metadataDir(), 0700); err != nil {
+	//create the root dir of the devmapper driver ownership to match this
+	//daemon's remapped root uid/gid so containers can start properly
+	uid, gid, err := idtools.GetRootUIDGID(devices.uidMaps, devices.gidMaps)
+	if err != nil {
+		return err
+	}
+	if err := idtools.MkdirAs(devices.root, 0700, uid, gid); err != nil && !os.IsExist(err) {
+		return err
+	}
+	if err := os.MkdirAll(devices.metadataDir(), 0700); err != nil && !os.IsExist(err) {
 		return err
 	}
 
@@ -2230,7 +2248,7 @@ func (devices *DeviceSet) exportDeviceMetadata(hash string) (*deviceMetadata, er
 }
 
 // NewDeviceSet creates the device set based on the options provided.
-func NewDeviceSet(root string, doInit bool, options []string) (*DeviceSet, error) {
+func NewDeviceSet(root string, doInit bool, options []string, uidMaps, gidMaps []idtools.IDMap) (*DeviceSet, error) {
 	devicemapper.SetDevDir("/dev")
 
 	devices := &DeviceSet{
@@ -2245,6 +2263,8 @@ func NewDeviceSet(root string, doInit bool, options []string) (*DeviceSet, error
 		thinpBlockSize:        defaultThinpBlockSize,
 		deviceIDMap:           make([]byte, deviceIDMapSz),
 		deletionWorkerTicker:  time.NewTicker(time.Second * 30),
+		uidMaps:               uidMaps,
+		gidMaps:               gidMaps,
 	}
 
 	foundBlkDiscard := false

+ 1 - 1
daemon/graphdriver/devmapper/devmapper_test.go

@@ -67,7 +67,7 @@ func testChangeLoopBackSize(t *testing.T, delta, expectDataSize, expectMetaDataS
 	d, err := Init(driver.home, []string{
 		fmt.Sprintf("dm.loopdatasize=%d", defaultDataLoopbackSize+delta),
 		fmt.Sprintf("dm.loopmetadatasize=%d", defaultMetaDataLoopbackSize+delta),
-	})
+	}, nil, nil)
 	if err != nil {
 		t.Fatalf("error creating devicemapper driver: %v", err)
 	}

+ 19 - 6
daemon/graphdriver/devmapper/driver.go

@@ -10,8 +10,10 @@ import (
 	"strconv"
 
 	"github.com/Sirupsen/logrus"
+
 	"github.com/docker/docker/daemon/graphdriver"
 	"github.com/docker/docker/pkg/devicemapper"
+	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/pkg/mount"
 	"github.com/docker/docker/pkg/units"
 )
@@ -28,13 +30,15 @@ func init() {
 // Driver contains the device set mounted and the home directory
 type Driver struct {
 	*DeviceSet
-	home string
+	home    string
+	uidMaps []idtools.IDMap // UID mappings given to Init; Get derives the owner uid of mount dirs from them
+	gidMaps []idtools.IDMap // GID mappings given to Init; Get derives the owner gid of mount dirs from them
 }
 
 var backingFs = "<unknown>"
 
 // Init creates a driver with the given home and the set of options.
-func Init(home string, options []string) (graphdriver.Driver, error) {
+func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
 	fsMagic, err := graphdriver.GetFSMagic(home)
 	if err != nil {
 		return nil, err
@@ -43,7 +47,7 @@ func Init(home string, options []string) (graphdriver.Driver, error) {
 		backingFs = fsName
 	}
 
-	deviceSet, err := NewDeviceSet(home, true, options)
+	deviceSet, err := NewDeviceSet(home, true, options, uidMaps, gidMaps)
 	if err != nil {
 		return nil, err
 	}
@@ -55,9 +59,11 @@ func Init(home string, options []string) (graphdriver.Driver, error) {
 	d := &Driver{
 		DeviceSet: deviceSet,
 		home:      home,
+		uidMaps:   uidMaps,
+		gidMaps:   gidMaps,
 	}
 
-	return graphdriver.NewNaiveDiffDriver(d), nil
+	return graphdriver.NewNaiveDiffDriver(d, uidMaps, gidMaps), nil
 }
 
 func (d *Driver) String() string {
@@ -160,8 +166,15 @@ func (d *Driver) Remove(id string) error {
 func (d *Driver) Get(id, mountLabel string) (string, error) {
 	mp := path.Join(d.home, "mnt", id)
 
+	uid, gid, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
+	if err != nil {
+		return "", err
+	}
 	// Create the target directories if they don't exist
-	if err := os.MkdirAll(mp, 0755); err != nil {
+	if err := idtools.MkdirAllAs(path.Join(d.home, "mnt"), 0755, uid, gid); err != nil && !os.IsExist(err) {
+		return "", err
+	}
+	if err := idtools.MkdirAs(mp, 0755, uid, gid); err != nil && !os.IsExist(err) {
 		return "", err
 	}
 
@@ -171,7 +184,7 @@ func (d *Driver) Get(id, mountLabel string) (string, error) {
 	}
 
 	rootFs := path.Join(mp, "rootfs")
-	if err := os.MkdirAll(rootFs, 0755); err != nil {
+	if err := idtools.MkdirAllAs(rootFs, 0755, uid, gid); err != nil && !os.IsExist(err) {
 		d.DeviceSet.UnmountDevice(id)
 		return "", err
 	}

+ 12 - 11
daemon/graphdriver/driver.go

@@ -8,7 +8,9 @@ import (
 	"strings"
 
 	"github.com/Sirupsen/logrus"
+
 	"github.com/docker/docker/pkg/archive"
+	"github.com/docker/docker/pkg/idtools"
 )
 
 // FsMagic unsigned id of the filesystem in use.
@@ -34,7 +36,7 @@ var (
 )
 
 // InitFunc initializes the storage driver at root with the given options and UID/GID mappings.
-type InitFunc func(root string, options []string) (Driver, error)
+type InitFunc func(root string, options []string, uidMaps, gidMaps []idtools.IDMap) (Driver, error)
 
 // ProtoDriver defines the basic capabilities of a driver.
 // This interface exists solely to be a minimum set of methods
@@ -46,7 +48,6 @@ type ProtoDriver interface {
 	// String returns a string representation of this driver.
 	String() string
 	// Create creates a new, empty, filesystem layer with the
-	// specified id and parent. Parent may be "".
 	Create(id, parent string) error
 	// Remove attempts to remove the filesystem layer with this id.
 	Remove(id string) error
@@ -107,9 +108,9 @@ func Register(name string, initFunc InitFunc) error {
 }
 
 // GetDriver initializes and returns the registered driver
-func GetDriver(name, home string, options []string) (Driver, error) {
+func GetDriver(name, home string, options []string, uidMaps, gidMaps []idtools.IDMap) (Driver, error) {
 	if initFunc, exists := drivers[name]; exists {
-		return initFunc(filepath.Join(home, name), options)
+		return initFunc(filepath.Join(home, name), options, uidMaps, gidMaps)
 	}
 	if pluginDriver, err := lookupPlugin(name, home, options); err == nil {
 		return pluginDriver, nil
@@ -119,20 +120,20 @@ func GetDriver(name, home string, options []string) (Driver, error) {
 }
 
 // getBuiltinDriver initializes and returns the registered driver, but does not try to load from plugins
-func getBuiltinDriver(name, home string, options []string) (Driver, error) {
+func getBuiltinDriver(name, home string, options []string, uidMaps, gidMaps []idtools.IDMap) (Driver, error) {
 	if initFunc, exists := drivers[name]; exists {
-		return initFunc(filepath.Join(home, name), options)
+		return initFunc(filepath.Join(home, name), options, uidMaps, gidMaps) // driver root is home/name; ID maps are passed through unchanged
 	}
 	logrus.Errorf("Failed to built-in GetDriver graph %s %s", name, home)
 	return nil, ErrNotSupported // no driver registered under name
 }
 
 // New creates the driver and initializes it at the specified root.
-func New(root string, options []string) (driver Driver, err error) {
+func New(root string, options []string, uidMaps, gidMaps []idtools.IDMap) (driver Driver, err error) {
 	for _, name := range []string{os.Getenv("DOCKER_DRIVER"), DefaultDriver} {
 		if name != "" {
 			logrus.Debugf("[graphdriver] trying provided driver %q", name) // so the logs show specified driver
-			return GetDriver(name, root, options)
+			return GetDriver(name, root, options, uidMaps, gidMaps)
 		}
 	}
 
@@ -147,7 +148,7 @@ func New(root string, options []string) (driver Driver, err error) {
 			// of the state found from prior drivers, check in order of our priority
 			// which we would prefer
 			if prior == name {
-				driver, err = getBuiltinDriver(name, root, options)
+				driver, err = getBuiltinDriver(name, root, options, uidMaps, gidMaps)
 				if err != nil {
 					// unlike below, we will return error here, because there is prior
 					// state, and now it is no longer supported/prereq/compatible, so
@@ -167,7 +168,7 @@ func New(root string, options []string) (driver Driver, err error) {
 
 	// Check for priority drivers first
 	for _, name := range priority {
-		driver, err = getBuiltinDriver(name, root, options)
+		driver, err = getBuiltinDriver(name, root, options, uidMaps, gidMaps)
 		if err != nil {
 			if err == ErrNotSupported || err == ErrPrerequisites || err == ErrIncompatibleFS {
 				continue
@@ -179,7 +180,7 @@ func New(root string, options []string) (driver Driver, err error) {
 
 	// Check all registered drivers if no priority driver is found
 	for _, initFunc := range drivers {
-		if driver, err = initFunc(root, options); err != nil {
+		if driver, err = initFunc(root, options, uidMaps, gidMaps); err != nil {
 			if err == ErrNotSupported || err == ErrPrerequisites || err == ErrIncompatibleFS {
 				continue
 			}

+ 12 - 4
daemon/graphdriver/fsdiff.go

@@ -6,8 +6,10 @@ import (
 	"time"
 
 	"github.com/Sirupsen/logrus"
+
 	"github.com/docker/docker/pkg/archive"
 	"github.com/docker/docker/pkg/chrootarchive"
+	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/pkg/ioutils"
 )
 
@@ -18,6 +20,8 @@ import (
 // Notably, the AUFS driver doesn't need to be wrapped like this.
 type NaiveDiffDriver struct {
 	ProtoDriver
+	uidMaps []idtools.IDMap // UID mappings passed to archive.ExportChanges in Diff and to TarOptions in ApplyDiff
+	gidMaps []idtools.IDMap // GID mappings passed alongside uidMaps in the same calls
 }
 
 // NewNaiveDiffDriver returns a fully functional driver that wraps the
@@ -27,8 +31,10 @@ type NaiveDiffDriver struct {
 //     Changes(id, parent string) ([]archive.Change, error)
 //     ApplyDiff(id, parent string, diff archive.Reader) (size int64, err error)
 //     DiffSize(id, parent string) (size int64, err error)
-func NewNaiveDiffDriver(driver ProtoDriver) Driver {
-	return &NaiveDiffDriver{ProtoDriver: driver}
+func NewNaiveDiffDriver(driver ProtoDriver, uidMaps, gidMaps []idtools.IDMap) Driver {
+	// Wrap the proto driver and remember the ID maps for later use.
+	wrapped := &NaiveDiffDriver{
+		ProtoDriver: driver,
+		uidMaps:     uidMaps,
+		gidMaps:     gidMaps,
+	}
+	return wrapped
 }
 
 // Diff produces an archive of the changes between the specified
@@ -70,7 +76,7 @@ func (gdw *NaiveDiffDriver) Diff(id, parent string) (arch archive.Archive, err e
 		return nil, err
 	}
 
-	archive, err := archive.ExportChanges(layerFs, changes)
+	archive, err := archive.ExportChanges(layerFs, changes, gdw.uidMaps, gdw.gidMaps)
 	if err != nil {
 		return nil, err
 	}
@@ -119,9 +125,11 @@ func (gdw *NaiveDiffDriver) ApplyDiff(id, parent string, diff archive.Reader) (s
 	}
 	defer driver.Put(id)
 
+	options := &archive.TarOptions{UIDMaps: gdw.uidMaps,
+		GIDMaps: gdw.gidMaps}
 	start := time.Now().UTC()
 	logrus.Debugf("Start untar layer")
-	if size, err = chrootarchive.ApplyUncompressedLayer(layerFs, diff); err != nil {
+	if size, err = chrootarchive.ApplyUncompressedLayer(layerFs, diff, options); err != nil {
 		return
 	}
 	logrus.Debugf("Untar time: %vs", time.Now().UTC().Sub(start).Seconds())

+ 1 - 1
daemon/graphdriver/graphtest/graphtest.go

@@ -74,7 +74,7 @@ func newDriver(t *testing.T, name string) *Driver {
 		t.Fatal(err)
 	}
 
-	d, err := graphdriver.GetDriver(name, root, nil)
+	d, err := graphdriver.GetDriver(name, root, nil, nil, nil)
 	if err != nil {
 		t.Logf("graphdriver: %v\n", err)
 		if err == graphdriver.ErrNotSupported || err == graphdriver.ErrPrerequisites || err == graphdriver.ErrIncompatibleFS {

+ 36 - 13
daemon/graphdriver/overlay/overlay.go

@@ -13,9 +13,12 @@ import (
 	"syscall"
 
 	"github.com/Sirupsen/logrus"
+
 	"github.com/docker/docker/daemon/graphdriver"
 	"github.com/docker/docker/pkg/archive"
 	"github.com/docker/docker/pkg/chrootarchive"
+	"github.com/docker/docker/pkg/idtools"
+
 	"github.com/opencontainers/runc/libcontainer/label"
 )
 
@@ -41,9 +44,9 @@ type naiveDiffDriverWithApply struct {
 }
 
 // NaiveDiffDriverWithApply returns a NaiveDiff driver with custom ApplyDiff; uidMaps and gidMaps are forwarded to the wrapped NaiveDiff driver.
-func NaiveDiffDriverWithApply(driver ApplyDiffProtoDriver) graphdriver.Driver {
+func NaiveDiffDriverWithApply(driver ApplyDiffProtoDriver, uidMaps, gidMaps []idtools.IDMap) graphdriver.Driver {
 	return &naiveDiffDriverWithApply{
-		Driver:    graphdriver.NewNaiveDiffDriver(driver),
+		Driver:    graphdriver.NewNaiveDiffDriver(driver, uidMaps, gidMaps), // generic driver built around the same proto driver
 		applyDiff: driver, // the same driver, kept for its ApplyDiff
 	}
 }
@@ -98,6 +101,8 @@ type Driver struct {
 	home       string
 	sync.Mutex // Protects concurrent modification to active
 	active     map[string]*ActiveMount
+	uidMaps    []idtools.IDMap
+	gidMaps    []idtools.IDMap
 }
 
 var backingFs = "<unknown>"
@@ -109,7 +114,7 @@ func init() {
 // Init returns the NaiveDiffDriver, a native diff driver for overlay filesystem.
 // If overlay filesystem is not supported on the host, graphdriver.ErrNotSupported is returned as error.
 // If a overlay filesystem is not supported over a existing filesystem then error graphdriver.ErrIncompatibleFS is returned.
-func Init(home string, options []string) (graphdriver.Driver, error) {
+func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
 
 	if err := supportsOverlay(); err != nil {
 		return nil, graphdriver.ErrNotSupported
@@ -136,17 +141,23 @@ func Init(home string, options []string) (graphdriver.Driver, error) {
 		return nil, graphdriver.ErrIncompatibleFS
 	}
 
+	rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
+	if err != nil {
+		return nil, err
+	}
 	// Create the driver home dir
-	if err := os.MkdirAll(home, 0755); err != nil {
+	if err := idtools.MkdirAllAs(home, 0755, rootUID, rootGID); err != nil && !os.IsExist(err) {
 		return nil, err
 	}
 
 	d := &Driver{
-		home:   home,
-		active: make(map[string]*ActiveMount),
+		home:    home,
+		active:  make(map[string]*ActiveMount),
+		uidMaps: uidMaps,
+		gidMaps: gidMaps,
 	}
 
-	return NaiveDiffDriverWithApply(d), nil
+	return NaiveDiffDriverWithApply(d, uidMaps, gidMaps), nil
 }
 
 func supportsOverlay() error {
@@ -221,10 +232,15 @@ func (d *Driver) Cleanup() error {
 // The parent filesystem is used to configure these directories for the overlay.
 func (d *Driver) Create(id string, parent string) (retErr error) {
 	dir := d.dir(id)
-	if err := os.MkdirAll(path.Dir(dir), 0700); err != nil {
+
+	rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
+	if err != nil {
+		return err
+	}
+	if err := idtools.MkdirAllAs(path.Dir(dir), 0700, rootUID, rootGID); err != nil {
 		return err
 	}
-	if err := os.Mkdir(dir, 0700); err != nil {
+	if err := idtools.MkdirAs(dir, 0700, rootUID, rootGID); err != nil {
 		return err
 	}
 
@@ -237,7 +253,7 @@ func (d *Driver) Create(id string, parent string) (retErr error) {
 
 	// Toplevel images are just a "root" dir
 	if parent == "" {
-		if err := os.Mkdir(path.Join(dir, "root"), 0755); err != nil {
+		if err := idtools.MkdirAs(path.Join(dir, "root"), 0755, rootUID, rootGID); err != nil {
 			return err
 		}
 		return nil
@@ -260,7 +276,7 @@ func (d *Driver) Create(id string, parent string) (retErr error) {
 		if err := os.Mkdir(path.Join(dir, "work"), 0700); err != nil {
 			return err
 		}
-		if err := os.Mkdir(path.Join(dir, "merged"), 0700); err != nil {
+		if err := idtools.MkdirAs(path.Join(dir, "merged"), 0700, rootUID, rootGID); err != nil {
 			return err
 		}
 		if err := ioutil.WriteFile(path.Join(dir, "lower-id"), []byte(parent), 0666); err != nil {
@@ -293,7 +309,7 @@ func (d *Driver) Create(id string, parent string) (retErr error) {
 	if err := os.Mkdir(path.Join(dir, "work"), 0700); err != nil {
 		return err
 	}
-	if err := os.Mkdir(path.Join(dir, "merged"), 0700); err != nil {
+	if err := idtools.MkdirAs(path.Join(dir, "merged"), 0700, rootUID, rootGID); err != nil {
 		return err
 	}
 
@@ -349,6 +365,12 @@ func (d *Driver) Get(id string, mountLabel string) (string, error) {
 	if err := syscall.Mount("overlay", mergedDir, "overlay", 0, label.FormatMountLabel(opts, mountLabel)); err != nil {
 		return "", fmt.Errorf("error creating overlay mount to %s: %v", mergedDir, err)
 	}
+	// chown "workdir/work" to the remapped root UID/GID. Overlay fs inside a
+	// user namespace requires this to move a directory from lower to upper.
+	rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
+	if err := os.Chown(path.Join(workDir, "work"), rootUID, rootGID); err != nil {
+		return "", err
+	}
 	mount.path = mergedDir
 	mount.mounted = true
 	d.active[id] = mount
@@ -431,7 +453,8 @@ func (d *Driver) ApplyDiff(id string, parent string, diff archive.Reader) (size
 		return 0, err
 	}
 
-	if size, err = chrootarchive.ApplyUncompressedLayer(tmpRootDir, diff); err != nil {
+	options := &archive.TarOptions{UIDMaps: d.uidMaps, GIDMaps: d.gidMaps}
+	if size, err = chrootarchive.ApplyUncompressedLayer(tmpRootDir, diff, options); err != nil {
 		return 0, err
 	}
 

+ 23 - 7
daemon/graphdriver/vfs/driver.go

@@ -9,7 +9,8 @@ import (
 
 	"github.com/docker/docker/daemon/graphdriver"
 	"github.com/docker/docker/pkg/chrootarchive"
-	"github.com/docker/docker/pkg/system"
+	"github.com/docker/docker/pkg/idtools"
+
 	"github.com/opencontainers/runc/libcontainer/label"
 )
 
@@ -19,11 +20,20 @@ func init() {
 
 // Init returns a new VFS driver.
 // This sets the home directory for the driver and returns NaiveDiffDriver.
-func Init(home string, options []string) (graphdriver.Driver, error) {
+func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
 	d := &Driver{
-		home: home,
+		home:    home,
+		uidMaps: uidMaps,
+		gidMaps: gidMaps,
+	}
+	rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
+	if err != nil {
+		return nil, err
+	}
+	if err := idtools.MkdirAllAs(home, 0700, rootUID, rootGID); err != nil {
+		return nil, err
 	}
-	return graphdriver.NewNaiveDiffDriver(d), nil
+	return graphdriver.NewNaiveDiffDriver(d, uidMaps, gidMaps), nil
 }
 
 // Driver holds information about the driver, home directory of the driver.
@@ -31,7 +41,9 @@ func Init(home string, options []string) (graphdriver.Driver, error) {
 // In order to support layering, files are copied from the parent layer into the new layer. There is no copy-on-write support.
 // Driver must be wrapped in NaiveDiffDriver to be used as a graphdriver.Driver
 type Driver struct {
-	home string
+	home    string
+	uidMaps []idtools.IDMap
+	gidMaps []idtools.IDMap
 }
 
 func (d *Driver) String() string {
@@ -56,10 +68,14 @@ func (d *Driver) Cleanup() error {
 // Create prepares the filesystem for the VFS driver and copies the directory for the given id under the parent.
 func (d *Driver) Create(id, parent string) error {
 	dir := d.dir(id)
-	if err := system.MkdirAll(filepath.Dir(dir), 0700); err != nil {
+	rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
+	if err != nil {
+		return err
+	}
+	if err := idtools.MkdirAllAs(filepath.Dir(dir), 0700, rootUID, rootGID); err != nil {
 		return err
 	}
-	if err := os.Mkdir(dir, 0755); err != nil {
+	if err := idtools.MkdirAs(dir, 0755, rootUID, rootGID); err != nil {
 		return err
 	}
 	opts := []string{"level:s0"}

+ 4 - 3
daemon/graphdriver/windows/windows.go

@@ -21,6 +21,7 @@ import (
 	"github.com/docker/docker/image"
 	"github.com/docker/docker/pkg/archive"
 	"github.com/docker/docker/pkg/chrootarchive"
+	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/pkg/ioutils"
 	"github.com/docker/docker/pkg/random"
 	"github.com/microsoft/hcsshim"
@@ -50,7 +51,7 @@ type Driver struct {
 }
 
 // InitFilter returns a new Windows storage filter driver.
-func InitFilter(home string, options []string) (graphdriver.Driver, error) {
+func InitFilter(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
 	logrus.Debugf("WindowsGraphDriver InitFilter at %s", home)
 	d := &Driver{
 		info: hcsshim.DriverInfo{
@@ -63,7 +64,7 @@ func InitFilter(home string, options []string) (graphdriver.Driver, error) {
 }
 
 // InitDiff returns a new Windows differencing disk driver.
-func InitDiff(home string, options []string) (graphdriver.Driver, error) {
+func InitDiff(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
 	logrus.Debugf("WindowsGraphDriver InitDiff at %s", home)
 	d := &Driver{
 		info: hcsshim.DriverInfo{
@@ -328,7 +329,7 @@ func (d *Driver) ApplyDiff(id, parent string, diff archive.Reader) (size int64,
 		logrus.Debugf("WindowsGraphDriver ApplyDiff: Start untar layer")
 		destination := d.dir(id)
 		destination = filepath.Dir(destination)
-		if size, err = chrootarchive.ApplyUncompressedLayer(destination, diff); err != nil {
+		if size, err = chrootarchive.ApplyUncompressedLayer(destination, diff, nil); err != nil {
 			return
 		}
 		logrus.Debugf("WindowsGraphDriver ApplyDiff: Untar time: %vs", time.Now().UTC().Sub(start).Seconds())

+ 13 - 4
daemon/graphdriver/zfs/zfs.go

@@ -15,6 +15,7 @@ import (
 
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/docker/daemon/graphdriver"
+	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/pkg/mount"
 	"github.com/docker/docker/pkg/parsers"
 	zfs "github.com/mistifyio/go-zfs"
@@ -41,7 +42,7 @@ func (*Logger) Log(cmd []string) {
 // Init returns a new ZFS driver.
 // It takes a base mount path and an array of options which are represented as key-value pairs.
 // Each option is in the form key=value. 'zfs.fsname' is expected to be a valid key in the options.
-func Init(base string, opt []string) (graphdriver.Driver, error) {
+func Init(base string, opt []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
 	var err error
 
 	if _, err := exec.LookPath("zfs"); err != nil {
@@ -102,8 +103,10 @@ func Init(base string, opt []string) (graphdriver.Driver, error) {
 		dataset:          rootDataset,
 		options:          options,
 		filesystemsCache: filesystemsCache,
+		uidMaps:          uidMaps,
+		gidMaps:          gidMaps,
 	}
-	return graphdriver.NewNaiveDiffDriver(d), nil
+	return graphdriver.NewNaiveDiffDriver(d, uidMaps, gidMaps), nil
 }
 
 func parseOptions(opt []string) (zfsOptions, error) {
@@ -156,6 +159,8 @@ type Driver struct {
 	options          zfsOptions
 	sync.Mutex       // protects filesystem cache against concurrent access
 	filesystemsCache map[string]bool
+	uidMaps          []idtools.IDMap
+	gidMaps          []idtools.IDMap
 }
 
 func (d *Driver) String() string {
@@ -294,12 +299,16 @@ func (d *Driver) Get(id, mountLabel string) (string, error) {
 	options := label.FormatMountLabel("", mountLabel)
 	logrus.Debugf(`[zfs] mount("%s", "%s", "%s")`, filesystem, mountpoint, options)
 
+	rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
+	if err != nil {
+		return "", err
+	}
 	// Create the target directories if they don't exist
-	if err := os.MkdirAll(mountpoint, 0755); err != nil {
+	if err := idtools.MkdirAllAs(mountpoint, 0755, rootUID, rootGID); err != nil {
 		return "", err
 	}
 
-	err := mount.Mount(filesystem, mountpoint, "zfs", options)
+	err = mount.Mount(filesystem, mountpoint, "zfs", options)
 	if err != nil {
 		return "", fmt.Errorf("error creating zfs mount of %s to %s: %v", filesystem, mountpoint, err)
 	}

+ 11 - 1
daemon/volumes_unix.go

@@ -55,7 +55,17 @@ func (container *Container) setupMounts() ([]execdriver.Mount, error) {
 	}
 
 	mounts = sortMounts(mounts)
-	return append(mounts, container.networkMounts()...), nil
+	netMounts := container.networkMounts()
+	// if we are going to mount any of the network files from container
+	// metadata, the ownership must be set properly for potential container
+	// remapped root (user namespaces)
+	rootUID, rootGID := container.daemon.GetRemappedUIDGID()
+	for _, mount := range netMounts {
+		if err := os.Chown(mount.Source, rootUID, rootGID); err != nil {
+			return nil, err
+		}
+	}
+	return append(mounts, netMounts...), nil
 }
 
 // parseBindMount validates the configuration of mount information in runconfig is valid.

+ 3 - 0
experimental/README.md

@@ -71,6 +71,9 @@ to build a Docker binary with the experimental features enabled:
 
 ## Current experimental features
 
+ * [External graphdriver plugins](plugins_graphdriver.md)
+ * [User namespaces](userns.md)
+
 ## How to comment on an experimental feature
 
 Each feature's documentation includes a list of proposal pull requests or PRs associated with the feature. If you want to comment on or suggest a change to a feature, please add it to the existing feature PR.  

+ 120 - 0
experimental/userns.md

@@ -0,0 +1,120 @@
+# Experimental: User namespace support
+
+Linux kernel [user namespace support](http://man7.org/linux/man-pages/man7/user_namespaces.7.html) provides additional security by enabling
+a process--and therefore a container--to have a unique range of user and
+group IDs which are outside the traditional user and group range utilized by
+the host system. Potentially the most important security improvement is that,
+by default, container processes running as the `root` user will have expected
+administrative privilege (with some restrictions) inside the container but will
+effectively be mapped to an unprivileged `uid` on the host.
+
+In this experimental phase, the Docker daemon creates a single daemon-wide mapping
+for all containers running on the same engine instance. The mappings will
+utilize the existing subordinate user and group ID feature available on all modern
+Linux distributions.
+The [`/etc/subuid`](http://man7.org/linux/man-pages/man5/subuid.5.html) and 
+[`/etc/subgid`](http://man7.org/linux/man-pages/man5/subgid.5.html) files will be
+read for the user, and optional group, specified to the `--userns-remap` 
+parameter.  If you do not wish to specify your own user and/or group, you can 
+provide `default` as the value to this flag, and a user will be created on your behalf
+and provided subordinate uid and gid ranges. This default user will be named
+`dockremap`, and entries will be created for it in `/etc/passwd` and 
+`/etc/group` using your distro's standard user and group creation tools.
+
+> **Note**: The single mapping per-daemon restriction exists for this experimental
+> phase because Docker shares image layers from its local cache across all
+> containers running on the engine instance.  Since file ownership must be
+> the same for all containers sharing the same layer content, the decision
+> was made to map the file ownership on `docker pull` to the daemon's user and
+> group mappings so that there is no delay for running containers once the
+> content is downloaded--exactly the same performance characteristics as with
+> user namespaces disabled.
+
+## Starting the daemon with user namespaces enabled
+To enable this experimental user namespace support for a Docker daemon instance,
+start the daemon with the aforementioned `--userns-remap` flag, which accepts
+values in the following formats:
+
+ - uid
+ - uid:gid
+ - username
+ - username:groupname
+
+If numeric IDs are provided, translation back to valid user or group names
+will occur so that the subordinate uid and gid information can be read, given
+these resources are name-based, not id-based.  If the numeric ID information
+provided does not exist as entries in `/etc/passwd` or `/etc/group`, daemon
+startup will fail with an error message.
+
+*An example: starting with default Docker user management:*
+
+```
+     $ docker daemon --userns-remap=default
+```    
+In this case, Docker will create--or find the existing--user and group
+named `dockremap`. If the user is created, and the Linux distribution has
+appropriate support, the `/etc/subuid` and `/etc/subgid` files will be populated
+with a contiguous 65536 length range of subordinate user and group IDs, starting
+at an offset based on prior entries in those files.  For example, Ubuntu will
+create the following range, based on an existing user already having the first
+65536 range:
+
+```
+     $ cat /etc/subuid
+     user1:100000:65536
+     dockremap:165536:65536
+```
+
+> **Note:** On a fresh Fedora install, we found that we had to `touch` the
+> `/etc/subuid` and `/etc/subgid` files to have ranges assigned when users
+> were created.  Once these files existed, range assignment on user creation
+> worked properly.
+
+If you have a preferred/self-managed user with subordinate ID mappings already
+configured, you can provide that username or uid to the `--userns-remap` flag.
+If you have a group that doesn't match the username, you may provide the `gid`
+or group name as well; otherwise the username will be used as the group name
+when querying the system for the subordinate group ID range.
+
+## Detailed information on `subuid`/`subgid` ranges
+
+Given there may be advanced use of the subordinate ID ranges by power users, we will
+describe how the Docker daemon uses the range entries within these files under the
+current experimental user namespace support.
+
+The simplest case exists where only one contiguous range is defined for the
+provided user or group. In this case, Docker will use that entire contiguous
+range for the mapping of host uids and gids to the container process.  This 
+means that the first ID in the range will be the remapped root user, and the
+IDs above that initial ID will map host ID 1 through the end of the range.
+
+From the example `/etc/subuid` content shown above, that means the remapped root
+user would be uid 165536.
+
+If the system administrator has set up multiple ranges for a single user or
+group, the Docker daemon will read all the available ranges and use the
+following algorithm to create the mapping ranges:
+
+1. The ranges will be sorted by *start ID* ascending
+2. Maps will be created from each range, where the container ID will increment starting at 0 for the first range, 0+*range1* length for the second, and so on.  This means that the lowest range start ID will be the remapped root, and all further ranges will map IDs from 1 through the uid or gid that equals the sum of all range lengths.
+3. Range segments beyond the fifth will be ignored, as the kernel ignores any ID maps after the first five (in `/proc/self/{u,g}id_map`)
+
+## User namespace known restrictions
+
+The following standard Docker features are currently incompatible when
+running a Docker daemon with experimental user namespaces enabled:
+
+ - sharing namespaces with the host (--pid=host, --net=host, etc.)
+ - sharing namespaces with other containers (--net=container:*other*)
+ - A `--read-only` container filesystem (a Linux kernel restriction on remount with new flags of a currently mounted filesystem when inside a user namespace)
+ - external (volume/graph) drivers which are unaware/incapable of using daemon user mappings
+ - Using `--privileged` mode containers
+ - Using the lxc execdriver (only the `native` execdriver is enabled to use user namespaces)
+ - volume use without pre-arranging proper file ownership in mounted volumes
+
+Additionally, while the `root` user inside a user namespaced container
+process has many of the privileges of the administrative root user, the
+following operations will fail:
+
+ - Use of `mknod` - permission is denied for device creation by the container root
+ - others will be listed here when fully tested

+ 17 - 4
graph/graph.go

@@ -21,10 +21,10 @@ import (
 	"github.com/docker/docker/daemon/graphdriver"
 	"github.com/docker/docker/image"
 	"github.com/docker/docker/pkg/archive"
+	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/pkg/progressreader"
 	"github.com/docker/docker/pkg/streamformatter"
 	"github.com/docker/docker/pkg/stringid"
-	"github.com/docker/docker/pkg/system"
 	"github.com/docker/docker/pkg/truncindex"
 	"github.com/docker/docker/runconfig"
 	"github.com/vbatts/tar-split/tar/asm"
@@ -82,6 +82,8 @@ type Graph struct {
 	imageMutex       imageMutex // protect images in driver.
 	retained         *retainedLayers
 	tarSplitDisabled bool
+	uidMaps          []idtools.IDMap
+	gidMaps          []idtools.IDMap
 }
 
 // file names for ./graph/<ID>/
@@ -101,13 +103,18 @@ var (
 
 // NewGraph instantiates a new graph at the given root path in the filesystem.
 // `root` will be created if it doesn't exist.
-func NewGraph(root string, driver graphdriver.Driver) (*Graph, error) {
+func NewGraph(root string, driver graphdriver.Driver, uidMaps, gidMaps []idtools.IDMap) (*Graph, error) {
 	abspath, err := filepath.Abs(root)
 	if err != nil {
 		return nil, err
 	}
+
+	rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
+	if err != nil {
+		return nil, err
+	}
 	// Create the root directory if it doesn't exist
-	if err := system.MkdirAll(root, 0700); err != nil {
+	if err := idtools.MkdirAllAs(root, 0700, rootUID, rootGID); err != nil && !os.IsExist(err) {
 		return nil, err
 	}
 
@@ -116,6 +123,8 @@ func NewGraph(root string, driver graphdriver.Driver) (*Graph, error) {
 		idIndex:  truncindex.NewTruncIndex([]string{}),
 		driver:   driver,
 		retained: &retainedLayers{layerHolders: make(map[string]map[string]struct{})},
+		uidMaps:  uidMaps,
+		gidMaps:  gidMaps,
 	}
 
 	// Windows does not currently support tarsplit functionality.
@@ -325,7 +334,11 @@ func (graph *Graph) TempLayerArchive(id string, sf *streamformatter.StreamFormat
 // mktemp creates a temporary sub-directory inside the graph's filesystem.
 func (graph *Graph) mktemp() (string, error) {
 	dir := filepath.Join(graph.root, "_tmp", stringid.GenerateNonCryptoID())
-	if err := system.MkdirAll(dir, 0700); err != nil {
+	rootUID, rootGID, err := idtools.GetRootUIDGID(graph.uidMaps, graph.gidMaps)
+	if err != nil {
+		return "", err
+	}
+	if err := idtools.MkdirAllAs(dir, 0700, rootUID, rootGID); err != nil {
 		return "", err
 	}
 	return dir, nil

+ 2 - 2
graph/graph_test.go

@@ -281,11 +281,11 @@ func tempGraph(t *testing.T) (*Graph, graphdriver.Driver) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	driver, err := graphdriver.New(tmp, nil)
+	driver, err := graphdriver.New(tmp, nil, nil, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
-	graph, err := NewGraph(tmp, driver)
+	graph, err := NewGraph(tmp, driver, nil, nil)
 	if err != nil {
 		t.Fatal(err)
 	}

+ 2 - 2
graph/tags_unit_test.go

@@ -54,11 +54,11 @@ func fakeTar() (io.Reader, error) {
 }
 
 func mkTestTagStore(root string, t *testing.T) *TagStore {
-	driver, err := graphdriver.New(root, nil)
+	driver, err := graphdriver.New(root, nil, nil, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
-	graph, err := NewGraph(root, driver)
+	graph, err := NewGraph(root, driver, nil, nil)
 	if err != nil {
 		t.Fatal(err)
 	}

+ 2 - 1
hack/make.sh

@@ -96,7 +96,7 @@ if [ ! "$GOPATH" ]; then
 	exit 1
 fi
 
-if [ "$DOCKER_EXPERIMENTAL" ]; then
+if [ "$DOCKER_EXPERIMENTAL" ] || [ "$DOCKER_REMAP_ROOT" ]; then
 	echo >&2 '# WARNING! DOCKER_EXPERIMENTAL is set: building experimental features'
 	echo >&2
 	DOCKER_BUILDTAGS+=" experimental"
@@ -220,6 +220,7 @@ test_env() {
 		DOCKER_GRAPHDRIVER="$DOCKER_GRAPHDRIVER" \
 		DOCKER_USERLANDPROXY="$DOCKER_USERLANDPROXY" \
 		DOCKER_HOST="$DOCKER_HOST" \
+		DOCKER_REMAP_ROOT="$DOCKER_REMAP_ROOT" \
 		DOCKER_REMOTE_DAEMON="$DOCKER_REMOTE_DAEMON" \
 		GOPATH="$GOPATH" \
 		HOME="$ABS_DEST/fake-HOME" \

+ 7 - 0
hack/make/.integration-daemon-start

@@ -26,6 +26,12 @@ if [ -n "$DOCKER_STORAGE_OPTS" ]; then
 	unset IFS
 fi
 
+# example usage: DOCKER_REMAP_ROOT=default
+extra_params=""
+if [ "$DOCKER_REMAP_ROOT" ]; then
+	extra_params="--userns-remap $DOCKER_REMAP_ROOT"
+fi
+
 if [ -z "$DOCKER_TEST_HOST" ]; then
 	# Start apparmor if it is enabled
 	if [ -e "/sys/module/apparmor/parameters/enabled" ] && [ "$(cat /sys/module/apparmor/parameters/enabled)" == "Y" ]; then
@@ -47,6 +53,7 @@ if [ -z "$DOCKER_TEST_HOST" ]; then
 		--pidfile "$DEST/docker.pid" \
 		--userland-proxy="$DOCKER_USERLANDPROXY" \
 		$storage_params \
+		$extra_params \
 			&> "$DEST/docker.log"
 	) &
 	# make sure that if the script exits unexpectedly, we stop this daemon we just started

+ 1 - 0
integration-cli/docker_api_build_test.go

@@ -45,6 +45,7 @@ func (s *DockerSuite) TestBuildApiDockerfilePath(c *check.C) {
 }
 
 func (s *DockerSuite) TestBuildApiDockerFileRemote(c *check.C) {
+	testRequires(c, NotUserNamespace)
 	testRequires(c, DaemonIsLinux)
 	server, err := fakeStorage(map[string]string{
 		"testD": `FROM busybox

+ 3 - 1
integration-cli/docker_api_containers_test.go

@@ -1487,7 +1487,9 @@ func (s *DockerSuite) TestContainersApiCreateNoHostConfig118(c *check.C) {
 // extract an archive to a symlink in a writable volume which points to a
 // directory outside of the volume.
 func (s *DockerSuite) TestPutContainerArchiveErrSymlinkInVolumeToReadOnlyRootfs(c *check.C) {
-	testRequires(c, SameHostDaemon) // Requires local volume mount bind.
+	// Requires local volume mount bind.
+	// --read-only + userns has remount issues
+	testRequires(c, SameHostDaemon, NotUserNamespace)
 
 	testVol := getTestDir(c, "test-put-container-archive-err-symlink-in-volume-to-read-only-rootfs-")
 	defer os.RemoveAll(testVol)

+ 22 - 5
integration-cli/docker_cli_build_test.go

@@ -2183,6 +2183,8 @@ func (s *DockerSuite) TestBuildWorkdirWithEnvVariables(c *check.C) {
 }
 
 func (s *DockerSuite) TestBuildRelativeCopy(c *check.C) {
+	// cat /test1/test2/foo gets permission denied for the user
+	testRequires(c, NotUserNamespace)
 	testRequires(c, DaemonIsLinux)
 	name := "testbuildrelativecopy"
 	dockerfile := `
@@ -2248,7 +2250,7 @@ func (s *DockerSuite) TestBuildContextCleanup(c *check.C) {
 	testRequires(c, SameHostDaemon)
 
 	name := "testbuildcontextcleanup"
-	entries, err := ioutil.ReadDir("/var/lib/docker/tmp")
+	entries, err := ioutil.ReadDir(filepath.Join(dockerBasePath, "tmp"))
 	if err != nil {
 		c.Fatalf("failed to list contents of tmp dir: %s", err)
 	}
@@ -2259,7 +2261,7 @@ func (s *DockerSuite) TestBuildContextCleanup(c *check.C) {
 	if err != nil {
 		c.Fatal(err)
 	}
-	entriesFinal, err := ioutil.ReadDir("/var/lib/docker/tmp")
+	entriesFinal, err := ioutil.ReadDir(filepath.Join(dockerBasePath, "tmp"))
 	if err != nil {
 		c.Fatalf("failed to list contents of tmp dir: %s", err)
 	}
@@ -2274,7 +2276,7 @@ func (s *DockerSuite) TestBuildContextCleanupFailedBuild(c *check.C) {
 	testRequires(c, SameHostDaemon)
 
 	name := "testbuildcontextcleanup"
-	entries, err := ioutil.ReadDir("/var/lib/docker/tmp")
+	entries, err := ioutil.ReadDir(filepath.Join(dockerBasePath, "tmp"))
 	if err != nil {
 		c.Fatalf("failed to list contents of tmp dir: %s", err)
 	}
@@ -2285,7 +2287,7 @@ func (s *DockerSuite) TestBuildContextCleanupFailedBuild(c *check.C) {
 	if err == nil {
 		c.Fatalf("expected build to fail, but it didn't")
 	}
-	entriesFinal, err := ioutil.ReadDir("/var/lib/docker/tmp")
+	entriesFinal, err := ioutil.ReadDir(filepath.Join(dockerBasePath, "tmp"))
 	if err != nil {
 		c.Fatalf("failed to list contents of tmp dir: %s", err)
 	}
@@ -2683,6 +2685,8 @@ func (s *DockerSuite) TestBuildConditionalCache(c *check.C) {
 }
 
 func (s *DockerSuite) TestBuildAddLocalFileWithCache(c *check.C) {
+	// local files are not owned by the correct user
+	testRequires(c, NotUserNamespace)
 	testRequires(c, DaemonIsLinux)
 	name := "testbuildaddlocalfilewithcache"
 	name2 := "testbuildaddlocalfilewithcache2"
@@ -2741,6 +2745,8 @@ func (s *DockerSuite) TestBuildAddMultipleLocalFileWithCache(c *check.C) {
 }
 
 func (s *DockerSuite) TestBuildAddLocalFileWithoutCache(c *check.C) {
+	// local files are not owned by the correct user
+	testRequires(c, NotUserNamespace)
 	testRequires(c, DaemonIsLinux)
 	name := "testbuildaddlocalfilewithoutcache"
 	name2 := "testbuildaddlocalfilewithoutcache2"
@@ -3862,6 +3868,8 @@ RUN [ "$(id -u):$(id -g)/$(id -un):$(id -gn)/$(id -G):$(id -Gn)" = '1042:1043/10
 }
 
 func (s *DockerSuite) TestBuildEnvUsage(c *check.C) {
+	// /docker/world/hello is not owned by the correct user
+	testRequires(c, NotUserNamespace)
 	testRequires(c, DaemonIsLinux)
 	name := "testbuildenvusage"
 	dockerfile := `FROM busybox
@@ -3898,6 +3906,8 @@ RUN    [ "$ghi" = "def" ]
 }
 
 func (s *DockerSuite) TestBuildEnvUsage2(c *check.C) {
+	// /docker/world/hello is not owned by the correct user
+	testRequires(c, NotUserNamespace)
 	testRequires(c, DaemonIsLinux)
 	name := "testbuildenvusage2"
 	dockerfile := `FROM busybox
@@ -4024,6 +4034,8 @@ RUN [ "$(cat /testfile)" = 'test!' ]`
 }
 
 func (s *DockerSuite) TestBuildAddTar(c *check.C) {
+	// /test/foo is not owned by the correct user
+	testRequires(c, NotUserNamespace)
 	testRequires(c, DaemonIsLinux)
 	name := "testbuildaddtar"
 
@@ -4080,7 +4092,8 @@ RUN cat /existing-directory-trailing-slash/test/foo | grep Hi`
 }
 
 func (s *DockerSuite) TestBuildAddTarXz(c *check.C) {
-	testRequires(c, DaemonIsLinux)
+	// /test/foo is not owned by the correct user
+	testRequires(c, NotUserNamespace)
 	testRequires(c, DaemonIsLinux)
 	name := "testbuildaddtarxz"
 
@@ -4839,6 +4852,8 @@ func (s *DockerSuite) TestBuildSymlinkBreakout(c *check.C) {
 }
 
 func (s *DockerSuite) TestBuildXZHost(c *check.C) {
+	// /usr/local/sbin/xz gets permission denied for the user
+	testRequires(c, NotUserNamespace)
 	testRequires(c, DaemonIsLinux)
 	name := "testbuildxzhost"
 
@@ -4867,6 +4882,8 @@ RUN [ ! -e /injected ]`,
 }
 
 func (s *DockerSuite) TestBuildVolumesRetainContents(c *check.C) {
+	// /foo/file gets permission denied for the user
+	testRequires(c, NotUserNamespace)
 	testRequires(c, DaemonIsLinux)
 	var (
 		name     = "testbuildvolumescontent"

+ 5 - 3
integration-cli/docker_cli_cp_test.go

@@ -559,7 +559,7 @@ func (s *DockerSuite) TestCpSpecialFiles(c *check.C) {
 	// Copy actual /etc/resolv.conf
 	dockerCmd(c, "cp", cleanedContainerID+":/etc/resolv.conf", outDir)
 
-	expected, err := ioutil.ReadFile("/var/lib/docker/containers/" + cleanedContainerID + "/resolv.conf")
+	expected, err := readContainerFile(cleanedContainerID, "resolv.conf")
 	actual, err := ioutil.ReadFile(outDir + "/resolv.conf")
 
 	if !bytes.Equal(actual, expected) {
@@ -569,7 +569,7 @@ func (s *DockerSuite) TestCpSpecialFiles(c *check.C) {
 	// Copy actual /etc/hosts
 	dockerCmd(c, "cp", cleanedContainerID+":/etc/hosts", outDir)
 
-	expected, err = ioutil.ReadFile("/var/lib/docker/containers/" + cleanedContainerID + "/hosts")
+	expected, err = readContainerFile(cleanedContainerID, "hosts")
 	actual, err = ioutil.ReadFile(outDir + "/hosts")
 
 	if !bytes.Equal(actual, expected) {
@@ -579,7 +579,7 @@ func (s *DockerSuite) TestCpSpecialFiles(c *check.C) {
 	// Copy actual /etc/resolv.conf
 	dockerCmd(c, "cp", cleanedContainerID+":/etc/hostname", outDir)
 
-	expected, err = ioutil.ReadFile("/var/lib/docker/containers/" + cleanedContainerID + "/hostname")
+	expected, err = readContainerFile(cleanedContainerID, "hostname")
 	actual, err = ioutil.ReadFile(outDir + "/hostname")
 
 	if !bytes.Equal(actual, expected) {
@@ -589,6 +589,8 @@ func (s *DockerSuite) TestCpSpecialFiles(c *check.C) {
 }
 
 func (s *DockerSuite) TestCpVolumePath(c *check.C) {
+	//  stat /tmp/cp-test-volumepath851508420/test gets permission denied for the user
+	testRequires(c, NotUserNamespace)
 	testRequires(c, DaemonIsLinux)
 	testRequires(c, SameHostDaemon)
 

+ 6 - 2
integration-cli/docker_cli_cp_to_container_test.go

@@ -153,6 +153,8 @@ func (s *DockerSuite) TestCpToErrDstNotDir(c *check.C) {
 // Check that copying from a local path to a symlink in a container copies to
 // the symlink target and does not overwrite the container symlink itself.
 func (s *DockerSuite) TestCpToSymlinkDestination(c *check.C) {
+	//  stat /tmp/test-cp-to-symlink-destination-262430901/vol3 gets permission denied for the user
+	testRequires(c, NotUserNamespace)
 	testRequires(c, DaemonIsLinux)
 	testRequires(c, SameHostDaemon) // Requires local volume mount bind.
 
@@ -699,7 +701,8 @@ func (s *DockerSuite) TestCpToCaseJ(c *check.C) {
 // The `docker cp` command should also ensure that you cannot
 // write to a container rootfs that is marked as read-only.
 func (s *DockerSuite) TestCpToErrReadOnlyRootfs(c *check.C) {
-	testRequires(c, DaemonIsLinux)
+	// --read-only + userns has remount issues
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	tmpDir := getTestDir(c, "test-cp-to-err-read-only-rootfs")
 	defer os.RemoveAll(tmpDir)
 
@@ -732,7 +735,8 @@ func (s *DockerSuite) TestCpToErrReadOnlyRootfs(c *check.C) {
 // The `docker cp` command should also ensure that you
 // cannot write to a volume that is mounted as read-only.
 func (s *DockerSuite) TestCpToErrReadOnlyVolume(c *check.C) {
-	testRequires(c, DaemonIsLinux)
+	// --read-only + userns has remount issues
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	tmpDir := getTestDir(c, "test-cp-to-err-read-only-volume")
 	defer os.RemoveAll(tmpDir)
 

+ 1 - 1
integration-cli/docker_cli_create_test.go

@@ -276,7 +276,7 @@ func (s *DockerSuite) TestCreateRM(c *check.C) {
 
 func (s *DockerSuite) TestCreateModeIpcContainer(c *check.C) {
 	testRequires(c, DaemonIsLinux)
-	testRequires(c, SameHostDaemon)
+	testRequires(c, SameHostDaemon, NotUserNamespace)
 
 	out, _ := dockerCmd(c, "create", "busybox")
 	id := strings.TrimSpace(out)

+ 4 - 4
integration-cli/docker_cli_daemon_test.go

@@ -1075,7 +1075,7 @@ func (s *DockerDaemonSuite) TestDaemonLoggingDriverDefault(c *check.C) {
 	if out, err := s.d.Cmd("wait", id); err != nil {
 		c.Fatal(out, err)
 	}
-	logPath := filepath.Join(s.d.folder, "graph", "containers", id, id+"-json.log")
+	logPath := filepath.Join(s.d.root, "containers", id, id+"-json.log")
 
 	if _, err := os.Stat(logPath); err != nil {
 		c.Fatal(err)
@@ -1117,7 +1117,7 @@ func (s *DockerDaemonSuite) TestDaemonLoggingDriverDefaultOverride(c *check.C) {
 	if out, err := s.d.Cmd("wait", id); err != nil {
 		c.Fatal(out, err)
 	}
-	logPath := filepath.Join(s.d.folder, "graph", "containers", id, id+"-json.log")
+	logPath := filepath.Join(s.d.root, "containers", id, id+"-json.log")
 
 	if _, err := os.Stat(logPath); err == nil || !os.IsNotExist(err) {
 		c.Fatalf("%s shouldn't exits, error on Stat: %s", logPath, err)
@@ -1159,7 +1159,7 @@ func (s *DockerDaemonSuite) TestDaemonLoggingDriverNoneOverride(c *check.C) {
 	if out, err := s.d.Cmd("wait", id); err != nil {
 		c.Fatal(out, err)
 	}
-	logPath := filepath.Join(s.d.folder, "graph", "containers", id, id+"-json.log")
+	logPath := filepath.Join(s.d.root, "containers", id, id+"-json.log")
 
 	if _, err := os.Stat(logPath); err != nil {
 		c.Fatal(err)
@@ -1483,7 +1483,7 @@ func (s *DockerDaemonSuite) TestCleanupMountsAfterCrash(c *check.C) {
 }
 
 func (s *DockerDaemonSuite) TestRunContainerWithBridgeNone(c *check.C) {
-	testRequires(c, NativeExecDriver)
+	testRequires(c, NativeExecDriver, NotUserNamespace)
 	c.Assert(s.d.StartWithBusybox("-b", "none"), check.IsNil)
 
 	out, err := s.d.Cmd("run", "--rm", "busybox", "ip", "l")

+ 4 - 2
integration-cli/docker_cli_exec_test.go

@@ -265,6 +265,7 @@ func (s *DockerSuite) TestExecStopNotHanging(c *check.C) {
 }
 
 func (s *DockerSuite) TestExecCgroup(c *check.C) {
+	testRequires(c, NotUserNamespace)
 	testRequires(c, DaemonIsLinux)
 	dockerCmd(c, "run", "-d", "--name", "testing", "busybox", "top")
 
@@ -547,7 +548,7 @@ func (s *DockerSuite) TestExecWithUser(c *check.C) {
 }
 
 func (s *DockerSuite) TestExecWithPrivileged(c *check.C) {
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	// Start main loop which attempts mknod repeatedly
 	dockerCmd(c, "run", "-d", "--name", "parent", "--cap-drop=ALL", "busybox", "sh", "-c", `while (true); do if [ -e /exec_priv ]; then cat /exec_priv && mknod /tmp/sda b 8 0 && echo "Success"; else echo "Privileged exec has not run yet"; fi; usleep 10000; done`)
 
@@ -605,7 +606,8 @@ func (s *DockerSuite) TestExecWithImageUser(c *check.C) {
 }
 
 func (s *DockerSuite) TestExecOnReadonlyContainer(c *check.C) {
-	testRequires(c, DaemonIsLinux)
+	// --read-only + userns has remount issues
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	dockerCmd(c, "run", "-d", "--read-only", "--name", "parent", "busybox", "top")
 	if _, status := dockerCmd(c, "exec", "parent", "true"); status != 0 {
 		c.Fatalf("exec into a read-only container failed with exit status %d", status)

+ 61 - 0
integration-cli/docker_cli_experimental_test.go

@@ -3,8 +3,15 @@
 package main
 
 import (
+	"fmt"
+	"io/ioutil"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strconv"
 	"strings"
 
+	"github.com/docker/docker/pkg/system"
 	"github.com/go-check/check"
 )
 
@@ -21,3 +28,57 @@ func (s *DockerSuite) TestExperimentalVersion(c *check.C) {
 		c.Fatalf("docker version did not contain experimental: %s", out)
 	}
 }
+
+// TestDaemonUserNamespaceRootSetting runs the daemon with a remapped root
+// setting (--userns-remap default) and then:
+// 1. validates that the container's uid/gid maps are set properly
+// 2. verifies that files created by the container are owned by the remapped root
+func (s *DockerDaemonSuite) TestDaemonUserNamespaceRootSetting(c *check.C) {
+	testRequires(c, NativeExecDriver)
+	testRequires(c, SameHostDaemon)
+
+	c.Assert(s.d.StartWithBusybox("--userns-remap", "default"), check.IsNil)
+
+	tmpDir, err := ioutil.TempDir("", "userns")
+	if err != nil {
+		c.Fatal(err)
+	}
+	defer os.RemoveAll(tmpDir)
+
+	// we need to find the uid and gid of the remapped root from the daemon's root dir info:
+	// the base name of the root dir is split on "." into exactly a uid and a gid
+	uidgid := strings.Split(filepath.Base(s.d.root), ".")
+	c.Assert(len(uidgid), check.Equals, 2, check.Commentf("Should have gotten uid/gid strings from root dirname: %s", filepath.Base(s.d.root)))
+	uid, err := strconv.Atoi(uidgid[0])
+	c.Assert(err, check.IsNil, check.Commentf("Can't parse uid: %v", err))
+	gid, err := strconv.Atoi(uidgid[1])
+	c.Assert(err, check.IsNil, check.Commentf("Can't parse gid: %v", err))
+
+	// hand the bind-mounted dir to the remapped root UID/GID pair so that the
+	// container can create a file in it
+	c.Assert(os.Chown(tmpDir, uid, gid), check.IsNil)
+
+	out, err := s.d.Cmd("run", "-d", "--name", "userns", "-v", tmpDir+":/goofy", "busybox", "sh", "-c", "touch /goofy/testfile; top")
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
+
+	pid, err := s.d.Cmd("inspect", "--format='{{.State.Pid}}'", "userns")
+	if err != nil {
+		c.Fatalf("Could not inspect running container: out: %q; err: %v", pid, err)
+	}
+	procDir := "/proc/" + strings.TrimSpace(pid)
+
+	// check the uid and gid maps for the PID to ensure root is remapped
+	// (cmd = cat /proc/<pid>/uid_map | grep -E '0[[:space:]]+<uid>[[:space:]]+')
+	out, rc1, err := runCommandPipelineWithOutput(
+		exec.Command("cat", procDir+"/uid_map"),
+		exec.Command("grep", "-E", fmt.Sprintf("0[[:space:]]+%d[[:space:]]+", uid)))
+	c.Assert(rc1, check.Equals, 0, check.Commentf("Didn't match uid_map: output: %s", out))
+	// the exit code alone does not tell us the pipeline actually ran; also
+	// check the returned error instead of silently overwriting it
+	c.Assert(err, check.IsNil, check.Commentf("uid_map pipeline failed: output: %s", out))
+
+	out, rc2, err := runCommandPipelineWithOutput(
+		exec.Command("cat", procDir+"/gid_map"),
+		exec.Command("grep", "-E", fmt.Sprintf("0[[:space:]]+%d[[:space:]]+", gid)))
+	c.Assert(rc2, check.Equals, 0, check.Commentf("Didn't match gid_map: output: %s", out))
+	c.Assert(err, check.IsNil, check.Commentf("gid_map pipeline failed: output: %s", out))
+
+	// check that the touched file is owned by remapped uid:gid
+	stat, err := system.Stat(filepath.Join(tmpDir, "testfile"))
+	if err != nil {
+		c.Fatal(err)
+	}
+	c.Assert(stat.UID(), check.Equals, uint32(uid), check.Commentf("Touched file not owned by remapped root UID"))
+	c.Assert(stat.Gid(), check.Equals, uint32(gid), check.Commentf("Touched file not owned by remapped root GID"))
+}

+ 35 - 25
integration-cli/docker_cli_external_graphdriver_unix_test.go

@@ -83,7 +83,7 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
 		w.Header().Set("Content-Type", "appplication/vnd.docker.plugins.v1+json")
 		switch t := data.(type) {
 		case error:
-			fmt.Fprintln(w, fmt.Sprintf(`{"Err": %s}`, t.Error()))
+			fmt.Fprintln(w, fmt.Sprintf(`{"Err": %q}`, t.Error()))
 		case string:
 			fmt.Fprintln(w, t)
 		default:
@@ -91,13 +91,21 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
 		}
 	}
 
+	// decReq decodes the JSON request body b into out, which must be a
+	// non-nil pointer, and always closes b. On a decode failure it writes a
+	// 500 response to w and returns the error so that the calling handler
+	// stops processing the request.
+	decReq := func(b io.ReadCloser, out interface{}, w http.ResponseWriter) error {
+		defer b.Close()
+		if err := json.NewDecoder(b).Decode(out); err != nil {
+			http.Error(w, fmt.Sprintf("error decoding json: %s", err.Error()), 500)
+			// http.Error only writes the response; the failure has to be
+			// propagated or the caller's error check never fires.
+			return err
+		}
+		return nil
+	}
+
 	base, err := ioutil.TempDir("", "external-graph-test")
 	c.Assert(err, check.IsNil)
-	vfsProto, err := vfs.Init(base, []string{})
+	vfsProto, err := vfs.Init(base, []string{}, nil, nil)
 	if err != nil {
 		c.Fatalf("error initializing graph driver: %v", err)
 	}
-	driver := graphdriver.NewNaiveDiffDriver(vfsProto)
+	driver := graphdriver.NewNaiveDiffDriver(vfsProto, nil, nil)
 
 	mux.HandleFunc("/Plugin.Activate", func(w http.ResponseWriter, r *http.Request) {
 		s.ec.activations++
@@ -113,8 +121,7 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
 		s.ec.creations++
 
 		var req graphDriverRequest
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-			http.Error(w, err.Error(), 500)
+		if err := decReq(r.Body, &req, w); err != nil {
 			return
 		}
 		if err := driver.Create(req.ID, req.Parent); err != nil {
@@ -128,8 +135,7 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
 		s.ec.removals++
 
 		var req graphDriverRequest
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-			http.Error(w, err.Error(), 500)
+		if err := decReq(r.Body, &req, w); err != nil {
 			return
 		}
 
@@ -144,8 +150,8 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
 		s.ec.gets++
 
 		var req graphDriverRequest
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-			http.Error(w, err.Error(), 500)
+		if err := decReq(r.Body, &req, w); err != nil {
+			return
 		}
 
 		dir, err := driver.Get(req.ID, req.MountLabel)
@@ -160,8 +166,7 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
 		s.ec.puts++
 
 		var req graphDriverRequest
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-			http.Error(w, err.Error(), 500)
+		if err := decReq(r.Body, &req, w); err != nil {
 			return
 		}
 
@@ -176,8 +181,7 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
 		s.ec.exists++
 
 		var req graphDriverRequest
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-			http.Error(w, err.Error(), 500)
+		if err := decReq(r.Body, &req, w); err != nil {
 			return
 		}
 		respond(w, &graphDriverResponse{Exists: driver.Exists(req.ID)})
@@ -185,7 +189,7 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
 
 	mux.HandleFunc("/GraphDriver.Status", func(w http.ResponseWriter, r *http.Request) {
 		s.ec.stats++
-		respond(w, `{"Status":{}}`)
+		respond(w, &graphDriverResponse{Status: driver.Status()})
 	})
 
 	mux.HandleFunc("/GraphDriver.Cleanup", func(w http.ResponseWriter, r *http.Request) {
@@ -202,8 +206,7 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
 		s.ec.metadata++
 
 		var req graphDriverRequest
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-			http.Error(w, err.Error(), 500)
+		if err := decReq(r.Body, &req, w); err != nil {
 			return
 		}
 
@@ -219,8 +222,7 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
 		s.ec.diff++
 
 		var req graphDriverRequest
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-			http.Error(w, err.Error(), 500)
+		if err := decReq(r.Body, &req, w); err != nil {
 			return
 		}
 
@@ -235,8 +237,7 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
 	mux.HandleFunc("/GraphDriver.Changes", func(w http.ResponseWriter, r *http.Request) {
 		s.ec.changes++
 		var req graphDriverRequest
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-			http.Error(w, err.Error(), 500)
+		if err := decReq(r.Body, &req, w); err != nil {
 			return
 		}
 
@@ -250,10 +251,17 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
 
 	mux.HandleFunc("/GraphDriver.ApplyDiff", func(w http.ResponseWriter, r *http.Request) {
 		s.ec.applydiff++
+		var diff archive.Reader = r.Body
+		defer r.Body.Close()
+
 		id := r.URL.Query().Get("id")
 		parent := r.URL.Query().Get("parent")
 
-		size, err := driver.ApplyDiff(id, parent, r.Body)
+		// An id is required. http.Error only writes the response and does not
+		// end the handler, so return explicitly instead of falling through to
+		// ApplyDiff with an empty id.
+		if id == "" {
+			http.Error(w, "missing id", 409)
+			return
+		}
+
+		size, err := driver.ApplyDiff(id, parent, diff)
 		if err != nil {
 			respond(w, err)
 			return
@@ -265,8 +273,7 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
 		s.ec.diffsize++
 
 		var req graphDriverRequest
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-			http.Error(w, err.Error(), 500)
+		if err := decReq(r.Body, &req, w); err != nil {
 			return
 		}
 
@@ -296,7 +303,10 @@ func (s *DockerExternalGraphdriverSuite) TearDownSuite(c *check.C) {
 }
 
 func (s *DockerExternalGraphdriverSuite) TestExternalGraphDriver(c *check.C) {
-	c.Assert(s.d.StartWithBusybox("-s", "test-external-graph-driver"), check.IsNil)
+	if err := s.d.StartWithBusybox("-s", "test-external-graph-driver"); err != nil {
+		b, _ := ioutil.ReadFile(s.d.LogfileName())
+		c.Assert(err, check.IsNil, check.Commentf("\n%s", string(b)))
+	}
 
 	out, err := s.d.Cmd("run", "-d", "--name=graphtest", "busybox", "sh", "-c", "echo hello > /hello")
 	c.Assert(err, check.IsNil, check.Commentf(out))
@@ -326,7 +336,7 @@ func (s *DockerExternalGraphdriverSuite) TestExternalGraphDriver(c *check.C) {
 	c.Assert(s.ec.removals >= 1, check.Equals, true)
 	c.Assert(s.ec.gets >= 1, check.Equals, true)
 	c.Assert(s.ec.puts >= 1, check.Equals, true)
-	c.Assert(s.ec.stats, check.Equals, 1)
+	c.Assert(s.ec.stats, check.Equals, 3)
 	c.Assert(s.ec.cleanups, check.Equals, 2)
 	c.Assert(s.ec.exists >= 1, check.Equals, true)
 	c.Assert(s.ec.applydiff >= 1, check.Equals, true)

+ 4 - 3
integration-cli/docker_cli_links_test.go

@@ -2,10 +2,11 @@ package main
 
 import (
 	"fmt"
-	"github.com/go-check/check"
 	"reflect"
 	"regexp"
 	"strings"
+
+	"github.com/go-check/check"
 )
 
 func (s *DockerSuite) TestLinksPingUnlinkedContainers(c *check.C) {
@@ -233,7 +234,7 @@ func (s *DockerSuite) TestLinkShortDefinition(c *check.C) {
 }
 
 func (s *DockerSuite) TestLinksNetworkHostContainer(c *check.C) {
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	dockerCmd(c, "run", "-d", "--net", "host", "--name", "host_container", "busybox", "top")
 	out, _, err := dockerCmdWithError("run", "--name", "should_fail", "--link", "host_container:tester", "busybox", "true")
 	if err == nil || !strings.Contains(out, "--net=host can't be used with links. This would result in undefined behavior") {
@@ -242,7 +243,7 @@ func (s *DockerSuite) TestLinksNetworkHostContainer(c *check.C) {
 }
 
 func (s *DockerSuite) TestLinksEtcHostsRegularFile(c *check.C) {
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	out, _ := dockerCmd(c, "run", "--net=host", "busybox", "ls", "-la", "/etc/hosts")
 	if !strings.HasPrefix(out, "-") {
 		c.Errorf("/etc/hosts should be a regular file")

+ 1 - 1
integration-cli/docker_cli_links_unix_test.go

@@ -12,7 +12,7 @@ import (
 func (s *DockerSuite) TestLinksEtcHostsContentMatch(c *check.C) {
 	// In a _unix file as using Unix specific files, and must be on the
 	// same host as the daemon.
-	testRequires(c, SameHostDaemon)
+	testRequires(c, SameHostDaemon, NotUserNamespace)
 
 	out, _ := dockerCmd(c, "run", "--net=host", "busybox", "cat", "/etc/hosts")
 	hosts, err := ioutil.ReadFile("/etc/hosts")

+ 1 - 1
integration-cli/docker_cli_nat_test.go

@@ -98,7 +98,7 @@ func (s *DockerSuite) TestNetworkLocalhostTCPNat(c *check.C) {
 
 func (s *DockerSuite) TestNetworkLoopbackNat(c *check.C) {
 	testRequires(c, DaemonIsLinux)
-	testRequires(c, SameHostDaemon, NativeExecDriver)
+	testRequires(c, SameHostDaemon, NativeExecDriver, NotUserNamespace)
 	msg := "it works"
 	startServerContainer(c, msg, 8080)
 	endpoint := getExternalAddress(c)

+ 3 - 3
integration-cli/docker_cli_netmode_test.go

@@ -23,7 +23,7 @@ func checkContains(expected string, out string, c *check.C) {
 }
 
 func (s *DockerSuite) TestNetHostname(c *check.C) {
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 
 	var (
 		out    string
@@ -81,7 +81,7 @@ func (s *DockerSuite) TestNetHostname(c *check.C) {
 }
 
 func (s *DockerSuite) TestConflictContainerNetworkAndLinks(c *check.C) {
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	var (
 		out    string
 		err    error
@@ -102,7 +102,7 @@ func (s *DockerSuite) TestConflictContainerNetworkAndLinks(c *check.C) {
 }
 
 func (s *DockerSuite) TestConflictNetworkModeAndOptions(c *check.C) {
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	var (
 		out    string
 		err    error

+ 2 - 2
integration-cli/docker_cli_port_test.go

@@ -249,7 +249,7 @@ func (s *DockerSuite) TestUnpublishedPortsInPsOutput(c *check.C) {
 }
 
 func (s *DockerSuite) TestPortHostBinding(c *check.C) {
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	out, _ := dockerCmd(c, "run", "-d", "-p", "9876:80", "busybox",
 		"nc", "-l", "-p", "80")
 	firstID := strings.TrimSpace(out)
@@ -272,7 +272,7 @@ func (s *DockerSuite) TestPortHostBinding(c *check.C) {
 }
 
 func (s *DockerSuite) TestPortExposeHostBinding(c *check.C) {
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	out, _ := dockerCmd(c, "run", "-d", "-P", "--expose", "80", "busybox",
 		"nc", "-l", "-p", "80")
 	firstID := strings.TrimSpace(out)

+ 61 - 48
integration-cli/docker_cli_run_test.go

@@ -707,7 +707,7 @@ func (s *DockerSuite) TestRunContainerNetwork(c *check.C) {
 func (s *DockerSuite) TestRunNetHostNotAllowedWithLinks(c *check.C) {
 	// TODO Windows: This is Linux specific as --link is not supported and
 	// this will be deprecated in favour of container networking model.
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	dockerCmd(c, "run", "--name", "linked", "busybox", "true")
 
 	_, _, err := dockerCmdWithError("run", "--net=host", "--link", "linked:linked", "busybox", "true")
@@ -733,7 +733,7 @@ func (s *DockerSuite) TestRunFullHostnameSet(c *check.C) {
 func (s *DockerSuite) TestRunPrivilegedCanMknod(c *check.C) {
 	// Not applicable for Windows as Windows daemon does not support
 	// the concept of --privileged, and mknod is a Unix concept.
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	out, _ := dockerCmd(c, "run", "--privileged", "busybox", "sh", "-c", "mknod /tmp/sda b 8 0 && echo ok")
 	if actual := strings.Trim(out, "\r\n"); actual != "ok" {
 		c.Fatalf("expected output ok received %s", actual)
@@ -743,7 +743,7 @@ func (s *DockerSuite) TestRunPrivilegedCanMknod(c *check.C) {
 func (s *DockerSuite) TestRunUnprivilegedCanMknod(c *check.C) {
 	// Not applicable for Windows as Windows daemon does not support
 	// the concept of --privileged, and mknod is a Unix concept.
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	out, _ := dockerCmd(c, "run", "busybox", "sh", "-c", "mknod /tmp/sda b 8 0 && echo ok")
 	if actual := strings.Trim(out, "\r\n"); actual != "ok" {
 		c.Fatalf("expected output ok received %s", actual)
@@ -799,7 +799,7 @@ func (s *DockerSuite) TestRunCapDropALLCannotMknod(c *check.C) {
 
 func (s *DockerSuite) TestRunCapDropALLAddMknodCanMknod(c *check.C) {
 	// Not applicable for Windows as there is no concept of --cap-drop or mknod
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	out, _ := dockerCmd(c, "run", "--cap-drop=ALL", "--cap-add=MKNOD", "--cap-add=SETGID", "busybox", "sh", "-c", "mknod /tmp/sda b 8 0 && echo ok")
 
 	if actual := strings.Trim(out, "\r\n"); actual != "ok" {
@@ -861,7 +861,7 @@ func (s *DockerSuite) TestRunGroupAdd(c *check.C) {
 
 func (s *DockerSuite) TestRunPrivilegedCanMount(c *check.C) {
 	// Not applicable for Windows as there is no concept of --privileged
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	out, _ := dockerCmd(c, "run", "--privileged", "busybox", "sh", "-c", "mount -t tmpfs none /tmp && echo ok")
 
 	if actual := strings.Trim(out, "\r\n"); actual != "ok" {
@@ -892,7 +892,7 @@ func (s *DockerSuite) TestRunSysNotWritableInNonPrivilegedContainers(c *check.C)
 
 func (s *DockerSuite) TestRunSysWritableInPrivilegedContainers(c *check.C) {
 	// Not applicable for Windows as there is no concept of unprivileged
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	if _, code, err := dockerCmdWithError("run", "--privileged", "busybox", "touch", "/sys/kernel/profiling"); err != nil || code != 0 {
 		c.Fatalf("sys should be writable in privileged container")
 	}
@@ -908,7 +908,7 @@ func (s *DockerSuite) TestRunProcNotWritableInNonPrivilegedContainers(c *check.C
 
 func (s *DockerSuite) TestRunProcWritableInPrivilegedContainers(c *check.C) {
 	// Not applicable for Windows as there is no concept of --privileged
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	if _, code := dockerCmd(c, "run", "--privileged", "busybox", "touch", "/proc/sysrq-trigger"); code != 0 {
 		c.Fatalf("proc should be writable in privileged container")
 	}
@@ -916,7 +916,8 @@ func (s *DockerSuite) TestRunProcWritableInPrivilegedContainers(c *check.C) {
 
 func (s *DockerSuite) TestRunDeviceNumbers(c *check.C) {
 	// Not applicable on Windows as /dev/ is a Unix specific concept
-	testRequires(c, DaemonIsLinux)
+	// TODO: NotUserNamespace could be removed here if "root" "root" is replaced with the user
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	out, _ := dockerCmd(c, "run", "busybox", "sh", "-c", "ls -l /dev/null")
 	deviceLineFields := strings.Fields(out)
 	deviceLineFields[6] = ""
@@ -946,7 +947,7 @@ func (s *DockerSuite) TestRunUnprivilegedWithChroot(c *check.C) {
 
 func (s *DockerSuite) TestRunAddingOptionalDevices(c *check.C) {
 	// Not applicable on Windows as Windows does not support --device
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	out, _ := dockerCmd(c, "run", "--device", "/dev/zero:/dev/nulo", "busybox", "sh", "-c", "ls /dev/nulo")
 	if actual := strings.Trim(out, "\r\n"); actual != "/dev/nulo" {
 		c.Fatalf("expected output /dev/nulo, received %s", actual)
@@ -955,7 +956,7 @@ func (s *DockerSuite) TestRunAddingOptionalDevices(c *check.C) {
 
 func (s *DockerSuite) TestRunAddingOptionalDevicesNoSrc(c *check.C) {
 	// Not applicable on Windows as Windows does not support --device
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	out, _ := dockerCmd(c, "run", "--device", "/dev/zero:rw", "busybox", "sh", "-c", "ls /dev/zero")
 	if actual := strings.Trim(out, "\r\n"); actual != "/dev/zero" {
 		c.Fatalf("expected output /dev/zero, received %s", actual)
@@ -964,7 +965,7 @@ func (s *DockerSuite) TestRunAddingOptionalDevicesNoSrc(c *check.C) {
 
 func (s *DockerSuite) TestRunAddingOptionalDevicesInvalidMode(c *check.C) {
 	// Not applicable on Windows as Windows does not support --device
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	_, _, err := dockerCmdWithError("run", "--device", "/dev/zero:ro", "busybox", "sh", "-c", "ls /dev/zero")
 	if err == nil {
 		c.Fatalf("run container with device mode ro should fail")
@@ -973,7 +974,7 @@ func (s *DockerSuite) TestRunAddingOptionalDevicesInvalidMode(c *check.C) {
 
 func (s *DockerSuite) TestRunModeHostname(c *check.C) {
 	// Not applicable on Windows as Windows does not support -h
-	testRequires(c, SameHostDaemon, DaemonIsLinux)
+	testRequires(c, SameHostDaemon, DaemonIsLinux, NotUserNamespace)
 
 	out, _ := dockerCmd(c, "run", "-h=testhostname", "busybox", "cat", "/etc/hostname")
 
@@ -1711,6 +1712,8 @@ func (s *DockerSuite) TestRunEntrypoint(c *check.C) {
 }
 
 func (s *DockerSuite) TestRunBindMounts(c *check.C) {
+	// /tmp gets permission denied
+	testRequires(c, NotUserNamespace)
 	// Cannot run on Windows as Windows does not support volumes
 	testRequires(c, DaemonIsLinux, SameHostDaemon)
 
@@ -1909,6 +1912,8 @@ func (s *DockerSuite) TestRunAllocatePortInReservedRange(c *check.C) {
 
 // Regression test for #7792
 func (s *DockerSuite) TestRunMountOrdering(c *check.C) {
+	// /tmp gets permission denied
+	testRequires(c, NotUserNamespace)
 	// Not applicable on Windows as Windows does not support volumes
 	testRequires(c, SameHostDaemon, DaemonIsLinux)
 
@@ -1953,6 +1958,8 @@ func (s *DockerSuite) TestRunMountOrdering(c *check.C) {
 
 // Regression test for https://github.com/docker/docker/issues/8259
 func (s *DockerSuite) TestRunReuseBindVolumeThatIsSymlink(c *check.C) {
+	// /tmp gets permission denied
+	testRequires(c, NotUserNamespace)
 	// Not applicable on Windows as Windows does not support volumes
 	testRequires(c, SameHostDaemon, DaemonIsLinux)
 
@@ -2157,7 +2164,7 @@ func (s *DockerSuite) TestRunUnknownCommand(c *check.C) {
 
 func (s *DockerSuite) TestRunModeIpcHost(c *check.C) {
 	// Not applicable on Windows as uses Unix-specific capabilities
-	testRequires(c, SameHostDaemon, DaemonIsLinux)
+	testRequires(c, SameHostDaemon, DaemonIsLinux, NotUserNamespace)
 
 	hostIpc, err := os.Readlink("/proc/1/ns/ipc")
 	if err != nil {
@@ -2179,7 +2186,7 @@ func (s *DockerSuite) TestRunModeIpcHost(c *check.C) {
 
 func (s *DockerSuite) TestRunModeIpcContainer(c *check.C) {
 	// Not applicable on Windows as uses Unix-specific capabilities
-	testRequires(c, SameHostDaemon, DaemonIsLinux)
+	testRequires(c, SameHostDaemon, DaemonIsLinux, NotUserNamespace)
 
 	out, _ := dockerCmd(c, "run", "-d", "busybox", "sh", "-c", "echo -n test > /dev/shm/test && top")
 
@@ -2211,7 +2218,7 @@ func (s *DockerSuite) TestRunModeIpcContainer(c *check.C) {
 
 func (s *DockerSuite) TestRunModeIpcContainerNotExists(c *check.C) {
 	// Not applicable on Windows as uses Unix-specific capabilities
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	out, _, err := dockerCmdWithError("run", "-d", "--ipc", "container:abcd1234", "busybox", "top")
 	if !strings.Contains(out, "abcd1234") || err == nil {
 		c.Fatalf("run IPC from a non exists container should with correct error out")
@@ -2220,7 +2227,7 @@ func (s *DockerSuite) TestRunModeIpcContainerNotExists(c *check.C) {
 
 func (s *DockerSuite) TestRunModeIpcContainerNotRunning(c *check.C) {
 	// Not applicable on Windows as uses Unix-specific capabilities
-	testRequires(c, SameHostDaemon, DaemonIsLinux)
+	testRequires(c, SameHostDaemon, DaemonIsLinux, NotUserNamespace)
 
 	out, _ := dockerCmd(c, "create", "busybox")
 
@@ -2250,7 +2257,7 @@ func (s *DockerSuite) TestRunMountShmMqueueFromHost(c *check.C) {
 
 func (s *DockerSuite) TestContainerNetworkMode(c *check.C) {
 	// Not applicable on Windows as uses Unix-specific capabilities
-	testRequires(c, SameHostDaemon, DaemonIsLinux)
+	testRequires(c, SameHostDaemon, DaemonIsLinux, NotUserNamespace)
 
 	out, _ := dockerCmd(c, "run", "-d", "busybox", "top")
 	id := strings.TrimSpace(out)
@@ -2272,7 +2279,7 @@ func (s *DockerSuite) TestContainerNetworkMode(c *check.C) {
 
 func (s *DockerSuite) TestRunModePidHost(c *check.C) {
 	// Not applicable on Windows as uses Unix-specific capabilities
-	testRequires(c, NativeExecDriver, SameHostDaemon, DaemonIsLinux)
+	testRequires(c, NativeExecDriver, SameHostDaemon, DaemonIsLinux, NotUserNamespace)
 
 	hostPid, err := os.Readlink("/proc/1/ns/pid")
 	if err != nil {
@@ -2413,7 +2420,7 @@ func (s *DockerSuite) TestRunNonLocalMacAddress(c *check.C) {
 
 func (s *DockerSuite) TestRunNetHost(c *check.C) {
 	// Not applicable on Windows as uses Unix-specific capabilities
-	testRequires(c, SameHostDaemon, DaemonIsLinux)
+	testRequires(c, SameHostDaemon, DaemonIsLinux, NotUserNamespace)
 
 	hostNet, err := os.Readlink("/proc/1/ns/net")
 	if err != nil {
@@ -2436,7 +2443,7 @@ func (s *DockerSuite) TestRunNetHost(c *check.C) {
 func (s *DockerSuite) TestRunNetHostTwiceSameName(c *check.C) {
 	// TODO Windows. As Windows networking evolves and converges towards
 	// CNM, this test may be possible to enable on Windows.
-	testRequires(c, SameHostDaemon, DaemonIsLinux)
+	testRequires(c, SameHostDaemon, DaemonIsLinux, NotUserNamespace)
 
 	dockerCmd(c, "run", "--rm", "--name=thost", "--net=host", "busybox", "true")
 	dockerCmd(c, "run", "--rm", "--name=thost", "--net=host", "busybox", "true")
@@ -2444,7 +2451,7 @@ func (s *DockerSuite) TestRunNetHostTwiceSameName(c *check.C) {
 
 func (s *DockerSuite) TestRunNetContainerWhichHost(c *check.C) {
 	// Not applicable on Windows as uses Unix-specific capabilities
-	testRequires(c, SameHostDaemon, DaemonIsLinux)
+	testRequires(c, SameHostDaemon, DaemonIsLinux, NotUserNamespace)
 
 	hostNet, err := os.Readlink("/proc/1/ns/net")
 	if err != nil {
@@ -2534,7 +2541,8 @@ func (s *DockerSuite) TestRunContainerWithReadonlyRootfs(c *check.C) {
 func (s *DockerSuite) TestPermissionsPtsReadonlyRootfs(c *check.C) {
 	// Not applicable on Windows due to use of Unix specific functionality, plus
 	// the use of --read-only which is not supported.
-	testRequires(c, DaemonIsLinux, NativeExecDriver)
+	// --read-only + userns has remount issues
+	testRequires(c, DaemonIsLinux, NativeExecDriver, NotUserNamespace)
 
 	// Ensure we have not broken writing /dev/pts
 	out, status := dockerCmd(c, "run", "--read-only", "--rm", "busybox", "mount")
@@ -2549,7 +2557,7 @@ func (s *DockerSuite) TestPermissionsPtsReadonlyRootfs(c *check.C) {
 
 func testReadOnlyFile(filename string, c *check.C) {
 	// Not applicable on Windows which does not support --read-only
-	testRequires(c, NativeExecDriver, DaemonIsLinux)
+	testRequires(c, NativeExecDriver, DaemonIsLinux, NotUserNamespace)
 
 	out, _, err := dockerCmdWithError("run", "--read-only", "--rm", "busybox", "touch", filename)
 	if err == nil {
@@ -2572,7 +2580,8 @@ func testReadOnlyFile(filename string, c *check.C) {
 
 func (s *DockerSuite) TestRunContainerWithReadonlyEtcHostsAndLinkedContainer(c *check.C) {
 	// Not applicable on Windows which does not support --link
-	testRequires(c, NativeExecDriver, DaemonIsLinux)
+	// --read-only + userns has remount issues
+	testRequires(c, NativeExecDriver, DaemonIsLinux, NotUserNamespace)
 
 	dockerCmd(c, "run", "-d", "--name", "test-etc-hosts-ro-linked", "busybox", "top")
 
@@ -2583,9 +2592,9 @@ func (s *DockerSuite) TestRunContainerWithReadonlyEtcHostsAndLinkedContainer(c *
 }
 
 func (s *DockerSuite) TestRunContainerWithReadonlyRootfsWithDnsFlag(c *check.C) {
-	// Not applicable on Windows which does not support either --read-only or
-	// --dns.
-	testRequires(c, NativeExecDriver, DaemonIsLinux)
+	// Not applicable on Windows which does not support either --read-only or --dns.
+	// --read-only + userns has remount issues
+	testRequires(c, NativeExecDriver, DaemonIsLinux, NotUserNamespace)
 
 	out, _ := dockerCmd(c, "run", "--read-only", "--dns", "1.1.1.1", "busybox", "/bin/cat", "/etc/resolv.conf")
 	if !strings.Contains(string(out), "1.1.1.1") {
@@ -2595,7 +2604,8 @@ func (s *DockerSuite) TestRunContainerWithReadonlyRootfsWithDnsFlag(c *check.C)
 
 func (s *DockerSuite) TestRunContainerWithReadonlyRootfsWithAddHostFlag(c *check.C) {
 	// Not applicable on Windows which does not support --read-only
-	testRequires(c, NativeExecDriver, DaemonIsLinux)
+	// --read-only + userns has remount issues
+	testRequires(c, NativeExecDriver, DaemonIsLinux, NotUserNamespace)
 
 	out, _ := dockerCmd(c, "run", "--read-only", "--add-host", "testreadonly:127.0.0.1", "busybox", "/bin/cat", "/etc/hosts")
 	if !strings.Contains(string(out), "testreadonly") {
@@ -2654,7 +2664,7 @@ func (s *DockerSuite) TestRunContainerWithRmFlagCannotStartContainer(c *check.C)
 
 func (s *DockerSuite) TestRunPidHostWithChildIsKillable(c *check.C) {
 	// Not applicable on Windows as uses Unix specific functionality
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	name := "ibuildthecloud"
 	dockerCmd(c, "run", "-d", "--pid=host", "--name", name, "busybox", "sh", "-c", "sleep 30; echo hi")
 
@@ -2734,7 +2744,7 @@ func (s *DockerSuite) TestRunReadProcLatency(c *check.C) {
 
 func (s *DockerSuite) TestRunReadFilteredProc(c *check.C) {
 	// Not applicable on Windows as uses Unix specific functionality
-	testRequires(c, Apparmor, DaemonIsLinux)
+	testRequires(c, Apparmor, DaemonIsLinux, NotUserNamespace)
 
 	testReadPaths := []string{
 		"/proc/latency_stats",
@@ -2767,7 +2777,8 @@ func (s *DockerSuite) TestMountIntoProc(c *check.C) {
 
 func (s *DockerSuite) TestMountIntoSys(c *check.C) {
 	// Not applicable on Windows as uses Unix specific functionality
-	testRequires(c, DaemonIsLinux, NativeExecDriver)
+	testRequires(c, DaemonIsLinux)
+	testRequires(c, NativeExecDriver, NotUserNamespace)
 	dockerCmd(c, "run", "-v", "/sys/fs/cgroup", "busybox", "true")
 }
 
@@ -2854,7 +2865,7 @@ func (s *DockerSuite) TestVolumeFromMixedRWOptions(c *check.C) {
 
 func (s *DockerSuite) TestRunWriteFilteredProc(c *check.C) {
 	// Not applicable on Windows as uses Unix specific functionality
-	testRequires(c, Apparmor, NativeExecDriver, DaemonIsLinux)
+	testRequires(c, Apparmor, NativeExecDriver, DaemonIsLinux, NotUserNamespace)
 
 	testWritePaths := []string{
 		/* modprobe and core_pattern should both be denied by generic
@@ -2917,7 +2928,8 @@ func (s *DockerSuite) TestRunNetworkFilesBindMountRO(c *check.C) {
 
 func (s *DockerSuite) TestRunNetworkFilesBindMountROFilesystem(c *check.C) {
 	// Not applicable on Windows as uses Unix specific functionality
-	testRequires(c, SameHostDaemon, DaemonIsLinux)
+	// --read-only + userns has remount issues
+	testRequires(c, SameHostDaemon, DaemonIsLinux, NotUserNamespace)
 
 	filename := createTmpFile(c, "test123")
 	defer os.Remove(filename)
@@ -3260,7 +3272,8 @@ func (s *DockerSuite) TestRunContainerWithCgroupParentAbsPath(c *check.C) {
 
 func (s *DockerSuite) TestRunContainerWithCgroupMountRO(c *check.C) {
 	// Not applicable on Windows as uses Unix specific functionality
-	testRequires(c, DaemonIsLinux, NativeExecDriver)
+	// --read-only + userns has remount issues
+	testRequires(c, DaemonIsLinux, NativeExecDriver, NotUserNamespace)
 
 	filename := "/sys/fs/cgroup/devices/test123"
 	out, _, err := dockerCmdWithError("run", "busybox", "touch", filename)
@@ -3275,7 +3288,7 @@ func (s *DockerSuite) TestRunContainerWithCgroupMountRO(c *check.C) {
 
 func (s *DockerSuite) TestRunContainerNetworkModeToSelf(c *check.C) {
 	// Not applicable on Windows which does not support --net=container
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	out, _, err := dockerCmdWithError("run", "--name=me", "--net=container:me", "busybox", "true")
 	if err == nil || !strings.Contains(out, "cannot join own network") {
 		c.Fatalf("using container net mode to self should result in an error\nerr: %q\nout: %s", err, out)
@@ -3284,7 +3297,7 @@ func (s *DockerSuite) TestRunContainerNetworkModeToSelf(c *check.C) {
 
 func (s *DockerSuite) TestRunContainerNetModeWithDnsMacHosts(c *check.C) {
 	// Not applicable on Windows which does not support --net=container
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	out, _, err := dockerCmdWithError("run", "-d", "--name", "parent", "busybox", "top")
 	if err != nil {
 		c.Fatalf("failed to run container: %v, output: %q", err, out)
@@ -3308,7 +3321,7 @@ func (s *DockerSuite) TestRunContainerNetModeWithDnsMacHosts(c *check.C) {
 
 func (s *DockerSuite) TestRunContainerNetModeWithExposePort(c *check.C) {
 	// Not applicable on Windows which does not support --net=container
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	dockerCmd(c, "run", "-d", "--name", "parent", "busybox", "top")
 
 	out, _, err := dockerCmdWithError("run", "-p", "5000:5000", "--net=container:parent", "busybox")
@@ -3329,7 +3342,7 @@ func (s *DockerSuite) TestRunContainerNetModeWithExposePort(c *check.C) {
 
 func (s *DockerSuite) TestRunLinkToContainerNetMode(c *check.C) {
 	// Not applicable on Windows which does not support --net=container or --link
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	dockerCmd(c, "run", "--name", "test", "-d", "busybox", "top")
 	dockerCmd(c, "run", "--name", "parent", "-d", "--net=container:test", "busybox", "top")
 	dockerCmd(c, "run", "-d", "--link=parent:parent", "busybox", "top")
@@ -3373,7 +3386,7 @@ func (s *DockerSuite) TestRunLoopbackWhenNetworkDisabled(c *check.C) {
 
 func (s *DockerSuite) TestRunModeNetContainerHostname(c *check.C) {
 	// Windows does not support --net=container
-	testRequires(c, DaemonIsLinux, ExecSupport)
+	testRequires(c, DaemonIsLinux, ExecSupport, NotUserNamespace)
 
 	dockerCmd(c, "run", "-i", "-d", "--name", "parent", "busybox", "top")
 	out, _ := dockerCmd(c, "exec", "parent", "cat", "/etc/hostname")
@@ -3399,7 +3412,7 @@ func (s *DockerSuite) TestRunNetworkNotInitializedNoneMode(c *check.C) {
 
 func (s *DockerSuite) TestTwoContainersInNetHost(c *check.C) {
 	// Not applicable as Windows does not support --net=host
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace, NotUserNamespace)
 	dockerCmd(c, "run", "-d", "--net=host", "--name=first", "busybox", "top")
 	dockerCmd(c, "run", "-d", "--net=host", "--name=second", "busybox", "top")
 	dockerCmd(c, "stop", "first")
@@ -3407,7 +3420,7 @@ func (s *DockerSuite) TestTwoContainersInNetHost(c *check.C) {
 }
 
 func (s *DockerSuite) TestContainersInUserDefinedNetwork(c *check.C) {
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	dockerCmd(c, "network", "create", "-d", "bridge", "testnetwork")
 	dockerCmd(c, "run", "-d", "--net=testnetwork", "--name=first", "busybox", "top")
 	c.Assert(waitRun("first"), check.IsNil)
@@ -3418,7 +3431,7 @@ func (s *DockerSuite) TestContainersInUserDefinedNetwork(c *check.C) {
 }
 
 func (s *DockerSuite) TestContainersInMultipleNetworks(c *check.C) {
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	// Create 2 networks using bridge driver
 	dockerCmd(c, "network", "create", "-d", "bridge", "testnetwork1")
 	dockerCmd(c, "network", "create", "-d", "bridge", "testnetwork2")
@@ -3441,7 +3454,7 @@ func (s *DockerSuite) TestContainersInMultipleNetworks(c *check.C) {
 }
 
 func (s *DockerSuite) TestContainersNetworkIsolation(c *check.C) {
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	// Create 2 networks using bridge driver
 	dockerCmd(c, "network", "create", "-d", "bridge", "testnetwork1")
 	dockerCmd(c, "network", "create", "-d", "bridge", "testnetwork2")
@@ -3473,7 +3486,7 @@ func (s *DockerSuite) TestContainersNetworkIsolation(c *check.C) {
 }
 
 func (s *DockerSuite) TestNetworkRmWithActiveContainers(c *check.C) {
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	// Create 2 networks using bridge driver
 	dockerCmd(c, "network", "create", "-d", "bridge", "testnetwork1")
 	// Run and connect containers to testnetwork1
@@ -3495,7 +3508,7 @@ func (s *DockerSuite) TestNetworkRmWithActiveContainers(c *check.C) {
 }
 
 func (s *DockerSuite) TestContainerRestartInMultipleNetworks(c *check.C) {
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	// Create 2 networks using bridge driver
 	dockerCmd(c, "network", "create", "-d", "bridge", "testnetwork1")
 	dockerCmd(c, "network", "create", "-d", "bridge", "testnetwork2")
@@ -3531,7 +3544,7 @@ func (s *DockerSuite) TestContainerRestartInMultipleNetworks(c *check.C) {
 }
 
 func (s *DockerSuite) TestContainerWithConflictingHostNetworks(c *check.C) {
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	// Run a container with --net=host
 	dockerCmd(c, "run", "-d", "--net=host", "--name=first", "busybox", "top")
 	c.Assert(waitRun("first"), check.IsNil)
@@ -3547,7 +3560,7 @@ func (s *DockerSuite) TestContainerWithConflictingHostNetworks(c *check.C) {
 }
 
 func (s *DockerSuite) TestContainerWithConflictingSharedNetwork(c *check.C) {
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	dockerCmd(c, "run", "-d", "--name=first", "busybox", "top")
 	c.Assert(waitRun("first"), check.IsNil)
 	// Run second container in first container's network namespace
@@ -3568,7 +3581,7 @@ func (s *DockerSuite) TestContainerWithConflictingSharedNetwork(c *check.C) {
 }
 
 func (s *DockerSuite) TestContainerWithConflictingNoneNetwork(c *check.C) {
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	dockerCmd(c, "run", "-d", "--net=none", "--name=first", "busybox", "top")
 	c.Assert(waitRun("first"), check.IsNil)
 

+ 3 - 1
integration-cli/docker_cli_run_unix_test.go

@@ -57,6 +57,8 @@ func (s *DockerSuite) TestRunRedirectStdout(c *check.C) {
 
 // Test recursive bind mount works by default
 func (s *DockerSuite) TestRunWithVolumesIsRecursive(c *check.C) {
+	// /tmp gets permission denied
+	testRequires(c, NotUserNamespace)
 	tmpDir, err := ioutil.TempDir("", "docker_recursive_mount_test")
 	if err != nil {
 		c.Fatal(err)
@@ -90,7 +92,7 @@ func (s *DockerSuite) TestRunWithVolumesIsRecursive(c *check.C) {
 }
 
 func (s *DockerSuite) TestRunDeviceDirectory(c *check.C) {
-	testRequires(c, NativeExecDriver)
+	testRequires(c, NativeExecDriver, NotUserNamespace)
 	if _, err := os.Stat("/dev/snd"); err != nil {
 		c.Skip("Host does not have /dev/snd")
 	}

+ 1 - 1
integration-cli/docker_cli_top_test.go

@@ -30,7 +30,7 @@ func (s *DockerSuite) TestTopNonPrivileged(c *check.C) {
 }
 
 func (s *DockerSuite) TestTopPrivileged(c *check.C) {
-	testRequires(c, DaemonIsLinux)
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	out, _ := dockerCmd(c, "run", "--privileged", "-i", "-d", "busybox", "top")
 	cleanedContainerID := strings.TrimSpace(out)
 

+ 28 - 4
integration-cli/docker_test_vars.go

@@ -1,9 +1,12 @@
 package main
 
 import (
+	"encoding/json"
 	"fmt"
 	"os"
 	"os/exec"
+
+	"github.com/docker/docker/pkg/reexec"
 )
 
 var (
@@ -16,10 +19,6 @@ var (
 	// the private registry to use for tests
 	privateRegistryURL = "127.0.0.1:5000"
 
-	dockerBasePath       = "/var/lib/docker"
-	volumesConfigPath    = dockerBasePath + "/volumes"
-	containerStoragePath = dockerBasePath + "/containers"
-
 	runtimePath    = "/var/run/docker"
 	execDriverPath = runtimePath + "/execdriver/native"
 
@@ -38,6 +37,13 @@ var (
 	// daemonDefaultImage is the name of the default image to use when running
 	// tests. This is platform dependent.
 	daemonDefaultImage string
+
+	// For a local daemon on Linux, these values will be used for testing
+	// user namespace support as the standard graph path(s) will be
+	// appended with the root remapped uid.gid prefix
+	dockerBasePath       string
+	volumesConfigPath    string
+	containerStoragePath string
 )
 
 const (
@@ -50,6 +56,7 @@ const (
 )
 
 func init() {
+	reexec.Init()
 	if dockerBin := os.Getenv("DOCKER_BINARY"); dockerBin != "" {
 		dockerBinary = dockerBin
 	}
@@ -85,4 +92,21 @@ func init() {
 	} else {
 		isLocalDaemon = true
 	}
+
+	// This is only used for tests run against a local daemon (Linux-only today).
+	// The default is "/var/lib/docker", but we try to ask the
+	// /info endpoint for the specific root dir.
+	dockerBasePath = "/var/lib/docker"
+	type Info struct {
+		DockerRootDir string
+	}
+	var i Info
+	status, b, err := sockRequest("GET", "/info", nil)
+	if err == nil && status == 200 {
+		if err = json.Unmarshal(b, &i); err == nil {
+			dockerBasePath = i.DockerRootDir
+		}
+	}
+	volumesConfigPath = dockerBasePath + "/volumes"
+	containerStoragePath = dockerBasePath + "/containers"
 }

+ 56 - 5
integration-cli/docker_utils.go

@@ -41,6 +41,7 @@ type Daemon struct {
 	c              *check.C
 	logFile        *os.File
 	folder         string
+	root           string
 	stdin          io.WriteCloser
 	stdout, stderr io.ReadCloser
 	cmd            *exec.Cmd
@@ -65,9 +66,10 @@ func NewDaemon(c *check.C) *Daemon {
 	if err != nil {
 		c.Fatalf("Could not make %q an absolute path: %v", dir, err)
 	}
+	daemonRoot := filepath.Join(daemonFolder, "root")
 
-	if err := os.MkdirAll(filepath.Join(daemonFolder, "graph"), 0600); err != nil {
-		c.Fatalf("Could not create %s/graph directory", daemonFolder)
+	if err := os.MkdirAll(daemonRoot, 0755); err != nil {
+		c.Fatalf("Could not create daemon root %q: %v", dir, err)
 	}
 
 	userlandProxy := true
@@ -82,6 +84,7 @@ func NewDaemon(c *check.C) *Daemon {
 		id:            id,
 		c:             c,
 		folder:        daemonFolder,
+		root:          daemonRoot,
 		storageDriver: os.Getenv("DOCKER_GRAPHDRIVER"),
 		execDriver:    os.Getenv("DOCKER_EXECDRIVER"),
 		userlandProxy: userlandProxy,
@@ -99,11 +102,15 @@ func (d *Daemon) Start(arg ...string) error {
 	args := append(d.GlobalFlags,
 		d.Command,
 		"--host", d.sock(),
-		"--graph", fmt.Sprintf("%s/graph", d.folder),
+		"--graph", d.root,
 		"--pidfile", fmt.Sprintf("%s/docker.pid", d.folder),
 		fmt.Sprintf("--userland-proxy=%t", d.userlandProxy),
 	)
 
+	if root := os.Getenv("DOCKER_REMAP_ROOT"); root != "" {
+		args = append(args, []string{"--userns-remap", root}...)
+	}
+
 	// If we don't explicitly set the log-level or debug flag(-D) then
 	// turn on debug mode
 	foundIt := false
@@ -181,8 +188,11 @@ func (d *Daemon) Start(arg ...string) error {
 			if resp.StatusCode != http.StatusOK {
 				d.c.Logf("[%s] received status != 200 OK: %s", d.id, resp.Status)
 			}
-
 			d.c.Logf("[%s] daemon started", d.id)
+			d.root, err = d.queryRootDir()
+			if err != nil {
+				return fmt.Errorf("[%s] error querying daemon for root directory: %v", d.id, err)
+			}
 			return nil
 		}
 	}
@@ -278,6 +288,47 @@ func (d *Daemon) Restart(arg ...string) error {
 	return d.Start(arg...)
 }
 
+// queryRootDir asks the daemon's /info endpoint, over the unix socket in
+// d.folder, for its DockerRootDir (needed for a user namespaced daemon with
+// a root remapped uid.gid directory). A non-200 reply is reported as an error.
+func (d *Daemon) queryRootDir() (string, error) {
+	conn, err := net.Dial("unix", filepath.Join(d.folder, "docker.sock"))
+	if err != nil {
+		return "", err
+	}
+	client := httputil.NewClientConn(conn, nil)
+
+	req, err := http.NewRequest("GET", "/info", nil)
+	if err != nil {
+		client.Close()
+		return "", err
+	}
+	req.Header.Set("Content-Type", "application/json")
+
+	resp, err := client.Do(req)
+	if err != nil {
+		client.Close()
+		return "", err
+	}
+	body := ioutils.NewReadCloserWrapper(resp.Body, func() error {
+		defer client.Close()
+		return resp.Body.Close()
+	})
+
+	b, err := readBody(body)
+	if err != nil {
+		return "", err
+	}
+	if resp.StatusCode != http.StatusOK {
+		return "", fmt.Errorf("unexpected status querying /info: %s", resp.Status)
+	}
+	var i struct{ DockerRootDir string }
+	if err := json.Unmarshal(b, &i); err != nil {
+		return "", err
+	}
+	return i.DockerRootDir, nil
+}
+
 func (d *Daemon) sock() string {
 	return fmt.Sprintf("unix://%s/docker.sock", d.folder)
 }
@@ -1236,7 +1287,7 @@ func readFile(src string, c *check.C) (content string) {
 }
 
 func containerStorageFile(containerID, basename string) string {
-	return filepath.Join("/var/lib/docker/containers", containerID, basename)
+	return filepath.Join(containerStoragePath, containerID, basename)
 }
 
 // docker commands that use this function must be run with the '-d' switch.

+ 11 - 0
integration-cli/requirements.go

@@ -6,6 +6,7 @@ import (
 	"io/ioutil"
 	"log"
 	"net/http"
+	"os"
 	"os/exec"
 	"strings"
 	"time"
@@ -147,6 +148,16 @@ var (
 		},
 		"Test requires native Golang compiler instead of GCCGO",
 	}
+	NotUserNamespace = testRequirement{
+		func() bool {
+			// DOCKER_REMAP_ROOT is the switch for running the tests with
+			// a remapped root; the requirement holds only when it is
+			// unset or empty
+			remapRoot := os.Getenv("DOCKER_REMAP_ROOT")
+			return remapRoot == ""
+		},
+		"Test cannot be run when remapping root",
+	}
 )
 
 // testRequires checks if the environment satisfies the requirements

+ 99 - 5
pkg/archive/archive.go

@@ -19,6 +19,7 @@ import (
 
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/docker/pkg/fileutils"
+	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/pkg/pools"
 	"github.com/docker/docker/pkg/promise"
 	"github.com/docker/docker/pkg/system"
@@ -41,6 +42,8 @@ type (
 		ExcludePatterns  []string
 		Compression      Compression
 		NoLchown         bool
+		UIDMaps          []idtools.IDMap
+		GIDMaps          []idtools.IDMap
 		ChownOpts        *TarChownOptions
 		IncludeSourceDir bool
 		// When unpacking, specifies whether overwriting a directory with a
@@ -52,9 +55,13 @@ type (
 	}
 
 	// Archiver allows the reuse of most utility functions of this package
-	// with a pluggable Untar function.
+	// with a pluggable Untar function. Also, to facilitate the passing of
+	// specific id mappings for untar, an archiver can be created with maps
+	// which will then be passed to Untar operations
 	Archiver struct {
-		Untar func(io.Reader, string, *TarOptions) error
+		Untar   func(io.Reader, string, *TarOptions) error
+		UIDMaps []idtools.IDMap
+		GIDMaps []idtools.IDMap
 	}
 
 	// breakoutError is used to differentiate errors related to breaking out
@@ -66,7 +73,7 @@ type (
 var (
 	// ErrNotImplemented is the error message of function not implemented.
 	ErrNotImplemented = errors.New("Function not implemented")
-	defaultArchiver   = &Archiver{Untar}
+	defaultArchiver   = &Archiver{Untar: Untar, UIDMaps: nil, GIDMaps: nil}
 )
 
 const (
@@ -194,6 +201,8 @@ type tarAppender struct {
 
 	// for hardlink mapping
 	SeenFiles map[uint64]string
+	UIDMaps   []idtools.IDMap
+	GIDMaps   []idtools.IDMap
 }
 
 // canonicalTarName provides a platform-independent and consistent posix-style
@@ -261,6 +270,25 @@ func (ta *tarAppender) addTarFile(path, name string) error {
 		hdr.Xattrs["security.capability"] = string(capability)
 	}
 
+	// translate the file's on-disk uid/gid into container IDs using the ID
+	// maps before writing tar headers/files
+	if ta.UIDMaps != nil || ta.GIDMaps != nil {
+		uid, gid, err := getFileUIDGID(fi.Sys())
+		if err != nil {
+			return err
+		}
+		xUID, err := idtools.ToContainer(uid, ta.UIDMaps)
+		if err != nil {
+			return err
+		}
+		xGID, err := idtools.ToContainer(gid, ta.GIDMaps)
+		if err != nil {
+			return err
+		}
+		hdr.Uid = xUID
+		hdr.Gid = xGID
+	}
+
 	if err := ta.TarWriter.WriteHeader(hdr); err != nil {
 		return err
 	}
@@ -427,6 +455,8 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error)
 			TarWriter: tar.NewWriter(compressWriter),
 			Buffer:    pools.BufioWriter32KPool.Get(nil),
 			SeenFiles: make(map[uint64]string),
+			UIDMaps:   options.UIDMaps,
+			GIDMaps:   options.GIDMaps,
 		}
 
 		defer func() {
@@ -554,6 +584,10 @@ func Unpack(decompressedArchive io.Reader, dest string, options *TarOptions) err
 	defer pools.BufioReader32KPool.Put(trBuf)
 
 	var dirs []*tar.Header
+	remappedRootUID, remappedRootGID, err := idtools.GetRootUIDGID(options.UIDMaps, options.GIDMaps)
+	if err != nil {
+		return err
+	}
 
 	// Iterate through the files in the archive.
 loop:
@@ -631,6 +665,28 @@ loop:
 		}
 		trBuf.Reset(tr)
 
+		// if the options contain a uid & gid maps, convert header uid/gid
+		// entries using the maps such that lchown sets the proper mapped
+		// uid/gid after writing the file. We only perform this mapping if
+		// the file isn't already owned by the remapped root UID or GID, as
+		// that specific uid/gid has no mapping from container -> host, and
+		// those files already have the proper ownership for inside the
+		// container.
+		if hdr.Uid != remappedRootUID {
+			xUID, err := idtools.ToHost(hdr.Uid, options.UIDMaps)
+			if err != nil {
+				return err
+			}
+			hdr.Uid = xUID
+		}
+		if hdr.Gid != remappedRootGID {
+			xGID, err := idtools.ToHost(hdr.Gid, options.GIDMaps)
+			if err != nil {
+				return err
+			}
+			hdr.Gid = xGID
+		}
+
 		if err := createTarFile(path, dest, hdr, trBuf, !options.NoLchown, options.ChownOpts); err != nil {
 			return err
 		}
@@ -703,7 +759,15 @@ func (archiver *Archiver) TarUntar(src, dst string) error {
 		return err
 	}
 	defer archive.Close()
-	return archiver.Untar(archive, dst, nil)
+
+	var options *TarOptions
+	if archiver.UIDMaps != nil || archiver.GIDMaps != nil {
+		options = &TarOptions{
+			UIDMaps: archiver.UIDMaps,
+			GIDMaps: archiver.GIDMaps,
+		}
+	}
+	return archiver.Untar(archive, dst, options)
 }
 
 // TarUntar is a convenience function which calls Tar and Untar, with the output of one piped into the other.
@@ -719,7 +783,14 @@ func (archiver *Archiver) UntarPath(src, dst string) error {
 		return err
 	}
 	defer archive.Close()
-	if err := archiver.Untar(archive, dst, nil); err != nil {
+	var options *TarOptions
+	if archiver.UIDMaps != nil || archiver.GIDMaps != nil {
+		options = &TarOptions{
+			UIDMaps: archiver.UIDMaps,
+			GIDMaps: archiver.GIDMaps,
+		}
+	}
+	if err := archiver.Untar(archive, dst, options); err != nil {
 		return err
 	}
 	return nil
@@ -801,6 +872,28 @@ func (archiver *Archiver) CopyFileWithTar(src, dst string) (err error) {
 		hdr.Name = filepath.Base(dst)
 		hdr.Mode = int64(chmodTarEntry(os.FileMode(hdr.Mode)))
 
+		remappedRootUID, remappedRootGID, err := idtools.GetRootUIDGID(archiver.UIDMaps, archiver.GIDMaps)
+		if err != nil {
+			return err
+		}
+
+		// only perform mapping if the file being copied isn't already owned by the
+		// uid or gid of the remapped root in the container
+		if remappedRootUID != hdr.Uid {
+			xUID, err := idtools.ToHost(hdr.Uid, archiver.UIDMaps)
+			if err != nil {
+				return err
+			}
+			hdr.Uid = xUID
+		}
+		if remappedRootGID != hdr.Gid {
+			xGID, err := idtools.ToHost(hdr.Gid, archiver.GIDMaps)
+			if err != nil {
+				return err
+			}
+			hdr.Gid = xGID
+		}
+
 		tw := tar.NewWriter(w)
 		defer tw.Close()
 		if err := tw.WriteHeader(hdr); err != nil {
@@ -816,6 +909,7 @@ func (archiver *Archiver) CopyFileWithTar(src, dst string) (err error) {
 			err = er
 		}
 	}()
+
 	return archiver.Untar(r, filepath.Dir(dst), nil)
 }
 

+ 9 - 0
pkg/archive/archive_unix.go

@@ -61,6 +61,15 @@ func setHeaderForSpecialDevice(hdr *tar.Header, ta *tarAppender, name string, st
 	return
 }
 
+// getFileUIDGID extracts the owner uid and gid from a *syscall.Stat_t.
+func getFileUIDGID(stat interface{}) (int, int, error) {
+	// any other dynamic type is rejected with -1, -1 and an error
+	if st, ok := stat.(*syscall.Stat_t); ok {
+		return int(st.Uid), int(st.Gid), nil
+	}
+	return -1, -1, errors.New("cannot convert stat value to syscall.Stat_t")
+}
+
 func major(device uint64) uint64 {
 	return (device >> 8) & 0xfff
 }

+ 5 - 0
pkg/archive/archive_windows.go

@@ -63,3 +63,8 @@ func handleTarTypeBlockCharFifo(hdr *tar.Header, path string) error {
 func handleLChmod(hdr *tar.Header, path string, hdrInfo os.FileInfo) error {
 	return nil
 }
+
+func getFileUIDGID(stat interface{}) (int, int, error) {
+	// no notion of file ownership mapping yet on Windows: stat is ignored and 0, 0 is always returned
+	return 0, 0, nil
+}

+ 4 - 1
pkg/archive/changes.go

@@ -14,6 +14,7 @@ import (
 	"time"
 
 	"github.com/Sirupsen/logrus"
+	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/pkg/pools"
 	"github.com/docker/docker/pkg/system"
 )
@@ -341,13 +342,15 @@ func ChangesSize(newDir string, changes []Change) int64 {
 }
 
 // ExportChanges produces an Archive from the provided changes, relative to dir.
-func ExportChanges(dir string, changes []Change) (Archive, error) {
+func ExportChanges(dir string, changes []Change, uidMaps, gidMaps []idtools.IDMap) (Archive, error) {
 	reader, writer := io.Pipe()
 	go func() {
 		ta := &tarAppender{
 			TarWriter: tar.NewWriter(writer),
 			Buffer:    pools.BufioWriter32KPool.Get(nil),
 			SeenFiles: make(map[uint64]string),
+			UIDMaps:   uidMaps,
+			GIDMaps:   gidMaps,
 		}
 		// this buffer is needed for the duration of this piped stream
 		defer pools.BufioWriter32KPool.Put(ta.Buffer)

+ 2 - 2
pkg/archive/changes_posix_test.go

@@ -61,7 +61,7 @@ func TestHardLinkOrder(t *testing.T) {
 	sort.Sort(changesByPath(changes))
 
 	// ExportChanges
-	ar, err := ExportChanges(dest, changes)
+	ar, err := ExportChanges(dest, changes, nil, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -73,7 +73,7 @@ func TestHardLinkOrder(t *testing.T) {
 	// reverse sort
 	sort.Sort(sort.Reverse(changesByPath(changes)))
 	// ExportChanges
-	arRev, err := ExportChanges(dest, changes)
+	arRev, err := ExportChanges(dest, changes, nil, nil)
 	if err != nil {
 		t.Fatal(err)
 	}

+ 1 - 1
pkg/archive/changes_test.go

@@ -410,7 +410,7 @@ func TestApplyLayer(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	layer, err := ExportChanges(dst, changes)
+	layer, err := ExportChanges(dst, changes, nil, nil)
 	if err != nil {
 		t.Fatal(err)
 	}

+ 35 - 6
pkg/archive/diff.go

@@ -11,6 +11,7 @@ import (
 	"strings"
 
 	"github.com/Sirupsen/logrus"
+	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/pkg/pools"
 	"github.com/docker/docker/pkg/system"
 )
@@ -18,16 +19,23 @@ import (
 // UnpackLayer unpack `layer` to a `dest`. The stream `layer` can be
 // compressed or uncompressed.
 // Returns the size in bytes of the contents of the layer.
-func UnpackLayer(dest string, layer Reader) (size int64, err error) {
+func UnpackLayer(dest string, layer Reader, options *TarOptions) (size int64, err error) {
 	tr := tar.NewReader(layer)
 	trBuf := pools.BufioReader32KPool.Get(tr)
 	defer pools.BufioReader32KPool.Put(trBuf)
 
 	var dirs []*tar.Header
+	remappedRootUID, remappedRootGID, err := idtools.GetRootUIDGID(options.UIDMaps, options.GIDMaps)
+	if err != nil {
+		return 0, err
+	}
 
 	aufsTempdir := ""
 	aufsHardlinks := make(map[string]*tar.Header)
 
+	if options == nil {
+		options = &TarOptions{}
+	}
 	// Iterate through the files in the archive.
 	for {
 		hdr, err := tr.Next()
@@ -169,6 +177,27 @@ func UnpackLayer(dest string, layer Reader) (size int64, err error) {
 				srcData = tmpFile
 			}
 
+			// if the options contain a uid & gid maps, convert header uid/gid
+			// entries using the maps such that lchown sets the proper mapped
+			// uid/gid after writing the file. We only perform this mapping if
+			// the file isn't already owned by the remapped root UID or GID, as
+			// that specific uid/gid has no mapping from container -> host, and
+			// those files already have the proper ownership for inside the
+			// container.
+			if srcHdr.Uid != remappedRootUID {
+				xUID, err := idtools.ToHost(srcHdr.Uid, options.UIDMaps)
+				if err != nil {
+					return 0, err
+				}
+				srcHdr.Uid = xUID
+			}
+			if srcHdr.Gid != remappedRootGID {
+				xGID, err := idtools.ToHost(srcHdr.Gid, options.GIDMaps)
+				if err != nil {
+					return 0, err
+				}
+				srcHdr.Gid = xGID
+			}
 			if err := createTarFile(path, dest, srcHdr, srcData, true, nil); err != nil {
 				return 0, err
 			}
@@ -196,19 +225,19 @@ func UnpackLayer(dest string, layer Reader) (size int64, err error) {
 // compressed or uncompressed.
 // Returns the size in bytes of the contents of the layer.
 func ApplyLayer(dest string, layer Reader) (int64, error) {
-	return applyLayerHandler(dest, layer, true)
+	return applyLayerHandler(dest, layer, &TarOptions{}, true)
 }
 
 // ApplyUncompressedLayer parses a diff in the standard layer format from
 // `layer`, and applies it to the directory `dest`. The stream `layer`
 // can only be uncompressed.
 // Returns the size in bytes of the contents of the layer.
-func ApplyUncompressedLayer(dest string, layer Reader) (int64, error) {
-	return applyLayerHandler(dest, layer, false)
+func ApplyUncompressedLayer(dest string, layer Reader, options *TarOptions) (int64, error) {
+	return applyLayerHandler(dest, layer, options, false)
 }
 
 // do the bulk load of ApplyLayer, but allow for not calling DecompressStream
-func applyLayerHandler(dest string, layer Reader, decompress bool) (int64, error) {
+func applyLayerHandler(dest string, layer Reader, options *TarOptions, decompress bool) (int64, error) {
 	dest = filepath.Clean(dest)
 
 	// We need to be able to set any perms
@@ -224,5 +253,5 @@ func applyLayerHandler(dest string, layer Reader, decompress bool) (int64, error
 			return 0, err
 		}
 	}
-	return UnpackLayer(dest, layer)
+	return UnpackLayer(dest, layer, options)
 }

+ 3 - 3
pkg/chrootarchive/diff.go

@@ -7,13 +7,13 @@ import "github.com/docker/docker/pkg/archive"
 // uncompressed.
 // Returns the size in bytes of the contents of the layer.
 func ApplyLayer(dest string, layer archive.Reader) (size int64, err error) {
-	return applyLayerHandler(dest, layer, true)
+	return applyLayerHandler(dest, layer, &archive.TarOptions{}, true)
 }
 
 // ApplyUncompressedLayer parses a diff in the standard layer format from
 // `layer`, and applies it to the directory `dest`. The stream `layer`
 // can only be uncompressed.
 // Returns the size in bytes of the contents of the layer.
-func ApplyUncompressedLayer(dest string, layer archive.Reader) (int64, error) {
-	return applyLayerHandler(dest, layer, false)
+func ApplyUncompressedLayer(dest string, layer archive.Reader, options *archive.TarOptions) (int64, error) {
+	return applyLayerHandler(dest, layer, options, false)
 }

+ 21 - 4
pkg/chrootarchive/diff_unix.go

@@ -27,8 +27,9 @@ type applyLayerResponse struct {
 func applyLayer() {
 
 	var (
-		tmpDir = ""
-		err    error
+		tmpDir  = ""
+		err     error
+		options *archive.TarOptions
 	)
 	runtime.LockOSThread()
 	flag.Parse()
@@ -44,12 +45,16 @@ func applyLayer() {
 		fatal(err)
 	}
 
+	if err := json.Unmarshal([]byte(os.Getenv("OPT")), &options); err != nil {
+		fatal(err)
+	}
+
 	if tmpDir, err = ioutil.TempDir("/", "temp-docker-extract"); err != nil {
 		fatal(err)
 	}
 
 	os.Setenv("TMPDIR", tmpDir)
-	size, err := archive.UnpackLayer("/", os.Stdin)
+	size, err := archive.UnpackLayer("/", os.Stdin, options)
 	os.RemoveAll(tmpDir)
 	if err != nil {
 		fatal(err)
@@ -68,7 +73,7 @@ func applyLayer() {
 // applyLayerHandler parses a diff in the standard layer format from `layer`, and
 // applies it to the directory `dest`. Returns the size in bytes of the
 // contents of the layer.
-func applyLayerHandler(dest string, layer archive.Reader, decompress bool) (size int64, err error) {
+func applyLayerHandler(dest string, layer archive.Reader, options *archive.TarOptions, decompress bool) (size int64, err error) {
 	dest = filepath.Clean(dest)
 	if decompress {
 		decompressed, err := archive.DecompressStream(layer)
@@ -79,9 +84,21 @@ func applyLayerHandler(dest string, layer archive.Reader, decompress bool) (size
 
 		layer = decompressed
 	}
+	if options == nil {
+		options = &archive.TarOptions{}
+	}
+	if options.ExcludePatterns == nil {
+		options.ExcludePatterns = []string{}
+	}
+
+	data, err := json.Marshal(options)
+	if err != nil {
+		return 0, fmt.Errorf("ApplyLayer json encode: %v", err)
+	}
 
 	cmd := reexec.Command("docker-applyLayer", dest)
 	cmd.Stdin = layer
+	cmd.Env = append(cmd.Env, fmt.Sprintf("OPT=%s", data))
 
 	outBuf, errBuf := new(bytes.Buffer), new(bytes.Buffer)
 	cmd.Stdout, cmd.Stderr = outBuf, errBuf

+ 2 - 2
pkg/chrootarchive/diff_windows.go

@@ -13,7 +13,7 @@ import (
 // applyLayerHandler parses a diff in the standard layer format from `layer`, and
 // applies it to the directory `dest`. Returns the size in bytes of the
 // contents of the layer.
-func applyLayerHandler(dest string, layer archive.Reader, decompress bool) (size int64, err error) {
+func applyLayerHandler(dest string, layer archive.Reader, options *archive.TarOptions, decompress bool) (size int64, err error) {
 	dest = filepath.Clean(dest)
 
 	// Ensure it is a Windows-style volume path
@@ -34,7 +34,7 @@ func applyLayerHandler(dest string, layer archive.Reader, decompress bool) (size
 		return 0, fmt.Errorf("ApplyLayer failed to create temp-docker-extract under %s. %s", dest, err)
 	}
 
-	s, err := archive.UnpackLayer(dest, layer)
+	s, err := archive.UnpackLayer(dest, layer, nil)
 	os.RemoveAll(tmpDir)
 	if err != nil {
 		return 0, fmt.Errorf("ApplyLayer %s failed UnpackLayer to %s", err, dest)

+ 26 - 0
pkg/directory/directory.go

@@ -0,0 +1,26 @@
+package directory
+
+import (
+	"io/ioutil"
+	"os"
+	"path/filepath"
+)
+
+// MoveToSubdir relocates every entry found directly in oldpath into
+// oldpath/subdir; the entry named subdir itself is left where it is.
+func MoveToSubdir(oldpath, subdir string) error {
+	entries, err := ioutil.ReadDir(oldpath)
+	if err != nil {
+		return err
+	}
+	for _, entry := range entries {
+		name := entry.Name()
+		if name == subdir {
+			continue
+		}
+		if err := os.Rename(filepath.Join(oldpath, name), filepath.Join(oldpath, subdir, name)); err != nil {
+			return err
+		}
+	}
+	return nil
+}

+ 45 - 0
pkg/directory/directory_test.go

@@ -3,6 +3,9 @@ package directory
 import (
 	"io/ioutil"
 	"os"
+	"path/filepath"
+	"reflect"
+	"sort"
 	"testing"
 )
 
@@ -135,3 +138,45 @@ func TestSizeFileAndNestedDirectoryNonempty(t *testing.T) {
 		t.Fatalf("directory with 6-byte file and nested directory with 6-byte file has size: %d", size)
 	}
 }
+
+// Test migration of directory to a subdir underneath itself
+func TestMoveToSubdir(t *testing.T) {
+	outerDir, err := ioutil.TempDir(os.TempDir(), "TestMoveToSubdir")
+	if err != nil {
+		t.Fatalf("failed to create directory: %v", err)
+	}
+	defer os.RemoveAll(outerDir) // clean up the scratch tree when the test ends
+	subDir, err := ioutil.TempDir(outerDir, "testSub")
+	if err != nil {
+		t.Fatalf("failed to create subdirectory: %v", err)
+	}
+	// write 4 temp files in the outer dir to get moved
+	filesList := []string{"a", "b", "c", "d"}
+	for _, fName := range filesList {
+		file, err := os.Create(filepath.Join(outerDir, fName))
+		if err != nil {
+			t.Fatalf("couldn't create temp file %q: %v", fName, err)
+		}
+		file.WriteString(fName)
+		file.Close()
+	}
+	if err = MoveToSubdir(outerDir, filepath.Base(subDir)); err != nil {
+		t.Fatalf("Error during migration of content to subdirectory: %v", err)
+	}
+	// validate that the files were moved to the subdirectory
+	infos, err := ioutil.ReadDir(subDir)
+	if err != nil {
+		t.Fatalf("failed to read subdirectory after migration: %v", err)
+	}
+	if len(infos) != 4 {
+		t.Fatalf("Should be four files in the subdir after the migration: actual length: %d", len(infos))
+	}
+	var results []string
+	for _, info := range infos {
+		results = append(results, info.Name())
+	}
+	sort.Sort(sort.StringSlice(results))
+	if !reflect.DeepEqual(filesList, results) {
+		t.Fatalf("Results after migration do not equal list of files: expected: %v, got: %v", filesList, results)
+	}
+}

+ 0 - 1
pkg/directory/directory_windows.go

@@ -5,7 +5,6 @@ package directory
 import (
 	"os"
 	"path/filepath"
-	"strings"
 
 	"github.com/docker/docker/pkg/longpath"
 )

+ 207 - 0
pkg/idtools/idtools.go

@@ -0,0 +1,207 @@
+package idtools
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+	"sort"
+	"strconv"
+	"strings"
+
+	"github.com/docker/docker/pkg/system"
+)
+
+// IDMap contains a single entry for user namespace range remapping. An array
+// of IDMap entries represents the structure that will be provided to the Linux
+// kernel for creating a user namespace.
+type IDMap struct {
+	// ContainerID is the first ID of the range as seen inside the container.
+	ContainerID int `json:"container_id"`
+	// HostID is the host ID that ContainerID is mapped to.
+	HostID      int `json:"host_id"`
+	// Size is the number of consecutive IDs covered by this entry.
+	Size        int `json:"size"`
+}
+
+// subIDRange is one contiguous block of subordinate IDs read from an
+// /etc/sub{uid,gid} entry: Length IDs beginning at Start.
+type subIDRange struct {
+	Start  int
+	Length int
+}
+
+// ranges is a list of subordinate ID ranges. The Len/Swap/Less methods below
+// let it be passed to sort.Sort, which orders the entries by ascending Start.
+type ranges []subIDRange
+
+func (e ranges) Len() int           { return len(e) }
+func (e ranges) Swap(i, j int)      { e[i], e[j] = e[j], e[i] }
+func (e ranges) Less(i, j int) bool { return e[i].Start < e[j].Start }
+
+// Paths of the subordinate ID files read by parseSubuid and parseSubgid.
+const (
+	subuidFileName string = "/etc/subuid"
+	subgidFileName string = "/etc/subgid"
+)
+
+// MkdirAllAs creates a directory (including any missing parents along the
+// path) and then changes the ownership of path to the requested uid/gid. If
+// the directory already exists, this function will still change ownership of
+// path to the requested uid/gid pair.
+func MkdirAllAs(path string, mode os.FileMode, ownerUID, ownerGID int) error {
+	return mkdirAs(path, mode, ownerUID, ownerGID, true)
+}
+
+// MkdirAs creates a single directory (parents are not created) and then
+// changes its ownership to the requested uid/gid. If the directory already
+// exists, this function still changes its ownership.
+func MkdirAs(path string, mode os.FileMode, ownerUID, ownerGID int) error {
+	return mkdirAs(path, mode, ownerUID, ownerGID, false)
+}
+
+func mkdirAs(path string, mode os.FileMode, ownerUID, ownerGID int, mkAll bool) error {
+	if mkAll {
+		if err := system.MkdirAll(path, mode); err != nil && !os.IsExist(err) {
+			return err
+		}
+	} else {
+		if err := os.Mkdir(path, mode); err != nil && !os.IsExist(err) {
+			return err
+		}
+	}
+	// even if it existed, we will chown to change ownership as requested
+	if err := os.Chown(path, ownerUID, ownerGID); err != nil {
+		return err
+	}
+	return nil
+}
+
+// GetRootUIDGID returns the host uid/gid that container ID 0 translates to
+// under the given maps. A nil map leaves the corresponding value at the real
+// root ID 0. If either translation fails, -1/-1 and the error are returned.
+func GetRootUIDGID(uidMap, gidMap []IDMap) (int, int, error) {
+	rootUID, rootGID := 0, 0
+	var err error
+
+	if uidMap != nil {
+		if rootUID, err = ToHost(0, uidMap); err != nil {
+			return -1, -1, err
+		}
+	}
+	if gidMap != nil {
+		if rootGID, err = ToHost(0, gidMap); err != nil {
+			return -1, -1, err
+		}
+	}
+	return rootUID, rootGID, nil
+}
+
+// ToContainer translates a host ID into the ID it has inside the container,
+// using the first map entry whose host range contains hostID. A nil map is
+// treated as a 1-to-1 mapping and hostID is returned unchanged. When no entry
+// covers hostID, -1 and an error are returned.
+func ToContainer(hostID int, idMap []IDMap) (int, error) {
+	if idMap == nil {
+		return hostID, nil
+	}
+	for _, entry := range idMap {
+		lastHostID := entry.HostID + entry.Size - 1
+		if hostID < entry.HostID || hostID > lastHostID {
+			continue
+		}
+		// same offset from the start of the range on both sides of the map
+		return entry.ContainerID + (hostID - entry.HostID), nil
+	}
+	return -1, fmt.Errorf("Host ID %d cannot be mapped to a container ID", hostID)
+}
+
+// ToHost translates a container (remapped) ID into the host ID it maps to,
+// using the first map entry whose container range contains contID. A nil map
+// is treated as a 1-to-1 mapping and contID is returned unchanged. When no
+// entry covers contID, -1 and an error are returned.
+func ToHost(contID int, idMap []IDMap) (int, error) {
+	if idMap == nil {
+		return contID, nil
+	}
+	for _, entry := range idMap {
+		lastContainerID := entry.ContainerID + entry.Size - 1
+		if contID < entry.ContainerID || contID > lastContainerID {
+			continue
+		}
+		// same offset from the start of the range on both sides of the map
+		return entry.HostID + (contID - entry.ContainerID), nil
+	}
+	return -1, fmt.Errorf("Container ID %d cannot be mapped to a host ID", contID)
+}
+
+// CreateIDMappings builds the uid and gid remapping tables for the given
+// user/group pair from the ranges recorded for them in /etc/subuid and
+// /etc/subgid. It fails if either file cannot be parsed or if either name
+// has no ranges at all.
+func CreateIDMappings(username, groupname string) ([]IDMap, []IDMap, error) {
+	uidRanges, err := parseSubuid(username)
+	if err != nil {
+		return nil, nil, err
+	}
+	gidRanges, err := parseSubgid(groupname)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	switch {
+	case len(uidRanges) == 0:
+		return nil, nil, fmt.Errorf("No subuid ranges found for user %q", username)
+	case len(gidRanges) == 0:
+		return nil, nil, fmt.Errorf("No subgid ranges found for group %q", groupname)
+	}
+
+	uidMap := createIDMap(uidRanges)
+	gidMap := createIDMap(gidRanges)
+	return uidMap, gidMap, nil
+}
+
+// createIDMap turns a list of host ID ranges into IDMap entries. The ranges
+// are sorted in place by ascending Start and then laid out back to back in
+// the container ID space, beginning at container ID 0.
+func createIDMap(subidRanges ranges) []IDMap {
+	// lowest host range first, so it receives container ID 0
+	sort.Sort(subidRanges)
+
+	idMap := make([]IDMap, 0, len(subidRanges))
+	nextContainerID := 0
+	for _, r := range subidRanges {
+		entry := IDMap{
+			ContainerID: nextContainerID,
+			HostID:      r.Start,
+			Size:        r.Length,
+		}
+		idMap = append(idMap, entry)
+		nextContainerID += r.Length
+	}
+	return idMap
+}
+
+// parseSubuid returns the subordinate uid ranges listed for username in
+// /etc/subuid.
+func parseSubuid(username string) (ranges, error) {
+	return parseSubidFile(subuidFileName, username)
+}
+
+// parseSubgid returns the subordinate gid ranges listed in /etc/subgid for
+// the given name (CreateIDMappings passes a group name here).
+func parseSubgid(username string) (ranges, error) {
+	return parseSubidFile(subgidFileName, username)
+}
+
+// parseSubidFile reads the subordinate ID file at path, whose non-blank lines
+// have the form "name:start:length", and returns every range recorded for
+// username (an empty list when there is none).
+//
+// An error is returned when the file cannot be opened or read, when any
+// non-blank line does not have exactly three colon-separated fields, or when
+// the start or length of a matching line is not an integer.
+func parseSubidFile(path, username string) (ranges, error) {
+	var rangeList ranges
+
+	subidFile, err := os.Open(path)
+	if err != nil {
+		return rangeList, err
+	}
+	defer subidFile.Close()
+
+	s := bufio.NewScanner(subidFile)
+	for s.Scan() {
+		text := strings.TrimSpace(s.Text())
+		if text == "" {
+			continue
+		}
+		parts := strings.Split(text, ":")
+		if len(parts) != 3 {
+			return rangeList, fmt.Errorf("Cannot parse subuid/gid information: Format not correct for %s file", path)
+		}
+		if parts[0] != username {
+			continue
+		}
+		// a name may appear on several lines; every one of its ranges is collected
+		startid, err := strconv.Atoi(parts[1])
+		if err != nil {
+			return rangeList, fmt.Errorf("String to int conversion failed during subuid/gid parsing of %s: %v", path, err)
+		}
+		length, err := strconv.Atoi(parts[2])
+		if err != nil {
+			return rangeList, fmt.Errorf("String to int conversion failed during subuid/gid parsing of %s: %v", path, err)
+		}
+		rangeList = append(rangeList, subIDRange{startid, length})
+	}
+	// Scan returns false for both EOF and a read failure, so the scanner error
+	// can only be observed once the loop has ended
+	if err := s.Err(); err != nil {
+		return rangeList, err
+	}
+	return rangeList, nil
+}

+ 155 - 0
pkg/idtools/usergroupadd_linux.go

@@ -0,0 +1,155 @@
+package idtools
+
+import (
+	"fmt"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"syscall"
+)
+
+// add a user and/or group to Linux /etc/passwd, /etc/group using standard
+// Linux distribution commands:
+// adduser --uid <id> --shell /bin/false --no-create-home --disabled-login --ingroup <groupname> <username>
+// useradd -M -u <id> -s /bin/false -N -g <groupname> <username>
+// addgroup --gid <id> <groupname>
+// groupadd -g <id> <groupname>
+
+// baseUID and baseGID are the first candidate IDs AddNamespaceRangesUser
+// probes when looking for an unused uid/gid; idMAX is the highest ID
+// findUnused will try before giving up.
+const baseUID int = 10000
+const baseGID int = 10000
+const idMAX int = 65534
+
+var (
+	// userCommand and groupCommand name the binaries selected by init for
+	// adding users and groups; they stay empty when no suitable binary is found.
+	userCommand  string
+	groupCommand string
+
+	// cmdTemplates maps each supported binary name to the fmt template of its
+	// argument string. User templates take (uid, group name, user name); group
+	// templates take (gid, group name).
+	cmdTemplates = map[string]string{
+		"adduser":  "--uid %d --shell /bin/false --no-create-home --disabled-login --ingroup %s %s",
+		"useradd":  "-M -u %d -s /bin/false -N -g %s %s",
+		"addgroup": "--gid %d %s",
+		"groupadd": "-g %d %s",
+	}
+)
+
+// init picks the user- and group-creation binaries available on this system.
+// For each kind the candidates are tried in order of preference and the first
+// one that resolves wins; if none resolves the command name stays empty.
+func init() {
+	for _, candidate := range []string{"adduser", "useradd"} {
+		if _, err := resolveBinary(candidate); err == nil {
+			userCommand = candidate
+			break
+		}
+	}
+	for _, candidate := range []string{"addgroup", "groupadd"} {
+		if _, err := resolveBinary(candidate); err == nil {
+			groupCommand = candidate
+			break
+		}
+	}
+}
+
+// resolveBinary looks binname up in $PATH, follows any symlinks, and returns
+// the resolved path. It returns an error when the lookup or symlink
+// resolution fails, or when the resolved file's base name differs from
+// binname.
+func resolveBinary(binname string) (string, error) {
+	found, err := exec.LookPath(binname)
+	if err != nil {
+		return "", err
+	}
+	target, err := filepath.EvalSymlinks(found)
+	if err != nil {
+		return "", err
+	}
+	// reject a name that is only a symlink to a differently named binary
+	if filepath.Base(target) != binname {
+		return "", fmt.Errorf("Binary %q does not resolve to a binary of that name in $PATH (%q)", binname, target)
+	}
+	return target, nil
+}
+
+// AddNamespaceRangesUser creates a group and a user, both called name, using
+// the first unused gid and uid found at or above baseGID and baseUID, and
+// returns the uid/gid it picked. The new user's /etc/sub{uid,gid} ranges will
+// be used for user namespace mapping ranges in containers.
+//
+// On any failure -1, -1 and a descriptive error are returned.
+//
+// NOTE(review): addGroup treats "already exists" as success, so in that case
+// the returned gid is the probed unused value rather than the existing
+// group's gid. Confirm callers do not depend on it.
+func AddNamespaceRangesUser(name string) (int, int, error) {
+	freeUID, uidErr := findUnusedUID(baseUID)
+	if uidErr != nil {
+		return -1, -1, fmt.Errorf("Unable to find unused UID: %v", uidErr)
+	}
+	freeGID, gidErr := findUnusedGID(baseGID)
+	if gidErr != nil {
+		return -1, -1, fmt.Errorf("Unable to find unused GID: %v", gidErr)
+	}
+
+	// the group has to exist first because the user is created as its member
+	if grpErr := addGroup(name, freeGID); grpErr != nil {
+		return -1, -1, fmt.Errorf("Error adding group %q: %v", name, grpErr)
+	}
+	if usrErr := addUser(name, freeUID, name); usrErr != nil {
+		return -1, -1, fmt.Errorf("Error adding user %q: %v", name, usrErr)
+	}
+	return freeUID, freeGID, nil
+}
+
+// addUser runs the selected user-creation binary to add userName with the
+// given uid as a member of groupName. It fails immediately when init found
+// no such binary.
+func addUser(userName string, uid int, groupName string) error {
+	if userCommand == "" {
+		return fmt.Errorf("Cannot add user; no useradd/adduser binary found")
+	}
+	template := cmdTemplates[userCommand]
+	return execAddCmd(userCommand, fmt.Sprintf(template, uid, groupName, userName))
+}
+
+// addGroup runs the selected group-creation binary to add groupName with the
+// given gid. It fails immediately when init found no such binary. A command
+// failure whose message contains "already exists" is treated as success,
+// since an existing group already satisfies the caller.
+func addGroup(groupName string, gid int) error {
+	if groupCommand == "" {
+		return fmt.Errorf("Cannot add group; no groupadd/addgroup binary found")
+	}
+	template := cmdTemplates[groupCommand]
+	err := execAddCmd(groupCommand, fmt.Sprintf(template, gid, groupName))
+	if err == nil || strings.Contains(err.Error(), "already exists") {
+		return nil
+	}
+	return err
+}
+
+// execAddCmd runs cmd with args split on single spaces into separate
+// arguments. When the command fails, the returned error carries both the
+// execution error and the command's combined stdout/stderr.
+func execAddCmd(cmd, args string) error {
+	argv := strings.Split(args, " ")
+	out, err := exec.Command(cmd, argv...).CombinedOutput()
+	if err == nil {
+		return nil
+	}
+	return fmt.Errorf("Failed to add user/group with error: %v; output: %q", err, string(out))
+}
+
+// findUnusedUID returns the first uid at or above startUID that does not
+// appear in /etc/passwd.
+func findUnusedUID(startUID int) (int, error) {
+	return findUnused("passwd", startUID)
+}
+
+// findUnusedGID returns the first gid at or above startGID that does not
+// appear in /etc/group.
+func findUnusedGID(startGID int) (int, error) {
+	return findUnused("group", startGID)
+}
+
+// findUnused scans upward from id and returns the first numeric ID that is
+// absent from the third colon-separated field of /etc/<file>. The lookup is
+// done by a shell pipeline ending in grep. An error is returned when the
+// pipeline fails in any way other than "no match", or when the search passes
+// idMAX without finding a free ID.
+func findUnused(file string, id int) (int, error) {
+	for {
+		lookup := fmt.Sprintf("cat /etc/%s | cut -d: -f3 | grep '^%d$'", file, id)
+		runErr := exec.Command("sh", "-c", lookup).Run()
+		if runErr == nil {
+			// the pipeline matched, so this ID is taken; move to the next one
+			id++
+			if id > idMAX {
+				return -1, fmt.Errorf("Maximum id in %q reached with finding unused numeric ID", file)
+			}
+			continue
+		}
+		// exit status 1 is the "no match" signal: the ID is free to use
+		if exitErr, ok := runErr.(*exec.ExitError); ok {
+			if ws, ok := exitErr.Sys().(syscall.WaitStatus); ok && ws.ExitStatus() == 1 {
+				return id, nil
+			}
+		}
+		return -1, fmt.Errorf("Error looking in /etc/%s for unused ID: %v", file, runErr)
+	}
+}

+ 12 - 0
pkg/idtools/usergroupadd_unsupported.go

@@ -0,0 +1,12 @@
+// +build !linux
+
+package idtools
+
+import "fmt"
+
+// AddNamespaceRangesUser is the stub used on non-Linux builds. It does not
+// create any user or group: it always returns -1, -1 and an error stating
+// that adding users or groups is not supported on this OS.
+func AddNamespaceRangesUser(name string) (int, int, error) {
+	return -1, -1, fmt.Errorf("No support for adding users or groups on this OS")
+}

+ 10 - 2
pkg/plugins/client.go

@@ -68,7 +68,11 @@ func (c *Client) Call(serviceMethod string, args interface{}, ret interface{}) e
 		return err
 	}
 	defer body.Close()
-	return json.NewDecoder(body).Decode(&ret)
+	if err := json.NewDecoder(body).Decode(&ret); err != nil {
+		logrus.Errorf("%s: error reading plugin resp: %v", serviceMethod, err)
+		return err
+	}
+	return nil
 }
 
 // Stream calls the specified method with the specified arguments for the plugin and returns the response body
@@ -86,7 +90,11 @@ func (c *Client) SendFile(serviceMethod string, data io.Reader, ret interface{})
 	if err != nil {
 		return err
 	}
-	return json.NewDecoder(body).Decode(&ret)
+	if err := json.NewDecoder(body).Decode(&ret); err != nil {
+		logrus.Errorf("%s: error reading plugin resp: %v", serviceMethod, err)
+		return err
+	}
+	return nil
 }
 
 func (c *Client) callWithRetry(serviceMethod string, data io.Reader, retry bool) (io.ReadCloser, error) {

+ 8 - 3
volume/local/local.go

@@ -11,6 +11,7 @@ import (
 	"path/filepath"
 	"sync"
 
+	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/volume"
 )
 
@@ -28,10 +29,10 @@ var ErrNotFound = errors.New("volume not found")
 // New instantiates a new Root instance with the provided scope. Scope
 // is the base path that the Root instance uses to store its
 // volumes. The base path is created here if it does not exist.
-func New(scope string) (*Root, error) {
+func New(scope string, rootUID, rootGID int) (*Root, error) {
 	rootDirectory := filepath.Join(scope, volumesPathName)
 
-	if err := os.MkdirAll(rootDirectory, 0700); err != nil {
+	if err := idtools.MkdirAllAs(rootDirectory, 0700, rootUID, rootGID); err != nil {
 		return nil, err
 	}
 
@@ -39,6 +40,8 @@ func New(scope string) (*Root, error) {
 		scope:   scope,
 		path:    rootDirectory,
 		volumes: make(map[string]*localVolume),
+		rootUID: rootUID,
+		rootGID: rootGID,
 	}
 
 	dirs, err := ioutil.ReadDir(rootDirectory)
@@ -66,6 +69,8 @@ type Root struct {
 	scope   string
 	path    string
 	volumes map[string]*localVolume
+	rootUID int
+	rootGID int
 }
 
 // List lists all the volumes
@@ -100,7 +105,7 @@ func (r *Root) Create(name string, _ map[string]string) (volume.Volume, error) {
 	}
 
 	path := r.DataPath(name)
-	if err := os.MkdirAll(path, 0755); err != nil {
+	if err := idtools.MkdirAllAs(path, 0755, r.rootUID, r.rootGID); err != nil {
 		if os.IsExist(err) {
 			return nil, fmt.Errorf("volume already exists under %s", filepath.Dir(path))
 		}

+ 3 - 3
volume/local/local_test.go

@@ -13,7 +13,7 @@ func TestRemove(t *testing.T) {
 	}
 	defer os.RemoveAll(rootDir)
 
-	r, err := New(rootDir)
+	r, err := New(rootDir, 0, 0)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -55,7 +55,7 @@ func TestInitializeWithVolumes(t *testing.T) {
 	}
 	defer os.RemoveAll(rootDir)
 
-	r, err := New(rootDir)
+	r, err := New(rootDir, 0, 0)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -65,7 +65,7 @@ func TestInitializeWithVolumes(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	r, err = New(rootDir)
+	r, err = New(rootDir, 0, 0)
 	if err != nil {
 		t.Fatal(err)
 	}