Run privileged containers when userns are specified

Following #19995 and #17409, this PR enables skipping userns re-mapping
when creating a container (or when executing a command). This allows
privileged containers to run side by side with userns-remapped
containers.

The feature is enabled by specifying `--userns=host`, which will not
remap the user if userns are applied. If this flag is not specified,
the existing behavior (which blocks specific privileged operations)
remains.

Signed-off-by: Liron Levin <liron@twistlock.com>
This commit is contained in:
Liron Levin 2016-02-08 16:23:24 +02:00
parent b9361f02da
commit 6993e891d1
12 changed files with 87 additions and 6 deletions

View file

@ -218,11 +218,14 @@ func (daemon *Daemon) populateCommand(c *container.Container, env []string) erro
processConfig.Env = env
remappedRoot := &execdriver.User{}
rootUID, rootGID := daemon.GetRemappedUIDGID()
if rootUID != 0 {
remappedRoot.UID = rootUID
remappedRoot.GID = rootGID
if c.HostConfig.UsernsMode.IsPrivate() {
rootUID, rootGID := daemon.GetRemappedUIDGID()
if rootUID != 0 {
remappedRoot.UID = rootUID
remappedRoot.GID = rootGID
}
}
uidMap, gidMap := daemon.GetUIDGIDMaps()
if !daemon.seccompEnabled {

View file

@ -429,7 +429,7 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.
logrus.Warnf("IPv4 forwarding is disabled. Networking will not work")
}
// check for various conflicting options with user namespaces
if daemon.configStore.RemappedRoot != "" {
if daemon.configStore.RemappedRoot != "" && hostConfig.UsernsMode.IsPrivate() {
if hostConfig.Privileged {
return warnings, fmt.Errorf("Privileged mode is incompatible with user namespaces")
}

View file

@ -125,6 +125,7 @@ This section lists each version from latest to oldest. Each listing includes a
* `GET /info` now returns `KernelMemory` field, showing if "kernel memory limit" is supported.
* `POST /containers/create` now takes `PidsLimit` field, if the kernel is >= 4.3 and the pids cgroup is supported.
* `GET /containers/(id or name)/stats` now returns `pids_stats`, if the kernel is >= 4.3 and the pids cgroup is supported.
* `POST /containers/create` now allows you to override usernamespaces remapping and use privileged options for the container.
* `POST /auth` now returns an `IdentityToken` when supported by a registry.
### v1.22 API changes

View file

@ -431,6 +431,8 @@ Json Parameters:
The default is not to restart. (optional)
An ever increasing delay (double the previous delay, starting at 100mS)
is added before each restart to prevent flooding the server.
- **UsernsMode** - Sets the user namespace mode for the container when the user namespace remapping option is enabled.
Supported values are: `host`.
- **NetworkMode** - Sets the networking mode for the container. Supported
standard values are: `bridge`, `host`, `none`, and `container:<name|id>`. Any other value is taken
as a custom network's name to which this container should connect to.

View file

@ -83,6 +83,9 @@ Creates a new container.
--shm-size=[] Size of `/dev/shm`. The format is `<number><unit>`. `number` must be greater than `0`. Unit is optional and can be `b` (bytes), `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). If you omit the unit, the system uses bytes. If you omit the size entirely, the system uses `64m`.
-t, --tty Allocate a pseudo-TTY
-u, --user="" Username or UID
--userns="" Container user namespace
'host': Use the Docker host user namespace
'': Use the Docker daemon user namespace specified by `--userns-remap` option.
--ulimit=[] Ulimit options
--uts="" UTS namespace to use
-v, --volume=[host-src:]container-dest[:<options>]

View file

@ -750,6 +750,16 @@ following algorithm to create the mapping ranges:
2. Map segments will be created from each range in increasing value with a length matching the length of each segment. Therefore the range segment with the lowest numeric starting value will be equal to the remapped root, and continue up through host uid/gid equal to the range segment length. As an example, if the lowest segment starts at ID 1000 and has a length of 100, then a map of 1000 -> 0 (the remapped root) up through 1100 -> 100 will be created from this segment. If the next segment starts at ID 10000, then the next map will start with mapping 10000 -> 101 up to the length of this second segment. This will continue until no more segments are found in the subordinate files for this user.
3. If more than five range segments exist for a single user, only the first five will be utilized, matching the kernel's limitation of only five entries in `/proc/self/uid_map` and `proc/self/gid_map`.
### Disable user namespace for a container
If you enable user namespaces on the daemon, all containers are started
with user namespaces enabled. In some situations you might want to disable
this feature for a container, for example, to start a privileged container (see
[user namespace known restrictions](#user-namespace-known-restrictions)).
To enable those advanced features for a specific container use `--userns=host`
in the `run/exec/create` command.
This option will completely disable user namespace mapping for the container's user.
### User namespace known restrictions
The following standard Docker features are currently incompatible when

View file

@ -85,6 +85,9 @@ parent = "smn_cli"
--stop-signal="SIGTERM" Signal to stop a container
-t, --tty Allocate a pseudo-TTY
-u, --user="" Username or UID (format: <name|uid>[:<group|gid>])
--userns="" Container user namespace
'host': Use the Docker host user namespace
'': Use the Docker daemon user namespace specified by `--userns-remap` option.
--ulimit=[] Ulimit options
--uts="" UTS namespace to use
-v, --volume=[host-src:]container-dest[:<options>]

View file

@ -37,11 +37,13 @@ func (s *DockerDaemonSuite) TestDaemonUserNamespaceRootSetting(c *check.C) {
gid, err := strconv.Atoi(uidgid[1])
c.Assert(err, checker.IsNil, check.Commentf("Can't parse gid"))
//writeable by the remapped root UID/GID pair
// writable by the remapped root UID/GID pair
c.Assert(os.Chown(tmpDir, uid, gid), checker.IsNil)
out, err := s.d.Cmd("run", "-d", "--name", "userns", "-v", tmpDir+":/goofy", "busybox", "sh", "-c", "touch /goofy/testfile; top")
c.Assert(err, checker.IsNil, check.Commentf("Output: %s", out))
user := s.findUser(c, "userns")
c.Assert(uidgid[0], checker.Equals, user)
pid, err := s.d.Cmd("inspect", "--format='{{.State.Pid}}'", "userns")
c.Assert(err, checker.IsNil, check.Commentf("Could not inspect running container: out: %q", pid))
@ -62,4 +64,23 @@ func (s *DockerDaemonSuite) TestDaemonUserNamespaceRootSetting(c *check.C) {
c.Assert(err, checker.IsNil)
c.Assert(stat.UID(), checker.Equals, uint32(uid), check.Commentf("Touched file not owned by remapped root UID"))
c.Assert(stat.GID(), checker.Equals, uint32(gid), check.Commentf("Touched file not owned by remapped root GID"))
// use host usernamespace
out, err = s.d.Cmd("run", "-d", "--name", "userns_skip", "--userns", "host", "busybox", "sh", "-c", "touch /goofy/testfile; top")
c.Assert(err, checker.IsNil, check.Commentf("Output: %s", out))
user = s.findUser(c, "userns_skip")
// userns are skipped, user is root
c.Assert(user, checker.Equals, "root")
}
// findUser finds the uid or name of the user of the first process that runs in a container.
//
// It runs `top` for the given container through the suite's daemon and returns
// the first whitespace-separated field of the second output line (the line
// right after the header). The test is aborted when the command fails, when
// there is no second line, or when that line is blank.
func (s *DockerDaemonSuite) findUser(c *check.C, container string) string {
	out, err := s.d.Cmd("top", container)
	c.Assert(err, checker.IsNil, check.Commentf("Output: %s", out))
	rows := strings.Split(out, "\n")
	if len(rows) < 2 {
		// No process rows found
		c.Fatalf("no process rows in top output for container %q: %q", container, out)
	}
	// A header followed only by a trailing newline yields an empty second
	// row; indexing its fields directly would panic instead of failing the
	// test cleanly.
	fields := strings.Fields(rows[1])
	if len(fields) == 0 {
		c.Fatalf("empty first process row in top output for container %q: %q", container, out)
	}
	return fields[0]
}

View file

@ -58,6 +58,7 @@ docker-create - Create a new container
[**-P**|**--publish-all**]
[**-p**|**--publish**[=*[]*]]
[**--pid**[=*[]*]]
[**--userns**[=*[]*]]
[**--pids-limit**[=*PIDS_LIMIT*]]
[**--privileged**]
[**--read-only**]
@ -291,6 +292,10 @@ unit, `b` is used. Set LIMIT to `-1` to enable unlimited swap.
**host**: use the host's PID namespace inside the container.
Note: the host mode gives the container full access to local PID and is therefore considered insecure.
**--userns**=""
Set the usernamespace mode for the container when `userns-remap` option is enabled.
**host**: use the host usernamespace and enable all privileged options (e.g., `pid=host` or `--privileged`).
**--pids-limit**=""
Tune the container's pids limit. Set `-1` to have unlimited pids for the container.

View file

@ -60,6 +60,7 @@ docker-run - Run a command in a new container
[**-P**|**--publish-all**]
[**-p**|**--publish**[=*[]*]]
[**--pid**[=*[]*]]
[**--userns**[=*[]*]]
[**--pids-limit**[=*PIDS_LIMIT*]]
[**--privileged**]
[**--read-only**]
@ -421,6 +422,10 @@ Use `docker port` to see the actual mapping: `docker port CONTAINER $CONTAINERPO
**host**: use the host's PID namespace inside the container.
Note: the host mode gives the container full access to local PID and is therefore considered insecure.
**--userns**=""
Set the usernamespace mode for the container when `userns-remap` option is enabled.
**host**: use the host usernamespace and enable all privileged options (e.g., `pid=host` or `--privileged`).
**--pids-limit**=""
Tune the container's pids limit. Set `-1` to have unlimited pids for the container.

View file

@ -121,6 +121,27 @@ func TestUTSModeTest(t *testing.T) {
}
}
// TestUsernsModeTest verifies the IsPrivate, IsHost and Valid results of
// UsernsMode for a table of mode strings.
func TestUsernsModeTest(t *testing.T) {
	// Expected results per mode, in the order: private, host, valid.
	expectations := map[container.UsernsMode][]bool{
		"":                {true, false, true},
		"something:weird": {true, false, false},
		"host":            {false, true, true},
		"host:name":       {true, false, true},
	}
	for mode, want := range expectations {
		wantPrivate, wantHost, wantValid := want[0], want[1], want[2]

		if got := mode.IsPrivate(); got != wantPrivate {
			t.Fatalf("UsernsMode.IsPrivate for %v should have been %v but was %v", mode, wantPrivate, got)
		}
		if got := mode.IsHost(); got != wantHost {
			t.Fatalf("UsernsMode.IsHost for %v should have been %v but was %v", mode, wantHost, got)
		}
		if got := mode.Valid(); got != wantValid {
			t.Fatalf("UsernsMode.Valid for %v should have been %v but was %v", mode, wantValid, got)
		}
	}
}
func TestPidModeTest(t *testing.T) {
pidModes := map[container.PidMode][]bool{
// private, host, valid

View file

@ -59,6 +59,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
flPrivileged = cmd.Bool([]string{"-privileged"}, false, "Give extended privileges to this container")
flPidMode = cmd.String([]string{"-pid"}, "", "PID namespace to use")
flUTSMode = cmd.String([]string{"-uts"}, "", "UTS namespace to use")
flUsernsMode = cmd.String([]string{"-userns"}, "", "User namespace to use")
flPublishAll = cmd.Bool([]string{"P", "-publish-all"}, false, "Publish all exposed ports to random ports")
flStdin = cmd.Bool([]string{"i", "-interactive"}, false, "Keep STDIN open even if not attached")
flTty = cmd.Bool([]string{"t", "-tty"}, false, "Allocate a pseudo-TTY")
@ -316,6 +317,11 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
return nil, nil, nil, cmd, fmt.Errorf("--uts: invalid UTS mode")
}
usernsMode := container.UsernsMode(*flUsernsMode)
if !usernsMode.Valid() {
return nil, nil, nil, cmd, fmt.Errorf("--userns: invalid USER mode")
}
restartPolicy, err := ParseRestartPolicy(*flRestartPolicy)
if err != nil {
return nil, nil, nil, cmd, err
@ -404,6 +410,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
IpcMode: ipcMode,
PidMode: pidMode,
UTSMode: utsMode,
UsernsMode: usernsMode,
CapAdd: strslice.StrSlice(flCapAdd.GetAll()),
CapDrop: strslice.StrSlice(flCapDrop.GetAll()),
GroupAdd: flGroupAdd.GetAll(),