浏览代码

Merge pull request #208 from thaJeztah/19.03_backport_rootless_fixes

[19.03 backport] backport rootless fixes
Tibor Vass 6 年之前
父节点
当前提交
03ce4080a4

+ 3 - 4
cmd/dockerd/config_common_unix.go

@@ -9,12 +9,11 @@ import (
 	"github.com/docker/docker/daemon/config"
 	"github.com/docker/docker/opts"
 	"github.com/docker/docker/pkg/homedir"
-	"github.com/docker/docker/rootless"
 	"github.com/spf13/pflag"
 )
 
 func getDefaultPidFile() (string, error) {
-	if !rootless.RunningWithNonRootUsername() {
+	if !honorXDG {
 		return "/var/run/docker.pid", nil
 	}
 	runtimeDir, err := homedir.GetRuntimeDir()
@@ -25,7 +24,7 @@ func getDefaultPidFile() (string, error) {
 }
 
 func getDefaultDataRoot() (string, error) {
-	if !rootless.RunningWithNonRootUsername() {
+	if !honorXDG {
 		return "/var/lib/docker", nil
 	}
 	dataHome, err := homedir.GetDataHome()
@@ -36,7 +35,7 @@ func getDefaultDataRoot() (string, error) {
 }
 
 func getDefaultExecRoot() (string, error) {
-	if !rootless.RunningWithNonRootUsername() {
+	if !honorXDG {
 		return "/var/run/docker", nil
 	}
 	runtimeDir, err := homedir.GetRuntimeDir()

+ 16 - 3
cmd/dockerd/config_unix.go

@@ -3,10 +3,13 @@
 package main
 
 import (
+	"os/exec"
+
 	"github.com/docker/docker/daemon/config"
 	"github.com/docker/docker/opts"
 	"github.com/docker/docker/rootless"
 	"github.com/docker/go-units"
+	"github.com/pkg/errors"
 	"github.com/spf13/pflag"
 )
 
@@ -35,7 +38,16 @@ func installConfigFlags(conf *config.Config, flags *pflag.FlagSet) error {
 	flags.BoolVar(&conf.BridgeConfig.EnableIPv6, "ipv6", false, "Enable IPv6 networking")
 	flags.StringVar(&conf.BridgeConfig.FixedCIDRv6, "fixed-cidr-v6", "", "IPv6 subnet for fixed IPs")
 	flags.BoolVar(&conf.BridgeConfig.EnableUserlandProxy, "userland-proxy", true, "Use userland proxy for loopback traffic")
-	flags.StringVar(&conf.BridgeConfig.UserlandProxyPath, "userland-proxy-path", "", "Path to the userland proxy binary")
+	defaultUserlandProxyPath := ""
+	if rootless.RunningWithRootlessKit() {
+		var err error
+		// use rootlesskit-docker-proxy for exposing the ports in RootlessKit netns to the initial namespace.
+		defaultUserlandProxyPath, err = exec.LookPath(rootless.RootlessKitDockerProxyBinary)
+		if err != nil {
+			return errors.Wrapf(err, "running with RootlessKit, but %s not installed", rootless.RootlessKitDockerProxyBinary)
+		}
+	}
+	flags.StringVar(&conf.BridgeConfig.UserlandProxyPath, "userland-proxy-path", defaultUserlandProxyPath, "Path to the userland proxy binary")
 	flags.StringVar(&conf.CgroupParent, "cgroup-parent", "", "Set parent cgroup for all containers")
 	flags.StringVar(&conf.RemappedRoot, "userns-remap", "", "User/Group setting for user namespaces")
 	flags.BoolVar(&conf.LiveRestoreEnabled, "live-restore", false, "Enable live restore of docker when containers are still running")
@@ -49,7 +61,8 @@ func installConfigFlags(conf *config.Config, flags *pflag.FlagSet) error {
 	flags.BoolVar(&conf.NoNewPrivileges, "no-new-privileges", false, "Set no-new-privileges by default for new containers")
 	flags.StringVar(&conf.IpcMode, "default-ipc-mode", config.DefaultIpcMode, `Default mode for containers ipc ("shareable" | "private")`)
 	flags.Var(&conf.NetworkConfig.DefaultAddressPools, "default-address-pool", "Default address pools for node specific local networks")
-	// Mostly users don't need to set this flag explicitly.
-	flags.BoolVar(&conf.Rootless, "rootless", rootless.RunningWithNonRootUsername(), "Enable rootless mode (experimental)")
+	// rootless needs to be explicitly specified for running "rootful" dockerd in rootless dockerd (#38702)
+	// Note that defaultUserlandProxyPath and honorXDG are configured according to the value of rootless.RunningWithRootlessKit, not the value of --rootless.
+	flags.BoolVar(&conf.Rootless, "rootless", rootless.RunningWithRootlessKit(), "Enable rootless mode; typically used with RootlessKit (experimental)")
 	return nil
 }

+ 9 - 3
cmd/dockerd/daemon.go

@@ -103,6 +103,12 @@ func (cli *DaemonCli) start(opts *daemonOptions) (err error) {
 		if cli.Config.IsRootless() {
 			logrus.Warn("Running in rootless mode. Cgroups, AppArmor, and CRIU are disabled.")
 		}
+		if rootless.RunningWithRootlessKit() {
+			logrus.Info("Running with RootlessKit integration")
+			if !cli.Config.IsRootless() {
+				return fmt.Errorf("rootless mode needs to be enabled for running with RootlessKit")
+			}
+		}
 	} else {
 		if cli.Config.IsRootless() {
 			return fmt.Errorf("rootless mode is supported only when running in experimental mode")
@@ -591,7 +597,7 @@ func loadListeners(cli *DaemonCli, serverConfig *apiserver.Config) ([]string, er
 	var hosts []string
 	for i := 0; i < len(cli.Config.Hosts); i++ {
 		var err error
-		if cli.Config.Hosts[i], err = dopts.ParseHost(cli.Config.TLS, rootless.RunningWithNonRootUsername(), cli.Config.Hosts[i]); err != nil {
+		if cli.Config.Hosts[i], err = dopts.ParseHost(cli.Config.TLS, honorXDG, cli.Config.Hosts[i]); err != nil {
 			return nil, errors.Wrapf(err, "error parsing -H %s", cli.Config.Hosts[i])
 		}
 
@@ -668,9 +674,9 @@ func validateAuthzPlugins(requestedPlugins []string, pg plugingetter.PluginGette
 	return nil
 }
 
-func systemContainerdRunning(isRootless bool) (string, bool, error) {
+func systemContainerdRunning(honorXDG bool) (string, bool, error) {
 	addr := containerddefaults.DefaultAddress
-	if isRootless {
+	if honorXDG {
 		runtimeDir, err := homedir.GetRuntimeDir()
 		if err != nil {
 			return "", false, err

+ 2 - 3
cmd/dockerd/daemon_unix.go

@@ -18,14 +18,13 @@ import (
 	"github.com/docker/docker/daemon/config"
 	"github.com/docker/docker/libcontainerd/supervisor"
 	"github.com/docker/docker/pkg/homedir"
-	"github.com/docker/docker/rootless"
 	"github.com/docker/libnetwork/portallocator"
 	"github.com/pkg/errors"
 	"golang.org/x/sys/unix"
 )
 
 func getDefaultDaemonConfigDir() (string, error) {
-	if !rootless.RunningWithNonRootUsername() {
+	if !honorXDG {
 		return "/etc/docker", nil
 	}
 	// NOTE: CLI uses ~/.docker while the daemon uses ~/.config/docker, because
@@ -148,7 +147,7 @@ func newCgroupParent(config *config.Config) string {
 func (cli *DaemonCli) initContainerD(ctx context.Context) (func(time.Duration) error, error) {
 	var waitForShutdown func(time.Duration) error
 	if cli.Config.ContainerdAddr == "" {
-		systemContainerdAddr, ok, err := systemContainerdRunning(cli.Config.IsRootless())
+		systemContainerdAddr, ok, err := systemContainerdRunning(honorXDG)
 		if err != nil {
 			return nil, errors.Wrap(err, "could not determine whether the system containerd is running")
 		}

+ 13 - 0
cmd/dockerd/docker.go

@@ -10,11 +10,16 @@ import (
 	"github.com/docker/docker/pkg/jsonmessage"
 	"github.com/docker/docker/pkg/reexec"
 	"github.com/docker/docker/pkg/term"
+	"github.com/docker/docker/rootless"
 	"github.com/moby/buildkit/util/apicaps"
 	"github.com/sirupsen/logrus"
 	"github.com/spf13/cobra"
 )
 
+var (
+	honorXDG bool
+)
+
 func newDaemonCommand() (*cobra.Command, error) {
 	opts := newDaemonOptions(config.New())
 
@@ -53,6 +58,14 @@ func init() {
 	if dockerversion.ProductName != "" {
 		apicaps.ExportedProduct = dockerversion.ProductName
 	}
+	// When running with RootlessKit, $XDG_RUNTIME_DIR, $XDG_DATA_HOME, and $XDG_CONFIG_HOME need to be
+	// honored as the default dirs, because we are unlikely to have permissions to access the system-wide
+	// directories.
+	//
+	// Note that even with --rootless, honorXDG needs to be kept false when not running with RootlessKit,
+	// because the system-wide directories in the current mount namespace are expected to be accessible.
+	// ("rootful" dockerd in rootless dockerd, #38702)
+	honorXDG = rootless.RunningWithRootlessKit()
 }
 
 func main() {

+ 29 - 18
contrib/dockerd-rootless.sh

@@ -9,7 +9,9 @@
 # External dependencies:
 # * newuidmap and newgidmap needs to be installed.
 # * /etc/subuid and /etc/subgid needs to be configured for the current user.
-# * Either slirp4netns (v0.3+) or VPNKit needs to be installed.
+# * One of slirp4netns (v0.3+), VPNKit, or lxc-user-nic needs to be installed.
+#   slirp4netns is used by default if installed. Otherwise falls back to VPNKit.
+#   The default value can be overridden with $DOCKERD_ROOTLESS_ROOTLESSKIT_NET=(slirp4netns|vpnkit|lxc-user-nic)
 #
 # See the documentation for the further information.
 
@@ -35,24 +37,32 @@ if [ -z $rootlesskit ]; then
 	exit 1
 fi
 
-net=""
-mtu=""
-if which slirp4netns >/dev/null 2>&1; then
-	if slirp4netns --help | grep -- --disable-host-loopback; then
-		net=slirp4netns
-		mtu=65520
-	else
-		echo "slirp4netns does not support --disable-host-loopback. Falling back to VPNKit."
-	fi
-fi
+: "${DOCKERD_ROOTLESS_ROOTLESSKIT_NET:=}"
+: "${DOCKERD_ROOTLESS_ROOTLESSKIT_MTU:=}"
+net=$DOCKERD_ROOTLESS_ROOTLESSKIT_NET
+mtu=$DOCKERD_ROOTLESS_ROOTLESSKIT_MTU
 if [ -z $net ]; then
-	if which vpnkit >/dev/null 2>&1; then
-		net=vpnkit
-		mtu=1500
-	else
-		echo "Either slirp4netns (v0.3+) or vpnkit needs to be installed"
-		exit 1
+	if which slirp4netns >/dev/null 2>&1; then
+		if slirp4netns --help | grep -- --disable-host-loopback; then
+			net=slirp4netns
+			if [ -z $mtu ]; then
+				mtu=65520
+			fi
+		else
+			echo "slirp4netns does not support --disable-host-loopback. Falling back to VPNKit."
+		fi
 	fi
+	if [ -z $net ]; then
+		if which vpnkit >/dev/null 2>&1; then
+			net=vpnkit
+		else
+			echo "Either slirp4netns (v0.3+) or vpnkit needs to be installed"
+			exit 1
+		fi
+	fi
+fi
+if [ -z $mtu ]; then
+	mtu=1500
 fi
 
 if [ -z $_DOCKERD_ROOTLESS_CHILD ]; then
@@ -66,7 +76,8 @@ if [ -z $_DOCKERD_ROOTLESS_CHILD ]; then
 	#         (by either systemd-networkd or NetworkManager)
 	# * /run: copy-up is required so that we can create /run/docker (hardcoded for plugins) in our namespace
 	$rootlesskit \
-		--net=$net --mtu=$mtu --disable-host-loopback --port-driver=builtin \
+		--net=$net --mtu=$mtu \
+		--disable-host-loopback --port-driver=builtin \
 		--copy-up=/etc --copy-up=/run \
 		$DOCKERD_ROOTLESS_ROOTLESSKIT_FLAGS \
 		$0 $@

+ 10 - 3
docs/rootless.md

@@ -20,7 +20,6 @@ $ grep ^$(whoami): /etc/subgid
 penguin:231072:65536
 ```
 
-* Either [slirp4netns](https://github.com/rootless-containers/slirp4netns) (v0.3+) or [VPNKit](https://github.com/moby/vpnkit) needs to be installed. slirp4netns is preferred for the best performance.
 
 ### Distribution-specific hint
 
@@ -55,10 +54,9 @@ penguin:231072:65536
 You need to run `dockerd-rootless.sh` instead of `dockerd`.
 
 ```console
-$ dockerd-rootless.sh --experimental --userland-proxy --userland-proxy-path=$(which rootlesskit-docker-proxy)"
+$ dockerd-rootless.sh --experimental
 ```
 As Rootless mode is experimental per se, currently you always need to run `dockerd-rootless.sh` with `--experimental`.
-Also, to expose ports, you need to set `--userland-proxy-path` to the path of `rootlesskit-docker-proxy` binary.
 
 Remarks:
 * The socket path is set to `$XDG_RUNTIME_DIR/docker.sock` by default. `$XDG_RUNTIME_DIR` is typically set to `/run/user/$UID`.
@@ -82,3 +80,12 @@ To route ping packets, you need to set up `net.ipv4.ping_group_range` properly a
 ```console
 $ sudo sh -c "echo 0   2147483647  > /proc/sys/net/ipv4/ping_group_range"
 ```
+
+### Changing network stack
+
+`dockerd-rootless.sh` uses [slirp4netns](https://github.com/rootless-containers/slirp4netns) (if installed) or [VPNKit](https://github.com/moby/vpnkit) as the network stack by default.
+These network stacks run in userspace and might have performance overhead. See [RootlessKit documentation](https://github.com/rootless-containers/rootlesskit/tree/v0.4.0#network-drivers) for further information.
+
+Optionally, you can use `lxc-user-nic` instead for the best performance.
+To use `lxc-user-nic`, you need to edit [`/etc/lxc/lxc-usernet`](https://github.com/rootless-containers/rootlesskit/tree/v0.4.0#--netlxc-user-nic-experimental) and set `$DOCKERD_ROOTLESS_ROOTLESSKIT_NET=lxc-user-nic`.
+

+ 2 - 2
hack/dockerfile/install/rootlesskit.installer

@@ -1,7 +1,7 @@
 #!/bin/sh
 
-# v0.3.0
-ROOTLESSKIT_COMMIT=70e0502f328bc5ffb14692a7ea41abb77196043b
+# v0.4.0
+ROOTLESSKIT_COMMIT=e92d5e772ee7e103aecf380c5874a40c52876ff0
 
 install_rootlesskit() {
 	case "$1" in

+ 3 - 3
opts/hosts.go

@@ -45,13 +45,13 @@ func ValidateHost(val string) (string, error) {
 }
 
 // ParseHost and set defaults for a Daemon host string.
-// defaultToTLS is preferred over defaultToUnixRootless.
-func ParseHost(defaultToTLS, defaultToUnixRootless bool, val string) (string, error) {
+// defaultToTLS is preferred over defaultToUnixXDG.
+func ParseHost(defaultToTLS, defaultToUnixXDG bool, val string) (string, error) {
 	host := strings.TrimSpace(val)
 	if host == "" {
 		if defaultToTLS {
 			host = DefaultTLSHost
-		} else if defaultToUnixRootless {
+		} else if defaultToUnixXDG {
 			runtimeDir, err := homedir.GetRuntimeDir()
 			if err != nil {
 				return "", err

+ 13 - 14
rootless/rootless.go

@@ -5,22 +5,21 @@ import (
 	"sync"
 )
 
+const (
+	// RootlessKitDockerProxyBinary is the binary name of rootlesskit-docker-proxy
+	RootlessKitDockerProxyBinary = "rootlesskit-docker-proxy"
+)
+
 var (
-	runningWithNonRootUsername     bool
-	runningWithNonRootUsernameOnce sync.Once
+	runningWithRootlessKit     bool
+	runningWithRootlessKitOnce sync.Once
 )
 
-// RunningWithNonRootUsername returns true if we $USER is set to a non-root value,
-// regardless to the UID/EUID value.
-//
-// The value of this variable is mostly used for configuring default paths.
-// If the value is true, $HOME and $XDG_RUNTIME_DIR should be honored for setting up the default paths.
-// If false (not only EUID==0 but also $USER==root), $HOME and $XDG_RUNTIME_DIR should be ignored
-// even if we are in a user namespace.
-func RunningWithNonRootUsername() bool {
-	runningWithNonRootUsernameOnce.Do(func() {
-		u := os.Getenv("USER")
-		runningWithNonRootUsername = u != "" && u != "root"
+// RunningWithRootlessKit returns true if running under RootlessKit namespaces.
+func RunningWithRootlessKit() bool {
+	runningWithRootlessKitOnce.Do(func() {
+		u := os.Getenv("ROOTLESSKIT_STATE_DIR")
+		runningWithRootlessKit = u != ""
 	})
-	return runningWithNonRootUsername
+	return runningWithRootlessKit
 }