Parcourir la source

Merge pull request #19265 from rhatdan/netsysctl

Add support for setting sysctls
Vincent Demeester il y a 9 ans
Parent
commit
988508a2b5

+ 1 - 0
contrib/completion/bash/docker

@@ -1671,6 +1671,7 @@ _docker_run() {
 		--shm-size
 		--shm-size
 		--stop-signal
 		--stop-signal
 		--tmpfs
 		--tmpfs
+		--sysctl
 		--ulimit
 		--ulimit
 		--user -u
 		--user -u
 		--userns
 		--userns

+ 1 - 0
contrib/completion/zsh/_docker

@@ -644,6 +644,7 @@ __docker_subcommand() {
         "($help)--privileged[Give extended privileges to this container]"
         "($help)--privileged[Give extended privileges to this container]"
         "($help)--read-only[Mount the container's root filesystem as read only]"
         "($help)--read-only[Mount the container's root filesystem as read only]"
         "($help)*--security-opt=[Security options]:security option: "
         "($help)*--security-opt=[Security options]:security option: "
+        "($help)*--sysctl=-[sysctl options]:sysctl: "
         "($help -t --tty)"{-t,--tty}"[Allocate a pseudo-tty]"
         "($help -t --tty)"{-t,--tty}"[Allocate a pseudo-tty]"
         "($help -u --user)"{-u=,--user=}"[Username or UID]:user:_users"
         "($help -u --user)"{-u=,--user=}"[Username or UID]:user:_users"
         "($help)--tmpfs[mount tmpfs]"
         "($help)--tmpfs[mount tmpfs]"

+ 1 - 0
daemon/oci_linux.go

@@ -611,6 +611,7 @@ func (daemon *Daemon) createSpec(c *container.Container) (*libcontainerd.Spec, e
 		return nil, fmt.Errorf("linux runtime spec resources: %v", err)
 		return nil, fmt.Errorf("linux runtime spec resources: %v", err)
 	}
 	}
 	s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj
 	s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj
+	s.Linux.Sysctl = c.HostConfig.Sysctls
 	if err := setDevices(&s, c); err != nil {
 	if err := setDevices(&s, c); err != nil {
 		return nil, fmt.Errorf("linux runtime spec devices: %v", err)
 		return nil, fmt.Errorf("linux runtime spec devices: %v", err)
 	}
 	}

+ 1 - 0
docs/reference/api/docker_remote_api.md

@@ -176,6 +176,7 @@ This section lists each version from latest to oldest.  Each listing includes a
 
 
 [Docker Remote API v1.21](docker_remote_api_v1.21.md) documentation
 [Docker Remote API v1.21](docker_remote_api_v1.21.md) documentation
 
 
+* `POST /containers/create` and `POST /containers/(id)/start` allow you to configure kernel parameters (sysctls) for use in the container.
 * `GET /volumes` lists volumes from all volume drivers.
 * `GET /volumes` lists volumes from all volume drivers.
 * `POST /volumes/create` to create a volume.
 * `POST /volumes/create` to create a volume.
 * `GET /volumes/(name)` get low-level information about a volume.
 * `GET /volumes/(name)` get low-level information about a volume.

+ 8 - 0
docs/reference/api/docker_remote_api_v1.21.md

@@ -199,6 +199,7 @@ Create a container
              "RestartPolicy": { "Name": "", "MaximumRetryCount": 0 },
              "RestartPolicy": { "Name": "", "MaximumRetryCount": 0 },
              "NetworkMode": "bridge",
              "NetworkMode": "bridge",
              "Devices": [],
              "Devices": [],
+             "Sysctls": { "net.ipv4.ip_forward": "1" },
              "Ulimits": [{}],
              "Ulimits": [{}],
              "LogConfig": { "Type": "json-file", "Config": {} },
              "LogConfig": { "Type": "json-file", "Config": {} },
              "SecurityOpt": [],
              "SecurityOpt": [],
@@ -306,6 +307,10 @@ Json Parameters:
     -   **Devices** - A list of devices to add to the container specified as a JSON object in the
     -   **Devices** - A list of devices to add to the container specified as a JSON object in the
       form
       form
           `{ "PathOnHost": "/dev/deviceName", "PathInContainer": "/dev/deviceName", "CgroupPermissions": "mrw"}`
           `{ "PathOnHost": "/dev/deviceName", "PathInContainer": "/dev/deviceName", "CgroupPermissions": "mrw"}`
+    -   **Sysctls** - A map of kernel parameters (sysctls) to set in the container, specified as
+          `{ <name>: <value> }`, for example:
+          `{ "net.ipv4.ip_forward": "1" }`
+
     -   **Ulimits** - A list of ulimits to set in the container, specified as
     -   **Ulimits** - A list of ulimits to set in the container, specified as
           `{ "Name": <name>, "Soft": <soft limit>, "Hard": <hard limit> }`, for example:
           `{ "Name": <name>, "Soft": <soft limit>, "Hard": <hard limit> }`, for example:
           `Ulimits: { "Name": "nofile", "Soft": 1024, "Hard": 2048 }`
           `Ulimits: { "Name": "nofile", "Soft": 1024, "Hard": 2048 }`
@@ -426,6 +431,9 @@ Return low-level information on the container `id`
 				"Type": "json-file"
 				"Type": "json-file"
 			},
 			},
 			"SecurityOpt": null,
 			"SecurityOpt": null,
+			"Sysctls": {
+			        "net.ipv4.ip_forward": "1"
+			},
 			"VolumesFrom": null,
 			"VolumesFrom": null,
 			"Ulimits": [{}],
 			"Ulimits": [{}],
 			"VolumeDriver": ""
 			"VolumeDriver": ""

+ 1 - 0
docs/reference/commandline/create.md

@@ -82,6 +82,7 @@ Creates a new container.
       --stop-signal="SIGTERM"       Signal to stop a container
       --stop-signal="SIGTERM"       Signal to stop a container
       --shm-size=[]                 Size of `/dev/shm`. The format is `<number><unit>`. `number` must be greater than `0`.  Unit is optional and can be `b` (bytes), `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). If you omit the unit, the system uses bytes. If you omit the size entirely, the system uses `64m`.
       --shm-size=[]                 Size of `/dev/shm`. The format is `<number><unit>`. `number` must be greater than `0`.  Unit is optional and can be `b` (bytes), `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). If you omit the unit, the system uses bytes. If you omit the size entirely, the system uses `64m`.
       --storage-opt=[]              Set storage driver options per container
       --storage-opt=[]              Set storage driver options per container
+      --sysctl=[]                   Configure namespaced kernel parameters at runtime
       -t, --tty                     Allocate a pseudo-TTY
       -t, --tty                     Allocate a pseudo-TTY
       -u, --user=""                 Username or UID
       -u, --user=""                 Username or UID
       --userns=""                   Container user namespace
       --userns=""                   Container user namespace

+ 28 - 0
docs/reference/commandline/run.md

@@ -84,6 +84,7 @@ parent = "smn_cli"
       --sig-proxy=true              Proxy received signals to the process
       --sig-proxy=true              Proxy received signals to the process
       --stop-signal="SIGTERM"       Signal to stop a container
       --stop-signal="SIGTERM"       Signal to stop a container
       --storage-opt=[]              Set storage driver options per container
       --storage-opt=[]              Set storage driver options per container
+      --sysctl=[]                   Configure namespaced kernel parameters at runtime
       -t, --tty                     Allocate a pseudo-TTY
       -t, --tty                     Allocate a pseudo-TTY
       -u, --user=""                 Username or UID (format: <name|uid>[:<group|gid>])
       -u, --user=""                 Username or UID (format: <name|uid>[:<group|gid>])
       --userns=""                   Container user namespace
       --userns=""                   Container user namespace
@@ -620,3 +621,30 @@ If you have set the `--exec-opt isolation=hyperv` option on the Docker `daemon`,
 $ docker run -d --isolation default busybox top
 $ docker run -d --isolation default busybox top
 $ docker run -d --isolation hyperv busybox top
 $ docker run -d --isolation hyperv busybox top
 ```
 ```
+
+### Configure namespaced kernel parameters (sysctls) at runtime
+
+The `--sysctl` option sets namespaced kernel parameters (sysctls) in the
+container. For example, to turn on IP forwarding in the container's
+network namespace, run this command:
+
+    $ docker run --sysctl net.ipv4.ip_forward=1 someimage
+
+
+> **Note**: Not all sysctls are namespaced. Docker does not support changing
+> sysctls inside a container when doing so would also modify the host system.
+> As the kernel evolves we expect to see more sysctls become namespaced.
+
+#### Currently supported sysctls
+
+  `IPC Namespace`:
+
+  kernel.msgmax, kernel.msgmnb, kernel.msgmni, kernel.sem, kernel.shmall, kernel.shmmax, kernel.shmmni, kernel.shm_rmid_forced
+  Sysctls beginning with fs.mqueue.*
+
+  If you use the `--ipc=host` option these sysctls will not be allowed.
+
+  `Network Namespace`:
+      Sysctls beginning with net.*
+
+  If you use the `--net=host` option these sysctls will not be allowed.

+ 32 - 0
integration-cli/docker_cli_run_unix_test.go

@@ -4,6 +4,7 @@ package main
 
 
 import (
 import (
 	"bufio"
 	"bufio"
+	"encoding/json"
 	"fmt"
 	"fmt"
 	"io/ioutil"
 	"io/ioutil"
 	"os"
 	"os"
@@ -747,6 +748,37 @@ func (s *DockerSuite) TestRunTmpfsMounts(c *check.C) {
 	}
 	}
 }
 }
 
 
+func (s *DockerSuite) TestRunSysctls(c *check.C) {
+	// Verifies that --sysctl values are applied inside the container and reported under HostConfig.Sysctls.
+	testRequires(c, DaemonIsLinux)
+	var err error
+	// Case 1: set net.ipv4.ip_forward=1 and read the value back from /proc inside the container.
+	out, _ := dockerCmd(c, "run", "--sysctl", "net.ipv4.ip_forward=1", "--name", "test", "busybox", "cat", "/proc/sys/net/ipv4/ip_forward")
+	c.Assert(strings.TrimSpace(out), check.Equals, "1")
+	// The inspected HostConfig.Sysctls JSON is expected to carry the same key/value pair.
+	out = inspectFieldJSON(c, "test", "HostConfig.Sysctls")
+
+	sysctls := make(map[string]string)
+	err = json.Unmarshal([]byte(out), &sysctls)
+	c.Assert(err, check.IsNil)
+	c.Assert(sysctls["net.ipv4.ip_forward"], check.Equals, "1")
+	// Case 2: same checks with the value 0, using a second container.
+	out, _ = dockerCmd(c, "run", "--sysctl", "net.ipv4.ip_forward=0", "--name", "test1", "busybox", "cat", "/proc/sys/net/ipv4/ip_forward")
+	c.Assert(strings.TrimSpace(out), check.Equals, "0")
+
+	out = inspectFieldJSON(c, "test1", "HostConfig.Sysctls")
+
+	err = json.Unmarshal([]byte(out), &sysctls) // decodes into the map from case 1, replacing the entry for this key
+	c.Assert(err, check.IsNil)
+	c.Assert(sysctls["net.ipv4.ip_forward"], check.Equals, "0")
+	// Case 3: kernel.foobar is expected to be rejected; only the output text is inspected, the exit code and error are discarded.
+	runCmd := exec.Command(dockerBinary, "run", "--sysctl", "kernel.foobar=1", "--name", "test2", "busybox", "cat", "/proc/sys/kernel/foobar")
+	out, _, _ = runCommandWithOutput(runCmd)
+	if !strings.Contains(out, "invalid value") {
+		c.Fatalf("expected --sysctl to fail, got %s", out)
+	}
+}
+
 // TestRunSeccompProfileDenyUnshare checks that 'docker run --security-opt seccomp=/tmp/profile.json debian:jessie unshare' exits with operation not permitted.
 // TestRunSeccompProfileDenyUnshare checks that 'docker run --security-opt seccomp=/tmp/profile.json debian:jessie unshare' exits with operation not permitted.
 func (s *DockerSuite) TestRunSeccompProfileDenyUnshare(c *check.C) {
 func (s *DockerSuite) TestRunSeccompProfileDenyUnshare(c *check.C) {
 	testRequires(c, SameHostDaemon, seccompEnabled, NotArm, Apparmor)
 	testRequires(c, SameHostDaemon, seccompEnabled, NotArm, Apparmor)

+ 16 - 0
man/docker-create.1.md

@@ -67,6 +67,7 @@ docker-create - Create a new container
 [**--storage-opt**[=*[]*]]
 [**--storage-opt**[=*[]*]]
 [**--stop-signal**[=*SIGNAL*]]
 [**--stop-signal**[=*SIGNAL*]]
 [**--shm-size**[=*[]*]]
 [**--shm-size**[=*[]*]]
+[**--sysctl**[=*[]*]]
 [**-t**|**--tty**]
 [**-t**|**--tty**]
 [**--tmpfs**[=*[CONTAINER-DIR[:<OPTIONS>]*]]
 [**--tmpfs**[=*[CONTAINER-DIR[:<OPTIONS>]*]]
 [**-u**|**--user**[=*USER*]]
 [**-u**|**--user**[=*USER*]]
@@ -336,6 +337,21 @@ unit, `b` is used. Set LIMIT to `-1` to enable unlimited swap.
 **--stop-signal**=*SIGTERM*
 **--stop-signal**=*SIGTERM*
   Signal to stop a container. Default is SIGTERM.
   Signal to stop a container. Default is SIGTERM.
 
 
+**--sysctl**=SYSCTL
+  Configure namespaced kernel parameters at runtime
+
+  IPC Namespace - current sysctls allowed:
+
+  kernel.msgmax, kernel.msgmnb, kernel.msgmni, kernel.sem, kernel.shmall, kernel.shmmax, kernel.shmmni, kernel.shm_rmid_forced
+  Sysctls beginning with fs.mqueue.*
+
+  Note: if you use the --ipc=host option these sysctls will not be allowed.
+
+  Network Namespace - current sysctls allowed:
+      Sysctls beginning with net.*
+
+  Note: if you use the --net=host option these sysctls will not be allowed.
+
 **-t**, **--tty**=*true*|*false*
 **-t**, **--tty**=*true*|*false*
    Allocate a pseudo-TTY. The default is *false*.
    Allocate a pseudo-TTY. The default is *false*.
 
 

+ 33 - 0
man/docker-run.1.md

@@ -71,6 +71,7 @@ docker-run - Run a command in a new container
 [**--stop-signal**[=*SIGNAL*]]
 [**--stop-signal**[=*SIGNAL*]]
 [**--shm-size**[=*[]*]]
 [**--shm-size**[=*[]*]]
 [**--sig-proxy**[=*true*]]
 [**--sig-proxy**[=*true*]]
+[**--sysctl**[=*[]*]]
 [**-t**|**--tty**]
 [**-t**|**--tty**]
 [**--tmpfs**[=*[CONTAINER-DIR[:<OPTIONS>]*]]
 [**--tmpfs**[=*[CONTAINER-DIR[:<OPTIONS>]*]]
 [**-u**|**--user**[=*USER*]]
 [**-u**|**--user**[=*USER*]]
@@ -492,6 +493,21 @@ its root filesystem mounted as read only prohibiting any writes.
    `number` must be greater than `0`.  Unit is optional and can be `b` (bytes), `k` (kilobytes), `m`(megabytes), or `g` (gigabytes).
    `number` must be greater than `0`.  Unit is optional and can be `b` (bytes), `k` (kilobytes), `m`(megabytes), or `g` (gigabytes).
    If you omit the unit, the system uses bytes. If you omit the size entirely, the system uses `64m`.
    If you omit the unit, the system uses bytes. If you omit the size entirely, the system uses `64m`.
 
 
+**--sysctl**=SYSCTL
+  Configure namespaced kernel parameters at runtime
+
+  IPC Namespace - current sysctls allowed:
+
+  kernel.msgmax, kernel.msgmnb, kernel.msgmni, kernel.sem, kernel.shmall, kernel.shmmax, kernel.shmmni, kernel.shm_rmid_forced
+  Sysctls beginning with fs.mqueue.*
+
+  If you use the `--ipc=host` option these sysctls will not be allowed.
+
+  Network Namespace - current sysctls allowed:
+      Sysctls beginning with net.*
+
+  If you use the `--net=host` option these sysctls will not be allowed.
+
 **--sig-proxy**=*true*|*false*
 **--sig-proxy**=*true*|*false*
    Proxy received signals to the process (non-TTY mode only). SIGCHLD, SIGSTOP, and SIGKILL are not proxied. The default is *true*.
    Proxy received signals to the process (non-TTY mode only). SIGCHLD, SIGSTOP, and SIGKILL are not proxied. The default is *true*.
 
 
@@ -955,6 +971,23 @@ $ docker run -d --isolation default busybox top
 $ docker run -d --isolation hyperv busybox top
 $ docker run -d --isolation hyperv busybox top
 ```
 ```
 
 
+## Setting Namespaced Kernel Parameters (Sysctls)
+
+The `--sysctl` option sets namespaced kernel parameters (sysctls) in the
+container. For example, to turn on IP forwarding in the container's
+network namespace, run this command:
+
+    $ docker run --sysctl net.ipv4.ip_forward=1 someimage
+
+Note:
+
+Not all sysctls are namespaced. Docker does not support changing sysctls
+inside a container when doing so would also modify the host system. As the
+kernel evolves we expect to see more sysctls become namespaced.
+
+See the definition of the `--sysctl` option above for the current list of 
+supported sysctls.
+
 # HISTORY
 # HISTORY
 April 2014, Originally compiled by William Henry (whenry at redhat dot com)
 April 2014, Originally compiled by William Henry (whenry at redhat dot com)
 based on docker.com source material and internal work.
 based on docker.com source material and internal work.

+ 32 - 0
opts/opts.go

@@ -240,3 +240,35 @@ func ValidateLabel(val string) (string, error) {
 	}
 	}
 	return val, nil
 	return val, nil
 }
 }
+
+// ValidateSysctl checks that val contains "=" and that the text before the first "=" is an allowed sysctl name; it returns val unchanged on success and an error otherwise.
+func ValidateSysctl(val string) (string, error) {
+	validSysctlMap := map[string]bool{ // sysctl names accepted by exact match
+		"kernel.msgmax":          true,
+		"kernel.msgmnb":          true,
+		"kernel.msgmni":          true,
+		"kernel.sem":             true,
+		"kernel.shmall":          true,
+		"kernel.shmmax":          true,
+		"kernel.shmmni":          true,
+		"kernel.shm_rmid_forced": true,
+	}
+	validSysctlPrefixes := []string{ // any sysctl name starting with one of these is accepted
+		"net.",
+		"fs.mqueue.",
+	}
+	arr := strings.Split(val, "=") // arr[0] is the sysctl name; the value part is not inspected
+	if len(arr) < 2 { // no = separator in val
+		return "", fmt.Errorf("sysctl '%s' is not whitelisted", val) // NOTE(review): message says not whitelisted although the actual problem is the missing separator
+	}
+	if validSysctlMap[arr[0]] {
+		return val, nil
+	}
+	// Not an exact match: fall back to the prefix list.
+	for _, vp := range validSysctlPrefixes {
+		if strings.HasPrefix(arr[0], vp) {
+			return val, nil
+		}
+	}
+	return "", fmt.Errorf("sysctl '%s' is not whitelisted", val)
+}

+ 3 - 0
runconfig/opts/parse.go

@@ -42,6 +42,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
 		flDevices           = opts.NewListOpts(ValidateDevice)
 		flDevices           = opts.NewListOpts(ValidateDevice)
 
 
 		flUlimits = NewUlimitOpt(nil)
 		flUlimits = NewUlimitOpt(nil)
+		flSysctls = opts.NewMapOpts(nil, opts.ValidateSysctl)
 
 
 		flPublish           = opts.NewListOpts(nil)
 		flPublish           = opts.NewListOpts(nil)
 		flExpose            = opts.NewListOpts(nil)
 		flExpose            = opts.NewListOpts(nil)
@@ -127,6 +128,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
 	cmd.Var(&flSecurityOpt, []string{"-security-opt"}, "Security Options")
 	cmd.Var(&flSecurityOpt, []string{"-security-opt"}, "Security Options")
 	cmd.Var(&flStorageOpt, []string{"-storage-opt"}, "Set storage driver options per container")
 	cmd.Var(&flStorageOpt, []string{"-storage-opt"}, "Set storage driver options per container")
 	cmd.Var(flUlimits, []string{"-ulimit"}, "Ulimit options")
 	cmd.Var(flUlimits, []string{"-ulimit"}, "Ulimit options")
+	cmd.Var(flSysctls, []string{"-sysctl"}, "Sysctl options")
 	cmd.Var(&flLoggingOpts, []string{"-log-opt"}, "Log driver options")
 	cmd.Var(&flLoggingOpts, []string{"-log-opt"}, "Log driver options")
 
 
 	cmd.Require(flag.Min, 1)
 	cmd.Require(flag.Min, 1)
@@ -430,6 +432,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
 		ShmSize:        shmSize,
 		ShmSize:        shmSize,
 		Resources:      resources,
 		Resources:      resources,
 		Tmpfs:          tmpfs,
 		Tmpfs:          tmpfs,
+		Sysctls:        flSysctls.GetAll(),
 	}
 	}
 
 
 	// When allocating stdin in attached mode, close stdin at client disconnect
 	// When allocating stdin in attached mode, close stdin at client disconnect