Kaynağa Gözat

Add support for kernel memory limit

Signed-off-by: Qiang Huang <h.huangqiang@huawei.com>
Qiang Huang 10 yıl önce
ebeveyn
işleme
b6f1b4ad35

+ 1 - 0
contrib/completion/bash/docker

@@ -1075,6 +1075,7 @@ _docker_run() {
 		--group-add
 		--hostname -h
 		--ipc
+		--kernel-memory
 		--label-file
 		--label -l
 		--link

+ 1 - 0
daemon/container_unix.go

@@ -248,6 +248,7 @@ func populateCommand(c *Container, env []string) error {
 	resources := &execdriver.Resources{
 		Memory:           c.hostConfig.Memory,
 		MemorySwap:       c.hostConfig.MemorySwap,
+		KernelMemory:     c.hostConfig.KernelMemory,
 		CPUShares:        c.hostConfig.CPUShares,
 		CpusetCpus:       c.hostConfig.CpusetCpus,
 		CpusetMems:       c.hostConfig.CpusetMems,

+ 9 - 0
daemon/daemon_unix.go

@@ -156,6 +156,15 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *runconfig.HostC
 			return warnings, fmt.Errorf("Invalid value: %v, valid memory swappiness range is 0-100.", swappiness)
 		}
 	}
+	if hostConfig.KernelMemory > 0 && !sysInfo.KernelMemory {
+		warnings = append(warnings, "Your kernel does not support kernel memory limit capabilities. Limitation discarded.")
+		logrus.Warnf("Your kernel does not support kernel memory limit capabilities. Limitation discarded.")
+		hostConfig.KernelMemory = 0
+	}
+	if hostConfig.KernelMemory > 0 && !CheckKernelVersion(4, 0, 0) {
+		warnings = append(warnings, "You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.")
+		logrus.Warnf("You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.")
+	}
 	if hostConfig.CPUShares > 0 && !sysInfo.CPUShares {
 		warnings = append(warnings, "Your kernel does not support CPU shares. Shares discarded.")
 		logrus.Warnf("Your kernel does not support CPU shares. Shares discarded.")

+ 1 - 0
daemon/execdriver/driver.go

@@ -127,6 +127,7 @@ type UTS struct {
 type Resources struct {
 	Memory           int64            `json:"memory"`
 	MemorySwap       int64            `json:"memory_swap"`
+	KernelMemory     int64            `json:"kernel_memory"`
 	CPUShares        int64            `json:"cpu_shares"`
 	CpusetCpus       string           `json:"cpuset_cpus"`
 	CpusetMems       string           `json:"cpuset_mems"`

+ 3 - 0
daemon/execdriver/lxc/lxc_template.go

@@ -96,6 +96,9 @@ lxc.cgroup.memory.soft_limit_in_bytes = {{.Resources.Memory}}
 lxc.cgroup.memory.memsw.limit_in_bytes = {{$memSwap}}
 {{end}}
 {{end}}
+{{if .Resources.KernelMemory}}{{/* the kmem limit must come from KernelMemory, not the user Memory limit */}}
+lxc.cgroup.memory.kmem.limit_in_bytes = {{.Resources.KernelMemory}}
+{{end}}
 {{if .Resources.CPUShares}}
 lxc.cgroup.cpu.shares = {{.Resources.CPUShares}}
 {{end}}

+ 4 - 1
docs/reference/api/docker_remote_api_v1.21.md

@@ -172,6 +172,7 @@ Create a container
              "LxcConf": {"lxc.utsname":"docker"},
              "Memory": 0,
              "MemorySwap": 0,
+             "KernelMemory": 0,
              "CpuShares": 512,
              "CpuPeriod": 100000,
              "CpusetCpus": "0,1",
@@ -217,8 +218,9 @@ Json Parameters:
       for the container.
 -   **User** - A string value specifying the user inside the container.
 -   **Memory** - Memory limit in bytes.
--   **MemorySwap**- Total memory limit (memory + swap); set `-1` to disable swap
+-   **MemorySwap** - Total memory limit (memory + swap); set `-1` to disable swap
       You must use this with `memory` and make the swap value larger than `memory`.
+-   **KernelMemory** - Kernel memory limit in bytes.
 -   **CpuShares** - An integer value containing the container's CPU Shares
       (ie. the relative weight vs other containers).
 -   **CpuPeriod** - The length of a CPU period in microseconds.
@@ -387,6 +389,7 @@ Return low-level information on the container `id`
 			"LxcConf": [],
 			"Memory": 0,
 			"MemorySwap": 0,
+			"KernelMemory": 0,
 			"OomKillDisable": false,
 			"NetworkMode": "bridge",
 			"PortBindings": {},

+ 1 - 0
docs/reference/commandline/create.md

@@ -40,6 +40,7 @@ Creates a new container.
       --help=false                  Print usage
       -i, --interactive=false       Keep STDIN open even if not attached
       --ipc=""                      IPC namespace to use
+      --kernel-memory=""            Kernel memory limit
       -l, --label=[]                Set metadata on the container (e.g., --label=com.example.key=value)
       --label-file=[]               Read in a line delimited file of labels
       --link=[]                     Add link to another container

+ 1 - 0
docs/reference/commandline/run.md

@@ -40,6 +40,7 @@ weight=1
       --help=false                  Print usage
       -i, --interactive=false       Keep STDIN open even if not attached
       --ipc=""                      IPC namespace to use
+      --kernel-memory=""            Kernel memory limit
       -l, --label=[]                Set metadata on the container (e.g., --label=com.example.key=value)
       --label-file=[]               Read in a file of labels (EOL delimited)
       --link=[]                     Add link to another container

+ 74 - 3
docs/reference/run.md

@@ -509,6 +509,7 @@ container:
 |----------------------------|---------------------------------------------------------------------------------------------|
 | `-m`, `--memory="" `       | Memory limit (format: `<number>[<unit>]`, where unit = b, k, m or g)                        |
 | `--memory-swap=""`         | Total memory limit (memory + swap, format: `<number>[<unit>]`, where unit = b, k, m or g)   |
+| `--kernel-memory=""`       | Kernel memory limit (format: `<number>[<unit>]`, where unit = b, k, m or g)                 |
 | `-c`, `--cpu-shares=0`     | CPU shares (relative weight)                                                                |
 | `--cpu-period=0`           | Limit the CPU CFS (Completely Fair Scheduler) period                                        |
 | `--cpuset-cpus="" `        | CPUs in which to allow execution (0-3, 0,1)                                                 |
@@ -518,9 +519,9 @@ container:
 | `--oom-kill-disable=false` | Whether to disable OOM Killer for the container or not.                                     |
 | `--memory-swappiness=""  ` | Tune a container's memory swappiness behavior. Accepts an integer between 0 and 100.        |
 
-### Memory constraints
+### User memory constraints
 
-We have four ways to set memory usage:
+We have four ways to set user memory usage:
 
 <table>
   <thead>
@@ -568,7 +569,7 @@ We have four ways to set memory usage:
   </tbody>
 </table>
 
-### Examples
+Examples:
 
     $ docker run -ti ubuntu:14.04 /bin/bash
 
@@ -612,6 +613,76 @@ The following example, illustrates a dangerous way to use the flag:
 The container has unlimited memory which can cause the host to run out memory
 and require killing system processes to free memory.
 
+### Kernel memory constraints
+
+Kernel memory is fundamentally different than user memory as kernel memory can't
+be swapped out. The inability to swap makes it possible for the container to
+block system services by consuming too much kernel memory. Kernel memory includes:
+
+ - stack pages
+ - slab pages
+ - sockets memory pressure
+ - tcp memory pressure
+
+You can set up a kernel memory limit to constrain these kinds of memory. For example,
+every process consumes some stack pages. By limiting kernel memory, you can
+prevent new processes from being created when the kernel memory usage is too high.
+
+Kernel memory is never completely independent of user memory. Instead, you limit
+kernel memory in the context of the user memory limit. Assume "U" is the user memory
+limit and "K" the kernel limit. There are three possible ways to set limits:
+
+<table>
+  <thead>
+    <tr>
+      <th>Option</th>
+      <th>Result</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td class="no-wrap"><strong>U != 0, K = inf</strong> (default)</td>
+      <td>
+        This is the standard memory limitation mechanism already present before using
+        kernel memory. Kernel memory is completely ignored.
+      </td>
+    </tr>
+    <tr>
+      <td class="no-wrap"><strong>U != 0, K &lt; U</strong></td>
+      <td>
+        Kernel memory is a subset of the user memory. This setup is useful in
+        deployments where the total amount of memory per-cgroup is overcommitted.
+        Overcommitting kernel memory limits is definitely not recommended, since the
+        box can still run out of non-reclaimable memory.
+        In this case, you can configure K so that the sum of all groups is
+        never greater than the total memory. Then, freely set U at the expense of
+        the system's service quality.
+      </td>
+    </tr>
+    <tr>
+      <td class="no-wrap"><strong>U != 0, K &gt; U</strong></td>
+      <td>
+        Since kernel memory charges are also fed to the user counter, reclamation
+        is triggered for the container for both kinds of memory. This configuration
+        gives the admin a unified view of memory. It is also useful for people
+        who just want to track kernel memory usage.
+      </td>
+    </tr>
+  </tbody>
+</table>
+
+Examples:
+
+    $ docker run -ti -m 500M --kernel-memory 50M ubuntu:14.04 /bin/bash
+
+We set memory and kernel memory, so the processes in the container can use
+500M of memory in total, of which at most 50M can be kernel memory.
+
+    $ docker run -ti --kernel-memory 50M ubuntu:14.04 /bin/bash
+
+We set kernel memory without **-m**, so the processes in the container can
+use as much memory as they want, but they can only use 50M kernel memory.
+
 ### Swappiness constraint
 
 By default, a container's kernel can swap out a percentage of anonymous pages.

+ 12 - 0
integration-cli/docker_cli_run_unix_test.go

@@ -283,6 +283,18 @@ func (s *DockerSuite) TestRunWithCpuPeriod(c *check.C) {
 	}
 }
 
+// TestRunWithKernelMemory runs a container with --kernel-memory and checks the value reported in HostConfig.KernelMemory.
+func (s *DockerSuite) TestRunWithKernelMemory(c *check.C) {
+	testRequires(c, kernelMemorySupport)
+	dockerCmd(c, "run", "--kernel-memory", "50M", "--name", "test", "busybox", "true")
+	// 50M is expected back in bytes: 50 * 1024 * 1024 = 52428800.
+	out, err := inspectField("test", "HostConfig.KernelMemory")
+	c.Assert(err, check.IsNil)
+	if out != "52428800" {
+		c.Fatalf("setting the kernel memory limit failed: expected 52428800, got %q", out)
+	}
+}
+
 func (s *DockerSuite) TestRunOOMExitCode(c *check.C) {
 	testRequires(c, oomControl)
 	errChan := make(chan error)

+ 13 - 0
integration-cli/requirements_unix.go

@@ -54,4 +54,17 @@ var (
 		},
 		"Test requires Oom control enabled.",
 	}
+	// kernelMemorySupport holds when memory.kmem.limit_in_bytes under the memory cgroup mountpoint is readable.
+	kernelMemorySupport = testRequirement{
+		func() bool {
+			mountpoint, err := cgroups.FindCgroupMountpoint("memory")
+			if err != nil {
+				return false
+			}
+			kmemLimitFile := path.Join(mountpoint, "memory.kmem.limit_in_bytes")
+			_, err = ioutil.ReadFile(kmemLimitFile)
+			return err == nil
+		},
+		"Test requires an environment that supports cgroup kernel memory.",
+	}
 )

+ 10 - 0
man/docker-create.1.md

@@ -30,6 +30,7 @@ docker-create - Create a new container
 [**--help**]
 [**-i**|**--interactive**[=*false*]]
 [**--ipc**[=*IPC*]]
+[**--kernel-memory**[=*KERNEL-MEMORY*]]
 [**-l**|**--label**[=*[]*]]
 [**--label-file**[=*[]*]]
 [**--link**[=*[]*]]
@@ -148,6 +149,15 @@ two memory nodes.
                                'container:<name|id>': reuses another container shared memory, semaphores and message queues
                                'host': use the host shared memory,semaphores and message queues inside the container.  Note: the host mode gives the container full access to local shared memory and is therefore considered insecure.
 
+**--kernel-memory**=""
+   Kernel memory limit (format: `<number>[<unit>]`, where unit = b, k, m or g)
+
+   Constrains the kernel memory available to a container. If a limit of 0
+is specified (the default when `--kernel-memory` is not used), the container's
+kernel memory is not limited. If you specify a limit, it may be rounded up to a
+multiple of the operating system's page size, and the value can be very large
+(millions of trillions).
+
 **-l**, **--label**=[]
    Adds metadata to a container (e.g., --label=com.example.key=value)
 

+ 10 - 0
man/docker-run.1.md

@@ -31,6 +31,7 @@ docker-run - Run a command in a new container
 [**--help**]
 [**-i**|**--interactive**[=*false*]]
 [**--ipc**[=*IPC*]]
+[**--kernel-memory**[=*KERNEL-MEMORY*]]
 [**-l**|**--label**[=*[]*]]
 [**--label-file**[=*[]*]]
 [**--link**[=*[]*]]
@@ -242,6 +243,15 @@ ENTRYPOINT.
 **-l**, **--label**=[]
    Set metadata on the container (e.g., --label com.example.key=value)
 
+**--kernel-memory**=""
+   Kernel memory limit (format: `<number>[<unit>]`, where unit = b, k, m or g)
+
+   Constrains the kernel memory available to a container. If a limit of 0
+is specified (the default when `--kernel-memory` is not used), the container's
+kernel memory is not limited. If you specify a limit, it may be rounded up to a
+multiple of the operating system's page size, and the value can be very large
+(millions of trillions).
+
 **--label-file**=[]
    Read in a line delimited file of labels
 

+ 3 - 0
pkg/sysinfo/sysinfo.go

@@ -36,6 +36,9 @@ type cgroupMemInfo struct {
 
 	// Whether memory swappiness is supported or not
 	MemorySwappiness bool
+
+	// Whether kernel memory limit is supported or not
+	KernelMemory bool
 }
 
 type cgroupCPUInfo struct {

+ 5 - 0
pkg/sysinfo/sysinfo_linux.go

@@ -57,12 +57,17 @@ func checkCgroupMem(quiet bool) cgroupMemInfo {
 	if !quiet && !memorySwappiness {
 		logrus.Warnf("Your kernel does not support memory swappiness.")
 	}
+	kernelMemory := cgroupEnabled(mountPoint, "memory.kmem.limit_in_bytes")
+	if !quiet && !kernelMemory {
+		logrus.Warnf("Your kernel does not support kernel memory limit.")
+	}
 
 	return cgroupMemInfo{
 		MemoryLimit:      true,
 		SwapLimit:        swapLimit,
 		OomKillDisable:   oomKillDisable,
 		MemorySwappiness: memorySwappiness,
+		KernelMemory:     kernelMemory,
 	}
 }
 

+ 1 - 0
runconfig/hostconfig.go

@@ -265,6 +265,7 @@ type HostConfig struct {
 	LxcConf          *LxcConfig       // Additional lxc configuration
 	Memory           int64            // Memory limit (in bytes)
 	MemorySwap       int64            // Total memory usage (memory + swap); set `-1` to disable swap
+	KernelMemory     int64            // Kernel memory limit (in bytes)
 	CPUShares        int64            `json:"CpuShares"` // CPU shares (relative weight vs. other containers)
 	CPUPeriod        int64            `json:"CpuPeriod"` // CPU CFS (Completely Fair Scheduler) period
 	CpusetCpus       string           // CpusetCpus 0-2, 0,1

+ 11 - 0
runconfig/parse.go

@@ -74,6 +74,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*Config, *HostConfig, *flag.FlagSe
 		flHostname        = cmd.String([]string{"h", "-hostname"}, "", "Container host name")
 		flMemoryString    = cmd.String([]string{"m", "-memory"}, "", "Memory limit")
 		flMemorySwap      = cmd.String([]string{"-memory-swap"}, "", "Total memory (memory + swap), '-1' to disable swap")
+		flKernelMemory    = cmd.String([]string{"-kernel-memory"}, "", "Kernel memory limit")
 		flUser            = cmd.String([]string{"u", "-user"}, "", "Username or UID (format: <name|uid>[:<group|gid>])")
 		flWorkingDir      = cmd.String([]string{"w", "-workdir"}, "", "Working directory inside the container")
 		flCPUShares       = cmd.Int64([]string{"c", "-cpu-shares"}, 0, "CPU shares (relative weight)")
@@ -166,6 +167,15 @@ func Parse(cmd *flag.FlagSet, args []string) (*Config, *HostConfig, *flag.FlagSe
 		}
 	}
 
+	var KernelMemory int64
+	if *flKernelMemory != "" {
+		parsedKernelMemory, err := units.RAMInBytes(*flKernelMemory)
+		if err != nil {
+			return nil, nil, cmd, err
+		}
+		KernelMemory = parsedKernelMemory
+	}
+
 	swappiness := *flSwappiness
 	if swappiness != -1 && (swappiness < 0 || swappiness > 100) {
 		return nil, nil, cmd, fmt.Errorf("Invalid value: %d. Valid memory swappiness range is 0-100", swappiness)
@@ -320,6 +330,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*Config, *HostConfig, *flag.FlagSe
 		LxcConf:          lxcConf,
 		Memory:           flMemory,
 		MemorySwap:       memorySwap,
+		KernelMemory:     KernelMemory,
 		CPUShares:        *flCPUShares,
 		CPUPeriod:        *flCPUPeriod,
 		CpusetCpus:       *flCpusetCpus,