Quellcode durchsuchen

Merge pull request #40486 from AkihiroSuda/rootless-cgroup2-systemd

rootless: support `--exec-opt native.cgroupdriver=systemd`
Brian Goff vor 5 Jahren
Ursprung
Commit
76e3a49933

+ 4 - 6
daemon/daemon_unix.go

@@ -599,15 +599,13 @@ func verifyPlatformContainerResources(resources *containertypes.Resources, sysIn
 }
 
 func (daemon *Daemon) getCgroupDriver() string {
+	if UsingSystemd(daemon.configStore) {
+		return cgroupSystemdDriver
+	}
 	if daemon.Rootless() {
 		return cgroupNoneDriver
 	}
-	cgroupDriver := cgroupFsDriver
-
-	if UsingSystemd(daemon.configStore) {
-		cgroupDriver = cgroupSystemdDriver
-	}
-	return cgroupDriver
+	return cgroupFsDriver
 }
 
 // getCD gets the raw value of the native.cgroupdriver option, if set.

+ 25 - 3
daemon/oci_linux.go

@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"io"
+	"io/ioutil"
 	"os"
 	"os/exec"
 	"path/filepath"
@@ -84,8 +85,26 @@ func WithLibnetwork(daemon *Daemon, c *container.Container) coci.SpecOpts {
 }
 
 // WithRootless sets the spec to the rootless configuration
-func WithRootless(ctx context.Context, _ coci.Client, _ *containers.Container, s *coci.Spec) error {
-	return specconv.ToRootless(s)
+func WithRootless(daemon *Daemon) coci.SpecOpts {
+	return func(_ context.Context, _ coci.Client, _ *containers.Container, s *coci.Spec) error {
+		var v2Controllers []string
+		if daemon.getCgroupDriver() == cgroupSystemdDriver {
+			if !cgroups.IsCgroup2UnifiedMode() {
+				return errors.New("rootless systemd driver doesn't support cgroup v1")
+			}
+			rootlesskitParentEUID := os.Getenv("ROOTLESSKIT_PARENT_EUID")
+			if rootlesskitParentEUID == "" {
+				return errors.New("$ROOTLESSKIT_PARENT_EUID is not set (requires RootlessKit v0.8.0)")
+			}
+			controllersPath := fmt.Sprintf("/sys/fs/cgroup/user.slice/user-%s.slice/cgroup.controllers", rootlesskitParentEUID)
+			controllersFile, err := ioutil.ReadFile(controllersPath)
+			if err != nil {
+				return err
+			}
+			v2Controllers = strings.Fields(string(controllersFile))
+		}
+		return specconv.ToRootless(s, v2Controllers)
+	}
 }
 
 // WithOOMScore sets the oom score
@@ -760,6 +779,9 @@ func WithCgroups(daemon *Daemon, c *container.Container) coci.SpecOpts {
 		useSystemd := UsingSystemd(daemon.configStore)
 		if useSystemd {
 			parent = "system.slice"
+			if daemon.configStore.Rootless {
+				parent = "user.slice"
+			}
 		}
 
 		if c.HostConfig.CgroupParent != "" {
@@ -985,7 +1007,7 @@ func (daemon *Daemon) createSpec(c *container.Container) (retSpec *specs.Spec, e
 		opts = append(opts, coci.WithReadonlyPaths(c.HostConfig.ReadonlyPaths))
 	}
 	if daemon.configStore.Rootless {
-		opts = append(opts, WithRootless)
+		opts = append(opts, WithRootless(daemon))
 	}
 	return &s, coci.ApplyOpts(context.Background(), nil, &containers.Container{
 		ID: c.ID,

+ 2 - 2
hack/dockerfile/install/rootlesskit.installer

@@ -1,7 +1,7 @@
 #!/bin/sh
 
-# v0.7.1
-: ${ROOTLESSKIT_COMMIT:=76c4e26750da3986fa0e741464fbf0fcd55bea71}
+# v0.8.0
+: ${ROOTLESSKIT_COMMIT:=ce88a431e6a7cf891ebb68b10bfc6a5724b9ae72}
 
 install_rootlesskit() {
 	case "$1" in

+ 43 - 7
rootless/specconv/specconv_linux.go

@@ -8,10 +8,12 @@ import (
 )
 
 // ToRootless converts spec to be compatible with "rootless" runc.
-// * Remove cgroups (will be supported in separate PR when delegation permission is configured)
+// * Remove non-supported cgroups
 // * Fix up OOMScoreAdj
-func ToRootless(spec *specs.Spec) error {
-	return toRootless(spec, getCurrentOOMScoreAdj())
+//
+// v2Controllers should be non-nil only if running with v2 and systemd.
+func ToRootless(spec *specs.Spec, v2Controllers []string) error {
+	return toRootless(spec, v2Controllers, getCurrentOOMScoreAdj())
 }
 
 func getCurrentOOMScoreAdj() int {
@@ -26,10 +28,44 @@ func getCurrentOOMScoreAdj() int {
 	return i
 }
 
-func toRootless(spec *specs.Spec, currentOOMScoreAdj int) error {
-	// Remove cgroup settings.
-	spec.Linux.Resources = nil
-	spec.Linux.CgroupsPath = ""
+func toRootless(spec *specs.Spec, v2Controllers []string, currentOOMScoreAdj int) error {
+	if len(v2Controllers) == 0 {
+		// Remove cgroup settings.
+		spec.Linux.Resources = nil
+		spec.Linux.CgroupsPath = ""
+	} else {
+		if spec.Linux.Resources != nil {
+			m := make(map[string]struct{})
+			for _, s := range v2Controllers {
+				m[s] = struct{}{}
+			}
+			// Remove devices: https://github.com/containers/crun/issues/255
+			spec.Linux.Resources.Devices = nil
+			if _, ok := m["memory"]; !ok {
+				spec.Linux.Resources.Memory = nil
+			}
+			if _, ok := m["cpu"]; !ok {
+				spec.Linux.Resources.CPU = nil
+			}
+			if _, ok := m["cpuset"]; !ok {
+				if spec.Linux.Resources.CPU != nil {
+					spec.Linux.Resources.CPU.Cpus = ""
+					spec.Linux.Resources.CPU.Mems = ""
+				}
+			}
+			if _, ok := m["pids"]; !ok {
+				spec.Linux.Resources.Pids = nil
+			}
+			if _, ok := m["io"]; !ok {
+				spec.Linux.Resources.BlockIO = nil
+			}
+			if _, ok := m["rdma"]; !ok {
+				spec.Linux.Resources.Rdma = nil
+			}
+			spec.Linux.Resources.HugepageLimits = nil
+			spec.Linux.Resources.Network = nil
+		}
+	}
 
 	if spec.Process.OOMScoreAdj != nil && *spec.Process.OOMScoreAdj < currentOOMScoreAdj {
 		*spec.Process.OOMScoreAdj = currentOOMScoreAdj