123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199 |
- package specconv // import "github.com/docker/docker/pkg/rootless/specconv"
- import (
- "context"
- "fmt"
- "os"
- "path"
- "path/filepath"
- "strconv"
- "strings"
- "github.com/containerd/log"
- specs "github.com/opencontainers/runtime-spec/specs-go"
- )
- // ToRootfulInRootless is used for "rootful-in-rootless" dind;
- // the daemon is running in UserNS but has no access to RootlessKit API socket, host filesystem, etc.
- //
- // This fuction does:
- // * Fix up OOMScoreAdj (needed since systemd v250: https://github.com/moby/moby/issues/46563)
- func ToRootfulInRootless(spec *specs.Spec) {
- if spec.Process == nil || spec.Process.OOMScoreAdj == nil {
- return
- }
- if currentOOMScoreAdj := getCurrentOOMScoreAdj(); *spec.Process.OOMScoreAdj < currentOOMScoreAdj {
- *spec.Process.OOMScoreAdj = currentOOMScoreAdj
- }
- }
- // ToRootless converts spec to be compatible with "rootless" runc.
- // * Remove non-supported cgroups
- // * Fix up OOMScoreAdj
- // * Fix up /proc if --pid=host
- // * Fix up /dev/shm and /dev/mqueue if --ipc=host
- //
- // v2Controllers should be non-nil only if running with v2 and systemd.
- func ToRootless(spec *specs.Spec, v2Controllers []string) error {
- return toRootless(spec, v2Controllers, getCurrentOOMScoreAdj())
- }
- func getCurrentOOMScoreAdj() int {
- b, err := os.ReadFile("/proc/self/oom_score_adj")
- if err != nil {
- log.G(context.TODO()).WithError(err).Warn("failed to read /proc/self/oom_score_adj")
- return 0
- }
- s := string(b)
- i, err := strconv.Atoi(strings.TrimSpace(s))
- if err != nil {
- log.G(context.TODO()).WithError(err).Warnf("failed to parse /proc/self/oom_score_adj (%q)", s)
- return 0
- }
- return i
- }
- func toRootless(spec *specs.Spec, v2Controllers []string, currentOOMScoreAdj int) error {
- if len(v2Controllers) == 0 {
- if spec.Linux != nil {
- // Remove cgroup settings.
- spec.Linux.Resources = nil
- spec.Linux.CgroupsPath = ""
- }
- } else {
- if spec.Linux != nil && spec.Linux.Resources != nil {
- m := make(map[string]struct{})
- for _, s := range v2Controllers {
- m[s] = struct{}{}
- }
- // Remove devices: https://github.com/containers/crun/issues/255
- spec.Linux.Resources.Devices = nil
- if _, ok := m["memory"]; !ok {
- spec.Linux.Resources.Memory = nil
- }
- if _, ok := m["cpu"]; !ok {
- spec.Linux.Resources.CPU = nil
- }
- if _, ok := m["cpuset"]; !ok {
- if spec.Linux.Resources.CPU != nil {
- spec.Linux.Resources.CPU.Cpus = ""
- spec.Linux.Resources.CPU.Mems = ""
- }
- }
- if _, ok := m["pids"]; !ok {
- spec.Linux.Resources.Pids = nil
- }
- if _, ok := m["io"]; !ok {
- spec.Linux.Resources.BlockIO = nil
- }
- if _, ok := m["rdma"]; !ok {
- spec.Linux.Resources.Rdma = nil
- }
- spec.Linux.Resources.HugepageLimits = nil
- spec.Linux.Resources.Network = nil
- }
- }
- if spec.Process != nil && spec.Process.OOMScoreAdj != nil && *spec.Process.OOMScoreAdj < currentOOMScoreAdj {
- *spec.Process.OOMScoreAdj = currentOOMScoreAdj
- }
- // Fix up /proc if --pid=host
- pidHost, err := isHostNS(spec, specs.PIDNamespace)
- if err != nil {
- return err
- }
- if pidHost {
- if err := bindMountHostProcfs(spec); err != nil {
- return err
- }
- }
- // Fix up /dev/shm and /dev/mqueue if --ipc=host
- ipcHost, err := isHostNS(spec, specs.IPCNamespace)
- if err != nil {
- return err
- }
- if ipcHost {
- if err := bindMountHostIPC(spec); err != nil {
- return err
- }
- }
- return nil
- }
- func isHostNS(spec *specs.Spec, nsType specs.LinuxNamespaceType) (bool, error) {
- if strings.Contains(string(nsType), string(os.PathSeparator)) {
- return false, fmt.Errorf("unexpected namespace type %q", nsType)
- }
- if spec.Linux == nil {
- return false, nil
- }
- for _, ns := range spec.Linux.Namespaces {
- if ns.Type == nsType {
- if ns.Path == "" {
- return false, nil
- }
- ns, err := os.Readlink(ns.Path)
- if err != nil {
- return false, err
- }
- selfNS, err := os.Readlink(filepath.Join("/proc/self/ns", string(nsType)))
- if err != nil {
- return false, err
- }
- return ns == selfNS, nil
- }
- }
- return true, nil
- }
- func bindMountHostProcfs(spec *specs.Spec) error {
- // Replace procfs mount with rbind
- // https://github.com/containers/podman/blob/v3.0.0-rc1/pkg/specgen/generate/oci.go#L248-L257
- for i, m := range spec.Mounts {
- if path.Clean(m.Destination) == "/proc" {
- newM := specs.Mount{
- Destination: "/proc",
- Type: "bind",
- Source: "/proc",
- Options: []string{"rbind", "nosuid", "noexec", "nodev"},
- }
- spec.Mounts[i] = newM
- }
- }
- if spec.Linux != nil {
- // Remove ReadonlyPaths for /proc/*
- newROP := spec.Linux.ReadonlyPaths[:0]
- for _, s := range spec.Linux.ReadonlyPaths {
- s = path.Clean(s)
- if !strings.HasPrefix(s, "/proc/") {
- newROP = append(newROP, s)
- }
- }
- spec.Linux.ReadonlyPaths = newROP
- }
- return nil
- }
- // withBindMountHostIPC replaces /dev/shm and /dev/mqueue mount with rbind.
- // Required for --ipc=host on rootless.
- //
- // Based on https://github.com/containerd/nerdctl/blob/v1.1.0/cmd/nerdctl/run.go#L836-L860
- func bindMountHostIPC(s *specs.Spec) error {
- for i, m := range s.Mounts {
- switch p := path.Clean(m.Destination); p {
- case "/dev/shm", "/dev/mqueue":
- s.Mounts[i] = specs.Mount{
- Destination: p,
- Type: "bind",
- Source: p,
- Options: []string{"rbind", "nosuid", "noexec", "nodev"},
- }
- }
- }
- return nil
- }
|