specconv_linux.go 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. package specconv // import "github.com/docker/docker/pkg/rootless/specconv"
  2. import (
  3. "context"
  4. "fmt"
  5. "os"
  6. "path"
  7. "path/filepath"
  8. "strconv"
  9. "strings"
  10. "github.com/containerd/log"
  11. specs "github.com/opencontainers/runtime-spec/specs-go"
  12. )
  13. // ToRootfulInRootless is used for "rootful-in-rootless" dind;
  14. // the daemon is running in UserNS but has no access to RootlessKit API socket, host filesystem, etc.
  15. //
  16. // This fuction does:
  17. // * Fix up OOMScoreAdj (needed since systemd v250: https://github.com/moby/moby/issues/46563)
  18. func ToRootfulInRootless(spec *specs.Spec) {
  19. if spec.Process == nil || spec.Process.OOMScoreAdj == nil {
  20. return
  21. }
  22. if currentOOMScoreAdj := getCurrentOOMScoreAdj(); *spec.Process.OOMScoreAdj < currentOOMScoreAdj {
  23. *spec.Process.OOMScoreAdj = currentOOMScoreAdj
  24. }
  25. }
  26. // ToRootless converts spec to be compatible with "rootless" runc.
  27. // * Remove non-supported cgroups
  28. // * Fix up OOMScoreAdj
  29. // * Fix up /proc if --pid=host
  30. // * Fix up /dev/shm and /dev/mqueue if --ipc=host
  31. //
  32. // v2Controllers should be non-nil only if running with v2 and systemd.
  33. func ToRootless(spec *specs.Spec, v2Controllers []string) error {
  34. return toRootless(spec, v2Controllers, getCurrentOOMScoreAdj())
  35. }
  36. func getCurrentOOMScoreAdj() int {
  37. b, err := os.ReadFile("/proc/self/oom_score_adj")
  38. if err != nil {
  39. log.G(context.TODO()).WithError(err).Warn("failed to read /proc/self/oom_score_adj")
  40. return 0
  41. }
  42. s := string(b)
  43. i, err := strconv.Atoi(strings.TrimSpace(s))
  44. if err != nil {
  45. log.G(context.TODO()).WithError(err).Warnf("failed to parse /proc/self/oom_score_adj (%q)", s)
  46. return 0
  47. }
  48. return i
  49. }
  50. func toRootless(spec *specs.Spec, v2Controllers []string, currentOOMScoreAdj int) error {
  51. if len(v2Controllers) == 0 {
  52. if spec.Linux != nil {
  53. // Remove cgroup settings.
  54. spec.Linux.Resources = nil
  55. spec.Linux.CgroupsPath = ""
  56. }
  57. } else {
  58. if spec.Linux != nil && spec.Linux.Resources != nil {
  59. m := make(map[string]struct{})
  60. for _, s := range v2Controllers {
  61. m[s] = struct{}{}
  62. }
  63. // Remove devices: https://github.com/containers/crun/issues/255
  64. spec.Linux.Resources.Devices = nil
  65. if _, ok := m["memory"]; !ok {
  66. spec.Linux.Resources.Memory = nil
  67. }
  68. if _, ok := m["cpu"]; !ok {
  69. spec.Linux.Resources.CPU = nil
  70. }
  71. if _, ok := m["cpuset"]; !ok {
  72. if spec.Linux.Resources.CPU != nil {
  73. spec.Linux.Resources.CPU.Cpus = ""
  74. spec.Linux.Resources.CPU.Mems = ""
  75. }
  76. }
  77. if _, ok := m["pids"]; !ok {
  78. spec.Linux.Resources.Pids = nil
  79. }
  80. if _, ok := m["io"]; !ok {
  81. spec.Linux.Resources.BlockIO = nil
  82. }
  83. if _, ok := m["rdma"]; !ok {
  84. spec.Linux.Resources.Rdma = nil
  85. }
  86. spec.Linux.Resources.HugepageLimits = nil
  87. spec.Linux.Resources.Network = nil
  88. }
  89. }
  90. if spec.Process != nil && spec.Process.OOMScoreAdj != nil && *spec.Process.OOMScoreAdj < currentOOMScoreAdj {
  91. *spec.Process.OOMScoreAdj = currentOOMScoreAdj
  92. }
  93. // Fix up /proc if --pid=host
  94. pidHost, err := isHostNS(spec, specs.PIDNamespace)
  95. if err != nil {
  96. return err
  97. }
  98. if pidHost {
  99. if err := bindMountHostProcfs(spec); err != nil {
  100. return err
  101. }
  102. }
  103. // Fix up /dev/shm and /dev/mqueue if --ipc=host
  104. ipcHost, err := isHostNS(spec, specs.IPCNamespace)
  105. if err != nil {
  106. return err
  107. }
  108. if ipcHost {
  109. if err := bindMountHostIPC(spec); err != nil {
  110. return err
  111. }
  112. }
  113. return nil
  114. }
  115. func isHostNS(spec *specs.Spec, nsType specs.LinuxNamespaceType) (bool, error) {
  116. if strings.Contains(string(nsType), string(os.PathSeparator)) {
  117. return false, fmt.Errorf("unexpected namespace type %q", nsType)
  118. }
  119. if spec.Linux == nil {
  120. return false, nil
  121. }
  122. for _, ns := range spec.Linux.Namespaces {
  123. if ns.Type == nsType {
  124. if ns.Path == "" {
  125. return false, nil
  126. }
  127. ns, err := os.Readlink(ns.Path)
  128. if err != nil {
  129. return false, err
  130. }
  131. selfNS, err := os.Readlink(filepath.Join("/proc/self/ns", string(nsType)))
  132. if err != nil {
  133. return false, err
  134. }
  135. return ns == selfNS, nil
  136. }
  137. }
  138. return true, nil
  139. }
  140. func bindMountHostProcfs(spec *specs.Spec) error {
  141. // Replace procfs mount with rbind
  142. // https://github.com/containers/podman/blob/v3.0.0-rc1/pkg/specgen/generate/oci.go#L248-L257
  143. for i, m := range spec.Mounts {
  144. if path.Clean(m.Destination) == "/proc" {
  145. newM := specs.Mount{
  146. Destination: "/proc",
  147. Type: "bind",
  148. Source: "/proc",
  149. Options: []string{"rbind", "nosuid", "noexec", "nodev"},
  150. }
  151. spec.Mounts[i] = newM
  152. }
  153. }
  154. if spec.Linux != nil {
  155. // Remove ReadonlyPaths for /proc/*
  156. newROP := spec.Linux.ReadonlyPaths[:0]
  157. for _, s := range spec.Linux.ReadonlyPaths {
  158. s = path.Clean(s)
  159. if !strings.HasPrefix(s, "/proc/") {
  160. newROP = append(newROP, s)
  161. }
  162. }
  163. spec.Linux.ReadonlyPaths = newROP
  164. }
  165. return nil
  166. }
  167. // withBindMountHostIPC replaces /dev/shm and /dev/mqueue mount with rbind.
  168. // Required for --ipc=host on rootless.
  169. //
  170. // Based on https://github.com/containerd/nerdctl/blob/v1.1.0/cmd/nerdctl/run.go#L836-L860
  171. func bindMountHostIPC(s *specs.Spec) error {
  172. for i, m := range s.Mounts {
  173. switch p := path.Clean(m.Destination); p {
  174. case "/dev/shm", "/dev/mqueue":
  175. s.Mounts[i] = specs.Mount{
  176. Destination: p,
  177. Type: "bind",
  178. Source: p,
  179. Options: []string{"rbind", "nosuid", "noexec", "nodev"},
  180. }
  181. }
  182. }
  183. return nil
  184. }