utils.go 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. /*
  2. Copyright The containerd Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package cgroups
  14. import (
  15. "bufio"
  16. "errors"
  17. "fmt"
  18. "io"
  19. "io/ioutil"
  20. "os"
  21. "path/filepath"
  22. "strconv"
  23. "strings"
  24. "sync"
  25. "syscall"
  26. "time"
  27. units "github.com/docker/go-units"
  28. specs "github.com/opencontainers/runtime-spec/specs-go"
  29. "golang.org/x/sys/unix"
  30. )
  // CGMode is the cgroups mode of the host system
  type CGMode int

  const (
  	// Unavailable cgroup mountpoint
  	Unavailable CGMode = iota
  	// Legacy cgroups v1
  	Legacy
  	// Hybrid with cgroups v1 and v2 controllers mounted
  	Hybrid
  	// Unified with only cgroups v2 mounted
  	Unified
  )

  // unifiedMountpoint is the directory whose filesystem type is inspected to
  // decide which cgroups mode the host is running.
  const unifiedMountpoint = "/sys/fs/cgroup"

  var (
  	// nsOnce guards the one-time computation of inUserNS.
  	nsOnce sync.Once
  	// inUserNS caches the user-namespace detection result.
  	inUserNS bool

  	// checkMode guards the one-time computation of cgMode.
  	checkMode sync.Once
  	// cgMode caches the detected cgroups mode.
  	cgMode CGMode
  )
  50. // Mode returns the cgroups mode running on the host
  51. func Mode() CGMode {
  52. checkMode.Do(func() {
  53. var st unix.Statfs_t
  54. if err := unix.Statfs(unifiedMountpoint, &st); err != nil {
  55. cgMode = Unavailable
  56. return
  57. }
  58. switch st.Type {
  59. case unix.CGROUP2_SUPER_MAGIC:
  60. cgMode = Unified
  61. default:
  62. cgMode = Legacy
  63. if err := unix.Statfs(filepath.Join(unifiedMountpoint, "unified"), &st); err != nil {
  64. return
  65. }
  66. if st.Type == unix.CGROUP2_SUPER_MAGIC {
  67. cgMode = Hybrid
  68. }
  69. }
  70. })
  71. return cgMode
  72. }
  73. // RunningInUserNS detects whether we are currently running in a user namespace.
  74. // Copied from github.com/lxc/lxd/shared/util.go
  75. func RunningInUserNS() bool {
  76. nsOnce.Do(func() {
  77. file, err := os.Open("/proc/self/uid_map")
  78. if err != nil {
  79. // This kernel-provided file only exists if user namespaces are supported
  80. return
  81. }
  82. defer file.Close()
  83. buf := bufio.NewReader(file)
  84. l, _, err := buf.ReadLine()
  85. if err != nil {
  86. return
  87. }
  88. line := string(l)
  89. var a, b, c int64
  90. fmt.Sscanf(line, "%d %d %d", &a, &b, &c)
  91. /*
  92. * We assume we are in the initial user namespace if we have a full
  93. * range - 4294967295 uids starting at uid 0.
  94. */
  95. if a == 0 && b == 0 && c == 4294967295 {
  96. return
  97. }
  98. inUserNS = true
  99. })
  100. return inUserNS
  101. }
  102. // defaults returns all known groups
  103. func defaults(root string) ([]Subsystem, error) {
  104. h, err := NewHugetlb(root)
  105. if err != nil && !os.IsNotExist(err) {
  106. return nil, err
  107. }
  108. s := []Subsystem{
  109. NewNamed(root, "systemd"),
  110. NewFreezer(root),
  111. NewPids(root),
  112. NewNetCls(root),
  113. NewNetPrio(root),
  114. NewPerfEvent(root),
  115. NewCpuset(root),
  116. NewCpu(root),
  117. NewCpuacct(root),
  118. NewMemory(root),
  119. NewBlkio(root),
  120. NewRdma(root),
  121. }
  122. // only add the devices cgroup if we are not in a user namespace
  123. // because modifications are not allowed
  124. if !RunningInUserNS() {
  125. s = append(s, NewDevices(root))
  126. }
  127. // add the hugetlb cgroup if error wasn't due to missing hugetlb
  128. // cgroup support on the host
  129. if err == nil {
  130. s = append(s, h)
  131. }
  132. return s, nil
  133. }
  // remove deletes the cgroup directory at path, retrying with exponential
  // backoff when removal fails (for example with EAGAIN or EBUSY while the
  // kernel is still releasing the group).
  //
  // Up to five attempts are made, sleeping 10ms, 20ms, 40ms and 80ms between
  // them. Any error from os.RemoveAll triggers a retry. If every attempt fails,
  // the returned error wraps the error from the last attempt so callers can
  // inspect it with errors.Is / errors.As.
  func remove(path string) error {
  	var (
  		delay = 10 * time.Millisecond
  		err   error
  	)
  	for i := 0; i < 5; i++ {
  		if i != 0 {
  			time.Sleep(delay)
  			delay *= 2
  		}
  		if err = os.RemoveAll(path); err == nil {
  			return nil
  		}
  	}
  	return fmt.Errorf("cgroups: unable to remove path %q: %w", path, err)
  }
  149. // readPids will read all the pids of processes or tasks in a cgroup by the provided path
  150. func readPids(path string, subsystem Name, pType procType) ([]Process, error) {
  151. f, err := os.Open(filepath.Join(path, pType))
  152. if err != nil {
  153. return nil, err
  154. }
  155. defer f.Close()
  156. var (
  157. out []Process
  158. s = bufio.NewScanner(f)
  159. )
  160. for s.Scan() {
  161. if t := s.Text(); t != "" {
  162. pid, err := strconv.Atoi(t)
  163. if err != nil {
  164. return nil, err
  165. }
  166. out = append(out, Process{
  167. Pid: pid,
  168. Subsystem: subsystem,
  169. Path: path,
  170. })
  171. }
  172. }
  173. if err := s.Err(); err != nil {
  174. // failed to read all pids?
  175. return nil, err
  176. }
  177. return out, nil
  178. }
  179. func hugePageSizes() ([]string, error) {
  180. var (
  181. pageSizes []string
  182. sizeList = []string{"B", "KB", "MB", "GB", "TB", "PB"}
  183. )
  184. files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages")
  185. if err != nil {
  186. return nil, err
  187. }
  188. for _, st := range files {
  189. nameArray := strings.Split(st.Name(), "-")
  190. pageSize, err := units.RAMInBytes(nameArray[1])
  191. if err != nil {
  192. return nil, err
  193. }
  194. pageSizes = append(pageSizes, units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList))
  195. }
  196. return pageSizes, nil
  197. }
  198. func readUint(path string) (uint64, error) {
  199. v, err := ioutil.ReadFile(path)
  200. if err != nil {
  201. return 0, err
  202. }
  203. return parseUint(strings.TrimSpace(string(v)), 10, 64)
  204. }
  // parseUint parses s as an unsigned integer in the given base and bit size,
  // clamping negative inputs to zero instead of failing.
  //
  // When strconv.ParseUint rejects s, the string is re-parsed as a signed
  // integer. If that yields a negative number (either exactly, or saturated
  // because the value lies below the minimum for bitSize) the result is 0 with
  // a nil error. In every other failure case the original ParseUint error is
  // returned.
  func parseUint(s string, base, bitSize int) (uint64, error) {
  	value, err := strconv.ParseUint(s, base, bitSize)
  	if err == nil {
  		return value, nil
  	}

  	signed, signedErr := strconv.ParseInt(s, base, bitSize)
  	if signed >= 0 {
  		// Not a negative number, so the unsigned failure is genuine.
  		return 0, err
  	}
  	if signedErr == nil {
  		// Negative value that fits in bitSize.
  		return 0, nil
  	}
  	var numErr *strconv.NumError
  	if errors.As(signedErr, &numErr) && numErr.Err == strconv.ErrRange {
  		// Negative value below the representable minimum.
  		return 0, nil
  	}
  	return 0, err
  }
  222. func parseKV(raw string) (string, uint64, error) {
  223. parts := strings.Fields(raw)
  224. switch len(parts) {
  225. case 2:
  226. v, err := parseUint(parts[1], 10, 64)
  227. if err != nil {
  228. return "", 0, err
  229. }
  230. return parts[0], v, nil
  231. default:
  232. return "", 0, ErrInvalidFormat
  233. }
  234. }
  235. // ParseCgroupFile parses the given cgroup file, typically /proc/self/cgroup
  236. // or /proc/<pid>/cgroup, into a map of subsystems to cgroup paths, e.g.
  237. // "cpu": "/user.slice/user-1000.slice"
  238. // "pids": "/user.slice/user-1000.slice"
  239. // etc.
  240. //
  241. // Note that for cgroup v2 unified hierarchy, there are no per-controller
  242. // cgroup paths, so the resulting map will have a single element where the key
  243. // is empty string ("") and the value is the cgroup path the <pid> is in.
  244. func ParseCgroupFile(path string) (map[string]string, error) {
  245. f, err := os.Open(path)
  246. if err != nil {
  247. return nil, err
  248. }
  249. defer f.Close()
  250. return parseCgroupFromReader(f)
  251. }
  // parseCgroupFromReader reads "id:controllers:path" lines from r and returns
  // a map from each controller name to its cgroup path.
  //
  // Each line is split on the first two colons only, so the path may itself
  // contain colons. The controllers field is a comma-separated list; every
  // non-empty name in it is mapped to the line's path, and empty names are
  // ignored. A line with fewer than three colon-separated fields, or a read
  // error from r, produces an error and a nil map.
  func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
  	result := make(map[string]string)
  	scanner := bufio.NewScanner(r)
  	for scanner.Scan() {
  		line := scanner.Text()
  		fields := strings.SplitN(line, ":", 3)
  		if len(fields) < 3 {
  			return nil, fmt.Errorf("invalid cgroup entry: %q", line)
  		}
  		cgroupPath := fields[2]
  		for _, controller := range strings.Split(fields[1], ",") {
  			if controller == "" {
  				continue
  			}
  			result[controller] = cgroupPath
  		}
  	}
  	if err := scanner.Err(); err != nil {
  		return nil, err
  	}
  	return result, nil
  }
  276. func getCgroupDestination(subsystem string) (string, error) {
  277. f, err := os.Open("/proc/self/mountinfo")
  278. if err != nil {
  279. return "", err
  280. }
  281. defer f.Close()
  282. s := bufio.NewScanner(f)
  283. for s.Scan() {
  284. fields := strings.Split(s.Text(), " ")
  285. if len(fields) < 10 {
  286. // broken mountinfo?
  287. continue
  288. }
  289. if fields[len(fields)-3] != "cgroup" {
  290. continue
  291. }
  292. for _, opt := range strings.Split(fields[len(fields)-1], ",") {
  293. if opt == subsystem {
  294. return fields[3], nil
  295. }
  296. }
  297. }
  298. if err := s.Err(); err != nil {
  299. return "", err
  300. }
  301. return "", ErrNoCgroupMountDestination
  302. }
  303. func pathers(subystems []Subsystem) []pather {
  304. var out []pather
  305. for _, s := range subystems {
  306. if p, ok := s.(pather); ok {
  307. out = append(out, p)
  308. }
  309. }
  310. return out
  311. }
  312. func initializeSubsystem(s Subsystem, path Path, resources *specs.LinuxResources) error {
  313. if c, ok := s.(creator); ok {
  314. p, err := path(s.Name())
  315. if err != nil {
  316. return err
  317. }
  318. if err := c.Create(p, resources); err != nil {
  319. return err
  320. }
  321. } else if c, ok := s.(pather); ok {
  322. p, err := path(s.Name())
  323. if err != nil {
  324. return err
  325. }
  326. // do the default create if the group does not have a custom one
  327. if err := os.MkdirAll(c.Path(p), defaultDirPerm); err != nil {
  328. return err
  329. }
  330. }
  331. return nil
  332. }
  // cleanPath normalises path with filepath.Clean and prevents relative paths
  // from escaping upwards.
  //
  // An empty path is returned unchanged. An absolute path is returned in its
  // cleaned form. A relative path is cleaned as if it were rooted at the path
  // separator, which discards any leading ".." elements, and is then made
  // relative to that root again.
  func cleanPath(path string) string {
  	if path == "" {
  		return ""
  	}

  	cleaned := filepath.Clean(path)
  	if filepath.IsAbs(cleaned) {
  		return cleaned
  	}

  	root := string(os.PathSeparator)
  	// The Rel error is discarded, as in the original implementation: both
  	// arguments are rooted at the separator.
  	rel, _ := filepath.Rel(root, filepath.Clean(root+cleaned))
  	return rel
  }
  // retryingWriteFile writes data to path with ioutil.WriteFile using the given
  // mode, repeating the write for as long as it fails with EINTR. Any other
  // outcome, success or failure, is returned immediately.
  //
  // See https://github.com/golang/go/issues/38033 for why writes may be
  // interrupted.
  func retryingWriteFile(path string, data []byte, mode os.FileMode) error {
  	for {
  		err := ioutil.WriteFile(path, data, mode)
  		if errors.Is(err, syscall.EINTR) {
  			continue
  		}
  		return err
  	}
  }