utils.go 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. /*
  2. Copyright The containerd Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package cgroups
  14. import (
  15. "bufio"
  16. "errors"
  17. "fmt"
  18. "io"
  19. "io/ioutil"
  20. "os"
  21. "path/filepath"
  22. "strconv"
  23. "strings"
  24. "sync"
  25. "syscall"
  26. "time"
  27. units "github.com/docker/go-units"
  28. specs "github.com/opencontainers/runtime-spec/specs-go"
  29. "golang.org/x/sys/unix"
  30. )
  // CGMode is the cgroups mode of the host system.
  type CGMode int

  // The modes Mode can report. The numeric order is part of the public
  // values, so new entries must only ever be appended.
  const (
  	// Unavailable means the cgroup mountpoint could not be inspected.
  	Unavailable CGMode = iota
  	// Legacy means cgroups v1.
  	Legacy
  	// Hybrid means cgroups v1 with v2 controllers mounted as well.
  	Hybrid
  	// Unified means only cgroups v2 is mounted.
  	Unified
  )

  // unifiedMountpoint is the directory whose filesystem type Mode inspects.
  const unifiedMountpoint = "/sys/fs/cgroup"

  var (
  	// checkMode guards the one-time probe in Mode; cgMode caches its result.
  	checkMode sync.Once
  	cgMode    CGMode

  	// nsOnce guards the one-time probe in RunningInUserNS; inUserNS caches
  	// its result.
  	nsOnce   sync.Once
  	inUserNS bool
  )
  50. // Mode returns the cgroups mode running on the host
  51. func Mode() CGMode {
  52. checkMode.Do(func() {
  53. var st unix.Statfs_t
  54. if err := unix.Statfs(unifiedMountpoint, &st); err != nil {
  55. cgMode = Unavailable
  56. return
  57. }
  58. switch st.Type {
  59. case unix.CGROUP2_SUPER_MAGIC:
  60. cgMode = Unified
  61. default:
  62. cgMode = Legacy
  63. if err := unix.Statfs(filepath.Join(unifiedMountpoint, "unified"), &st); err != nil {
  64. return
  65. }
  66. if st.Type == unix.CGROUP2_SUPER_MAGIC {
  67. cgMode = Hybrid
  68. }
  69. }
  70. })
  71. return cgMode
  72. }
  73. // RunningInUserNS detects whether we are currently running in a user namespace.
  74. // Copied from github.com/lxc/lxd/shared/util.go
  75. func RunningInUserNS() bool {
  76. nsOnce.Do(func() {
  77. file, err := os.Open("/proc/self/uid_map")
  78. if err != nil {
  79. // This kernel-provided file only exists if user namespaces are supported
  80. return
  81. }
  82. defer file.Close()
  83. buf := bufio.NewReader(file)
  84. l, _, err := buf.ReadLine()
  85. if err != nil {
  86. return
  87. }
  88. line := string(l)
  89. var a, b, c int64
  90. fmt.Sscanf(line, "%d %d %d", &a, &b, &c)
  91. /*
  92. * We assume we are in the initial user namespace if we have a full
  93. * range - 4294967295 uids starting at uid 0.
  94. */
  95. if a == 0 && b == 0 && c == 4294967295 {
  96. return
  97. }
  98. inUserNS = true
  99. })
  100. return inUserNS
  101. }
  102. // defaults returns all known groups
  103. func defaults(root string) ([]Subsystem, error) {
  104. h, err := NewHugetlb(root)
  105. if err != nil && !os.IsNotExist(err) {
  106. return nil, err
  107. }
  108. s := []Subsystem{
  109. NewNamed(root, "systemd"),
  110. NewFreezer(root),
  111. NewPids(root),
  112. NewNetCls(root),
  113. NewNetPrio(root),
  114. NewPerfEvent(root),
  115. NewCpuset(root),
  116. NewCpu(root),
  117. NewCpuacct(root),
  118. NewMemory(root),
  119. NewBlkio(root),
  120. NewRdma(root),
  121. }
  122. // only add the devices cgroup if we are not in a user namespace
  123. // because modifications are not allowed
  124. if !RunningInUserNS() {
  125. s = append(s, NewDevices(root))
  126. }
  127. // add the hugetlb cgroup if error wasn't due to missing hugetlb
  128. // cgroup support on the host
  129. if err == nil {
  130. s = append(s, h)
  131. }
  132. return s, nil
  133. }
  // remove deletes the cgroup directory at path, retrying on failure with an
  // exponential backoff. Removal can fail transiently (for example with
  // EAGAIN or EBUSY), so up to five attempts are made, sleeping 10ms, 20ms,
  // 40ms and 80ms between them.
  //
  // It returns nil as soon as an attempt succeeds. If every attempt fails,
  // the returned error wraps the error of the final attempt so callers can
  // inspect the cause with errors.Is / errors.As.
  func remove(path string) error {
  	var (
  		delay   = 10 * time.Millisecond
  		lastErr error
  	)
  	for i := 0; i < 5; i++ {
  		if i != 0 {
  			time.Sleep(delay)
  			delay *= 2
  		}
  		if lastErr = os.RemoveAll(path); lastErr == nil {
  			return nil
  		}
  	}
  	return fmt.Errorf("cgroups: unable to remove path %q: %w", path, lastErr)
  }
  149. // readPids will read all the pids of processes in a cgroup by the provided path
  150. func readPids(path string, subsystem Name) ([]Process, error) {
  151. f, err := os.Open(filepath.Join(path, cgroupProcs))
  152. if err != nil {
  153. return nil, err
  154. }
  155. defer f.Close()
  156. var (
  157. out []Process
  158. s = bufio.NewScanner(f)
  159. )
  160. for s.Scan() {
  161. if t := s.Text(); t != "" {
  162. pid, err := strconv.Atoi(t)
  163. if err != nil {
  164. return nil, err
  165. }
  166. out = append(out, Process{
  167. Pid: pid,
  168. Subsystem: subsystem,
  169. Path: path,
  170. })
  171. }
  172. }
  173. if err := s.Err(); err != nil {
  174. // failed to read all pids?
  175. return nil, err
  176. }
  177. return out, nil
  178. }
  179. // readTasksPids will read all the pids of tasks in a cgroup by the provided path
  180. func readTasksPids(path string, subsystem Name) ([]Task, error) {
  181. f, err := os.Open(filepath.Join(path, cgroupTasks))
  182. if err != nil {
  183. return nil, err
  184. }
  185. defer f.Close()
  186. var (
  187. out []Task
  188. s = bufio.NewScanner(f)
  189. )
  190. for s.Scan() {
  191. if t := s.Text(); t != "" {
  192. pid, err := strconv.Atoi(t)
  193. if err != nil {
  194. return nil, err
  195. }
  196. out = append(out, Task{
  197. Pid: pid,
  198. Subsystem: subsystem,
  199. Path: path,
  200. })
  201. }
  202. }
  203. if err := s.Err(); err != nil {
  204. return nil, err
  205. }
  206. return out, nil
  207. }
  208. func hugePageSizes() ([]string, error) {
  209. var (
  210. pageSizes []string
  211. sizeList = []string{"B", "KB", "MB", "GB", "TB", "PB"}
  212. )
  213. files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages")
  214. if err != nil {
  215. return nil, err
  216. }
  217. for _, st := range files {
  218. nameArray := strings.Split(st.Name(), "-")
  219. pageSize, err := units.RAMInBytes(nameArray[1])
  220. if err != nil {
  221. return nil, err
  222. }
  223. pageSizes = append(pageSizes, units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList))
  224. }
  225. return pageSizes, nil
  226. }
  227. func readUint(path string) (uint64, error) {
  228. v, err := ioutil.ReadFile(path)
  229. if err != nil {
  230. return 0, err
  231. }
  232. return parseUint(strings.TrimSpace(string(v)), 10, 64)
  233. }
  // parseUint parses s as an unsigned integer in the given base and bit size,
  // clamping negative numbers to zero instead of failing.
  //
  // If s is not a valid unsigned value it is re-parsed as a signed integer.
  // A negative result yields (0, nil), both when it fits the bit size and
  // when it underflows it (ParseInt then reports ErrRange alongside the
  // minimum value). In every other case the original unsigned-parse error is
  // returned.
  func parseUint(s string, base, bitSize int) (uint64, error) {
  	parsed, err := strconv.ParseUint(s, base, bitSize)
  	if err == nil {
  		return parsed, nil
  	}

  	signed, signedErr := strconv.ParseInt(s, base, bitSize)
  	if signed >= 0 {
  		// Not a negative number, so the unsigned failure stands.
  		return 0, err
  	}
  	switch {
  	case signedErr == nil:
  		// Negative value that fits the bit size.
  		return 0, nil
  	case signedErr.(*strconv.NumError).Err == strconv.ErrRange:
  		// Negative value below the minimum for the bit size.
  		return 0, nil
  	}
  	return 0, err
  }
  251. func parseKV(raw string) (string, uint64, error) {
  252. parts := strings.Fields(raw)
  253. switch len(parts) {
  254. case 2:
  255. v, err := parseUint(parts[1], 10, 64)
  256. if err != nil {
  257. return "", 0, err
  258. }
  259. return parts[0], v, nil
  260. default:
  261. return "", 0, ErrInvalidFormat
  262. }
  263. }
  264. func parseCgroupFile(path string) (map[string]string, error) {
  265. f, err := os.Open(path)
  266. if err != nil {
  267. return nil, err
  268. }
  269. defer f.Close()
  270. return parseCgroupFromReader(f)
  271. }
  // parseCgroupFromReader parses colon-separated cgroup entries, one per
  // line, into a map from subsystem name to cgroup path.
  //
  // Each line is split into at most three fields on ":". The second field is
  // a comma-separated list of subsystem names, each mapped to the third
  // field (which may itself contain colons). Empty names are ignored, so a
  // line with an empty second field adds nothing. A line with fewer than
  // three fields, or a read error, aborts parsing and returns an error.
  func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
  	result := make(map[string]string)
  	scanner := bufio.NewScanner(r)
  	for scanner.Scan() {
  		line := scanner.Text()
  		fields := strings.SplitN(line, ":", 3)
  		if len(fields) < 3 {
  			return nil, fmt.Errorf("invalid cgroup entry: %q", line)
  		}
  		cgroupPath := fields[2]
  		for _, name := range strings.Split(fields[1], ",") {
  			if name == "" {
  				continue
  			}
  			result[name] = cgroupPath
  		}
  	}
  	if err := scanner.Err(); err != nil {
  		return nil, err
  	}
  	return result, nil
  }
  296. func getCgroupDestination(subsystem string) (string, error) {
  297. f, err := os.Open("/proc/self/mountinfo")
  298. if err != nil {
  299. return "", err
  300. }
  301. defer f.Close()
  302. s := bufio.NewScanner(f)
  303. for s.Scan() {
  304. fields := strings.Split(s.Text(), " ")
  305. if len(fields) < 10 {
  306. // broken mountinfo?
  307. continue
  308. }
  309. if fields[len(fields)-3] != "cgroup" {
  310. continue
  311. }
  312. for _, opt := range strings.Split(fields[len(fields)-1], ",") {
  313. if opt == subsystem {
  314. return fields[3], nil
  315. }
  316. }
  317. }
  318. if err := s.Err(); err != nil {
  319. return "", err
  320. }
  321. return "", ErrNoCgroupMountDestination
  322. }
  323. func pathers(subystems []Subsystem) []pather {
  324. var out []pather
  325. for _, s := range subystems {
  326. if p, ok := s.(pather); ok {
  327. out = append(out, p)
  328. }
  329. }
  330. return out
  331. }
  332. func initializeSubsystem(s Subsystem, path Path, resources *specs.LinuxResources) error {
  333. if c, ok := s.(creator); ok {
  334. p, err := path(s.Name())
  335. if err != nil {
  336. return err
  337. }
  338. if err := c.Create(p, resources); err != nil {
  339. return err
  340. }
  341. } else if c, ok := s.(pather); ok {
  342. p, err := path(s.Name())
  343. if err != nil {
  344. return err
  345. }
  346. // do the default create if the group does not have a custom one
  347. if err := os.MkdirAll(c.Path(p), defaultDirPerm); err != nil {
  348. return err
  349. }
  350. }
  351. return nil
  352. }
  // cleanPath normalizes path so that it cannot climb above its root.
  //
  // An empty path stays empty. An absolute path is returned lexically
  // cleaned. A relative path is cleaned as if anchored at the filesystem
  // root, so leading ".." components are dropped, and then made relative
  // again.
  func cleanPath(path string) string {
  	if path == "" {
  		return ""
  	}

  	cleaned := filepath.Clean(path)
  	if filepath.IsAbs(cleaned) {
  		return cleaned
  	}

  	// Anchoring at the root makes Clean discard any ".." that would climb
  	// above it. The Rel error is ignored, as before; on failure Rel yields
  	// an empty string.
  	root := string(os.PathSeparator)
  	relative, _ := filepath.Rel(root, filepath.Clean(root+cleaned))
  	return relative
  }
  // retryingWriteFile writes data to path with the given mode, repeating the
  // write for as long as it fails with EINTR. Any other outcome, success or
  // failure, is returned immediately.
  //
  // See https://github.com/golang/go/issues/38033 for why EINTR can surface.
  func retryingWriteFile(path string, data []byte, mode os.FileMode) error {
  	for {
  		err := ioutil.WriteFile(path, data, mode)
  		if errors.Is(err, syscall.EINTR) {
  			// Interrupted write: try again.
  			continue
  		}
  		return err
  	}
  }