utils.go 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392
  1. /*
  2. Copyright The containerd Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package cgroups
  14. import (
  15. "bufio"
  16. "errors"
  17. "fmt"
  18. "io"
  19. "io/ioutil"
  20. "os"
  21. "path/filepath"
  22. "strconv"
  23. "strings"
  24. "sync"
  25. "syscall"
  26. "time"
  27. units "github.com/docker/go-units"
  28. specs "github.com/opencontainers/runtime-spec/specs-go"
  29. "golang.org/x/sys/unix"
  30. )
  var (
  	// nsOnce guards the one-time user namespace probe done by RunningInUserNS.
  	nsOnce sync.Once
  	// inUserNS caches the answer computed by RunningInUserNS.
  	inUserNS bool
  	// checkMode guards the one-time cgroup mode probe done by Mode.
  	checkMode sync.Once
  	// cgMode caches the answer computed by Mode.
  	cgMode CGMode
  )

  // unifiedMountpoint is the directory Mode inspects with statfs to decide
  // which cgroup layout the host uses.
  const unifiedMountpoint = "/sys/fs/cgroup"
  // CGMode is the cgroups mode of the host system
  type CGMode int

  const (
  	// Unavailable cgroup mountpoint
  	// (Mode reports this when statfs on the cgroup mountpoint fails).
  	Unavailable CGMode = iota
  	// Legacy cgroups v1
  	// (Mode reports this when neither the mountpoint nor its "unified"
  	// sub-directory is a cgroup2 filesystem).
  	Legacy
  	// Hybrid with cgroups v1 and v2 controllers mounted
  	// (Mode reports this when only the "unified" sub-directory is cgroup2).
  	Hybrid
  	// Unified with only cgroups v2 mounted
  	// (Mode reports this when the mountpoint itself is cgroup2).
  	Unified
  )
  50. // Mode returns the cgroups mode running on the host
  51. func Mode() CGMode {
  52. checkMode.Do(func() {
  53. var st unix.Statfs_t
  54. if err := unix.Statfs(unifiedMountpoint, &st); err != nil {
  55. cgMode = Unavailable
  56. return
  57. }
  58. switch st.Type {
  59. case unix.CGROUP2_SUPER_MAGIC:
  60. cgMode = Unified
  61. default:
  62. cgMode = Legacy
  63. if err := unix.Statfs(filepath.Join(unifiedMountpoint, "unified"), &st); err != nil {
  64. return
  65. }
  66. if st.Type == unix.CGROUP2_SUPER_MAGIC {
  67. cgMode = Hybrid
  68. }
  69. }
  70. })
  71. return cgMode
  72. }
  73. // RunningInUserNS detects whether we are currently running in a user namespace.
  74. // Copied from github.com/lxc/lxd/shared/util.go
  75. func RunningInUserNS() bool {
  76. nsOnce.Do(func() {
  77. file, err := os.Open("/proc/self/uid_map")
  78. if err != nil {
  79. // This kernel-provided file only exists if user namespaces are supported
  80. return
  81. }
  82. defer file.Close()
  83. buf := bufio.NewReader(file)
  84. l, _, err := buf.ReadLine()
  85. if err != nil {
  86. return
  87. }
  88. line := string(l)
  89. var a, b, c int64
  90. fmt.Sscanf(line, "%d %d %d", &a, &b, &c)
  91. /*
  92. * We assume we are in the initial user namespace if we have a full
  93. * range - 4294967295 uids starting at uid 0.
  94. */
  95. if a == 0 && b == 0 && c == 4294967295 {
  96. return
  97. }
  98. inUserNS = true
  99. })
  100. return inUserNS
  101. }
  102. // defaults returns all known groups
  103. func defaults(root string) ([]Subsystem, error) {
  104. h, err := NewHugetlb(root)
  105. if err != nil && !os.IsNotExist(err) {
  106. return nil, err
  107. }
  108. s := []Subsystem{
  109. NewNamed(root, "systemd"),
  110. NewFreezer(root),
  111. NewPids(root),
  112. NewNetCls(root),
  113. NewNetPrio(root),
  114. NewPerfEvent(root),
  115. NewCpuset(root),
  116. NewCpu(root),
  117. NewCpuacct(root),
  118. NewMemory(root),
  119. NewBlkio(root),
  120. NewRdma(root),
  121. }
  122. // only add the devices cgroup if we are not in a user namespace
  123. // because modifications are not allowed
  124. if !RunningInUserNS() {
  125. s = append(s, NewDevices(root))
  126. }
  127. // add the hugetlb cgroup if error wasn't due to missing hugetlb
  128. // cgroup support on the host
  129. if err == nil {
  130. s = append(s, h)
  131. }
  132. return s, nil
  133. }
  // remove deletes the cgroup path, retrying when the removal fails.
  //
  // Up to five attempts are made. Any error from os.RemoveAll triggers a retry
  // (this covers transient EAGAIN/EBUSY failures); the wait between attempts
  // starts at 10ms and doubles each time. When every attempt fails, the
  // returned error wraps the error of the last attempt so callers can inspect
  // it with errors.Is / errors.As.
  func remove(path string) error {
  	const attempts = 5
  	var (
  		delay   = 10 * time.Millisecond
  		lastErr error
  	)
  	for i := 0; i < attempts; i++ {
  		if i != 0 {
  			time.Sleep(delay)
  			delay *= 2
  		}
  		if lastErr = os.RemoveAll(path); lastErr == nil {
  			return nil
  		}
  	}
  	return fmt.Errorf("cgroups: unable to remove path %q: %w", path, lastErr)
  }
  149. // readPids will read all the pids of processes or tasks in a cgroup by the provided path
  150. func readPids(path string, subsystem Name, pType procType) ([]Process, error) {
  151. f, err := os.Open(filepath.Join(path, pType))
  152. if err != nil {
  153. return nil, err
  154. }
  155. defer f.Close()
  156. var (
  157. out []Process
  158. s = bufio.NewScanner(f)
  159. )
  160. for s.Scan() {
  161. if t := s.Text(); t != "" {
  162. pid, err := strconv.Atoi(t)
  163. if err != nil {
  164. return nil, err
  165. }
  166. out = append(out, Process{
  167. Pid: pid,
  168. Subsystem: subsystem,
  169. Path: path,
  170. })
  171. }
  172. }
  173. if err := s.Err(); err != nil {
  174. // failed to read all pids?
  175. return nil, err
  176. }
  177. return out, nil
  178. }
  179. func hugePageSizes() ([]string, error) {
  180. var (
  181. pageSizes []string
  182. sizeList = []string{"B", "KB", "MB", "GB", "TB", "PB"}
  183. )
  184. files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages")
  185. if err != nil {
  186. return nil, err
  187. }
  188. for _, st := range files {
  189. nameArray := strings.Split(st.Name(), "-")
  190. pageSize, err := units.RAMInBytes(nameArray[1])
  191. if err != nil {
  192. return nil, err
  193. }
  194. pageSizes = append(pageSizes, units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList))
  195. }
  196. return pageSizes, nil
  197. }
  198. func readUint(path string) (uint64, error) {
  199. v, err := ioutil.ReadFile(path)
  200. if err != nil {
  201. return 0, err
  202. }
  203. return parseUint(strings.TrimSpace(string(v)), 10, 64)
  204. }
  // parseUint parses s as an unsigned integer in the given base and bit size.
  //
  // Negative numbers are clamped to 0 instead of being rejected, including
  // values that underflow the signed range. Any other parse failure returns
  // the error produced by strconv.ParseUint.
  func parseUint(s string, base, bitSize int) (uint64, error) {
  	value, uintErr := strconv.ParseUint(s, base, bitSize)
  	if uintErr == nil {
  		return value, nil
  	}

  	// Unsigned parsing failed; see whether the input was merely negative.
  	signed, intErr := strconv.ParseInt(s, base, bitSize)
  	if signed >= 0 {
  		return 0, uintErr
  	}
  	if intErr == nil {
  		// Negative value that fits in the signed range.
  		return 0, nil
  	}
  	if numErr, ok := intErr.(*strconv.NumError); ok && numErr.Err == strconv.ErrRange {
  		// Negative value below the signed minimum.
  		return 0, nil
  	}
  	return 0, uintErr
  }
  222. func parseKV(raw string) (string, uint64, error) {
  223. parts := strings.Fields(raw)
  224. switch len(parts) {
  225. case 2:
  226. v, err := parseUint(parts[1], 10, 64)
  227. if err != nil {
  228. return "", 0, err
  229. }
  230. return parts[0], v, nil
  231. default:
  232. return "", 0, ErrInvalidFormat
  233. }
  234. }
  235. // ParseCgroupFile parses the given cgroup file, typically /proc/self/cgroup
  236. // or /proc/<pid>/cgroup, into a map of subsystems to cgroup paths, e.g.
  237. // "cpu": "/user.slice/user-1000.slice"
  238. // "pids": "/user.slice/user-1000.slice"
  239. // etc.
  240. //
  241. // The resulting map does not have an element for cgroup v2 unified hierarchy.
  242. // Use ParseCgroupFileUnified to get the unified path.
  243. func ParseCgroupFile(path string) (map[string]string, error) {
  244. x, _, err := ParseCgroupFileUnified(path)
  245. return x, err
  246. }
  247. // ParseCgroupFileUnified returns legacy subsystem paths as the first value,
  248. // and returns the unified path as the second value.
  249. func ParseCgroupFileUnified(path string) (map[string]string, string, error) {
  250. f, err := os.Open(path)
  251. if err != nil {
  252. return nil, "", err
  253. }
  254. defer f.Close()
  255. return parseCgroupFromReaderUnified(f)
  256. }
  // parseCgroupFromReaderUnified reads colon-separated cgroup entries from r,
  // one per line, each split into at most three fields.
  //
  // The second field is a comma-separated list of subsystem names and the
  // third field is the path. Every named subsystem is mapped to that path;
  // an empty name marks the unified entry, whose path is returned as the
  // second result. A line with fewer than three fields, or a read failure,
  // yields a nil map and an error.
  func parseCgroupFromReaderUnified(r io.Reader) (map[string]string, string, error) {
  	legacy := map[string]string{}
  	var unifiedPath string

  	scanner := bufio.NewScanner(r)
  	for scanner.Scan() {
  		line := scanner.Text()
  		fields := strings.SplitN(line, ":", 3)
  		if len(fields) < 3 {
  			return nil, unifiedPath, fmt.Errorf("invalid cgroup entry: %q", line)
  		}
  		controllers, cgPath := fields[1], fields[2]
  		for _, name := range strings.Split(controllers, ",") {
  			if name != "" {
  				legacy[name] = cgPath
  				continue
  			}
  			unifiedPath = cgPath
  		}
  	}
  	if err := scanner.Err(); err != nil {
  		return nil, unifiedPath, err
  	}
  	return legacy, unifiedPath, nil
  }
  284. func getCgroupDestination(subsystem string) (string, error) {
  285. f, err := os.Open("/proc/self/mountinfo")
  286. if err != nil {
  287. return "", err
  288. }
  289. defer f.Close()
  290. s := bufio.NewScanner(f)
  291. for s.Scan() {
  292. fields := strings.Split(s.Text(), " ")
  293. if len(fields) < 10 {
  294. // broken mountinfo?
  295. continue
  296. }
  297. if fields[len(fields)-3] != "cgroup" {
  298. continue
  299. }
  300. for _, opt := range strings.Split(fields[len(fields)-1], ",") {
  301. if opt == subsystem {
  302. return fields[3], nil
  303. }
  304. }
  305. }
  306. if err := s.Err(); err != nil {
  307. return "", err
  308. }
  309. return "", ErrNoCgroupMountDestination
  310. }
  311. func pathers(subystems []Subsystem) []pather {
  312. var out []pather
  313. for _, s := range subystems {
  314. if p, ok := s.(pather); ok {
  315. out = append(out, p)
  316. }
  317. }
  318. return out
  319. }
  320. func initializeSubsystem(s Subsystem, path Path, resources *specs.LinuxResources) error {
  321. if c, ok := s.(creator); ok {
  322. p, err := path(s.Name())
  323. if err != nil {
  324. return err
  325. }
  326. if err := c.Create(p, resources); err != nil {
  327. return err
  328. }
  329. } else if c, ok := s.(pather); ok {
  330. p, err := path(s.Name())
  331. if err != nil {
  332. return err
  333. }
  334. // do the default create if the group does not have a custom one
  335. if err := os.MkdirAll(c.Path(p), defaultDirPerm); err != nil {
  336. return err
  337. }
  338. }
  339. return nil
  340. }
  // cleanPath normalizes path with filepath.Clean. An empty path stays empty
  // and an absolute path is returned cleaned. A relative path is additionally
  // resolved as if it were rooted at the path separator and then made relative
  // again, so leading ".." elements cannot climb above the root.
  func cleanPath(path string) string {
  	if path == "" {
  		return ""
  	}
  	cleaned := filepath.Clean(path)
  	if filepath.IsAbs(cleaned) {
  		return cleaned
  	}
  	root := string(os.PathSeparator)
  	// The error from Rel is ignored on purpose; rel is "" in that case.
  	rel, _ := filepath.Rel(root, filepath.Clean(root+cleaned))
  	return rel
  }
  // retryingWriteFile writes data to path with the given mode, repeating the
  // write for as long as it fails with EINTR. Any other outcome (success or a
  // different error) is returned as is.
  // See https://github.com/golang/go/issues/38033
  func retryingWriteFile(path string, data []byte, mode os.FileMode) error {
  	err := ioutil.WriteFile(path, data, mode)
  	for errors.Is(err, syscall.EINTR) {
  		err = ioutil.WriteFile(path, data, mode)
  	}
  	return err
  }