utils.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503
  1. // +build linux
  2. package cgroups
  3. import (
  4. "bufio"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "os"
  9. "path/filepath"
  10. "strconv"
  11. "strings"
  12. "time"
  13. units "github.com/docker/go-units"
  14. "golang.org/x/sys/unix"
  15. )
const (
	// CgroupNamePrefix is the "name=" prefix carried by named hierarchy
	// entries. getCgroupMountsHelper strips it from reported subsystem names
	// and getControllerPath prepends it as a fallback lookup key.
	CgroupNamePrefix = "name="
	// CgroupProcesses is the file name, relative to a cgroup directory, that
	// readProcsFile reads pids from and WriteCgroupProc writes a pid to.
	CgroupProcesses = "cgroup.procs"
)
  20. // https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
  21. func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
  22. mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
  23. return mnt, err
  24. }
  25. func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
  26. // We are not using mount.GetMounts() because it's super-inefficient,
  27. // parsing it directly sped up x10 times because of not using Sscanf.
  28. // It was one of two major performance drawbacks in container start.
  29. if !isSubsystemAvailable(subsystem) {
  30. return "", "", NewNotFoundError(subsystem)
  31. }
  32. f, err := os.Open("/proc/self/mountinfo")
  33. if err != nil {
  34. return "", "", err
  35. }
  36. defer f.Close()
  37. return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem)
  38. }
  39. func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) {
  40. scanner := bufio.NewScanner(reader)
  41. for scanner.Scan() {
  42. txt := scanner.Text()
  43. fields := strings.Fields(txt)
  44. if len(fields) < 5 {
  45. continue
  46. }
  47. if strings.HasPrefix(fields[4], cgroupPath) {
  48. for _, opt := range strings.Split(fields[len(fields)-1], ",") {
  49. if opt == subsystem {
  50. return fields[4], fields[3], nil
  51. }
  52. }
  53. }
  54. }
  55. if err := scanner.Err(); err != nil {
  56. return "", "", err
  57. }
  58. return "", "", NewNotFoundError(subsystem)
  59. }
  60. func isSubsystemAvailable(subsystem string) bool {
  61. cgroups, err := ParseCgroupFile("/proc/self/cgroup")
  62. if err != nil {
  63. return false
  64. }
  65. _, avail := cgroups[subsystem]
  66. return avail
  67. }
  68. func GetClosestMountpointAncestor(dir, mountinfo string) string {
  69. deepestMountPoint := ""
  70. for _, mountInfoEntry := range strings.Split(mountinfo, "\n") {
  71. mountInfoParts := strings.Fields(mountInfoEntry)
  72. if len(mountInfoParts) < 5 {
  73. continue
  74. }
  75. mountPoint := mountInfoParts[4]
  76. if strings.HasPrefix(mountPoint, deepestMountPoint) && strings.HasPrefix(dir, mountPoint) {
  77. deepestMountPoint = mountPoint
  78. }
  79. }
  80. return deepestMountPoint
  81. }
  82. func FindCgroupMountpointDir() (string, error) {
  83. f, err := os.Open("/proc/self/mountinfo")
  84. if err != nil {
  85. return "", err
  86. }
  87. defer f.Close()
  88. scanner := bufio.NewScanner(f)
  89. for scanner.Scan() {
  90. text := scanner.Text()
  91. fields := strings.Split(text, " ")
  92. // Safe as mountinfo encodes mountpoints with spaces as \040.
  93. index := strings.Index(text, " - ")
  94. postSeparatorFields := strings.Fields(text[index+3:])
  95. numPostFields := len(postSeparatorFields)
  96. // This is an error as we can't detect if the mount is for "cgroup"
  97. if numPostFields == 0 {
  98. return "", fmt.Errorf("Found no fields post '-' in %q", text)
  99. }
  100. if postSeparatorFields[0] == "cgroup" {
  101. // Check that the mount is properly formatted.
  102. if numPostFields < 3 {
  103. return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
  104. }
  105. return filepath.Dir(fields[4]), nil
  106. }
  107. }
  108. if err := scanner.Err(); err != nil {
  109. return "", err
  110. }
  111. return "", NewNotFoundError("cgroup")
  112. }
// Mount describes one cgroup mount as collected by getCgroupMountsHelper from
// a mountinfo line.
type Mount struct {
	// Mountpoint is the fifth space-separated field of the mountinfo line.
	Mountpoint string
	// Root is the fourth space-separated field of the mountinfo line.
	Root string
	// Subsystems holds the matched entries of the line's last field, with any
	// CgroupNamePrefix removed.
	Subsystems []string
}
  118. func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
  119. if len(m.Subsystems) == 0 {
  120. return "", fmt.Errorf("no subsystem for mount")
  121. }
  122. return getControllerPath(m.Subsystems[0], cgroups)
  123. }
  124. func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
  125. res := make([]Mount, 0, len(ss))
  126. scanner := bufio.NewScanner(mi)
  127. numFound := 0
  128. for scanner.Scan() && numFound < len(ss) {
  129. txt := scanner.Text()
  130. sepIdx := strings.Index(txt, " - ")
  131. if sepIdx == -1 {
  132. return nil, fmt.Errorf("invalid mountinfo format")
  133. }
  134. if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
  135. continue
  136. }
  137. fields := strings.Split(txt, " ")
  138. m := Mount{
  139. Mountpoint: fields[4],
  140. Root: fields[3],
  141. }
  142. for _, opt := range strings.Split(fields[len(fields)-1], ",") {
  143. seen, known := ss[opt]
  144. if !known || (!all && seen) {
  145. continue
  146. }
  147. ss[opt] = true
  148. if strings.HasPrefix(opt, CgroupNamePrefix) {
  149. opt = opt[len(CgroupNamePrefix):]
  150. }
  151. m.Subsystems = append(m.Subsystems, opt)
  152. numFound++
  153. }
  154. if len(m.Subsystems) > 0 || all {
  155. res = append(res, m)
  156. }
  157. }
  158. if err := scanner.Err(); err != nil {
  159. return nil, err
  160. }
  161. return res, nil
  162. }
  163. // GetCgroupMounts returns the mounts for the cgroup subsystems.
  164. // all indicates whether to return just the first instance or all the mounts.
  165. func GetCgroupMounts(all bool) ([]Mount, error) {
  166. f, err := os.Open("/proc/self/mountinfo")
  167. if err != nil {
  168. return nil, err
  169. }
  170. defer f.Close()
  171. allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
  172. if err != nil {
  173. return nil, err
  174. }
  175. allMap := make(map[string]bool)
  176. for s := range allSubsystems {
  177. allMap[s] = false
  178. }
  179. return getCgroupMountsHelper(allMap, f, all)
  180. }
  181. // GetAllSubsystems returns all the cgroup subsystems supported by the kernel
  182. func GetAllSubsystems() ([]string, error) {
  183. f, err := os.Open("/proc/cgroups")
  184. if err != nil {
  185. return nil, err
  186. }
  187. defer f.Close()
  188. subsystems := []string{}
  189. s := bufio.NewScanner(f)
  190. for s.Scan() {
  191. text := s.Text()
  192. if text[0] != '#' {
  193. parts := strings.Fields(text)
  194. if len(parts) >= 4 && parts[3] != "0" {
  195. subsystems = append(subsystems, parts[0])
  196. }
  197. }
  198. }
  199. if err := s.Err(); err != nil {
  200. return nil, err
  201. }
  202. return subsystems, nil
  203. }
  204. // GetOwnCgroup returns the relative path to the cgroup docker is running in.
  205. func GetOwnCgroup(subsystem string) (string, error) {
  206. cgroups, err := ParseCgroupFile("/proc/self/cgroup")
  207. if err != nil {
  208. return "", err
  209. }
  210. return getControllerPath(subsystem, cgroups)
  211. }
  212. func GetOwnCgroupPath(subsystem string) (string, error) {
  213. cgroup, err := GetOwnCgroup(subsystem)
  214. if err != nil {
  215. return "", err
  216. }
  217. return getCgroupPathHelper(subsystem, cgroup)
  218. }
  219. func GetInitCgroup(subsystem string) (string, error) {
  220. cgroups, err := ParseCgroupFile("/proc/1/cgroup")
  221. if err != nil {
  222. return "", err
  223. }
  224. return getControllerPath(subsystem, cgroups)
  225. }
  226. func GetInitCgroupPath(subsystem string) (string, error) {
  227. cgroup, err := GetInitCgroup(subsystem)
  228. if err != nil {
  229. return "", err
  230. }
  231. return getCgroupPathHelper(subsystem, cgroup)
  232. }
  233. func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
  234. mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
  235. if err != nil {
  236. return "", err
  237. }
  238. // This is needed for nested containers, because in /proc/self/cgroup we
  239. // see paths from host, which don't exist in container.
  240. relCgroup, err := filepath.Rel(root, cgroup)
  241. if err != nil {
  242. return "", err
  243. }
  244. return filepath.Join(mnt, relCgroup), nil
  245. }
  246. func readProcsFile(dir string) ([]int, error) {
  247. f, err := os.Open(filepath.Join(dir, CgroupProcesses))
  248. if err != nil {
  249. return nil, err
  250. }
  251. defer f.Close()
  252. var (
  253. s = bufio.NewScanner(f)
  254. out = []int{}
  255. )
  256. for s.Scan() {
  257. if t := s.Text(); t != "" {
  258. pid, err := strconv.Atoi(t)
  259. if err != nil {
  260. return nil, err
  261. }
  262. out = append(out, pid)
  263. }
  264. }
  265. return out, nil
  266. }
  267. // ParseCgroupFile parses the given cgroup file, typically from
  268. // /proc/<pid>/cgroup, into a map of subgroups to cgroup names.
  269. func ParseCgroupFile(path string) (map[string]string, error) {
  270. f, err := os.Open(path)
  271. if err != nil {
  272. return nil, err
  273. }
  274. defer f.Close()
  275. return parseCgroupFromReader(f)
  276. }
  277. // helper function for ParseCgroupFile to make testing easier
  278. func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
  279. s := bufio.NewScanner(r)
  280. cgroups := make(map[string]string)
  281. for s.Scan() {
  282. text := s.Text()
  283. // from cgroups(7):
  284. // /proc/[pid]/cgroup
  285. // ...
  286. // For each cgroup hierarchy ... there is one entry
  287. // containing three colon-separated fields of the form:
  288. // hierarchy-ID:subsystem-list:cgroup-path
  289. parts := strings.SplitN(text, ":", 3)
  290. if len(parts) < 3 {
  291. return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text)
  292. }
  293. for _, subs := range strings.Split(parts[1], ",") {
  294. cgroups[subs] = parts[2]
  295. }
  296. }
  297. if err := s.Err(); err != nil {
  298. return nil, err
  299. }
  300. return cgroups, nil
  301. }
  302. func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
  303. if p, ok := cgroups[subsystem]; ok {
  304. return p, nil
  305. }
  306. if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
  307. return p, nil
  308. }
  309. return "", NewNotFoundError(subsystem)
  310. }
  311. func PathExists(path string) bool {
  312. if _, err := os.Stat(path); err != nil {
  313. return false
  314. }
  315. return true
  316. }
  317. func EnterPid(cgroupPaths map[string]string, pid int) error {
  318. for _, path := range cgroupPaths {
  319. if PathExists(path) {
  320. if err := WriteCgroupProc(path, pid); err != nil {
  321. return err
  322. }
  323. }
  324. }
  325. return nil
  326. }
  327. // RemovePaths iterates over the provided paths removing them.
  328. // We trying to remove all paths five times with increasing delay between tries.
  329. // If after all there are not removed cgroups - appropriate error will be
  330. // returned.
  331. func RemovePaths(paths map[string]string) (err error) {
  332. delay := 10 * time.Millisecond
  333. for i := 0; i < 5; i++ {
  334. if i != 0 {
  335. time.Sleep(delay)
  336. delay *= 2
  337. }
  338. for s, p := range paths {
  339. os.RemoveAll(p)
  340. // TODO: here probably should be logging
  341. _, err := os.Stat(p)
  342. // We need this strange way of checking cgroups existence because
  343. // RemoveAll almost always returns error, even on already removed
  344. // cgroups
  345. if os.IsNotExist(err) {
  346. delete(paths, s)
  347. }
  348. }
  349. if len(paths) == 0 {
  350. return nil
  351. }
  352. }
  353. return fmt.Errorf("Failed to remove paths: %v", paths)
  354. }
  355. func GetHugePageSize() ([]string, error) {
  356. var pageSizes []string
  357. sizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"}
  358. files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages")
  359. if err != nil {
  360. return pageSizes, err
  361. }
  362. for _, st := range files {
  363. nameArray := strings.Split(st.Name(), "-")
  364. pageSize, err := units.RAMInBytes(nameArray[1])
  365. if err != nil {
  366. return []string{}, err
  367. }
  368. sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList)
  369. pageSizes = append(pageSizes, sizeString)
  370. }
  371. return pageSizes, nil
  372. }
  373. // GetPids returns all pids, that were added to cgroup at path.
  374. func GetPids(path string) ([]int, error) {
  375. return readProcsFile(path)
  376. }
  377. // GetAllPids returns all pids, that were added to cgroup at path and to all its
  378. // subcgroups.
  379. func GetAllPids(path string) ([]int, error) {
  380. var pids []int
  381. // collect pids from all sub-cgroups
  382. err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
  383. dir, file := filepath.Split(p)
  384. if file != CgroupProcesses {
  385. return nil
  386. }
  387. if iErr != nil {
  388. return iErr
  389. }
  390. cPids, err := readProcsFile(dir)
  391. if err != nil {
  392. return err
  393. }
  394. pids = append(pids, cPids...)
  395. return nil
  396. })
  397. return pids, err
  398. }
  399. // WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file
  400. func WriteCgroupProc(dir string, pid int) error {
  401. // Normally dir should not be empty, one case is that cgroup subsystem
  402. // is not mounted, we will get empty dir, and we want it fail here.
  403. if dir == "" {
  404. return fmt.Errorf("no such directory for %s", CgroupProcesses)
  405. }
  406. // Dont attach any pid to the cgroup if -1 is specified as a pid
  407. if pid == -1 {
  408. return nil
  409. }
  410. cgroupProcessesFile, err := os.OpenFile(filepath.Join(dir, CgroupProcesses), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0700)
  411. if err != nil {
  412. return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
  413. }
  414. defer cgroupProcessesFile.Close()
  415. for i := 0; i < 5; i++ {
  416. _, err = cgroupProcessesFile.WriteString(strconv.Itoa(pid))
  417. if err == nil {
  418. return nil
  419. }
  420. // EINVAL might mean that the task being added to cgroup.procs is in state
  421. // TASK_NEW. We should attempt to do so again.
  422. if isEINVAL(err) {
  423. time.Sleep(30 * time.Millisecond)
  424. continue
  425. }
  426. return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
  427. }
  428. return err
  429. }
  430. func isEINVAL(err error) bool {
  431. switch err := err.(type) {
  432. case *os.PathError:
  433. return err.Err == unix.EINVAL
  434. default:
  435. return false
  436. }
  437. }