utils.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473
  1. // +build linux
  2. package cgroups
  3. import (
  4. "bufio"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "os"
  9. "path/filepath"
  10. "strconv"
  11. "strings"
  12. "time"
  13. units "github.com/docker/go-units"
  14. )
  // Names shared by the cgroup helpers in this file.
  const (
  	// CgroupNamePrefix is the "name=" marker found on named cgroup
  	// hierarchies. Helpers in this file strip it from mount options and
  	// prepend it when looking a subsystem up in a parsed cgroup file.
  	CgroupNamePrefix = "name="

  	// CgroupProcesses is the name of the file, inside a cgroup directory,
  	// that pids are read from and written to.
  	CgroupProcesses = "cgroup.procs"
  )
  19. // https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
  20. func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
  21. mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
  22. return mnt, err
  23. }
  24. func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
  25. // We are not using mount.GetMounts() because it's super-inefficient,
  26. // parsing it directly sped up x10 times because of not using Sscanf.
  27. // It was one of two major performance drawbacks in container start.
  28. if !isSubsystemAvailable(subsystem) {
  29. return "", "", NewNotFoundError(subsystem)
  30. }
  31. f, err := os.Open("/proc/self/mountinfo")
  32. if err != nil {
  33. return "", "", err
  34. }
  35. defer f.Close()
  36. return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem)
  37. }
  38. func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) {
  39. scanner := bufio.NewScanner(reader)
  40. for scanner.Scan() {
  41. txt := scanner.Text()
  42. fields := strings.Fields(txt)
  43. if len(fields) < 5 {
  44. continue
  45. }
  46. if strings.HasPrefix(fields[4], cgroupPath) {
  47. for _, opt := range strings.Split(fields[len(fields)-1], ",") {
  48. if opt == subsystem {
  49. return fields[4], fields[3], nil
  50. }
  51. }
  52. }
  53. }
  54. if err := scanner.Err(); err != nil {
  55. return "", "", err
  56. }
  57. return "", "", NewNotFoundError(subsystem)
  58. }
  59. func isSubsystemAvailable(subsystem string) bool {
  60. cgroups, err := ParseCgroupFile("/proc/self/cgroup")
  61. if err != nil {
  62. return false
  63. }
  64. _, avail := cgroups[subsystem]
  65. return avail
  66. }
  // GetClosestMountpointAncestor walks the newline-separated mountinfo text and
  // returns the last mountpoint (fifth field of a line) that is a string prefix
  // of dir and is itself prefixed by the best candidate found so far. Lines with
  // fewer than five fields are skipped. It returns "" when nothing matches.
  //
  // NOTE(review): matching is plain string-prefix, not path-component aware
  // ("/a/b" also matches dir "/a/bc") — confirm that is acceptable for callers.
  func GetClosestMountpointAncestor(dir, mountinfo string) string {
  	closest := ""
  	for _, line := range strings.Split(mountinfo, "\n") {
  		cols := strings.Fields(line)
  		if len(cols) < 5 {
  			continue
  		}
  		candidate := cols[4]
  		if !strings.HasPrefix(candidate, closest) {
  			continue
  		}
  		if !strings.HasPrefix(dir, candidate) {
  			continue
  		}
  		closest = candidate
  	}
  	return closest
  }
  81. func FindCgroupMountpointDir() (string, error) {
  82. f, err := os.Open("/proc/self/mountinfo")
  83. if err != nil {
  84. return "", err
  85. }
  86. defer f.Close()
  87. scanner := bufio.NewScanner(f)
  88. for scanner.Scan() {
  89. text := scanner.Text()
  90. fields := strings.Split(text, " ")
  91. // Safe as mountinfo encodes mountpoints with spaces as \040.
  92. index := strings.Index(text, " - ")
  93. postSeparatorFields := strings.Fields(text[index+3:])
  94. numPostFields := len(postSeparatorFields)
  95. // This is an error as we can't detect if the mount is for "cgroup"
  96. if numPostFields == 0 {
  97. return "", fmt.Errorf("Found no fields post '-' in %q", text)
  98. }
  99. if postSeparatorFields[0] == "cgroup" {
  100. // Check that the mount is properly formatted.
  101. if numPostFields < 3 {
  102. return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
  103. }
  104. return filepath.Dir(fields[4]), nil
  105. }
  106. }
  107. if err := scanner.Err(); err != nil {
  108. return "", err
  109. }
  110. return "", NewNotFoundError("cgroup")
  111. }
  // Mount describes one cgroup mount as parsed from a mountinfo line.
  type Mount struct {
  	// Mountpoint is the fifth field of the mountinfo line.
  	Mountpoint string
  	// Root is the fourth field of the mountinfo line.
  	Root string
  	// Subsystems lists the subsystems found in the mount's options, with any
  	// "name=" prefix removed.
  	Subsystems []string
  }
  117. func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
  118. if len(m.Subsystems) == 0 {
  119. return "", fmt.Errorf("no subsystem for mount")
  120. }
  121. return getControllerPath(m.Subsystems[0], cgroups)
  122. }
  123. func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
  124. res := make([]Mount, 0, len(ss))
  125. scanner := bufio.NewScanner(mi)
  126. numFound := 0
  127. for scanner.Scan() && numFound < len(ss) {
  128. txt := scanner.Text()
  129. sepIdx := strings.Index(txt, " - ")
  130. if sepIdx == -1 {
  131. return nil, fmt.Errorf("invalid mountinfo format")
  132. }
  133. if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
  134. continue
  135. }
  136. fields := strings.Split(txt, " ")
  137. m := Mount{
  138. Mountpoint: fields[4],
  139. Root: fields[3],
  140. }
  141. for _, opt := range strings.Split(fields[len(fields)-1], ",") {
  142. seen, known := ss[opt]
  143. if !known || (!all && seen) {
  144. continue
  145. }
  146. ss[opt] = true
  147. if strings.HasPrefix(opt, CgroupNamePrefix) {
  148. opt = opt[len(CgroupNamePrefix):]
  149. }
  150. m.Subsystems = append(m.Subsystems, opt)
  151. numFound++
  152. }
  153. if len(m.Subsystems) > 0 || all {
  154. res = append(res, m)
  155. }
  156. }
  157. if err := scanner.Err(); err != nil {
  158. return nil, err
  159. }
  160. return res, nil
  161. }
  162. // GetCgroupMounts returns the mounts for the cgroup subsystems.
  163. // all indicates whether to return just the first instance or all the mounts.
  164. func GetCgroupMounts(all bool) ([]Mount, error) {
  165. f, err := os.Open("/proc/self/mountinfo")
  166. if err != nil {
  167. return nil, err
  168. }
  169. defer f.Close()
  170. allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
  171. if err != nil {
  172. return nil, err
  173. }
  174. allMap := make(map[string]bool)
  175. for s := range allSubsystems {
  176. allMap[s] = false
  177. }
  178. return getCgroupMountsHelper(allMap, f, all)
  179. }
  // GetAllSubsystems returns all the cgroup subsystems supported by the kernel.
  //
  // It reads /proc/cgroups, skips blank lines and lines starting with '#', and
  // returns the first column of every remaining line that has at least four
  // columns and whose fourth column is not "0" (the "enabled" column per
  // cgroups(7)). On success the returned slice is never nil.
  func GetAllSubsystems() ([]string, error) {
  	f, err := os.Open("/proc/cgroups")
  	if err != nil {
  		return nil, err
  	}
  	defer f.Close()

  	subsystems := []string{}

  	s := bufio.NewScanner(f)
  	for s.Scan() {
  		text := s.Text()
  		// Check for emptiness first: indexing text[0] on a blank line panics.
  		if text == "" || text[0] == '#' {
  			continue
  		}
  		parts := strings.Fields(text)
  		if len(parts) >= 4 && parts[3] != "0" {
  			subsystems = append(subsystems, parts[0])
  		}
  	}
  	if err := s.Err(); err != nil {
  		return nil, err
  	}
  	return subsystems, nil
  }
  203. // GetOwnCgroup returns the relative path to the cgroup docker is running in.
  204. func GetOwnCgroup(subsystem string) (string, error) {
  205. cgroups, err := ParseCgroupFile("/proc/self/cgroup")
  206. if err != nil {
  207. return "", err
  208. }
  209. return getControllerPath(subsystem, cgroups)
  210. }
  211. func GetOwnCgroupPath(subsystem string) (string, error) {
  212. cgroup, err := GetOwnCgroup(subsystem)
  213. if err != nil {
  214. return "", err
  215. }
  216. return getCgroupPathHelper(subsystem, cgroup)
  217. }
  218. func GetInitCgroup(subsystem string) (string, error) {
  219. cgroups, err := ParseCgroupFile("/proc/1/cgroup")
  220. if err != nil {
  221. return "", err
  222. }
  223. return getControllerPath(subsystem, cgroups)
  224. }
  225. func GetInitCgroupPath(subsystem string) (string, error) {
  226. cgroup, err := GetInitCgroup(subsystem)
  227. if err != nil {
  228. return "", err
  229. }
  230. return getCgroupPathHelper(subsystem, cgroup)
  231. }
  232. func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
  233. mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
  234. if err != nil {
  235. return "", err
  236. }
  237. // This is needed for nested containers, because in /proc/self/cgroup we
  238. // see paths from host, which don't exist in container.
  239. relCgroup, err := filepath.Rel(root, cgroup)
  240. if err != nil {
  241. return "", err
  242. }
  243. return filepath.Join(mnt, relCgroup), nil
  244. }
  245. func readProcsFile(dir string) ([]int, error) {
  246. f, err := os.Open(filepath.Join(dir, CgroupProcesses))
  247. if err != nil {
  248. return nil, err
  249. }
  250. defer f.Close()
  251. var (
  252. s = bufio.NewScanner(f)
  253. out = []int{}
  254. )
  255. for s.Scan() {
  256. if t := s.Text(); t != "" {
  257. pid, err := strconv.Atoi(t)
  258. if err != nil {
  259. return nil, err
  260. }
  261. out = append(out, pid)
  262. }
  263. }
  264. return out, nil
  265. }
  266. // ParseCgroupFile parses the given cgroup file, typically from
  267. // /proc/<pid>/cgroup, into a map of subgroups to cgroup names.
  268. func ParseCgroupFile(path string) (map[string]string, error) {
  269. f, err := os.Open(path)
  270. if err != nil {
  271. return nil, err
  272. }
  273. defer f.Close()
  274. return parseCgroupFromReader(f)
  275. }
  // parseCgroupFromReader does the parsing for ParseCgroupFile; it takes a
  // reader so it can be tested without touching the filesystem.
  //
  // Every line must have the form "hierarchy-ID:subsystem-list:cgroup-path".
  // Each comma-separated name in subsystem-list becomes a key mapping to
  // cgroup-path (an empty list yields the key ""). When a name occurs on more
  // than one line, the later line wins. A line with fewer than two colons is
  // an error.
  func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
  	result := make(map[string]string)
  	sc := bufio.NewScanner(r)

  	for sc.Scan() {
  		line := sc.Text()
  		// from cgroups(7):
  		// /proc/[pid]/cgroup
  		// ...
  		// For each cgroup hierarchy ... there is one entry
  		// containing three colon-separated fields of the form:
  		//     hierarchy-ID:subsystem-list:cgroup-path
  		entry := strings.SplitN(line, ":", 3)
  		if len(entry) < 3 {
  			return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", line)
  		}

  		// SplitN caps the split at three pieces, so colons inside the path
  		// are preserved.
  		subsystemList, cgroupPath := entry[1], entry[2]
  		for _, name := range strings.Split(subsystemList, ",") {
  			result[name] = cgroupPath
  		}
  	}
  	if err := sc.Err(); err != nil {
  		return nil, err
  	}

  	return result, nil
  }
  301. func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
  302. if p, ok := cgroups[subsystem]; ok {
  303. return p, nil
  304. }
  305. if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
  306. return p, nil
  307. }
  308. return "", NewNotFoundError(subsystem)
  309. }
  // PathExists reports whether os.Stat succeeds for path. Any Stat error, not
  // only "does not exist", is reported as false.
  func PathExists(path string) bool {
  	_, err := os.Stat(path)
  	return err == nil
  }
  316. func EnterPid(cgroupPaths map[string]string, pid int) error {
  317. for _, path := range cgroupPaths {
  318. if PathExists(path) {
  319. if err := WriteCgroupProc(path, pid); err != nil {
  320. return err
  321. }
  322. }
  323. }
  324. return nil
  325. }
  // RemovePaths tries to remove every path in paths, making up to five passes
  // with a doubling delay (starting at 10ms) before each retry. Entries are
  // deleted from the caller's map as soon as their path no longer exists. If
  // any entries remain after the last pass, an error listing them is returned.
  func RemovePaths(paths map[string]string) (err error) {
  	wait := 10 * time.Millisecond
  	for attempt := 0; attempt < 5; attempt++ {
  		if attempt > 0 {
  			time.Sleep(wait)
  			wait *= 2
  		}
  		for name, dir := range paths {
  			os.RemoveAll(dir)
  			// TODO: here probably should be logging
  			// We need this strange way of checking cgroups existence because
  			// RemoveAll almost always returns error, even on already removed
  			// cgroups
  			if _, statErr := os.Stat(dir); os.IsNotExist(statErr) {
  				delete(paths, name)
  			}
  		}
  		if len(paths) == 0 {
  			return nil
  		}
  	}
  	return fmt.Errorf("Failed to remove paths: %v", paths)
  }
  354. func GetHugePageSize() ([]string, error) {
  355. var pageSizes []string
  356. sizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"}
  357. files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages")
  358. if err != nil {
  359. return pageSizes, err
  360. }
  361. for _, st := range files {
  362. nameArray := strings.Split(st.Name(), "-")
  363. pageSize, err := units.RAMInBytes(nameArray[1])
  364. if err != nil {
  365. return []string{}, err
  366. }
  367. sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList)
  368. pageSizes = append(pageSizes, sizeString)
  369. }
  370. return pageSizes, nil
  371. }
  372. // GetPids returns all pids, that were added to cgroup at path.
  373. func GetPids(path string) ([]int, error) {
  374. return readProcsFile(path)
  375. }
  376. // GetAllPids returns all pids, that were added to cgroup at path and to all its
  377. // subcgroups.
  378. func GetAllPids(path string) ([]int, error) {
  379. var pids []int
  380. // collect pids from all sub-cgroups
  381. err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
  382. dir, file := filepath.Split(p)
  383. if file != CgroupProcesses {
  384. return nil
  385. }
  386. if iErr != nil {
  387. return iErr
  388. }
  389. cPids, err := readProcsFile(dir)
  390. if err != nil {
  391. return err
  392. }
  393. pids = append(pids, cPids...)
  394. return nil
  395. })
  396. return pids, err
  397. }
  398. // WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file
  399. func WriteCgroupProc(dir string, pid int) error {
  400. // Normally dir should not be empty, one case is that cgroup subsystem
  401. // is not mounted, we will get empty dir, and we want it fail here.
  402. if dir == "" {
  403. return fmt.Errorf("no such directory for %s", CgroupProcesses)
  404. }
  405. // Don't attach any pid to the cgroup if -1 is specified as a pid
  406. if pid != -1 {
  407. if err := ioutil.WriteFile(filepath.Join(dir, CgroupProcesses), []byte(strconv.Itoa(pid)), 0700); err != nil {
  408. return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
  409. }
  410. }
  411. return nil
  412. }