utils.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462
  1. // +build linux
  2. package cgroups
  3. import (
  4. "bufio"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "os"
  9. "path/filepath"
  10. "strconv"
  11. "strings"
  12. "time"
  13. "github.com/docker/go-units"
  14. )
  15. const (
  // cgroupNamePrefix is the "name=" prefix carried by named cgroup
  // hierarchies. It is stripped from matching mount options in
  // getCgroupMountsHelper and prepended as a fallback lookup key in
  // getControllerPath.
  16. cgroupNamePrefix = "name="
  // CgroupProcesses is the name of the file inside a cgroup directory that
  // readProcsFile reads pids from and WriteCgroupProc writes a pid to.
  17. CgroupProcesses = "cgroup.procs"
  18. )
  19. // https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
  20. func FindCgroupMountpoint(subsystem string) (string, error) {
  21. mnt, _, err := FindCgroupMountpointAndRoot(subsystem)
  22. return mnt, err
  23. }
  24. func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
  25. // We are not using mount.GetMounts() because it's super-inefficient,
  26. // parsing it directly sped up x10 times because of not using Sscanf.
  27. // It was one of two major performance drawbacks in container start.
  28. if !isSubsystemAvailable(subsystem) {
  29. return "", "", NewNotFoundError(subsystem)
  30. }
  31. f, err := os.Open("/proc/self/mountinfo")
  32. if err != nil {
  33. return "", "", err
  34. }
  35. defer f.Close()
  36. scanner := bufio.NewScanner(f)
  37. for scanner.Scan() {
  38. txt := scanner.Text()
  39. fields := strings.Split(txt, " ")
  40. for _, opt := range strings.Split(fields[len(fields)-1], ",") {
  41. if opt == subsystem {
  42. return fields[4], fields[3], nil
  43. }
  44. }
  45. }
  46. if err := scanner.Err(); err != nil {
  47. return "", "", err
  48. }
  49. return "", "", NewNotFoundError(subsystem)
  50. }
  51. func isSubsystemAvailable(subsystem string) bool {
  52. cgroups, err := ParseCgroupFile("/proc/self/cgroup")
  53. if err != nil {
  54. return false
  55. }
  56. _, avail := cgroups[subsystem]
  57. return avail
  58. }
  // GetClosestMountpointAncestor returns the deepest mount point listed in
  // mountinfo that is dir itself or one of its ancestor directories.
  //
  // mountinfo is the text of a mountinfo file, one entry per line; the mount
  // point is taken from the fifth whitespace-separated field and lines with
  // fewer than five fields are ignored. An empty string is returned when no
  // entry qualifies.
  func GetClosestMountpointAncestor(dir, mountinfo string) string {
  	deepestMountPoint := ""
  	for _, mountInfoEntry := range strings.Split(mountinfo, "\n") {
  		mountInfoParts := strings.Fields(mountInfoEntry)
  		if len(mountInfoParts) < 5 {
  			continue
  		}
  		mountPoint := mountInfoParts[4]
  		if !strings.HasPrefix(dir, mountPoint) {
  			continue
  		}
  		// A raw string prefix is not enough: "/foo/bar/baz" must not count
  		// as an ancestor of "/foo/bar/bazza". The prefix has to end exactly
  		// at a path-component boundary of dir. The index below is in range
  		// because the equal-length case is tested first.
  		atBoundary := len(dir) == len(mountPoint) ||
  			strings.HasSuffix(mountPoint, "/") ||
  			dir[len(mountPoint)] == '/'
  		// Only move to a mount point that extends the current best one, so
  		// a shallower ancestor seen later never replaces a deeper one.
  		if atBoundary && strings.HasPrefix(mountPoint, deepestMountPoint) {
  			deepestMountPoint = mountPoint
  		}
  	}
  	return deepestMountPoint
  }
  73. func FindCgroupMountpointDir() (string, error) {
  74. f, err := os.Open("/proc/self/mountinfo")
  75. if err != nil {
  76. return "", err
  77. }
  78. defer f.Close()
  79. scanner := bufio.NewScanner(f)
  80. for scanner.Scan() {
  81. text := scanner.Text()
  82. fields := strings.Split(text, " ")
  83. // Safe as mountinfo encodes mountpoints with spaces as \040.
  84. index := strings.Index(text, " - ")
  85. postSeparatorFields := strings.Fields(text[index+3:])
  86. numPostFields := len(postSeparatorFields)
  87. // This is an error as we can't detect if the mount is for "cgroup"
  88. if numPostFields == 0 {
  89. return "", fmt.Errorf("Found no fields post '-' in %q", text)
  90. }
  91. if postSeparatorFields[0] == "cgroup" {
  92. // Check that the mount is properly formated.
  93. if numPostFields < 3 {
  94. return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
  95. }
  96. return filepath.Dir(fields[4]), nil
  97. }
  98. }
  99. if err := scanner.Err(); err != nil {
  100. return "", err
  101. }
  102. return "", NewNotFoundError("cgroup")
  103. }
  104. type Mount struct {
  105. Mountpoint string
  106. Root string
  107. Subsystems []string
  108. }
  109. func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
  110. if len(m.Subsystems) == 0 {
  111. return "", fmt.Errorf("no subsystem for mount")
  112. }
  113. return getControllerPath(m.Subsystems[0], cgroups)
  114. }
  115. func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
  116. res := make([]Mount, 0, len(ss))
  117. scanner := bufio.NewScanner(mi)
  118. numFound := 0
  119. for scanner.Scan() && numFound < len(ss) {
  120. txt := scanner.Text()
  121. sepIdx := strings.Index(txt, " - ")
  122. if sepIdx == -1 {
  123. return nil, fmt.Errorf("invalid mountinfo format")
  124. }
  125. if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
  126. continue
  127. }
  128. fields := strings.Split(txt, " ")
  129. m := Mount{
  130. Mountpoint: fields[4],
  131. Root: fields[3],
  132. }
  133. for _, opt := range strings.Split(fields[len(fields)-1], ",") {
  134. if !ss[opt] {
  135. continue
  136. }
  137. if strings.HasPrefix(opt, cgroupNamePrefix) {
  138. m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):])
  139. } else {
  140. m.Subsystems = append(m.Subsystems, opt)
  141. }
  142. if !all {
  143. numFound++
  144. }
  145. }
  146. res = append(res, m)
  147. }
  148. if err := scanner.Err(); err != nil {
  149. return nil, err
  150. }
  151. return res, nil
  152. }
  153. // GetCgroupMounts returns the mounts for the cgroup subsystems.
  154. // all indicates whether to return just the first instance or all the mounts.
  155. func GetCgroupMounts(all bool) ([]Mount, error) {
  156. f, err := os.Open("/proc/self/mountinfo")
  157. if err != nil {
  158. return nil, err
  159. }
  160. defer f.Close()
  161. allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
  162. if err != nil {
  163. return nil, err
  164. }
  165. allMap := make(map[string]bool)
  166. for s := range allSubsystems {
  167. allMap[s] = true
  168. }
  169. return getCgroupMountsHelper(allMap, f, all)
  170. }
  // GetAllSubsystems returns the cgroup subsystems listed in /proc/cgroups.
  //
  // Lines that are empty or start with '#' are skipped. For every other line
  // the first whitespace-separated column is reported as a subsystem name,
  // provided the line has at least four columns and the fourth one (the
  // "enabled" flag according to cgroups(7)) is not "0".
  //
  // The returned slice is non-nil on success. Errors from opening or reading
  // the file are returned unchanged.
  func GetAllSubsystems() ([]string, error) {
  	f, err := os.Open("/proc/cgroups")
  	if err != nil {
  		return nil, err
  	}
  	defer f.Close()
  	subsystems := []string{}
  	s := bufio.NewScanner(f)
  	for s.Scan() {
  		text := s.Text()
  		// Indexing text[0] directly would panic on an empty line.
  		if text == "" || strings.HasPrefix(text, "#") {
  			continue
  		}
  		parts := strings.Fields(text)
  		if len(parts) >= 4 && parts[3] != "0" {
  			subsystems = append(subsystems, parts[0])
  		}
  	}
  	if err := s.Err(); err != nil {
  		return nil, err
  	}
  	return subsystems, nil
  }
  194. // GetOwnCgroup returns the relative path to the cgroup docker is running in.
  195. func GetOwnCgroup(subsystem string) (string, error) {
  196. cgroups, err := ParseCgroupFile("/proc/self/cgroup")
  197. if err != nil {
  198. return "", err
  199. }
  200. return getControllerPath(subsystem, cgroups)
  201. }
  202. func GetOwnCgroupPath(subsystem string) (string, error) {
  203. cgroup, err := GetOwnCgroup(subsystem)
  204. if err != nil {
  205. return "", err
  206. }
  207. return getCgroupPathHelper(subsystem, cgroup)
  208. }
  209. func GetInitCgroup(subsystem string) (string, error) {
  210. cgroups, err := ParseCgroupFile("/proc/1/cgroup")
  211. if err != nil {
  212. return "", err
  213. }
  214. return getControllerPath(subsystem, cgroups)
  215. }
  216. func GetInitCgroupPath(subsystem string) (string, error) {
  217. cgroup, err := GetInitCgroup(subsystem)
  218. if err != nil {
  219. return "", err
  220. }
  221. return getCgroupPathHelper(subsystem, cgroup)
  222. }
  223. func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
  224. mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
  225. if err != nil {
  226. return "", err
  227. }
  228. // This is needed for nested containers, because in /proc/self/cgroup we
  229. // see pathes from host, which don't exist in container.
  230. relCgroup, err := filepath.Rel(root, cgroup)
  231. if err != nil {
  232. return "", err
  233. }
  234. return filepath.Join(mnt, relCgroup), nil
  235. }
  236. func readProcsFile(dir string) ([]int, error) {
  237. f, err := os.Open(filepath.Join(dir, CgroupProcesses))
  238. if err != nil {
  239. return nil, err
  240. }
  241. defer f.Close()
  242. var (
  243. s = bufio.NewScanner(f)
  244. out = []int{}
  245. )
  246. for s.Scan() {
  247. if t := s.Text(); t != "" {
  248. pid, err := strconv.Atoi(t)
  249. if err != nil {
  250. return nil, err
  251. }
  252. out = append(out, pid)
  253. }
  254. }
  255. return out, nil
  256. }
  257. // ParseCgroupFile parses the given cgroup file, typically from
  258. // /proc/<pid>/cgroup, into a map of subgroups to cgroup names.
  259. func ParseCgroupFile(path string) (map[string]string, error) {
  260. f, err := os.Open(path)
  261. if err != nil {
  262. return nil, err
  263. }
  264. defer f.Close()
  265. return parseCgroupFromReader(f)
  266. }
  // parseCgroupFromReader does the parsing for ParseCgroupFile; it takes an
  // io.Reader so it can be tested without a real file.
  //
  // Every line must have the form described in cgroups(7) for
  // /proc/[pid]/cgroup:
  //
  //	hierarchy-ID:subsystem-list:cgroup-path
  //
  // Each name in the comma-separated subsystem list becomes a key mapped to
  // the cgroup path. Only the first two colons split the line, so the path
  // may itself contain colons. A line with fewer than two colons is an error.
  func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
  	result := make(map[string]string)
  	scanner := bufio.NewScanner(r)
  	for scanner.Scan() {
  		line := scanner.Text()
  		fields := strings.SplitN(line, ":", 3)
  		if len(fields) < 3 {
  			return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", line)
  		}
  		cgroupPath := fields[2]
  		for _, name := range strings.Split(fields[1], ",") {
  			result[name] = cgroupPath
  		}
  	}
  	if err := scanner.Err(); err != nil {
  		return nil, err
  	}
  	return result, nil
  }
  292. func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
  293. if p, ok := cgroups[subsystem]; ok {
  294. return p, nil
  295. }
  296. if p, ok := cgroups[cgroupNamePrefix+subsystem]; ok {
  297. return p, nil
  298. }
  299. return "", NewNotFoundError(subsystem)
  300. }
  // PathExists reports whether os.Stat succeeds for path. Any Stat error,
  // not only "does not exist", makes it return false.
  func PathExists(path string) bool {
  	_, err := os.Stat(path)
  	return err == nil
  }
  307. func EnterPid(cgroupPaths map[string]string, pid int) error {
  308. for _, path := range cgroupPaths {
  309. if PathExists(path) {
  310. if err := WriteCgroupProc(path, pid); err != nil {
  311. return err
  312. }
  313. }
  314. }
  315. return nil
  316. }
  // RemovePaths tries to remove every path in paths, making up to five
  // passes with a delay before each retry that starts at 10ms and doubles.
  // Entries whose path no longer exists are deleted from the paths map, so
  // the caller's map is modified. It returns nil once the map is empty, or
  // an error listing the remaining entries after the last pass.
  func RemovePaths(paths map[string]string) (err error) {
  	const maxAttempts = 5
  	wait := 10 * time.Millisecond
  	for attempt := 0; attempt < maxAttempts; attempt++ {
  		if attempt > 0 {
  			time.Sleep(wait)
  			wait *= 2
  		}
  		for name, dir := range paths {
  			// The result of RemoveAll is deliberately ignored: it almost
  			// always reports an error, even for cgroups that are already
  			// gone, so existence is checked with Stat instead.
  			// TODO: here probably should be logging
  			_ = os.RemoveAll(dir)
  			if _, statErr := os.Stat(dir); os.IsNotExist(statErr) {
  				delete(paths, name)
  			}
  		}
  		if len(paths) == 0 {
  			return nil
  		}
  	}
  	return fmt.Errorf("Failed to remove paths: %v", paths)
  }
  345. func GetHugePageSize() ([]string, error) {
  346. var pageSizes []string
  347. sizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"}
  348. files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages")
  349. if err != nil {
  350. return pageSizes, err
  351. }
  352. for _, st := range files {
  353. nameArray := strings.Split(st.Name(), "-")
  354. pageSize, err := units.RAMInBytes(nameArray[1])
  355. if err != nil {
  356. return []string{}, err
  357. }
  358. sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList)
  359. pageSizes = append(pageSizes, sizeString)
  360. }
  361. return pageSizes, nil
  362. }
  363. // GetPids returns all pids, that were added to cgroup at path.
  364. func GetPids(path string) ([]int, error) {
  365. return readProcsFile(path)
  366. }
  367. // GetAllPids returns all pids, that were added to cgroup at path and to all its
  368. // subcgroups.
  369. func GetAllPids(path string) ([]int, error) {
  370. var pids []int
  371. // collect pids from all sub-cgroups
  372. err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
  373. dir, file := filepath.Split(p)
  374. if file != CgroupProcesses {
  375. return nil
  376. }
  377. if iErr != nil {
  378. return iErr
  379. }
  380. cPids, err := readProcsFile(dir)
  381. if err != nil {
  382. return err
  383. }
  384. pids = append(pids, cPids...)
  385. return nil
  386. })
  387. return pids, err
  388. }
  389. // WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file
  390. func WriteCgroupProc(dir string, pid int) error {
  391. // Normally dir should not be empty, one case is that cgroup subsystem
  392. // is not mounted, we will get empty dir, and we want it fail here.
  393. if dir == "" {
  394. return fmt.Errorf("no such directory for %s", CgroupProcesses)
  395. }
  396. // Dont attach any pid to the cgroup if -1 is specified as a pid
  397. if pid != -1 {
  398. if err := ioutil.WriteFile(filepath.Join(dir, CgroupProcesses), []byte(strconv.Itoa(pid)), 0700); err != nil {
  399. return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
  400. }
  401. }
  402. return nil
  403. }