utils.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463
  1. // +build linux
  2. package cgroups
  3. import (
  4. "bufio"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "os"
  9. "path/filepath"
  10. "strconv"
  11. "strings"
  12. "time"
  13. units "github.com/docker/go-units"
  14. )
  15. const (
  16. cgroupNamePrefix = "name="
  17. CgroupProcesses = "cgroup.procs"
  18. )
  19. // https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
  20. func FindCgroupMountpoint(subsystem string) (string, error) {
  21. mnt, _, err := FindCgroupMountpointAndRoot(subsystem)
  22. return mnt, err
  23. }
  24. func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
  25. // We are not using mount.GetMounts() because it's super-inefficient,
  26. // parsing it directly sped up x10 times because of not using Sscanf.
  27. // It was one of two major performance drawbacks in container start.
  28. if !isSubsystemAvailable(subsystem) {
  29. return "", "", NewNotFoundError(subsystem)
  30. }
  31. f, err := os.Open("/proc/self/mountinfo")
  32. if err != nil {
  33. return "", "", err
  34. }
  35. defer f.Close()
  36. scanner := bufio.NewScanner(f)
  37. for scanner.Scan() {
  38. txt := scanner.Text()
  39. fields := strings.Split(txt, " ")
  40. for _, opt := range strings.Split(fields[len(fields)-1], ",") {
  41. if opt == subsystem {
  42. return fields[4], fields[3], nil
  43. }
  44. }
  45. }
  46. if err := scanner.Err(); err != nil {
  47. return "", "", err
  48. }
  49. return "", "", NewNotFoundError(subsystem)
  50. }
  51. func isSubsystemAvailable(subsystem string) bool {
  52. cgroups, err := ParseCgroupFile("/proc/self/cgroup")
  53. if err != nil {
  54. return false
  55. }
  56. _, avail := cgroups[subsystem]
  57. return avail
  58. }
  59. func GetClosestMountpointAncestor(dir, mountinfo string) string {
  60. deepestMountPoint := ""
  61. for _, mountInfoEntry := range strings.Split(mountinfo, "\n") {
  62. mountInfoParts := strings.Fields(mountInfoEntry)
  63. if len(mountInfoParts) < 5 {
  64. continue
  65. }
  66. mountPoint := mountInfoParts[4]
  67. if strings.HasPrefix(mountPoint, deepestMountPoint) && strings.HasPrefix(dir, mountPoint) {
  68. deepestMountPoint = mountPoint
  69. }
  70. }
  71. return deepestMountPoint
  72. }
  73. func FindCgroupMountpointDir() (string, error) {
  74. f, err := os.Open("/proc/self/mountinfo")
  75. if err != nil {
  76. return "", err
  77. }
  78. defer f.Close()
  79. scanner := bufio.NewScanner(f)
  80. for scanner.Scan() {
  81. text := scanner.Text()
  82. fields := strings.Split(text, " ")
  83. // Safe as mountinfo encodes mountpoints with spaces as \040.
  84. index := strings.Index(text, " - ")
  85. postSeparatorFields := strings.Fields(text[index+3:])
  86. numPostFields := len(postSeparatorFields)
  87. // This is an error as we can't detect if the mount is for "cgroup"
  88. if numPostFields == 0 {
  89. return "", fmt.Errorf("Found no fields post '-' in %q", text)
  90. }
  91. if postSeparatorFields[0] == "cgroup" {
  92. // Check that the mount is properly formatted.
  93. if numPostFields < 3 {
  94. return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
  95. }
  96. return filepath.Dir(fields[4]), nil
  97. }
  98. }
  99. if err := scanner.Err(); err != nil {
  100. return "", err
  101. }
  102. return "", NewNotFoundError("cgroup")
  103. }
// Mount describes one cgroup mount as read from a mountinfo file.
type Mount struct {
	// Mountpoint is the fifth space-separated field of the mountinfo entry.
	Mountpoint string
	// Root is the fourth space-separated field of the mountinfo entry.
	Root string
	// Subsystems holds the recognised subsystems from the entry's last
	// field, with any "name=" prefix removed.
	Subsystems []string
}
  109. func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
  110. if len(m.Subsystems) == 0 {
  111. return "", fmt.Errorf("no subsystem for mount")
  112. }
  113. return getControllerPath(m.Subsystems[0], cgroups)
  114. }
  115. func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
  116. res := make([]Mount, 0, len(ss))
  117. scanner := bufio.NewScanner(mi)
  118. numFound := 0
  119. for scanner.Scan() && numFound < len(ss) {
  120. txt := scanner.Text()
  121. sepIdx := strings.Index(txt, " - ")
  122. if sepIdx == -1 {
  123. return nil, fmt.Errorf("invalid mountinfo format")
  124. }
  125. if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
  126. continue
  127. }
  128. fields := strings.Split(txt, " ")
  129. m := Mount{
  130. Mountpoint: fields[4],
  131. Root: fields[3],
  132. }
  133. for _, opt := range strings.Split(fields[len(fields)-1], ",") {
  134. seen, known := ss[opt]
  135. if !known || (!all && seen) {
  136. continue
  137. }
  138. ss[opt] = true
  139. if strings.HasPrefix(opt, cgroupNamePrefix) {
  140. opt = opt[len(cgroupNamePrefix):]
  141. }
  142. m.Subsystems = append(m.Subsystems, opt)
  143. numFound++
  144. }
  145. if len(m.Subsystems) > 0 || all {
  146. res = append(res, m)
  147. }
  148. }
  149. if err := scanner.Err(); err != nil {
  150. return nil, err
  151. }
  152. return res, nil
  153. }
  154. // GetCgroupMounts returns the mounts for the cgroup subsystems.
  155. // all indicates whether to return just the first instance or all the mounts.
  156. func GetCgroupMounts(all bool) ([]Mount, error) {
  157. f, err := os.Open("/proc/self/mountinfo")
  158. if err != nil {
  159. return nil, err
  160. }
  161. defer f.Close()
  162. allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
  163. if err != nil {
  164. return nil, err
  165. }
  166. allMap := make(map[string]bool)
  167. for s := range allSubsystems {
  168. allMap[s] = false
  169. }
  170. return getCgroupMountsHelper(allMap, f, all)
  171. }
  172. // GetAllSubsystems returns all the cgroup subsystems supported by the kernel
  173. func GetAllSubsystems() ([]string, error) {
  174. f, err := os.Open("/proc/cgroups")
  175. if err != nil {
  176. return nil, err
  177. }
  178. defer f.Close()
  179. subsystems := []string{}
  180. s := bufio.NewScanner(f)
  181. for s.Scan() {
  182. text := s.Text()
  183. if text[0] != '#' {
  184. parts := strings.Fields(text)
  185. if len(parts) >= 4 && parts[3] != "0" {
  186. subsystems = append(subsystems, parts[0])
  187. }
  188. }
  189. }
  190. if err := s.Err(); err != nil {
  191. return nil, err
  192. }
  193. return subsystems, nil
  194. }
  195. // GetOwnCgroup returns the relative path to the cgroup docker is running in.
  196. func GetOwnCgroup(subsystem string) (string, error) {
  197. cgroups, err := ParseCgroupFile("/proc/self/cgroup")
  198. if err != nil {
  199. return "", err
  200. }
  201. return getControllerPath(subsystem, cgroups)
  202. }
  203. func GetOwnCgroupPath(subsystem string) (string, error) {
  204. cgroup, err := GetOwnCgroup(subsystem)
  205. if err != nil {
  206. return "", err
  207. }
  208. return getCgroupPathHelper(subsystem, cgroup)
  209. }
  210. func GetInitCgroup(subsystem string) (string, error) {
  211. cgroups, err := ParseCgroupFile("/proc/1/cgroup")
  212. if err != nil {
  213. return "", err
  214. }
  215. return getControllerPath(subsystem, cgroups)
  216. }
  217. func GetInitCgroupPath(subsystem string) (string, error) {
  218. cgroup, err := GetInitCgroup(subsystem)
  219. if err != nil {
  220. return "", err
  221. }
  222. return getCgroupPathHelper(subsystem, cgroup)
  223. }
  224. func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
  225. mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
  226. if err != nil {
  227. return "", err
  228. }
  229. // This is needed for nested containers, because in /proc/self/cgroup we
  230. // see paths from host, which don't exist in container.
  231. relCgroup, err := filepath.Rel(root, cgroup)
  232. if err != nil {
  233. return "", err
  234. }
  235. return filepath.Join(mnt, relCgroup), nil
  236. }
  237. func readProcsFile(dir string) ([]int, error) {
  238. f, err := os.Open(filepath.Join(dir, CgroupProcesses))
  239. if err != nil {
  240. return nil, err
  241. }
  242. defer f.Close()
  243. var (
  244. s = bufio.NewScanner(f)
  245. out = []int{}
  246. )
  247. for s.Scan() {
  248. if t := s.Text(); t != "" {
  249. pid, err := strconv.Atoi(t)
  250. if err != nil {
  251. return nil, err
  252. }
  253. out = append(out, pid)
  254. }
  255. }
  256. return out, nil
  257. }
  258. // ParseCgroupFile parses the given cgroup file, typically from
  259. // /proc/<pid>/cgroup, into a map of subgroups to cgroup names.
  260. func ParseCgroupFile(path string) (map[string]string, error) {
  261. f, err := os.Open(path)
  262. if err != nil {
  263. return nil, err
  264. }
  265. defer f.Close()
  266. return parseCgroupFromReader(f)
  267. }
  268. // helper function for ParseCgroupFile to make testing easier
  269. func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
  270. s := bufio.NewScanner(r)
  271. cgroups := make(map[string]string)
  272. for s.Scan() {
  273. text := s.Text()
  274. // from cgroups(7):
  275. // /proc/[pid]/cgroup
  276. // ...
  277. // For each cgroup hierarchy ... there is one entry
  278. // containing three colon-separated fields of the form:
  279. // hierarchy-ID:subsystem-list:cgroup-path
  280. parts := strings.SplitN(text, ":", 3)
  281. if len(parts) < 3 {
  282. return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text)
  283. }
  284. for _, subs := range strings.Split(parts[1], ",") {
  285. cgroups[subs] = parts[2]
  286. }
  287. }
  288. if err := s.Err(); err != nil {
  289. return nil, err
  290. }
  291. return cgroups, nil
  292. }
  293. func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
  294. if p, ok := cgroups[subsystem]; ok {
  295. return p, nil
  296. }
  297. if p, ok := cgroups[cgroupNamePrefix+subsystem]; ok {
  298. return p, nil
  299. }
  300. return "", NewNotFoundError(subsystem)
  301. }
  302. func PathExists(path string) bool {
  303. if _, err := os.Stat(path); err != nil {
  304. return false
  305. }
  306. return true
  307. }
  308. func EnterPid(cgroupPaths map[string]string, pid int) error {
  309. for _, path := range cgroupPaths {
  310. if PathExists(path) {
  311. if err := WriteCgroupProc(path, pid); err != nil {
  312. return err
  313. }
  314. }
  315. }
  316. return nil
  317. }
  318. // RemovePaths iterates over the provided paths removing them.
  319. // We trying to remove all paths five times with increasing delay between tries.
  320. // If after all there are not removed cgroups - appropriate error will be
  321. // returned.
  322. func RemovePaths(paths map[string]string) (err error) {
  323. delay := 10 * time.Millisecond
  324. for i := 0; i < 5; i++ {
  325. if i != 0 {
  326. time.Sleep(delay)
  327. delay *= 2
  328. }
  329. for s, p := range paths {
  330. os.RemoveAll(p)
  331. // TODO: here probably should be logging
  332. _, err := os.Stat(p)
  333. // We need this strange way of checking cgroups existence because
  334. // RemoveAll almost always returns error, even on already removed
  335. // cgroups
  336. if os.IsNotExist(err) {
  337. delete(paths, s)
  338. }
  339. }
  340. if len(paths) == 0 {
  341. return nil
  342. }
  343. }
  344. return fmt.Errorf("Failed to remove paths: %v", paths)
  345. }
  346. func GetHugePageSize() ([]string, error) {
  347. var pageSizes []string
  348. sizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"}
  349. files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages")
  350. if err != nil {
  351. return pageSizes, err
  352. }
  353. for _, st := range files {
  354. nameArray := strings.Split(st.Name(), "-")
  355. pageSize, err := units.RAMInBytes(nameArray[1])
  356. if err != nil {
  357. return []string{}, err
  358. }
  359. sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList)
  360. pageSizes = append(pageSizes, sizeString)
  361. }
  362. return pageSizes, nil
  363. }
  364. // GetPids returns all pids, that were added to cgroup at path.
  365. func GetPids(path string) ([]int, error) {
  366. return readProcsFile(path)
  367. }
  368. // GetAllPids returns all pids, that were added to cgroup at path and to all its
  369. // subcgroups.
  370. func GetAllPids(path string) ([]int, error) {
  371. var pids []int
  372. // collect pids from all sub-cgroups
  373. err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
  374. dir, file := filepath.Split(p)
  375. if file != CgroupProcesses {
  376. return nil
  377. }
  378. if iErr != nil {
  379. return iErr
  380. }
  381. cPids, err := readProcsFile(dir)
  382. if err != nil {
  383. return err
  384. }
  385. pids = append(pids, cPids...)
  386. return nil
  387. })
  388. return pids, err
  389. }
  390. // WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file
  391. func WriteCgroupProc(dir string, pid int) error {
  392. // Normally dir should not be empty, one case is that cgroup subsystem
  393. // is not mounted, we will get empty dir, and we want it fail here.
  394. if dir == "" {
  395. return fmt.Errorf("no such directory for %s", CgroupProcesses)
  396. }
  397. // Dont attach any pid to the cgroup if -1 is specified as a pid
  398. if pid != -1 {
  399. if err := ioutil.WriteFile(filepath.Join(dir, CgroupProcesses), []byte(strconv.Itoa(pid)), 0700); err != nil {
  400. return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
  401. }
  402. }
  403. return nil
  404. }