utils.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447
  1. // +build linux
  2. package cgroups
  3. import (
  4. "bufio"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "os"
  9. "path/filepath"
  10. "strconv"
  11. "strings"
  12. "time"
  13. "github.com/docker/go-units"
  14. )
  15. const (
  16. cgroupNamePrefix = "name="
  17. CgroupProcesses = "cgroup.procs"
  18. )
  19. // https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
  20. func FindCgroupMountpoint(subsystem string) (string, error) {
  21. mnt, _, err := FindCgroupMountpointAndRoot(subsystem)
  22. return mnt, err
  23. }
  24. func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
  25. // We are not using mount.GetMounts() because it's super-inefficient,
  26. // parsing it directly sped up x10 times because of not using Sscanf.
  27. // It was one of two major performance drawbacks in container start.
  28. if !isSubsystemAvailable(subsystem) {
  29. return "", "", NewNotFoundError(subsystem)
  30. }
  31. f, err := os.Open("/proc/self/mountinfo")
  32. if err != nil {
  33. return "", "", err
  34. }
  35. defer f.Close()
  36. scanner := bufio.NewScanner(f)
  37. for scanner.Scan() {
  38. txt := scanner.Text()
  39. fields := strings.Split(txt, " ")
  40. for _, opt := range strings.Split(fields[len(fields)-1], ",") {
  41. if opt == subsystem {
  42. return fields[4], fields[3], nil
  43. }
  44. }
  45. }
  46. if err := scanner.Err(); err != nil {
  47. return "", "", err
  48. }
  49. return "", "", NewNotFoundError(subsystem)
  50. }
  51. func isSubsystemAvailable(subsystem string) bool {
  52. cgroups, err := ParseCgroupFile("/proc/self/cgroup")
  53. if err != nil {
  54. return false
  55. }
  56. _, avail := cgroups[subsystem]
  57. return avail
  58. }
  59. func FindCgroupMountpointDir() (string, error) {
  60. f, err := os.Open("/proc/self/mountinfo")
  61. if err != nil {
  62. return "", err
  63. }
  64. defer f.Close()
  65. scanner := bufio.NewScanner(f)
  66. for scanner.Scan() {
  67. text := scanner.Text()
  68. fields := strings.Split(text, " ")
  69. // Safe as mountinfo encodes mountpoints with spaces as \040.
  70. index := strings.Index(text, " - ")
  71. postSeparatorFields := strings.Fields(text[index+3:])
  72. numPostFields := len(postSeparatorFields)
  73. // This is an error as we can't detect if the mount is for "cgroup"
  74. if numPostFields == 0 {
  75. return "", fmt.Errorf("Found no fields post '-' in %q", text)
  76. }
  77. if postSeparatorFields[0] == "cgroup" {
  78. // Check that the mount is properly formated.
  79. if numPostFields < 3 {
  80. return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
  81. }
  82. return filepath.Dir(fields[4]), nil
  83. }
  84. }
  85. if err := scanner.Err(); err != nil {
  86. return "", err
  87. }
  88. return "", NewNotFoundError("cgroup")
  89. }
  90. type Mount struct {
  91. Mountpoint string
  92. Root string
  93. Subsystems []string
  94. }
  95. func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
  96. if len(m.Subsystems) == 0 {
  97. return "", fmt.Errorf("no subsystem for mount")
  98. }
  99. return getControllerPath(m.Subsystems[0], cgroups)
  100. }
  101. func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
  102. res := make([]Mount, 0, len(ss))
  103. scanner := bufio.NewScanner(mi)
  104. numFound := 0
  105. for scanner.Scan() && numFound < len(ss) {
  106. txt := scanner.Text()
  107. sepIdx := strings.Index(txt, " - ")
  108. if sepIdx == -1 {
  109. return nil, fmt.Errorf("invalid mountinfo format")
  110. }
  111. if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
  112. continue
  113. }
  114. fields := strings.Split(txt, " ")
  115. m := Mount{
  116. Mountpoint: fields[4],
  117. Root: fields[3],
  118. }
  119. for _, opt := range strings.Split(fields[len(fields)-1], ",") {
  120. if !ss[opt] {
  121. continue
  122. }
  123. if strings.HasPrefix(opt, cgroupNamePrefix) {
  124. m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):])
  125. } else {
  126. m.Subsystems = append(m.Subsystems, opt)
  127. }
  128. if !all {
  129. numFound++
  130. }
  131. }
  132. res = append(res, m)
  133. }
  134. if err := scanner.Err(); err != nil {
  135. return nil, err
  136. }
  137. return res, nil
  138. }
  139. // GetCgroupMounts returns the mounts for the cgroup subsystems.
  140. // all indicates whether to return just the first instance or all the mounts.
  141. func GetCgroupMounts(all bool) ([]Mount, error) {
  142. f, err := os.Open("/proc/self/mountinfo")
  143. if err != nil {
  144. return nil, err
  145. }
  146. defer f.Close()
  147. allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
  148. if err != nil {
  149. return nil, err
  150. }
  151. allMap := make(map[string]bool)
  152. for s := range allSubsystems {
  153. allMap[s] = true
  154. }
  155. return getCgroupMountsHelper(allMap, f, all)
  156. }
  157. // GetAllSubsystems returns all the cgroup subsystems supported by the kernel
  158. func GetAllSubsystems() ([]string, error) {
  159. f, err := os.Open("/proc/cgroups")
  160. if err != nil {
  161. return nil, err
  162. }
  163. defer f.Close()
  164. subsystems := []string{}
  165. s := bufio.NewScanner(f)
  166. for s.Scan() {
  167. text := s.Text()
  168. if text[0] != '#' {
  169. parts := strings.Fields(text)
  170. if len(parts) >= 4 && parts[3] != "0" {
  171. subsystems = append(subsystems, parts[0])
  172. }
  173. }
  174. }
  175. if err := s.Err(); err != nil {
  176. return nil, err
  177. }
  178. return subsystems, nil
  179. }
  180. // GetOwnCgroup returns the relative path to the cgroup docker is running in.
  181. func GetOwnCgroup(subsystem string) (string, error) {
  182. cgroups, err := ParseCgroupFile("/proc/self/cgroup")
  183. if err != nil {
  184. return "", err
  185. }
  186. return getControllerPath(subsystem, cgroups)
  187. }
  188. func GetOwnCgroupPath(subsystem string) (string, error) {
  189. cgroup, err := GetOwnCgroup(subsystem)
  190. if err != nil {
  191. return "", err
  192. }
  193. return getCgroupPathHelper(subsystem, cgroup)
  194. }
  195. func GetInitCgroup(subsystem string) (string, error) {
  196. cgroups, err := ParseCgroupFile("/proc/1/cgroup")
  197. if err != nil {
  198. return "", err
  199. }
  200. return getControllerPath(subsystem, cgroups)
  201. }
  202. func GetInitCgroupPath(subsystem string) (string, error) {
  203. cgroup, err := GetInitCgroup(subsystem)
  204. if err != nil {
  205. return "", err
  206. }
  207. return getCgroupPathHelper(subsystem, cgroup)
  208. }
  209. func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
  210. mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
  211. if err != nil {
  212. return "", err
  213. }
  214. // This is needed for nested containers, because in /proc/self/cgroup we
  215. // see pathes from host, which don't exist in container.
  216. relCgroup, err := filepath.Rel(root, cgroup)
  217. if err != nil {
  218. return "", err
  219. }
  220. return filepath.Join(mnt, relCgroup), nil
  221. }
  222. func readProcsFile(dir string) ([]int, error) {
  223. f, err := os.Open(filepath.Join(dir, CgroupProcesses))
  224. if err != nil {
  225. return nil, err
  226. }
  227. defer f.Close()
  228. var (
  229. s = bufio.NewScanner(f)
  230. out = []int{}
  231. )
  232. for s.Scan() {
  233. if t := s.Text(); t != "" {
  234. pid, err := strconv.Atoi(t)
  235. if err != nil {
  236. return nil, err
  237. }
  238. out = append(out, pid)
  239. }
  240. }
  241. return out, nil
  242. }
  243. // ParseCgroupFile parses the given cgroup file, typically from
  244. // /proc/<pid>/cgroup, into a map of subgroups to cgroup names.
  245. func ParseCgroupFile(path string) (map[string]string, error) {
  246. f, err := os.Open(path)
  247. if err != nil {
  248. return nil, err
  249. }
  250. defer f.Close()
  251. return parseCgroupFromReader(f)
  252. }
  253. // helper function for ParseCgroupFile to make testing easier
  254. func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
  255. s := bufio.NewScanner(r)
  256. cgroups := make(map[string]string)
  257. for s.Scan() {
  258. text := s.Text()
  259. // from cgroups(7):
  260. // /proc/[pid]/cgroup
  261. // ...
  262. // For each cgroup hierarchy ... there is one entry
  263. // containing three colon-separated fields of the form:
  264. // hierarchy-ID:subsystem-list:cgroup-path
  265. parts := strings.SplitN(text, ":", 3)
  266. if len(parts) < 3 {
  267. return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text)
  268. }
  269. for _, subs := range strings.Split(parts[1], ",") {
  270. cgroups[subs] = parts[2]
  271. }
  272. }
  273. if err := s.Err(); err != nil {
  274. return nil, err
  275. }
  276. return cgroups, nil
  277. }
  278. func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
  279. if p, ok := cgroups[subsystem]; ok {
  280. return p, nil
  281. }
  282. if p, ok := cgroups[cgroupNamePrefix+subsystem]; ok {
  283. return p, nil
  284. }
  285. return "", NewNotFoundError(subsystem)
  286. }
  287. func PathExists(path string) bool {
  288. if _, err := os.Stat(path); err != nil {
  289. return false
  290. }
  291. return true
  292. }
  293. func EnterPid(cgroupPaths map[string]string, pid int) error {
  294. for _, path := range cgroupPaths {
  295. if PathExists(path) {
  296. if err := WriteCgroupProc(path, pid); err != nil {
  297. return err
  298. }
  299. }
  300. }
  301. return nil
  302. }
  303. // RemovePaths iterates over the provided paths removing them.
  304. // We trying to remove all paths five times with increasing delay between tries.
  305. // If after all there are not removed cgroups - appropriate error will be
  306. // returned.
  307. func RemovePaths(paths map[string]string) (err error) {
  308. delay := 10 * time.Millisecond
  309. for i := 0; i < 5; i++ {
  310. if i != 0 {
  311. time.Sleep(delay)
  312. delay *= 2
  313. }
  314. for s, p := range paths {
  315. os.RemoveAll(p)
  316. // TODO: here probably should be logging
  317. _, err := os.Stat(p)
  318. // We need this strange way of checking cgroups existence because
  319. // RemoveAll almost always returns error, even on already removed
  320. // cgroups
  321. if os.IsNotExist(err) {
  322. delete(paths, s)
  323. }
  324. }
  325. if len(paths) == 0 {
  326. return nil
  327. }
  328. }
  329. return fmt.Errorf("Failed to remove paths: %v", paths)
  330. }
  331. func GetHugePageSize() ([]string, error) {
  332. var pageSizes []string
  333. sizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"}
  334. files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages")
  335. if err != nil {
  336. return pageSizes, err
  337. }
  338. for _, st := range files {
  339. nameArray := strings.Split(st.Name(), "-")
  340. pageSize, err := units.RAMInBytes(nameArray[1])
  341. if err != nil {
  342. return []string{}, err
  343. }
  344. sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList)
  345. pageSizes = append(pageSizes, sizeString)
  346. }
  347. return pageSizes, nil
  348. }
  349. // GetPids returns all pids, that were added to cgroup at path.
  350. func GetPids(path string) ([]int, error) {
  351. return readProcsFile(path)
  352. }
  353. // GetAllPids returns all pids, that were added to cgroup at path and to all its
  354. // subcgroups.
  355. func GetAllPids(path string) ([]int, error) {
  356. var pids []int
  357. // collect pids from all sub-cgroups
  358. err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
  359. dir, file := filepath.Split(p)
  360. if file != CgroupProcesses {
  361. return nil
  362. }
  363. if iErr != nil {
  364. return iErr
  365. }
  366. cPids, err := readProcsFile(dir)
  367. if err != nil {
  368. return err
  369. }
  370. pids = append(pids, cPids...)
  371. return nil
  372. })
  373. return pids, err
  374. }
  375. // WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file
  376. func WriteCgroupProc(dir string, pid int) error {
  377. // Normally dir should not be empty, one case is that cgroup subsystem
  378. // is not mounted, we will get empty dir, and we want it fail here.
  379. if dir == "" {
  380. return fmt.Errorf("no such directory for %s", CgroupProcesses)
  381. }
  382. // Dont attach any pid to the cgroup if -1 is specified as a pid
  383. if pid != -1 {
  384. if err := ioutil.WriteFile(filepath.Join(dir, CgroupProcesses), []byte(strconv.Itoa(pid)), 0700); err != nil {
  385. return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
  386. }
  387. }
  388. return nil
  389. }