123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370 |
- /*
- Copyright The containerd Authors.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
- package cgroups
- import (
- "bufio"
- "fmt"
- "io"
- "io/ioutil"
- "os"
- "path/filepath"
- "strconv"
- "strings"
- "sync"
- "time"
- units "github.com/docker/go-units"
- specs "github.com/opencontainers/runtime-spec/specs-go"
- "golang.org/x/sys/unix"
- )
- var (
- isUserNS = runningInUserNS()
- checkMode sync.Once
- cgMode CGMode
- )
// unifiedMountpoint is the directory that Mode inspects with statfs to work
// out which cgroup hierarchy is mounted.
const unifiedMountpoint = "/sys/fs/cgroup"
// CGMode identifies the cgroup hierarchy layout found on the host system.
type CGMode int

// The layouts that Mode can report.
const (
	// Unavailable means the cgroup mountpoint could not be inspected.
	Unavailable CGMode = iota
	// Legacy means only cgroups v1 is in use.
	Legacy
	// Hybrid means cgroups v1 is in use with a cgroups v2 hierarchy mounted
	// alongside it.
	Hybrid
	// Unified means only cgroups v2 is mounted.
	Unified
)
- // Mode returns the cgroups mode running on the host
- func Mode() CGMode {
- checkMode.Do(func() {
- var st unix.Statfs_t
- if err := unix.Statfs(unifiedMountpoint, &st); err != nil {
- cgMode = Unavailable
- return
- }
- switch st.Type {
- case unix.CGROUP2_SUPER_MAGIC:
- cgMode = Unified
- default:
- cgMode = Legacy
- if err := unix.Statfs(filepath.Join(unifiedMountpoint, "unified"), &st); err != nil {
- return
- }
- if st.Type == unix.CGROUP2_SUPER_MAGIC {
- cgMode = Hybrid
- }
- }
- })
- return cgMode
- }
// runningInUserNS detects whether we are currently running in a user
// namespace by inspecting the first line of /proc/self/uid_map.
// Copied from github.com/lxc/lxd/shared/util.go
//
// It returns false when the file cannot be opened or read, and false when the
// first line is the full mapping "0 0 4294967295"; any other content yields
// true.
func runningInUserNS() bool {
	f, err := os.Open("/proc/self/uid_map")
	if err != nil {
		// This kernel-provided file only exists if user namespaces are
		// supported.
		return false
	}
	defer f.Close()

	first, _, err := bufio.NewReader(f).ReadLine()
	if err != nil {
		return false
	}

	// The Sscanf result is not checked: fields that fail to parse keep their
	// zero value and are compared as such below.
	var start, parentStart, length int64
	fmt.Sscanf(string(first), "%d %d %d", &start, &parentStart, &length)

	// We assume we are in the initial user namespace if we have a full
	// range - 4294967295 uids starting at uid 0. Anything else counts as a
	// user namespace.
	return start != 0 || parentStart != 0 || length != 4294967295
}
- // defaults returns all known groups
- func defaults(root string) ([]Subsystem, error) {
- h, err := NewHugetlb(root)
- if err != nil && !os.IsNotExist(err) {
- return nil, err
- }
- s := []Subsystem{
- NewNamed(root, "systemd"),
- NewFreezer(root),
- NewPids(root),
- NewNetCls(root),
- NewNetPrio(root),
- NewPerfEvent(root),
- NewCputset(root),
- NewCpu(root),
- NewCpuacct(root),
- NewMemory(root),
- NewBlkio(root),
- NewRdma(root),
- }
- // only add the devices cgroup if we are not in a user namespace
- // because modifications are not allowed
- if !isUserNS {
- s = append(s, NewDevices(root))
- }
- // add the hugetlb cgroup if error wasn't due to missing hugetlb
- // cgroup support on the host
- if err == nil {
- s = append(s, h)
- }
- return s, nil
- }
// remove deletes the cgroup directory at path, retrying when os.RemoveAll
// fails for any reason.
//
// Up to five attempts are made. The first is immediate; each later attempt is
// preceded by a sleep that starts at 10ms and doubles every time. If every
// attempt fails, the returned error wraps the error from the last attempt so
// callers can inspect the cause with errors.Is / errors.As.
func remove(path string) error {
	const attempts = 5

	var (
		delay = 10 * time.Millisecond
		err   error
	)
	for i := 0; i < attempts; i++ {
		if i != 0 {
			time.Sleep(delay)
			delay *= 2
		}
		if err = os.RemoveAll(path); err == nil {
			return nil
		}
	}
	return fmt.Errorf("cgroups: unable to remove path %q: %w", path, err)
}
- // readPids will read all the pids of processes in a cgroup by the provided path
- func readPids(path string, subsystem Name) ([]Process, error) {
- f, err := os.Open(filepath.Join(path, cgroupProcs))
- if err != nil {
- return nil, err
- }
- defer f.Close()
- var (
- out []Process
- s = bufio.NewScanner(f)
- )
- for s.Scan() {
- if t := s.Text(); t != "" {
- pid, err := strconv.Atoi(t)
- if err != nil {
- return nil, err
- }
- out = append(out, Process{
- Pid: pid,
- Subsystem: subsystem,
- Path: path,
- })
- }
- }
- return out, nil
- }
- // readTasksPids will read all the pids of tasks in a cgroup by the provided path
- func readTasksPids(path string, subsystem Name) ([]Task, error) {
- f, err := os.Open(filepath.Join(path, cgroupTasks))
- if err != nil {
- return nil, err
- }
- defer f.Close()
- var (
- out []Task
- s = bufio.NewScanner(f)
- )
- for s.Scan() {
- if t := s.Text(); t != "" {
- pid, err := strconv.Atoi(t)
- if err != nil {
- return nil, err
- }
- out = append(out, Task{
- Pid: pid,
- Subsystem: subsystem,
- Path: path,
- })
- }
- }
- return out, nil
- }
- func hugePageSizes() ([]string, error) {
- var (
- pageSizes []string
- sizeList = []string{"B", "KB", "MB", "GB", "TB", "PB"}
- )
- files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages")
- if err != nil {
- return nil, err
- }
- for _, st := range files {
- nameArray := strings.Split(st.Name(), "-")
- pageSize, err := units.RAMInBytes(nameArray[1])
- if err != nil {
- return nil, err
- }
- pageSizes = append(pageSizes, units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList))
- }
- return pageSizes, nil
- }
- func readUint(path string) (uint64, error) {
- v, err := ioutil.ReadFile(path)
- if err != nil {
- return 0, err
- }
- return parseUint(strings.TrimSpace(string(v)), 10, 64)
- }
// parseUint parses s as an unsigned integer in the given base and bit size,
// clamping negative inputs to zero instead of failing.
//
// When strconv.ParseUint rejects s, the string is re-parsed as a signed
// integer. A negative result, including one that overflowed below the
// minimum signed value, yields (0, nil). In every other failure case the
// original ParseUint error is returned.
func parseUint(s string, base, bitSize int) (uint64, error) {
	parsed, uerr := strconv.ParseUint(s, base, bitSize)
	if uerr == nil {
		return parsed, nil
	}

	signed, serr := strconv.ParseInt(s, base, bitSize)
	switch {
	case serr == nil:
		// A negative value that fits in the signed range.
		if signed < 0 {
			return 0, nil
		}
	case serr.(*strconv.NumError).Err == strconv.ErrRange && signed < 0:
		// A negative value below the signed minimum; ParseInt reports the
		// minimum value together with a range error.
		return 0, nil
	}
	return 0, uerr
}
- func parseKV(raw string) (string, uint64, error) {
- parts := strings.Fields(raw)
- switch len(parts) {
- case 2:
- v, err := parseUint(parts[1], 10, 64)
- if err != nil {
- return "", 0, err
- }
- return parts[0], v, nil
- default:
- return "", 0, ErrInvalidFormat
- }
- }
- func parseCgroupFile(path string) (map[string]string, error) {
- f, err := os.Open(path)
- if err != nil {
- return nil, err
- }
- defer f.Close()
- return parseCgroupFromReader(f)
- }
// parseCgroupFromReader parses cgroup membership entries from r, one per
// line, in the form "<id>:<subsystem>[,<subsystem>...]:<path>".
//
// The returned map has one key per non-empty subsystem name, mapped to the
// entry's path. Only the first two colons separate fields, so the path itself
// may contain colons. Entries with an empty subsystem list contribute
// nothing.
//
// An error is returned for a line with fewer than three colon-separated
// fields, or when reading from r fails (including a line too long for the
// scanner); the map is nil in both cases.
func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
	var (
		cgroups = make(map[string]string)
		s       = bufio.NewScanner(r)
	)
	for s.Scan() {
		var (
			text  = s.Text()
			parts = strings.SplitN(text, ":", 3)
		)
		if len(parts) < 3 {
			return nil, fmt.Errorf("invalid cgroup entry: %q", text)
		}
		for _, subs := range strings.Split(parts[1], ",") {
			if subs != "" {
				cgroups[subs] = parts[2]
			}
		}
	}
	// Err must be consulted after Scan has returned false: while Scan is
	// returning true it is always nil, so checking inside the loop would
	// never observe a failure.
	if err := s.Err(); err != nil {
		return nil, err
	}
	return cgroups, nil
}
- func getCgroupDestination(subsystem string) (string, error) {
- f, err := os.Open("/proc/self/mountinfo")
- if err != nil {
- return "", err
- }
- defer f.Close()
- s := bufio.NewScanner(f)
- for s.Scan() {
- if err := s.Err(); err != nil {
- return "", err
- }
- fields := strings.Fields(s.Text())
- for _, opt := range strings.Split(fields[len(fields)-1], ",") {
- if opt == subsystem {
- return fields[3], nil
- }
- }
- }
- return "", ErrNoCgroupMountDestination
- }
- func pathers(subystems []Subsystem) []pather {
- var out []pather
- for _, s := range subystems {
- if p, ok := s.(pather); ok {
- out = append(out, p)
- }
- }
- return out
- }
- func initializeSubsystem(s Subsystem, path Path, resources *specs.LinuxResources) error {
- if c, ok := s.(creator); ok {
- p, err := path(s.Name())
- if err != nil {
- return err
- }
- if err := c.Create(p, resources); err != nil {
- return err
- }
- } else if c, ok := s.(pather); ok {
- p, err := path(s.Name())
- if err != nil {
- return err
- }
- // do the default create if the group does not have a custom one
- if err := os.MkdirAll(c.Path(p), defaultDirPerm); err != nil {
- return err
- }
- }
- return nil
- }
// cleanPath normalizes path lexically. The empty string is returned
// unchanged. An absolute path is simply cleaned. A relative path is cleaned
// as if it were anchored at the filesystem root and then made relative
// again, which strips any leading ".." components.
func cleanPath(path string) string {
	if path == "" {
		return ""
	}

	cleaned := filepath.Clean(path)
	if filepath.IsAbs(cleaned) {
		return filepath.Clean(cleaned)
	}

	root := string(os.PathSeparator)
	// The Rel error is ignored on purpose; on failure rel is empty and the
	// final Clean turns it into ".".
	rel, _ := filepath.Rel(root, filepath.Clean(root+cleaned))
	return filepath.Clean(rel)
}
|