From 457399013bfa84f01e3092222041a098ef164764 Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Fri, 30 Jun 2023 19:32:26 +0200 Subject: [PATCH] vendor: github.com/containerd/cgroups/v3 v3.0.2 full diff: https://github.com/containerd/cgroups/compare/v3.0.1...v3.0.2 relevant changes: - cgroup2: only enable the cpuset controller if cpus or mems is specified - cgroup1 delete: proceed to the next subsystem when a cgroup is not found - Cgroup2: Reduce allocations for manager.Stat - Improve performance for pid stats (cgroups1) by re-using readUint - Reduce allocs in ReadUint64 by pre-allocating byte buffer - cgroup2: rm/simplify some code Signed-off-by: Sebastiaan van Stijn (cherry picked from commit f379af6d17d3aa3a1fc8890b1897933eb4458462) Signed-off-by: Sebastiaan van Stijn --- vendor.mod | 2 +- vendor.sum | 4 +- .../containerd/cgroups/v3/README.md | 21 ++ .../containerd/cgroups/v3/cgroup1/blkio.go | 1 - .../containerd/cgroups/v3/cgroup1/cgroup.go | 10 +- .../containerd/cgroups/v3/cgroup1/control.go | 2 +- .../containerd/cgroups/v3/cgroup1/memory.go | 2 +- .../containerd/cgroups/v3/cgroup1/opts.go | 6 +- .../containerd/cgroups/v3/cgroup1/pids.go | 9 +- .../containerd/cgroups/v3/cgroup1/rdma.go | 1 - .../containerd/cgroups/v3/cgroup1/systemd.go | 5 +- .../containerd/cgroups/v3/cgroup1/utils.go | 19 +- .../containerd/cgroups/v3/cgroup1/v1.go | 2 +- .../cgroups/v3/cgroup2/devicefilter.go | 4 +- .../containerd/cgroups/v3/cgroup2/manager.go | 277 +++++++----------- .../containerd/cgroups/v3/cgroup2/utils.go | 170 +++++++---- vendor/modules.txt | 4 +- 17 files changed, 271 insertions(+), 268 deletions(-) diff --git a/vendor.mod b/vendor.mod index 1e31d364a7..04886abac8 100644 --- a/vendor.mod +++ b/vendor.mod @@ -24,7 +24,7 @@ require ( github.com/aws/smithy-go v1.13.1 github.com/bsphere/le_go v0.0.0-20200109081728-fc06dab2caa8 github.com/cloudflare/cfssl v0.0.0-20180323000720-5d63dbd981b5 - github.com/containerd/cgroups/v3 v3.0.1 + github.com/containerd/cgroups/v3 
v3.0.2 github.com/containerd/containerd v1.6.21 github.com/containerd/continuity v0.3.0 github.com/containerd/fifo v1.1.0 diff --git a/vendor.sum b/vendor.sum index 544dabc553..d03c012de7 100644 --- a/vendor.sum +++ b/vendor.sum @@ -344,8 +344,8 @@ github.com/containerd/cgroups v0.0.0-20210114181951-8a68de567b68/go.mod h1:ZJeTF github.com/containerd/cgroups v1.0.1/go.mod h1:0SJrPIenamHDcZhEcJMNBB85rHcUsw4f25ZfBiPYRkU= github.com/containerd/cgroups v1.0.4 h1:jN/mbWBEaz+T1pi5OFtnkQ+8qnmEbAr1Oo1FRm5B0dA= github.com/containerd/cgroups v1.0.4/go.mod h1:nLNQtsF7Sl2HxNebu77i1R0oDlhiTG+kO4JTrUzo6IA= -github.com/containerd/cgroups/v3 v3.0.1 h1:4hfGvu8rfGIwVIDd+nLzn/B9ZXx4BcCjzt5ToenJRaE= -github.com/containerd/cgroups/v3 v3.0.1/go.mod h1:/vtwk1VXrtoa5AaZLkypuOJgA/6DyPMZHJPGQNtlHnw= +github.com/containerd/cgroups/v3 v3.0.2 h1:f5WFqIVSgo5IZmtTT3qVBo6TzI1ON6sycSBKkymb9L0= +github.com/containerd/cgroups/v3 v3.0.2/go.mod h1:JUgITrzdFqp42uI2ryGA+ge0ap/nxzYgkGmIcetmErE= github.com/containerd/console v0.0.0-20180822173158-c12b1e7919c1/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw= github.com/containerd/console v0.0.0-20181022165439-0650fd9eeb50/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw= github.com/containerd/console v0.0.0-20191206165004-02ecf6a7291e/go.mod h1:8Pf4gM6VEbTNRIT26AyyU7hxdQU3MvAvxVI0sc00XBE= diff --git a/vendor/github.com/containerd/cgroups/v3/README.md b/vendor/github.com/containerd/cgroups/v3/README.md index a90b87266e..c7f37c612f 100644 --- a/vendor/github.com/containerd/cgroups/v3/README.md +++ b/vendor/github.com/containerd/cgroups/v3/README.md @@ -201,6 +201,27 @@ if err != nil { } ``` + +### Get and set cgroup type +```go +m, err := cgroup2.LoadSystemd("/", "my-cgroup-abc.slice") +if err != nil { + return err +} + +// https://www.kernel.org/doc/html/v5.0/admin-guide/cgroup-v2.html#threads +cgType, err := m.GetType() +if err != nil { + return err +} +fmt.Println(cgType) + +err = m.SetType(cgroup2.Threaded) +if err != nil { + return err +} +``` 
+ ### Attention All static path should not include `/sys/fs/cgroup/` prefix, it should start with your own cgroups name diff --git a/vendor/github.com/containerd/cgroups/v3/cgroup1/blkio.go b/vendor/github.com/containerd/cgroups/v3/cgroup1/blkio.go index 9ee96e1396..3be884c7e6 100644 --- a/vendor/github.com/containerd/cgroups/v3/cgroup1/blkio.go +++ b/vendor/github.com/containerd/cgroups/v3/cgroup1/blkio.go @@ -331,7 +331,6 @@ type deviceKey struct { // keyed by major and minor number. Since devices may be mapped multiple times, // we err on taking the first occurrence. func getDevices(r io.Reader) (map[deviceKey]string, error) { - var ( s = bufio.NewScanner(r) devices = make(map[deviceKey]string) diff --git a/vendor/github.com/containerd/cgroups/v3/cgroup1/cgroup.go b/vendor/github.com/containerd/cgroups/v3/cgroup1/cgroup.go index 2a31b8041f..eae04f05bc 100644 --- a/vendor/github.com/containerd/cgroups/v3/cgroup1/cgroup.go +++ b/vendor/github.com/containerd/cgroups/v3/cgroup1/cgroup.go @@ -41,7 +41,7 @@ func New(path Path, resources *specs.LinuxResources, opts ...InitOpts) (Cgroup, return nil, err } } - subsystems, err := config.hiearchy() + subsystems, err := config.hierarchy() if err != nil { return nil, err } @@ -79,7 +79,7 @@ func Load(path Path, opts ...InitOpts) (Cgroup, error) { } } var activeSubsystems []Subsystem - subsystems, err := config.hiearchy() + subsystems, err := config.hierarchy() if err != nil { return nil, err } @@ -158,7 +158,7 @@ func (c *cgroup) subsystemsFilter(subsystems ...Name) []Subsystem { return c.subsystems } - var filteredSubsystems = []Subsystem{} + filteredSubsystems := []Subsystem{} for _, s := range c.subsystems { for _, f := range subsystems { if s.Name() == f { @@ -259,6 +259,10 @@ func (c *cgroup) Delete() error { // kernel prevents cgroups with running process from being removed, check the tree is empty procs, err := c.processes(s.Name(), true, cgroupProcs) if err != nil { + // if the control group does not exist within a 
subsystem, then proceed to the next subsystem + if errors.Is(err, os.ErrNotExist) { + continue + } return err } if len(procs) > 0 { diff --git a/vendor/github.com/containerd/cgroups/v3/cgroup1/control.go b/vendor/github.com/containerd/cgroups/v3/cgroup1/control.go index 6cbf5323ea..8fee13d037 100644 --- a/vendor/github.com/containerd/cgroups/v3/cgroup1/control.go +++ b/vendor/github.com/containerd/cgroups/v3/cgroup1/control.go @@ -28,7 +28,7 @@ type procType = string const ( cgroupProcs procType = "cgroup.procs" cgroupTasks procType = "tasks" - defaultDirPerm = 0755 + defaultDirPerm = 0o755 ) // defaultFilePerm is a var so that the test framework can change the filemode diff --git a/vendor/github.com/containerd/cgroups/v3/cgroup1/memory.go b/vendor/github.com/containerd/cgroups/v3/cgroup1/memory.go index e84ec2b3c1..caf5e9a7eb 100644 --- a/vendor/github.com/containerd/cgroups/v3/cgroup1/memory.go +++ b/vendor/github.com/containerd/cgroups/v3/cgroup1/memory.go @@ -472,7 +472,7 @@ func (m *memoryController) memoryEvent(path string, event MemoryEvent) (uintptr, defer evtFile.Close() data := fmt.Sprintf("%d %d %s", efd, evtFile.Fd(), event.Arg()) evctlPath := filepath.Join(root, "cgroup.event_control") - if err := os.WriteFile(evctlPath, []byte(data), 0700); err != nil { + if err := os.WriteFile(evctlPath, []byte(data), 0o700); err != nil { unix.Close(efd) return 0, err } diff --git a/vendor/github.com/containerd/cgroups/v3/cgroup1/opts.go b/vendor/github.com/containerd/cgroups/v3/cgroup1/opts.go index 187e0f5eab..3aa7f4fbbb 100644 --- a/vendor/github.com/containerd/cgroups/v3/cgroup1/opts.go +++ b/vendor/github.com/containerd/cgroups/v3/cgroup1/opts.go @@ -36,13 +36,13 @@ type InitOpts func(*InitConfig) error type InitConfig struct { // InitCheck can be used to check initialization errors from the subsystem InitCheck InitCheck - hiearchy Hierarchy + hierarchy Hierarchy } func newInitConfig() *InitConfig { return &InitConfig{ InitCheck: RequireDevices, - hiearchy: 
Default, + hierarchy: Default, } } @@ -66,7 +66,7 @@ func RequireDevices(s Subsystem, _ Path, _ error) error { // The default list is coming from /proc/self/mountinfo. func WithHiearchy(h Hierarchy) InitOpts { return func(c *InitConfig) error { - c.hiearchy = h + c.hierarchy = h return nil } } diff --git a/vendor/github.com/containerd/cgroups/v3/cgroup1/pids.go b/vendor/github.com/containerd/cgroups/v3/cgroup1/pids.go index 9b5b263af5..31e2dda164 100644 --- a/vendor/github.com/containerd/cgroups/v3/cgroup1/pids.go +++ b/vendor/github.com/containerd/cgroups/v3/cgroup1/pids.go @@ -20,7 +20,6 @@ import ( "os" "path/filepath" "strconv" - "strings" v1 "github.com/containerd/cgroups/v3/cgroup1/stats" specs "github.com/opencontainers/runtime-spec/specs-go" @@ -67,16 +66,10 @@ func (p *pidsController) Stat(path string, stats *v1.Metrics) error { if err != nil { return err } - var max uint64 - maxData, err := os.ReadFile(filepath.Join(p.Path(path), "pids.max")) + max, err := readUint(filepath.Join(p.Path(path), "pids.max")) if err != nil { return err } - if maxS := strings.TrimSpace(string(maxData)); maxS != "max" { - if max, err = parseUint(maxS, 10, 64); err != nil { - return err - } - } stats.Pids = &v1.PidsStat{ Current: current, Limit: max, diff --git a/vendor/github.com/containerd/cgroups/v3/cgroup1/rdma.go b/vendor/github.com/containerd/cgroups/v3/cgroup1/rdma.go index 99299070cf..0a45ae08fb 100644 --- a/vendor/github.com/containerd/cgroups/v3/cgroup1/rdma.go +++ b/vendor/github.com/containerd/cgroups/v3/cgroup1/rdma.go @@ -124,7 +124,6 @@ func toRdmaEntry(strEntries []string) []*v1.RdmaEntry { } func (p *rdmaController) Stat(path string, stats *v1.Metrics) error { - currentData, err := os.ReadFile(filepath.Join(p.Path(path), "rdma.current")) if err != nil { return err diff --git a/vendor/github.com/containerd/cgroups/v3/cgroup1/systemd.go b/vendor/github.com/containerd/cgroups/v3/cgroup1/systemd.go index d327effc8b..335a255b83 100644 --- 
a/vendor/github.com/containerd/cgroups/v3/cgroup1/systemd.go +++ b/vendor/github.com/containerd/cgroups/v3/cgroup1/systemd.go @@ -29,7 +29,7 @@ import ( const ( SystemdDbus Name = "systemd" - defaultSlice = "system.slice" + defaultSlice Name = "system.slice" ) var ( @@ -56,7 +56,7 @@ func Systemd() ([]Subsystem, error) { func Slice(slice, name string) Path { if slice == "" { - slice = defaultSlice + slice = string(defaultSlice) } return func(subsystem Name) (string, error) { return filepath.Join(slice, name), nil @@ -70,7 +70,6 @@ func NewSystemd(root string) (*SystemdController, error) { } type SystemdController struct { - mu sync.Mutex root string } diff --git a/vendor/github.com/containerd/cgroups/v3/cgroup1/utils.go b/vendor/github.com/containerd/cgroups/v3/cgroup1/utils.go index 8ae005dad2..2b7d552001 100644 --- a/vendor/github.com/containerd/cgroups/v3/cgroup1/utils.go +++ b/vendor/github.com/containerd/cgroups/v3/cgroup1/utils.go @@ -18,6 +18,7 @@ package cgroup1 import ( "bufio" + "bytes" "fmt" "os" "path/filepath" @@ -131,11 +132,25 @@ func hugePageSizes() ([]string, error) { } func readUint(path string) (uint64, error) { - v, err := os.ReadFile(path) + f, err := os.Open(path) if err != nil { return 0, err } - return parseUint(strings.TrimSpace(string(v)), 10, 64) + defer f.Close() + + // We should only need 20 bytes for the max uint64, but for a nice power of 2 + // lets use 32. + b := make([]byte, 32) + n, err := f.Read(b) + if err != nil { + return 0, err + } + s := string(bytes.TrimSpace(b[:n])) + if s == "max" { + // Return 0 for the max value to maintain backward compatibility. 
+ return 0, nil + } + return parseUint(s, 10, 64) } func parseUint(s string, base, bitSize int) (uint64, error) { diff --git a/vendor/github.com/containerd/cgroups/v3/cgroup1/v1.go b/vendor/github.com/containerd/cgroups/v3/cgroup1/v1.go index d4c7db6f20..ce025bbd98 100644 --- a/vendor/github.com/containerd/cgroups/v3/cgroup1/v1.go +++ b/vendor/github.com/containerd/cgroups/v3/cgroup1/v1.go @@ -45,7 +45,7 @@ func Default() ([]Subsystem, error) { } // v1MountPoint returns the mount point where the cgroup -// mountpoints are mounted in a single hiearchy +// mountpoints are mounted in a single hierarchy func v1MountPoint() (string, error) { f, err := os.Open("/proc/self/mountinfo") if err != nil { diff --git a/vendor/github.com/containerd/cgroups/v3/cgroup2/devicefilter.go b/vendor/github.com/containerd/cgroups/v3/cgroup2/devicefilter.go index 3a73ab1050..0cd5f7f3dd 100644 --- a/vendor/github.com/containerd/cgroups/v3/cgroup2/devicefilter.go +++ b/vendor/github.com/containerd/cgroups/v3/cgroup2/devicefilter.go @@ -167,7 +167,7 @@ func (p *program) appendDevice(dev specs.LinuxDeviceCgroup) error { } p.insts = append(p.insts, acceptBlock(dev.Allow)...) 
// set blockSym to the first instruction we added in this iteration - p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym) + p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].WithSymbol(blockSym) p.blockID++ return nil } @@ -180,7 +180,7 @@ func (p *program) finalize() (asm.Instructions, error) { blockSym := fmt.Sprintf("block-%d", p.blockID) p.insts = append(p.insts, // R0 <- 0 - asm.Mov.Imm32(asm.R0, 0).Sym(blockSym), + asm.Mov.Imm32(asm.R0, 0).WithSymbol(blockSym), asm.Return(), ) p.blockID = -1 diff --git a/vendor/github.com/containerd/cgroups/v3/cgroup2/manager.go b/vendor/github.com/containerd/cgroups/v3/cgroup2/manager.go index fc9fcf4533..4a4292d5fc 100644 --- a/vendor/github.com/containerd/cgroups/v3/cgroup2/manager.go +++ b/vendor/github.com/containerd/cgroups/v3/cgroup2/manager.go @@ -21,13 +21,11 @@ import ( "context" "errors" "fmt" - "io" "math" "os" "path/filepath" "strconv" "strings" - "syscall" "time" "github.com/containerd/cgroups/v3/cgroup2/stats" @@ -43,13 +41,12 @@ const ( subtreeControl = "cgroup.subtree_control" controllersFile = "cgroup.controllers" killFile = "cgroup.kill" + typeFile = "cgroup.type" defaultCgroup2Path = "/sys/fs/cgroup" defaultSlice = "system.slice" ) -var ( - canDelegate bool -) +var canDelegate bool type Event struct { Low uint64 @@ -99,7 +96,9 @@ func (r *Resources) Values() (o []Value) { func (r *Resources) EnabledControllers() (c []string) { if r.CPU != nil { c = append(c, "cpu") - c = append(c, "cpuset") + if r.CPU.Cpus != "" || r.CPU.Mems != "" { + c = append(c, "cpuset") + } } if r.Memory != nil { c = append(c, "memory") @@ -238,6 +237,35 @@ func setResources(path string, resources *Resources) error { return nil } +// CgroupType represents the types a cgroup can be. 
+type CgroupType string + +const ( + Domain CgroupType = "domain" + Threaded CgroupType = "threaded" +) + +func (c *Manager) GetType() (CgroupType, error) { + val, err := os.ReadFile(filepath.Join(c.path, typeFile)) + if err != nil { + return "", err + } + trimmed := strings.TrimSpace(string(val)) + return CgroupType(trimmed), nil +} + +func (c *Manager) SetType(cgType CgroupType) error { + // NOTE: We could abort if cgType != Threaded here as currently + // it's not possible to revert back to domain, but not sure + // it's worth being that opinionated, especially if that may + // ever change. + v := Value{ + filename: typeFile, + value: string(cgType), + } + return writeValues(c.path, []Value{v}) +} + func (c *Manager) RootControllers() ([]string, error) { b, err := os.ReadFile(filepath.Join(c.unifiedMountpoint, controllersFile)) if err != nil { @@ -492,17 +520,15 @@ func (c *Manager) MoveTo(destination *Manager) error { return nil } -var singleValueFiles = []string{ - "pids.current", - "pids.max", -} - func (c *Manager) Stat() (*stats.Metrics, error) { controllers, err := c.Controllers() if err != nil { return nil, err } - out := make(map[string]interface{}) + // Sizing this avoids an allocation to increase the map at runtime; + // currently the default bucket size is 8 and we put 40+ elements + // in it so we'd always end up allocating. 
+ out := make(map[string]uint64, 50) for _, controller := range controllers { switch controller { case "cpu", "memory": @@ -514,66 +540,58 @@ func (c *Manager) Stat() (*stats.Metrics, error) { } } } - for _, name := range singleValueFiles { - if err := readSingleFile(c.path, name, out); err != nil { - if os.IsNotExist(err) { - continue - } - return nil, err - } - } - memoryEvents := make(map[string]interface{}) + memoryEvents := make(map[string]uint64) if err := readKVStatsFile(c.path, "memory.events", memoryEvents); err != nil { if !os.IsNotExist(err) { return nil, err } } - var metrics stats.Metrics + var metrics stats.Metrics metrics.Pids = &stats.PidsStat{ - Current: getPidValue("pids.current", out), - Limit: getPidValue("pids.max", out), + Current: getStatFileContentUint64(filepath.Join(c.path, "pids.current")), + Limit: getStatFileContentUint64(filepath.Join(c.path, "pids.max")), } metrics.CPU = &stats.CPUStat{ - UsageUsec: getUint64Value("usage_usec", out), - UserUsec: getUint64Value("user_usec", out), - SystemUsec: getUint64Value("system_usec", out), - NrPeriods: getUint64Value("nr_periods", out), - NrThrottled: getUint64Value("nr_throttled", out), - ThrottledUsec: getUint64Value("throttled_usec", out), + UsageUsec: out["usage_usec"], + UserUsec: out["user_usec"], + SystemUsec: out["system_usec"], + NrPeriods: out["nr_periods"], + NrThrottled: out["nr_throttled"], + ThrottledUsec: out["throttled_usec"], } metrics.Memory = &stats.MemoryStat{ - Anon: getUint64Value("anon", out), - File: getUint64Value("file", out), - KernelStack: getUint64Value("kernel_stack", out), - Slab: getUint64Value("slab", out), - Sock: getUint64Value("sock", out), - Shmem: getUint64Value("shmem", out), - FileMapped: getUint64Value("file_mapped", out), - FileDirty: getUint64Value("file_dirty", out), - FileWriteback: getUint64Value("file_writeback", out), - AnonThp: getUint64Value("anon_thp", out), - InactiveAnon: getUint64Value("inactive_anon", out), - ActiveAnon: 
getUint64Value("active_anon", out), - InactiveFile: getUint64Value("inactive_file", out), - ActiveFile: getUint64Value("active_file", out), - Unevictable: getUint64Value("unevictable", out), - SlabReclaimable: getUint64Value("slab_reclaimable", out), - SlabUnreclaimable: getUint64Value("slab_unreclaimable", out), - Pgfault: getUint64Value("pgfault", out), - Pgmajfault: getUint64Value("pgmajfault", out), - WorkingsetRefault: getUint64Value("workingset_refault", out), - WorkingsetActivate: getUint64Value("workingset_activate", out), - WorkingsetNodereclaim: getUint64Value("workingset_nodereclaim", out), - Pgrefill: getUint64Value("pgrefill", out), - Pgscan: getUint64Value("pgscan", out), - Pgsteal: getUint64Value("pgsteal", out), - Pgactivate: getUint64Value("pgactivate", out), - Pgdeactivate: getUint64Value("pgdeactivate", out), - Pglazyfree: getUint64Value("pglazyfree", out), - Pglazyfreed: getUint64Value("pglazyfreed", out), - ThpFaultAlloc: getUint64Value("thp_fault_alloc", out), - ThpCollapseAlloc: getUint64Value("thp_collapse_alloc", out), + Anon: out["anon"], + File: out["file"], + KernelStack: out["kernel_stack"], + Slab: out["slab"], + Sock: out["sock"], + Shmem: out["shmem"], + FileMapped: out["file_mapped"], + FileDirty: out["file_dirty"], + FileWriteback: out["file_writeback"], + AnonThp: out["anon_thp"], + InactiveAnon: out["inactive_anon"], + ActiveAnon: out["active_anon"], + InactiveFile: out["inactive_file"], + ActiveFile: out["active_file"], + Unevictable: out["unevictable"], + SlabReclaimable: out["slab_reclaimable"], + SlabUnreclaimable: out["slab_unreclaimable"], + Pgfault: out["pgfault"], + Pgmajfault: out["pgmajfault"], + WorkingsetRefault: out["workingset_refault"], + WorkingsetActivate: out["workingset_activate"], + WorkingsetNodereclaim: out["workingset_nodereclaim"], + Pgrefill: out["pgrefill"], + Pgscan: out["pgscan"], + Pgsteal: out["pgsteal"], + Pgactivate: out["pgactivate"], + Pgdeactivate: out["pgdeactivate"], + Pglazyfree: 
out["pglazyfree"], + Pglazyfreed: out["pglazyfreed"], + ThpFaultAlloc: out["thp_fault_alloc"], + ThpCollapseAlloc: out["thp_collapse_alloc"], Usage: getStatFileContentUint64(filepath.Join(c.path, "memory.current")), UsageLimit: getStatFileContentUint64(filepath.Join(c.path, "memory.max")), SwapUsage: getStatFileContentUint64(filepath.Join(c.path, "memory.swap.current")), @@ -581,11 +599,11 @@ func (c *Manager) Stat() (*stats.Metrics, error) { } if len(memoryEvents) > 0 { metrics.MemoryEvents = &stats.MemoryEvents{ - Low: getUint64Value("low", memoryEvents), - High: getUint64Value("high", memoryEvents), - Max: getUint64Value("max", memoryEvents), - Oom: getUint64Value("oom", memoryEvents), - OomKill: getUint64Value("oom_kill", memoryEvents), + Low: memoryEvents["low"], + High: memoryEvents["high"], + Max: memoryEvents["max"], + Oom: memoryEvents["oom"], + OomKill: memoryEvents["oom_kill"], } } metrics.Io = &stats.IOStat{Usage: readIoStats(c.path)} @@ -598,56 +616,7 @@ func (c *Manager) Stat() (*stats.Metrics, error) { return &metrics, nil } -func getUint64Value(key string, out map[string]interface{}) uint64 { - v, ok := out[key] - if !ok { - return 0 - } - switch t := v.(type) { - case uint64: - return t - } - return 0 -} - -func getPidValue(key string, out map[string]interface{}) uint64 { - v, ok := out[key] - if !ok { - return 0 - } - switch t := v.(type) { - case uint64: - return t - case string: - if t == "max" { - return math.MaxUint64 - } - } - return 0 -} - -func readSingleFile(path string, file string, out map[string]interface{}) error { - f, err := os.Open(filepath.Join(path, file)) - if err != nil { - return err - } - defer f.Close() - data, err := io.ReadAll(f) - if err != nil { - return err - } - s := strings.TrimSpace(string(data)) - v, err := parseUint(s, 10, 64) - if err != nil { - // if we cannot parse as a uint, parse as a string - out[file] = s - return nil - } - out[file] = v - return nil -} - -func readKVStatsFile(path string, file string, out 
map[string]interface{}) error { +func readKVStatsFile(path string, file string, out map[string]uint64) error { f, err := os.Open(filepath.Join(path, file)) if err != nil { return err @@ -692,16 +661,12 @@ func (c *Manager) freeze(path string, state State) error { func (c *Manager) isCgroupEmpty() bool { // In case of any error we return true so that we exit and don't leak resources - out := make(map[string]interface{}) + out := make(map[string]uint64) if err := readKVStatsFile(c.path, "cgroup.events", out); err != nil { return true } if v, ok := out["populated"]; ok { - populated, ok := v.(uint64) - if !ok { - return true - } - return populated == 0 + return v == 0 } return true } @@ -709,19 +674,19 @@ func (c *Manager) isCgroupEmpty() bool { // MemoryEventFD returns inotify file descriptor and 'memory.events' inotify watch descriptor func (c *Manager) MemoryEventFD() (int, uint32, error) { fpath := filepath.Join(c.path, "memory.events") - fd, err := syscall.InotifyInit() + fd, err := unix.InotifyInit() if err != nil { return 0, 0, errors.New("failed to create inotify fd") } - wd, err := syscall.InotifyAddWatch(fd, fpath, unix.IN_MODIFY) + wd, err := unix.InotifyAddWatch(fd, fpath, unix.IN_MODIFY) if err != nil { - syscall.Close(fd) + unix.Close(fd) return 0, 0, fmt.Errorf("failed to add inotify watch for %q: %w", fpath, err) } // monitor to detect process exit/cgroup deletion evpath := filepath.Join(c.path, "cgroup.events") - if _, err = syscall.InotifyAddWatch(fd, evpath, unix.IN_MODIFY); err != nil { - syscall.Close(fd) + if _, err = unix.InotifyAddWatch(fd, evpath, unix.IN_MODIFY); err != nil { + unix.Close(fd) return 0, 0, fmt.Errorf("failed to add inotify watch for %q: %w", evpath, err) } @@ -736,41 +701,6 @@ func (c *Manager) EventChan() (<-chan Event, <-chan error) { return ec, errCh } -func parseMemoryEvents(out map[string]interface{}) (Event, error) { - e := Event{} - if v, ok := out["high"]; ok { - e.High, ok = v.(uint64) - if !ok { - return Event{}, 
fmt.Errorf("cannot convert high to uint64: %+v", v) - } - } - if v, ok := out["low"]; ok { - e.Low, ok = v.(uint64) - if !ok { - return Event{}, fmt.Errorf("cannot convert low to uint64: %+v", v) - } - } - if v, ok := out["max"]; ok { - e.Max, ok = v.(uint64) - if !ok { - return Event{}, fmt.Errorf("cannot convert max to uint64: %+v", v) - } - } - if v, ok := out["oom"]; ok { - e.OOM, ok = v.(uint64) - if !ok { - return Event{}, fmt.Errorf("cannot convert oom to uint64: %+v", v) - } - } - if v, ok := out["oom_kill"]; ok { - e.OOMKill, ok = v.(uint64) - if !ok { - return Event{}, fmt.Errorf("cannot convert oom_kill to uint64: %+v", v) - } - } - return e, nil -} - func (c *Manager) waitForEvents(ec chan<- Event, errCh chan<- error) { defer close(errCh) @@ -779,17 +709,17 @@ func (c *Manager) waitForEvents(ec chan<- Event, errCh chan<- error) { errCh <- err return } - defer syscall.Close(fd) + defer unix.Close(fd) for { - buffer := make([]byte, syscall.SizeofInotifyEvent*10) - bytesRead, err := syscall.Read(fd, buffer) + buffer := make([]byte, unix.SizeofInotifyEvent*10) + bytesRead, err := unix.Read(fd, buffer) if err != nil { errCh <- err return } - if bytesRead >= syscall.SizeofInotifyEvent { - out := make(map[string]interface{}) + if bytesRead >= unix.SizeofInotifyEvent { + out := make(map[string]uint64) if err := readKVStatsFile(c.path, "memory.events", out); err != nil { // When cgroup is deleted read may return -ENODEV instead of -ENOENT from open. 
if _, statErr := os.Lstat(filepath.Join(c.path, "memory.events")); !os.IsNotExist(statErr) { @@ -797,12 +727,13 @@ func (c *Manager) waitForEvents(ec chan<- Event, errCh chan<- error) { } return } - e, err := parseMemoryEvents(out) - if err != nil { - errCh <- err - return + ec <- Event{ + Low: out["low"], + High: out["high"], + Max: out["max"], + OOM: out["oom"], + OOMKill: out["oom_kill"], } - ec <- e if c.isCgroupEmpty() { return } @@ -818,7 +749,7 @@ func setDevices(path string, devices []specs.LinuxDeviceCgroup) error { if err != nil { return err } - dirFD, err := unix.Open(path, unix.O_DIRECTORY|unix.O_RDONLY|unix.O_CLOEXEC, 0600) + dirFD, err := unix.Open(path, unix.O_DIRECTORY|unix.O_RDONLY|unix.O_CLOEXEC, 0o600) if err != nil { return fmt.Errorf("cannot get dir FD for %s", path) } diff --git a/vendor/github.com/containerd/cgroups/v3/cgroup2/utils.go b/vendor/github.com/containerd/cgroups/v3/cgroup2/utils.go index 7765018308..f5302444a7 100644 --- a/vendor/github.com/containerd/cgroups/v3/cgroup2/utils.go +++ b/vendor/github.com/containerd/cgroups/v3/cgroup2/utils.go @@ -18,6 +18,7 @@ package cgroup2 import ( "bufio" + "errors" "fmt" "io" "math" @@ -25,6 +26,7 @@ import ( "path/filepath" "strconv" "strings" + "sync" "time" "unsafe" @@ -39,7 +41,7 @@ import ( const ( cgroupProcs = "cgroup.procs" cgroupThreads = "cgroup.threads" - defaultDirPerm = 0755 + defaultDirPerm = 0o755 ) // defaultFilePerm is a var so that the test framework can change the filemode @@ -92,19 +94,13 @@ func parseCgroupProcsFile(path string) ([]uint64, error) { return out, nil } -func parseKV(raw string) (string, interface{}, error) { +func parseKV(raw string) (string, uint64, error) { parts := strings.Fields(raw) - switch len(parts) { - case 2: - v, err := parseUint(parts[1], 10, 64) - if err != nil { - // if we cannot parse as a uint, parse as a string - return parts[0], parts[1], nil - } - return parts[0], v, nil - default: + if len(parts) != 2 { return "", 0, ErrInvalidFormat } + v, 
err := parseUint(parts[1], 10, 64) + return parts[0], v, err } func parseUint(s string, base, bitSize int) (uint64, error) { @@ -136,9 +132,7 @@ func parseCgroupFile(path string) (string, error) { } func parseCgroupFromReader(r io.Reader) (string, error) { - var ( - s = bufio.NewScanner(r) - ) + s := bufio.NewScanner(r) for s.Scan() { var ( text = s.Text() @@ -244,18 +238,28 @@ func ToResources(spec *specs.LinuxResources) *Resources { // Gets uint64 parsed content of single value cgroup stat file func getStatFileContentUint64(filePath string) uint64 { - contents, err := os.ReadFile(filePath) + f, err := os.Open(filePath) if err != nil { return 0 } - trimmed := strings.TrimSpace(string(contents)) + defer f.Close() + + // We expect an unsigned 64 bit integer, or a "max" string + // in some cases. + buf := make([]byte, 32) + n, err := f.Read(buf) + if err != nil { + return 0 + } + + trimmed := strings.TrimSpace(string(buf[:n])) if trimmed == "max" { return math.MaxUint64 } res, err := parseUint(trimmed, 10, 64) if err != nil { - logrus.Errorf("unable to parse %q as a uint from Cgroup file %q", string(contents), filePath) + logrus.Errorf("unable to parse %q as a uint from Cgroup file %q", trimmed, filePath) return res } @@ -385,58 +389,96 @@ func systemdUnitFromPath(path string) string { } func readHugeTlbStats(path string) []*stats.HugeTlbStat { - var usage = []*stats.HugeTlbStat{} - var keyUsage = make(map[string]*stats.HugeTlbStat) - f, err := os.Open(path) - if err != nil { - return usage - } - files, err := f.Readdir(-1) - f.Close() - if err != nil { - return usage - } - - for _, file := range files { - if strings.Contains(file.Name(), "hugetlb") && - (strings.HasSuffix(file.Name(), "max") || strings.HasSuffix(file.Name(), "current")) { - var hugeTlb *stats.HugeTlbStat - var ok bool - fileName := strings.Split(file.Name(), ".") - pageSize := fileName[1] - if hugeTlb, ok = keyUsage[pageSize]; !ok { - hugeTlb = &stats.HugeTlbStat{} - } - hugeTlb.Pagesize = pageSize 
- out, err := os.ReadFile(filepath.Join(path, file.Name())) - if err != nil { - continue - } - var value uint64 - stringVal := strings.TrimSpace(string(out)) - if stringVal == "max" { - value = math.MaxUint64 - } else { - value, err = strconv.ParseUint(stringVal, 10, 64) - } - if err != nil { - continue - } - switch fileName[2] { - case "max": - hugeTlb.Max = value - case "current": - hugeTlb.Current = value - } - keyUsage[pageSize] = hugeTlb + hpSizes := hugePageSizes() + usage := make([]*stats.HugeTlbStat, len(hpSizes)) + for idx, pagesize := range hpSizes { + usage[idx] = &stats.HugeTlbStat{ + Max: getStatFileContentUint64(filepath.Join(path, "hugetlb."+pagesize+".max")), + Current: getStatFileContentUint64(filepath.Join(path, "hugetlb."+pagesize+".current")), + Pagesize: pagesize, } } - for _, entry := range keyUsage { - usage = append(usage, entry) - } return usage } +var ( + hPageSizes []string + initHPSOnce sync.Once +) + +// The following idea and implementation is taken pretty much line for line from +// runc. Because the hugetlb files are well known, and the only variable thrown in +// the mix is what huge page sizes you have on your host, this lends itself well +// to doing the work to find the files present once, and then re-using this. This +// saves a os.Readdirnames(0) call to search for hugeltb files on every `manager.Stat` +// call. 
+// https://github.com/opencontainers/runc/blob/3a2c0c2565644d8a7e0f1dd594a060b21fa96cf1/libcontainer/cgroups/utils.go#L301 +func hugePageSizes() []string { + initHPSOnce.Do(func() { + dir, err := os.OpenFile("/sys/kernel/mm/hugepages", unix.O_DIRECTORY|unix.O_RDONLY, 0) + if err != nil { + return + } + files, err := dir.Readdirnames(0) + dir.Close() + if err != nil { + return + } + + hPageSizes, err = getHugePageSizeFromFilenames(files) + if err != nil { + logrus.Warnf("hugePageSizes: %s", err) + } + }) + + return hPageSizes +} + +func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) { + pageSizes := make([]string, 0, len(fileNames)) + var warn error + + for _, file := range fileNames { + // example: hugepages-1048576kB + val := strings.TrimPrefix(file, "hugepages-") + if len(val) == len(file) { + // Unexpected file name: no prefix found, ignore it. + continue + } + // In all known versions of Linux up to 6.3 the suffix is always + // "kB". If we find something else, produce an error but keep going. + eLen := len(val) - 2 + val = strings.TrimSuffix(val, "kB") + if len(val) != eLen { + // Highly unlikely. + if warn == nil { + warn = errors.New(file + `: invalid suffix (expected "kB")`) + } + continue + } + size, err := strconv.Atoi(val) + if err != nil { + // Highly unlikely. + if warn == nil { + warn = fmt.Errorf("%s: %w", file, err) + } + continue + } + // Model after https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/hugetlb_cgroup.c?id=eff48ddeab782e35e58ccc8853f7386bbae9dec4#n574 + // but in our case the size is in KB already. 
+ if size >= (1 << 20) { + val = strconv.Itoa(size>>20) + "GB" + } else if size >= (1 << 10) { + val = strconv.Itoa(size>>10) + "MB" + } else { + val += "KB" + } + pageSizes = append(pageSizes, val) + } + + return pageSizes, warn +} + func getSubreaper() (int, error) { var i uintptr if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil { diff --git a/vendor/modules.txt b/vendor/modules.txt index 4ae0e00ece..3483c56266 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -206,8 +206,8 @@ github.com/container-storage-interface/spec/lib/go/csi # github.com/containerd/cgroups v1.0.4 ## explicit; go 1.17 github.com/containerd/cgroups/stats/v1 -# github.com/containerd/cgroups/v3 v3.0.1 -## explicit; go 1.17 +# github.com/containerd/cgroups/v3 v3.0.2 +## explicit; go 1.18 github.com/containerd/cgroups/v3 github.com/containerd/cgroups/v3/cgroup1 github.com/containerd/cgroups/v3/cgroup1/stats