vendor: github.com/containerd/cgroups/v3 v3.0.2

full diff: https://github.com/containerd/cgroups/compare/v3.0.1...v3.0.2

relevant changes:

- cgroup2: only enable the cpuset controller if cpus or mems is specified
- cgroup1 delete: proceed to the next subsystem when a cgroup is not found
- Cgroup2: Reduce allocations for manager.Stat
- Improve performance for pid stats (cgroup1) by re-using readUint
- Reduce allocs in ReadUint64 by pre-allocating byte buffer
- cgroup2: rm/simplify some code

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
(cherry picked from commit f379af6d17)
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
This commit is contained in:
Sebastiaan van Stijn 2023-06-30 19:32:26 +02:00
parent 3bd0f582c9
commit 457399013b
No known key found for this signature in database
GPG key ID: 76698F39D527CE8C
17 changed files with 271 additions and 268 deletions

View file

@ -24,7 +24,7 @@ require (
github.com/aws/smithy-go v1.13.1
github.com/bsphere/le_go v0.0.0-20200109081728-fc06dab2caa8
github.com/cloudflare/cfssl v0.0.0-20180323000720-5d63dbd981b5
github.com/containerd/cgroups/v3 v3.0.1
github.com/containerd/cgroups/v3 v3.0.2
github.com/containerd/containerd v1.6.21
github.com/containerd/continuity v0.3.0
github.com/containerd/fifo v1.1.0

View file

@ -344,8 +344,8 @@ github.com/containerd/cgroups v0.0.0-20210114181951-8a68de567b68/go.mod h1:ZJeTF
github.com/containerd/cgroups v1.0.1/go.mod h1:0SJrPIenamHDcZhEcJMNBB85rHcUsw4f25ZfBiPYRkU=
github.com/containerd/cgroups v1.0.4 h1:jN/mbWBEaz+T1pi5OFtnkQ+8qnmEbAr1Oo1FRm5B0dA=
github.com/containerd/cgroups v1.0.4/go.mod h1:nLNQtsF7Sl2HxNebu77i1R0oDlhiTG+kO4JTrUzo6IA=
github.com/containerd/cgroups/v3 v3.0.1 h1:4hfGvu8rfGIwVIDd+nLzn/B9ZXx4BcCjzt5ToenJRaE=
github.com/containerd/cgroups/v3 v3.0.1/go.mod h1:/vtwk1VXrtoa5AaZLkypuOJgA/6DyPMZHJPGQNtlHnw=
github.com/containerd/cgroups/v3 v3.0.2 h1:f5WFqIVSgo5IZmtTT3qVBo6TzI1ON6sycSBKkymb9L0=
github.com/containerd/cgroups/v3 v3.0.2/go.mod h1:JUgITrzdFqp42uI2ryGA+ge0ap/nxzYgkGmIcetmErE=
github.com/containerd/console v0.0.0-20180822173158-c12b1e7919c1/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw=
github.com/containerd/console v0.0.0-20181022165439-0650fd9eeb50/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw=
github.com/containerd/console v0.0.0-20191206165004-02ecf6a7291e/go.mod h1:8Pf4gM6VEbTNRIT26AyyU7hxdQU3MvAvxVI0sc00XBE=

View file

@ -201,6 +201,27 @@ if err != nil {
}
```
### Get and set cgroup type
```go
m, err := cgroup2.LoadSystemd("/", "my-cgroup-abc.slice")
if err != nil {
return err
}
// https://www.kernel.org/doc/html/v5.0/admin-guide/cgroup-v2.html#threads
cgType, err := m.GetType()
if err != nil {
return err
}
fmt.Println(cgType)
err = m.SetType(cgroup2.Threaded)
if err != nil {
return err
}
```
### Attention
Static paths should not include the `/sys/fs/cgroup/` prefix; they should start with your own cgroup's name.

View file

@ -331,7 +331,6 @@ type deviceKey struct {
// keyed by major and minor number. Since devices may be mapped multiple times,
// we err on taking the first occurrence.
func getDevices(r io.Reader) (map[deviceKey]string, error) {
var (
s = bufio.NewScanner(r)
devices = make(map[deviceKey]string)

View file

@ -41,7 +41,7 @@ func New(path Path, resources *specs.LinuxResources, opts ...InitOpts) (Cgroup,
return nil, err
}
}
subsystems, err := config.hiearchy()
subsystems, err := config.hierarchy()
if err != nil {
return nil, err
}
@ -79,7 +79,7 @@ func Load(path Path, opts ...InitOpts) (Cgroup, error) {
}
}
var activeSubsystems []Subsystem
subsystems, err := config.hiearchy()
subsystems, err := config.hierarchy()
if err != nil {
return nil, err
}
@ -158,7 +158,7 @@ func (c *cgroup) subsystemsFilter(subsystems ...Name) []Subsystem {
return c.subsystems
}
var filteredSubsystems = []Subsystem{}
filteredSubsystems := []Subsystem{}
for _, s := range c.subsystems {
for _, f := range subsystems {
if s.Name() == f {
@ -259,6 +259,10 @@ func (c *cgroup) Delete() error {
// kernel prevents cgroups with running process from being removed, check the tree is empty
procs, err := c.processes(s.Name(), true, cgroupProcs)
if err != nil {
// if the control group does not exist within a subsystem, then proceed to the next subsystem
if errors.Is(err, os.ErrNotExist) {
continue
}
return err
}
if len(procs) > 0 {

View file

@ -28,7 +28,7 @@ type procType = string
const (
cgroupProcs procType = "cgroup.procs"
cgroupTasks procType = "tasks"
defaultDirPerm = 0755
defaultDirPerm = 0o755
)
// defaultFilePerm is a var so that the test framework can change the filemode

View file

@ -472,7 +472,7 @@ func (m *memoryController) memoryEvent(path string, event MemoryEvent) (uintptr,
defer evtFile.Close()
data := fmt.Sprintf("%d %d %s", efd, evtFile.Fd(), event.Arg())
evctlPath := filepath.Join(root, "cgroup.event_control")
if err := os.WriteFile(evctlPath, []byte(data), 0700); err != nil {
if err := os.WriteFile(evctlPath, []byte(data), 0o700); err != nil {
unix.Close(efd)
return 0, err
}

View file

@ -36,13 +36,13 @@ type InitOpts func(*InitConfig) error
type InitConfig struct {
// InitCheck can be used to check initialization errors from the subsystem
InitCheck InitCheck
hiearchy Hierarchy
hierarchy Hierarchy
}
func newInitConfig() *InitConfig {
return &InitConfig{
InitCheck: RequireDevices,
hiearchy: Default,
hierarchy: Default,
}
}
@ -66,7 +66,7 @@ func RequireDevices(s Subsystem, _ Path, _ error) error {
// The default list is coming from /proc/self/mountinfo.
func WithHiearchy(h Hierarchy) InitOpts {
return func(c *InitConfig) error {
c.hiearchy = h
c.hierarchy = h
return nil
}
}

View file

@ -20,7 +20,6 @@ import (
"os"
"path/filepath"
"strconv"
"strings"
v1 "github.com/containerd/cgroups/v3/cgroup1/stats"
specs "github.com/opencontainers/runtime-spec/specs-go"
@ -67,16 +66,10 @@ func (p *pidsController) Stat(path string, stats *v1.Metrics) error {
if err != nil {
return err
}
var max uint64
maxData, err := os.ReadFile(filepath.Join(p.Path(path), "pids.max"))
max, err := readUint(filepath.Join(p.Path(path), "pids.max"))
if err != nil {
return err
}
if maxS := strings.TrimSpace(string(maxData)); maxS != "max" {
if max, err = parseUint(maxS, 10, 64); err != nil {
return err
}
}
stats.Pids = &v1.PidsStat{
Current: current,
Limit: max,

View file

@ -124,7 +124,6 @@ func toRdmaEntry(strEntries []string) []*v1.RdmaEntry {
}
func (p *rdmaController) Stat(path string, stats *v1.Metrics) error {
currentData, err := os.ReadFile(filepath.Join(p.Path(path), "rdma.current"))
if err != nil {
return err

View file

@ -29,7 +29,7 @@ import (
const (
SystemdDbus Name = "systemd"
defaultSlice = "system.slice"
defaultSlice Name = "system.slice"
)
var (
@ -56,7 +56,7 @@ func Systemd() ([]Subsystem, error) {
func Slice(slice, name string) Path {
if slice == "" {
slice = defaultSlice
slice = string(defaultSlice)
}
return func(subsystem Name) (string, error) {
return filepath.Join(slice, name), nil
@ -70,7 +70,6 @@ func NewSystemd(root string) (*SystemdController, error) {
}
type SystemdController struct {
mu sync.Mutex
root string
}

View file

@ -18,6 +18,7 @@ package cgroup1
import (
"bufio"
"bytes"
"fmt"
"os"
"path/filepath"
@ -131,11 +132,25 @@ func hugePageSizes() ([]string, error) {
}
func readUint(path string) (uint64, error) {
v, err := os.ReadFile(path)
f, err := os.Open(path)
if err != nil {
return 0, err
}
return parseUint(strings.TrimSpace(string(v)), 10, 64)
defer f.Close()
// We should only need 20 bytes for the max uint64, but for a nice power of 2
// let's use 32.
b := make([]byte, 32)
n, err := f.Read(b)
if err != nil {
return 0, err
}
s := string(bytes.TrimSpace(b[:n]))
if s == "max" {
// Return 0 for the max value to maintain backward compatibility.
return 0, nil
}
return parseUint(s, 10, 64)
}
func parseUint(s string, base, bitSize int) (uint64, error) {

View file

@ -45,7 +45,7 @@ func Default() ([]Subsystem, error) {
}
// v1MountPoint returns the mount point where the cgroup
// mountpoints are mounted in a single hiearchy
// mountpoints are mounted in a single hierarchy
func v1MountPoint() (string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {

View file

@ -167,7 +167,7 @@ func (p *program) appendDevice(dev specs.LinuxDeviceCgroup) error {
}
p.insts = append(p.insts, acceptBlock(dev.Allow)...)
// set blockSym to the first instruction we added in this iteration
p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym)
p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].WithSymbol(blockSym)
p.blockID++
return nil
}
@ -180,7 +180,7 @@ func (p *program) finalize() (asm.Instructions, error) {
blockSym := fmt.Sprintf("block-%d", p.blockID)
p.insts = append(p.insts,
// R0 <- 0
asm.Mov.Imm32(asm.R0, 0).Sym(blockSym),
asm.Mov.Imm32(asm.R0, 0).WithSymbol(blockSym),
asm.Return(),
)
p.blockID = -1

View file

@ -21,13 +21,11 @@ import (
"context"
"errors"
"fmt"
"io"
"math"
"os"
"path/filepath"
"strconv"
"strings"
"syscall"
"time"
"github.com/containerd/cgroups/v3/cgroup2/stats"
@ -43,13 +41,12 @@ const (
subtreeControl = "cgroup.subtree_control"
controllersFile = "cgroup.controllers"
killFile = "cgroup.kill"
typeFile = "cgroup.type"
defaultCgroup2Path = "/sys/fs/cgroup"
defaultSlice = "system.slice"
)
var (
canDelegate bool
)
var canDelegate bool
type Event struct {
Low uint64
@ -99,7 +96,9 @@ func (r *Resources) Values() (o []Value) {
func (r *Resources) EnabledControllers() (c []string) {
if r.CPU != nil {
c = append(c, "cpu")
c = append(c, "cpuset")
if r.CPU.Cpus != "" || r.CPU.Mems != "" {
c = append(c, "cpuset")
}
}
if r.Memory != nil {
c = append(c, "memory")
@ -238,6 +237,35 @@ func setResources(path string, resources *Resources) error {
return nil
}
// CgroupType represents the types a cgroup can be. Values of this type are
// what GetType returns after reading the cgroup.type file and what SetType
// writes to it.
type CgroupType string
const (
// Domain is the "domain" cgroup type string.
Domain CgroupType = "domain"
// Threaded is the "threaded" cgroup type string.
// NOTE(review): see the kernel cgroup-v2 documentation on threads for the
// exact semantics of each type.
Threaded CgroupType = "threaded"
)
// GetType reads the cgroup.type file located under this manager's path and
// returns its content, with surrounding whitespace removed, as a CgroupType.
// Any error from reading the file is returned unchanged together with an
// empty type.
func (c *Manager) GetType() (CgroupType, error) {
	raw, err := os.ReadFile(filepath.Join(c.path, typeFile))
	if err != nil {
		return "", err
	}
	return CgroupType(strings.TrimSpace(string(raw))), nil
}
// SetType writes cgType to the cgroup.type file located under this manager's
// path and returns whatever error writeValues reports.
func (c *Manager) SetType(cgType CgroupType) error {
	// NOTE: We could abort if cgType != Threaded here as currently
	// it's not possible to revert back to domain, but not sure
	// it's worth being that opinionated, especially if that may
	// ever change.
	return writeValues(c.path, []Value{
		{
			filename: typeFile,
			value:    string(cgType),
		},
	})
}
func (c *Manager) RootControllers() ([]string, error) {
b, err := os.ReadFile(filepath.Join(c.unifiedMountpoint, controllersFile))
if err != nil {
@ -492,17 +520,15 @@ func (c *Manager) MoveTo(destination *Manager) error {
return nil
}
var singleValueFiles = []string{
"pids.current",
"pids.max",
}
func (c *Manager) Stat() (*stats.Metrics, error) {
controllers, err := c.Controllers()
if err != nil {
return nil, err
}
out := make(map[string]interface{})
// Sizing this avoids an allocation to increase the map at runtime;
// currently the default bucket size is 8 and we put 40+ elements
// in it so we'd always end up allocating.
out := make(map[string]uint64, 50)
for _, controller := range controllers {
switch controller {
case "cpu", "memory":
@ -514,66 +540,58 @@ func (c *Manager) Stat() (*stats.Metrics, error) {
}
}
}
for _, name := range singleValueFiles {
if err := readSingleFile(c.path, name, out); err != nil {
if os.IsNotExist(err) {
continue
}
return nil, err
}
}
memoryEvents := make(map[string]interface{})
memoryEvents := make(map[string]uint64)
if err := readKVStatsFile(c.path, "memory.events", memoryEvents); err != nil {
if !os.IsNotExist(err) {
return nil, err
}
}
var metrics stats.Metrics
var metrics stats.Metrics
metrics.Pids = &stats.PidsStat{
Current: getPidValue("pids.current", out),
Limit: getPidValue("pids.max", out),
Current: getStatFileContentUint64(filepath.Join(c.path, "pids.current")),
Limit: getStatFileContentUint64(filepath.Join(c.path, "pids.max")),
}
metrics.CPU = &stats.CPUStat{
UsageUsec: getUint64Value("usage_usec", out),
UserUsec: getUint64Value("user_usec", out),
SystemUsec: getUint64Value("system_usec", out),
NrPeriods: getUint64Value("nr_periods", out),
NrThrottled: getUint64Value("nr_throttled", out),
ThrottledUsec: getUint64Value("throttled_usec", out),
UsageUsec: out["usage_usec"],
UserUsec: out["user_usec"],
SystemUsec: out["system_usec"],
NrPeriods: out["nr_periods"],
NrThrottled: out["nr_throttled"],
ThrottledUsec: out["throttled_usec"],
}
metrics.Memory = &stats.MemoryStat{
Anon: getUint64Value("anon", out),
File: getUint64Value("file", out),
KernelStack: getUint64Value("kernel_stack", out),
Slab: getUint64Value("slab", out),
Sock: getUint64Value("sock", out),
Shmem: getUint64Value("shmem", out),
FileMapped: getUint64Value("file_mapped", out),
FileDirty: getUint64Value("file_dirty", out),
FileWriteback: getUint64Value("file_writeback", out),
AnonThp: getUint64Value("anon_thp", out),
InactiveAnon: getUint64Value("inactive_anon", out),
ActiveAnon: getUint64Value("active_anon", out),
InactiveFile: getUint64Value("inactive_file", out),
ActiveFile: getUint64Value("active_file", out),
Unevictable: getUint64Value("unevictable", out),
SlabReclaimable: getUint64Value("slab_reclaimable", out),
SlabUnreclaimable: getUint64Value("slab_unreclaimable", out),
Pgfault: getUint64Value("pgfault", out),
Pgmajfault: getUint64Value("pgmajfault", out),
WorkingsetRefault: getUint64Value("workingset_refault", out),
WorkingsetActivate: getUint64Value("workingset_activate", out),
WorkingsetNodereclaim: getUint64Value("workingset_nodereclaim", out),
Pgrefill: getUint64Value("pgrefill", out),
Pgscan: getUint64Value("pgscan", out),
Pgsteal: getUint64Value("pgsteal", out),
Pgactivate: getUint64Value("pgactivate", out),
Pgdeactivate: getUint64Value("pgdeactivate", out),
Pglazyfree: getUint64Value("pglazyfree", out),
Pglazyfreed: getUint64Value("pglazyfreed", out),
ThpFaultAlloc: getUint64Value("thp_fault_alloc", out),
ThpCollapseAlloc: getUint64Value("thp_collapse_alloc", out),
Anon: out["anon"],
File: out["file"],
KernelStack: out["kernel_stack"],
Slab: out["slab"],
Sock: out["sock"],
Shmem: out["shmem"],
FileMapped: out["file_mapped"],
FileDirty: out["file_dirty"],
FileWriteback: out["file_writeback"],
AnonThp: out["anon_thp"],
InactiveAnon: out["inactive_anon"],
ActiveAnon: out["active_anon"],
InactiveFile: out["inactive_file"],
ActiveFile: out["active_file"],
Unevictable: out["unevictable"],
SlabReclaimable: out["slab_reclaimable"],
SlabUnreclaimable: out["slab_unreclaimable"],
Pgfault: out["pgfault"],
Pgmajfault: out["pgmajfault"],
WorkingsetRefault: out["workingset_refault"],
WorkingsetActivate: out["workingset_activate"],
WorkingsetNodereclaim: out["workingset_nodereclaim"],
Pgrefill: out["pgrefill"],
Pgscan: out["pgscan"],
Pgsteal: out["pgsteal"],
Pgactivate: out["pgactivate"],
Pgdeactivate: out["pgdeactivate"],
Pglazyfree: out["pglazyfree"],
Pglazyfreed: out["pglazyfreed"],
ThpFaultAlloc: out["thp_fault_alloc"],
ThpCollapseAlloc: out["thp_collapse_alloc"],
Usage: getStatFileContentUint64(filepath.Join(c.path, "memory.current")),
UsageLimit: getStatFileContentUint64(filepath.Join(c.path, "memory.max")),
SwapUsage: getStatFileContentUint64(filepath.Join(c.path, "memory.swap.current")),
@ -581,11 +599,11 @@ func (c *Manager) Stat() (*stats.Metrics, error) {
}
if len(memoryEvents) > 0 {
metrics.MemoryEvents = &stats.MemoryEvents{
Low: getUint64Value("low", memoryEvents),
High: getUint64Value("high", memoryEvents),
Max: getUint64Value("max", memoryEvents),
Oom: getUint64Value("oom", memoryEvents),
OomKill: getUint64Value("oom_kill", memoryEvents),
Low: memoryEvents["low"],
High: memoryEvents["high"],
Max: memoryEvents["max"],
Oom: memoryEvents["oom"],
OomKill: memoryEvents["oom_kill"],
}
}
metrics.Io = &stats.IOStat{Usage: readIoStats(c.path)}
@ -598,56 +616,7 @@ func (c *Manager) Stat() (*stats.Metrics, error) {
return &metrics, nil
}
func getUint64Value(key string, out map[string]interface{}) uint64 {
v, ok := out[key]
if !ok {
return 0
}
switch t := v.(type) {
case uint64:
return t
}
return 0
}
func getPidValue(key string, out map[string]interface{}) uint64 {
v, ok := out[key]
if !ok {
return 0
}
switch t := v.(type) {
case uint64:
return t
case string:
if t == "max" {
return math.MaxUint64
}
}
return 0
}
func readSingleFile(path string, file string, out map[string]interface{}) error {
f, err := os.Open(filepath.Join(path, file))
if err != nil {
return err
}
defer f.Close()
data, err := io.ReadAll(f)
if err != nil {
return err
}
s := strings.TrimSpace(string(data))
v, err := parseUint(s, 10, 64)
if err != nil {
// if we cannot parse as a uint, parse as a string
out[file] = s
return nil
}
out[file] = v
return nil
}
func readKVStatsFile(path string, file string, out map[string]interface{}) error {
func readKVStatsFile(path string, file string, out map[string]uint64) error {
f, err := os.Open(filepath.Join(path, file))
if err != nil {
return err
@ -692,16 +661,12 @@ func (c *Manager) freeze(path string, state State) error {
func (c *Manager) isCgroupEmpty() bool {
// In case of any error we return true so that we exit and don't leak resources
out := make(map[string]interface{})
out := make(map[string]uint64)
if err := readKVStatsFile(c.path, "cgroup.events", out); err != nil {
return true
}
if v, ok := out["populated"]; ok {
populated, ok := v.(uint64)
if !ok {
return true
}
return populated == 0
return v == 0
}
return true
}
@ -709,19 +674,19 @@ func (c *Manager) isCgroupEmpty() bool {
// MemoryEventFD returns inotify file descriptor and 'memory.events' inotify watch descriptor
func (c *Manager) MemoryEventFD() (int, uint32, error) {
fpath := filepath.Join(c.path, "memory.events")
fd, err := syscall.InotifyInit()
fd, err := unix.InotifyInit()
if err != nil {
return 0, 0, errors.New("failed to create inotify fd")
}
wd, err := syscall.InotifyAddWatch(fd, fpath, unix.IN_MODIFY)
wd, err := unix.InotifyAddWatch(fd, fpath, unix.IN_MODIFY)
if err != nil {
syscall.Close(fd)
unix.Close(fd)
return 0, 0, fmt.Errorf("failed to add inotify watch for %q: %w", fpath, err)
}
// monitor to detect process exit/cgroup deletion
evpath := filepath.Join(c.path, "cgroup.events")
if _, err = syscall.InotifyAddWatch(fd, evpath, unix.IN_MODIFY); err != nil {
syscall.Close(fd)
if _, err = unix.InotifyAddWatch(fd, evpath, unix.IN_MODIFY); err != nil {
unix.Close(fd)
return 0, 0, fmt.Errorf("failed to add inotify watch for %q: %w", evpath, err)
}
@ -736,41 +701,6 @@ func (c *Manager) EventChan() (<-chan Event, <-chan error) {
return ec, errCh
}
func parseMemoryEvents(out map[string]interface{}) (Event, error) {
e := Event{}
if v, ok := out["high"]; ok {
e.High, ok = v.(uint64)
if !ok {
return Event{}, fmt.Errorf("cannot convert high to uint64: %+v", v)
}
}
if v, ok := out["low"]; ok {
e.Low, ok = v.(uint64)
if !ok {
return Event{}, fmt.Errorf("cannot convert low to uint64: %+v", v)
}
}
if v, ok := out["max"]; ok {
e.Max, ok = v.(uint64)
if !ok {
return Event{}, fmt.Errorf("cannot convert max to uint64: %+v", v)
}
}
if v, ok := out["oom"]; ok {
e.OOM, ok = v.(uint64)
if !ok {
return Event{}, fmt.Errorf("cannot convert oom to uint64: %+v", v)
}
}
if v, ok := out["oom_kill"]; ok {
e.OOMKill, ok = v.(uint64)
if !ok {
return Event{}, fmt.Errorf("cannot convert oom_kill to uint64: %+v", v)
}
}
return e, nil
}
func (c *Manager) waitForEvents(ec chan<- Event, errCh chan<- error) {
defer close(errCh)
@ -779,17 +709,17 @@ func (c *Manager) waitForEvents(ec chan<- Event, errCh chan<- error) {
errCh <- err
return
}
defer syscall.Close(fd)
defer unix.Close(fd)
for {
buffer := make([]byte, syscall.SizeofInotifyEvent*10)
bytesRead, err := syscall.Read(fd, buffer)
buffer := make([]byte, unix.SizeofInotifyEvent*10)
bytesRead, err := unix.Read(fd, buffer)
if err != nil {
errCh <- err
return
}
if bytesRead >= syscall.SizeofInotifyEvent {
out := make(map[string]interface{})
if bytesRead >= unix.SizeofInotifyEvent {
out := make(map[string]uint64)
if err := readKVStatsFile(c.path, "memory.events", out); err != nil {
// When cgroup is deleted read may return -ENODEV instead of -ENOENT from open.
if _, statErr := os.Lstat(filepath.Join(c.path, "memory.events")); !os.IsNotExist(statErr) {
@ -797,12 +727,13 @@ func (c *Manager) waitForEvents(ec chan<- Event, errCh chan<- error) {
}
return
}
e, err := parseMemoryEvents(out)
if err != nil {
errCh <- err
return
ec <- Event{
Low: out["low"],
High: out["high"],
Max: out["max"],
OOM: out["oom"],
OOMKill: out["oom_kill"],
}
ec <- e
if c.isCgroupEmpty() {
return
}
@ -818,7 +749,7 @@ func setDevices(path string, devices []specs.LinuxDeviceCgroup) error {
if err != nil {
return err
}
dirFD, err := unix.Open(path, unix.O_DIRECTORY|unix.O_RDONLY|unix.O_CLOEXEC, 0600)
dirFD, err := unix.Open(path, unix.O_DIRECTORY|unix.O_RDONLY|unix.O_CLOEXEC, 0o600)
if err != nil {
return fmt.Errorf("cannot get dir FD for %s", path)
}

View file

@ -18,6 +18,7 @@ package cgroup2
import (
"bufio"
"errors"
"fmt"
"io"
"math"
@ -25,6 +26,7 @@ import (
"path/filepath"
"strconv"
"strings"
"sync"
"time"
"unsafe"
@ -39,7 +41,7 @@ import (
const (
cgroupProcs = "cgroup.procs"
cgroupThreads = "cgroup.threads"
defaultDirPerm = 0755
defaultDirPerm = 0o755
)
// defaultFilePerm is a var so that the test framework can change the filemode
@ -92,19 +94,13 @@ func parseCgroupProcsFile(path string) ([]uint64, error) {
return out, nil
}
func parseKV(raw string) (string, interface{}, error) {
func parseKV(raw string) (string, uint64, error) {
parts := strings.Fields(raw)
switch len(parts) {
case 2:
v, err := parseUint(parts[1], 10, 64)
if err != nil {
// if we cannot parse as a uint, parse as a string
return parts[0], parts[1], nil
}
return parts[0], v, nil
default:
if len(parts) != 2 {
return "", 0, ErrInvalidFormat
}
v, err := parseUint(parts[1], 10, 64)
return parts[0], v, err
}
func parseUint(s string, base, bitSize int) (uint64, error) {
@ -136,9 +132,7 @@ func parseCgroupFile(path string) (string, error) {
}
func parseCgroupFromReader(r io.Reader) (string, error) {
var (
s = bufio.NewScanner(r)
)
s := bufio.NewScanner(r)
for s.Scan() {
var (
text = s.Text()
@ -244,18 +238,28 @@ func ToResources(spec *specs.LinuxResources) *Resources {
// Gets uint64 parsed content of single value cgroup stat file
func getStatFileContentUint64(filePath string) uint64 {
contents, err := os.ReadFile(filePath)
f, err := os.Open(filePath)
if err != nil {
return 0
}
trimmed := strings.TrimSpace(string(contents))
defer f.Close()
// We expect an unsigned 64 bit integer, or a "max" string
// in some cases.
buf := make([]byte, 32)
n, err := f.Read(buf)
if err != nil {
return 0
}
trimmed := strings.TrimSpace(string(buf[:n]))
if trimmed == "max" {
return math.MaxUint64
}
res, err := parseUint(trimmed, 10, 64)
if err != nil {
logrus.Errorf("unable to parse %q as a uint from Cgroup file %q", string(contents), filePath)
logrus.Errorf("unable to parse %q as a uint from Cgroup file %q", trimmed, filePath)
return res
}
@ -385,58 +389,96 @@ func systemdUnitFromPath(path string) string {
}
func readHugeTlbStats(path string) []*stats.HugeTlbStat {
var usage = []*stats.HugeTlbStat{}
var keyUsage = make(map[string]*stats.HugeTlbStat)
f, err := os.Open(path)
if err != nil {
return usage
}
files, err := f.Readdir(-1)
f.Close()
if err != nil {
return usage
}
for _, file := range files {
if strings.Contains(file.Name(), "hugetlb") &&
(strings.HasSuffix(file.Name(), "max") || strings.HasSuffix(file.Name(), "current")) {
var hugeTlb *stats.HugeTlbStat
var ok bool
fileName := strings.Split(file.Name(), ".")
pageSize := fileName[1]
if hugeTlb, ok = keyUsage[pageSize]; !ok {
hugeTlb = &stats.HugeTlbStat{}
}
hugeTlb.Pagesize = pageSize
out, err := os.ReadFile(filepath.Join(path, file.Name()))
if err != nil {
continue
}
var value uint64
stringVal := strings.TrimSpace(string(out))
if stringVal == "max" {
value = math.MaxUint64
} else {
value, err = strconv.ParseUint(stringVal, 10, 64)
}
if err != nil {
continue
}
switch fileName[2] {
case "max":
hugeTlb.Max = value
case "current":
hugeTlb.Current = value
}
keyUsage[pageSize] = hugeTlb
hpSizes := hugePageSizes()
usage := make([]*stats.HugeTlbStat, len(hpSizes))
for idx, pagesize := range hpSizes {
usage[idx] = &stats.HugeTlbStat{
Max: getStatFileContentUint64(filepath.Join(path, "hugetlb."+pagesize+".max")),
Current: getStatFileContentUint64(filepath.Join(path, "hugetlb."+pagesize+".current")),
Pagesize: pagesize,
}
}
for _, entry := range keyUsage {
usage = append(usage, entry)
}
return usage
}
var (
hPageSizes []string
initHPSOnce sync.Once
)
// hugePageSizes returns the huge page sizes found on the host, formatted by
// getHugePageSizeFromFilenames.
//
// The idea and implementation are taken pretty much line for line from runc.
// Because the hugetlb files are well known, and the only variable is which
// huge page sizes the host has, the directory listing is done once (guarded
// by initHPSOnce) and the result is re-used. This saves a Readdirnames call
// to search for hugetlb files on every `manager.Stat` call.
// https://github.com/opencontainers/runc/blob/3a2c0c2565644d8a7e0f1dd594a060b21fa96cf1/libcontainer/cgroups/utils.go#L301
//
// If the directory cannot be opened or listed, hPageSizes is left untouched
// and returned as is. A parse problem is only logged as a warning; the sizes
// that could be parsed are still stored.
func hugePageSizes() []string {
	initHPSOnce.Do(func() {
		hpDir, err := os.OpenFile("/sys/kernel/mm/hugepages", unix.O_DIRECTORY|unix.O_RDONLY, 0)
		if err != nil {
			return
		}
		names, err := hpDir.Readdirnames(0)
		hpDir.Close()
		if err != nil {
			return
		}

		sizes, err := getHugePageSizeFromFilenames(names)
		if err != nil {
			logrus.Warnf("hugePageSizes: %s", err)
		}
		hPageSizes = sizes
	})

	return hPageSizes
}
// getHugePageSizeFromFilenames converts directory entry names of the form
// "hugepages-<N>kB" (for example "hugepages-1048576kB") into human-readable
// page size strings such as "64KB", "2MB" or "1GB".
//
// Names without the "hugepages-" prefix are silently ignored. Names with an
// unexpected suffix or a non-numeric size are skipped; the first such problem
// is returned as the error, alongside all sizes that were parsed successfully.
func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
	const (
		namePrefix = "hugepages-"
		unitSuffix = "kB"
	)

	var firstProblem error
	sizes := make([]string, 0, len(fileNames))

	for _, name := range fileNames {
		// example: hugepages-1048576kB
		if !strings.HasPrefix(name, namePrefix) {
			// Unexpected file name: no prefix found, ignore it.
			continue
		}
		digits := name[len(namePrefix):]

		// In all known versions of Linux up to 6.3 the suffix is always
		// "kB". If we find something else, remember an error but keep going.
		if !strings.HasSuffix(digits, unitSuffix) {
			// Highly unlikely.
			if firstProblem == nil {
				firstProblem = errors.New(name + `: invalid suffix (expected "kB")`)
			}
			continue
		}
		digits = digits[:len(digits)-len(unitSuffix)]

		kb, err := strconv.Atoi(digits)
		if err != nil {
			// Highly unlikely.
			if firstProblem == nil {
				firstProblem = fmt.Errorf("%s: %w", name, err)
			}
			continue
		}

		// Model after https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/hugetlb_cgroup.c?id=eff48ddeab782e35e58ccc8853f7386bbae9dec4#n574
		// but in our case the size is in KB already.
		var label string
		switch {
		case kb >= (1 << 20):
			label = strconv.Itoa(kb>>20) + "GB"
		case kb >= (1 << 10):
			label = strconv.Itoa(kb>>10) + "MB"
		default:
			label = digits + "KB"
		}
		sizes = append(sizes, label)
	}

	return sizes, firstProblem
}
func getSubreaper() (int, error) {
var i uintptr
if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil {

4
vendor/modules.txt vendored
View file

@ -206,8 +206,8 @@ github.com/container-storage-interface/spec/lib/go/csi
# github.com/containerd/cgroups v1.0.4
## explicit; go 1.17
github.com/containerd/cgroups/stats/v1
# github.com/containerd/cgroups/v3 v3.0.1
## explicit; go 1.17
# github.com/containerd/cgroups/v3 v3.0.2
## explicit; go 1.18
github.com/containerd/cgroups/v3
github.com/containerd/cgroups/v3/cgroup1
github.com/containerd/cgroups/v3/cgroup1/stats