Merge pull request #41253 from AkihiroSuda/stats-v2-failcnt

statsV2: implement Failcnt
Sebastiaan van Stijn 2020-07-30 15:42:40 +02:00 committed by GitHub
commit 9e33baffc5
65 changed files with 4892 additions and 1100 deletions


@ -1566,12 +1566,16 @@ func (daemon *Daemon) statsV2(s *types.StatsJSON, stats *statsV2.Metrics) (*type
Usage: stats.Memory.Usage,
// MaxUsage is not supported
Limit: stats.Memory.UsageLimit,
// TODO: Failcnt
}
// if the container does not set memory limit, use the machineMemory
if s.MemoryStats.Limit > daemon.machineMemory && daemon.machineMemory > 0 {
s.MemoryStats.Limit = daemon.machineMemory
}
if stats.MemoryEvents != nil {
// Failcnt is set to the "oom" field of the "memory.events" file.
// See https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
s.MemoryStats.Failcnt = stats.MemoryEvents.Oom
}
}
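
cgroup v2 has no memory.failcnt file; the closest equivalent is the "oom" entry in memory.events, which containerd surfaces as MemoryEvents.Oom and the hunk above copies into MemoryStats.Failcnt. Below is a minimal standalone sketch of reading that counter straight from the unified hierarchy; the cgroup path is a placeholder, and the daemon itself takes the value from containerd's metrics rather than reading the file.

package main

import (
    "bufio"
    "fmt"
    "os"
    "strconv"
    "strings"
)

// readOomCount parses the "oom" counter out of a cgroup v2 memory.events
// file (lines of "<key> <value>"). The daemon gets the same number via
// containerd's MemoryEvents.Oom; reading the file directly is shown here
// only for illustration.
func readOomCount(path string) (uint64, error) {
    f, err := os.Open(path)
    if err != nil {
        return 0, err
    }
    defer f.Close()

    s := bufio.NewScanner(f)
    for s.Scan() {
        fields := strings.Fields(s.Text())
        if len(fields) == 2 && fields[0] == "oom" {
            return strconv.ParseUint(fields[1], 10, 64)
        }
    }
    if err := s.Err(); err != nil {
        return 0, err
    }
    return 0, fmt.Errorf("no oom entry in %s", path)
}

func main() {
    // Placeholder cgroup path; substitute the container's own cgroup.
    n, err := readOomCount("/sys/fs/cgroup/system.slice/memory.events")
    fmt.Println(n, err)
}
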
if stats.Pids != nil {


@ -125,13 +125,13 @@ google.golang.org/genproto 3f1135a288c9a07e340ae8ba4cc6
github.com/containerd/containerd c80284d4b5291a351bb471bcdabb5c1d95e7a583 # master / v1.4.0-dev
github.com/containerd/fifo ff969a566b00877c63489baf6e8c35d60af6142c
github.com/containerd/continuity efbc4488d8fe1bdc16bde3b2d2990d9b3a899165
github.com/containerd/cgroups 44306b6a1d46985d916b48b4199f93a378af314f
github.com/containerd/cgroups 318312a373405e5e91134d8063d04d59768a1bff
github.com/containerd/console 8375c3424e4d7b114e8a90a4a40c8e1b40d1d4e6 # v1.0.0
github.com/containerd/go-runc 7016d3ce2328dd2cb1192b2076ebd565c4e8df0c
github.com/containerd/typeurl cd3ce7159eae562a4f60ceff37dada11a939d247 # v1.0.1
github.com/containerd/ttrpc 72bb1b21c5b0a4a107f59dd85f6ab58e564b68d6 # v1.0.1
github.com/gogo/googleapis 01e0f9cca9b92166042241267ee2a5cdf5cff46c # v1.3.2
github.com/cilium/ebpf 60c3aa43f488292fe2ee50fb8b833b383ca8ebbb
github.com/cilium/ebpf 1c8d4c9ef7759622653a1d319284a44652333b28
# cluster
github.com/docker/swarmkit 293aa2e66279a930999044cbf6d0e590baac16ff

vendor/github.com/cilium/ebpf/abi.go generated vendored

@ -3,13 +3,13 @@ package ebpf
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"os"
"syscall"
"github.com/cilium/ebpf/internal"
"github.com/pkg/errors"
)
// MapABI are the attributes of a Map which are available across all supported kernels.
@ -31,10 +31,10 @@ func newMapABIFromSpec(spec *MapSpec) *MapABI {
}
}
func newMapABIFromFd(fd *bpfFD) (string, *MapABI, error) {
func newMapABIFromFd(fd *internal.FD) (string, *MapABI, error) {
info, err := bpfGetMapInfoByFD(fd)
if err != nil {
if errors.Cause(err) == syscall.EINVAL {
if errors.Is(err, syscall.EINVAL) {
abi, err := newMapABIFromProc(fd)
return "", abi, err
}
@ -50,7 +50,7 @@ func newMapABIFromFd(fd *bpfFD) (string, *MapABI, error) {
}, nil
}
func newMapABIFromProc(fd *bpfFD) (*MapABI, error) {
func newMapABIFromProc(fd *internal.FD) (*MapABI, error) {
var abi MapABI
err := scanFdInfo(fd, map[string]interface{}{
"map_type": &abi.Type,
@ -94,10 +94,10 @@ func newProgramABIFromSpec(spec *ProgramSpec) *ProgramABI {
}
}
func newProgramABIFromFd(fd *bpfFD) (string, *ProgramABI, error) {
func newProgramABIFromFd(fd *internal.FD) (string, *ProgramABI, error) {
info, err := bpfGetProgInfoByFD(fd)
if err != nil {
if errors.Cause(err) == syscall.EINVAL {
if errors.Is(err, syscall.EINVAL) {
return newProgramABIFromProc(fd)
}
@ -105,10 +105,10 @@ func newProgramABIFromFd(fd *bpfFD) (string, *ProgramABI, error) {
}
var name string
if bpfName := convertCString(info.name[:]); bpfName != "" {
if bpfName := internal.CString(info.name[:]); bpfName != "" {
name = bpfName
} else {
name = convertCString(info.tag[:])
name = internal.CString(info.tag[:])
}
return name, &ProgramABI{
@ -116,7 +116,7 @@ func newProgramABIFromFd(fd *bpfFD) (string, *ProgramABI, error) {
}, nil
}
func newProgramABIFromProc(fd *bpfFD) (string, *ProgramABI, error) {
func newProgramABIFromProc(fd *internal.FD) (string, *ProgramABI, error) {
var (
abi ProgramABI
name string
@ -126,7 +126,7 @@ func newProgramABIFromProc(fd *bpfFD) (string, *ProgramABI, error) {
"prog_type": &abi.Type,
"prog_tag": &name,
})
if errors.Cause(err) == errMissingFields {
if errors.Is(err, errMissingFields) {
return "", nil, &internal.UnsupportedFeatureError{
Name: "reading ABI from /proc/self/fdinfo",
MinimumVersion: internal.Version{4, 11, 0},
@ -139,8 +139,8 @@ func newProgramABIFromProc(fd *bpfFD) (string, *ProgramABI, error) {
return name, &abi, nil
}
func scanFdInfo(fd *bpfFD, fields map[string]interface{}) error {
raw, err := fd.value()
func scanFdInfo(fd *internal.FD, fields map[string]interface{}) error {
raw, err := fd.Value()
if err != nil {
return err
}
@ -151,7 +151,10 @@ func scanFdInfo(fd *bpfFD, fields map[string]interface{}) error {
}
defer fh.Close()
return errors.Wrap(scanFdInfoReader(fh, fields), fh.Name())
if err := scanFdInfoReader(fh, fields); err != nil {
return fmt.Errorf("%s: %w", fh.Name(), err)
}
return nil
}
var errMissingFields = errors.New("missing fields")
@ -175,7 +178,7 @@ func scanFdInfoReader(r io.Reader, fields map[string]interface{}) error {
}
if n, err := fmt.Fscanln(bytes.NewReader(parts[1]), field); err != nil || n != 1 {
return errors.Wrapf(err, "can't parse field %s", name)
return fmt.Errorf("can't parse field %s: %v", name, err)
}
scanned++
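
The abi.go hunks above are part of the switch from github.com/pkg/errors to Go 1.13 error wrapping: errors.Cause(err) == target becomes errors.Is(err, target), and errors.Wrap/Wrapf becomes fmt.Errorf with the %w verb. The small self-contained illustration below (not code from this repository) shows why %w matters for those errors.Is checks.

package main

import (
    "errors"
    "fmt"
    "syscall"
)

func main() {
    // Wrapping with %w keeps the sentinel reachable through the chain,
    // which is what the rewritten errors.Is checks rely on.
    wrapped := fmt.Errorf("get map info: %w", syscall.EINVAL)
    fmt.Println(errors.Is(wrapped, syscall.EINVAL)) // true

    // Formatting with %v flattens the error into a plain string instead.
    flattened := fmt.Errorf("get map info: %v", syscall.EINVAL)
    fmt.Println(errors.Is(flattened, syscall.EINVAL)) // false
}
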


@ -2,12 +2,11 @@ package asm
import (
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"strings"
"github.com/pkg/errors"
)
// InstructionSize is the size of a BPF instruction in bytes
@ -39,10 +38,12 @@ func (ins *Instruction) Unmarshal(r io.Reader, bo binary.ByteOrder) (uint64, err
}
ins.OpCode = bi.OpCode
ins.Dst = bi.Registers.Dst()
ins.Src = bi.Registers.Src()
ins.Offset = bi.Offset
ins.Constant = int64(bi.Constant)
ins.Dst, ins.Src, err = bi.Registers.Unmarshal(bo)
if err != nil {
return 0, fmt.Errorf("can't unmarshal registers: %s", err)
}
if !bi.OpCode.isDWordLoad() {
return InstructionSize, nil
@ -75,9 +76,14 @@ func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error)
cons = int32(uint32(ins.Constant))
}
regs, err := newBPFRegisters(ins.Dst, ins.Src, bo)
if err != nil {
return 0, fmt.Errorf("can't marshal registers: %s", err)
}
bpfi := bpfInstruction{
ins.OpCode,
newBPFRegisters(ins.Dst, ins.Src),
regs,
ins.Offset,
cons,
}
@ -103,22 +109,52 @@ func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error)
// RewriteMapPtr changes an instruction to use a new map fd.
//
// Returns an error if the fd is invalid, or the instruction
// is incorrect.
// Returns an error if the instruction doesn't load a map.
func (ins *Instruction) RewriteMapPtr(fd int) error {
if !ins.OpCode.isDWordLoad() {
return errors.Errorf("%s is not a 64 bit load", ins.OpCode)
return fmt.Errorf("%s is not a 64 bit load", ins.OpCode)
}
if fd < 0 {
return errors.New("invalid fd")
if ins.Src != PseudoMapFD && ins.Src != PseudoMapValue {
return errors.New("not a load from a map")
}
ins.Src = R1
ins.Constant = int64(fd)
// Preserve the offset value for direct map loads.
offset := uint64(ins.Constant) & (math.MaxUint32 << 32)
rawFd := uint64(uint32(fd))
ins.Constant = int64(offset | rawFd)
return nil
}
func (ins *Instruction) mapPtr() uint32 {
return uint32(uint64(ins.Constant) & math.MaxUint32)
}
// RewriteMapOffset changes the offset of a direct load from a map.
//
// Returns an error if the instruction is not a direct load.
func (ins *Instruction) RewriteMapOffset(offset uint32) error {
if !ins.OpCode.isDWordLoad() {
return fmt.Errorf("%s is not a 64 bit load", ins.OpCode)
}
if ins.Src != PseudoMapValue {
return errors.New("not a direct load from a map")
}
fd := uint64(ins.Constant) & math.MaxUint32
ins.Constant = int64(uint64(offset)<<32 | fd)
return nil
}
func (ins *Instruction) mapOffset() uint32 {
return uint32(uint64(ins.Constant) >> 32)
}
func (ins *Instruction) isLoadFromMap() bool {
return ins.OpCode == LoadImmOp(DWord) && (ins.Src == PseudoMapFD || ins.Src == PseudoMapValue)
}
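
The rewritten RewriteMapPtr and the new RewriteMapOffset treat the 64-bit Constant of a double-word load as two halves: the low 32 bits carry the map fd, the high 32 bits the offset used by direct (PseudoMapValue) loads, so rewriting one half must not clobber the other. The following is a standalone sketch of that packing using plain integers rather than the asm types.

package main

import (
    "fmt"
    "math"
)

func main() {
    // Start from a direct load referencing offset 0x30 in map fd 3.
    constant := int64(uint64(0x30)<<32 | 3)

    // RewriteMapPtr(7): swap in the new fd, keep the high 32 bits.
    offset := uint64(constant) & (math.MaxUint32 << 32)
    constant = int64(offset | uint64(uint32(7)))

    // RewriteMapOffset(0x40): swap in the new offset, keep the low 32 bits.
    fd := uint64(constant) & math.MaxUint32
    constant = int64(uint64(0x40)<<32 | fd)

    // mapPtr() and mapOffset() read the two halves back out.
    fmt.Printf("fd=%d offset=%#x\n",
        uint32(uint64(constant)&math.MaxUint32),
        uint32(uint64(constant)>>32))
    // Output: fd=7 offset=0x40
}
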
// Format implements fmt.Formatter.
func (ins Instruction) Format(f fmt.State, c rune) {
if c != 'v' {
@ -139,6 +175,19 @@ func (ins Instruction) Format(f fmt.State, c rune) {
return
}
if ins.isLoadFromMap() {
fd := int32(ins.mapPtr())
switch ins.Src {
case PseudoMapFD:
fmt.Fprintf(f, "LoadMapPtr dst: %s fd: %d", ins.Dst, fd)
case PseudoMapValue:
fmt.Fprintf(f, "LoadMapValue dst: %s, fd: %d off: %d", ins.Dst, fd, ins.mapOffset())
}
goto ref
}
fmt.Fprintf(f, "%v ", op)
switch cls := op.Class(); cls {
case LdClass, LdXClass, StClass, StXClass:
@ -166,7 +215,7 @@ func (ins Instruction) Format(f fmt.State, c rune) {
case JumpClass:
switch jop := op.JumpOp(); jop {
case Call:
if ins.Src == R1 {
if ins.Src == PseudoCall {
// bpf-to-bpf call
fmt.Fprint(f, ins.Constant)
} else {
@ -183,6 +232,7 @@ func (ins Instruction) Format(f fmt.State, c rune) {
}
}
ref:
if ins.Reference != "" {
fmt.Fprintf(f, " <%s>", ins.Reference)
}
@ -235,7 +285,7 @@ func (insns Instructions) SymbolOffsets() (map[string]int, error) {
}
if _, ok := offsets[ins.Symbol]; ok {
return nil, errors.Errorf("duplicate symbol %s", ins.Symbol)
return nil, fmt.Errorf("duplicate symbol %s", ins.Symbol)
}
offsets[ins.Symbol] = i
@ -273,7 +323,7 @@ func (insns Instructions) marshalledOffsets() (map[string]int, error) {
}
if _, ok := symbols[ins.Symbol]; ok {
return nil, errors.Errorf("duplicate symbol %s", ins.Symbol)
return nil, fmt.Errorf("duplicate symbol %s", ins.Symbol)
}
symbols[ins.Symbol] = currentPos
@ -350,11 +400,11 @@ func (insns Instructions) Marshal(w io.Writer, bo binary.ByteOrder) error {
num := 0
for i, ins := range insns {
switch {
case ins.OpCode.JumpOp() == Call && ins.Constant == -1:
case ins.OpCode.JumpOp() == Call && ins.Src == PseudoCall && ins.Constant == -1:
// Rewrite bpf to bpf call
offset, ok := absoluteOffsets[ins.Reference]
if !ok {
return errors.Errorf("instruction %d: reference to missing symbol %s", i, ins.Reference)
return fmt.Errorf("instruction %d: reference to missing symbol %s", i, ins.Reference)
}
ins.Constant = int64(offset - num - 1)
@ -363,7 +413,7 @@ func (insns Instructions) Marshal(w io.Writer, bo binary.ByteOrder) error {
// Rewrite jump to label
offset, ok := absoluteOffsets[ins.Reference]
if !ok {
return errors.Errorf("instruction %d: reference to missing symbol %s", i, ins.Reference)
return fmt.Errorf("instruction %d: reference to missing symbol %s", i, ins.Reference)
}
ins.Offset = int16(offset - num - 1)
@ -371,7 +421,7 @@ func (insns Instructions) Marshal(w io.Writer, bo binary.ByteOrder) error {
n, err := ins.Marshal(w, bo)
if err != nil {
return errors.Wrapf(err, "instruction %d", i)
return fmt.Errorf("instruction %d: %w", i, err)
}
num += int(n / InstructionSize)
@ -388,16 +438,26 @@ type bpfInstruction struct {
type bpfRegisters uint8
func newBPFRegisters(dst, src Register) bpfRegisters {
return bpfRegisters((src << 4) | (dst & 0xF))
func newBPFRegisters(dst, src Register, bo binary.ByteOrder) (bpfRegisters, error) {
switch bo {
case binary.LittleEndian:
return bpfRegisters((src << 4) | (dst & 0xF)), nil
case binary.BigEndian:
return bpfRegisters((dst << 4) | (src & 0xF)), nil
default:
return 0, fmt.Errorf("unrecognized ByteOrder %T", bo)
}
}
func (r bpfRegisters) Dst() Register {
return Register(r & 0xF)
}
func (r bpfRegisters) Src() Register {
return Register(r >> 4)
func (r bpfRegisters) Unmarshal(bo binary.ByteOrder) (dst, src Register, err error) {
switch bo {
case binary.LittleEndian:
return Register(r & 0xF), Register(r >> 4), nil
case binary.BigEndian:
return Register(r >> 4), Register(r & 0xf), nil
default:
return 0, 0, fmt.Errorf("unrecognized ByteOrder %T", bo)
}
}
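
newBPFRegisters and bpfRegisters.Unmarshal now take the byte order into account because the dst/src nibbles of a BPF instruction swap places on big-endian machines. The throwaway restatement below uses plain uint8 values, not the package's Register type.

package main

import (
    "encoding/binary"
    "fmt"
)

// packRegs mirrors the switch above: little-endian puts the source register
// in the high nibble, big-endian puts the destination there.
func packRegs(dst, src uint8, bo binary.ByteOrder) uint8 {
    switch bo {
    case binary.BigEndian:
        return dst<<4 | src&0xF
    default:
        return src<<4 | dst&0xF
    }
}

func main() {
    const dst, src = 1, 2 // e.g. R1 and R2
    fmt.Printf("little-endian: %#02x  big-endian: %#02x\n",
        packRegs(dst, src, binary.LittleEndian),
        packRegs(dst, src, binary.BigEndian))
    // little-endian: 0x21  big-endian: 0x12
}
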
type unreferencedSymbolError struct {


@ -95,7 +95,7 @@ func (op JumpOp) Label(label string) Instruction {
if op == Call {
return Instruction{
OpCode: OpCode(JumpClass).SetJumpOp(Call),
Src: R1,
Src: PseudoCall,
Constant: -1,
Reference: label,
}


@ -110,11 +110,26 @@ func LoadMapPtr(dst Register, fd int) Instruction {
return Instruction{
OpCode: LoadImmOp(DWord),
Dst: dst,
Src: R1,
Src: PseudoMapFD,
Constant: int64(fd),
}
}
// LoadMapValue stores a pointer to the value at a certain offset of a map.
func LoadMapValue(dst Register, fd int, offset uint32) Instruction {
if fd < 0 {
return Instruction{OpCode: InvalidOpCode}
}
fdAndOffset := (uint64(offset) << 32) | uint64(uint32(fd))
return Instruction{
OpCode: LoadImmOp(DWord),
Dst: dst,
Src: PseudoMapValue,
Constant: int64(fdAndOffset),
}
}
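
LoadMapValue produces a double-word load whose Src is PseudoMapValue and whose Constant packs the value offset into the upper 32 bits alongside the map fd, while LoadMapPtr keeps using PseudoMapFD with the fd alone. A hedged usage sketch against the vendored asm package follows; the fd value is made up and would normally come from a live Map.

package main

import (
    "fmt"

    "github.com/cilium/ebpf/asm"
)

func main() {
    // Indirect load: R1 gets a pointer to the map itself (fd 42 is made up).
    ptr := asm.LoadMapPtr(asm.R1, 42)

    // Direct load: R2 gets a pointer to offset 16 inside the map's value.
    val := asm.LoadMapValue(asm.R2, 42, 16)

    // Both are 64 bit immediate loads; they differ in the pseudo source
    // register and in how Constant is interpreted.
    fmt.Println(ptr)
    fmt.Println(val)
}
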
// LoadIndOp returns the OpCode for loading a value of given size from an sk_buff.
func LoadIndOp(size Size) OpCode {
return OpCode(LdClass).SetMode(IndMode).SetSize(size)


@ -225,7 +225,7 @@ func (op OpCode) String() string {
}
default:
fmt.Fprintf(&f, "%#x", op)
fmt.Fprintf(&f, "OpCode(%#x)", uint8(op))
}
return f.String()


@ -33,6 +33,13 @@ const (
RFP = R10
)
// Pseudo registers used by 64bit loads and jumps
const (
PseudoMapFD = R1 // BPF_PSEUDO_MAP_FD
PseudoMapValue = R2 // BPF_PSEUDO_MAP_VALUE
PseudoCall = R1 // BPF_PSEUDO_CALL
)
func (r Register) String() string {
v := uint8(r)
if v == 10 {


@ -1,8 +1,13 @@
package ebpf
import (
"errors"
"fmt"
"math"
"github.com/cilium/ebpf/asm"
"github.com/pkg/errors"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/btf"
)
// CollectionOptions control loading a collection into the kernel.
@ -38,6 +43,89 @@ func (cs *CollectionSpec) Copy() *CollectionSpec {
return &cpy
}
// RewriteMaps replaces all references to specific maps.
//
// Use this function to use pre-existing maps instead of creating new ones
// when calling NewCollection. Any named maps are removed from CollectionSpec.Maps.
//
// Returns an error if a named map isn't used in at least one program.
func (cs *CollectionSpec) RewriteMaps(maps map[string]*Map) error {
for symbol, m := range maps {
// have we seen a program that uses this symbol / map
seen := false
fd := m.FD()
for progName, progSpec := range cs.Programs {
err := progSpec.Instructions.RewriteMapPtr(symbol, fd)
switch {
case err == nil:
seen = true
case asm.IsUnreferencedSymbol(err):
// Not all programs need to use the map
default:
return fmt.Errorf("program %s: %w", progName, err)
}
}
if !seen {
return fmt.Errorf("map %s not referenced by any programs", symbol)
}
// Prevent NewCollection from creating rewritten maps
delete(cs.Maps, symbol)
}
return nil
}
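
RewriteMaps is the hook for sharing pre-existing maps between collections: each map passed in replaces the ELF-declared map of the same name and is removed from CollectionSpec.Maps so NewCollection won't create it again. A hedged usage sketch follows; the object file path, the "shared_counters" symbol, and the map shape are invented and must match what the programs actually reference.

package main

import (
    "log"

    "github.com/cilium/ebpf"
)

func main() {
    spec, err := ebpf.LoadCollectionSpec("bpf/prog.o") // hypothetical object file
    if err != nil {
        log.Fatal(err)
    }

    // A map created ahead of time, e.g. to share state between collections.
    // Name and shape are illustrative only.
    shared, err := ebpf.NewMap(&ebpf.MapSpec{
        Name:       "shared_counters",
        Type:       ebpf.Array,
        KeySize:    4,
        ValueSize:  8,
        MaxEntries: 16,
    })
    if err != nil {
        log.Fatal(err)
    }

    // Point every program at the existing map instead of having
    // NewCollection create a fresh one.
    if err := spec.RewriteMaps(map[string]*ebpf.Map{"shared_counters": shared}); err != nil {
        log.Fatal(err)
    }

    coll, err := ebpf.NewCollection(spec)
    if err != nil {
        log.Fatal(err)
    }
    defer coll.Close()
}
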
// RewriteConstants replaces the value of multiple constants.
//
// The constant must be defined like so in the C program:
//
// static volatile const type foobar;
// static volatile const type foobar = default;
//
// Replacement values must be of the same length as the C sizeof(type).
// If necessary, they are marshalled according to the same rules as
// map values.
//
// From Linux 5.5 the verifier will use constants to eliminate dead code.
//
// Returns an error if a constant doesn't exist.
func (cs *CollectionSpec) RewriteConstants(consts map[string]interface{}) error {
rodata := cs.Maps[".rodata"]
if rodata == nil {
return errors.New("missing .rodata section")
}
if rodata.BTF == nil {
return errors.New(".rodata section has no BTF")
}
if n := len(rodata.Contents); n != 1 {
return fmt.Errorf("expected one key in .rodata, found %d", n)
}
kv := rodata.Contents[0]
value, ok := kv.Value.([]byte)
if !ok {
return fmt.Errorf("first value in .rodata is %T not []byte", kv.Value)
}
buf := make([]byte, len(value))
copy(buf, value)
err := patchValue(buf, btf.MapValue(rodata.BTF), consts)
if err != nil {
return err
}
rodata.Contents[0] = MapKV{kv.Key, buf}
return nil
}
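
RewriteConstants patches the contents of the .rodata map before the collection is created, so values declared as `static volatile const` in the C source can be set at load time. A hedged sketch follows; the constant names and the object file are invented for illustration.

package main

import (
    "log"

    "github.com/cilium/ebpf"
)

func main() {
    spec, err := ebpf.LoadCollectionSpec("bpf/prog.o") // hypothetical object file
    if err != nil {
        log.Fatal(err)
    }

    // Keys must match `static volatile const` symbols in the C source, and
    // each value must marshal to exactly sizeof(type) bytes.
    err = spec.RewriteConstants(map[string]interface{}{
        "sample_rate": uint32(100), // e.g. static volatile const __u32 sample_rate;
        "debug":       uint8(1),    // e.g. static volatile const __u8 debug;
    })
    if err != nil {
        log.Fatal(err)
    }

    coll, err := ebpf.NewCollection(spec)
    if err != nil {
        log.Fatal(err)
    }
    defer coll.Close()
}
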
// Collection is a collection of Programs and Maps associated
// with their symbols
type Collection struct {
@ -55,45 +143,103 @@ func NewCollection(spec *CollectionSpec) (*Collection, error) {
// NewCollectionWithOptions creates a Collection from a specification.
//
// Only maps referenced by at least one of the programs are initialized.
func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (*Collection, error) {
maps := make(map[string]*Map)
for mapName, mapSpec := range spec.Maps {
m, err := NewMap(mapSpec)
func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (coll *Collection, err error) {
var (
maps = make(map[string]*Map)
progs = make(map[string]*Program)
btfs = make(map[*btf.Spec]*btf.Handle)
)
defer func() {
for _, btf := range btfs {
btf.Close()
}
if err == nil {
return
}
for _, m := range maps {
m.Close()
}
for _, p := range progs {
p.Close()
}
}()
loadBTF := func(spec *btf.Spec) (*btf.Handle, error) {
if btfs[spec] != nil {
return btfs[spec], nil
}
handle, err := btf.NewHandle(spec)
if err != nil {
return nil, errors.Wrapf(err, "map %s", mapName)
return nil, err
}
btfs[spec] = handle
return handle, nil
}
for mapName, mapSpec := range spec.Maps {
var handle *btf.Handle
if mapSpec.BTF != nil {
handle, err = loadBTF(btf.MapSpec(mapSpec.BTF))
if err != nil && !errors.Is(err, btf.ErrNotSupported) {
return nil, err
}
}
m, err := newMapWithBTF(mapSpec, handle)
if err != nil {
return nil, fmt.Errorf("map %s: %w", mapName, err)
}
maps[mapName] = m
}
progs := make(map[string]*Program)
for progName, origProgSpec := range spec.Programs {
progSpec := origProgSpec.Copy()
// Rewrite any reference to a valid map.
for i := range progSpec.Instructions {
var (
ins = &progSpec.Instructions[i]
m = maps[ins.Reference]
)
ins := &progSpec.Instructions[i]
if ins.Reference == "" || m == nil {
if ins.OpCode != asm.LoadImmOp(asm.DWord) || ins.Reference == "" {
continue
}
if ins.Src == asm.R1 {
if uint32(ins.Constant) != math.MaxUint32 {
// Don't overwrite maps already rewritten, users can
// rewrite programs in the spec themselves
continue
}
m := maps[ins.Reference]
if m == nil {
return nil, fmt.Errorf("program %s: missing map %s", progName, ins.Reference)
}
fd := m.FD()
if fd < 0 {
return nil, fmt.Errorf("map %s: %w", ins.Reference, internal.ErrClosedFd)
}
if err := ins.RewriteMapPtr(m.FD()); err != nil {
return nil, errors.Wrapf(err, "progam %s: map %s", progName, ins.Reference)
return nil, fmt.Errorf("progam %s: map %s: %w", progName, ins.Reference, err)
}
}
prog, err := NewProgramWithOptions(progSpec, opts.Programs)
var handle *btf.Handle
if progSpec.BTF != nil {
handle, err = loadBTF(btf.ProgramSpec(progSpec.BTF))
if err != nil && !errors.Is(err, btf.ErrNotSupported) {
return nil, err
}
}
prog, err := newProgramWithBTF(progSpec, handle, opts.Programs)
if err != nil {
return nil, errors.Wrapf(err, "program %s", progName)
return nil, fmt.Errorf("program %s: %w", progName, err)
}
progs[progName] = prog
}


@ -4,20 +4,25 @@ import (
"bytes"
"debug/elf"
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"os"
"strings"
"github.com/cilium/ebpf/asm"
"github.com/pkg/errors"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/btf"
"github.com/cilium/ebpf/internal/unix"
)
type elfCode struct {
*elf.File
symbols []elf.Symbol
symbolsPerSection map[elf.SectionIndex]map[uint64]string
symbolsPerSection map[elf.SectionIndex]map[uint64]elf.Symbol
license string
version uint32
}
// LoadCollectionSpec parses an ELF file into a CollectionSpec.
@ -29,12 +34,15 @@ func LoadCollectionSpec(file string) (*CollectionSpec, error) {
defer f.Close()
spec, err := LoadCollectionSpecFromReader(f)
return spec, errors.Wrapf(err, "file %s", file)
if err != nil {
return nil, fmt.Errorf("file %s: %w", file, err)
}
return spec, nil
}
// LoadCollectionSpecFromReader parses an ELF file into a CollectionSpec.
func LoadCollectionSpecFromReader(code io.ReaderAt) (*CollectionSpec, error) {
f, err := elf.NewFile(code)
func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) {
f, err := elf.NewFile(rd)
if err != nil {
return nil, err
}
@ -42,15 +50,21 @@ func LoadCollectionSpecFromReader(code io.ReaderAt) (*CollectionSpec, error) {
symbols, err := f.Symbols()
if err != nil {
return nil, errors.Wrap(err, "load symbols")
return nil, fmt.Errorf("load symbols: %v", err)
}
ec := &elfCode{f, symbols, symbolsPerSection(symbols)}
ec := &elfCode{f, symbols, symbolsPerSection(symbols), "", 0}
var (
licenseSection *elf.Section
versionSection *elf.Section
btfMaps = make(map[elf.SectionIndex]*elf.Section)
progSections = make(map[elf.SectionIndex]*elf.Section)
relSections = make(map[elf.SectionIndex]*elf.Section)
mapSections = make(map[elf.SectionIndex]*elf.Section)
dataSections = make(map[elf.SectionIndex]*elf.Section)
)
var licenseSection, versionSection *elf.Section
progSections := make(map[elf.SectionIndex]*elf.Section)
relSections := make(map[elf.SectionIndex]*elf.Section)
mapSections := make(map[elf.SectionIndex]*elf.Section)
for i, sec := range ec.Sections {
switch {
case strings.HasPrefix(sec.Name, "license"):
@ -59,15 +73,19 @@ func LoadCollectionSpecFromReader(code io.ReaderAt) (*CollectionSpec, error) {
versionSection = sec
case strings.HasPrefix(sec.Name, "maps"):
mapSections[elf.SectionIndex(i)] = sec
case sec.Name == ".maps":
btfMaps[elf.SectionIndex(i)] = sec
case sec.Name == ".bss" || sec.Name == ".rodata" || sec.Name == ".data":
dataSections[elf.SectionIndex(i)] = sec
case sec.Type == elf.SHT_REL:
if int(sec.Info) >= len(ec.Sections) {
return nil, errors.Errorf("found relocation section %v for missing section %v", i, sec.Info)
return nil, fmt.Errorf("found relocation section %v for missing section %v", i, sec.Info)
}
// Store relocations under the section index of the target
idx := elf.SectionIndex(sec.Info)
if relSections[idx] != nil {
return nil, errors.Errorf("section %d has multiple relocation sections", idx)
return nil, fmt.Errorf("section %d has multiple relocation sections", sec.Info)
}
relSections[idx] = sec
case sec.Type == elf.SHT_PROGBITS && (sec.Flags&elf.SHF_EXECINSTR) != 0 && sec.Size > 0:
@ -75,33 +93,54 @@ func LoadCollectionSpecFromReader(code io.ReaderAt) (*CollectionSpec, error) {
}
}
license, err := loadLicense(licenseSection)
ec.license, err = loadLicense(licenseSection)
if err != nil {
return nil, errors.Wrap(err, "load license")
return nil, fmt.Errorf("load license: %w", err)
}
version, err := loadVersion(versionSection, ec.ByteOrder)
ec.version, err = loadVersion(versionSection, ec.ByteOrder)
if err != nil {
return nil, errors.Wrap(err, "load version")
return nil, fmt.Errorf("load version: %w", err)
}
maps, err := ec.loadMaps(mapSections)
btfSpec, err := btf.LoadSpecFromReader(rd)
if err != nil {
return nil, errors.Wrap(err, "load maps")
return nil, fmt.Errorf("load BTF: %w", err)
}
progs, libs, err := ec.loadPrograms(progSections, relSections, license, version)
relocations, referencedSections, err := ec.loadRelocations(relSections)
if err != nil {
return nil, errors.Wrap(err, "load programs")
return nil, fmt.Errorf("load relocations: %w", err)
}
if len(libs) > 0 {
for name, prog := range progs {
prog.Instructions, err = link(prog.Instructions, libs...)
if err != nil {
return nil, errors.Wrapf(err, "program %s", name)
maps := make(map[string]*MapSpec)
if err := ec.loadMaps(maps, mapSections); err != nil {
return nil, fmt.Errorf("load maps: %w", err)
}
if len(btfMaps) > 0 {
if err := ec.loadBTFMaps(maps, btfMaps, btfSpec); err != nil {
return nil, fmt.Errorf("load BTF maps: %w", err)
}
}
if len(dataSections) > 0 {
for idx := range dataSections {
if !referencedSections[idx] {
// Prune data sections which are not referenced by any
// instructions.
delete(dataSections, idx)
}
}
if err := ec.loadDataSections(maps, dataSections, btfSpec); err != nil {
return nil, fmt.Errorf("load data sections: %w", err)
}
}
progs, err := ec.loadPrograms(progSections, relocations, btfSpec)
if err != nil {
return nil, fmt.Errorf("load programs: %w", err)
}
return &CollectionSpec{maps, progs}, nil
@ -109,11 +148,12 @@ func LoadCollectionSpecFromReader(code io.ReaderAt) (*CollectionSpec, error) {
func loadLicense(sec *elf.Section) (string, error) {
if sec == nil {
return "", errors.Errorf("missing license section")
return "", nil
}
data, err := sec.Data()
if err != nil {
return "", errors.Wrapf(err, "section %s", sec.Name)
return "", fmt.Errorf("section %s: %v", sec.Name, err)
}
return string(bytes.TrimRight(data, "\000")), nil
}
@ -124,92 +164,228 @@ func loadVersion(sec *elf.Section, bo binary.ByteOrder) (uint32, error) {
}
var version uint32
err := binary.Read(sec.Open(), bo, &version)
return version, errors.Wrapf(err, "section %s", sec.Name)
if err := binary.Read(sec.Open(), bo, &version); err != nil {
return 0, fmt.Errorf("section %s: %v", sec.Name, err)
}
return version, nil
}
func (ec *elfCode) loadPrograms(progSections, relSections map[elf.SectionIndex]*elf.Section, license string, version uint32) (map[string]*ProgramSpec, []asm.Instructions, error) {
func (ec *elfCode) loadPrograms(progSections map[elf.SectionIndex]*elf.Section, relocations map[elf.SectionIndex]map[uint64]elf.Symbol, btfSpec *btf.Spec) (map[string]*ProgramSpec, error) {
var (
progs = make(map[string]*ProgramSpec)
libs []asm.Instructions
progs []*ProgramSpec
libs []*ProgramSpec
)
for idx, prog := range progSections {
for idx, sec := range progSections {
syms := ec.symbolsPerSection[idx]
if len(syms) == 0 {
return nil, nil, errors.Errorf("section %v: missing symbols", prog.Name)
return nil, fmt.Errorf("section %v: missing symbols", sec.Name)
}
funcSym := syms[0]
if funcSym == "" {
return nil, nil, errors.Errorf("section %v: no label at start", prog.Name)
funcSym, ok := syms[0]
if !ok {
return nil, fmt.Errorf("section %v: no label at start", sec.Name)
}
rels, err := ec.loadRelocations(relSections[idx])
insns, length, err := ec.loadInstructions(sec, syms, relocations[idx])
if err != nil {
return nil, nil, errors.Wrapf(err, "program %s: can't load relocations", funcSym)
return nil, fmt.Errorf("program %s: can't unmarshal instructions: %w", funcSym.Name, err)
}
insns, err := ec.loadInstructions(prog, syms, rels)
if err != nil {
return nil, nil, errors.Wrapf(err, "program %s: can't unmarshal instructions", funcSym)
progType, attachType, attachTo := getProgType(sec.Name)
spec := &ProgramSpec{
Name: funcSym.Name,
Type: progType,
AttachType: attachType,
AttachTo: attachTo,
License: ec.license,
KernelVersion: ec.version,
Instructions: insns,
ByteOrder: ec.ByteOrder,
}
if progType, attachType := getProgType(prog.Name); progType == UnspecifiedProgram {
if btfSpec != nil {
spec.BTF, err = btfSpec.Program(sec.Name, length)
if err != nil && !errors.Is(err, btf.ErrNoExtendedInfo) {
return nil, fmt.Errorf("program %s: %w", funcSym.Name, err)
}
}
if spec.Type == UnspecifiedProgram {
// There is no single name we can use for "library" sections,
// since they may contain multiple functions. We'll decode the
// labels they contain later on, and then link sections that way.
libs = append(libs, insns)
libs = append(libs, spec)
} else {
progs[funcSym] = &ProgramSpec{
Name: funcSym,
Type: progType,
AttachType: attachType,
License: license,
KernelVersion: version,
Instructions: insns,
}
progs = append(progs, spec)
}
}
return progs, libs, nil
res := make(map[string]*ProgramSpec, len(progs))
for _, prog := range progs {
err := link(prog, libs)
if err != nil {
return nil, fmt.Errorf("program %s: %w", prog.Name, err)
}
res[prog.Name] = prog
}
return res, nil
}
func (ec *elfCode) loadInstructions(section *elf.Section, symbols, relocations map[uint64]string) (asm.Instructions, error) {
func (ec *elfCode) loadInstructions(section *elf.Section, symbols, relocations map[uint64]elf.Symbol) (asm.Instructions, uint64, error) {
var (
r = section.Open()
insns asm.Instructions
ins asm.Instruction
offset uint64
)
for {
var ins asm.Instruction
n, err := ins.Unmarshal(r, ec.ByteOrder)
if err == io.EOF {
return insns, nil
return insns, offset, nil
}
if err != nil {
return nil, errors.Wrapf(err, "offset %d", offset)
return nil, 0, fmt.Errorf("offset %d: %w", offset, err)
}
ins.Symbol = symbols[offset]
ins.Reference = relocations[offset]
ins.Symbol = symbols[offset].Name
if rel, ok := relocations[offset]; ok {
if err = ec.relocateInstruction(&ins, rel); err != nil {
return nil, 0, fmt.Errorf("offset %d: can't relocate instruction: %w", offset, err)
}
}
insns = append(insns, ins)
offset += n
}
}
func (ec *elfCode) loadMaps(mapSections map[elf.SectionIndex]*elf.Section) (map[string]*MapSpec, error) {
func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) error {
var (
maps = make(map[string]*MapSpec)
b = make([]byte, 1)
typ = elf.ST_TYPE(rel.Info)
bind = elf.ST_BIND(rel.Info)
name = rel.Name
)
if typ == elf.STT_SECTION {
// Symbols with section type do not have a name set. Get it
// from the section itself.
idx := int(rel.Section)
if idx > len(ec.Sections) {
return errors.New("out-of-bounds section index")
}
name = ec.Sections[idx].Name
}
outer:
switch {
case ins.OpCode == asm.LoadImmOp(asm.DWord):
// There are two distinct types of a load from a map:
// a direct one, where the value is extracted without
// a call to map_lookup_elem in eBPF, and an indirect one
// that goes via the helper. They are distinguished by
// different relocations.
switch typ {
case elf.STT_SECTION:
// This is a direct load since the referenced symbol is a
// section. Weirdly, the offset of the real symbol in the
// section is encoded in the instruction stream.
if bind != elf.STB_LOCAL {
return fmt.Errorf("direct load: %s: unsupported relocation %s", name, bind)
}
// For some reason, clang encodes the offset of the symbol its
// section in the first basic BPF instruction, while the kernel
// expects it in the second one.
ins.Constant <<= 32
ins.Src = asm.PseudoMapValue
case elf.STT_NOTYPE:
if bind == elf.STB_GLOBAL && rel.Section == elf.SHN_UNDEF {
// This is a relocation generated by inline assembly.
// We can't do more than assigning ins.Reference.
break outer
}
// This is an ELF generated on clang < 8, which doesn't tag
// relocations appropriately.
fallthrough
case elf.STT_OBJECT:
if bind != elf.STB_GLOBAL {
return fmt.Errorf("load: %s: unsupported binding: %s", name, bind)
}
ins.Src = asm.PseudoMapFD
default:
return fmt.Errorf("load: %s: unsupported relocation: %s", name, typ)
}
// Mark the instruction as needing an update when creating the
// collection.
if err := ins.RewriteMapPtr(-1); err != nil {
return err
}
case ins.OpCode.JumpOp() == asm.Call:
if ins.Src != asm.PseudoCall {
return fmt.Errorf("call: %s: incorrect source register", name)
}
switch typ {
case elf.STT_NOTYPE, elf.STT_FUNC:
if bind != elf.STB_GLOBAL {
return fmt.Errorf("call: %s: unsupported binding: %s", name, bind)
}
case elf.STT_SECTION:
if bind != elf.STB_LOCAL {
return fmt.Errorf("call: %s: unsupported binding: %s", name, bind)
}
// The function we want to call is in the indicated section,
// at the offset encoded in the instruction itself. Reverse
// the calculation to find the real function we're looking for.
// A value of -1 references the first instruction in the section.
offset := int64(int32(ins.Constant)+1) * asm.InstructionSize
if offset < 0 {
return fmt.Errorf("call: %s: invalid offset %d", name, offset)
}
sym, ok := ec.symbolsPerSection[rel.Section][uint64(offset)]
if !ok {
return fmt.Errorf("call: %s: no symbol at offset %d", name, offset)
}
ins.Constant = -1
name = sym.Name
default:
return fmt.Errorf("call: %s: invalid symbol type %s", name, typ)
}
default:
return fmt.Errorf("relocation for unsupported instruction: %s", ins.OpCode)
}
ins.Reference = name
return nil
}
func (ec *elfCode) loadMaps(maps map[string]*MapSpec, mapSections map[elf.SectionIndex]*elf.Section) error {
for idx, sec := range mapSections {
syms := ec.symbolsPerSection[idx]
if len(syms) == 0 {
return nil, errors.Errorf("section %v: no symbols", sec.Name)
return fmt.Errorf("section %v: no symbols", sec.Name)
}
if sec.Size%uint64(len(syms)) != 0 {
return nil, errors.Errorf("section %v: map descriptors are not of equal size", sec.Name)
return fmt.Errorf("section %v: map descriptors are not of equal size", sec.Name)
}
var (
@ -217,154 +393,324 @@ func (ec *elfCode) loadMaps(mapSections map[elf.SectionIndex]*elf.Section) (map[
size = sec.Size / uint64(len(syms))
)
for i, offset := 0, uint64(0); i < len(syms); i, offset = i+1, offset+size {
mapSym := syms[offset]
if mapSym == "" {
fmt.Println(syms)
return nil, errors.Errorf("section %s: missing symbol for map at offset %d", sec.Name, offset)
mapSym, ok := syms[offset]
if !ok {
return fmt.Errorf("section %s: missing symbol for map at offset %d", sec.Name, offset)
}
if maps[mapSym] != nil {
return nil, errors.Errorf("section %v: map %v already exists", sec.Name, mapSym)
if maps[mapSym.Name] != nil {
return fmt.Errorf("section %v: map %v already exists", sec.Name, mapSym)
}
lr := io.LimitReader(r, int64(size))
var spec MapSpec
spec := MapSpec{
Name: SanitizeName(mapSym.Name, -1),
}
switch {
case binary.Read(lr, ec.ByteOrder, &spec.Type) != nil:
return nil, errors.Errorf("map %v: missing type", mapSym)
return fmt.Errorf("map %v: missing type", mapSym)
case binary.Read(lr, ec.ByteOrder, &spec.KeySize) != nil:
return nil, errors.Errorf("map %v: missing key size", mapSym)
return fmt.Errorf("map %v: missing key size", mapSym)
case binary.Read(lr, ec.ByteOrder, &spec.ValueSize) != nil:
return nil, errors.Errorf("map %v: missing value size", mapSym)
return fmt.Errorf("map %v: missing value size", mapSym)
case binary.Read(lr, ec.ByteOrder, &spec.MaxEntries) != nil:
return nil, errors.Errorf("map %v: missing max entries", mapSym)
return fmt.Errorf("map %v: missing max entries", mapSym)
case binary.Read(lr, ec.ByteOrder, &spec.Flags) != nil:
return nil, errors.Errorf("map %v: missing flags", mapSym)
return fmt.Errorf("map %v: missing flags", mapSym)
}
for {
_, err := lr.Read(b)
if err == io.EOF {
break
}
if err != nil {
return nil, err
}
if b[0] != 0 {
return nil, errors.Errorf("map %v: unknown and non-zero fields in definition", mapSym)
}
if _, err := io.Copy(internal.DiscardZeroes{}, lr); err != nil {
return fmt.Errorf("map %v: unknown and non-zero fields in definition", mapSym)
}
maps[mapSym] = &spec
maps[mapSym.Name] = &spec
}
}
return maps, nil
return nil
}
func getProgType(v string) (ProgramType, AttachType) {
types := map[string]ProgramType{
// From https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/bpf/libbpf.c#n3568
"socket": SocketFilter,
"seccomp": SocketFilter,
"kprobe/": Kprobe,
"kretprobe/": Kprobe,
"tracepoint/": TracePoint,
"xdp": XDP,
"perf_event": PerfEvent,
"sockops": SockOps,
"sk_skb": SkSKB,
"sk_msg": SkMsg,
"lirc_mode2": LircMode2,
"flow_dissector": FlowDissector,
func (ec *elfCode) loadBTFMaps(maps map[string]*MapSpec, mapSections map[elf.SectionIndex]*elf.Section, spec *btf.Spec) error {
if spec == nil {
return fmt.Errorf("missing BTF")
}
"cgroup_skb/": CGroupSKB,
"cgroup/dev": CGroupDevice,
"cgroup/skb": CGroupSKB,
"cgroup/sock": CGroupSock,
"cgroup/post_bind": CGroupSock,
"cgroup/bind": CGroupSockAddr,
"cgroup/connect": CGroupSockAddr,
"cgroup/sendmsg": CGroupSockAddr,
"cgroup/recvmsg": CGroupSockAddr,
"cgroup/sysctl": CGroupSysctl,
"cgroup/getsockopt": CGroupSockopt,
"cgroup/setsockopt": CGroupSockopt,
"classifier": SchedCLS,
"action": SchedACT,
}
attachTypes := map[string]AttachType{
"cgroup_skb/ingress": AttachCGroupInetIngress,
"cgroup_skb/egress": AttachCGroupInetEgress,
"cgroup/sock": AttachCGroupInetSockCreate,
"cgroup/post_bind4": AttachCGroupInet4PostBind,
"cgroup/post_bind6": AttachCGroupInet6PostBind,
"cgroup/dev": AttachCGroupDevice,
"sockops": AttachCGroupSockOps,
"sk_skb/stream_parser": AttachSkSKBStreamParser,
"sk_skb/stream_verdict": AttachSkSKBStreamVerdict,
"sk_msg": AttachSkSKBStreamVerdict,
"lirc_mode2": AttachLircMode2,
"flow_dissector": AttachFlowDissector,
"cgroup/bind4": AttachCGroupInet4Bind,
"cgroup/bind6": AttachCGroupInet6Bind,
"cgroup/connect4": AttachCGroupInet4Connect,
"cgroup/connect6": AttachCGroupInet6Connect,
"cgroup/sendmsg4": AttachCGroupUDP4Sendmsg,
"cgroup/sendmsg6": AttachCGroupUDP6Sendmsg,
"cgroup/recvmsg4": AttachCGroupUDP4Recvmsg,
"cgroup/recvmsg6": AttachCGroupUDP6Recvmsg,
"cgroup/sysctl": AttachCGroupSysctl,
"cgroup/getsockopt": AttachCGroupGetsockopt,
"cgroup/setsockopt": AttachCGroupSetsockopt,
}
attachType := AttachNone
for k, t := range attachTypes {
if strings.HasPrefix(v, k) {
attachType = t
for idx, sec := range mapSections {
syms := ec.symbolsPerSection[idx]
if len(syms) == 0 {
return fmt.Errorf("section %v: no symbols", sec.Name)
}
for _, sym := range syms {
name := sym.Name
if maps[name] != nil {
return fmt.Errorf("section %v: map %v already exists", sec.Name, sym)
}
mapSpec, err := mapSpecFromBTF(spec, name)
if err != nil {
return fmt.Errorf("map %v: %w", name, err)
}
maps[name] = mapSpec
}
}
for k, t := range types {
if strings.HasPrefix(v, k) {
return t, attachType
}
}
return UnspecifiedProgram, AttachNone
return nil
}
func (ec *elfCode) loadRelocations(sec *elf.Section) (map[uint64]string, error) {
rels := make(map[uint64]string)
if sec == nil {
return rels, nil
func mapSpecFromBTF(spec *btf.Spec, name string) (*MapSpec, error) {
btfMap, btfMapMembers, err := spec.Map(name)
if err != nil {
return nil, fmt.Errorf("can't get BTF: %w", err)
}
if sec.Entsize < 16 {
return nil, errors.New("rels are less than 16 bytes")
keyType := btf.MapKey(btfMap)
size, err := btf.Sizeof(keyType)
if err != nil {
return nil, fmt.Errorf("can't get size of BTF key: %w", err)
}
keySize := uint32(size)
r := sec.Open()
for off := uint64(0); off < sec.Size; off += sec.Entsize {
ent := io.LimitReader(r, int64(sec.Entsize))
valueType := btf.MapValue(btfMap)
size, err = btf.Sizeof(valueType)
if err != nil {
return nil, fmt.Errorf("can't get size of BTF value: %w", err)
}
valueSize := uint32(size)
var rel elf.Rel64
if binary.Read(ent, ec.ByteOrder, &rel) != nil {
return nil, errors.Errorf("can't parse relocation at offset %v", off)
var (
mapType, flags, maxEntries uint32
)
for _, member := range btfMapMembers {
switch member.Name {
case "type":
mapType, err = uintFromBTF(member.Type)
if err != nil {
return nil, fmt.Errorf("can't get type: %w", err)
}
case "map_flags":
flags, err = uintFromBTF(member.Type)
if err != nil {
return nil, fmt.Errorf("can't get BTF map flags: %w", err)
}
case "max_entries":
maxEntries, err = uintFromBTF(member.Type)
if err != nil {
return nil, fmt.Errorf("can't get BTF map max entries: %w", err)
}
case "key_size":
if _, isVoid := keyType.(*btf.Void); !isVoid {
return nil, errors.New("both key and key_size given")
}
keySize, err = uintFromBTF(member.Type)
if err != nil {
return nil, fmt.Errorf("can't get BTF key size: %w", err)
}
case "value_size":
if _, isVoid := valueType.(*btf.Void); !isVoid {
return nil, errors.New("both value and value_size given")
}
valueSize, err = uintFromBTF(member.Type)
if err != nil {
return nil, fmt.Errorf("can't get BTF value size: %w", err)
}
case "pinning":
pinning, err := uintFromBTF(member.Type)
if err != nil {
return nil, fmt.Errorf("can't get pinning: %w", err)
}
if pinning != 0 {
return nil, fmt.Errorf("'pinning' attribute not supported: %w", ErrNotSupported)
}
case "key", "value":
default:
return nil, fmt.Errorf("unrecognized field %s in BTF map definition", member.Name)
}
symNo := int(elf.R_SYM64(rel.Info) - 1)
if symNo >= len(ec.symbols) {
return nil, errors.Errorf("relocation at offset %d: symbol %v doesnt exist", off, symNo)
}
rels[rel.Off] = ec.symbols[symNo].Name
}
return rels, nil
return &MapSpec{
Type: MapType(mapType),
KeySize: keySize,
ValueSize: valueSize,
MaxEntries: maxEntries,
Flags: flags,
BTF: btfMap,
}, nil
}
func symbolsPerSection(symbols []elf.Symbol) map[elf.SectionIndex]map[uint64]string {
result := make(map[elf.SectionIndex]map[uint64]string)
for i, sym := range symbols {
// uintFromBTF resolves the __uint macro, which is a pointer to a sized
// array, e.g. for int (*foo)[10], this function will return 10.
func uintFromBTF(typ btf.Type) (uint32, error) {
ptr, ok := typ.(*btf.Pointer)
if !ok {
return 0, fmt.Errorf("not a pointer: %v", typ)
}
arr, ok := ptr.Target.(*btf.Array)
if !ok {
return 0, fmt.Errorf("not a pointer to array: %v", typ)
}
return arr.Nelems, nil
}
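
The __uint(name, value) macro used in BTF-style map definitions encodes its value as the length of an array behind a pointer, which is why uintFromBTF walks Pointer → Array and returns Nelems. Since internal/btf cannot be imported from outside the module, the sketch below restates that shape with throwaway local types rather than the real ones.

package main

import (
    "errors"
    "fmt"
)

// Throwaway stand-ins for btf.Pointer and btf.Array.
type anyType interface{ isType() }

type pointer struct{ target anyType }
type array struct{ nelems uint32 }

func (pointer) isType() {}
func (array) isType()   {}

// uintFromBTF mirrors the helper above: `__uint(max_entries, 4096)` is
// emitted as a pointer to an array of 4096 elements, so the value to
// recover is simply the array length.
func uintFromBTF(typ anyType) (uint32, error) {
    ptr, ok := typ.(pointer)
    if !ok {
        return 0, errors.New("not a pointer")
    }
    arr, ok := ptr.target.(array)
    if !ok {
        return 0, errors.New("not a pointer to array")
    }
    return arr.nelems, nil
}

func main() {
    v, err := uintFromBTF(pointer{target: array{nelems: 4096}})
    fmt.Println(v, err) // 4096 <nil>
}
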
func (ec *elfCode) loadDataSections(maps map[string]*MapSpec, dataSections map[elf.SectionIndex]*elf.Section, spec *btf.Spec) error {
if spec == nil {
return errors.New("data sections require BTF, make sure all consts are marked as static")
}
for _, sec := range dataSections {
btfMap, err := spec.Datasec(sec.Name)
if err != nil {
return err
}
data, err := sec.Data()
if err != nil {
return fmt.Errorf("data section %s: can't get contents: %w", sec.Name, err)
}
if uint64(len(data)) > math.MaxUint32 {
return fmt.Errorf("data section %s: contents exceed maximum size", sec.Name)
}
mapSpec := &MapSpec{
Name: SanitizeName(sec.Name, -1),
Type: Array,
KeySize: 4,
ValueSize: uint32(len(data)),
MaxEntries: 1,
Contents: []MapKV{{uint32(0), data}},
BTF: btfMap,
}
switch sec.Name {
case ".rodata":
mapSpec.Flags = unix.BPF_F_RDONLY_PROG
mapSpec.Freeze = true
case ".bss":
// The kernel already zero-initializes the map
mapSpec.Contents = nil
}
maps[sec.Name] = mapSpec
}
return nil
}
func getProgType(sectionName string) (ProgramType, AttachType, string) {
types := map[string]struct {
progType ProgramType
attachType AttachType
}{
// From https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/bpf/libbpf.c
"socket": {SocketFilter, AttachNone},
"seccomp": {SocketFilter, AttachNone},
"kprobe/": {Kprobe, AttachNone},
"uprobe/": {Kprobe, AttachNone},
"kretprobe/": {Kprobe, AttachNone},
"uretprobe/": {Kprobe, AttachNone},
"tracepoint/": {TracePoint, AttachNone},
"raw_tracepoint/": {RawTracepoint, AttachNone},
"xdp": {XDP, AttachNone},
"perf_event": {PerfEvent, AttachNone},
"lwt_in": {LWTIn, AttachNone},
"lwt_out": {LWTOut, AttachNone},
"lwt_xmit": {LWTXmit, AttachNone},
"lwt_seg6local": {LWTSeg6Local, AttachNone},
"sockops": {SockOps, AttachCGroupSockOps},
"sk_skb/stream_parser": {SkSKB, AttachSkSKBStreamParser},
"sk_skb/stream_verdict": {SkSKB, AttachSkSKBStreamParser},
"sk_msg": {SkMsg, AttachSkSKBStreamVerdict},
"lirc_mode2": {LircMode2, AttachLircMode2},
"flow_dissector": {FlowDissector, AttachFlowDissector},
"iter/": {Tracing, AttachTraceIter},
"cgroup_skb/ingress": {CGroupSKB, AttachCGroupInetIngress},
"cgroup_skb/egress": {CGroupSKB, AttachCGroupInetEgress},
"cgroup/dev": {CGroupDevice, AttachCGroupDevice},
"cgroup/skb": {CGroupSKB, AttachNone},
"cgroup/sock": {CGroupSock, AttachCGroupInetSockCreate},
"cgroup/post_bind4": {CGroupSock, AttachCGroupInet4PostBind},
"cgroup/post_bind6": {CGroupSock, AttachCGroupInet6PostBind},
"cgroup/bind4": {CGroupSockAddr, AttachCGroupInet4Bind},
"cgroup/bind6": {CGroupSockAddr, AttachCGroupInet6Bind},
"cgroup/connect4": {CGroupSockAddr, AttachCGroupInet4Connect},
"cgroup/connect6": {CGroupSockAddr, AttachCGroupInet6Connect},
"cgroup/sendmsg4": {CGroupSockAddr, AttachCGroupUDP4Sendmsg},
"cgroup/sendmsg6": {CGroupSockAddr, AttachCGroupUDP6Sendmsg},
"cgroup/recvmsg4": {CGroupSockAddr, AttachCGroupUDP4Recvmsg},
"cgroup/recvmsg6": {CGroupSockAddr, AttachCGroupUDP6Recvmsg},
"cgroup/sysctl": {CGroupSysctl, AttachCGroupSysctl},
"cgroup/getsockopt": {CGroupSockopt, AttachCGroupGetsockopt},
"cgroup/setsockopt": {CGroupSockopt, AttachCGroupSetsockopt},
"classifier": {SchedCLS, AttachNone},
"action": {SchedACT, AttachNone},
}
for prefix, t := range types {
if !strings.HasPrefix(sectionName, prefix) {
continue
}
if !strings.HasSuffix(prefix, "/") {
return t.progType, t.attachType, ""
}
return t.progType, t.attachType, sectionName[len(prefix):]
}
return UnspecifiedProgram, AttachNone, ""
}
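
getProgType now returns a third value: the remainder of the section name after a prefix ending in "/", which becomes ProgramSpec.AttachTo (for example the kprobe symbol or tracepoint name). The standalone snippet below restates that prefix rule with only a handful of entries; the authoritative table is the one above.

package main

import (
    "fmt"
    "strings"
)

// splitSection restates the matching rule above: when the matching prefix
// ends in "/", whatever follows it is returned as the attach target.
func splitSection(name string) (prefix, attachTo string) {
    for _, p := range []string{"kprobe/", "uprobe/", "tracepoint/", "cgroup/bind4", "classifier"} {
        if !strings.HasPrefix(name, p) {
            continue
        }
        if strings.HasSuffix(p, "/") {
            return p, name[len(p):]
        }
        return p, ""
    }
    return "", ""
}

func main() {
    for _, name := range []string{"kprobe/sys_execve", "cgroup/bind4", "classifier"} {
        prefix, attachTo := splitSection(name)
        fmt.Printf("%-20s prefix=%q attachTo=%q\n", name, prefix, attachTo)
    }
}
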
func (ec *elfCode) loadRelocations(sections map[elf.SectionIndex]*elf.Section) (map[elf.SectionIndex]map[uint64]elf.Symbol, map[elf.SectionIndex]bool, error) {
result := make(map[elf.SectionIndex]map[uint64]elf.Symbol)
targets := make(map[elf.SectionIndex]bool)
for idx, sec := range sections {
rels := make(map[uint64]elf.Symbol)
if sec.Entsize < 16 {
return nil, nil, fmt.Errorf("section %s: relocations are less than 16 bytes", sec.Name)
}
r := sec.Open()
for off := uint64(0); off < sec.Size; off += sec.Entsize {
ent := io.LimitReader(r, int64(sec.Entsize))
var rel elf.Rel64
if binary.Read(ent, ec.ByteOrder, &rel) != nil {
return nil, nil, fmt.Errorf("can't parse relocation at offset %v", off)
}
symNo := int(elf.R_SYM64(rel.Info) - 1)
if symNo >= len(ec.symbols) {
return nil, nil, fmt.Errorf("relocation at offset %d: symbol %v doesnt exist", off, symNo)
}
symbol := ec.symbols[symNo]
targets[symbol.Section] = true
rels[rel.Off] = ec.symbols[symNo]
}
result[idx] = rels
}
return result, targets, nil
}
func symbolsPerSection(symbols []elf.Symbol) map[elf.SectionIndex]map[uint64]elf.Symbol {
result := make(map[elf.SectionIndex]map[uint64]elf.Symbol)
for _, sym := range symbols {
switch elf.ST_TYPE(sym.Info) {
case elf.STT_NOTYPE:
// Older versions of LLVM doesn't tag
@ -378,15 +724,19 @@ func symbolsPerSection(symbols []elf.Symbol) map[elf.SectionIndex]map[uint64]str
continue
}
if sym.Section == elf.SHN_UNDEF || sym.Section >= elf.SHN_LORESERVE {
continue
}
if sym.Name == "" {
continue
}
idx := sym.Section
if _, ok := result[idx]; !ok {
result[idx] = make(map[uint64]string)
result[idx] = make(map[uint64]elf.Symbol)
}
result[idx][sym.Value] = symbols[i].Name
result[idx][sym.Value] = sym
}
return result
}


@ -1,8 +1,5 @@
module github.com/cilium/ebpf
go 1.12
go 1.13
require (
github.com/pkg/errors v0.8.1
golang.org/x/sys v0.0.0-20191022100944-742c48ecaeb7
)
require golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9

vendor/github.com/cilium/ebpf/internal/btf/btf.go generated vendored Normal file

@ -0,0 +1,716 @@
package btf
import (
"bytes"
"debug/elf"
"encoding/binary"
"errors"
"fmt"
"io"
"io/ioutil"
"math"
"os"
"reflect"
"sync"
"unsafe"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/unix"
)
const btfMagic = 0xeB9F
// Errors returned by BTF functions.
var (
ErrNotSupported = internal.ErrNotSupported
ErrNotFound = errors.New("not found")
ErrNoExtendedInfo = errors.New("no extended info")
)
// Spec represents decoded BTF.
type Spec struct {
rawTypes []rawType
strings stringTable
types map[string][]Type
funcInfos map[string]extInfo
lineInfos map[string]extInfo
byteOrder binary.ByteOrder
}
type btfHeader struct {
Magic uint16
Version uint8
Flags uint8
HdrLen uint32
TypeOff uint32
TypeLen uint32
StringOff uint32
StringLen uint32
}
// LoadSpecFromReader reads BTF sections from an ELF.
//
// Returns a nil Spec and no error if no BTF was present.
func LoadSpecFromReader(rd io.ReaderAt) (*Spec, error) {
file, err := elf.NewFile(rd)
if err != nil {
return nil, err
}
defer file.Close()
var (
btfSection *elf.Section
btfExtSection *elf.Section
sectionSizes = make(map[string]uint32)
)
for _, sec := range file.Sections {
switch sec.Name {
case ".BTF":
btfSection = sec
case ".BTF.ext":
btfExtSection = sec
default:
if sec.Type != elf.SHT_PROGBITS && sec.Type != elf.SHT_NOBITS {
break
}
if sec.Size > math.MaxUint32 {
return nil, fmt.Errorf("section %s exceeds maximum size", sec.Name)
}
sectionSizes[sec.Name] = uint32(sec.Size)
}
}
if btfSection == nil {
return nil, nil
}
symbols, err := file.Symbols()
if err != nil {
return nil, fmt.Errorf("can't read symbols: %v", err)
}
variableOffsets := make(map[variable]uint32)
for _, symbol := range symbols {
if idx := symbol.Section; idx >= elf.SHN_LORESERVE && idx <= elf.SHN_HIRESERVE {
// Ignore things like SHN_ABS
continue
}
secName := file.Sections[symbol.Section].Name
if _, ok := sectionSizes[secName]; !ok {
continue
}
if symbol.Value > math.MaxUint32 {
return nil, fmt.Errorf("section %s: symbol %s: size exceeds maximum", secName, symbol.Name)
}
variableOffsets[variable{secName, symbol.Name}] = uint32(symbol.Value)
}
spec, err := loadNakedSpec(btfSection.Open(), file.ByteOrder, sectionSizes, variableOffsets)
if err != nil {
return nil, err
}
if btfExtSection == nil {
return spec, nil
}
spec.funcInfos, spec.lineInfos, err = parseExtInfos(btfExtSection.Open(), file.ByteOrder, spec.strings)
if err != nil {
return nil, fmt.Errorf("can't read ext info: %w", err)
}
return spec, nil
}
func loadNakedSpec(btf io.ReadSeeker, bo binary.ByteOrder, sectionSizes map[string]uint32, variableOffsets map[variable]uint32) (*Spec, error) {
rawTypes, rawStrings, err := parseBTF(btf, bo)
if err != nil {
return nil, err
}
err = fixupDatasec(rawTypes, rawStrings, sectionSizes, variableOffsets)
if err != nil {
return nil, err
}
types, err := inflateRawTypes(rawTypes, rawStrings)
if err != nil {
return nil, err
}
return &Spec{
rawTypes: rawTypes,
types: types,
strings: rawStrings,
byteOrder: bo,
}, nil
}
var kernelBTF struct {
sync.Mutex
*Spec
}
// LoadKernelSpec returns the current kernel's BTF information.
//
// Requires a >= 5.5 kernel with CONFIG_DEBUG_INFO_BTF enabled. Returns
// ErrNotSupported if BTF is not enabled.
func LoadKernelSpec() (*Spec, error) {
kernelBTF.Lock()
defer kernelBTF.Unlock()
if kernelBTF.Spec != nil {
return kernelBTF.Spec, nil
}
var err error
kernelBTF.Spec, err = loadKernelSpec()
return kernelBTF.Spec, err
}
func loadKernelSpec() (*Spec, error) {
fh, err := os.Open("/sys/kernel/btf/vmlinux")
if os.IsNotExist(err) {
return nil, fmt.Errorf("can't open kernel BTF at /sys/kernel/btf/vmlinux: %w", ErrNotFound)
}
if err != nil {
return nil, fmt.Errorf("can't read kernel BTF: %s", err)
}
defer fh.Close()
return loadNakedSpec(fh, internal.NativeEndian, nil, nil)
}
func parseBTF(btf io.ReadSeeker, bo binary.ByteOrder) ([]rawType, stringTable, error) {
rawBTF, err := ioutil.ReadAll(btf)
if err != nil {
return nil, nil, fmt.Errorf("can't read BTF: %v", err)
}
rd := bytes.NewReader(rawBTF)
var header btfHeader
if err := binary.Read(rd, bo, &header); err != nil {
return nil, nil, fmt.Errorf("can't read header: %v", err)
}
if header.Magic != btfMagic {
return nil, nil, fmt.Errorf("incorrect magic value %v", header.Magic)
}
if header.Version != 1 {
return nil, nil, fmt.Errorf("unexpected version %v", header.Version)
}
if header.Flags != 0 {
return nil, nil, fmt.Errorf("unsupported flags %v", header.Flags)
}
remainder := int64(header.HdrLen) - int64(binary.Size(&header))
if remainder < 0 {
return nil, nil, errors.New("header is too short")
}
if _, err := io.CopyN(internal.DiscardZeroes{}, rd, remainder); err != nil {
return nil, nil, fmt.Errorf("header padding: %v", err)
}
if _, err := rd.Seek(int64(header.HdrLen+header.StringOff), io.SeekStart); err != nil {
return nil, nil, fmt.Errorf("can't seek to start of string section: %v", err)
}
rawStrings, err := readStringTable(io.LimitReader(rd, int64(header.StringLen)))
if err != nil {
return nil, nil, fmt.Errorf("can't read type names: %w", err)
}
if _, err := rd.Seek(int64(header.HdrLen+header.TypeOff), io.SeekStart); err != nil {
return nil, nil, fmt.Errorf("can't seek to start of type section: %v", err)
}
rawTypes, err := readTypes(io.LimitReader(rd, int64(header.TypeLen)), bo)
if err != nil {
return nil, nil, fmt.Errorf("can't read types: %w", err)
}
return rawTypes, rawStrings, nil
}
type variable struct {
section string
name string
}
func fixupDatasec(rawTypes []rawType, rawStrings stringTable, sectionSizes map[string]uint32, variableOffsets map[variable]uint32) error {
for i, rawType := range rawTypes {
if rawType.Kind() != kindDatasec {
continue
}
name, err := rawStrings.Lookup(rawType.NameOff)
if err != nil {
return err
}
if name == ".kconfig" || name == ".ksym" {
return fmt.Errorf("reference to %s: %w", name, ErrNotSupported)
}
size, ok := sectionSizes[name]
if !ok {
return fmt.Errorf("data section %s: missing size", name)
}
rawTypes[i].SizeType = size
secinfos := rawType.data.([]btfVarSecinfo)
for j, secInfo := range secinfos {
id := int(secInfo.Type - 1)
if id >= len(rawTypes) {
return fmt.Errorf("data section %s: invalid type id %d for variable %d", name, id, j)
}
varName, err := rawStrings.Lookup(rawTypes[id].NameOff)
if err != nil {
return fmt.Errorf("data section %s: can't get name for type %d: %w", name, id, err)
}
offset, ok := variableOffsets[variable{name, varName}]
if !ok {
return fmt.Errorf("data section %s: missing offset for variable %s", name, varName)
}
secinfos[j].Offset = offset
}
}
return nil
}
type marshalOpts struct {
ByteOrder binary.ByteOrder
StripFuncLinkage bool
}
func (s *Spec) marshal(opts marshalOpts) ([]byte, error) {
var (
buf bytes.Buffer
header = new(btfHeader)
headerLen = binary.Size(header)
)
// Reserve space for the header. We have to write it last since
// we don't know the size of the type section yet.
_, _ = buf.Write(make([]byte, headerLen))
// Write type section, just after the header.
for _, raw := range s.rawTypes {
switch {
case opts.StripFuncLinkage && raw.Kind() == kindFunc:
raw.SetLinkage(linkageStatic)
}
if err := raw.Marshal(&buf, opts.ByteOrder); err != nil {
return nil, fmt.Errorf("can't marshal BTF: %w", err)
}
}
typeLen := uint32(buf.Len() - headerLen)
// Write string section after type section.
_, _ = buf.Write(s.strings)
// Fill out the header, and write it out.
header = &btfHeader{
Magic: btfMagic,
Version: 1,
Flags: 0,
HdrLen: uint32(headerLen),
TypeOff: 0,
TypeLen: typeLen,
StringOff: typeLen,
StringLen: uint32(len(s.strings)),
}
raw := buf.Bytes()
err := binary.Write(sliceWriter(raw[:headerLen]), opts.ByteOrder, header)
if err != nil {
return nil, fmt.Errorf("can't write header: %v", err)
}
return raw, nil
}
type sliceWriter []byte
func (sw sliceWriter) Write(p []byte) (int, error) {
if len(p) != len(sw) {
return 0, errors.New("size doesn't match")
}
return copy(sw, p), nil
}
// Program finds the BTF for a specific section.
//
// Length is the number of bytes in the raw BPF instruction stream.
//
// Returns an error which may wrap ErrNoExtendedInfo if the Spec doesn't
// contain extended BTF info.
func (s *Spec) Program(name string, length uint64) (*Program, error) {
if length == 0 {
return nil, errors.New("length musn't be zero")
}
if s.funcInfos == nil && s.lineInfos == nil {
return nil, fmt.Errorf("BTF for section %s: %w", name, ErrNoExtendedInfo)
}
funcInfos, funcOK := s.funcInfos[name]
lineInfos, lineOK := s.lineInfos[name]
if !funcOK && !lineOK {
return nil, fmt.Errorf("no extended BTF info for section %s", name)
}
return &Program{s, length, funcInfos, lineInfos}, nil
}
// Map finds the BTF for a map.
//
// Returns an error if there is no BTF for the given name.
func (s *Spec) Map(name string) (*Map, []Member, error) {
var mapVar Var
if err := s.FindType(name, &mapVar); err != nil {
return nil, nil, err
}
mapStruct, ok := mapVar.Type.(*Struct)
if !ok {
return nil, nil, fmt.Errorf("expected struct, have %s", mapVar.Type)
}
var key, value Type
for _, member := range mapStruct.Members {
switch member.Name {
case "key":
key = member.Type
case "value":
value = member.Type
}
}
if key == nil {
key = (*Void)(nil)
}
if value == nil {
value = (*Void)(nil)
}
return &Map{s, key, value}, mapStruct.Members, nil
}
// Datasec returns the BTF required to create maps which represent data sections.
func (s *Spec) Datasec(name string) (*Map, error) {
var datasec Datasec
if err := s.FindType(name, &datasec); err != nil {
return nil, fmt.Errorf("data section %s: can't get BTF: %w", name, err)
}
return &Map{s, &Void{}, &datasec}, nil
}
// FindType searches for a type with a specific name.
//
// hint determines the type of the returned Type.
//
// Returns an error wrapping ErrNotFound if no matching
// type exists in spec.
func (s *Spec) FindType(name string, typ Type) error {
var (
wanted = reflect.TypeOf(typ)
candidate Type
)
for _, typ := range s.types[name] {
if reflect.TypeOf(typ) != wanted {
continue
}
if candidate != nil {
return fmt.Errorf("type %s: multiple candidates for %T", name, typ)
}
candidate = typ
}
if candidate == nil {
return fmt.Errorf("type %s: %w", name, ErrNotFound)
}
value := reflect.Indirect(reflect.ValueOf(copyType(candidate)))
reflect.Indirect(reflect.ValueOf(typ)).Set(value)
return nil
}
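
FindType uses the concrete type of its argument as the search hint and copies the unique match into it via reflection; callers test for a missing type with errors.Is against ErrNotFound. Because internal/btf is not importable from outside the module, the sketch below reproduces the reflection trick with throwaway local types instead of the real Spec.

package main

import (
    "fmt"
    "reflect"
)

// Minimal stand-ins for btf.Type implementations.
type Type interface{ typeName() string }

type Struct struct{ Name string }
type Int struct{ Name string }

func (s *Struct) typeName() string { return s.Name }
func (i *Int) typeName() string    { return i.Name }

// findType mirrors the reflection trick above: the concrete type of `typ`
// selects which kind of candidate is wanted, and the match is copied into it.
func findType(candidates []Type, name string, typ Type) error {
    wanted := reflect.TypeOf(typ)
    for _, c := range candidates {
        if reflect.TypeOf(c) != wanted || c.typeName() != name {
            continue
        }
        reflect.Indirect(reflect.ValueOf(typ)).Set(reflect.Indirect(reflect.ValueOf(c)))
        return nil
    }
    return fmt.Errorf("type %s: not found", name)
}

func main() {
    types := []Type{&Int{Name: "task_struct"}, &Struct{Name: "task_struct"}}

    var s Struct
    if err := findType(types, "task_struct", &s); err != nil {
        panic(err)
    }
    fmt.Printf("%+v\n", s) // {Name:task_struct}
}
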
// Handle is a reference to BTF loaded into the kernel.
type Handle struct {
fd *internal.FD
}
// NewHandle loads BTF into the kernel.
//
// Returns ErrNotSupported if BTF is not supported.
func NewHandle(spec *Spec) (*Handle, error) {
if err := haveBTF(); err != nil {
return nil, err
}
if spec.byteOrder != internal.NativeEndian {
return nil, fmt.Errorf("can't load %s BTF on %s", spec.byteOrder, internal.NativeEndian)
}
btf, err := spec.marshal(marshalOpts{
ByteOrder: internal.NativeEndian,
StripFuncLinkage: haveFuncLinkage() != nil,
})
if err != nil {
return nil, fmt.Errorf("can't marshal BTF: %w", err)
}
if uint64(len(btf)) > math.MaxUint32 {
return nil, errors.New("BTF exceeds the maximum size")
}
attr := &bpfLoadBTFAttr{
btf: internal.NewSlicePointer(btf),
btfSize: uint32(len(btf)),
}
fd, err := bpfLoadBTF(attr)
if err != nil {
logBuf := make([]byte, 64*1024)
attr.logBuf = internal.NewSlicePointer(logBuf)
attr.btfLogSize = uint32(len(logBuf))
attr.btfLogLevel = 1
_, logErr := bpfLoadBTF(attr)
return nil, internal.ErrorWithLog(err, logBuf, logErr)
}
return &Handle{fd}, nil
}
// Close destroys the handle.
//
// Subsequent calls to FD will return an invalid value.
func (h *Handle) Close() error {
return h.fd.Close()
}
// FD returns the file descriptor for the handle.
func (h *Handle) FD() int {
value, err := h.fd.Value()
if err != nil {
return -1
}
return int(value)
}
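// Illustrative sketch, not part of the vendored file: the usual Handle
// lifecycle, assuming spec was obtained elsewhere (e.g. parsed from an ELF).
func loadHandleSketch(spec *Spec) error {
	handle, err := NewHandle(spec)
	if errors.Is(err, ErrNotSupported) {
		return nil // kernel without BTF; callers can proceed without it
	}
	if err != nil {
		return err
	}
	defer handle.Close()
	// handle.FD() can now be referenced from map or program load attributes.
	_ = handle.FD()
	return nil
}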
// Map is the BTF for a map.
type Map struct {
spec *Spec
key, value Type
}
// MapSpec should be a method on Map, but is a free function
// to hide it from users of the ebpf package.
func MapSpec(m *Map) *Spec {
return m.spec
}
// MapKey should be a method on Map, but is a free function
// to hide it from users of the ebpf package.
func MapKey(m *Map) Type {
return m.key
}
// MapValue should be a method on Map, but is a free function
// to hide it from users of the ebpf package.
func MapValue(m *Map) Type {
return m.value
}
// Program is the BTF information for a stream of instructions.
type Program struct {
spec *Spec
length uint64
funcInfos, lineInfos extInfo
}
// ProgramSpec returns the Spec needed for loading function and line infos into the kernel.
//
// This is a free function instead of a method to hide it from users
// of package ebpf.
func ProgramSpec(s *Program) *Spec {
return s.spec
}
// ProgramAppend appends the information from other to the Program.
//
// This is a free function instead of a method to hide it from users
// of package ebpf.
func ProgramAppend(s, other *Program) error {
funcInfos, err := s.funcInfos.append(other.funcInfos, s.length)
if err != nil {
return fmt.Errorf("func infos: %w", err)
}
lineInfos, err := s.lineInfos.append(other.lineInfos, s.length)
if err != nil {
return fmt.Errorf("line infos: %w", err)
}
s.length += other.length
s.funcInfos = funcInfos
s.lineInfos = lineInfos
return nil
}
// ProgramFuncInfos returns the binary form of BTF function infos.
//
// This is a free function instead of a method to hide it from users
// of package ebpf.
func ProgramFuncInfos(s *Program) (recordSize uint32, bytes []byte, err error) {
bytes, err = s.funcInfos.MarshalBinary()
if err != nil {
return 0, nil, err
}
return s.funcInfos.recordSize, bytes, nil
}
// ProgramLineInfos returns the binary form of BTF line infos.
//
// This is a free function instead of a method to hide it from users
// of package ebpf.
func ProgramLineInfos(s *Program) (recordSize uint32, bytes []byte, err error) {
bytes, err = s.lineInfos.MarshalBinary()
if err != nil {
return 0, nil, err
}
return s.lineInfos.recordSize, bytes, nil
}
type bpfLoadBTFAttr struct {
btf internal.Pointer
logBuf internal.Pointer
btfSize uint32
btfLogSize uint32
btfLogLevel uint32
}
func bpfLoadBTF(attr *bpfLoadBTFAttr) (*internal.FD, error) {
const _BTFLoad = 18
fd, err := internal.BPF(_BTFLoad, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
if err != nil {
return nil, err
}
return internal.NewFD(uint32(fd)), nil
}
func marshalBTF(types interface{}, strings []byte, bo binary.ByteOrder) []byte {
const minHeaderLength = 24
typesLen := uint32(binary.Size(types))
header := btfHeader{
Magic: btfMagic,
Version: 1,
HdrLen: minHeaderLength,
TypeOff: 0,
TypeLen: typesLen,
StringOff: typesLen,
StringLen: uint32(len(strings)),
}
buf := new(bytes.Buffer)
_ = binary.Write(buf, bo, &header)
_ = binary.Write(buf, bo, types)
buf.Write(strings)
return buf.Bytes()
}
var haveBTF = internal.FeatureTest("BTF", "5.1", func() (bool, error) {
var (
types struct {
Integer btfType
Var btfType
btfVar struct{ Linkage uint32 }
}
strings = []byte{0, 'a', 0}
)
// We use a BTF_KIND_VAR here, to make sure that
// the kernel understands BTF at least as well as we
// do. BTF_KIND_VAR was introduced ~5.1.
types.Integer.SetKind(kindPointer)
types.Var.NameOff = 1
types.Var.SetKind(kindVar)
types.Var.SizeType = 1
btf := marshalBTF(&types, strings, internal.NativeEndian)
fd, err := bpfLoadBTF(&bpfLoadBTFAttr{
btf: internal.NewSlicePointer(btf),
btfSize: uint32(len(btf)),
})
if err == nil {
fd.Close()
}
// Check for EINVAL specifically, rather than err != nil since we
// otherwise misdetect due to insufficient permissions.
return !errors.Is(err, unix.EINVAL), nil
})
var haveFuncLinkage = internal.FeatureTest("BTF func linkage", "5.6", func() (bool, error) {
var (
types struct {
FuncProto btfType
Func btfType
}
strings = []byte{0, 'a', 0}
)
types.FuncProto.SetKind(kindFuncProto)
types.Func.SetKind(kindFunc)
types.Func.SizeType = 1 // aka FuncProto
types.Func.NameOff = 1
types.Func.SetLinkage(linkageGlobal)
btf := marshalBTF(&types, strings, internal.NativeEndian)
fd, err := bpfLoadBTF(&bpfLoadBTFAttr{
btf: internal.NewSlicePointer(btf),
btfSize: uint32(len(btf)),
})
if err == nil {
fd.Close()
}
// Check for EINVAL specifically, rather than err != nil since we
// otherwise misdetect due to insufficient permissions.
return !errors.Is(err, unix.EINVAL), nil
})

259
vendor/github.com/cilium/ebpf/internal/btf/btf_types.go generated vendored Normal file
View file

@ -0,0 +1,259 @@
package btf
import (
"encoding/binary"
"fmt"
"io"
)
// btfKind describes a Type.
type btfKind uint8
// Equivalents of the BTF_KIND_* constants.
const (
kindUnknown btfKind = iota
kindInt
kindPointer
kindArray
kindStruct
kindUnion
kindEnum
kindForward
kindTypedef
kindVolatile
kindConst
kindRestrict
// Added ~4.20
kindFunc
kindFuncProto
// Added ~5.1
kindVar
kindDatasec
)
type btfFuncLinkage uint8
const (
linkageStatic btfFuncLinkage = iota
linkageGlobal
linkageExtern
)
const (
btfTypeKindShift = 24
btfTypeKindLen = 4
btfTypeVlenShift = 0
btfTypeVlenMask = 16
)
// btfType is equivalent to struct btf_type in Documentation/bpf/btf.rst.
type btfType struct {
NameOff uint32
/* "info" bits arrangement
* bits 0-15: vlen (e.g. # of struct's members), linkage
* bits 16-23: unused
* bits 24-27: kind (e.g. int, ptr, array...etc)
* bits 28-30: unused
* bit 31: kind_flag, currently used by
* struct, union and fwd
*/
Info uint32
/* "size" is used by INT, ENUM, STRUCT and UNION.
* "size" tells the size of the type it is describing.
*
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
* FUNC and FUNC_PROTO.
* "type" is a type_id referring to another type.
*/
SizeType uint32
}
func (k btfKind) String() string {
switch k {
case kindUnknown:
return "Unknown"
case kindInt:
return "Integer"
case kindPointer:
return "Pointer"
case kindArray:
return "Array"
case kindStruct:
return "Struct"
case kindUnion:
return "Union"
case kindEnum:
return "Enumeration"
case kindForward:
return "Forward"
case kindTypedef:
return "Typedef"
case kindVolatile:
return "Volatile"
case kindConst:
return "Const"
case kindRestrict:
return "Restrict"
case kindFunc:
return "Function"
case kindFuncProto:
return "Function Proto"
case kindVar:
return "Variable"
case kindDatasec:
return "Section"
default:
return fmt.Sprintf("Unknown (%d)", k)
}
}
func mask(len uint32) uint32 {
return (1 << len) - 1
}
func (bt *btfType) info(len, shift uint32) uint32 {
return (bt.Info >> shift) & mask(len)
}
func (bt *btfType) setInfo(value, len, shift uint32) {
bt.Info &^= mask(len) << shift
bt.Info |= (value & mask(len)) << shift
}
func (bt *btfType) Kind() btfKind {
return btfKind(bt.info(btfTypeKindLen, btfTypeKindShift))
}
func (bt *btfType) SetKind(kind btfKind) {
bt.setInfo(uint32(kind), btfTypeKindLen, btfTypeKindShift)
}
func (bt *btfType) Vlen() int {
return int(bt.info(btfTypeVlenMask, btfTypeVlenShift))
}
func (bt *btfType) SetVlen(vlen int) {
bt.setInfo(uint32(vlen), btfTypeVlenMask, btfTypeVlenShift)
}
func (bt *btfType) Linkage() btfFuncLinkage {
return btfFuncLinkage(bt.info(btfTypeVlenMask, btfTypeVlenShift))
}
func (bt *btfType) SetLinkage(linkage btfFuncLinkage) {
bt.setInfo(uint32(linkage), btfTypeVlenMask, btfTypeVlenShift)
}
func (bt *btfType) Type() TypeID {
// TODO: Panic here if wrong kind?
return TypeID(bt.SizeType)
}
func (bt *btfType) Size() uint32 {
// TODO: Panic here if wrong kind?
return bt.SizeType
}
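// Illustrative sketch, not part of the vendored file: the accessors above
// pack vlen (or linkage) into bits 0-15 of Info and the kind into bits 24-27,
// matching the layout documented on btfType.
func infoPackingSketch() {
	var bt btfType
	bt.SetKind(kindStruct) // stored in bits 24-27
	bt.SetVlen(3)          // e.g. number of members, stored in bits 0-15
	_ = bt.Kind() == kindStruct // true
	_ = bt.Vlen() == 3          // true
}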
type rawType struct {
btfType
data interface{}
}
func (rt *rawType) Marshal(w io.Writer, bo binary.ByteOrder) error {
if err := binary.Write(w, bo, &rt.btfType); err != nil {
return err
}
if rt.data == nil {
return nil
}
return binary.Write(w, bo, rt.data)
}
type btfArray struct {
Type TypeID
IndexType TypeID
Nelems uint32
}
type btfMember struct {
NameOff uint32
Type TypeID
Offset uint32
}
type btfVarSecinfo struct {
Type TypeID
Offset uint32
Size uint32
}
type btfVariable struct {
Linkage uint32
}
type btfEnum struct {
NameOff uint32
Val int32
}
type btfParam struct {
NameOff uint32
Type TypeID
}
func readTypes(r io.Reader, bo binary.ByteOrder) ([]rawType, error) {
var (
header btfType
types []rawType
)
for id := TypeID(1); ; id++ {
if err := binary.Read(r, bo, &header); err == io.EOF {
return types, nil
} else if err != nil {
return nil, fmt.Errorf("can't read type info for id %v: %v", id, err)
}
var data interface{}
switch header.Kind() {
case kindInt:
data = new(uint32)
case kindPointer:
case kindArray:
data = new(btfArray)
case kindStruct:
fallthrough
case kindUnion:
data = make([]btfMember, header.Vlen())
case kindEnum:
data = make([]btfEnum, header.Vlen())
case kindForward:
case kindTypedef:
case kindVolatile:
case kindConst:
case kindRestrict:
case kindFunc:
case kindFuncProto:
data = make([]btfParam, header.Vlen())
case kindVar:
data = new(btfVariable)
case kindDatasec:
data = make([]btfVarSecinfo, header.Vlen())
default:
return nil, fmt.Errorf("type id %v: unknown kind: %v", id, header.Kind())
}
if data == nil {
types = append(types, rawType{header, nil})
continue
}
if err := binary.Read(r, bo, data); err != nil {
return nil, fmt.Errorf("type id %d: kind %v: can't read %T: %v", id, header.Kind(), data, err)
}
types = append(types, rawType{header, data})
}
}

8
vendor/github.com/cilium/ebpf/internal/btf/doc.go generated vendored Normal file
View file

@ -0,0 +1,8 @@
// Package btf handles data encoded according to the BPF Type Format.
//
// The canonical documentation lives in the Linux kernel repository and is
// available at https://www.kernel.org/doc/html/latest/bpf/btf.html
//
// The API is very much unstable. You should only use this via the main
// ebpf library.
package btf

182
vendor/github.com/cilium/ebpf/internal/btf/ext_info.go generated vendored Normal file
View file

@ -0,0 +1,182 @@
package btf
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"io/ioutil"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/internal"
)
type btfExtHeader struct {
Magic uint16
Version uint8
Flags uint8
HdrLen uint32
FuncInfoOff uint32
FuncInfoLen uint32
LineInfoOff uint32
LineInfoLen uint32
}
func parseExtInfos(r io.ReadSeeker, bo binary.ByteOrder, strings stringTable) (funcInfo, lineInfo map[string]extInfo, err error) {
var header btfExtHeader
if err := binary.Read(r, bo, &header); err != nil {
return nil, nil, fmt.Errorf("can't read header: %v", err)
}
if header.Magic != btfMagic {
return nil, nil, fmt.Errorf("incorrect magic value %v", header.Magic)
}
if header.Version != 1 {
return nil, nil, fmt.Errorf("unexpected version %v", header.Version)
}
if header.Flags != 0 {
return nil, nil, fmt.Errorf("unsupported flags %v", header.Flags)
}
remainder := int64(header.HdrLen) - int64(binary.Size(&header))
if remainder < 0 {
return nil, nil, errors.New("header is too short")
}
// Of course, the .BTF.ext header has different semantics than the
// .BTF ext header. We need to ignore non-null values.
_, err = io.CopyN(ioutil.Discard, r, remainder)
if err != nil {
return nil, nil, fmt.Errorf("header padding: %v", err)
}
if _, err := r.Seek(int64(header.HdrLen+header.FuncInfoOff), io.SeekStart); err != nil {
return nil, nil, fmt.Errorf("can't seek to function info section: %v", err)
}
funcInfo, err = parseExtInfo(io.LimitReader(r, int64(header.FuncInfoLen)), bo, strings)
if err != nil {
return nil, nil, fmt.Errorf("function info: %w", err)
}
if _, err := r.Seek(int64(header.HdrLen+header.LineInfoOff), io.SeekStart); err != nil {
return nil, nil, fmt.Errorf("can't seek to line info section: %v", err)
}
lineInfo, err = parseExtInfo(io.LimitReader(r, int64(header.LineInfoLen)), bo, strings)
if err != nil {
return nil, nil, fmt.Errorf("line info: %w", err)
}
return funcInfo, lineInfo, nil
}
type btfExtInfoSec struct {
SecNameOff uint32
NumInfo uint32
}
type extInfoRecord struct {
InsnOff uint64
Opaque []byte
}
type extInfo struct {
recordSize uint32
records []extInfoRecord
}
func (ei extInfo) append(other extInfo, offset uint64) (extInfo, error) {
if other.recordSize != ei.recordSize {
return extInfo{}, fmt.Errorf("ext_info record size mismatch, want %d (got %d)", ei.recordSize, other.recordSize)
}
records := make([]extInfoRecord, 0, len(ei.records)+len(other.records))
records = append(records, ei.records...)
for _, info := range other.records {
records = append(records, extInfoRecord{
InsnOff: info.InsnOff + offset,
Opaque: info.Opaque,
})
}
return extInfo{ei.recordSize, records}, nil
}
func (ei extInfo) MarshalBinary() ([]byte, error) {
if len(ei.records) == 0 {
return nil, nil
}
buf := bytes.NewBuffer(make([]byte, 0, int(ei.recordSize)*len(ei.records)))
for _, info := range ei.records {
// The kernel expects offsets in number of raw bpf instructions,
// while the ELF tracks it in bytes.
insnOff := uint32(info.InsnOff / asm.InstructionSize)
if err := binary.Write(buf, internal.NativeEndian, insnOff); err != nil {
return nil, fmt.Errorf("can't write instruction offset: %v", err)
}
buf.Write(info.Opaque)
}
return buf.Bytes(), nil
}
func parseExtInfo(r io.Reader, bo binary.ByteOrder, strings stringTable) (map[string]extInfo, error) {
var recordSize uint32
if err := binary.Read(r, bo, &recordSize); err != nil {
return nil, fmt.Errorf("can't read record size: %v", err)
}
if recordSize < 4 {
// Need at least insnOff
return nil, errors.New("record size too short")
}
result := make(map[string]extInfo)
for {
var infoHeader btfExtInfoSec
if err := binary.Read(r, bo, &infoHeader); err == io.EOF {
return result, nil
} else if err != nil {
return nil, fmt.Errorf("can't read ext info header: %v", err)
}
secName, err := strings.Lookup(infoHeader.SecNameOff)
if err != nil {
return nil, fmt.Errorf("can't get section name: %w", err)
}
if infoHeader.NumInfo == 0 {
return nil, fmt.Errorf("section %s has invalid number of records", secName)
}
var records []extInfoRecord
for i := uint32(0); i < infoHeader.NumInfo; i++ {
var byteOff uint32
if err := binary.Read(r, bo, &byteOff); err != nil {
return nil, fmt.Errorf("section %v: can't read extended info offset: %v", secName, err)
}
buf := make([]byte, int(recordSize-4))
if _, err := io.ReadFull(r, buf); err != nil {
return nil, fmt.Errorf("section %v: can't read record: %v", secName, err)
}
if byteOff%asm.InstructionSize != 0 {
return nil, fmt.Errorf("section %v: offset %v is not aligned with instruction size", secName, byteOff)
}
records = append(records, extInfoRecord{uint64(byteOff), buf})
}
result[secName] = extInfo{
recordSize,
records,
}
}
}

60
vendor/github.com/cilium/ebpf/internal/btf/strings.go generated vendored Normal file
View file

@ -0,0 +1,60 @@
package btf
import (
"bytes"
"errors"
"fmt"
"io"
"io/ioutil"
)
type stringTable []byte
func readStringTable(r io.Reader) (stringTable, error) {
contents, err := ioutil.ReadAll(r)
if err != nil {
return nil, fmt.Errorf("can't read string table: %v", err)
}
if len(contents) < 1 {
return nil, errors.New("string table is empty")
}
if contents[0] != '\x00' {
return nil, errors.New("first item in string table is non-empty")
}
if contents[len(contents)-1] != '\x00' {
return nil, errors.New("string table isn't null terminated")
}
return stringTable(contents), nil
}
func (st stringTable) Lookup(offset uint32) (string, error) {
if int64(offset) > int64(^uint(0)>>1) {
return "", fmt.Errorf("offset %d overflows int", offset)
}
pos := int(offset)
if pos >= len(st) {
return "", fmt.Errorf("offset %d is out of bounds", offset)
}
if pos > 0 && st[pos-1] != '\x00' {
return "", fmt.Errorf("offset %d isn't start of a string", offset)
}
str := st[pos:]
end := bytes.IndexByte(str, '\x00')
if end == -1 {
return "", fmt.Errorf("offset %d isn't null terminated", offset)
}
return string(str[:end]), nil
}
func (st stringTable) LookupName(offset uint32) (Name, error) {
str, err := st.Lookup(offset)
return Name(str), err
}
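// Illustrative sketch, not part of the vendored file: a BTF string table is a
// sequence of NUL-terminated strings whose first entry is empty, so offset 0
// is "" and every other valid offset points just past a NUL byte.
func stringTableSketch() {
	st := stringTable("\x00foo\x00bar\x00")
	first, _ := st.Lookup(1)  // "foo"
	second, _ := st.Lookup(5) // "bar"
	_, err := st.Lookup(2)    // error: 2 isn't the start of a string
	_, _, _ = first, second, err
}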

587
vendor/github.com/cilium/ebpf/internal/btf/types.go generated vendored Normal file
View file

@ -0,0 +1,587 @@
package btf
import (
"errors"
"fmt"
"math"
)
const maxTypeDepth = 32
// TypeID identifies a type in a BTF section.
type TypeID uint32
// ID implements part of the Type interface.
func (tid TypeID) ID() TypeID {
return tid
}
// Type represents a type described by BTF.
type Type interface {
ID() TypeID
// Make a copy of the type, without copying Type members.
copy() Type
walk(*copyStack)
}
// Name identifies a type.
//
// Anonymous types have an empty name.
type Name string
func (n Name) name() string {
return string(n)
}
// Void is the unit type of BTF.
type Void struct{}
func (v *Void) ID() TypeID { return 0 }
func (v *Void) size() uint32 { return 0 }
func (v *Void) copy() Type { return (*Void)(nil) }
func (v *Void) walk(*copyStack) {}
// Int is an integer of a given length.
type Int struct {
TypeID
Name
// The size of the integer in bytes.
Size uint32
}
func (i *Int) size() uint32 { return i.Size }
func (i *Int) walk(*copyStack) {}
func (i *Int) copy() Type {
cpy := *i
return &cpy
}
// Pointer is a pointer to another type.
type Pointer struct {
TypeID
Target Type
}
func (p *Pointer) size() uint32 { return 8 }
func (p *Pointer) walk(cs *copyStack) { cs.push(&p.Target) }
func (p *Pointer) copy() Type {
cpy := *p
return &cpy
}
// Array is an array with a fixed number of elements.
type Array struct {
TypeID
Type Type
Nelems uint32
}
func (arr *Array) walk(cs *copyStack) { cs.push(&arr.Type) }
func (arr *Array) copy() Type {
cpy := *arr
return &cpy
}
// Struct is a compound type of consecutive members.
type Struct struct {
TypeID
Name
// The size of the struct including padding, in bytes
Size uint32
Members []Member
}
func (s *Struct) size() uint32 { return s.Size }
func (s *Struct) walk(cs *copyStack) {
for i := range s.Members {
cs.push(&s.Members[i].Type)
}
}
func (s *Struct) copy() Type {
cpy := *s
cpy.Members = make([]Member, len(s.Members))
copy(cpy.Members, s.Members)
return &cpy
}
// Union is a compound type where members occupy the same memory.
type Union struct {
TypeID
Name
// The size of the union including padding, in bytes.
Size uint32
Members []Member
}
func (u *Union) size() uint32 { return u.Size }
func (u *Union) walk(cs *copyStack) {
for i := range u.Members {
cs.push(&u.Members[i].Type)
}
}
func (u *Union) copy() Type {
cpy := *u
cpy.Members = make([]Member, len(u.Members))
copy(cpy.Members, u.Members)
return &cpy
}
// Member is part of a Struct or Union.
//
// It is not a valid Type.
type Member struct {
Name
Type Type
Offset uint32
}
// Enum lists possible values.
type Enum struct {
TypeID
Name
}
func (e *Enum) size() uint32 { return 4 }
func (e *Enum) walk(*copyStack) {}
func (e *Enum) copy() Type {
cpy := *e
return &cpy
}
// Fwd is a forward declaration of a Type.
type Fwd struct {
TypeID
Name
}
func (f *Fwd) walk(*copyStack) {}
func (f *Fwd) copy() Type {
cpy := *f
return &cpy
}
// Typedef is an alias of a Type.
type Typedef struct {
TypeID
Name
Type Type
}
func (td *Typedef) walk(cs *copyStack) { cs.push(&td.Type) }
func (td *Typedef) copy() Type {
cpy := *td
return &cpy
}
// Volatile is a modifier.
type Volatile struct {
TypeID
Type Type
}
func (v *Volatile) walk(cs *copyStack) { cs.push(&v.Type) }
func (v *Volatile) copy() Type {
cpy := *v
return &cpy
}
// Const is a modifier.
type Const struct {
TypeID
Type Type
}
func (c *Const) walk(cs *copyStack) { cs.push(&c.Type) }
func (c *Const) copy() Type {
cpy := *c
return &cpy
}
// Restrict is a modifier.
type Restrict struct {
TypeID
Type Type
}
func (r *Restrict) walk(cs *copyStack) { cs.push(&r.Type) }
func (r *Restrict) copy() Type {
cpy := *r
return &cpy
}
// Func is a function definition.
type Func struct {
TypeID
Name
Type Type
}
func (f *Func) walk(cs *copyStack) { cs.push(&f.Type) }
func (f *Func) copy() Type {
cpy := *f
return &cpy
}
// FuncProto is a function declaration.
type FuncProto struct {
TypeID
Return Type
// Parameters not supported yet
}
func (fp *FuncProto) walk(cs *copyStack) { cs.push(&fp.Return) }
func (fp *FuncProto) copy() Type {
cpy := *fp
return &cpy
}
// Var is a global variable.
type Var struct {
TypeID
Name
Type Type
}
func (v *Var) walk(cs *copyStack) { cs.push(&v.Type) }
func (v *Var) copy() Type {
cpy := *v
return &cpy
}
// Datasec is a global program section containing data.
type Datasec struct {
TypeID
Name
Size uint32
Vars []VarSecinfo
}
func (ds *Datasec) size() uint32 { return ds.Size }
func (ds *Datasec) walk(cs *copyStack) {
for i := range ds.Vars {
cs.push(&ds.Vars[i].Type)
}
}
func (ds *Datasec) copy() Type {
cpy := *ds
cpy.Vars = make([]VarSecinfo, len(ds.Vars))
copy(cpy.Vars, ds.Vars)
return &cpy
}
// VarSecinfo describes a variable in a Datasec.
type VarSecinfo struct {
Type Type
Offset uint32
Size uint32
}
type sizer interface {
size() uint32
}
var (
_ sizer = (*Int)(nil)
_ sizer = (*Pointer)(nil)
_ sizer = (*Struct)(nil)
_ sizer = (*Union)(nil)
_ sizer = (*Enum)(nil)
_ sizer = (*Datasec)(nil)
)
// Sizeof returns the size of a type in bytes.
//
// Returns an error if the size can't be computed.
func Sizeof(typ Type) (int, error) {
var (
n = int64(1)
elem int64
)
for i := 0; i < maxTypeDepth; i++ {
switch v := typ.(type) {
case *Array:
if n > 0 && int64(v.Nelems) > math.MaxInt64/n {
return 0, errors.New("overflow")
}
// Arrays may be of zero length, which allows
// n to be zero as well.
n *= int64(v.Nelems)
typ = v.Type
continue
case sizer:
elem = int64(v.size())
case *Typedef:
typ = v.Type
continue
case *Volatile:
typ = v.Type
continue
case *Const:
typ = v.Type
continue
case *Restrict:
typ = v.Type
continue
default:
return 0, fmt.Errorf("unrecognized type %T", typ)
}
if n > 0 && elem > math.MaxInt64/n {
return 0, errors.New("overflow")
}
size := n * elem
if int64(int(size)) != size {
return 0, errors.New("overflow")
}
return int(size), nil
}
return 0, errors.New("exceeded type depth")
}
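// Illustrative sketch, not part of the vendored file: Sizeof resolves
// typedefs and qualifiers and multiplies out array lengths, so an array of
// ten 4-byte integers reports 40 bytes.
func sizeofSketch() {
	elem := &Int{Size: 4}
	arr := &Array{Type: elem, Nelems: 10}
	size, _ := Sizeof(arr) // 40, nil
	_ = size
}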
// copy a Type recursively.
//
// typ may form a cycle.
func copyType(typ Type) Type {
var (
copies = make(map[Type]Type)
work copyStack
)
for t := &typ; t != nil; t = work.pop() {
// *t is the identity of the type.
if cpy := copies[*t]; cpy != nil {
*t = cpy
continue
}
cpy := (*t).copy()
copies[*t] = cpy
*t = cpy
// Mark any nested types for copying.
cpy.walk(&work)
}
return typ
}
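// Illustrative sketch, not part of the vendored file: copyType preserves
// cycles, so a pointer that targets itself is copied to a new pointer that
// targets the copy rather than the original.
func copyCycleSketch() {
	p := &Pointer{}
	p.Target = p
	cpy := copyType(p).(*Pointer)
	_ = cpy != p                // true: a fresh value was allocated
	_ = cpy.Target == Type(cpy) // true: the cycle now points at the copy
}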
// copyStack keeps track of pointers to types which still
// need to be copied.
type copyStack []*Type
// push adds a type to the stack.
func (cs *copyStack) push(t *Type) {
*cs = append(*cs, t)
}
// pop returns the topmost Type, or nil.
func (cs *copyStack) pop() *Type {
n := len(*cs)
if n == 0 {
return nil
}
t := (*cs)[n-1]
*cs = (*cs)[:n-1]
return t
}
type namer interface {
name() string
}
var _ namer = Name("")
// inflateRawTypes takes a list of raw btf types linked via type IDs, and turns
// it into a graph of Types connected via pointers.
//
// Returns a map of named types (i.e. those whose NameOff is non-zero). Since BTF ignores
// compilation units, multiple types may share the same name. A Type may form a
// cyclic graph by pointing at itself.
func inflateRawTypes(rawTypes []rawType, rawStrings stringTable) (namedTypes map[string][]Type, err error) {
type fixupDef struct {
id TypeID
expectedKind btfKind
typ *Type
}
var fixups []fixupDef
fixup := func(id TypeID, expectedKind btfKind, typ *Type) {
fixups = append(fixups, fixupDef{id, expectedKind, typ})
}
convertMembers := func(raw []btfMember) ([]Member, error) {
// NB: The fixup below relies on pre-allocating this array to
// work, since otherwise append might re-allocate members.
members := make([]Member, 0, len(raw))
for i, btfMember := range raw {
name, err := rawStrings.LookupName(btfMember.NameOff)
if err != nil {
return nil, fmt.Errorf("can't get name for member %d: %w", i, err)
}
members = append(members, Member{
Name: name,
Offset: btfMember.Offset,
})
}
for i := range members {
fixup(raw[i].Type, kindUnknown, &members[i].Type)
}
return members, nil
}
types := make([]Type, 0, len(rawTypes))
types = append(types, (*Void)(nil))
namedTypes = make(map[string][]Type)
for i, raw := range rawTypes {
var (
// Void is defined to always be type ID 0, and is thus
// omitted from BTF.
id = TypeID(i + 1)
typ Type
)
name, err := rawStrings.LookupName(raw.NameOff)
if err != nil {
return nil, fmt.Errorf("can't get name for type id %d: %w", id, err)
}
switch raw.Kind() {
case kindInt:
typ = &Int{id, name, raw.Size()}
case kindPointer:
ptr := &Pointer{id, nil}
fixup(raw.Type(), kindUnknown, &ptr.Target)
typ = ptr
case kindArray:
btfArr := raw.data.(*btfArray)
// IndexType is unused according to btf.rst.
// Don't make it available right now.
arr := &Array{id, nil, btfArr.Nelems}
fixup(btfArr.Type, kindUnknown, &arr.Type)
typ = arr
case kindStruct:
members, err := convertMembers(raw.data.([]btfMember))
if err != nil {
return nil, fmt.Errorf("struct %s (id %d): %w", name, id, err)
}
typ = &Struct{id, name, raw.Size(), members}
case kindUnion:
members, err := convertMembers(raw.data.([]btfMember))
if err != nil {
return nil, fmt.Errorf("union %s (id %d): %w", name, id, err)
}
typ = &Union{id, name, raw.Size(), members}
case kindEnum:
typ = &Enum{id, name}
case kindForward:
typ = &Fwd{id, name}
case kindTypedef:
typedef := &Typedef{id, name, nil}
fixup(raw.Type(), kindUnknown, &typedef.Type)
typ = typedef
case kindVolatile:
volatile := &Volatile{id, nil}
fixup(raw.Type(), kindUnknown, &volatile.Type)
typ = volatile
case kindConst:
cnst := &Const{id, nil}
fixup(raw.Type(), kindUnknown, &cnst.Type)
typ = cnst
case kindRestrict:
restrict := &Restrict{id, nil}
fixup(raw.Type(), kindUnknown, &restrict.Type)
typ = restrict
case kindFunc:
fn := &Func{id, name, nil}
fixup(raw.Type(), kindFuncProto, &fn.Type)
typ = fn
case kindFuncProto:
fp := &FuncProto{id, nil}
fixup(raw.Type(), kindUnknown, &fp.Return)
typ = fp
case kindVar:
v := &Var{id, name, nil}
fixup(raw.Type(), kindUnknown, &v.Type)
typ = v
case kindDatasec:
btfVars := raw.data.([]btfVarSecinfo)
vars := make([]VarSecinfo, 0, len(btfVars))
for _, btfVar := range btfVars {
vars = append(vars, VarSecinfo{
Offset: btfVar.Offset,
Size: btfVar.Size,
})
}
for i := range vars {
fixup(btfVars[i].Type, kindVar, &vars[i].Type)
}
typ = &Datasec{id, name, raw.SizeType, vars}
default:
return nil, fmt.Errorf("type id %d: unknown kind: %v", id, raw.Kind())
}
types = append(types, typ)
if namer, ok := typ.(namer); ok {
if name := namer.name(); name != "" {
namedTypes[name] = append(namedTypes[name], typ)
}
}
}
for _, fixup := range fixups {
i := int(fixup.id)
if i >= len(types) {
return nil, fmt.Errorf("reference to invalid type id: %d", fixup.id)
}
// Default void (id 0) to unknown
rawKind := kindUnknown
if i > 0 {
rawKind = rawTypes[i-1].Kind()
}
if expected := fixup.expectedKind; expected != kindUnknown && rawKind != expected {
return nil, fmt.Errorf("expected type id %d to have kind %s, found %s", fixup.id, expected, rawKind)
}
*fixup.typ = types[i]
}
return namedTypes, nil
}

View file

@ -2,10 +2,9 @@ package internal
import (
"fmt"
"os"
"io/ioutil"
"strings"
"sync"
"github.com/pkg/errors"
)
var sysCPU struct {
@ -18,45 +17,44 @@ var sysCPU struct {
// Logical CPU numbers must be of the form 0-n
func PossibleCPUs() (int, error) {
sysCPU.once.Do(func() {
sysCPU.num, sysCPU.err = parseCPUs("/sys/devices/system/cpu/possible")
sysCPU.num, sysCPU.err = parseCPUsFromFile("/sys/devices/system/cpu/possible")
})
return sysCPU.num, sysCPU.err
}
var onlineCPU struct {
once sync.Once
err error
num int
}
// OnlineCPUs returns the number of currently online CPUs
// Logical CPU numbers must be of the form 0-n
func OnlineCPUs() (int, error) {
onlineCPU.once.Do(func() {
onlineCPU.num, onlineCPU.err = parseCPUs("/sys/devices/system/cpu/online")
})
return onlineCPU.num, onlineCPU.err
}
// parseCPUs parses the number of cpus from sysfs,
// in the format of "/sys/devices/system/cpu/{possible,online,..}.
// Logical CPU numbers must be of the form 0-n
func parseCPUs(path string) (int, error) {
file, err := os.Open(path)
func parseCPUsFromFile(path string) (int, error) {
spec, err := ioutil.ReadFile(path)
if err != nil {
return 0, err
}
defer file.Close()
n, err := parseCPUs(string(spec))
if err != nil {
return 0, fmt.Errorf("can't parse %s: %v", path, err)
}
return n, nil
}
// parseCPUs parses the number of cpus from a string produced
// by bitmap_list_string() in the Linux kernel.
// Multiple ranges are rejected, since they can't be unified
// into a single number.
// This is the format of /sys/devices/system/cpu/possible; it
// is not suitable for /sys/devices/system/cpu/online, etc.
func parseCPUs(spec string) (int, error) {
if strings.Trim(spec, "\n") == "0" {
return 1, nil
}
var low, high int
n, _ := fmt.Fscanf(file, "%d-%d", &low, &high)
if n < 1 || low != 0 {
return 0, errors.Wrapf(err, "%s has unknown format", path)
n, err := fmt.Sscanf(spec, "%d-%d\n", &low, &high)
if n != 2 || err != nil {
return 0, fmt.Errorf("invalid format: %s", spec)
}
if n == 1 {
high = low
if low != 0 {
return 0, fmt.Errorf("CPU spec doesn't start at zero: %s", spec)
}
// cpus is 0 indexed

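// Illustrative sketch, not part of the vendored file: parseCPUs accepts the
// kernel's bitmap list format, i.e. either a lone "0" or a single zero-based
// range such as "0-7"; multi-range specs cannot be turned into a count.
func parseCPUsSketch() {
	eight, _ := parseCPUs("0-7\n") // 8 (upper bound plus one)
	one, _ := parseCPUs("0\n")     // 1
	_, err := parseCPUs("0,2-3\n") // error: invalid format
	_, _, _ = eight, one, err
}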
47
vendor/github.com/cilium/ebpf/internal/errors.go generated vendored Normal file
View file

@ -0,0 +1,47 @@
package internal
import (
"bytes"
"errors"
"fmt"
"strings"
"github.com/cilium/ebpf/internal/unix"
)
// ErrorWithLog returns an error that includes logs from the
// kernel verifier.
//
// logErr should be the error returned by the syscall that generated
// the log. It is used to check for truncation of the output.
func ErrorWithLog(err error, log []byte, logErr error) error {
logStr := strings.Trim(CString(log), "\t\r\n ")
if errors.Is(logErr, unix.ENOSPC) {
logStr += " (truncated...)"
}
return &VerifierError{err, logStr}
}
// VerifierError includes information from the eBPF verifier.
type VerifierError struct {
cause error
log string
}
func (le *VerifierError) Error() string {
if le.log == "" {
return le.cause.Error()
}
return fmt.Sprintf("%s: %s", le.cause, le.log)
}
// CString turns a NUL / zero terminated byte buffer into a string.
func CString(in []byte) string {
inLen := bytes.IndexByte(in, 0)
if inLen == -1 {
return ""
}
return string(in[:inLen])
}
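// Illustrative sketch, not part of the vendored file: CString stops at the
// first NUL byte, which is how the kernel terminates its fixed-size buffers.
func cstringSketch() {
	_ = CString([]byte{'h', 'i', 0, 'x'}) == "hi" // true
	_ = CString([]byte("no terminator")) == ""    // true: no NUL found
}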

69
vendor/github.com/cilium/ebpf/internal/fd.go generated vendored Normal file
View file

@ -0,0 +1,69 @@
package internal
import (
"errors"
"fmt"
"os"
"runtime"
"strconv"
"github.com/cilium/ebpf/internal/unix"
)
var ErrClosedFd = errors.New("use of closed file descriptor")
type FD struct {
raw int64
}
func NewFD(value uint32) *FD {
fd := &FD{int64(value)}
runtime.SetFinalizer(fd, (*FD).Close)
return fd
}
func (fd *FD) String() string {
return strconv.FormatInt(fd.raw, 10)
}
func (fd *FD) Value() (uint32, error) {
if fd.raw < 0 {
return 0, ErrClosedFd
}
return uint32(fd.raw), nil
}
func (fd *FD) Close() error {
if fd.raw < 0 {
return nil
}
value := int(fd.raw)
fd.raw = -1
fd.Forget()
return unix.Close(value)
}
func (fd *FD) Forget() {
runtime.SetFinalizer(fd, nil)
}
func (fd *FD) Dup() (*FD, error) {
if fd.raw < 0 {
return nil, ErrClosedFd
}
dup, err := unix.FcntlInt(uintptr(fd.raw), unix.F_DUPFD_CLOEXEC, 0)
if err != nil {
return nil, fmt.Errorf("can't dup fd: %v", err)
}
return NewFD(uint32(dup)), nil
}
func (fd *FD) File(name string) *os.File {
fd.Forget()
return os.NewFile(uintptr(fd.raw), name)
}

View file

@ -1,12 +1,14 @@
package internal
import (
"errors"
"fmt"
"sync"
"github.com/pkg/errors"
)
// ErrNotSupported indicates that a feature is not supported by the current kernel.
var ErrNotSupported = errors.New("not supported")
// UnsupportedFeatureError is returned by FeatureTest() functions.
type UnsupportedFeatureError struct {
// The minimum Linux mainline version required for this feature.
@ -21,33 +23,68 @@ func (ufe *UnsupportedFeatureError) Error() string {
return fmt.Sprintf("%s not supported (requires >= %s)", ufe.Name, ufe.MinimumVersion)
}
// Is indicates that UnsupportedFeatureError is ErrNotSupported.
func (ufe *UnsupportedFeatureError) Is(target error) bool {
return target == ErrNotSupported
}
type featureTest struct {
sync.Mutex
successful bool
result error
}
// FeatureTestFn is used to determine whether the kernel supports
// a certain feature.
//
// The return values have the following semantics:
//
// err != nil: the test couldn't be executed
// err == nil && available: the feature is available
// err == nil && !available: the feature isn't available
type FeatureTestFn func() (available bool, err error)
// FeatureTest wraps a function so that it is run at most once.
//
// name should identify the tested feature, while version must be in the
// form Major.Minor[.Patch].
//
// Returns a descriptive UnsupportedFeatureError if the feature is not available.
func FeatureTest(name, version string, fn func() bool) func() error {
// Returns an error wrapping ErrNotSupported if the feature is not supported.
func FeatureTest(name, version string, fn FeatureTestFn) func() error {
v, err := NewVersion(version)
if err != nil {
return func() error { return err }
}
var (
once sync.Once
result error
)
ft := new(featureTest)
return func() error {
once.Do(func() {
if !fn() {
result = &UnsupportedFeatureError{
MinimumVersion: v,
Name: name,
}
ft.Lock()
defer ft.Unlock()
if ft.successful {
return ft.result
}
available, err := fn()
if errors.Is(err, ErrNotSupported) {
// The feature test aborted because a dependent feature
// is missing, which we should cache.
available = false
} else if err != nil {
// We couldn't execute the feature test to a point
// where it could make a determination.
// Don't cache the result, just return it.
return fmt.Errorf("can't detect support for %s: %w", name, err)
}
ft.successful = true
if !available {
ft.result = &UnsupportedFeatureError{
MinimumVersion: v,
Name: name,
}
})
return result
}
return ft.result
}
}
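// Illustrative sketch, not part of the vendored file: a probe is declared
// once at package level and called wherever the feature is needed; its result
// is cached after the first run that reaches a decision. The feature name and
// version below are made up.
var haveExampleFeature = FeatureTest("example feature", "5.3", func() (bool, error) {
	// Probe the kernel here: return (false, nil) if it clearly lacks the
	// feature, or a non-nil error if the probe itself couldn't run.
	return false, nil
})

func useExampleFeature() error {
	if err := haveExampleFeature(); err != nil {
		return err // wraps ErrNotSupported when the feature is missing
	}
	return nil
}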
@ -61,7 +98,7 @@ func NewVersion(ver string) (Version, error) {
var major, minor, patch uint16
n, _ := fmt.Sscanf(ver, "%d.%d.%d", &major, &minor, &patch)
if n < 2 {
return Version{}, errors.Errorf("invalid version: %s", ver)
return Version{}, fmt.Errorf("invalid version: %s", ver)
}
return Version{major, minor, patch}, nil
}

16
vendor/github.com/cilium/ebpf/internal/io.go generated vendored Normal file
View file

@ -0,0 +1,16 @@
package internal
import "errors"
// DiscardZeroes makes sure that all written bytes are zero
// before discarding them.
type DiscardZeroes struct{}
func (DiscardZeroes) Write(p []byte) (int, error) {
for _, b := range p {
if b != 0 {
return 0, errors.New("encountered non-zero byte")
}
}
return len(p), nil
}
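// Illustrative sketch, not part of the vendored file (assumes "io" is
// imported): DiscardZeroes can back an io.Copy to assert that a reader
// produces nothing but zero bytes.
func allZeroSketch(r io.Reader) error {
	_, err := io.Copy(DiscardZeroes{}, r)
	return err // non-nil as soon as a non-zero byte is seen
}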

30
vendor/github.com/cilium/ebpf/internal/ptr.go generated vendored Normal file
View file

@ -0,0 +1,30 @@
package internal
import "unsafe"
// NewPointer creates a 64-bit pointer from an unsafe Pointer.
func NewPointer(ptr unsafe.Pointer) Pointer {
return Pointer{ptr: ptr}
}
// NewSlicePointer creates a 64-bit pointer from a byte slice.
func NewSlicePointer(buf []byte) Pointer {
if len(buf) == 0 {
return Pointer{}
}
return Pointer{ptr: unsafe.Pointer(&buf[0])}
}
// NewStringPointer creates a 64-bit pointer from a string.
func NewStringPointer(str string) Pointer {
if str == "" {
return Pointer{}
}
// The kernel expects strings to be zero terminated
buf := make([]byte, len(str)+1)
copy(buf, str)
return Pointer{ptr: unsafe.Pointer(&buf[0])}
}

View file

@ -1,14 +1,14 @@
// +build armbe mips mips64p32
package ebpf
package internal
import (
"unsafe"
)
// ptr wraps an unsafe.Pointer to be 64bit to
// Pointer wraps an unsafe.Pointer to be 64bit to
// conform to the syscall specification.
type syscallPtr struct {
type Pointer struct {
pad uint32
ptr unsafe.Pointer
}

View file

@ -1,14 +1,14 @@
// +build 386 amd64p32 arm mipsle mips64p32le
package ebpf
package internal
import (
"unsafe"
)
// ptr wraps an unsafe.Pointer to be 64bit to
// Pointer wraps an unsafe.Pointer to be 64bit to
// conform to the syscall specification.
type syscallPtr struct {
type Pointer struct {
ptr unsafe.Pointer
pad uint32
}

View file

@ -1,14 +1,14 @@
// +build !386,!amd64p32,!arm,!mipsle,!mips64p32le
// +build !armbe,!mips,!mips64p32
package ebpf
package internal
import (
"unsafe"
)
// ptr wraps an unsafe.Pointer to be 64bit to
// Pointer wraps an unsafe.Pointer to be 64bit to
// conform to the syscall specification.
type syscallPtr struct {
type Pointer struct {
ptr unsafe.Pointer
}

139
vendor/github.com/cilium/ebpf/internal/syscall.go generated vendored Normal file
View file

@ -0,0 +1,139 @@
package internal
import (
"fmt"
"path/filepath"
"runtime"
"unsafe"
"github.com/cilium/ebpf/internal/unix"
)
//go:generate stringer -output syscall_string.go -type=BPFCmd
// BPFCmd identifies a subcommand of the bpf syscall.
type BPFCmd int
// Well known BPF commands.
const (
BPF_MAP_CREATE BPFCmd = iota
BPF_MAP_LOOKUP_ELEM
BPF_MAP_UPDATE_ELEM
BPF_MAP_DELETE_ELEM
BPF_MAP_GET_NEXT_KEY
BPF_PROG_LOAD
BPF_OBJ_PIN
BPF_OBJ_GET
BPF_PROG_ATTACH
BPF_PROG_DETACH
BPF_PROG_TEST_RUN
BPF_PROG_GET_NEXT_ID
BPF_MAP_GET_NEXT_ID
BPF_PROG_GET_FD_BY_ID
BPF_MAP_GET_FD_BY_ID
BPF_OBJ_GET_INFO_BY_FD
BPF_PROG_QUERY
BPF_RAW_TRACEPOINT_OPEN
BPF_BTF_LOAD
BPF_BTF_GET_FD_BY_ID
BPF_TASK_FD_QUERY
BPF_MAP_LOOKUP_AND_DELETE_ELEM
BPF_MAP_FREEZE
BPF_BTF_GET_NEXT_ID
BPF_MAP_LOOKUP_BATCH
BPF_MAP_LOOKUP_AND_DELETE_BATCH
BPF_MAP_UPDATE_BATCH
BPF_MAP_DELETE_BATCH
BPF_LINK_CREATE
BPF_LINK_UPDATE
BPF_LINK_GET_FD_BY_ID
BPF_LINK_GET_NEXT_ID
BPF_ENABLE_STATS
BPF_ITER_CREATE
)
// BPF wraps SYS_BPF.
//
// Any pointers contained in attr must use the Pointer type from this package.
func BPF(cmd BPFCmd, attr unsafe.Pointer, size uintptr) (uintptr, error) {
r1, _, errNo := unix.Syscall(unix.SYS_BPF, uintptr(cmd), uintptr(attr), size)
runtime.KeepAlive(attr)
var err error
if errNo != 0 {
err = errNo
}
return r1, err
}
type BPFProgAttachAttr struct {
TargetFd uint32
AttachBpfFd uint32
AttachType uint32
AttachFlags uint32
ReplaceBpfFd uint32
}
func BPFProgAttach(attr *BPFProgAttachAttr) error {
_, err := BPF(BPF_PROG_ATTACH, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
return err
}
type BPFProgDetachAttr struct {
TargetFd uint32
AttachBpfFd uint32
AttachType uint32
}
func BPFProgDetach(attr *BPFProgDetachAttr) error {
_, err := BPF(BPF_PROG_DETACH, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
return err
}
type bpfObjAttr struct {
fileName Pointer
fd uint32
fileFlags uint32
}
const bpfFSType = 0xcafe4a11
// BPFObjPin wraps BPF_OBJ_PIN.
func BPFObjPin(fileName string, fd *FD) error {
dirName := filepath.Dir(fileName)
var statfs unix.Statfs_t
if err := unix.Statfs(dirName, &statfs); err != nil {
return err
}
if uint64(statfs.Type) != bpfFSType {
return fmt.Errorf("%s is not on a bpf filesystem", fileName)
}
value, err := fd.Value()
if err != nil {
return err
}
attr := bpfObjAttr{
fileName: NewStringPointer(fileName),
fd: value,
}
_, err = BPF(BPF_OBJ_PIN, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
if err != nil {
return fmt.Errorf("pin object %s: %w", fileName, err)
}
return nil
}
// BPFObjGet wraps BPF_OBJ_GET.
func BPFObjGet(fileName string) (*FD, error) {
attr := bpfObjAttr{
fileName: NewStringPointer(fileName),
}
ptr, err := BPF(BPF_OBJ_GET, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
if err != nil {
return nil, fmt.Errorf("get object %s: %w", fileName, err)
}
return NewFD(uint32(ptr)), nil
}
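// Illustrative sketch, not part of the vendored file: pinning makes a file
// descriptor reachable by path on a bpffs mount and BPFObjGet reopens it; the
// path below is only an example and must live on a mounted bpf filesystem.
func pinRoundTripSketch(fd *FD) (*FD, error) {
	const path = "/sys/fs/bpf/example"
	if err := BPFObjPin(path, fd); err != nil {
		return nil, err
	}
	return BPFObjGet(path)
}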

View file

@ -0,0 +1,56 @@
// Code generated by "stringer -output syscall_string.go -type=BPFCmd"; DO NOT EDIT.
package internal
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[BPF_MAP_CREATE-0]
_ = x[BPF_MAP_LOOKUP_ELEM-1]
_ = x[BPF_MAP_UPDATE_ELEM-2]
_ = x[BPF_MAP_DELETE_ELEM-3]
_ = x[BPF_MAP_GET_NEXT_KEY-4]
_ = x[BPF_PROG_LOAD-5]
_ = x[BPF_OBJ_PIN-6]
_ = x[BPF_OBJ_GET-7]
_ = x[BPF_PROG_ATTACH-8]
_ = x[BPF_PROG_DETACH-9]
_ = x[BPF_PROG_TEST_RUN-10]
_ = x[BPF_PROG_GET_NEXT_ID-11]
_ = x[BPF_MAP_GET_NEXT_ID-12]
_ = x[BPF_PROG_GET_FD_BY_ID-13]
_ = x[BPF_MAP_GET_FD_BY_ID-14]
_ = x[BPF_OBJ_GET_INFO_BY_FD-15]
_ = x[BPF_PROG_QUERY-16]
_ = x[BPF_RAW_TRACEPOINT_OPEN-17]
_ = x[BPF_BTF_LOAD-18]
_ = x[BPF_BTF_GET_FD_BY_ID-19]
_ = x[BPF_TASK_FD_QUERY-20]
_ = x[BPF_MAP_LOOKUP_AND_DELETE_ELEM-21]
_ = x[BPF_MAP_FREEZE-22]
_ = x[BPF_BTF_GET_NEXT_ID-23]
_ = x[BPF_MAP_LOOKUP_BATCH-24]
_ = x[BPF_MAP_LOOKUP_AND_DELETE_BATCH-25]
_ = x[BPF_MAP_UPDATE_BATCH-26]
_ = x[BPF_MAP_DELETE_BATCH-27]
_ = x[BPF_LINK_CREATE-28]
_ = x[BPF_LINK_UPDATE-29]
_ = x[BPF_LINK_GET_FD_BY_ID-30]
_ = x[BPF_LINK_GET_NEXT_ID-31]
_ = x[BPF_ENABLE_STATS-32]
_ = x[BPF_ITER_CREATE-33]
}
const _BPFCmd_name = "BPF_MAP_CREATEBPF_MAP_LOOKUP_ELEMBPF_MAP_UPDATE_ELEMBPF_MAP_DELETE_ELEMBPF_MAP_GET_NEXT_KEYBPF_PROG_LOADBPF_OBJ_PINBPF_OBJ_GETBPF_PROG_ATTACHBPF_PROG_DETACHBPF_PROG_TEST_RUNBPF_PROG_GET_NEXT_IDBPF_MAP_GET_NEXT_IDBPF_PROG_GET_FD_BY_IDBPF_MAP_GET_FD_BY_IDBPF_OBJ_GET_INFO_BY_FDBPF_PROG_QUERYBPF_RAW_TRACEPOINT_OPENBPF_BTF_LOADBPF_BTF_GET_FD_BY_IDBPF_TASK_FD_QUERYBPF_MAP_LOOKUP_AND_DELETE_ELEMBPF_MAP_FREEZEBPF_BTF_GET_NEXT_IDBPF_MAP_LOOKUP_BATCHBPF_MAP_LOOKUP_AND_DELETE_BATCHBPF_MAP_UPDATE_BATCHBPF_MAP_DELETE_BATCHBPF_LINK_CREATEBPF_LINK_UPDATEBPF_LINK_GET_FD_BY_IDBPF_LINK_GET_NEXT_IDBPF_ENABLE_STATSBPF_ITER_CREATE"
var _BPFCmd_index = [...]uint16{0, 14, 33, 52, 71, 91, 104, 115, 126, 141, 156, 173, 193, 212, 233, 253, 275, 289, 312, 324, 344, 361, 391, 405, 424, 444, 475, 495, 515, 530, 545, 566, 586, 602, 617}
func (i BPFCmd) String() string {
if i < 0 || i >= BPFCmd(len(_BPFCmd_index)-1) {
return "BPFCmd(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _BPFCmd_name[_BPFCmd_index[i]:_BPFCmd_index[i+1]]
}

View file

@ -10,10 +10,17 @@ import (
const (
ENOENT = linux.ENOENT
EEXIST = linux.EEXIST
EAGAIN = linux.EAGAIN
ENOSPC = linux.ENOSPC
EINVAL = linux.EINVAL
EPOLLIN = linux.EPOLLIN
EINTR = linux.EINTR
EPERM = linux.EPERM
ESRCH = linux.ESRCH
ENODEV = linux.ENODEV
BPF_F_RDONLY_PROG = linux.BPF_F_RDONLY_PROG
BPF_F_WRONLY_PROG = linux.BPF_F_WRONLY_PROG
BPF_OBJ_NAME_LEN = linux.BPF_OBJ_NAME_LEN
BPF_TAG_SIZE = linux.BPF_TAG_SIZE
SYS_BPF = linux.SYS_BPF
@ -31,6 +38,7 @@ const (
PERF_SAMPLE_RAW = linux.PERF_SAMPLE_RAW
PERF_FLAG_FD_CLOEXEC = linux.PERF_FLAG_FD_CLOEXEC
RLIM_INFINITY = linux.RLIM_INFINITY
RLIMIT_MEMLOCK = linux.RLIMIT_MEMLOCK
)
// Statfs_t is a wrapper
@ -125,3 +133,18 @@ type Utsname = linux.Utsname
func Uname(buf *Utsname) (err error) {
return linux.Uname(buf)
}
// Getpid is a wrapper
func Getpid() int {
return linux.Getpid()
}
// Gettid is a wrapper
func Gettid() int {
return linux.Gettid()
}
// Tgkill is a wrapper
func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) {
return linux.Tgkill(tgid, tid, sig)
}

View file

@ -12,9 +12,16 @@ var errNonLinux = fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime
const (
ENOENT = syscall.ENOENT
EEXIST = syscall.EEXIST
EAGAIN = syscall.EAGAIN
ENOSPC = syscall.ENOSPC
EINVAL = syscall.EINVAL
EINTR = syscall.EINTR
EPERM = syscall.EPERM
ESRCH = syscall.ESRCH
ENODEV = syscall.ENODEV
BPF_F_RDONLY_PROG = 0
BPF_F_WRONLY_PROG = 0
BPF_OBJ_NAME_LEN = 0x10
BPF_TAG_SIZE = 0x8
SYS_BPF = 321
@ -32,6 +39,8 @@ const (
PerfBitWatermark = 0x4000
PERF_SAMPLE_RAW = 0x400
PERF_FLAG_FD_CLOEXEC = 0x8
RLIM_INFINITY = 0x7fffffffffffffff
RLIMIT_MEMLOCK = 8
)
// Statfs_t is a wrapper
@ -184,10 +193,25 @@ func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int
// Utsname is a wrapper
type Utsname struct {
Release [65]byte
Release [65]byte
}
// Uname is a wrapper
func Uname(buf *Utsname) (err error) {
return errNonLinux
}
// Getpid is a wrapper
func Getpid() int {
return -1
}
// Gettid is a wrapper
func Gettid() int {
return -1
}
// Tgkill is a wrapper
func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) {
return errNonLinux
}

View file

@ -1,31 +1,60 @@
package ebpf
import (
"fmt"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/internal/btf"
)
// link resolves bpf-to-bpf calls.
//
// Each section may contain multiple functions / labels, and is only linked
// if the program being edited references one of these functions.
// Each library may contain multiple functions / labels, and is only linked
// if prog references one of these functions.
//
// Sections must not require linking themselves.
func link(insns asm.Instructions, sections ...asm.Instructions) (asm.Instructions, error) {
for _, section := range sections {
var err error
insns, err = linkSection(insns, section)
if err != nil {
return nil, err
// Libraries are also linked against each other if they depend on one another.
func link(prog *ProgramSpec, libs []*ProgramSpec) error {
var (
linked = make(map[*ProgramSpec]bool)
pending = []asm.Instructions{prog.Instructions}
insns asm.Instructions
)
for len(pending) > 0 {
insns, pending = pending[0], pending[1:]
for _, lib := range libs {
if linked[lib] {
continue
}
needed, err := needSection(insns, lib.Instructions)
if err != nil {
return fmt.Errorf("linking %s: %w", lib.Name, err)
}
if !needed {
continue
}
linked[lib] = true
prog.Instructions = append(prog.Instructions, lib.Instructions...)
pending = append(pending, lib.Instructions)
if prog.BTF != nil && lib.BTF != nil {
if err := btf.ProgramAppend(prog.BTF, lib.BTF); err != nil {
return fmt.Errorf("linking BTF of %s: %w", lib.Name, err)
}
}
}
}
return insns, nil
return nil
}
func linkSection(insns, section asm.Instructions) (asm.Instructions, error) {
func needSection(insns, section asm.Instructions) (bool, error) {
// A map of symbols to the libraries which contain them.
symbols, err := section.SymbolOffsets()
if err != nil {
return nil, err
return false, err
}
for _, ins := range insns {
@ -33,7 +62,7 @@ func linkSection(insns, section asm.Instructions) (asm.Instructions, error) {
continue
}
if ins.OpCode.JumpOp() != asm.Call || ins.Src != asm.R1 {
if ins.OpCode.JumpOp() != asm.Call || ins.Src != asm.PseudoCall {
continue
}
@ -48,11 +77,10 @@ func linkSection(insns, section asm.Instructions) (asm.Instructions, error) {
}
// At this point we know that at least one function in the
// library is called from insns. Merge the two sections.
// The rewrite of ins.Constant happens in asm.Instruction.Marshal.
return append(insns, section...), nil
// library is called from insns, so we have to link it.
return true, nil
}
// None of the functions in the section are called. Do nothing.
return insns, nil
// None of the functions in the section are called.
return false, nil
}

400
vendor/github.com/cilium/ebpf/map.go generated vendored
View file

@ -1,15 +1,25 @@
package ebpf
import (
"errors"
"fmt"
"unsafe"
"strings"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/btf"
"github.com/cilium/ebpf/internal/unix"
"github.com/pkg/errors"
)
// Errors returned by Map and MapIterator methods.
var (
ErrKeyNotExist = errors.New("key does not exist")
ErrKeyExist = errors.New("key already exists")
ErrIterationAborted = errors.New("iteration aborted")
)
// MapID represents the unique ID of an eBPF map
type MapID uint32
// MapSpec defines a Map.
type MapSpec struct {
// Name is passed to the kernel as a debug aid. Must only contain
@ -20,8 +30,18 @@ type MapSpec struct {
ValueSize uint32
MaxEntries uint32
Flags uint32
// The initial contents of the map. May be nil.
Contents []MapKV
// Whether to freeze a map after setting its initial contents.
Freeze bool
// InnerMap is used as a template for ArrayOfMaps and HashOfMaps
InnerMap *MapSpec
// The BTF associated with this map.
BTF *btf.Map
}
func (ms *MapSpec) String() string {
@ -29,16 +49,26 @@ func (ms *MapSpec) String() string {
}
// Copy returns a copy of the spec.
//
// MapSpec.Contents is a shallow copy.
func (ms *MapSpec) Copy() *MapSpec {
if ms == nil {
return nil
}
cpy := *ms
cpy.Contents = make([]MapKV, len(ms.Contents))
copy(cpy.Contents, ms.Contents)
cpy.InnerMap = ms.InnerMap.Copy()
return &cpy
}
// MapKV is used to initialize the contents of a Map.
type MapKV struct {
Key interface{}
Value interface{}
}
// Map represents a Map file descriptor.
//
// It is not safe to close a map which is used by other goroutines.
@ -50,7 +80,7 @@ func (ms *MapSpec) Copy() *MapSpec {
// if you require custom encoding.
type Map struct {
name string
fd *bpfFD
fd *internal.FD
abi MapABI
// Per CPU maps return values larger than the size in the spec
fullValueSize int
@ -63,11 +93,11 @@ func NewMapFromFD(fd int) (*Map, error) {
if fd < 0 {
return nil, errors.New("invalid fd")
}
bpfFd := newBPFFD(uint32(fd))
bpfFd := internal.NewFD(uint32(fd))
name, abi, err := newMapABIFromFd(bpfFd)
if err != nil {
bpfFd.forget()
bpfFd.Forget()
return nil, err
}
return newMap(bpfFd, name, abi)
@ -77,26 +107,43 @@ func NewMapFromFD(fd int) (*Map, error) {
//
// Creating a map for the first time will perform feature detection
// by creating small, temporary maps.
//
// The caller is responsible for ensuring the process' rlimit is set
// sufficiently high for locking memory during map creation. This can be done
// by calling unix.Setrlimit with unix.RLIMIT_MEMLOCK prior to calling NewMap.
func NewMap(spec *MapSpec) (*Map, error) {
if spec.BTF == nil {
return newMapWithBTF(spec, nil)
}
handle, err := btf.NewHandle(btf.MapSpec(spec.BTF))
if err != nil && !errors.Is(err, btf.ErrNotSupported) {
return nil, fmt.Errorf("can't load BTF: %w", err)
}
return newMapWithBTF(spec, handle)
}
func newMapWithBTF(spec *MapSpec, handle *btf.Handle) (*Map, error) {
if spec.Type != ArrayOfMaps && spec.Type != HashOfMaps {
return createMap(spec, nil)
return createMap(spec, nil, handle)
}
if spec.InnerMap == nil {
return nil, errors.Errorf("%s requires InnerMap", spec.Type)
return nil, fmt.Errorf("%s requires InnerMap", spec.Type)
}
template, err := createMap(spec.InnerMap, nil)
template, err := createMap(spec.InnerMap, nil, handle)
if err != nil {
return nil, err
}
defer template.Close()
return createMap(spec, template.fd)
return createMap(spec, template.fd, handle)
}
func createMap(spec *MapSpec, inner *bpfFD) (*Map, error) {
spec = spec.Copy()
func createMap(spec *MapSpec, inner *internal.FD, handle *btf.Handle) (*Map, error) {
abi := newMapABIFromSpec(spec)
switch spec.Type {
case ArrayOfMaps:
@ -106,64 +153,89 @@ func createMap(spec *MapSpec, inner *bpfFD) (*Map, error) {
return nil, err
}
if spec.ValueSize != 0 && spec.ValueSize != 4 {
return nil, errors.Errorf("ValueSize must be zero or four for map of map")
if abi.ValueSize != 0 && abi.ValueSize != 4 {
return nil, errors.New("ValueSize must be zero or four for map of map")
}
spec.ValueSize = 4
abi.ValueSize = 4
case PerfEventArray:
if spec.KeySize != 0 {
return nil, errors.Errorf("KeySize must be zero for perf event array")
}
if spec.ValueSize != 0 {
return nil, errors.Errorf("ValueSize must be zero for perf event array")
}
if spec.MaxEntries == 0 {
n, err := internal.OnlineCPUs()
if err != nil {
return nil, errors.Wrap(err, "perf event array")
}
spec.MaxEntries = uint32(n)
if abi.KeySize != 0 && abi.KeySize != 4 {
return nil, errors.New("KeySize must be zero or four for perf event array")
}
abi.KeySize = 4
spec.KeySize = 4
spec.ValueSize = 4
if abi.ValueSize != 0 && abi.ValueSize != 4 {
return nil, errors.New("ValueSize must be zero or four for perf event array")
}
abi.ValueSize = 4
if abi.MaxEntries == 0 {
n, err := internal.PossibleCPUs()
if err != nil {
return nil, fmt.Errorf("perf event array: %w", err)
}
abi.MaxEntries = uint32(n)
}
}
if abi.Flags&(unix.BPF_F_RDONLY_PROG|unix.BPF_F_WRONLY_PROG) > 0 || spec.Freeze {
if err := haveMapMutabilityModifiers(); err != nil {
return nil, fmt.Errorf("map create: %w", err)
}
}
attr := bpfMapCreateAttr{
mapType: spec.Type,
keySize: spec.KeySize,
valueSize: spec.ValueSize,
maxEntries: spec.MaxEntries,
flags: spec.Flags,
mapType: abi.Type,
keySize: abi.KeySize,
valueSize: abi.ValueSize,
maxEntries: abi.MaxEntries,
flags: abi.Flags,
}
if inner != nil {
var err error
attr.innerMapFd, err = inner.value()
attr.innerMapFd, err = inner.Value()
if err != nil {
return nil, errors.Wrap(err, "map create")
return nil, fmt.Errorf("map create: %w", err)
}
}
name, err := newBPFObjName(spec.Name)
if err != nil {
return nil, errors.Wrap(err, "map create")
if handle != nil && spec.BTF != nil {
attr.btfFd = uint32(handle.FD())
attr.btfKeyTypeID = btf.MapKey(spec.BTF).ID()
attr.btfValueTypeID = btf.MapValue(spec.BTF).ID()
}
if haveObjName() == nil {
attr.mapName = name
attr.mapName = newBPFObjName(spec.Name)
}
fd, err := bpfMapCreate(&attr)
if err != nil {
return nil, errors.Wrap(err, "map create")
return nil, fmt.Errorf("map create: %w", err)
}
return newMap(fd, spec.Name, newMapABIFromSpec(spec))
m, err := newMap(fd, spec.Name, abi)
if err != nil {
return nil, err
}
if err := m.populate(spec.Contents); err != nil {
m.Close()
return nil, fmt.Errorf("map create: can't set initial contents: %w", err)
}
if spec.Freeze {
if err := m.Freeze(); err != nil {
m.Close()
return nil, fmt.Errorf("can't freeze map: %w", err)
}
}
return m, nil
}
func newMap(fd *bpfFD, name string, abi *MapABI) (*Map, error) {
func newMap(fd *internal.FD, name string, abi *MapABI) (*Map, error) {
m := &Map{
name,
fd,
@ -228,9 +300,9 @@ func (m *Map) Lookup(key, valueOut interface{}) error {
*value = m
return nil
case *Map:
return errors.Errorf("can't unmarshal into %T, need %T", value, (**Map)(nil))
return fmt.Errorf("can't unmarshal into %T, need %T", value, (**Map)(nil))
case Map:
return errors.Errorf("can't unmarshal into %T, need %T", value, (**Map)(nil))
return fmt.Errorf("can't unmarshal into %T, need %T", value, (**Map)(nil))
case **Program:
p, err := unmarshalProgram(valueBytes)
@ -242,38 +314,58 @@ func (m *Map) Lookup(key, valueOut interface{}) error {
*value = p
return nil
case *Program:
return errors.Errorf("can't unmarshal into %T, need %T", value, (**Program)(nil))
return fmt.Errorf("can't unmarshal into %T, need %T", value, (**Program)(nil))
case Program:
return errors.Errorf("can't unmarshal into %T, need %T", value, (**Program)(nil))
return fmt.Errorf("can't unmarshal into %T, need %T", value, (**Program)(nil))
default:
return unmarshalBytes(valueOut, valueBytes)
}
}
// LookupAndDelete retrieves and deletes a value from a Map.
//
// Returns ErrKeyNotExist if the key doesn't exist.
func (m *Map) LookupAndDelete(key, valueOut interface{}) error {
valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize)
keyPtr, err := marshalPtr(key, int(m.abi.KeySize))
if err != nil {
return fmt.Errorf("can't marshal key: %w", err)
}
if err := bpfMapLookupAndDelete(m.fd, keyPtr, valuePtr); err != nil {
return fmt.Errorf("lookup and delete failed: %w", err)
}
return unmarshalBytes(valueOut, valueBytes)
}
// LookupBytes gets a value from Map.
//
// Returns a nil value if a key doesn't exist.
func (m *Map) LookupBytes(key interface{}) ([]byte, error) {
valueBytes := make([]byte, m.fullValueSize)
valuePtr := newPtr(unsafe.Pointer(&valueBytes[0]))
valuePtr := internal.NewSlicePointer(valueBytes)
err := m.lookup(key, valuePtr)
if IsNotExist(err) {
if errors.Is(err, ErrKeyNotExist) {
return nil, nil
}
return valueBytes, err
}
func (m *Map) lookup(key interface{}, valueOut syscallPtr) error {
func (m *Map) lookup(key interface{}, valueOut internal.Pointer) error {
keyPtr, err := marshalPtr(key, int(m.abi.KeySize))
if err != nil {
return errors.WithMessage(err, "can't marshal key")
return fmt.Errorf("can't marshal key: %w", err)
}
err = bpfMapLookupElem(m.fd, keyPtr, valueOut)
return errors.WithMessage(err, "lookup failed")
if err = bpfMapLookupElem(m.fd, keyPtr, valueOut); err != nil {
return fmt.Errorf("lookup failed: %w", err)
}
return nil
}
// MapUpdateFlags controls the behaviour of the Map.Update call.
@ -301,38 +393,46 @@ func (m *Map) Put(key, value interface{}) error {
func (m *Map) Update(key, value interface{}, flags MapUpdateFlags) error {
keyPtr, err := marshalPtr(key, int(m.abi.KeySize))
if err != nil {
return errors.WithMessage(err, "can't marshal key")
return fmt.Errorf("can't marshal key: %w", err)
}
var valuePtr syscallPtr
var valuePtr internal.Pointer
if m.abi.Type.hasPerCPUValue() {
valuePtr, err = marshalPerCPUValue(value, int(m.abi.ValueSize))
} else {
valuePtr, err = marshalPtr(value, int(m.abi.ValueSize))
}
if err != nil {
return errors.WithMessage(err, "can't marshal value")
return fmt.Errorf("can't marshal value: %w", err)
}
return bpfMapUpdateElem(m.fd, keyPtr, valuePtr, uint64(flags))
if err = bpfMapUpdateElem(m.fd, keyPtr, valuePtr, uint64(flags)); err != nil {
return fmt.Errorf("update failed: %w", err)
}
return nil
}
// Delete removes a value.
//
// Returns an error if the key does not exist, see IsNotExist.
// Returns ErrKeyNotExist if the key does not exist.
func (m *Map) Delete(key interface{}) error {
keyPtr, err := marshalPtr(key, int(m.abi.KeySize))
if err != nil {
return errors.WithMessage(err, "can't marshal key")
return fmt.Errorf("can't marshal key: %w", err)
}
err = bpfMapDeleteElem(m.fd, keyPtr)
return errors.WithMessage(err, "can't delete key")
if err = bpfMapDeleteElem(m.fd, keyPtr); err != nil {
return fmt.Errorf("delete failed: %w", err)
}
return nil
}
// NextKey finds the key following an initial key.
//
// See NextKeyBytes for details.
//
// Returns ErrKeyNotExist if there is no next key.
func (m *Map) NextKey(key, nextKeyOut interface{}) error {
nextKeyPtr, nextKeyBytes := makeBuffer(nextKeyOut, int(m.abi.KeySize))
@ -344,8 +444,10 @@ func (m *Map) NextKey(key, nextKeyOut interface{}) error {
return nil
}
err := unmarshalBytes(nextKeyOut, nextKeyBytes)
return errors.WithMessage(err, "can't unmarshal next key")
if err := unmarshalBytes(nextKeyOut, nextKeyBytes); err != nil {
return fmt.Errorf("can't unmarshal next key: %w", err)
}
return nil
}
// NextKeyBytes returns the key following an initial key as a byte slice.
@ -353,33 +455,37 @@ func (m *Map) NextKey(key, nextKeyOut interface{}) error {
// Passing nil will return the first key.
//
// Use Iterate if you want to traverse all entries in the map.
//
// Returns nil if there are no more keys.
func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) {
nextKey := make([]byte, m.abi.KeySize)
nextKeyPtr := newPtr(unsafe.Pointer(&nextKey[0]))
nextKeyPtr := internal.NewSlicePointer(nextKey)
err := m.nextKey(key, nextKeyPtr)
if IsNotExist(err) {
if errors.Is(err, ErrKeyNotExist) {
return nil, nil
}
return nextKey, err
}
func (m *Map) nextKey(key interface{}, nextKeyOut syscallPtr) error {
func (m *Map) nextKey(key interface{}, nextKeyOut internal.Pointer) error {
var (
keyPtr syscallPtr
keyPtr internal.Pointer
err error
)
if key != nil {
keyPtr, err = marshalPtr(key, int(m.abi.KeySize))
if err != nil {
return errors.WithMessage(err, "can't marshal key")
return fmt.Errorf("can't marshal key: %w", err)
}
}
err = bpfMapGetNextKey(m.fd, keyPtr, nextKeyOut)
return errors.WithMessage(err, "can't get next key")
if err = bpfMapGetNextKey(m.fd, keyPtr, nextKeyOut); err != nil {
return fmt.Errorf("next key failed: %w", err)
}
return nil
}
// Iterate traverses a map.
@ -400,14 +506,14 @@ func (m *Map) Close() error {
return nil
}
return m.fd.close()
return m.fd.Close()
}
// FD gets the file descriptor of the Map.
//
// Calling this function is invalid after Close has been called.
func (m *Map) FD() int {
fd, err := m.fd.value()
fd, err := m.fd.Value()
if err != nil {
// Best effort: -1 is the number most likely to be an
// invalid file descriptor.
@ -428,9 +534,9 @@ func (m *Map) Clone() (*Map, error) {
return nil, nil
}
dup, err := m.fd.dup()
dup, err := m.fd.Dup()
if err != nil {
return nil, errors.Wrap(err, "can't clone map")
return nil, fmt.Errorf("can't clone map: %w", err)
}
return newMap(dup, m.name, &m.abi)
@ -440,7 +546,30 @@ func (m *Map) Clone() (*Map, error) {
//
// This requires bpffs to be mounted above fileName. See http://cilium.readthedocs.io/en/doc-1.0/kubernetes/install/#mounting-the-bpf-fs-optional
func (m *Map) Pin(fileName string) error {
return bpfPinObject(fileName, m.fd)
return internal.BPFObjPin(fileName, m.fd)
}
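A short sketch of pinning a map and re-opening it later (the path is illustrative and assumes bpffs is mounted at /sys/fs/bpf):

```go
if err := m.Pin("/sys/fs/bpf/my_map"); err != nil {
	return fmt.Errorf("pin map: %w", err)
}
pinned, err := ebpf.LoadPinnedMap("/sys/fs/bpf/my_map")
if err != nil {
	return fmt.Errorf("load pinned map: %w", err)
}
defer pinned.Close()
```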
// Freeze prevents a map from being modified from user space.
//
// It makes no changes to kernel-side restrictions.
func (m *Map) Freeze() error {
if err := haveMapMutabilityModifiers(); err != nil {
return fmt.Errorf("can't freeze map: %w", err)
}
if err := bpfMapFreeze(m.fd); err != nil {
return fmt.Errorf("can't freeze map: %w", err)
}
return nil
}
func (m *Map) populate(contents []MapKV) error {
for _, kv := range contents {
if err := m.Put(kv.Key, kv.Value); err != nil {
return fmt.Errorf("key %v: %w", kv.Key, err)
}
}
return nil
}
// LoadPinnedMap loads a Map from a BPF file.
@ -448,13 +577,13 @@ func (m *Map) Pin(fileName string) error {
// The function is not compatible with nested maps.
// Use LoadPinnedMapExplicit in these situations.
func LoadPinnedMap(fileName string) (*Map, error) {
fd, err := bpfGetObject(fileName)
fd, err := internal.BPFObjGet(fileName)
if err != nil {
return nil, err
}
name, abi, err := newMapABIFromFd(fd)
if err != nil {
_ = fd.close()
_ = fd.Close()
return nil, err
}
return newMap(fd, name, abi)
@ -462,7 +591,7 @@ func LoadPinnedMap(fileName string) (*Map, error) {
// LoadPinnedMapExplicit loads a map with explicit parameters.
func LoadPinnedMapExplicit(fileName string, abi *MapABI) (*Map, error) {
fd, err := bpfGetObject(fileName)
fd, err := internal.BPFObjGet(fileName)
if err != nil {
return nil, err
}
@ -477,23 +606,12 @@ func unmarshalMap(buf []byte) (*Map, error) {
// Looking up an entry in a nested map or prog array returns an id,
// not an fd.
id := internal.NativeEndian.Uint32(buf)
fd, err := bpfGetMapFDByID(id)
if err != nil {
return nil, err
}
name, abi, err := newMapABIFromFd(fd)
if err != nil {
_ = fd.close()
return nil, err
}
return newMap(fd, name, abi)
return NewMapFromID(MapID(id))
}
// MarshalBinary implements BinaryMarshaler.
func (m *Map) MarshalBinary() ([]byte, error) {
fd, err := m.fd.value()
fd, err := m.fd.Value()
if err != nil {
return nil, err
}
@ -503,6 +621,60 @@ func (m *Map) MarshalBinary() ([]byte, error) {
return buf, nil
}
func patchValue(value []byte, typ btf.Type, replacements map[string]interface{}) error {
replaced := make(map[string]bool)
replace := func(name string, offset, size int, replacement interface{}) error {
if offset+size > len(value) {
return fmt.Errorf("%s: offset %d(+%d) is out of bounds", name, offset, size)
}
buf, err := marshalBytes(replacement, size)
if err != nil {
return fmt.Errorf("marshal %s: %w", name, err)
}
copy(value[offset:offset+size], buf)
replaced[name] = true
return nil
}
switch parent := typ.(type) {
case *btf.Datasec:
for _, secinfo := range parent.Vars {
name := string(secinfo.Type.(*btf.Var).Name)
replacement, ok := replacements[name]
if !ok {
continue
}
err := replace(name, int(secinfo.Offset), int(secinfo.Size), replacement)
if err != nil {
return err
}
}
default:
return fmt.Errorf("patching %T is not supported", typ)
}
if len(replaced) == len(replacements) {
return nil
}
var missing []string
for name := range replacements {
if !replaced[name] {
missing = append(missing, name)
}
}
if len(missing) == 1 {
return fmt.Errorf("unknown field: %s", missing[0])
}
return fmt.Errorf("unknown fields: %s", strings.Join(missing, ","))
}
// MapIterator iterates a Map.
//
// See Map.Iterate.
@ -523,8 +695,6 @@ func newMapIterator(target *Map) *MapIterator {
}
}
var errIterationAborted = errors.New("iteration aborted")
// Next decodes the next key and value.
//
// Iterating a hash map from which keys are being deleted is not
@ -560,7 +730,7 @@ func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool {
mi.prevKey = mi.prevBytes
mi.err = mi.target.Lookup(nextBytes, valueOut)
if IsNotExist(mi.err) {
if errors.Is(mi.err, ErrKeyNotExist) {
// Even though the key should be valid, we couldn't look up
// its value. If we're iterating a hash map this is probably
// because a concurrent delete removed the value before we
@ -579,26 +749,50 @@ func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool {
return mi.err == nil
}
mi.err = errIterationAborted
mi.err = fmt.Errorf("%w", ErrIterationAborted)
return false
}
// Err returns any encountered error.
//
// The method must be called after Next returns nil.
//
// Returns ErrIterationAborted if it wasn't possible to do a full iteration.
func (mi *MapIterator) Err() error {
return mi.err
}
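For reference, a minimal sketch of the iterator API (assuming a map with uint32 keys and uint64 values):

```go
var (
	key   uint32
	value uint64
)
iter := m.Iterate()
for iter.Next(&key, &value) {
	fmt.Printf("key=%d value=%d\n", key, value)
}
if err := iter.Err(); err != nil {
	// ErrIterationAborted indicates the map was modified concurrently
	// and a complete traversal was not possible.
	return err
}
```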
// IsNotExist returns true if the error indicates that a
// key doesn't exist.
func IsNotExist(err error) bool {
return errors.Cause(err) == unix.ENOENT
// MapGetNextID returns the ID of the next eBPF map.
//
// Returns ErrNotExist, if there is no next eBPF map.
func MapGetNextID(startID MapID) (MapID, error) {
id, err := objGetNextID(internal.BPF_MAP_GET_NEXT_ID, uint32(startID))
return MapID(id), err
}
// IsIterationAborted returns true if the iteration was aborted.
// NewMapFromID returns the map for a given id.
//
// This occurs when keys are deleted from a hash map during iteration.
func IsIterationAborted(err error) bool {
return errors.Cause(err) == errIterationAborted
// Returns ErrNotExist, if there is no eBPF map with the given id.
func NewMapFromID(id MapID) (*Map, error) {
fd, err := bpfObjGetFDByID(internal.BPF_MAP_GET_FD_BY_ID, uint32(id))
if err != nil {
return nil, err
}
name, abi, err := newMapABIFromFd(fd)
if err != nil {
_ = fd.Close()
return nil, err
}
return newMap(fd, name, abi)
}
// ID returns the systemwide unique ID of the map.
func (m *Map) ID() (MapID, error) {
info, err := bpfGetMapInfoByFD(m.fd)
if err != nil {
return MapID(0), err
}
return MapID(info.id), nil
}
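Together, the new ID helpers allow enumerating every map loaded on the system; a hedged sketch:

```go
var id ebpf.MapID
for {
	var err error
	id, err = ebpf.MapGetNextID(id)
	if errors.Is(err, ebpf.ErrNotExist) {
		break // no more maps
	}
	if err != nil {
		return err
	}
	m, err := ebpf.NewMapFromID(id)
	if err != nil {
		continue // the map may have disappeared in the meantime
	}
	fmt.Println("found map with ID", id)
	m.Close()
}
```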

View file

@ -4,26 +4,33 @@ import (
"bytes"
"encoding"
"encoding/binary"
"errors"
"fmt"
"reflect"
"runtime"
"unsafe"
"github.com/cilium/ebpf/internal"
"github.com/pkg/errors"
)
func marshalPtr(data interface{}, length int) (syscallPtr, error) {
func marshalPtr(data interface{}, length int) (internal.Pointer, error) {
if data == nil {
if length == 0 {
return internal.NewPointer(nil), nil
}
return internal.Pointer{}, errors.New("can't use nil as key of map")
}
if ptr, ok := data.(unsafe.Pointer); ok {
return newPtr(ptr), nil
return internal.NewPointer(ptr), nil
}
buf, err := marshalBytes(data, length)
if err != nil {
return syscallPtr{}, err
return internal.Pointer{}, err
}
return newPtr(unsafe.Pointer(&buf[0])), nil
return internal.NewSlicePointer(buf), nil
}
func marshalBytes(data interface{}, length int) (buf []byte, err error) {
@ -39,7 +46,9 @@ func marshalBytes(data interface{}, length int) (buf []byte, err error) {
default:
var wr bytes.Buffer
err = binary.Write(&wr, internal.NativeEndian, value)
err = errors.Wrapf(err, "encoding %T", value)
if err != nil {
err = fmt.Errorf("encoding %T: %v", value, err)
}
buf = wr.Bytes()
}
if err != nil {
@ -47,18 +56,18 @@ func marshalBytes(data interface{}, length int) (buf []byte, err error) {
}
if len(buf) != length {
return nil, errors.Errorf("%T doesn't marshal to %d bytes", data, length)
return nil, fmt.Errorf("%T doesn't marshal to %d bytes", data, length)
}
return buf, nil
}
func makeBuffer(dst interface{}, length int) (syscallPtr, []byte) {
func makeBuffer(dst interface{}, length int) (internal.Pointer, []byte) {
if ptr, ok := dst.(unsafe.Pointer); ok {
return newPtr(ptr), nil
return internal.NewPointer(ptr), nil
}
buf := make([]byte, length)
return newPtr(unsafe.Pointer(&buf[0])), buf
return internal.NewSlicePointer(buf), buf
}
func unmarshalBytes(data interface{}, buf []byte) error {
@ -88,8 +97,10 @@ func unmarshalBytes(data interface{}, buf []byte) error {
return errors.New("require pointer to []byte")
default:
rd := bytes.NewReader(buf)
err := binary.Read(rd, internal.NativeEndian, value)
return errors.Wrapf(err, "decoding %T", value)
if err := binary.Read(rd, internal.NativeEndian, value); err != nil {
return fmt.Errorf("decoding %T: %v", value, err)
}
return nil
}
}
@ -99,21 +110,21 @@ func unmarshalBytes(data interface{}, buf []byte) error {
// Values are initialized to zero if the slice has less elements than CPUs.
//
// slice must have a type like []elementType.
func marshalPerCPUValue(slice interface{}, elemLength int) (syscallPtr, error) {
func marshalPerCPUValue(slice interface{}, elemLength int) (internal.Pointer, error) {
sliceType := reflect.TypeOf(slice)
if sliceType.Kind() != reflect.Slice {
return syscallPtr{}, errors.New("per-CPU value requires slice")
return internal.Pointer{}, errors.New("per-CPU value requires slice")
}
possibleCPUs, err := internal.PossibleCPUs()
if err != nil {
return syscallPtr{}, err
return internal.Pointer{}, err
}
sliceValue := reflect.ValueOf(slice)
sliceLen := sliceValue.Len()
if sliceLen > possibleCPUs {
return syscallPtr{}, errors.Errorf("per-CPU value exceeds number of CPUs")
return internal.Pointer{}, fmt.Errorf("per-CPU value exceeds number of CPUs")
}
alignedElemLength := align(elemLength, 8)
@ -123,14 +134,14 @@ func marshalPerCPUValue(slice interface{}, elemLength int) (syscallPtr, error) {
elem := sliceValue.Index(i).Interface()
elemBytes, err := marshalBytes(elem, elemLength)
if err != nil {
return syscallPtr{}, err
return internal.Pointer{}, err
}
offset := i * alignedElemLength
copy(buf[offset:offset+elemLength], elemBytes)
}
return newPtr(unsafe.Pointer(&buf[0])), nil
return internal.NewSlicePointer(buf), nil
}
// unmarshalPerCPUValue decodes a buffer into a slice containing one value per
@ -140,7 +151,7 @@ func marshalPerCPUValue(slice interface{}, elemLength int) (syscallPtr, error) {
func unmarshalPerCPUValue(slicePtr interface{}, elemLength int, buf []byte) error {
slicePtrType := reflect.TypeOf(slicePtr)
if slicePtrType.Kind() != reflect.Ptr || slicePtrType.Elem().Kind() != reflect.Slice {
return errors.Errorf("per-cpu value requires pointer to slice")
return fmt.Errorf("per-cpu value requires pointer to slice")
}
possibleCPUs, err := internal.PossibleCPUs()
@ -159,7 +170,7 @@ func unmarshalPerCPUValue(slicePtr interface{}, elemLength int, buf []byte) erro
step := len(buf) / possibleCPUs
if step < elemLength {
return errors.Errorf("per-cpu element length is larger than available data")
return fmt.Errorf("per-cpu element length is larger than available data")
}
for i := 0; i < possibleCPUs; i++ {
var elem interface{}
@ -177,7 +188,7 @@ func unmarshalPerCPUValue(slicePtr interface{}, elemLength int, buf []byte) erro
err := unmarshalBytes(elem, elemBytes)
if err != nil {
return errors.Wrapf(err, "cpu %d", i)
return fmt.Errorf("cpu %d: %w", i, err)
}
buf = buf[step:]

344
vendor/github.com/cilium/ebpf/prog.go generated vendored
View file

@ -2,19 +2,25 @@ package ebpf
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"math"
"strings"
"time"
"unsafe"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/btf"
"github.com/cilium/ebpf/internal/unix"
"github.com/pkg/errors"
)
// ErrNotSupported is returned whenever the kernel doesn't support a feature.
var ErrNotSupported = internal.ErrNotSupported
// ProgramID represents the unique ID of an eBPF program
type ProgramID uint32
const (
// Number of bytes to pad the output buffer for BPF_PROG_TEST_RUN.
// This is currently the maximum of spare space allocated for SKB
@ -40,12 +46,33 @@ type ProgramOptions struct {
type ProgramSpec struct {
// Name is passed to the kernel as a debug aid. Must only contain
// alpha numeric and '_' characters.
Name string
Type ProgramType
AttachType AttachType
Instructions asm.Instructions
License string
Name string
// Type determines at which hook in the kernel a program will run.
Type ProgramType
AttachType AttachType
// Name of a kernel data structure to attach to. Its interpretation
// depends on Type and AttachType.
AttachTo string
Instructions asm.Instructions
// License of the program. Some helpers are only available if
// the license is deemed compatible with the GPL.
//
// See https://www.kernel.org/doc/html/latest/process/license-rules.html#id1
License string
// Version used by tracing programs.
//
// Deprecated: superseded by BTF.
KernelVersion uint32
// The BTF associated with this program. Changing Instructions
// will most likely invalidate the contained data, and may
// result in errors when attempting to load it into the kernel.
BTF *btf.Program
// The byte order this program was compiled for, may be nil.
ByteOrder binary.ByteOrder
}
// Copy returns a copy of the spec.
@ -68,9 +95,10 @@ type Program struct {
// otherwise it is empty.
VerifierLog string
fd *bpfFD
name string
abi ProgramABI
fd *internal.FD
name string
abi ProgramABI
attachType AttachType
}
// NewProgram creates a new Program.
@ -86,7 +114,20 @@ func NewProgram(spec *ProgramSpec) (*Program, error) {
// Loading a program for the first time will perform
// feature detection by loading small, temporary programs.
func NewProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, error) {
attr, err := convertProgramSpec(spec)
if spec.BTF == nil {
return newProgramWithBTF(spec, nil, opts)
}
handle, err := btf.NewHandle(btf.ProgramSpec(spec.BTF))
if err != nil && !errors.Is(err, btf.ErrNotSupported) {
return nil, fmt.Errorf("can't load BTF: %w", err)
}
return newProgramWithBTF(spec, handle, opts)
}
func newProgramWithBTF(spec *ProgramSpec, btf *btf.Handle, opts ProgramOptions) (*Program, error) {
attr, err := convertProgramSpec(spec, btf)
if err != nil {
return nil, err
}
@ -101,34 +142,29 @@ func NewProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, er
logBuf = make([]byte, logSize)
attr.logLevel = opts.LogLevel
attr.logSize = uint32(len(logBuf))
attr.logBuf = newPtr(unsafe.Pointer(&logBuf[0]))
attr.logBuf = internal.NewSlicePointer(logBuf)
}
fd, err := bpfProgLoad(attr)
if err == nil {
prog := newProgram(fd, spec.Name, &ProgramABI{spec.Type})
prog.VerifierLog = convertCString(logBuf)
prog.VerifierLog = internal.CString(logBuf)
return prog, nil
}
truncated := errors.Cause(err) == unix.ENOSPC
logErr := err
if opts.LogLevel == 0 {
// Re-run with the verifier enabled to get better error messages.
logBuf = make([]byte, logSize)
attr.logLevel = 1
attr.logSize = uint32(len(logBuf))
attr.logBuf = newPtr(unsafe.Pointer(&logBuf[0]))
attr.logBuf = internal.NewSlicePointer(logBuf)
_, nerr := bpfProgLoad(attr)
truncated = errors.Cause(nerr) == unix.ENOSPC
_, logErr = bpfProgLoad(attr)
}
logs := convertCString(logBuf)
if truncated {
logs += "\n(truncated...)"
}
return nil, &loadError{err, logs}
err = internal.ErrorWithLog(err, logBuf, logErr)
return nil, fmt.Errorf("can't load program: %w", err)
}
// NewProgramFromFD creates a program from a raw fd.
@ -140,18 +176,18 @@ func NewProgramFromFD(fd int) (*Program, error) {
if fd < 0 {
return nil, errors.New("invalid fd")
}
bpfFd := newBPFFD(uint32(fd))
bpfFd := internal.NewFD(uint32(fd))
name, abi, err := newProgramABIFromFd(bpfFd)
if err != nil {
bpfFd.forget()
bpfFd.Forget()
return nil, err
}
return newProgram(bpfFd, name, abi), nil
}
func newProgram(fd *bpfFD, name string, abi *ProgramABI) *Program {
func newProgram(fd *internal.FD, name string, abi *ProgramABI) *Program {
return &Program{
name: name,
fd: fd,
@ -159,7 +195,7 @@ func newProgram(fd *bpfFD, name string, abi *ProgramABI) *Program {
}
}
func convertProgramSpec(spec *ProgramSpec) (*bpfProgLoadAttr, error) {
func convertProgramSpec(spec *ProgramSpec, handle *btf.Handle) (*bpfProgLoadAttr, error) {
if len(spec.Instructions) == 0 {
return nil, errors.New("Instructions cannot be empty")
}
@ -168,6 +204,10 @@ func convertProgramSpec(spec *ProgramSpec) (*bpfProgLoadAttr, error) {
return nil, errors.New("License cannot be empty")
}
if spec.ByteOrder != nil && spec.ByteOrder != internal.NativeEndian {
return nil, fmt.Errorf("can't load %s program on %s", spec.ByteOrder, internal.NativeEndian)
}
buf := bytes.NewBuffer(make([]byte, 0, len(spec.Instructions)*asm.InstructionSize))
err := spec.Instructions.Marshal(buf, internal.NativeEndian)
if err != nil {
@ -176,22 +216,47 @@ func convertProgramSpec(spec *ProgramSpec) (*bpfProgLoadAttr, error) {
bytecode := buf.Bytes()
insCount := uint32(len(bytecode) / asm.InstructionSize)
lic := []byte(spec.License)
attr := &bpfProgLoadAttr{
progType: spec.Type,
expectedAttachType: spec.AttachType,
insCount: insCount,
instructions: newPtr(unsafe.Pointer(&bytecode[0])),
license: newPtr(unsafe.Pointer(&lic[0])),
}
name, err := newBPFObjName(spec.Name)
if err != nil {
return nil, err
instructions: internal.NewSlicePointer(bytecode),
license: internal.NewStringPointer(spec.License),
kernelVersion: spec.KernelVersion,
}
if haveObjName() == nil {
attr.progName = name
attr.progName = newBPFObjName(spec.Name)
}
if handle != nil && spec.BTF != nil {
attr.progBTFFd = uint32(handle.FD())
recSize, bytes, err := btf.ProgramLineInfos(spec.BTF)
if err != nil {
return nil, fmt.Errorf("can't get BTF line infos: %w", err)
}
attr.lineInfoRecSize = recSize
attr.lineInfoCnt = uint32(uint64(len(bytes)) / uint64(recSize))
attr.lineInfo = internal.NewSlicePointer(bytes)
recSize, bytes, err = btf.ProgramFuncInfos(spec.BTF)
if err != nil {
return nil, fmt.Errorf("can't get BTF function infos: %w", err)
}
attr.funcInfoRecSize = recSize
attr.funcInfoCnt = uint32(uint64(len(bytes)) / uint64(recSize))
attr.funcInfo = internal.NewSlicePointer(bytes)
}
if spec.AttachTo != "" {
target, err := resolveBTFType(spec.AttachTo, spec.Type, spec.AttachType)
if err != nil {
return nil, err
}
if target != nil {
attr.attachBTFID = target.ID()
}
}
return attr, nil
@ -213,7 +278,7 @@ func (p *Program) ABI() ProgramABI {
//
// It is invalid to call this function after Close has been called.
func (p *Program) FD() int {
fd, err := p.fd.value()
fd, err := p.fd.Value()
if err != nil {
// Best effort: -1 is the number most likely to be an
// invalid file descriptor.
@ -233,9 +298,9 @@ func (p *Program) Clone() (*Program, error) {
return nil, nil
}
dup, err := p.fd.dup()
dup, err := p.fd.Dup()
if err != nil {
return nil, errors.Wrap(err, "can't clone program")
return nil, fmt.Errorf("can't clone program: %w", err)
}
return newProgram(dup, p.name, &p.abi), nil
@ -245,7 +310,10 @@ func (p *Program) Clone() (*Program, error) {
//
// This requires bpffs to be mounted above fileName. See http://cilium.readthedocs.io/en/doc-1.0/kubernetes/install/#mounting-the-bpf-fs-optional
func (p *Program) Pin(fileName string) error {
return errors.Wrap(bpfPinObject(fileName, p.fd), "can't pin program")
if err := internal.BPFObjPin(fileName, p.fd); err != nil {
return fmt.Errorf("can't pin program: %w", err)
}
return nil
}
// Close unloads the program from the kernel.
@ -254,7 +322,7 @@ func (p *Program) Close() error {
return nil
}
return p.fd.close()
return p.fd.Close()
}
// Test runs the Program in the kernel with the given input and returns the
@ -265,23 +333,33 @@ func (p *Program) Close() error {
//
// This function requires at least Linux 4.12.
func (p *Program) Test(in []byte) (uint32, []byte, error) {
ret, out, _, err := p.testRun(in, 1)
return ret, out, errors.Wrap(err, "can't test program")
ret, out, _, err := p.testRun(in, 1, nil)
if err != nil {
return ret, nil, fmt.Errorf("can't test program: %w", err)
}
return ret, out, nil
}
// Benchmark runs the Program with the given input for a number of times
// and returns the time taken per iteration.
//
// The returned value is the return value of the last execution of
// the program.
// Returns the result of the last execution of the program and the time per
// run or an error. reset is called whenever the benchmark syscall is
// interrupted, and should be set to testing.B.ResetTimer or similar.
//
// Note: profiling a call to this function will skew its results, see
// https://github.com/cilium/ebpf/issues/24
//
// This function requires at least Linux 4.12.
func (p *Program) Benchmark(in []byte, repeat int) (uint32, time.Duration, error) {
ret, _, total, err := p.testRun(in, repeat)
return ret, total, errors.Wrap(err, "can't benchmark program")
func (p *Program) Benchmark(in []byte, repeat int, reset func()) (uint32, time.Duration, error) {
ret, _, total, err := p.testRun(in, repeat, reset)
if err != nil {
return ret, total, fmt.Errorf("can't benchmark program: %w", err)
}
return ret, total, nil
}
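A sketch of how the new reset parameter is meant to be wired into a Go benchmark (names are illustrative; prog is assumed to be loaded elsewhere):

```go
// prog is assumed to be loaded elsewhere, for example via NewProgram.
var prog *ebpf.Program

func BenchmarkProgram(b *testing.B) {
	in := make([]byte, 14) // test runs need at least 14 bytes of input
	_, perRun, err := prog.Benchmark(in, b.N, b.ResetTimer)
	if err != nil {
		b.Fatal(err)
	}
	b.ReportMetric(float64(perRun.Nanoseconds()), "ns/run")
}
```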
var haveProgTestRun = internal.FeatureTest("BPF_PROG_TEST_RUN", "4.12", func() bool {
var haveProgTestRun = internal.FeatureTest("BPF_PROG_TEST_RUN", "4.12", func() (bool, error) {
prog, err := NewProgram(&ProgramSpec{
Type: SocketFilter,
Instructions: asm.Instructions{
@ -292,31 +370,26 @@ var haveProgTestRun = internal.FeatureTest("BPF_PROG_TEST_RUN", "4.12", func() b
})
if err != nil {
// This may be because we lack sufficient permissions, etc.
return false
return false, err
}
defer prog.Close()
fd, err := prog.fd.value()
if err != nil {
return false
}
// Programs require at least 14 bytes input
in := make([]byte, 14)
attr := bpfProgTestRunAttr{
fd: fd,
fd: uint32(prog.FD()),
dataSizeIn: uint32(len(in)),
dataIn: newPtr(unsafe.Pointer(&in[0])),
dataIn: internal.NewSlicePointer(in),
}
_, err = bpfCall(_ProgTestRun, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
err = bpfProgTestRun(&attr)
// Check for EINVAL specifically, rather than err != nil since we
// otherwise misdetect due to insufficient permissions.
return errors.Cause(err) != unix.EINVAL
return !errors.Is(err, unix.EINVAL), nil
})
func (p *Program) testRun(in []byte, repeat int) (uint32, []byte, time.Duration, error) {
func (p *Program) testRun(in []byte, repeat int, reset func()) (uint32, []byte, time.Duration, error) {
if uint(repeat) > math.MaxUint32 {
return 0, nil, 0, fmt.Errorf("repeat is too high")
}
@ -340,7 +413,7 @@ func (p *Program) testRun(in []byte, repeat int) (uint32, []byte, time.Duration,
// See https://patchwork.ozlabs.org/cover/1006822/
out := make([]byte, len(in)+outputPad)
fd, err := p.fd.value()
fd, err := p.fd.Value()
if err != nil {
return 0, nil, 0, err
}
@ -349,14 +422,25 @@ func (p *Program) testRun(in []byte, repeat int) (uint32, []byte, time.Duration,
fd: fd,
dataSizeIn: uint32(len(in)),
dataSizeOut: uint32(len(out)),
dataIn: newPtr(unsafe.Pointer(&in[0])),
dataOut: newPtr(unsafe.Pointer(&out[0])),
dataIn: internal.NewSlicePointer(in),
dataOut: internal.NewSlicePointer(out),
repeat: uint32(repeat),
}
_, err = bpfCall(_ProgTestRun, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
if err != nil {
return 0, nil, 0, errors.Wrap(err, "can't run test")
for {
err = bpfProgTestRun(&attr)
if err == nil {
break
}
if errors.Is(err, unix.EINTR) {
if reset != nil {
reset()
}
continue
}
return 0, nil, 0, fmt.Errorf("can't run test: %w", err)
}
if int(attr.dataSizeOut) > cap(out) {
@ -378,23 +462,12 @@ func unmarshalProgram(buf []byte) (*Program, error) {
// Looking up an entry in a nested map or prog array returns an id,
// not an fd.
id := internal.NativeEndian.Uint32(buf)
fd, err := bpfGetProgramFDByID(id)
if err != nil {
return nil, err
}
name, abi, err := newProgramABIFromFd(fd)
if err != nil {
_ = fd.close()
return nil, err
}
return newProgram(fd, name, abi), nil
return NewProgramFromID(ProgramID(id))
}
// MarshalBinary implements BinaryMarshaler.
func (p *Program) MarshalBinary() ([]byte, error) {
value, err := p.fd.value()
value, err := p.fd.Value()
if err != nil {
return nil, err
}
@ -404,61 +477,68 @@ func (p *Program) MarshalBinary() ([]byte, error) {
return buf, nil
}
// Attach a Program to a container object fd
// Attach a Program.
//
// Deprecated: use link.RawAttachProgram instead.
func (p *Program) Attach(fd int, typ AttachType, flags AttachFlags) error {
if fd < 0 {
return errors.New("invalid fd")
}
pfd, err := p.fd.value()
pfd, err := p.fd.Value()
if err != nil {
return err
}
attr := bpfProgAlterAttr{
targetFd: uint32(fd),
attachBpfFd: pfd,
attachType: uint32(typ),
attachFlags: uint32(flags),
attr := internal.BPFProgAttachAttr{
TargetFd: uint32(fd),
AttachBpfFd: pfd,
AttachType: uint32(typ),
AttachFlags: uint32(flags),
}
return bpfProgAlter(_ProgAttach, &attr)
return internal.BPFProgAttach(&attr)
}
// Detach a Program from a container object fd
// Detach a Program.
//
// Deprecated: use link.RawDetachProgram instead.
func (p *Program) Detach(fd int, typ AttachType, flags AttachFlags) error {
if fd < 0 {
return errors.New("invalid fd")
}
pfd, err := p.fd.value()
if flags != 0 {
return errors.New("flags must be zero")
}
pfd, err := p.fd.Value()
if err != nil {
return err
}
attr := bpfProgAlterAttr{
targetFd: uint32(fd),
attachBpfFd: pfd,
attachType: uint32(typ),
attachFlags: uint32(flags),
attr := internal.BPFProgDetachAttr{
TargetFd: uint32(fd),
AttachBpfFd: pfd,
AttachType: uint32(typ),
}
return bpfProgAlter(_ProgDetach, &attr)
return internal.BPFProgDetach(&attr)
}
// LoadPinnedProgram loads a Program from a BPF file.
//
// Requires at least Linux 4.11.
func LoadPinnedProgram(fileName string) (*Program, error) {
fd, err := bpfGetObject(fileName)
fd, err := internal.BPFObjGet(fileName)
if err != nil {
return nil, err
}
name, abi, err := newProgramABIFromFd(fd)
if err != nil {
_ = fd.close()
return nil, errors.Wrapf(err, "can't get ABI for %s", fileName)
_ = fd.Close()
return nil, fmt.Errorf("can't get ABI for %s: %w", fileName, err)
}
return newProgram(fd, name, abi), nil
@ -480,25 +560,63 @@ func SanitizeName(name string, replacement rune) string {
}, name)
}
type loadError struct {
cause error
verifierLog string
// ProgramGetNextID returns the ID of the next eBPF program.
//
// Returns ErrNotExist, if there is no next eBPF program.
func ProgramGetNextID(startID ProgramID) (ProgramID, error) {
id, err := objGetNextID(internal.BPF_PROG_GET_NEXT_ID, uint32(startID))
return ProgramID(id), err
}
func (le *loadError) Error() string {
if le.verifierLog == "" {
return fmt.Sprintf("failed to load program: %s", le.cause)
// NewProgramFromID returns the program for a given id.
//
// Returns ErrNotExist, if there is no eBPF program with the given id.
func NewProgramFromID(id ProgramID) (*Program, error) {
fd, err := bpfObjGetFDByID(internal.BPF_PROG_GET_FD_BY_ID, uint32(id))
if err != nil {
return nil, err
}
return fmt.Sprintf("failed to load program: %s: %s", le.cause, le.verifierLog)
name, abi, err := newProgramABIFromFd(fd)
if err != nil {
_ = fd.Close()
return nil, err
}
return newProgram(fd, name, abi), nil
}
func (le *loadError) Cause() error {
return le.cause
// ID returns the systemwide unique ID of the program.
func (p *Program) ID() (ProgramID, error) {
info, err := bpfGetProgInfoByFD(p.fd)
if err != nil {
return ProgramID(0), err
}
return ProgramID(info.id), nil
}
// IsNotSupported returns true if an error occurred because
// the kernel does not have support for a specific feature.
func IsNotSupported(err error) bool {
_, notSupported := errors.Cause(err).(*internal.UnsupportedFeatureError)
return notSupported
func resolveBTFType(name string, progType ProgramType, attachType AttachType) (btf.Type, error) {
kernel, err := btf.LoadKernelSpec()
if err != nil {
return nil, fmt.Errorf("can't resolve BTF type %s: %w", name, err)
}
type match struct {
p ProgramType
a AttachType
}
target := match{progType, attachType}
switch target {
case match{Tracing, AttachTraceIter}:
var target btf.Func
if err := kernel.FindType("bpf_iter_"+name, &target); err != nil {
return nil, fmt.Errorf("can't resolve BTF for iterator %s: %w", name, err)
}
return &target, nil
default:
return nil, nil
}
}

View file

@ -13,6 +13,11 @@ The library is maintained by [Cloudflare](https://www.cloudflare.com) and [Ciliu
The package is production ready, but **the API is explicitly unstable
right now**. Expect to update your code if you want to follow along.
## Requirements
* A version of Go that is [supported by upstream](https://golang.org/doc/devel/release.html#policy)
* Linux 4.9, 4.19 or 5.4 (versions in-between should work, but are not tested)
## Useful resources
* [Cilium eBPF documentation](https://cilium.readthedocs.io/en/latest/bpf/#bpf-guide) (recommended)

View file

@ -1,89 +1,35 @@
package ebpf
import (
"bytes"
"path/filepath"
"runtime"
"strconv"
"strings"
"errors"
"fmt"
"os"
"unsafe"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/btf"
"github.com/cilium/ebpf/internal/unix"
"github.com/pkg/errors"
)
var errClosedFd = errors.New("use of closed file descriptor")
type bpfFD struct {
raw int64
}
func newBPFFD(value uint32) *bpfFD {
fd := &bpfFD{int64(value)}
runtime.SetFinalizer(fd, (*bpfFD).close)
return fd
}
func (fd *bpfFD) String() string {
return strconv.FormatInt(fd.raw, 10)
}
func (fd *bpfFD) value() (uint32, error) {
if fd.raw < 0 {
return 0, errClosedFd
}
return uint32(fd.raw), nil
}
func (fd *bpfFD) close() error {
if fd.raw < 0 {
return nil
}
value := int(fd.raw)
fd.raw = -1
fd.forget()
return unix.Close(value)
}
func (fd *bpfFD) forget() {
runtime.SetFinalizer(fd, nil)
}
func (fd *bpfFD) dup() (*bpfFD, error) {
if fd.raw < 0 {
return nil, errClosedFd
}
dup, err := unix.FcntlInt(uintptr(fd.raw), unix.F_DUPFD_CLOEXEC, 0)
if err != nil {
return nil, errors.Wrap(err, "can't dup fd")
}
return newBPFFD(uint32(dup)), nil
}
// Generic errors returned by BPF syscalls.
var (
ErrNotExist = errors.New("requested object does not exist")
)
// bpfObjName is a null-terminated string made up of
// 'A-Za-z0-9_' characters.
type bpfObjName [unix.BPF_OBJ_NAME_LEN]byte
// newBPFObjName truncates the result if it is too long.
func newBPFObjName(name string) (bpfObjName, error) {
idx := strings.IndexFunc(name, invalidBPFObjNameChar)
if idx != -1 {
return bpfObjName{}, errors.Errorf("invalid character '%c' in name '%s'", name[idx], name)
}
func newBPFObjName(name string) bpfObjName {
var result bpfObjName
copy(result[:unix.BPF_OBJ_NAME_LEN-1], name)
return result, nil
return result
}
func invalidBPFObjNameChar(char rune) bool {
dotAllowed := objNameAllowsDot() == nil
switch {
case char >= 'A' && char <= 'Z':
fallthrough
@ -91,6 +37,8 @@ func invalidBPFObjNameChar(char rune) bool {
fallthrough
case char >= '0' && char <= '9':
fallthrough
case dotAllowed && char == '.':
fallthrough
case char == '_':
return false
default:
@ -99,21 +47,25 @@ func invalidBPFObjNameChar(char rune) bool {
}
type bpfMapCreateAttr struct {
mapType MapType
keySize uint32
valueSize uint32
maxEntries uint32
flags uint32
innerMapFd uint32 // since 4.12 56f668dfe00d
numaNode uint32 // since 4.14 96eabe7a40aa
mapName bpfObjName // since 4.15 ad5b177bd73f
mapType MapType
keySize uint32
valueSize uint32
maxEntries uint32
flags uint32
innerMapFd uint32 // since 4.12 56f668dfe00d
numaNode uint32 // since 4.14 96eabe7a40aa
mapName bpfObjName // since 4.15 ad5b177bd73f
mapIfIndex uint32
btfFd uint32
btfKeyTypeID btf.TypeID
btfValueTypeID btf.TypeID
}
type bpfMapOpAttr struct {
mapFd uint32
padding uint32
key syscallPtr
value syscallPtr
key internal.Pointer
value internal.Pointer
flags uint64
}
@ -127,25 +79,28 @@ type bpfMapInfo struct {
mapName bpfObjName // since 4.15 ad5b177bd73f
}
type bpfPinObjAttr struct {
fileName syscallPtr
fd uint32
padding uint32
}
type bpfProgLoadAttr struct {
progType ProgramType
insCount uint32
instructions syscallPtr
license syscallPtr
instructions internal.Pointer
license internal.Pointer
logLevel uint32
logSize uint32
logBuf syscallPtr
logBuf internal.Pointer
kernelVersion uint32 // since 4.1 2541517c32be
progFlags uint32 // since 4.11 e07b98d9bffe
progName bpfObjName // since 4.15 067cae47771c
progIfIndex uint32 // since 4.15 1f6f4cb7ba21
expectedAttachType AttachType // since 4.17 5e43f899b03a
progBTFFd uint32
funcInfoRecSize uint32
funcInfo internal.Pointer
funcInfoCnt uint32
lineInfoRecSize uint32
lineInfo internal.Pointer
lineInfoCnt uint32
attachBTFID btf.TypeID
attachProgFd uint32
}
type bpfProgInfo struct {
@ -154,12 +109,12 @@ type bpfProgInfo struct {
tag [unix.BPF_TAG_SIZE]byte
jitedLen uint32
xlatedLen uint32
jited syscallPtr
xlated syscallPtr
jited internal.Pointer
xlated internal.Pointer
loadTime uint64 // since 4.15 cb4d2b3f03d8
createdByUID uint32
nrMapIDs uint32
mapIds syscallPtr
mapIds internal.Pointer
name bpfObjName
}
@ -168,23 +123,16 @@ type bpfProgTestRunAttr struct {
retval uint32
dataSizeIn uint32
dataSizeOut uint32
dataIn syscallPtr
dataOut syscallPtr
dataIn internal.Pointer
dataOut internal.Pointer
repeat uint32
duration uint32
}
type bpfProgAlterAttr struct {
targetFd uint32
attachBpfFd uint32
attachType uint32
attachFlags uint32
}
type bpfObjGetInfoByFDAttr struct {
fd uint32
infoLen uint32
info syscallPtr // May be either bpfMapInfo or bpfProgInfo
info internal.Pointer // May be either bpfMapInfo or bpfProgInfo
}
type bpfGetFDByIDAttr struct {
@ -192,13 +140,19 @@ type bpfGetFDByIDAttr struct {
next uint32
}
func newPtr(ptr unsafe.Pointer) syscallPtr {
return syscallPtr{ptr: ptr}
type bpfMapFreezeAttr struct {
mapFd uint32
}
func bpfProgLoad(attr *bpfProgLoadAttr) (*bpfFD, error) {
type bpfObjGetNextIDAttr struct {
startID uint32
nextID uint32
openFlags uint32
}
func bpfProgLoad(attr *bpfProgLoadAttr) (*internal.FD, error) {
for {
fd, err := bpfCall(_ProgLoad, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
fd, err := internal.BPF(internal.BPF_PROG_LOAD, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
// As of ~4.20 the verifier can be interrupted by a signal,
// and returns EAGAIN in that case.
if err == unix.EAGAIN {
@ -209,25 +163,29 @@ func bpfProgLoad(attr *bpfProgLoadAttr) (*bpfFD, error) {
return nil, err
}
return newBPFFD(uint32(fd)), nil
return internal.NewFD(uint32(fd)), nil
}
}
func bpfProgAlter(cmd int, attr *bpfProgAlterAttr) error {
_, err := bpfCall(cmd, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
func bpfProgTestRun(attr *bpfProgTestRunAttr) error {
_, err := internal.BPF(internal.BPF_PROG_TEST_RUN, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
return err
}
func bpfMapCreate(attr *bpfMapCreateAttr) (*bpfFD, error) {
fd, err := bpfCall(_MapCreate, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
func bpfMapCreate(attr *bpfMapCreateAttr) (*internal.FD, error) {
fd, err := internal.BPF(internal.BPF_MAP_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
if errors.Is(err, os.ErrPermission) {
return nil, errors.New("permission denied or insufficient rlimit to lock memory for map")
}
if err != nil {
return nil, err
}
return newBPFFD(uint32(fd)), nil
return internal.NewFD(uint32(fd)), nil
}
var haveNestedMaps = internal.FeatureTest("nested maps", "4.12", func() bool {
var haveNestedMaps = internal.FeatureTest("nested maps", "4.12", func() (bool, error) {
inner, err := bpfMapCreate(&bpfMapCreateAttr{
mapType: Array,
keySize: 4,
@ -235,11 +193,11 @@ var haveNestedMaps = internal.FeatureTest("nested maps", "4.12", func() bool {
maxEntries: 1,
})
if err != nil {
return false
return false, err
}
defer inner.close()
defer inner.Close()
innerFd, _ := inner.value()
innerFd, _ := inner.Value()
nested, err := bpfMapCreate(&bpfMapCreateAttr{
mapType: ArrayOfMaps,
keySize: 4,
@ -248,15 +206,32 @@ var haveNestedMaps = internal.FeatureTest("nested maps", "4.12", func() bool {
innerMapFd: innerFd,
})
if err != nil {
return false
return false, nil
}
_ = nested.close()
return true
_ = nested.Close()
return true, nil
})
func bpfMapLookupElem(m *bpfFD, key, valueOut syscallPtr) error {
fd, err := m.value()
var haveMapMutabilityModifiers = internal.FeatureTest("read- and write-only maps", "5.2", func() (bool, error) {
// This checks BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG. Since
// BPF_MAP_FREEZE appeared in 5.2 as well we don't do a separate check.
m, err := bpfMapCreate(&bpfMapCreateAttr{
mapType: Array,
keySize: 4,
valueSize: 4,
maxEntries: 1,
flags: unix.BPF_F_RDONLY_PROG,
})
if err != nil {
return false, nil
}
_ = m.Close()
return true, nil
})
func bpfMapLookupElem(m *internal.FD, key, valueOut internal.Pointer) error {
fd, err := m.Value()
if err != nil {
return err
}
@ -266,12 +241,27 @@ func bpfMapLookupElem(m *bpfFD, key, valueOut syscallPtr) error {
key: key,
value: valueOut,
}
_, err = bpfCall(_MapLookupElem, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
return err
_, err = internal.BPF(internal.BPF_MAP_LOOKUP_ELEM, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
return wrapMapError(err)
}
func bpfMapUpdateElem(m *bpfFD, key, valueOut syscallPtr, flags uint64) error {
fd, err := m.value()
func bpfMapLookupAndDelete(m *internal.FD, key, valueOut internal.Pointer) error {
fd, err := m.Value()
if err != nil {
return err
}
attr := bpfMapOpAttr{
mapFd: fd,
key: key,
value: valueOut,
}
_, err = internal.BPF(internal.BPF_MAP_LOOKUP_AND_DELETE_ELEM, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
return wrapMapError(err)
}
func bpfMapUpdateElem(m *internal.FD, key, valueOut internal.Pointer, flags uint64) error {
fd, err := m.Value()
if err != nil {
return err
}
@ -282,12 +272,12 @@ func bpfMapUpdateElem(m *bpfFD, key, valueOut syscallPtr, flags uint64) error {
value: valueOut,
flags: flags,
}
_, err = bpfCall(_MapUpdateElem, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
return err
_, err = internal.BPF(internal.BPF_MAP_UPDATE_ELEM, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
return wrapMapError(err)
}
func bpfMapDeleteElem(m *bpfFD, key syscallPtr) error {
fd, err := m.value()
func bpfMapDeleteElem(m *internal.FD, key internal.Pointer) error {
fd, err := m.Value()
if err != nil {
return err
}
@ -296,12 +286,12 @@ func bpfMapDeleteElem(m *bpfFD, key syscallPtr) error {
mapFd: fd,
key: key,
}
_, err = bpfCall(_MapDeleteElem, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
return err
_, err = internal.BPF(internal.BPF_MAP_DELETE_ELEM, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
return wrapMapError(err)
}
func bpfMapGetNextKey(m *bpfFD, key, nextKeyOut syscallPtr) error {
fd, err := m.value()
func bpfMapGetNextKey(m *internal.FD, key, nextKeyOut internal.Pointer) error {
fd, err := m.Value()
if err != nil {
return err
}
@ -311,46 +301,60 @@ func bpfMapGetNextKey(m *bpfFD, key, nextKeyOut syscallPtr) error {
key: key,
value: nextKeyOut,
}
_, err = bpfCall(_MapGetNextKey, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
_, err = internal.BPF(internal.BPF_MAP_GET_NEXT_KEY, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
return wrapMapError(err)
}
func objGetNextID(cmd internal.BPFCmd, start uint32) (uint32, error) {
attr := bpfObjGetNextIDAttr{
startID: start,
}
_, err := internal.BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
return attr.nextID, wrapObjError(err)
}
func wrapObjError(err error) error {
if err == nil {
return nil
}
if errors.Is(err, unix.ENOENT) {
return fmt.Errorf("%w", ErrNotExist)
}
return errors.New(err.Error())
}
func wrapMapError(err error) error {
if err == nil {
return nil
}
if errors.Is(err, unix.ENOENT) {
return ErrKeyNotExist
}
if errors.Is(err, unix.EEXIST) {
return ErrKeyExist
}
return errors.New(err.Error())
}
func bpfMapFreeze(m *internal.FD) error {
fd, err := m.Value()
if err != nil {
return err
}
attr := bpfMapFreezeAttr{
mapFd: fd,
}
_, err = internal.BPF(internal.BPF_MAP_FREEZE, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
return err
}
const bpfFSType = 0xcafe4a11
func bpfPinObject(fileName string, fd *bpfFD) error {
dirName := filepath.Dir(fileName)
var statfs unix.Statfs_t
if err := unix.Statfs(dirName, &statfs); err != nil {
return err
}
if uint64(statfs.Type) != bpfFSType {
return errors.Errorf("%s is not on a bpf filesystem", fileName)
}
value, err := fd.value()
if err != nil {
return err
}
_, err = bpfCall(_ObjPin, unsafe.Pointer(&bpfPinObjAttr{
fileName: newPtr(unsafe.Pointer(&[]byte(fileName)[0])),
fd: value,
}), 16)
return errors.Wrapf(err, "pin object %s", fileName)
}
func bpfGetObject(fileName string) (*bpfFD, error) {
ptr, err := bpfCall(_ObjGet, unsafe.Pointer(&bpfPinObjAttr{
fileName: newPtr(unsafe.Pointer(&[]byte(fileName)[0])),
}), 16)
if err != nil {
return nil, errors.Wrapf(err, "get object %s", fileName)
}
return newBPFFD(uint32(ptr)), nil
}
func bpfGetObjectInfoByFD(fd *bpfFD, info unsafe.Pointer, size uintptr) error {
value, err := fd.value()
func bpfGetObjectInfoByFD(fd *internal.FD, info unsafe.Pointer, size uintptr) error {
value, err := fd.Value()
if err != nil {
return err
}
@ -359,30 +363,53 @@ func bpfGetObjectInfoByFD(fd *bpfFD, info unsafe.Pointer, size uintptr) error {
attr := bpfObjGetInfoByFDAttr{
fd: value,
infoLen: uint32(size),
info: newPtr(info),
info: internal.NewPointer(info),
}
_, err = bpfCall(_ObjGetInfoByFD, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
return errors.Wrapf(err, "fd %d", value)
_, err = internal.BPF(internal.BPF_OBJ_GET_INFO_BY_FD, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
if err != nil {
return fmt.Errorf("fd %d: %w", fd, err)
}
return nil
}
func bpfGetProgInfoByFD(fd *bpfFD) (*bpfProgInfo, error) {
func bpfGetProgInfoByFD(fd *internal.FD) (*bpfProgInfo, error) {
var info bpfProgInfo
err := bpfGetObjectInfoByFD(fd, unsafe.Pointer(&info), unsafe.Sizeof(info))
return &info, errors.Wrap(err, "can't get program info")
if err := bpfGetObjectInfoByFD(fd, unsafe.Pointer(&info), unsafe.Sizeof(info)); err != nil {
return nil, fmt.Errorf("can't get program info: %w", err)
}
return &info, nil
}
func bpfGetMapInfoByFD(fd *bpfFD) (*bpfMapInfo, error) {
func bpfGetMapInfoByFD(fd *internal.FD) (*bpfMapInfo, error) {
var info bpfMapInfo
err := bpfGetObjectInfoByFD(fd, unsafe.Pointer(&info), unsafe.Sizeof(info))
return &info, errors.Wrap(err, "can't get map info")
if err != nil {
return nil, fmt.Errorf("can't get map info: %w", err)
}
return &info, nil
}
var haveObjName = internal.FeatureTest("object names", "4.15", func() bool {
name, err := newBPFObjName("feature_test")
var haveObjName = internal.FeatureTest("object names", "4.15", func() (bool, error) {
attr := bpfMapCreateAttr{
mapType: Array,
keySize: 4,
valueSize: 4,
maxEntries: 1,
mapName: newBPFObjName("feature_test"),
}
fd, err := bpfMapCreate(&attr)
if err != nil {
// This really is a fatal error, but it should be caught
// by the unit tests not working.
return false
return false, nil
}
_ = fd.Close()
return true, nil
})
var objNameAllowsDot = internal.FeatureTest("dot in object names", "5.2", func() (bool, error) {
if err := haveObjName(); err != nil {
return false, err
}
attr := bpfMapCreateAttr{
@ -390,58 +417,22 @@ var haveObjName = internal.FeatureTest("object names", "4.15", func() bool {
keySize: 4,
valueSize: 4,
maxEntries: 1,
mapName: name,
mapName: newBPFObjName(".test"),
}
fd, err := bpfMapCreate(&attr)
if err != nil {
return false
return false, nil
}
_ = fd.close()
return true
_ = fd.Close()
return true, nil
})
func bpfGetMapFDByID(id uint32) (*bpfFD, error) {
// available from 4.13
func bpfObjGetFDByID(cmd internal.BPFCmd, id uint32) (*internal.FD, error) {
attr := bpfGetFDByIDAttr{
id: id,
}
ptr, err := bpfCall(_MapGetFDByID, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
if err != nil {
return nil, errors.Wrapf(err, "can't get fd for map id %d", id)
}
return newBPFFD(uint32(ptr)), nil
}
func bpfGetProgramFDByID(id uint32) (*bpfFD, error) {
// available from 4.13
attr := bpfGetFDByIDAttr{
id: id,
}
ptr, err := bpfCall(_ProgGetFDByID, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
if err != nil {
return nil, errors.Wrapf(err, "can't get fd for program id %d", id)
}
return newBPFFD(uint32(ptr)), nil
}
func bpfCall(cmd int, attr unsafe.Pointer, size uintptr) (uintptr, error) {
r1, _, errNo := unix.Syscall(unix.SYS_BPF, uintptr(cmd), uintptr(attr), size)
runtime.KeepAlive(attr)
var err error
if errNo != 0 {
err = errNo
}
return r1, err
}
func convertCString(in []byte) string {
inLen := bytes.IndexByte(in, 0)
if inLen == -1 {
return ""
}
return string(in[:inLen])
ptr, err := internal.BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
return internal.NewFD(uint32(ptr)), wrapObjError(err)
}

View file

@ -1,6 +1,6 @@
package ebpf
//go:generate stringer -output types_string.go -type=MapType,ProgramType
//go:generate stringer -output types_string.go -type=MapType,ProgramType,AttachType
// MapType indicates the type map structure
// that will be initialized in the kernel.
@ -57,41 +57,40 @@ const (
// HashOfMaps - Each item in the hash map is another map. The inner map mustn't be a map of maps
// itself.
HashOfMaps
// DevMap - Specialized map to store references to network devices.
DevMap
// SockMap - Specialized map to store references to sockets.
SockMap
// CPUMap - Specialized map to store references to CPUs.
CPUMap
// XSKMap - Specialized map for XDP programs to store references to open sockets.
XSKMap
// SockHash - Specialized hash to store references to sockets.
SockHash
// CGroupStorage - Special map for CGroups.
CGroupStorage
// ReusePortSockArray - Specialized map to store references to sockets that can be reused.
ReusePortSockArray
// PerCPUCGroupStorage - Special per CPU map for CGroups.
PerCPUCGroupStorage
// Queue - FIFO storage for BPF programs.
Queue
// Stack - LIFO storage for BPF programs.
Stack
// SkStorage - Specialized map for local storage at SK for BPF programs.
SkStorage
// DevMapHash - Hash-based indexing scheme for references to network devices.
DevMapHash
)
// hasPerCPUValue returns true if the Map stores a value per CPU.
func (mt MapType) hasPerCPUValue() bool {
if mt == PerCPUHash || mt == PerCPUArray {
if mt == PerCPUHash || mt == PerCPUArray || mt == LRUCPUHash {
return true
}
return false
}
const (
_MapCreate = iota
_MapLookupElem
_MapUpdateElem
_MapDeleteElem
_MapGetNextKey
_ProgLoad
_ObjPin
_ObjGet
_ProgAttach
_ProgDetach
_ProgTestRun
_ProgGetNextID
_MapGetNextID
_ProgGetFDByID
_MapGetFDByID
_ObjGetInfoByFD
)
const (
_Any = iota
_NoExist
_Exist
)
// ProgramType of the eBPF program
type ProgramType uint32
@ -149,6 +148,8 @@ const (
RawTracepointWritable
// CGroupSockopt program
CGroupSockopt
// Tracing program
Tracing
)
// AttachType of the eBPF program, needed to differentiate allowed context accesses in
@ -183,6 +184,12 @@ const (
AttachCGroupUDP6Recvmsg
AttachCGroupGetsockopt
AttachCGroupSetsockopt
AttachTraceRawTp
AttachTraceFEntry
AttachTraceFExit
AttachModifyReturn
AttachLSMMac
AttachTraceIter
)
// AttachFlags of the eBPF program used in BPF_PROG_ATTACH command

View file

@ -1,4 +1,4 @@
// Code generated by "stringer -output types_string.go -type=MapType,ProgramType"; DO NOT EDIT.
// Code generated by "stringer -output types_string.go -type=MapType,ProgramType,AttachType"; DO NOT EDIT.
package ebpf
@ -22,11 +22,23 @@ func _() {
_ = x[LPMTrie-11]
_ = x[ArrayOfMaps-12]
_ = x[HashOfMaps-13]
_ = x[DevMap-14]
_ = x[SockMap-15]
_ = x[CPUMap-16]
_ = x[XSKMap-17]
_ = x[SockHash-18]
_ = x[CGroupStorage-19]
_ = x[ReusePortSockArray-20]
_ = x[PerCPUCGroupStorage-21]
_ = x[Queue-22]
_ = x[Stack-23]
_ = x[SkStorage-24]
_ = x[DevMapHash-25]
}
const _MapType_name = "UnspecifiedMapHashArrayProgramArrayPerfEventArrayPerCPUHashPerCPUArrayStackTraceCGroupArrayLRUHashLRUCPUHashLPMTrieArrayOfMapsHashOfMaps"
const _MapType_name = "UnspecifiedMapHashArrayProgramArrayPerfEventArrayPerCPUHashPerCPUArrayStackTraceCGroupArrayLRUHashLRUCPUHashLPMTrieArrayOfMapsHashOfMapsDevMapSockMapCPUMapXSKMapSockHashCGroupStorageReusePortSockArrayPerCPUCGroupStorageQueueStackSkStorageDevMapHash"
var _MapType_index = [...]uint8{0, 14, 18, 23, 35, 49, 59, 70, 80, 91, 98, 108, 115, 126, 136}
var _MapType_index = [...]uint8{0, 14, 18, 23, 35, 49, 59, 70, 80, 91, 98, 108, 115, 126, 136, 142, 149, 155, 161, 169, 182, 200, 219, 224, 229, 238, 248}
func (i MapType) String() string {
if i >= MapType(len(_MapType_index)-1) {
@ -64,11 +76,12 @@ func _() {
_ = x[CGroupSysctl-23]
_ = x[RawTracepointWritable-24]
_ = x[CGroupSockopt-25]
_ = x[Tracing-26]
}
const _ProgramType_name = "UnspecifiedProgramSocketFilterKprobeSchedCLSSchedACTTracePointXDPPerfEventCGroupSKBCGroupSockLWTInLWTOutLWTXmitSockOpsSkSKBCGroupDeviceSkMsgRawTracepointCGroupSockAddrLWTSeg6LocalLircMode2SkReuseportFlowDissectorCGroupSysctlRawTracepointWritableCGroupSockopt"
const _ProgramType_name = "UnspecifiedProgramSocketFilterKprobeSchedCLSSchedACTTracePointXDPPerfEventCGroupSKBCGroupSockLWTInLWTOutLWTXmitSockOpsSkSKBCGroupDeviceSkMsgRawTracepointCGroupSockAddrLWTSeg6LocalLircMode2SkReuseportFlowDissectorCGroupSysctlRawTracepointWritableCGroupSockoptTracing"
var _ProgramType_index = [...]uint16{0, 18, 30, 36, 44, 52, 62, 65, 74, 83, 93, 98, 104, 111, 118, 123, 135, 140, 153, 167, 179, 188, 199, 212, 224, 245, 258}
var _ProgramType_index = [...]uint16{0, 18, 30, 36, 44, 52, 62, 65, 74, 83, 93, 98, 104, 111, 118, 123, 135, 140, 153, 167, 179, 188, 199, 212, 224, 245, 258, 265}
func (i ProgramType) String() string {
if i >= ProgramType(len(_ProgramType_index)-1) {
@ -76,3 +89,49 @@ func (i ProgramType) String() string {
}
return _ProgramType_name[_ProgramType_index[i]:_ProgramType_index[i+1]]
}
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[AttachNone-0]
_ = x[AttachCGroupInetIngress-0]
_ = x[AttachCGroupInetEgress-1]
_ = x[AttachCGroupInetSockCreate-2]
_ = x[AttachCGroupSockOps-3]
_ = x[AttachSkSKBStreamParser-4]
_ = x[AttachSkSKBStreamVerdict-5]
_ = x[AttachCGroupDevice-6]
_ = x[AttachSkMsgVerdict-7]
_ = x[AttachCGroupInet4Bind-8]
_ = x[AttachCGroupInet6Bind-9]
_ = x[AttachCGroupInet4Connect-10]
_ = x[AttachCGroupInet6Connect-11]
_ = x[AttachCGroupInet4PostBind-12]
_ = x[AttachCGroupInet6PostBind-13]
_ = x[AttachCGroupUDP4Sendmsg-14]
_ = x[AttachCGroupUDP6Sendmsg-15]
_ = x[AttachLircMode2-16]
_ = x[AttachFlowDissector-17]
_ = x[AttachCGroupSysctl-18]
_ = x[AttachCGroupUDP4Recvmsg-19]
_ = x[AttachCGroupUDP6Recvmsg-20]
_ = x[AttachCGroupGetsockopt-21]
_ = x[AttachCGroupSetsockopt-22]
_ = x[AttachTraceRawTp-23]
_ = x[AttachTraceFEntry-24]
_ = x[AttachTraceFExit-25]
_ = x[AttachModifyReturn-26]
_ = x[AttachLSMMac-27]
_ = x[AttachTraceIter-28]
}
const _AttachType_name = "AttachNoneAttachCGroupInetEgressAttachCGroupInetSockCreateAttachCGroupSockOpsAttachSkSKBStreamParserAttachSkSKBStreamVerdictAttachCGroupDeviceAttachSkMsgVerdictAttachCGroupInet4BindAttachCGroupInet6BindAttachCGroupInet4ConnectAttachCGroupInet6ConnectAttachCGroupInet4PostBindAttachCGroupInet6PostBindAttachCGroupUDP4SendmsgAttachCGroupUDP6SendmsgAttachLircMode2AttachFlowDissectorAttachCGroupSysctlAttachCGroupUDP4RecvmsgAttachCGroupUDP6RecvmsgAttachCGroupGetsockoptAttachCGroupSetsockoptAttachTraceRawTpAttachTraceFEntryAttachTraceFExitAttachModifyReturnAttachLSMMacAttachTraceIter"
var _AttachType_index = [...]uint16{0, 10, 32, 58, 77, 100, 124, 142, 160, 181, 202, 226, 250, 275, 300, 323, 346, 361, 380, 398, 421, 444, 466, 488, 504, 521, 537, 555, 567, 582}
func (i AttachType) String() string {
if i >= AttachType(len(_AttachType_index)-1) {
return "AttachType(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _AttachType_name[_AttachType_index[i]:_AttachType_index[i+1]]
}

View file

@ -1,6 +1,6 @@
# cgroups
[![Build Status](https://travis-ci.org/containerd/cgroups.svg?branch=master)](https://travis-ci.org/containerd/cgroups)
[![Build Status](https://github.com/containerd/cgroups/workflows/CI/badge.svg)](https://github.com/containerd/cgroups/actions?query=workflow%3ACI)
[![codecov](https://codecov.io/gh/containerd/cgroups/branch/master/graph/badge.svg)](https://codecov.io/gh/containerd/cgroups)
[![GoDoc](https://godoc.org/github.com/containerd/cgroups?status.svg)](https://godoc.org/github.com/containerd/cgroups)
[![Go Report Card](https://goreportcard.com/badge/github.com/containerd/cgroups)](https://goreportcard.com/report/github.com/containerd/cgroups)
@ -65,7 +65,7 @@ To update the resources applied in the cgroup
```go
shares = uint64(200)
if err := control.Update(&specs.LinuxResources{
CPU: &specs.CPU{
CPU: &specs.LinuxCPU{
Shares: &shares,
},
}); err != nil {
@ -112,6 +112,27 @@ err := control.MoveTo(destination)
subCgroup, err := control.New("child", resources)
```
### Registering for memory events
This allows you to get notified via an eventfd for v1 memory cgroup events.
```go
event := cgroups.MemoryThresholdEvent(50 * 1024 * 1024, false)
efd, err := control.RegisterMemoryEvent(event)
```
```go
event := cgroups.MemoryPressureEvent(cgroups.MediumPressure, cgroups.DefaultMode)
efd, err := control.RegisterMemoryEvent(event)
```
```go
efd, err := control.OOMEventFD()
// or by using RegisterMemoryEvent
event := cgroups.OOMEvent()
efd, err := control.RegisterMemoryEvent(event)
```
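The returned `efd` is an eventfd; a minimal sketch of blocking until the event fires (assuming the descriptor can be read with a plain read(2), for example via golang.org/x/sys/unix):

```go
buf := make([]byte, 8) // eventfd counters are 8 bytes wide
if _, err := unix.Read(int(efd), buf); err != nil {
	return err
}
// The registered memory event has fired.
```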
### Attention
Static paths should not include the `/sys/fs/cgroup/` prefix; they should start with your own cgroup name

View file

@ -20,7 +20,6 @@ import (
"bufio"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"strconv"
@ -72,8 +71,8 @@ func (b *blkioController) Create(path string, resources *specs.LinuxResources) e
}
for _, t := range createBlkioSettings(resources.BlockIO) {
if t.value != nil {
if err := ioutil.WriteFile(
filepath.Join(b.Path(path), fmt.Sprintf("blkio.%s", t.name)),
if err := retryingWriteFile(
filepath.Join(b.Path(path), "blkio."+t.name),
t.format(t.value),
defaultFilePerm,
); err != nil {
@ -90,54 +89,47 @@ func (b *blkioController) Update(path string, resources *specs.LinuxResources) e
func (b *blkioController) Stat(path string, stats *v1.Metrics) error {
stats.Blkio = &v1.BlkIOStat{}
settings := []blkioStatSettings{
{
name: "throttle.io_serviced",
entry: &stats.Blkio.IoServicedRecursive,
},
{
name: "throttle.io_service_bytes",
entry: &stats.Blkio.IoServiceBytesRecursive,
},
}
var settings []blkioStatSettings
// Try to read CFQ stats available on all CFQ enabled kernels first
if _, err := os.Lstat(filepath.Join(b.Path(path), fmt.Sprintf("blkio.io_serviced_recursive"))); err == nil {
settings = []blkioStatSettings{}
settings = append(settings,
blkioStatSettings{
if _, err := os.Lstat(filepath.Join(b.Path(path), "blkio.io_serviced_recursive")); err == nil {
settings = []blkioStatSettings{
{
name: "sectors_recursive",
entry: &stats.Blkio.SectorsRecursive,
},
blkioStatSettings{
{
name: "io_service_bytes_recursive",
entry: &stats.Blkio.IoServiceBytesRecursive,
},
blkioStatSettings{
{
name: "io_serviced_recursive",
entry: &stats.Blkio.IoServicedRecursive,
},
blkioStatSettings{
{
name: "io_queued_recursive",
entry: &stats.Blkio.IoQueuedRecursive,
},
blkioStatSettings{
{
name: "io_service_time_recursive",
entry: &stats.Blkio.IoServiceTimeRecursive,
},
blkioStatSettings{
{
name: "io_wait_time_recursive",
entry: &stats.Blkio.IoWaitTimeRecursive,
},
blkioStatSettings{
{
name: "io_merged_recursive",
entry: &stats.Blkio.IoMergedRecursive,
},
blkioStatSettings{
{
name: "time_recursive",
entry: &stats.Blkio.IoTimeRecursive,
},
)
}
}
f, err := os.Open(filepath.Join(b.procRoot, "diskstats"))
if err != nil {
return err
@ -149,6 +141,29 @@ func (b *blkioController) Stat(path string, stats *v1.Metrics) error {
return err
}
var size int
for _, t := range settings {
if err := b.readEntry(devices, path, t.name, t.entry); err != nil {
return err
}
size += len(*t.entry)
}
if size > 0 {
return nil
}
// Even if the kernel is compiled with the CFQ scheduler, the cgroup may not use
// block devices with the CFQ scheduler. If so, we should fall back to the throttle.* files.
settings = []blkioStatSettings{
{
name: "throttle.io_serviced",
entry: &stats.Blkio.IoServicedRecursive,
},
{
name: "throttle.io_service_bytes",
entry: &stats.Blkio.IoServiceBytesRecursive,
},
}
for _, t := range settings {
if err := b.readEntry(devices, path, t.name, t.entry); err != nil {
return err
@ -158,16 +173,13 @@ func (b *blkioController) Stat(path string, stats *v1.Metrics) error {
}
func (b *blkioController) readEntry(devices map[deviceKey]string, path, name string, entry *[]*v1.BlkIOEntry) error {
f, err := os.Open(filepath.Join(b.Path(path), fmt.Sprintf("blkio.%s", name)))
f, err := os.Open(filepath.Join(b.Path(path), "blkio."+name))
if err != nil {
return err
}
defer f.Close()
sc := bufio.NewScanner(f)
for sc.Scan() {
if err := sc.Err(); err != nil {
return err
}
// format: dev type amount
fields := strings.FieldsFunc(sc.Text(), splitBlkIOStatLine)
if len(fields) < 3 {
@ -175,7 +187,7 @@ func (b *blkioController) readEntry(devices map[deviceKey]string, path, name str
// skip total line
continue
} else {
return fmt.Errorf("Invalid line found while parsing %s: %s", path, sc.Text())
return fmt.Errorf("invalid line found while parsing %s: %s", path, sc.Text())
}
}
major, err := strconv.ParseUint(fields[0], 10, 64)
@ -204,7 +216,7 @@ func (b *blkioController) readEntry(devices map[deviceKey]string, path, name str
Value: v,
})
}
return nil
return sc.Err()
}
func createBlkioSettings(blkio *specs.LinuxBlockIO) []blkioSettings {
@ -344,11 +356,3 @@ func getDevices(r io.Reader) (map[deviceKey]string, error) {
}
return devices, s.Err()
}
func major(devNumber uint64) uint64 {
return (devNumber >> 8) & 0xfff
}
func minor(devNumber uint64) uint64 {
return (devNumber & 0xff) | ((devNumber >> 12) & 0xfff00)
}

View file

@ -18,7 +18,6 @@ package cgroups
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
@ -169,7 +168,7 @@ func (c *cgroup) add(process Process) error {
if err != nil {
return err
}
if err := ioutil.WriteFile(
if err := retryingWriteFile(
filepath.Join(s.Path(p), cgroupProcs),
[]byte(strconv.Itoa(process.Pid)),
defaultFilePerm,
@ -199,7 +198,7 @@ func (c *cgroup) addTask(process Process) error {
if err != nil {
return err
}
if err := ioutil.WriteFile(
if err := retryingWriteFile(
filepath.Join(s.Path(p), cgroupTasks),
[]byte(strconv.Itoa(process.Pid)),
defaultFilePerm,
@ -217,7 +216,7 @@ func (c *cgroup) Delete() error {
if c.err != nil {
return c.err
}
var errors []string
var errs []string
for _, s := range c.subsystems {
if d, ok := s.(deleter); ok {
sp, err := c.path(s.Name())
@ -225,7 +224,7 @@ func (c *cgroup) Delete() error {
return err
}
if err := d.Delete(sp); err != nil {
errors = append(errors, string(s.Name()))
errs = append(errs, string(s.Name()))
}
continue
}
@ -236,12 +235,12 @@ func (c *cgroup) Delete() error {
}
path := p.Path(sp)
if err := remove(path); err != nil {
errors = append(errors, path)
errs = append(errs, path)
}
}
}
if len(errors) > 0 {
return fmt.Errorf("cgroups: unable to remove paths %s", strings.Join(errors, ", "))
if len(errs) > 0 {
return fmt.Errorf("cgroups: unable to remove paths %s", strings.Join(errs, ", "))
}
c.err = ErrCgroupDeleted
return nil
@ -458,7 +457,26 @@ func (c *cgroup) OOMEventFD() (uintptr, error) {
if err != nil {
return 0, err
}
return s.(*memoryController).OOMEventFD(sp)
return s.(*memoryController).memoryEvent(sp, OOMEvent())
}
// RegisterMemoryEvent registers the cgroup for any of the v1 memory cgroup
// notifications and returns the corresponding eventfd.
func (c *cgroup) RegisterMemoryEvent(event MemoryEvent) (uintptr, error) {
c.mu.Lock()
defer c.mu.Unlock()
if c.err != nil {
return 0, c.err
}
s := c.getSubsystem(Memory)
if s == nil {
return 0, ErrMemoryNotSupported
}
sp, err := c.path(Memory)
if err != nil {
return 0, err
}
return s.(*memoryController).memoryEvent(sp, event)
}
// State returns the state of the cgroup and its processes

View file

@ -82,6 +82,9 @@ type Cgroup interface {
Thaw() error
// OOMEventFD returns the memory subsystem's event fd for OOM events
OOMEventFD() (uintptr, error)
// RegisterMemoryEvent returns the memory subsystem's event fd for the given memory
// event. The oom event can alternatively be registered through this method.
RegisterMemoryEvent(MemoryEvent) (uintptr, error)
// State returns the cgroups current state
State() State
// Subsystems returns all the subsystems in the cgroup

View file

@ -18,8 +18,6 @@ package cgroups
import (
"bufio"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
@ -84,8 +82,8 @@ func (c *cpuController) Create(path string, resources *specs.LinuxResources) err
value = []byte(strconv.FormatInt(*t.ivalue, 10))
}
if value != nil {
if err := ioutil.WriteFile(
filepath.Join(c.Path(path), fmt.Sprintf("cpu.%s", t.name)),
if err := retryingWriteFile(
filepath.Join(c.Path(path), "cpu."+t.name),
value,
defaultFilePerm,
); err != nil {
@ -110,9 +108,6 @@ func (c *cpuController) Stat(path string, stats *v1.Metrics) error {
// get or create the cpu field because cpuacct can also set values on this struct
sc := bufio.NewScanner(f)
for sc.Scan() {
if err := sc.Err(); err != nil {
return err
}
key, v, err := parseKV(sc.Text())
if err != nil {
return err
@ -126,5 +121,5 @@ func (c *cpuController) Stat(path string, stats *v1.Metrics) error {
stats.CPU.Throttling.ThrottledTime = v
}
}
return nil
return sc.Err()
}

View file

@ -26,7 +26,7 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
)
func NewCputset(root string) *cpusetController {
func NewCpuset(root string) *cpusetController {
return &cpusetController{
root: filepath.Join(root, string(Cpuset)),
}
@ -69,8 +69,8 @@ func (c *cpusetController) Create(path string, resources *specs.LinuxResources)
},
} {
if t.value != "" {
if err := ioutil.WriteFile(
filepath.Join(c.Path(path), fmt.Sprintf("cpuset.%s", t.name)),
if err := retryingWriteFile(
filepath.Join(c.Path(path), "cpuset."+t.name),
[]byte(t.value),
defaultFilePerm,
); err != nil {
@ -134,7 +134,7 @@ func (c *cpusetController) copyIfNeeded(current, parent string) error {
return err
}
if isEmpty(currentCpus) {
if err := ioutil.WriteFile(
if err := retryingWriteFile(
filepath.Join(current, "cpuset.cpus"),
parentCpus,
defaultFilePerm,
@ -143,7 +143,7 @@ func (c *cpusetController) copyIfNeeded(current, parent string) error {
}
}
if isEmpty(currentMems) {
if err := ioutil.WriteFile(
if err := retryingWriteFile(
filepath.Join(current, "cpuset.mems"),
parentMems,
defaultFilePerm,

View file

@ -18,7 +18,6 @@ package cgroups
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
@ -61,7 +60,7 @@ func (d *devicesController) Create(path string, resources *specs.LinuxResources)
if device.Type == "" {
device.Type = "a"
}
if err := ioutil.WriteFile(
if err := retryingWriteFile(
filepath.Join(d.Path(path), file),
[]byte(deviceString(device)),
defaultFilePerm,

View file

@ -50,7 +50,7 @@ func (f *freezerController) Thaw(path string) error {
}
func (f *freezerController) changeState(path string, state State) error {
return ioutil.WriteFile(
return retryingWriteFile(
filepath.Join(f.root, path, "freezer.state"),
[]byte(strings.ToUpper(string(state))),
defaultFilePerm,

View file

@ -3,17 +3,16 @@ module github.com/containerd/cgroups
go 1.13
require (
github.com/cilium/ebpf v0.0.0-20200110133405-4032b1d8aae3
github.com/cilium/ebpf v0.0.0-20200702112145-1c8d4c9ef775
github.com/coreos/go-systemd/v22 v22.0.0
github.com/cpuguy83/go-md2man/v2 v2.0.0 // indirect
github.com/docker/go-units v0.4.0
github.com/godbus/dbus/v5 v5.0.3
github.com/gogo/protobuf v1.3.1
github.com/konsorten/go-windows-terminal-sequences v1.0.2 // indirect
github.com/opencontainers/runtime-spec v0.1.2-0.20190507144316-5b71a03e2700
github.com/opencontainers/runtime-spec v1.0.2
github.com/pkg/errors v0.9.1
github.com/sirupsen/logrus v1.4.2
github.com/sirupsen/logrus v1.6.0
github.com/stretchr/testify v1.2.2
github.com/urfave/cli v1.22.2
golang.org/x/sys v0.0.0-20200120151820-655fe14d7479
golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9
)

View file

@ -16,5 +16,5 @@
package cgroups
// Hierarchy enableds both unified and split hierarchy for cgroups
// Hierarchy enables both unified and split hierarchy for cgroups
type Hierarchy func() ([]Subsystem, error)

View file

@ -17,7 +17,6 @@
package cgroups
import (
"io/ioutil"
"os"
"path/filepath"
"strconv"
@ -57,7 +56,7 @@ func (h *hugetlbController) Create(path string, resources *specs.LinuxResources)
return err
}
for _, limit := range resources.HugepageLimits {
if err := ioutil.WriteFile(
if err := retryingWriteFile(
filepath.Join(h.Path(path), strings.Join([]string{"hugetlb", limit.Pagesize, "limit_in_bytes"}, ".")),
[]byte(strconv.FormatUint(limit.Limit, 10)),
defaultFilePerm,

View file

@ -20,7 +20,6 @@ import (
"bufio"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"strconv"
@ -32,6 +31,128 @@ import (
"golang.org/x/sys/unix"
)
// MemoryEvent is an interface that v1 memory cgroup notifications implement. Arg returns the
// additional argument that is written to "cgroup.event_control" together with the eventfd and
// the event file's fd. EventFile returns the name of the file that supports the notification
// API, e.g. "memory.usage_in_bytes".
type MemoryEvent interface {
Arg() string
EventFile() string
}
type memoryThresholdEvent struct {
threshold uint64
swap bool
}
// MemoryThresholdEvent returns a new memory threshold event to be used with RegisterMemoryEvent.
// If swap is true, the event will be registered using memory.memsw.usage_in_bytes
func MemoryThresholdEvent(threshold uint64, swap bool) MemoryEvent {
return &memoryThresholdEvent{
threshold,
swap,
}
}
func (m *memoryThresholdEvent) Arg() string {
return strconv.FormatUint(m.threshold, 10)
}
func (m *memoryThresholdEvent) EventFile() string {
if m.swap {
return "memory.memsw.usage_in_bytes"
}
return "memory.usage_in_bytes"
}
type oomEvent struct{}
// OOMEvent returns a new oom event to be used with RegisterMemoryEvent.
func OOMEvent() MemoryEvent {
return &oomEvent{}
}
func (oom *oomEvent) Arg() string {
return ""
}
func (oom *oomEvent) EventFile() string {
return "memory.oom_control"
}
type memoryPressureEvent struct {
pressureLevel MemoryPressureLevel
hierarchy EventNotificationMode
}
// MemoryPressureEvent returns a new memory pressure event to be used with RegisterMemoryEvent.
func MemoryPressureEvent(pressureLevel MemoryPressureLevel, hierarchy EventNotificationMode) MemoryEvent {
return &memoryPressureEvent{
pressureLevel,
hierarchy,
}
}
func (m *memoryPressureEvent) Arg() string {
return string(m.pressureLevel) + "," + string(m.hierarchy)
}
func (m *memoryPressureEvent) EventFile() string {
return "memory.pressure_level"
}
// MemoryPressureLevel corresponds to the memory pressure levels defined
// for memory cgroups.
type MemoryPressureLevel string
// The three memory pressure levels are as follows.
// - The "low" level means that the system is reclaiming memory for new
// allocations. Monitoring this reclaiming activity might be useful for
// maintaining cache level. Upon notification, the program (typically
// "Activity Manager") might analyze vmstat and act in advance (i.e.
// prematurely shutdown unimportant services).
// - The "medium" level means that the system is experiencing medium memory
// pressure, the system might be making swap, paging out active file caches,
// etc. Upon this event applications may decide to further analyze
// vmstat/zoneinfo/memcg or internal memory usage statistics and free any
// resources that can be easily reconstructed or re-read from a disk.
// - The "critical" level means that the system is actively thrashing, it is
// about to out of memory (OOM) or even the in-kernel OOM killer is on its
// way to trigger. Applications should do whatever they can to help the
// system. It might be too late to consult with vmstat or any other
// statistics, so it is advisable to take an immediate action.
// "https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt" Section 11
const (
LowPressure MemoryPressureLevel = "low"
MediumPressure MemoryPressureLevel = "medium"
CriticalPressure MemoryPressureLevel = "critical"
)
// EventNotificationMode corresponds to the notification modes
// for the memory cgroups pressure level notifications.
type EventNotificationMode string
// There are three optional modes that specify different propagation behavior:
// - "default": this is the default behavior specified above. This mode is the
// same as omitting the optional mode parameter, preserved by backwards
// compatibility.
// - "hierarchy": events always propagate up to the root, similar to the default
// behavior, except that propagation continues regardless of whether there are
// event listeners at each level, with the "hierarchy" mode. In the above
// example, groups A, B, and C will receive notification of memory pressure.
// - "local": events are pass-through, i.e. they only receive notifications when
// memory pressure is experienced in the memcg for which the notification is
// registered. In the above example, group C will receive notification if
// registered for "local" notification and the group experiences memory
// pressure. However, group B will never receive notification, regardless if
// there is an event listener for group C or not, if group B is registered for
// local notification.
// "https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt" Section 11
const (
DefaultMode EventNotificationMode = "default"
LocalMode EventNotificationMode = "local"
HierarchyMode EventNotificationMode = "hierarchy"
)
// NewMemory returns a Memory controller given the root folder of cgroups.
// It may optionally accept other configuration options, such as IgnoreModules(...)
func NewMemory(root string, options ...func(*memoryController)) *memoryController {
@ -92,7 +213,7 @@ func (m *memoryController) Create(path string, resources *specs.LinuxResources)
// until a limit is set on the cgroup and limit cannot be set once the
// cgroup has children, or if there are already tasks in the cgroup.
for _, i := range []int64{1, -1} {
if err := ioutil.WriteFile(
if err := retryingWriteFile(
filepath.Join(m.Path(path), "memory.kmem.limit_in_bytes"),
[]byte(strconv.FormatInt(i, 10)),
defaultFilePerm,
@ -201,34 +322,6 @@ func (m *memoryController) Stat(path string, stats *v1.Metrics) error {
return nil
}
func (m *memoryController) OOMEventFD(path string) (uintptr, error) {
root := m.Path(path)
f, err := os.Open(filepath.Join(root, "memory.oom_control"))
if err != nil {
return 0, err
}
defer f.Close()
fd, _, serr := unix.RawSyscall(unix.SYS_EVENTFD2, 0, unix.EFD_CLOEXEC, 0)
if serr != 0 {
return 0, serr
}
if err := writeEventFD(root, f.Fd(), fd); err != nil {
unix.Close(int(fd))
return 0, err
}
return fd, nil
}
func writeEventFD(root string, cfd, efd uintptr) error {
f, err := os.OpenFile(filepath.Join(root, "cgroup.event_control"), os.O_WRONLY, 0)
if err != nil {
return err
}
_, err = f.WriteString(fmt.Sprintf("%d %d", efd, cfd))
f.Close()
return err
}
func (m *memoryController) parseStats(r io.Reader, stat *v1.MemoryStat) error {
var (
raw = make(map[string]uint64)
@ -236,9 +329,6 @@ func (m *memoryController) parseStats(r io.Reader, stat *v1.MemoryStat) error {
line int
)
for sc.Scan() {
if err := sc.Err(); err != nil {
return err
}
key, v, err := parseKV(sc.Text())
if err != nil {
return fmt.Errorf("%d: %v", line, err)
@ -246,6 +336,9 @@ func (m *memoryController) parseStats(r io.Reader, stat *v1.MemoryStat) error {
raw[key] = v
line++
}
if err := sc.Err(); err != nil {
return err
}
stat.Cache = raw["cache"]
stat.RSS = raw["rss"]
stat.RSSHuge = raw["rss_huge"]
@ -284,8 +377,8 @@ func (m *memoryController) parseStats(r io.Reader, stat *v1.MemoryStat) error {
func (m *memoryController) set(path string, settings []memorySettings) error {
for _, t := range settings {
if t.value != nil {
if err := ioutil.WriteFile(
filepath.Join(m.Path(path), fmt.Sprintf("memory.%s", t.name)),
if err := retryingWriteFile(
filepath.Join(m.Path(path), "memory."+t.name),
[]byte(strconv.FormatInt(*t.value, 10)),
defaultFilePerm,
); err != nil {
@ -359,3 +452,24 @@ func getOomControlValue(mem *specs.LinuxMemory) *int64 {
}
return nil
}
func (m *memoryController) memoryEvent(path string, event MemoryEvent) (uintptr, error) {
root := m.Path(path)
efd, err := unix.Eventfd(0, unix.EFD_CLOEXEC)
if err != nil {
return 0, err
}
evtFile, err := os.Open(filepath.Join(root, event.EventFile()))
if err != nil {
unix.Close(efd)
return 0, err
}
defer evtFile.Close()
data := fmt.Sprintf("%d %d %s", efd, evtFile.Fd(), event.Arg())
evctlPath := filepath.Join(root, "cgroup.event_control")
if err := retryingWriteFile(evctlPath, []byte(data), 0700); err != nil {
unix.Close(efd)
return 0, err
}
return uintptr(efd), nil
}

View file

@ -17,7 +17,6 @@
package cgroups
import (
"io/ioutil"
"os"
"path/filepath"
"strconv"
@ -48,7 +47,7 @@ func (n *netclsController) Create(path string, resources *specs.LinuxResources)
return err
}
if resources.Network != nil && resources.Network.ClassID != nil && *resources.Network.ClassID > 0 {
return ioutil.WriteFile(
return retryingWriteFile(
filepath.Join(n.Path(path), "net_cls.classid"),
[]byte(strconv.FormatUint(uint64(*resources.Network.ClassID), 10)),
defaultFilePerm,

View file

@ -18,7 +18,6 @@ package cgroups
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
@ -49,7 +48,7 @@ func (n *netprioController) Create(path string, resources *specs.LinuxResources)
}
if resources.Network != nil {
for _, prio := range resources.Network.Priorities {
if err := ioutil.WriteFile(
if err := retryingWriteFile(
filepath.Join(n.Path(path), "net_prio.ifpriomap"),
formatPrio(prio.Name, prio.Priority),
defaultFilePerm,

View file

@ -48,12 +48,12 @@ func newInitConfig() *InitConfig {
type InitCheck func(Subsystem, Path, error) error
// AllowAny allows any subsystem errors to be skipped
func AllowAny(s Subsystem, p Path, err error) error {
func AllowAny(_ Subsystem, _ Path, _ error) error {
return ErrIgnoreSubsystem
}
// RequireDevices requires the device subsystem but no others
func RequireDevices(s Subsystem, p Path, err error) error {
func RequireDevices(s Subsystem, _ Path, _ error) error {
if s.Name() == Devices {
return ErrDevicesRequired
}

View file

@ -25,7 +25,7 @@ import (
type Path func(subsystem Name) (string, error)
func RootPath(subsysem Name) (string, error) {
func RootPath(subsystem Name) (string, error) {
return "/", nil
}
@ -63,7 +63,7 @@ var ErrControllerNotActive = errors.New("controller is not supported")
func existingPath(paths map[string]string, suffix string) Path {
// localize the paths based on the root mount dest for nested cgroups
for n, p := range paths {
dest, err := getCgroupDestination(string(n))
dest, err := getCgroupDestination(n)
if err != nil {
return errorPath(err)
}
@ -79,7 +79,7 @@ func existingPath(paths map[string]string, suffix string) Path {
return func(name Name) (string, error) {
root, ok := paths[string(name)]
if !ok {
if root, ok = paths[fmt.Sprintf("name=%s", name)]; !ok {
if root, ok = paths["name="+string(name)]; !ok {
return "", ErrControllerNotActive
}
}

View file

@ -50,7 +50,7 @@ func (p *pidsController) Create(path string, resources *specs.LinuxResources) er
return err
}
if resources.Pids != nil && resources.Pids.Limit > 0 {
return ioutil.WriteFile(
return retryingWriteFile(
filepath.Join(p.Path(path), "pids.max"),
[]byte(strconv.FormatInt(resources.Pids.Limit, 10)),
defaultFilePerm,

View file

@ -67,7 +67,7 @@ func (p *rdmaController) Create(path string, resources *specs.LinuxResources) er
for device, limit := range resources.Rdma {
if device != "" && (limit.HcaHandles != nil || limit.HcaObjects != nil) {
return ioutil.WriteFile(
return retryingWriteFile(
filepath.Join(p.Path(path), "rdma.max"),
[]byte(createCmdString(device, &limit)),
defaultFilePerm,

View file

@ -17,7 +17,6 @@
package cgroups
import (
"fmt"
"path/filepath"
"strings"
"sync"
@ -78,7 +77,7 @@ func (s *SystemdController) Name() Name {
return SystemdDbus
}
func (s *SystemdController) Create(path string, resources *specs.LinuxResources) error {
func (s *SystemdController) Create(path string, _ *specs.LinuxResources) error {
conn, err := systemdDbus.New()
if err != nil {
return err
@ -105,7 +104,7 @@ func (s *SystemdController) Create(path string, resources *specs.LinuxResources)
}
once.Do(checkDelegate)
properties := []systemdDbus.Property{
systemdDbus.PropDescription(fmt.Sprintf("cgroup %s", name)),
systemdDbus.PropDescription("cgroup " + name),
systemdDbus.PropWants(slice),
newProperty("DefaultDependencies", false),
newProperty("MemoryAccounting", true),
@ -150,10 +149,6 @@ func newProperty(name string, units interface{}) systemdDbus.Property {
}
}
func unitName(name string) string {
return fmt.Sprintf("%s.slice", name)
}
func splitName(path string) (slice string, unit string) {
slice, unit = filepath.Split(path)
return strings.TrimSuffix(slice, "/"), unit

View file

@ -18,6 +18,7 @@ package cgroups
import (
"bufio"
"errors"
"fmt"
"io"
"io/ioutil"
@ -26,6 +27,7 @@ import (
"strconv"
"strings"
"sync"
"syscall"
"time"
units "github.com/docker/go-units"
@ -121,7 +123,7 @@ func defaults(root string) ([]Subsystem, error) {
NewNetCls(root),
NewNetPrio(root),
NewPerfEvent(root),
NewCputset(root),
NewCpuset(root),
NewCpu(root),
NewCpuacct(root),
NewMemory(root),
@ -181,6 +183,10 @@ func readPids(path string, subsystem Name) ([]Process, error) {
})
}
}
if err := s.Err(); err != nil {
// the scan failed, so we may not have read all pids
return nil, err
}
return out, nil
}
@ -208,6 +214,9 @@ func readTasksPids(path string, subsystem Name) ([]Task, error) {
})
}
}
if err := s.Err(); err != nil {
return nil, err
}
return out, nil
}
@ -286,9 +295,6 @@ func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
s = bufio.NewScanner(r)
)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
var (
text = s.Text()
parts = strings.SplitN(text, ":", 3)
@ -302,6 +308,9 @@ func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
}
}
}
if err := s.Err(); err != nil {
return nil, err
}
return cgroups, nil
}
@ -313,16 +322,23 @@ func getCgroupDestination(subsystem string) (string, error) {
defer f.Close()
s := bufio.NewScanner(f)
for s.Scan() {
if err := s.Err(); err != nil {
return "", err
fields := strings.Split(s.Text(), " ")
if len(fields) < 10 {
// broken mountinfo?
continue
}
if fields[len(fields)-3] != "cgroup" {
continue
}
fields := strings.Fields(s.Text())
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if opt == subsystem {
return fields[3], nil
}
}
}
if err := s.Err(); err != nil {
return "", err
}
return "", ErrNoCgroupMountDestination
}
@ -366,5 +382,18 @@ func cleanPath(path string) string {
if !filepath.IsAbs(path) {
path, _ = filepath.Rel(string(os.PathSeparator), filepath.Clean(string(os.PathSeparator)+path))
}
return filepath.Clean(path)
return path
}
func retryingWriteFile(path string, data []byte, mode os.FileMode) error {
// Retry writes on EINTR; see:
// https://github.com/golang/go/issues/38033
for {
err := ioutil.WriteFile(path, data, mode)
if err == nil {
return nil
} else if !errors.Is(err, syscall.EINTR) {
return err
}
}
}

View file

@ -54,28 +54,20 @@ func v1MountPoint() (string, error) {
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
if err := scanner.Err(); err != nil {
return "", err
}
var (
text = scanner.Text()
fields = strings.Split(text, " ")
// safe as mountinfo encodes mountpoints with spaces as \040.
index = strings.Index(text, " - ")
postSeparatorFields = strings.Fields(text[index+3:])
numPostFields = len(postSeparatorFields)
text = scanner.Text()
fields = strings.Split(text, " ")
numFields = len(fields)
)
// this is an error as we can't detect if the mount is for "cgroup"
if numPostFields == 0 {
return "", fmt.Errorf("Found no fields post '-' in %q", text)
if numFields < 10 {
return "", fmt.Errorf("mountinfo: bad entry %q", text)
}
if postSeparatorFields[0] == "cgroup" {
// check that the mount is properly formated.
if numPostFields < 3 {
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
}
if fields[numFields-3] == "cgroup" {
return filepath.Dir(fields[4]), nil
}
}
if err := scanner.Err(); err != nil {
return "", err
}
return "", ErrMountPointNotExist
}

View file

@ -44,7 +44,3 @@ func IgnoreNotExist(err error) error {
}
return err
}
func errPassthrough(err error) error {
return err
}

View file

@ -18,26 +18,22 @@ package v2
import (
"bufio"
"fmt"
"io/ioutil"
"math"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"syscall"
"time"
"golang.org/x/sys/unix"
"github.com/containerd/cgroups/v2/stats"
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
"github.com/godbus/dbus/v5"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
"golang.org/x/sys/unix"
)
const (
@ -49,13 +45,8 @@ const (
var (
canDelegate bool
once sync.Once
)
type cgValuer interface {
Values() []Value
}
type Event struct {
Low uint64
High uint64
@ -149,11 +140,21 @@ func (c *Value) write(path string, perm os.FileMode) error {
default:
return ErrInvalidFormat
}
return ioutil.WriteFile(
filepath.Join(path, c.filename),
data,
perm,
)
// Retry writes on EINTR; see:
// https://github.com/golang/go/issues/38033
for {
err := ioutil.WriteFile(
filepath.Join(path, c.filename),
data,
perm,
)
if err == nil {
return nil
} else if !errors.Is(err, syscall.EINTR) {
return err
}
}
}
func writeValues(path string, values []Value) error {
@ -166,6 +167,9 @@ func writeValues(path string, values []Value) error {
}
func NewManager(mountpoint string, group string, resources *Resources) (*Manager, error) {
if resources == nil {
return nil, errors.New("resources reference is nil")
}
if err := VerifyGroupPath(group); err != nil {
return nil, err
}
@ -256,7 +260,7 @@ func (c *Manager) ToggleControllers(controllers []string, t ControllerToggle) er
// Note that /sys/fs/cgroup/foo/bar/baz/cgroup.subtree_control does not need to be written.
split := strings.Split(c.path, "/")
var lastErr error
for i, _ := range split {
for i := range split {
f := strings.Join(split[:i], "/")
if !strings.HasPrefix(f, c.unifiedMountpoint) || f == c.path {
continue
@ -359,8 +363,7 @@ func (c *Manager) Stat() (*stats.Metrics, error) {
for _, controller := range controllers {
switch controller {
case "cpu", "memory":
filename := fmt.Sprintf("%s.stat", controller)
if err := readKVStatsFile(c.path, filename, out); err != nil {
if err := readKVStatsFile(c.path, controller+".stat", out); err != nil {
if os.IsNotExist(err) {
continue
}
@ -376,6 +379,12 @@ func (c *Manager) Stat() (*stats.Metrics, error) {
return nil, err
}
}
memoryEvents := make(map[string]interface{})
if err := readKVStatsFile(c.path, "memory.events", memoryEvents); err != nil {
if !os.IsNotExist(err) {
return nil, err
}
}
var metrics stats.Metrics
metrics.Pids = &stats.PidsStat{
@ -427,7 +436,15 @@ func (c *Manager) Stat() (*stats.Metrics, error) {
SwapUsage: getStatFileContentUint64(filepath.Join(c.path, "memory.swap.current")),
SwapLimit: getStatFileContentUint64(filepath.Join(c.path, "memory.swap.max")),
}
if len(memoryEvents) > 0 {
metrics.MemoryEvents = &stats.MemoryEvents{
Low: getUint64Value("low", memoryEvents),
High: getUint64Value("high", memoryEvents),
Max: getUint64Value("max", memoryEvents),
Oom: getUint64Value("oom", memoryEvents),
OomKill: getUint64Value("oom_kill", memoryEvents),
}
}
metrics.Io = &stats.IOStat{Usage: readIoStats(c.path)}
metrics.Rdma = &stats.RdmaStat{
Current: rdmaStats(filepath.Join(c.path, "rdma.current")),
@ -496,16 +513,13 @@ func readKVStatsFile(path string, file string, out map[string]interface{}) error
s := bufio.NewScanner(f)
for s.Scan() {
if err := s.Err(); err != nil {
return err
}
name, value, err := parseKV(s.Text())
if err != nil {
return errors.Wrapf(err, "error while parsing %s (line=%q)", filepath.Join(path, file), s.Text())
}
out[name] = value
}
return nil
return s.Err()
}
func (c *Manager) Freeze() error {
@ -575,15 +589,44 @@ func (c *Manager) waitForEvents(ec chan<- Event, errCh chan<- error) {
errCh <- err
return
}
var out map[string]interface{}
if bytesRead >= syscall.SizeofInotifyEvent {
if err := readKVStatsFile(c.path, "memory.events", out); err != nil {
e := Event{
High: out["high"].(uint64),
Low: out["low"].(uint64),
Max: out["max"].(uint64),
OOM: out["oom"].(uint64),
OOMKill: out["oom_kill"].(uint64),
out := make(map[string]interface{})
if err := readKVStatsFile(c.path, "memory.events", out); err == nil {
e := Event{}
if v, ok := out["high"]; ok {
e.High, ok = v.(uint64)
if !ok {
errCh <- errors.Errorf("cannot convert high to uint64: %+v", v)
return
}
}
if v, ok := out["low"]; ok {
e.Low, ok = v.(uint64)
if !ok {
errCh <- errors.Errorf("cannot convert low to uint64: %+v", v)
return
}
}
if v, ok := out["max"]; ok {
e.Max, ok = v.(uint64)
if !ok {
errCh <- errors.Errorf("cannot convert max to uint64: %+v", v)
return
}
}
if v, ok := out["oom"]; ok {
e.OOM, ok = v.(uint64)
if !ok {
errCh <- errors.Errorf("cannot convert oom to uint64: %+v", v)
return
}
}
if v, ok := out["oom_kill"]; ok {
e.OOMKill, ok = v.(uint64)
if !ok {
errCh <- errors.Errorf("cannot convert oom_kill to uint64: %+v", v)
return
}
}
ec <- e
} else {
@ -627,7 +670,7 @@ func NewSystemd(slice, group string, pid int, resources *Resources) (*Manager, e
defer conn.Close()
properties := []systemdDbus.Property{
systemdDbus.PropDescription(fmt.Sprintf("cgroup %s", group)),
systemdDbus.PropDescription("cgroup " + group),
newSystemdProperty("DefaultDependencies", false),
newSystemdProperty("MemoryAccounting", true),
newSystemdProperty("CPUAccounting", true),

View file

@ -29,7 +29,7 @@ func NestedGroupPath(suffix string) (string, error) {
if err != nil {
return "", err
}
return filepath.Join(string(path), suffix), nil
return filepath.Join(path, suffix), nil
}
// PidGroupPath will return the correct cgroup paths for an existing process running inside a cgroup

View file

@ -31,6 +31,7 @@ type Metrics struct {
Rdma *RdmaStat `protobuf:"bytes,5,opt,name=rdma,proto3" json:"rdma,omitempty"`
Io *IOStat `protobuf:"bytes,6,opt,name=io,proto3" json:"io,omitempty"`
Hugetlb []*HugeTlbStat `protobuf:"bytes,7,rep,name=hugetlb,proto3" json:"hugetlb,omitempty"`
MemoryEvents *MemoryEvents `protobuf:"bytes,8,opt,name=memory_events,json=memoryEvents,proto3" json:"memory_events,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
@ -225,6 +226,49 @@ func (m *MemoryStat) XXX_DiscardUnknown() {
var xxx_messageInfo_MemoryStat proto.InternalMessageInfo
type MemoryEvents struct {
Low uint64 `protobuf:"varint,1,opt,name=low,proto3" json:"low,omitempty"`
High uint64 `protobuf:"varint,2,opt,name=high,proto3" json:"high,omitempty"`
Max uint64 `protobuf:"varint,3,opt,name=max,proto3" json:"max,omitempty"`
Oom uint64 `protobuf:"varint,4,opt,name=oom,proto3" json:"oom,omitempty"`
OomKill uint64 `protobuf:"varint,5,opt,name=oom_kill,json=oomKill,proto3" json:"oom_kill,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *MemoryEvents) Reset() { *m = MemoryEvents{} }
func (*MemoryEvents) ProtoMessage() {}
func (*MemoryEvents) Descriptor() ([]byte, []int) {
return fileDescriptor_2fc6005842049e6b, []int{4}
}
func (m *MemoryEvents) XXX_Unmarshal(b []byte) error {
return m.Unmarshal(b)
}
func (m *MemoryEvents) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
if deterministic {
return xxx_messageInfo_MemoryEvents.Marshal(b, m, deterministic)
} else {
b = b[:cap(b)]
n, err := m.MarshalTo(b)
if err != nil {
return nil, err
}
return b[:n], nil
}
}
func (m *MemoryEvents) XXX_Merge(src proto.Message) {
xxx_messageInfo_MemoryEvents.Merge(m, src)
}
func (m *MemoryEvents) XXX_Size() int {
return m.Size()
}
func (m *MemoryEvents) XXX_DiscardUnknown() {
xxx_messageInfo_MemoryEvents.DiscardUnknown(m)
}
var xxx_messageInfo_MemoryEvents proto.InternalMessageInfo
type RdmaStat struct {
Current []*RdmaEntry `protobuf:"bytes,1,rep,name=current,proto3" json:"current,omitempty"`
Limit []*RdmaEntry `protobuf:"bytes,2,rep,name=limit,proto3" json:"limit,omitempty"`
@ -236,7 +280,7 @@ type RdmaStat struct {
func (m *RdmaStat) Reset() { *m = RdmaStat{} }
func (*RdmaStat) ProtoMessage() {}
func (*RdmaStat) Descriptor() ([]byte, []int) {
return fileDescriptor_2fc6005842049e6b, []int{4}
return fileDescriptor_2fc6005842049e6b, []int{5}
}
func (m *RdmaStat) XXX_Unmarshal(b []byte) error {
return m.Unmarshal(b)
@ -277,7 +321,7 @@ type RdmaEntry struct {
func (m *RdmaEntry) Reset() { *m = RdmaEntry{} }
func (*RdmaEntry) ProtoMessage() {}
func (*RdmaEntry) Descriptor() ([]byte, []int) {
return fileDescriptor_2fc6005842049e6b, []int{5}
return fileDescriptor_2fc6005842049e6b, []int{6}
}
func (m *RdmaEntry) XXX_Unmarshal(b []byte) error {
return m.Unmarshal(b)
@ -316,7 +360,7 @@ type IOStat struct {
func (m *IOStat) Reset() { *m = IOStat{} }
func (*IOStat) ProtoMessage() {}
func (*IOStat) Descriptor() ([]byte, []int) {
return fileDescriptor_2fc6005842049e6b, []int{6}
return fileDescriptor_2fc6005842049e6b, []int{7}
}
func (m *IOStat) XXX_Unmarshal(b []byte) error {
return m.Unmarshal(b)
@ -360,7 +404,7 @@ type IOEntry struct {
func (m *IOEntry) Reset() { *m = IOEntry{} }
func (*IOEntry) ProtoMessage() {}
func (*IOEntry) Descriptor() ([]byte, []int) {
return fileDescriptor_2fc6005842049e6b, []int{7}
return fileDescriptor_2fc6005842049e6b, []int{8}
}
func (m *IOEntry) XXX_Unmarshal(b []byte) error {
return m.Unmarshal(b)
@ -401,7 +445,7 @@ type HugeTlbStat struct {
func (m *HugeTlbStat) Reset() { *m = HugeTlbStat{} }
func (*HugeTlbStat) ProtoMessage() {}
func (*HugeTlbStat) Descriptor() ([]byte, []int) {
return fileDescriptor_2fc6005842049e6b, []int{8}
return fileDescriptor_2fc6005842049e6b, []int{9}
}
func (m *HugeTlbStat) XXX_Unmarshal(b []byte) error {
return m.Unmarshal(b)
@ -435,6 +479,7 @@ func init() {
proto.RegisterType((*PidsStat)(nil), "io.containerd.cgroups.v2.PidsStat")
proto.RegisterType((*CPUStat)(nil), "io.containerd.cgroups.v2.CPUStat")
proto.RegisterType((*MemoryStat)(nil), "io.containerd.cgroups.v2.MemoryStat")
proto.RegisterType((*MemoryEvents)(nil), "io.containerd.cgroups.v2.MemoryEvents")
proto.RegisterType((*RdmaStat)(nil), "io.containerd.cgroups.v2.RdmaStat")
proto.RegisterType((*RdmaEntry)(nil), "io.containerd.cgroups.v2.RdmaEntry")
proto.RegisterType((*IOStat)(nil), "io.containerd.cgroups.v2.IOStat")
@ -447,77 +492,82 @@ func init() {
}
var fileDescriptor_2fc6005842049e6b = []byte{
// 1118 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x56, 0x4d, 0x6f, 0x1c, 0x45,
0x10, 0xcd, 0xda, 0x9b, 0xfd, 0xe8, 0xb5, 0x13, 0xa7, 0xe3, 0x84, 0x4e, 0x42, 0xd6, 0xf6, 0x86,
0xa0, 0x20, 0xc1, 0x2e, 0x32, 0x5f, 0x02, 0x05, 0x21, 0x27, 0x10, 0x05, 0x09, 0x13, 0x6b, 0xe2,
0x15, 0xc7, 0x51, 0xef, 0x4c, 0x7b, 0x76, 0xec, 0xf9, 0x52, 0x77, 0x8f, 0xcd, 0xe6, 0xc4, 0x81,
0x2b, 0xe2, 0x6f, 0xe5, 0x06, 0x47, 0x4e, 0x88, 0xf8, 0xc4, 0xcf, 0x40, 0x55, 0xd5, 0xb3, 0x33,
0x1c, 0x6c, 0xb8, 0x75, 0xbd, 0x7a, 0xaf, 0xa6, 0xfa, 0xf5, 0x76, 0xf5, 0xb2, 0x8f, 0xa3, 0xd8,
0xce, 0xcb, 0xd9, 0x38, 0xc8, 0xd3, 0x49, 0x90, 0x67, 0x56, 0xc6, 0x99, 0xd2, 0xe1, 0x24, 0x88,
0x74, 0x5e, 0x16, 0x66, 0x72, 0xba, 0x3b, 0x31, 0x56, 0x5a, 0x33, 0x49, 0x95, 0xd5, 0x71, 0x60,
0xc6, 0x85, 0xce, 0x6d, 0xce, 0x45, 0x9c, 0x8f, 0x6b, 0xf6, 0xd8, 0xb1, 0xc7, 0xa7, 0xbb, 0x77,
0x37, 0xa3, 0x3c, 0xca, 0x91, 0x34, 0x81, 0x15, 0xf1, 0x47, 0x7f, 0xaf, 0xb0, 0xee, 0x3e, 0x55,
0xe0, 0x9f, 0xb2, 0x76, 0x11, 0x87, 0x46, 0xb4, 0xb6, 0x5b, 0x8f, 0x06, 0xbb, 0xa3, 0xf1, 0x45,
0xa5, 0xc6, 0x07, 0x71, 0x68, 0x5e, 0x5a, 0x69, 0x3d, 0xe4, 0xf3, 0xc7, 0x6c, 0x35, 0x28, 0x4a,
0xb1, 0x82, 0xb2, 0x9d, 0x8b, 0x65, 0x4f, 0x0f, 0xa6, 0xa0, 0x7a, 0xd2, 0x3d, 0xff, 0x73, 0x6b,
0xf5, 0xe9, 0xc1, 0xd4, 0x03, 0x19, 0x7f, 0xcc, 0x3a, 0xa9, 0x4a, 0x73, 0xbd, 0x10, 0x6d, 0x2c,
0xf0, 0xce, 0xc5, 0x05, 0xf6, 0x91, 0x87, 0x5f, 0x76, 0x1a, 0xe8, 0x59, 0x87, 0xa9, 0x14, 0x57,
0xff, 0xab, 0x67, 0x2f, 0x4c, 0x25, 0xf5, 0x0c, 0x7c, 0xfe, 0x21, 0x5b, 0x89, 0x73, 0xd1, 0x41,
0xd5, 0xf6, 0xc5, 0xaa, 0x6f, 0x5f, 0xa0, 0x66, 0x25, 0xce, 0xf9, 0x57, 0xac, 0x3b, 0x2f, 0x23,
0x65, 0x93, 0x99, 0xe8, 0x6e, 0xaf, 0x3e, 0x1a, 0xec, 0x3e, 0xbc, 0x58, 0xf6, 0xbc, 0x8c, 0xd4,
0x61, 0x32, 0x43, 0x6d, 0xa5, 0x1a, 0x7d, 0xc1, 0x7a, 0x95, 0x71, 0x5c, 0xb0, 0x6e, 0x50, 0x6a,
0xad, 0x32, 0x8b, 0x6e, 0xb7, 0xbd, 0x2a, 0xe4, 0x9b, 0xec, 0x6a, 0x12, 0xa7, 0xb1, 0x45, 0x3b,
0xdb, 0x1e, 0x05, 0xa3, 0xdf, 0x5a, 0xac, 0xeb, 0xec, 0xe3, 0xf7, 0x19, 0x2b, 0x8d, 0x8c, 0x94,
0x5f, 0x1a, 0x15, 0x38, 0x79, 0x1f, 0x91, 0xa9, 0x51, 0x01, 0xbf, 0xc7, 0xfa, 0xa5, 0x51, 0x9a,
0xb2, 0x54, 0xa4, 0x07, 0x00, 0x26, 0xb7, 0xd8, 0xc0, 0x2c, 0x8c, 0x55, 0x29, 0xa5, 0x57, 0x31,
0xcd, 0x08, 0x42, 0xc2, 0x7d, 0xc6, 0x32, 0xed, 0x17, 0x4a, 0xc7, 0x79, 0x68, 0xf0, 0x44, 0xda,
0x5e, 0x3f, 0xd3, 0x07, 0x04, 0xf0, 0x1d, 0xb6, 0x96, 0x69, 0xdf, 0xce, 0x75, 0x6e, 0x6d, 0xa2,
0x42, 0xb4, 0xbd, 0xed, 0x0d, 0x32, 0x7d, 0x58, 0x41, 0xfc, 0x21, 0xbb, 0xb6, 0xcc, 0xd3, 0x57,
0x3a, 0x48, 0x5a, 0x5f, 0xa2, 0xf0, 0xa1, 0xd1, 0xaf, 0x7d, 0xc6, 0xea, 0xf3, 0xe4, 0x9c, 0xb5,
0x65, 0x96, 0x67, 0x6e, 0x3b, 0xb8, 0x06, 0xec, 0x28, 0x4e, 0x94, 0xdb, 0x04, 0xae, 0xa1, 0x81,
0x13, 0xa5, 0x33, 0x95, 0xf8, 0xc6, 0xca, 0xe0, 0xc4, 0xed, 0x60, 0x40, 0xd8, 0x4b, 0x80, 0x40,
0x66, 0x12, 0x39, 0x73, 0xcd, 0xe3, 0x1a, 0xb1, 0x3c, 0x38, 0x71, 0xfd, 0xe2, 0x1a, 0x9c, 0x36,
0xf3, 0x54, 0xa5, 0xae, 0x3f, 0x0a, 0xc0, 0x21, 0xf8, 0x90, 0x9f, 0xca, 0xa2, 0x50, 0xa1, 0xe8,
0x92, 0x43, 0x00, 0xed, 0x23, 0x02, 0x0e, 0x21, 0x21, 0x8c, 0xb5, 0x5d, 0x88, 0x1e, 0x39, 0x04,
0xc8, 0xd7, 0x00, 0xc0, 0xf6, 0x31, 0x7d, 0xa6, 0x63, 0xab, 0x66, 0xd0, 0x62, 0x9f, 0xb6, 0x0f,
0xe8, 0x0f, 0x15, 0xc8, 0xef, 0xb0, 0x1e, 0xec, 0xd1, 0xb7, 0xf3, 0x42, 0x30, 0xfa, 0x05, 0x40,
0x7c, 0x38, 0x2f, 0xf8, 0x03, 0xb6, 0x1e, 0x67, 0x32, 0xb0, 0xf1, 0xa9, 0xf2, 0xd1, 0x93, 0x01,
0xe6, 0xd7, 0x2a, 0x70, 0x0f, 0xbc, 0xd9, 0x62, 0x83, 0x26, 0x65, 0x8d, 0xda, 0x6c, 0x10, 0x9a,
0x55, 0xd0, 0xc5, 0xf5, 0x7f, 0x57, 0x79, 0x06, 0x6e, 0xd6, 0x55, 0x90, 0x72, 0xad, 0x59, 0x05,
0x09, 0xdb, 0x6c, 0x50, 0x66, 0xea, 0x34, 0x0e, 0xac, 0x9c, 0x25, 0x4a, 0x5c, 0x27, 0xb7, 0x1b,
0x10, 0x7f, 0x8f, 0x6d, 0x80, 0xc3, 0xbe, 0x56, 0x41, 0x22, 0xe3, 0x14, 0x69, 0x1b, 0x48, 0xbb,
0x0e, 0xb8, 0x57, 0xc3, 0xfc, 0x03, 0xc6, 0x91, 0x5a, 0x66, 0x4d, 0xf2, 0x0d, 0x24, 0xdf, 0x80,
0xcc, 0xb4, 0x99, 0x80, 0x3b, 0x52, 0x44, 0x47, 0xb2, 0x4c, 0xac, 0xe0, 0xe4, 0x90, 0x0b, 0xf9,
0x90, 0xb1, 0x22, 0x4a, 0xe5, 0x31, 0x25, 0x6f, 0x52, 0xd7, 0x35, 0x02, 0x1f, 0x3a, 0xcb, 0xf5,
0x49, 0x9c, 0x45, 0x46, 0x59, 0x5f, 0x2b, 0xe2, 0x6d, 0xd2, 0x87, 0xea, 0x8c, 0x47, 0x09, 0x3e,
0x61, 0x37, 0x1b, 0x74, 0xdc, 0xbd, 0xb4, 0x4a, 0xdc, 0x42, 0x7e, 0xa3, 0xd2, 0x9e, 0xcb, 0xf0,
0x4f, 0xd8, 0xed, 0x86, 0x20, 0xcb, 0x43, 0xe5, 0xfa, 0x16, 0xb7, 0x51, 0x73, 0xab, 0xce, 0x7e,
0x5f, 0x27, 0xf9, 0x5d, 0xd6, 0x2b, 0x22, 0xad, 0x8e, 0xe2, 0x24, 0x11, 0x6f, 0xd1, 0xc5, 0xac,
0x62, 0x7e, 0x9b, 0x75, 0x8a, 0xc8, 0x04, 0x32, 0x13, 0x02, 0x33, 0x2e, 0x22, 0x13, 0x8c, 0x55,
0x32, 0x11, 0x77, 0x2a, 0x13, 0x30, 0x24, 0x13, 0x96, 0xcd, 0xde, 0xad, 0x4c, 0xa8, 0x10, 0x3e,
0x62, 0x6b, 0x45, 0x14, 0xaa, 0x25, 0xe3, 0x1e, 0x9d, 0x7f, 0x13, 0xa3, 0x1a, 0x89, 0x7c, 0xb5,
0x38, 0xd2, 0x4a, 0x89, 0xb7, 0xab, 0x1a, 0x15, 0x02, 0xc7, 0x5f, 0x47, 0xa1, 0xb8, 0x4f, 0xc7,
0xdf, 0x80, 0xf8, 0xbb, 0xec, 0xba, 0x9d, 0x17, 0x3e, 0x1a, 0xe9, 0xcb, 0x24, 0xc9, 0x03, 0x31,
0xac, 0xae, 0x7b, 0xf1, 0x0c, 0xd0, 0x3d, 0x00, 0xf9, 0xfb, 0x8c, 0x03, 0x2f, 0xc8, 0x93, 0x44,
0x16, 0x46, 0x39, 0xea, 0x16, 0x52, 0x37, 0xec, 0xbc, 0x78, 0xea, 0x12, 0xc4, 0xde, 0x64, 0x57,
0x71, 0xa0, 0x89, 0x6d, 0xba, 0x9a, 0x18, 0xc0, 0xaf, 0x95, 0x06, 0x1f, 0x0d, 0xc8, 0x1d, 0x6a,
0x17, 0xa1, 0xef, 0x00, 0x81, 0xab, 0x69, 0xce, 0x64, 0xe1, 0x93, 0x76, 0x44, 0x57, 0x13, 0x90,
0x29, 0xea, 0xab, 0x34, 0xc9, 0x1f, 0xd4, 0x69, 0x54, 0x8f, 0x7e, 0x6e, 0xb1, 0x5e, 0xf5, 0x4a,
0xf0, 0x2f, 0x9b, 0x03, 0x1a, 0xa6, 0xfd, 0x83, 0xcb, 0x9f, 0x96, 0x6f, 0x32, 0xab, 0x17, 0xf5,
0x14, 0xff, 0xbc, 0x9e, 0xe2, 0xff, 0x5b, 0xec, 0x46, 0xbd, 0x62, 0xfd, 0x25, 0x06, 0x3f, 0x8b,
0x10, 0xee, 0x9a, 0xc2, 0xc1, 0xd8, 0xf7, 0x5c, 0x04, 0x56, 0xcc, 0x03, 0xe9, 0xcf, 0x65, 0x16,
0x26, 0xca, 0xe0, 0x84, 0x5c, 0xf7, 0xd8, 0x3c, 0x90, 0xcf, 0x09, 0xa9, 0x08, 0xf9, 0xec, 0x58,
0x05, 0xd6, 0xe0, 0x98, 0x24, 0xc2, 0x0b, 0x42, 0x46, 0x7b, 0xac, 0x43, 0x8f, 0x1b, 0xff, 0xac,
0x32, 0x9b, 0x36, 0xba, 0x73, 0xd9, 0x6b, 0xe8, 0x3a, 0x45, 0xfe, 0xe8, 0x97, 0x16, 0xeb, 0x3a,
0x08, 0x4e, 0x2c, 0x95, 0xc7, 0xb9, 0x76, 0x03, 0x9c, 0x02, 0x44, 0xe3, 0x2c, 0xd7, 0xd5, 0x63,
0x86, 0x01, 0x6c, 0x4a, 0xcf, 0x16, 0x56, 0x19, 0x37, 0xbd, 0x5d, 0x04, 0xf8, 0x19, 0xe1, 0x34,
0xba, 0x5d, 0x04, 0xc3, 0x5b, 0xc7, 0xb9, 0xa9, 0x86, 0x37, 0xac, 0x01, 0x3b, 0x03, 0x8c, 0x66,
0x37, 0xae, 0x47, 0x53, 0x36, 0x68, 0x3c, 0xbc, 0x97, 0xbc, 0xb1, 0x1b, 0x6c, 0x35, 0x95, 0x3f,
0xba, 0xa6, 0x60, 0x89, 0x57, 0x53, 0x46, 0xca, 0xc4, 0xaf, 0x14, 0x36, 0xd5, 0xf7, 0x96, 0xf1,
0x13, 0xf1, 0xfa, 0xcd, 0xf0, 0xca, 0x1f, 0x6f, 0x86, 0x57, 0x7e, 0x3a, 0x1f, 0xb6, 0x5e, 0x9f,
0x0f, 0x5b, 0xbf, 0x9f, 0x0f, 0x5b, 0x7f, 0x9d, 0x0f, 0x5b, 0xb3, 0x0e, 0xfe, 0x87, 0xfa, 0xe8,
0x9f, 0x00, 0x00, 0x00, 0xff, 0xff, 0x79, 0xd2, 0xcd, 0xe2, 0xab, 0x09, 0x00, 0x00,
// 1198 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x56, 0x4d, 0x73, 0xd4, 0x46,
0x13, 0x66, 0xed, 0xc5, 0xeb, 0xed, 0xb5, 0xc1, 0x0c, 0x86, 0x57, 0xc0, 0xcb, 0xda, 0x5e, 0x02,
0x45, 0xaa, 0x92, 0xdd, 0x94, 0xf3, 0x55, 0x49, 0x91, 0x4a, 0x19, 0x02, 0x45, 0x8a, 0x10, 0x5c,
0x02, 0x57, 0x8e, 0xaa, 0x59, 0x69, 0x2c, 0x0d, 0x96, 0x34, 0xaa, 0x99, 0x91, 0x1d, 0x73, 0xca,
0x21, 0xd7, 0x54, 0x7e, 0x4d, 0xfe, 0x03, 0xb7, 0xe4, 0x98, 0x53, 0x2a, 0xf8, 0x97, 0xa4, 0xba,
0x67, 0x64, 0x29, 0x07, 0x43, 0x6e, 0xd3, 0x4f, 0x3f, 0xdd, 0xea, 0x8f, 0x99, 0x6e, 0xc1, 0x27,
0xa9, 0xb4, 0x59, 0x3d, 0x9f, 0xc6, 0xaa, 0x98, 0xc5, 0xaa, 0xb4, 0x5c, 0x96, 0x42, 0x27, 0xb3,
0x38, 0xd5, 0xaa, 0xae, 0xcc, 0xec, 0x70, 0x7b, 0x66, 0x2c, 0xb7, 0x66, 0x56, 0x08, 0xab, 0x65,
0x6c, 0xa6, 0x95, 0x56, 0x56, 0xb1, 0x40, 0xaa, 0x69, 0xcb, 0x9e, 0x7a, 0xf6, 0xf4, 0x70, 0xfb,
0xfa, 0x7a, 0xaa, 0x52, 0x45, 0xa4, 0x19, 0x9e, 0x1c, 0x7f, 0xf2, 0xdb, 0x22, 0x0c, 0x9e, 0x3a,
0x0f, 0xec, 0x33, 0xe8, 0x57, 0x32, 0x31, 0x41, 0x6f, 0xb3, 0x77, 0x77, 0xb4, 0x3d, 0x99, 0x9e,
0xe5, 0x6a, 0xba, 0x2b, 0x13, 0xf3, 0xdc, 0x72, 0x1b, 0x12, 0x9f, 0xdd, 0x83, 0xc5, 0xb8, 0xaa,
0x83, 0x05, 0x32, 0xdb, 0x3a, 0xdb, 0xec, 0xc1, 0xee, 0x1e, 0x5a, 0xdd, 0x1f, 0x9c, 0xfc, 0xb5,
0xb1, 0xf8, 0x60, 0x77, 0x2f, 0x44, 0x33, 0x76, 0x0f, 0x96, 0x0a, 0x51, 0x28, 0x7d, 0x1c, 0xf4,
0xc9, 0xc1, 0x7b, 0x67, 0x3b, 0x78, 0x4a, 0x3c, 0xfa, 0xb2, 0xb7, 0xc1, 0x98, 0x75, 0x52, 0xf0,
0xe0, 0xfc, 0xbb, 0x62, 0x0e, 0x93, 0x82, 0xbb, 0x98, 0x91, 0xcf, 0x3e, 0x82, 0x05, 0xa9, 0x82,
0x25, 0xb2, 0xda, 0x3c, 0xdb, 0xea, 0xdb, 0x67, 0x64, 0xb3, 0x20, 0x15, 0xfb, 0x1a, 0x06, 0x59,
0x9d, 0x0a, 0x9b, 0xcf, 0x83, 0xc1, 0xe6, 0xe2, 0xdd, 0xd1, 0xf6, 0xed, 0xb3, 0xcd, 0x1e, 0xd7,
0xa9, 0x78, 0x91, 0xcf, 0xc9, 0xb6, 0xb1, 0x62, 0x4f, 0x60, 0xd5, 0x05, 0x1d, 0x89, 0x43, 0x51,
0x5a, 0x13, 0x2c, 0xd3, 0xd7, 0xef, 0xbc, 0x2b, 0xdf, 0x87, 0xc4, 0x0e, 0x57, 0x8a, 0x8e, 0x34,
0xf9, 0x12, 0x96, 0x9b, 0x2e, 0xb0, 0x00, 0x06, 0x71, 0xad, 0xb5, 0x28, 0x2d, 0xb5, 0xae, 0x1f,
0x36, 0x22, 0x5b, 0x87, 0xf3, 0xb9, 0x2c, 0xa4, 0xa5, 0xde, 0xf4, 0x43, 0x27, 0x4c, 0x7e, 0xef,
0xc1, 0xc0, 0xf7, 0x82, 0xdd, 0x04, 0xa8, 0x0d, 0x4f, 0x45, 0x54, 0x1b, 0x11, 0x7b, 0xf3, 0x21,
0x21, 0x7b, 0x46, 0xc4, 0xec, 0x06, 0x0c, 0x6b, 0x23, 0xb4, 0xd3, 0x3a, 0x27, 0xcb, 0x08, 0x90,
0x72, 0x03, 0x46, 0xe6, 0xd8, 0x58, 0x51, 0x38, 0xf5, 0x22, 0xa9, 0xc1, 0x41, 0x44, 0xb8, 0x09,
0x50, 0xea, 0xa8, 0x12, 0x5a, 0xaa, 0xc4, 0x50, 0x7b, 0xfb, 0xe1, 0xb0, 0xd4, 0xbb, 0x0e, 0x60,
0x5b, 0xb0, 0x52, 0xea, 0xc8, 0x66, 0x5a, 0x59, 0x9b, 0x8b, 0x84, 0x7a, 0xd8, 0x0f, 0x47, 0xa5,
0x7e, 0xd1, 0x40, 0xec, 0x36, 0x5c, 0x38, 0xd5, 0xbb, 0xaf, 0x2c, 0x11, 0x69, 0xf5, 0x14, 0xc5,
0x0f, 0x4d, 0x7e, 0x1d, 0x02, 0xb4, 0x97, 0x83, 0x31, 0xe8, 0xf3, 0x52, 0x95, 0x3e, 0x1d, 0x3a,
0x23, 0xb6, 0x2f, 0x73, 0xe1, 0x93, 0xa0, 0x33, 0x06, 0x70, 0x20, 0x74, 0x29, 0xf2, 0xc8, 0x58,
0x1e, 0x1f, 0xf8, 0x0c, 0x46, 0x0e, 0x7b, 0x8e, 0x10, 0x9a, 0x99, 0x9c, 0xcf, 0x7d, 0xf0, 0x74,
0x26, 0x4c, 0xc5, 0x07, 0x3e, 0x5e, 0x3a, 0x63, 0xa5, 0x4d, 0x56, 0x88, 0xc2, 0xc7, 0xe7, 0x04,
0xac, 0x10, 0x7e, 0x28, 0x2a, 0x78, 0x55, 0x89, 0x24, 0x18, 0xb8, 0x0a, 0x21, 0xf4, 0x94, 0x10,
0xac, 0x10, 0x11, 0x12, 0xa9, 0xed, 0x31, 0x5d, 0x88, 0x7e, 0x38, 0x44, 0xe4, 0x1b, 0x04, 0x30,
0x7d, 0x52, 0x1f, 0x69, 0x69, 0xc5, 0x1c, 0x43, 0x1c, 0xba, 0xf4, 0x11, 0xfd, 0xa1, 0x01, 0xd9,
0x35, 0x58, 0xc6, 0x1c, 0x23, 0x9b, 0x55, 0x01, 0xb8, 0x1b, 0x80, 0xf2, 0x8b, 0xac, 0x62, 0xb7,
0x60, 0x55, 0x96, 0x3c, 0xb6, 0xf2, 0x50, 0x44, 0x54, 0x93, 0x11, 0xe9, 0x57, 0x1a, 0x70, 0x07,
0x6b, 0xb3, 0x01, 0xa3, 0x2e, 0x65, 0xc5, 0x85, 0xd9, 0x21, 0x74, 0xbd, 0x50, 0x15, 0x57, 0xff,
0xed, 0xe5, 0x11, 0x56, 0xb3, 0xf5, 0x42, 0x94, 0x0b, 0x5d, 0x2f, 0x44, 0xd8, 0x84, 0x51, 0x5d,
0x8a, 0x43, 0x19, 0x5b, 0x3e, 0xcf, 0x45, 0x70, 0xd1, 0x55, 0xbb, 0x03, 0xb1, 0xf7, 0x61, 0x0d,
0x2b, 0x1c, 0x69, 0x11, 0xe7, 0x5c, 0x16, 0x44, 0x5b, 0x23, 0xda, 0x45, 0xc4, 0xc3, 0x16, 0x66,
0x1f, 0x02, 0x23, 0x6a, 0x5d, 0x76, 0xc9, 0x97, 0x88, 0x7c, 0x09, 0x35, 0x7b, 0x5d, 0x05, 0xbe,
0x91, 0x2a, 0xdd, 0xe7, 0x75, 0x6e, 0x03, 0xe6, 0x2a, 0xe4, 0x45, 0x36, 0x06, 0xa8, 0xd2, 0x82,
0xbf, 0x74, 0xca, 0xcb, 0x2e, 0xea, 0x16, 0xc1, 0x0f, 0x1d, 0x29, 0x7d, 0x20, 0xcb, 0xd4, 0x08,
0x1b, 0x69, 0xe1, 0x78, 0xeb, 0xee, 0x43, 0xad, 0x26, 0x74, 0x0a, 0x36, 0x83, 0xcb, 0x1d, 0x3a,
0x65, 0xcf, 0xad, 0x08, 0xae, 0x10, 0xbf, 0xe3, 0x69, 0xc7, 0x6b, 0xd8, 0xa7, 0x70, 0xb5, 0x63,
0x50, 0xaa, 0x44, 0xf8, 0xb8, 0x83, 0xab, 0x64, 0x73, 0xa5, 0xd5, 0x7e, 0xdf, 0x2a, 0xd9, 0x75,
0x58, 0xae, 0x52, 0x2d, 0xf6, 0x65, 0x9e, 0x07, 0xff, 0x73, 0x0f, 0xb3, 0x91, 0xd9, 0x55, 0x58,
0xaa, 0x52, 0x13, 0xf3, 0x32, 0x08, 0x48, 0xe3, 0x25, 0x57, 0x04, 0x63, 0x05, 0xcf, 0x83, 0x6b,
0x4d, 0x11, 0x48, 0x74, 0x45, 0x38, 0x0d, 0xf6, 0x7a, 0x53, 0x84, 0x06, 0x61, 0x13, 0x58, 0xa9,
0xd2, 0x44, 0x9c, 0x32, 0x6e, 0xb8, 0xfe, 0x77, 0x31, 0xe7, 0x23, 0xe7, 0xaf, 0x8e, 0xf7, 0xb5,
0x10, 0xc1, 0xff, 0x1b, 0x1f, 0x0d, 0x82, 0xed, 0x6f, 0xa5, 0x24, 0xb8, 0xe9, 0xda, 0xdf, 0x81,
0xd8, 0x1d, 0xb8, 0x68, 0xb3, 0x2a, 0xa2, 0x42, 0x46, 0x3c, 0xcf, 0x55, 0x1c, 0x8c, 0x9b, 0xe7,
0x5e, 0x3d, 0x42, 0x74, 0x07, 0x41, 0xf6, 0x01, 0x30, 0xe4, 0xc5, 0x2a, 0xcf, 0x79, 0x65, 0x84,
0xa7, 0x6e, 0x10, 0x75, 0xcd, 0x66, 0xd5, 0x03, 0xaf, 0x70, 0xec, 0x75, 0x38, 0x4f, 0x03, 0x2d,
0xd8, 0x74, 0x4f, 0x93, 0x04, 0xbc, 0xad, 0x6e, 0xf0, 0xb9, 0x01, 0xb9, 0xe5, 0xc2, 0x25, 0xe8,
0x3b, 0x44, 0xf0, 0x69, 0x9a, 0x23, 0x5e, 0x45, 0xce, 0x76, 0xe2, 0x9e, 0x26, 0x22, 0x7b, 0x64,
0xdf, 0xa8, 0x9d, 0xf9, 0xad, 0x56, 0x4d, 0xd6, 0x13, 0x03, 0x2b, 0xdd, 0xe9, 0xcd, 0xd6, 0x60,
0x31, 0x57, 0x47, 0x7e, 0x22, 0xe1, 0x11, 0xa7, 0x48, 0x26, 0xd3, 0xac, 0x19, 0x48, 0x78, 0x46,
0x56, 0xc1, 0x7f, 0xf4, 0x73, 0x08, 0x8f, 0x88, 0x28, 0x55, 0xf8, 0xf1, 0x83, 0x47, 0x7c, 0xec,
0x4a, 0x15, 0xd1, 0x01, 0x36, 0xde, 0x4d, 0xa0, 0x81, 0x52, 0xc5, 0x13, 0x99, 0xe7, 0x93, 0x9f,
0x7b, 0xb0, 0xdc, 0xec, 0x39, 0xf6, 0x55, 0x77, 0x2b, 0xe0, 0xbe, 0xba, 0xf5, 0xf6, 0xe5, 0xf8,
0xb0, 0xb4, 0xfa, 0xb8, 0x5d, 0x1d, 0x5f, 0xb4, 0xab, 0xe3, 0x3f, 0x1b, 0xfb, 0xfd, 0x22, 0x60,
0x78, 0x8a, 0xe1, 0x5d, 0x4c, 0xf0, 0x81, 0x0b, 0xca, 0x7d, 0x18, 0x7a, 0x09, 0xeb, 0x9f, 0xc5,
0x3c, 0xca, 0x78, 0x99, 0xe4, 0xc2, 0x50, 0x15, 0x56, 0x43, 0xc8, 0x62, 0xfe, 0xd8, 0x21, 0x0d,
0x41, 0xcd, 0x5f, 0x8a, 0xd8, 0x1a, 0xaa, 0x89, 0x23, 0x3c, 0x73, 0xc8, 0x64, 0x07, 0x96, 0xdc,
0x7a, 0x66, 0x9f, 0x37, 0x1d, 0x76, 0x89, 0x6e, 0xbd, 0x6d, 0x9f, 0xfb, 0x48, 0x89, 0x3f, 0xf9,
0xa5, 0x07, 0x03, 0x0f, 0xe1, 0x35, 0x29, 0xf8, 0x4b, 0xa5, 0x7d, 0x8f, 0x9c, 0x40, 0xa8, 0x2c,
0x95, 0x6e, 0x36, 0x28, 0x09, 0x98, 0x94, 0x9e, 0x1f, 0x5b, 0x61, 0x7c, 0xab, 0xbc, 0x84, 0xf8,
0x91, 0xc3, 0x5d, 0xc3, 0xbc, 0x84, 0xbd, 0xd6, 0x52, 0x99, 0x66, 0x63, 0xe0, 0x19, 0xb1, 0x23,
0xc4, 0xdc, 0xc2, 0xa0, 0xf3, 0x64, 0x0f, 0x46, 0x9d, 0x5f, 0x87, 0xb7, 0x2c, 0x76, 0x7f, 0x51,
0x16, 0xda, 0x8b, 0x82, 0xf3, 0x80, 0xa7, 0xc2, 0xc8, 0x57, 0x82, 0x82, 0x1a, 0x86, 0xa7, 0xf2,
0xfd, 0xe0, 0xf5, 0x9b, 0xf1, 0xb9, 0x3f, 0xdf, 0x8c, 0xcf, 0xfd, 0x74, 0x32, 0xee, 0xbd, 0x3e,
0x19, 0xf7, 0xfe, 0x38, 0x19, 0xf7, 0xfe, 0x3e, 0x19, 0xf7, 0xe6, 0x4b, 0xf4, 0x17, 0xf8, 0xf1,
0x3f, 0x01, 0x00, 0x00, 0xff, 0xff, 0x4f, 0x2b, 0x30, 0xd6, 0x6d, 0x0a, 0x00, 0x00,
}
func (m *Metrics) Marshal() (dAtA []byte, err error) {
@ -597,6 +647,16 @@ func (m *Metrics) MarshalTo(dAtA []byte) (int, error) {
i += n
}
}
if m.MemoryEvents != nil {
dAtA[i] = 0x42
i++
i = encodeVarintMetrics(dAtA, i, uint64(m.MemoryEvents.Size()))
n6, err := m.MemoryEvents.MarshalTo(dAtA[i:])
if err != nil {
return 0, err
}
i += n6
}
if m.XXX_unrecognized != nil {
i += copy(dAtA[i:], m.XXX_unrecognized)
}
@ -921,6 +981,52 @@ func (m *MemoryStat) MarshalTo(dAtA []byte) (int, error) {
return i, nil
}
func (m *MemoryEvents) Marshal() (dAtA []byte, err error) {
size := m.Size()
dAtA = make([]byte, size)
n, err := m.MarshalTo(dAtA)
if err != nil {
return nil, err
}
return dAtA[:n], nil
}
func (m *MemoryEvents) MarshalTo(dAtA []byte) (int, error) {
var i int
_ = i
var l int
_ = l
if m.Low != 0 {
dAtA[i] = 0x8
i++
i = encodeVarintMetrics(dAtA, i, uint64(m.Low))
}
if m.High != 0 {
dAtA[i] = 0x10
i++
i = encodeVarintMetrics(dAtA, i, uint64(m.High))
}
if m.Max != 0 {
dAtA[i] = 0x18
i++
i = encodeVarintMetrics(dAtA, i, uint64(m.Max))
}
if m.Oom != 0 {
dAtA[i] = 0x20
i++
i = encodeVarintMetrics(dAtA, i, uint64(m.Oom))
}
if m.OomKill != 0 {
dAtA[i] = 0x28
i++
i = encodeVarintMetrics(dAtA, i, uint64(m.OomKill))
}
if m.XXX_unrecognized != nil {
i += copy(dAtA[i:], m.XXX_unrecognized)
}
return i, nil
}
func (m *RdmaStat) Marshal() (dAtA []byte, err error) {
size := m.Size()
dAtA = make([]byte, size)
@ -1165,6 +1271,10 @@ func (m *Metrics) Size() (n int) {
n += 1 + l + sovMetrics(uint64(l))
}
}
if m.MemoryEvents != nil {
l = m.MemoryEvents.Size()
n += 1 + l + sovMetrics(uint64(l))
}
if m.XXX_unrecognized != nil {
n += len(m.XXX_unrecognized)
}
@ -1336,6 +1446,33 @@ func (m *MemoryStat) Size() (n int) {
return n
}
func (m *MemoryEvents) Size() (n int) {
if m == nil {
return 0
}
var l int
_ = l
if m.Low != 0 {
n += 1 + sovMetrics(uint64(m.Low))
}
if m.High != 0 {
n += 1 + sovMetrics(uint64(m.High))
}
if m.Max != 0 {
n += 1 + sovMetrics(uint64(m.Max))
}
if m.Oom != 0 {
n += 1 + sovMetrics(uint64(m.Oom))
}
if m.OomKill != 0 {
n += 1 + sovMetrics(uint64(m.OomKill))
}
if m.XXX_unrecognized != nil {
n += len(m.XXX_unrecognized)
}
return n
}
func (m *RdmaStat) Size() (n int) {
if m == nil {
return 0
@ -1476,6 +1613,7 @@ func (this *Metrics) String() string {
`Rdma:` + strings.Replace(fmt.Sprintf("%v", this.Rdma), "RdmaStat", "RdmaStat", 1) + `,`,
`Io:` + strings.Replace(fmt.Sprintf("%v", this.Io), "IOStat", "IOStat", 1) + `,`,
`Hugetlb:` + strings.Replace(fmt.Sprintf("%v", this.Hugetlb), "HugeTlbStat", "HugeTlbStat", 1) + `,`,
`MemoryEvents:` + strings.Replace(fmt.Sprintf("%v", this.MemoryEvents), "MemoryEvents", "MemoryEvents", 1) + `,`,
`XXX_unrecognized:` + fmt.Sprintf("%v", this.XXX_unrecognized) + `,`,
`}`,
}, "")
@ -1554,6 +1692,21 @@ func (this *MemoryStat) String() string {
}, "")
return s
}
func (this *MemoryEvents) String() string {
if this == nil {
return "nil"
}
s := strings.Join([]string{`&MemoryEvents{`,
`Low:` + fmt.Sprintf("%v", this.Low) + `,`,
`High:` + fmt.Sprintf("%v", this.High) + `,`,
`Max:` + fmt.Sprintf("%v", this.Max) + `,`,
`Oom:` + fmt.Sprintf("%v", this.Oom) + `,`,
`OomKill:` + fmt.Sprintf("%v", this.OomKill) + `,`,
`XXX_unrecognized:` + fmt.Sprintf("%v", this.XXX_unrecognized) + `,`,
`}`,
}, "")
return s
}
func (this *RdmaStat) String() string {
if this == nil {
return "nil"
@ -1870,6 +2023,42 @@ func (m *Metrics) Unmarshal(dAtA []byte) error {
return err
}
iNdEx = postIndex
case 8:
if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field MemoryEvents", wireType)
}
var msglen int
for shift := uint(0); ; shift += 7 {
if shift >= 64 {
return ErrIntOverflowMetrics
}
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := dAtA[iNdEx]
iNdEx++
msglen |= int(b&0x7F) << shift
if b < 0x80 {
break
}
}
if msglen < 0 {
return ErrInvalidLengthMetrics
}
postIndex := iNdEx + msglen
if postIndex < 0 {
return ErrInvalidLengthMetrics
}
if postIndex > l {
return io.ErrUnexpectedEOF
}
if m.MemoryEvents == nil {
m.MemoryEvents = &MemoryEvents{}
}
if err := m.MemoryEvents.Unmarshal(dAtA[iNdEx:postIndex]); err != nil {
return err
}
iNdEx = postIndex
default:
iNdEx = preIndex
skippy, err := skipMetrics(dAtA[iNdEx:])
@ -2874,6 +3063,155 @@ func (m *MemoryStat) Unmarshal(dAtA []byte) error {
}
return nil
}
func (m *MemoryEvents) Unmarshal(dAtA []byte) error {
l := len(dAtA)
iNdEx := 0
for iNdEx < l {
preIndex := iNdEx
var wire uint64
for shift := uint(0); ; shift += 7 {
if shift >= 64 {
return ErrIntOverflowMetrics
}
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := dAtA[iNdEx]
iNdEx++
wire |= uint64(b&0x7F) << shift
if b < 0x80 {
break
}
}
fieldNum := int32(wire >> 3)
wireType := int(wire & 0x7)
if wireType == 4 {
return fmt.Errorf("proto: MemoryEvents: wiretype end group for non-group")
}
if fieldNum <= 0 {
return fmt.Errorf("proto: MemoryEvents: illegal tag %d (wire type %d)", fieldNum, wire)
}
switch fieldNum {
case 1:
if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field Low", wireType)
}
m.Low = 0
for shift := uint(0); ; shift += 7 {
if shift >= 64 {
return ErrIntOverflowMetrics
}
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := dAtA[iNdEx]
iNdEx++
m.Low |= uint64(b&0x7F) << shift
if b < 0x80 {
break
}
}
case 2:
if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field High", wireType)
}
m.High = 0
for shift := uint(0); ; shift += 7 {
if shift >= 64 {
return ErrIntOverflowMetrics
}
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := dAtA[iNdEx]
iNdEx++
m.High |= uint64(b&0x7F) << shift
if b < 0x80 {
break
}
}
case 3:
if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field Max", wireType)
}
m.Max = 0
for shift := uint(0); ; shift += 7 {
if shift >= 64 {
return ErrIntOverflowMetrics
}
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := dAtA[iNdEx]
iNdEx++
m.Max |= uint64(b&0x7F) << shift
if b < 0x80 {
break
}
}
case 4:
if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field Oom", wireType)
}
m.Oom = 0
for shift := uint(0); ; shift += 7 {
if shift >= 64 {
return ErrIntOverflowMetrics
}
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := dAtA[iNdEx]
iNdEx++
m.Oom |= uint64(b&0x7F) << shift
if b < 0x80 {
break
}
}
case 5:
if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field OomKill", wireType)
}
m.OomKill = 0
for shift := uint(0); ; shift += 7 {
if shift >= 64 {
return ErrIntOverflowMetrics
}
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := dAtA[iNdEx]
iNdEx++
m.OomKill |= uint64(b&0x7F) << shift
if b < 0x80 {
break
}
}
default:
iNdEx = preIndex
skippy, err := skipMetrics(dAtA[iNdEx:])
if err != nil {
return err
}
if skippy < 0 {
return ErrInvalidLengthMetrics
}
if (iNdEx + skippy) < 0 {
return ErrInvalidLengthMetrics
}
if (iNdEx + skippy) > l {
return io.ErrUnexpectedEOF
}
m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...)
iNdEx += skippy
}
}
if iNdEx > l {
return io.ErrUnexpectedEOF
}
return nil
}
func (m *RdmaStat) Unmarshal(dAtA []byte) error {
l := len(dAtA)
iNdEx := 0

View file

@ -11,6 +11,7 @@ message Metrics {
RdmaStat rdma = 5;
IOStat io = 6;
repeated HugeTlbStat hugetlb = 7;
MemoryEvents memory_events = 8;
}
message PidsStat {
@ -65,6 +66,14 @@ message MemoryStat {
uint64 swap_limit = 35;
}
message MemoryEvents {
uint64 low = 1;
uint64 high = 2;
uint64 max = 3;
uint64 oom = 4;
uint64 oom_kill = 5;
}
message RdmaStat {
repeated RdmaEntry current = 1;
repeated RdmaEntry limit = 2;

View file

@ -28,9 +28,8 @@ import (
"strings"
"time"
"github.com/godbus/dbus/v5"
"github.com/containerd/cgroups/v2/stats"
"github.com/godbus/dbus/v5"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
@ -85,6 +84,9 @@ func parseCgroupProcsFile(path string) ([]uint64, error) {
out = append(out, pid)
}
}
if err := s.Err(); err != nil {
return nil, err
}
return out, nil
}
@ -103,14 +105,6 @@ func parseKV(raw string) (string, interface{}, error) {
}
}
func readUint(path string) (uint64, error) {
v, err := ioutil.ReadFile(path)
if err != nil {
return 0, err
}
return parseUint(strings.TrimSpace(string(v)), 10, 64)
}
func parseUint(s string, base, bitSize int) (uint64, error) {
v, err := strconv.ParseUint(s, base, bitSize)
if err != nil {
@ -144,9 +138,6 @@ func parseCgroupFromReader(r io.Reader) (string, error) {
s = bufio.NewScanner(r)
)
for s.Scan() {
if err := s.Err(); err != nil {
return "", err
}
var (
text = s.Text()
parts = strings.SplitN(text, ":", 3)
@ -159,6 +150,9 @@ func parseCgroupFromReader(r io.Reader) (string, error) {
return parts[2], nil
}
}
if err := s.Err(); err != nil {
return "", err
}
return "", fmt.Errorf("cgroup path not found")
}
@ -175,7 +169,7 @@ func ToResources(spec *specs.LinuxResources) *Resources {
Mems: cpu.Mems,
}
if shares := cpu.Shares; shares != nil {
convertedWeight := (1 + ((*shares-2)*9999)/262142)
convertedWeight := 1 + ((*shares-2)*9999)/262142
resources.CPU.Weight = &convertedWeight
}
if period := cpu.Period; period != nil {
@ -298,8 +292,8 @@ func readIoStats(path string) []*stats.IOEntry {
Major: major,
Minor: minor,
}
for _, stats := range parts {
keyPairValue := strings.Split(stats, "=")
for _, s := range parts {
keyPairValue := strings.Split(s, "=")
if len(keyPairValue) != 2 {
continue
}