Sebastiaan van Stijn 2024-04-16 16:20:47 +02:00 committed by GitHub
commit 3ab6d16e2f
80 changed files with 2816 additions and 2198 deletions


@ -77,7 +77,7 @@ require (
github.com/morikuni/aec v1.0.0
github.com/opencontainers/go-digest v1.0.0
github.com/opencontainers/image-spec v1.1.0-rc5
github.com/opencontainers/runc v1.1.12
github.com/opencontainers/runc v1.2.0-rc.1
github.com/opencontainers/runtime-spec v1.2.0
github.com/opencontainers/selinux v1.11.0
github.com/pelletier/go-toml v1.9.5
@ -136,7 +136,7 @@ require (
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 // indirect
github.com/cenkalti/backoff/v4 v4.2.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/cilium/ebpf v0.11.0 // indirect
github.com/cilium/ebpf v0.12.3 // indirect
github.com/container-storage-interface/spec v1.5.0 // indirect
github.com/containerd/cgroups v1.1.0 // indirect
github.com/containerd/console v1.0.4 // indirect


@ -124,8 +124,8 @@ github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XL
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/cilium/ebpf v0.11.0 h1:V8gS/bTCCjX9uUnkUFUpPsksM8n1lXBAvHcpiFk1X2Y=
github.com/cilium/ebpf v0.11.0/go.mod h1:WE7CZAnqOL2RouJ4f1uyNhqr2P4CCvXFIqdRDUgWsVs=
github.com/cilium/ebpf v0.12.3 h1:8ht6F9MquybnY97at+VDZb3eQQr8ev79RueWeVaEcG4=
github.com/cilium/ebpf v0.12.3/go.mod h1:TctK1ivibvI3znr66ljgi4hqOT8EYQjz1KWBfb1UVgM=
github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=
github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
@ -539,8 +539,8 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.1.0-rc5 h1:Ygwkfw9bpDvs+c9E34SdgGOj41dX/cbdlwvlWt0pnFI=
github.com/opencontainers/image-spec v1.1.0-rc5/go.mod h1:X4pATf0uXsnn3g5aiGIsVnJBR4mxhKzfwmvK/B2NTm8=
github.com/opencontainers/runc v1.1.12 h1:BOIssBaW1La0/qbNZHXOOa71dZfZEQOzW7dqQf3phss=
github.com/opencontainers/runc v1.1.12/go.mod h1:S+lQwSfncpBha7XTy/5lBwWgm5+y5Ma/O44Ekby9FK8=
github.com/opencontainers/runc v1.2.0-rc.1 h1:SMjop2pxxYRTfKdsigna/8xRoaoCfIQfD2cVuOb64/o=
github.com/opencontainers/runc v1.2.0-rc.1/go.mod h1:m9JwxfHzXz5YTTXBQr7EY9KTuazFAGPyMQx2nRR3vTw=
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk=
github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=


@ -4,6 +4,9 @@ BasedOnStyle: LLVM
AlignAfterOpenBracket: DontAlign
AlignConsecutiveAssignments: true
AlignEscapedNewlines: DontAlign
# mkdocs annotations in source code are written as trailing comments
# and alignment pushes these really far away from the content.
AlignTrailingComments: false
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: false
AllowAllParametersOfDeclarationOnNextLine: false
@ -16,4 +19,7 @@ UseTab: ForContinuationAndIndentation
ColumnLimit: 1000
# Go compiler comments need to stay unindented.
CommentPragmas: '^go:.*'
# linux/bpf.h needs to be included before bpf/bpf_helpers.h for types like __u64
# and sorting makes this impossible.
SortIncludes: false
...


@ -1,15 +1,7 @@
---
issues:
exclude-rules:
# syscall param structs will have unused fields in Go code.
- path: syscall.*.go
linters:
- structcheck
linters:
disable-all: true
enable:
- errcheck
- goimports
- gosimple
- govet
@ -19,8 +11,3 @@ linters:
- typecheck
- unused
- gofmt
# Could be enabled later:
# - gocyclo
# - maligned
# - gosec


@ -1,9 +1,9 @@
# The development version of clang is distributed as the 'clang' binary,
# while stable/released versions have a version number attached.
# Pin the default clang to a stable version.
CLANG ?= clang-14
STRIP ?= llvm-strip-14
OBJCOPY ?= llvm-objcopy-14
CLANG ?= clang-17
STRIP ?= llvm-strip-17
OBJCOPY ?= llvm-objcopy-17
CFLAGS := -O2 -g -Wall -Werror $(CFLAGS)
CI_KERNEL_URL ?= https://github.com/cilium/ci-kernels/raw/master/
@ -21,12 +21,9 @@ CONTAINER_RUN_ARGS ?= $(if $(filter ${CONTAINER_ENGINE}, podman), --log-driver=n
IMAGE := $(shell cat ${REPODIR}/testdata/docker/IMAGE)
VERSION := $(shell cat ${REPODIR}/testdata/docker/VERSION)
# clang <8 doesn't tag relocs properly (STT_NOTYPE)
# clang 9 is the first version emitting BTF
TARGETS := \
testdata/loader-clang-7 \
testdata/loader-clang-9 \
testdata/loader-clang-11 \
testdata/loader-clang-14 \
testdata/loader-$(CLANG) \
testdata/manyprogs \
testdata/btf_map_init \
@ -36,6 +33,7 @@ TARGETS := \
testdata/invalid_btf_map_init \
testdata/strings \
testdata/freplace \
testdata/fentry_fexit \
testdata/iproute2_map_compat \
testdata/map_spin_lock \
testdata/subprog_reloc \
@ -45,6 +43,7 @@ TARGETS := \
testdata/kfunc \
testdata/invalid-kfunc \
testdata/kfunc-kmod \
testdata/constants \
btf/testdata/relocs \
btf/testdata/relocs_read \
btf/testdata/relocs_read_tgt \
@ -56,22 +55,26 @@ TARGETS := \
# Build all ELF binaries using a containerized LLVM toolchain.
container-all:
+${CONTAINER_ENGINE} run --rm -ti ${CONTAINER_RUN_ARGS} \
+${CONTAINER_ENGINE} run --rm -t ${CONTAINER_RUN_ARGS} \
-v "${REPODIR}":/ebpf -w /ebpf --env MAKEFLAGS \
--env CFLAGS="-fdebug-prefix-map=/ebpf=." \
--env HOME="/tmp" \
--env BPF2GO_CC="$(CLANG)" \
--env BPF2GO_FLAGS="-fdebug-prefix-map=/ebpf=. $(CFLAGS)" \
"${IMAGE}:${VERSION}" \
make all
# (debug) Drop the user into a shell inside the container as root.
# Set BPF2GO_ envs to make 'make generate' just work.
container-shell:
${CONTAINER_ENGINE} run --rm -ti \
-v "${REPODIR}":/ebpf -w /ebpf \
--env BPF2GO_CC="$(CLANG)" \
--env BPF2GO_FLAGS="-fdebug-prefix-map=/ebpf=. $(CFLAGS)" \
"${IMAGE}:${VERSION}"
clean:
-$(RM) testdata/*.elf
-$(RM) btf/testdata/*.elf
find "$(CURDIR)" -name "*.elf" -delete
find "$(CURDIR)" -name "*.o" -delete
format:
find . -type f -name "*.c" | xargs clang-format -i
@ -80,9 +83,6 @@ all: format $(addsuffix -el.elf,$(TARGETS)) $(addsuffix -eb.elf,$(TARGETS)) gene
ln -srf testdata/loader-$(CLANG)-el.elf testdata/loader-el.elf
ln -srf testdata/loader-$(CLANG)-eb.elf testdata/loader-eb.elf
# $BPF_CLANG is used in go:generate invocations.
generate: export BPF_CLANG := $(CLANG)
generate: export BPF_CFLAGS := $(CFLAGS)
generate:
go generate ./...
@ -103,13 +103,12 @@ testdata/loader-%-eb.elf: testdata/loader.c
$(STRIP) -g $@
.PHONY: generate-btf
generate-btf: KERNEL_VERSION?=5.19
generate-btf: KERNEL_VERSION?=6.1.29
generate-btf:
$(eval TMP := $(shell mktemp -d))
curl -fL "$(CI_KERNEL_URL)/linux-$(KERNEL_VERSION).bz" -o "$(TMP)/bzImage"
/lib/modules/$(uname -r)/build/scripts/extract-vmlinux "$(TMP)/bzImage" > "$(TMP)/vmlinux"
curl -fL "$(CI_KERNEL_URL)/linux-$(KERNEL_VERSION)-amd64.tgz" -o "$(TMP)/linux.tgz"
tar xvf "$(TMP)/linux.tgz" -C "$(TMP)" --strip-components=2 ./boot/vmlinuz ./lib/modules
/lib/modules/$(shell uname -r)/build/scripts/extract-vmlinux "$(TMP)/vmlinuz" > "$(TMP)/vmlinux"
$(OBJCOPY) --dump-section .BTF=/dev/stdout "$(TMP)/vmlinux" /dev/null | gzip > "btf/testdata/vmlinux.btf.gz"
curl -fL "$(CI_KERNEL_URL)/linux-$(KERNEL_VERSION)-selftests-bpf.tgz" -o "$(TMP)/selftests.tgz"
tar -xf "$(TMP)/selftests.tgz" --to-stdout tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.ko | \
$(OBJCOPY) --dump-section .BTF="btf/testdata/btf_testmod.btf" - /dev/null
find "$(TMP)/modules" -type f -name bpf_testmod.ko -exec $(OBJCOPY) --dump-section .BTF="btf/testdata/btf_testmod.btf" {} /dev/null \;
$(RM) -r "$(TMP)"


@ -2,7 +2,7 @@
[![PkgGoDev](https://pkg.go.dev/badge/github.com/cilium/ebpf)](https://pkg.go.dev/github.com/cilium/ebpf)
![HoneyGopher](.github/images/cilium-ebpf.png)
![HoneyGopher](docs/ebpf/ebpf-go.png)
ebpf-go is a pure Go library that provides utilities for loading, compiling, and
debugging eBPF programs. It has minimal external dependencies and is intended to


@ -1,26 +1,26 @@
package asm
//go:generate stringer -output alu_string.go -type=Source,Endianness,ALUOp
//go:generate go run golang.org/x/tools/cmd/stringer@latest -output alu_string.go -type=Source,Endianness,ALUOp
// Source of ALU / ALU64 / Branch operations
//
// msb lsb
// +----+-+---+
// |op |S|cls|
// +----+-+---+
type Source uint8
// msb lsb
// +------------+-+---+
// | op |S|cls|
// +------------+-+---+
type Source uint16
const sourceMask OpCode = 0x08
const sourceMask OpCode = 0x0008
// Source bitmask
const (
// InvalidSource is returned by getters when invoked
// on non ALU / branch OpCodes.
InvalidSource Source = 0xff
InvalidSource Source = 0xffff
// ImmSource src is from constant
ImmSource Source = 0x00
ImmSource Source = 0x0000
// RegSource src is from register
RegSource Source = 0x08
RegSource Source = 0x0008
)
// The Endianness of a byte swap instruction.
@ -39,46 +39,56 @@ const (
// ALUOp are ALU / ALU64 operations
//
// msb lsb
// +----+-+---+
// |OP |s|cls|
// +----+-+---+
type ALUOp uint8
// msb lsb
// +-------+----+-+---+
// | EXT | OP |s|cls|
// +-------+----+-+---+
type ALUOp uint16
const aluMask OpCode = 0xf0
const aluMask OpCode = 0x3ff0
const (
// InvalidALUOp is returned by getters when invoked
// on non ALU OpCodes
InvalidALUOp ALUOp = 0xff
InvalidALUOp ALUOp = 0xffff
// Add - addition
Add ALUOp = 0x00
Add ALUOp = 0x0000
// Sub - subtraction
Sub ALUOp = 0x10
Sub ALUOp = 0x0010
// Mul - multiplication
Mul ALUOp = 0x20
Mul ALUOp = 0x0020
// Div - division
Div ALUOp = 0x30
Div ALUOp = 0x0030
// SDiv - signed division
SDiv ALUOp = Div + 0x0100
// Or - bitwise or
Or ALUOp = 0x40
Or ALUOp = 0x0040
// And - bitwise and
And ALUOp = 0x50
And ALUOp = 0x0050
// LSh - bitwise shift left
LSh ALUOp = 0x60
LSh ALUOp = 0x0060
// RSh - bitwise shift right
RSh ALUOp = 0x70
RSh ALUOp = 0x0070
// Neg - sign/unsign signing bit
Neg ALUOp = 0x80
Neg ALUOp = 0x0080
// Mod - modulo
Mod ALUOp = 0x90
Mod ALUOp = 0x0090
// SMod - signed modulo
SMod ALUOp = Mod + 0x0100
// Xor - bitwise xor
Xor ALUOp = 0xa0
Xor ALUOp = 0x00a0
// Mov - move value from one place to another
Mov ALUOp = 0xb0
// ArSh - arithmatic shift
ArSh ALUOp = 0xc0
Mov ALUOp = 0x00b0
// MovSX8 - move lower 8 bits, sign extended upper bits of target
MovSX8 ALUOp = Mov + 0x0100
// MovSX16 - move lower 16 bits, sign extended upper bits of target
MovSX16 ALUOp = Mov + 0x0200
// MovSX32 - move lower 32 bits, sign extended upper bits of target
MovSX32 ALUOp = Mov + 0x0300
// ArSh - arithmetic shift
ArSh ALUOp = 0x00c0
// Swap - endian conversions
Swap ALUOp = 0xd0
Swap ALUOp = 0x00d0
)
// HostTo converts from host to another endianness.
@ -102,6 +112,27 @@ func HostTo(endian Endianness, dst Register, size Size) Instruction {
}
}
// BSwap unconditionally reverses the order of bytes in a register.
func BSwap(dst Register, size Size) Instruction {
var imm int64
switch size {
case Half:
imm = 16
case Word:
imm = 32
case DWord:
imm = 64
default:
return Instruction{OpCode: InvalidOpCode}
}
return Instruction{
OpCode: OpCode(ALU64Class).SetALUOp(Swap),
Dst: dst,
Constant: imm,
}
}
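With ALUOp widened to 16 bits, the extended cpu v4 operations (SDiv, SMod, the MovSX family) coexist with the classic 8-bit ones, and BSwap gets its own constructor. A minimal sketch of emitting the new instructions through the package's existing Reg helper; the enclosing program is purely illustrative:

package main

import (
	"fmt"

	"github.com/cilium/ebpf/asm"
)

func main() {
	sdiv := asm.SDiv.Reg(asm.R0, asm.R1)    // r0 = r0 s/ r1 (signed division)
	movsx := asm.MovSX8.Reg(asm.R2, asm.R3) // r2 = (s8)r3, upper bits sign-extended

	// BSwap reverses byte order unconditionally, unlike HostTo which is
	// a no-op when the requested and host endianness already match.
	bswap := asm.BSwap(asm.R4, asm.Word) // r4 = bswap32(r4)

	fmt.Println(sdiv, movsx, bswap)
}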
// Op returns the OpCode for an ALU operation with a given source.
func (op ALUOp) Op(source Source) OpCode {
return OpCode(ALU64Class).SetALUOp(op).SetSource(source)


@ -8,7 +8,7 @@ func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[InvalidSource-255]
_ = x[InvalidSource-65535]
_ = x[ImmSource-0]
_ = x[RegSource-8]
}
@ -25,7 +25,7 @@ func (i Source) String() string {
return _Source_name_0
case i == 8:
return _Source_name_1
case i == 255:
case i == 65535:
return _Source_name_2
default:
return "Source(" + strconv.FormatInt(int64(i), 10) + ")"
@ -62,41 +62,51 @@ func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[InvalidALUOp-255]
_ = x[InvalidALUOp-65535]
_ = x[Add-0]
_ = x[Sub-16]
_ = x[Mul-32]
_ = x[Div-48]
_ = x[SDiv-304]
_ = x[Or-64]
_ = x[And-80]
_ = x[LSh-96]
_ = x[RSh-112]
_ = x[Neg-128]
_ = x[Mod-144]
_ = x[SMod-400]
_ = x[Xor-160]
_ = x[Mov-176]
_ = x[MovSX8-432]
_ = x[MovSX16-688]
_ = x[MovSX32-944]
_ = x[ArSh-192]
_ = x[Swap-208]
}
const _ALUOp_name = "AddSubMulDivOrAndLShRShNegModXorMovArShSwapInvalidALUOp"
const _ALUOp_name = "AddSubMulDivOrAndLShRShNegModXorMovArShSwapSDivSModMovSX8MovSX16MovSX32InvalidALUOp"
var _ALUOp_map = map[ALUOp]string{
0: _ALUOp_name[0:3],
16: _ALUOp_name[3:6],
32: _ALUOp_name[6:9],
48: _ALUOp_name[9:12],
64: _ALUOp_name[12:14],
80: _ALUOp_name[14:17],
96: _ALUOp_name[17:20],
112: _ALUOp_name[20:23],
128: _ALUOp_name[23:26],
144: _ALUOp_name[26:29],
160: _ALUOp_name[29:32],
176: _ALUOp_name[32:35],
192: _ALUOp_name[35:39],
208: _ALUOp_name[39:43],
255: _ALUOp_name[43:55],
0: _ALUOp_name[0:3],
16: _ALUOp_name[3:6],
32: _ALUOp_name[6:9],
48: _ALUOp_name[9:12],
64: _ALUOp_name[12:14],
80: _ALUOp_name[14:17],
96: _ALUOp_name[17:20],
112: _ALUOp_name[20:23],
128: _ALUOp_name[23:26],
144: _ALUOp_name[26:29],
160: _ALUOp_name[29:32],
176: _ALUOp_name[32:35],
192: _ALUOp_name[35:39],
208: _ALUOp_name[39:43],
304: _ALUOp_name[43:47],
400: _ALUOp_name[47:51],
432: _ALUOp_name[51:57],
688: _ALUOp_name[57:64],
944: _ALUOp_name[64:71],
65535: _ALUOp_name[71:83],
}
func (i ALUOp) String() string {


@ -1,6 +1,6 @@
package asm
//go:generate stringer -output func_string.go -type=BuiltinFunc
//go:generate go run golang.org/x/tools/cmd/stringer@latest -output func_string.go -type=BuiltinFunc
// BuiltinFunc is a built-in eBPF function.
type BuiltinFunc int32


@ -60,6 +60,34 @@ func (ins *Instruction) Unmarshal(r io.Reader, bo binary.ByteOrder) (uint64, err
}
ins.Offset = int16(bo.Uint16(data[2:4]))
if ins.OpCode.Class().IsALU() {
switch ins.OpCode.ALUOp() {
case Div:
if ins.Offset == 1 {
ins.OpCode = ins.OpCode.SetALUOp(SDiv)
ins.Offset = 0
}
case Mod:
if ins.Offset == 1 {
ins.OpCode = ins.OpCode.SetALUOp(SMod)
ins.Offset = 0
}
case Mov:
switch ins.Offset {
case 8:
ins.OpCode = ins.OpCode.SetALUOp(MovSX8)
ins.Offset = 0
case 16:
ins.OpCode = ins.OpCode.SetALUOp(MovSX16)
ins.Offset = 0
case 32:
ins.OpCode = ins.OpCode.SetALUOp(MovSX32)
ins.Offset = 0
}
}
}
// Convert to int32 before widening to int64
// to ensure the signed bit is carried over.
ins.Constant = int64(int32(bo.Uint32(data[4:8])))
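The intermediate int32 is what preserves negative immediates: a direct uint32-to-int64 widening would zero-extend instead. A tiny self-contained illustration of the difference:

package main

import "fmt"

func main() {
	raw := uint32(0xffffffff)
	fmt.Println(int64(int32(raw))) // -1: the sign bit is carried over
	fmt.Println(int64(raw))        // 4294967295: wrong for a signed immediate
}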
@ -106,8 +134,38 @@ func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error)
return 0, fmt.Errorf("can't marshal registers: %s", err)
}
if ins.OpCode.Class().IsALU() {
newOffset := int16(0)
switch ins.OpCode.ALUOp() {
case SDiv:
ins.OpCode = ins.OpCode.SetALUOp(Div)
newOffset = 1
case SMod:
ins.OpCode = ins.OpCode.SetALUOp(Mod)
newOffset = 1
case MovSX8:
ins.OpCode = ins.OpCode.SetALUOp(Mov)
newOffset = 8
case MovSX16:
ins.OpCode = ins.OpCode.SetALUOp(Mov)
newOffset = 16
case MovSX32:
ins.OpCode = ins.OpCode.SetALUOp(Mov)
newOffset = 32
}
if newOffset != 0 && ins.Offset != 0 {
return 0, fmt.Errorf("extended ALU opcodes should have an .Offset of 0: %s", ins)
}
ins.Offset = newOffset
}
op, err := ins.OpCode.bpfOpCode()
if err != nil {
return 0, err
}
data := make([]byte, InstructionSize)
data[0] = byte(ins.OpCode)
data[0] = op
data[1] = byte(regs)
bo.PutUint16(data[2:4], uint16(ins.Offset))
bo.PutUint32(data[4:8], uint32(cons))
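Marshal and Unmarshal thus agree on a wire convention: extended ALU ops travel as their base opcode plus a distinguishing Offset, which never survives into the in-memory Instruction. A sketch of that round-trip (the printed values follow from the encoding above: 0x3f is Div|RegSource|ALU64Class):

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"

	"github.com/cilium/ebpf/asm"
)

func main() {
	var buf bytes.Buffer
	// SDiv is Div + 0x0100 in memory, but plain BPF_DIV with Offset 1 on the wire.
	if _, err := asm.SDiv.Reg(asm.R0, asm.R1).Marshal(&buf, binary.LittleEndian); err != nil {
		panic(err)
	}
	raw := buf.Bytes()
	fmt.Printf("opcode=%#x offset=%d\n",
		raw[0], binary.LittleEndian.Uint16(raw[2:4])) // opcode=0x3f offset=1

	// Unmarshal folds the offset back into the extended opcode.
	var ins asm.Instruction
	if _, err := ins.Unmarshal(bytes.NewReader(raw), binary.LittleEndian); err != nil {
		panic(err)
	}
	fmt.Println(ins.OpCode.ALUOp() == asm.SDiv) // true
}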
@ -298,9 +356,9 @@ func (ins Instruction) Format(f fmt.State, c rune) {
goto ref
}
fmt.Fprintf(f, "%v ", op)
switch cls := op.Class(); {
case cls.isLoadOrStore():
fmt.Fprintf(f, "%v ", op)
switch op.Mode() {
case ImmMode:
fmt.Fprintf(f, "dst: %s imm: %d", ins.Dst, ins.Constant)
@ -308,21 +366,30 @@ func (ins Instruction) Format(f fmt.State, c rune) {
fmt.Fprintf(f, "imm: %d", ins.Constant)
case IndMode:
fmt.Fprintf(f, "dst: %s src: %s imm: %d", ins.Dst, ins.Src, ins.Constant)
case MemMode:
case MemMode, MemSXMode:
fmt.Fprintf(f, "dst: %s src: %s off: %d imm: %d", ins.Dst, ins.Src, ins.Offset, ins.Constant)
case XAddMode:
fmt.Fprintf(f, "dst: %s src: %s", ins.Dst, ins.Src)
}
case cls.IsALU():
fmt.Fprintf(f, "dst: %s ", ins.Dst)
if op.ALUOp() == Swap || op.Source() == ImmSource {
fmt.Fprintf(f, "%v", op)
if op == Swap.Op(ImmSource) {
fmt.Fprintf(f, "%d", ins.Constant)
}
fmt.Fprintf(f, " dst: %s ", ins.Dst)
switch {
case op.ALUOp() == Swap:
break
case op.Source() == ImmSource:
fmt.Fprintf(f, "imm: %d", ins.Constant)
} else {
default:
fmt.Fprintf(f, "src: %s", ins.Src)
}
case cls.IsJump():
fmt.Fprintf(f, "%v ", op)
switch jop := op.JumpOp(); jop {
case Call:
switch ins.Src {
@ -336,6 +403,13 @@ func (ins Instruction) Format(f fmt.State, c rune) {
fmt.Fprint(f, BuiltinFunc(ins.Constant))
}
case Ja:
if ins.OpCode.Class() == Jump32Class {
fmt.Fprintf(f, "imm: %d", ins.Constant)
} else {
fmt.Fprintf(f, "off: %d", ins.Offset)
}
default:
fmt.Fprintf(f, "dst: %s off: %d ", ins.Dst, ins.Offset)
if op.Source() == ImmSource {
@ -344,6 +418,8 @@ func (ins Instruction) Format(f fmt.State, c rune) {
fmt.Fprintf(f, "src: %s", ins.Src)
}
}
default:
fmt.Fprintf(f, "%v ", op)
}
ref:
@ -772,7 +848,8 @@ func (insns Instructions) encodeFunctionReferences() error {
}
switch {
case ins.IsFunctionReference() && ins.Constant == -1:
case ins.IsFunctionReference() && ins.Constant == -1,
ins.OpCode == Ja.opCode(Jump32Class, ImmSource) && ins.Constant == -1:
symOffset, ok := symbolOffsets[ins.Reference()]
if !ok {
return fmt.Errorf("%s at insn %d: symbol %q: %w", ins.OpCode, i, ins.Reference(), ErrUnsatisfiedProgramReference)


@ -1,6 +1,6 @@
package asm
//go:generate stringer -output jump_string.go -type=JumpOp
//go:generate go run golang.org/x/tools/cmd/stringer@latest -output jump_string.go -type=JumpOp
// JumpOp affect control flow.
//
@ -10,7 +10,7 @@ package asm
// +----+-+---+
type JumpOp uint8
const jumpMask OpCode = aluMask
const jumpMask OpCode = 0xf0
const (
// InvalidJumpOp is returned by getters when invoked
@ -103,13 +103,21 @@ func (op JumpOp) Reg32(dst, src Register, label string) Instruction {
}
func (op JumpOp) opCode(class Class, source Source) OpCode {
if op == Exit || op == Call || op == Ja {
if op == Exit || op == Call {
return InvalidOpCode
}
return OpCode(class).SetJumpOp(op).SetSource(source)
}
// LongJump returns a jump always instruction with a range of [-2^31, 2^31 - 1].
func LongJump(label string) Instruction {
return Instruction{
OpCode: Ja.opCode(Jump32Class, ImmSource),
Constant: -1,
}.WithReference(label)
}
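LongJump rides on the new Jump32Class encoding of Ja, whose 32-bit immediate escapes the ±2^15 range of the 16-bit Offset used by ordinary jumps; the placeholder Constant of -1 is resolved by encodeFunctionReferences alongside function calls, as the instruction.go hunk above shows. A short usage sketch, assuming the asm import and a hypothetical label name:

insns := asm.Instructions{
	asm.LongJump("exit"),
	// ... potentially far more than 2^15 instructions in between ...
	asm.Return().WithSymbol("exit"),
}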
// Label adjusts PC to the address of the label.
func (op JumpOp) Label(label string) Instruction {
if op == Call {


@ -1,6 +1,6 @@
package asm
//go:generate stringer -output load_store_string.go -type=Mode,Size
//go:generate go run golang.org/x/tools/cmd/stringer@latest -output load_store_string.go -type=Mode,Size
// Mode for load and store operations
//
@ -24,6 +24,8 @@ const (
IndMode Mode = 0x40
// MemMode - load from memory
MemMode Mode = 0x60
// MemSXMode - load from memory, sign extension
MemSXMode Mode = 0x80
// XAddMode - add atomically across processors.
XAddMode Mode = 0xc0
)
@ -73,6 +75,11 @@ func LoadMemOp(size Size) OpCode {
return OpCode(LdXClass).SetMode(MemMode).SetSize(size)
}
// LoadMemSXOp returns the OpCode to load a value of given size from memory sign extended.
func LoadMemSXOp(size Size) OpCode {
return OpCode(LdXClass).SetMode(MemSXMode).SetSize(size)
}
// LoadMem emits `dst = *(size *)(src + offset)`.
func LoadMem(dst, src Register, offset int16, size Size) Instruction {
return Instruction{
@ -83,6 +90,20 @@ func LoadMem(dst, src Register, offset int16, size Size) Instruction {
}
}
// LoadMemSX emits `dst = *(size *)(src + offset)` but sign extends dst.
func LoadMemSX(dst, src Register, offset int16, size Size) Instruction {
if size == DWord {
return Instruction{OpCode: InvalidOpCode}
}
return Instruction{
OpCode: LoadMemSXOp(size),
Dst: dst,
Src: src,
Offset: offset,
}
}
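A sketch of the new sign-extending load next to its zero-extending counterpart; DWord is rejected because a full 64-bit load has no upper bits left to extend. The program is illustrative:

package main

import (
	"fmt"

	"github.com/cilium/ebpf/asm"
)

func main() {
	lo := asm.LoadMem(asm.R0, asm.R1, 0, asm.Byte)   // r0 = *(u8 *)(r1 + 0), zero-extended
	sx := asm.LoadMemSX(asm.R0, asm.R1, 0, asm.Byte) // r0 = *(s8 *)(r1 + 0), sign-extended

	// The constructor returns an invalid instruction rather than
	// emitting a nonsensical opcode for DWord.
	bad := asm.LoadMemSX(asm.R0, asm.R1, 0, asm.DWord)
	fmt.Println(lo, sx, bad.OpCode == asm.InvalidOpCode)
}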
// LoadImmOp returns the OpCode to load an immediate of given size.
//
// As of kernel 4.20, only DWord size is accepted.


@ -13,6 +13,7 @@ func _() {
_ = x[AbsMode-32]
_ = x[IndMode-64]
_ = x[MemMode-96]
_ = x[MemSXMode-128]
_ = x[XAddMode-192]
}
@ -21,8 +22,9 @@ const (
_Mode_name_1 = "AbsMode"
_Mode_name_2 = "IndMode"
_Mode_name_3 = "MemMode"
_Mode_name_4 = "XAddMode"
_Mode_name_5 = "InvalidMode"
_Mode_name_4 = "MemSXMode"
_Mode_name_5 = "XAddMode"
_Mode_name_6 = "InvalidMode"
)
func (i Mode) String() string {
@ -35,10 +37,12 @@ func (i Mode) String() string {
return _Mode_name_2
case i == 96:
return _Mode_name_3
case i == 192:
case i == 128:
return _Mode_name_4
case i == 255:
case i == 192:
return _Mode_name_5
case i == 255:
return _Mode_name_6
default:
return "Mode(" + strconv.FormatInt(int64(i), 10) + ")"
}


@ -5,7 +5,7 @@ import (
"strings"
)
//go:generate stringer -output opcode_string.go -type=Class
//go:generate go run golang.org/x/tools/cmd/stringer@latest -output opcode_string.go -type=Class
// Class of operations
//
@ -66,18 +66,43 @@ func (cls Class) isJumpOrALU() bool {
return cls.IsJump() || cls.IsALU()
}
// OpCode is a packed eBPF opcode.
// OpCode represents a single operation.
// It is not a 1:1 mapping to real eBPF opcodes.
//
// Its encoding is defined by a Class value:
// The encoding varies based on a 3-bit Class:
//
// msb lsb
// +----+-+---+
// | ???? |CLS|
// +----+-+---+
type OpCode uint8
// 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
// ??? | CLS
//
// For ALUClass and ALU64Class:
//
// 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
// OPC |S| CLS
//
// For LdClass, LdXclass, StClass and StXClass:
//
// 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
// 0 | MDE |SIZ| CLS
//
// For JumpClass, Jump32Class:
//
// 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
// 0 | OPC |S| CLS
type OpCode uint16
// InvalidOpCode is returned by setters on OpCode
const InvalidOpCode OpCode = 0xff
const InvalidOpCode OpCode = 0xffff
// bpfOpCode returns the actual BPF opcode.
func (op OpCode) bpfOpCode() (byte, error) {
const opCodeMask = 0xff
if !valid(op, opCodeMask) {
return 0, fmt.Errorf("invalid opcode %x", op)
}
return byte(op & opCodeMask), nil
}
// rawInstructions returns the number of BPF instructions required
// to encode this opcode.
@ -147,7 +172,7 @@ func (op OpCode) JumpOp() JumpOp {
jumpOp := JumpOp(op & jumpMask)
// Some JumpOps are only supported by JumpClass, not Jump32Class.
if op.Class() == Jump32Class && (jumpOp == Exit || jumpOp == Call || jumpOp == Ja) {
if op.Class() == Jump32Class && (jumpOp == Exit || jumpOp == Call) {
return InvalidJumpOp
}
@ -234,17 +259,24 @@ func (op OpCode) String() string {
}
case class.IsALU():
if op.ALUOp() == Swap && op.Class() == ALU64Class {
// B to make BSwap, unconditional byte swap
f.WriteString("B")
}
f.WriteString(op.ALUOp().String())
if op.ALUOp() == Swap {
// Width for Endian is controlled by Constant
f.WriteString(op.Endianness().String())
if op.Class() == ALUClass {
// Width for Endian is controlled by Constant
f.WriteString(op.Endianness().String())
}
} else {
f.WriteString(strings.TrimSuffix(op.Source().String(), "Source"))
if class == ALUClass {
f.WriteString("32")
}
f.WriteString(strings.TrimSuffix(op.Source().String(), "Source"))
}
case class.IsJump():
@ -254,7 +286,7 @@ func (op OpCode) String() string {
f.WriteString("32")
}
if jop := op.JumpOp(); jop != Exit && jop != Call {
if jop := op.JumpOp(); jop != Exit && jop != Call && jop != Ja {
f.WriteString(strings.TrimSuffix(op.Source().String(), "Source"))
}


@ -14,7 +14,6 @@ import (
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
const btfMagic = 0xeB9F
@ -47,51 +46,13 @@ type Spec struct {
// Includes all struct flavors and types with the same name.
namedTypes map[essentialName][]Type
// String table from ELF, may be nil.
// String table from ELF.
strings *stringTable
// Byte order of the ELF we decoded the spec from, may be nil.
byteOrder binary.ByteOrder
}
var btfHeaderLen = binary.Size(&btfHeader{})
type btfHeader struct {
Magic uint16
Version uint8
Flags uint8
HdrLen uint32
TypeOff uint32
TypeLen uint32
StringOff uint32
StringLen uint32
}
// typeStart returns the offset from the beginning of the .BTF section
// to the start of its type entries.
func (h *btfHeader) typeStart() int64 {
return int64(h.HdrLen + h.TypeOff)
}
// stringStart returns the offset from the beginning of the .BTF section
// to the start of its string table.
func (h *btfHeader) stringStart() int64 {
return int64(h.HdrLen + h.StringOff)
}
// newSpec creates a Spec containing only Void.
func newSpec() *Spec {
return &Spec{
[]Type{(*Void)(nil)},
map[Type]TypeID{(*Void)(nil): 0},
0,
make(map[essentialName][]Type),
nil,
nil,
}
}
// LoadSpec opens file and calls LoadSpecFromReader on it.
func LoadSpec(file string) (*Spec, error) {
fh, err := os.Open(file)
@ -240,10 +201,6 @@ func loadRawSpec(btf io.ReaderAt, bo binary.ByteOrder, base *Spec) (*Spec, error
return nil, fmt.Errorf("can't use split BTF as base")
}
if base.strings == nil {
return nil, fmt.Errorf("parse split BTF: base must be loaded from an ELF")
}
baseStrings = base.strings
firstTypeID, err = base.nextTypeID()
@ -252,12 +209,7 @@ func loadRawSpec(btf io.ReaderAt, bo binary.ByteOrder, base *Spec) (*Spec, error
}
}
rawTypes, rawStrings, err := parseBTF(btf, bo, baseStrings)
if err != nil {
return nil, err
}
types, err := inflateRawTypes(rawTypes, rawStrings, base)
types, rawStrings, err := parseBTF(btf, bo, baseStrings, base)
if err != nil {
return nil, err
}
@ -365,12 +317,12 @@ func loadKernelSpec() (_ *Spec, fallback bool, _ error) {
}
defer file.Close()
spec, err := loadSpecFromELF(file)
spec, err := LoadSpecFromReader(file)
return spec, true, err
}
// findVMLinux scans multiple well-known paths for vmlinux kernel images.
func findVMLinux() (*internal.SafeELFFile, error) {
func findVMLinux() (*os.File, error) {
release, err := internal.KernelRelease()
if err != nil {
return nil, err
@ -389,7 +341,7 @@ func findVMLinux() (*internal.SafeELFFile, error) {
}
for _, loc := range locations {
file, err := internal.OpenSafeELFFile(fmt.Sprintf(loc, release))
file, err := os.Open(fmt.Sprintf(loc, release))
if errors.Is(err, os.ErrNotExist) {
continue
}
@ -399,37 +351,6 @@ func findVMLinux() (*internal.SafeELFFile, error) {
return nil, fmt.Errorf("no BTF found for kernel version %s: %w", release, internal.ErrNotSupported)
}
// parseBTFHeader parses the header of the .BTF section.
func parseBTFHeader(r io.Reader, bo binary.ByteOrder) (*btfHeader, error) {
var header btfHeader
if err := binary.Read(r, bo, &header); err != nil {
return nil, fmt.Errorf("can't read header: %v", err)
}
if header.Magic != btfMagic {
return nil, fmt.Errorf("incorrect magic value %v", header.Magic)
}
if header.Version != 1 {
return nil, fmt.Errorf("unexpected version %v", header.Version)
}
if header.Flags != 0 {
return nil, fmt.Errorf("unsupported flags %v", header.Flags)
}
remainder := int64(header.HdrLen) - int64(binary.Size(&header))
if remainder < 0 {
return nil, errors.New("header length shorter than btfHeader size")
}
if _, err := io.CopyN(internal.DiscardZeroes{}, r, remainder); err != nil {
return nil, fmt.Errorf("header padding: %v", err)
}
return &header, nil
}
func guessRawBTFByteOrder(r io.ReaderAt) binary.ByteOrder {
buf := new(bufio.Reader)
for _, bo := range []binary.ByteOrder{
@ -447,7 +368,7 @@ func guessRawBTFByteOrder(r io.ReaderAt) binary.ByteOrder {
// parseBTF reads a .BTF section into memory and parses it into a list of
// raw types and a string table.
func parseBTF(btf io.ReaderAt, bo binary.ByteOrder, baseStrings *stringTable) ([]rawType, *stringTable, error) {
func parseBTF(btf io.ReaderAt, bo binary.ByteOrder, baseStrings *stringTable, base *Spec) ([]Type, *stringTable, error) {
buf := internal.NewBufferedSectionReader(btf, 0, math.MaxInt64)
header, err := parseBTFHeader(buf, bo)
if err != nil {
@ -461,12 +382,12 @@ func parseBTF(btf io.ReaderAt, bo binary.ByteOrder, baseStrings *stringTable) ([
}
buf.Reset(io.NewSectionReader(btf, header.typeStart(), int64(header.TypeLen)))
rawTypes, err := readTypes(buf, bo, header.TypeLen)
types, err := readAndInflateTypes(buf, bo, header.TypeLen, rawStrings, base)
if err != nil {
return nil, nil, fmt.Errorf("can't read types: %w", err)
return nil, nil, err
}
return rawTypes, rawStrings, nil
return types, rawStrings, nil
}
type symbol struct {
@ -773,97 +694,3 @@ func (iter *TypesIterator) Next() bool {
iter.index++
return true
}
// haveBTF attempts to load a BTF blob containing an Int. It should pass on any
// kernel that supports BPF_BTF_LOAD.
var haveBTF = internal.NewFeatureTest("BTF", "4.18", func() error {
// 0-length anonymous integer
err := probeBTF(&Int{})
if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) {
return internal.ErrNotSupported
}
return err
})
// haveMapBTF attempts to load a minimal BTF blob containing a Var. It is
// used as a proxy for .bss, .data and .rodata map support, which generally
// come with a Var and Datasec. These were introduced in Linux 5.2.
var haveMapBTF = internal.NewFeatureTest("Map BTF (Var/Datasec)", "5.2", func() error {
if err := haveBTF(); err != nil {
return err
}
v := &Var{
Name: "a",
Type: &Pointer{(*Void)(nil)},
}
err := probeBTF(v)
if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) {
// Treat both EINVAL and EPERM as not supported: creating the map may still
// succeed without Btf* attrs.
return internal.ErrNotSupported
}
return err
})
// haveProgBTF attempts to load a BTF blob containing a Func and FuncProto. It
// is used as a proxy for ext_info (func_info) support, which depends on
// Func(Proto) by definition.
var haveProgBTF = internal.NewFeatureTest("Program BTF (func/line_info)", "5.0", func() error {
if err := haveBTF(); err != nil {
return err
}
fn := &Func{
Name: "a",
Type: &FuncProto{Return: (*Void)(nil)},
}
err := probeBTF(fn)
if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) {
return internal.ErrNotSupported
}
return err
})
var haveFuncLinkage = internal.NewFeatureTest("BTF func linkage", "5.6", func() error {
if err := haveProgBTF(); err != nil {
return err
}
fn := &Func{
Name: "a",
Type: &FuncProto{Return: (*Void)(nil)},
Linkage: GlobalFunc,
}
err := probeBTF(fn)
if errors.Is(err, unix.EINVAL) {
return internal.ErrNotSupported
}
return err
})
func probeBTF(typ Type) error {
b, err := NewBuilder([]Type{typ})
if err != nil {
return err
}
buf, err := b.Marshal(nil, nil)
if err != nil {
return err
}
fd, err := sys.BtfLoad(&sys.BtfLoadAttr{
Btf: sys.NewSlicePointer(buf),
BtfSize: uint32(len(buf)),
})
if err == nil {
fd.Close()
}
return err
}


@ -2,12 +2,15 @@ package btf
import (
"encoding/binary"
"errors"
"fmt"
"io"
"unsafe"
"github.com/cilium/ebpf/internal"
)
//go:generate stringer -linecomment -output=btf_types_string.go -type=FuncLinkage,VarLinkage,btfKind
//go:generate go run golang.org/x/tools/cmd/stringer@latest -linecomment -output=btf_types_string.go -type=FuncLinkage,VarLinkage,btfKind
// btfKind describes a Type.
type btfKind uint8
@ -69,6 +72,63 @@ const (
btfTypeKindFlagMask = 1
)
var btfHeaderLen = binary.Size(&btfHeader{})
type btfHeader struct {
Magic uint16
Version uint8
Flags uint8
HdrLen uint32
TypeOff uint32
TypeLen uint32
StringOff uint32
StringLen uint32
}
// typeStart returns the offset from the beginning of the .BTF section
// to the start of its type entries.
func (h *btfHeader) typeStart() int64 {
return int64(h.HdrLen + h.TypeOff)
}
// stringStart returns the offset from the beginning of the .BTF section
// to the start of its string table.
func (h *btfHeader) stringStart() int64 {
return int64(h.HdrLen + h.StringOff)
}
// parseBTFHeader parses the header of the .BTF section.
func parseBTFHeader(r io.Reader, bo binary.ByteOrder) (*btfHeader, error) {
var header btfHeader
if err := binary.Read(r, bo, &header); err != nil {
return nil, fmt.Errorf("can't read header: %v", err)
}
if header.Magic != btfMagic {
return nil, fmt.Errorf("incorrect magic value %v", header.Magic)
}
if header.Version != 1 {
return nil, fmt.Errorf("unexpected version %v", header.Version)
}
if header.Flags != 0 {
return nil, fmt.Errorf("unsupported flags %v", header.Flags)
}
remainder := int64(header.HdrLen) - int64(binary.Size(&header))
if remainder < 0 {
return nil, errors.New("header length shorter than btfHeader size")
}
if _, err := io.CopyN(internal.DiscardZeroes{}, r, remainder); err != nil {
return nil, fmt.Errorf("header padding: %v", err)
}
return &header, nil
}
var btfTypeLen = binary.Size(btfType{})
// btfType is equivalent to struct btf_type in Documentation/bpf/btf.rst.
@ -93,6 +153,19 @@ type btfType struct {
SizeType uint32
}
var btfTypeSize = int(unsafe.Sizeof(btfType{}))
func unmarshalBtfType(bt *btfType, b []byte, bo binary.ByteOrder) (int, error) {
if len(b) < btfTypeSize {
return 0, fmt.Errorf("not enough bytes to unmarshal btfType")
}
bt.NameOff = bo.Uint32(b[0:])
bt.Info = bo.Uint32(b[4:])
bt.SizeType = bo.Uint32(b[8:])
return btfTypeSize, nil
}
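This is the pattern the rewrite applies to every BTF record below: decode fixed-width fields straight out of a byte slice with a binary.ByteOrder instead of going through reflection-driven binary.Read, trading one bounds check per record for the per-call allocations and interface traffic of the reflection path. A standalone sketch of the same idiom (the vendored decoders are unexported, so the names here are illustrative):

package main

import (
	"encoding/binary"
	"fmt"
)

// record mirrors btfType's wire layout: three fixed uint32 fields.
type record struct{ NameOff, Info, SizeType uint32 }

func decodeRecord(b []byte, bo binary.ByteOrder) (record, int, error) {
	const size = 12
	if len(b) < size {
		return record{}, 0, fmt.Errorf("need %d bytes, have %d", size, len(b))
	}
	return record{bo.Uint32(b[0:]), bo.Uint32(b[4:]), bo.Uint32(b[8:])}, size, nil
}

func main() {
	buf := make([]byte, 12)
	binary.LittleEndian.PutUint32(buf[4:], 42)
	r, n, err := decodeRecord(buf, binary.LittleEndian)
	fmt.Println(r.Info, n, err) // 42 12 <nil>
}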
func mask(len uint32) uint32 {
return (1 << len) - 1
}
@ -240,6 +313,17 @@ const (
btfIntBitsShift = 0
)
var btfIntLen = int(unsafe.Sizeof(btfInt{}))
func unmarshalBtfInt(bi *btfInt, b []byte, bo binary.ByteOrder) (int, error) {
if len(b) < btfIntLen {
return 0, fmt.Errorf("not enough bytes to unmarshal btfInt")
}
bi.Raw = bo.Uint32(b[0:])
return btfIntLen, nil
}
func (bi btfInt) Encoding() IntEncoding {
return IntEncoding(readBits(bi.Raw, btfIntEncodingLen, btfIntEncodingShift))
}
@ -270,102 +354,166 @@ type btfArray struct {
Nelems uint32
}
var btfArrayLen = int(unsafe.Sizeof(btfArray{}))
func unmarshalBtfArray(ba *btfArray, b []byte, bo binary.ByteOrder) (int, error) {
if len(b) < btfArrayLen {
return 0, fmt.Errorf("not enough bytes to unmarshal btfArray")
}
ba.Type = TypeID(bo.Uint32(b[0:]))
ba.IndexType = TypeID(bo.Uint32(b[4:]))
ba.Nelems = bo.Uint32(b[8:])
return btfArrayLen, nil
}
type btfMember struct {
NameOff uint32
Type TypeID
Offset uint32
}
var btfMemberLen = int(unsafe.Sizeof(btfMember{}))
func unmarshalBtfMembers(members []btfMember, b []byte, bo binary.ByteOrder) (int, error) {
off := 0
for i := range members {
if off+btfMemberLen > len(b) {
return 0, fmt.Errorf("not enough bytes to unmarshal btfMember %d", i)
}
members[i].NameOff = bo.Uint32(b[off+0:])
members[i].Type = TypeID(bo.Uint32(b[off+4:]))
members[i].Offset = bo.Uint32(b[off+8:])
off += btfMemberLen
}
return off, nil
}
type btfVarSecinfo struct {
Type TypeID
Offset uint32
Size uint32
}
var btfVarSecinfoLen = int(unsafe.Sizeof(btfVarSecinfo{}))
func unmarshalBtfVarSecInfos(secinfos []btfVarSecinfo, b []byte, bo binary.ByteOrder) (int, error) {
off := 0
for i := range secinfos {
if off+btfVarSecinfoLen > len(b) {
return 0, fmt.Errorf("not enough bytes to unmarshal btfVarSecinfo %d", i)
}
secinfos[i].Type = TypeID(bo.Uint32(b[off+0:]))
secinfos[i].Offset = bo.Uint32(b[off+4:])
secinfos[i].Size = bo.Uint32(b[off+8:])
off += btfVarSecinfoLen
}
return off, nil
}
type btfVariable struct {
Linkage uint32
}
var btfVariableLen = int(unsafe.Sizeof(btfVariable{}))
func unmarshalBtfVariable(bv *btfVariable, b []byte, bo binary.ByteOrder) (int, error) {
if len(b) < btfVariableLen {
return 0, fmt.Errorf("not enough bytes to unmarshal btfVariable")
}
bv.Linkage = bo.Uint32(b[0:])
return btfVariableLen, nil
}
type btfEnum struct {
NameOff uint32
Val uint32
}
var btfEnumLen = int(unsafe.Sizeof(btfEnum{}))
func unmarshalBtfEnums(enums []btfEnum, b []byte, bo binary.ByteOrder) (int, error) {
off := 0
for i := range enums {
if off+btfEnumLen > len(b) {
return 0, fmt.Errorf("not enough bytes to unmarshal btfEnum %d", i)
}
enums[i].NameOff = bo.Uint32(b[off+0:])
enums[i].Val = bo.Uint32(b[off+4:])
off += btfEnumLen
}
return off, nil
}
type btfEnum64 struct {
NameOff uint32
ValLo32 uint32
ValHi32 uint32
}
var btfEnum64Len = int(unsafe.Sizeof(btfEnum64{}))
func unmarshalBtfEnums64(enums []btfEnum64, b []byte, bo binary.ByteOrder) (int, error) {
off := 0
for i := range enums {
if off+btfEnum64Len > len(b) {
return 0, fmt.Errorf("not enough bytes to unmarshal btfEnum64 %d", i)
}
enums[i].NameOff = bo.Uint32(b[off+0:])
enums[i].ValLo32 = bo.Uint32(b[off+4:])
enums[i].ValHi32 = bo.Uint32(b[off+8:])
off += btfEnum64Len
}
return off, nil
}
type btfParam struct {
NameOff uint32
Type TypeID
}
var btfParamLen = int(unsafe.Sizeof(btfParam{}))
func unmarshalBtfParams(params []btfParam, b []byte, bo binary.ByteOrder) (int, error) {
off := 0
for i := range params {
if off+btfParamLen > len(b) {
return 0, fmt.Errorf("not enough bytes to unmarshal btfParam %d", i)
}
params[i].NameOff = bo.Uint32(b[off+0:])
params[i].Type = TypeID(bo.Uint32(b[off+4:]))
off += btfParamLen
}
return off, nil
}
type btfDeclTag struct {
ComponentIdx uint32
}
func readTypes(r io.Reader, bo binary.ByteOrder, typeLen uint32) ([]rawType, error) {
var header btfType
// because of the interleaving between types and struct members it is difficult to
// precompute the numbers of raw types this will parse
// this "guess" is a good first estimation
sizeOfbtfType := uintptr(btfTypeLen)
tyMaxCount := uintptr(typeLen) / sizeOfbtfType / 2
types := make([]rawType, 0, tyMaxCount)
var btfDeclTagLen = int(unsafe.Sizeof(btfDeclTag{}))
for id := TypeID(1); ; id++ {
if err := binary.Read(r, bo, &header); err == io.EOF {
return types, nil
} else if err != nil {
return nil, fmt.Errorf("can't read type info for id %v: %v", id, err)
}
var data interface{}
switch header.Kind() {
case kindInt:
data = new(btfInt)
case kindPointer:
case kindArray:
data = new(btfArray)
case kindStruct:
fallthrough
case kindUnion:
data = make([]btfMember, header.Vlen())
case kindEnum:
data = make([]btfEnum, header.Vlen())
case kindForward:
case kindTypedef:
case kindVolatile:
case kindConst:
case kindRestrict:
case kindFunc:
case kindFuncProto:
data = make([]btfParam, header.Vlen())
case kindVar:
data = new(btfVariable)
case kindDatasec:
data = make([]btfVarSecinfo, header.Vlen())
case kindFloat:
case kindDeclTag:
data = new(btfDeclTag)
case kindTypeTag:
case kindEnum64:
data = make([]btfEnum64, header.Vlen())
default:
return nil, fmt.Errorf("type id %v: unknown kind: %v", id, header.Kind())
}
if data == nil {
types = append(types, rawType{header, nil})
continue
}
if err := binary.Read(r, bo, data); err != nil {
return nil, fmt.Errorf("type id %d: kind %v: can't read %T: %v", id, header.Kind(), data, err)
}
types = append(types, rawType{header, data})
func unmarshalBtfDeclTag(bdt *btfDeclTag, b []byte, bo binary.ByteOrder) (int, error) {
if len(b) < btfDeclTagLen {
return 0, fmt.Errorf("not enough bytes to unmarshal btfDeclTag")
}
bdt.ComponentIdx = bo.Uint32(b[0:])
return btfDeclTagLen, nil
}


@ -18,8 +18,8 @@ import (
// COREFixup is the result of computing a CO-RE relocation for a target.
type COREFixup struct {
kind coreKind
local uint32
target uint32
local uint64
target uint64
// True if there is no valid fixup. The instruction is replaced with an
// invalid dummy.
poison bool
@ -196,12 +196,12 @@ func CORERelocate(relos []*CORERelocation, target *Spec, bo binary.ByteOrder) ([
result[i] = COREFixup{
kind: relo.kind,
local: uint32(relo.id),
local: uint64(relo.id),
// NB: Using relo.id as the target here is incorrect, since
// it doesn't match the BTF we generate on the fly. This isn't
// too bad for now since there are no uses of the local type ID
// in the kernel, yet.
target: uint32(relo.id),
target: uint64(relo.id),
}
continue
}
@ -311,10 +311,10 @@ var errNoSignedness = errors.New("no signedness")
// coreCalculateFixup calculates the fixup for a single local type, target type
// and relocation.
func coreCalculateFixup(relo *CORERelocation, target Type, targetID TypeID, bo binary.ByteOrder) (COREFixup, error) {
fixup := func(local, target uint32) (COREFixup, error) {
fixup := func(local, target uint64) (COREFixup, error) {
return COREFixup{kind: relo.kind, local: local, target: target}, nil
}
fixupWithoutValidation := func(local, target uint32) (COREFixup, error) {
fixupWithoutValidation := func(local, target uint64) (COREFixup, error) {
return COREFixup{kind: relo.kind, local: local, target: target, skipLocalValidation: true}, nil
}
poison := func() (COREFixup, error) {
@ -346,7 +346,7 @@ func coreCalculateFixup(relo *CORERelocation, target Type, targetID TypeID, bo b
return fixup(1, 1)
case reloTypeIDTarget:
return fixup(uint32(relo.id), uint32(targetID))
return fixup(uint64(relo.id), uint64(targetID))
case reloTypeSize:
localSize, err := Sizeof(local)
@ -359,7 +359,7 @@ func coreCalculateFixup(relo *CORERelocation, target Type, targetID TypeID, bo b
return zero, err
}
return fixup(uint32(localSize), uint32(targetSize))
return fixup(uint64(localSize), uint64(targetSize))
}
case reloEnumvalValue, reloEnumvalExists:
@ -376,7 +376,7 @@ func coreCalculateFixup(relo *CORERelocation, target Type, targetID TypeID, bo b
return fixup(1, 1)
case reloEnumvalValue:
return fixup(uint32(localValue.Value), uint32(targetValue.Value))
return fixup(localValue.Value, targetValue.Value)
}
case reloFieldByteOffset, reloFieldByteSize, reloFieldExists, reloFieldLShiftU64, reloFieldRShiftU64, reloFieldSigned:
@ -405,7 +405,7 @@ func coreCalculateFixup(relo *CORERelocation, target Type, targetID TypeID, bo b
return fixup(1, 1)
case reloFieldByteOffset:
return maybeSkipValidation(fixup(localField.offset, targetField.offset))
return maybeSkipValidation(fixup(uint64(localField.offset), uint64(targetField.offset)))
case reloFieldByteSize:
localSize, err := Sizeof(localField.Type)
@ -417,24 +417,24 @@ func coreCalculateFixup(relo *CORERelocation, target Type, targetID TypeID, bo b
if err != nil {
return zero, err
}
return maybeSkipValidation(fixup(uint32(localSize), uint32(targetSize)))
return maybeSkipValidation(fixup(uint64(localSize), uint64(targetSize)))
case reloFieldLShiftU64:
var target uint32
var target uint64
if bo == binary.LittleEndian {
targetSize, err := targetField.sizeBits()
if err != nil {
return zero, err
}
target = uint32(64 - targetField.bitfieldOffset - targetSize)
target = uint64(64 - targetField.bitfieldOffset - targetSize)
} else {
loadWidth, err := Sizeof(targetField.Type)
if err != nil {
return zero, err
}
target = uint32(64 - Bits(loadWidth*8) + targetField.bitfieldOffset)
target = uint64(64 - Bits(loadWidth*8) + targetField.bitfieldOffset)
}
return fixupWithoutValidation(0, target)
@ -444,7 +444,7 @@ func coreCalculateFixup(relo *CORERelocation, target Type, targetID TypeID, bo b
return zero, err
}
return fixupWithoutValidation(0, uint32(64-targetSize))
return fixupWithoutValidation(0, uint64(64-targetSize))
case reloFieldSigned:
switch local := UnderlyingType(localField.Type).(type) {
@ -454,7 +454,7 @@ func coreCalculateFixup(relo *CORERelocation, target Type, targetID TypeID, bo b
return zero, fmt.Errorf("target isn't *Enum but %T", targetField.Type)
}
return fixup(boolToUint32(local.Signed), boolToUint32(target.Signed))
return fixup(boolToUint64(local.Signed), boolToUint64(target.Signed))
case *Int:
target, ok := as[*Int](targetField.Type)
if !ok {
@ -462,8 +462,8 @@ func coreCalculateFixup(relo *CORERelocation, target Type, targetID TypeID, bo b
}
return fixup(
uint32(local.Encoding&Signed),
uint32(target.Encoding&Signed),
uint64(local.Encoding&Signed),
uint64(target.Encoding&Signed),
)
default:
return zero, fmt.Errorf("type %T: %w", local, errNoSignedness)
@ -474,7 +474,7 @@ func coreCalculateFixup(relo *CORERelocation, target Type, targetID TypeID, bo b
return zero, ErrNotSupported
}
func boolToUint32(val bool) uint32 {
func boolToUint64(val bool) uint64 {
if val {
return 1
}
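The widening from uint32 to uint64 throughout COREFixup is what lets reloEnumvalValue pass Enum64 values through unclipped. A two-line illustration of what the old representation would have lost:

package main

import "fmt"

func main() {
	v := uint64(1) << 40      // a plausible Enum64 member value
	fmt.Println(uint32(v), v) // 0 1099511627776: a uint32 fixup would truncate it to zero
}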
@ -799,7 +799,7 @@ func coreFindMember(typ composite, name string) (Member, bool, error) {
if visited[target] {
continue
}
if len(visited) >= maxTypeDepth {
if len(visited) >= maxResolveDepth {
// This check is different than libbpf, which restricts the entire
// path to BPF_CORE_SPEC_MAX_LEN items.
return Member{}, false, fmt.Errorf("type is nested too deep")
@ -895,7 +895,7 @@ func coreAreTypesCompatible(localType Type, targetType Type) error {
)
for ; l != nil && t != nil; l, t = localTs.Shift(), targetTs.Shift() {
if depth >= maxTypeDepth {
if depth >= maxResolveDepth {
return errors.New("types are nested too deep")
}


@ -16,9 +16,9 @@ import (
// ExtInfos contains ELF section metadata.
type ExtInfos struct {
// The slices are sorted by offset in ascending order.
funcInfos map[string][]funcInfo
lineInfos map[string][]lineInfo
relocationInfos map[string][]coreRelocationInfo
funcInfos map[string]FuncInfos
lineInfos map[string]LineInfos
relocationInfos map[string]CORERelocationInfos
}
// loadExtInfosFromELF parses ext infos from the .BTF.ext section in an ELF.
@ -34,11 +34,11 @@ func loadExtInfosFromELF(file *internal.SafeELFFile, spec *Spec) (*ExtInfos, err
return nil, fmt.Errorf("compressed ext_info is not supported")
}
return loadExtInfos(section.ReaderAt, file.ByteOrder, spec, spec.strings)
return loadExtInfos(section.ReaderAt, file.ByteOrder, spec)
}
// loadExtInfos parses bare ext infos.
func loadExtInfos(r io.ReaderAt, bo binary.ByteOrder, spec *Spec, strings *stringTable) (*ExtInfos, error) {
func loadExtInfos(r io.ReaderAt, bo binary.ByteOrder, spec *Spec) (*ExtInfos, error) {
// Open unbuffered section reader. binary.Read() calls io.ReadFull on
// the header structs, resulting in one syscall per header.
headerRd := io.NewSectionReader(r, 0, math.MaxInt64)
@ -53,12 +53,12 @@ func loadExtInfos(r io.ReaderAt, bo binary.ByteOrder, spec *Spec, strings *strin
}
buf := internal.NewBufferedSectionReader(r, extHeader.funcInfoStart(), int64(extHeader.FuncInfoLen))
btfFuncInfos, err := parseFuncInfos(buf, bo, strings)
btfFuncInfos, err := parseFuncInfos(buf, bo, spec.strings)
if err != nil {
return nil, fmt.Errorf("parsing BTF function info: %w", err)
}
funcInfos := make(map[string][]funcInfo, len(btfFuncInfos))
funcInfos := make(map[string]FuncInfos, len(btfFuncInfos))
for section, bfis := range btfFuncInfos {
funcInfos[section], err = newFuncInfos(bfis, spec)
if err != nil {
@ -67,14 +67,14 @@ func loadExtInfos(r io.ReaderAt, bo binary.ByteOrder, spec *Spec, strings *strin
}
buf = internal.NewBufferedSectionReader(r, extHeader.lineInfoStart(), int64(extHeader.LineInfoLen))
btfLineInfos, err := parseLineInfos(buf, bo, strings)
btfLineInfos, err := parseLineInfos(buf, bo, spec.strings)
if err != nil {
return nil, fmt.Errorf("parsing BTF line info: %w", err)
}
lineInfos := make(map[string][]lineInfo, len(btfLineInfos))
lineInfos := make(map[string]LineInfos, len(btfLineInfos))
for section, blis := range btfLineInfos {
lineInfos[section], err = newLineInfos(blis, strings)
lineInfos[section], err = newLineInfos(blis, spec.strings)
if err != nil {
return nil, fmt.Errorf("section %s: line infos: %w", section, err)
}
@ -86,14 +86,14 @@ func loadExtInfos(r io.ReaderAt, bo binary.ByteOrder, spec *Spec, strings *strin
var btfCORERelos map[string][]bpfCORERelo
buf = internal.NewBufferedSectionReader(r, extHeader.coreReloStart(coreHeader), int64(coreHeader.COREReloLen))
btfCORERelos, err = parseCORERelos(buf, bo, strings)
btfCORERelos, err = parseCORERelos(buf, bo, spec.strings)
if err != nil {
return nil, fmt.Errorf("parsing CO-RE relocation info: %w", err)
}
coreRelos := make(map[string][]coreRelocationInfo, len(btfCORERelos))
coreRelos := make(map[string]CORERelocationInfos, len(btfCORERelos))
for section, brs := range btfCORERelos {
coreRelos[section], err = newRelocationInfos(brs, spec, strings)
coreRelos[section], err = newRelocationInfos(brs, spec, spec.strings)
if err != nil {
return nil, fmt.Errorf("section %s: CO-RE relocations: %w", section, err)
}
@ -111,21 +111,31 @@ func (ei *ExtInfos) Assign(insns asm.Instructions, section string) {
lineInfos := ei.lineInfos[section]
reloInfos := ei.relocationInfos[section]
AssignMetadataToInstructions(insns, funcInfos, lineInfos, reloInfos)
}
// Assign per-instruction metadata to the instructions in insns.
func AssignMetadataToInstructions(
insns asm.Instructions,
funcInfos FuncInfos,
lineInfos LineInfos,
reloInfos CORERelocationInfos,
) {
iter := insns.Iterate()
for iter.Next() {
if len(funcInfos) > 0 && funcInfos[0].offset == iter.Offset {
*iter.Ins = WithFuncMetadata(*iter.Ins, funcInfos[0].fn)
funcInfos = funcInfos[1:]
if len(funcInfos.infos) > 0 && funcInfos.infos[0].offset == iter.Offset {
*iter.Ins = WithFuncMetadata(*iter.Ins, funcInfos.infos[0].fn)
funcInfos.infos = funcInfos.infos[1:]
}
if len(lineInfos) > 0 && lineInfos[0].offset == iter.Offset {
*iter.Ins = iter.Ins.WithSource(lineInfos[0].line)
lineInfos = lineInfos[1:]
if len(lineInfos.infos) > 0 && lineInfos.infos[0].offset == iter.Offset {
*iter.Ins = iter.Ins.WithSource(lineInfos.infos[0].line)
lineInfos.infos = lineInfos.infos[1:]
}
if len(reloInfos) > 0 && reloInfos[0].offset == iter.Offset {
iter.Ins.Metadata.Set(coreRelocationMeta{}, reloInfos[0].relo)
reloInfos = reloInfos[1:]
if len(reloInfos.infos) > 0 && reloInfos.infos[0].offset == iter.Offset {
iter.Ins.Metadata.Set(coreRelocationMeta{}, reloInfos.infos[0].relo)
reloInfos.infos = reloInfos.infos[1:]
}
}
}
@ -323,6 +333,11 @@ func parseExtInfoRecordSize(r io.Reader, bo binary.ByteOrder) (uint32, error) {
return recordSize, nil
}
// FuncInfos contains a sorted list of func infos.
type FuncInfos struct {
infos []funcInfo
}
// The size of a FuncInfo in BTF wire format.
var FuncInfoSize = uint32(binary.Size(bpfFuncInfo{}))
@ -359,21 +374,39 @@ func newFuncInfo(fi bpfFuncInfo, spec *Spec) (*funcInfo, error) {
}, nil
}
func newFuncInfos(bfis []bpfFuncInfo, spec *Spec) ([]funcInfo, error) {
fis := make([]funcInfo, 0, len(bfis))
func newFuncInfos(bfis []bpfFuncInfo, spec *Spec) (FuncInfos, error) {
fis := FuncInfos{
infos: make([]funcInfo, 0, len(bfis)),
}
for _, bfi := range bfis {
fi, err := newFuncInfo(bfi, spec)
if err != nil {
return nil, fmt.Errorf("offset %d: %w", bfi.InsnOff, err)
return FuncInfos{}, fmt.Errorf("offset %d: %w", bfi.InsnOff, err)
}
fis = append(fis, *fi)
fis.infos = append(fis.infos, *fi)
}
sort.Slice(fis, func(i, j int) bool {
return fis[i].offset <= fis[j].offset
sort.Slice(fis.infos, func(i, j int) bool {
return fis.infos[i].offset <= fis.infos[j].offset
})
return fis, nil
}
// LoadFuncInfos parses BTF func info in kernel wire format.
func LoadFuncInfos(reader io.Reader, bo binary.ByteOrder, recordNum uint32, spec *Spec) (FuncInfos, error) {
fis, err := parseFuncInfoRecords(
reader,
bo,
FuncInfoSize,
recordNum,
false,
)
if err != nil {
return FuncInfos{}, fmt.Errorf("parsing BTF func info: %w", err)
}
return newFuncInfos(fis, spec)
}
// marshal into the BTF wire format.
func (fi *funcInfo) marshal(w *bytes.Buffer, b *Builder) error {
id, err := b.Add(fi.fn)
@ -409,7 +442,7 @@ func parseFuncInfos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map
return nil, err
}
records, err := parseFuncInfoRecords(r, bo, recordSize, infoHeader.NumInfo)
records, err := parseFuncInfoRecords(r, bo, recordSize, infoHeader.NumInfo, true)
if err != nil {
return nil, fmt.Errorf("section %v: %w", secName, err)
}
@ -421,7 +454,7 @@ func parseFuncInfos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map
// parseFuncInfoRecords parses a stream of func_infos into a funcInfos.
// These records appear after a btf_ext_info_sec header in the func_info
// sub-section of .BTF.ext.
func parseFuncInfoRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, recordNum uint32) ([]bpfFuncInfo, error) {
func parseFuncInfoRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, recordNum uint32, offsetInBytes bool) ([]bpfFuncInfo, error) {
var out []bpfFuncInfo
var fi bpfFuncInfo
@ -435,13 +468,15 @@ func parseFuncInfoRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, r
return nil, fmt.Errorf("can't read function info: %v", err)
}
if fi.InsnOff%asm.InstructionSize != 0 {
return nil, fmt.Errorf("offset %v is not aligned with instruction size", fi.InsnOff)
}
if offsetInBytes {
if fi.InsnOff%asm.InstructionSize != 0 {
return nil, fmt.Errorf("offset %v is not aligned with instruction size", fi.InsnOff)
}
// ELF tracks offset in bytes, the kernel expects raw BPF instructions.
// Convert as early as possible.
fi.InsnOff /= asm.InstructionSize
// ELF tracks offset in bytes, the kernel expects raw BPF instructions.
// Convert as early as possible.
fi.InsnOff /= asm.InstructionSize
}
out = append(out, fi)
}
@ -480,6 +515,11 @@ func (li *Line) String() string {
return li.line
}
// LineInfos contains a sorted list of line infos.
type LineInfos struct {
infos []lineInfo
}
type lineInfo struct {
line *Line
offset asm.RawInstructionOffset
@ -500,21 +540,37 @@ type bpfLineInfo struct {
LineCol uint32
}
func newLineInfo(li bpfLineInfo, strings *stringTable) (*lineInfo, error) {
// LoadLineInfos parses BTF line info in kernel wire format.
func LoadLineInfos(reader io.Reader, bo binary.ByteOrder, recordNum uint32, spec *Spec) (LineInfos, error) {
lis, err := parseLineInfoRecords(
reader,
bo,
LineInfoSize,
recordNum,
false,
)
if err != nil {
return LineInfos{}, fmt.Errorf("parsing BTF line info: %w", err)
}
return newLineInfos(lis, spec.strings)
}
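Together with LoadFuncInfos above, this gives callers an exported way to decode func and line info in kernel wire format, where instruction offsets are already counted in instructions rather than bytes — hence the offsetInBytes flag threaded through the shared record parsers. A hedged sketch of consuming such metadata; funcReader, lineReader, the record counts, spec and insns are hypothetical stand-ins for values obtained from a program-info query:

fis, err := btf.LoadFuncInfos(funcReader, binary.LittleEndian, numFuncInfos, spec)
if err != nil {
	return err
}
lis, err := btf.LoadLineInfos(lineReader, binary.LittleEndian, numLineInfos, spec)
if err != nil {
	return err
}
// Attach the decoded metadata to a disassembled program.
btf.AssignMetadataToInstructions(insns, fis, lis, btf.CORERelocationInfos{})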
func newLineInfo(li bpfLineInfo, strings *stringTable) (lineInfo, error) {
line, err := strings.Lookup(li.LineOff)
if err != nil {
return nil, fmt.Errorf("lookup of line: %w", err)
return lineInfo{}, fmt.Errorf("lookup of line: %w", err)
}
fileName, err := strings.Lookup(li.FileNameOff)
if err != nil {
return nil, fmt.Errorf("lookup of filename: %w", err)
return lineInfo{}, fmt.Errorf("lookup of filename: %w", err)
}
lineNumber := li.LineCol >> bpfLineShift
lineColumn := li.LineCol & bpfColumnMax
return &lineInfo{
return lineInfo{
&Line{
fileName,
line,
@ -525,17 +581,19 @@ func newLineInfo(li bpfLineInfo, strings *stringTable) (*lineInfo, error) {
}, nil
}
func newLineInfos(blis []bpfLineInfo, strings *stringTable) ([]lineInfo, error) {
lis := make([]lineInfo, 0, len(blis))
func newLineInfos(blis []bpfLineInfo, strings *stringTable) (LineInfos, error) {
lis := LineInfos{
infos: make([]lineInfo, 0, len(blis)),
}
for _, bli := range blis {
li, err := newLineInfo(bli, strings)
if err != nil {
return nil, fmt.Errorf("offset %d: %w", bli.InsnOff, err)
return LineInfos{}, fmt.Errorf("offset %d: %w", bli.InsnOff, err)
}
lis = append(lis, *li)
lis.infos = append(lis.infos, li)
}
sort.Slice(lis, func(i, j int) bool {
return lis[i].offset <= lis[j].offset
sort.Slice(lis.infos, func(i, j int) bool {
return lis.infos[i].offset <= lis.infos[j].offset
})
return lis, nil
}
@ -595,7 +653,7 @@ func parseLineInfos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map
return nil, err
}
records, err := parseLineInfoRecords(r, bo, recordSize, infoHeader.NumInfo)
records, err := parseLineInfoRecords(r, bo, recordSize, infoHeader.NumInfo, true)
if err != nil {
return nil, fmt.Errorf("section %v: %w", secName, err)
}
@ -607,8 +665,7 @@ func parseLineInfos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map
// parseLineInfoRecords parses a stream of line_infos into a lineInfos.
// These records appear after a btf_ext_info_sec header in the line_info
// sub-section of .BTF.ext.
func parseLineInfoRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, recordNum uint32) ([]bpfLineInfo, error) {
var out []bpfLineInfo
func parseLineInfoRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, recordNum uint32, offsetInBytes bool) ([]bpfLineInfo, error) {
var li bpfLineInfo
if exp, got := uint32(binary.Size(li)), recordSize; exp != got {
@ -616,18 +673,21 @@ func parseLineInfoRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, r
return nil, fmt.Errorf("expected LineInfo record size %d, but BTF blob contains %d", exp, got)
}
out := make([]bpfLineInfo, 0, recordNum)
for i := uint32(0); i < recordNum; i++ {
if err := binary.Read(r, bo, &li); err != nil {
return nil, fmt.Errorf("can't read line info: %v", err)
}
if li.InsnOff%asm.InstructionSize != 0 {
return nil, fmt.Errorf("offset %v is not aligned with instruction size", li.InsnOff)
}
if offsetInBytes {
if li.InsnOff%asm.InstructionSize != 0 {
return nil, fmt.Errorf("offset %v is not aligned with instruction size", li.InsnOff)
}
// ELF tracks offset in bytes, the kernel expects raw BPF instructions.
// Convert as early as possible.
li.InsnOff /= asm.InstructionSize
// ELF tracks offset in bytes, the kernel expects raw BPF instructions.
// Convert as early as possible.
li.InsnOff /= asm.InstructionSize
}
out = append(out, li)
}
@ -661,6 +721,11 @@ func CORERelocationMetadata(ins *asm.Instruction) *CORERelocation {
return relo
}
// CORERelocationInfos contains a sorted list of CO-RE relocation infos.
type CORERelocationInfos struct {
infos []coreRelocationInfo
}
type coreRelocationInfo struct {
relo *CORERelocation
offset asm.RawInstructionOffset
@ -693,17 +758,19 @@ func newRelocationInfo(relo bpfCORERelo, spec *Spec, strings *stringTable) (*cor
}, nil
}
func newRelocationInfos(brs []bpfCORERelo, spec *Spec, strings *stringTable) ([]coreRelocationInfo, error) {
rs := make([]coreRelocationInfo, 0, len(brs))
func newRelocationInfos(brs []bpfCORERelo, spec *Spec, strings *stringTable) (CORERelocationInfos, error) {
rs := CORERelocationInfos{
infos: make([]coreRelocationInfo, 0, len(brs)),
}
for _, br := range brs {
relo, err := newRelocationInfo(br, spec, strings)
if err != nil {
return nil, fmt.Errorf("offset %d: %w", br.InsnOff, err)
return CORERelocationInfos{}, fmt.Errorf("offset %d: %w", br.InsnOff, err)
}
rs = append(rs, *relo)
rs.infos = append(rs.infos, *relo)
}
sort.Slice(rs, func(i, j int) bool {
return rs[i].offset < rs[j].offset
sort.Slice(rs.infos, func(i, j int) bool {
return rs.infos[i].offset < rs.infos[j].offset
})
return rs, nil
}

vendor/github.com/cilium/ebpf/btf/feature.go generated vendored Normal file (123 lines)
View file

@ -0,0 +1,123 @@
package btf
import (
"errors"
"math"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
// haveBTF attempts to load a BTF blob containing an Int. It should pass on any
// kernel that supports BPF_BTF_LOAD.
var haveBTF = internal.NewFeatureTest("BTF", "4.18", func() error {
// 0-length anonymous integer
err := probeBTF(&Int{})
if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) {
return internal.ErrNotSupported
}
return err
})
// haveMapBTF attempts to load a minimal BTF blob containing a Var. It is
// used as a proxy for .bss, .data and .rodata map support, which generally
// come with a Var and Datasec. These were introduced in Linux 5.2.
var haveMapBTF = internal.NewFeatureTest("Map BTF (Var/Datasec)", "5.2", func() error {
if err := haveBTF(); err != nil {
return err
}
v := &Var{
Name: "a",
Type: &Pointer{(*Void)(nil)},
}
err := probeBTF(v)
if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) {
// Treat both EINVAL and EPERM as not supported: creating the map may still
// succeed without Btf* attrs.
return internal.ErrNotSupported
}
return err
})
// haveProgBTF attempts to load a BTF blob containing a Func and FuncProto. It
// is used as a proxy for ext_info (func_info) support, which depends on
// Func(Proto) by definition.
var haveProgBTF = internal.NewFeatureTest("Program BTF (func/line_info)", "5.0", func() error {
if err := haveBTF(); err != nil {
return err
}
fn := &Func{
Name: "a",
Type: &FuncProto{Return: (*Void)(nil)},
}
err := probeBTF(fn)
if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) {
return internal.ErrNotSupported
}
return err
})
var haveFuncLinkage = internal.NewFeatureTest("BTF func linkage", "5.6", func() error {
if err := haveProgBTF(); err != nil {
return err
}
fn := &Func{
Name: "a",
Type: &FuncProto{Return: (*Void)(nil)},
Linkage: GlobalFunc,
}
err := probeBTF(fn)
if errors.Is(err, unix.EINVAL) {
return internal.ErrNotSupported
}
return err
})
var haveEnum64 = internal.NewFeatureTest("ENUM64", "6.0", func() error {
if err := haveBTF(); err != nil {
return err
}
enum := &Enum{
Size: 8,
Values: []EnumValue{
{"TEST", math.MaxUint32 + 1},
},
}
err := probeBTF(enum)
if errors.Is(err, unix.EINVAL) {
return internal.ErrNotSupported
}
return err
})
func probeBTF(typ Type) error {
b, err := NewBuilder([]Type{typ})
if err != nil {
return err
}
buf, err := b.Marshal(nil, nil)
if err != nil {
return err
}
fd, err := sys.BtfLoad(&sys.BtfLoadAttr{
Btf: sys.NewSlicePointer(buf),
BtfSize: uint32(len(buf)),
})
if err == nil {
fd.Close()
}
return err
}
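All of these probes share one shape: build a minimal BTF blob, try BPF_BTF_LOAD once, and cache the outcome. A self-contained sketch of that memoization pattern, with illustrative names (featureTest, errNotSupported) that are not part of the library:

package main

import (
	"errors"
	"fmt"
	"sync"

	"golang.org/x/sys/unix"
)

var errNotSupported = errors.New("not supported")

// featureTest memoizes a kernel probe and folds EINVAL/EPERM into a
// stable "not supported" error, mirroring the probes above.
func featureTest(probe func() error) func() error {
	var once sync.Once
	var result error
	return func() error {
		once.Do(func() {
			result = probe()
			if errors.Is(result, unix.EINVAL) || errors.Is(result, unix.EPERM) {
				result = errNotSupported
			}
		})
		return result
	}
}

func main() {
	haveThing := featureTest(func() error { return unix.EINVAL })
	fmt.Println(haveThing()) // not supported (cached on repeat calls)
}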

View file

@ -77,7 +77,13 @@ func (gf *GoFormatter) writeTypeDecl(name string, typ Type) error {
gf.w.WriteString("; const ( ")
for _, ev := range e.Values {
id := gf.enumIdentifier(name, ev.Name)
fmt.Fprintf(&gf.w, "%s %s = %d; ", id, name, ev.Value)
var value any
if e.Signed {
value = int64(ev.Value)
} else {
value = ev.Value
}
fmt.Fprintf(&gf.w, "%s %s = %d; ", id, name, value)
}
gf.w.WriteString(")")
@ -112,7 +118,7 @@ func (gf *GoFormatter) writeType(typ Type, depth int) error {
// uint32
func (gf *GoFormatter) writeTypeLit(typ Type, depth int) error {
depth++
if depth > maxTypeDepth {
if depth > maxResolveDepth {
return errNestedTooDeep
}
@ -259,7 +265,7 @@ func (gf *GoFormatter) writeStructField(m Member, depth int) error {
}
depth++
if depth > maxTypeDepth {
if depth > maxResolveDepth {
return errNestedTooDeep
}
@ -332,7 +338,7 @@ func (gf *GoFormatter) writePadding(bytes uint32) {
func skipQualifiers(typ Type) Type {
result := typ
for depth := 0; depth <= maxTypeDepth; depth++ {
for depth := 0; depth <= maxResolveDepth; depth++ {
switch v := (result).(type) {
case qualifier:
result = v.qualify()

View file

@ -18,6 +18,8 @@ type MarshalOptions struct {
Order binary.ByteOrder
// Remove function linkage information for compatibility with <5.6 kernels.
StripFuncLinkage bool
// Replace Enum64 with a placeholder for compatibility with <6.0 kernels.
ReplaceEnum64 bool
}
// KernelMarshalOptions will generate BTF suitable for the current kernel.
@ -25,6 +27,7 @@ func KernelMarshalOptions() *MarshalOptions {
return &MarshalOptions{
Order: internal.NativeEndian,
StripFuncLinkage: haveFuncLinkage() != nil,
ReplaceEnum64: haveEnum64() != nil,
}
}
@ -328,21 +331,13 @@ func (e *encoder) deflateType(typ Type) (err error) {
raw.data, err = e.convertMembers(&raw.btfType, v.Members)
case *Union:
raw.SetKind(kindUnion)
raw.SetSize(v.Size)
raw.data, err = e.convertMembers(&raw.btfType, v.Members)
err = e.deflateUnion(&raw, v)
case *Enum:
raw.SetSize(v.size())
raw.SetVlen(len(v.Values))
raw.SetSigned(v.Signed)
if v.has64BitValues() {
raw.SetKind(kindEnum64)
raw.data, err = e.deflateEnum64Values(v.Values)
if v.Size == 8 {
err = e.deflateEnum64(&raw, v)
} else {
raw.SetKind(kindEnum)
raw.data, err = e.deflateEnumValues(v.Values)
err = e.deflateEnum(&raw, v)
}
case *Fwd:
@ -415,6 +410,13 @@ func (e *encoder) deflateType(typ Type) (err error) {
return raw.Marshal(e.buf, e.Order)
}
func (e *encoder) deflateUnion(raw *rawType, union *Union) (err error) {
raw.SetKind(kindUnion)
raw.SetSize(union.Size)
raw.data, err = e.convertMembers(&raw.btfType, union.Members)
return
}
func (e *encoder) convertMembers(header *btfType, members []Member) ([]btfMember, error) {
bms := make([]btfMember, 0, len(members))
isBitfield := false
@ -443,16 +445,32 @@ func (e *encoder) convertMembers(header *btfType, members []Member) ([]btfMember
return bms, nil
}
func (e *encoder) deflateEnumValues(values []EnumValue) ([]btfEnum, error) {
bes := make([]btfEnum, 0, len(values))
for _, value := range values {
func (e *encoder) deflateEnum(raw *rawType, enum *Enum) (err error) {
raw.SetKind(kindEnum)
raw.SetSize(enum.Size)
raw.SetVlen(len(enum.Values))
// Signedness appeared together with ENUM64 support.
raw.SetSigned(enum.Signed && !e.ReplaceEnum64)
raw.data, err = e.deflateEnumValues(enum)
return
}
func (e *encoder) deflateEnumValues(enum *Enum) ([]btfEnum, error) {
bes := make([]btfEnum, 0, len(enum.Values))
for _, value := range enum.Values {
nameOff, err := e.strings.Add(value.Name)
if err != nil {
return nil, err
}
if value.Value > math.MaxUint32 {
return nil, fmt.Errorf("value of enum %q exceeds 32 bits", value.Name)
if enum.Signed {
if signedValue := int64(value.Value); signedValue < math.MinInt32 || signedValue > math.MaxInt32 {
return nil, fmt.Errorf("value %d of enum %q exceeds 32 bits", signedValue, value.Name)
}
} else {
if value.Value > math.MaxUint32 {
return nil, fmt.Errorf("value %d of enum %q exceeds 32 bits", value.Value, value.Name)
}
}
bes = append(bes, btfEnum{
@ -464,6 +482,41 @@ func (e *encoder) deflateEnumValues(values []EnumValue) ([]btfEnum, error) {
return bes, nil
}
func (e *encoder) deflateEnum64(raw *rawType, enum *Enum) (err error) {
if e.ReplaceEnum64 {
// Replace the ENUM64 with a union of fields with the correct size.
// This matches libbpf behaviour on purpose.
placeholder := &Int{
"enum64_placeholder",
enum.Size,
Unsigned,
}
if enum.Signed {
placeholder.Encoding = Signed
}
if err := e.allocateID(placeholder); err != nil {
return fmt.Errorf("add enum64 placeholder: %w", err)
}
members := make([]Member, 0, len(enum.Values))
for _, v := range enum.Values {
members = append(members, Member{
Name: v.Name,
Type: placeholder,
})
}
return e.deflateUnion(raw, &Union{enum.Name, enum.Size, members})
}
raw.SetKind(kindEnum64)
raw.SetSize(enum.Size)
raw.SetVlen(len(enum.Values))
raw.SetSigned(enum.Signed)
raw.data, err = e.deflateEnum64Values(enum.Values)
return
}
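When ReplaceEnum64 is set, a 64-bit enum is demoted to a union whose members reuse the enum's value names but share a single integer placeholder; the 64-bit values themselves are dropped, matching libbpf. Roughly, for enum E { A = 1<<40 } the encoder emits the equivalent of this (a sketch built from the package's exported types):

func enum64Replacement() *btf.Union {
	placeholder := &btf.Int{Name: "enum64_placeholder", Size: 8, Encoding: btf.Unsigned}
	return &btf.Union{
		Name: "E",
		Size: 8,
		Members: []btf.Member{
			{Name: "A", Type: placeholder}, // only the name survives, not 1<<40
		},
	}
}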
func (e *encoder) deflateEnum64Values(values []EnumValue) ([]btfEnum64, error) {
bes := make([]btfEnum64, 0, len(values))
for _, value := range values {

View file

@ -9,11 +9,13 @@ import (
"strings"
"golang.org/x/exp/maps"
"golang.org/x/exp/slices"
)
type stringTable struct {
base *stringTable
offsets []uint32
prevIdx int
strings []string
}
@ -60,7 +62,7 @@ func readStringTable(r sizedReader, base *stringTable) (*stringTable, error) {
return nil, errors.New("first item in string table is non-empty")
}
return &stringTable{base, offsets, strings}, nil
return &stringTable{base, offsets, 0, strings}, nil
}
func splitNull(data []byte, atEOF bool) (advance int, token []byte, err error) {
@ -83,53 +85,36 @@ func (st *stringTable) Lookup(offset uint32) (string, error) {
}
func (st *stringTable) lookup(offset uint32) (string, error) {
i := search(st.offsets, offset)
if i == len(st.offsets) || st.offsets[i] != offset {
// Fast path: zero offset is the empty string, looked up frequently.
if offset == 0 && st.base == nil {
return "", nil
}
// Accesses tend to be globally increasing, so check if the next string is
// the one we want. This skips the binary search in about 50% of cases.
if st.prevIdx+1 < len(st.offsets) && st.offsets[st.prevIdx+1] == offset {
st.prevIdx++
return st.strings[st.prevIdx], nil
}
i, found := slices.BinarySearch(st.offsets, offset)
if !found {
return "", fmt.Errorf("offset %d isn't start of a string", offset)
}
// Set the new increment index, but only if it's greater than the current one.
if i > st.prevIdx+1 {
st.prevIdx = i
}
return st.strings[i], nil
}
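The prevIdx cursor pays off because offsets are looked up in roughly increasing order during type inflation. The same caching trick in isolation, over any sorted []uint32 (a standalone sketch, not library API; slices is golang.org/x/exp/slices as imported above):

func lookupMonotonic(offsets []uint32, prev *int, offset uint32) (int, bool) {
	// Fast path: the entry right after the previous hit is the one requested.
	if *prev+1 < len(offsets) && offsets[*prev+1] == offset {
		*prev++
		return *prev, true
	}
	i, found := slices.BinarySearch(offsets, offset)
	if found && i > *prev+1 {
		*prev = i // only ever move the cursor forward
	}
	return i, found
}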
func (st *stringTable) Marshal(w io.Writer) error {
for _, str := range st.strings {
_, err := io.WriteString(w, str)
if err != nil {
return err
}
_, err = w.Write([]byte{0})
if err != nil {
return err
}
}
return nil
}
// Num returns the number of strings in the table.
func (st *stringTable) Num() int {
return len(st.strings)
}
// search is a copy of sort.Search specialised for uint32.
//
// Licensed under https://go.dev/LICENSE
func search(ints []uint32, needle uint32) int {
// Define f(-1) == false and f(n) == true.
// Invariant: f(i-1) == false, f(j) == true.
i, j := 0, len(ints)
for i < j {
h := int(uint(i+j) >> 1) // avoid overflow when computing h
// i ≤ h < j
if !(ints[h] >= needle) {
i = h + 1 // preserves f(i-1) == false
} else {
j = h // preserves f(j) == true
}
}
// i == j, f(i-1) == false, and f(j) (= f(i)) == true => answer is i.
return i
}
// stringTableBuilder builds BTF string tables.
type stringTableBuilder struct {
length uint32

View file

@ -1,19 +1,22 @@
package btf
import (
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"reflect"
"strings"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"golang.org/x/exp/slices"
)
const maxTypeDepth = 32
// Mirrors MAX_RESOLVE_DEPTH in libbpf.
// https://github.com/libbpf/libbpf/blob/e26b84dc330c9644c07428c271ab491b0f01f4e1/src/btf.c#L761
const maxResolveDepth = 32
// TypeID identifies a type in a BTF section.
type TypeID = sys.TypeID
@ -116,7 +119,7 @@ type Int struct {
}
func (i *Int) Format(fs fmt.State, verb rune) {
formatType(fs, verb, i, i.Encoding, "size=", i.Size*8)
formatType(fs, verb, i, i.Encoding, "size=", i.Size)
}
func (i *Int) TypeName() string { return i.Name }
@ -278,21 +281,6 @@ func (e *Enum) copy() Type {
return &cpy
}
// has64BitValues returns true if the Enum contains a value larger than 32 bits.
// Kernels before 6.0 have enum values that overrun u32 replaced with zeroes.
//
// 64-bit enums have their Enum.Size attributes correctly set to 8, but if we
// use the size attribute as a heuristic during BTF marshaling, we'll emit
// ENUM64s to kernels that don't support them.
func (e *Enum) has64BitValues() bool {
for _, v := range e.Values {
if v.Value > math.MaxUint32 {
return true
}
}
return false
}
// FwdKind is the type of forward declaration.
type FwdKind int
@ -605,7 +593,7 @@ func Sizeof(typ Type) (int, error) {
elem int64
)
for i := 0; i < maxTypeDepth; i++ {
for i := 0; i < maxResolveDepth; i++ {
switch v := typ.(type) {
case *Array:
if n > 0 && int64(v.Nelems) > math.MaxInt64/n {
@ -740,17 +728,22 @@ func (c *copier) copy(typ *Type, transform Transformer) {
type typeDeque = internal.Deque[*Type]
// inflateRawTypes takes a list of raw btf types linked via type IDs, and turns
// it into a graph of Types connected via pointers.
// readAndInflateTypes reads the raw btf type info and turns it into a graph
// of Types connected via pointers.
//
// If base is provided, then the raw types are considered to be of a split BTF
// If base is provided, then the types are considered to be of a split BTF
// (e.g., a kernel module).
//
// Returns a slice of types indexed by TypeID. Since BTF ignores compilation
// units, multiple types may share the same name. A Type may form a cyclic graph
// by pointing at itself.
func inflateRawTypes(rawTypes []rawType, rawStrings *stringTable, base *Spec) ([]Type, error) {
types := make([]Type, 0, len(rawTypes)+1) // +1 for Void added to base types
func readAndInflateTypes(r io.Reader, bo binary.ByteOrder, typeLen uint32, rawStrings *stringTable, base *Spec) ([]Type, error) {
// Because of the interleaving between types and struct members it is
// difficult to precompute the number of raw types this will parse;
// this guess is a good first estimate.
sizeOfbtfType := uintptr(btfTypeLen)
tyMaxCount := uintptr(typeLen) / sizeOfbtfType / 2
types := make([]Type, 0, tyMaxCount)
// Void is defined to always be type ID 0, and is thus omitted from BTF.
types = append(types, (*Void)(nil))
@ -773,11 +766,11 @@ func inflateRawTypes(rawTypes []rawType, rawStrings *stringTable, base *Spec) ([
}
var fixups []fixupDef
fixup := func(id TypeID, typ *Type) bool {
fixup := func(id TypeID, typ *Type) {
if id < firstTypeID {
if baseType, err := base.TypeByID(id); err == nil {
*typ = baseType
return true
return
}
}
@ -785,31 +778,10 @@ func inflateRawTypes(rawTypes []rawType, rawStrings *stringTable, base *Spec) ([
if idx < len(types) {
// We've already inflated this type, fix it up immediately.
*typ = types[idx]
return true
return
}
fixups = append(fixups, fixupDef{id, typ})
return false
}
type assertion struct {
id TypeID
typ *Type
want reflect.Type
}
var assertions []assertion
fixupAndAssert := func(id TypeID, typ *Type, want reflect.Type) error {
if !fixup(id, typ) {
assertions = append(assertions, assertion{id, typ, want})
return nil
}
// The type has already been fixed up, check the type immediately.
if reflect.TypeOf(*typ) != want {
return fmt.Errorf("type ID %d: expected %s, got %T", id, want, *typ)
}
return nil
}
type bitfieldFixupDef struct {
@ -876,62 +848,128 @@ func inflateRawTypes(rawTypes []rawType, rawStrings *stringTable, base *Spec) ([
return members, nil
}
var (
buf = make([]byte, 1024)
header btfType
bInt btfInt
bArr btfArray
bMembers []btfMember
bEnums []btfEnum
bParams []btfParam
bVariable btfVariable
bSecInfos []btfVarSecinfo
bDeclTag btfDeclTag
bEnums64 []btfEnum64
)
var declTags []*declTag
for _, raw := range rawTypes {
for {
var (
id = firstTypeID + TypeID(len(types))
typ Type
)
if _, err := io.ReadFull(r, buf[:btfTypeLen]); err == io.EOF {
break
} else if err != nil {
return nil, fmt.Errorf("can't read type info for id %v: %v", id, err)
}
if _, err := unmarshalBtfType(&header, buf[:btfTypeLen], bo); err != nil {
return nil, fmt.Errorf("can't unmarshal type info for id %v: %v", id, err)
}
if id < firstTypeID {
return nil, fmt.Errorf("no more type IDs")
}
name, err := rawStrings.Lookup(raw.NameOff)
name, err := rawStrings.Lookup(header.NameOff)
if err != nil {
return nil, fmt.Errorf("get name for type id %d: %w", id, err)
}
switch raw.Kind() {
switch header.Kind() {
case kindInt:
size := raw.Size()
bi := raw.data.(*btfInt)
if bi.Offset() > 0 || bi.Bits().Bytes() != size {
legacyBitfields[id] = [2]Bits{bi.Offset(), bi.Bits()}
size := header.Size()
buf = buf[:btfIntLen]
if _, err := io.ReadFull(r, buf); err != nil {
return nil, fmt.Errorf("can't read btfInt, id: %d: %w", id, err)
}
typ = &Int{name, raw.Size(), bi.Encoding()}
if _, err := unmarshalBtfInt(&bInt, buf, bo); err != nil {
return nil, fmt.Errorf("can't unmarshal btfInt, id: %d: %w", id, err)
}
if bInt.Offset() > 0 || bInt.Bits().Bytes() != size {
legacyBitfields[id] = [2]Bits{bInt.Offset(), bInt.Bits()}
}
typ = &Int{name, header.Size(), bInt.Encoding()}
case kindPointer:
ptr := &Pointer{nil}
fixup(raw.Type(), &ptr.Target)
fixup(header.Type(), &ptr.Target)
typ = ptr
case kindArray:
btfArr := raw.data.(*btfArray)
arr := &Array{nil, nil, btfArr.Nelems}
fixup(btfArr.IndexType, &arr.Index)
fixup(btfArr.Type, &arr.Type)
buf = buf[:btfArrayLen]
if _, err := io.ReadFull(r, buf); err != nil {
return nil, fmt.Errorf("can't read btfArray, id: %d: %w", id, err)
}
if _, err := unmarshalBtfArray(&bArr, buf, bo); err != nil {
return nil, fmt.Errorf("can't unmarshal btfArray, id: %d: %w", id, err)
}
arr := &Array{nil, nil, bArr.Nelems}
fixup(bArr.IndexType, &arr.Index)
fixup(bArr.Type, &arr.Type)
typ = arr
case kindStruct:
members, err := convertMembers(raw.data.([]btfMember), raw.Bitfield())
vlen := header.Vlen()
bMembers = slices.Grow(bMembers[:0], vlen)[:vlen]
buf = slices.Grow(buf[:0], vlen*btfMemberLen)[:vlen*btfMemberLen]
if _, err := io.ReadFull(r, buf); err != nil {
return nil, fmt.Errorf("can't read btfMembers, id: %d: %w", id, err)
}
if _, err := unmarshalBtfMembers(bMembers, buf, bo); err != nil {
return nil, fmt.Errorf("can't unmarshal btfMembers, id: %d: %w", id, err)
}
members, err := convertMembers(bMembers, header.Bitfield())
if err != nil {
return nil, fmt.Errorf("struct %s (id %d): %w", name, id, err)
}
typ = &Struct{name, raw.Size(), members}
typ = &Struct{name, header.Size(), members}
case kindUnion:
members, err := convertMembers(raw.data.([]btfMember), raw.Bitfield())
vlen := header.Vlen()
bMembers = slices.Grow(bMembers[:0], vlen)[:vlen]
buf = slices.Grow(buf[:0], vlen*btfMemberLen)[:vlen*btfMemberLen]
if _, err := io.ReadFull(r, buf); err != nil {
return nil, fmt.Errorf("can't read btfMembers, id: %d: %w", id, err)
}
if _, err := unmarshalBtfMembers(bMembers, buf, bo); err != nil {
return nil, fmt.Errorf("can't unmarshal btfMembers, id: %d: %w", id, err)
}
members, err := convertMembers(bMembers, header.Bitfield())
if err != nil {
return nil, fmt.Errorf("union %s (id %d): %w", name, id, err)
}
typ = &Union{name, raw.Size(), members}
typ = &Union{name, header.Size(), members}
case kindEnum:
rawvals := raw.data.([]btfEnum)
vals := make([]EnumValue, 0, len(rawvals))
signed := raw.Signed()
for i, btfVal := range rawvals {
vlen := header.Vlen()
bEnums = slices.Grow(bEnums[:0], vlen)[:vlen]
buf = slices.Grow(buf[:0], vlen*btfEnumLen)[:vlen*btfEnumLen]
if _, err := io.ReadFull(r, buf); err != nil {
return nil, fmt.Errorf("can't read btfEnums, id: %d: %w", id, err)
}
if _, err := unmarshalBtfEnums(bEnums, buf, bo); err != nil {
return nil, fmt.Errorf("can't unmarshal btfEnums, id: %d: %w", id, err)
}
vals := make([]EnumValue, 0, vlen)
signed := header.Signed()
for i, btfVal := range bEnums {
name, err := rawStrings.Lookup(btfVal.NameOff)
if err != nil {
return nil, fmt.Errorf("get name for enum value %d: %s", i, err)
@ -943,42 +981,49 @@ func inflateRawTypes(rawTypes []rawType, rawStrings *stringTable, base *Spec) ([
}
vals = append(vals, EnumValue{name, value})
}
typ = &Enum{name, raw.Size(), signed, vals}
typ = &Enum{name, header.Size(), signed, vals}
case kindForward:
typ = &Fwd{name, raw.FwdKind()}
typ = &Fwd{name, header.FwdKind()}
case kindTypedef:
typedef := &Typedef{name, nil}
fixup(raw.Type(), &typedef.Type)
fixup(header.Type(), &typedef.Type)
typ = typedef
case kindVolatile:
volatile := &Volatile{nil}
fixup(raw.Type(), &volatile.Type)
fixup(header.Type(), &volatile.Type)
typ = volatile
case kindConst:
cnst := &Const{nil}
fixup(raw.Type(), &cnst.Type)
fixup(header.Type(), &cnst.Type)
typ = cnst
case kindRestrict:
restrict := &Restrict{nil}
fixup(raw.Type(), &restrict.Type)
fixup(header.Type(), &restrict.Type)
typ = restrict
case kindFunc:
fn := &Func{name, nil, raw.Linkage()}
if err := fixupAndAssert(raw.Type(), &fn.Type, reflect.TypeOf((*FuncProto)(nil))); err != nil {
return nil, err
}
fn := &Func{name, nil, header.Linkage()}
fixup(header.Type(), &fn.Type)
typ = fn
case kindFuncProto:
rawparams := raw.data.([]btfParam)
params := make([]FuncParam, 0, len(rawparams))
for i, param := range rawparams {
vlen := header.Vlen()
bParams = slices.Grow(bParams[:0], vlen)[:vlen]
buf = slices.Grow(buf[:0], vlen*btfParamLen)[:vlen*btfParamLen]
if _, err := io.ReadFull(r, buf); err != nil {
return nil, fmt.Errorf("can't read btfParams, id: %d: %w", id, err)
}
if _, err := unmarshalBtfParams(bParams, buf, bo); err != nil {
return nil, fmt.Errorf("can't unmarshal btfParams, id: %d: %w", id, err)
}
params := make([]FuncParam, 0, vlen)
for i, param := range bParams {
name, err := rawStrings.Lookup(param.NameOff)
if err != nil {
return nil, fmt.Errorf("get name for func proto parameter %d: %s", i, err)
@ -988,57 +1033,90 @@ func inflateRawTypes(rawTypes []rawType, rawStrings *stringTable, base *Spec) ([
})
}
for i := range params {
fixup(rawparams[i].Type, &params[i].Type)
fixup(bParams[i].Type, &params[i].Type)
}
fp := &FuncProto{nil, params}
fixup(raw.Type(), &fp.Return)
fixup(header.Type(), &fp.Return)
typ = fp
case kindVar:
variable := raw.data.(*btfVariable)
v := &Var{name, nil, VarLinkage(variable.Linkage)}
fixup(raw.Type(), &v.Type)
buf = buf[:btfVariableLen]
if _, err := io.ReadFull(r, buf); err != nil {
return nil, fmt.Errorf("can't read btfVariable, id: %d: %w", id, err)
}
if _, err := unmarshalBtfVariable(&bVariable, buf, bo); err != nil {
return nil, fmt.Errorf("can't read btfVariable, id: %d: %w", id, err)
}
v := &Var{name, nil, VarLinkage(bVariable.Linkage)}
fixup(header.Type(), &v.Type)
typ = v
case kindDatasec:
btfVars := raw.data.([]btfVarSecinfo)
vars := make([]VarSecinfo, 0, len(btfVars))
for _, btfVar := range btfVars {
vlen := header.Vlen()
bSecInfos = slices.Grow(bSecInfos[:0], vlen)[:vlen]
buf = slices.Grow(buf[:0], vlen*btfVarSecinfoLen)[:vlen*btfVarSecinfoLen]
if _, err := io.ReadFull(r, buf); err != nil {
return nil, fmt.Errorf("can't read btfVarSecInfos, id: %d: %w", id, err)
}
if _, err := unmarshalBtfVarSecInfos(bSecInfos, buf, bo); err != nil {
return nil, fmt.Errorf("can't unmarshal btfVarSecInfos, id: %d: %w", id, err)
}
vars := make([]VarSecinfo, 0, vlen)
for _, btfVar := range bSecInfos {
vars = append(vars, VarSecinfo{
Offset: btfVar.Offset,
Size: btfVar.Size,
})
}
for i := range vars {
fixup(btfVars[i].Type, &vars[i].Type)
fixup(bSecInfos[i].Type, &vars[i].Type)
}
typ = &Datasec{name, raw.Size(), vars}
typ = &Datasec{name, header.Size(), vars}
case kindFloat:
typ = &Float{name, raw.Size()}
typ = &Float{name, header.Size()}
case kindDeclTag:
btfIndex := raw.data.(*btfDeclTag).ComponentIdx
buf = buf[:btfDeclTagLen]
if _, err := io.ReadFull(r, buf); err != nil {
return nil, fmt.Errorf("can't read btfDeclTag, id: %d: %w", id, err)
}
if _, err := unmarshalBtfDeclTag(&bDeclTag, buf, bo); err != nil {
return nil, fmt.Errorf("can't read btfDeclTag, id: %d: %w", id, err)
}
btfIndex := bDeclTag.ComponentIdx
if uint64(btfIndex) > math.MaxInt {
return nil, fmt.Errorf("type id %d: index exceeds int", id)
}
dt := &declTag{nil, name, int(int32(btfIndex))}
fixup(raw.Type(), &dt.Type)
fixup(header.Type(), &dt.Type)
typ = dt
declTags = append(declTags, dt)
case kindTypeTag:
tt := &typeTag{nil, name}
fixup(raw.Type(), &tt.Type)
fixup(header.Type(), &tt.Type)
typ = tt
case kindEnum64:
rawvals := raw.data.([]btfEnum64)
vals := make([]EnumValue, 0, len(rawvals))
for i, btfVal := range rawvals {
vlen := header.Vlen()
bEnums64 = slices.Grow(bEnums64[:0], vlen)[:vlen]
buf = slices.Grow(buf[:0], vlen*btfEnum64Len)[:vlen*btfEnum64Len]
if _, err := io.ReadFull(r, buf); err != nil {
return nil, fmt.Errorf("can't read btfEnum64s, id: %d: %w", id, err)
}
if _, err := unmarshalBtfEnums64(bEnums64, buf, bo); err != nil {
return nil, fmt.Errorf("can't unmarshal btfEnum64s, id: %d: %w", id, err)
}
vals := make([]EnumValue, 0, vlen)
for i, btfVal := range bEnums64 {
name, err := rawStrings.Lookup(btfVal.NameOff)
if err != nil {
return nil, fmt.Errorf("get name for enum64 value %d: %s", i, err)
@ -1046,10 +1124,10 @@ func inflateRawTypes(rawTypes []rawType, rawStrings *stringTable, base *Spec) ([
value := (uint64(btfVal.ValHi32) << 32) | uint64(btfVal.ValLo32)
vals = append(vals, EnumValue{name, value})
}
typ = &Enum{name, raw.Size(), raw.Signed(), vals}
typ = &Enum{name, header.Size(), header.Signed(), vals}
default:
return nil, fmt.Errorf("type id %d: unknown kind: %v", id, raw.Kind())
return nil, fmt.Errorf("type id %d: unknown kind: %v", id, header.Kind())
}
types = append(types, typ)
@ -1081,12 +1159,6 @@ func inflateRawTypes(rawTypes []rawType, rawStrings *stringTable, base *Spec) ([
}
}
for _, assertion := range assertions {
if reflect.TypeOf(*assertion.typ) != assertion.want {
return nil, fmt.Errorf("type ID %d: expected %s, got %T", assertion.id, assertion.want, *assertion.typ)
}
}
for _, dt := range declTags {
switch t := dt.Type.(type) {
case *Var, *Typedef:
@ -1100,7 +1172,12 @@ func inflateRawTypes(rawTypes []rawType, rawStrings *stringTable, base *Spec) ([
}
case *Func:
if dt.Index >= len(t.Type.(*FuncProto).Params) {
fp, ok := t.Type.(*FuncProto)
if !ok {
return nil, fmt.Errorf("type %s: %s is not a FuncProto", dt, t.Type)
}
if dt.Index >= len(fp.Params) {
return nil, fmt.Errorf("type %s: index %d exceeds params of %s", dt, dt.Index, t)
}
@ -1136,7 +1213,7 @@ func newEssentialName(name string) essentialName {
// UnderlyingType skips qualifiers and Typedefs.
func UnderlyingType(typ Type) Type {
result := typ
for depth := 0; depth <= maxTypeDepth; depth++ {
for depth := 0; depth <= maxResolveDepth; depth++ {
switch v := (result).(type) {
case qualifier:
result = v.qualify()
@ -1155,7 +1232,7 @@ func UnderlyingType(typ Type) Type {
// Returns the zero value and false if there is no T or if the type is nested
// too deeply.
func as[T Type](typ Type) (T, bool) {
for depth := 0; depth <= maxTypeDepth; depth++ {
for depth := 0; depth <= maxResolveDepth; depth++ {
switch v := (typ).(type) {
case T:
return v, true

View file

@ -11,6 +11,7 @@ import (
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/kconfig"
"github.com/cilium/ebpf/internal/sysenc"
)
// CollectionOptions control loading a collection into the kernel.
@ -175,12 +176,12 @@ func (cs *CollectionSpec) RewriteConstants(consts map[string]interface{}) error
return fmt.Errorf("section %s: offset %d(+%d) for variable %s is out of bounds", name, v.Offset, v.Size, vname)
}
b, err := marshalBytes(replacement, int(v.Size))
b, err := sysenc.Marshal(replacement, int(v.Size))
if err != nil {
return fmt.Errorf("marshaling constant replacement %s: %w", vname, err)
}
copy(cpy[v.Offset:v.Offset+v.Size], b)
b.CopyTo(cpy[v.Offset : v.Offset+v.Size])
replaced[vname] = true
}
@ -308,7 +309,7 @@ func (cs *CollectionSpec) LoadAndAssign(to interface{}, opts *CollectionOptions)
}
// Populate the requested maps. Has a chance of lazy-loading other dependent maps.
if err := loader.populateMaps(); err != nil {
if err := loader.populateDeferredMaps(); err != nil {
return err
}
@ -388,7 +389,7 @@ func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (*Co
// Maps can contain Program and Map stubs, so populate them after
// all Maps and Programs have been successfully loaded.
if err := loader.populateMaps(); err != nil {
if err := loader.populateDeferredMaps(); err != nil {
return nil, err
}
@ -470,6 +471,15 @@ func (cl *collectionLoader) loadMap(mapName string) (*Map, error) {
return nil, fmt.Errorf("map %s: %w", mapName, err)
}
// Finalize 'scalar' maps that don't refer to any other eBPF resources
// potentially pending creation. This is needed for frozen maps like .rodata
// that need to be finalized before invoking the verifier.
if !mapSpec.Type.canStoreMapOrProgram() {
if err := m.finalize(mapSpec); err != nil {
return nil, fmt.Errorf("finalizing map %s: %w", mapName, err)
}
}
cl.maps[mapName] = m
return m, nil
}
@ -527,44 +537,50 @@ func (cl *collectionLoader) loadProgram(progName string) (*Program, error) {
return prog, nil
}
func (cl *collectionLoader) populateMaps() error {
// populateDeferredMaps iterates maps holding programs or other maps and loads
// any dependencies. Populates all maps in cl and freezes them if specified.
func (cl *collectionLoader) populateDeferredMaps() error {
for mapName, m := range cl.maps {
mapSpec, ok := cl.coll.Maps[mapName]
if !ok {
return fmt.Errorf("missing map spec %s", mapName)
}
// Scalar maps without Map or Program references are finalized during
// creation. Don't finalize them again.
if !mapSpec.Type.canStoreMapOrProgram() {
continue
}
mapSpec = mapSpec.Copy()
// MapSpecs that refer to inner maps or programs within the same
// CollectionSpec do so using strings. These strings are used as the key
// to look up the respective object in the Maps or Programs fields.
// Resolve those references to actual Map or Program resources that
// have been loaded into the kernel.
if mapSpec.Type.canStoreMap() || mapSpec.Type.canStoreProgram() {
mapSpec = mapSpec.Copy()
for i, kv := range mapSpec.Contents {
objName, ok := kv.Value.(string)
if !ok {
continue
}
for i, kv := range mapSpec.Contents {
objName, ok := kv.Value.(string)
if !ok {
continue
switch t := mapSpec.Type; {
case t.canStoreProgram():
// loadProgram is idempotent and could return an existing Program.
prog, err := cl.loadProgram(objName)
if err != nil {
return fmt.Errorf("loading program %s, for map %s: %w", objName, mapName, err)
}
mapSpec.Contents[i] = MapKV{kv.Key, prog}
switch t := mapSpec.Type; {
case t.canStoreProgram():
// loadProgram is idempotent and could return an existing Program.
prog, err := cl.loadProgram(objName)
if err != nil {
return fmt.Errorf("loading program %s, for map %s: %w", objName, mapName, err)
}
mapSpec.Contents[i] = MapKV{kv.Key, prog}
case t.canStoreMap():
// loadMap is idempotent and could return an existing Map.
innerMap, err := cl.loadMap(objName)
if err != nil {
return fmt.Errorf("loading inner map %s, for map %s: %w", objName, mapName, err)
}
mapSpec.Contents[i] = MapKV{kv.Key, innerMap}
case t.canStoreMap():
// loadMap is idempotent and could return an existing Map.
innerMap, err := cl.loadMap(objName)
if err != nil {
return fmt.Errorf("loading inner map %s, for map %s: %w", objName, mapName, err)
}
mapSpec.Contents[i] = MapKV{kv.Key, innerMap}
}
}
@ -610,17 +626,20 @@ func resolveKconfig(m *MapSpec) error {
internal.NativeEndian.PutUint32(data[vsi.Offset:], kv.Kernel())
case "LINUX_HAS_SYSCALL_WRAPPER":
if integer, ok := v.Type.(*btf.Int); !ok || integer.Size != 4 {
return fmt.Errorf("variable %s must be a 32 bits integer, got %s", n, v.Type)
integer, ok := v.Type.(*btf.Int)
if !ok {
return fmt.Errorf("variable %s must be an integer, got %s", n, v.Type)
}
var value uint32 = 1
var value uint64 = 1
if err := haveSyscallWrapper(); errors.Is(err, ErrNotSupported) {
value = 0
} else if err != nil {
return fmt.Errorf("unable to derive a value for LINUX_HAS_SYSCALL_WRAPPER: %w", err)
}
internal.NativeEndian.PutUint32(data[vsi.Offset:], value)
if err := kconfig.PutInteger(data[vsi.Offset:], integer, value); err != nil {
return fmt.Errorf("set LINUX_HAS_SYSCALL_WRAPPER: %w", err)
}
default: // Catch CONFIG_*.
configs[n] = configInfo{
@ -679,6 +698,71 @@ func LoadCollection(file string) (*Collection, error) {
return NewCollection(spec)
}
// Assign the contents of a Collection to a struct.
//
// This function bridges functionality between bpf2go-generated
// code and features better implemented in Collection.
//
// 'to' must be a pointer to a struct. A field of the
// struct is updated with values from Programs or Maps if it
// has an `ebpf` tag and its type is *Program or *Map.
// The tag's value specifies the name of the program or map as
// found in the CollectionSpec.
//
// struct {
// Foo *ebpf.Program `ebpf:"xdp_foo"`
// Bar *ebpf.Map `ebpf:"bar_map"`
// Ignored int
// }
//
// Returns an error if any of the eBPF objects can't be found, or
// if the same Map or Program is assigned multiple times.
//
// Ownership and Close()ing responsibility is transferred to `to`
// for any successful assigns. On error `to` is left in an undefined state.
func (coll *Collection) Assign(to interface{}) error {
assignedMaps := make(map[string]bool)
assignedProgs := make(map[string]bool)
// Assign() only transfers already-loaded Maps and Programs. No extra
// loading is done.
getValue := func(typ reflect.Type, name string) (interface{}, error) {
switch typ {
case reflect.TypeOf((*Program)(nil)):
if p := coll.Programs[name]; p != nil {
assignedProgs[name] = true
return p, nil
}
return nil, fmt.Errorf("missing program %q", name)
case reflect.TypeOf((*Map)(nil)):
if m := coll.Maps[name]; m != nil {
assignedMaps[name] = true
return m, nil
}
return nil, fmt.Errorf("missing map %q", name)
default:
return nil, fmt.Errorf("unsupported type %s", typ)
}
}
if err := assignValues(to, getValue); err != nil {
return err
}
// Finalize ownership transfer
for p := range assignedProgs {
delete(coll.Programs, p)
}
for m := range assignedMaps {
delete(coll.Maps, m)
}
return nil
}
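Taken together with LoadCollection above, a typical caller hands over ownership like this (a hedged sketch; the tag names reuse the ones from the doc comment, and program.o is a stand-in path):

func loadAndAssign() error {
	coll, err := ebpf.LoadCollection("program.o")
	if err != nil {
		return err
	}
	defer coll.Close() // closes whatever Assign did not claim

	var objs struct {
		Foo *ebpf.Program `ebpf:"xdp_foo"`
		Bar *ebpf.Map     `ebpf:"bar_map"`
	}
	if err := coll.Assign(&objs); err != nil {
		return err
	}
	defer objs.Foo.Close()
	defer objs.Bar.Close()
	return nil
}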
// Close frees all maps and programs associated with the collection.
//
// The collection mustn't be used afterwards.

View file

@ -81,6 +81,8 @@ func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) {
// Collect all the sections we're interested in. This includes relocations
// which we parse later.
//
// Keep the documentation at docs/ebpf/loading/elf-sections.md up-to-date.
for i, sec := range f.Sections {
idx := elf.SectionIndex(i)
@ -371,7 +373,7 @@ func (ec *elfCode) loadFunctions(section *elfSection) (map[string]asm.Instructio
r := bufio.NewReader(section.Open())
// Decode the section's instruction stream.
var insns asm.Instructions
insns := make(asm.Instructions, 0, section.Size/asm.InstructionSize)
if err := insns.Unmarshal(r, ec.ByteOrder); err != nil {
return nil, fmt.Errorf("decoding instructions for section %s: %w", section.Name, err)
}
@ -465,10 +467,14 @@ func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) err
switch target.kind {
case mapSection, btfMapSection:
if bind != elf.STB_GLOBAL {
if bind == elf.STB_LOCAL {
return fmt.Errorf("possible erroneous static qualifier on map definition: found reference to %q", name)
}
if bind != elf.STB_GLOBAL {
return fmt.Errorf("map %q: unsupported relocation %s", name, bind)
}
if typ != elf.STT_OBJECT && typ != elf.STT_NOTYPE {
// STT_NOTYPE is generated on clang < 8 which doesn't tag
// relocations appropriately.
@ -694,10 +700,6 @@ func (ec *elfCode) loadMaps() error {
spec.Extra = bytes.NewReader(extra)
}
if err := spec.clampPerfEventArraySize(); err != nil {
return fmt.Errorf("map %s: %w", mapName, err)
}
ec.maps[mapName] = &spec
}
}
@ -752,7 +754,7 @@ func (ec *elfCode) loadBTFMaps() error {
}
// Each Var representing a BTF map definition contains a Struct.
mapStruct, ok := v.Type.(*btf.Struct)
mapStruct, ok := btf.UnderlyingType(v.Type).(*btf.Struct)
if !ok {
return fmt.Errorf("expected struct, got %s", v.Type)
}
@ -762,10 +764,6 @@ func (ec *elfCode) loadBTFMaps() error {
return fmt.Errorf("map %v: %w", name, err)
}
if err := mapSpec.clampPerfEventArraySize(); err != nil {
return fmt.Errorf("map %v: %w", name, err)
}
ec.maps[name] = mapSpec
}
@ -785,7 +783,7 @@ func (ec *elfCode) loadBTFMaps() error {
// mapSpecFromBTF produces a MapSpec based on a btf.Struct def representing
// a BTF map definition. The name and spec arguments will be copied to the
// resulting MapSpec, and inner must be true on any resursive invocations.
// resulting MapSpec, and inner must be true on any recursive invocations.
func mapSpecFromBTF(es *elfSection, vs *btf.VarSecinfo, def *btf.Struct, spec *btf.Spec, name string, inner bool) (*MapSpec, error) {
var (
key, value btf.Type
@ -1150,7 +1148,7 @@ func (ec *elfCode) loadKconfigSection() error {
KeySize: uint32(4),
ValueSize: ds.Size,
MaxEntries: 1,
Flags: unix.BPF_F_RDONLY_PROG | unix.BPF_F_MMAPABLE,
Flags: unix.BPF_F_RDONLY_PROG,
Freeze: true,
Key: &btf.Int{Size: 4},
Value: ds,
@ -1268,6 +1266,7 @@ func getProgType(sectionName string) (ProgramType, AttachType, uint32, string) {
{"seccomp", SocketFilter, AttachNone, 0},
{"kprobe.multi", Kprobe, AttachTraceKprobeMulti, 0},
{"kretprobe.multi", Kprobe, AttachTraceKprobeMulti, 0},
// Document all prefixes in docs/ebpf/concepts/elf-sections.md.
}
for _, t := range types {

View file

@ -101,6 +101,11 @@ type ProgramInfo struct {
maps []MapID
insns []byte
lineInfos []byte
numLineInfos uint32
funcInfos []byte
numFuncInfos uint32
}
func newProgramInfoFromFd(fd *sys.FD) (*ProgramInfo, error) {
@ -128,10 +133,13 @@ func newProgramInfoFromFd(fd *sys.FD) (*ProgramInfo, error) {
// Start with a clean struct for the second call, otherwise we may get EFAULT.
var info2 sys.ProgInfo
makeSecondCall := false
if info.NrMapIds > 0 {
pi.maps = make([]MapID, info.NrMapIds)
info2.NrMapIds = info.NrMapIds
info2.MapIds = sys.NewPointer(unsafe.Pointer(&pi.maps[0]))
makeSecondCall = true
} else if haveProgramInfoMapIDs() == nil {
// This program really has no associated maps.
pi.maps = make([]MapID, 0)
@ -150,9 +158,28 @@ func newProgramInfoFromFd(fd *sys.FD) (*ProgramInfo, error) {
pi.insns = make([]byte, info.XlatedProgLen)
info2.XlatedProgLen = info.XlatedProgLen
info2.XlatedProgInsns = sys.NewSlicePointer(pi.insns)
makeSecondCall = true
}
if info.NrMapIds > 0 || info.XlatedProgLen > 0 {
if info.NrLineInfo > 0 {
pi.lineInfos = make([]byte, btf.LineInfoSize*info.NrLineInfo)
info2.LineInfo = sys.NewSlicePointer(pi.lineInfos)
info2.LineInfoRecSize = btf.LineInfoSize
info2.NrLineInfo = info.NrLineInfo
pi.numLineInfos = info.NrLineInfo
makeSecondCall = true
}
if info.NrFuncInfo > 0 {
pi.funcInfos = make([]byte, btf.FuncInfoSize*info.NrFuncInfo)
info2.FuncInfo = sys.NewSlicePointer(pi.funcInfos)
info2.FuncInfoRecSize = btf.FuncInfoSize
info2.NrFuncInfo = info.NrFuncInfo
pi.numFuncInfos = info.NrFuncInfo
makeSecondCall = true
}
if makeSecondCall {
if err := sys.ObjInfo(fd, &info2); err != nil {
return nil, err
}
@ -245,7 +272,13 @@ func (pi *ProgramInfo) Runtime() (time.Duration, bool) {
//
// The first instruction is marked as a symbol using the Program's name.
//
// Available from 4.13. Requires CAP_BPF or equivalent.
// If available, the instructions will be annotated with metadata from the
// BTF. This includes line information and function information. Reading
// this metadata requires CAP_SYS_ADMIN or equivalent. If the capability is
// unavailable, the instructions will be returned without metadata.
//
// Available from 4.13. Requires CAP_BPF or equivalent for plain instructions.
// Requires CAP_SYS_ADMIN for instructions with metadata.
func (pi *ProgramInfo) Instructions() (asm.Instructions, error) {
// If the calling process is not BPF-capable or if the kernel doesn't
// support getting xlated instructions, the field will be zero.
@ -259,8 +292,55 @@ func (pi *ProgramInfo) Instructions() (asm.Instructions, error) {
return nil, fmt.Errorf("unmarshaling instructions: %w", err)
}
// Tag the first instruction with the name of the program, if available.
insns[0] = insns[0].WithSymbol(pi.Name)
if pi.btf != 0 {
btfh, err := btf.NewHandleFromID(pi.btf)
if err != nil {
// Getting a BTF handle requires CAP_SYS_ADMIN, if not available we get an -EPERM.
// Ignore it and fall back to instructions without metadata.
if !errors.Is(err, unix.EPERM) {
return nil, fmt.Errorf("unable to get BTF handle: %w", err)
}
}
// If we have a BTF handle, we can use it to assign metadata to the instructions.
if btfh != nil {
defer btfh.Close()
spec, err := btfh.Spec(nil)
if err != nil {
return nil, fmt.Errorf("unable to get BTF spec: %w", err)
}
lineInfos, err := btf.LoadLineInfos(
bytes.NewReader(pi.lineInfos),
internal.NativeEndian,
pi.numLineInfos,
spec,
)
if err != nil {
return nil, fmt.Errorf("parse line info: %w", err)
}
funcInfos, err := btf.LoadFuncInfos(
bytes.NewReader(pi.funcInfos),
internal.NativeEndian,
pi.numFuncInfos,
spec,
)
if err != nil {
return nil, fmt.Errorf("parse func info: %w", err)
}
btf.AssignMetadataToInstructions(insns, funcInfos, lineInfos, btf.CORERelocationInfos{})
}
}
fn := btf.FuncMetadata(&insns[0])
name := pi.Name
if fn != nil {
name = fn.Name
}
insns[0] = insns[0].WithSymbol(name)
return insns, nil
}
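A short usage sketch of the annotated disassembly path: with CAP_SYS_ADMIN the output carries function symbols and source lines, without it the same call silently degrades to plain instructions.

func dumpXlated(prog *ebpf.Program) error {
	info, err := prog.Info()
	if err != nil {
		return err
	}
	insns, err := info.Instructions()
	if err != nil {
		return err
	}
	// asm.Instructions formats with symbols and, when loaded, source lines.
	fmt.Printf("%v\n", insns)
	return nil
}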

View file

@ -6,7 +6,7 @@ import "encoding/binary"
// NativeEndian is set to either binary.BigEndian or binary.LittleEndian,
// depending on the host's endianness.
var NativeEndian binary.ByteOrder = binary.BigEndian
var NativeEndian = binary.BigEndian
// ClangEndian is set to either "el" or "eb" depending on the host's endianness.
const ClangEndian = "eb"

View file

@ -6,7 +6,7 @@ import "encoding/binary"
// NativeEndian is set to either binary.BigEndian or binary.LittleEndian,
// depending on the host's endianness.
var NativeEndian binary.ByteOrder = binary.LittleEndian
var NativeEndian = binary.LittleEndian
// ClangEndian is set to either "el" or "eb" depending on the host's endianness.
const ClangEndian = "el"

View file

@ -250,7 +250,20 @@ func putValueNumber(data []byte, typ btf.Type, value string) error {
return fmt.Errorf("cannot parse value: %w", err)
}
switch size {
return PutInteger(data, integer, n)
}
// PutInteger writes n into data.
//
// integer determines how much is written into data and what the valid values
// are.
func PutInteger(data []byte, integer *btf.Int, n uint64) error {
// This function should match set_kcfg_value_num in libbpf.
if integer.Encoding == btf.Bool && n > 1 {
return fmt.Errorf("invalid boolean value: %d", n)
}
switch integer.Size {
case 1:
data[0] = byte(n)
case 2:
@ -260,7 +273,7 @@ func putValueNumber(data []byte, typ btf.Type, value string) error {
case 8:
internal.NativeEndian.PutUint64(data, uint64(n))
default:
return fmt.Errorf("size (%d) is not valid, expected: 1, 2, 4 or 8", size)
return fmt.Errorf("size (%d) is not valid, expected: 1, 2, 4 or 8", integer.Size)
}
return nil
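resolveKconfig in collection.go (earlier in this commit) is the first caller; a minimal within-package sketch, assuming a 4-byte unsigned kconfig slot:

func exampleKconfig() error {
	data := make([]byte, 4)
	v := &btf.Int{Size: 4, Encoding: btf.Unsigned}
	// A btf.Bool slot would reject values other than 0 and 1.
	return PutInteger(data, v, 1)
}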

View file

@ -11,7 +11,7 @@ import (
// ENOTSUPP is a Linux internal error code that has leaked into UAPI.
//
// It is not the same as ENOTSUP or EOPNOTSUPP.
var ENOTSUPP = syscall.Errno(524)
const ENOTSUPP = syscall.Errno(524)
// BPF wraps SYS_BPF.
//
@ -123,7 +123,7 @@ type TypeID uint32
// MapFlags control map behaviour.
type MapFlags uint32
//go:generate stringer -type MapFlags
//go:generate go run golang.org/x/tools/cmd/stringer@latest -type MapFlags
const (
BPF_F_NO_PREALLOC MapFlags = 1 << iota

View file

@ -59,7 +59,13 @@ const (
BPF_SK_REUSEPORT_SELECT_OR_MIGRATE AttachType = 40
BPF_PERF_EVENT AttachType = 41
BPF_TRACE_KPROBE_MULTI AttachType = 42
__MAX_BPF_ATTACH_TYPE AttachType = 43
BPF_LSM_CGROUP AttachType = 43
BPF_STRUCT_OPS AttachType = 44
BPF_NETFILTER AttachType = 45
BPF_TCX_INGRESS AttachType = 46
BPF_TCX_EGRESS AttachType = 47
BPF_TRACE_UPROBE_MULTI AttachType = 48
__MAX_BPF_ATTACH_TYPE AttachType = 49
)
type Cmd uint32
@ -311,7 +317,15 @@ const (
BPF_FUNC_dynptr_read FunctionId = 201
BPF_FUNC_dynptr_write FunctionId = 202
BPF_FUNC_dynptr_data FunctionId = 203
__BPF_FUNC_MAX_ID FunctionId = 204
BPF_FUNC_tcp_raw_gen_syncookie_ipv4 FunctionId = 204
BPF_FUNC_tcp_raw_gen_syncookie_ipv6 FunctionId = 205
BPF_FUNC_tcp_raw_check_syncookie_ipv4 FunctionId = 206
BPF_FUNC_tcp_raw_check_syncookie_ipv6 FunctionId = 207
BPF_FUNC_ktime_get_tai_ns FunctionId = 208
BPF_FUNC_user_ringbuf_drain FunctionId = 209
BPF_FUNC_cgrp_storage_get FunctionId = 210
BPF_FUNC_cgrp_storage_delete FunctionId = 211
__BPF_FUNC_MAX_ID FunctionId = 212
)
type HdrStartOff uint32
@ -334,43 +348,49 @@ const (
BPF_LINK_TYPE_PERF_EVENT LinkType = 7
BPF_LINK_TYPE_KPROBE_MULTI LinkType = 8
BPF_LINK_TYPE_STRUCT_OPS LinkType = 9
MAX_BPF_LINK_TYPE LinkType = 10
BPF_LINK_TYPE_NETFILTER LinkType = 10
BPF_LINK_TYPE_TCX LinkType = 11
BPF_LINK_TYPE_UPROBE_MULTI LinkType = 12
MAX_BPF_LINK_TYPE LinkType = 13
)
type MapType uint32
const (
BPF_MAP_TYPE_UNSPEC MapType = 0
BPF_MAP_TYPE_HASH MapType = 1
BPF_MAP_TYPE_ARRAY MapType = 2
BPF_MAP_TYPE_PROG_ARRAY MapType = 3
BPF_MAP_TYPE_PERF_EVENT_ARRAY MapType = 4
BPF_MAP_TYPE_PERCPU_HASH MapType = 5
BPF_MAP_TYPE_PERCPU_ARRAY MapType = 6
BPF_MAP_TYPE_STACK_TRACE MapType = 7
BPF_MAP_TYPE_CGROUP_ARRAY MapType = 8
BPF_MAP_TYPE_LRU_HASH MapType = 9
BPF_MAP_TYPE_LRU_PERCPU_HASH MapType = 10
BPF_MAP_TYPE_LPM_TRIE MapType = 11
BPF_MAP_TYPE_ARRAY_OF_MAPS MapType = 12
BPF_MAP_TYPE_HASH_OF_MAPS MapType = 13
BPF_MAP_TYPE_DEVMAP MapType = 14
BPF_MAP_TYPE_SOCKMAP MapType = 15
BPF_MAP_TYPE_CPUMAP MapType = 16
BPF_MAP_TYPE_XSKMAP MapType = 17
BPF_MAP_TYPE_SOCKHASH MapType = 18
BPF_MAP_TYPE_CGROUP_STORAGE MapType = 19
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY MapType = 20
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE MapType = 21
BPF_MAP_TYPE_QUEUE MapType = 22
BPF_MAP_TYPE_STACK MapType = 23
BPF_MAP_TYPE_SK_STORAGE MapType = 24
BPF_MAP_TYPE_DEVMAP_HASH MapType = 25
BPF_MAP_TYPE_STRUCT_OPS MapType = 26
BPF_MAP_TYPE_RINGBUF MapType = 27
BPF_MAP_TYPE_INODE_STORAGE MapType = 28
BPF_MAP_TYPE_TASK_STORAGE MapType = 29
BPF_MAP_TYPE_BLOOM_FILTER MapType = 30
BPF_MAP_TYPE_UNSPEC MapType = 0
BPF_MAP_TYPE_HASH MapType = 1
BPF_MAP_TYPE_ARRAY MapType = 2
BPF_MAP_TYPE_PROG_ARRAY MapType = 3
BPF_MAP_TYPE_PERF_EVENT_ARRAY MapType = 4
BPF_MAP_TYPE_PERCPU_HASH MapType = 5
BPF_MAP_TYPE_PERCPU_ARRAY MapType = 6
BPF_MAP_TYPE_STACK_TRACE MapType = 7
BPF_MAP_TYPE_CGROUP_ARRAY MapType = 8
BPF_MAP_TYPE_LRU_HASH MapType = 9
BPF_MAP_TYPE_LRU_PERCPU_HASH MapType = 10
BPF_MAP_TYPE_LPM_TRIE MapType = 11
BPF_MAP_TYPE_ARRAY_OF_MAPS MapType = 12
BPF_MAP_TYPE_HASH_OF_MAPS MapType = 13
BPF_MAP_TYPE_DEVMAP MapType = 14
BPF_MAP_TYPE_SOCKMAP MapType = 15
BPF_MAP_TYPE_CPUMAP MapType = 16
BPF_MAP_TYPE_XSKMAP MapType = 17
BPF_MAP_TYPE_SOCKHASH MapType = 18
BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED MapType = 19
BPF_MAP_TYPE_CGROUP_STORAGE MapType = 19
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY MapType = 20
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE MapType = 21
BPF_MAP_TYPE_QUEUE MapType = 22
BPF_MAP_TYPE_STACK MapType = 23
BPF_MAP_TYPE_SK_STORAGE MapType = 24
BPF_MAP_TYPE_DEVMAP_HASH MapType = 25
BPF_MAP_TYPE_STRUCT_OPS MapType = 26
BPF_MAP_TYPE_RINGBUF MapType = 27
BPF_MAP_TYPE_INODE_STORAGE MapType = 28
BPF_MAP_TYPE_TASK_STORAGE MapType = 29
BPF_MAP_TYPE_BLOOM_FILTER MapType = 30
BPF_MAP_TYPE_USER_RINGBUF MapType = 31
BPF_MAP_TYPE_CGRP_STORAGE MapType = 32
)
type ProgType uint32
@ -408,15 +428,17 @@ const (
BPF_PROG_TYPE_LSM ProgType = 29
BPF_PROG_TYPE_SK_LOOKUP ProgType = 30
BPF_PROG_TYPE_SYSCALL ProgType = 31
BPF_PROG_TYPE_NETFILTER ProgType = 32
)
type RetCode uint32
const (
BPF_OK RetCode = 0
BPF_DROP RetCode = 2
BPF_REDIRECT RetCode = 7
BPF_LWT_REROUTE RetCode = 128
BPF_OK RetCode = 0
BPF_DROP RetCode = 2
BPF_REDIRECT RetCode = 7
BPF_LWT_REROUTE RetCode = 128
BPF_FLOW_DISSECTOR_CONTINUE RetCode = 129
)
type SkAction uint32
@ -476,7 +498,7 @@ type LinkInfo struct {
Id LinkID
ProgId uint32
_ [4]byte
Extra [16]uint8
Extra [32]uint8
}
type MapInfo struct {
@ -521,10 +543,10 @@ type ProgInfo struct {
JitedFuncLens uint64
BtfId BTFID
FuncInfoRecSize uint32
FuncInfo uint64
FuncInfo Pointer
NrFuncInfo uint32
NrLineInfo uint32
LineInfo uint64
LineInfo Pointer
JitedLineInfo uint64
NrJitedLineInfo uint32
LineInfoRecSize uint32
@ -535,6 +557,8 @@ type ProgInfo struct {
RunCnt uint64
RecursionMisses uint64
VerifiedInsns uint32
AttachBtfObjId BTFID
AttachBtfId TypeID
_ [4]byte
}
@ -583,12 +607,12 @@ func BtfGetNextId(attr *BtfGetNextIdAttr) error {
}
type BtfLoadAttr struct {
Btf Pointer
BtfLogBuf Pointer
BtfSize uint32
BtfLogSize uint32
BtfLogLevel uint32
_ [4]byte
Btf Pointer
BtfLogBuf Pointer
BtfSize uint32
BtfLogSize uint32
BtfLogLevel uint32
BtfLogTrueSize uint32
}
func BtfLoad(attr *BtfLoadAttr) (*FD, error) {
@ -628,7 +652,7 @@ type LinkCreateAttr struct {
AttachType AttachType
Flags uint32
TargetBtfId TypeID
_ [28]byte
_ [44]byte
}
func LinkCreate(attr *LinkCreateAttr) (*FD, error) {
@ -646,7 +670,7 @@ type LinkCreateIterAttr struct {
Flags uint32
IterInfo Pointer
IterInfoLen uint32
_ [20]byte
_ [36]byte
}
func LinkCreateIter(attr *LinkCreateIterAttr) (*FD, error) {
@ -667,6 +691,7 @@ type LinkCreateKprobeMultiAttr struct {
Syms Pointer
Addrs Pointer
Cookies Pointer
_ [16]byte
}
func LinkCreateKprobeMulti(attr *LinkCreateKprobeMultiAttr) (*FD, error) {
@ -683,7 +708,7 @@ type LinkCreatePerfEventAttr struct {
AttachType AttachType
Flags uint32
BpfCookie uint64
_ [24]byte
_ [40]byte
}
func LinkCreatePerfEvent(attr *LinkCreatePerfEventAttr) (*FD, error) {
@ -702,7 +727,7 @@ type LinkCreateTracingAttr struct {
TargetBtfId BTFID
_ [4]byte
Cookie uint64
_ [16]byte
_ [32]byte
}
func LinkCreateTracing(attr *LinkCreateTracingAttr) (*FD, error) {
@ -909,6 +934,8 @@ type ObjGetAttr struct {
Pathname Pointer
BpfFd uint32
FileFlags uint32
PathFd int32
_ [4]byte
}
func ObjGet(attr *ObjGetAttr) (*FD, error) {
@ -934,6 +961,8 @@ type ObjPinAttr struct {
Pathname Pointer
BpfFd uint32
FileFlags uint32
PathFd int32
_ [4]byte
}
func ObjPin(attr *ObjPinAttr) error {
@ -942,11 +971,13 @@ func ObjPin(attr *ObjPinAttr) error {
}
type ProgAttachAttr struct {
TargetFd uint32
AttachBpfFd uint32
AttachType uint32
AttachFlags uint32
ReplaceBpfFd uint32
TargetFd uint32
AttachBpfFd uint32
AttachType uint32
AttachFlags uint32
ReplaceBpfFd uint32
RelativeFd uint32
ExpectedRevision uint64
}
func ProgAttach(attr *ProgAttachAttr) error {
@ -1022,7 +1053,7 @@ type ProgLoadAttr struct {
FdArray Pointer
CoreRelos Pointer
CoreReloRecSize uint32
_ [4]byte
LogTrueSize uint32
}
func ProgLoad(attr *ProgLoadAttr) (*FD, error) {
@ -1034,13 +1065,17 @@ func ProgLoad(attr *ProgLoadAttr) (*FD, error) {
}
type ProgQueryAttr struct {
TargetFd uint32
AttachType AttachType
QueryFlags uint32
AttachFlags uint32
ProgIds Pointer
ProgCount uint32
_ [4]byte
TargetFd uint32
AttachType AttachType
QueryFlags uint32
AttachFlags uint32
ProgIds Pointer
ProgCount uint32
_ [4]byte
ProgAttachFlags Pointer
LinkIds Pointer
LinkAttachFlags Pointer
Revision uint64
}
func ProgQuery(attr *ProgQueryAttr) error {

View file

@ -0,0 +1,77 @@
package sysenc
import (
"unsafe"
"github.com/cilium/ebpf/internal/sys"
)
type Buffer struct {
ptr unsafe.Pointer
// Size of the buffer. syscallPointerOnly if created from UnsafeBuffer or when using
// zero-copy unmarshaling.
size int
}
const syscallPointerOnly = -1
func newBuffer(buf []byte) Buffer {
if len(buf) == 0 {
return Buffer{}
}
return Buffer{unsafe.Pointer(&buf[0]), len(buf)}
}
// UnsafeBuffer constructs a Buffer for zero-copy unmarshaling.
//
// [Pointer] is the only valid method to call on such a Buffer.
// Use [SyscallBuffer] instead if possible.
func UnsafeBuffer(ptr unsafe.Pointer) Buffer {
return Buffer{ptr, syscallPointerOnly}
}
// SyscallOutput prepares a Buffer for a syscall to write into.
//
// The buffer may point at the underlying memory of dst, in which case [Unmarshal]
// becomes a no-op.
//
// The contents of the buffer are undefined and may be non-zero.
func SyscallOutput(dst any, size int) Buffer {
if dstBuf := unsafeBackingMemory(dst); len(dstBuf) == size {
buf := newBuffer(dstBuf)
buf.size = syscallPointerOnly
return buf
}
return newBuffer(make([]byte, size))
}
// CopyTo copies the buffer into dst.
//
// Returns the number of copied bytes.
func (b Buffer) CopyTo(dst []byte) int {
return copy(dst, b.unsafeBytes())
}
// Pointer returns the location where a syscall should write.
func (b Buffer) Pointer() sys.Pointer {
// NB: This deliberately ignores b.size to support zero-copy
// marshaling / unmarshaling using unsafe.Pointer.
return sys.NewPointer(b.ptr)
}
// Unmarshal the buffer into the provided value.
func (b Buffer) Unmarshal(data any) error {
if b.size == syscallPointerOnly {
return nil
}
return Unmarshal(data, b.unsafeBytes())
}
func (b Buffer) unsafeBytes() []byte {
if b.size == syscallPointerOnly {
return nil
}
return unsafe.Slice((*byte)(b.ptr), b.size)
}
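The intended flow inside this module, sketched below; doSyscall is an injected stand-in for the actual sys.* wrapper. Unmarshal is a no-op whenever SyscallOutput managed to alias dst.

func exampleOutput(doSyscall func(sys.Pointer) error) error {
	var value uint64
	buf := SyscallOutput(&value, 8) // aliases value's memory when layouts allow
	if err := doSyscall(buf.Pointer()); err != nil { // kernel writes through the pointer
		return err
	}
	return buf.Unmarshal(&value) // no-op in the aliased case
}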

vendor/github.com/cilium/ebpf/internal/sysenc/doc.go generated vendored Normal file (3 lines)
View file

@ -0,0 +1,3 @@
// Package sysenc provides efficient conversion of Go values to system
// call interfaces.
package sysenc

View file

@ -0,0 +1,41 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found at https://go.dev/LICENSE.
package sysenc
import (
"reflect"
"sync"
)
var hasUnexportedFieldsCache sync.Map // map[reflect.Type]bool
func hasUnexportedFields(typ reflect.Type) bool {
switch typ.Kind() {
case reflect.Slice, reflect.Array, reflect.Pointer:
return hasUnexportedFields(typ.Elem())
case reflect.Struct:
if unexported, ok := hasUnexportedFieldsCache.Load(typ); ok {
return unexported.(bool)
}
unexported := false
for i, n := 0, typ.NumField(); i < n; i++ {
field := typ.Field(i)
// Package binary allows _ fields but always writes zeroes into them.
if (!field.IsExported() && field.Name != "_") || hasUnexportedFields(field.Type) {
unexported = true
break
}
}
hasUnexportedFieldsCache.Store(typ, unexported)
return unexported
default:
// NB: It's not clear what this means for Chan and so on.
return false
}
}
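Some illustrative inputs and their results, assuming these local type definitions:

type exported struct{ A uint32 }
type hidden struct{ a uint32 } // unexported field

var (
	_ = hasUnexportedFields(reflect.TypeOf(exported{}))           // false
	_ = hasUnexportedFields(reflect.TypeOf(hidden{}))             // true
	_ = hasUnexportedFields(reflect.TypeOf([]exported{}))         // false: element type is clean
	_ = hasUnexportedFields(reflect.TypeOf(struct{ _ uint32 }{})) // false: _ fields are allowed
)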

View file

@ -0,0 +1,178 @@
package sysenc
import (
"bytes"
"encoding"
"encoding/binary"
"errors"
"fmt"
"reflect"
"sync"
"unsafe"
"github.com/cilium/ebpf/internal"
"golang.org/x/exp/slices"
)
// Marshal turns data into a byte slice using the system's native endianness.
//
// If possible, avoids allocations by directly using the backing memory
// of data. This means that the variable must not be modified for the lifetime
// of the returned [Buffer].
//
// Returns an error if the data can't be turned into a byte slice according to
// the behaviour of [binary.Write].
func Marshal(data any, size int) (Buffer, error) {
if data == nil {
return Buffer{}, errors.New("can't marshal a nil value")
}
var buf []byte
var err error
switch value := data.(type) {
case encoding.BinaryMarshaler:
buf, err = value.MarshalBinary()
case string:
buf = unsafe.Slice(unsafe.StringData(value), len(value))
case []byte:
buf = value
case int16:
buf = internal.NativeEndian.AppendUint16(make([]byte, 0, 2), uint16(value))
case uint16:
buf = internal.NativeEndian.AppendUint16(make([]byte, 0, 2), value)
case int32:
buf = internal.NativeEndian.AppendUint32(make([]byte, 0, 4), uint32(value))
case uint32:
buf = internal.NativeEndian.AppendUint32(make([]byte, 0, 4), value)
case int64:
buf = internal.NativeEndian.AppendUint64(make([]byte, 0, 8), uint64(value))
case uint64:
buf = internal.NativeEndian.AppendUint64(make([]byte, 0, 8), value)
default:
if buf := unsafeBackingMemory(data); len(buf) == size {
return newBuffer(buf), nil
}
wr := internal.NewBuffer(make([]byte, 0, size))
defer internal.PutBuffer(wr)
err = binary.Write(wr, internal.NativeEndian, value)
buf = wr.Bytes()
}
if err != nil {
return Buffer{}, err
}
if len(buf) != size {
return Buffer{}, fmt.Errorf("%T doesn't marshal to %d bytes", data, size)
}
return newBuffer(buf), nil
}
var bytesReaderPool = sync.Pool{
New: func() interface{} {
return new(bytes.Reader)
},
}
// Unmarshal a byte slice in the system's native endianness into data.
//
// Returns an error if buf can't be unmarshalled according to the behaviour
// of [binary.Read].
func Unmarshal(data interface{}, buf []byte) error {
switch value := data.(type) {
case encoding.BinaryUnmarshaler:
return value.UnmarshalBinary(buf)
case *string:
*value = string(buf)
return nil
case *[]byte:
// Backwards compat: unmarshaling into a slice replaces the whole slice.
*value = slices.Clone(buf)
return nil
default:
if dataBuf := unsafeBackingMemory(data); len(dataBuf) == len(buf) {
copy(dataBuf, buf)
return nil
}
rd := bytesReaderPool.Get().(*bytes.Reader)
defer bytesReaderPool.Put(rd)
rd.Reset(buf)
if err := binary.Read(rd, internal.NativeEndian, value); err != nil {
return err
}
if rd.Len() != 0 {
return fmt.Errorf("unmarshaling %T doesn't consume all data", data)
}
return nil
}
}
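A short sketch of the leftover-bytes check at the end of the default branch, assuming a little-endian layout for the literal buffer:

package main

import (
    "bytes"
    "encoding/binary"
    "fmt"
)

func main() {
    buf := []byte{1, 0, 0, 0, 2, 0, 0, 0}

    // Exactly-sized target: the reader is fully drained.
    var pair struct{ A, B uint32 }
    rd := bytes.NewReader(buf)
    _ = binary.Read(rd, binary.LittleEndian, &pair)
    fmt.Println(pair.A, pair.B, rd.Len()) // 1 2 0

    // Undersized target: bytes are left over, which Unmarshal above rejects
    // with "doesn't consume all data".
    var one uint32
    rd.Reset(buf)
    _ = binary.Read(rd, binary.LittleEndian, &one)
    fmt.Println(rd.Len()) // 4
}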
// unsafeBackingMemory returns the backing memory of data if it can be used
// instead of calling into package binary.
//
// Returns nil if the value is not a pointer or a slice, or if it contains
// padding or unexported fields.
func unsafeBackingMemory(data any) []byte {
if data == nil {
return nil
}
value := reflect.ValueOf(data)
var valueSize int
switch value.Kind() {
case reflect.Pointer:
if value.IsNil() {
return nil
}
if elemType := value.Type().Elem(); elemType.Kind() != reflect.Slice {
valueSize = int(elemType.Size())
break
}
// We're dealing with a pointer to a slice. Dereference and
// handle it like a regular slice.
value = value.Elem()
fallthrough
case reflect.Slice:
valueSize = int(value.Type().Elem().Size()) * value.Len()
default:
// Prevent Value.UnsafePointer from panicking.
return nil
}
// Some nil pointer types currently crash binary.Size. Call it after our own
// code so that the panic isn't reachable.
// See https://github.com/golang/go/issues/60892
if size := binary.Size(data); size == -1 || size != valueSize {
// The type contains padding or unsupported types.
return nil
}
if hasUnexportedFields(reflect.TypeOf(data)) {
return nil
}
// Reinterpret the pointer as a byte slice. This violates the unsafe.Pointer
// rules because it's very unlikely that the source data has "an equivalent
// memory layout". However, we can make it safe-ish because of the
// following reasons:
// - There is no alignment mismatch since we cast to a type with an
// alignment of 1.
// - There are no pointers in the source type so we don't upset the GC.
// - The length is verified at runtime.
return unsafe.Slice((*byte)(value.UnsafePointer()), valueSize)
}
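A sketch of the reinterpretation the final return performs, on a hypothetical type that passes every check above (no padding, no pointers, only exported fields):

package main

import (
    "fmt"
    "unsafe"
)

type Pair struct{ A, B uint32 }

func main() {
    p := &Pair{A: 1, B: 2}
    buf := unsafe.Slice((*byte)(unsafe.Pointer(p)), unsafe.Sizeof(*p))
    fmt.Printf("% x\n", buf) // native-endian bytes of A, then B

    // The slice aliases the struct: mutating one mutates the other. This is
    // why Marshal's callers must not touch data while the Buffer is alive.
    buf[0] = 9
    fmt.Println(p.A) // 9 on little-endian machines
}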

View file

@ -20,7 +20,7 @@ var (
ErrInvalidMaxActive = errors.New("can only set maxactive on kretprobes")
)
//go:generate stringer -type=ProbeType -linecomment
//go:generate go run golang.org/x/tools/cmd/stringer@latest -type=ProbeType -linecomment
type ProbeType uint8

View file

@ -85,6 +85,8 @@ const (
BPF_FS_MAGIC = linux.BPF_FS_MAGIC
TRACEFS_MAGIC = linux.TRACEFS_MAGIC
DEBUGFS_MAGIC = linux.DEBUGFS_MAGIC
BPF_RB_NO_WAKEUP = linux.BPF_RB_NO_WAKEUP
BPF_RB_FORCE_WAKEUP = linux.BPF_RB_FORCE_WAKEUP
)
type Statfs_t = linux.Statfs_t

View file

@ -89,6 +89,8 @@ const (
BPF_FS_MAGIC
TRACEFS_MAGIC
DEBUGFS_MAGIC
BPF_RB_NO_WAKEUP
BPF_RB_FORCE_WAKEUP
)
type Statfs_t struct {

View file

@ -8,6 +8,7 @@ import (
"io"
"math"
"os"
"unsafe"
"github.com/cilium/ebpf/internal/unix"
)
@ -19,6 +20,8 @@ var (
// vdsoVersion returns the LINUX_VERSION_CODE embedded in the vDSO library
// linked into the current process image.
func vdsoVersion() (uint32, error) {
const uintptrIs32bits = unsafe.Sizeof((uintptr)(0)) == 4
// Read data from the auxiliary vector, which is normally passed directly
// to the process. Go does not expose that data, so we must read it from procfs.
// https://man7.org/linux/man-pages/man3/getauxval.3.html
@ -31,7 +34,7 @@ func vdsoVersion() (uint32, error) {
}
defer av.Close()
vdsoAddr, err := vdsoMemoryAddress(av)
vdsoAddr, err := vdsoMemoryAddress(av, NativeEndian, uintptrIs32bits)
if err != nil {
return 0, fmt.Errorf("finding vDSO memory address: %w", err)
}
@ -52,9 +55,34 @@ func vdsoVersion() (uint32, error) {
return c, nil
}
type auxvPair32 struct {
Tag, Value uint32
}
type auxvPair64 struct {
Tag, Value uint64
}
func readAuxvPair(r io.Reader, order binary.ByteOrder, uintptrIs32bits bool) (tag, value uint64, _ error) {
if uintptrIs32bits {
var aux auxvPair32
if err := binary.Read(r, order, &aux); err != nil {
return 0, 0, fmt.Errorf("reading auxv entry: %w", err)
}
return uint64(aux.Tag), uint64(aux.Value), nil
}
var aux auxvPair64
if err := binary.Read(r, order, &aux); err != nil {
return 0, 0, fmt.Errorf("reading auxv entry: %w", err)
}
return aux.Tag, aux.Value, nil
}
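To make the tag/value framing concrete, a self-contained sketch that parses a hand-built 64-bit auxv blob (the values are fabricated; a real vector comes from /proc/self/auxv):

package main

import (
    "bytes"
    "encoding/binary"
    "fmt"
)

func main() {
    // AT_SYSINFO_EHDR (33) with a fake address, terminated by AT_NULL (0, 0).
    var blob bytes.Buffer
    for _, v := range []uint64{33, 0x7fff0000, 0, 0} {
        _ = binary.Write(&blob, binary.LittleEndian, v)
    }

    rd := bytes.NewReader(blob.Bytes())
    for {
        var pair struct{ Tag, Value uint64 }
        if err := binary.Read(rd, binary.LittleEndian, &pair); err != nil {
            break
        }
        fmt.Printf("tag=%d value=%#x\n", pair.Tag, pair.Value)
        if pair.Tag == 0 { // _AT_NULL is always the last pair
            break
        }
    }
}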
// vdsoMemoryAddress returns the memory address of the vDSO library
// linked into the current process image. r is an io.Reader into an auxv blob.
func vdsoMemoryAddress(r io.Reader) (uint64, error) {
func vdsoMemoryAddress(r io.Reader, order binary.ByteOrder, uintptrIs32bits bool) (uintptr, error) {
// See https://elixir.bootlin.com/linux/v6.5.5/source/include/uapi/linux/auxvec.h
const (
_AT_NULL = 0 // End of vector
_AT_SYSINFO_EHDR = 33 // Offset to vDSO blob in process image
@ -62,16 +90,16 @@ func vdsoMemoryAddress(r io.Reader) (uint64, error) {
// Loop through all tag/value pairs in auxv until we find `AT_SYSINFO_EHDR`,
// the address of a page containing the virtual Dynamic Shared Object (vDSO).
aux := struct{ Tag, Val uint64 }{}
for {
if err := binary.Read(r, NativeEndian, &aux); err != nil {
return 0, fmt.Errorf("reading auxv entry: %w", err)
tag, value, err := readAuxvPair(r, order, uintptrIs32bits)
if err != nil {
return 0, err
}
switch aux.Tag {
switch tag {
case _AT_SYSINFO_EHDR:
if aux.Val != 0 {
return aux.Val, nil
if value != 0 {
return uintptr(value), nil
}
return 0, fmt.Errorf("invalid vDSO address in auxv")
// _AT_NULL is always the last tag/val pair in the aux vector

View file

@ -25,10 +25,6 @@ type IterOptions struct {
// AttachIter attaches a BPF seq_file iterator.
func AttachIter(opts IterOptions) (*Iter, error) {
if err := haveBPFLink(); err != nil {
return nil, err
}
progFd := opts.Program.FD()
if progFd < 0 {
return nil, fmt.Errorf("invalid program: %s", sys.ErrClosedFd)
@ -52,6 +48,9 @@ func AttachIter(opts IterOptions) (*Iter, error) {
fd, err := sys.LinkCreateIter(&attr)
if err != nil {
if haveFeatErr := haveBPFLink(); haveFeatErr != nil {
return nil, haveFeatErr
}
return nil, fmt.Errorf("can't link iterator: %w", err)
}
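This hunk follows a pattern repeated throughout the commit: run the syscall first and consult the feature probe only on failure, so the supported path pays nothing extra. A minimal standalone sketch of that shape (doSyscall and haveFeature are hypothetical stand-ins for sys.LinkCreateIter and haveBPFLink):

package main

import (
    "errors"
    "fmt"
)

var errNotSupported = errors.New("not supported")

// attach runs the operation and, only if it fails, asks whether the feature
// exists at all, preferring the clearer feature error over the raw one.
func attach(doSyscall, haveFeature func() error) error {
    if err := doSyscall(); err != nil {
        if featErr := haveFeature(); featErr != nil {
            return featErr
        }
        return fmt.Errorf("can't link iterator: %w", err)
    }
    return nil
}

func main() {
    err := attach(
        func() error { return errors.New("EINVAL") }, // syscall fails...
        func() error { return errNotSupported },      // ...because the feature is missing
    )
    fmt.Println(err) // not supported
}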

View file

@ -82,10 +82,6 @@ func kprobeMulti(prog *ebpf.Program, opts KprobeMultiOptions, flags uint32) (Lin
return nil, fmt.Errorf("Cookies must be exactly Symbols or Addresses in length: %w", errInvalidInput)
}
if err := haveBPFLinkKprobeMulti(); err != nil {
return nil, err
}
attr := &sys.LinkCreateKprobeMultiAttr{
ProgFd: uint32(prog.FD()),
AttachType: sys.BPF_TRACE_KPROBE_MULTI,
@ -113,7 +109,11 @@ func kprobeMulti(prog *ebpf.Program, opts KprobeMultiOptions, flags uint32) (Lin
if errors.Is(err, unix.EINVAL) {
return nil, fmt.Errorf("%w (missing kernel symbol or prog's AttachType not AttachTraceKprobeMulti?)", err)
}
if err != nil {
if haveFeatErr := haveBPFLinkKprobeMulti(); haveFeatErr != nil {
return nil, haveFeatErr
}
return nil, err
}

View file

@ -25,10 +25,6 @@ type RawAttachProgramOptions struct {
// You should use one of the higher level abstractions available in this
// package if possible.
func RawAttachProgram(opts RawAttachProgramOptions) error {
if err := haveProgAttach(); err != nil {
return err
}
var replaceFd uint32
if opts.Replace != nil {
replaceFd = uint32(opts.Replace.FD())
@ -43,8 +39,12 @@ func RawAttachProgram(opts RawAttachProgramOptions) error {
}
if err := sys.ProgAttach(&attr); err != nil {
if haveFeatErr := haveProgAttach(); haveFeatErr != nil {
return haveFeatErr
}
return fmt.Errorf("can't attach program: %w", err)
}
return nil
}
@ -59,16 +59,15 @@ type RawDetachProgramOptions struct {
// You should use one of the higher level abstractions available in this
// package if possible.
func RawDetachProgram(opts RawDetachProgramOptions) error {
if err := haveProgAttach(); err != nil {
return err
}
attr := sys.ProgDetachAttr{
TargetFd: uint32(opts.Target),
AttachBpfFd: uint32(opts.Program.FD()),
AttachType: uint32(opts.Attach),
}
if err := sys.ProgDetach(&attr); err != nil {
if haveFeatErr := haveProgAttach(); haveFeatErr != nil {
return haveFeatErr
}
return fmt.Errorf("can't detach program: %w", err)
}

View file

@ -60,9 +60,11 @@ var haveProgAttachReplace = internal.NewFeatureTest("BPF_PROG_ATTACH atomic repl
asm.Return(),
},
})
if err != nil {
return internal.ErrNotSupported
}
defer prog.Close()
// We know that we have BPF_PROG_ATTACH since we can load CGroupSKB programs.
@ -113,11 +115,12 @@ var haveProgQuery = internal.NewFeatureTest("BPF_PROG_QUERY", "4.15", func() err
}
err := sys.ProgQuery(&attr)
if errors.Is(err, unix.EINVAL) {
return internal.ErrNotSupported
}
if errors.Is(err, unix.EBADF) {
return nil
}
return err
if err != nil {
return ErrNotSupported
}
return errors.New("syscall succeeded unexpectedly")
})

View file

@ -18,9 +18,12 @@ var (
uprobeRefCtrOffsetShift = 32
haveRefCtrOffsetPMU = internal.NewFeatureTest("RefCtrOffsetPMU", "4.20", func() error {
_, err := os.Stat(uprobeRefCtrOffsetPMUPath)
if err != nil {
if errors.Is(err, os.ErrNotExist) {
return internal.ErrNotSupported
}
if err != nil {
return err
}
return nil
})

View file

@ -7,6 +7,8 @@ import (
"io"
"math"
"golang.org/x/exp/slices"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal"
@ -40,10 +42,12 @@ func (hs handles) fdArray() []int32 {
return fda
}
func (hs handles) close() {
for _, h := range hs {
h.Close()
func (hs *handles) Close() error {
var errs []error
for _, h := range *hs {
errs = append(errs, h.Close())
}
return errors.Join(errs...)
}
// splitSymbols splits insns into subsections delimited by Symbol Instructions.
@ -55,21 +59,33 @@ func splitSymbols(insns asm.Instructions) (map[string]asm.Instructions, error) {
return nil, errors.New("insns is empty")
}
if insns[0].Symbol() == "" {
currentSym := insns[0].Symbol()
if currentSym == "" {
return nil, errors.New("insns must start with a Symbol")
}
var name string
start := 0
progs := make(map[string]asm.Instructions)
for _, ins := range insns {
if sym := ins.Symbol(); sym != "" {
if progs[sym] != nil {
return nil, fmt.Errorf("insns contains duplicate Symbol %s", sym)
}
name = sym
for i, ins := range insns[1:] {
i := i + 1
sym := ins.Symbol()
if sym == "" {
continue
}
progs[name] = append(progs[name], ins)
// New symbol, flush the old one out.
progs[currentSym] = slices.Clone(insns[start:i])
if progs[sym] != nil {
return nil, fmt.Errorf("insns contains duplicate Symbol %s", sym)
}
currentSym = sym
start = i
}
if tail := insns[start:]; len(tail) > 0 {
progs[currentSym] = slices.Clone(tail)
}
return progs, nil
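splitSymbols is unexported, but its grouping rule shows well on a toy analogue: entries are grouped under the most recent non-empty symbol, the first entry must carry one, and each group is cloned so it doesn't alias the input (the types here are hypothetical stand-ins for asm.Instruction):

package main

import (
    "errors"
    "fmt"
)

type ins struct{ sym, op string }

func split(insns []ins) (map[string][]ins, error) {
    if len(insns) == 0 || insns[0].sym == "" {
        return nil, errors.New("insns must start with a symbol")
    }
    progs := make(map[string][]ins)
    cur, start := insns[0].sym, 0
    for i := 1; i < len(insns); i++ {
        sym := insns[i].sym
        if sym == "" {
            continue
        }
        // New symbol: flush the previous subsection as its own copy.
        progs[cur] = append([]ins(nil), insns[start:i]...)
        if progs[sym] != nil {
            return nil, fmt.Errorf("duplicate symbol %s", sym)
        }
        cur, start = sym, i
    }
    progs[cur] = append([]ins(nil), insns[start:]...)
    return progs, nil
}

func main() {
    progs, err := split([]ins{
        {"main", "mov"}, {"", "ret"},
        {"helper", "mov"}, {"", "ret"},
    })
    fmt.Println(err, len(progs["main"]), len(progs["helper"])) // <nil> 2 2
}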
@ -231,7 +247,13 @@ func fixupAndValidate(insns asm.Instructions) error {
// fixupKfuncs loops over all instructions in search of kfunc calls.
// If at least one is found, the current kernel's BTF and module BTFs are searched to set Instruction.Constant
// and Instruction.Offset to the correct values.
func fixupKfuncs(insns asm.Instructions) (handles, error) {
func fixupKfuncs(insns asm.Instructions) (_ handles, err error) {
closeOnError := func(c io.Closer) {
if err != nil {
c.Close()
}
}
iter := insns.Iterate()
for iter.Next() {
ins := iter.Ins
@ -250,6 +272,8 @@ fixups:
}
fdArray := make(handles, 0)
defer closeOnError(&fdArray)
for {
ins := iter.Ins
@ -276,6 +300,11 @@ fixups:
return nil, err
}
idx, err := fdArray.add(module)
if err != nil {
return nil, err
}
if err := btf.CheckTypeCompatibility(kfm.Type, target.(*btf.Func).Type); err != nil {
return nil, &incompatibleKfuncError{kfm.Name, err}
}
@ -285,11 +314,6 @@ fixups:
return nil, err
}
idx, err := fdArray.add(module)
if err != nil {
return nil, err
}
ins.Constant = int64(id)
ins.Offset = int16(idx)

vendor/github.com/cilium/ebpf/map.go generated vendored
View file

@ -9,12 +9,14 @@ import (
"os"
"path/filepath"
"reflect"
"strings"
"time"
"unsafe"
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/sysenc"
"github.com/cilium/ebpf/internal/unix"
)
@ -102,26 +104,55 @@ func (ms *MapSpec) Copy() *MapSpec {
return &cpy
}
func (ms *MapSpec) clampPerfEventArraySize() error {
if ms.Type != PerfEventArray {
return nil
// fixupMagicFields fills fields of MapSpec which are usually
// left empty in ELF or which depend on runtime information.
//
// The method doesn't modify Spec, instead returning a copy.
// The copy is only performed if fixups are necessary, so callers mustn't mutate
// the returned spec.
func (spec *MapSpec) fixupMagicFields() (*MapSpec, error) {
switch spec.Type {
case ArrayOfMaps, HashOfMaps:
if spec.ValueSize != 0 && spec.ValueSize != 4 {
return nil, errors.New("ValueSize must be zero or four for map of map")
}
spec = spec.Copy()
spec.ValueSize = 4
case PerfEventArray:
if spec.KeySize != 0 && spec.KeySize != 4 {
return nil, errors.New("KeySize must be zero or four for perf event array")
}
if spec.ValueSize != 0 && spec.ValueSize != 4 {
return nil, errors.New("ValueSize must be zero or four for perf event array")
}
spec = spec.Copy()
spec.KeySize = 4
spec.ValueSize = 4
n, err := internal.PossibleCPUs()
if err != nil {
return nil, fmt.Errorf("fixup perf event array: %w", err)
}
if n := uint32(n); spec.MaxEntries == 0 || spec.MaxEntries > n {
// MaxEntries should be zero most of the time, but there is code
// out there which hardcodes large constants. Clamp the number
// of entries to the number of CPUs at most. Allow creating maps with
// fewer than n items since some kernel selftests relied on this
// behaviour in the past.
spec.MaxEntries = n
}
}
n, err := internal.PossibleCPUs()
if err != nil {
return fmt.Errorf("perf event array: %w", err)
}
if n := uint32(n); ms.MaxEntries > n {
ms.MaxEntries = n
}
return nil
return spec, nil
}
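A worked sketch of the PerfEventArray clamping rule above, with the CPU count as a plain parameter instead of internal.PossibleCPUs:

package main

import "fmt"

// clamp mirrors the rule above: zero or oversized MaxEntries becomes the
// number of possible CPUs; smaller values are deliberately kept.
func clamp(maxEntries, possibleCPUs uint32) uint32 {
    if maxEntries == 0 || maxEntries > possibleCPUs {
        return possibleCPUs
    }
    return maxEntries
}

func main() {
    fmt.Println(clamp(0, 8))   // 8: unset, one slot per CPU
    fmt.Println(clamp(128, 8)) // 8: hardcoded constants get clamped
    fmt.Println(clamp(4, 8))   // 4: smaller maps stay as requested
}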
// dataSection returns the contents and BTF Datasec descriptor of the spec.
func (ms *MapSpec) dataSection() ([]byte, *btf.Datasec, error) {
if ms.Value == nil {
return nil, nil, errMapNoBTFValue
}
@ -155,27 +186,37 @@ type MapKV struct {
//
// Returns an error wrapping [ErrMapIncompatible] otherwise.
func (ms *MapSpec) Compatible(m *Map) error {
switch {
case m.typ != ms.Type:
return fmt.Errorf("expected type %v, got %v: %w", ms.Type, m.typ, ErrMapIncompatible)
case m.keySize != ms.KeySize:
return fmt.Errorf("expected key size %v, got %v: %w", ms.KeySize, m.keySize, ErrMapIncompatible)
case m.valueSize != ms.ValueSize:
return fmt.Errorf("expected value size %v, got %v: %w", ms.ValueSize, m.valueSize, ErrMapIncompatible)
case !(ms.Type == PerfEventArray && ms.MaxEntries == 0) &&
m.maxEntries != ms.MaxEntries:
return fmt.Errorf("expected max entries %v, got %v: %w", ms.MaxEntries, m.maxEntries, ErrMapIncompatible)
// BPF_F_RDONLY_PROG is set unconditionally for devmaps. Explicitly allow
// this mismatch.
case !((ms.Type == DevMap || ms.Type == DevMapHash) && m.flags^ms.Flags == unix.BPF_F_RDONLY_PROG) &&
m.flags != ms.Flags:
return fmt.Errorf("expected flags %v, got %v: %w", ms.Flags, m.flags, ErrMapIncompatible)
ms, err := ms.fixupMagicFields()
if err != nil {
return err
}
return nil
diffs := []string{}
if m.typ != ms.Type {
diffs = append(diffs, fmt.Sprintf("Type: %s changed to %s", m.typ, ms.Type))
}
if m.keySize != ms.KeySize {
diffs = append(diffs, fmt.Sprintf("KeySize: %d changed to %d", m.keySize, ms.KeySize))
}
if m.valueSize != ms.ValueSize {
diffs = append(diffs, fmt.Sprintf("ValueSize: %d changed to %d", m.valueSize, ms.ValueSize))
}
if m.maxEntries != ms.MaxEntries {
diffs = append(diffs, fmt.Sprintf("MaxEntries: %d changed to %d", m.maxEntries, ms.MaxEntries))
}
// BPF_F_RDONLY_PROG is set unconditionally for devmaps. Explicitly allow this
// mismatch.
if !((ms.Type == DevMap || ms.Type == DevMapHash) && m.flags^ms.Flags == unix.BPF_F_RDONLY_PROG) &&
m.flags != ms.Flags {
diffs = append(diffs, fmt.Sprintf("Flags: %d changed to %d", m.flags, ms.Flags))
}
if len(diffs) == 0 {
return nil
}
return fmt.Errorf("%s: %w", strings.Join(diffs, ", "), ErrMapIncompatible)
}
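A hedged usage sketch of the new diff-listing behaviour (creating the map requires CAP_BPF or root):

package main

import (
    "errors"
    "fmt"

    "github.com/cilium/ebpf"
)

func main() {
    spec := &ebpf.MapSpec{Type: ebpf.Hash, KeySize: 4, ValueSize: 4, MaxEntries: 4}
    m, err := ebpf.NewMap(spec)
    if err != nil {
        panic(err)
    }
    defer m.Close()

    // Change two fields: the error should now mention both, instead of
    // stopping at the first mismatch as the old switch did.
    changed := spec.Copy()
    changed.ValueSize = 8
    changed.MaxEntries = 8
    err = changed.Compatible(m)
    fmt.Println(errors.Is(err, ebpf.ErrMapIncompatible)) // true
    fmt.Println(err)
}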
// Map represents a Map file descriptor.
@ -350,60 +391,9 @@ func (spec *MapSpec) createMap(inner *sys.FD, opts MapOptions) (_ *Map, err erro
}
}
switch spec.Type {
case ArrayOfMaps, HashOfMaps:
if err := haveNestedMaps(); err != nil {
return nil, err
}
if spec.ValueSize != 0 && spec.ValueSize != 4 {
return nil, errors.New("ValueSize must be zero or four for map of map")
}
spec = spec.Copy()
spec.ValueSize = 4
case PerfEventArray:
if spec.KeySize != 0 && spec.KeySize != 4 {
return nil, errors.New("KeySize must be zero or four for perf event array")
}
if spec.ValueSize != 0 && spec.ValueSize != 4 {
return nil, errors.New("ValueSize must be zero or four for perf event array")
}
spec = spec.Copy()
spec.KeySize = 4
spec.ValueSize = 4
if spec.MaxEntries == 0 {
n, err := internal.PossibleCPUs()
if err != nil {
return nil, fmt.Errorf("perf event array: %w", err)
}
spec.MaxEntries = uint32(n)
}
}
if spec.Flags&(unix.BPF_F_RDONLY_PROG|unix.BPF_F_WRONLY_PROG) > 0 || spec.Freeze {
if err := haveMapMutabilityModifiers(); err != nil {
return nil, fmt.Errorf("map create: %w", err)
}
}
if spec.Flags&unix.BPF_F_MMAPABLE > 0 {
if err := haveMmapableMaps(); err != nil {
return nil, fmt.Errorf("map create: %w", err)
}
}
if spec.Flags&unix.BPF_F_INNER_MAP > 0 {
if err := haveInnerMaps(); err != nil {
return nil, fmt.Errorf("map create: %w", err)
}
}
if spec.Flags&unix.BPF_F_NO_PREALLOC > 0 {
if err := haveNoPreallocMaps(); err != nil {
return nil, fmt.Errorf("map create: %w", err)
}
spec, err = spec.fixupMagicFields()
if err != nil {
return nil, err
}
attr := sys.MapCreateAttr{
@ -440,38 +430,72 @@ func (spec *MapSpec) createMap(inner *sys.FD, opts MapOptions) (_ *Map, err erro
}
fd, err := sys.MapCreate(&attr)
// Some map types don't support BTF k/v in earlier kernel versions.
// Remove BTF metadata and retry map creation.
if (errors.Is(err, sys.ENOTSUPP) || errors.Is(err, unix.EINVAL)) && attr.BtfFd != 0 {
attr.BtfFd, attr.BtfKeyTypeId, attr.BtfValueTypeId = 0, 0, 0
fd, err = sys.MapCreate(&attr)
}
if err != nil {
if errors.Is(err, unix.EPERM) {
return nil, fmt.Errorf("map create: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err)
}
if errors.Is(err, unix.EINVAL) && attr.MaxEntries == 0 {
return nil, fmt.Errorf("map create: %w (MaxEntries may be incorrectly set to zero)", err)
}
if errors.Is(err, unix.EINVAL) && spec.Type == UnspecifiedMap {
return nil, fmt.Errorf("map create: cannot use type %s", UnspecifiedMap)
}
if attr.BtfFd == 0 {
return nil, fmt.Errorf("map create: %w (without BTF k/v)", err)
}
return nil, fmt.Errorf("map create: %w", err)
return nil, handleMapCreateError(attr, spec, err)
}
defer closeOnError(fd)
defer closeOnError(fd)
m, err := newMap(fd, spec.Name, spec.Type, spec.KeySize, spec.ValueSize, spec.MaxEntries, spec.Flags)
if err != nil {
return nil, fmt.Errorf("map create: %w", err)
}
return m, nil
}
func handleMapCreateError(attr sys.MapCreateAttr, spec *MapSpec, err error) error {
if errors.Is(err, unix.EPERM) {
return fmt.Errorf("map create: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err)
}
if errors.Is(err, unix.EINVAL) && spec.MaxEntries == 0 {
return fmt.Errorf("map create: %w (MaxEntries may be incorrectly set to zero)", err)
}
if errors.Is(err, unix.EINVAL) && spec.Type == UnspecifiedMap {
return fmt.Errorf("map create: cannot use type %s", UnspecifiedMap)
}
if errors.Is(err, unix.EINVAL) && spec.Flags&unix.BPF_F_NO_PREALLOC > 0 {
return fmt.Errorf("map create: %w (noPrealloc flag may be incompatible with map type %s)", err, spec.Type)
}
switch spec.Type {
case ArrayOfMaps, HashOfMaps:
if haveFeatErr := haveNestedMaps(); haveFeatErr != nil {
return fmt.Errorf("map create: %w", haveFeatErr)
}
}
if spec.Flags&(unix.BPF_F_RDONLY_PROG|unix.BPF_F_WRONLY_PROG) > 0 || spec.Freeze {
if haveFeatErr := haveMapMutabilityModifiers(); haveFeatErr != nil {
return fmt.Errorf("map create: %w", haveFeatErr)
}
}
if spec.Flags&unix.BPF_F_MMAPABLE > 0 {
if haveFeatErr := haveMmapableMaps(); haveFeatErr != nil {
return fmt.Errorf("map create: %w", haveFeatErr)
}
}
if spec.Flags&unix.BPF_F_INNER_MAP > 0 {
if haveFeatErr := haveInnerMaps(); haveFeatErr != nil {
return fmt.Errorf("map create: %w", haveFeatErr)
}
}
if spec.Flags&unix.BPF_F_NO_PREALLOC > 0 {
if haveFeatErr := haveNoPreallocMaps(); haveFeatErr != nil {
return fmt.Errorf("map create: %w", haveFeatErr)
}
}
if attr.BtfFd == 0 {
return fmt.Errorf("map create: %w (without BTF k/v)", err)
}
return fmt.Errorf("map create: %w", err)
}
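The EPERM branch hints at RLIMIT_MEMLOCK accounting on pre-5.11 kernels; the usual remedy is the library's rlimit helper. A short sketch (needs root or CAP_BPF):

package main

import (
    "log"

    "github.com/cilium/ebpf"
    "github.com/cilium/ebpf/rlimit"
)

func main() {
    // Lift RLIMIT_MEMLOCK on kernels that still account map memory against
    // it; the helper only acts where that is actually necessary.
    if err := rlimit.RemoveMemlock(); err != nil {
        log.Fatal(err)
    }

    m, err := ebpf.NewMap(&ebpf.MapSpec{
        Type: ebpf.Array, KeySize: 4, ValueSize: 4, MaxEntries: 1,
    })
    if err != nil {
        log.Fatal(err)
    }
    defer m.Close()
}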
// newMap allocates and returns a new Map structure.
// Sets the fullValueSize on per-CPU maps.
func newMap(fd *sys.FD, name string, typ MapType, keySize, valueSize, maxEntries, flags uint32) (*Map, error) {
@ -568,8 +592,8 @@ func (m *Map) LookupWithFlags(key, valueOut interface{}, flags MapLookupFlags) e
return m.lookupPerCPU(key, valueOut, flags)
}
valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize)
if err := m.lookup(key, valuePtr, flags); err != nil {
valueBytes := makeMapSyscallOutput(valueOut, m.fullValueSize)
if err := m.lookup(key, valueBytes.Pointer(), flags); err != nil {
return err
}
@ -595,8 +619,8 @@ func (m *Map) LookupAndDeleteWithFlags(key, valueOut interface{}, flags MapLooku
return m.lookupAndDeletePerCPU(key, valueOut, flags)
}
valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize)
if err := m.lookupAndDelete(key, valuePtr, flags); err != nil {
valueBytes := makeMapSyscallOutput(valueOut, m.fullValueSize)
if err := m.lookupAndDelete(key, valueBytes.Pointer(), flags); err != nil {
return err
}
return m.unmarshalValue(valueOut, valueBytes)
@ -764,13 +788,13 @@ func (m *Map) Delete(key interface{}) error {
//
// Returns ErrKeyNotExist if there is no next key.
func (m *Map) NextKey(key, nextKeyOut interface{}) error {
nextKeyPtr, nextKeyBytes := makeBuffer(nextKeyOut, int(m.keySize))
nextKeyBytes := makeMapSyscallOutput(nextKeyOut, int(m.keySize))
if err := m.nextKey(key, nextKeyPtr); err != nil {
if err := m.nextKey(key, nextKeyBytes.Pointer()); err != nil {
return err
}
if err := m.unmarshalKey(nextKeyOut, nextKeyBytes); err != nil {
if err := nextKeyBytes.Unmarshal(nextKeyOut); err != nil {
return fmt.Errorf("can't unmarshal next key: %w", err)
}
return nil
@ -941,14 +965,14 @@ func (m *Map) batchLookup(cmd sys.Cmd, startKey, nextKeyOut, keysOut, valuesOut
keyPtr := sys.NewSlicePointer(keyBuf)
valueBuf := make([]byte, count*int(m.fullValueSize))
valuePtr := sys.NewSlicePointer(valueBuf)
nextPtr, nextBuf := makeBuffer(nextKeyOut, int(m.keySize))
nextBuf := makeMapSyscallOutput(nextKeyOut, int(m.keySize))
attr := sys.MapLookupBatchAttr{
MapFd: m.fd.Uint(),
Keys: keyPtr,
Values: valuePtr,
Count: uint32(count),
OutBatch: nextPtr,
OutBatch: nextBuf.Pointer(),
}
if opts != nil {
@ -958,7 +982,7 @@ func (m *Map) batchLookup(cmd sys.Cmd, startKey, nextKeyOut, keysOut, valuesOut
var err error
if startKey != nil {
attr.InBatch, err = marshalPtr(startKey, int(m.keySize))
attr.InBatch, err = marshalMapSyscallInput(startKey, int(m.keySize))
if err != nil {
return 0, err
}
@ -970,15 +994,15 @@ func (m *Map) batchLookup(cmd sys.Cmd, startKey, nextKeyOut, keysOut, valuesOut
return 0, sysErr
}
err = m.unmarshalKey(nextKeyOut, nextBuf)
err = nextBuf.Unmarshal(nextKeyOut)
if err != nil {
return 0, err
}
err = unmarshalBytes(keysOut, keyBuf)
err = sysenc.Unmarshal(keysOut, keyBuf)
if err != nil {
return 0, err
}
err = unmarshalBytes(valuesOut, valueBuf)
err = sysenc.Unmarshal(valuesOut, valueBuf)
if err != nil {
return 0, err
}
@ -991,9 +1015,6 @@ func (m *Map) batchLookup(cmd sys.Cmd, startKey, nextKeyOut, keysOut, valuesOut
// "keys" and "values" must be of type slice, a pointer
// to a slice or buffer will not work.
func (m *Map) BatchUpdate(keys, values interface{}, opts *BatchOptions) (int, error) {
if err := haveBatchAPI(); err != nil {
return 0, err
}
if m.typ.hasPerCPUValue() {
return 0, ErrNotSupported
}
@ -1013,11 +1034,11 @@ func (m *Map) BatchUpdate(keys, values interface{}, opts *BatchOptions) (int, er
if count != valuesValue.Len() {
return 0, fmt.Errorf("keys and values must be the same length")
}
keyPtr, err := marshalPtr(keys, count*int(m.keySize))
keyPtr, err := marshalMapSyscallInput(keys, count*int(m.keySize))
if err != nil {
return 0, err
}
valuePtr, err = marshalPtr(values, count*int(m.valueSize))
valuePtr, err = marshalMapSyscallInput(values, count*int(m.valueSize))
if err != nil {
return 0, err
}
@ -1035,6 +1056,9 @@ func (m *Map) BatchUpdate(keys, values interface{}, opts *BatchOptions) (int, er
err = sys.MapUpdateBatch(&attr)
if err != nil {
if haveFeatErr := haveBatchAPI(); haveFeatErr != nil {
return 0, haveFeatErr
}
return int(attr.Count), fmt.Errorf("batch update: %w", wrapMapError(err))
}
@ -1044,9 +1068,6 @@ func (m *Map) BatchUpdate(keys, values interface{}, opts *BatchOptions) (int, er
// BatchDelete batch deletes entries in the map by keys.
// "keys" must be of type slice, a pointer to a slice or buffer will not work.
func (m *Map) BatchDelete(keys interface{}, opts *BatchOptions) (int, error) {
if err := haveBatchAPI(); err != nil {
return 0, err
}
if m.typ.hasPerCPUValue() {
return 0, ErrNotSupported
}
@ -1055,7 +1076,7 @@ func (m *Map) BatchDelete(keys interface{}, opts *BatchOptions) (int, error) {
return 0, fmt.Errorf("keys must be a slice")
}
count := keysValue.Len()
keyPtr, err := marshalPtr(keys, count*int(m.keySize))
keyPtr, err := marshalMapSyscallInput(keys, count*int(m.keySize))
if err != nil {
return 0, fmt.Errorf("cannot marshal keys: %v", err)
}
@ -1072,6 +1093,9 @@ func (m *Map) BatchDelete(keys interface{}, opts *BatchOptions) (int, error) {
}
if err = sys.MapDeleteBatch(&attr); err != nil {
if haveFeatErr := haveBatchAPI(); haveFeatErr != nil {
return 0, haveFeatErr
}
return int(attr.Count), fmt.Errorf("batch delete: %w", wrapMapError(err))
}
@ -1176,15 +1200,14 @@ func (m *Map) IsPinned() bool {
//
// It makes no changes to kernel-side restrictions.
func (m *Map) Freeze() error {
if err := haveMapMutabilityModifiers(); err != nil {
return fmt.Errorf("can't freeze map: %w", err)
}
attr := sys.MapFreezeAttr{
MapFd: m.fd.Uint(),
}
if err := sys.MapFreeze(&attr); err != nil {
if haveFeatErr := haveMapMutabilityModifiers(); haveFeatErr != nil {
return fmt.Errorf("can't freeze map: %w", haveFeatErr)
}
return fmt.Errorf("can't freeze map: %w", err)
}
return nil
@ -1217,16 +1240,7 @@ func (m *Map) marshalKey(data interface{}) (sys.Pointer, error) {
return sys.Pointer{}, errors.New("can't use nil as key of map")
}
return marshalPtr(data, int(m.keySize))
}
func (m *Map) unmarshalKey(data interface{}, buf []byte) error {
if buf == nil {
// This is from a makeBuffer call, nothing to do here.
return nil
}
return unmarshalBytes(data, buf)
return marshalMapSyscallInput(data, int(m.keySize))
}
func (m *Map) marshalValue(data interface{}) (sys.Pointer, error) {
@ -1249,7 +1263,7 @@ func (m *Map) marshalValue(data interface{}) (sys.Pointer, error) {
buf, err = marshalProgram(value, int(m.valueSize))
default:
return marshalPtr(data, int(m.valueSize))
return marshalMapSyscallInput(data, int(m.valueSize))
}
if err != nil {
@ -1259,16 +1273,7 @@ func (m *Map) marshalValue(data interface{}) (sys.Pointer, error) {
return sys.NewSlicePointer(buf), nil
}
func (m *Map) unmarshalValue(value interface{}, buf []byte) error {
if buf == nil {
// This is from a makeBuffer call, nothing to do here.
return nil
}
if m.typ.hasPerCPUValue() {
return unmarshalPerCPUValue(value, int(m.valueSize), buf)
}
func (m *Map) unmarshalValue(value any, buf sysenc.Buffer) error {
switch value := value.(type) {
case **Map:
if !m.typ.canStoreMap() {
@ -1315,7 +1320,7 @@ func (m *Map) unmarshalValue(value interface{}, buf []byte) error {
return errors.New("require pointer to *Program")
}
return unmarshalBytes(value, buf)
return buf.Unmarshal(value)
}
// LoadPinnedMap loads a Map from a BPF file.
@ -1337,12 +1342,11 @@ func LoadPinnedMap(fileName string, opts *LoadPinOptions) (*Map, error) {
}
// unmarshalMap creates a map from a map ID encoded in host endianness.
func unmarshalMap(buf []byte) (*Map, error) {
if len(buf) != 4 {
return nil, errors.New("map id requires 4 byte value")
func unmarshalMap(buf sysenc.Buffer) (*Map, error) {
var id uint32
if err := buf.Unmarshal(&id); err != nil {
return nil, err
}
id := internal.NativeEndian.Uint32(buf)
return NewMapFromID(MapID(id))
}
@ -1414,11 +1418,7 @@ func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool {
return false
}
// The user can get access to nextKey since unmarshalBytes
// does not copy when unmarshaling into a []byte.
// Make a copy to prevent accidental corruption of
// iterator state.
copy(mi.curKey, nextKey)
mi.curKey = nextKey
mi.count++
mi.err = mi.target.Lookup(nextKey, valueOut)
@ -1438,7 +1438,12 @@ func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool {
return false
}
mi.err = mi.target.unmarshalKey(keyOut, nextKey)
if ptr, ok := keyOut.(unsafe.Pointer); ok {
copy(unsafe.Slice((*byte)(ptr), len(nextKey)), nextKey)
} else {
mi.err = sysenc.Unmarshal(keyOut, nextKey)
}
return mi.err == nil
}

View file

@ -1,166 +1,53 @@
package ebpf
import (
"bytes"
"encoding"
"encoding/binary"
"errors"
"fmt"
"reflect"
"runtime"
"sync"
"unsafe"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/sysenc"
)
// marshalPtr converts an arbitrary value into a pointer suitable
// marshalMapSyscallInput converts an arbitrary value into a pointer suitable
// to be passed to the kernel.
//
// As an optimization, it returns the original value if it is an
// unsafe.Pointer.
func marshalPtr(data interface{}, length int) (sys.Pointer, error) {
func marshalMapSyscallInput(data any, length int) (sys.Pointer, error) {
if ptr, ok := data.(unsafe.Pointer); ok {
return sys.NewPointer(ptr), nil
}
buf, err := marshalBytes(data, length)
buf, err := sysenc.Marshal(data, length)
if err != nil {
return sys.Pointer{}, err
}
return sys.NewSlicePointer(buf), nil
return buf.Pointer(), nil
}
// marshalBytes converts an arbitrary value into a byte buffer.
//
// Prefer using Map.marshalKey and Map.marshalValue if possible, since
// those have special cases that allow more types to be encoded.
//
// Returns an error if the given value isn't representable in exactly
// length bytes.
func marshalBytes(data interface{}, length int) (buf []byte, err error) {
if data == nil {
return nil, errors.New("can't marshal a nil value")
}
switch value := data.(type) {
case encoding.BinaryMarshaler:
buf, err = value.MarshalBinary()
case string:
buf = []byte(value)
case []byte:
buf = value
case unsafe.Pointer:
err = errors.New("can't marshal from unsafe.Pointer")
case Map, *Map, Program, *Program:
err = fmt.Errorf("can't marshal %T", value)
default:
wr := internal.NewBuffer(make([]byte, 0, length))
defer internal.PutBuffer(wr)
err = binary.Write(wr, internal.NativeEndian, value)
if err != nil {
err = fmt.Errorf("encoding %T: %v", value, err)
}
buf = wr.Bytes()
}
if err != nil {
return nil, err
}
if len(buf) != length {
return nil, fmt.Errorf("%T doesn't marshal to %d bytes", data, length)
}
return buf, nil
}
func makeBuffer(dst interface{}, length int) (sys.Pointer, []byte) {
func makeMapSyscallOutput(dst any, length int) sysenc.Buffer {
if ptr, ok := dst.(unsafe.Pointer); ok {
return sys.NewPointer(ptr), nil
return sysenc.UnsafeBuffer(ptr)
}
buf := make([]byte, length)
return sys.NewSlicePointer(buf), buf
}
var bytesReaderPool = sync.Pool{
New: func() interface{} {
return new(bytes.Reader)
},
}
// unmarshalBytes converts a byte buffer into an arbitrary value.
//
// Prefer using Map.unmarshalKey and Map.unmarshalValue if possible, since
// those have special cases that allow more types to be encoded.
//
// The common int32 and int64 types are directly handled to avoid
// unnecessary heap allocations as happening in the default case.
func unmarshalBytes(data interface{}, buf []byte) error {
switch value := data.(type) {
case unsafe.Pointer:
dst := unsafe.Slice((*byte)(value), len(buf))
copy(dst, buf)
runtime.KeepAlive(value)
return nil
case Map, *Map, Program, *Program:
return fmt.Errorf("can't unmarshal into %T", value)
case encoding.BinaryUnmarshaler:
return value.UnmarshalBinary(buf)
case *string:
*value = string(buf)
return nil
case *[]byte:
*value = buf
return nil
case *int32:
if len(buf) < 4 {
return errors.New("int32 requires 4 bytes")
}
*value = int32(internal.NativeEndian.Uint32(buf))
return nil
case *uint32:
if len(buf) < 4 {
return errors.New("uint32 requires 4 bytes")
}
*value = internal.NativeEndian.Uint32(buf)
return nil
case *int64:
if len(buf) < 8 {
return errors.New("int64 requires 8 bytes")
}
*value = int64(internal.NativeEndian.Uint64(buf))
return nil
case *uint64:
if len(buf) < 8 {
return errors.New("uint64 requires 8 bytes")
}
*value = internal.NativeEndian.Uint64(buf)
return nil
case string:
return errors.New("require pointer to string")
case []byte:
return errors.New("require pointer to []byte")
default:
rd := bytesReaderPool.Get().(*bytes.Reader)
rd.Reset(buf)
defer bytesReaderPool.Put(rd)
if err := binary.Read(rd, internal.NativeEndian, value); err != nil {
return fmt.Errorf("decoding %T: %v", value, err)
}
return nil
_, ok := dst.(encoding.BinaryUnmarshaler)
if ok {
return sysenc.SyscallOutput(nil, length)
}
return sysenc.SyscallOutput(dst, length)
}
// marshalPerCPUValue encodes a slice containing one value per
// possible CPU into a buffer of bytes.
//
// Values are initialized to zero if the slice has fewer elements than CPUs.
//
// slice must have a type like []elementType.
func marshalPerCPUValue(slice interface{}, elemLength int) (sys.Pointer, error) {
func marshalPerCPUValue(slice any, elemLength int) (sys.Pointer, error) {
sliceType := reflect.TypeOf(slice)
if sliceType.Kind() != reflect.Slice {
return sys.Pointer{}, errors.New("per-CPU value requires slice")
@ -182,13 +69,13 @@ func marshalPerCPUValue(slice interface{}, elemLength int) (sys.Pointer, error)
for i := 0; i < sliceLen; i++ {
elem := sliceValue.Index(i).Interface()
elemBytes, err := marshalBytes(elem, elemLength)
elemBytes, err := sysenc.Marshal(elem, elemLength)
if err != nil {
return sys.Pointer{}, err
}
offset := i * alignedElemLength
copy(buf[offset:offset+elemLength], elemBytes)
elemBytes.CopyTo(buf[offset : offset+elemLength])
}
return sys.NewSlicePointer(buf), nil
@ -197,8 +84,8 @@ func marshalPerCPUValue(slice interface{}, elemLength int) (sys.Pointer, error)
// unmarshalPerCPUValue decodes a buffer into a slice containing one value per
// possible CPU.
//
// valueOut must have a type like *[]elementType
func unmarshalPerCPUValue(slicePtr interface{}, elemLength int, buf []byte) error {
// slicePtr must be a pointer to a slice.
func unmarshalPerCPUValue(slicePtr any, elemLength int, buf []byte) error {
slicePtrType := reflect.TypeOf(slicePtr)
if slicePtrType.Kind() != reflect.Ptr || slicePtrType.Elem().Kind() != reflect.Slice {
return fmt.Errorf("per-cpu value requires pointer to slice")
@ -218,12 +105,9 @@ func unmarshalPerCPUValue(slicePtr interface{}, elemLength int, buf []byte) erro
sliceElemType = sliceElemType.Elem()
}
step := len(buf) / possibleCPUs
if step < elemLength {
return fmt.Errorf("per-cpu element length is larger than available data")
}
stride := internal.Align(elemLength, 8)
for i := 0; i < possibleCPUs; i++ {
var elem interface{}
var elem any
if sliceElemIsPointer {
newElem := reflect.New(sliceElemType)
slice.Index(i).Set(newElem)
@ -232,16 +116,12 @@ func unmarshalPerCPUValue(slicePtr interface{}, elemLength int, buf []byte) erro
elem = slice.Index(i).Addr().Interface()
}
// Make a copy, since unmarshal can hold on to itemBytes
elemBytes := make([]byte, elemLength)
copy(elemBytes, buf[:elemLength])
err := unmarshalBytes(elem, elemBytes)
err := sysenc.Unmarshal(elem, buf[:elemLength])
if err != nil {
return fmt.Errorf("cpu %d: %w", i, err)
}
buf = buf[step:]
buf = buf[stride:]
}
reflect.ValueOf(slicePtr).Elem().Set(slice)
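The stride change above is the important fix: per-CPU elements sit at 8-byte-aligned offsets in the kernel buffer, independent of how much of each slot the caller reads. A worked sketch of the arithmetic (align mirrors internal.Align):

package main

import "fmt"

// align rounds n up to the next multiple of alignment.
func align(n, alignment int) int {
    return (n + alignment - 1) / alignment * alignment
}

func main() {
    elemLength := 12 // what the caller unmarshals per CPU
    possibleCPUs := 4

    stride := align(elemLength, 8)
    fmt.Println(stride)                // 16: each CPU's slot, padding included
    fmt.Println(stride * possibleCPUs) // 64: total buffer the kernel fills
}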

vendor/github.com/cilium/ebpf/netlify.toml generated vendored Normal file
View file

@ -0,0 +1,4 @@
[build]
base = "docs/"
publish = "site/"
command = "mkdocs build"

View file

@ -16,6 +16,7 @@ import (
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/sysenc"
"github.com/cilium/ebpf/internal/unix"
)
@ -277,7 +278,7 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, er
if err != nil {
return nil, fmt.Errorf("fixing up kfuncs: %w", err)
}
defer handles.close()
defer handles.Close()
if len(handles) > 0 {
fdArray := handles.fdArray()
@ -763,14 +764,14 @@ retry:
return attr.Retval, total, nil
}
func unmarshalProgram(buf []byte) (*Program, error) {
if len(buf) != 4 {
return nil, errors.New("program id requires 4 byte value")
func unmarshalProgram(buf sysenc.Buffer) (*Program, error) {
var id uint32
if err := buf.Unmarshal(&id); err != nil {
return nil, err
}
// Looking up an entry in a nested map or prog array returns an id,
// not an fd.
id := internal.NativeEndian.Uint32(buf)
return NewProgramFromID(ProgramID(id))
}
@ -921,7 +922,12 @@ func findProgramTargetInKernel(name string, progType ProgramType, attachType Att
}
id, err := spec.TypeID(target)
return module, id, err
if err != nil {
module.Close()
return nil, 0, err
}
return module, id, nil
}
// findTargetInKernel attempts to find a named type in the current kernel.
@ -999,7 +1005,9 @@ func findTargetInProgram(prog *Program, name string, progType ProgramType, attac
var typeName string
switch (match{progType, attachType}) {
case match{Extension, AttachNone}:
case match{Extension, AttachNone},
match{Tracing, AttachTraceFEntry},
match{Tracing, AttachTraceFExit}:
typeName = name
default:
return 0, errUnrecognizedAttachType

View file

@ -14,6 +14,21 @@ set -euo pipefail
script="$(realpath "$0")"
readonly script
quote_env() {
for var in "$@"; do
if [ -v "$var" ]; then
printf "%s=%q " "$var" "${!var}"
fi
done
}
declare -a preserved_env=(
PATH
CI_MAX_KERNEL_VERSION
TEST_SEED
KERNEL_VERSION
)
# This script is a bit like a Matryoshka doll since it keeps re-executing itself
# in various different contexts:
#
@ -51,11 +66,11 @@ if [[ "${1:-}" = "--exec-vm" ]]; then
fi
for ((i = 0; i < 3; i++)); do
if ! $sudo virtme-run --kimg "${input}/bzImage" --memory 768M --pwd \
if ! $sudo virtme-run --kimg "${input}/boot/vmlinuz" --memory 768M --pwd \
--rwdir="${testdir}=${testdir}" \
--rodir=/run/input="${input}" \
--rwdir=/run/output="${output}" \
--script-sh "PATH=\"$PATH\" CI_MAX_KERNEL_VERSION="${CI_MAX_KERNEL_VERSION:-}" \"$script\" --exec-test $cmd" \
--script-sh "$(quote_env "${preserved_env[@]}") \"$script\" --exec-test $cmd" \
--kopt possible_cpus=2; then # need at least two CPUs for some tests
exit 23
fi
@ -85,8 +100,8 @@ elif [[ "${1:-}" = "--exec-test" ]]; then
export KERNEL_SELFTESTS="/run/input/bpf"
fi
if [[ -f "/run/input/bpf/bpf_testmod/bpf_testmod.ko" ]]; then
insmod "/run/input/bpf/bpf_testmod/bpf_testmod.ko"
if [[ -d "/run/input/lib/modules" ]]; then
find /run/input/lib/modules -type f -name bpf_testmod.ko -exec insmod {} \;
fi
dmesg --clear
@ -114,6 +129,9 @@ fetch() {
return $ret
}
machine="$(uname -m)"
readonly machine
if [[ -f "${1}" ]]; then
readonly kernel="${1}"
cp "${1}" "${input}/bzImage"
@ -121,16 +139,24 @@ else
# LINUX_VERSION_CODE test compares this to discovered value.
export KERNEL_VERSION="${1}"
readonly kernel="linux-${1}.bz"
readonly selftests="linux-${1}-selftests-bpf.tgz"
if [ "${machine}" = "x86_64" ]; then
readonly kernel="linux-${1}-amd64.tgz"
readonly selftests="linux-${1}-amd64-selftests-bpf.tgz"
elif [ "${machine}" = "aarch64" ]; then
readonly kernel="linux-${1}-arm64.tgz"
readonly selftests=""
else
echo "Arch ${machine} is not supported"
exit 1
fi
fetch "${kernel}"
cp "${tmp_dir}/${kernel}" "${input}/bzImage"
tar xf "${tmp_dir}/${kernel}" -C "${input}"
if fetch "${selftests}"; then
if [ -n "${selftests}" ] && fetch "${selftests}"; then
echo "Decompressing selftests"
mkdir "${input}/bpf"
tar --strip-components=4 -xf "${tmp_dir}/${selftests}" -C "${input}/bpf"
tar --strip-components=5 -xf "${tmp_dir}/${selftests}" -C "${input}/bpf"
else
echo "No selftests found, disabling"
fi

View file

@ -119,6 +119,7 @@ var haveInnerMaps = internal.NewFeatureTest("inner maps", "5.10", func() error {
MaxEntries: 1,
MapFlags: unix.BPF_F_INNER_MAP,
})
if err != nil {
return internal.ErrNotSupported
}
@ -135,6 +136,7 @@ var haveNoPreallocMaps = internal.NewFeatureTest("prealloc maps", "4.6", func()
MaxEntries: 1,
MapFlags: unix.BPF_F_NO_PREALLOC,
})
if err != nil {
return internal.ErrNotSupported
}
@ -223,8 +225,8 @@ var haveBatchAPI = internal.NewFeatureTest("map batch api", "5.6", func() error
keys := []uint32{1, 2}
values := []uint32{3, 4}
kp, _ := marshalPtr(keys, 8)
vp, _ := marshalPtr(values, 8)
kp, _ := marshalMapSyscallInput(keys, 8)
vp, _ := marshalMapSyscallInput(values, 8)
err = sys.MapUpdateBatch(&sys.MapUpdateBatchAttr{
MapFd: fd.Uint(),
@ -265,11 +267,8 @@ var haveBPFToBPFCalls = internal.NewFeatureTest("bpf2bpf calls", "4.16", func()
}
fd, err := progLoad(insns, SocketFilter, "MIT")
if errors.Is(err, unix.EINVAL) {
return internal.ErrNotSupported
}
if err != nil {
return err
return internal.ErrNotSupported
}
_ = fd.Close()
return nil

View file

@ -5,7 +5,7 @@ import (
"github.com/cilium/ebpf/internal/unix"
)
//go:generate stringer -output types_string.go -type=MapType,ProgramType,PinType
//go:generate go run golang.org/x/tools/cmd/stringer@latest -output types_string.go -type=MapType,ProgramType,PinType
// MapType indicates the type of map structure
// that will be initialized in the kernel.
@ -44,7 +44,7 @@ const (
// if an skb is from a socket belonging to a specific cgroup
CGroupArray
// LRUHash - This allows you to create a small hash structure that will purge the
// least recently used items rather than thow an error when you run out of memory
// least recently used items rather than throw an error when you run out of memory
LRUHash
// LRUCPUHash - This is NOT like PerCPUHash, this structure is shared among the CPUs,
// it has more to do with including the CPU id with the LRU calculation so that if a
@ -102,6 +102,12 @@ func (mt MapType) hasPerCPUValue() bool {
return mt == PerCPUHash || mt == PerCPUArray || mt == LRUCPUHash || mt == PerCPUCGroupStorage
}
// canStoreMapOrProgram returns true if the Map stores references to another Map
// or Program.
func (mt MapType) canStoreMapOrProgram() bool {
return mt.canStoreMap() || mt.canStoreProgram()
}
// canStoreMap returns true if the map type accepts a map fd
// for update and returns a map id for lookup.
func (mt MapType) canStoreMap() bool {
@ -158,7 +164,7 @@ const (
// Will cause invalid argument (EINVAL) at program load time if set incorrectly.
type AttachType uint32
//go:generate stringer -type AttachType -trimprefix Attach
//go:generate go run golang.org/x/tools/cmd/stringer@latest -type AttachType -trimprefix Attach
// AttachNone is an alias for AttachCGroupInetIngress for readability reasons.
const AttachNone AttachType = 0
@ -213,7 +219,7 @@ const (
type AttachFlags uint32
// PinType determines whether a map is pinned into a BPFFS.
type PinType int
type PinType uint32
// Valid pin types.
//

View file

@ -111,7 +111,7 @@ const _PinType_name = "PinNonePinByName"
var _PinType_index = [...]uint8{0, 7, 16}
func (i PinType) String() string {
if i < 0 || i >= PinType(len(_PinType_index)-1) {
if i >= PinType(len(_PinType_index)-1) {
return "PinType(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _PinType_name[_PinType_index[i]:_PinType_index[i+1]]

View file

@ -8,9 +8,9 @@ The following is courtesy of our legal counsel:
Use and transfer of Docker may be subject to certain restrictions by the
United States and other governments.
United States and other governments.
It is your responsibility to ensure that your use and/or transfer does not
violate applicable laws.
violate applicable laws.
For more information, please see http://www.bis.doc.gov

View file

@ -1,9 +1,24 @@
package cgroups
import (
"errors"
"github.com/opencontainers/runc/libcontainer/configs"
)
var (
// ErrDevicesUnsupported is an error returned when a cgroup manager
// is not configured to set device rules.
ErrDevicesUnsupported = errors.New("cgroup manager is not configured to set device rules")
// DevicesSetV1 and DevicesSetV2 are functions to set devices for
// cgroup v1 and v2, respectively. Unless the libcontainer/cgroups/devices
// package is imported, they are set to nil, so cgroup managers can't
// manage devices.
DevicesSetV1 func(path string, r *configs.Resources) error
DevicesSetV2 func(path string, r *configs.Resources) error
)
type Manager interface {
// Apply creates a cgroup, if not yet created, and adds a process
// with the specified pid into that cgroup. A special value of -1

View file

@ -50,24 +50,13 @@ func WriteFile(dir, file, data string) error {
return err
}
defer fd.Close()
if err := retryingWriteFile(fd, data); err != nil {
if _, err := fd.WriteString(data); err != nil {
// Having data in the error message helps in debugging.
return fmt.Errorf("failed to write %q: %w", data, err)
}
return nil
}
func retryingWriteFile(fd *os.File, data string) error {
for {
_, err := fd.Write([]byte(data))
if errors.Is(err, unix.EINTR) {
logrus.Infof("interrupted while writing %s to %s", data, fd.Name())
continue
}
return err
}
}
const (
cgroupfsDir = "/sys/fs/cgroup"
cgroupfsPrefix = cgroupfsDir + "/"
@ -90,7 +79,7 @@ func prepareOpenat2() error {
})
if err != nil {
prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
if err != unix.ENOSYS { //nolint:errorlint // unix errors are bare
if err != unix.ENOSYS {
logrus.Warnf("falling back to securejoin: %s", prepErr)
} else {
logrus.Debug("openat2 not available, falling back to securejoin")
@ -148,8 +137,9 @@ func openFile(dir, file string, flags int) (*os.File, error) {
//
// TODO: if such usage will ever be common, amend this
// to reopen cgroupRootHandle and retry openat2.
fdStr := strconv.Itoa(int(cgroupRootHandle.Fd()))
fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr)
fdPath, closer := utils.ProcThreadSelf("fd/" + strconv.Itoa(int(cgroupRootHandle.Fd())))
defer closer()
fdDest, _ := os.Readlink(fdPath)
if fdDest != cgroupfsDir {
// Wrap the error so it is clear that cgroupRootHandle
// is opened to an unexpected/wrong directory.

View file

@ -32,9 +32,22 @@ type CpuUsage struct {
UsageInUsermode uint64 `json:"usage_in_usermode"`
}
type PSIData struct {
Avg10 float64 `json:"avg10"`
Avg60 float64 `json:"avg60"`
Avg300 float64 `json:"avg300"`
Total uint64 `json:"total"`
}
type PSIStats struct {
Some PSIData `json:"some,omitempty"`
Full PSIData `json:"full,omitempty"`
}
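For reference, the PSI fields mirror cgroup v2 pressure files such as cpu.pressure. A standalone sketch of the JSON these new structs produce, using local copies of the types (the numbers are fabricated):

package main

import (
    "encoding/json"
    "fmt"
)

type PSIData struct {
    Avg10  float64 `json:"avg10"`
    Avg60  float64 `json:"avg60"`
    Avg300 float64 `json:"avg300"`
    Total  uint64  `json:"total"`
}

type PSIStats struct {
    Some PSIData `json:"some,omitempty"`
    Full PSIData `json:"full,omitempty"`
}

func main() {
    // Roughly what "some avg10=1.20 avg60=0.80 avg300=0.30 total=1000"
    // in cpu.pressure would parse into.
    s := PSIStats{Some: PSIData{Avg10: 1.2, Avg60: 0.8, Avg300: 0.3, Total: 1000}}
    out, _ := json.MarshalIndent(s, "", "  ")
    fmt.Println(string(out))
}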
type CpuStats struct {
CpuUsage CpuUsage `json:"cpu_usage,omitempty"`
ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
}
type CPUSetStats struct {
@ -91,6 +104,7 @@ type MemoryStats struct {
UseHierarchy bool `json:"use_hierarchy"`
Stats map[string]uint64 `json:"stats,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
}
type PageUsageByNUMA struct {
@ -135,6 +149,7 @@ type BlkioStats struct {
IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"`
IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"`
SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
}
type HugetlbStats struct {
@ -157,6 +172,13 @@ type RdmaStats struct {
RdmaCurrent []RdmaEntry `json:"rdma_current,omitempty"`
}
type MiscStats struct {
// current resource usage for a key in misc
Usage uint64 `json:"usage,omitempty"`
// number of times the resource usage was about to go over the max boundary
Events uint64 `json:"events,omitempty"`
}
type Stats struct {
CpuStats CpuStats `json:"cpu_stats,omitempty"`
CPUSetStats CPUSetStats `json:"cpuset_stats,omitempty"`
@ -166,10 +188,13 @@ type Stats struct {
// the map is in the format "size of hugepage: stats of the hugepage"
HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
RdmaStats RdmaStats `json:"rdma_stats,omitempty"`
// the map is in the format "misc resource name: stats of the key"
MiscStats map[string]MiscStats `json:"misc_stats,omitempty"`
}
func NewStats() *Stats {
memoryStats := MemoryStats{Stats: make(map[string]uint64)}
hugetlbStats := make(map[string]HugetlbStats)
return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats}
miscStats := make(map[string]MiscStats)
return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats, MiscStats: miscStats}
}

View file

@ -36,13 +36,13 @@ func IsCgroup2UnifiedMode() bool {
var st unix.Statfs_t
err := unix.Statfs(unifiedMountpoint, &st)
if err != nil {
level := logrus.WarnLevel
if os.IsNotExist(err) && userns.RunningInUserNS() {
// ignore the "not found" error if running in userns
logrus.WithError(err).Debugf("%s missing, assuming cgroup v1", unifiedMountpoint)
isUnified = false
return
// For rootless containers, sweep it under the rug.
level = logrus.DebugLevel
}
panic(fmt.Sprintf("cannot statfs cgroup root: %s", err))
logrus.StandardLogger().Logf(level,
"statfs %s: %v; assuming cgroup v1", unifiedMountpoint, err)
}
isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC
})
@ -217,21 +217,26 @@ func PathExists(path string) bool {
return true
}
func EnterPid(cgroupPaths map[string]string, pid int) error {
for _, path := range cgroupPaths {
if PathExists(path) {
if err := WriteCgroupProc(path, pid); err != nil {
return err
}
}
}
return nil
}
// rmdir tries to remove a directory, optionally retrying on EBUSY.
func rmdir(path string, retry bool) error {
delay := time.Millisecond
tries := 10
func rmdir(path string) error {
again:
err := unix.Rmdir(path)
if err == nil || err == unix.ENOENT { //nolint:errorlint // unix errors are bare
switch err { // nolint:errorlint // unix errors are bare
case nil, unix.ENOENT:
return nil
case unix.EINTR:
goto again
case unix.EBUSY:
if retry && tries > 0 {
time.Sleep(delay)
delay *= 2
tries--
goto again
}
}
return &os.PathError{Op: "rmdir", Path: path, Err: err}
}
@ -239,68 +244,42 @@ func rmdir(path string) error {
// RemovePath aims to remove cgroup path. It does so recursively,
// by removing any subdirectories (sub-cgroups) first.
func RemovePath(path string) error {
// try the fast path first
if err := rmdir(path); err == nil {
// Try the fast path first.
if err := rmdir(path, false); err == nil {
return nil
}
infos, err := os.ReadDir(path)
if err != nil {
if os.IsNotExist(err) {
err = nil
}
if err != nil && !os.IsNotExist(err) {
return err
}
for _, info := range infos {
if info.IsDir() {
// We should remove subcgroups dir first
// We should remove subcgroup first.
if err = RemovePath(filepath.Join(path, info.Name())); err != nil {
break
}
}
}
if err == nil {
err = rmdir(path)
err = rmdir(path, true)
}
return err
}
// RemovePaths iterates over the provided paths removing them.
// We try to remove all paths five times, with an increasing delay between
// tries. If some cgroups are still not removed after all retries, an
// appropriate error is returned.
func RemovePaths(paths map[string]string) (err error) {
const retries = 5
delay := 10 * time.Millisecond
for i := 0; i < retries; i++ {
if i != 0 {
time.Sleep(delay)
delay *= 2
}
for s, p := range paths {
if err := RemovePath(p); err != nil {
// do not log intermediate iterations
switch i {
case 0:
logrus.WithError(err).Warnf("Failed to remove cgroup (will retry)")
case retries - 1:
logrus.WithError(err).Error("Failed to remove cgroup")
}
}
_, err := os.Stat(p)
// We need this strange way of checking cgroup existence because
// RemoveAll almost always returns an error, even on already removed
// cgroups.
if os.IsNotExist(err) {
delete(paths, s)
}
}
if len(paths) == 0 {
//nolint:ineffassign,staticcheck // done to help garbage collecting: opencontainers/runc#2506
paths = make(map[string]string)
return nil
for s, p := range paths {
if err := RemovePath(p); err == nil {
delete(paths, s)
}
}
if len(paths) == 0 {
//nolint:ineffassign,staticcheck // done to help garbage collecting: opencontainers/runc#2506
// TODO: switch to clear once Go < 1.21 is not supported.
paths = make(map[string]string)
return nil
}
return fmt.Errorf("Failed to remove paths: %v", paths)
}

View file

@ -99,11 +99,12 @@ func tryDefaultPath(cgroupPath, subsystem string) string {
// expensive), so it is assumed that cgroup mounts are not being changed.
func readCgroupMountinfo() ([]*mountinfo.Info, error) {
readMountinfoOnce.Do(func() {
// mountinfo.GetMounts uses /proc/thread-self, so we can use it without
// issues.
cgroupMountinfo, readMountinfoErr = mountinfo.GetMounts(
mountinfo.FSTypeFilter("cgroup"),
)
})
return cgroupMountinfo, readMountinfoErr
}
@ -196,6 +197,9 @@ func getCgroupMountsV1(all bool) ([]Mount, error) {
return nil, err
}
// We don't need to use /proc/thread-self here because runc always runs
// with every thread in the same cgroup. This lets us avoid having to do
// runtime.LockOSThread.
allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return nil, err
@ -214,6 +218,10 @@ func GetOwnCgroup(subsystem string) (string, error) {
if IsCgroup2UnifiedMode() {
return "", errUnified
}
// We don't need to use /proc/thread-self here because runc always runs
// with every thread in the same cgroup. This lets us avoid having to do
// runtime.LockOSThread.
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return "", err
@ -236,27 +244,6 @@ func GetOwnCgroupPath(subsystem string) (string, error) {
return getCgroupPathHelper(subsystem, cgroup)
}
func GetInitCgroup(subsystem string) (string, error) {
if IsCgroup2UnifiedMode() {
return "", errUnified
}
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
if err != nil {
return "", err
}
return getControllerPath(subsystem, cgroups)
}
func GetInitCgroupPath(subsystem string) (string, error) {
cgroup, err := GetInitCgroup(subsystem)
if err != nil {
return "", err
}
return getCgroupPathHelper(subsystem, cgroup)
}
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
if err != nil {

View file

@ -2,8 +2,8 @@ package configs
import "fmt"
// blockIODevice holds major:minor format supported in blkio cgroup
type blockIODevice struct {
// BlockIODevice holds major:minor format supported in blkio cgroup.
type BlockIODevice struct {
// Major is the device's major number
Major int64 `json:"major"`
// Minor is the device's minor number
@ -12,7 +12,7 @@ type blockIODevice struct {
// WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair
type WeightDevice struct {
blockIODevice
BlockIODevice
// Weight is the bandwidth rate for the device, range is from 10 to 1000
Weight uint16 `json:"weight"`
// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
@ -41,7 +41,7 @@ func (wd *WeightDevice) LeafWeightString() string {
// ThrottleDevice struct holds a `major:minor rate_per_second` pair
type ThrottleDevice struct {
blockIODevice
BlockIODevice
// Rate is the IO rate limit per cgroup per device
Rate uint64 `json:"rate"`
}

View file

@ -69,6 +69,9 @@ type Resources struct {
// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
CpuQuota int64 `json:"cpu_quota"`
// CPU hardcap burst limit (in usecs). Allowed accumulated cpu time additionally for burst in a given period.
CpuBurst *uint64 `json:"cpu_burst"` //nolint:revive
// CPU period to be used for hardcapping (in usecs). 0 to use system default.
CpuPeriod uint64 `json:"cpu_period"`
@ -84,6 +87,9 @@ type Resources struct {
// MEM to use
CpusetMems string `json:"cpuset_mems"`
// cgroup SCHED_IDLE
CPUIdle *int64 `json:"cpu_idle,omitempty"`
// Process limit; set <= `0' to disable limit.
PidsLimit int64 `json:"pids_limit"`
@ -155,4 +161,9 @@ type Resources struct {
// during Set() to figure out whether the freeze is required. Those
// methods may be relatively slow, thus this flag.
SkipFreezeOnSet bool `json:"-"`
// MemoryCheckBeforeUpdate is a flag for cgroup v2 managers to check
// if the new memory limits (Memory and MemorySwap) being set are lower
// than the current memory usage, and reject if so.
MemoryCheckBeforeUpdate bool `json:"memory_check_before_update"`
}

View file

@ -8,6 +8,7 @@ import (
"time"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/opencontainers/runtime-spec/specs-go"
@ -31,12 +32,13 @@ type IDMap struct {
// for syscalls. Additional architectures can be added by specifying them in
// Architectures.
type Seccomp struct {
DefaultAction Action `json:"default_action"`
Architectures []string `json:"architectures"`
Syscalls []*Syscall `json:"syscalls"`
DefaultErrnoRet *uint `json:"default_errno_ret"`
ListenerPath string `json:"listener_path,omitempty"`
ListenerMetadata string `json:"listener_metadata,omitempty"`
DefaultAction Action `json:"default_action"`
Architectures []string `json:"architectures"`
Flags []specs.LinuxSeccompFlag `json:"flags"`
Syscalls []*Syscall `json:"syscalls"`
DefaultErrnoRet *uint `json:"default_errno_ret"`
ListenerPath string `json:"listener_path,omitempty"`
ListenerMetadata string `json:"listener_metadata,omitempty"`
}
// Action is taken upon rule match in Seccomp
@ -83,9 +85,6 @@ type Syscall struct {
Args []*Arg `json:"args"`
}
// TODO Windows. Many of these fields should be factored out into those parts
// which are common across platforms, and those which are platform specific.
// Config defines configuration options for executing a process inside a contained environment.
type Config struct {
// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs
@ -121,6 +120,9 @@ type Config struct {
// Hostname optionally sets the container's hostname if provided
Hostname string `json:"hostname"`
// Domainname optionally sets the container's domainname if provided
Domainname string `json:"domainname"`
// Namespaces specifies the container's namespaces that it should setup when cloning the init process
// If a namespace is not provided that namespace is shared from the container's parent process
Namespaces Namespaces `json:"namespaces"`
@ -158,11 +160,11 @@ type Config struct {
// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
OomScoreAdj *int `json:"oom_score_adj,omitempty"`
// UidMappings is an array of User ID mappings for User Namespaces
UidMappings []IDMap `json:"uid_mappings"`
// UIDMappings is an array of User ID mappings for User Namespaces
UIDMappings []IDMap `json:"uid_mappings"`
// GidMappings is an array of Group ID mappings for User Namespaces
GidMappings []IDMap `json:"gid_mappings"`
// GIDMappings is an array of Group ID mappings for User Namespaces
GIDMappings []IDMap `json:"gid_mappings"`
// MaskPaths specifies paths within the container's rootfs to mask over with a bind
// mount pointing to /dev/null as to prevent reads of the file.
@ -211,8 +213,87 @@ type Config struct {
// RootlessCgroups is set when the process is unlikely to have full access to cgroups.
// When RootlessCgroups is set, cgroups errors are ignored.
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
// TimeOffsets specifies the offset for supporting time namespaces.
TimeOffsets map[string]specs.LinuxTimeOffset `json:"time_offsets,omitempty"`
// Scheduler represents the scheduling attributes for a process.
Scheduler *Scheduler `json:"scheduler,omitempty"`
// Personality contains configuration for the Linux personality syscall.
Personality *LinuxPersonality `json:"personality,omitempty"`
// IOPriority is the container's I/O priority.
IOPriority *IOPriority `json:"io_priority,omitempty"`
}
// Scheduler is based on the Linux sched_setattr(2) syscall.
type Scheduler = specs.Scheduler
// ToSchedAttr is to convert *configs.Scheduler to *unix.SchedAttr.
func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) {
var policy uint32
switch scheduler.Policy {
case specs.SchedOther:
policy = 0
case specs.SchedFIFO:
policy = 1
case specs.SchedRR:
policy = 2
case specs.SchedBatch:
policy = 3
case specs.SchedISO:
policy = 4
case specs.SchedIdle:
policy = 5
case specs.SchedDeadline:
policy = 6
default:
return nil, fmt.Errorf("invalid scheduler policy: %s", scheduler.Policy)
}
var flags uint64
for _, flag := range scheduler.Flags {
switch flag {
case specs.SchedFlagResetOnFork:
flags |= 0x01
case specs.SchedFlagReclaim:
flags |= 0x02
case specs.SchedFlagDLOverrun:
flags |= 0x04
case specs.SchedFlagKeepPolicy:
flags |= 0x08
case specs.SchedFlagKeepParams:
flags |= 0x10
case specs.SchedFlagUtilClampMin:
flags |= 0x20
case specs.SchedFlagUtilClampMax:
flags |= 0x40
default:
return nil, fmt.Errorf("invalid scheduler flag: %s", flag)
}
}
return &unix.SchedAttr{
Size: unix.SizeofSchedAttr,
Policy: policy,
Flags: flags,
Nice: scheduler.Nice,
Priority: uint32(scheduler.Priority),
Runtime: scheduler.Runtime,
Deadline: scheduler.Deadline,
Period: scheduler.Period,
}, nil
}
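A usage sketch for ToSchedAttr (not part of this change): the returned attr can be passed to sched_setattr(2) via the SchedSetAttr wrapper in golang.org/x/sys/unix. The policy and nice value are illustrative, and the imports already present in this file are assumed.

	func applySchedBatch() error {
		sched := &Scheduler{Policy: specs.SchedBatch, Nice: 5}
		attr, err := ToSchedAttr(sched)
		if err != nil {
			return err
		}
		// pid 0 targets the calling thread.
		if err := unix.SchedSetAttr(0, attr, 0); err != nil {
			return fmt.Errorf("sched_setattr: %w", err)
		}
		return nil
	}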
var IOPrioClassMapping = map[specs.IOPriorityClass]int{
specs.IOPRIO_CLASS_RT: 1,
specs.IOPRIO_CLASS_BE: 2,
specs.IOPRIO_CLASS_IDLE: 3,
}
type IOPriority = specs.LinuxIOPriority
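How the mapping above is typically consumed — a hedged sketch written as if inside this package. x/sys/unix has no ioprio_set wrapper, so a raw syscall is used; the inlined constants (IOPRIO_CLASS_SHIFT = 13, IOPRIO_WHO_PROCESS = 1) come from the kernel uapi headers.

	func setBestEffortIOPrio() error {
		prio := &IOPriority{Class: specs.IOPRIO_CLASS_BE, Priority: 4}
		class := IOPrioClassMapping[prio.Class] // 2 for best-effort
		ioprio := (class << 13) | prio.Priority // class in the top bits, level 0-7 below
		_, _, errno := unix.Syscall(unix.SYS_IOPRIO_SET, 1 /* IOPRIO_WHO_PROCESS */, 0, uintptr(ioprio))
		if errno != 0 {
			return errno
		}
		return nil
	}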
type (
HookName string
HookList []Hook
@ -277,6 +358,7 @@ type Capabilities struct {
Ambient []string
}
// Deprecated: use (Hooks).Run instead.
func (hooks HookList) RunHooks(state *specs.State) error {
for i, h := range hooks {
if err := h.Run(state); err != nil {
@ -333,6 +415,18 @@ func (hooks *Hooks) MarshalJSON() ([]byte, error) {
})
}
// Run executes all hooks for the given hook name.
func (hooks Hooks) Run(name HookName, state *specs.State) error {
list := hooks[name]
for i, h := range list {
if err := h.Run(state); err != nil {
return fmt.Errorf("error running %s hook #%d: %w", name, i, err)
}
}
return nil
}
type Hook interface {
// Run executes the hook with the provided state.
Run(*specs.State) error
@ -393,7 +487,7 @@ func (c Command) Run(s *specs.State) error {
go func() {
err := cmd.Wait()
if err != nil {
err = fmt.Errorf("error running hook: %w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
err = fmt.Errorf("%w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
}
errC <- err
}()

View file

@ -7,22 +7,33 @@ import (
)
var (
errNoUIDMap = errors.New("User namespaces enabled, but no uid mappings found.")
errNoUserMap = errors.New("User namespaces enabled, but no user mapping found.")
errNoGIDMap = errors.New("User namespaces enabled, but no gid mappings found.")
errNoGroupMap = errors.New("User namespaces enabled, but no group mapping found.")
errNoUIDMap = errors.New("user namespaces enabled, but no uid mappings found")
errNoGIDMap = errors.New("user namespaces enabled, but no gid mappings found")
)
// Please check https://man7.org/linux/man-pages/man2/personality.2.html for const details.
// https://raw.githubusercontent.com/torvalds/linux/master/include/uapi/linux/personality.h
const (
PerLinux = 0x0000
PerLinux32 = 0x0008
)
type LinuxPersonality struct {
// Domain for the personality; it can only contain the values
// "LINUX" and "LINUX32".
Domain int `json:"domain"`
}
// HostUID gets the translated uid for the process on host which could be
// different when user namespaces are enabled.
func (c Config) HostUID(containerId int) (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.UidMappings == nil {
if len(c.UIDMappings) == 0 {
return -1, errNoUIDMap
}
id, found := c.hostIDFromMapping(int64(containerId), c.UidMappings)
id, found := c.hostIDFromMapping(int64(containerId), c.UIDMappings)
if !found {
return -1, errNoUserMap
return -1, fmt.Errorf("user namespaces enabled, but no mapping found for uid %d", containerId)
}
// If we are a 32-bit binary running on a 64-bit system, it's possible
// the mapped user is too large to store in an int, which means we
@ -47,12 +58,12 @@ func (c Config) HostRootUID() (int, error) {
// different when user namespaces are enabled.
func (c Config) HostGID(containerId int) (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.GidMappings == nil {
if len(c.GIDMappings) == 0 {
return -1, errNoGIDMap
}
id, found := c.hostIDFromMapping(int64(containerId), c.GidMappings)
id, found := c.hostIDFromMapping(int64(containerId), c.GIDMappings)
if !found {
return -1, errNoGroupMap
return -1, fmt.Errorf("user namespaces enabled, but no mapping found for gid %d", containerId)
}
// If we are a 32-bit binary running on a 64-bit system, it's possible
// the mapped user is too large to store in an int, which means we

View file

@ -1,48 +1,7 @@
package configs
import "golang.org/x/sys/unix"
const (
// EXT_COPYUP is a directive to copy up the contents of a directory when
// a tmpfs is mounted over it.
EXT_COPYUP = 1 << iota //nolint:golint // ignore "don't use ALL_CAPS" warning
EXT_COPYUP = 1 << iota //nolint:golint,revive // ignore "don't use ALL_CAPS" warning
)
type Mount struct {
// Source path for the mount.
Source string `json:"source"`
// Destination path for the mount inside the container.
Destination string `json:"destination"`
// Device the mount is for.
Device string `json:"device"`
// Mount flags.
Flags int `json:"flags"`
// Propagation Flags
PropagationFlags []int `json:"propagation_flags"`
// Mount data applied to the mount.
Data string `json:"data"`
// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
Relabel string `json:"relabel"`
// RecAttr represents mount properties to be applied recursively (AT_RECURSIVE), see mount_setattr(2).
RecAttr *unix.MountAttr `json:"rec_attr"`
// Extensions are additional flags that are specific to runc.
Extensions int `json:"extensions"`
// Optional Command to be run before Source is mounted.
PremountCmds []Command `json:"premount_cmds"`
// Optional Command to be run after Source is mounted.
PostmountCmds []Command `json:"postmount_cmds"`
}
func (m *Mount) IsBind() bool {
return m.Flags&unix.MS_BIND != 0
}

View file

@ -0,0 +1,66 @@
package configs
import "golang.org/x/sys/unix"
type MountIDMapping struct {
// Recursive indicates if the mapping needs to be recursive.
Recursive bool `json:"recursive"`
// UserNSPath is a path to a user namespace that indicates the necessary
// id-mappings for MOUNT_ATTR_IDMAP. If set to non-"", UIDMappings and
// GIDMappings must be set to nil.
UserNSPath string `json:"userns_path,omitempty"`
// UIDMappings is the uid mapping set for this mount, to be used with
// MOUNT_ATTR_IDMAP.
UIDMappings []IDMap `json:"uid_mappings,omitempty"`
// GIDMappings is the gid mapping set for this mount, to be used with
// MOUNT_ATTR_IDMAP.
GIDMappings []IDMap `json:"gid_mappings,omitempty"`
}
type Mount struct {
// Source path for the mount.
Source string `json:"source"`
// Destination path for the mount inside the container.
Destination string `json:"destination"`
// Device the mount is for.
Device string `json:"device"`
// Mount flags.
Flags int `json:"flags"`
// Mount flags that were explicitly cleared in the configuration (meaning
// the user explicitly requested that these flags *not* be set).
ClearedFlags int `json:"cleared_flags"`
// Propagation Flags
PropagationFlags []int `json:"propagation_flags"`
// Mount data applied to the mount.
Data string `json:"data"`
// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
Relabel string `json:"relabel"`
// RecAttr represents mount properties to be applied recursively (AT_RECURSIVE), see mount_setattr(2).
RecAttr *unix.MountAttr `json:"rec_attr"`
// Extensions are additional flags that are specific to runc.
Extensions int `json:"extensions"`
// Mapping is the MOUNT_ATTR_IDMAP configuration for the mount. If non-nil,
// the mount is configured to use MOUNT_ATTR_IDMAP-style id mappings.
IDMapping *MountIDMapping `json:"id_mapping,omitempty"`
}
func (m *Mount) IsBind() bool {
return m.Flags&unix.MS_BIND != 0
}
func (m *Mount) IsIDMapped() bool {
return m.IDMapping != nil
}

View file
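A hedged illustration of the new id-mapped mount plumbing: a bind mount whose ownership is shifted with MOUNT_ATTR_IDMAP. Paths and ID ranges are made up; the configs and unix imports are assumed.

	m := &configs.Mount{
		Source:      "/var/lib/shared",
		Destination: "/mnt/shared",
		Device:      "bind",
		Flags:       unix.MS_BIND,
		IDMapping: &configs.MountIDMapping{
			UIDMappings: []configs.IDMap{{ContainerID: 0, HostID: 100000, Size: 65536}},
			GIDMappings: []configs.IDMap{{ContainerID: 0, HostID: 100000, Size: 65536}},
		},
	}
	// m.IsBind() == true, m.IsIDMapped() == true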

@ -0,0 +1,10 @@
//go:build !linux
// +build !linux
package configs
type Mount struct{}
func (m *Mount) IsBind() bool {
return false
}

View file

@ -14,6 +14,7 @@ const (
NEWIPC NamespaceType = "NEWIPC"
NEWUSER NamespaceType = "NEWUSER"
NEWCGROUP NamespaceType = "NEWCGROUP"
NEWTIME NamespaceType = "NEWTIME"
)
var (
@ -38,6 +39,8 @@ func NsName(ns NamespaceType) string {
return "uts"
case NEWCGROUP:
return "cgroup"
case NEWTIME:
return "time"
}
return ""
}
@ -56,6 +59,9 @@ func IsNamespaceSupported(ns NamespaceType) bool {
if nsFile == "" {
return false
}
// We don't need to use /proc/thread-self here because the list of
// namespace types is unrelated to the thread. This lets us avoid having to
// do runtime.LockOSThread.
_, err := os.Stat("/proc/self/ns/" + nsFile)
// a namespace is supported if it exists and we have permissions to read it
supported = err == nil
@ -72,6 +78,7 @@ func NamespaceTypes() []NamespaceType {
NEWPID,
NEWNS,
NEWCGROUP,
NEWTIME,
}
}

View file
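With NEWTIME wired into NsName and NamespaceTypes, time-namespace support can be probed like any other namespace type; a small sketch (assuming the configs import):

	if configs.IsNamespaceSupported(configs.NEWTIME) {
		// /proc/self/ns/time exists and is readable, so CLONE_NEWTIME may be requested.
	}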

@ -17,6 +17,7 @@ var namespaceInfo = map[NamespaceType]int{
NEWUTS: unix.CLONE_NEWUTS,
NEWPID: unix.CLONE_NEWPID,
NEWCGROUP: unix.CLONE_NEWCGROUP,
NEWTIME: unix.CLONE_NEWTIME,
}
// CloneFlags parses the container's Namespaces options to set the correct
@ -31,3 +32,15 @@ func (n *Namespaces) CloneFlags() uintptr {
}
return uintptr(flag)
}
// IsPrivate tells whether the namespace of type t is configured as private
// (i.e. it exists and is not shared).
func (n Namespaces) IsPrivate(t NamespaceType) bool {
for _, v := range n {
if v.Type == t {
return v.Path == ""
}
}
// Not found, so implicitly sharing a parent namespace.
return false
}

View file
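The IsPrivate helper added above distinguishes three cases; the values below are illustrative:

	ns := configs.Namespaces{
		{Type: configs.NEWNET},                         // no Path: a fresh, private namespace
		{Type: configs.NEWUTS, Path: "/proc/1/ns/uts"}, // Path set: joins an existing namespace
	}
	ns.IsPrivate(configs.NEWNET) // true
	ns.IsPrivate(configs.NEWUTS) // false: shared via Path
	ns.IsPrivate(configs.NEWPID) // false: not configured, so inherited from the parent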

@ -1,157 +0,0 @@
//go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris
// +build darwin dragonfly freebsd linux netbsd openbsd solaris
package user
import (
"io"
"os"
"strconv"
"golang.org/x/sys/unix"
)
// Unix-specific path to the passwd and group formatted files.
const (
unixPasswdPath = "/etc/passwd"
unixGroupPath = "/etc/group"
)
// LookupUser looks up a user by their username in /etc/passwd. If the user
// cannot be found (or there is no /etc/passwd file on the filesystem), then
// LookupUser returns an error.
func LookupUser(username string) (User, error) {
return lookupUserFunc(func(u User) bool {
return u.Name == username
})
}
// LookupUid looks up a user by their user id in /etc/passwd. If the user cannot
// be found (or there is no /etc/passwd file on the filesystem), then LookupId
// returns an error.
func LookupUid(uid int) (User, error) {
return lookupUserFunc(func(u User) bool {
return u.Uid == uid
})
}
func lookupUserFunc(filter func(u User) bool) (User, error) {
// Get operating system-specific passwd reader-closer.
passwd, err := GetPasswd()
if err != nil {
return User{}, err
}
defer passwd.Close()
// Get the users.
users, err := ParsePasswdFilter(passwd, filter)
if err != nil {
return User{}, err
}
// No user entries found.
if len(users) == 0 {
return User{}, ErrNoPasswdEntries
}
// Assume the first entry is the "correct" one.
return users[0], nil
}
// LookupGroup looks up a group by its name in /etc/group. If the group cannot
// be found (or there is no /etc/group file on the filesystem), then LookupGroup
// returns an error.
func LookupGroup(groupname string) (Group, error) {
return lookupGroupFunc(func(g Group) bool {
return g.Name == groupname
})
}
// LookupGid looks up a group by its group id in /etc/group. If the group cannot
// be found (or there is no /etc/group file on the filesystem), then LookupGid
// returns an error.
func LookupGid(gid int) (Group, error) {
return lookupGroupFunc(func(g Group) bool {
return g.Gid == gid
})
}
func lookupGroupFunc(filter func(g Group) bool) (Group, error) {
// Get operating system-specific group reader-closer.
group, err := GetGroup()
if err != nil {
return Group{}, err
}
defer group.Close()
// Get the users.
groups, err := ParseGroupFilter(group, filter)
if err != nil {
return Group{}, err
}
// No user entries found.
if len(groups) == 0 {
return Group{}, ErrNoGroupEntries
}
// Assume the first entry is the "correct" one.
return groups[0], nil
}
func GetPasswdPath() (string, error) {
return unixPasswdPath, nil
}
func GetPasswd() (io.ReadCloser, error) {
return os.Open(unixPasswdPath)
}
func GetGroupPath() (string, error) {
return unixGroupPath, nil
}
func GetGroup() (io.ReadCloser, error) {
return os.Open(unixGroupPath)
}
// CurrentUser looks up the current user by their user id in /etc/passwd. If the
// user cannot be found (or there is no /etc/passwd file on the filesystem),
// then CurrentUser returns an error.
func CurrentUser() (User, error) {
return LookupUid(unix.Getuid())
}
// CurrentGroup looks up the current user's group by their primary group id's
// entry in /etc/passwd. If the group cannot be found (or there is no
// /etc/group file on the filesystem), then CurrentGroup returns an error.
func CurrentGroup() (Group, error) {
return LookupGid(unix.Getgid())
}
func currentUserSubIDs(fileName string) ([]SubID, error) {
u, err := CurrentUser()
if err != nil {
return nil, err
}
filter := func(entry SubID) bool {
return entry.Name == u.Name || entry.Name == strconv.Itoa(u.Uid)
}
return ParseSubIDFileFilter(fileName, filter)
}
func CurrentUserSubUIDs() ([]SubID, error) {
return currentUserSubIDs("/etc/subuid")
}
func CurrentUserSubGIDs() ([]SubID, error) {
return currentUserSubIDs("/etc/subgid")
}
func CurrentProcessUIDMap() ([]IDMap, error) {
return ParseIDMapFile("/proc/self/uid_map")
}
func CurrentProcessGIDMap() ([]IDMap, error) {
return ParseIDMapFile("/proc/self/gid_map")
}

View file

@ -1,605 +0,0 @@
package user
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"os"
"strconv"
"strings"
)
const (
minID = 0
maxID = 1<<31 - 1 // for 32-bit systems compatibility
)
var (
// ErrNoPasswdEntries is returned if no matching entries were found in /etc/group.
ErrNoPasswdEntries = errors.New("no matching entries in passwd file")
// ErrNoGroupEntries is returned if no matching entries were found in /etc/passwd.
ErrNoGroupEntries = errors.New("no matching entries in group file")
// ErrRange is returned if a UID or GID is outside of the valid range.
ErrRange = fmt.Errorf("uids and gids must be in range %d-%d", minID, maxID)
)
type User struct {
Name string
Pass string
Uid int
Gid int
Gecos string
Home string
Shell string
}
type Group struct {
Name string
Pass string
Gid int
List []string
}
// SubID represents an entry in /etc/sub{u,g}id
type SubID struct {
Name string
SubID int64
Count int64
}
// IDMap represents an entry in /proc/PID/{u,g}id_map
type IDMap struct {
ID int64
ParentID int64
Count int64
}
func parseLine(line []byte, v ...interface{}) {
parseParts(bytes.Split(line, []byte(":")), v...)
}
func parseParts(parts [][]byte, v ...interface{}) {
if len(parts) == 0 {
return
}
for i, p := range parts {
// Ignore cases where we don't have enough fields to populate the arguments.
// Some configuration files like to misbehave.
if len(v) <= i {
break
}
// Use the type of the argument to figure out how to parse it, scanf() style.
// This is legit.
switch e := v[i].(type) {
case *string:
*e = string(p)
case *int:
// "numbers", with conversion errors ignored because of some misbehaving configuration files.
*e, _ = strconv.Atoi(string(p))
case *int64:
*e, _ = strconv.ParseInt(string(p), 10, 64)
case *[]string:
// Comma-separated lists.
if len(p) != 0 {
*e = strings.Split(string(p), ",")
} else {
*e = []string{}
}
default:
// Someone goof'd when writing code using this function. Scream so they can hear us.
panic(fmt.Sprintf("parseLine only accepts {*string, *int, *int64, *[]string} as arguments! %#v is not a pointer!", e))
}
}
}
func ParsePasswdFile(path string) ([]User, error) {
passwd, err := os.Open(path)
if err != nil {
return nil, err
}
defer passwd.Close()
return ParsePasswd(passwd)
}
func ParsePasswd(passwd io.Reader) ([]User, error) {
return ParsePasswdFilter(passwd, nil)
}
func ParsePasswdFileFilter(path string, filter func(User) bool) ([]User, error) {
passwd, err := os.Open(path)
if err != nil {
return nil, err
}
defer passwd.Close()
return ParsePasswdFilter(passwd, filter)
}
func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) {
if r == nil {
return nil, errors.New("nil source for passwd-formatted data")
}
var (
s = bufio.NewScanner(r)
out = []User{}
)
for s.Scan() {
line := bytes.TrimSpace(s.Bytes())
if len(line) == 0 {
continue
}
// see: man 5 passwd
// name:password:UID:GID:GECOS:directory:shell
// Name:Pass:Uid:Gid:Gecos:Home:Shell
// root:x:0:0:root:/root:/bin/bash
// adm:x:3:4:adm:/var/adm:/bin/false
p := User{}
parseLine(line, &p.Name, &p.Pass, &p.Uid, &p.Gid, &p.Gecos, &p.Home, &p.Shell)
if filter == nil || filter(p) {
out = append(out, p)
}
}
if err := s.Err(); err != nil {
return nil, err
}
return out, nil
}
func ParseGroupFile(path string) ([]Group, error) {
group, err := os.Open(path)
if err != nil {
return nil, err
}
defer group.Close()
return ParseGroup(group)
}
func ParseGroup(group io.Reader) ([]Group, error) {
return ParseGroupFilter(group, nil)
}
func ParseGroupFileFilter(path string, filter func(Group) bool) ([]Group, error) {
group, err := os.Open(path)
if err != nil {
return nil, err
}
defer group.Close()
return ParseGroupFilter(group, filter)
}
func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
if r == nil {
return nil, errors.New("nil source for group-formatted data")
}
rd := bufio.NewReader(r)
out := []Group{}
// Read the file line-by-line.
for {
var (
isPrefix bool
wholeLine []byte
err error
)
// Read the next line. We do so in chunks (as much as reader's
// buffer is able to keep), check if we read enough columns
// already on each step and store final result in wholeLine.
for {
var line []byte
line, isPrefix, err = rd.ReadLine()
if err != nil {
// We should return no error if EOF is reached
// without a match.
if err == io.EOF {
err = nil
}
return out, err
}
// Simple common case: line is short enough to fit in a
// single reader's buffer.
if !isPrefix && len(wholeLine) == 0 {
wholeLine = line
break
}
wholeLine = append(wholeLine, line...)
// Check if we read the whole line already.
if !isPrefix {
break
}
}
// There's no spec for /etc/passwd or /etc/group, but we try to follow
// the same rules as the glibc parser, which allows comments and blank
// space at the beginning of a line.
wholeLine = bytes.TrimSpace(wholeLine)
if len(wholeLine) == 0 || wholeLine[0] == '#' {
continue
}
// see: man 5 group
// group_name:password:GID:user_list
// Name:Pass:Gid:List
// root:x:0:root
// adm:x:4:root,adm,daemon
p := Group{}
parseLine(wholeLine, &p.Name, &p.Pass, &p.Gid, &p.List)
if filter == nil || filter(p) {
out = append(out, p)
}
}
}
type ExecUser struct {
Uid int
Gid int
Sgids []int
Home string
}
// GetExecUserPath is a wrapper for GetExecUser. It reads data from each of the
// given file paths and uses that data as the arguments to GetExecUser. If the
// files cannot be opened for any reason, the error is ignored and a nil
// io.Reader is passed instead.
func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) {
var passwd, group io.Reader
if passwdFile, err := os.Open(passwdPath); err == nil {
passwd = passwdFile
defer passwdFile.Close()
}
if groupFile, err := os.Open(groupPath); err == nil {
group = groupFile
defer groupFile.Close()
}
return GetExecUser(userSpec, defaults, passwd, group)
}
// GetExecUser parses a user specification string (using the passwd and group
// readers as sources for /etc/passwd and /etc/group data, respectively). In
// the case of blank fields or missing data from the sources, the values in
// defaults is used.
//
// GetExecUser will return an error if a user or group literal could not be
// found in any entry in passwd and group respectively.
//
// Examples of valid user specifications are:
// - ""
// - "user"
// - "uid"
// - "user:group"
// - "uid:gid
// - "user:gid"
// - "uid:group"
//
// It should be noted that if you specify a numeric user or group id, they will
// not be evaluated as usernames (only the metadata will be filled). So attempting
// to parse a user with user.Name = "1337" will produce the user with a UID of
// 1337.
func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (*ExecUser, error) {
if defaults == nil {
defaults = new(ExecUser)
}
// Copy over defaults.
user := &ExecUser{
Uid: defaults.Uid,
Gid: defaults.Gid,
Sgids: defaults.Sgids,
Home: defaults.Home,
}
// Sgids slice *cannot* be nil.
if user.Sgids == nil {
user.Sgids = []int{}
}
// Allow for userArg to have either "user" syntax, or optionally "user:group" syntax
var userArg, groupArg string
parseLine([]byte(userSpec), &userArg, &groupArg)
// Convert userArg and groupArg to be numeric, so we don't have to execute
// Atoi *twice* for each iteration over lines.
uidArg, uidErr := strconv.Atoi(userArg)
gidArg, gidErr := strconv.Atoi(groupArg)
// Find the matching user.
users, err := ParsePasswdFilter(passwd, func(u User) bool {
if userArg == "" {
// Default to current state of the user.
return u.Uid == user.Uid
}
if uidErr == nil {
// If the userArg is numeric, always treat it as a UID.
return uidArg == u.Uid
}
return u.Name == userArg
})
// If we can't find the user, we have to bail.
if err != nil && passwd != nil {
if userArg == "" {
userArg = strconv.Itoa(user.Uid)
}
return nil, fmt.Errorf("unable to find user %s: %w", userArg, err)
}
var matchedUserName string
if len(users) > 0 {
// First match wins, even if there's more than one matching entry.
matchedUserName = users[0].Name
user.Uid = users[0].Uid
user.Gid = users[0].Gid
user.Home = users[0].Home
} else if userArg != "" {
// If we can't find a user with the given username, the only other valid
// option is if it's a numeric username with no associated entry in passwd.
if uidErr != nil {
// Not numeric.
return nil, fmt.Errorf("unable to find user %s: %w", userArg, ErrNoPasswdEntries)
}
user.Uid = uidArg
// Must be inside valid uid range.
if user.Uid < minID || user.Uid > maxID {
return nil, ErrRange
}
// Okay, so it's numeric. We can just roll with this.
}
// On to the groups. If we matched a username, we need to do this because of
// the supplementary group IDs.
if groupArg != "" || matchedUserName != "" {
groups, err := ParseGroupFilter(group, func(g Group) bool {
// If the group argument isn't explicit, we'll just search for it.
if groupArg == "" {
// Check if user is a member of this group.
for _, u := range g.List {
if u == matchedUserName {
return true
}
}
return false
}
if gidErr == nil {
// If the groupArg is numeric, always treat it as a GID.
return gidArg == g.Gid
}
return g.Name == groupArg
})
if err != nil && group != nil {
return nil, fmt.Errorf("unable to find groups for spec %v: %w", matchedUserName, err)
}
// Only start modifying user.Gid if it is in explicit form.
if groupArg != "" {
if len(groups) > 0 {
// First match wins, even if there's more than one matching entry.
user.Gid = groups[0].Gid
} else {
// If we can't find a group with the given name, the only other valid
// option is if it's a numeric group name with no associated entry in group.
if gidErr != nil {
// Not numeric.
return nil, fmt.Errorf("unable to find group %s: %w", groupArg, ErrNoGroupEntries)
}
user.Gid = gidArg
// Must be inside valid gid range.
if user.Gid < minID || user.Gid > maxID {
return nil, ErrRange
}
// Okay, so it's numeric. We can just roll with this.
}
} else if len(groups) > 0 {
// Supplementary group ids only make sense if in the implicit form.
user.Sgids = make([]int, len(groups))
for i, group := range groups {
user.Sgids[i] = group.Gid
}
}
}
return user, nil
}
// GetAdditionalGroups looks up a list of groups by name or group id
// against the given /etc/group formatted data. If a group name cannot
// be found, an error will be returned. If a group id cannot be found,
// or the given group data is nil, the id will be returned as-is
// provided it is in the legal range.
func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, error) {
groups := []Group{}
if group != nil {
var err error
groups, err = ParseGroupFilter(group, func(g Group) bool {
for _, ag := range additionalGroups {
if g.Name == ag || strconv.Itoa(g.Gid) == ag {
return true
}
}
return false
})
if err != nil {
return nil, fmt.Errorf("Unable to find additional groups %v: %w", additionalGroups, err)
}
}
gidMap := make(map[int]struct{})
for _, ag := range additionalGroups {
var found bool
for _, g := range groups {
// if we found a matched group either by name or gid, take the
// first matched as correct
if g.Name == ag || strconv.Itoa(g.Gid) == ag {
if _, ok := gidMap[g.Gid]; !ok {
gidMap[g.Gid] = struct{}{}
found = true
break
}
}
}
// we asked for a group but didn't find it. let's check to see
// if we wanted a numeric group
if !found {
gid, err := strconv.ParseInt(ag, 10, 64)
if err != nil {
// Not a numeric ID either.
return nil, fmt.Errorf("Unable to find group %s: %w", ag, ErrNoGroupEntries)
}
// Ensure gid is inside gid range.
if gid < minID || gid > maxID {
return nil, ErrRange
}
gidMap[int(gid)] = struct{}{}
}
}
gids := []int{}
for gid := range gidMap {
gids = append(gids, gid)
}
return gids, nil
}
// GetAdditionalGroupsPath is a wrapper around GetAdditionalGroups
// that opens the groupPath given and gives it as an argument to
// GetAdditionalGroups.
func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int, error) {
var group io.Reader
if groupFile, err := os.Open(groupPath); err == nil {
group = groupFile
defer groupFile.Close()
}
return GetAdditionalGroups(additionalGroups, group)
}
func ParseSubIDFile(path string) ([]SubID, error) {
subid, err := os.Open(path)
if err != nil {
return nil, err
}
defer subid.Close()
return ParseSubID(subid)
}
func ParseSubID(subid io.Reader) ([]SubID, error) {
return ParseSubIDFilter(subid, nil)
}
func ParseSubIDFileFilter(path string, filter func(SubID) bool) ([]SubID, error) {
subid, err := os.Open(path)
if err != nil {
return nil, err
}
defer subid.Close()
return ParseSubIDFilter(subid, filter)
}
func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) {
if r == nil {
return nil, errors.New("nil source for subid-formatted data")
}
var (
s = bufio.NewScanner(r)
out = []SubID{}
)
for s.Scan() {
line := bytes.TrimSpace(s.Bytes())
if len(line) == 0 {
continue
}
// see: man 5 subuid
p := SubID{}
parseLine(line, &p.Name, &p.SubID, &p.Count)
if filter == nil || filter(p) {
out = append(out, p)
}
}
if err := s.Err(); err != nil {
return nil, err
}
return out, nil
}
func ParseIDMapFile(path string) ([]IDMap, error) {
r, err := os.Open(path)
if err != nil {
return nil, err
}
defer r.Close()
return ParseIDMap(r)
}
func ParseIDMap(r io.Reader) ([]IDMap, error) {
return ParseIDMapFilter(r, nil)
}
func ParseIDMapFileFilter(path string, filter func(IDMap) bool) ([]IDMap, error) {
r, err := os.Open(path)
if err != nil {
return nil, err
}
defer r.Close()
return ParseIDMapFilter(r, filter)
}
func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) {
if r == nil {
return nil, errors.New("nil source for idmap-formatted data")
}
var (
s = bufio.NewScanner(r)
out = []IDMap{}
)
for s.Scan() {
line := bytes.TrimSpace(s.Bytes())
if len(line) == 0 {
continue
}
// see: man 7 user_namespaces
p := IDMap{}
parseParts(bytes.Fields(line), &p.ID, &p.ParentID, &p.Count)
if filter == nil || filter(p) {
out = append(out, p)
}
}
if err := s.Err(); err != nil {
return nil, err
}
return out, nil
}

View file

@ -1,43 +0,0 @@
//go:build gofuzz
// +build gofuzz
package user
import (
"io"
"strings"
)
func IsDivisbleBy(n int, divisibleby int) bool {
return (n % divisibleby) == 0
}
func FuzzUser(data []byte) int {
if len(data) == 0 {
return -1
}
if !IsDivisbleBy(len(data), 5) {
return -1
}
var divided [][]byte
chunkSize := len(data) / 5
for i := 0; i < len(data); i += chunkSize {
end := i + chunkSize
divided = append(divided, data[i:end])
}
_, _ = ParsePasswdFilter(strings.NewReader(string(divided[0])), nil)
var passwd, group io.Reader
group = strings.NewReader(string(divided[1]))
_, _ = GetAdditionalGroups([]string{string(divided[2])}, group)
passwd = strings.NewReader(string(divided[3]))
_, _ = GetExecUser(string(divided[4]), nil, passwd, group)
return 1
}

View file

@ -1,5 +1,4 @@
package userns
// RunningInUserNS detects whether we are currently running in a user namespace.
// Originally copied from github.com/lxc/lxd/shared/util.go
var RunningInUserNS = runningInUserNS

View file

@ -3,14 +3,7 @@
package userns
import (
"strings"
"github.com/opencontainers/runc/libcontainer/user"
)
func FuzzUIDMap(data []byte) int {
uidmap, _ := user.ParseIDMap(strings.NewReader(string(data)))
_ = uidMapInUserNS(uidmap)
func FuzzUIDMap(uidmap []byte) int {
_ = uidMapInUserNS(string(uidmap))
return 1
}

View file

@ -1,9 +1,10 @@
package userns
import (
"bufio"
"fmt"
"os"
"sync"
"github.com/opencontainers/runc/libcontainer/user"
)
var (
@ -12,26 +13,43 @@ var (
)
// runningInUserNS detects whether we are currently running in a user namespace.
// Originally copied from github.com/lxc/lxd/shared/util.go
//
// Originally copied from https://github.com/lxc/incus/blob/e45085dd42f826b3c8c3228e9733c0b6f998eafe/shared/util.go#L678-L700.
func runningInUserNS() bool {
nsOnce.Do(func() {
uidmap, err := user.CurrentProcessUIDMap()
file, err := os.Open("/proc/self/uid_map")
if err != nil {
// This kernel-provided file only exists if user namespaces are supported
// This kernel-provided file only exists if user namespaces are supported.
return
}
inUserNS = uidMapInUserNS(uidmap)
defer file.Close()
buf := bufio.NewReader(file)
l, _, err := buf.ReadLine()
if err != nil {
return
}
inUserNS = uidMapInUserNS(string(l))
})
return inUserNS
}
func uidMapInUserNS(uidmap []user.IDMap) bool {
/*
* We assume we are in the initial user namespace if we have a full
* range - 4294967295 uids starting at uid 0.
*/
if len(uidmap) == 1 && uidmap[0].ID == 0 && uidmap[0].ParentID == 0 && uidmap[0].Count == 4294967295 {
func uidMapInUserNS(uidMap string) bool {
if uidMap == "" {
// File exists but is empty (the initial state when a userns is created,
// see user_namespaces(7)).
return true
}
var a, b, c int64
if _, err := fmt.Sscanf(uidMap, "%d %d %d", &a, &b, &c); err != nil {
// Assume we are in a regular, non user namespace.
return false
}
return true
// As per user_namespaces(7), /proc/self/uid_map of
// the initial user namespace shows 0 0 4294967295.
initNS := a == 0 && b == 0 && c == 4294967295
return !initNS
}

View file
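Illustrative inputs for the rewritten check, per user_namespaces(7). The function is unexported, so these calls only compile inside package userns:

	uidMapInUserNS("")               // true: the map is empty right after userns creation
	uidMapInUserNS("0 0 4294967295") // false: the full-range map of the initial namespace
	uidMapInUserNS("0 100000 65536") // true: a typical rootless mapping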

@ -3,8 +3,6 @@
package userns
import "github.com/opencontainers/runc/libcontainer/user"
// runningInUserNS is a stub for non-Linux systems
// Always returns false
func runningInUserNS() bool {
@ -13,6 +11,6 @@ func runningInUserNS() bool {
// uidMapInUserNS is a stub for non-Linux systems
// Always returns false
func uidMapInUserNS(uidmap []user.IDMap) bool {
func uidMapInUserNS(uidMap string) bool {
return false
}

View file

@ -0,0 +1,156 @@
package userns
import (
"fmt"
"os"
"sort"
"strings"
"sync"
"syscall"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"github.com/opencontainers/runc/libcontainer/configs"
)
type Mapping struct {
UIDMappings []configs.IDMap
GIDMappings []configs.IDMap
}
func (m Mapping) toSys() (uids, gids []syscall.SysProcIDMap) {
for _, uid := range m.UIDMappings {
uids = append(uids, syscall.SysProcIDMap{
ContainerID: int(uid.ContainerID),
HostID: int(uid.HostID),
Size: int(uid.Size),
})
}
for _, gid := range m.GIDMappings {
gids = append(gids, syscall.SysProcIDMap{
ContainerID: int(gid.ContainerID),
HostID: int(gid.HostID),
Size: int(gid.Size),
})
}
return
}
// id returns a unique identifier for this mapping, agnostic of the order of
// the uid and gid mappings (because the order doesn't matter to the kernel).
// The set of userns handles is indexed using this ID.
func (m Mapping) id() string {
var uids, gids []string
for _, idmap := range m.UIDMappings {
uids = append(uids, fmt.Sprintf("%d:%d:%d", idmap.ContainerID, idmap.HostID, idmap.Size))
}
for _, idmap := range m.GIDMappings {
gids = append(gids, fmt.Sprintf("%d:%d:%d", idmap.ContainerID, idmap.HostID, idmap.Size))
}
// We don't care about the sort order -- just sort them.
sort.Strings(uids)
sort.Strings(gids)
return "uid=" + strings.Join(uids, ",") + ";gid=" + strings.Join(gids, ",")
}
type Handles struct {
m sync.Mutex
maps map[string]*os.File
}
// Release releases all resources associated with this Handles. Existing files
// returned from Get() will continue to work even after calling Release(). The
// same Handles can be re-used after calling Release().
func (hs *Handles) Release() {
hs.m.Lock()
defer hs.m.Unlock()
// Close the files for good measure, though GC will do that for us anyway.
for _, file := range hs.maps {
_ = file.Close()
}
hs.maps = nil
}
func spawnProc(req Mapping) (*os.Process, error) {
// We need to spawn a subprocess with the requested mappings, which is
// unfortunately quite expensive. The "safe" way of doing this is natively
// with Go (and then spawning something like "sleep infinity"), but
// execve() is a waste of cycles because we just need some process to have
// the right mapping, we don't care what it's executing. The "unsafe"
// option of doing a clone() behind the back of Go is probably okay in
// theory as long as we just do kill(getpid(), SIGSTOP). However, if we
// tell Go to put the new process into PTRACE_TRACEME mode, we can avoid
// the exec and not have to faff around with the mappings.
//
// Note that Go's stdlib does not support newuidmap, but in the case of
// id-mapped mounts, it seems incredibly unlikely that the user will be
// requesting us to do a remapping as an unprivileged user with mappings
// they have privileges over.
logrus.Debugf("spawning dummy process for id-mapping %s", req.id())
uidMappings, gidMappings := req.toSys()
// We don't need to use /proc/thread-self here because the exe mm of a
// thread-group is guaranteed to be the same for all threads by definition.
// This lets us avoid having to do runtime.LockOSThread.
return os.StartProcess("/proc/self/exe", []string{"runc", "--help"}, &os.ProcAttr{
Sys: &syscall.SysProcAttr{
Cloneflags: unix.CLONE_NEWUSER,
UidMappings: uidMappings,
GidMappings: gidMappings,
GidMappingsEnableSetgroups: false,
// Put the process into PTRACE_TRACEME mode to allow us to get the
// userns without having a proper execve() target.
Ptrace: true,
},
})
}
func dupFile(f *os.File) (*os.File, error) {
newFd, err := unix.FcntlInt(f.Fd(), unix.F_DUPFD_CLOEXEC, 0)
if err != nil {
return nil, os.NewSyscallError("fcntl(F_DUPFD_CLOEXEC)", err)
}
return os.NewFile(uintptr(newFd), f.Name()), nil
}
// Get returns a handle to a /proc/$pid/ns/user nsfs file with the requested
// mapping. The processes spawned to produce userns nsfds are cached, so if
// equivalent user namespace mappings are requested, the same user namespace
// will be returned. The caller is responsible for closing the returned file
// descriptor.
func (hs *Handles) Get(req Mapping) (file *os.File, err error) {
hs.m.Lock()
defer hs.m.Unlock()
if hs.maps == nil {
hs.maps = make(map[string]*os.File)
}
file, ok := hs.maps[req.id()]
if !ok {
proc, err := spawnProc(req)
if err != nil {
return nil, fmt.Errorf("failed to spawn dummy process for map %s: %w", req.id(), err)
}
// Make sure we kill the helper process. We ignore errors because
// there's not much we can do about them anyway; the helper has served
// its purpose once we hold a handle to its user namespace.
defer func() {
_ = proc.Kill()
_, _ = proc.Wait()
}()
// Stash away a handle to the userns file. This is neater than keeping
// the process alive, because Go's GC can handle files much better than
// leaked processes, and having long-living useless processes seems
// less than ideal.
file, err = os.Open(fmt.Sprintf("/proc/%d/ns/user", proc.Pid))
if err != nil {
return nil, err
}
hs.maps[req.id()] = file
}
// Duplicate the file, to make sure the lifecycle of each *os.File we
// return is independent.
return dupFile(file)
}

View file
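A hedged usage sketch for the Handles cache (creating arbitrary mappings needs CAP_SYS_ADMIN; the ranges are illustrative, and the configs and userns imports are assumed):

	var h userns.Handles
	defer h.Release()

	nsFile, err := h.Get(userns.Mapping{
		UIDMappings: []configs.IDMap{{ContainerID: 0, HostID: 100000, Size: 65536}},
		GIDMappings: []configs.IDMap{{ContainerID: 0, HostID: 100000, Size: 65536}},
	})
	if err != nil {
		return err
	}
	defer nsFile.Close()
	// nsFile.Fd() can now serve as the userns fd for a MOUNT_ATTR_IDMAP
	// mount_setattr(2) call; a second Get with an equivalent mapping reuses
	// the cached namespace instead of spawning another helper.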

@ -19,13 +19,14 @@ package utils
import (
"fmt"
"os"
"runtime"
"golang.org/x/sys/unix"
)
// MaxSendfdLen is the maximum length of the name of a file descriptor being
// sent using SendFd. The name of the file handle returned by RecvFd will never
// be larger than this value.
// MaxNameLen is the maximum length of the name of a file descriptor being sent
// using SendFile. The name of the file handle returned by RecvFile will never be
// larger than this value.
const MaxNameLen = 4096
// oobSpace is the size of the oob slice required to store a single FD. Note
@ -33,26 +34,21 @@ const MaxNameLen = 4096
// so sizeof(fd) = 4.
var oobSpace = unix.CmsgSpace(4)
// RecvFd waits for a file descriptor to be sent over the given AF_UNIX
// RecvFile waits for a file descriptor to be sent over the given AF_UNIX
// socket. The file name of the remote file descriptor will be recreated
// locally (it is sent as non-auxiliary data in the same payload).
func RecvFd(socket *os.File) (*os.File, error) {
// For some reason, unix.Recvmsg uses the length rather than the capacity
// when passing the msg_controllen and other attributes to recvmsg. So we
// have to actually set the length.
func RecvFile(socket *os.File) (_ *os.File, Err error) {
name := make([]byte, MaxNameLen)
oob := make([]byte, oobSpace)
sockfd := socket.Fd()
n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0)
n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, unix.MSG_CMSG_CLOEXEC)
if err != nil {
return nil, err
}
if n >= MaxNameLen || oobn != oobSpace {
return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
return nil, fmt.Errorf("recvfile: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
}
// Truncate.
name = name[:n]
oob = oob[:oobn]
@ -61,36 +57,63 @@ func RecvFd(socket *os.File) (*os.File, error) {
if err != nil {
return nil, err
}
// We cannot control how many SCM_RIGHTS we receive, and upon receiving
// them all of the descriptors are installed in our fd table, so we need to
// parse all of the SCM_RIGHTS we received in order to close all of the
// descriptors on error.
var fds []int
defer func() {
for i, fd := range fds {
if i == 0 && Err == nil {
// Only close the first one on error.
continue
}
// Always close extra ones.
_ = unix.Close(fd)
}
}()
var lastErr error
for _, scm := range scms {
if scm.Header.Type == unix.SCM_RIGHTS {
scmFds, err := unix.ParseUnixRights(&scm)
if err != nil {
lastErr = err
} else {
fds = append(fds, scmFds...)
}
}
}
if lastErr != nil {
return nil, lastErr
}
// We do this after collecting the fds to make sure we close them all when
// returning an error here.
if len(scms) != 1 {
return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
}
scm := scms[0]
fds, err := unix.ParseUnixRights(&scm)
if err != nil {
return nil, err
}
if len(fds) != 1 {
return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds))
}
fd := uintptr(fds[0])
return os.NewFile(fd, string(name)), nil
return os.NewFile(uintptr(fds[0]), string(name)), nil
}
// SendFd sends a file descriptor over the given AF_UNIX socket. In
// addition, the file.Name() of the given file will also be sent as
// non-auxiliary data in the same payload (allowing to send contextual
// information for a file descriptor).
func SendFd(socket *os.File, name string, fd uintptr) error {
// SendFile sends a file over the given AF_UNIX socket. file.Name() is also
// included so that if the other end uses RecvFile, the file will have the same
// name information.
func SendFile(socket *os.File, file *os.File) error {
name := file.Name()
if len(name) >= MaxNameLen {
return fmt.Errorf("sendfd: filename too long: %s", name)
}
return SendFds(socket, []byte(name), int(fd))
err := SendRawFd(socket, name, file.Fd())
runtime.KeepAlive(file)
return err
}
// SendFds sends a list of files descriptor and msg over the given AF_UNIX socket.
func SendFds(socket *os.File, msg []byte, fds ...int) error {
oob := unix.UnixRights(fds...)
return unix.Sendmsg(int(socket.Fd()), msg, oob, nil, 0)
// SendRawFd sends a specific file descriptor over the given AF_UNIX socket.
func SendRawFd(socket *os.File, msg string, fd uintptr) error {
oob := unix.UnixRights(int(fd))
return unix.Sendmsg(int(socket.Fd()), []byte(msg), oob, nil, 0)
}

View file
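The renamed SendFile/RecvFile helpers round-trip both the descriptor and its name; a sketch over a socketpair within one process (the path is illustrative; the os, fmt, and utils imports are assumed):

	parent, child, err := utils.NewSockPair("fdpass")
	if err != nil {
		return err
	}
	f, err := os.Open("/etc/hostname")
	if err != nil {
		return err
	}
	go func() {
		_ = utils.SendFile(child, f) // sends the fd plus f.Name() in the payload
	}()
	recvd, err := utils.RecvFile(parent)
	if err != nil {
		return err
	}
	fmt.Println(recvd.Name()) // "/etc/hostname", reconstructed on the receiving end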

@ -3,15 +3,12 @@ package utils
import (
"encoding/binary"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"unsafe"
securejoin "github.com/cyphar/filepath-securejoin"
"golang.org/x/sys/unix"
)
@ -43,6 +40,9 @@ func ExitStatus(status unix.WaitStatus) int {
}
// WriteJSON writes the provided struct v to w using standard json marshaling
// without a trailing newline. This is used instead of json.Encoder because
// there might be a problem with the json decoder in some cases, see:
// https://github.com/docker/docker/issues/14203#issuecomment-174177790
func WriteJSON(w io.Writer, v interface{}) error {
data, err := json.Marshal(v)
if err != nil {
@ -99,52 +99,16 @@ func stripRoot(root, path string) string {
return CleanPath("/" + path)
}
// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
// corresponding to the unsafePath resolved within the root. Before passing the
// fd, this path is verified to have been inside the root -- so operating on it
// through the passed fdpath should be safe. Do not access this path through
// the original path strings, and do not attempt to use the pathname outside of
// the passed closure (the file handle will be freed once the closure returns).
func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
// Remove the root then forcefully resolve inside the root.
unsafePath = stripRoot(root, unsafePath)
path, err := securejoin.SecureJoin(root, unsafePath)
if err != nil {
return fmt.Errorf("resolving path inside rootfs failed: %w", err)
}
// Open the target path.
fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
if err != nil {
return fmt.Errorf("open o_path procfd: %w", err)
}
defer fh.Close()
// Double-check the path is the one we expected.
procfd := "/proc/self/fd/" + strconv.Itoa(int(fh.Fd()))
if realpath, err := os.Readlink(procfd); err != nil {
return fmt.Errorf("procfd verification failed: %w", err)
} else if realpath != path {
return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
}
// Run the closure.
return fn(procfd)
}
// SearchLabels searches a list of key-value pairs for the provided key and
// returns the corresponding value. The pairs must be separated with '='.
func SearchLabels(labels []string, query string) string {
for _, l := range labels {
parts := strings.SplitN(l, "=", 2)
if len(parts) < 2 {
continue
}
if parts[0] == query {
return parts[1]
// SearchLabels searches through a list of key=value pairs for a given key,
// returning its value and a boolean indicating whether the key exists.
func SearchLabels(labels []string, key string) (string, bool) {
key += "="
for _, s := range labels {
if strings.HasPrefix(s, key) {
return s[len(key):], true
}
}
return ""
return "", false
}
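The new boolean return distinguishes a missing key from a key with an empty value, which the old string-only signature could not; for example (as if inside this package):

	labels := []string{"bundle=/run/task/ctr", "empty="}
	v, ok := SearchLabels(labels, "empty")  // v == "", ok == true
	_, ok = SearchLabels(labels, "missing") // ok == false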
// Annotations returns the bundle path and user defined annotations from the

View file

@ -5,10 +5,16 @@ package utils
import (
"fmt"
"math"
"os"
"path/filepath"
"runtime"
"strconv"
"sync"
_ "unsafe" // for go:linkname
securejoin "github.com/cyphar/filepath-securejoin"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
@ -24,12 +30,39 @@ func EnsureProcHandle(fh *os.File) error {
return nil
}
var (
haveCloseRangeCloexecBool bool
haveCloseRangeCloexecOnce sync.Once
)
func haveCloseRangeCloexec() bool {
haveCloseRangeCloexecOnce.Do(func() {
// Make sure we're not closing a random file descriptor.
tmpFd, err := unix.FcntlInt(0, unix.F_DUPFD_CLOEXEC, 0)
if err != nil {
return
}
defer unix.Close(tmpFd)
err = unix.CloseRange(uint(tmpFd), uint(tmpFd), unix.CLOSE_RANGE_CLOEXEC)
// Any error means we cannot use close_range(CLOSE_RANGE_CLOEXEC).
// -ENOSYS and -EINVAL ultimately mean we don't have support, but any
// other potential error would imply that even the most basic close
// operation wouldn't work.
haveCloseRangeCloexecBool = err == nil
})
return haveCloseRangeCloexecBool
}
type fdFunc func(fd int)
// fdRangeFrom calls the passed fdFunc for each file descriptor that is open in
// the current process.
func fdRangeFrom(minFd int, fn fdFunc) error {
fdDir, err := os.Open("/proc/self/fd")
procSelfFd, closer := ProcThreadSelf("fd")
defer closer()
fdDir, err := os.Open(procSelfFd)
if err != nil {
return err
}
@ -67,6 +100,12 @@ func fdRangeFrom(minFd int, fn fdFunc) error {
// CloseExecFrom sets the O_CLOEXEC flag on all file descriptors greater or
// equal to minFd in the current process.
func CloseExecFrom(minFd int) error {
// Use close_range(CLOSE_RANGE_CLOEXEC) if possible.
if haveCloseRangeCloexec() {
err := unix.CloseRange(uint(minFd), math.MaxUint, unix.CLOSE_RANGE_CLOEXEC)
return os.NewSyscallError("close_range", err)
}
// Otherwise, fall back to the standard loop.
return fdRangeFrom(minFd, unix.CloseOnExec)
}
@ -89,7 +128,8 @@ func runtime_IsPollDescriptor(fd uintptr) bool //nolint:revive
// *os.File operations would apply to the wrong file). This function is only
// intended to be called from the last stage of runc init.
func UnsafeCloseFrom(minFd int) error {
// We must not close some file descriptors.
// We cannot use close_range(2) even if it is available, because we must
// not close some file descriptors.
return fdRangeFrom(minFd, func(fd int) {
if runtime_IsPollDescriptor(uintptr(fd)) {
// These are the Go runtimes internal netpoll file descriptors.
@ -107,11 +147,117 @@ func UnsafeCloseFrom(minFd int) error {
})
}
// NewSockPair returns a new unix socket pair
func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
// NewSockPair returns a new SOCK_STREAM unix socket pair.
func NewSockPair(name string) (parent, child *os.File, err error) {
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
if err != nil {
return nil, nil, err
}
return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
}
// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
// corresponding to the unsafePath resolved within the root. Before passing the
// fd, this path is verified to have been inside the root -- so operating on it
// through the passed fdpath should be safe. Do not access this path through
// the original path strings, and do not attempt to use the pathname outside of
// the passed closure (the file handle will be freed once the closure returns).
func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
// Remove the root then forcefully resolve inside the root.
unsafePath = stripRoot(root, unsafePath)
path, err := securejoin.SecureJoin(root, unsafePath)
if err != nil {
return fmt.Errorf("resolving path inside rootfs failed: %w", err)
}
procSelfFd, closer := ProcThreadSelf("fd/")
defer closer()
// Open the target path.
fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
if err != nil {
return fmt.Errorf("open o_path procfd: %w", err)
}
defer fh.Close()
procfd := filepath.Join(procSelfFd, strconv.Itoa(int(fh.Fd())))
// Double-check the path is the one we expected.
if realpath, err := os.Readlink(procfd); err != nil {
return fmt.Errorf("procfd verification failed: %w", err)
} else if realpath != path {
return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
}
return fn(procfd)
}
type ProcThreadSelfCloser func()
var (
haveProcThreadSelf bool
haveProcThreadSelfOnce sync.Once
)
// ProcThreadSelf returns a string that is equivalent to
// /proc/thread-self/<subpath>, with a graceful fallback on older kernels where
// /proc/thread-self doesn't exist. This method DOES NOT use SecureJoin,
// meaning that the passed string needs to be trusted. The caller _must_ call
// the returned procThreadSelfCloser function (which is runtime.UnlockOSThread)
// *only once* after it has finished using the returned path string.
func ProcThreadSelf(subpath string) (string, ProcThreadSelfCloser) {
haveProcThreadSelfOnce.Do(func() {
if _, err := os.Stat("/proc/thread-self/"); err == nil {
haveProcThreadSelf = true
} else {
logrus.Debugf("cannot stat /proc/thread-self (%v), falling back to /proc/self/task/<tid>", err)
}
})
// We need to lock our thread until the caller is done with the path string
// because any non-atomic operation on the path (such as opening a file,
// then reading it) could be interrupted by the Go runtime where the
// underlying thread is swapped out and the original thread is killed,
// resulting in pull-your-hair-out-hard-to-debug issues in the caller. In
// addition, the pre-3.17 fallback makes everything non-atomic because the
// same thing could happen between unix.Gettid() and the path operations.
//
// In theory, we don't need to lock in the atomic user case when using
// /proc/thread-self/, but it's better to be safe than sorry (and there are
// only one or two truly atomic users of /proc/thread-self/).
runtime.LockOSThread()
threadSelf := "/proc/thread-self/"
if !haveProcThreadSelf {
// Pre-3.17 kernels did not have /proc/thread-self, so do it manually.
threadSelf = "/proc/self/task/" + strconv.Itoa(unix.Gettid()) + "/"
if _, err := os.Stat(threadSelf); err != nil {
// Unfortunately, this code is called from rootfs_linux.go where we
// are running inside the pid namespace of the container but /proc
// is the host's procfs. There is no real way to get
// the correct tid to use here (the kernel age means we cannot do
// things like set up a private fsopen("proc") -- even scanning
// NSpid in all of the tasks in /proc/self/task/*/status requires
// Linux 4.1).
//
// So, we just have to assume that /proc/self is acceptable in this
// one specific case.
if os.Getpid() == 1 {
logrus.Debugf("/proc/thread-self (tid=%d) cannot be emulated inside the initial container setup -- using /proc/self instead: %v", unix.Gettid(), err)
} else {
// This should never happen, but the fallback should work in most cases...
logrus.Warnf("/proc/thread-self could not be emulated for pid=%d (tid=%d) -- using more buggy /proc/self fallback instead: %v", os.Getpid(), unix.Gettid(), err)
}
threadSelf = "/proc/self/"
}
}
return threadSelf + subpath, runtime.UnlockOSThread
}
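Typical use of ProcThreadSelf, as a sketch written as if inside this package: the returned closer unlocks the OS thread, so it must run exactly once, and only after the path is no longer needed.

	statusPath, closer := ProcThreadSelf("status")
	defer closer()
	data, err := os.ReadFile(statusPath)
	if err != nil {
		return err
	}
	logrus.Debugf("thread status: %s", data)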
// ProcThreadSelfFd is a small wrapper around ProcThreadSelf to make it easier
// to create a /proc/thread-self handle for a given file descriptor.
//
// It is basically equivalent to ProcThreadSelf(fmt.Sprintf("fd/%d", fd)), but
// without using fmt.Sprintf to avoid unneeded overhead.
func ProcThreadSelfFd(fd uintptr) (string, ProcThreadSelfCloser) {
return ProcThreadSelf("fd/" + strconv.FormatUint(uint64(fd), 10))
}

vendor/modules.txt vendored
View file

@ -216,14 +216,15 @@ github.com/cenkalti/backoff/v4
# github.com/cespare/xxhash/v2 v2.2.0
## explicit; go 1.11
github.com/cespare/xxhash/v2
# github.com/cilium/ebpf v0.11.0
## explicit; go 1.19
# github.com/cilium/ebpf v0.12.3
## explicit; go 1.20
github.com/cilium/ebpf
github.com/cilium/ebpf/asm
github.com/cilium/ebpf/btf
github.com/cilium/ebpf/internal
github.com/cilium/ebpf/internal/kconfig
github.com/cilium/ebpf/internal/sys
github.com/cilium/ebpf/internal/sysenc
github.com/cilium/ebpf/internal/tracefs
github.com/cilium/ebpf/internal/unix
github.com/cilium/ebpf/link
@ -985,12 +986,11 @@ github.com/opencontainers/go-digest/digestset
github.com/opencontainers/image-spec/identity
github.com/opencontainers/image-spec/specs-go
github.com/opencontainers/image-spec/specs-go/v1
# github.com/opencontainers/runc v1.1.12
## explicit; go 1.17
# github.com/opencontainers/runc v1.2.0-rc.1
## explicit; go 1.20
github.com/opencontainers/runc/libcontainer/cgroups
github.com/opencontainers/runc/libcontainer/configs
github.com/opencontainers/runc/libcontainer/devices
github.com/opencontainers/runc/libcontainer/user
github.com/opencontainers/runc/libcontainer/userns
github.com/opencontainers/runc/libcontainer/utils
# github.com/opencontainers/runtime-spec v1.2.0