vendor: github.com/containerd/cgroups/v3 v3.0.1

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
Akihiro Suda 2023-01-30 23:43:31 +09:00
parent e528b227a6
commit e807ae4f2e
No known key found for this signature in database
GPG key ID: 49524C6F9F638F1A
147 changed files with 14369 additions and 9567 deletions


@@ -1,7 +1,7 @@
package main
import (
cdcgroups "github.com/containerd/cgroups"
cdcgroups "github.com/containerd/cgroups/v3"
systemdDaemon "github.com/coreos/go-systemd/v22/daemon"
"github.com/docker/docker/daemon/config"
"github.com/docker/docker/pkg/sysinfo"


@@ -6,7 +6,7 @@ import (
"os/exec"
"path/filepath"
"github.com/containerd/cgroups"
"github.com/containerd/cgroups/v3"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container"
"github.com/docker/docker/opts"


@@ -18,9 +18,9 @@ import (
"syscall"
"time"
"github.com/containerd/cgroups"
statsV1 "github.com/containerd/cgroups/stats/v1"
statsV2 "github.com/containerd/cgroups/v2/stats"
"github.com/containerd/cgroups/v3"
statsV1 "github.com/containerd/cgroups/v3/cgroup1/stats"
statsV2 "github.com/containerd/cgroups/v3/cgroup2/stats"
"github.com/containerd/containerd/pkg/userns"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/blkiodev"


@@ -10,7 +10,7 @@ import (
"strconv"
"strings"
cdcgroups "github.com/containerd/cgroups"
cdcgroups "github.com/containerd/cgroups/v3"
"github.com/containerd/containerd/containers"
coci "github.com/containerd/containerd/oci"
"github.com/containerd/containerd/pkg/apparmor"


@@ -13,8 +13,8 @@ type Summary struct{}
type Stats struct {
Read time.Time
// Metrics is expected to be either one of:
// * github.com/containerd/cgroups/stats/v1.Metrics
// * github.com/containerd/cgroups/stats/v2.Metrics
// * github.com/containerd/cgroups/v3/cgroup1/stats.Metrics
// * github.com/containerd/cgroups/v3/cgroup2/stats.Metrics
Metrics interface{}
}
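Consumers of Stats have to type-switch on Metrics to tell the two shapes apart. A minimal sketch under the new import paths; memoryUsage is a hypothetical helper, not part of this diff, and the field layout follows the respective stats packages:

import (
	statsV1 "github.com/containerd/cgroups/v3/cgroup1/stats"
	statsV2 "github.com/containerd/cgroups/v3/cgroup2/stats"
)

// memoryUsage extracts current memory usage regardless of cgroup version.
func memoryUsage(s Stats) (uint64, bool) {
	switch m := s.Metrics.(type) {
	case *statsV1.Metrics: // cgroup v1 host
		if m.Memory != nil && m.Memory.Usage != nil {
			return m.Memory.Usage.Usage, true
		}
	case *statsV2.Metrics: // cgroup v2 host
		if m.Memory != nil {
			return m.Memory.Usage, true
		}
	}
	return 0, false
}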


@@ -5,8 +5,8 @@ import (
"path"
"strings"
"github.com/containerd/cgroups"
cgroupsV2 "github.com/containerd/cgroups/v2"
"github.com/containerd/cgroups/v3"
cgroupsV2 "github.com/containerd/cgroups/v3/cgroup2"
"github.com/containerd/containerd/pkg/userns"
"github.com/sirupsen/logrus"
)
@@ -27,7 +27,7 @@ func newV2(options ...Opt) *SysInfo {
applyCgroupNsInfo,
}
m, err := cgroupsV2.LoadManager("/sys/fs/cgroup", sysInfo.cg2GroupPath)
m, err := cgroupsV2.Load(sysInfo.cg2GroupPath)
if err != nil {
logrus.Warn(err)
} else {
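Besides the import path, the constructor changed shape: v2's LoadManager(mountpoint, group) became cgroup2.Load(group, opts...), with /sys/fs/cgroup implied. A small illustrative sketch (the helper below is hypothetical, not part of this diff):

import cgroupsV2 "github.com/containerd/cgroups/v3/cgroup2"

// cgroup2Controllers lists the controllers enabled for a v2 group.
func cgroup2Controllers(group string) ([]string, error) {
	// Load no longer takes a mountpoint argument; /sys/fs/cgroup is implied.
	m, err := cgroupsV2.Load(group)
	if err != nil {
		return nil, err
	}
	return m.Controllers()
}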


@@ -7,7 +7,8 @@ import (
"strings"
"sync"
"github.com/containerd/cgroups"
"github.com/containerd/cgroups/v3"
"github.com/containerd/cgroups/v3/cgroup1"
"github.com/containerd/containerd/pkg/seccomp"
"github.com/moby/sys/mountinfo"
"github.com/sirupsen/logrus"
@@ -40,7 +41,7 @@ func findCgroupV1Mountpoints() (map[string]string, error) {
return nil, err
}
allSubsystems, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
allSubsystems, err := cgroup1.ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return nil, fmt.Errorf("Failed to parse cgroup information: %v", err)
}
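ParseCgroupFile keeps its signature under the new cgroup1 package: it returns the per-subsystem cgroup paths of a process. A small sketch (memoryCgroupPath is a hypothetical helper, not part of this diff):

import "github.com/containerd/cgroups/v3/cgroup1"

// memoryCgroupPath returns the v1 memory cgroup of the current process.
func memoryCgroupPath() (string, error) {
	subsystems, err := cgroup1.ParseCgroupFile("/proc/self/cgroup")
	if err != nil {
		return "", err
	}
	// Maps subsystem name to path, e.g. "memory" -> "/user.slice".
	return subsystems["memory"], nil
}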


@@ -24,7 +24,7 @@ require (
github.com/aws/smithy-go v1.13.1
github.com/bsphere/le_go v0.0.0-20200109081728-fc06dab2caa8
github.com/cloudflare/cfssl v0.0.0-20180323000720-5d63dbd981b5
github.com/containerd/cgroups v1.0.4
github.com/containerd/cgroups/v3 v3.0.1
github.com/containerd/containerd v1.6.19
github.com/containerd/continuity v0.3.0
github.com/containerd/fifo v1.1.0
@@ -113,8 +113,9 @@ require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cenkalti/backoff/v4 v4.1.2 // indirect
github.com/cespare/xxhash/v2 v2.1.2 // indirect
github.com/cilium/ebpf v0.7.0 // indirect
github.com/cilium/ebpf v0.9.1 // indirect
github.com/container-storage-interface/spec v1.5.0 // indirect
github.com/containerd/cgroups v1.0.4 // indirect
github.com/containerd/console v1.0.3 // indirect
github.com/containerd/go-cni v1.1.6 // indirect
github.com/containerd/go-runc v1.0.0 // indirect


@@ -302,8 +302,9 @@ github.com/cilium/ebpf v0.0.0-20200702112145-1c8d4c9ef775/go.mod h1:7cR51M8ViRLI
github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX2Qs=
github.com/cilium/ebpf v0.4.0/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs=
github.com/cilium/ebpf v0.6.2/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs=
github.com/cilium/ebpf v0.7.0 h1:1k/q3ATgxSXRdrmPfH8d7YK0GfqVsEKZAX9dQZvs56k=
github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA=
github.com/cilium/ebpf v0.9.1 h1:64sn2K3UKw8NbP/blsixRpF3nXuyhz/VjRlRzvlBRu4=
github.com/cilium/ebpf v0.9.1/go.mod h1:+OhNOIXx/Fnu1IE8bJz2dzOA+VSfyTfdNUVdlQnxUFY=
github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=
github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
@@ -345,6 +346,8 @@ github.com/containerd/cgroups v0.0.0-20210114181951-8a68de567b68/go.mod h1:ZJeTF
github.com/containerd/cgroups v1.0.1/go.mod h1:0SJrPIenamHDcZhEcJMNBB85rHcUsw4f25ZfBiPYRkU=
github.com/containerd/cgroups v1.0.4 h1:jN/mbWBEaz+T1pi5OFtnkQ+8qnmEbAr1Oo1FRm5B0dA=
github.com/containerd/cgroups v1.0.4/go.mod h1:nLNQtsF7Sl2HxNebu77i1R0oDlhiTG+kO4JTrUzo6IA=
github.com/containerd/cgroups/v3 v3.0.1 h1:4hfGvu8rfGIwVIDd+nLzn/B9ZXx4BcCjzt5ToenJRaE=
github.com/containerd/cgroups/v3 v3.0.1/go.mod h1:/vtwk1VXrtoa5AaZLkypuOJgA/6DyPMZHJPGQNtlHnw=
github.com/containerd/console v0.0.0-20180822173158-c12b1e7919c1/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw=
github.com/containerd/console v0.0.0-20181022165439-0650fd9eeb50/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw=
github.com/containerd/console v0.0.0-20191206165004-02ecf6a7291e/go.mod h1:8Pf4gM6VEbTNRIT26AyyU7hxdQU3MvAvxVI0sc00XBE=
@@ -562,8 +565,8 @@ github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI
github.com/form3tech-oss/jwt-go v3.2.2+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k=
github.com/fortytw2/leaktest v1.2.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
github.com/frankban/quicktest v1.11.3 h1:8sXhOn0uLys67V8EsXLc6eszDs8VXWxL3iRvebPhedY=
github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k=
github.com/frankban/quicktest v1.14.0 h1:+cqqvzZV87b4adx/5ayVOaYZ2CrvM4ejQvUdBzPPUss=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/fsnotify/fsnotify v1.5.1 h1:mZcQUHVQUQWoPXXtuf9yuEXKudkV2sx1E06UadKWpgI=
@@ -962,8 +965,8 @@ github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxv
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/pty v1.1.3/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/pty v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA=
@@ -1257,6 +1260,7 @@ github.com/rogpeppe/fastuuid v1.1.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6L
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rogpeppe/go-internal v1.5.2/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBOAvL+k=
github.com/rootless-containers/rootlesskit v1.1.0 h1:cRaRIYxY8oce4eE/zeAUZhgKu/4tU1p9YHN4+suwV7M=
github.com/rootless-containers/rootlesskit v1.1.0/go.mod h1:H+o9ndNe7tS91WqU0/+vpvc+VaCd7TCIWaJjnV0ujUo=
github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ=


@@ -78,3 +78,9 @@ tend to use bpf_link to do so. Older hooks unfortunately use a combination of
syscalls, netlink messages, etc. Adding support for a new link type should not
pull in large dependencies like netlink, so XDP programs or tracepoints are
out of scope.
Each bpf_link_type has one corresponding Go type, e.g. `link.tracing` corresponds
to BPF_LINK_TRACING. In general, these types should be unexported as long as they
don't export methods outside of the Link interface. Each Go type may have multiple
exported constructors. For example `AttachTracing` and `AttachLSM` create a
tracing link, but are distinct functions since they may require different arguments.

vendor/github.com/cilium/ebpf/MAINTAINERS.md generated vendored Normal file

@@ -0,0 +1,8 @@
# Maintainers
* [Lorenz Bauer]
* [Timo Beckers] (Isovalent)
[Lorenz Bauer]: https://github.com/lmb
[Timo Beckers]: https://github.com/ti-mo


@@ -1,17 +1,27 @@
# The development version of clang is distributed as the 'clang' binary,
# while stable/released versions have a version number attached.
# Pin the default clang to a stable version.
CLANG ?= clang-12
CFLAGS := -target bpf -O2 -g -Wall -Werror $(CFLAGS)
CLANG ?= clang-14
STRIP ?= llvm-strip-14
OBJCOPY ?= llvm-objcopy-14
CFLAGS := -O2 -g -Wall -Werror $(CFLAGS)
CI_KERNEL_URL ?= https://github.com/cilium/ci-kernels/raw/master/
# Obtain an absolute path to the directory of the Makefile.
# Assume the Makefile is in the root of the repository.
REPODIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
UIDGID := $(shell stat -c '%u:%g' ${REPODIR})
# Prefer podman if installed, otherwise use docker.
# Note: Setting the var at runtime will always override.
CONTAINER_ENGINE ?= $(if $(shell command -v podman), podman, docker)
CONTAINER_RUN_ARGS ?= $(if $(filter ${CONTAINER_ENGINE}, podman), --log-driver=none, --user "${UIDGID}")
IMAGE := $(shell cat ${REPODIR}/testdata/docker/IMAGE)
VERSION := $(shell cat ${REPODIR}/testdata/docker/VERSION)
# clang <8 doesn't tag relocs properly (STT_NOTYPE)
# clang 9 is the first version emitting BTF
TARGETS := \
@@ -26,48 +36,75 @@ TARGETS := \
testdata/strings \
testdata/freplace \
testdata/iproute2_map_compat \
internal/btf/testdata/relocs
testdata/map_spin_lock \
testdata/subprog_reloc \
testdata/fwd_decl \
btf/testdata/relocs \
btf/testdata/relocs_read \
btf/testdata/relocs_read_tgt
.PHONY: all clean docker-all docker-shell
.PHONY: all clean container-all container-shell generate
.DEFAULT_TARGET = docker-all
.DEFAULT_TARGET = container-all
# Build all ELF binaries using a Dockerized LLVM toolchain.
docker-all:
docker run --rm --user "${UIDGID}" \
# Build all ELF binaries using a containerized LLVM toolchain.
container-all:
${CONTAINER_ENGINE} run --rm ${CONTAINER_RUN_ARGS} \
-v "${REPODIR}":/ebpf -w /ebpf --env MAKEFLAGS \
--env CFLAGS="-fdebug-prefix-map=/ebpf=." \
--env HOME="/tmp" \
"${IMAGE}:${VERSION}" \
make all
$(MAKE) all
# (debug) Drop the user into a shell inside the Docker container as root.
docker-shell:
docker run --rm -ti \
# (debug) Drop the user into a shell inside the container as root.
container-shell:
${CONTAINER_ENGINE} run --rm -ti \
-v "${REPODIR}":/ebpf -w /ebpf \
"${IMAGE}:${VERSION}"
clean:
-$(RM) testdata/*.elf
-$(RM) internal/btf/testdata/*.elf
-$(RM) btf/testdata/*.elf
all: $(addsuffix -el.elf,$(TARGETS)) $(addsuffix -eb.elf,$(TARGETS))
format:
find . -type f -name "*.c" | xargs clang-format -i
all: format $(addsuffix -el.elf,$(TARGETS)) $(addsuffix -eb.elf,$(TARGETS)) generate
ln -srf testdata/loader-$(CLANG)-el.elf testdata/loader-el.elf
ln -srf testdata/loader-$(CLANG)-eb.elf testdata/loader-eb.elf
# $BPF_CLANG is used in go:generate invocations.
generate: export BPF_CLANG := $(CLANG)
generate: export BPF_CFLAGS := $(CFLAGS)
generate:
go generate ./cmd/bpf2go/test
go generate ./internal/sys
cd examples/ && go generate ./...
testdata/loader-%-el.elf: testdata/loader.c
$* $(CFLAGS) -mlittle-endian -c $< -o $@
$* $(CFLAGS) -target bpfel -c $< -o $@
$(STRIP) -g $@
testdata/loader-%-eb.elf: testdata/loader.c
$* $(CFLAGS) -mbig-endian -c $< -o $@
$* $(CFLAGS) -target bpfeb -c $< -o $@
$(STRIP) -g $@
%-el.elf: %.c
$(CLANG) $(CFLAGS) -mlittle-endian -c $< -o $@
$(CLANG) $(CFLAGS) -target bpfel -c $< -o $@
$(STRIP) -g $@
%-eb.elf : %.c
$(CLANG) $(CFLAGS) -mbig-endian -c $< -o $@
$(CLANG) $(CFLAGS) -target bpfeb -c $< -o $@
$(STRIP) -g $@
# Usage: make VMLINUX=/path/to/vmlinux vmlinux-btf
.PHONY: vmlinux-btf
vmlinux-btf: internal/btf/testdata/vmlinux-btf.gz
internal/btf/testdata/vmlinux-btf.gz: $(VMLINUX)
objcopy --dump-section .BTF=/dev/stdout "$<" /dev/null | gzip > "$@"
.PHONY: generate-btf
generate-btf: KERNEL_VERSION?=5.18
generate-btf:
$(eval TMP := $(shell mktemp -d))
curl -fL "$(CI_KERNEL_URL)/linux-$(KERNEL_VERSION).bz" -o "$(TMP)/bzImage"
./testdata/extract-vmlinux "$(TMP)/bzImage" > "$(TMP)/vmlinux"
$(OBJCOPY) --dump-section .BTF=/dev/stdout "$(TMP)/vmlinux" /dev/null | gzip > "btf/testdata/vmlinux.btf.gz"
curl -fL "$(CI_KERNEL_URL)/linux-$(KERNEL_VERSION)-selftests-bpf.tgz" -o "$(TMP)/selftests.tgz"
tar -xf "$(TMP)/selftests.tgz" --to-stdout tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.ko | \
$(OBJCOPY) --dump-section .BTF="btf/testdata/btf_testmod.btf" - /dev/null
$(RM) -r "$(TMP)"


@@ -45,13 +45,17 @@ This library includes the following packages:
`PERF_EVENT_ARRAY`
* [ringbuf](https://pkg.go.dev/github.com/cilium/ebpf/ringbuf) allows reading from a
`BPF_MAP_TYPE_RINGBUF` map
* [features](https://pkg.go.dev/github.com/cilium/ebpf/features) implements the equivalent
of `bpftool feature probe` for discovering BPF-related kernel features using native Go.
* [rlimit](https://pkg.go.dev/github.com/cilium/ebpf/rlimit) provides a convenient API to lift
the `RLIMIT_MEMLOCK` constraint on kernels before 5.11.
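The rlimit helper mentioned above is typically the first call in a loader. A minimal sketch, assuming a kernel that still accounts BPF memory against RLIMIT_MEMLOCK:

package main

import (
	"log"

	"github.com/cilium/ebpf/rlimit"
)

func main() {
	// On kernels before 5.11, memory for BPF maps and programs counts
	// against RLIMIT_MEMLOCK; lift the limit before loading anything.
	if err := rlimit.RemoveMemlock(); err != nil {
		log.Fatal(err)
	}
	// ... load maps and programs here ...
}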
## Requirements
* A version of Go that is [supported by
upstream](https://golang.org/doc/devel/release.html#policy)
* Linux >= 4.9. CI is run against LTS releases.
* Linux >= 4.9. CI is run against kernel.org LTS releases. 4.4 should work but is
not tested against.
## Regenerating Testdata
@@ -59,6 +63,9 @@ Run `make` in the root of this repository to rebuild testdata in all
subpackages. This requires Docker, as it relies on a standardized build
environment to keep the build output stable.
It is possible to regenerate data using Podman by overriding the `CONTAINER_*`
variables: `CONTAINER_ENGINE=podman CONTAINER_RUN_ARGS= make`.
The toolchain image build files are kept in [testdata/docker/](testdata/docker/).
## License


@@ -5,6 +5,10 @@ package asm
// BuiltinFunc is a built-in eBPF function.
type BuiltinFunc int32
func (_ BuiltinFunc) Max() BuiltinFunc {
return maxBuiltinFunc - 1
}
// eBPF built-in functions
//
// You can regenerate this list using the following gawk script:
@@ -190,6 +194,43 @@ const (
FnSysBpf
FnBtfFindByNameKind
FnSysClose
FnTimerInit
FnTimerSetCallback
FnTimerStart
FnTimerCancel
FnGetFuncIp
FnGetAttachCookie
FnTaskPtRegs
FnGetBranchSnapshot
FnTraceVprintk
FnSkcToUnixSock
FnKallsymsLookupName
FnFindVma
FnLoop
FnStrncmp
FnGetFuncArg
FnGetFuncRet
FnGetFuncArgCnt
FnGetRetval
FnSetRetval
FnXdpGetBuffLen
FnXdpLoadBytes
FnXdpStoreBytes
FnCopyFromUserTask
FnSkbSetTstamp
FnImaFileHash
FnKptrXchg
FnMapLookupPercpuElem
FnSkcToMptcpSock
FnDynptrFromMem
FnRingbufReserveDynptr
FnRingbufSubmitDynptr
FnRingbufDiscardDynptr
FnDynptrRead
FnDynptrWrite
FnDynptrData
maxBuiltinFunc
)
// Call emits a function call.
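Each constant corresponds to a kernel helper and is emitted via BuiltinFunc.Call. An illustrative snippet; the newly listed helpers such as FnLoop or FnTimerInit are emitted the same way as the older ones (argument setup elided):

insns := asm.Instructions{
	asm.FnKtimeGetNs.Call(), // a JumpClass Call carrying the helper number as its constant
	asm.Mov.Imm(asm.R0, 0),
	asm.Return(),
}
_ = insns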


@@ -177,11 +177,47 @@ func _() {
_ = x[FnSysBpf-166]
_ = x[FnBtfFindByNameKind-167]
_ = x[FnSysClose-168]
_ = x[FnTimerInit-169]
_ = x[FnTimerSetCallback-170]
_ = x[FnTimerStart-171]
_ = x[FnTimerCancel-172]
_ = x[FnGetFuncIp-173]
_ = x[FnGetAttachCookie-174]
_ = x[FnTaskPtRegs-175]
_ = x[FnGetBranchSnapshot-176]
_ = x[FnTraceVprintk-177]
_ = x[FnSkcToUnixSock-178]
_ = x[FnKallsymsLookupName-179]
_ = x[FnFindVma-180]
_ = x[FnLoop-181]
_ = x[FnStrncmp-182]
_ = x[FnGetFuncArg-183]
_ = x[FnGetFuncRet-184]
_ = x[FnGetFuncArgCnt-185]
_ = x[FnGetRetval-186]
_ = x[FnSetRetval-187]
_ = x[FnXdpGetBuffLen-188]
_ = x[FnXdpLoadBytes-189]
_ = x[FnXdpStoreBytes-190]
_ = x[FnCopyFromUserTask-191]
_ = x[FnSkbSetTstamp-192]
_ = x[FnImaFileHash-193]
_ = x[FnKptrXchg-194]
_ = x[FnMapLookupPercpuElem-195]
_ = x[FnSkcToMptcpSock-196]
_ = x[FnDynptrFromMem-197]
_ = x[FnRingbufReserveDynptr-198]
_ = x[FnRingbufSubmitDynptr-199]
_ = x[FnRingbufDiscardDynptr-200]
_ = x[FnDynptrRead-201]
_ = x[FnDynptrWrite-202]
_ = x[FnDynptrData-203]
_ = x[maxBuiltinFunc-204]
}
const _BuiltinFunc_name = "FnUnspecFnMapLookupElemFnMapUpdateElemFnMapDeleteElemFnProbeReadFnKtimeGetNsFnTracePrintkFnGetPrandomU32FnGetSmpProcessorIdFnSkbStoreBytesFnL3CsumReplaceFnL4CsumReplaceFnTailCallFnCloneRedirectFnGetCurrentPidTgidFnGetCurrentUidGidFnGetCurrentCommFnGetCgroupClassidFnSkbVlanPushFnSkbVlanPopFnSkbGetTunnelKeyFnSkbSetTunnelKeyFnPerfEventReadFnRedirectFnGetRouteRealmFnPerfEventOutputFnSkbLoadBytesFnGetStackidFnCsumDiffFnSkbGetTunnelOptFnSkbSetTunnelOptFnSkbChangeProtoFnSkbChangeTypeFnSkbUnderCgroupFnGetHashRecalcFnGetCurrentTaskFnProbeWriteUserFnCurrentTaskUnderCgroupFnSkbChangeTailFnSkbPullDataFnCsumUpdateFnSetHashInvalidFnGetNumaNodeIdFnSkbChangeHeadFnXdpAdjustHeadFnProbeReadStrFnGetSocketCookieFnGetSocketUidFnSetHashFnSetsockoptFnSkbAdjustRoomFnRedirectMapFnSkRedirectMapFnSockMapUpdateFnXdpAdjustMetaFnPerfEventReadValueFnPerfProgReadValueFnGetsockoptFnOverrideReturnFnSockOpsCbFlagsSetFnMsgRedirectMapFnMsgApplyBytesFnMsgCorkBytesFnMsgPullDataFnBindFnXdpAdjustTailFnSkbGetXfrmStateFnGetStackFnSkbLoadBytesRelativeFnFibLookupFnSockHashUpdateFnMsgRedirectHashFnSkRedirectHashFnLwtPushEncapFnLwtSeg6StoreBytesFnLwtSeg6AdjustSrhFnLwtSeg6ActionFnRcRepeatFnRcKeydownFnSkbCgroupIdFnGetCurrentCgroupIdFnGetLocalStorageFnSkSelectReuseportFnSkbAncestorCgroupIdFnSkLookupTcpFnSkLookupUdpFnSkReleaseFnMapPushElemFnMapPopElemFnMapPeekElemFnMsgPushDataFnMsgPopDataFnRcPointerRelFnSpinLockFnSpinUnlockFnSkFullsockFnTcpSockFnSkbEcnSetCeFnGetListenerSockFnSkcLookupTcpFnTcpCheckSyncookieFnSysctlGetNameFnSysctlGetCurrentValueFnSysctlGetNewValueFnSysctlSetNewValueFnStrtolFnStrtoulFnSkStorageGetFnSkStorageDeleteFnSendSignalFnTcpGenSyncookieFnSkbOutputFnProbeReadUserFnProbeReadKernelFnProbeReadUserStrFnProbeReadKernelStrFnTcpSendAckFnSendSignalThreadFnJiffies64FnReadBranchRecordsFnGetNsCurrentPidTgidFnXdpOutputFnGetNetnsCookieFnGetCurrentAncestorCgroupIdFnSkAssignFnKtimeGetBootNsFnSeqPrintfFnSeqWriteFnSkCgroupIdFnSkAncestorCgroupIdFnRingbufOutputFnRingbufReserveFnRingbufSubmitFnRingbufDiscardFnRingbufQueryFnCsumLevelFnSkcToTcp6SockFnSkcToTcpSockFnSkcToTcpTimewaitSockFnSkcToTcpRequestSockFnSkcToUdp6SockFnGetTaskStackFnLoadHdrOptFnStoreHdrOptFnReserveHdrOptFnInodeStorageGetFnInodeStorageDeleteFnDPathFnCopyFromUserFnSnprintfBtfFnSeqPrintfBtfFnSkbCgroupClassidFnRedirectNeighFnPerCpuPtrFnThisCpuPtrFnRedirectPeerFnTaskStorageGetFnTaskStorageDeleteFnGetCurrentTaskBtfFnBprmOptsSetFnKtimeGetCoarseNsFnImaInodeHashFnSockFromFileFnCheckMtuFnForEachMapElemFnSnprintfFnSysBpfFnBtfFindByNameKindFnSysClose"
const _BuiltinFunc_name = "FnUnspecFnMapLookupElemFnMapUpdateElemFnMapDeleteElemFnProbeReadFnKtimeGetNsFnTracePrintkFnGetPrandomU32FnGetSmpProcessorIdFnSkbStoreBytesFnL3CsumReplaceFnL4CsumReplaceFnTailCallFnCloneRedirectFnGetCurrentPidTgidFnGetCurrentUidGidFnGetCurrentCommFnGetCgroupClassidFnSkbVlanPushFnSkbVlanPopFnSkbGetTunnelKeyFnSkbSetTunnelKeyFnPerfEventReadFnRedirectFnGetRouteRealmFnPerfEventOutputFnSkbLoadBytesFnGetStackidFnCsumDiffFnSkbGetTunnelOptFnSkbSetTunnelOptFnSkbChangeProtoFnSkbChangeTypeFnSkbUnderCgroupFnGetHashRecalcFnGetCurrentTaskFnProbeWriteUserFnCurrentTaskUnderCgroupFnSkbChangeTailFnSkbPullDataFnCsumUpdateFnSetHashInvalidFnGetNumaNodeIdFnSkbChangeHeadFnXdpAdjustHeadFnProbeReadStrFnGetSocketCookieFnGetSocketUidFnSetHashFnSetsockoptFnSkbAdjustRoomFnRedirectMapFnSkRedirectMapFnSockMapUpdateFnXdpAdjustMetaFnPerfEventReadValueFnPerfProgReadValueFnGetsockoptFnOverrideReturnFnSockOpsCbFlagsSetFnMsgRedirectMapFnMsgApplyBytesFnMsgCorkBytesFnMsgPullDataFnBindFnXdpAdjustTailFnSkbGetXfrmStateFnGetStackFnSkbLoadBytesRelativeFnFibLookupFnSockHashUpdateFnMsgRedirectHashFnSkRedirectHashFnLwtPushEncapFnLwtSeg6StoreBytesFnLwtSeg6AdjustSrhFnLwtSeg6ActionFnRcRepeatFnRcKeydownFnSkbCgroupIdFnGetCurrentCgroupIdFnGetLocalStorageFnSkSelectReuseportFnSkbAncestorCgroupIdFnSkLookupTcpFnSkLookupUdpFnSkReleaseFnMapPushElemFnMapPopElemFnMapPeekElemFnMsgPushDataFnMsgPopDataFnRcPointerRelFnSpinLockFnSpinUnlockFnSkFullsockFnTcpSockFnSkbEcnSetCeFnGetListenerSockFnSkcLookupTcpFnTcpCheckSyncookieFnSysctlGetNameFnSysctlGetCurrentValueFnSysctlGetNewValueFnSysctlSetNewValueFnStrtolFnStrtoulFnSkStorageGetFnSkStorageDeleteFnSendSignalFnTcpGenSyncookieFnSkbOutputFnProbeReadUserFnProbeReadKernelFnProbeReadUserStrFnProbeReadKernelStrFnTcpSendAckFnSendSignalThreadFnJiffies64FnReadBranchRecordsFnGetNsCurrentPidTgidFnXdpOutputFnGetNetnsCookieFnGetCurrentAncestorCgroupIdFnSkAssignFnKtimeGetBootNsFnSeqPrintfFnSeqWriteFnSkCgroupIdFnSkAncestorCgroupIdFnRingbufOutputFnRingbufReserveFnRingbufSubmitFnRingbufDiscardFnRingbufQueryFnCsumLevelFnSkcToTcp6SockFnSkcToTcpSockFnSkcToTcpTimewaitSockFnSkcToTcpRequestSockFnSkcToUdp6SockFnGetTaskStackFnLoadHdrOptFnStoreHdrOptFnReserveHdrOptFnInodeStorageGetFnInodeStorageDeleteFnDPathFnCopyFromUserFnSnprintfBtfFnSeqPrintfBtfFnSkbCgroupClassidFnRedirectNeighFnPerCpuPtrFnThisCpuPtrFnRedirectPeerFnTaskStorageGetFnTaskStorageDeleteFnGetCurrentTaskBtfFnBprmOptsSetFnKtimeGetCoarseNsFnImaInodeHashFnSockFromFileFnCheckMtuFnForEachMapElemFnSnprintfFnSysBpfFnBtfFindByNameKindFnSysCloseFnTimerInitFnTimerSetCallbackFnTimerStartFnTimerCancelFnGetFuncIpFnGetAttachCookieFnTaskPtRegsFnGetBranchSnapshotFnTraceVprintkFnSkcToUnixSockFnKallsymsLookupNameFnFindVmaFnLoopFnStrncmpFnGetFuncArgFnGetFuncRetFnGetFuncArgCntFnGetRetvalFnSetRetvalFnXdpGetBuffLenFnXdpLoadBytesFnXdpStoreBytesFnCopyFromUserTaskFnSkbSetTstampFnImaFileHashFnKptrXchgFnMapLookupPercpuElemFnSkcToMptcpSockFnDynptrFromMemFnRingbufReserveDynptrFnRingbufSubmitDynptrFnRingbufDiscardDynptrFnDynptrReadFnDynptrWriteFnDynptrDatamaxBuiltinFunc"
var _BuiltinFunc_index = [...]uint16{0, 8, 23, 38, 53, 64, 76, 89, 104, 123, 138, 153, 168, 178, 193, 212, 230, 246, 264, 277, 289, 306, 323, 338, 348, 363, 380, 394, 406, 416, 433, 450, 466, 481, 497, 512, 528, 544, 568, 583, 596, 608, 624, 639, 654, 669, 683, 700, 714, 723, 735, 750, 763, 778, 793, 808, 828, 847, 859, 875, 894, 910, 925, 939, 952, 958, 973, 990, 1000, 1022, 1033, 1049, 1066, 1082, 1096, 1115, 1133, 1148, 1158, 1169, 1182, 1202, 1219, 1238, 1259, 1272, 1285, 1296, 1309, 1321, 1334, 1347, 1359, 1373, 1383, 1395, 1407, 1416, 1429, 1446, 1460, 1479, 1494, 1517, 1536, 1555, 1563, 1572, 1586, 1603, 1615, 1632, 1643, 1658, 1675, 1693, 1713, 1725, 1743, 1754, 1773, 1794, 1805, 1821, 1849, 1859, 1875, 1886, 1896, 1908, 1928, 1943, 1959, 1974, 1990, 2004, 2015, 2030, 2044, 2066, 2087, 2102, 2116, 2128, 2141, 2156, 2173, 2193, 2200, 2214, 2227, 2241, 2259, 2274, 2285, 2297, 2311, 2327, 2346, 2365, 2378, 2396, 2410, 2424, 2434, 2450, 2460, 2468, 2487, 2497}
var _BuiltinFunc_index = [...]uint16{0, 8, 23, 38, 53, 64, 76, 89, 104, 123, 138, 153, 168, 178, 193, 212, 230, 246, 264, 277, 289, 306, 323, 338, 348, 363, 380, 394, 406, 416, 433, 450, 466, 481, 497, 512, 528, 544, 568, 583, 596, 608, 624, 639, 654, 669, 683, 700, 714, 723, 735, 750, 763, 778, 793, 808, 828, 847, 859, 875, 894, 910, 925, 939, 952, 958, 973, 990, 1000, 1022, 1033, 1049, 1066, 1082, 1096, 1115, 1133, 1148, 1158, 1169, 1182, 1202, 1219, 1238, 1259, 1272, 1285, 1296, 1309, 1321, 1334, 1347, 1359, 1373, 1383, 1395, 1407, 1416, 1429, 1446, 1460, 1479, 1494, 1517, 1536, 1555, 1563, 1572, 1586, 1603, 1615, 1632, 1643, 1658, 1675, 1693, 1713, 1725, 1743, 1754, 1773, 1794, 1805, 1821, 1849, 1859, 1875, 1886, 1896, 1908, 1928, 1943, 1959, 1974, 1990, 2004, 2015, 2030, 2044, 2066, 2087, 2102, 2116, 2128, 2141, 2156, 2173, 2193, 2200, 2214, 2227, 2241, 2259, 2274, 2285, 2297, 2311, 2327, 2346, 2365, 2378, 2396, 2410, 2424, 2434, 2450, 2460, 2468, 2487, 2497, 2508, 2526, 2538, 2551, 2562, 2579, 2591, 2610, 2624, 2639, 2659, 2668, 2674, 2683, 2695, 2707, 2722, 2733, 2744, 2759, 2773, 2788, 2806, 2820, 2833, 2843, 2864, 2880, 2895, 2917, 2938, 2960, 2972, 2985, 2997, 3011}
func (i BuiltinFunc) String() string {
if i < 0 || i >= BuiltinFunc(len(_BuiltinFunc_index)-1) {


@@ -8,8 +8,10 @@ import (
"fmt"
"io"
"math"
"sort"
"strings"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
@@ -19,6 +21,10 @@ const InstructionSize = 8
// RawInstructionOffset is an offset in units of raw BPF instructions.
type RawInstructionOffset uint64
var ErrUnreferencedSymbol = errors.New("unreferenced symbol")
var ErrUnsatisfiedMapReference = errors.New("unsatisfied map reference")
var ErrUnsatisfiedProgramReference = errors.New("unsatisfied program reference")
// Bytes returns the offset of an instruction in bytes.
func (rio RawInstructionOffset) Bytes() uint64 {
return uint64(rio) * InstructionSize
@@ -26,50 +32,57 @@ func (rio RawInstructionOffset) Bytes() uint64 {
// Instruction is a single eBPF instruction.
type Instruction struct {
OpCode OpCode
Dst Register
Src Register
Offset int16
Constant int64
Reference string
Symbol string
}
OpCode OpCode
Dst Register
Src Register
Offset int16
Constant int64
// Sym creates a symbol.
func (ins Instruction) Sym(name string) Instruction {
ins.Symbol = name
return ins
// Metadata contains optional metadata about this instruction.
Metadata Metadata
}
// Unmarshal decodes a BPF instruction.
func (ins *Instruction) Unmarshal(r io.Reader, bo binary.ByteOrder) (uint64, error) {
var bi bpfInstruction
err := binary.Read(r, bo, &bi)
if err != nil {
data := make([]byte, InstructionSize)
if _, err := io.ReadFull(r, data); err != nil {
return 0, err
}
ins.OpCode = bi.OpCode
ins.Offset = bi.Offset
ins.Constant = int64(bi.Constant)
ins.Dst, ins.Src, err = bi.Registers.Unmarshal(bo)
if err != nil {
return 0, fmt.Errorf("can't unmarshal registers: %s", err)
ins.OpCode = OpCode(data[0])
regs := data[1]
switch bo {
case binary.LittleEndian:
ins.Dst, ins.Src = Register(regs&0xF), Register(regs>>4)
case binary.BigEndian:
ins.Dst, ins.Src = Register(regs>>4), Register(regs&0xf)
}
if !bi.OpCode.IsDWordLoad() {
ins.Offset = int16(bo.Uint16(data[2:4]))
// Convert to int32 before widening to int64
// to ensure the signed bit is carried over.
ins.Constant = int64(int32(bo.Uint32(data[4:8])))
if !ins.OpCode.IsDWordLoad() {
return InstructionSize, nil
}
var bi2 bpfInstruction
if err := binary.Read(r, bo, &bi2); err != nil {
// Pull another instruction from the stream to retrieve the second
// half of the 64-bit immediate value.
if _, err := io.ReadFull(r, data); err != nil {
// No Wrap, to avoid io.EOF clash
return 0, errors.New("64bit immediate is missing second half")
}
if bi2.OpCode != 0 || bi2.Offset != 0 || bi2.Registers != 0 {
// Require that all fields other than the value are zero.
if bo.Uint32(data[0:4]) != 0 {
return 0, errors.New("64bit immediate has non-zero fields")
}
ins.Constant = int64(uint64(uint32(bi2.Constant))<<32 | uint64(uint32(bi.Constant)))
cons1 := uint32(ins.Constant)
cons2 := int32(bo.Uint32(data[4:8]))
ins.Constant = int64(cons2)<<32 | int64(cons1)
return 2 * InstructionSize, nil
}
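A worked example of the split encoding that this second read reassembles (the constant is illustrative):

c := int64(0x1122334455667788)
lo := uint32(c)       // 0x55667788: imm field of the first 8-byte half
hi := uint32(c >> 32) // 0x11223344: imm field of the trailing half
back := int64(int32(hi))<<32 | int64(lo)
// back == c; the low half is zero-extended, exactly as in Unmarshal above.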
@@ -93,14 +106,12 @@ func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error)
return 0, fmt.Errorf("can't marshal registers: %s", err)
}
bpfi := bpfInstruction{
ins.OpCode,
regs,
ins.Offset,
cons,
}
if err := binary.Write(w, bo, &bpfi); err != nil {
data := make([]byte, InstructionSize)
data[0] = byte(ins.OpCode)
data[1] = byte(regs)
bo.PutUint16(data[2:4], uint16(ins.Offset))
bo.PutUint32(data[4:8], uint32(cons))
if _, err := w.Write(data); err != nil {
return 0, err
}
@@ -108,42 +119,76 @@ func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error)
return InstructionSize, nil
}
bpfi = bpfInstruction{
Constant: int32(ins.Constant >> 32),
}
if err := binary.Write(w, bo, &bpfi); err != nil {
// The first half of the second part of a double-wide instruction
// must be zero. The second half carries the value.
bo.PutUint32(data[0:4], 0)
bo.PutUint32(data[4:8], uint32(ins.Constant>>32))
if _, err := w.Write(data); err != nil {
return 0, err
}
return 2 * InstructionSize, nil
}
// RewriteMapPtr changes an instruction to use a new map fd.
// AssociateMap associates a Map with this Instruction.
//
// Returns an error if the instruction doesn't load a map.
func (ins *Instruction) RewriteMapPtr(fd int) error {
if !ins.OpCode.IsDWordLoad() {
return fmt.Errorf("%s is not a 64 bit load", ins.OpCode)
}
if ins.Src != PseudoMapFD && ins.Src != PseudoMapValue {
// Implicitly clears the Instruction's Reference field.
//
// Returns an error if the Instruction is not a map load.
func (ins *Instruction) AssociateMap(m FDer) error {
if !ins.IsLoadFromMap() {
return errors.New("not a load from a map")
}
ins.Metadata.Set(referenceMeta{}, nil)
ins.Metadata.Set(mapMeta{}, m)
return nil
}
// RewriteMapPtr changes an instruction to use a new map fd.
//
// Returns an error if the instruction doesn't load a map.
//
// Deprecated: use AssociateMap instead. If you cannot provide a Map,
// wrap an fd in a type implementing FDer.
func (ins *Instruction) RewriteMapPtr(fd int) error {
if !ins.IsLoadFromMap() {
return errors.New("not a load from a map")
}
ins.encodeMapFD(fd)
return nil
}
func (ins *Instruction) encodeMapFD(fd int) {
// Preserve the offset value for direct map loads.
offset := uint64(ins.Constant) & (math.MaxUint32 << 32)
rawFd := uint64(uint32(fd))
ins.Constant = int64(offset | rawFd)
return nil
}
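Callers that only hold a raw file descriptor can satisfy the FDer escape hatch named in the deprecation note. A sketch; rawFD is a hypothetical adapter, and ins is assumed to be a map-load instruction built elsewhere:

// rawFD adapts a plain file descriptor to the FDer interface.
type rawFD int

func (f rawFD) FD() int { return int(f) }

// Associate ins with an already-open map fd instead of calling the
// deprecated RewriteMapPtr.
if err := ins.AssociateMap(rawFD(mapFD)); err != nil {
	// ins was not a load from a map
}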
// MapPtr returns the map fd for this instruction.
//
// The result is undefined if the instruction is not a load from a map,
// see IsLoadFromMap.
//
// Deprecated: use Map() instead.
func (ins *Instruction) MapPtr() int {
return int(int32(uint64(ins.Constant) & math.MaxUint32))
// If there is a map associated with the instruction, return its FD.
if fd := ins.Metadata.Get(mapMeta{}); fd != nil {
return fd.(FDer).FD()
}
// Fall back to the fd stored in the Constant field
return ins.mapFd()
}
// mapFd returns the map file descriptor stored in the 32 least significant
// bits of ins' Constant field.
func (ins *Instruction) mapFd() int {
return int(int32(ins.Constant))
}
// RewriteMapOffset changes the offset of a direct load from a map.
@@ -181,6 +226,18 @@ func (ins *Instruction) IsFunctionCall() bool {
return ins.OpCode.JumpOp() == Call && ins.Src == PseudoCall
}
// IsLoadOfFunctionPointer returns true if the instruction loads a function pointer.
func (ins *Instruction) IsLoadOfFunctionPointer() bool {
return ins.OpCode.IsDWordLoad() && ins.Src == PseudoFunc
}
// IsFunctionReference returns true if the instruction references another BPF
// function, either by invoking a Call jump operation or by loading a function
// pointer.
func (ins *Instruction) IsFunctionReference() bool {
return ins.IsFunctionCall() || ins.IsLoadOfFunctionPointer()
}
// IsBuiltinCall returns true if the instruction is a built-in call, i.e. BPF helper call.
func (ins *Instruction) IsBuiltinCall() bool {
return ins.OpCode.JumpOp() == Call && ins.Src == R0 && ins.Dst == R0
@@ -213,21 +270,30 @@ func (ins Instruction) Format(f fmt.State, c rune) {
}
if ins.IsLoadFromMap() {
fd := ins.MapPtr()
fd := ins.mapFd()
m := ins.Map()
switch ins.Src {
case PseudoMapFD:
fmt.Fprintf(f, "LoadMapPtr dst: %s fd: %d", ins.Dst, fd)
if m != nil {
fmt.Fprintf(f, "LoadMapPtr dst: %s map: %s", ins.Dst, m)
} else {
fmt.Fprintf(f, "LoadMapPtr dst: %s fd: %d", ins.Dst, fd)
}
case PseudoMapValue:
fmt.Fprintf(f, "LoadMapValue dst: %s, fd: %d off: %d", ins.Dst, fd, ins.mapOffset())
if m != nil {
fmt.Fprintf(f, "LoadMapValue dst: %s, map: %s off: %d", ins.Dst, m, ins.mapOffset())
} else {
fmt.Fprintf(f, "LoadMapValue dst: %s, fd: %d off: %d", ins.Dst, fd, ins.mapOffset())
}
}
goto ref
}
fmt.Fprintf(f, "%v ", op)
switch cls := op.Class(); cls {
case LdClass, LdXClass, StClass, StXClass:
switch cls := op.Class(); {
case cls.isLoadOrStore():
switch op.Mode() {
case ImmMode:
fmt.Fprintf(f, "dst: %s imm: %d", ins.Dst, ins.Constant)
@@ -241,7 +307,7 @@ func (ins Instruction) Format(f fmt.State, c rune) {
fmt.Fprintf(f, "dst: %s src: %s", ins.Dst, ins.Src)
}
case ALU64Class, ALUClass:
case cls.IsALU():
fmt.Fprintf(f, "dst: %s ", ins.Dst)
if op.ALUOp() == Swap || op.Source() == ImmSource {
fmt.Fprintf(f, "imm: %d", ins.Constant)
@@ -249,7 +315,7 @@ func (ins Instruction) Format(f fmt.State, c rune) {
fmt.Fprintf(f, "src: %s", ins.Src)
}
case JumpClass:
case cls.IsJump():
switch jop := op.JumpOp(); jop {
case Call:
if ins.Src == PseudoCall {
@@ -270,34 +336,171 @@
}
ref:
if ins.Reference != "" {
fmt.Fprintf(f, " <%s>", ins.Reference)
if ins.Reference() != "" {
fmt.Fprintf(f, " <%s>", ins.Reference())
}
}
func (ins Instruction) equal(other Instruction) bool {
return ins.OpCode == other.OpCode &&
ins.Dst == other.Dst &&
ins.Src == other.Src &&
ins.Offset == other.Offset &&
ins.Constant == other.Constant
}
// Size returns the amount of bytes ins would occupy in binary form.
func (ins Instruction) Size() uint64 {
return uint64(InstructionSize * ins.OpCode.rawInstructions())
}
type symbolMeta struct{}
// WithSymbol marks the Instruction as a Symbol, which other Instructions
// can point to using corresponding calls to WithReference.
func (ins Instruction) WithSymbol(name string) Instruction {
ins.Metadata.Set(symbolMeta{}, name)
return ins
}
// Sym creates a symbol.
//
// Deprecated: use WithSymbol instead.
func (ins Instruction) Sym(name string) Instruction {
return ins.WithSymbol(name)
}
// Symbol returns the value ins has been marked with using WithSymbol,
// otherwise returns an empty string. A symbol is often an Instruction
// at the start of a function body.
func (ins Instruction) Symbol() string {
sym, _ := ins.Metadata.Get(symbolMeta{}).(string)
return sym
}
type referenceMeta struct{}
// WithReference makes ins reference another Symbol or map by name.
func (ins Instruction) WithReference(ref string) Instruction {
ins.Metadata.Set(referenceMeta{}, ref)
return ins
}
// Reference returns the Symbol or map name referenced by ins, if any.
func (ins Instruction) Reference() string {
ref, _ := ins.Metadata.Get(referenceMeta{}).(string)
return ref
}
type mapMeta struct{}
// Map returns the Map referenced by ins, if any.
// An Instruction will contain a Map if e.g. it references an existing,
// pinned map that was opened during ELF loading.
func (ins Instruction) Map() FDer {
fd, _ := ins.Metadata.Get(mapMeta{}).(FDer)
return fd
}
type sourceMeta struct{}
// WithSource adds source information about the Instruction.
func (ins Instruction) WithSource(src fmt.Stringer) Instruction {
ins.Metadata.Set(sourceMeta{}, src)
return ins
}
// Source returns source information about the Instruction. The field is
// present when the compiler emits BTF line info about the Instruction and
// usually contains the line of source code responsible for it.
func (ins Instruction) Source() fmt.Stringer {
str, _ := ins.Metadata.Get(sourceMeta{}).(fmt.Stringer)
return str
}
// A Comment can be passed to Instruction.WithSource to add a comment
// to an instruction.
type Comment string
func (s Comment) String() string {
return string(s)
}
// FDer represents a resource tied to an underlying file descriptor.
// Used as a stand-in for e.g. ebpf.Map since that type cannot be
// imported here and FD() is the only method we rely on.
type FDer interface {
FD() int
}
// Instructions is an eBPF program.
type Instructions []Instruction
// Unmarshal unmarshals an Instructions from a binary instruction stream.
// All instructions in insns are replaced by instructions decoded from r.
func (insns *Instructions) Unmarshal(r io.Reader, bo binary.ByteOrder) error {
if len(*insns) > 0 {
*insns = nil
}
var offset uint64
for {
var ins Instruction
n, err := ins.Unmarshal(r, bo)
if errors.Is(err, io.EOF) {
break
}
if err != nil {
return fmt.Errorf("offset %d: %w", offset, err)
}
*insns = append(*insns, ins)
offset += n
}
return nil
}
// Name returns the name of the function insns belongs to, if any.
func (insns Instructions) Name() string {
if len(insns) == 0 {
return ""
}
return insns[0].Symbol()
}
func (insns Instructions) String() string {
return fmt.Sprint(insns)
}
// RewriteMapPtr rewrites all loads of a specific map pointer to a new fd.
// Size returns the amount of bytes insns would occupy in binary form.
func (insns Instructions) Size() uint64 {
var sum uint64
for _, ins := range insns {
sum += ins.Size()
}
return sum
}
// AssociateMap updates all Instructions that Reference the given symbol
// to point to an existing Map m instead.
//
// Returns an error if the symbol isn't used, see IsUnreferencedSymbol.
func (insns Instructions) RewriteMapPtr(symbol string, fd int) error {
// Returns an ErrUnreferencedSymbol error if no references to symbol are found
// in insns. If symbol is anything other than the symbol name of a map (e.g.
// a bpf2bpf subprogram), an error is returned.
func (insns Instructions) AssociateMap(symbol string, m FDer) error {
if symbol == "" {
return errors.New("empty symbol")
}
found := false
var found bool
for i := range insns {
ins := &insns[i]
if ins.Reference != symbol {
if ins.Reference() != symbol {
continue
}
if err := ins.RewriteMapPtr(fd); err != nil {
if err := ins.AssociateMap(m); err != nil {
return err
}
@@ -305,7 +508,40 @@ func (insns Instructions) RewriteMapPtr(symbol string, fd int) error {
}
if !found {
return &unreferencedSymbolError{symbol}
return fmt.Errorf("symbol %s: %w", symbol, ErrUnreferencedSymbol)
}
return nil
}
// RewriteMapPtr rewrites all loads of a specific map pointer to a new fd.
//
// Returns ErrUnreferencedSymbol if the symbol isn't used.
//
// Deprecated: use AssociateMap instead.
func (insns Instructions) RewriteMapPtr(symbol string, fd int) error {
if symbol == "" {
return errors.New("empty symbol")
}
var found bool
for i := range insns {
ins := &insns[i]
if ins.Reference() != symbol {
continue
}
if !ins.IsLoadFromMap() {
return errors.New("not a load from a map")
}
ins.encodeMapFD(fd)
found = true
}
if !found {
return fmt.Errorf("symbol %s: %w", symbol, ErrUnreferencedSymbol)
}
return nil
@@ -317,31 +553,61 @@ func (insns Instructions) SymbolOffsets() (map[string]int, error) {
offsets := make(map[string]int)
for i, ins := range insns {
if ins.Symbol == "" {
if ins.Symbol() == "" {
continue
}
if _, ok := offsets[ins.Symbol]; ok {
return nil, fmt.Errorf("duplicate symbol %s", ins.Symbol)
if _, ok := offsets[ins.Symbol()]; ok {
return nil, fmt.Errorf("duplicate symbol %s", ins.Symbol())
}
offsets[ins.Symbol] = i
offsets[ins.Symbol()] = i
}
return offsets, nil
}
// FunctionReferences returns a set of symbol names these Instructions make
// bpf-to-bpf calls to.
func (insns Instructions) FunctionReferences() []string {
calls := make(map[string]struct{})
for _, ins := range insns {
if ins.Constant != -1 {
// BPF-to-BPF calls have -1 constants.
continue
}
if ins.Reference() == "" {
continue
}
if !ins.IsFunctionReference() {
continue
}
calls[ins.Reference()] = struct{}{}
}
result := make([]string, 0, len(calls))
for call := range calls {
result = append(result, call)
}
sort.Strings(result)
return result
}
// ReferenceOffsets returns the set of references and their offset in
// the instructions.
func (insns Instructions) ReferenceOffsets() map[string][]int {
offsets := make(map[string][]int)
for i, ins := range insns {
if ins.Reference == "" {
if ins.Reference() == "" {
continue
}
offsets[ins.Reference] = append(offsets[ins.Reference], i)
offsets[ins.Reference()] = append(offsets[ins.Reference()], i)
}
return offsets
@@ -392,18 +658,36 @@ func (insns Instructions) Format(f fmt.State, c rune) {
iter := insns.Iterate()
for iter.Next() {
if iter.Ins.Symbol != "" {
fmt.Fprintf(f, "%s%s:\n", symIndent, iter.Ins.Symbol)
if iter.Ins.Symbol() != "" {
fmt.Fprintf(f, "%s%s:\n", symIndent, iter.Ins.Symbol())
}
if src := iter.Ins.Source(); src != nil {
line := strings.TrimSpace(src.String())
if line != "" {
fmt.Fprintf(f, "%s%*s; %s\n", indent, offsetWidth, " ", line)
}
}
fmt.Fprintf(f, "%s%*d: %v\n", indent, offsetWidth, iter.Offset, iter.Ins)
}
}
// Marshal encodes a BPF program into the kernel format.
//
// insns may be modified if there are unresolved jumps or bpf2bpf calls.
//
// Returns ErrUnsatisfiedProgramReference if there is a Reference Instruction
// without a matching Symbol Instruction within insns.
func (insns Instructions) Marshal(w io.Writer, bo binary.ByteOrder) error {
if err := insns.encodeFunctionReferences(); err != nil {
return err
}
if err := insns.encodeMapPointers(); err != nil {
return err
}
for i, ins := range insns {
_, err := ins.Marshal(w, bo)
if err != nil {
if _, err := ins.Marshal(w, bo); err != nil {
return fmt.Errorf("instruction %d: %w", i, err)
}
}
@@ -429,6 +713,95 @@ func (insns Instructions) Tag(bo binary.ByteOrder) (string, error) {
return hex.EncodeToString(h.Sum(nil)[:unix.BPF_TAG_SIZE]), nil
}
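Putting the pieces together, a minimal sketch of serializing a symbolized function to kernel format (assumes the usual bytes, encoding/binary and log imports):

var buf bytes.Buffer
insns := asm.Instructions{
	asm.Mov.Imm(asm.R0, 0).WithSymbol("main"),
	asm.Return(),
}
if err := insns.Marshal(&buf, binary.LittleEndian); err != nil {
	log.Fatal(err)
}
// buf now holds len(insns) * asm.InstructionSize bytes of kernel-format code.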
// encodeFunctionReferences populates the Offset (or Constant, depending on
// the instruction type) field of instructions with a Reference field to point
// to the offset of the corresponding instruction with a matching Symbol field.
//
// Only Reference Instructions that are either jumps or BPF function references
// (calls or function pointer loads) are populated.
//
// Returns ErrUnsatisfiedProgramReference if there is a Reference Instruction
// without at least one corresponding Symbol Instruction within insns.
func (insns Instructions) encodeFunctionReferences() error {
// Index the offsets of instructions tagged as a symbol.
symbolOffsets := make(map[string]RawInstructionOffset)
iter := insns.Iterate()
for iter.Next() {
ins := iter.Ins
if ins.Symbol() == "" {
continue
}
if _, ok := symbolOffsets[ins.Symbol()]; ok {
return fmt.Errorf("duplicate symbol %s", ins.Symbol())
}
symbolOffsets[ins.Symbol()] = iter.Offset
}
// Find all instructions tagged as references to other symbols.
// Depending on the instruction type, populate their constant or offset
// fields to point to the symbol they refer to within the insn stream.
iter = insns.Iterate()
for iter.Next() {
i := iter.Index
offset := iter.Offset
ins := iter.Ins
if ins.Reference() == "" {
continue
}
switch {
case ins.IsFunctionReference() && ins.Constant == -1:
symOffset, ok := symbolOffsets[ins.Reference()]
if !ok {
return fmt.Errorf("%s at insn %d: symbol %q: %w", ins.OpCode, i, ins.Reference(), ErrUnsatisfiedProgramReference)
}
ins.Constant = int64(symOffset - offset - 1)
case ins.OpCode.Class().IsJump() && ins.Offset == -1:
symOffset, ok := symbolOffsets[ins.Reference()]
if !ok {
return fmt.Errorf("%s at insn %d: symbol %q: %w", ins.OpCode, i, ins.Reference(), ErrUnsatisfiedProgramReference)
}
ins.Offset = int16(symOffset - offset - 1)
}
}
return nil
}
// encodeMapPointers finds all Map Instructions and encodes their FDs
// into their Constant fields.
func (insns Instructions) encodeMapPointers() error {
iter := insns.Iterate()
for iter.Next() {
ins := iter.Ins
if !ins.IsLoadFromMap() {
continue
}
m := ins.Map()
if m == nil {
continue
}
fd := m.FD()
if fd < 0 {
return fmt.Errorf("map %s: %w", m, sys.ErrClosedFd)
}
ins.encodeMapFD(m.FD())
}
return nil
}
// Iterate allows iterating a BPF program while keeping track of
// various offsets.
//
@@ -464,13 +837,6 @@ func (iter *InstructionIterator) Next() bool {
return true
}
type bpfInstruction struct {
OpCode OpCode
Registers bpfRegisters
Offset int16
Constant int32
}
type bpfRegisters uint8
func newBPFRegisters(dst, src Register, bo binary.ByteOrder) (bpfRegisters, error) {
@@ -484,28 +850,10 @@ func newBPFRegisters(dst, src Register, bo binary.ByteOrder) (bpfRegisters, erro
}
}
func (r bpfRegisters) Unmarshal(bo binary.ByteOrder) (dst, src Register, err error) {
switch bo {
case binary.LittleEndian:
return Register(r & 0xF), Register(r >> 4), nil
case binary.BigEndian:
return Register(r >> 4), Register(r & 0xf), nil
default:
return 0, 0, fmt.Errorf("unrecognized ByteOrder %T", bo)
}
}
type unreferencedSymbolError struct {
symbol string
}
func (use *unreferencedSymbolError) Error() string {
return fmt.Sprintf("unreferenced symbol %s", use.symbol)
}
// IsUnreferencedSymbol returns true if err was caused by
// an unreferenced symbol.
//
// Deprecated: use errors.Is(err, asm.ErrUnreferencedSymbol).
func IsUnreferencedSymbol(err error) bool {
_, ok := err.(*unreferencedSymbolError)
return ok
return errors.Is(err, ErrUnreferencedSymbol)
}


@@ -60,50 +60,68 @@ func (op JumpOp) Op(source Source) OpCode {
return OpCode(JumpClass).SetJumpOp(op).SetSource(source)
}
// Imm compares dst to value, and adjusts PC by offset if the condition is fulfilled.
// Imm compares 64 bit dst to 64 bit value (sign extended), and adjusts PC by offset if the condition is fulfilled.
func (op JumpOp) Imm(dst Register, value int32, label string) Instruction {
if op == Exit || op == Call || op == Ja {
return Instruction{OpCode: InvalidOpCode}
}
return Instruction{
OpCode: OpCode(JumpClass).SetJumpOp(op).SetSource(ImmSource),
Dst: dst,
Offset: -1,
Constant: int64(value),
Reference: label,
}
OpCode: op.opCode(JumpClass, ImmSource),
Dst: dst,
Offset: -1,
Constant: int64(value),
}.WithReference(label)
}
// Reg compares dst to src, and adjusts PC by offset if the condition is fulfilled.
// Imm32 compares 32 bit dst to 32 bit value, and adjusts PC by offset if the condition is fulfilled.
// Requires kernel 5.1.
func (op JumpOp) Imm32(dst Register, value int32, label string) Instruction {
return Instruction{
OpCode: op.opCode(Jump32Class, ImmSource),
Dst: dst,
Offset: -1,
Constant: int64(value),
}.WithReference(label)
}
// Reg compares 64 bit dst to 64 bit src, and adjusts PC by offset if the condition is fulfilled.
func (op JumpOp) Reg(dst, src Register, label string) Instruction {
return Instruction{
OpCode: op.opCode(JumpClass, RegSource),
Dst: dst,
Src: src,
Offset: -1,
}.WithReference(label)
}
// Reg32 compares 32 bit dst to 32 bit src, and adjusts PC by offset if the condition is fulfilled.
// Requires kernel 5.1.
func (op JumpOp) Reg32(dst, src Register, label string) Instruction {
return Instruction{
OpCode: op.opCode(Jump32Class, RegSource),
Dst: dst,
Src: src,
Offset: -1,
}.WithReference(label)
}
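Side by side, the 64-bit and 32-bit forms differ only in the emitted class; the 32-bit variants need kernel 5.1 (illustrative snippet):

jeq64 := asm.JEq.Imm(asm.R1, 42, "exit")   // JumpClass: compares all 64 bits of R1
jeq32 := asm.JEq.Imm32(asm.R1, 42, "exit") // Jump32Class: compares the low 32 bits
_, _ = jeq64, jeq32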
func (op JumpOp) opCode(class Class, source Source) OpCode {
if op == Exit || op == Call || op == Ja {
return Instruction{OpCode: InvalidOpCode}
return InvalidOpCode
}
return Instruction{
OpCode: OpCode(JumpClass).SetJumpOp(op).SetSource(RegSource),
Dst: dst,
Src: src,
Offset: -1,
Reference: label,
}
return OpCode(class).SetJumpOp(op).SetSource(source)
}
// Label adjusts PC to the address of the label.
func (op JumpOp) Label(label string) Instruction {
if op == Call {
return Instruction{
OpCode: OpCode(JumpClass).SetJumpOp(Call),
Src: PseudoCall,
Constant: -1,
Reference: label,
}
OpCode: OpCode(JumpClass).SetJumpOp(Call),
Src: PseudoCall,
Constant: -1,
}.WithReference(label)
}
return Instruction{
OpCode: OpCode(JumpClass).SetJumpOp(op),
Offset: -1,
Reference: label,
}
OpCode: OpCode(JumpClass).SetJumpOp(op),
Offset: -1,
}.WithReference(label)
}

vendor/github.com/cilium/ebpf/asm/metadata.go generated vendored Normal file

@@ -0,0 +1,80 @@
package asm
// Metadata contains metadata about an instruction.
type Metadata struct {
head *metaElement
}
type metaElement struct {
next *metaElement
key, value interface{}
}
// Find the element containing key.
//
// Returns nil if there is no such element.
func (m *Metadata) find(key interface{}) *metaElement {
for e := m.head; e != nil; e = e.next {
if e.key == key {
return e
}
}
return nil
}
// Remove an element from the linked list.
//
// Copies as many elements of the list as necessary to remove r, but doesn't
// perform a full copy.
func (m *Metadata) remove(r *metaElement) {
current := &m.head
for e := m.head; e != nil; e = e.next {
if e == r {
// We've found the element we want to remove.
*current = e.next
// No need to copy the tail.
return
}
// There is another element in front of the one we want to remove.
// We have to copy it to be able to change metaElement.next.
cpy := &metaElement{key: e.key, value: e.value}
*current = cpy
current = &cpy.next
}
}
// Set a key to a value.
//
// If value is nil, the key is removed. Avoids modifying old metadata by
// copying if necessary.
func (m *Metadata) Set(key, value interface{}) {
if e := m.find(key); e != nil {
if e.value == value {
// Key is present and the value is the same. Nothing to do.
return
}
// Key is present with a different value. Create a copy of the list
// which doesn't have the element in it.
m.remove(e)
}
// m.head is now a linked list that doesn't contain key.
if value == nil {
return
}
m.head = &metaElement{key: key, value: value, next: m.head}
}
// Get the value of a key.
//
// Returns nil if no value with the given key is present.
func (m *Metadata) Get(key interface{}) interface{} {
if e := m.find(key); e != nil {
return e.value
}
return nil
}
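A short sketch of the copy-on-write behaviour this buys for Instruction values (colorMeta is an illustrative key type; real callers likewise use unexported empty structs as keys):

package main

import (
	"fmt"

	"github.com/cilium/ebpf/asm"
)

type colorMeta struct{}

func main() {
	a := asm.Mov.Imm(asm.R0, 0)
	b := a // copying the Instruction shares the metadata list

	b.Metadata.Set(colorMeta{}, "hot")
	fmt.Println(a.Metadata.Get(colorMeta{})) // <nil>: a is untouched
	fmt.Println(b.Metadata.Get(colorMeta{})) // hot
}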


@@ -7,14 +7,6 @@ import (
//go:generate stringer -output opcode_string.go -type=Class
type encoding int
const (
unknownEncoding encoding = iota
loadOrStore
jumpOrALU
)
// Class of operations
//
// msb lsb
@@ -26,31 +18,52 @@ type Class uint8
const classMask OpCode = 0x07
const (
// LdClass load memory
// LdClass loads immediate values into registers.
// Also used for non-standard load operations from cBPF.
LdClass Class = 0x00
// LdXClass load memory from constant
// LdXClass loads memory into registers.
LdXClass Class = 0x01
// StClass load register from memory
// StClass stores immediate values to memory.
StClass Class = 0x02
// StXClass load register from constant
// StXClass stores registers to memory.
StXClass Class = 0x03
// ALUClass arithmetic operators
// ALUClass describes arithmetic operators.
ALUClass Class = 0x04
// JumpClass jump operators
// JumpClass describes jump operators.
JumpClass Class = 0x05
// ALU64Class arithmetic in 64 bit mode
// Jump32Class describes jump operators with 32-bit comparisons.
// Requires kernel 5.1.
Jump32Class Class = 0x06
// ALU64Class describes arithmetic operators in 64-bit mode.
ALU64Class Class = 0x07
)
func (cls Class) encoding() encoding {
switch cls {
case LdClass, LdXClass, StClass, StXClass:
return loadOrStore
case ALU64Class, ALUClass, JumpClass:
return jumpOrALU
default:
return unknownEncoding
}
// IsLoad checks if this is either LdClass or LdXClass.
func (cls Class) IsLoad() bool {
return cls == LdClass || cls == LdXClass
}
// IsStore checks if this is either StClass or StXClass.
func (cls Class) IsStore() bool {
return cls == StClass || cls == StXClass
}
func (cls Class) isLoadOrStore() bool {
return cls.IsLoad() || cls.IsStore()
}
// IsALU checks if this is either ALUClass or ALU64Class.
func (cls Class) IsALU() bool {
return cls == ALUClass || cls == ALU64Class
}
// IsJump checks if this is either JumpClass or Jump32Class.
func (cls Class) IsJump() bool {
return cls == JumpClass || cls == Jump32Class
}
func (cls Class) isJumpOrALU() bool {
return cls.IsJump() || cls.IsALU()
}
// OpCode is a packed eBPF opcode.
@@ -86,7 +99,7 @@ func (op OpCode) Class() Class {
// Mode returns the mode for load and store operations.
func (op OpCode) Mode() Mode {
if op.Class().encoding() != loadOrStore {
if !op.Class().isLoadOrStore() {
return InvalidMode
}
return Mode(op & modeMask)
@@ -94,7 +107,7 @@ func (op OpCode) Mode() Mode {
// Size returns the size for load and store operations.
func (op OpCode) Size() Size {
if op.Class().encoding() != loadOrStore {
if !op.Class().isLoadOrStore() {
return InvalidSize
}
return Size(op & sizeMask)
@@ -102,7 +115,7 @@ func (op OpCode) Size() Size {
// Source returns the source for branch and ALU operations.
func (op OpCode) Source() Source {
if op.Class().encoding() != jumpOrALU || op.ALUOp() == Swap {
if !op.Class().isJumpOrALU() || op.ALUOp() == Swap {
return InvalidSource
}
return Source(op & sourceMask)
@@ -110,7 +123,7 @@ func (op OpCode) Source() Source {
// ALUOp returns the ALUOp.
func (op OpCode) ALUOp() ALUOp {
if op.Class().encoding() != jumpOrALU {
if !op.Class().IsALU() {
return InvalidALUOp
}
return ALUOp(op & aluMask)
@@ -125,18 +138,27 @@ func (op OpCode) Endianness() Endianness {
}
// JumpOp returns the JumpOp.
// Returns InvalidJumpOp if it doesn't encode a jump.
func (op OpCode) JumpOp() JumpOp {
if op.Class().encoding() != jumpOrALU {
if !op.Class().IsJump() {
return InvalidJumpOp
}
return JumpOp(op & jumpMask)
jumpOp := JumpOp(op & jumpMask)
// Some JumpOps are only supported by JumpClass, not Jump32Class.
if op.Class() == Jump32Class && (jumpOp == Exit || jumpOp == Call || jumpOp == Ja) {
return InvalidJumpOp
}
return jumpOp
}
// SetMode sets the mode on load and store operations.
//
// Returns InvalidOpCode if op is of the wrong class.
func (op OpCode) SetMode(mode Mode) OpCode {
if op.Class().encoding() != loadOrStore || !valid(OpCode(mode), modeMask) {
if !op.Class().isLoadOrStore() || !valid(OpCode(mode), modeMask) {
return InvalidOpCode
}
return (op & ^modeMask) | OpCode(mode)
@@ -146,7 +168,7 @@ func (op OpCode) SetMode(mode Mode) OpCode {
//
// Returns InvalidOpCode if op is of the wrong class.
func (op OpCode) SetSize(size Size) OpCode {
if op.Class().encoding() != loadOrStore || !valid(OpCode(size), sizeMask) {
if !op.Class().isLoadOrStore() || !valid(OpCode(size), sizeMask) {
return InvalidOpCode
}
return (op & ^sizeMask) | OpCode(size)
@@ -156,7 +178,7 @@ func (op OpCode) SetSize(size Size) OpCode {
//
// Returns InvalidOpCode if op is of the wrong class.
func (op OpCode) SetSource(source Source) OpCode {
if op.Class().encoding() != jumpOrALU || !valid(OpCode(source), sourceMask) {
if !op.Class().isJumpOrALU() || !valid(OpCode(source), sourceMask) {
return InvalidOpCode
}
return (op & ^sourceMask) | OpCode(source)
@@ -166,8 +188,7 @@ func (op OpCode) SetSource(source Source) OpCode {
//
// Returns InvalidOpCode if op is of the wrong class.
func (op OpCode) SetALUOp(alu ALUOp) OpCode {
class := op.Class()
if (class != ALUClass && class != ALU64Class) || !valid(OpCode(alu), aluMask) {
if !op.Class().IsALU() || !valid(OpCode(alu), aluMask) {
return InvalidOpCode
}
return (op & ^aluMask) | OpCode(alu)
@@ -177,17 +198,25 @@ func (op OpCode) SetALUOp(alu ALUOp) OpCode {
//
// Returns InvalidOpCode if op is of the wrong class.
func (op OpCode) SetJumpOp(jump JumpOp) OpCode {
if op.Class() != JumpClass || !valid(OpCode(jump), jumpMask) {
if !op.Class().IsJump() || !valid(OpCode(jump), jumpMask) {
return InvalidOpCode
}
return (op & ^jumpMask) | OpCode(jump)
newOp := (op & ^jumpMask) | OpCode(jump)
// Check newOp is legal.
if newOp.JumpOp() == InvalidJumpOp {
return InvalidOpCode
}
return newOp
}
func (op OpCode) String() string {
var f strings.Builder
switch class := op.Class(); class {
case LdClass, LdXClass, StClass, StXClass:
switch class := op.Class(); {
case class.isLoadOrStore():
f.WriteString(strings.TrimSuffix(class.String(), "Class"))
mode := op.Mode()
@@ -204,7 +233,7 @@ func (op OpCode) String() string {
f.WriteString("B")
}
case ALU64Class, ALUClass:
case class.IsALU():
f.WriteString(op.ALUOp().String())
if op.ALUOp() == Swap {
@@ -218,8 +247,13 @@ func (op OpCode) String() string {
f.WriteString(strings.TrimSuffix(op.Source().String(), "Source"))
}
case JumpClass:
case class.IsJump():
f.WriteString(op.JumpOp().String())
if class == Jump32Class {
f.WriteString("32")
}
if jop := op.JumpOp(); jop != Exit && jop != Call {
f.WriteString(strings.TrimSuffix(op.Source().String(), "Source"))
}


@@ -14,25 +14,17 @@ func _() {
_ = x[StXClass-3]
_ = x[ALUClass-4]
_ = x[JumpClass-5]
_ = x[Jump32Class-6]
_ = x[ALU64Class-7]
}
const (
_Class_name_0 = "LdClassLdXClassStClassStXClassALUClassJumpClass"
_Class_name_1 = "ALU64Class"
)
const _Class_name = "LdClassLdXClassStClassStXClassALUClassJumpClassJump32ClassALU64Class"
var (
_Class_index_0 = [...]uint8{0, 7, 15, 22, 30, 38, 47}
)
var _Class_index = [...]uint8{0, 7, 15, 22, 30, 38, 47, 58, 68}
func (i Class) String() string {
switch {
case 0 <= i && i <= 5:
return _Class_name_0[_Class_index_0[i]:_Class_index_0[i+1]]
case i == 7:
return _Class_name_1
default:
if i >= Class(len(_Class_index)-1) {
return "Class(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _Class_name[_Class_index[i]:_Class_index[i+1]]
}


@@ -38,6 +38,7 @@ const (
PseudoMapFD = R1 // BPF_PSEUDO_MAP_FD
PseudoMapValue = R2 // BPF_PSEUDO_MAP_VALUE
PseudoCall = R1 // BPF_PSEUDO_CALL
PseudoFunc = R4 // BPF_PSEUDO_FUNC
)
func (r Register) String() string {

897
vendor/github.com/cilium/ebpf/btf/btf.go generated vendored Normal file
View file

@ -0,0 +1,897 @@
package btf
import (
"bufio"
"bytes"
"debug/elf"
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"os"
"reflect"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
const btfMagic = 0xeB9F
// Errors returned by BTF functions.
var (
ErrNotSupported = internal.ErrNotSupported
ErrNotFound = errors.New("not found")
ErrNoExtendedInfo = errors.New("no extended info")
)
// ID represents the unique ID of a BTF object.
type ID = sys.BTFID
// Spec represents decoded BTF.
type Spec struct {
// Data from .BTF.
rawTypes []rawType
strings *stringTable
// All types contained by the spec. For the base type, the position of
// a type in the slice is its ID.
types types
// Type IDs indexed by type.
typeIDs map[Type]TypeID
// Types indexed by essential name.
// Includes all struct flavors and types with the same name.
namedTypes map[essentialName][]Type
byteOrder binary.ByteOrder
}
type btfHeader struct {
Magic uint16
Version uint8
Flags uint8
HdrLen uint32
TypeOff uint32
TypeLen uint32
StringOff uint32
StringLen uint32
}
// typeStart returns the offset from the beginning of the .BTF section
// to the start of its type entries.
func (h *btfHeader) typeStart() int64 {
return int64(h.HdrLen + h.TypeOff)
}
// stringStart returns the offset from the beginning of the .BTF section
// to the start of its string table.
func (h *btfHeader) stringStart() int64 {
return int64(h.HdrLen + h.StringOff)
}
// LoadSpec opens file and calls LoadSpecFromReader on it.
func LoadSpec(file string) (*Spec, error) {
fh, err := os.Open(file)
if err != nil {
return nil, err
}
defer fh.Close()
return LoadSpecFromReader(fh)
}
// LoadSpecFromReader reads from an ELF or a raw BTF blob.
//
// Returns ErrNotFound if reading from an ELF which contains no BTF.
func LoadSpecFromReader(rd io.ReaderAt) (*Spec, error) {
file, err := internal.NewSafeELFFile(rd)
if err != nil {
if bo := guessRawBTFByteOrder(rd); bo != nil {
// Try to parse a naked BTF blob. This will return an error if
// we encounter a Datasec, since we can't fix it up.
spec, err := loadRawSpec(io.NewSectionReader(rd, 0, math.MaxInt64), bo, nil, nil)
return spec, err
}
return nil, err
}
return loadSpecFromELF(file)
}
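A minimal usage sketch of these loader entry points; the object path and type name below are hypothetical placeholders:

package main

import (
	"fmt"

	"github.com/cilium/ebpf/btf"
)

func main() {
	// LoadSpec opens the file and dispatches to LoadSpecFromReader,
	// which accepts both ELFs with a .BTF section and raw BTF blobs.
	spec, err := btf.LoadSpec("program.o") // hypothetical path
	if err != nil {
		panic(err)
	}
	typ, err := spec.AnyTypeByName("my_struct") // hypothetical type name
	fmt.Println(typ, err)
}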
// LoadSpecAndExtInfosFromReader reads from an ELF.
//
// ExtInfos may be nil if the ELF doesn't contain section metadata.
// Returns ErrNotFound if the ELF contains no BTF.
func LoadSpecAndExtInfosFromReader(rd io.ReaderAt) (*Spec, *ExtInfos, error) {
file, err := internal.NewSafeELFFile(rd)
if err != nil {
return nil, nil, err
}
spec, err := loadSpecFromELF(file)
if err != nil {
return nil, nil, err
}
extInfos, err := loadExtInfosFromELF(file, spec.types, spec.strings)
if err != nil && !errors.Is(err, ErrNotFound) {
return nil, nil, err
}
return spec, extInfos, nil
}
// variableOffsets extracts all symbol offsets from an ELF and indexes them by
// section and variable name.
//
// References to variables in BTF data sections carry unsigned 32-bit offsets.
// Some ELF symbols (e.g. in vmlinux) may point to virtual memory that is well
// beyond this range. Since these symbols cannot be described by BTF info,
// ignore them here.
func variableOffsets(file *internal.SafeELFFile) (map[variable]uint32, error) {
symbols, err := file.Symbols()
if err != nil {
return nil, fmt.Errorf("can't read symbols: %v", err)
}
variableOffsets := make(map[variable]uint32)
for _, symbol := range symbols {
if idx := symbol.Section; idx >= elf.SHN_LORESERVE && idx <= elf.SHN_HIRESERVE {
// Ignore things like SHN_ABS
continue
}
if symbol.Value > math.MaxUint32 {
// VarSecinfo offset is u32, cannot reference symbols in higher regions.
continue
}
if int(symbol.Section) >= len(file.Sections) {
return nil, fmt.Errorf("symbol %s: invalid section %d", symbol.Name, symbol.Section)
}
secName := file.Sections[symbol.Section].Name
variableOffsets[variable{secName, symbol.Name}] = uint32(symbol.Value)
}
return variableOffsets, nil
}
func loadSpecFromELF(file *internal.SafeELFFile) (*Spec, error) {
var (
btfSection *elf.Section
sectionSizes = make(map[string]uint32)
)
for _, sec := range file.Sections {
switch sec.Name {
case ".BTF":
btfSection = sec
default:
if sec.Type != elf.SHT_PROGBITS && sec.Type != elf.SHT_NOBITS {
break
}
if sec.Size > math.MaxUint32 {
return nil, fmt.Errorf("section %s exceeds maximum size", sec.Name)
}
sectionSizes[sec.Name] = uint32(sec.Size)
}
}
if btfSection == nil {
return nil, fmt.Errorf("btf: %w", ErrNotFound)
}
vars, err := variableOffsets(file)
if err != nil {
return nil, err
}
if btfSection.ReaderAt == nil {
return nil, fmt.Errorf("compressed BTF is not supported")
}
rawTypes, rawStrings, err := parseBTF(btfSection.ReaderAt, file.ByteOrder, nil)
if err != nil {
return nil, err
}
err = fixupDatasec(rawTypes, rawStrings, sectionSizes, vars)
if err != nil {
return nil, err
}
return inflateSpec(rawTypes, rawStrings, file.ByteOrder, nil)
}
func loadRawSpec(btf io.ReaderAt, bo binary.ByteOrder,
baseTypes types, baseStrings *stringTable) (*Spec, error) {
rawTypes, rawStrings, err := parseBTF(btf, bo, baseStrings)
if err != nil {
return nil, err
}
return inflateSpec(rawTypes, rawStrings, bo, baseTypes)
}
func inflateSpec(rawTypes []rawType, rawStrings *stringTable, bo binary.ByteOrder,
baseTypes types) (*Spec, error) {
types, err := inflateRawTypes(rawTypes, baseTypes, rawStrings)
if err != nil {
return nil, err
}
typeIDs, typesByName := indexTypes(types, TypeID(len(baseTypes)))
return &Spec{
rawTypes: rawTypes,
namedTypes: typesByName,
typeIDs: typeIDs,
types: types,
strings: rawStrings,
byteOrder: bo,
}, nil
}
func indexTypes(types []Type, typeIDOffset TypeID) (map[Type]TypeID, map[essentialName][]Type) {
namedTypes := 0
for _, typ := range types {
if typ.TypeName() != "" {
// Do a pre-pass to figure out how big the types-by-name map has to be.
// Most types have unique names, so it's OK to ignore essentialName
// here.
namedTypes++
}
}
typeIDs := make(map[Type]TypeID, len(types))
typesByName := make(map[essentialName][]Type, namedTypes)
for i, typ := range types {
if name := newEssentialName(typ.TypeName()); name != "" {
typesByName[name] = append(typesByName[name], typ)
}
typeIDs[typ] = TypeID(i) + typeIDOffset
}
return typeIDs, typesByName
}
// LoadKernelSpec returns the current kernel's BTF information.
//
// Defaults to /sys/kernel/btf/vmlinux and falls back to scanning the file system
// for vmlinux ELFs. Returns an error wrapping ErrNotSupported if BTF is not enabled.
func LoadKernelSpec() (*Spec, error) {
fh, err := os.Open("/sys/kernel/btf/vmlinux")
if err == nil {
defer fh.Close()
return loadRawSpec(fh, internal.NativeEndian, nil, nil)
}
file, err := findVMLinux()
if err != nil {
return nil, err
}
defer file.Close()
return loadSpecFromELF(file)
}
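The kernel-side counterpart in use, a sketch that assumes a kernel built with CONFIG_DEBUG_INFO_BTF:

package main

import (
	"fmt"

	"github.com/cilium/ebpf/btf"
)

func main() {
	spec, err := btf.LoadKernelSpec()
	if err != nil {
		panic(err) // wraps ErrNotSupported when the kernel lacks BTF
	}
	fmt.Println(spec.TypeByID(1)) // first non-void type in vmlinux
}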
// findVMLinux scans multiple well-known paths for vmlinux kernel images.
func findVMLinux() (*internal.SafeELFFile, error) {
release, err := internal.KernelRelease()
if err != nil {
return nil, err
}
// use same list of locations as libbpf
// https://github.com/libbpf/libbpf/blob/9a3a42608dbe3731256a5682a125ac1e23bced8f/src/btf.c#L3114-L3122
locations := []string{
"/boot/vmlinux-%s",
"/lib/modules/%s/vmlinux-%[1]s",
"/lib/modules/%s/build/vmlinux",
"/usr/lib/modules/%s/kernel/vmlinux",
"/usr/lib/debug/boot/vmlinux-%s",
"/usr/lib/debug/boot/vmlinux-%s.debug",
"/usr/lib/debug/lib/modules/%s/vmlinux",
}
for _, loc := range locations {
file, err := internal.OpenSafeELFFile(fmt.Sprintf(loc, release))
if errors.Is(err, os.ErrNotExist) {
continue
}
return file, err
}
return nil, fmt.Errorf("no BTF found for kernel version %s: %w", release, internal.ErrNotSupported)
}
// parseBTFHeader parses the header of the .BTF section.
func parseBTFHeader(r io.Reader, bo binary.ByteOrder) (*btfHeader, error) {
var header btfHeader
if err := binary.Read(r, bo, &header); err != nil {
return nil, fmt.Errorf("can't read header: %v", err)
}
if header.Magic != btfMagic {
return nil, fmt.Errorf("incorrect magic value %v", header.Magic)
}
if header.Version != 1 {
return nil, fmt.Errorf("unexpected version %v", header.Version)
}
if header.Flags != 0 {
return nil, fmt.Errorf("unsupported flags %v", header.Flags)
}
remainder := int64(header.HdrLen) - int64(binary.Size(&header))
if remainder < 0 {
return nil, errors.New("header length shorter than btfHeader size")
}
if _, err := io.CopyN(internal.DiscardZeroes{}, r, remainder); err != nil {
return nil, fmt.Errorf("header padding: %v", err)
}
return &header, nil
}
func guessRawBTFByteOrder(r io.ReaderAt) binary.ByteOrder {
buf := new(bufio.Reader)
for _, bo := range []binary.ByteOrder{
binary.LittleEndian,
binary.BigEndian,
} {
buf.Reset(io.NewSectionReader(r, 0, math.MaxInt64))
if _, err := parseBTFHeader(buf, bo); err == nil {
return bo
}
}
return nil
}
// parseBTF reads a .BTF section into memory and parses it into a list of
// raw types and a string table.
func parseBTF(btf io.ReaderAt, bo binary.ByteOrder, baseStrings *stringTable) ([]rawType, *stringTable, error) {
buf := internal.NewBufferedSectionReader(btf, 0, math.MaxInt64)
header, err := parseBTFHeader(buf, bo)
if err != nil {
return nil, nil, fmt.Errorf("parsing .BTF header: %v", err)
}
rawStrings, err := readStringTable(io.NewSectionReader(btf, header.stringStart(), int64(header.StringLen)),
baseStrings)
if err != nil {
return nil, nil, fmt.Errorf("can't read type names: %w", err)
}
buf.Reset(io.NewSectionReader(btf, header.typeStart(), int64(header.TypeLen)))
rawTypes, err := readTypes(buf, bo, header.TypeLen)
if err != nil {
return nil, nil, fmt.Errorf("can't read types: %w", err)
}
return rawTypes, rawStrings, nil
}
type variable struct {
section string
name string
}
func fixupDatasec(rawTypes []rawType, rawStrings *stringTable, sectionSizes map[string]uint32, variableOffsets map[variable]uint32) error {
for i, rawType := range rawTypes {
if rawType.Kind() != kindDatasec {
continue
}
name, err := rawStrings.Lookup(rawType.NameOff)
if err != nil {
return err
}
if name == ".kconfig" || name == ".ksyms" {
return fmt.Errorf("reference to %s: %w", name, ErrNotSupported)
}
if rawTypes[i].SizeType != 0 {
continue
}
size, ok := sectionSizes[name]
if !ok {
return fmt.Errorf("data section %s: missing size", name)
}
rawTypes[i].SizeType = size
secinfos := rawType.data.([]btfVarSecinfo)
for j, secInfo := range secinfos {
id := int(secInfo.Type - 1)
if id >= len(rawTypes) {
return fmt.Errorf("data section %s: invalid type id %d for variable %d", name, id, j)
}
varName, err := rawStrings.Lookup(rawTypes[id].NameOff)
if err != nil {
return fmt.Errorf("data section %s: can't get name for type %d: %w", name, id, err)
}
offset, ok := variableOffsets[variable{name, varName}]
if !ok {
return fmt.Errorf("data section %s: missing offset for variable %s", name, varName)
}
secinfos[j].Offset = offset
}
}
return nil
}
// Copy creates a copy of Spec.
func (s *Spec) Copy() *Spec {
types := copyTypes(s.types, nil)
typeIDOffset := TypeID(0)
if len(s.types) != 0 {
typeIDOffset = s.typeIDs[s.types[0]]
}
typeIDs, typesByName := indexTypes(types, typeIDOffset)
// NB: Other parts of spec are not copied since they are immutable.
return &Spec{
s.rawTypes,
s.strings,
types,
typeIDs,
typesByName,
s.byteOrder,
}
}
type marshalOpts struct {
ByteOrder binary.ByteOrder
StripFuncLinkage bool
}
func (s *Spec) marshal(opts marshalOpts) ([]byte, error) {
var (
buf bytes.Buffer
header = new(btfHeader)
headerLen = binary.Size(header)
)
// Reserve space for the header. We have to write it last since
// we don't know the size of the type section yet.
_, _ = buf.Write(make([]byte, headerLen))
// Write type section, just after the header.
for _, raw := range s.rawTypes {
switch {
case opts.StripFuncLinkage && raw.Kind() == kindFunc:
raw.SetLinkage(StaticFunc)
}
if err := raw.Marshal(&buf, opts.ByteOrder); err != nil {
return nil, fmt.Errorf("can't marshal BTF: %w", err)
}
}
typeLen := uint32(buf.Len() - headerLen)
// Write string section after type section.
stringsLen := s.strings.Length()
buf.Grow(stringsLen)
if err := s.strings.Marshal(&buf); err != nil {
return nil, err
}
// Fill out the header, and write it out.
header = &btfHeader{
Magic: btfMagic,
Version: 1,
Flags: 0,
HdrLen: uint32(headerLen),
TypeOff: 0,
TypeLen: typeLen,
StringOff: typeLen,
StringLen: uint32(stringsLen),
}
raw := buf.Bytes()
err := binary.Write(sliceWriter(raw[:headerLen]), opts.ByteOrder, header)
if err != nil {
return nil, fmt.Errorf("can't write header: %v", err)
}
return raw, nil
}
type sliceWriter []byte
func (sw sliceWriter) Write(p []byte) (int, error) {
if len(p) != len(sw) {
return 0, errors.New("size doesn't match")
}
return copy(sw, p), nil
}
// TypeByID returns the BTF Type with the given type ID.
//
// Returns an error wrapping ErrNotFound if a Type with the given ID
// does not exist in the Spec.
func (s *Spec) TypeByID(id TypeID) (Type, error) {
return s.types.ByID(id)
}
// TypeID returns the ID for a given Type.
//
// Returns an error wrapping ErrNotFound if the type isn't part of the Spec.
func (s *Spec) TypeID(typ Type) (TypeID, error) {
if _, ok := typ.(*Void); ok {
// Equality is weird for void, since it is a zero sized type.
return 0, nil
}
id, ok := s.typeIDs[typ]
if !ok {
return 0, fmt.Errorf("no ID for type %s: %w", typ, ErrNotFound)
}
return id, nil
}
// AnyTypesByName returns a list of BTF Types with the given name.
//
// If the BTF blob describes multiple compilation units like vmlinux, multiple
// Types with the same name and kind can exist, but might not describe the same
// data structure.
//
// Returns an error wrapping ErrNotFound if no matching Type exists in the Spec.
func (s *Spec) AnyTypesByName(name string) ([]Type, error) {
types := s.namedTypes[newEssentialName(name)]
if len(types) == 0 {
return nil, fmt.Errorf("type name %s: %w", name, ErrNotFound)
}
// Return a copy to prevent changes to namedTypes.
result := make([]Type, 0, len(types))
for _, t := range types {
// Match against the full name, not just the essential one
// in case the type being looked up is a struct flavor.
if t.TypeName() == name {
result = append(result, t)
}
}
return result, nil
}
// AnyTypeByName returns a Type with the given name.
//
// Returns an error if multiple types of that name exist.
func (s *Spec) AnyTypeByName(name string) (Type, error) {
types, err := s.AnyTypesByName(name)
if err != nil {
return nil, err
}
if len(types) > 1 {
return nil, fmt.Errorf("found multiple types: %v", types)
}
return types[0], nil
}
// TypeByName searches for a Type with a specific name. Since multiple
// Types with the same name can exist, the parameter typ is taken to
// narrow down the search in case of a clash.
//
// typ must be a non-nil pointer to an implementation of a Type.
// On success, the address of the found Type will be copied to typ.
//
// Returns an error wrapping ErrNotFound if no matching
// Type exists in the Spec. If multiple candidates are found,
// an error is returned.
func (s *Spec) TypeByName(name string, typ interface{}) error {
typValue := reflect.ValueOf(typ)
if typValue.Kind() != reflect.Ptr {
return fmt.Errorf("%T is not a pointer", typ)
}
typPtr := typValue.Elem()
if !typPtr.CanSet() {
return fmt.Errorf("%T cannot be set", typ)
}
wanted := typPtr.Type()
if !wanted.AssignableTo(reflect.TypeOf((*Type)(nil)).Elem()) {
return fmt.Errorf("%T does not satisfy Type interface", typ)
}
types, err := s.AnyTypesByName(name)
if err != nil {
return err
}
var candidate Type
for _, typ := range types {
if reflect.TypeOf(typ) != wanted {
continue
}
if candidate != nil {
return fmt.Errorf("type %s: multiple candidates for %T", name, typ)
}
candidate = typ
}
if candidate == nil {
return fmt.Errorf("type %s: %w", name, ErrNotFound)
}
typPtr.Set(reflect.ValueOf(candidate))
return nil
}
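Continuing the LoadKernelSpec sketch from earlier, resolving a named struct looks roughly like this; "task_struct" assumes vmlinux BTF, and typ must be a non-nil pointer to a concrete Type implementation:

	var task *btf.Struct
	if err := spec.TypeByName("task_struct", &task); err != nil {
		panic(err) // wraps ErrNotFound if no such struct exists
	}
	fmt.Println(task.Size)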
// LoadSplitSpecFromReader loads split BTF from a reader.
//
// Types from base are used to resolve references in the split BTF.
// The returned Spec only contains types from the split BTF, not from the base.
func LoadSplitSpecFromReader(r io.ReaderAt, base *Spec) (*Spec, error) {
return loadRawSpec(r, internal.NativeEndian, base.types, base.strings)
}
// TypesIterator iterates over types of a given spec.
type TypesIterator struct {
spec *Spec
index int
// The last visited type in the spec.
Type Type
}
// Iterate returns the types iterator.
func (s *Spec) Iterate() *TypesIterator {
return &TypesIterator{spec: s, index: 0}
}
// Next returns true as long as there are any remaining types.
func (iter *TypesIterator) Next() bool {
if len(iter.spec.types) <= iter.index {
return false
}
iter.Type = iter.spec.types[iter.index]
iter.index++
return true
}
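The iterator pairs with the same spec; a short sketch counting named types:

	iter := spec.Iterate()
	named := 0
	for iter.Next() {
		if iter.Type.TypeName() != "" {
			named++
		}
	}
	fmt.Println("named types:", named)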
// Handle is a reference to BTF loaded into the kernel.
type Handle struct {
fd *sys.FD
// Size of the raw BTF in bytes.
size uint32
}
// NewHandle loads BTF into the kernel.
//
// Returns ErrNotSupported if BTF is not supported.
func NewHandle(spec *Spec) (*Handle, error) {
if err := haveBTF(); err != nil {
return nil, err
}
if spec.byteOrder != internal.NativeEndian {
return nil, fmt.Errorf("can't load %s BTF on %s", spec.byteOrder, internal.NativeEndian)
}
btf, err := spec.marshal(marshalOpts{
ByteOrder: internal.NativeEndian,
StripFuncLinkage: haveFuncLinkage() != nil,
})
if err != nil {
return nil, fmt.Errorf("can't marshal BTF: %w", err)
}
if uint64(len(btf)) > math.MaxUint32 {
return nil, errors.New("BTF exceeds the maximum size")
}
attr := &sys.BtfLoadAttr{
Btf: sys.NewSlicePointer(btf),
BtfSize: uint32(len(btf)),
}
fd, err := sys.BtfLoad(attr)
if err != nil {
logBuf := make([]byte, 64*1024)
attr.BtfLogBuf = sys.NewSlicePointer(logBuf)
attr.BtfLogSize = uint32(len(logBuf))
attr.BtfLogLevel = 1
// NB: The syscall will never return ENOSPC as of 5.18-rc4.
_, _ = sys.BtfLoad(attr)
return nil, internal.ErrorWithLog(err, logBuf)
}
return &Handle{fd, attr.BtfSize}, nil
}
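Loading a spec into the kernel can then be sketched as follows; this assumes a recent kernel and sufficient privileges:

	handle, err := btf.NewHandle(spec)
	if err != nil {
		panic(err) // ErrNotSupported on kernels without BTF support (~pre-5.1)
	}
	defer handle.Close()
	fmt.Println("kernel BTF fd:", handle.FD())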
// NewHandleFromID returns the BTF handle for a given id.
//
// Prefer calling [ebpf.Program.Handle] or [ebpf.Map.Handle] if possible.
//
// Returns ErrNotExist if there is no BTF with the given id.
//
// Requires CAP_SYS_ADMIN.
func NewHandleFromID(id ID) (*Handle, error) {
fd, err := sys.BtfGetFdById(&sys.BtfGetFdByIdAttr{
Id: uint32(id),
})
if err != nil {
return nil, fmt.Errorf("get FD for ID %d: %w", id, err)
}
info, err := newHandleInfoFromFD(fd)
if err != nil {
_ = fd.Close()
return nil, err
}
return &Handle{fd, info.size}, nil
}
// Spec parses the kernel BTF into Go types.
//
// base is used to decode split BTF and may be nil.
func (h *Handle) Spec(base *Spec) (*Spec, error) {
var btfInfo sys.BtfInfo
btfBuffer := make([]byte, h.size)
btfInfo.Btf, btfInfo.BtfSize = sys.NewSlicePointerLen(btfBuffer)
if err := sys.ObjInfo(h.fd, &btfInfo); err != nil {
return nil, err
}
var baseTypes types
var baseStrings *stringTable
if base != nil {
baseTypes = base.types
baseStrings = base.strings
}
return loadRawSpec(bytes.NewReader(btfBuffer), internal.NativeEndian, baseTypes, baseStrings)
}
// Close destroys the handle.
//
// Subsequent calls to FD will return an invalid value.
func (h *Handle) Close() error {
if h == nil {
return nil
}
return h.fd.Close()
}
// FD returns the file descriptor for the handle.
func (h *Handle) FD() int {
return h.fd.Int()
}
// Info returns metadata about the handle.
func (h *Handle) Info() (*HandleInfo, error) {
return newHandleInfoFromFD(h.fd)
}
func marshalBTF(types interface{}, strings []byte, bo binary.ByteOrder) []byte {
const minHeaderLength = 24
typesLen := uint32(binary.Size(types))
header := btfHeader{
Magic: btfMagic,
Version: 1,
HdrLen: minHeaderLength,
TypeOff: 0,
TypeLen: typesLen,
StringOff: typesLen,
StringLen: uint32(len(strings)),
}
buf := new(bytes.Buffer)
_ = binary.Write(buf, bo, &header)
_ = binary.Write(buf, bo, types)
buf.Write(strings)
return buf.Bytes()
}
var haveBTF = internal.FeatureTest("BTF", "5.1", func() error {
var (
types struct {
Integer btfType
Var btfType
btfVar struct{ Linkage uint32 }
}
strings = []byte{0, 'a', 0}
)
// We use a BTF_KIND_VAR here, to make sure that
// the kernel understands BTF at least as well as we
// do. BTF_KIND_VAR was introduced ~5.1.
types.Integer.SetKind(kindPointer)
types.Var.NameOff = 1
types.Var.SetKind(kindVar)
types.Var.SizeType = 1
btf := marshalBTF(&types, strings, internal.NativeEndian)
fd, err := sys.BtfLoad(&sys.BtfLoadAttr{
Btf: sys.NewSlicePointer(btf),
BtfSize: uint32(len(btf)),
})
if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) {
// Treat both EINVAL and EPERM as not supported: loading the program
// might still succeed without BTF.
return internal.ErrNotSupported
}
if err != nil {
return err
}
fd.Close()
return nil
})
var haveFuncLinkage = internal.FeatureTest("BTF func linkage", "5.6", func() error {
if err := haveBTF(); err != nil {
return err
}
var (
types struct {
FuncProto btfType
Func btfType
}
strings = []byte{0, 'a', 0}
)
types.FuncProto.SetKind(kindFuncProto)
types.Func.SetKind(kindFunc)
types.Func.SizeType = 1 // aka FuncProto
types.Func.NameOff = 1
types.Func.SetLinkage(GlobalFunc)
btf := marshalBTF(&types, strings, internal.NativeEndian)
fd, err := sys.BtfLoad(&sys.BtfLoadAttr{
Btf: sys.NewSlicePointer(btf),
BtfSize: uint32(len(btf)),
})
if errors.Is(err, unix.EINVAL) {
return internal.ErrNotSupported
}
if err != nil {
return err
}
fd.Close()
return nil
})

View file

@ -130,13 +130,22 @@ func mask(len uint32) uint32 {
return (1 << len) - 1
}
func readBits(value, len, shift uint32) uint32 {
return (value >> shift) & mask(len)
}
func writeBits(value, len, shift, new uint32) uint32 {
value &^= mask(len) << shift
value |= (new & mask(len)) << shift
return value
}
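Since readBits and writeBits are the primitives everything below builds on, here is a self-contained round-trip sketch using the same arithmetic outside the package:

package main

import "fmt"

func mask(len uint32) uint32 { return (1 << len) - 1 }

func readBits(value, len, shift uint32) uint32 {
	return (value >> shift) & mask(len)
}

func writeBits(value, len, shift, new uint32) uint32 {
	value &^= mask(len) << shift
	value |= (new & mask(len)) << shift
	return value
}

func main() {
	// Pack a 4-bit encoding at bit 24, as btfInt does below.
	raw := writeBits(0, 4, 24, 0xA)
	fmt.Printf("%#08x -> %#x\n", raw, readBits(raw, 4, 24)) // 0x0a000000 -> 0xa
}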
func (bt *btfType) info(len, shift uint32) uint32 {
return (bt.Info >> shift) & mask(len)
return readBits(bt.Info, len, shift)
}
func (bt *btfType) setInfo(value, len, shift uint32) {
bt.Info &^= mask(len) << shift
bt.Info |= (value & mask(len)) << shift
bt.Info = writeBits(bt.Info, len, shift, value)
}
func (bt *btfType) Kind() btfKind {
@ -177,6 +186,10 @@ func (bt *btfType) Size() uint32 {
return bt.SizeType
}
func (bt *btfType) SetSize(size uint32) {
bt.SizeType = size
}
type rawType struct {
btfType
data interface{}
@ -194,6 +207,50 @@ func (rt *rawType) Marshal(w io.Writer, bo binary.ByteOrder) error {
return binary.Write(w, bo, rt.data)
}
// btfInt encodes additional data for integers.
//
// ? ? ? ? e e e e o o o o o o o o ? ? ? ? ? ? ? ? b b b b b b b b
// ? = undefined
// e = encoding
// o = offset (bitfields?)
// b = bits (bitfields)
type btfInt struct {
Raw uint32
}
const (
btfIntEncodingLen = 4
btfIntEncodingShift = 24
btfIntOffsetLen = 8
btfIntOffsetShift = 16
btfIntBitsLen = 8
btfIntBitsShift = 0
)
func (bi btfInt) Encoding() IntEncoding {
return IntEncoding(readBits(bi.Raw, btfIntEncodingLen, btfIntEncodingShift))
}
func (bi *btfInt) SetEncoding(e IntEncoding) {
bi.Raw = writeBits(uint32(bi.Raw), btfIntEncodingLen, btfIntEncodingShift, uint32(e))
}
func (bi btfInt) Offset() Bits {
return Bits(readBits(bi.Raw, btfIntOffsetLen, btfIntOffsetShift))
}
func (bi *btfInt) SetOffset(offset uint32) {
bi.Raw = writeBits(bi.Raw, btfIntOffsetLen, btfIntOffsetShift, offset)
}
func (bi btfInt) Bits() Bits {
return Bits(readBits(bi.Raw, btfIntBitsLen, btfIntBitsShift))
}
func (bi *btfInt) SetBits(bits byte) {
bi.Raw = writeBits(bi.Raw, btfIntBitsLen, btfIntBitsShift, uint32(bits))
}
type btfArray struct {
Type TypeID
IndexType TypeID
@ -226,11 +283,14 @@ type btfParam struct {
Type TypeID
}
func readTypes(r io.Reader, bo binary.ByteOrder) ([]rawType, error) {
var (
header btfType
types []rawType
)
func readTypes(r io.Reader, bo binary.ByteOrder, typeLen uint32) ([]rawType, error) {
var header btfType
// Because of the interleaving between types and struct members it is
// difficult to precompute the number of raw types this will parse;
// this "guess" is a good first estimate.
sizeOfbtfType := uintptr(binary.Size(btfType{}))
tyMaxCount := uintptr(typeLen) / sizeOfbtfType / 2
types := make([]rawType, 0, tyMaxCount)
for id := TypeID(1); ; id++ {
if err := binary.Read(r, bo, &header); err == io.EOF {
@ -242,7 +302,7 @@ func readTypes(r io.Reader, bo binary.ByteOrder) ([]rawType, error) {
var data interface{}
switch header.Kind() {
case kindInt:
data = new(uint32)
data = new(btfInt)
case kindPointer:
case kindArray:
data = new(btfArray)
@ -281,7 +341,3 @@ func readTypes(r io.Reader, bo binary.ByteOrder) ([]rawType, error) {
types = append(types, rawType{header, data})
}
}
func intEncoding(raw uint32) (IntEncoding, uint32, byte) {
return IntEncoding((raw & 0x0f000000) >> 24), (raw & 0x00ff0000) >> 16, byte(raw & 0x000000ff)
}

View file

@ -1,11 +1,11 @@
package btf
import (
"encoding/binary"
"errors"
"fmt"
"math"
"reflect"
"sort"
"strconv"
"strings"
@ -17,50 +17,58 @@ import (
// COREFixup is the result of computing a CO-RE relocation for a target.
type COREFixup struct {
Kind COREKind
Local uint32
Target uint32
Poison bool
kind coreKind
local uint32
target uint32
// True if there is no valid fixup. The instruction is replaced with an
// invalid dummy.
poison bool
// True if the validation of the local value should be skipped. Used by
// some kinds of bitfield relocations.
skipLocalValidation bool
}
func (f COREFixup) equal(other COREFixup) bool {
return f.Local == other.Local && f.Target == other.Target
func (f *COREFixup) equal(other COREFixup) bool {
return f.local == other.local && f.target == other.target
}
func (f COREFixup) String() string {
if f.Poison {
return fmt.Sprintf("%s=poison", f.Kind)
func (f *COREFixup) String() string {
if f.poison {
return fmt.Sprintf("%s=poison", f.kind)
}
return fmt.Sprintf("%s=%d->%d", f.Kind, f.Local, f.Target)
return fmt.Sprintf("%s=%d->%d", f.kind, f.local, f.target)
}
func (f COREFixup) apply(ins *asm.Instruction) error {
if f.Poison {
return errors.New("can't poison individual instruction")
func (f *COREFixup) Apply(ins *asm.Instruction) error {
if f.poison {
const badRelo = 0xbad2310
*ins = asm.BuiltinFunc(badRelo).Call()
return nil
}
switch class := ins.OpCode.Class(); class {
case asm.LdXClass, asm.StClass, asm.StXClass:
if want := int16(f.Local); want != ins.Offset {
return fmt.Errorf("invalid offset %d, expected %d", ins.Offset, want)
if want := int16(f.local); !f.skipLocalValidation && want != ins.Offset {
return fmt.Errorf("invalid offset %d, expected %d", ins.Offset, f.local)
}
if f.Target > math.MaxInt16 {
return fmt.Errorf("offset %d exceeds MaxInt16", f.Target)
if f.target > math.MaxInt16 {
return fmt.Errorf("offset %d exceeds MaxInt16", f.target)
}
ins.Offset = int16(f.Target)
ins.Offset = int16(f.target)
case asm.LdClass:
if !ins.IsConstantLoad(asm.DWord) {
return fmt.Errorf("not a dword-sized immediate load")
}
if want := int64(f.Local); want != ins.Constant {
return fmt.Errorf("invalid immediate %d, expected %d", ins.Constant, want)
if want := int64(f.local); !f.skipLocalValidation && want != ins.Constant {
return fmt.Errorf("invalid immediate %d, expected %d (fixup: %v)", ins.Constant, want, f)
}
ins.Constant = int64(f.Target)
ins.Constant = int64(f.target)
case asm.ALUClass:
if ins.OpCode.ALUOp() == asm.Swap {
@ -74,15 +82,15 @@ func (f COREFixup) apply(ins *asm.Instruction) error {
return fmt.Errorf("invalid source %s", src)
}
if want := int64(f.Local); want != ins.Constant {
return fmt.Errorf("invalid immediate %d, expected %d", ins.Constant, want)
if want := int64(f.local); !f.skipLocalValidation && want != ins.Constant {
return fmt.Errorf("invalid immediate %d, expected %d (fixup: %v, kind: %v, ins: %v)", ins.Constant, want, f, f.kind, ins)
}
if f.Target > math.MaxInt32 {
return fmt.Errorf("immediate %d exceeds MaxInt32", f.Target)
if f.target > math.MaxInt32 {
return fmt.Errorf("immediate %d exceeds MaxInt32", f.target)
}
ins.Constant = int64(f.Target)
ins.Constant = int64(f.target)
default:
return fmt.Errorf("invalid class %s", class)
@ -92,57 +100,14 @@ func (f COREFixup) apply(ins *asm.Instruction) error {
}
func (f COREFixup) isNonExistant() bool {
return f.Kind.checksForExistence() && f.Target == 0
return f.kind.checksForExistence() && f.target == 0
}
type COREFixups map[uint64]COREFixup
// Apply a set of CO-RE relocations to a BPF program.
func (fs COREFixups) Apply(insns asm.Instructions) (asm.Instructions, error) {
if len(fs) == 0 {
cpy := make(asm.Instructions, len(insns))
copy(cpy, insns)
return insns, nil
}
cpy := make(asm.Instructions, 0, len(insns))
iter := insns.Iterate()
for iter.Next() {
fixup, ok := fs[iter.Offset.Bytes()]
if !ok {
cpy = append(cpy, *iter.Ins)
continue
}
ins := *iter.Ins
if fixup.Poison {
const badRelo = asm.BuiltinFunc(0xbad2310)
cpy = append(cpy, badRelo.Call())
if ins.OpCode.IsDWordLoad() {
// 64 bit constant loads occupy two raw bpf instructions, so
// we need to add another instruction as padding.
cpy = append(cpy, badRelo.Call())
}
continue
}
if err := fixup.apply(&ins); err != nil {
return nil, fmt.Errorf("instruction %d, offset %d: %s: %w", iter.Index, iter.Offset.Bytes(), fixup.Kind, err)
}
cpy = append(cpy, ins)
}
return cpy, nil
}
// COREKind is the type of CO-RE relocation
type COREKind uint32
// coreKind is the type of CO-RE relocation as specified in BPF source code.
type coreKind uint32
const (
reloFieldByteOffset COREKind = iota /* field byte offset */
reloFieldByteOffset coreKind = iota /* field byte offset */
reloFieldByteSize /* field size in bytes */
reloFieldExists /* field existence in target kernel */
reloFieldSigned /* field signedness (0 - unsigned, 1 - signed) */
@ -156,7 +121,11 @@ const (
reloEnumvalValue /* enum value integer value */
)
func (k COREKind) String() string {
func (k coreKind) checksForExistence() bool {
return k == reloEnumvalExists || k == reloTypeExists || k == reloFieldExists
}
func (k coreKind) String() string {
switch k {
case reloFieldByteOffset:
return "byte_off"
@ -187,19 +156,28 @@ func (k COREKind) String() string {
}
}
func (k COREKind) checksForExistence() bool {
return k == reloEnumvalExists || k == reloTypeExists || k == reloFieldExists
}
func coreRelocate(local, target *Spec, relos coreRelos) (COREFixups, error) {
// CORERelocate calculates the difference in types between local and target.
//
// Returns a list of fixups which can be applied to instructions to make them
// match the target type(s).
//
// Fixups are returned in the order of relos, e.g. fixup[i] is the solution
// for relos[i].
func CORERelocate(local, target *Spec, relos []*CORERelocation) ([]COREFixup, error) {
if local.byteOrder != target.byteOrder {
return nil, fmt.Errorf("can't relocate %s against %s", local.byteOrder, target.byteOrder)
}
var ids []TypeID
relosByID := make(map[TypeID]coreRelos)
result := make(COREFixups, len(relos))
for _, relo := range relos {
type reloGroup struct {
relos []*CORERelocation
// Position of each relocation in relos.
indices []int
}
// Split relocations into per Type lists.
relosByType := make(map[Type]*reloGroup)
result := make([]COREFixup, len(relos))
for i, relo := range relos {
if relo.kind == reloTypeIDLocal {
// Filtering out reloTypeIDLocal here makes our lives a lot easier
// down the line, since it doesn't have a target at all.
@ -207,47 +185,42 @@ func coreRelocate(local, target *Spec, relos coreRelos) (COREFixups, error) {
return nil, fmt.Errorf("%s: unexpected accessor %v", relo.kind, relo.accessor)
}
result[uint64(relo.insnOff)] = COREFixup{
relo.kind,
uint32(relo.typeID),
uint32(relo.typeID),
false,
id, err := local.TypeID(relo.typ)
if err != nil {
return nil, fmt.Errorf("%s: %w", relo.kind, err)
}
result[i] = COREFixup{
kind: relo.kind,
local: uint32(id),
target: uint32(id),
}
continue
}
relos, ok := relosByID[relo.typeID]
group, ok := relosByType[relo.typ]
if !ok {
ids = append(ids, relo.typeID)
group = &reloGroup{}
relosByType[relo.typ] = group
}
relosByID[relo.typeID] = append(relos, relo)
group.relos = append(group.relos, relo)
group.indices = append(group.indices, i)
}
// Ensure we work on relocations in a deterministic order.
sort.Slice(ids, func(i, j int) bool {
return ids[i] < ids[j]
})
for _, id := range ids {
if int(id) >= len(local.types) {
return nil, fmt.Errorf("invalid type id %d", id)
}
localType := local.types[id]
named, ok := localType.(NamedType)
if !ok || named.TypeName() == "" {
for localType, group := range relosByType {
localTypeName := localType.TypeName()
if localTypeName == "" {
return nil, fmt.Errorf("relocate unnamed or anonymous type %s: %w", localType, ErrNotSupported)
}
relos := relosByID[id]
targets := target.namedTypes[essentialName(named.TypeName())]
fixups, err := coreCalculateFixups(localType, targets, relos)
targets := target.namedTypes[newEssentialName(localTypeName)]
fixups, err := coreCalculateFixups(local, target, localType, targets, group.relos)
if err != nil {
return nil, fmt.Errorf("relocate %s: %w", localType, err)
}
for i, relo := range relos {
result[uint64(relo.insnOff)] = fixups[i]
for j, index := range group.indices {
result[index] = fixups[j]
}
}
@ -262,30 +235,30 @@ var errImpossibleRelocation = errors.New("impossible relocation")
//
// The best target is determined by scoring: the less poisoning we have to do
// the better the target is.
func coreCalculateFixups(local Type, targets []NamedType, relos coreRelos) ([]COREFixup, error) {
localID := local.ID()
local, err := copyType(local, skipQualifierAndTypedef)
func coreCalculateFixups(localSpec, targetSpec *Spec, local Type, targets []Type, relos []*CORERelocation) ([]COREFixup, error) {
localID, err := localSpec.TypeID(local)
if err != nil {
return nil, err
return nil, fmt.Errorf("local type ID: %w", err)
}
local = Copy(local, UnderlyingType)
bestScore := len(relos)
var bestFixups []COREFixup
for i := range targets {
targetID := targets[i].ID()
target, err := copyType(targets[i], skipQualifierAndTypedef)
targetID, err := targetSpec.TypeID(targets[i])
if err != nil {
return nil, err
return nil, fmt.Errorf("target type ID: %w", err)
}
target := Copy(targets[i], UnderlyingType)
score := 0 // lower is better
fixups := make([]COREFixup, 0, len(relos))
for _, relo := range relos {
fixup, err := coreCalculateFixup(local, localID, target, targetID, relo)
fixup, err := coreCalculateFixup(localSpec.byteOrder, local, localID, target, targetID, relo)
if err != nil {
return nil, fmt.Errorf("target %s: %w", target, err)
}
if fixup.Poison || fixup.isNonExistant() {
if fixup.poison || fixup.isNonExistant() {
score++
}
fixups = append(fixups, fixup)
@ -307,17 +280,23 @@ func coreCalculateFixups(local Type, targets []NamedType, relos coreRelos) ([]CO
// the fixups agree with each other.
for i, fixup := range bestFixups {
if !fixup.equal(fixups[i]) {
return nil, fmt.Errorf("%s: multiple types match: %w", fixup.Kind, errAmbiguousRelocation)
return nil, fmt.Errorf("%s: multiple types match: %w", fixup.kind, errAmbiguousRelocation)
}
}
}
if bestFixups == nil {
// Nothing at all matched, probably because there are no suitable
// targets at all. Poison everything!
// targets at all.
//
// Poison everything except checksForExistence.
bestFixups = make([]COREFixup, len(relos))
for i, relo := range relos {
bestFixups[i] = COREFixup{Kind: relo.kind, Poison: true}
if relo.kind.checksForExistence() {
bestFixups[i] = COREFixup{kind: relo.kind, local: 1, target: 0}
} else {
bestFixups[i] = COREFixup{kind: relo.kind, poison: true}
}
}
}
@ -326,15 +305,18 @@ func coreCalculateFixups(local Type, targets []NamedType, relos coreRelos) ([]CO
// coreCalculateFixup calculates the fixup for a single local type, target type
// and relocation.
func coreCalculateFixup(local Type, localID TypeID, target Type, targetID TypeID, relo coreRelo) (COREFixup, error) {
func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID, target Type, targetID TypeID, relo *CORERelocation) (COREFixup, error) {
fixup := func(local, target uint32) (COREFixup, error) {
return COREFixup{relo.kind, local, target, false}, nil
return COREFixup{kind: relo.kind, local: local, target: target}, nil
}
fixupWithoutValidation := func(local, target uint32) (COREFixup, error) {
return COREFixup{kind: relo.kind, local: local, target: target, skipLocalValidation: true}, nil
}
poison := func() (COREFixup, error) {
if relo.kind.checksForExistence() {
return fixup(1, 0)
}
return COREFixup{relo.kind, 0, 0, true}, nil
return COREFixup{kind: relo.kind, poison: true}, nil
}
zero := COREFixup{}
@ -390,7 +372,20 @@ func coreCalculateFixup(local Type, localID TypeID, target Type, targetID TypeID
return fixup(uint32(localValue.Value), uint32(targetValue.Value))
}
case reloFieldByteOffset, reloFieldByteSize, reloFieldExists:
case reloFieldSigned:
switch local.(type) {
case *Enum:
return fixup(1, 1)
case *Int:
return fixup(
uint32(local.(*Int).Encoding&Signed),
uint32(target.(*Int).Encoding&Signed),
)
default:
return fixupWithoutValidation(0, 0)
}
case reloFieldByteOffset, reloFieldByteSize, reloFieldExists, reloFieldLShiftU64, reloFieldRShiftU64:
if _, ok := target.(*Fwd); ok {
// We can't relocate fields using a forward declaration, so
// skip it. If a non-forward declaration is present in the BTF
@ -406,12 +401,17 @@ func coreCalculateFixup(local Type, localID TypeID, target Type, targetID TypeID
return zero, fmt.Errorf("target %s: %w", target, err)
}
maybeSkipValidation := func(f COREFixup, err error) (COREFixup, error) {
f.skipLocalValidation = localField.bitfieldSize > 0
return f, err
}
switch relo.kind {
case reloFieldExists:
return fixup(1, 1)
case reloFieldByteOffset:
return fixup(localField.offset/8, targetField.offset/8)
return maybeSkipValidation(fixup(localField.offset, targetField.offset))
case reloFieldByteSize:
localSize, err := Sizeof(localField.Type)
@ -423,9 +423,34 @@ func coreCalculateFixup(local Type, localID TypeID, target Type, targetID TypeID
if err != nil {
return zero, err
}
return maybeSkipValidation(fixup(uint32(localSize), uint32(targetSize)))
return fixup(uint32(localSize), uint32(targetSize))
case reloFieldLShiftU64:
var target uint32
if byteOrder == binary.LittleEndian {
targetSize, err := targetField.sizeBits()
if err != nil {
return zero, err
}
target = uint32(64 - targetField.bitfieldOffset - targetSize)
} else {
loadWidth, err := Sizeof(targetField.Type)
if err != nil {
return zero, err
}
target = uint32(64 - Bits(loadWidth*8) + targetField.bitfieldOffset)
}
return fixupWithoutValidation(0, target)
case reloFieldRShiftU64:
targetSize, err := targetField.sizeBits()
if err != nil {
return zero, err
}
return fixupWithoutValidation(0, uint32(64-targetSize))
}
}
@ -462,7 +487,7 @@ func coreCalculateFixup(local Type, localID TypeID, target Type, targetID TypeID
*/
type coreAccessor []int
func parseCoreAccessor(accessor string) (coreAccessor, error) {
func parseCOREAccessor(accessor string) (coreAccessor, error) {
if accessor == "" {
return nil, fmt.Errorf("empty accessor")
}
@ -508,18 +533,73 @@ func (ca coreAccessor) enumValue(t Type) (*EnumValue, error) {
return &e.Values[i], nil
}
// coreField represents the position of a "child" of a composite type from the
// start of that type.
//
// /- start of composite
// | offset * 8 | bitfieldOffset | bitfieldSize | ... |
// \- start of field end of field -/
type coreField struct {
Type Type
Type Type
// The position of the field from the start of the composite type in bytes.
offset uint32
// The offset of the bitfield in bits from the start of the field.
bitfieldOffset Bits
// The size of the bitfield in bits.
//
// Zero if the field is not a bitfield.
bitfieldSize Bits
}
func adjustOffset(base uint32, t Type, n int) (uint32, error) {
size, err := Sizeof(t)
func (cf *coreField) adjustOffsetToNthElement(n int) error {
size, err := Sizeof(cf.Type)
if err != nil {
return 0, err
return err
}
return base + (uint32(n) * uint32(size) * 8), nil
cf.offset += uint32(n) * uint32(size)
return nil
}
func (cf *coreField) adjustOffsetBits(offset Bits) error {
align, err := alignof(cf.Type)
if err != nil {
return err
}
// We can compute the load offset by:
// 1) converting the bit offset to bytes with a flooring division.
// 2) dividing and multiplying that offset by the alignment, yielding the
// load size aligned offset.
offsetBytes := uint32(offset/8) / uint32(align) * uint32(align)
// The number of bits remaining is the bit offset less the number of bits
// we can "skip" with the aligned offset.
cf.bitfieldOffset = offset - Bits(offsetBytes*8)
// We know that cf.offset is aligned to at least align since we get it
// from the compiler via BTF. Adding an aligned offsetBytes preserves the
// alignment.
cf.offset += offsetBytes
return nil
}
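A worked instance of that computation, assuming a 4-byte-aligned field whose bit offset within the composite is 44:

package main

import "fmt"

func main() {
	const offset, align = uint32(44), uint32(4) // bit offset, byte alignment
	offsetBytes := (offset / 8) / align * align // floor(44/8) = 5, aligned down to 4
	bitfieldOffset := offset - offsetBytes*8    // 44 - 32 = 12 bits into the load
	fmt.Println(offsetBytes, bitfieldOffset)    // 4 12
}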
func (cf *coreField) sizeBits() (Bits, error) {
if cf.bitfieldSize > 0 {
return cf.bitfieldSize, nil
}
// Someone is trying to access a non-bitfield via a bit shift relocation.
// This happens when a field changes from a bitfield to a regular field
// between kernel versions. Synthesise the size to make the shifts work.
size, err := Sizeof(cf.Type)
if err != nil {
return 0, err
}
return Bits(size * 8), nil
}
// coreFindField descends into the local type using the accessor and tries to
@ -527,32 +607,33 @@ func adjustOffset(base uint32, t Type, n int) (uint32, error) {
//
// Returns the field and the offset of the field from the start of
// target in bits.
func coreFindField(local Type, localAcc coreAccessor, target Type) (_, _ coreField, _ error) {
func coreFindField(localT Type, localAcc coreAccessor, targetT Type) (coreField, coreField, error) {
local := coreField{Type: localT}
target := coreField{Type: targetT}
// The first index is used to offset a pointer of the base type like
// when accessing an array.
localOffset, err := adjustOffset(0, local, localAcc[0])
if err != nil {
if err := local.adjustOffsetToNthElement(localAcc[0]); err != nil {
return coreField{}, coreField{}, err
}
targetOffset, err := adjustOffset(0, target, localAcc[0])
if err != nil {
if err := target.adjustOffsetToNthElement(localAcc[0]); err != nil {
return coreField{}, coreField{}, err
}
if err := coreAreMembersCompatible(local, target); err != nil {
if err := coreAreMembersCompatible(local.Type, target.Type); err != nil {
return coreField{}, coreField{}, fmt.Errorf("fields: %w", err)
}
var localMaybeFlex, targetMaybeFlex bool
for _, acc := range localAcc[1:] {
switch localType := local.(type) {
for i, acc := range localAcc[1:] {
switch localType := local.Type.(type) {
case composite:
// For composite types acc is used to find the field in the local type,
// and then we try to find a field in target with the same name.
localMembers := localType.members()
if acc >= len(localMembers) {
return coreField{}, coreField{}, fmt.Errorf("invalid accessor %d for %s", acc, local)
return coreField{}, coreField{}, fmt.Errorf("invalid accessor %d for %s", acc, localType)
}
localMember := localMembers[acc]
@ -563,13 +644,15 @@ func coreFindField(local Type, localAcc coreAccessor, target Type) (_, _ coreFie
}
// This is an anonymous struct or union, ignore it.
local = localMember.Type
localOffset += localMember.OffsetBits
local = coreField{
Type: localMember.Type,
offset: local.offset + localMember.Offset.Bytes(),
}
localMaybeFlex = false
continue
}
targetType, ok := target.(composite)
targetType, ok := target.Type.(composite)
if !ok {
return coreField{}, coreField{}, fmt.Errorf("target not composite: %w", errImpossibleRelocation)
}
@ -579,20 +662,43 @@ func coreFindField(local Type, localAcc coreAccessor, target Type) (_, _ coreFie
return coreField{}, coreField{}, err
}
if targetMember.BitfieldSize > 0 {
return coreField{}, coreField{}, fmt.Errorf("field %q is a bitfield: %w", targetMember.Name, ErrNotSupported)
local = coreField{
Type: localMember.Type,
offset: local.offset,
bitfieldSize: localMember.BitfieldSize,
}
localMaybeFlex = acc == len(localMembers)-1
target = coreField{
Type: targetMember.Type,
offset: target.offset,
bitfieldSize: targetMember.BitfieldSize,
}
targetMaybeFlex = last
if local.bitfieldSize == 0 && target.bitfieldSize == 0 {
local.offset += localMember.Offset.Bytes()
target.offset += targetMember.Offset.Bytes()
break
}
local = localMember.Type
localMaybeFlex = acc == len(localMembers)-1
localOffset += localMember.OffsetBits
target = targetMember.Type
targetMaybeFlex = last
targetOffset += targetMember.OffsetBits
// Either of the members is a bitfield. Make sure we're at the
// end of the accessor.
if next := i + 1; next < len(localAcc[1:]) {
return coreField{}, coreField{}, fmt.Errorf("can't descend into bitfield")
}
if err := local.adjustOffsetBits(localMember.Offset); err != nil {
return coreField{}, coreField{}, err
}
if err := target.adjustOffsetBits(targetMember.Offset); err != nil {
return coreField{}, coreField{}, err
}
case *Array:
// For arrays, acc is the index in the target.
targetType, ok := target.(*Array)
targetType, ok := target.Type.(*Array)
if !ok {
return coreField{}, coreField{}, fmt.Errorf("target not array: %w", errImpossibleRelocation)
}
@ -611,17 +717,23 @@ func coreFindField(local Type, localAcc coreAccessor, target Type) (_, _ coreFie
return coreField{}, coreField{}, fmt.Errorf("out of bounds access of target: %w", errImpossibleRelocation)
}
local = localType.Type
local = coreField{
Type: localType.Type,
offset: local.offset,
}
localMaybeFlex = false
localOffset, err = adjustOffset(localOffset, local, acc)
if err != nil {
if err := local.adjustOffsetToNthElement(acc); err != nil {
return coreField{}, coreField{}, err
}
target = targetType.Type
target = coreField{
Type: targetType.Type,
offset: target.offset,
}
targetMaybeFlex = false
targetOffset, err = adjustOffset(targetOffset, target, acc)
if err != nil {
if err := target.adjustOffsetToNthElement(acc); err != nil {
return coreField{}, coreField{}, err
}
@ -629,12 +741,12 @@ func coreFindField(local Type, localAcc coreAccessor, target Type) (_, _ coreFie
return coreField{}, coreField{}, fmt.Errorf("relocate field of %T: %w", localType, ErrNotSupported)
}
if err := coreAreMembersCompatible(local, target); err != nil {
if err := coreAreMembersCompatible(local.Type, target.Type); err != nil {
return coreField{}, coreField{}, err
}
}
return coreField{local, localOffset}, coreField{target, targetOffset}, nil
return local, target, nil
}
// coreFindMember finds a member in a composite type while handling anonymous
@ -646,7 +758,7 @@ func coreFindMember(typ composite, name string) (Member, bool, error) {
type offsetTarget struct {
composite
offset uint32
offset Bits
}
targets := []offsetTarget{{typ, 0}}
@ -670,7 +782,7 @@ func coreFindMember(typ composite, name string) (Member, bool, error) {
for j, member := range members {
if member.Name == name {
// NB: This is safe because member is a copy.
member.OffsetBits += target.offset
member.Offset += target.offset
return member, j == len(members)-1, nil
}
@ -685,7 +797,7 @@ func coreFindMember(typ composite, name string) (Member, bool, error) {
return Member{}, false, fmt.Errorf("anonymous non-composite type %T not allowed", member.Type)
}
targets = append(targets, offsetTarget{comp, target.offset + member.OffsetBits})
targets = append(targets, offsetTarget{comp, target.offset + member.Offset})
}
}
@ -704,9 +816,9 @@ func coreFindEnumValue(local Type, localAcc coreAccessor, target Type) (localVal
return nil, nil, errImpossibleRelocation
}
localName := essentialName(localValue.Name)
localName := newEssentialName(localValue.Name)
for i, targetValue := range targetEnum.Values {
if essentialName(targetValue.Name) != localName {
if newEssentialName(targetValue.Name) != localName {
continue
}
@ -759,15 +871,9 @@ func coreAreTypesCompatible(localType Type, targetType Type) error {
}
switch lv := (localType).(type) {
case *Void, *Struct, *Union, *Enum, *Fwd:
case *Void, *Struct, *Union, *Enum, *Fwd, *Int:
// Nothing to do here
case *Int:
tv := targetType.(*Int)
if lv.isBitfield() || tv.isBitfield() {
return fmt.Errorf("bitfield: %w", errImpossibleRelocation)
}
case *Pointer, *Array:
depth++
localType.walk(&localTs)
@ -831,7 +937,7 @@ func coreAreMembersCompatible(localType Type, targetType Type) error {
return nil
}
if essentialName(a) == essentialName(b) {
if newEssentialName(a) == newEssentialName(b) {
return nil
}
@ -849,7 +955,7 @@ func coreAreMembersCompatible(localType Type, targetType Type) error {
}
switch lv := localType.(type) {
case *Array, *Pointer, *Float:
case *Array, *Pointer, *Float, *Int:
return nil
case *Enum:
@ -860,29 +966,7 @@ func coreAreMembersCompatible(localType Type, targetType Type) error {
tv := targetType.(*Fwd)
return doNamesMatch(lv.Name, tv.Name)
case *Int:
tv := targetType.(*Int)
if lv.isBitfield() || tv.isBitfield() {
return fmt.Errorf("bitfield: %w", errImpossibleRelocation)
}
return nil
default:
return fmt.Errorf("type %s: %w", localType, ErrNotSupported)
}
}
func skipQualifierAndTypedef(typ Type) (Type, error) {
result := typ
for depth := 0; depth <= maxTypeDepth; depth++ {
switch v := (result).(type) {
case qualifier:
result = v.qualify()
case *Typedef:
result = v.Type
default:
return result, nil
}
}
return nil, errors.New("exceeded type depth")
}

View file

@ -2,7 +2,4 @@
//
// The canonical documentation lives in the Linux kernel repository and is
// available at https://www.kernel.org/doc/html/latest/bpf/btf.html
//
// The API is very much unstable. You should only use this via the main
// ebpf library.
package btf

721
vendor/github.com/cilium/ebpf/btf/ext_info.go generated vendored Normal file
View file

@ -0,0 +1,721 @@
package btf
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"sort"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/internal"
)
// ExtInfos contains ELF section metadata.
type ExtInfos struct {
// The slices are sorted by offset in ascending order.
funcInfos map[string][]funcInfo
lineInfos map[string][]lineInfo
relocationInfos map[string][]coreRelocationInfo
}
// loadExtInfosFromELF parses ext infos from the .BTF.ext section in an ELF.
//
// Returns an error wrapping ErrNotFound if no ext infos are present.
func loadExtInfosFromELF(file *internal.SafeELFFile, ts types, strings *stringTable) (*ExtInfos, error) {
section := file.Section(".BTF.ext")
if section == nil {
return nil, fmt.Errorf("btf ext infos: %w", ErrNotFound)
}
if section.ReaderAt == nil {
return nil, fmt.Errorf("compressed ext_info is not supported")
}
return loadExtInfos(section.ReaderAt, file.ByteOrder, ts, strings)
}
// loadExtInfos parses bare ext infos.
func loadExtInfos(r io.ReaderAt, bo binary.ByteOrder, ts types, strings *stringTable) (*ExtInfos, error) {
// Open unbuffered section reader. binary.Read() calls io.ReadFull on
// the header structs, resulting in one syscall per header.
headerRd := io.NewSectionReader(r, 0, math.MaxInt64)
extHeader, err := parseBTFExtHeader(headerRd, bo)
if err != nil {
return nil, fmt.Errorf("parsing BTF extension header: %w", err)
}
coreHeader, err := parseBTFExtCOREHeader(headerRd, bo, extHeader)
if err != nil {
return nil, fmt.Errorf("parsing BTF CO-RE header: %w", err)
}
buf := internal.NewBufferedSectionReader(r, extHeader.funcInfoStart(), int64(extHeader.FuncInfoLen))
btfFuncInfos, err := parseFuncInfos(buf, bo, strings)
if err != nil {
return nil, fmt.Errorf("parsing BTF function info: %w", err)
}
funcInfos := make(map[string][]funcInfo, len(btfFuncInfos))
for section, bfis := range btfFuncInfos {
funcInfos[section], err = newFuncInfos(bfis, ts)
if err != nil {
return nil, fmt.Errorf("section %s: func infos: %w", section, err)
}
}
buf = internal.NewBufferedSectionReader(r, extHeader.lineInfoStart(), int64(extHeader.LineInfoLen))
btfLineInfos, err := parseLineInfos(buf, bo, strings)
if err != nil {
return nil, fmt.Errorf("parsing BTF line info: %w", err)
}
lineInfos := make(map[string][]lineInfo, len(btfLineInfos))
for section, blis := range btfLineInfos {
lineInfos[section], err = newLineInfos(blis, strings)
if err != nil {
return nil, fmt.Errorf("section %s: line infos: %w", section, err)
}
}
if coreHeader == nil || coreHeader.COREReloLen == 0 {
return &ExtInfos{funcInfos, lineInfos, nil}, nil
}
var btfCORERelos map[string][]bpfCORERelo
buf = internal.NewBufferedSectionReader(r, extHeader.coreReloStart(coreHeader), int64(coreHeader.COREReloLen))
btfCORERelos, err = parseCORERelos(buf, bo, strings)
if err != nil {
return nil, fmt.Errorf("parsing CO-RE relocation info: %w", err)
}
coreRelos := make(map[string][]coreRelocationInfo, len(btfCORERelos))
for section, brs := range btfCORERelos {
coreRelos[section], err = newRelocationInfos(brs, ts, strings)
if err != nil {
return nil, fmt.Errorf("section %s: CO-RE relocations: %w", section, err)
}
}
return &ExtInfos{funcInfos, lineInfos, coreRelos}, nil
}
type funcInfoMeta struct{}
type coreRelocationMeta struct{}
// Assign per-section metadata from BTF to a section's instructions.
func (ei *ExtInfos) Assign(insns asm.Instructions, section string) {
funcInfos := ei.funcInfos[section]
lineInfos := ei.lineInfos[section]
reloInfos := ei.relocationInfos[section]
iter := insns.Iterate()
for iter.Next() {
if len(funcInfos) > 0 && funcInfos[0].offset == iter.Offset {
iter.Ins.Metadata.Set(funcInfoMeta{}, funcInfos[0].fn)
funcInfos = funcInfos[1:]
}
if len(lineInfos) > 0 && lineInfos[0].offset == iter.Offset {
*iter.Ins = iter.Ins.WithSource(lineInfos[0].line)
lineInfos = lineInfos[1:]
}
if len(reloInfos) > 0 && reloInfos[0].offset == iter.Offset {
iter.Ins.Metadata.Set(coreRelocationMeta{}, reloInfos[0].relo)
reloInfos = reloInfos[1:]
}
}
}
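In use this pairs with LoadSpecAndExtInfosFromReader from btf.go; a hedged sketch, where f, insns and the ".text" section name are all assumptions:

	// f is an *os.File for a BPF ELF; insns holds the instructions
	// decoded from one of its program sections.
	spec, extInfos, err := btf.LoadSpecAndExtInfosFromReader(f)
	if err != nil {
		panic(err)
	}
	extInfos.Assign(insns, ".text") // attach func/line/relo metadata
	_ = spec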
// MarshalExtInfos encodes function and line info embedded in insns into kernel
// wire format.
func MarshalExtInfos(insns asm.Instructions, typeID func(Type) (TypeID, error)) (funcInfos, lineInfos []byte, _ error) {
iter := insns.Iterate()
var fiBuf, liBuf bytes.Buffer
for iter.Next() {
if fn := FuncMetadata(iter.Ins); fn != nil {
fi := &funcInfo{
fn: fn,
offset: iter.Offset,
}
if err := fi.marshal(&fiBuf, typeID); err != nil {
return nil, nil, fmt.Errorf("write func info: %w", err)
}
}
if line, ok := iter.Ins.Source().(*Line); ok {
li := &lineInfo{
line: line,
offset: iter.Offset,
}
if err := li.marshal(&liBuf); err != nil {
return nil, nil, fmt.Errorf("write line info: %w", err)
}
}
}
return fiBuf.Bytes(), liBuf.Bytes(), nil
}
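Round-tripping back to wire format can then be sketched with the Spec's TypeID method as the id resolver, continuing the assumptions above:

	funcInfoBytes, lineInfoBytes, err := btf.MarshalExtInfos(insns, spec.TypeID)
	if err != nil {
		panic(err)
	}
	fmt.Println(len(funcInfoBytes), len(lineInfoBytes))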
// btfExtHeader is found at the start of the .BTF.ext section.
type btfExtHeader struct {
Magic uint16
Version uint8
Flags uint8
// HdrLen is larger than the size of struct btfExtHeader when it is
// immediately followed by a btfExtCOREHeader.
HdrLen uint32
FuncInfoOff uint32
FuncInfoLen uint32
LineInfoOff uint32
LineInfoLen uint32
}
// parseBTFExtHeader parses the header of the .BTF.ext section.
func parseBTFExtHeader(r io.Reader, bo binary.ByteOrder) (*btfExtHeader, error) {
var header btfExtHeader
if err := binary.Read(r, bo, &header); err != nil {
return nil, fmt.Errorf("can't read header: %v", err)
}
if header.Magic != btfMagic {
return nil, fmt.Errorf("incorrect magic value %v", header.Magic)
}
if header.Version != 1 {
return nil, fmt.Errorf("unexpected version %v", header.Version)
}
if header.Flags != 0 {
return nil, fmt.Errorf("unsupported flags %v", header.Flags)
}
if int64(header.HdrLen) < int64(binary.Size(&header)) {
return nil, fmt.Errorf("header length shorter than btfExtHeader size")
}
return &header, nil
}
// funcInfoStart returns the offset from the beginning of the .BTF.ext section
// to the start of its func_info entries.
func (h *btfExtHeader) funcInfoStart() int64 {
return int64(h.HdrLen + h.FuncInfoOff)
}
// lineInfoStart returns the offset from the beginning of the .BTF.ext section
// to the start of its line_info entries.
func (h *btfExtHeader) lineInfoStart() int64 {
return int64(h.HdrLen + h.LineInfoOff)
}
// coreReloStart returns the offset from the beginning of the .BTF.ext section
// to the start of its CO-RE relocation entries.
func (h *btfExtHeader) coreReloStart(ch *btfExtCOREHeader) int64 {
return int64(h.HdrLen + ch.COREReloOff)
}
// btfExtCOREHeader is found right after the btfExtHeader when its HdrLen
// field is larger than its size.
type btfExtCOREHeader struct {
COREReloOff uint32
COREReloLen uint32
}
// parseBTFExtCOREHeader parses the tail of the .BTF.ext header. If additional
// header bytes are present, extHeader.HdrLen will be larger than the struct,
// indicating the presence of a CO-RE extension header.
func parseBTFExtCOREHeader(r io.Reader, bo binary.ByteOrder, extHeader *btfExtHeader) (*btfExtCOREHeader, error) {
extHdrSize := int64(binary.Size(extHeader))
remainder := int64(extHeader.HdrLen) - extHdrSize
if remainder == 0 {
return nil, nil
}
var coreHeader btfExtCOREHeader
if err := binary.Read(r, bo, &coreHeader); err != nil {
return nil, fmt.Errorf("can't read header: %v", err)
}
return &coreHeader, nil
}
type btfExtInfoSec struct {
SecNameOff uint32
NumInfo uint32
}
// parseExtInfoSec parses a btf_ext_info_sec header within .BTF.ext,
// appearing within func_info and line_info sub-sections.
// These headers appear once for each program section in the ELF and are
// followed by one or more func/line_info records for the section.
func parseExtInfoSec(r io.Reader, bo binary.ByteOrder, strings *stringTable) (string, *btfExtInfoSec, error) {
var infoHeader btfExtInfoSec
if err := binary.Read(r, bo, &infoHeader); err != nil {
return "", nil, fmt.Errorf("read ext info header: %w", err)
}
secName, err := strings.Lookup(infoHeader.SecNameOff)
if err != nil {
return "", nil, fmt.Errorf("get section name: %w", err)
}
if secName == "" {
return "", nil, fmt.Errorf("extinfo header refers to empty section name")
}
if infoHeader.NumInfo == 0 {
return "", nil, fmt.Errorf("section %s has zero records", secName)
}
return secName, &infoHeader, nil
}
// parseExtInfoRecordSize parses the uint32 at the beginning of a func_infos
// or line_infos segment that describes the length of all extInfoRecords in
// that segment.
func parseExtInfoRecordSize(r io.Reader, bo binary.ByteOrder) (uint32, error) {
const maxRecordSize = 256
var recordSize uint32
if err := binary.Read(r, bo, &recordSize); err != nil {
return 0, fmt.Errorf("can't read record size: %v", err)
}
if recordSize < 4 {
// Need at least InsnOff worth of bytes per record.
return 0, errors.New("record size too short")
}
if recordSize > maxRecordSize {
return 0, fmt.Errorf("record size %v exceeds %v", recordSize, maxRecordSize)
}
return recordSize, nil
}
// The size of a FuncInfo in BTF wire format.
var FuncInfoSize = uint32(binary.Size(bpfFuncInfo{}))
type funcInfo struct {
fn *Func
offset asm.RawInstructionOffset
}
type bpfFuncInfo struct {
// Instruction offset of the function within an ELF section.
InsnOff uint32
TypeID TypeID
}
func newFuncInfo(fi bpfFuncInfo, ts types) (*funcInfo, error) {
typ, err := ts.ByID(fi.TypeID)
if err != nil {
return nil, err
}
fn, ok := typ.(*Func)
if !ok {
return nil, fmt.Errorf("type ID %d is a %T, but expected a Func", fi.TypeID, typ)
}
// C doesn't have anonymous functions, but check just in case.
if fn.Name == "" {
return nil, fmt.Errorf("func with type ID %d doesn't have a name", fi.TypeID)
}
return &funcInfo{
fn,
asm.RawInstructionOffset(fi.InsnOff),
}, nil
}
func newFuncInfos(bfis []bpfFuncInfo, ts types) ([]funcInfo, error) {
fis := make([]funcInfo, 0, len(bfis))
for _, bfi := range bfis {
fi, err := newFuncInfo(bfi, ts)
if err != nil {
return nil, fmt.Errorf("offset %d: %w", bfi.InsnOff, err)
}
fis = append(fis, *fi)
}
sort.Slice(fis, func(i, j int) bool {
return fis[i].offset < fis[j].offset
})
return fis, nil
}
// marshal into the BTF wire format.
func (fi *funcInfo) marshal(w io.Writer, typeID func(Type) (TypeID, error)) error {
id, err := typeID(fi.fn)
if err != nil {
return err
}
bfi := bpfFuncInfo{
InsnOff: uint32(fi.offset),
TypeID: id,
}
return binary.Write(w, internal.NativeEndian, &bfi)
}
// parseFuncInfos parses a func_info sub-section within .BTF.ext into a map of
// func infos indexed by section name.
func parseFuncInfos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map[string][]bpfFuncInfo, error) {
recordSize, err := parseExtInfoRecordSize(r, bo)
if err != nil {
return nil, err
}
result := make(map[string][]bpfFuncInfo)
for {
secName, infoHeader, err := parseExtInfoSec(r, bo, strings)
if errors.Is(err, io.EOF) {
return result, nil
}
if err != nil {
return nil, err
}
records, err := parseFuncInfoRecords(r, bo, recordSize, infoHeader.NumInfo)
if err != nil {
return nil, fmt.Errorf("section %v: %w", secName, err)
}
result[secName] = records
}
}
// parseFuncInfoRecords parses a stream of func_infos into a slice of bpfFuncInfo.
// These records appear after a btf_ext_info_sec header in the func_info
// sub-section of .BTF.ext.
func parseFuncInfoRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, recordNum uint32) ([]bpfFuncInfo, error) {
var out []bpfFuncInfo
var fi bpfFuncInfo
if exp, got := FuncInfoSize, recordSize; exp != got {
// BTF blob's record size differs from the struct we know how to parse.
return nil, fmt.Errorf("expected FuncInfo record size %d, but BTF blob contains %d", exp, got)
}
for i := uint32(0); i < recordNum; i++ {
if err := binary.Read(r, bo, &fi); err != nil {
return nil, fmt.Errorf("can't read function info: %v", err)
}
if fi.InsnOff%asm.InstructionSize != 0 {
return nil, fmt.Errorf("offset %v is not aligned with instruction size", fi.InsnOff)
}
// ELF tracks offset in bytes, the kernel expects raw BPF instructions.
// Convert as early as possible.
fi.InsnOff /= asm.InstructionSize
out = append(out, fi)
}
return out, nil
}
var LineInfoSize = uint32(binary.Size(bpfLineInfo{}))
// Line represents the location and contents of a single line of source
// code a BPF ELF was compiled from.
type Line struct {
fileName string
line string
lineNumber uint32
lineColumn uint32
// TODO: We should get rid of the fields below, but for that we need to be
// able to write BTF.
fileNameOff uint32
lineOff uint32
}
func (li *Line) FileName() string {
return li.fileName
}
func (li *Line) Line() string {
return li.line
}
func (li *Line) LineNumber() uint32 {
return li.lineNumber
}
func (li *Line) LineColumn() uint32 {
return li.lineColumn
}
func (li *Line) String() string {
return li.line
}
type lineInfo struct {
line *Line
offset asm.RawInstructionOffset
}
// Constants for the format of bpfLineInfo.LineCol.
const (
bpfLineShift = 10
bpfLineMax = (1 << (32 - bpfLineShift)) - 1
bpfColumnMax = (1 << bpfLineShift) - 1
)
type bpfLineInfo struct {
// Instruction offset of the line within the whole instruction stream, in instructions.
InsnOff uint32
FileNameOff uint32
LineOff uint32
LineCol uint32
}
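A worked example of the LineCol packing defined by the constants above: the line number occupies the upper 22 bits and the column the lower 10.

// line 7, column 3 packed into a single uint32:
lineCol := uint32(7)<<bpfLineShift | 3 // 7<<10 | 3 == 7171
lineNumber := lineCol >> bpfLineShift  // 7
lineColumn := lineCol & bpfColumnMax   // 3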
func newLineInfo(li bpfLineInfo, strings *stringTable) (*lineInfo, error) {
line, err := strings.Lookup(li.LineOff)
if err != nil {
return nil, fmt.Errorf("lookup of line: %w", err)
}
fileName, err := strings.Lookup(li.FileNameOff)
if err != nil {
return nil, fmt.Errorf("lookup of filename: %w", err)
}
lineNumber := li.LineCol >> bpfLineShift
lineColumn := li.LineCol & bpfColumnMax
return &lineInfo{
&Line{
fileName,
line,
lineNumber,
lineColumn,
li.FileNameOff,
li.LineOff,
},
asm.RawInstructionOffset(li.InsnOff),
}, nil
}
func newLineInfos(blis []bpfLineInfo, strings *stringTable) ([]lineInfo, error) {
lis := make([]lineInfo, 0, len(blis))
for _, bli := range blis {
li, err := newLineInfo(bli, strings)
if err != nil {
return nil, fmt.Errorf("offset %d: %w", bli.InsnOff, err)
}
lis = append(lis, *li)
}
sort.Slice(lis, func(i, j int) bool {
return lis[i].offset < lis[j].offset
})
return lis, nil
}
// marshal writes the binary representation of the LineInfo to w.
func (li *lineInfo) marshal(w io.Writer) error {
line := li.line
if line.lineNumber > bpfLineMax {
return fmt.Errorf("line %d exceeds %d", line.lineNumber, bpfLineMax)
}
if line.lineColumn > bpfColumnMax {
return fmt.Errorf("column %d exceeds %d", line.lineColumn, bpfColumnMax)
}
bli := bpfLineInfo{
uint32(li.offset),
line.fileNameOff,
line.lineOff,
(line.lineNumber << bpfLineShift) | line.lineColumn,
}
return binary.Write(w, internal.NativeEndian, &bli)
}
// parseLineInfos parses a line_info sub-section within .BTF.ext into a map of
// line infos indexed by section name.
func parseLineInfos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map[string][]bpfLineInfo, error) {
recordSize, err := parseExtInfoRecordSize(r, bo)
if err != nil {
return nil, err
}
result := make(map[string][]bpfLineInfo)
for {
secName, infoHeader, err := parseExtInfoSec(r, bo, strings)
if errors.Is(err, io.EOF) {
return result, nil
}
if err != nil {
return nil, err
}
records, err := parseLineInfoRecords(r, bo, recordSize, infoHeader.NumInfo)
if err != nil {
return nil, fmt.Errorf("section %v: %w", secName, err)
}
result[secName] = records
}
}
// parseLineInfoRecords parses a stream of line_infos into a slice of bpfLineInfo.
// These records appear after a btf_ext_info_sec header in the line_info
// sub-section of .BTF.ext.
func parseLineInfoRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, recordNum uint32) ([]bpfLineInfo, error) {
var out []bpfLineInfo
var li bpfLineInfo
if exp, got := uint32(binary.Size(li)), recordSize; exp != got {
// BTF blob's record size differs from the struct we know how to parse.
return nil, fmt.Errorf("expected LineInfo record size %d, but BTF blob contains %d", exp, got)
}
for i := uint32(0); i < recordNum; i++ {
if err := binary.Read(r, bo, &li); err != nil {
return nil, fmt.Errorf("can't read line info: %v", err)
}
if li.InsnOff%asm.InstructionSize != 0 {
return nil, fmt.Errorf("offset %v is not aligned with instruction size", li.InsnOff)
}
// ELF tracks offset in bytes, the kernel expects raw BPF instructions.
// Convert as early as possible.
li.InsnOff /= asm.InstructionSize
out = append(out, li)
}
return out, nil
}
// bpfCORERelo matches the kernel's struct bpf_core_relo.
type bpfCORERelo struct {
InsnOff uint32
TypeID TypeID
AccessStrOff uint32
Kind coreKind
}
type CORERelocation struct {
typ Type
accessor coreAccessor
kind coreKind
}
func CORERelocationMetadata(ins *asm.Instruction) *CORERelocation {
relo, _ := ins.Metadata.Get(coreRelocationMeta{}).(*CORERelocation)
return relo
}
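A short in-package sketch of reading that metadata back, e.g. to count instructions that still carry CO-RE relocations; insns is an assumed asm.Instructions:

pending := 0
iter := insns.Iterate()
for iter.Next() {
	if relo := CORERelocationMetadata(iter.Ins); relo != nil {
		pending++
	}
}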
type coreRelocationInfo struct {
relo *CORERelocation
offset asm.RawInstructionOffset
}
func newRelocationInfo(relo bpfCORERelo, ts types, strings *stringTable) (*coreRelocationInfo, error) {
typ, err := ts.ByID(relo.TypeID)
if err != nil {
return nil, err
}
accessorStr, err := strings.Lookup(relo.AccessStrOff)
if err != nil {
return nil, err
}
accessor, err := parseCOREAccessor(accessorStr)
if err != nil {
return nil, fmt.Errorf("accessor %q: %s", accessorStr, err)
}
return &coreRelocationInfo{
&CORERelocation{
typ,
accessor,
relo.Kind,
},
asm.RawInstructionOffset(relo.InsnOff),
}, nil
}
func newRelocationInfos(brs []bpfCORERelo, ts types, strings *stringTable) ([]coreRelocationInfo, error) {
rs := make([]coreRelocationInfo, 0, len(brs))
for _, br := range brs {
relo, err := newRelocationInfo(br, ts, strings)
if err != nil {
return nil, fmt.Errorf("offset %d: %w", br.InsnOff, err)
}
rs = append(rs, *relo)
}
sort.Slice(rs, func(i, j int) bool {
return rs[i].offset < rs[j].offset
})
return rs, nil
}
var extInfoReloSize = binary.Size(bpfCORERelo{})
// parseCORERelos parses a core_relos sub-section within .BTF.ext into a map of
// CO-RE relocations indexed by section name.
func parseCORERelos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map[string][]bpfCORERelo, error) {
recordSize, err := parseExtInfoRecordSize(r, bo)
if err != nil {
return nil, err
}
if recordSize != uint32(extInfoReloSize) {
return nil, fmt.Errorf("expected record size %d, got %d", extInfoReloSize, recordSize)
}
result := make(map[string][]bpfCORERelo)
for {
secName, infoHeader, err := parseExtInfoSec(r, bo, strings)
if errors.Is(err, io.EOF) {
return result, nil
}
if err != nil {
return nil, err
}
records, err := parseCOREReloRecords(r, bo, recordSize, infoHeader.NumInfo)
if err != nil {
return nil, fmt.Errorf("section %v: %w", secName, err)
}
result[secName] = records
}
}
// parseCOREReloRecords parses a stream of CO-RE relocation entries into a
// slice of bpfCORERelo. These records appear after a btf_ext_info_sec header in the
// core_relos sub-section of .BTF.ext.
func parseCOREReloRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, recordNum uint32) ([]bpfCORERelo, error) {
var out []bpfCORERelo
var relo bpfCORERelo
for i := uint32(0); i < recordNum; i++ {
if err := binary.Read(r, bo, &relo); err != nil {
return nil, fmt.Errorf("can't read CO-RE relocation: %v", err)
}
if relo.InsnOff%asm.InstructionSize != 0 {
return nil, fmt.Errorf("offset %v is not aligned with instruction size", relo.InsnOff)
}
// ELF tracks offset in bytes, the kernel expects raw BPF instructions.
// Convert as early as possible.
relo.InsnOff /= asm.InstructionSize
out = append(out, relo)
}
return out, nil
}

319
vendor/github.com/cilium/ebpf/btf/format.go generated vendored Normal file
View file

@ -0,0 +1,319 @@
package btf
import (
"errors"
"fmt"
"strings"
)
var errNestedTooDeep = errors.New("nested too deep")
// GoFormatter converts a Type to Go syntax.
//
// A zero GoFormatter is valid to use.
type GoFormatter struct {
w strings.Builder
// Types present in this map are referred to using the given name if they
// are encountered when outputting another type.
Names map[Type]string
// Identifier is called for each field of struct-like types. By default the
// field name is used as is.
Identifier func(string) string
// EnumIdentifier is called for each element of an enum. By default the
// name of the enum type is concatenated with Identifier(element).
EnumIdentifier func(name, element string) string
}
// TypeDeclaration generates a Go type declaration for a BTF type.
func (gf *GoFormatter) TypeDeclaration(name string, typ Type) (string, error) {
gf.w.Reset()
if err := gf.writeTypeDecl(name, typ); err != nil {
return "", err
}
return gf.w.String(), nil
}
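Sketch of typical use, with typ standing in for a Type (say a *Struct) obtained from a parsed Spec; per the doc comment above, the zero GoFormatter is usable directly:

var gf GoFormatter
decl, err := gf.TypeDeclaration("event", typ)
if err != nil {
	return err
}
// decl might read: type event struct { Pid uint32; Comm [16]uint8; }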
func (gf *GoFormatter) identifier(s string) string {
if gf.Identifier != nil {
return gf.Identifier(s)
}
return s
}
func (gf *GoFormatter) enumIdentifier(name, element string) string {
if gf.EnumIdentifier != nil {
return gf.EnumIdentifier(name, element)
}
return name + gf.identifier(element)
}
// writeTypeDecl outputs a declaration of the given type.
//
// It encodes https://golang.org/ref/spec#Type_declarations:
//
// type foo struct { bar uint32; }
// type bar int32
func (gf *GoFormatter) writeTypeDecl(name string, typ Type) error {
if name == "" {
return fmt.Errorf("need a name for type %s", typ)
}
switch v := skipQualifiers(typ).(type) {
case *Enum:
fmt.Fprintf(&gf.w, "type %s ", name)
switch v.Size {
case 1:
gf.w.WriteString("int8")
case 2:
gf.w.WriteString("int16")
case 4:
gf.w.WriteString("int32")
case 8:
gf.w.WriteString("int64")
default:
return fmt.Errorf("%s: invalid enum size %d", typ, v.Size)
}
if len(v.Values) == 0 {
return nil
}
gf.w.WriteString("; const ( ")
for _, ev := range v.Values {
id := gf.enumIdentifier(name, ev.Name)
fmt.Fprintf(&gf.w, "%s %s = %d; ", id, name, ev.Value)
}
gf.w.WriteString(")")
return nil
default:
fmt.Fprintf(&gf.w, "type %s ", name)
return gf.writeTypeLit(v, 0)
}
}
// writeType outputs the name of a named type or a literal describing the type.
//
// It encodes https://golang.org/ref/spec#Types.
//
// foo (if foo is a named type)
// uint32
func (gf *GoFormatter) writeType(typ Type, depth int) error {
typ = skipQualifiers(typ)
name := gf.Names[typ]
if name != "" {
gf.w.WriteString(name)
return nil
}
return gf.writeTypeLit(typ, depth)
}
// writeTypeLit outputs a literal describing the type.
//
// The function ignores named types.
//
// It encodes https://golang.org/ref/spec#TypeLit.
//
// struct { bar uint32; }
// uint32
func (gf *GoFormatter) writeTypeLit(typ Type, depth int) error {
depth++
if depth > maxTypeDepth {
return errNestedTooDeep
}
var err error
switch v := skipQualifiers(typ).(type) {
case *Int:
gf.writeIntLit(v)
case *Enum:
gf.w.WriteString("int32")
case *Typedef:
err = gf.writeType(v.Type, depth)
case *Array:
fmt.Fprintf(&gf.w, "[%d]", v.Nelems)
err = gf.writeType(v.Type, depth)
case *Struct:
err = gf.writeStructLit(v.Size, v.Members, depth)
case *Union:
// Always choose the first member to represent the union in Go.
err = gf.writeStructLit(v.Size, v.Members[:1], depth)
case *Datasec:
err = gf.writeDatasecLit(v, depth)
default:
return fmt.Errorf("type %T: %w", v, ErrNotSupported)
}
if err != nil {
return fmt.Errorf("%s: %w", typ, err)
}
return nil
}
func (gf *GoFormatter) writeIntLit(i *Int) {
// NB: Encoding.IsChar is ignored.
if i.Encoding.IsBool() && i.Size == 1 {
gf.w.WriteString("bool")
return
}
bits := i.Size * 8
if i.Encoding.IsSigned() {
fmt.Fprintf(&gf.w, "int%d", bits)
} else {
fmt.Fprintf(&gf.w, "uint%d", bits)
}
}
func (gf *GoFormatter) writeStructLit(size uint32, members []Member, depth int) error {
gf.w.WriteString("struct { ")
prevOffset := uint32(0)
skippedBitfield := false
for i, m := range members {
if m.BitfieldSize > 0 {
skippedBitfield = true
continue
}
offset := m.Offset.Bytes()
if n := offset - prevOffset; skippedBitfield && n > 0 {
fmt.Fprintf(&gf.w, "_ [%d]byte /* unsupported bitfield */; ", n)
} else {
gf.writePadding(n)
}
size, err := Sizeof(m.Type)
if err != nil {
return fmt.Errorf("field %d: %w", i, err)
}
prevOffset = offset + uint32(size)
if err := gf.writeStructField(m, depth); err != nil {
return fmt.Errorf("field %d: %w", i, err)
}
}
gf.writePadding(size - prevOffset)
gf.w.WriteString("}")
return nil
}
func (gf *GoFormatter) writeStructField(m Member, depth int) error {
if m.BitfieldSize > 0 {
return fmt.Errorf("bitfields are not supported")
}
if m.Offset%8 != 0 {
return fmt.Errorf("unsupported offset %d", m.Offset)
}
if m.Name == "" {
// Special case a nested anonymous union like
// struct foo { union { int bar; int baz }; }
// by replacing the whole union with its first member.
union, ok := m.Type.(*Union)
if !ok {
return fmt.Errorf("anonymous fields are not supported")
}
if len(union.Members) == 0 {
return errors.New("empty anonymous union")
}
depth++
if depth > maxTypeDepth {
return errNestedTooDeep
}
m := union.Members[0]
size, err := Sizeof(m.Type)
if err != nil {
return err
}
if err := gf.writeStructField(m, depth); err != nil {
return err
}
gf.writePadding(union.Size - uint32(size))
return nil
}
fmt.Fprintf(&gf.w, "%s ", gf.identifier(m.Name))
if err := gf.writeType(m.Type, depth); err != nil {
return err
}
gf.w.WriteString("; ")
return nil
}
func (gf *GoFormatter) writeDatasecLit(ds *Datasec, depth int) error {
gf.w.WriteString("struct { ")
prevOffset := uint32(0)
for i, vsi := range ds.Vars {
v := vsi.Type.(*Var)
if v.Linkage != GlobalVar {
// Ignore static, extern, etc. for now.
continue
}
if v.Name == "" {
return fmt.Errorf("variable %d: empty name", i)
}
gf.writePadding(vsi.Offset - prevOffset)
prevOffset = vsi.Offset + vsi.Size
fmt.Fprintf(&gf.w, "%s ", gf.identifier(v.Name))
if err := gf.writeType(v.Type, depth); err != nil {
return fmt.Errorf("variable %d: %w", i, err)
}
gf.w.WriteString("; ")
}
gf.writePadding(ds.Size - prevOffset)
gf.w.WriteString("}")
return nil
}
func (gf *GoFormatter) writePadding(bytes uint32) {
if bytes > 0 {
fmt.Fprintf(&gf.w, "_ [%d]byte; ", bytes)
}
}
func skipQualifiers(typ Type) Type {
result := typ
for depth := 0; depth <= maxTypeDepth; depth++ {
switch v := (result).(type) {
case qualifier:
result = v.qualify()
default:
return result
}
}
return &cycle{typ}
}

121
vendor/github.com/cilium/ebpf/btf/handle.go generated vendored Normal file
View file

@ -0,0 +1,121 @@
package btf
import (
"errors"
"fmt"
"os"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
// HandleInfo describes a Handle.
type HandleInfo struct {
// ID of this handle in the kernel. The ID is only valid as long as the
// associated handle is kept alive.
ID ID
// Name is an identifying name for the BTF, currently only used by the
// kernel.
Name string
// IsKernel is true if the BTF originated with the kernel and not
// userspace.
IsKernel bool
// Size of the raw BTF in bytes.
size uint32
}
func newHandleInfoFromFD(fd *sys.FD) (*HandleInfo, error) {
// We invoke the syscall once with empty BTF and name buffers to learn the
// sizes needed, then invoke it a second time with allocated buffers to
// receive the data.
var btfInfo sys.BtfInfo
if err := sys.ObjInfo(fd, &btfInfo); err != nil {
return nil, fmt.Errorf("get BTF info for fd %s: %w", fd, err)
}
if btfInfo.NameLen > 0 {
// NameLen doesn't account for the terminating NUL.
btfInfo.NameLen++
}
// Don't pull raw BTF by default, since it may be quite large.
btfSize := btfInfo.BtfSize
btfInfo.BtfSize = 0
nameBuffer := make([]byte, btfInfo.NameLen)
btfInfo.Name, btfInfo.NameLen = sys.NewSlicePointerLen(nameBuffer)
if err := sys.ObjInfo(fd, &btfInfo); err != nil {
return nil, err
}
return &HandleInfo{
ID: ID(btfInfo.Id),
Name: unix.ByteSliceToString(nameBuffer),
IsKernel: btfInfo.KernelBtf != 0,
size: btfSize,
}, nil
}
// IsVmlinux returns true if the BTF is for the kernel itself.
func (i *HandleInfo) IsVmlinux() bool {
return i.IsKernel && i.Name == "vmlinux"
}
// IsModule returns true if the BTF is for a kernel module.
func (i *HandleInfo) IsModule() bool {
return i.IsKernel && i.Name != "vmlinux"
}
// HandleIterator allows enumerating BTF blobs loaded into the kernel.
type HandleIterator struct {
// The ID of the last retrieved handle. Only valid after a call to Next.
ID ID
err error
}
// Next retrieves a handle for the next BTF blob.
//
// [Handle.Close] is called if *handle is non-nil to avoid leaking fds.
//
// Returns true if another BTF blob was found. Call [HandleIterator.Err] after
// the function returns false.
func (it *HandleIterator) Next(handle **Handle) bool {
if *handle != nil {
(*handle).Close()
*handle = nil
}
id := it.ID
for {
attr := &sys.BtfGetNextIdAttr{Id: id}
err := sys.BtfGetNextId(attr)
if errors.Is(err, os.ErrNotExist) {
// There are no more BTF objects.
return false
} else if err != nil {
it.err = fmt.Errorf("get next BTF ID: %w", err)
return false
}
id = attr.NextId
*handle, err = NewHandleFromID(id)
if errors.Is(err, os.ErrNotExist) {
// Try again with the next ID.
continue
} else if err != nil {
it.err = fmt.Errorf("retrieve handle for ID %d: %w", id, err)
return false
}
it.ID = id
return true
}
}
// Err returns an error if iteration failed for some reason.
func (it *HandleIterator) Err() error {
return it.err
}
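The intended iteration pattern, as a sketch: Next closes the previously yielded handle itself, so only the final one needs an explicit Close.

var handle *Handle
it := new(HandleIterator)
for it.Next(&handle) {
	fmt.Printf("found BTF blob with ID %d\n", it.ID)
}
if handle != nil {
	handle.Close()
}
if err := it.Err(); err != nil {
	return err
}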

128
vendor/github.com/cilium/ebpf/btf/strings.go generated vendored Normal file
View file

@ -0,0 +1,128 @@
package btf
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
)
type stringTable struct {
base *stringTable
offsets []uint32
strings []string
}
// sizedReader is implemented by bytes.Reader, io.SectionReader, strings.Reader, etc.
type sizedReader interface {
io.Reader
Size() int64
}
func readStringTable(r sizedReader, base *stringTable) (*stringTable, error) {
// When parsing split BTF's string table, the first entry offset is derived
// from the last entry offset of the base BTF.
firstStringOffset := uint32(0)
if base != nil {
idx := len(base.offsets) - 1
firstStringOffset = base.offsets[idx] + uint32(len(base.strings[idx])) + 1
}
// Derived from vmlinux BTF.
const averageStringLength = 16
n := int(r.Size() / averageStringLength)
offsets := make([]uint32, 0, n)
strings := make([]string, 0, n)
offset := firstStringOffset
scanner := bufio.NewScanner(r)
scanner.Split(splitNull)
for scanner.Scan() {
str := scanner.Text()
offsets = append(offsets, offset)
strings = append(strings, str)
offset += uint32(len(str)) + 1
}
if err := scanner.Err(); err != nil {
return nil, err
}
if len(strings) == 0 {
return nil, errors.New("string table is empty")
}
if firstStringOffset == 0 && strings[0] != "" {
return nil, errors.New("first item in string table is non-empty")
}
return &stringTable{base, offsets, strings}, nil
}
func splitNull(data []byte, atEOF bool) (advance int, token []byte, err error) {
i := bytes.IndexByte(data, 0)
if i == -1 {
if atEOF && len(data) > 0 {
return 0, nil, errors.New("string table isn't null terminated")
}
return 0, nil, nil
}
return i + 1, data[:i], nil
}
func (st *stringTable) Lookup(offset uint32) (string, error) {
if st.base != nil && offset <= st.base.offsets[len(st.base.offsets)-1] {
return st.base.lookup(offset)
}
return st.lookup(offset)
}
func (st *stringTable) lookup(offset uint32) (string, error) {
i := search(st.offsets, offset)
if i == len(st.offsets) || st.offsets[i] != offset {
return "", fmt.Errorf("offset %d isn't start of a string", offset)
}
return st.strings[i], nil
}
func (st *stringTable) Length() int {
last := len(st.offsets) - 1
return int(st.offsets[last]) + len(st.strings[last]) + 1
}
func (st *stringTable) Marshal(w io.Writer) error {
for _, str := range st.strings {
_, err := io.WriteString(w, str)
if err != nil {
return err
}
_, err = w.Write([]byte{0})
if err != nil {
return err
}
}
return nil
}
// search is a copy of sort.Search specialised for uint32.
//
// Licensed under https://go.dev/LICENSE
func search(ints []uint32, needle uint32) int {
// Define f(-1) == false and f(n) == true.
// Invariant: f(i-1) == false, f(j) == true.
i, j := 0, len(ints)
for i < j {
h := int(uint(i+j) >> 1) // avoid overflow when computing h
// i ≤ h < j
if !(ints[h] >= needle) {
i = h + 1 // preserves f(i-1) == false
} else {
j = h // preserves f(j) == true
}
}
// i == j, f(i-1) == false, and f(j) (= f(i)) == true => answer is i.
return i
}
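A worked example of the lookup path: search returns the insertion index into the sorted offsets, and lookup treats anything other than an exact hit as an error.

offsets := []uint32{0, 5, 12} // hypothetical sorted string-start offsets
_ = search(offsets, 5)        // 1: exact start of the second string
_ = search(offsets, 7)        // 2: not a string start, so lookup(7) fails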

1212
vendor/github.com/cilium/ebpf/btf/types.go generated vendored Normal file

File diff suppressed because it is too large

View file

@ -4,14 +4,11 @@ import (
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"reflect"
"strings"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/btf"
"github.com/cilium/ebpf/btf"
)
// CollectionOptions control loading a collection into the kernel.
@ -20,6 +17,17 @@ import (
type CollectionOptions struct {
Maps MapOptions
Programs ProgramOptions
// MapReplacements takes a set of Maps that will be used instead of
// creating new ones when loading the CollectionSpec.
//
// For each given Map, there must be a corresponding MapSpec in
// CollectionSpec.Maps, and its type, key/value size, max entries and flags
// must match the values of the MapSpec.
//
// The given Maps are Clone()d before being used in the Collection, so the
// caller can Close() them freely when they are no longer needed.
MapReplacements map[string]*Map
}
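A sketch of supplying a pre-existing Map at load time; the key must match a MapSpec name in the CollectionSpec, and the Map's properties must be compatible:

coll, err := NewCollectionWithOptions(spec, CollectionOptions{
	MapReplacements: map[string]*Map{
		"shared_state": existing, // existing is an assumed, already-created *Map
	},
})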
// CollectionSpec describes a collection.
@ -27,6 +35,10 @@ type CollectionSpec struct {
Maps map[string]*MapSpec
Programs map[string]*ProgramSpec
// Types holds type information about Maps and Programs.
// Modifications to Types are currently undefined behaviour.
Types *btf.Spec
// ByteOrder specifies whether the ELF was compiled for
// big-endian or little-endian architectures.
ByteOrder binary.ByteOrder
@ -42,6 +54,7 @@ func (cs *CollectionSpec) Copy() *CollectionSpec {
Maps: make(map[string]*MapSpec, len(cs.Maps)),
Programs: make(map[string]*ProgramSpec, len(cs.Programs)),
ByteOrder: cs.ByteOrder,
Types: cs.Types,
}
for name, spec := range cs.Maps {
@ -61,19 +74,21 @@ func (cs *CollectionSpec) Copy() *CollectionSpec {
// when calling NewCollection. Any named maps are removed from CollectionSpec.Maps.
//
// Returns an error if a named map isn't used in at least one program.
//
// Deprecated: Pass CollectionOptions.MapReplacements when loading the Collection
// instead.
func (cs *CollectionSpec) RewriteMaps(maps map[string]*Map) error {
for symbol, m := range maps {
// have we seen a program that uses this symbol / map
seen := false
fd := m.FD()
for progName, progSpec := range cs.Programs {
err := progSpec.Instructions.RewriteMapPtr(symbol, fd)
err := progSpec.Instructions.AssociateMap(symbol, m)
switch {
case err == nil:
seen = true
case asm.IsUnreferencedSymbol(err):
case errors.Is(err, asm.ErrUnreferencedSymbol):
// Not all programs need to use the map
default:
@ -107,34 +122,67 @@ func (cs *CollectionSpec) RewriteMaps(maps map[string]*Map) error {
//
// Returns an error if a constant doesn't exist.
func (cs *CollectionSpec) RewriteConstants(consts map[string]interface{}) error {
rodata := cs.Maps[".rodata"]
if rodata == nil {
return errors.New("missing .rodata section")
replaced := make(map[string]bool)
for name, spec := range cs.Maps {
if !strings.HasPrefix(name, ".rodata") {
continue
}
b, ds, err := spec.dataSection()
if errors.Is(err, errMapNoBTFValue) {
// Data sections without a BTF Datasec are valid, but don't support
// constant replacements.
continue
}
if err != nil {
return fmt.Errorf("map %s: %w", name, err)
}
// MapSpec.Copy() performs a shallow copy. Fully copy the byte slice
// to avoid any changes affecting other copies of the MapSpec.
cpy := make([]byte, len(b))
copy(cpy, b)
for _, v := range ds.Vars {
vname := v.Type.TypeName()
replacement, ok := consts[vname]
if !ok {
continue
}
if replaced[vname] {
return fmt.Errorf("section %s: duplicate variable %s", name, vname)
}
if int(v.Offset+v.Size) > len(cpy) {
return fmt.Errorf("section %s: offset %d(+%d) for variable %s is out of bounds", name, v.Offset, v.Size, vname)
}
b, err := marshalBytes(replacement, int(v.Size))
if err != nil {
return fmt.Errorf("marshaling constant replacement %s: %w", vname, err)
}
copy(cpy[v.Offset:v.Offset+v.Size], b)
replaced[vname] = true
}
spec.Contents[0] = MapKV{Key: uint32(0), Value: cpy}
}
if rodata.BTF == nil {
return errors.New(".rodata section has no BTF")
var missing []string
for c := range consts {
if !replaced[c] {
missing = append(missing, c)
}
}
if n := len(rodata.Contents); n != 1 {
return fmt.Errorf("expected one key in .rodata, found %d", n)
if len(missing) != 0 {
return fmt.Errorf("spec is missing one or more constants: %s", strings.Join(missing, ","))
}
kv := rodata.Contents[0]
value, ok := kv.Value.([]byte)
if !ok {
return fmt.Errorf("first value in .rodata is %T not []byte", kv.Value)
}
buf := make([]byte, len(value))
copy(buf, value)
err := patchValue(buf, rodata.BTF.Value, consts)
if err != nil {
return err
}
rodata.Contents[0] = MapKV{kv.Key, buf}
return nil
}
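For example, overriding an assumed volatile const from the BPF C source before load:

err := spec.RewriteConstants(map[string]interface{}{
	"sample_rate": uint32(100), // corresponds to a volatile const __u32 in the C code
})
if err != nil {
	// the constant was not found in any .rodata* section
}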
@ -187,6 +235,9 @@ func (cs *CollectionSpec) Assign(to interface{}) error {
// LoadAndAssign loads Maps and Programs into the kernel and assigns them
// to a struct.
//
// Omitting Map/Program.Close() during application shutdown is an error.
// See the package documentation for details around Map and Program lifecycle.
//
// This function is a shortcut to manually checking the presence
// of maps and programs in a CollectionSpec. Consider using bpf2go
// if this sounds useful.
@ -209,15 +260,21 @@ func (cs *CollectionSpec) Assign(to interface{}) error {
// Returns an error if any of the fields can't be found, or
// if the same Map or Program is assigned multiple times.
func (cs *CollectionSpec) LoadAndAssign(to interface{}, opts *CollectionOptions) error {
loader := newCollectionLoader(cs, opts)
defer loader.cleanup()
loader, err := newCollectionLoader(cs, opts)
if err != nil {
return err
}
defer loader.close()
// Support assigning Programs and Maps, lazy-loading the required objects.
assignedMaps := make(map[string]bool)
assignedProgs := make(map[string]bool)
getValue := func(typ reflect.Type, name string) (interface{}, error) {
switch typ {
case reflect.TypeOf((*Program)(nil)):
assignedProgs[name] = true
return loader.loadProgram(name)
case reflect.TypeOf((*Map)(nil)):
@ -244,15 +301,26 @@ func (cs *CollectionSpec) LoadAndAssign(to interface{}, opts *CollectionOptions)
switch m.typ {
case ProgramArray:
// Require all lazy-loaded ProgramArrays to be assigned to the given object.
// Without any references, they will be closed on the first GC and all tail
// calls into them will miss.
if !assignedMaps[n] {
// The kernel empties a ProgramArray once the last user space reference
// to it closes, which leads to failed tail calls. Combined with the library
// closing map fds via GC finalizers this can lead to surprising behaviour.
// Only allow unassigned ProgramArrays when the library hasn't pre-populated
// any entries from static value declarations. At this point, we know the map
// is empty and there's no way for the caller to interact with the map going
// forward.
if !assignedMaps[n] && len(cs.Maps[n].Contents) > 0 {
return fmt.Errorf("ProgramArray %s must be assigned to prevent missed tail calls", n)
}
}
}
loader.finalize()
// Prevent loader.cleanup() from closing assigned Maps and Programs.
for m := range assignedMaps {
delete(loader.maps, m)
}
for p := range assignedProgs {
delete(loader.programs, p)
}
return nil
}
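Typical use tags the fields of a caller-defined struct with the ebpf key; the object names here are assumed:

var objs struct {
	Prog     *Program `ebpf:"xdp_main"`
	Counters *Map     `ebpf:"counters"`
}
if err := spec.LoadAndAssign(&objs, nil); err != nil {
	return err
}
defer objs.Prog.Close()
defer objs.Counters.Close()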
@ -264,15 +332,26 @@ type Collection struct {
Maps map[string]*Map
}
// NewCollection creates a Collection from a specification.
// NewCollection creates a Collection from the given spec, creating and
// loading its declared resources into the kernel.
//
// Omitting Collection.Close() during application shutdown is an error.
// See the package documentation for details around Map and Program lifecycle.
func NewCollection(spec *CollectionSpec) (*Collection, error) {
return NewCollectionWithOptions(spec, CollectionOptions{})
}
// NewCollectionWithOptions creates a Collection from a specification.
// NewCollectionWithOptions creates a Collection from the given spec using
// options, creating and loading its declared resources into the kernel.
//
// Omitting Collection.Close() during application shutdown is an error.
// See the package documentation for details around Map and Program lifecycle.
func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (*Collection, error) {
loader := newCollectionLoader(spec, &opts)
defer loader.cleanup()
loader, err := newCollectionLoader(spec, &opts)
if err != nil {
return nil, err
}
defer loader.close()
// Create maps first, as their fds need to be linked into programs.
for mapName := range spec.Maps {
@ -281,7 +360,11 @@ func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (*Co
}
}
for progName := range spec.Programs {
for progName, prog := range spec.Programs {
if prog.Type == UnspecifiedProgram {
continue
}
if _, err := loader.loadProgram(progName); err != nil {
return nil, err
}
@ -293,9 +376,9 @@ func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (*Co
return nil, err
}
// Prevent loader.cleanup from closing maps and programs.
maps, progs := loader.maps, loader.programs
loader.finalize()
loader.maps, loader.programs = nil, nil
return &Collection{
progs,
@ -305,13 +388,11 @@ func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (*Co
type handleCache struct {
btfHandles map[*btf.Spec]*btf.Handle
btfSpecs map[io.ReaderAt]*btf.Spec
}
func newHandleCache() *handleCache {
return &handleCache{
btfHandles: make(map[*btf.Spec]*btf.Handle),
btfSpecs: make(map[io.ReaderAt]*btf.Spec),
}
}
@ -329,20 +410,6 @@ func (hc handleCache) btfHandle(spec *btf.Spec) (*btf.Handle, error) {
return handle, nil
}
func (hc handleCache) btfSpec(rd io.ReaderAt) (*btf.Spec, error) {
if hc.btfSpecs[rd] != nil {
return hc.btfSpecs[rd], nil
}
spec, err := btf.LoadSpecFromReader(rd)
if err != nil {
return nil, err
}
hc.btfSpecs[rd] = spec
return spec, nil
}
func (hc handleCache) close() {
for _, handle := range hc.btfHandles {
handle.Close()
@ -357,30 +424,34 @@ type collectionLoader struct {
handles *handleCache
}
func newCollectionLoader(coll *CollectionSpec, opts *CollectionOptions) *collectionLoader {
func newCollectionLoader(coll *CollectionSpec, opts *CollectionOptions) (*collectionLoader, error) {
if opts == nil {
opts = &CollectionOptions{}
}
// Check for existing MapSpecs in the CollectionSpec for all provided replacement maps.
for name, m := range opts.MapReplacements {
spec, ok := coll.Maps[name]
if !ok {
return nil, fmt.Errorf("replacement map %s not found in CollectionSpec", name)
}
if err := spec.checkCompatibility(m); err != nil {
return nil, fmt.Errorf("using replacement map %s: %w", spec.Name, err)
}
}
return &collectionLoader{
coll,
opts,
make(map[string]*Map),
make(map[string]*Program),
newHandleCache(),
}
}, nil
}
// finalize should be called when all the collectionLoader's resources
// have been successfully loaded into the kernel and populated with values.
func (cl *collectionLoader) finalize() {
cl.maps, cl.programs = nil, nil
}
// cleanup cleans up all resources left over in the collectionLoader.
// Call finalize() when Map and Program creation/population is successful
// to prevent them from getting closed.
func (cl *collectionLoader) cleanup() {
// close all resources left over in the collectionLoader.
func (cl *collectionLoader) close() {
cl.handles.close()
for _, m := range cl.maps {
m.Close()
@ -400,6 +471,21 @@ func (cl *collectionLoader) loadMap(mapName string) (*Map, error) {
return nil, fmt.Errorf("missing map %s", mapName)
}
if mapSpec.BTF != nil && cl.coll.Types != mapSpec.BTF {
return nil, fmt.Errorf("map %s: BTF doesn't match collection", mapName)
}
if replaceMap, ok := cl.opts.MapReplacements[mapName]; ok {
// Clone the map to avoid closing user's map later on.
m, err := replaceMap.Clone()
if err != nil {
return nil, err
}
cl.maps[mapName] = m
return m, nil
}
m, err := newMapWithOptions(mapSpec, cl.opts.Maps, cl.handles)
if err != nil {
return nil, fmt.Errorf("map %s: %w", mapName, err)
@ -419,33 +505,41 @@ func (cl *collectionLoader) loadProgram(progName string) (*Program, error) {
return nil, fmt.Errorf("unknown program %s", progName)
}
// Bail out early if we know the kernel is going to reject the program.
// This skips loading map dependencies, saving some cleanup work later.
if progSpec.Type == UnspecifiedProgram {
return nil, fmt.Errorf("cannot load program %s: program type is unspecified", progName)
}
if progSpec.BTF != nil && cl.coll.Types != progSpec.BTF {
return nil, fmt.Errorf("program %s: BTF doesn't match collection", progName)
}
progSpec = progSpec.Copy()
// Rewrite any reference to a valid map.
// Rewrite any reference to a valid map in the program's instructions,
// which includes all of its dependencies.
for i := range progSpec.Instructions {
ins := &progSpec.Instructions[i]
if !ins.IsLoadFromMap() || ins.Reference == "" {
if !ins.IsLoadFromMap() || ins.Reference() == "" {
continue
}
if uint32(ins.Constant) != math.MaxUint32 {
// Don't overwrite maps already rewritten, users can
// rewrite programs in the spec themselves
// Don't overwrite map loads containing non-zero map fd's,
// they can be manually included by the caller.
// Map FDs/IDs are placed in the lower 32 bits of Constant.
if int32(ins.Constant) > 0 {
continue
}
m, err := cl.loadMap(ins.Reference)
m, err := cl.loadMap(ins.Reference())
if err != nil {
return nil, fmt.Errorf("program %s: %w", progName, err)
}
fd := m.FD()
if fd < 0 {
return nil, fmt.Errorf("map %s: %w", ins.Reference, internal.ErrClosedFd)
}
if err := ins.RewriteMapPtr(m.FD()); err != nil {
return nil, fmt.Errorf("program %s: map %s: %w", progName, ins.Reference, err)
if err := ins.AssociateMap(m); err != nil {
return nil, fmt.Errorf("program %s: map %s: %w", progName, ins.Reference(), err)
}
}
@ -467,24 +561,30 @@ func (cl *collectionLoader) populateMaps() error {
mapSpec = mapSpec.Copy()
// Replace any object stubs with loaded objects.
// MapSpecs that refer to inner maps or programs within the same
// CollectionSpec do so using strings. These strings are used as the key
// to look up the respective object in the Maps or Programs fields.
// Resolve those references to actual Map or Program resources that
// have been loaded into the kernel.
for i, kv := range mapSpec.Contents {
switch v := kv.Value.(type) {
case programStub:
// loadProgram is idempotent and could return an existing Program.
prog, err := cl.loadProgram(string(v))
if err != nil {
return fmt.Errorf("loading program %s, for map %s: %w", v, mapName, err)
}
mapSpec.Contents[i] = MapKV{kv.Key, prog}
if objName, ok := kv.Value.(string); ok {
switch mapSpec.Type {
case ProgramArray:
// loadProgram is idempotent and could return an existing Program.
prog, err := cl.loadProgram(objName)
if err != nil {
return fmt.Errorf("loading program %s, for map %s: %w", objName, mapName, err)
}
mapSpec.Contents[i] = MapKV{kv.Key, prog}
case mapStub:
// loadMap is idempotent and could return an existing Map.
innerMap, err := cl.loadMap(string(v))
if err != nil {
return fmt.Errorf("loading inner map %s, for map %s: %w", v, mapName, err)
case ArrayOfMaps, HashOfMaps:
// loadMap is idempotent and could return an existing Map.
innerMap, err := cl.loadMap(objName)
if err != nil {
return fmt.Errorf("loading inner map %s, for map %s: %w", objName, mapName, err)
}
mapSpec.Contents[i] = MapKV{kv.Key, innerMap}
}
mapSpec.Contents[i] = MapKV{kv.Key, innerMap}
}
}
@ -497,7 +597,11 @@ func (cl *collectionLoader) populateMaps() error {
return nil
}
// LoadCollection parses an object file and converts it to a collection.
// LoadCollection reads an object file and creates and loads its declared
// resources into the kernel.
//
// Omitting Collection.Close() during application shutdown is an error.
// See the package documentation for details around Map and Program lifecycle.
func LoadCollection(file string) (*Collection, error) {
spec, err := LoadCollectionSpec(file)
if err != nil {

View file

@ -13,4 +13,13 @@
// your application as any other resource.
//
// Use the link subpackage to attach a loaded program to a hook in the kernel.
//
// Note that losing all references to Map and Program resources will cause
// their underlying file descriptors to be closed, potentially removing those
// objects from the kernel. Always retain a reference by e.g. deferring a
// Close() of a Collection or LoadAndAssign object until application exit.
//
// Special care needs to be taken when handling maps of type ProgramArray,
// as the kernel erases its contents when the last userspace or bpffs
// reference disappears, regardless of the map being in active use.
package ebpf
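A minimal end-to-end sketch of that lifecycle (object file path assumed):

package main

import (
	"log"

	"github.com/cilium/ebpf"
)

func main() {
	coll, err := ebpf.LoadCollection("bpf_prog.o")
	if err != nil {
		log.Fatal(err)
	}
	// Retain the reference until shutdown so the kernel objects stay alive.
	defer coll.Close()
	// ... use coll.Programs and coll.Maps ...
}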

View file

@ -13,8 +13,8 @@ import (
"strings"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/btf"
"github.com/cilium/ebpf/internal/unix"
)
@ -26,6 +26,7 @@ type elfCode struct {
license string
version uint32
btf *btf.Spec
extInfo *btf.ExtInfos
}
// LoadCollectionSpec parses an ELF file into a CollectionSpec.
@ -49,7 +50,6 @@ func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) {
if err != nil {
return nil, err
}
defer f.Close()
var (
licenseSection *elf.Section
@ -95,77 +95,29 @@ func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) {
return nil, fmt.Errorf("load version: %w", err)
}
btfSpec, err := btf.LoadSpecFromReader(rd)
btfSpec, btfExtInfo, err := btf.LoadSpecAndExtInfosFromReader(rd)
if err != nil && !errors.Is(err, btf.ErrNotFound) {
return nil, fmt.Errorf("load BTF: %w", err)
}
// Assign symbols to all the sections we're interested in.
symbols, err := f.Symbols()
if err != nil {
return nil, fmt.Errorf("load symbols: %v", err)
}
for _, symbol := range symbols {
idx := symbol.Section
symType := elf.ST_TYPE(symbol.Info)
section := sections[idx]
if section == nil {
continue
}
// Older versions of LLVM don't tag symbols correctly, so keep
// all NOTYPE ones.
keep := symType == elf.STT_NOTYPE
switch section.kind {
case mapSection, btfMapSection, dataSection:
keep = keep || symType == elf.STT_OBJECT
case programSection:
keep = keep || symType == elf.STT_FUNC
}
if !keep || symbol.Name == "" {
continue
}
section.symbols[symbol.Value] = symbol
}
ec := &elfCode{
SafeELFFile: f,
sections: sections,
license: license,
version: version,
btf: btfSpec,
extInfo: btfExtInfo,
}
// Go through relocation sections, and parse the ones for sections we're
// interested in. Make sure that relocations point at valid sections.
for idx, relSection := range relSections {
section := sections[idx]
if section == nil {
continue
}
symbols, err := f.Symbols()
if err != nil {
return nil, fmt.Errorf("load symbols: %v", err)
}
rels, err := ec.loadRelocations(relSection, symbols)
if err != nil {
return nil, fmt.Errorf("relocation for section %q: %w", section.Name, err)
}
ec.assignSymbols(symbols)
for _, rel := range rels {
target := sections[rel.Section]
if target == nil {
return nil, fmt.Errorf("section %q: reference to %q in section %s: %w", section.Name, rel.Name, rel.Section, ErrNotSupported)
}
if target.Flags&elf.SHF_STRINGS > 0 {
return nil, fmt.Errorf("section %q: string is not stack allocated: %w", section.Name, ErrNotSupported)
}
target.references++
}
section.relocations = rels
if err := ec.loadRelocations(relSections, symbols); err != nil {
return nil, fmt.Errorf("load relocations: %w", err)
}
// Collect all the various ways to define maps.
@ -183,12 +135,12 @@ func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) {
}
// Finally, collect programs and link them.
progs, err := ec.loadPrograms()
progs, err := ec.loadProgramSections()
if err != nil {
return nil, fmt.Errorf("load programs: %w", err)
}
return &CollectionSpec{maps, progs, ec.ByteOrder}, nil
return &CollectionSpec{maps, progs, btfSpec, ec.ByteOrder}, nil
}
func loadLicense(sec *elf.Section) (string, error) {
@ -247,12 +199,91 @@ func newElfSection(section *elf.Section, kind elfSectionKind) *elfSection {
}
}
func (ec *elfCode) loadPrograms() (map[string]*ProgramSpec, error) {
var (
progs []*ProgramSpec
libs []*ProgramSpec
)
// assignSymbols takes a list of symbols and assigns them to their
// respective sections, keyed by each symbol's value (its offset within the section).
func (ec *elfCode) assignSymbols(symbols []elf.Symbol) {
for _, symbol := range symbols {
symType := elf.ST_TYPE(symbol.Info)
symSection := ec.sections[symbol.Section]
if symSection == nil {
continue
}
// Anonymous symbols only occur in debug sections which we don't process
// relocations for. Anonymous symbols are not referenced from other sections.
if symbol.Name == "" {
continue
}
// Older versions of LLVM don't tag symbols correctly, so keep
// all NOTYPE ones.
switch symSection.kind {
case mapSection, btfMapSection, dataSection:
if symType != elf.STT_NOTYPE && symType != elf.STT_OBJECT {
continue
}
case programSection:
if symType != elf.STT_NOTYPE && symType != elf.STT_FUNC {
continue
}
// LLVM emits LBB_ (Local Basic Block) symbols that seem to be jump
// targets within sections, but BPF has no use for them.
if symType == elf.STT_NOTYPE && elf.ST_BIND(symbol.Info) == elf.STB_LOCAL &&
strings.HasPrefix(symbol.Name, "LBB") {
continue
}
// Only collect symbols that occur in program/maps/data sections.
default:
continue
}
symSection.symbols[symbol.Value] = symbol
}
}
// loadRelocations iterates .rel* sections and extracts relocation entries for
// sections of interest. Makes sure relocations point at valid sections.
func (ec *elfCode) loadRelocations(relSections map[elf.SectionIndex]*elf.Section, symbols []elf.Symbol) error {
for idx, relSection := range relSections {
section := ec.sections[idx]
if section == nil {
continue
}
rels, err := ec.loadSectionRelocations(relSection, symbols)
if err != nil {
return fmt.Errorf("relocation for section %q: %w", section.Name, err)
}
for _, rel := range rels {
target := ec.sections[rel.Section]
if target == nil {
return fmt.Errorf("section %q: reference to %q in section %s: %w", section.Name, rel.Name, rel.Section, ErrNotSupported)
}
if target.Flags&elf.SHF_STRINGS > 0 {
return fmt.Errorf("section %q: string is not stack allocated: %w", section.Name, ErrNotSupported)
}
target.references++
}
section.relocations = rels
}
return nil
}
// loadProgramSections iterates ec's sections and emits a ProgramSpec
// for each function it finds.
//
// The resulting map is indexed by function name.
func (ec *elfCode) loadProgramSections() (map[string]*ProgramSpec, error) {
progs := make(map[string]*ProgramSpec)
// Generate a ProgramSpec for each function found in each program section.
var export []string
for _, sec := range ec.sections {
if sec.kind != programSection {
continue
@ -262,86 +293,144 @@ func (ec *elfCode) loadPrograms() (map[string]*ProgramSpec, error) {
return nil, fmt.Errorf("section %v: missing symbols", sec.Name)
}
funcSym, ok := sec.symbols[0]
if !ok {
return nil, fmt.Errorf("section %v: no label at start", sec.Name)
}
insns, length, err := ec.loadInstructions(sec)
funcs, err := ec.loadFunctions(sec)
if err != nil {
return nil, fmt.Errorf("program %s: %w", funcSym.Name, err)
return nil, fmt.Errorf("section %v: %w", sec.Name, err)
}
progType, attachType, progFlags, attachTo := getProgType(sec.Name)
spec := &ProgramSpec{
Name: funcSym.Name,
Type: progType,
Flags: progFlags,
AttachType: attachType,
AttachTo: attachTo,
License: ec.license,
KernelVersion: ec.version,
Instructions: insns,
ByteOrder: ec.ByteOrder,
}
for name, insns := range funcs {
spec := &ProgramSpec{
Name: name,
Type: progType,
Flags: progFlags,
AttachType: attachType,
AttachTo: attachTo,
SectionName: sec.Name,
License: ec.license,
KernelVersion: ec.version,
Instructions: insns,
ByteOrder: ec.ByteOrder,
BTF: ec.btf,
}
if ec.btf != nil {
spec.BTF, err = ec.btf.Program(sec.Name, length)
if err != nil && !errors.Is(err, btf.ErrNoExtendedInfo) {
return nil, fmt.Errorf("program %s: %w", funcSym.Name, err)
// Function names must be unique within a single ELF blob.
if progs[name] != nil {
return nil, fmt.Errorf("duplicate program name %s", name)
}
progs[name] = spec
if spec.SectionName != ".text" {
export = append(export, name)
}
}
}
if spec.Type == UnspecifiedProgram {
// There is no single name we can use for "library" sections,
// since they may contain multiple functions. We'll decode the
// labels they contain later on, and then link sections that way.
libs = append(libs, spec)
} else {
progs = append(progs, spec)
flattenPrograms(progs, export)
// Hide programs (e.g. library functions) that were not explicitly emitted
// to an ELF section. These could be exposed in a separate CollectionSpec
// field later to allow them to be modified.
for n, p := range progs {
if p.SectionName == ".text" {
delete(progs, n)
}
}
res := make(map[string]*ProgramSpec, len(progs))
for _, prog := range progs {
err := link(prog, libs)
if err != nil {
return nil, fmt.Errorf("program %s: %w", prog.Name, err)
}
res[prog.Name] = prog
}
return res, nil
return progs, nil
}
func (ec *elfCode) loadInstructions(section *elfSection) (asm.Instructions, uint64, error) {
var (
r = bufio.NewReader(section.Open())
insns asm.Instructions
offset uint64
)
for {
var ins asm.Instruction
n, err := ins.Unmarshal(r, ec.ByteOrder)
if err == io.EOF {
return insns, offset, nil
}
if err != nil {
return nil, 0, fmt.Errorf("offset %d: %w", offset, err)
// loadFunctions extracts instruction streams from the given program section
// starting at each symbol in the section. The section's symbols must already
// be narrowed down to STT_NOTYPE (emitted by clang <8) or STT_FUNC.
//
// The resulting map is indexed by function name.
func (ec *elfCode) loadFunctions(section *elfSection) (map[string]asm.Instructions, error) {
r := bufio.NewReader(section.Open())
// Decode the section's instruction stream.
var insns asm.Instructions
if err := insns.Unmarshal(r, ec.ByteOrder); err != nil {
return nil, fmt.Errorf("decoding instructions for section %s: %w", section.Name, err)
}
if len(insns) == 0 {
return nil, fmt.Errorf("no instructions found in section %s", section.Name)
}
iter := insns.Iterate()
for iter.Next() {
ins := iter.Ins
offset := iter.Offset.Bytes()
// Tag Symbol Instructions.
if sym, ok := section.symbols[offset]; ok {
*ins = ins.WithSymbol(sym.Name)
}
ins.Symbol = section.symbols[offset].Name
// Apply any relocations for the current instruction.
// If no relocation is present, resolve any section-relative function calls.
if rel, ok := section.relocations[offset]; ok {
if err = ec.relocateInstruction(&ins, rel); err != nil {
return nil, 0, fmt.Errorf("offset %d: relocate instruction: %w", offset, err)
if err := ec.relocateInstruction(ins, rel); err != nil {
return nil, fmt.Errorf("offset %d: relocating instruction: %w", offset, err)
}
} else {
if err := referenceRelativeJump(ins, offset, section.symbols); err != nil {
return nil, fmt.Errorf("offset %d: resolving relative jump: %w", offset, err)
}
}
insns = append(insns, ins)
offset += n
}
if ec.extInfo != nil {
ec.extInfo.Assign(insns, section.Name)
}
return splitSymbols(insns)
}
// referenceRelativeJump turns a relative jump to another bpf subprogram within
// the same ELF section into a Reference Instruction.
//
// Up to LLVM 9, calls to subprograms within the same ELF section are sometimes
// encoded using relative jumps instead of relocation entries. These jumps go
// out of bounds of the current program, so their targets must be memoized
// before the section's instruction stream is split.
//
// The relative jump Constant is blinded to -1 and the target Symbol is set as
// the Instruction's Reference so it can be resolved by the linker.
func referenceRelativeJump(ins *asm.Instruction, offset uint64, symbols map[uint64]elf.Symbol) error {
if !ins.IsFunctionReference() || ins.Constant == -1 {
return nil
}
tgt := jumpTarget(offset, *ins)
sym := symbols[tgt].Name
if sym == "" {
return fmt.Errorf("no jump target found at offset %d", tgt)
}
*ins = ins.WithReference(sym)
ins.Constant = -1
return nil
}
// jumpTarget takes ins' offset within an instruction stream (in bytes)
// and returns its absolute jump destination (in bytes) within the
// instruction stream.
func jumpTarget(offset uint64, ins asm.Instruction) uint64 {
// A relative jump instruction describes the number of raw BPF instructions
// to jump; convert the offset into bytes.
dest := ins.Constant * asm.InstructionSize
// The starting point of the jump is the end of the current instruction.
dest += int64(offset + asm.InstructionSize)
if dest < 0 {
return 0
}
return uint64(dest)
}
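Worked example: a jump instruction at byte offset 16 whose Constant is 3 lands 3*8 + (16+8) = 48 bytes into the section, i.e. at instruction index 6.

ins := asm.Instruction{Constant: 3}
_ = jumpTarget(16, ins) // 48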
func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) error {
@ -367,18 +456,12 @@ func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) err
ins.Src = asm.PseudoMapFD
// Mark the instruction as needing an update when creating the
// collection.
if err := ins.RewriteMapPtr(-1); err != nil {
return err
}
case dataSection:
var offset uint32
switch typ {
case elf.STT_SECTION:
if bind != elf.STB_LOCAL {
return fmt.Errorf("direct load: %s: unsupported relocation %s", name, bind)
return fmt.Errorf("direct load: %s: unsupported section relocation %s", name, bind)
}
// This is really a reference to a static symbol, which clang doesn't
@ -387,8 +470,17 @@ func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) err
offset = uint32(uint64(ins.Constant))
case elf.STT_OBJECT:
if bind != elf.STB_GLOBAL {
return fmt.Errorf("direct load: %s: unsupported relocation %s", name, bind)
// LLVM 9 emits OBJECT-LOCAL symbols for anonymous constants.
if bind != elf.STB_GLOBAL && bind != elf.STB_LOCAL {
return fmt.Errorf("direct load: %s: unsupported object relocation %s", name, bind)
}
offset = uint32(rel.Value)
case elf.STT_NOTYPE:
// LLVM 7 emits NOTYPE-LOCAL symbols for anonymous constants.
if bind != elf.STB_LOCAL {
return fmt.Errorf("direct load: %s: unsupported untyped relocation %s", name, bind)
}
offset = uint32(rel.Value)
@ -406,51 +498,71 @@ func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) err
ins.Constant = int64(uint64(offset) << 32)
ins.Src = asm.PseudoMapValue
// Mark the instruction as needing an update when creating the
// collection.
if err := ins.RewriteMapPtr(-1); err != nil {
return err
}
case programSection:
if ins.OpCode.JumpOp() != asm.Call {
return fmt.Errorf("not a call instruction: %s", ins)
}
if ins.Src != asm.PseudoCall {
return fmt.Errorf("call: %s: incorrect source register", name)
}
switch typ {
case elf.STT_NOTYPE, elf.STT_FUNC:
if bind != elf.STB_GLOBAL {
return fmt.Errorf("call: %s: unsupported binding: %s", name, bind)
switch opCode := ins.OpCode; {
case opCode.JumpOp() == asm.Call:
if ins.Src != asm.PseudoCall {
return fmt.Errorf("call: %s: incorrect source register", name)
}
case elf.STT_SECTION:
if bind != elf.STB_LOCAL {
return fmt.Errorf("call: %s: unsupported binding: %s", name, bind)
switch typ {
case elf.STT_NOTYPE, elf.STT_FUNC:
if bind != elf.STB_GLOBAL {
return fmt.Errorf("call: %s: unsupported binding: %s", name, bind)
}
case elf.STT_SECTION:
if bind != elf.STB_LOCAL {
return fmt.Errorf("call: %s: unsupported binding: %s", name, bind)
}
// The function we want to call is in the indicated section,
// at the offset encoded in the instruction itself. Reverse
// the calculation to find the real function we're looking for.
// A value of -1 references the first instruction in the section.
offset := int64(int32(ins.Constant)+1) * asm.InstructionSize
sym, ok := target.symbols[uint64(offset)]
if !ok {
return fmt.Errorf("call: no symbol at offset %d", offset)
}
name = sym.Name
ins.Constant = -1
default:
return fmt.Errorf("call: %s: invalid symbol type %s", name, typ)
}
case opCode.IsDWordLoad():
switch typ {
case elf.STT_FUNC:
if bind != elf.STB_GLOBAL {
return fmt.Errorf("load: %s: unsupported binding: %s", name, bind)
}
case elf.STT_SECTION:
if bind != elf.STB_LOCAL {
return fmt.Errorf("load: %s: unsupported binding: %s", name, bind)
}
// ins.Constant already contains the offset in bytes from the
// start of the section. This is different than a call to a
// static function.
default:
return fmt.Errorf("load: %s: invalid symbol type %s", name, typ)
}
// The function we want to call is in the indicated section,
// at the offset encoded in the instruction itself. Reverse
// the calculation to find the real function we're looking for.
// A value of -1 references the first instruction in the section.
offset := int64(int32(ins.Constant)+1) * asm.InstructionSize
if offset < 0 {
return fmt.Errorf("call: %s: invalid offset %d", name, offset)
}
sym, ok := target.symbols[uint64(offset)]
sym, ok := target.symbols[uint64(ins.Constant)]
if !ok {
return fmt.Errorf("call: %s: no symbol at offset %d", name, offset)
return fmt.Errorf("load: no symbol at offset %d", ins.Constant)
}
ins.Constant = -1
name = sym.Name
ins.Constant = -1
ins.Src = asm.PseudoFunc
default:
return fmt.Errorf("call: %s: invalid symbol type %s", name, typ)
return fmt.Errorf("neither a call nor a load instruction: %v", ins)
}
case undefSection:
@ -468,7 +580,7 @@ func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) err
return fmt.Errorf("relocation to %q: %w", target.Name, ErrNotSupported)
}
ins.Reference = name
*ins = ins.WithReference(name)
return nil
}
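// Illustrative aside (not part of the change): how the call-target offset
// above reverses the compiler's encoding, assuming asm.InstructionSize == 8.
//
//	ins.Constant = 5                        // emitted by LLVM, relative to the next insn
//	offset = (5 + 1) * asm.InstructionSize  // = 48 bytes into the target section
//	sym = target.symbols[48]                // the real function being called
//
// A constant of -1 therefore resolves to byte offset 0, the first
// instruction in the section.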
@ -525,7 +637,7 @@ func (ec *elfCode) loadMaps(maps map[string]*MapSpec) error {
return fmt.Errorf("map %s: reading map tail: %w", mapName, err)
}
if len(extra) > 0 {
spec.Extra = *bytes.NewReader(extra)
spec.Extra = bytes.NewReader(extra)
}
if err := spec.clampPerfEventArraySize(); err != nil {
@ -554,7 +666,7 @@ func (ec *elfCode) loadBTFMaps(maps map[string]*MapSpec) error {
// Each section must appear as a DataSec in the ELF's BTF blob.
var ds *btf.Datasec
if err := ec.btf.FindType(sec.Name, &ds); err != nil {
if err := ec.btf.TypeByName(sec.Name, &ds); err != nil {
return fmt.Errorf("cannot find section '%s' in BTF: %w", sec.Name, err)
}
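// Hedged usage sketch of the renamed lookup (spec is assumed to be a
// *btf.Spec): TypeByName fills a non-nil **T and wraps btf.ErrNotFound
// when no type of that name and kind exists.
//
//	var ds *btf.Datasec
//	if err := spec.TypeByName(".rodata", &ds); err != nil {
//		return err // errors.Is(err, btf.ErrNotFound) if absent
//	}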
@ -617,14 +729,6 @@ func (ec *elfCode) loadBTFMaps(maps map[string]*MapSpec) error {
return nil
}
// A programStub is a placeholder for a Program to be inserted at a certain map key.
// It needs to be resolved into a Program later on in the loader process.
type programStub string
// A mapStub is a placeholder for a Map to be inserted at a certain map key.
// It needs to be resolved into a Map later on in the loader process.
type mapStub string
// mapSpecFromBTF produces a MapSpec based on a btf.Struct def representing
// a BTF map definition. The name and spec arguments will be copied to the
// resulting MapSpec, and inner must be true on any recursive invocations.
@ -811,7 +915,9 @@ func mapSpecFromBTF(es *elfSection, vs *btf.VarSecinfo, def *btf.Struct, spec *b
ValueSize: valueSize,
MaxEntries: maxEntries,
Flags: flags,
BTF: &btf.Map{Spec: spec, Key: key, Value: value},
Key: key,
Value: value,
BTF: spec,
Pinning: pinType,
InnerMap: innerMapSpec,
Contents: contents,
@ -863,7 +969,7 @@ func resolveBTFValuesContents(es *elfSection, vs *btf.VarSecinfo, member btf.Mem
// The offset of the 'values' member within the _struct_ (in bits)
// is the starting point of the array. Convert to bytes. Add VarSecinfo
// offset to get the absolute position in the ELF blob.
start := (member.OffsetBits / 8) + vs.Offset
start := member.Offset.Bytes() + vs.Offset
// 'values' is encoded in BTF as a zero (variable) length struct
// member, and its contents run until the end of the VarSecinfo.
// Add VarSecinfo offset to get the absolute position in the ELF blob.
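// Worked example with illustrative numbers: a 'values' member at bit
// offset 192 starts at 192/8 = 24 bytes into the struct; with a vs.Offset
// of 8, the array contents begin at byte 32 of the section and run to the
// end of the VarSecinfo.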
@ -898,9 +1004,9 @@ func resolveBTFValuesContents(es *elfSection, vs *btf.VarSecinfo, member btf.Mem
// skipped here.
switch t := elf.ST_TYPE(r.Info); t {
case elf.STT_FUNC:
contents = append(contents, MapKV{uint32(k), programStub(r.Name)})
contents = append(contents, MapKV{uint32(k), r.Name})
case elf.STT_OBJECT:
contents = append(contents, MapKV{uint32(k), mapStub(r.Name)})
contents = append(contents, MapKV{uint32(k), r.Name})
default:
return nil, fmt.Errorf("unknown relocation type %v", t)
}
@ -921,15 +1027,6 @@ func (ec *elfCode) loadDataSections(maps map[string]*MapSpec) error {
continue
}
if ec.btf == nil {
return errors.New("data sections require BTF, make sure all consts are marked as static")
}
var datasec *btf.Datasec
if err := ec.btf.FindType(sec.Name, &datasec); err != nil {
return fmt.Errorf("data section %s: can't get BTF: %w", sec.Name, err)
}
data, err := sec.Data()
if err != nil {
return fmt.Errorf("data section %s: can't get contents: %w", sec.Name, err)
@ -946,14 +1043,25 @@ func (ec *elfCode) loadDataSections(maps map[string]*MapSpec) error {
ValueSize: uint32(len(data)),
MaxEntries: 1,
Contents: []MapKV{{uint32(0), data}},
BTF: &btf.Map{Spec: ec.btf, Key: &btf.Void{}, Value: datasec},
}
switch sec.Name {
case ".rodata":
// It is possible for a data section to exist without a corresponding BTF Datasec
// if it only contains anonymous values like macro-defined arrays.
if ec.btf != nil {
var ds *btf.Datasec
if ec.btf.TypeByName(sec.Name, &ds) == nil {
// Assign the spec's key and BTF only if the Datasec lookup was successful.
mapSpec.BTF = ec.btf
mapSpec.Key = &btf.Void{}
mapSpec.Value = ds
}
}
switch n := sec.Name; {
case strings.HasPrefix(n, ".rodata"):
mapSpec.Flags = unix.BPF_F_RDONLY_PROG
mapSpec.Freeze = true
case ".bss":
case n == ".bss":
// The kernel already zero-initializes the map
mapSpec.Contents = nil
}
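// For illustration (an assumption about the resulting specs, not new
// code): a 16-byte .rodata section yields roughly
//
//	MapSpec{Type: Array, KeySize: 4, ValueSize: 16, MaxEntries: 1,
//		Flags: unix.BPF_F_RDONLY_PROG, Freeze: true,
//		Contents: []MapKV{{0, data}}}
//
// while .bss drops Contents, since the kernel zero-initializes array maps.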
@ -964,91 +1072,103 @@ func (ec *elfCode) loadDataSections(maps map[string]*MapSpec) error {
}
func getProgType(sectionName string) (ProgramType, AttachType, uint32, string) {
types := map[string]struct {
types := []struct {
prefix string
progType ProgramType
attachType AttachType
progFlags uint32
}{
// From https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/bpf/libbpf.c
"socket": {SocketFilter, AttachNone, 0},
"sk_reuseport/migrate": {SkReuseport, AttachSkReuseportSelectOrMigrate, 0},
"sk_reuseport": {SkReuseport, AttachSkReuseportSelect, 0},
"seccomp": {SocketFilter, AttachNone, 0},
"kprobe/": {Kprobe, AttachNone, 0},
"uprobe/": {Kprobe, AttachNone, 0},
"kretprobe/": {Kprobe, AttachNone, 0},
"uretprobe/": {Kprobe, AttachNone, 0},
"tracepoint/": {TracePoint, AttachNone, 0},
"raw_tracepoint/": {RawTracepoint, AttachNone, 0},
"raw_tp/": {RawTracepoint, AttachNone, 0},
"tp_btf/": {Tracing, AttachTraceRawTp, 0},
"xdp": {XDP, AttachNone, 0},
"perf_event": {PerfEvent, AttachNone, 0},
"lwt_in": {LWTIn, AttachNone, 0},
"lwt_out": {LWTOut, AttachNone, 0},
"lwt_xmit": {LWTXmit, AttachNone, 0},
"lwt_seg6local": {LWTSeg6Local, AttachNone, 0},
"sockops": {SockOps, AttachCGroupSockOps, 0},
"sk_skb/stream_parser": {SkSKB, AttachSkSKBStreamParser, 0},
"sk_skb/stream_verdict": {SkSKB, AttachSkSKBStreamParser, 0},
"sk_msg": {SkMsg, AttachSkSKBStreamVerdict, 0},
"lirc_mode2": {LircMode2, AttachLircMode2, 0},
"flow_dissector": {FlowDissector, AttachFlowDissector, 0},
"iter/": {Tracing, AttachTraceIter, 0},
"fentry/": {Tracing, AttachTraceFEntry, 0},
"fmod_ret/": {Tracing, AttachModifyReturn, 0},
"fexit/": {Tracing, AttachTraceFExit, 0},
"fentry.s/": {Tracing, AttachTraceFEntry, unix.BPF_F_SLEEPABLE},
"fmod_ret.s/": {Tracing, AttachModifyReturn, unix.BPF_F_SLEEPABLE},
"fexit.s/": {Tracing, AttachTraceFExit, unix.BPF_F_SLEEPABLE},
"sk_lookup/": {SkLookup, AttachSkLookup, 0},
"freplace/": {Extension, AttachNone, 0},
"lsm/": {LSM, AttachLSMMac, 0},
"lsm.s/": {LSM, AttachLSMMac, unix.BPF_F_SLEEPABLE},
// Keep these types in sync with libbpf.c, preserving its ordering.
// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/bpf/libbpf.c
{"socket", SocketFilter, AttachNone, 0},
{"sk_reuseport/migrate", SkReuseport, AttachSkReuseportSelectOrMigrate, 0},
{"sk_reuseport", SkReuseport, AttachSkReuseportSelect, 0},
{"kprobe/", Kprobe, AttachNone, 0},
{"uprobe/", Kprobe, AttachNone, 0},
{"kretprobe/", Kprobe, AttachNone, 0},
{"uretprobe/", Kprobe, AttachNone, 0},
{"tc", SchedCLS, AttachNone, 0},
{"classifier", SchedCLS, AttachNone, 0},
{"action", SchedACT, AttachNone, 0},
{"tracepoint/", TracePoint, AttachNone, 0},
{"tp/", TracePoint, AttachNone, 0},
{"raw_tracepoint/", RawTracepoint, AttachNone, 0},
{"raw_tp/", RawTracepoint, AttachNone, 0},
{"raw_tracepoint.w/", RawTracepointWritable, AttachNone, 0},
{"raw_tp.w/", RawTracepointWritable, AttachNone, 0},
{"tp_btf/", Tracing, AttachTraceRawTp, 0},
{"fentry/", Tracing, AttachTraceFEntry, 0},
{"fmod_ret/", Tracing, AttachModifyReturn, 0},
{"fexit/", Tracing, AttachTraceFExit, 0},
{"fentry.s/", Tracing, AttachTraceFEntry, unix.BPF_F_SLEEPABLE},
{"fmod_ret.s/", Tracing, AttachModifyReturn, unix.BPF_F_SLEEPABLE},
{"fexit.s/", Tracing, AttachTraceFExit, unix.BPF_F_SLEEPABLE},
{"freplace/", Extension, AttachNone, 0},
{"lsm/", LSM, AttachLSMMac, 0},
{"lsm.s/", LSM, AttachLSMMac, unix.BPF_F_SLEEPABLE},
{"iter/", Tracing, AttachTraceIter, 0},
{"syscall", Syscall, AttachNone, 0},
{"xdp_devmap/", XDP, AttachXDPDevMap, 0},
{"xdp_cpumap/", XDP, AttachXDPCPUMap, 0},
{"xdp", XDP, AttachNone, 0},
{"perf_event", PerfEvent, AttachNone, 0},
{"lwt_in", LWTIn, AttachNone, 0},
{"lwt_out", LWTOut, AttachNone, 0},
{"lwt_xmit", LWTXmit, AttachNone, 0},
{"lwt_seg6local", LWTSeg6Local, AttachNone, 0},
{"cgroup_skb/ingress", CGroupSKB, AttachCGroupInetIngress, 0},
{"cgroup_skb/egress", CGroupSKB, AttachCGroupInetEgress, 0},
{"cgroup/skb", CGroupSKB, AttachNone, 0},
{"cgroup/sock_create", CGroupSock, AttachCGroupInetSockCreate, 0},
{"cgroup/sock_release", CGroupSock, AttachCgroupInetSockRelease, 0},
{"cgroup/sock", CGroupSock, AttachCGroupInetSockCreate, 0},
{"cgroup/post_bind4", CGroupSock, AttachCGroupInet4PostBind, 0},
{"cgroup/post_bind6", CGroupSock, AttachCGroupInet6PostBind, 0},
{"cgroup/dev", CGroupDevice, AttachCGroupDevice, 0},
{"sockops", SockOps, AttachCGroupSockOps, 0},
{"sk_skb/stream_parser", SkSKB, AttachSkSKBStreamParser, 0},
{"sk_skb/stream_verdict", SkSKB, AttachSkSKBStreamVerdict, 0},
{"sk_skb", SkSKB, AttachNone, 0},
{"sk_msg", SkMsg, AttachSkMsgVerdict, 0},
{"lirc_mode2", LircMode2, AttachLircMode2, 0},
{"flow_dissector", FlowDissector, AttachFlowDissector, 0},
{"cgroup/bind4", CGroupSockAddr, AttachCGroupInet4Bind, 0},
{"cgroup/bind6", CGroupSockAddr, AttachCGroupInet6Bind, 0},
{"cgroup/connect4", CGroupSockAddr, AttachCGroupInet4Connect, 0},
{"cgroup/connect6", CGroupSockAddr, AttachCGroupInet6Connect, 0},
{"cgroup/sendmsg4", CGroupSockAddr, AttachCGroupUDP4Sendmsg, 0},
{"cgroup/sendmsg6", CGroupSockAddr, AttachCGroupUDP6Sendmsg, 0},
{"cgroup/recvmsg4", CGroupSockAddr, AttachCGroupUDP4Recvmsg, 0},
{"cgroup/recvmsg6", CGroupSockAddr, AttachCGroupUDP6Recvmsg, 0},
{"cgroup/getpeername4", CGroupSockAddr, AttachCgroupInet4GetPeername, 0},
{"cgroup/getpeername6", CGroupSockAddr, AttachCgroupInet6GetPeername, 0},
{"cgroup/getsockname4", CGroupSockAddr, AttachCgroupInet4GetSockname, 0},
{"cgroup/getsockname6", CGroupSockAddr, AttachCgroupInet6GetSockname, 0},
{"cgroup/sysctl", CGroupSysctl, AttachCGroupSysctl, 0},
{"cgroup/getsockopt", CGroupSockopt, AttachCGroupGetsockopt, 0},
{"cgroup/setsockopt", CGroupSockopt, AttachCGroupSetsockopt, 0},
{"struct_ops+", StructOps, AttachNone, 0},
{"sk_lookup/", SkLookup, AttachSkLookup, 0},
"cgroup_skb/ingress": {CGroupSKB, AttachCGroupInetIngress, 0},
"cgroup_skb/egress": {CGroupSKB, AttachCGroupInetEgress, 0},
"cgroup/dev": {CGroupDevice, AttachCGroupDevice, 0},
"cgroup/skb": {CGroupSKB, AttachNone, 0},
"cgroup/sock": {CGroupSock, AttachCGroupInetSockCreate, 0},
"cgroup/post_bind4": {CGroupSock, AttachCGroupInet4PostBind, 0},
"cgroup/post_bind6": {CGroupSock, AttachCGroupInet6PostBind, 0},
"cgroup/bind4": {CGroupSockAddr, AttachCGroupInet4Bind, 0},
"cgroup/bind6": {CGroupSockAddr, AttachCGroupInet6Bind, 0},
"cgroup/connect4": {CGroupSockAddr, AttachCGroupInet4Connect, 0},
"cgroup/connect6": {CGroupSockAddr, AttachCGroupInet6Connect, 0},
"cgroup/sendmsg4": {CGroupSockAddr, AttachCGroupUDP4Sendmsg, 0},
"cgroup/sendmsg6": {CGroupSockAddr, AttachCGroupUDP6Sendmsg, 0},
"cgroup/recvmsg4": {CGroupSockAddr, AttachCGroupUDP4Recvmsg, 0},
"cgroup/recvmsg6": {CGroupSockAddr, AttachCGroupUDP6Recvmsg, 0},
"cgroup/sysctl": {CGroupSysctl, AttachCGroupSysctl, 0},
"cgroup/getsockopt": {CGroupSockopt, AttachCGroupGetsockopt, 0},
"cgroup/setsockopt": {CGroupSockopt, AttachCGroupSetsockopt, 0},
"classifier": {SchedCLS, AttachNone, 0},
"action": {SchedACT, AttachNone, 0},
"cgroup/getsockname4": {CGroupSockAddr, AttachCgroupInet4GetSockname, 0},
"cgroup/getsockname6": {CGroupSockAddr, AttachCgroupInet6GetSockname, 0},
"cgroup/getpeername4": {CGroupSockAddr, AttachCgroupInet4GetPeername, 0},
"cgroup/getpeername6": {CGroupSockAddr, AttachCgroupInet6GetPeername, 0},
{"seccomp", SocketFilter, AttachNone, 0},
}
for prefix, t := range types {
if !strings.HasPrefix(sectionName, prefix) {
for _, t := range types {
if !strings.HasPrefix(sectionName, t.prefix) {
continue
}
if !strings.HasSuffix(prefix, "/") {
if !strings.HasSuffix(t.prefix, "/") {
return t.progType, t.attachType, t.progFlags, ""
}
return t.progType, t.attachType, t.progFlags, sectionName[len(prefix):]
return t.progType, t.attachType, t.progFlags, sectionName[len(t.prefix):]
}
return UnspecifiedProgram, AttachNone, 0, ""
}
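// Why an ordered slice instead of a map: Go map iteration order is random,
// so "sk_skb/stream_parser" could lose to the shorter "sk_skb" prefix.
// A minimal sketch of the pitfall (hypothetical helper, not in the diff):
//
//	func classify(section string) string {
//		prefixes := []string{"sk_skb/stream_parser", "sk_skb"} // most specific first
//		for _, p := range prefixes {
//			if strings.HasPrefix(section, p) {
//				return p
//			}
//		}
//		return ""
//	}
//
// classify("sk_skb/stream_parser") returns the specific prefix; iterating
// a map instead could return either entry.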
func (ec *elfCode) loadRelocations(sec *elf.Section, symbols []elf.Symbol) (map[uint64]elf.Symbol, error) {
func (ec *elfCode) loadSectionRelocations(sec *elf.Section, symbols []elf.Symbol) (map[uint64]elf.Symbol, error) {
rels := make(map[uint64]elf.Symbol)
if sec.Entsize < 16 {


@ -1,22 +0,0 @@
//go:build gofuzz
// +build gofuzz
// Use with https://github.com/dvyukov/go-fuzz
package ebpf
import "bytes"
func FuzzLoadCollectionSpec(data []byte) int {
spec, err := LoadCollectionSpecFromReader(bytes.NewReader(data))
if err != nil {
if spec != nil {
panic("spec is not nil")
}
return 0
}
if spec == nil {
panic("spec is nil")
}
return 1
}

vendor/github.com/cilium/ebpf/info.go generated vendored

@ -2,6 +2,7 @@ package ebpf
import (
"bufio"
"bytes"
"encoding/hex"
"errors"
"fmt"
@ -10,9 +11,13 @@ import (
"strings"
"syscall"
"time"
"unsafe"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/btf"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
// MapInfo describes a map.
@ -23,12 +28,13 @@ type MapInfo struct {
ValueSize uint32
MaxEntries uint32
Flags uint32
// Name as supplied by user space at load time.
// Name as supplied by user space at load time. Available from 4.15.
Name string
}
func newMapInfoFromFd(fd *internal.FD) (*MapInfo, error) {
info, err := bpfGetMapInfoByFD(fd)
func newMapInfoFromFd(fd *sys.FD) (*MapInfo, error) {
var info sys.MapInfo
err := sys.ObjInfo(fd, &info)
if errors.Is(err, syscall.EINVAL) {
return newMapInfoFromProc(fd)
}
@ -37,18 +43,17 @@ func newMapInfoFromFd(fd *internal.FD) (*MapInfo, error) {
}
return &MapInfo{
MapType(info.map_type),
MapID(info.id),
info.key_size,
info.value_size,
info.max_entries,
info.map_flags,
// name is available from 4.15.
internal.CString(info.name[:]),
MapType(info.Type),
MapID(info.Id),
info.KeySize,
info.ValueSize,
info.MaxEntries,
info.MapFlags,
unix.ByteSliceToString(info.Name[:]),
}, nil
}
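// Hedged usage sketch: callers normally reach this through (*ebpf.Map).Info,
// which transparently falls back to /proc parsing on kernels where
// BPF_OBJ_GET_INFO_BY_FD returns EINVAL.
//
//	info, err := m.Info() // m is an *ebpf.Map (assumption)
//	if err != nil {
//		return err
//	}
//	fmt.Println(info.Name, info.MaxEntries) // Name may be empty before 4.15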
func newMapInfoFromProc(fd *internal.FD) (*MapInfo, error) {
func newMapInfoFromProc(fd *sys.FD) (*MapInfo, error) {
var mi MapInfo
err := scanFdInfo(fd, map[string]interface{}{
"map_type": &mi.Type,
@ -84,20 +89,21 @@ type programStats struct {
type ProgramInfo struct {
Type ProgramType
id ProgramID
// Truncated hash of the BPF bytecode.
// Truncated hash of the BPF bytecode. Available from 4.13.
Tag string
// Name as supplied by user space at load time.
// Name as supplied by user space at load time. Available from 4.15.
Name string
// BTF for the program.
btf btf.ID
// IDS map ids related to program.
ids []MapID
btf btf.ID
stats *programStats
maps []MapID
insns []byte
}
func newProgramInfoFromFd(fd *internal.FD) (*ProgramInfo, error) {
info, err := bpfGetProgInfoByFD(fd, nil)
func newProgramInfoFromFd(fd *sys.FD) (*ProgramInfo, error) {
var info sys.ProgInfo
err := sys.ObjInfo(fd, &info)
if errors.Is(err, syscall.EINVAL) {
return newProgramInfoFromProc(fd)
}
@ -105,32 +111,43 @@ func newProgramInfoFromFd(fd *internal.FD) (*ProgramInfo, error) {
return nil, err
}
var mapIDs []MapID
if info.nr_map_ids > 0 {
mapIDs = make([]MapID, info.nr_map_ids)
info, err = bpfGetProgInfoByFD(fd, mapIDs)
if err != nil {
pi := ProgramInfo{
Type: ProgramType(info.Type),
id: ProgramID(info.Id),
Tag: hex.EncodeToString(info.Tag[:]),
Name: unix.ByteSliceToString(info.Name[:]),
btf: btf.ID(info.BtfId),
stats: &programStats{
runtime: time.Duration(info.RunTimeNs),
runCount: info.RunCnt,
},
}
// Start with a clean struct for the second call, otherwise we may get EFAULT.
var info2 sys.ProgInfo
if info.NrMapIds > 0 {
pi.maps = make([]MapID, info.NrMapIds)
info2.NrMapIds = info.NrMapIds
info2.MapIds = sys.NewPointer(unsafe.Pointer(&pi.maps[0]))
}
if info.XlatedProgLen > 0 {
pi.insns = make([]byte, info.XlatedProgLen)
info2.XlatedProgLen = info.XlatedProgLen
info2.XlatedProgInsns = sys.NewSlicePointer(pi.insns)
}
if info.NrMapIds > 0 || info.XlatedProgLen > 0 {
if err := sys.ObjInfo(fd, &info2); err != nil {
return nil, err
}
}
return &ProgramInfo{
Type: ProgramType(info.prog_type),
id: ProgramID(info.id),
// tag is available if the kernel supports BPF_PROG_GET_INFO_BY_FD.
Tag: hex.EncodeToString(info.tag[:]),
// name is available from 4.15.
Name: internal.CString(info.name[:]),
btf: btf.ID(info.btf_id),
ids: mapIDs,
stats: &programStats{
runtime: time.Duration(info.run_time_ns),
runCount: info.run_cnt,
},
}, nil
return &pi, nil
}
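// A condensed sketch of the two-pass pattern used above (error handling
// elided): the first ObjInfo call only reports counts, the second passes
// freshly allocated buffers via a clean struct, avoiding the EFAULT noted
// in the comment.
//
//	var hdr sys.ProgInfo
//	_ = sys.ObjInfo(fd, &hdr)                      // pass 1: sizes only
//	buf := make([]byte, hdr.XlatedProgLen)
//	var req sys.ProgInfo                           // pass 2: clean struct
//	req.XlatedProgLen = hdr.XlatedProgLen
//	req.XlatedProgInsns = sys.NewSlicePointer(buf)
//	_ = sys.ObjInfo(fd, &req)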
func newProgramInfoFromProc(fd *internal.FD) (*ProgramInfo, error) {
func newProgramInfoFromProc(fd *sys.FD) (*ProgramInfo, error) {
var info ProgramInfo
err := scanFdInfo(fd, map[string]interface{}{
"prog_type": &info.Type,
@ -160,6 +177,7 @@ func (pi *ProgramInfo) ID() (ProgramID, bool) {
// BTFID returns the BTF ID associated with the program.
//
// The ID is only valid as long as the associated program is kept alive.
// Available from 5.0.
//
// The bool return value indicates whether this optional field is available and
@ -191,20 +209,50 @@ func (pi *ProgramInfo) Runtime() (time.Duration, bool) {
return time.Duration(0), false
}
// Instructions returns the 'xlated' instruction stream of the program
// after it has been verified and rewritten by the kernel. These instructions
// cannot be loaded back into the kernel as-is, this is mainly used for
// inspecting loaded programs for troubleshooting, dumping, etc.
//
// For example, map accesses are made to reference their kernel map IDs,
// not the FDs they had when the program was inserted. Note that before
// the introduction of bpf_insn_prepare_dump in kernel 4.16, xlated
// instructions were not sanitized, making the output even less reusable
// and less likely to round-trip or evaluate to the same program Tag.
//
// The first instruction is marked as a symbol using the Program's name.
//
// Available from 4.13. Requires CAP_BPF or equivalent.
func (pi *ProgramInfo) Instructions() (asm.Instructions, error) {
// If the calling process is not BPF-capable or if the kernel doesn't
// support getting xlated instructions, the field will be zero.
if len(pi.insns) == 0 {
return nil, fmt.Errorf("insufficient permissions or unsupported kernel: %w", ErrNotSupported)
}
r := bytes.NewReader(pi.insns)
var insns asm.Instructions
if err := insns.Unmarshal(r, internal.NativeEndian); err != nil {
return nil, fmt.Errorf("unmarshaling instructions: %w", err)
}
// Tag the first instruction with the name of the program, if available.
insns[0] = insns[0].WithSymbol(pi.Name)
return insns, nil
}
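// Hedged usage sketch (prog is assumed to be a loaded *ebpf.Program):
//
//	info, err := prog.Info()
//	if err != nil {
//		return err
//	}
//	insns, err := info.Instructions()
//	if err != nil {
//		return err // wraps ErrNotSupported without CAP_BPF or on old kernels
//	}
//	for _, ins := range insns {
//		fmt.Println(ins)
//	}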
// MapIDs returns the maps related to the program.
//
// Available from 4.15.
//
// The bool return value indicates whether this optional field is available.
func (pi *ProgramInfo) MapIDs() ([]MapID, bool) {
return pi.ids, pi.ids != nil
return pi.maps, pi.maps != nil
}
func scanFdInfo(fd *internal.FD, fields map[string]interface{}) error {
raw, err := fd.Value()
if err != nil {
return err
}
fh, err := os.Open(fmt.Sprintf("/proc/self/fdinfo/%d", raw))
func scanFdInfo(fd *sys.FD, fields map[string]interface{}) error {
fh, err := os.Open(fmt.Sprintf("/proc/self/fdinfo/%d", fd.Int()))
if err != nil {
return err
}
@ -247,6 +295,10 @@ func scanFdInfoReader(r io.Reader, fields map[string]interface{}) error {
return err
}
if len(fields) > 0 && scanned == 0 {
return ErrNotSupported
}
if scanned != len(fields) {
return errMissingFields
}
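// For reference, /proc/self/fdinfo/<fd> for a map contains roughly the
// following tab-separated lines (the exact set varies by kernel version):
//
//	map_type:	1
//	key_size:	4
//	value_size:	8
//	max_entries:	1024
//	map_flags:	0x0
//
// scanFdInfo matches the requested field names against these keys.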
@ -261,11 +313,9 @@ func scanFdInfoReader(r io.Reader, fields map[string]interface{}) error {
//
// Requires at least 5.8.
func EnableStats(which uint32) (io.Closer, error) {
attr := internal.BPFEnableStatsAttr{
StatsType: which,
}
fd, err := internal.BPFEnableStats(&attr)
fd, err := sys.EnableStats(&sys.EnableStatsAttr{
Type: which,
})
if err != nil {
return nil, err
}
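// Hedged usage sketch: enable run-time accounting globally and read the
// results back via ProgramInfo.Runtime. BPF_STATS_RUN_TIME is assumed to
// be the statistics type exposed by the unix package.
//
//	closer, err := ebpf.EnableStats(unix.BPF_STATS_RUN_TIME)
//	if err != nil {
//		return err // needs a 5.8+ kernel
//	}
//	defer closer.Close()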


@ -1,798 +0,0 @@
package btf
import (
"bytes"
"debug/elf"
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"os"
"reflect"
"sync"
"unsafe"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/unix"
)
const btfMagic = 0xeB9F
// Errors returned by BTF functions.
var (
ErrNotSupported = internal.ErrNotSupported
ErrNotFound = errors.New("not found")
ErrNoExtendedInfo = errors.New("no extended info")
)
// ID represents the unique ID of a BTF object.
type ID uint32
// Spec represents decoded BTF.
type Spec struct {
rawTypes []rawType
strings stringTable
types []Type
namedTypes map[string][]NamedType
funcInfos map[string]extInfo
lineInfos map[string]extInfo
coreRelos map[string]coreRelos
byteOrder binary.ByteOrder
}
type btfHeader struct {
Magic uint16
Version uint8
Flags uint8
HdrLen uint32
TypeOff uint32
TypeLen uint32
StringOff uint32
StringLen uint32
}
// LoadSpecFromReader reads BTF sections from an ELF.
//
// Returns ErrNotFound if the reader contains no BTF.
func LoadSpecFromReader(rd io.ReaderAt) (*Spec, error) {
file, err := internal.NewSafeELFFile(rd)
if err != nil {
return nil, err
}
defer file.Close()
symbols, err := file.Symbols()
if err != nil {
return nil, fmt.Errorf("can't read symbols: %v", err)
}
variableOffsets := make(map[variable]uint32)
for _, symbol := range symbols {
if idx := symbol.Section; idx >= elf.SHN_LORESERVE && idx <= elf.SHN_HIRESERVE {
// Ignore things like SHN_ABS
continue
}
if int(symbol.Section) >= len(file.Sections) {
return nil, fmt.Errorf("symbol %s: invalid section %d", symbol.Name, symbol.Section)
}
secName := file.Sections[symbol.Section].Name
if symbol.Value > math.MaxUint32 {
return nil, fmt.Errorf("section %s: symbol %s: size exceeds maximum", secName, symbol.Name)
}
variableOffsets[variable{secName, symbol.Name}] = uint32(symbol.Value)
}
return loadSpecFromELF(file, variableOffsets)
}
func loadSpecFromELF(file *internal.SafeELFFile, variableOffsets map[variable]uint32) (*Spec, error) {
var (
btfSection *elf.Section
btfExtSection *elf.Section
sectionSizes = make(map[string]uint32)
)
for _, sec := range file.Sections {
switch sec.Name {
case ".BTF":
btfSection = sec
case ".BTF.ext":
btfExtSection = sec
default:
if sec.Type != elf.SHT_PROGBITS && sec.Type != elf.SHT_NOBITS {
break
}
if sec.Size > math.MaxUint32 {
return nil, fmt.Errorf("section %s exceeds maximum size", sec.Name)
}
sectionSizes[sec.Name] = uint32(sec.Size)
}
}
if btfSection == nil {
return nil, fmt.Errorf("btf: %w", ErrNotFound)
}
spec, err := loadRawSpec(btfSection.Open(), file.ByteOrder, sectionSizes, variableOffsets)
if err != nil {
return nil, err
}
if btfExtSection == nil {
return spec, nil
}
spec.funcInfos, spec.lineInfos, spec.coreRelos, err = parseExtInfos(btfExtSection.Open(), file.ByteOrder, spec.strings)
if err != nil {
return nil, fmt.Errorf("can't read ext info: %w", err)
}
return spec, nil
}
// LoadRawSpec reads a blob of BTF data that isn't wrapped in an ELF file.
//
// Prefer using LoadSpecFromReader, since this function only supports a subset
// of BTF.
func LoadRawSpec(btf io.Reader, bo binary.ByteOrder) (*Spec, error) {
// This will return an error if we encounter a Datasec, since we can't fix
// it up.
return loadRawSpec(btf, bo, nil, nil)
}
func loadRawSpec(btf io.Reader, bo binary.ByteOrder, sectionSizes map[string]uint32, variableOffsets map[variable]uint32) (*Spec, error) {
rawTypes, rawStrings, err := parseBTF(btf, bo)
if err != nil {
return nil, err
}
err = fixupDatasec(rawTypes, rawStrings, sectionSizes, variableOffsets)
if err != nil {
return nil, err
}
types, typesByName, err := inflateRawTypes(rawTypes, rawStrings)
if err != nil {
return nil, err
}
return &Spec{
rawTypes: rawTypes,
namedTypes: typesByName,
types: types,
strings: rawStrings,
byteOrder: bo,
}, nil
}
var kernelBTF struct {
sync.Mutex
*Spec
}
// LoadKernelSpec returns the current kernel's BTF information.
//
// Requires a >= 5.5 kernel with CONFIG_DEBUG_INFO_BTF enabled. Returns
// ErrNotSupported if BTF is not enabled.
func LoadKernelSpec() (*Spec, error) {
kernelBTF.Lock()
defer kernelBTF.Unlock()
if kernelBTF.Spec != nil {
return kernelBTF.Spec, nil
}
var err error
kernelBTF.Spec, err = loadKernelSpec()
return kernelBTF.Spec, err
}
func loadKernelSpec() (*Spec, error) {
release, err := unix.KernelRelease()
if err != nil {
return nil, fmt.Errorf("can't read kernel release number: %w", err)
}
fh, err := os.Open("/sys/kernel/btf/vmlinux")
if err == nil {
defer fh.Close()
return LoadRawSpec(fh, internal.NativeEndian)
}
// use same list of locations as libbpf
// https://github.com/libbpf/libbpf/blob/9a3a42608dbe3731256a5682a125ac1e23bced8f/src/btf.c#L3114-L3122
locations := []string{
"/boot/vmlinux-%s",
"/lib/modules/%s/vmlinux-%[1]s",
"/lib/modules/%s/build/vmlinux",
"/usr/lib/modules/%s/kernel/vmlinux",
"/usr/lib/debug/boot/vmlinux-%s",
"/usr/lib/debug/boot/vmlinux-%s.debug",
"/usr/lib/debug/lib/modules/%s/vmlinux",
}
for _, loc := range locations {
path := fmt.Sprintf(loc, release)
fh, err := os.Open(path)
if err != nil {
continue
}
defer fh.Close()
file, err := internal.NewSafeELFFile(fh)
if err != nil {
return nil, err
}
defer file.Close()
return loadSpecFromELF(file, nil)
}
return nil, fmt.Errorf("no BTF for kernel version %s: %w", release, internal.ErrNotSupported)
}
func parseBTF(btf io.Reader, bo binary.ByteOrder) ([]rawType, stringTable, error) {
rawBTF, err := io.ReadAll(btf)
if err != nil {
return nil, nil, fmt.Errorf("can't read BTF: %v", err)
}
rd := bytes.NewReader(rawBTF)
var header btfHeader
if err := binary.Read(rd, bo, &header); err != nil {
return nil, nil, fmt.Errorf("can't read header: %v", err)
}
if header.Magic != btfMagic {
return nil, nil, fmt.Errorf("incorrect magic value %v", header.Magic)
}
if header.Version != 1 {
return nil, nil, fmt.Errorf("unexpected version %v", header.Version)
}
if header.Flags != 0 {
return nil, nil, fmt.Errorf("unsupported flags %v", header.Flags)
}
remainder := int64(header.HdrLen) - int64(binary.Size(&header))
if remainder < 0 {
return nil, nil, errors.New("header is too short")
}
if _, err := io.CopyN(internal.DiscardZeroes{}, rd, remainder); err != nil {
return nil, nil, fmt.Errorf("header padding: %v", err)
}
if _, err := rd.Seek(int64(header.HdrLen+header.StringOff), io.SeekStart); err != nil {
return nil, nil, fmt.Errorf("can't seek to start of string section: %v", err)
}
rawStrings, err := readStringTable(io.LimitReader(rd, int64(header.StringLen)))
if err != nil {
return nil, nil, fmt.Errorf("can't read type names: %w", err)
}
if _, err := rd.Seek(int64(header.HdrLen+header.TypeOff), io.SeekStart); err != nil {
return nil, nil, fmt.Errorf("can't seek to start of type section: %v", err)
}
rawTypes, err := readTypes(io.LimitReader(rd, int64(header.TypeLen)), bo)
if err != nil {
return nil, nil, fmt.Errorf("can't read types: %w", err)
}
return rawTypes, rawStrings, nil
}
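// Layout implied by the parsing above (illustrative):
//
//	btfHeader (HdrLen bytes; magic 0xeB9F, version 1, flags 0)
//	...zero padding up to HdrLen...
//	type section:   HdrLen+TypeOff,   TypeLen bytes
//	string section: HdrLen+StringOff, StringLen bytes
//
// The string table is read first because type records refer to names by
// offset into it.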
type variable struct {
section string
name string
}
func fixupDatasec(rawTypes []rawType, rawStrings stringTable, sectionSizes map[string]uint32, variableOffsets map[variable]uint32) error {
for i, rawType := range rawTypes {
if rawType.Kind() != kindDatasec {
continue
}
name, err := rawStrings.Lookup(rawType.NameOff)
if err != nil {
return err
}
if name == ".kconfig" || name == ".ksyms" {
return fmt.Errorf("reference to %s: %w", name, ErrNotSupported)
}
if rawTypes[i].SizeType != 0 {
continue
}
size, ok := sectionSizes[name]
if !ok {
return fmt.Errorf("data section %s: missing size", name)
}
rawTypes[i].SizeType = size
secinfos := rawType.data.([]btfVarSecinfo)
for j, secInfo := range secinfos {
id := int(secInfo.Type - 1)
if id >= len(rawTypes) {
return fmt.Errorf("data section %s: invalid type id %d for variable %d", name, id, j)
}
varName, err := rawStrings.Lookup(rawTypes[id].NameOff)
if err != nil {
return fmt.Errorf("data section %s: can't get name for type %d: %w", name, id, err)
}
offset, ok := variableOffsets[variable{name, varName}]
if !ok {
return fmt.Errorf("data section %s: missing offset for variable %s", name, varName)
}
secinfos[j].Offset = offset
}
}
return nil
}
// Copy creates a copy of Spec.
func (s *Spec) Copy() *Spec {
types, _ := copyTypes(s.types, nil)
namedTypes := make(map[string][]NamedType)
for _, typ := range types {
if named, ok := typ.(NamedType); ok {
name := essentialName(named.TypeName())
namedTypes[name] = append(namedTypes[name], named)
}
}
// NB: Other parts of spec are not copied since they are immutable.
return &Spec{
s.rawTypes,
s.strings,
types,
namedTypes,
s.funcInfos,
s.lineInfos,
s.coreRelos,
s.byteOrder,
}
}
type marshalOpts struct {
ByteOrder binary.ByteOrder
StripFuncLinkage bool
}
func (s *Spec) marshal(opts marshalOpts) ([]byte, error) {
var (
buf bytes.Buffer
header = new(btfHeader)
headerLen = binary.Size(header)
)
// Reserve space for the header. We have to write it last since
// we don't know the size of the type section yet.
_, _ = buf.Write(make([]byte, headerLen))
// Write type section, just after the header.
for _, raw := range s.rawTypes {
switch {
case opts.StripFuncLinkage && raw.Kind() == kindFunc:
raw.SetLinkage(StaticFunc)
}
if err := raw.Marshal(&buf, opts.ByteOrder); err != nil {
return nil, fmt.Errorf("can't marshal BTF: %w", err)
}
}
typeLen := uint32(buf.Len() - headerLen)
// Write string section after type section.
_, _ = buf.Write(s.strings)
// Fill out the header, and write it out.
header = &btfHeader{
Magic: btfMagic,
Version: 1,
Flags: 0,
HdrLen: uint32(headerLen),
TypeOff: 0,
TypeLen: typeLen,
StringOff: typeLen,
StringLen: uint32(len(s.strings)),
}
raw := buf.Bytes()
err := binary.Write(sliceWriter(raw[:headerLen]), opts.ByteOrder, header)
if err != nil {
return nil, fmt.Errorf("can't write header: %v", err)
}
return raw, nil
}
type sliceWriter []byte
func (sw sliceWriter) Write(p []byte) (int, error) {
if len(p) != len(sw) {
return 0, errors.New("size doesn't match")
}
return copy(sw, p), nil
}
// Program finds the BTF for a specific section.
//
// Length is the number of bytes in the raw BPF instruction stream.
//
// Returns an error which may wrap ErrNoExtendedInfo if the Spec doesn't
// contain extended BTF info.
func (s *Spec) Program(name string, length uint64) (*Program, error) {
if length == 0 {
return nil, errors.New("length mustn't be zero")
}
if s.funcInfos == nil && s.lineInfos == nil && s.coreRelos == nil {
return nil, fmt.Errorf("BTF for section %s: %w", name, ErrNoExtendedInfo)
}
funcInfos, funcOK := s.funcInfos[name]
lineInfos, lineOK := s.lineInfos[name]
relos, coreOK := s.coreRelos[name]
if !funcOK && !lineOK && !coreOK {
return nil, fmt.Errorf("no extended BTF info for section %s", name)
}
return &Program{s, length, funcInfos, lineInfos, relos}, nil
}
// FindType searches for a type with a specific name.
//
// Given a type T that satisfies Type, typ must be a non-nil **T.
// On success, the address of the found type will be copied in typ.
//
// Returns an error wrapping ErrNotFound if no matching
// type exists in spec.
func (s *Spec) FindType(name string, typ interface{}) error {
typValue := reflect.ValueOf(typ)
if typValue.Kind() != reflect.Ptr {
return fmt.Errorf("%T is not a pointer", typ)
}
typPtr := typValue.Elem()
if !typPtr.CanSet() {
return fmt.Errorf("%T cannot be set", typ)
}
wanted := typPtr.Type()
if !wanted.AssignableTo(reflect.TypeOf((*Type)(nil)).Elem()) {
return fmt.Errorf("%T does not satisfy Type interface", typ)
}
var candidate Type
for _, typ := range s.namedTypes[essentialName(name)] {
if reflect.TypeOf(typ) != wanted {
continue
}
// Match against the full name, not just the essential one.
if typ.TypeName() != name {
continue
}
if candidate != nil {
return fmt.Errorf("type %s: multiple candidates for %T", name, typ)
}
candidate = typ
}
if candidate == nil {
return fmt.Errorf("type %s: %w", name, ErrNotFound)
}
typPtr.Set(reflect.ValueOf(candidate))
return nil
}
// Handle is a reference to BTF loaded into the kernel.
type Handle struct {
spec *Spec
fd *internal.FD
}
// NewHandle loads BTF into the kernel.
//
// Returns ErrNotSupported if BTF is not supported.
func NewHandle(spec *Spec) (*Handle, error) {
if err := haveBTF(); err != nil {
return nil, err
}
if spec.byteOrder != internal.NativeEndian {
return nil, fmt.Errorf("can't load %s BTF on %s", spec.byteOrder, internal.NativeEndian)
}
btf, err := spec.marshal(marshalOpts{
ByteOrder: internal.NativeEndian,
StripFuncLinkage: haveFuncLinkage() != nil,
})
if err != nil {
return nil, fmt.Errorf("can't marshal BTF: %w", err)
}
if uint64(len(btf)) > math.MaxUint32 {
return nil, errors.New("BTF exceeds the maximum size")
}
attr := &bpfLoadBTFAttr{
btf: internal.NewSlicePointer(btf),
btfSize: uint32(len(btf)),
}
fd, err := bpfLoadBTF(attr)
if err != nil {
logBuf := make([]byte, 64*1024)
attr.logBuf = internal.NewSlicePointer(logBuf)
attr.btfLogSize = uint32(len(logBuf))
attr.btfLogLevel = 1
_, logErr := bpfLoadBTF(attr)
return nil, internal.ErrorWithLog(err, logBuf, logErr)
}
return &Handle{spec.Copy(), fd}, nil
}
// NewHandleFromID returns the BTF handle for a given id.
//
// Returns ErrNotExist, if there is no BTF with the given id.
//
// Requires CAP_SYS_ADMIN.
func NewHandleFromID(id ID) (*Handle, error) {
fd, err := internal.BPFObjGetFDByID(internal.BPF_BTF_GET_FD_BY_ID, uint32(id))
if err != nil {
return nil, fmt.Errorf("get BTF by id: %w", err)
}
info, err := newInfoFromFd(fd)
if err != nil {
_ = fd.Close()
return nil, fmt.Errorf("get BTF spec for handle: %w", err)
}
return &Handle{info.BTF, fd}, nil
}
// Spec returns the Spec that defined the BTF loaded into the kernel.
func (h *Handle) Spec() *Spec {
return h.spec
}
// Close destroys the handle.
//
// Subsequent calls to FD will return an invalid value.
func (h *Handle) Close() error {
return h.fd.Close()
}
// FD returns the file descriptor for the handle.
func (h *Handle) FD() int {
value, err := h.fd.Value()
if err != nil {
return -1
}
return int(value)
}
// Map is the BTF for a map.
type Map struct {
Spec *Spec
Key, Value Type
}
// Program is the BTF information for a stream of instructions.
type Program struct {
spec *Spec
length uint64
funcInfos, lineInfos extInfo
coreRelos coreRelos
}
// Spec returns the BTF spec of this program.
func (p *Program) Spec() *Spec {
return p.spec
}
// Append the information from other to the Program.
func (p *Program) Append(other *Program) error {
if other.spec != p.spec {
return fmt.Errorf("can't append program with different BTF specs")
}
funcInfos, err := p.funcInfos.append(other.funcInfos, p.length)
if err != nil {
return fmt.Errorf("func infos: %w", err)
}
lineInfos, err := p.lineInfos.append(other.lineInfos, p.length)
if err != nil {
return fmt.Errorf("line infos: %w", err)
}
p.funcInfos = funcInfos
p.lineInfos = lineInfos
p.coreRelos = p.coreRelos.append(other.coreRelos, p.length)
p.length += other.length
return nil
}
// FuncInfos returns the binary form of BTF function infos.
func (p *Program) FuncInfos() (recordSize uint32, bytes []byte, err error) {
bytes, err = p.funcInfos.MarshalBinary()
if err != nil {
return 0, nil, fmt.Errorf("func infos: %w", err)
}
return p.funcInfos.recordSize, bytes, nil
}
// LineInfos returns the binary form of BTF line infos.
func (p *Program) LineInfos() (recordSize uint32, bytes []byte, err error) {
bytes, err = p.lineInfos.MarshalBinary()
if err != nil {
return 0, nil, fmt.Errorf("line infos: %w", err)
}
return p.lineInfos.recordSize, bytes, nil
}
// Fixups returns the changes required to adjust the program to the target.
//
// Passing a nil target will relocate against the running kernel.
func (p *Program) Fixups(target *Spec) (COREFixups, error) {
if len(p.coreRelos) == 0 {
return nil, nil
}
if target == nil {
var err error
target, err = LoadKernelSpec()
if err != nil {
return nil, err
}
}
return coreRelocate(p.spec, target, p.coreRelos)
}
type bpfLoadBTFAttr struct {
btf internal.Pointer
logBuf internal.Pointer
btfSize uint32
btfLogSize uint32
btfLogLevel uint32
}
func bpfLoadBTF(attr *bpfLoadBTFAttr) (*internal.FD, error) {
fd, err := internal.BPF(internal.BPF_BTF_LOAD, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
if err != nil {
return nil, err
}
return internal.NewFD(uint32(fd)), nil
}
func marshalBTF(types interface{}, strings []byte, bo binary.ByteOrder) []byte {
const minHeaderLength = 24
typesLen := uint32(binary.Size(types))
header := btfHeader{
Magic: btfMagic,
Version: 1,
HdrLen: minHeaderLength,
TypeOff: 0,
TypeLen: typesLen,
StringOff: typesLen,
StringLen: uint32(len(strings)),
}
buf := new(bytes.Buffer)
_ = binary.Write(buf, bo, &header)
_ = binary.Write(buf, bo, types)
buf.Write(strings)
return buf.Bytes()
}
var haveBTF = internal.FeatureTest("BTF", "5.1", func() error {
var (
types struct {
Integer btfType
Var btfType
btfVar struct{ Linkage uint32 }
}
strings = []byte{0, 'a', 0}
)
// We use a BTF_KIND_VAR here, to make sure that
// the kernel understands BTF at least as well as we
// do. BTF_KIND_VAR was introduced ~5.1.
types.Integer.SetKind(kindPointer)
types.Var.NameOff = 1
types.Var.SetKind(kindVar)
types.Var.SizeType = 1
btf := marshalBTF(&types, strings, internal.NativeEndian)
fd, err := bpfLoadBTF(&bpfLoadBTFAttr{
btf: internal.NewSlicePointer(btf),
btfSize: uint32(len(btf)),
})
if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) {
// Treat both EINVAL and EPERM as not supported: loading the program
// might still succeed without BTF.
return internal.ErrNotSupported
}
if err != nil {
return err
}
fd.Close()
return nil
})
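// The probe pattern above, condensed: build the smallest BTF blob that
// exercises the feature (BTF_KIND_VAR, introduced around 5.1), attempt to
// load it, and treat EINVAL/EPERM as "unsupported" so callers can fall
// back to loading programs without BTF.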
var haveFuncLinkage = internal.FeatureTest("BTF func linkage", "5.6", func() error {
if err := haveBTF(); err != nil {
return err
}
var (
types struct {
FuncProto btfType
Func btfType
}
strings = []byte{0, 'a', 0}
)
types.FuncProto.SetKind(kindFuncProto)
types.Func.SetKind(kindFunc)
types.Func.SizeType = 1 // aka FuncProto
types.Func.NameOff = 1
types.Func.SetLinkage(GlobalFunc)
btf := marshalBTF(&types, strings, internal.NativeEndian)
fd, err := bpfLoadBTF(&bpfLoadBTFAttr{
btf: internal.NewSlicePointer(btf),
btfSize: uint32(len(btf)),
})
if errors.Is(err, unix.EINVAL) {
return internal.ErrNotSupported
}
if err != nil {
return err
}
fd.Close()
return nil
})


@ -1,312 +0,0 @@
package btf
import (
"bufio"
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/internal"
)
type btfExtHeader struct {
Magic uint16
Version uint8
Flags uint8
HdrLen uint32
FuncInfoOff uint32
FuncInfoLen uint32
LineInfoOff uint32
LineInfoLen uint32
}
type btfExtCoreHeader struct {
CoreReloOff uint32
CoreReloLen uint32
}
func parseExtInfos(r io.ReadSeeker, bo binary.ByteOrder, strings stringTable) (funcInfo, lineInfo map[string]extInfo, relos map[string]coreRelos, err error) {
var header btfExtHeader
var coreHeader btfExtCoreHeader
if err := binary.Read(r, bo, &header); err != nil {
return nil, nil, nil, fmt.Errorf("can't read header: %v", err)
}
if header.Magic != btfMagic {
return nil, nil, nil, fmt.Errorf("incorrect magic value %v", header.Magic)
}
if header.Version != 1 {
return nil, nil, nil, fmt.Errorf("unexpected version %v", header.Version)
}
if header.Flags != 0 {
return nil, nil, nil, fmt.Errorf("unsupported flags %v", header.Flags)
}
remainder := int64(header.HdrLen) - int64(binary.Size(&header))
if remainder < 0 {
return nil, nil, nil, errors.New("header is too short")
}
coreHdrSize := int64(binary.Size(&coreHeader))
if remainder >= coreHdrSize {
if err := binary.Read(r, bo, &coreHeader); err != nil {
return nil, nil, nil, fmt.Errorf("can't read CO-RE relocation header: %v", err)
}
remainder -= coreHdrSize
}
// Of course, the .BTF.ext header has different semantics than the
// .BTF header. We need to ignore non-null values in the padding.
_, err = io.CopyN(io.Discard, r, remainder)
if err != nil {
return nil, nil, nil, fmt.Errorf("header padding: %v", err)
}
if _, err := r.Seek(int64(header.HdrLen+header.FuncInfoOff), io.SeekStart); err != nil {
return nil, nil, nil, fmt.Errorf("can't seek to function info section: %v", err)
}
buf := bufio.NewReader(io.LimitReader(r, int64(header.FuncInfoLen)))
funcInfo, err = parseExtInfo(buf, bo, strings)
if err != nil {
return nil, nil, nil, fmt.Errorf("function info: %w", err)
}
if _, err := r.Seek(int64(header.HdrLen+header.LineInfoOff), io.SeekStart); err != nil {
return nil, nil, nil, fmt.Errorf("can't seek to line info section: %v", err)
}
buf = bufio.NewReader(io.LimitReader(r, int64(header.LineInfoLen)))
lineInfo, err = parseExtInfo(buf, bo, strings)
if err != nil {
return nil, nil, nil, fmt.Errorf("line info: %w", err)
}
if coreHeader.CoreReloOff > 0 && coreHeader.CoreReloLen > 0 {
if _, err := r.Seek(int64(header.HdrLen+coreHeader.CoreReloOff), io.SeekStart); err != nil {
return nil, nil, nil, fmt.Errorf("can't seek to CO-RE relocation section: %v", err)
}
relos, err = parseExtInfoRelos(io.LimitReader(r, int64(coreHeader.CoreReloLen)), bo, strings)
if err != nil {
return nil, nil, nil, fmt.Errorf("CO-RE relocation info: %w", err)
}
}
return funcInfo, lineInfo, relos, nil
}
type btfExtInfoSec struct {
SecNameOff uint32
NumInfo uint32
}
type extInfoRecord struct {
InsnOff uint64
Opaque []byte
}
type extInfo struct {
byteOrder binary.ByteOrder
recordSize uint32
records []extInfoRecord
}
func (ei extInfo) append(other extInfo, offset uint64) (extInfo, error) {
if other.byteOrder != ei.byteOrder {
return extInfo{}, fmt.Errorf("ext_info byte order mismatch, want %v (got %v)", ei.byteOrder, other.byteOrder)
}
if other.recordSize != ei.recordSize {
return extInfo{}, fmt.Errorf("ext_info record size mismatch, want %d (got %d)", ei.recordSize, other.recordSize)
}
records := make([]extInfoRecord, 0, len(ei.records)+len(other.records))
records = append(records, ei.records...)
for _, info := range other.records {
records = append(records, extInfoRecord{
InsnOff: info.InsnOff + offset,
Opaque: info.Opaque,
})
}
return extInfo{ei.byteOrder, ei.recordSize, records}, nil
}
func (ei extInfo) MarshalBinary() ([]byte, error) {
if ei.byteOrder != internal.NativeEndian {
return nil, fmt.Errorf("%s is not the native byte order", ei.byteOrder)
}
if len(ei.records) == 0 {
return nil, nil
}
buf := bytes.NewBuffer(make([]byte, 0, int(ei.recordSize)*len(ei.records)))
for _, info := range ei.records {
// The kernel expects offsets in number of raw bpf instructions,
// while the ELF tracks it in bytes.
insnOff := uint32(info.InsnOff / asm.InstructionSize)
if err := binary.Write(buf, internal.NativeEndian, insnOff); err != nil {
return nil, fmt.Errorf("can't write instruction offset: %v", err)
}
buf.Write(info.Opaque)
}
return buf.Bytes(), nil
}
func parseExtInfo(r io.Reader, bo binary.ByteOrder, strings stringTable) (map[string]extInfo, error) {
const maxRecordSize = 256
var recordSize uint32
if err := binary.Read(r, bo, &recordSize); err != nil {
return nil, fmt.Errorf("can't read record size: %v", err)
}
if recordSize < 4 {
// Need at least insnOff
return nil, errors.New("record size too short")
}
if recordSize > maxRecordSize {
return nil, fmt.Errorf("record size %v exceeds %v", recordSize, maxRecordSize)
}
result := make(map[string]extInfo)
for {
secName, infoHeader, err := parseExtInfoHeader(r, bo, strings)
if errors.Is(err, io.EOF) {
return result, nil
}
var records []extInfoRecord
for i := uint32(0); i < infoHeader.NumInfo; i++ {
var byteOff uint32
if err := binary.Read(r, bo, &byteOff); err != nil {
return nil, fmt.Errorf("section %v: can't read extended info offset: %v", secName, err)
}
buf := make([]byte, int(recordSize-4))
if _, err := io.ReadFull(r, buf); err != nil {
return nil, fmt.Errorf("section %v: can't read record: %v", secName, err)
}
if byteOff%asm.InstructionSize != 0 {
return nil, fmt.Errorf("section %v: offset %v is not aligned with instruction size", secName, byteOff)
}
records = append(records, extInfoRecord{uint64(byteOff), buf})
}
result[secName] = extInfo{
bo,
recordSize,
records,
}
}
}
// bpfCoreRelo matches `struct bpf_core_relo` from the kernel
type bpfCoreRelo struct {
InsnOff uint32
TypeID TypeID
AccessStrOff uint32
Kind COREKind
}
type coreRelo struct {
insnOff uint32
typeID TypeID
accessor coreAccessor
kind COREKind
}
type coreRelos []coreRelo
// append concatenates two coreRelos slices. The InsnOff of each relo in
// other is adjusted by offset.
func (r coreRelos) append(other coreRelos, offset uint64) coreRelos {
result := make([]coreRelo, 0, len(r)+len(other))
result = append(result, r...)
for _, relo := range other {
relo.insnOff += uint32(offset)
result = append(result, relo)
}
return result
}
var extInfoReloSize = binary.Size(bpfCoreRelo{})
func parseExtInfoRelos(r io.Reader, bo binary.ByteOrder, strings stringTable) (map[string]coreRelos, error) {
var recordSize uint32
if err := binary.Read(r, bo, &recordSize); err != nil {
return nil, fmt.Errorf("read record size: %v", err)
}
if recordSize != uint32(extInfoReloSize) {
return nil, fmt.Errorf("expected record size %d, got %d", extInfoReloSize, recordSize)
}
result := make(map[string]coreRelos)
for {
secName, infoHeader, err := parseExtInfoHeader(r, bo, strings)
if errors.Is(err, io.EOF) {
return result, nil
}
var relos coreRelos
for i := uint32(0); i < infoHeader.NumInfo; i++ {
var relo bpfCoreRelo
if err := binary.Read(r, bo, &relo); err != nil {
return nil, fmt.Errorf("section %v: read record: %v", secName, err)
}
if relo.InsnOff%asm.InstructionSize != 0 {
return nil, fmt.Errorf("section %v: offset %v is not aligned with instruction size", secName, relo.InsnOff)
}
accessorStr, err := strings.Lookup(relo.AccessStrOff)
if err != nil {
return nil, err
}
accessor, err := parseCoreAccessor(accessorStr)
if err != nil {
return nil, fmt.Errorf("accessor %q: %s", accessorStr, err)
}
relos = append(relos, coreRelo{
relo.InsnOff,
relo.TypeID,
accessor,
relo.Kind,
})
}
result[secName] = relos
}
}
func parseExtInfoHeader(r io.Reader, bo binary.ByteOrder, strings stringTable) (string, *btfExtInfoSec, error) {
var infoHeader btfExtInfoSec
if err := binary.Read(r, bo, &infoHeader); err != nil {
return "", nil, fmt.Errorf("read ext info header: %w", err)
}
secName, err := strings.Lookup(infoHeader.SecNameOff)
if err != nil {
return "", nil, fmt.Errorf("get section name: %w", err)
}
if infoHeader.NumInfo == 0 {
return "", nil, fmt.Errorf("section %s has zero records", secName)
}
return secName, &infoHeader, nil
}


@ -1,50 +0,0 @@
//go:build gofuzz
// +build gofuzz
// Use with https://github.com/dvyukov/go-fuzz
package btf
import (
"bytes"
"encoding/binary"
"github.com/cilium/ebpf/internal"
)
func FuzzSpec(data []byte) int {
if len(data) < binary.Size(btfHeader{}) {
return -1
}
spec, err := loadRawSpec(bytes.NewReader(data), internal.NativeEndian, nil, nil)
if err != nil {
if spec != nil {
panic("spec is not nil")
}
return 0
}
if spec == nil {
panic("spec is nil")
}
return 1
}
func FuzzExtInfo(data []byte) int {
if len(data) < binary.Size(btfExtHeader{}) {
return -1
}
table := stringTable("\x00foo\x00barfoo\x00")
info, err := parseExtInfo(bytes.NewReader(data), internal.NativeEndian, table)
if err != nil {
if info != nil {
panic("info is not nil")
}
return 0
}
if info == nil {
panic("info is nil")
}
return 1
}


@ -1,48 +0,0 @@
package btf
import (
"bytes"
"github.com/cilium/ebpf/internal"
)
// info describes a BTF object.
type info struct {
BTF *Spec
ID ID
// Name is an identifying name for the BTF, currently only used by the
// kernel.
Name string
// KernelBTF is true if the BTF originated with the kernel and not
// userspace.
KernelBTF bool
}
func newInfoFromFd(fd *internal.FD) (*info, error) {
// We invoke the syscall once with empty BTF and name buffers to learn the
// sizes we need to allocate, then a second time with those buffers to
// receive the data.
bpfInfo, err := bpfGetBTFInfoByFD(fd, nil, nil)
if err != nil {
return nil, err
}
btfBuffer := make([]byte, bpfInfo.btfSize)
nameBuffer := make([]byte, bpfInfo.nameLen)
bpfInfo, err = bpfGetBTFInfoByFD(fd, btfBuffer, nameBuffer)
if err != nil {
return nil, err
}
spec, err := loadRawSpec(bytes.NewReader(btfBuffer), internal.NativeEndian, nil, nil)
if err != nil {
return nil, err
}
return &info{
BTF: spec,
ID: ID(bpfInfo.id),
Name: internal.CString(nameBuffer),
KernelBTF: bpfInfo.kernelBTF != 0,
}, nil
}


@ -1,54 +0,0 @@
package btf
import (
"bytes"
"errors"
"fmt"
"io"
)
type stringTable []byte
func readStringTable(r io.Reader) (stringTable, error) {
contents, err := io.ReadAll(r)
if err != nil {
return nil, fmt.Errorf("can't read string table: %v", err)
}
if len(contents) < 1 {
return nil, errors.New("string table is empty")
}
if contents[0] != '\x00' {
return nil, errors.New("first item in string table is non-empty")
}
if contents[len(contents)-1] != '\x00' {
return nil, errors.New("string table isn't null terminated")
}
return stringTable(contents), nil
}
func (st stringTable) Lookup(offset uint32) (string, error) {
if int64(offset) > int64(^uint(0)>>1) {
return "", fmt.Errorf("offset %d overflows int", offset)
}
pos := int(offset)
if pos >= len(st) {
return "", fmt.Errorf("offset %d is out of bounds", offset)
}
if pos > 0 && st[pos-1] != '\x00' {
return "", fmt.Errorf("offset %d isn't start of a string", offset)
}
str := st[pos:]
end := bytes.IndexByte(str, '\x00')
if end == -1 {
return "", fmt.Errorf("offset %d isn't null terminated", offset)
}
return string(str[:end]), nil
}
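// Worked example (illustrative): for the table "\x00foo\x00bar\x00",
// Lookup(0) == "", Lookup(1) == "foo" and Lookup(5) == "bar", while
// Lookup(2) fails because offset 2 does not follow a NUL terminator.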


@ -1,31 +0,0 @@
package btf
import (
"fmt"
"unsafe"
"github.com/cilium/ebpf/internal"
)
type bpfBTFInfo struct {
btf internal.Pointer
btfSize uint32
id uint32
name internal.Pointer
nameLen uint32
kernelBTF uint32
}
func bpfGetBTFInfoByFD(fd *internal.FD, btf, name []byte) (*bpfBTFInfo, error) {
info := bpfBTFInfo{
btf: internal.NewSlicePointer(btf),
btfSize: uint32(len(btf)),
name: internal.NewSlicePointer(name),
nameLen: uint32(len(name)),
}
if err := internal.BPFObjGetInfoByFD(fd, unsafe.Pointer(&info), unsafe.Sizeof(info)); err != nil {
return nil, fmt.Errorf("can't get program info: %w", err)
}
return &info, nil
}


@ -1,957 +0,0 @@
package btf
import (
"fmt"
"math"
"strings"
)
const maxTypeDepth = 32
// TypeID identifies a type in a BTF section.
type TypeID uint32
// ID implements part of the Type interface.
func (tid TypeID) ID() TypeID {
return tid
}
// Type represents a type described by BTF.
type Type interface {
ID() TypeID
String() string
// Make a copy of the type, without copying Type members.
copy() Type
// Enumerate all nested Types. Repeated calls must visit nested
// types in the same order.
walk(*typeDeque)
}
// NamedType is a type with a name.
type NamedType interface {
Type
// Name of the type, empty for anonymous types.
TypeName() string
}
var (
_ NamedType = (*Int)(nil)
_ NamedType = (*Struct)(nil)
_ NamedType = (*Union)(nil)
_ NamedType = (*Enum)(nil)
_ NamedType = (*Fwd)(nil)
_ NamedType = (*Func)(nil)
_ NamedType = (*Typedef)(nil)
_ NamedType = (*Var)(nil)
_ NamedType = (*Datasec)(nil)
_ NamedType = (*Float)(nil)
)
// Void is the unit type of BTF.
type Void struct{}
func (v *Void) ID() TypeID { return 0 }
func (v *Void) String() string { return "void#0" }
func (v *Void) size() uint32 { return 0 }
func (v *Void) copy() Type { return (*Void)(nil) }
func (v *Void) walk(*typeDeque) {}
type IntEncoding byte
const (
Signed IntEncoding = 1 << iota
Char
Bool
)
// Int is an integer of a given length.
type Int struct {
TypeID
Name string
// The size of the integer in bytes.
Size uint32
Encoding IntEncoding
// OffsetBits is the starting bit offset. Currently always 0.
// See https://www.kernel.org/doc/html/latest/bpf/btf.html#btf-kind-int
OffsetBits uint32
Bits byte
}
func (i *Int) String() string {
var s strings.Builder
switch {
case i.Encoding&Char != 0:
s.WriteString("char")
case i.Encoding&Bool != 0:
s.WriteString("bool")
default:
if i.Encoding&Signed == 0 {
s.WriteRune('u')
}
s.WriteString("int")
fmt.Fprintf(&s, "%d", i.Size*8)
}
fmt.Fprintf(&s, "#%d", i.TypeID)
if i.Bits > 0 {
fmt.Fprintf(&s, "[bits=%d]", i.Bits)
}
return s.String()
}
func (i *Int) TypeName() string { return i.Name }
func (i *Int) size() uint32 { return i.Size }
func (i *Int) walk(*typeDeque) {}
func (i *Int) copy() Type {
cpy := *i
return &cpy
}
func (i *Int) isBitfield() bool {
return i.OffsetBits > 0
}
// Pointer is a pointer to another type.
type Pointer struct {
TypeID
Target Type
}
func (p *Pointer) String() string {
return fmt.Sprintf("pointer#%d[target=#%d]", p.TypeID, p.Target.ID())
}
func (p *Pointer) size() uint32 { return 8 }
func (p *Pointer) walk(tdq *typeDeque) { tdq.push(&p.Target) }
func (p *Pointer) copy() Type {
cpy := *p
return &cpy
}
// Array is an array with a fixed number of elements.
type Array struct {
TypeID
Type Type
Nelems uint32
}
func (arr *Array) String() string {
return fmt.Sprintf("array#%d[type=#%d n=%d]", arr.TypeID, arr.Type.ID(), arr.Nelems)
}
func (arr *Array) walk(tdq *typeDeque) { tdq.push(&arr.Type) }
func (arr *Array) copy() Type {
cpy := *arr
return &cpy
}
// Struct is a compound type of consecutive members.
type Struct struct {
TypeID
Name string
// The size of the struct including padding, in bytes
Size uint32
Members []Member
}
func (s *Struct) String() string {
return fmt.Sprintf("struct#%d[%q]", s.TypeID, s.Name)
}
func (s *Struct) TypeName() string { return s.Name }
func (s *Struct) size() uint32 { return s.Size }
func (s *Struct) walk(tdq *typeDeque) {
for i := range s.Members {
tdq.push(&s.Members[i].Type)
}
}
func (s *Struct) copy() Type {
cpy := *s
cpy.Members = copyMembers(s.Members)
return &cpy
}
func (s *Struct) members() []Member {
return s.Members
}
// Union is a compound type where members occupy the same memory.
type Union struct {
TypeID
Name string
// The size of the union including padding, in bytes.
Size uint32
Members []Member
}
func (u *Union) String() string {
return fmt.Sprintf("union#%d[%q]", u.TypeID, u.Name)
}
func (u *Union) TypeName() string { return u.Name }
func (u *Union) size() uint32 { return u.Size }
func (u *Union) walk(tdq *typeDeque) {
for i := range u.Members {
tdq.push(&u.Members[i].Type)
}
}
func (u *Union) copy() Type {
cpy := *u
cpy.Members = copyMembers(u.Members)
return &cpy
}
func (u *Union) members() []Member {
return u.Members
}
func copyMembers(orig []Member) []Member {
cpy := make([]Member, len(orig))
copy(cpy, orig)
return cpy
}
type composite interface {
members() []Member
}
var (
_ composite = (*Struct)(nil)
_ composite = (*Union)(nil)
)
// Member is part of a Struct or Union.
//
// It is not a valid Type.
type Member struct {
Name string
Type Type
// OffsetBits is the bit offset of this member.
OffsetBits uint32
BitfieldSize uint32
}
// Enum lists possible values.
type Enum struct {
TypeID
Name string
Values []EnumValue
}
func (e *Enum) String() string {
return fmt.Sprintf("enum#%d[%q]", e.TypeID, e.Name)
}
func (e *Enum) TypeName() string { return e.Name }
// EnumValue is part of an Enum.
//
// It is not a valid Type.
type EnumValue struct {
Name string
Value int32
}
func (e *Enum) size() uint32 { return 4 }
func (e *Enum) walk(*typeDeque) {}
func (e *Enum) copy() Type {
cpy := *e
cpy.Values = make([]EnumValue, len(e.Values))
copy(cpy.Values, e.Values)
return &cpy
}
// FwdKind is the type of forward declaration.
type FwdKind int
// Valid types of forward declaration.
const (
FwdStruct FwdKind = iota
FwdUnion
)
func (fk FwdKind) String() string {
switch fk {
case FwdStruct:
return "struct"
case FwdUnion:
return "union"
default:
return fmt.Sprintf("%T(%d)", fk, int(fk))
}
}
// Fwd is a forward declaration of a Type.
type Fwd struct {
TypeID
Name string
Kind FwdKind
}
func (f *Fwd) String() string {
return fmt.Sprintf("fwd#%d[%s %q]", f.TypeID, f.Kind, f.Name)
}
func (f *Fwd) TypeName() string { return f.Name }
func (f *Fwd) walk(*typeDeque) {}
func (f *Fwd) copy() Type {
cpy := *f
return &cpy
}
// Typedef is an alias of a Type.
type Typedef struct {
TypeID
Name string
Type Type
}
func (td *Typedef) String() string {
return fmt.Sprintf("typedef#%d[%q #%d]", td.TypeID, td.Name, td.Type.ID())
}
func (td *Typedef) TypeName() string { return td.Name }
func (td *Typedef) walk(tdq *typeDeque) { tdq.push(&td.Type) }
func (td *Typedef) copy() Type {
cpy := *td
return &cpy
}
// Volatile is a qualifier.
type Volatile struct {
TypeID
Type Type
}
func (v *Volatile) String() string {
return fmt.Sprintf("volatile#%d[#%d]", v.TypeID, v.Type.ID())
}
func (v *Volatile) qualify() Type { return v.Type }
func (v *Volatile) walk(tdq *typeDeque) { tdq.push(&v.Type) }
func (v *Volatile) copy() Type {
cpy := *v
return &cpy
}
// Const is a qualifier.
type Const struct {
TypeID
Type Type
}
func (c *Const) String() string {
return fmt.Sprintf("const#%d[#%d]", c.TypeID, c.Type.ID())
}
func (c *Const) qualify() Type { return c.Type }
func (c *Const) walk(tdq *typeDeque) { tdq.push(&c.Type) }
func (c *Const) copy() Type {
cpy := *c
return &cpy
}
// Restrict is a qualifier.
type Restrict struct {
TypeID
Type Type
}
func (r *Restrict) String() string {
return fmt.Sprintf("restrict#%d[#%d]", r.TypeID, r.Type.ID())
}
func (r *Restrict) qualify() Type { return r.Type }
func (r *Restrict) walk(tdq *typeDeque) { tdq.push(&r.Type) }
func (r *Restrict) copy() Type {
cpy := *r
return &cpy
}
// Func is a function definition.
type Func struct {
TypeID
Name string
Type Type
Linkage FuncLinkage
}
func (f *Func) String() string {
return fmt.Sprintf("func#%d[%s %q proto=#%d]", f.TypeID, f.Linkage, f.Name, f.Type.ID())
}
func (f *Func) TypeName() string { return f.Name }
func (f *Func) walk(tdq *typeDeque) { tdq.push(&f.Type) }
func (f *Func) copy() Type {
cpy := *f
return &cpy
}
// FuncProto is a function declaration.
type FuncProto struct {
TypeID
Return Type
Params []FuncParam
}
func (fp *FuncProto) String() string {
var s strings.Builder
fmt.Fprintf(&s, "proto#%d[", fp.TypeID)
for _, param := range fp.Params {
fmt.Fprintf(&s, "%q=#%d, ", param.Name, param.Type.ID())
}
fmt.Fprintf(&s, "return=#%d]", fp.Return.ID())
return s.String()
}
func (fp *FuncProto) walk(tdq *typeDeque) {
tdq.push(&fp.Return)
for i := range fp.Params {
tdq.push(&fp.Params[i].Type)
}
}
func (fp *FuncProto) copy() Type {
cpy := *fp
cpy.Params = make([]FuncParam, len(fp.Params))
copy(cpy.Params, fp.Params)
return &cpy
}
type FuncParam struct {
Name string
Type Type
}
// Var is a global variable.
type Var struct {
TypeID
Name string
Type Type
Linkage VarLinkage
}
func (v *Var) String() string {
return fmt.Sprintf("var#%d[%s %q]", v.TypeID, v.Linkage, v.Name)
}
func (v *Var) TypeName() string { return v.Name }
func (v *Var) walk(tdq *typeDeque) { tdq.push(&v.Type) }
func (v *Var) copy() Type {
cpy := *v
return &cpy
}
// Datasec is a global program section containing data.
type Datasec struct {
TypeID
Name string
Size uint32
Vars []VarSecinfo
}
func (ds *Datasec) String() string {
return fmt.Sprintf("section#%d[%q]", ds.TypeID, ds.Name)
}
func (ds *Datasec) TypeName() string { return ds.Name }
func (ds *Datasec) size() uint32 { return ds.Size }
func (ds *Datasec) walk(tdq *typeDeque) {
for i := range ds.Vars {
tdq.push(&ds.Vars[i].Type)
}
}
func (ds *Datasec) copy() Type {
cpy := *ds
cpy.Vars = make([]VarSecinfo, len(ds.Vars))
copy(cpy.Vars, ds.Vars)
return &cpy
}
// VarSecinfo describes a variable in a Datasec.
//
// It is not a valid Type.
type VarSecinfo struct {
Type Type
Offset uint32
Size uint32
}
// Float is a float of a given length.
type Float struct {
TypeID
Name string
// The size of the float in bytes.
Size uint32
}
func (f *Float) String() string {
return fmt.Sprintf("float%d#%d[%q]", f.Size*8, f.TypeID, f.Name)
}
func (f *Float) TypeName() string { return f.Name }
func (f *Float) size() uint32 { return f.Size }
func (f *Float) walk(*typeDeque) {}
func (f *Float) copy() Type {
cpy := *f
return &cpy
}
type sizer interface {
size() uint32
}
var (
_ sizer = (*Int)(nil)
_ sizer = (*Pointer)(nil)
_ sizer = (*Struct)(nil)
_ sizer = (*Union)(nil)
_ sizer = (*Enum)(nil)
_ sizer = (*Datasec)(nil)
)
type qualifier interface {
qualify() Type
}
var (
_ qualifier = (*Const)(nil)
_ qualifier = (*Restrict)(nil)
_ qualifier = (*Volatile)(nil)
)
// Sizeof returns the size of a type in bytes.
//
// Returns an error if the size can't be computed.
func Sizeof(typ Type) (int, error) {
var (
n = int64(1)
elem int64
)
for i := 0; i < maxTypeDepth; i++ {
switch v := typ.(type) {
case *Array:
if n > 0 && int64(v.Nelems) > math.MaxInt64/n {
return 0, fmt.Errorf("type %s: overflow", typ)
}
// Arrays may be of zero length, which allows
// n to be zero as well.
n *= int64(v.Nelems)
typ = v.Type
continue
case sizer:
elem = int64(v.size())
case *Typedef:
typ = v.Type
continue
case qualifier:
typ = v.qualify()
continue
default:
return 0, fmt.Errorf("unsized type %T", typ)
}
if n > 0 && elem > math.MaxInt64/n {
return 0, fmt.Errorf("type %s: overflow", typ)
}
size := n * elem
if int64(int(size)) != size {
return 0, fmt.Errorf("type %s: overflow", typ)
}
return int(size), nil
}
return 0, fmt.Errorf("type %s: exceeded type depth", typ)
}
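// Example (editor's illustrative sketch, not part of the upstream source):
// Sizeof resolves typedefs and qualifiers and multiplies out array lengths,
// so an array of eight 4-byte integers reports 32 bytes:
//
// arr := &Array{Type: &Int{Size: 4}, Nelems: 8}
// n, _ := Sizeof(arr) // n == 32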
// copy a Type recursively.
//
// typ may form a cycle.
//
// Returns any errors from transform verbatim.
func copyType(typ Type, transform func(Type) (Type, error)) (Type, error) {
copies := make(copier)
return typ, copies.copy(&typ, transform)
}
// copy a slice of Types recursively.
//
// Types may form a cycle.
//
// Returns any errors from transform verbatim.
func copyTypes(types []Type, transform func(Type) (Type, error)) ([]Type, error) {
result := make([]Type, len(types))
copy(result, types)
copies := make(copier)
for i := range result {
if err := copies.copy(&result[i], transform); err != nil {
return nil, err
}
}
return result, nil
}
type copier map[Type]Type
func (c copier) copy(typ *Type, transform func(Type) (Type, error)) error {
var work typeDeque
for t := typ; t != nil; t = work.pop() {
// *t is the identity of the type.
if cpy := c[*t]; cpy != nil {
*t = cpy
continue
}
var cpy Type
if transform != nil {
tf, err := transform(*t)
if err != nil {
return fmt.Errorf("copy %s: %w", *t, err)
}
cpy = tf.copy()
} else {
cpy = (*t).copy()
}
c[*t] = cpy
*t = cpy
// Mark any nested types for copying.
cpy.walk(&work)
}
return nil
}
// typeDeque keeps track of pointers to types which still
// need to be visited.
type typeDeque struct {
types []*Type
read, write uint64
mask uint64
}
func (dq *typeDeque) empty() bool {
return dq.read == dq.write
}
// push adds a type to the stack.
func (dq *typeDeque) push(t *Type) {
if dq.write-dq.read < uint64(len(dq.types)) {
dq.types[dq.write&dq.mask] = t
dq.write++
return
}
new := len(dq.types) * 2
if new == 0 {
new = 8
}
types := make([]*Type, new)
pivot := dq.read & dq.mask
n := copy(types, dq.types[pivot:])
n += copy(types[n:], dq.types[:pivot])
types[n] = t
dq.types = types
dq.mask = uint64(new) - 1
dq.read, dq.write = 0, uint64(n+1)
}
// shift returns the first element or nil.
func (dq *typeDeque) shift() *Type {
if dq.empty() {
return nil
}
index := dq.read & dq.mask
t := dq.types[index]
dq.types[index] = nil
dq.read++
return t
}
// pop returns the last element or nil.
func (dq *typeDeque) pop() *Type {
if dq.empty() {
return nil
}
dq.write--
index := dq.write & dq.mask
t := dq.types[index]
dq.types[index] = nil
return t
}
// all returns all elements.
//
// The deque is empty after calling this method.
func (dq *typeDeque) all() []*Type {
length := dq.write - dq.read
types := make([]*Type, 0, length)
for t := dq.shift(); t != nil; t = dq.shift() {
types = append(types, t)
}
return types
}
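// Example (editor's illustrative sketch, not part of the upstream source):
// typeDeque is a ring buffer supporting both FIFO (shift) and LIFO (pop)
// removal, which copier.copy uses for depth-first traversal:
//
// var dq typeDeque
// var a, b Type = &Int{}, &Float{}
// dq.push(&a)
// dq.push(&b)
// last := dq.pop() // last == &b (LIFO)
// first := dq.shift() // first == &a (FIFO)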
// inflateRawTypes takes a list of raw btf types linked via type IDs, and turns
// it into a graph of Types connected via pointers.
//
// Returns a map of named types (i.e. those whose NameOff is non-zero) and a
// slice of types indexed by TypeID. Since BTF ignores compilation units,
// multiple types may share the same name. A Type may form a cyclic graph by
// pointing at itself.
func inflateRawTypes(rawTypes []rawType, rawStrings stringTable) (types []Type, namedTypes map[string][]NamedType, err error) {
type fixupDef struct {
id TypeID
expectedKind btfKind
typ *Type
}
var fixups []fixupDef
fixup := func(id TypeID, expectedKind btfKind, typ *Type) {
fixups = append(fixups, fixupDef{id, expectedKind, typ})
}
convertMembers := func(raw []btfMember, kindFlag bool) ([]Member, error) {
// NB: The fixup below relies on pre-allocating this array to
// work, since otherwise append might re-allocate members.
members := make([]Member, 0, len(raw))
for i, btfMember := range raw {
name, err := rawStrings.Lookup(btfMember.NameOff)
if err != nil {
return nil, fmt.Errorf("can't get name for member %d: %w", i, err)
}
m := Member{
Name: name,
OffsetBits: btfMember.Offset,
}
if kindFlag {
m.BitfieldSize = btfMember.Offset >> 24
m.OffsetBits &= 0xffffff
}
members = append(members, m)
}
for i := range members {
fixup(raw[i].Type, kindUnknown, &members[i].Type)
}
return members, nil
}
types = make([]Type, 0, len(rawTypes))
types = append(types, (*Void)(nil))
namedTypes = make(map[string][]NamedType)
for i, raw := range rawTypes {
var (
// Void is defined to always be type ID 0, and is thus
// omitted from BTF.
id = TypeID(i + 1)
typ Type
)
name, err := rawStrings.Lookup(raw.NameOff)
if err != nil {
return nil, nil, fmt.Errorf("get name for type id %d: %w", id, err)
}
switch raw.Kind() {
case kindInt:
encoding, offset, bits := intEncoding(*raw.data.(*uint32))
typ = &Int{id, name, raw.Size(), encoding, offset, bits}
case kindPointer:
ptr := &Pointer{id, nil}
fixup(raw.Type(), kindUnknown, &ptr.Target)
typ = ptr
case kindArray:
btfArr := raw.data.(*btfArray)
// IndexType is unused according to btf.rst.
// Don't make it available right now.
arr := &Array{id, nil, btfArr.Nelems}
fixup(btfArr.Type, kindUnknown, &arr.Type)
typ = arr
case kindStruct:
members, err := convertMembers(raw.data.([]btfMember), raw.KindFlag())
if err != nil {
return nil, nil, fmt.Errorf("struct %s (id %d): %w", name, id, err)
}
typ = &Struct{id, name, raw.Size(), members}
case kindUnion:
members, err := convertMembers(raw.data.([]btfMember), raw.KindFlag())
if err != nil {
return nil, nil, fmt.Errorf("union %s (id %d): %w", name, id, err)
}
typ = &Union{id, name, raw.Size(), members}
case kindEnum:
rawvals := raw.data.([]btfEnum)
vals := make([]EnumValue, 0, len(rawvals))
for i, btfVal := range rawvals {
name, err := rawStrings.Lookup(btfVal.NameOff)
if err != nil {
return nil, nil, fmt.Errorf("get name for enum value %d: %s", i, err)
}
vals = append(vals, EnumValue{
Name: name,
Value: btfVal.Val,
})
}
typ = &Enum{id, name, vals}
case kindForward:
if raw.KindFlag() {
typ = &Fwd{id, name, FwdUnion}
} else {
typ = &Fwd{id, name, FwdStruct}
}
case kindTypedef:
typedef := &Typedef{id, name, nil}
fixup(raw.Type(), kindUnknown, &typedef.Type)
typ = typedef
case kindVolatile:
volatile := &Volatile{id, nil}
fixup(raw.Type(), kindUnknown, &volatile.Type)
typ = volatile
case kindConst:
cnst := &Const{id, nil}
fixup(raw.Type(), kindUnknown, &cnst.Type)
typ = cnst
case kindRestrict:
restrict := &Restrict{id, nil}
fixup(raw.Type(), kindUnknown, &restrict.Type)
typ = restrict
case kindFunc:
fn := &Func{id, name, nil, raw.Linkage()}
fixup(raw.Type(), kindFuncProto, &fn.Type)
typ = fn
case kindFuncProto:
rawparams := raw.data.([]btfParam)
params := make([]FuncParam, 0, len(rawparams))
for i, param := range rawparams {
name, err := rawStrings.Lookup(param.NameOff)
if err != nil {
return nil, nil, fmt.Errorf("get name for func proto parameter %d: %s", i, err)
}
params = append(params, FuncParam{
Name: name,
})
}
for i := range params {
fixup(rawparams[i].Type, kindUnknown, &params[i].Type)
}
fp := &FuncProto{id, nil, params}
fixup(raw.Type(), kindUnknown, &fp.Return)
typ = fp
case kindVar:
variable := raw.data.(*btfVariable)
v := &Var{id, name, nil, VarLinkage(variable.Linkage)}
fixup(raw.Type(), kindUnknown, &v.Type)
typ = v
case kindDatasec:
btfVars := raw.data.([]btfVarSecinfo)
vars := make([]VarSecinfo, 0, len(btfVars))
for _, btfVar := range btfVars {
vars = append(vars, VarSecinfo{
Offset: btfVar.Offset,
Size: btfVar.Size,
})
}
for i := range vars {
fixup(btfVars[i].Type, kindVar, &vars[i].Type)
}
typ = &Datasec{id, name, raw.SizeType, vars}
case kindFloat:
typ = &Float{id, name, raw.Size()}
default:
return nil, nil, fmt.Errorf("type id %d: unknown kind: %v", id, raw.Kind())
}
types = append(types, typ)
if named, ok := typ.(NamedType); ok {
if name := essentialName(named.TypeName()); name != "" {
namedTypes[name] = append(namedTypes[name], named)
}
}
}
for _, fixup := range fixups {
i := int(fixup.id)
if i >= len(types) {
return nil, nil, fmt.Errorf("reference to invalid type id: %d", fixup.id)
}
// Default void (id 0) to unknown
rawKind := kindUnknown
if i > 0 {
rawKind = rawTypes[i-1].Kind()
}
if expected := fixup.expectedKind; expected != kindUnknown && rawKind != expected {
return nil, nil, fmt.Errorf("expected type id %d to have kind %s, found %s", fixup.id, expected, rawKind)
}
*fixup.typ = types[i]
}
return types, namedTypes, nil
}
// essentialName returns name without a ___ suffix.
func essentialName(name string) string {
lastIdx := strings.LastIndex(name, "___")
if lastIdx > 0 {
return name[:lastIdx]
}
return name
}
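// Example (editor's illustrative sketch, not part of the upstream source):
// the ___ suffix convention lets alternative definitions of one type be
// carried under distinct names that share an essential name:
//
// essentialName("task_struct___v51") // "task_struct"
// essentialName("task_struct") // "task_struct"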

View file

@ -35,6 +35,29 @@ func NewSafeELFFile(r io.ReaderAt) (safe *SafeELFFile, err error) {
return &SafeELFFile{file}, nil
}
// OpenSafeELFFile reads an ELF from a file.
//
// It works like NewSafeELFFile, with the exception that safe.Close will
// close the underlying file.
func OpenSafeELFFile(path string) (safe *SafeELFFile, err error) {
defer func() {
r := recover()
if r == nil {
return
}
safe = nil
err = fmt.Errorf("reading ELF file panicked: %s", r)
}()
file, err := elf.Open(path)
if err != nil {
return nil, err
}
return &SafeELFFile{file}, nil
}
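// Example (editor's illustrative sketch, not part of the upstream source):
// the Safe wrappers turn panics from the stdlib ELF parser on malformed input
// into plain errors, so a corrupt file is handled like any other failure:
//
// se, err := OpenSafeELFFile("/proc/self/exe")
// if err != nil {
// 	return err
// }
// defer se.Close()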
// Symbols is the safe version of elf.File.Symbols.
func (se *SafeELFFile) Symbols() (syms []elf.Symbol, err error) {
defer func() {
@ -66,3 +89,14 @@ func (se *SafeELFFile) DynamicSymbols() (syms []elf.Symbol, err error) {
syms, err = se.File.DynamicSymbols()
return
}
// SectionsByType returns all sections in the file with the specified section type.
func (se *SafeELFFile) SectionsByType(typ elf.SectionType) []*elf.Section {
sections := make([]*elf.Section, 0, 1)
for _, section := range se.Sections {
if section.Type == typ {
sections = append(sections, section)
}
}
return sections
}
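// Example (editor's illustrative sketch, not part of the upstream source):
// collecting every note section, as the vDSO version detection in
// internal/vdso.go does:
//
// notes := se.SectionsByType(elf.SHT_NOTE)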

View file

@ -1,29 +0,0 @@
package internal
import (
"encoding/binary"
"unsafe"
)
// NativeEndian is set to either binary.BigEndian or binary.LittleEndian,
// depending on the host's endianness.
var NativeEndian binary.ByteOrder
// Clang is set to either "el" or "eb" depending on the host's endianness.
var ClangEndian string
func init() {
if isBigEndian() {
NativeEndian = binary.BigEndian
ClangEndian = "eb"
} else {
NativeEndian = binary.LittleEndian
ClangEndian = "el"
}
}
func isBigEndian() (ret bool) {
i := int(0x1)
bs := (*[int(unsafe.Sizeof(i))]byte)(unsafe.Pointer(&i))
return bs[0] == 0
}

13
vendor/github.com/cilium/ebpf/internal/endian_be.go generated vendored Normal file
View file

@ -0,0 +1,13 @@
//go:build armbe || arm64be || mips || mips64 || mips64p32 || ppc64 || s390 || s390x || sparc || sparc64
// +build armbe arm64be mips mips64 mips64p32 ppc64 s390 s390x sparc sparc64
package internal
import "encoding/binary"
// NativeEndian is set to either binary.BigEndian or binary.LittleEndian,
// depending on the host's endianness.
var NativeEndian binary.ByteOrder = binary.BigEndian
// ClangEndian is set to either "el" or "eb" depending on the host's endianness.
const ClangEndian = "eb"

13
vendor/github.com/cilium/ebpf/internal/endian_le.go generated vendored Normal file
View file

@ -0,0 +1,13 @@
//go:build 386 || amd64 || amd64p32 || arm || arm64 || mipsle || mips64le || mips64p32le || ppc64le || riscv64
// +build 386 amd64 amd64p32 arm arm64 mipsle mips64le mips64p32le ppc64le riscv64
package internal
import "encoding/binary"
// NativeEndian is set to either binary.BigEndian or binary.LittleEndian,
// depending on the host's endianness.
var NativeEndian binary.ByteOrder = binary.LittleEndian
// ClangEndian is set to either "el" or "eb" depending on the host's endianness.
const ClangEndian = "el"

View file

@ -2,50 +2,205 @@ package internal
import (
"bytes"
"errors"
"fmt"
"io"
"strings"
"github.com/cilium/ebpf/internal/unix"
)
// ErrorWithLog returns an error that includes logs from the
// kernel verifier.
// ErrorWithLog returns an error which includes logs from the kernel verifier.
//
// logErr should be the error returned by the syscall that generated
// the log. It is used to check for truncation of the output.
func ErrorWithLog(err error, log []byte, logErr error) error {
logStr := strings.Trim(CString(log), "\t\r\n ")
if errors.Is(logErr, unix.ENOSPC) {
logStr += " (truncated...)"
// The default error output is a summary of the full log. The latter can be
// accessed via VerifierError.Log or by formatting the error, see Format.
//
// A set of heuristics is used to determine whether the log has been truncated.
func ErrorWithLog(err error, log []byte) *VerifierError {
const whitespace = "\t\r\v\n "
// Convert verifier log C string by truncating it on the first 0 byte
// and trimming trailing whitespace before interpreting as a Go string.
truncated := false
if i := bytes.IndexByte(log, 0); i != -1 {
if i == len(log)-1 && !bytes.HasSuffix(log[:i], []byte{'\n'}) {
// The null byte is at the end of the buffer and it's not preceded
// by a newline character. Most likely the buffer was too short.
truncated = true
}
log = log[:i]
} else if len(log) > 0 {
// No null byte? Dodgy!
truncated = true
}
return &VerifierError{err, logStr}
log = bytes.Trim(log, whitespace)
logLines := bytes.Split(log, []byte{'\n'})
lines := make([]string, 0, len(logLines))
for _, line := range logLines {
// Don't remove leading white space on individual lines. We rely on it
// when outputting logs.
lines = append(lines, string(bytes.TrimRight(line, whitespace)))
}
return &VerifierError{err, lines, truncated}
}
// VerifierError includes information from the eBPF verifier.
//
// It summarises the log output, see Format if you want to output the full contents.
type VerifierError struct {
cause error
log string
// The error which caused this error.
Cause error
// The verifier output split into lines.
Log []string
// Whether the log output is truncated, based on several heuristics.
Truncated bool
}
func (le *VerifierError) Unwrap() error {
return le.cause
return le.Cause
}
func (le *VerifierError) Error() string {
if le.log == "" {
return le.cause.Error()
log := le.Log
if n := len(log); n > 0 && strings.HasPrefix(log[n-1], "processed ") {
// Get rid of "processed 39 insns (limit 1000000) ..." from summary.
log = log[:n-1]
}
return fmt.Sprintf("%s: %s", le.cause, le.log)
n := len(log)
if n == 0 {
return le.Cause.Error()
}
lines := log[n-1:]
if n >= 2 && (includePreviousLine(log[n-1]) || le.Truncated) {
// Add one more line of context if it aids understanding the error.
lines = log[n-2:]
}
var b strings.Builder
fmt.Fprintf(&b, "%s: ", le.Cause.Error())
for i, line := range lines {
b.WriteString(strings.TrimSpace(line))
if i != len(lines)-1 {
b.WriteString(": ")
}
}
omitted := len(le.Log) - len(lines)
if omitted == 0 && !le.Truncated {
return b.String()
}
b.WriteString(" (")
if le.Truncated {
b.WriteString("truncated")
}
if omitted > 0 {
if le.Truncated {
b.WriteString(", ")
}
fmt.Fprintf(&b, "%d line(s) omitted", omitted)
}
b.WriteString(")")
return b.String()
}
// CString turns a NUL / zero terminated byte buffer into a string.
func CString(in []byte) string {
inLen := bytes.IndexByte(in, 0)
if inLen == -1 {
return ""
// includePreviousLine returns true if the given line is likely better
// understood with additional context from the preceding line.
func includePreviousLine(line string) bool {
// We need to find a good trade off between understandable error messages
// and too much complexity here. Checking the string prefix is ok, requiring
// regular expressions to do it is probably overkill.
if strings.HasPrefix(line, "\t") {
// [13] STRUCT drm_rect size=16 vlen=4
// \tx1 type_id=2
return true
}
if len(line) >= 2 && line[0] == 'R' && line[1] >= '0' && line[1] <= '9' {
// 0: (95) exit
// R0 !read_ok
return true
}
if strings.HasPrefix(line, "invalid bpf_context access") {
// 0: (79) r6 = *(u64 *)(r1 +0)
// func '__x64_sys_recvfrom' arg0 type FWD is not a struct
// invalid bpf_context access off=0 size=8
return true
}
return false
}
// Format the error.
//
// Understood verbs are %s and %v, which are equivalent to calling Error(). %v
// allows outputting additional information using the following flags:
//
// + Output the first <width> lines, or all lines if no width is given.
// - Output the last <width> lines, or all lines if no width is given.
//
// Use width to specify how many lines to output. Use the '-' flag to output
// lines from the end of the log instead of the beginning.
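// For example (editor's illustrative reading of the flags described above),
// %+20v prints the first twenty log lines and %-5v prints the last five:
//
// fmt.Printf("%+20v", verifierErr)
// fmt.Printf("%-5v", verifierErr)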
func (le *VerifierError) Format(f fmt.State, verb rune) {
switch verb {
case 's':
_, _ = io.WriteString(f, le.Error())
case 'v':
n, haveWidth := f.Width()
if !haveWidth || n > len(le.Log) {
n = len(le.Log)
}
if !f.Flag('+') && !f.Flag('-') {
if haveWidth {
_, _ = io.WriteString(f, "%!v(BADWIDTH)")
return
}
_, _ = io.WriteString(f, le.Error())
return
}
if f.Flag('+') && f.Flag('-') {
_, _ = io.WriteString(f, "%!v(BADFLAG)")
return
}
fmt.Fprintf(f, "%s:", le.Cause.Error())
omitted := len(le.Log) - n
lines := le.Log[:n]
if f.Flag('-') {
// Print last instead of first lines.
lines = le.Log[len(le.Log)-n:]
if omitted > 0 {
fmt.Fprintf(f, "\n\t(%d line(s) omitted)", omitted)
}
}
for _, line := range lines {
fmt.Fprintf(f, "\n\t%s", line)
}
if !f.Flag('-') {
if omitted > 0 {
fmt.Fprintf(f, "\n\t(%d line(s) omitted)", omitted)
}
}
if le.Truncated {
fmt.Fprintf(f, "\n\t(truncated)")
}
default:
fmt.Fprintf(f, "%%!%c(BADVERB)", verb)
}
return string(in[:inLen])
}

View file

@ -1,69 +0,0 @@
package internal
import (
"errors"
"fmt"
"os"
"runtime"
"strconv"
"github.com/cilium/ebpf/internal/unix"
)
var ErrClosedFd = errors.New("use of closed file descriptor")
type FD struct {
raw int64
}
func NewFD(value uint32) *FD {
fd := &FD{int64(value)}
runtime.SetFinalizer(fd, (*FD).Close)
return fd
}
func (fd *FD) String() string {
return strconv.FormatInt(fd.raw, 10)
}
func (fd *FD) Value() (uint32, error) {
if fd.raw < 0 {
return 0, ErrClosedFd
}
return uint32(fd.raw), nil
}
func (fd *FD) Close() error {
if fd.raw < 0 {
return nil
}
value := int(fd.raw)
fd.raw = -1
fd.Forget()
return unix.Close(value)
}
func (fd *FD) Forget() {
runtime.SetFinalizer(fd, nil)
}
func (fd *FD) Dup() (*FD, error) {
if fd.raw < 0 {
return nil, ErrClosedFd
}
dup, err := unix.FcntlInt(uintptr(fd.raw), unix.F_DUPFD_CLOEXEC, 0)
if err != nil {
return nil, fmt.Errorf("can't dup fd: %v", err)
}
return NewFD(uint32(dup)), nil
}
func (fd *FD) File(name string) *os.File {
fd.Forget()
return os.NewFile(uintptr(fd.raw), name)
}

View file

@ -54,11 +54,6 @@ type FeatureTestFn func() error
//
// Returns an error wrapping ErrNotSupported if the feature is not supported.
func FeatureTest(name, version string, fn FeatureTestFn) func() error {
v, err := NewVersion(version)
if err != nil {
return func() error { return err }
}
ft := new(featureTest)
return func() error {
ft.RLock()
@ -79,6 +74,11 @@ func FeatureTest(name, version string, fn FeatureTestFn) func() error {
err := fn()
switch {
case errors.Is(err, ErrNotSupported):
v, err := NewVersion(version)
if err != nil {
return err
}
ft.result = &UnsupportedFeatureError{
MinimumVersion: v,
Name: name,

View file

@ -1,6 +1,35 @@
package internal
import "errors"
import (
"bufio"
"compress/gzip"
"errors"
"io"
"os"
)
// NewBufferedSectionReader wraps an io.ReaderAt in an appropriately-sized
// buffered reader. It is a convenience function for reading subsections of
// ELF sections while minimizing the amount of read() syscalls made.
//
// Syscall overhead is non-negligible in continuous integration contexts
// where ELFs might be accessed over virtual filesystems with poor random
// access performance. Buffering reads makes sense because (sub)sections
// end up being read completely anyway.
//
// Use instead of the r.Seek() + io.LimitReader() pattern.
func NewBufferedSectionReader(ra io.ReaderAt, off, n int64) *bufio.Reader {
// Clamp the size of the buffer to one page to avoid slurping large parts
// of a file into memory. bufio.NewReader uses a hardcoded default buffer
// of 4096. Allow arches with larger pages to allocate more, but don't
// allocate a fixed 4k buffer if we only need to read a small segment.
buf := n
if ps := int64(os.Getpagesize()); n > ps {
buf = ps
}
return bufio.NewReaderSize(io.NewSectionReader(ra, off, n), int(buf))
}
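// Example (editor's illustrative sketch, not part of the upstream source):
// reading a fixed-size header out of an ELF section without issuing one
// read() syscall per field (section is any io.ReaderAt, e.g. *elf.Section):
//
// r := NewBufferedSectionReader(section, off, n)
// err := binary.Read(r, NativeEndian, &header)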
// DiscardZeroes makes sure that all written bytes are zero
// before discarding them.
@ -14,3 +43,20 @@ func (DiscardZeroes) Write(p []byte) (int, error) {
}
return len(p), nil
}
// ReadAllCompressed decompresses a gzipped file into memory.
func ReadAllCompressed(file string) ([]byte, error) {
fh, err := os.Open(file)
if err != nil {
return nil, err
}
defer fh.Close()
gz, err := gzip.NewReader(fh)
if err != nil {
return nil, err
}
defer gz.Close()
return io.ReadAll(gz)
}
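// Example (editor's illustrative sketch, not part of the upstream source):
// loading a gzipped BTF blob such as the vmlinux.btf.gz test fixture:
//
// raw, err := ReadAllCompressed("testdata/vmlinux.btf.gz")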

84
vendor/github.com/cilium/ebpf/internal/output.go generated vendored Normal file
View file

@ -0,0 +1,84 @@
package internal
import (
"bytes"
"errors"
"go/format"
"go/scanner"
"io"
"strings"
"unicode"
)
// Identifier turns a C style type or field name into an exportable Go equivalent.
func Identifier(str string) string {
prev := rune(-1)
return strings.Map(func(r rune) rune {
// See https://golang.org/ref/spec#Identifiers
switch {
case unicode.IsLetter(r):
if prev == -1 {
r = unicode.ToUpper(r)
}
case r == '_':
switch {
// The previous rune was deleted, or we are at the
// beginning of the string.
case prev == -1:
fallthrough
// The previous rune is a lower case letter or a digit.
case unicode.IsDigit(prev) || (unicode.IsLetter(prev) && unicode.IsLower(prev)):
// delete the current rune, and force the
// next character to be uppercased.
r = -1
}
case unicode.IsDigit(r):
default:
// Delete the current rune. prev is unchanged.
return -1
}
prev = r
return r
}, str)
}
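// Example (editor's illustrative sketch, not part of the upstream source):
// Identifier upper-cases the first letter and collapses snake_case into
// CamelCase, dropping runes that are invalid in Go identifiers:
//
// Identifier("sk_buff") // "SkBuff"
// Identifier("__x64_sys_a") // "X64SysA"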
// WriteFormatted outputs a formatted src into out.
//
// If formatting fails it returns an informative error message.
func WriteFormatted(src []byte, out io.Writer) error {
formatted, err := format.Source(src)
if err == nil {
_, err = out.Write(formatted)
return err
}
var el scanner.ErrorList
if !errors.As(err, &el) {
return err
}
var nel scanner.ErrorList
for _, err := range el {
if !err.Pos.IsValid() {
nel = append(nel, err)
continue
}
buf := src[err.Pos.Offset:]
nl := bytes.IndexRune(buf, '\n')
if nl == -1 {
nel = append(nel, err)
continue
}
err.Msg += ": " + string(buf[:nl])
nel = append(nel, err)
}
return nel
}

View file

@ -4,24 +4,54 @@ import (
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
"unsafe"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
func Pin(currentPath, newPath string, fd *FD) error {
func Pin(currentPath, newPath string, fd *sys.FD) error {
const bpfFSType = 0xcafe4a11
if newPath == "" {
return errors.New("given pinning path cannot be empty")
}
if currentPath == newPath {
return nil
}
if currentPath == "" {
return BPFObjPin(newPath, fd)
var statfs unix.Statfs_t
if err := unix.Statfs(filepath.Dir(newPath), &statfs); err != nil {
return err
}
var err error
fsType := int64(statfs.Type)
if unsafe.Sizeof(statfs.Type) == 4 {
// We're on a 32 bit arch, where statfs.Type is int32. bpfFSType is a
// negative number when interpreted as int32 so we need to cast via
// uint32 to avoid sign extension.
fsType = int64(uint32(statfs.Type))
}
if fsType != bpfFSType {
return fmt.Errorf("%s is not on a bpf filesystem", newPath)
}
defer runtime.KeepAlive(fd)
if currentPath == "" {
return sys.ObjPin(&sys.ObjPinAttr{
Pathname: sys.NewStringPointer(newPath),
BpfFd: fd.Uint(),
})
}
// Renameat2 is used instead of os.Rename to disallow the new path replacing
// an existing path.
if err = unix.Renameat2(unix.AT_FDCWD, currentPath, unix.AT_FDCWD, newPath, unix.RENAME_NOREPLACE); err == nil {
err := unix.Renameat2(unix.AT_FDCWD, currentPath, unix.AT_FDCWD, newPath, unix.RENAME_NOREPLACE)
if err == nil {
// Object is now moved to the new pinning path.
return nil
}
@ -29,7 +59,10 @@ func Pin(currentPath, newPath string, fd *FD) error {
return fmt.Errorf("unable to move pinned object to new path %v: %w", newPath, err)
}
// Internal state not in sync with the file system so let's fix it.
return BPFObjPin(newPath, fd)
return sys.ObjPin(&sys.ObjPinAttr{
Pathname: sys.NewStringPointer(newPath),
BpfFd: fd.Uint(),
})
}
func Unpin(pinnedPath string) error {

6
vendor/github.com/cilium/ebpf/internal/sys/doc.go generated vendored Normal file
View file

@ -0,0 +1,6 @@
// Package sys contains bindings for the BPF syscall.
package sys
// Regenerate types.go by invoking go generate in the current directory.
//go:generate go run github.com/cilium/ebpf/internal/cmd/gentypes ../../btf/testdata/vmlinux.btf.gz

96
vendor/github.com/cilium/ebpf/internal/sys/fd.go generated vendored Normal file
View file

@ -0,0 +1,96 @@
package sys
import (
"fmt"
"math"
"os"
"runtime"
"strconv"
"github.com/cilium/ebpf/internal/unix"
)
var ErrClosedFd = unix.EBADF
type FD struct {
raw int
}
func newFD(value int) *FD {
fd := &FD{value}
runtime.SetFinalizer(fd, (*FD).Close)
return fd
}
// NewFD wraps a raw fd with a finalizer.
//
// You must not use the raw fd after calling this function, since the underlying
// file descriptor number may change. This is because the BPF UAPI assumes that
// zero is not a valid fd value.
func NewFD(value int) (*FD, error) {
if value < 0 {
return nil, fmt.Errorf("invalid fd %d", value)
}
fd := newFD(value)
if value != 0 {
return fd, nil
}
dup, err := fd.Dup()
_ = fd.Close()
return dup, err
}
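// Example (editor's illustrative sketch, not part of the upstream source):
// wrapping a descriptor returned by a raw syscall; an fd of 0 is dup'd to a
// non-zero value because the BPF UAPI reserves zero for "no fd":
//
// fd, err := NewFD(rawFd)
// if err != nil {
// 	return err
// }
// defer fd.Close()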
func (fd *FD) String() string {
return strconv.FormatInt(int64(fd.raw), 10)
}
func (fd *FD) Int() int {
return fd.raw
}
func (fd *FD) Uint() uint32 {
if fd.raw < 0 || int64(fd.raw) > math.MaxUint32 {
// Best effort: this is the number most likely to be an invalid file
// descriptor. It is equal to -1 (on two's complement arches).
return math.MaxUint32
}
return uint32(fd.raw)
}
func (fd *FD) Close() error {
if fd.raw < 0 {
return nil
}
value := int(fd.raw)
fd.raw = -1
fd.Forget()
return unix.Close(value)
}
func (fd *FD) Forget() {
runtime.SetFinalizer(fd, nil)
}
func (fd *FD) Dup() (*FD, error) {
if fd.raw < 0 {
return nil, ErrClosedFd
}
// Always require the fd to be larger than zero: the BPF API treats an fd of
// zero as "no argument provided".
dup, err := unix.FcntlInt(uintptr(fd.raw), unix.F_DUPFD_CLOEXEC, 1)
if err != nil {
return nil, fmt.Errorf("can't dup fd: %v", err)
}
return newFD(dup), nil
}
func (fd *FD) File(name string) *os.File {
fd.Forget()
return os.NewFile(uintptr(fd.raw), name)
}

View file

@ -1,4 +1,4 @@
package internal
package sys
import (
"unsafe"
@ -20,6 +20,13 @@ func NewSlicePointer(buf []byte) Pointer {
return Pointer{ptr: unsafe.Pointer(&buf[0])}
}
// NewSlicePointerLen creates a 64-bit pointer from a byte slice.
//
// Useful to assign both the pointer and the length in one go.
func NewSlicePointerLen(buf []byte) (Pointer, uint32) {
return NewSlicePointer(buf), uint32(len(buf))
}
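// Example (editor's illustrative sketch, not part of the upstream source;
// attr, LogBuf and LogSize are placeholder names): filling a pointer/length
// pair of a syscall attribute in one assignment:
//
// attr.LogBuf, attr.LogSize = NewSlicePointerLen(buf)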
// NewStringPointer creates a 64-bit pointer from a string.
func NewStringPointer(str string) Pointer {
p, err := unix.BytePtrFromString(str)

View file

@ -1,7 +1,7 @@
//go:build armbe || mips || mips64p32
// +build armbe mips mips64p32
package internal
package sys
import (
"unsafe"

View file

@ -1,7 +1,7 @@
//go:build 386 || amd64p32 || arm || mipsle || mips64p32le
// +build 386 amd64p32 arm mipsle mips64p32le
package internal
package sys
import (
"unsafe"

View file

@ -1,7 +1,7 @@
//go:build !386 && !amd64p32 && !arm && !mipsle && !mips64p32le && !armbe && !mips && !mips64p32
// +build !386,!amd64p32,!arm,!mipsle,!mips64p32le,!armbe,!mips,!mips64p32
package internal
package sys
import (
"unsafe"

126
vendor/github.com/cilium/ebpf/internal/sys/syscall.go generated vendored Normal file
View file

@ -0,0 +1,126 @@
package sys
import (
"runtime"
"syscall"
"unsafe"
"github.com/cilium/ebpf/internal/unix"
)
// BPF wraps SYS_BPF.
//
// Any pointers contained in attr must use the Pointer type from this package.
func BPF(cmd Cmd, attr unsafe.Pointer, size uintptr) (uintptr, error) {
for {
r1, _, errNo := unix.Syscall(unix.SYS_BPF, uintptr(cmd), uintptr(attr), size)
runtime.KeepAlive(attr)
// As of ~4.20 the verifier can be interrupted by a signal,
// and returns EAGAIN in that case.
if errNo == unix.EAGAIN && cmd == BPF_PROG_LOAD {
continue
}
var err error
if errNo != 0 {
err = wrappedErrno{errNo}
}
return r1, err
}
}
// Info is implemented by all structs that can be passed to the ObjInfo syscall.
//
// MapInfo
// ProgInfo
// LinkInfo
// BtfInfo
type Info interface {
info() (unsafe.Pointer, uint32)
}
var _ Info = (*MapInfo)(nil)
func (i *MapInfo) info() (unsafe.Pointer, uint32) {
return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i))
}
var _ Info = (*ProgInfo)(nil)
func (i *ProgInfo) info() (unsafe.Pointer, uint32) {
return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i))
}
var _ Info = (*LinkInfo)(nil)
func (i *LinkInfo) info() (unsafe.Pointer, uint32) {
return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i))
}
var _ Info = (*BtfInfo)(nil)
func (i *BtfInfo) info() (unsafe.Pointer, uint32) {
return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i))
}
// ObjInfo retrieves information about a BPF Fd.
//
// info may be one of MapInfo, ProgInfo, LinkInfo and BtfInfo.
func ObjInfo(fd *FD, info Info) error {
ptr, len := info.info()
err := ObjGetInfoByFd(&ObjGetInfoByFdAttr{
BpfFd: fd.Uint(),
InfoLen: len,
Info: NewPointer(ptr),
})
runtime.KeepAlive(fd)
return err
}
// ObjName is a null-terminated string made up of
// 'A-Za-z0-9_' characters.
type ObjName [unix.BPF_OBJ_NAME_LEN]byte
// NewObjName truncates the result if it is too long.
func NewObjName(name string) ObjName {
var result ObjName
copy(result[:unix.BPF_OBJ_NAME_LEN-1], name)
return result
}
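// Example (editor's illustrative sketch, not part of the upstream source):
// names longer than BPF_OBJ_NAME_LEN-1 bytes are silently truncated so the
// array always stays null-terminated:
//
// n := NewObjName("a_very_long_program_name_indeed")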
// LinkID uniquely identifies a bpf_link.
type LinkID uint32
// BTFID uniquely identifies a BTF blob loaded into the kernel.
type BTFID uint32
// wrappedErrno wraps syscall.Errno to prevent direct comparisons with
// syscall.E* or unix.E* constants.
//
// You should never export an error of this type.
type wrappedErrno struct {
syscall.Errno
}
func (we wrappedErrno) Unwrap() error {
return we.Errno
}
type syscallError struct {
error
errno syscall.Errno
}
func Error(err error, errno syscall.Errno) error {
return &syscallError{err, errno}
}
func (se *syscallError) Is(target error) bool {
return target == se.error
}
func (se *syscallError) Unwrap() error {
return se.errno
}

1052
vendor/github.com/cilium/ebpf/internal/sys/types.go generated vendored Normal file

File diff suppressed because it is too large

View file

@ -1,304 +0,0 @@
package internal
import (
"errors"
"fmt"
"path/filepath"
"runtime"
"syscall"
"unsafe"
"github.com/cilium/ebpf/internal/unix"
)
//go:generate stringer -output syscall_string.go -type=BPFCmd
// BPFCmd identifies a subcommand of the bpf syscall.
type BPFCmd int
// Well known BPF commands.
const (
BPF_MAP_CREATE BPFCmd = iota
BPF_MAP_LOOKUP_ELEM
BPF_MAP_UPDATE_ELEM
BPF_MAP_DELETE_ELEM
BPF_MAP_GET_NEXT_KEY
BPF_PROG_LOAD
BPF_OBJ_PIN
BPF_OBJ_GET
BPF_PROG_ATTACH
BPF_PROG_DETACH
BPF_PROG_TEST_RUN
BPF_PROG_GET_NEXT_ID
BPF_MAP_GET_NEXT_ID
BPF_PROG_GET_FD_BY_ID
BPF_MAP_GET_FD_BY_ID
BPF_OBJ_GET_INFO_BY_FD
BPF_PROG_QUERY
BPF_RAW_TRACEPOINT_OPEN
BPF_BTF_LOAD
BPF_BTF_GET_FD_BY_ID
BPF_TASK_FD_QUERY
BPF_MAP_LOOKUP_AND_DELETE_ELEM
BPF_MAP_FREEZE
BPF_BTF_GET_NEXT_ID
BPF_MAP_LOOKUP_BATCH
BPF_MAP_LOOKUP_AND_DELETE_BATCH
BPF_MAP_UPDATE_BATCH
BPF_MAP_DELETE_BATCH
BPF_LINK_CREATE
BPF_LINK_UPDATE
BPF_LINK_GET_FD_BY_ID
BPF_LINK_GET_NEXT_ID
BPF_ENABLE_STATS
BPF_ITER_CREATE
)
// BPF wraps SYS_BPF.
//
// Any pointers contained in attr must use the Pointer type from this package.
func BPF(cmd BPFCmd, attr unsafe.Pointer, size uintptr) (uintptr, error) {
r1, _, errNo := unix.Syscall(unix.SYS_BPF, uintptr(cmd), uintptr(attr), size)
runtime.KeepAlive(attr)
var err error
if errNo != 0 {
err = wrappedErrno{errNo}
}
return r1, err
}
type BPFProgLoadAttr struct {
ProgType uint32
InsCount uint32
Instructions Pointer
License Pointer
LogLevel uint32
LogSize uint32
LogBuf Pointer
KernelVersion uint32 // since 4.1 2541517c32be
ProgFlags uint32 // since 4.11 e07b98d9bffe
ProgName BPFObjName // since 4.15 067cae47771c
ProgIfIndex uint32 // since 4.15 1f6f4cb7ba21
ExpectedAttachType uint32 // since 4.17 5e43f899b03a
ProgBTFFd uint32
FuncInfoRecSize uint32
FuncInfo Pointer
FuncInfoCnt uint32
LineInfoRecSize uint32
LineInfo Pointer
LineInfoCnt uint32
AttachBTFID uint32
AttachProgFd uint32
}
// BPFProgLoad wraps BPF_PROG_LOAD.
func BPFProgLoad(attr *BPFProgLoadAttr) (*FD, error) {
for {
fd, err := BPF(BPF_PROG_LOAD, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
// As of ~4.20 the verifier can be interrupted by a signal,
// and returns EAGAIN in that case.
if errors.Is(err, unix.EAGAIN) {
continue
}
if err != nil {
return nil, err
}
return NewFD(uint32(fd)), nil
}
}
type BPFProgAttachAttr struct {
TargetFd uint32
AttachBpfFd uint32
AttachType uint32
AttachFlags uint32
ReplaceBpfFd uint32
}
func BPFProgAttach(attr *BPFProgAttachAttr) error {
_, err := BPF(BPF_PROG_ATTACH, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
return err
}
type BPFProgDetachAttr struct {
TargetFd uint32
AttachBpfFd uint32
AttachType uint32
}
func BPFProgDetach(attr *BPFProgDetachAttr) error {
_, err := BPF(BPF_PROG_DETACH, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
return err
}
type BPFEnableStatsAttr struct {
StatsType uint32
}
func BPFEnableStats(attr *BPFEnableStatsAttr) (*FD, error) {
ptr, err := BPF(BPF_ENABLE_STATS, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
if err != nil {
return nil, fmt.Errorf("enable stats: %w", err)
}
return NewFD(uint32(ptr)), nil
}
type bpfObjAttr struct {
fileName Pointer
fd uint32
fileFlags uint32
}
const bpfFSType = 0xcafe4a11
// BPFObjPin wraps BPF_OBJ_PIN.
func BPFObjPin(fileName string, fd *FD) error {
dirName := filepath.Dir(fileName)
var statfs unix.Statfs_t
if err := unix.Statfs(dirName, &statfs); err != nil {
return err
}
if uint64(statfs.Type) != bpfFSType {
return fmt.Errorf("%s is not on a bpf filesystem", fileName)
}
value, err := fd.Value()
if err != nil {
return err
}
attr := bpfObjAttr{
fileName: NewStringPointer(fileName),
fd: value,
}
_, err = BPF(BPF_OBJ_PIN, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
if err != nil {
return fmt.Errorf("pin object %s: %w", fileName, err)
}
return nil
}
// BPFObjGet wraps BPF_OBJ_GET.
func BPFObjGet(fileName string, flags uint32) (*FD, error) {
attr := bpfObjAttr{
fileName: NewStringPointer(fileName),
fileFlags: flags,
}
ptr, err := BPF(BPF_OBJ_GET, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
if err != nil {
return nil, fmt.Errorf("get object %s: %w", fileName, err)
}
return NewFD(uint32(ptr)), nil
}
type bpfObjGetInfoByFDAttr struct {
fd uint32
infoLen uint32
info Pointer
}
// BPFObjGetInfoByFD wraps BPF_OBJ_GET_INFO_BY_FD.
//
// Available from 4.13.
func BPFObjGetInfoByFD(fd *FD, info unsafe.Pointer, size uintptr) error {
value, err := fd.Value()
if err != nil {
return err
}
attr := bpfObjGetInfoByFDAttr{
fd: value,
infoLen: uint32(size),
info: NewPointer(info),
}
_, err = BPF(BPF_OBJ_GET_INFO_BY_FD, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
if err != nil {
return fmt.Errorf("fd %v: %w", fd, err)
}
return nil
}
type bpfGetFDByIDAttr struct {
id uint32
next uint32
}
// BPFObjGetInfoByFD wraps BPF_*_GET_FD_BY_ID.
//
// Available from 4.13.
func BPFObjGetFDByID(cmd BPFCmd, id uint32) (*FD, error) {
attr := bpfGetFDByIDAttr{
id: id,
}
ptr, err := BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
return NewFD(uint32(ptr)), err
}
// BPFObjName is a null-terminated string made up of
// 'A-Za-z0-9_' characters.
type BPFObjName [unix.BPF_OBJ_NAME_LEN]byte
// NewBPFObjName truncates the result if it is too long.
func NewBPFObjName(name string) BPFObjName {
var result BPFObjName
copy(result[:unix.BPF_OBJ_NAME_LEN-1], name)
return result
}
type BPFMapCreateAttr struct {
MapType uint32
KeySize uint32
ValueSize uint32
MaxEntries uint32
Flags uint32
InnerMapFd uint32 // since 4.12 56f668dfe00d
NumaNode uint32 // since 4.14 96eabe7a40aa
MapName BPFObjName // since 4.15 ad5b177bd73f
MapIfIndex uint32
BTFFd uint32
BTFKeyTypeID uint32
BTFValueTypeID uint32
}
func BPFMapCreate(attr *BPFMapCreateAttr) (*FD, error) {
fd, err := BPF(BPF_MAP_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
if err != nil {
return nil, err
}
return NewFD(uint32(fd)), nil
}
// wrappedErrno wraps syscall.Errno to prevent direct comparisons with
// syscall.E* or unix.E* constants.
//
// You should never export an error of this type.
type wrappedErrno struct {
syscall.Errno
}
func (we wrappedErrno) Unwrap() error {
return we.Errno
}
type syscallError struct {
error
errno syscall.Errno
}
func SyscallError(err error, errno syscall.Errno) error {
return &syscallError{err, errno}
}
func (se *syscallError) Is(target error) bool {
return target == se.error
}
func (se *syscallError) Unwrap() error {
return se.errno
}

View file

@ -1,56 +0,0 @@
// Code generated by "stringer -output syscall_string.go -type=BPFCmd"; DO NOT EDIT.
package internal
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[BPF_MAP_CREATE-0]
_ = x[BPF_MAP_LOOKUP_ELEM-1]
_ = x[BPF_MAP_UPDATE_ELEM-2]
_ = x[BPF_MAP_DELETE_ELEM-3]
_ = x[BPF_MAP_GET_NEXT_KEY-4]
_ = x[BPF_PROG_LOAD-5]
_ = x[BPF_OBJ_PIN-6]
_ = x[BPF_OBJ_GET-7]
_ = x[BPF_PROG_ATTACH-8]
_ = x[BPF_PROG_DETACH-9]
_ = x[BPF_PROG_TEST_RUN-10]
_ = x[BPF_PROG_GET_NEXT_ID-11]
_ = x[BPF_MAP_GET_NEXT_ID-12]
_ = x[BPF_PROG_GET_FD_BY_ID-13]
_ = x[BPF_MAP_GET_FD_BY_ID-14]
_ = x[BPF_OBJ_GET_INFO_BY_FD-15]
_ = x[BPF_PROG_QUERY-16]
_ = x[BPF_RAW_TRACEPOINT_OPEN-17]
_ = x[BPF_BTF_LOAD-18]
_ = x[BPF_BTF_GET_FD_BY_ID-19]
_ = x[BPF_TASK_FD_QUERY-20]
_ = x[BPF_MAP_LOOKUP_AND_DELETE_ELEM-21]
_ = x[BPF_MAP_FREEZE-22]
_ = x[BPF_BTF_GET_NEXT_ID-23]
_ = x[BPF_MAP_LOOKUP_BATCH-24]
_ = x[BPF_MAP_LOOKUP_AND_DELETE_BATCH-25]
_ = x[BPF_MAP_UPDATE_BATCH-26]
_ = x[BPF_MAP_DELETE_BATCH-27]
_ = x[BPF_LINK_CREATE-28]
_ = x[BPF_LINK_UPDATE-29]
_ = x[BPF_LINK_GET_FD_BY_ID-30]
_ = x[BPF_LINK_GET_NEXT_ID-31]
_ = x[BPF_ENABLE_STATS-32]
_ = x[BPF_ITER_CREATE-33]
}
const _BPFCmd_name = "BPF_MAP_CREATEBPF_MAP_LOOKUP_ELEMBPF_MAP_UPDATE_ELEMBPF_MAP_DELETE_ELEMBPF_MAP_GET_NEXT_KEYBPF_PROG_LOADBPF_OBJ_PINBPF_OBJ_GETBPF_PROG_ATTACHBPF_PROG_DETACHBPF_PROG_TEST_RUNBPF_PROG_GET_NEXT_IDBPF_MAP_GET_NEXT_IDBPF_PROG_GET_FD_BY_IDBPF_MAP_GET_FD_BY_IDBPF_OBJ_GET_INFO_BY_FDBPF_PROG_QUERYBPF_RAW_TRACEPOINT_OPENBPF_BTF_LOADBPF_BTF_GET_FD_BY_IDBPF_TASK_FD_QUERYBPF_MAP_LOOKUP_AND_DELETE_ELEMBPF_MAP_FREEZEBPF_BTF_GET_NEXT_IDBPF_MAP_LOOKUP_BATCHBPF_MAP_LOOKUP_AND_DELETE_BATCHBPF_MAP_UPDATE_BATCHBPF_MAP_DELETE_BATCHBPF_LINK_CREATEBPF_LINK_UPDATEBPF_LINK_GET_FD_BY_IDBPF_LINK_GET_NEXT_IDBPF_ENABLE_STATSBPF_ITER_CREATE"
var _BPFCmd_index = [...]uint16{0, 14, 33, 52, 71, 91, 104, 115, 126, 141, 156, 173, 193, 212, 233, 253, 275, 289, 312, 324, 344, 361, 391, 405, 424, 444, 475, 495, 515, 530, 545, 566, 586, 602, 617}
func (i BPFCmd) String() string {
if i < 0 || i >= BPFCmd(len(_BPFCmd_index)-1) {
return "BPFCmd(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _BPFCmd_name[_BPFCmd_index[i]:_BPFCmd_index[i+1]]
}

View file

@ -4,7 +4,6 @@
package unix
import (
"bytes"
"syscall"
linux "golang.org/x/sys/unix"
@ -23,6 +22,8 @@ const (
ENODEV = linux.ENODEV
EBADF = linux.EBADF
E2BIG = linux.E2BIG
EFAULT = linux.EFAULT
EACCES = linux.EACCES
// ENOTSUPP is not the same as ENOTSUP or EOPNOTSUP
ENOTSUPP = syscall.Errno(0x20c)
@ -66,11 +67,16 @@ const (
PERF_RECORD_SAMPLE = linux.PERF_RECORD_SAMPLE
AT_FDCWD = linux.AT_FDCWD
RENAME_NOREPLACE = linux.RENAME_NOREPLACE
SO_ATTACH_BPF = linux.SO_ATTACH_BPF
SO_DETACH_BPF = linux.SO_DETACH_BPF
SOL_SOCKET = linux.SOL_SOCKET
)
// Statfs_t is a wrapper
type Statfs_t = linux.Statfs_t
type Stat_t = linux.Stat_t
// Rlimit is a wrapper
type Rlimit = linux.Rlimit
@ -191,18 +197,14 @@ func Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags
return linux.Renameat2(olddirfd, oldpath, newdirfd, newpath, flags)
}
func KernelRelease() (string, error) {
var uname Utsname
err := Uname(&uname)
if err != nil {
return "", err
}
end := bytes.IndexByte(uname.Release[:], 0)
release := string(uname.Release[:end])
return release, nil
}
func Prlimit(pid, resource int, new, old *Rlimit) error {
return linux.Prlimit(pid, resource, new, old)
}
func Open(path string, mode int, perm uint32) (int, error) {
return linux.Open(path, mode, perm)
}
func Fstat(fd int, stat *Stat_t) error {
return linux.Fstat(fd, stat)
}

View file

@ -23,6 +23,8 @@ const (
ENODEV = syscall.ENODEV
EBADF = syscall.Errno(0)
E2BIG = syscall.Errno(0)
EFAULT = syscall.EFAULT
EACCES = syscall.Errno(0)
// ENOTSUPP is not the same as ENOTSUP or EOPNOTSUP
ENOTSUPP = syscall.Errno(0x20c)
@ -67,6 +69,9 @@ const (
PERF_RECORD_SAMPLE = 9
AT_FDCWD = -0x2
RENAME_NOREPLACE = 0x1
SO_ATTACH_BPF = 0x32
SO_DETACH_BPF = 0x1b
SOL_SOCKET = 0x1
)
// Statfs_t is a wrapper
@ -85,6 +90,8 @@ type Statfs_t struct {
Spare [4]int64
}
type Stat_t struct{}
// Rlimit is a wrapper
type Rlimit struct {
Cur uint64
@ -258,10 +265,14 @@ func Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags
return errNonLinux
}
func KernelRelease() (string, error) {
return "", errNonLinux
}
func Prlimit(pid, resource int, new, old *Rlimit) error {
return errNonLinux
}
func Open(path string, mode int, perm uint32) (int, error) {
return -1, errNonLinux
}
func Fstat(fd int, stat *Stat_t) error {
return errNonLinux
}

150
vendor/github.com/cilium/ebpf/internal/vdso.go generated vendored Normal file
View file

@ -0,0 +1,150 @@
package internal
import (
"debug/elf"
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"os"
"github.com/cilium/ebpf/internal/unix"
)
var (
errAuxvNoVDSO = errors.New("no vdso address found in auxv")
)
// vdsoVersion returns the LINUX_VERSION_CODE embedded in the vDSO library
// linked into the current process image.
func vdsoVersion() (uint32, error) {
// Read data from the auxiliary vector, which is normally passed directly
// to the process. Go does not expose that data, so we must read it from procfs.
// https://man7.org/linux/man-pages/man3/getauxval.3.html
av, err := os.Open("/proc/self/auxv")
if err != nil {
return 0, fmt.Errorf("opening auxv: %w", err)
}
defer av.Close()
vdsoAddr, err := vdsoMemoryAddress(av)
if err != nil {
return 0, fmt.Errorf("finding vDSO memory address: %w", err)
}
// Use /proc/self/mem rather than unsafe.Pointer tricks.
mem, err := os.Open("/proc/self/mem")
if err != nil {
return 0, fmt.Errorf("opening mem: %w", err)
}
defer mem.Close()
// Open ELF at provided memory address, as offset into /proc/self/mem.
c, err := vdsoLinuxVersionCode(io.NewSectionReader(mem, int64(vdsoAddr), math.MaxInt64))
if err != nil {
return 0, fmt.Errorf("reading linux version code: %w", err)
}
return c, nil
}
// vdsoMemoryAddress returns the memory address of the vDSO library
// linked into the current process image. r is an io.Reader into an auxv blob.
func vdsoMemoryAddress(r io.Reader) (uint64, error) {
const (
_AT_NULL = 0 // End of vector
_AT_SYSINFO_EHDR = 33 // Offset to vDSO blob in process image
)
// Loop through all tag/value pairs in auxv until we find `AT_SYSINFO_EHDR`,
// the address of a page containing the virtual Dynamic Shared Object (vDSO).
aux := struct{ Tag, Val uint64 }{}
for {
if err := binary.Read(r, NativeEndian, &aux); err != nil {
return 0, fmt.Errorf("reading auxv entry: %w", err)
}
switch aux.Tag {
case _AT_SYSINFO_EHDR:
if aux.Val != 0 {
return aux.Val, nil
}
return 0, fmt.Errorf("invalid vDSO address in auxv")
// _AT_NULL is always the last tag/val pair in the aux vector
// and can be treated like EOF.
case _AT_NULL:
return 0, errAuxvNoVDSO
}
}
}
// format described at https://www.man7.org/linux/man-pages/man5/elf.5.html in section 'Notes (Nhdr)'
type elfNoteHeader struct {
NameSize int32
DescSize int32
Type int32
}
// vdsoLinuxVersionCode returns the LINUX_VERSION_CODE embedded in
// the ELF notes section of the binary provided by the reader.
func vdsoLinuxVersionCode(r io.ReaderAt) (uint32, error) {
hdr, err := NewSafeELFFile(r)
if err != nil {
return 0, fmt.Errorf("reading vDSO ELF: %w", err)
}
sections := hdr.SectionsByType(elf.SHT_NOTE)
if len(sections) == 0 {
return 0, fmt.Errorf("no note section found in vDSO ELF")
}
for _, sec := range sections {
sr := sec.Open()
var n elfNoteHeader
// Read notes until we find one named 'Linux'.
for {
if err := binary.Read(sr, hdr.ByteOrder, &n); err != nil {
if errors.Is(err, io.EOF) {
// We looked at all the notes in this section
break
}
return 0, fmt.Errorf("reading note header: %w", err)
}
// If a note name is defined, it follows the note header.
var name string
if n.NameSize > 0 {
// Read the note name, aligned to 4 bytes.
buf := make([]byte, Align(int(n.NameSize), 4))
if err := binary.Read(sr, hdr.ByteOrder, &buf); err != nil {
return 0, fmt.Errorf("reading note name: %w", err)
}
// Read nul-terminated string.
name = unix.ByteSliceToString(buf[:n.NameSize])
}
// If a note descriptor is defined, it follows the name.
// It is possible for a note to have a descriptor but not a name.
if n.DescSize > 0 {
// LINUX_VERSION_CODE is a uint32 value.
if name == "Linux" && n.DescSize == 4 && n.Type == 0 {
var version uint32
if err := binary.Read(sr, hdr.ByteOrder, &version); err != nil {
return 0, fmt.Errorf("reading note descriptor: %w", err)
}
return version, nil
}
// Discard the note descriptor if it exists but we're not interested in it.
if _, err := io.CopyN(io.Discard, sr, int64(Align(int(n.DescSize), 4))); err != nil {
return 0, err
}
}
}
}
return 0, fmt.Errorf("no Linux note in ELF")
}

View file

@ -2,8 +2,6 @@ package internal
import (
"fmt"
"os"
"regexp"
"sync"
"github.com/cilium/ebpf/internal/unix"
@ -18,12 +16,6 @@ const (
)
var (
// Match between one and three decimals separated by dots, with the last
// segment (patch level) being optional on some kernels.
// The x.y.z string must appear at the start of a string or right after
// whitespace to prevent sequences like 'x.y.z-a.b.c' from matching 'a.b.c'.
rgxKernelVersion = regexp.MustCompile(`(?:\A|\s)\d{1,3}\.\d{1,3}(?:\.\d{1,3})?`)
kernelVersion = struct {
once sync.Once
version Version
@ -46,6 +38,15 @@ func NewVersion(ver string) (Version, error) {
return Version{major, minor, patch}, nil
}
// NewVersionFromCode creates a version from a LINUX_VERSION_CODE.
func NewVersionFromCode(code uint32) Version {
return Version{
uint16(uint8(code >> 16)),
uint16(uint8(code >> 8)),
uint16(uint8(code)),
}
}
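// Example (editor's illustrative sketch, not part of the upstream source):
// the kernel packs versions as KERNEL_VERSION(a,b,c) = a<<16 | b<<8 | c, so:
//
// NewVersionFromCode(0x050f11) // Version{5, 15, 17}, i.e. v5.15.17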
func (v Version) String() string {
if v[2] == 0 {
return fmt.Sprintf("v%d.%d", v[0], v[1])
@ -98,66 +99,24 @@ func KernelVersion() (Version, error) {
return kernelVersion.version, nil
}
// detectKernelVersion returns the version of the running kernel. It scans the
// following sources in order: /proc/version_signature, uname -v, uname -r.
// In each of those locations, the last-appearing x.y(.z) value is selected
// for parsing. The first location that yields a usable version number is
// returned.
// detectKernelVersion returns the version of the running kernel.
func detectKernelVersion() (Version, error) {
// Try reading /proc/version_signature for Ubuntu compatibility.
// Example format: Ubuntu 4.15.0-91.92-generic 4.15.18
// This method exists in the kernel itself, see d18acd15c
// ("perf tools: Fix kernel version error in ubuntu").
if pvs, err := os.ReadFile("/proc/version_signature"); err == nil {
// If /proc/version_signature exists, failing to parse it is an error.
// It only exists on Ubuntu, where the real patch level is not obtainable
// through any other method.
v, err := findKernelVersion(string(pvs))
if err != nil {
return Version{}, err
}
return v, nil
}
var uname unix.Utsname
if err := unix.Uname(&uname); err != nil {
return Version{}, fmt.Errorf("calling uname: %w", err)
}
// Debian puts the version including the patch level in uname.Version.
// It is not an error if there's no version number in uname.Version,
// as most distributions don't use it. Parsing can continue on uname.Release.
// Example format: #1 SMP Debian 4.19.37-5+deb10u2 (2019-08-08)
if v, err := findKernelVersion(unix.ByteSliceToString(uname.Version[:])); err == nil {
return v, nil
}
// Most other distributions have the full kernel version including patch
// level in uname.Release.
// Example format: 4.19.0-5-amd64, 5.5.10-arch1-1
v, err := findKernelVersion(unix.ByteSliceToString(uname.Release[:]))
vc, err := vdsoVersion()
if err != nil {
return Version{}, err
}
return v, nil
return NewVersionFromCode(vc), nil
}
// findKernelVersion matches s against rgxKernelVersion and parses the result
// into a Version. If s contains multiple matches, the last entry is selected.
func findKernelVersion(s string) (Version, error) {
m := rgxKernelVersion.FindAllString(s, -1)
if m == nil {
return Version{}, fmt.Errorf("no kernel version in string: %s", s)
}
// Pick the last match of the string in case there are multiple.
s = m[len(m)-1]
v, err := NewVersion(s)
if err != nil {
return Version{}, fmt.Errorf("parsing version string %s: %w", s, err)
// KernelRelease returns the release string of the running kernel.
// Its format depends on the Linux distribution and corresponds to directory
// names in /lib/modules by convention. Some examples are 5.15.17-1-lts and
// 4.19.0-16-amd64.
func KernelRelease() (string, error) {
var uname unix.Utsname
if err := unix.Uname(&uname); err != nil {
return "", fmt.Errorf("uname failed: %w", err)
}
return v, nil
return unix.ByteSliceToString(uname.Release[:]), nil
}

View file

@ -56,16 +56,6 @@ func AttachCgroup(opts CgroupOptions) (Link, error) {
return cg, nil
}
// LoadPinnedCgroup loads a pinned cgroup from a bpffs.
func LoadPinnedCgroup(fileName string, opts *ebpf.LoadPinOptions) (Link, error) {
link, err := LoadPinnedRawLink(fileName, CgroupType, opts)
if err != nil {
return nil, err
}
return &linkCgroup{*link}, nil
}
type progAttachCgroup struct {
cgroup *os.File
current *ebpf.Program
@ -151,6 +141,10 @@ func (cg *progAttachCgroup) Unpin() error {
return fmt.Errorf("can't pin cgroup: %w", ErrNotSupported)
}
func (cg *progAttachCgroup) Info() (*Info, error) {
return nil, fmt.Errorf("can't get cgroup info: %w", ErrNotSupported)
}
type linkCgroup struct {
RawLink
}

View file

@ -1,88 +0,0 @@
package link
import (
"fmt"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal/btf"
)
type FreplaceLink struct {
RawLink
}
// AttachFreplace attaches the given eBPF program to the function it replaces.
//
// The program and name can either be provided at link time, or can be provided
// at program load time. If they were provided at load time, they should be nil
// and empty respectively here, as they will be ignored by the kernel.
// Examples:
//
// AttachFreplace(dispatcher, "function", replacement)
// AttachFreplace(nil, "", replacement)
func AttachFreplace(targetProg *ebpf.Program, name string, prog *ebpf.Program) (*FreplaceLink, error) {
if (name == "") != (targetProg == nil) {
return nil, fmt.Errorf("must provide both or neither of name and targetProg: %w", errInvalidInput)
}
if prog == nil {
return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput)
}
if prog.Type() != ebpf.Extension {
return nil, fmt.Errorf("eBPF program type %s is not an Extension: %w", prog.Type(), errInvalidInput)
}
var (
target int
typeID btf.TypeID
)
if targetProg != nil {
info, err := targetProg.Info()
if err != nil {
return nil, err
}
btfID, ok := info.BTFID()
if !ok {
return nil, fmt.Errorf("could not get BTF ID for program %s: %w", info.Name, errInvalidInput)
}
btfHandle, err := btf.NewHandleFromID(btfID)
if err != nil {
return nil, err
}
defer btfHandle.Close()
var function *btf.Func
if err := btfHandle.Spec().FindType(name, &function); err != nil {
return nil, err
}
target = targetProg.FD()
typeID = function.ID()
}
link, err := AttachRawLink(RawLinkOptions{
Target: target,
Program: prog,
Attach: ebpf.AttachNone,
BTF: typeID,
})
if err != nil {
return nil, err
}
return &FreplaceLink{*link}, nil
}
// Update implements the Link interface.
func (f *FreplaceLink) Update(new *ebpf.Program) error {
return fmt.Errorf("freplace update: %w", ErrNotSupported)
}
// LoadPinnedFreplace loads a pinned iterator from a bpffs.
func LoadPinnedFreplace(fileName string, opts *ebpf.LoadPinOptions) (*FreplaceLink, error) {
link, err := LoadPinnedRawLink(fileName, TracingType, opts)
if err != nil {
return nil, err
}
return &FreplaceLink{*link}, err
}

View file

@ -6,7 +6,7 @@ import (
"unsafe"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
)
type IterOptions struct {
@ -31,26 +31,26 @@ func AttachIter(opts IterOptions) (*Iter, error) {
progFd := opts.Program.FD()
if progFd < 0 {
return nil, fmt.Errorf("invalid program: %s", internal.ErrClosedFd)
return nil, fmt.Errorf("invalid program: %s", sys.ErrClosedFd)
}
var info bpfIterLinkInfoMap
if opts.Map != nil {
mapFd := opts.Map.FD()
if mapFd < 0 {
return nil, fmt.Errorf("invalid map: %w", internal.ErrClosedFd)
return nil, fmt.Errorf("invalid map: %w", sys.ErrClosedFd)
}
info.map_fd = uint32(mapFd)
}
attr := bpfLinkCreateIterAttr{
prog_fd: uint32(progFd),
attach_type: ebpf.AttachTraceIter,
iter_info: internal.NewPointer(unsafe.Pointer(&info)),
iter_info_len: uint32(unsafe.Sizeof(info)),
attr := sys.LinkCreateIterAttr{
ProgFd: uint32(progFd),
AttachType: sys.AttachType(ebpf.AttachTraceIter),
IterInfo: sys.NewPointer(unsafe.Pointer(&info)),
IterInfoLen: uint32(unsafe.Sizeof(info)),
}
fd, err := bpfLinkCreateIter(&attr)
fd, err := sys.LinkCreateIter(&attr)
if err != nil {
return nil, fmt.Errorf("can't link iterator: %w", err)
}
@ -58,16 +58,6 @@ func AttachIter(opts IterOptions) (*Iter, error) {
return &Iter{RawLink{fd, ""}}, err
}
// LoadPinnedIter loads a pinned iterator from a bpffs.
func LoadPinnedIter(fileName string, opts *ebpf.LoadPinOptions) (*Iter, error) {
link, err := LoadPinnedRawLink(fileName, IterType, opts)
if err != nil {
return nil, err
}
return &Iter{*link}, err
}
// Iter represents an attached bpf_iter.
type Iter struct {
RawLink
@ -77,16 +67,11 @@ type Iter struct {
//
// Reading from the returned reader triggers the BPF program.
func (it *Iter) Open() (io.ReadCloser, error) {
linkFd, err := it.fd.Value()
if err != nil {
return nil, err
attr := &sys.IterCreateAttr{
LinkFd: it.fd.Uint(),
}
attr := &bpfIterCreateAttr{
linkFd: linkFd,
}
fd, err := bpfIterCreate(attr)
fd, err := sys.IterCreate(attr)
if err != nil {
return nil, fmt.Errorf("can't create iterator: %w", err)
}

View file

@ -8,11 +8,13 @@ import (
"os"
"path/filepath"
"runtime"
"strings"
"sync"
"syscall"
"unsafe"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
@ -28,6 +30,27 @@ var (
type probeType uint8
type probeArgs struct {
symbol, group, path string
offset, refCtrOffset, cookie uint64
pid int
ret bool
}
// KprobeOptions defines additional parameters that will be used
// when loading Kprobes.
type KprobeOptions struct {
// Arbitrary value that can be fetched from an eBPF program
// via `bpf_get_attach_cookie()`.
//
// Needs kernel 5.15+.
Cookie uint64
// Offset of the kprobe relative to the traced symbol.
// Can be used to insert kprobes at arbitrary offsets in kernel functions,
// e.g. in places where functions have been inlined.
Offset uint64
}
const (
kprobeType probeType = iota
uprobeType
@ -71,70 +94,109 @@ func (pt probeType) RetprobeBit() (uint64, error) {
// given kernel symbol starts executing. See /proc/kallsyms for available
// symbols. For example, printk():
//
// kp, err := Kprobe("printk", prog)
// kp, err := Kprobe("printk", prog, nil)
//
// Losing the reference to the resulting Link (kp) will close the Kprobe
// and prevent further execution of prog. The Link must be Closed during
// program shutdown to avoid leaking system resources.
func Kprobe(symbol string, prog *ebpf.Program) (Link, error) {
k, err := kprobe(symbol, prog, false)
func Kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) {
k, err := kprobe(symbol, prog, opts, false)
if err != nil {
return nil, err
}
err = k.attach(prog)
lnk, err := attachPerfEvent(k, prog)
if err != nil {
k.Close()
return nil, err
}
return k, nil
return lnk, nil
}
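For reference, a minimal caller-side sketch of the new three-argument Kprobe signature; the symbol and cookie value are illustrative, and prog is assumed to be an already-loaded program of type ebpf.Kprobe:
package example

import (
    "github.com/cilium/ebpf"
    "github.com/cilium/ebpf/link"
)

// attachOpenProbe is a sketch only. The Cookie needs kernel 5.15+ and can be
// read back in the program via bpf_get_attach_cookie().
func attachOpenProbe(prog *ebpf.Program) (link.Link, error) {
    return link.Kprobe("do_sys_open", prog, &link.KprobeOptions{Cookie: 23})
}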
// Kretprobe attaches the given eBPF program to a perf event that fires right
// before the given kernel symbol exits, with the function stack left intact.
// See /proc/kallsyms for available symbols. For example, printk():
//
// kp, err := Kretprobe("printk", prog)
// kp, err := Kretprobe("printk", prog, nil)
//
// Losing the reference to the resulting Link (kp) will close the Kretprobe
// and prevent further execution of prog. The Link must be Closed during
// program shutdown to avoid leaking system resources.
func Kretprobe(symbol string, prog *ebpf.Program) (Link, error) {
k, err := kprobe(symbol, prog, true)
func Kretprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) {
k, err := kprobe(symbol, prog, opts, true)
if err != nil {
return nil, err
}
err = k.attach(prog)
lnk, err := attachPerfEvent(k, prog)
if err != nil {
k.Close()
return nil, err
}
return k, nil
return lnk, nil
}
// isValidKprobeSymbol implements the equivalent of a regex match
// against "^[a-zA-Z_][0-9a-zA-Z_.]*$".
func isValidKprobeSymbol(s string) bool {
if len(s) < 1 {
return false
}
for i, c := range []byte(s) {
switch {
case c >= 'a' && c <= 'z':
case c >= 'A' && c <= 'Z':
case c == '_':
case i > 0 && c >= '0' && c <= '9':
// Allow `.` in symbol name. GCC-compiled kernel may change symbol name
// to have a `.isra.$n` suffix, like `udp_send_skb.isra.52`.
// See: https://gcc.gnu.org/gcc-10/changes.html
case i > 0 && c == '.':
default:
return false
}
}
return true
}
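As an in-package illustration of what this validator accepts (isValidKprobeSymbol is unexported, so these lines only compile inside package link):
var (
    _ = isValidKprobeSymbol("printk")               // true
    _ = isValidKprobeSymbol("udp_send_skb.isra.52") // true: GCC ISRA suffix
    _ = isValidKprobeSymbol("4bad")                 // false: leading digit
    _ = isValidKprobeSymbol(".hidden")              // false: '.' must not lead
)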
// kprobe opens a perf event on the given symbol and attaches prog to it.
// If ret is true, create a kretprobe.
func kprobe(symbol string, prog *ebpf.Program, ret bool) (*perfEvent, error) {
func kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions, ret bool) (*perfEvent, error) {
if symbol == "" {
return nil, fmt.Errorf("symbol name cannot be empty: %w", errInvalidInput)
}
if prog == nil {
return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput)
}
if !rgxTraceEvent.MatchString(symbol) {
return nil, fmt.Errorf("symbol '%s' must be alphanumeric or underscore: %w", symbol, errInvalidInput)
if !isValidKprobeSymbol(symbol) {
return nil, fmt.Errorf("symbol '%s' must be a valid symbol in /proc/kallsyms: %w", symbol, errInvalidInput)
}
if prog.Type() != ebpf.Kprobe {
return nil, fmt.Errorf("eBPF program type %s is not a Kprobe: %w", prog.Type(), errInvalidInput)
}
args := probeArgs{
pid: perfAllThreads,
symbol: symbol,
ret: ret,
}
if opts != nil {
args.cookie = opts.Cookie
args.offset = opts.Offset
}
// Use kprobe PMU if the kernel has it available.
tp, err := pmuKprobe(platformPrefix(symbol), ret)
tp, err := pmuKprobe(args)
if errors.Is(err, os.ErrNotExist) {
tp, err = pmuKprobe(symbol, ret)
args.symbol = platformPrefix(symbol)
tp, err = pmuKprobe(args)
}
if err == nil {
return tp, nil
@ -144,9 +206,11 @@ func kprobe(symbol string, prog *ebpf.Program, ret bool) (*perfEvent, error) {
}
// Use tracefs if kprobe PMU is missing.
tp, err = tracefsKprobe(platformPrefix(symbol), ret)
args.symbol = symbol
tp, err = tracefsKprobe(args)
if errors.Is(err, os.ErrNotExist) {
tp, err = tracefsKprobe(symbol, ret)
args.symbol = platformPrefix(symbol)
tp, err = tracefsKprobe(args)
}
if err != nil {
return nil, fmt.Errorf("creating trace event '%s' in tracefs: %w", symbol, err)
@ -157,8 +221,8 @@ func kprobe(symbol string, prog *ebpf.Program, ret bool) (*perfEvent, error) {
// pmuKprobe opens a perf event based on the kprobe PMU.
// Returns os.ErrNotExist if the given symbol does not exist in the kernel.
func pmuKprobe(symbol string, ret bool) (*perfEvent, error) {
return pmuProbe(kprobeType, symbol, "", 0, perfAllThreads, ret)
func pmuKprobe(args probeArgs) (*perfEvent, error) {
return pmuProbe(kprobeType, args)
}
// pmuProbe opens a perf event based on a Performance Monitoring Unit.
@ -168,7 +232,7 @@ func pmuKprobe(symbol string, ret bool) (*perfEvent, error) {
// 33ea4b24277b "perf/core: Implement the 'perf_uprobe' PMU"
//
// Returns ErrNotSupported if the kernel doesn't support perf_[k,u]probe PMU
func pmuProbe(typ probeType, symbol, path string, offset uint64, pid int, ret bool) (*perfEvent, error) {
func pmuProbe(typ probeType, args probeArgs) (*perfEvent, error) {
// Getting the PMU type will fail if the kernel doesn't support
// the perf_[k,u]probe PMU.
et, err := getPMUEventType(typ)
@ -177,7 +241,7 @@ func pmuProbe(typ probeType, symbol, path string, offset uint64, pid int, ret bo
}
var config uint64
if ret {
if args.ret {
bit, err := typ.RetprobeBit()
if err != nil {
return nil, err
@ -192,22 +256,30 @@ func pmuProbe(typ probeType, symbol, path string, offset uint64, pid int, ret bo
switch typ {
case kprobeType:
// Create a pointer to a NUL-terminated string for the kernel.
sp, err = unsafeStringPtr(symbol)
sp, err = unsafeStringPtr(args.symbol)
if err != nil {
return nil, err
}
attr = unix.PerfEventAttr{
// The minimum size required for PMU kprobes is PERF_ATTR_SIZE_VER1,
// since it added the config2 (Ext2) field. Use Ext2 as probe_offset.
Size: unix.PERF_ATTR_SIZE_VER1,
Type: uint32(et), // PMU event type read from sysfs
Ext1: uint64(uintptr(sp)), // Kernel symbol to trace
Ext2: args.offset, // Kernel symbol offset
Config: config, // Retprobe flag
}
case uprobeType:
sp, err = unsafeStringPtr(path)
sp, err = unsafeStringPtr(args.path)
if err != nil {
return nil, err
}
if args.refCtrOffset != 0 {
config |= args.refCtrOffset << uprobeRefCtrOffsetShift
}
attr = unix.PerfEventAttr{
// The minimum size required for PMU uprobes is PERF_ATTR_SIZE_VER1,
// since it added the config2 (Ext2) field. The Size field controls the
@ -216,23 +288,34 @@ func pmuProbe(typ probeType, symbol, path string, offset uint64, pid int, ret bo
Size: unix.PERF_ATTR_SIZE_VER1,
Type: uint32(et), // PMU event type read from sysfs
Ext1: uint64(uintptr(sp)), // Uprobe path
Ext2: offset, // Uprobe offset
Config: config, // Retprobe flag
Ext2: args.offset, // Uprobe offset
Config: config, // RefCtrOffset, Retprobe flag
}
}
fd, err := unix.PerfEventOpen(&attr, pid, 0, -1, unix.PERF_FLAG_FD_CLOEXEC)
rawFd, err := unix.PerfEventOpen(&attr, args.pid, 0, -1, unix.PERF_FLAG_FD_CLOEXEC)
// On some old kernels, kprobe PMU doesn't allow `.` in symbol names and
// return -EINVAL. Return ErrNotSupported to allow falling back to tracefs.
// https://github.com/torvalds/linux/blob/94710cac0ef4/kernel/trace/trace_kprobe.c#L340-L343
if errors.Is(err, unix.EINVAL) && strings.Contains(args.symbol, ".") {
return nil, fmt.Errorf("symbol '%s+%#x': older kernels don't accept dots: %w", args.symbol, args.offset, ErrNotSupported)
}
// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
// when trying to create a kretprobe for a missing symbol. Make sure ENOENT
// is returned to the caller.
if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) {
return nil, fmt.Errorf("symbol '%s' not found: %w", symbol, os.ErrNotExist)
return nil, fmt.Errorf("symbol '%s+%#x' not found: %w", args.symbol, args.offset, os.ErrNotExist)
}
// Since commit ab105a4fb894, -EILSEQ is returned when a kprobe sym+offset is resolved
// to an invalid insn boundary.
if errors.Is(err, syscall.EILSEQ) {
return nil, fmt.Errorf("symbol '%s+%#x' not found (bad insn boundary): %w", args.symbol, args.offset, os.ErrNotExist)
}
// Since at least commit cb9a19fe4aa51, ENOTSUPP is returned
// when attempting to set a uprobe on a trap instruction.
if errors.Is(err, unix.ENOTSUPP) {
return nil, fmt.Errorf("failed setting uprobe on offset %#x (possible trap insn): %w", offset, err)
return nil, fmt.Errorf("failed setting uprobe on offset %#x (possible trap insn): %w", args.offset, err)
}
if err != nil {
return nil, fmt.Errorf("opening perf event: %w", err)
@ -241,18 +324,24 @@ func pmuProbe(typ probeType, symbol, path string, offset uint64, pid int, ret bo
// Ensure the string pointer is not collected before PerfEventOpen returns.
runtime.KeepAlive(sp)
fd, err := sys.NewFD(rawFd)
if err != nil {
return nil, err
}
// Kernel has perf_[k,u]probe PMU available, initialize perf event.
return &perfEvent{
fd: internal.NewFD(uint32(fd)),
pmuID: et,
name: symbol,
typ: typ.PerfEventType(ret),
typ: typ.PerfEventType(args.ret),
name: args.symbol,
pmuID: et,
cookie: args.cookie,
fd: fd,
}, nil
}
// tracefsKprobe creates a Kprobe tracefs entry.
func tracefsKprobe(symbol string, ret bool) (*perfEvent, error) {
return tracefsProbe(kprobeType, symbol, "", 0, perfAllThreads, ret)
func tracefsKprobe(args probeArgs) (*perfEvent, error) {
return tracefsProbe(kprobeType, args)
}
// tracefsProbe creates a trace event by writing an entry to <tracefs>/[k,u]probe_events.
@ -261,7 +350,7 @@ func tracefsKprobe(symbol string, ret bool) (*perfEvent, error) {
// Path and offset are only set in the case of uprobe(s) and are used to set
// the executable/library path on the filesystem and the offset where the probe is inserted.
// A perf event is then opened on the newly-created trace event and returned to the caller.
func tracefsProbe(typ probeType, symbol, path string, offset uint64, pid int, ret bool) (*perfEvent, error) {
func tracefsProbe(typ probeType, args probeArgs) (_ *perfEvent, err error) {
// Generate a random string for each trace event we attempt to create.
// This value is used as the 'group' token in tracefs to allow creating
// multiple kprobe trace events with the same name.
@ -269,42 +358,53 @@ func tracefsProbe(typ probeType, symbol, path string, offset uint64, pid int, re
if err != nil {
return nil, fmt.Errorf("randomizing group name: %w", err)
}
args.group = group
// Before attempting to create a trace event through tracefs,
// check if an event with the same group and name already exists.
// Kernels 4.x and earlier don't return os.ErrExist on writing a duplicate
// entry, so we need to rely on reads for detecting uniqueness.
_, err = getTraceEventID(group, symbol)
_, err = getTraceEventID(group, args.symbol)
if err == nil {
return nil, fmt.Errorf("trace event already exists: %s/%s", group, symbol)
return nil, fmt.Errorf("trace event already exists: %s/%s", group, args.symbol)
}
if err != nil && !errors.Is(err, os.ErrNotExist) {
return nil, fmt.Errorf("checking trace event %s/%s: %w", group, symbol, err)
return nil, fmt.Errorf("checking trace event %s/%s: %w", group, args.symbol, err)
}
// Create the [k,u]probe trace event using tracefs.
if err := createTraceFSProbeEvent(typ, group, symbol, path, offset, ret); err != nil {
if err := createTraceFSProbeEvent(typ, args); err != nil {
return nil, fmt.Errorf("creating probe entry on tracefs: %w", err)
}
defer func() {
if err != nil {
// Make sure we clean up the created tracefs event when we return error.
// If a livepatch handler is already active on the symbol, the write to
// tracefs will succeed, a trace event will show up, but creating the
// perf event will fail with EBUSY.
_ = closeTraceFSProbeEvent(typ, args.group, args.symbol)
}
}()
// Get the newly-created trace event's id.
tid, err := getTraceEventID(group, symbol)
tid, err := getTraceEventID(group, args.symbol)
if err != nil {
return nil, fmt.Errorf("getting trace event id: %w", err)
}
// Kprobes are ephemeral tracepoints and share the same perf event type.
fd, err := openTracepointPerfEvent(tid, pid)
fd, err := openTracepointPerfEvent(tid, args.pid)
if err != nil {
return nil, err
}
return &perfEvent{
fd: fd,
typ: typ.PerfEventType(args.ret),
group: group,
name: symbol,
name: args.symbol,
tracefsID: tid,
typ: typ.PerfEventType(ret),
cookie: args.cookie,
fd: fd,
}, nil
}
@ -312,7 +412,7 @@ func tracefsProbe(typ probeType, symbol, path string, offset uint64, pid int, re
// <tracefs>/[k,u]probe_events. Returns os.ErrNotExist if symbol is not a valid
// kernel symbol, or if it is not traceable with kprobes. Returns os.ErrExist
// if a probe with the same group and symbol already exists.
func createTraceFSProbeEvent(typ probeType, group, symbol, path string, offset uint64, ret bool) error {
func createTraceFSProbeEvent(typ probeType, args probeArgs) error {
// Open the kprobe_events file in tracefs.
f, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666)
if err != nil {
@ -320,7 +420,7 @@ func createTraceFSProbeEvent(typ probeType, group, symbol, path string, offset u
}
defer f.Close()
var pe string
var pe, token string
switch typ {
case kprobeType:
// The kprobe_events syntax is as follows (see Documentation/trace/kprobetrace.txt):
@ -337,7 +437,8 @@ func createTraceFSProbeEvent(typ probeType, group, symbol, path string, offset u
// subsampling or rate limiting logic can be more accurately implemented in
// the eBPF program itself.
// See Documentation/kprobes.txt for more details.
pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(ret), group, symbol, symbol)
token = kprobeToken(args)
pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.ret), args.group, sanitizeSymbol(args.symbol), token)
case uprobeType:
// The uprobe_events syntax is as follows:
// p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a probe
@ -346,18 +447,30 @@ func createTraceFSProbeEvent(typ probeType, group, symbol, path string, offset u
//
// Some examples:
// r:ebpf_1234/readline /bin/bash:0x12345
// p:ebpf_5678/main_mySymbol /bin/mybin:0x12345
// p:ebpf_5678/main_mySymbol /bin/mybin:0x12345(0x123)
//
// See Documentation/trace/uprobetracer.txt for more details.
pathOffset := uprobePathOffset(path, offset)
pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(ret), group, symbol, pathOffset)
token = uprobeToken(args)
pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.ret), args.group, args.symbol, token)
}
_, err = f.WriteString(pe)
// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
// when trying to create a kretprobe for a missing symbol. Make sure ENOENT
// is returned to the caller.
// EINVAL is also returned on pre-5.2 kernels when the `SYM[+offs]` token
// is resolved to an invalid insn boundary.
if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) {
return fmt.Errorf("symbol %s not found: %w", symbol, os.ErrNotExist)
return fmt.Errorf("token %s: %w", token, os.ErrNotExist)
}
// Since commit ab105a4fb894, -EILSEQ is returned when a kprobe sym+offset is resolved
// to an invalid insn boundary.
if errors.Is(err, syscall.EILSEQ) {
return fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist)
}
// ERANGE is returned when the `SYM[+offs]` token is too big and cannot
// be resolved.
if errors.Is(err, syscall.ERANGE) {
return fmt.Errorf("token %s: offset too big: %w", token, os.ErrNotExist)
}
if err != nil {
return fmt.Errorf("writing '%s' to '%s': %w", pe, typ.EventsPath(), err)
@ -377,7 +490,7 @@ func closeTraceFSProbeEvent(typ probeType, group, symbol string) error {
// See [k,u]probe_events syntax above. The probe type does not need to be specified
// for removals.
pe := fmt.Sprintf("-:%s/%s", group, symbol)
pe := fmt.Sprintf("-:%s/%s", group, sanitizeSymbol(symbol))
if _, err = f.WriteString(pe); err != nil {
return fmt.Errorf("writing '%s' to '%s': %w", pe, typ.EventsPath(), err)
}
@ -388,9 +501,9 @@ func closeTraceFSProbeEvent(typ probeType, group, symbol string) error {
// randomGroup generates a pseudorandom string for use as a tracefs group name.
// Returns an error when the output string would exceed 63 characters (kernel
// limitation), when rand.Read() fails or when prefix contains characters not
// allowed by rgxTraceEvent.
// allowed by isValidTraceID.
func randomGroup(prefix string) (string, error) {
if !rgxTraceEvent.MatchString(prefix) {
if !isValidTraceID(prefix) {
return "", fmt.Errorf("prefix '%s' must be alphanumeric or underscore: %w", prefix, errInvalidInput)
}
@ -442,3 +555,14 @@ func kretprobeBit() (uint64, error) {
})
return kprobeRetprobeBit.value, kprobeRetprobeBit.err
}
// kprobeToken creates the SYM[+offs] token for the tracefs api.
func kprobeToken(args probeArgs) string {
po := args.symbol
if args.offset != 0 {
po += fmt.Sprintf("+%#x", args.offset)
}
return po
}
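An in-package illustration of the resulting token format (kprobeToken is unexported, and fmt is already imported by this package):
func exampleKprobeToken() {
    fmt.Println(kprobeToken(probeArgs{symbol: "do_sys_open"}))               // do_sys_open
    fmt.Println(kprobeToken(probeArgs{symbol: "do_sys_open", offset: 0x10})) // do_sys_open+0x10
}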

View file

@ -1,12 +1,14 @@
package link
import (
"bytes"
"encoding/binary"
"fmt"
"unsafe"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/btf"
"github.com/cilium/ebpf/internal/sys"
)
var ErrNotSupported = internal.ErrNotSupported
@ -35,12 +37,53 @@ type Link interface {
// not called.
Close() error
// Info returns metadata on a link.
//
// May return an error wrapping ErrNotSupported.
Info() (*Info, error)
// Prevent external users from implementing this interface.
isLink()
}
// LoadPinnedLink loads a link that was persisted into a bpffs.
func LoadPinnedLink(fileName string, opts *ebpf.LoadPinOptions) (Link, error) {
raw, err := loadPinnedRawLink(fileName, opts)
if err != nil {
return nil, err
}
return wrapRawLink(raw)
}
// wrap a RawLink in a more specific type if possible.
//
// The function takes ownership of raw and closes it on error.
func wrapRawLink(raw *RawLink) (Link, error) {
info, err := raw.Info()
if err != nil {
raw.Close()
return nil, err
}
switch info.Type {
case RawTracepointType:
return &rawTracepoint{*raw}, nil
case TracingType:
return &tracing{*raw}, nil
case CgroupType:
return &linkCgroup{*raw}, nil
case IterType:
return &Iter{*raw}, nil
case NetNsType:
return &NetNsLink{*raw}, nil
default:
return raw, nil
}
}
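A minimal caller-side sketch of the new LoadPinnedLink entry point; the pin path is hypothetical, and passing nil LoadPinOptions requests default flags:
package example

import (
    "fmt"

    "github.com/cilium/ebpf/link"
)

// loadPinned is a sketch: thanks to wrapRawLink, the returned Link may be a
// concrete type such as *link.Iter, recoverable with a type assertion.
func loadPinned() error {
    l, err := link.LoadPinnedLink("/sys/fs/bpf/my_link", nil)
    if err != nil {
        return err
    }
    defer l.Close()

    if it, ok := l.(*link.Iter); ok {
        fmt.Println("pinned link is an iterator:", it)
    }
    return nil
}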
// ID uniquely identifies a BPF link.
type ID uint32
type ID = sys.LinkID
// RawLinkOptions control the creation of a raw link.
type RawLinkOptions struct {
@ -52,13 +95,53 @@ type RawLinkOptions struct {
Attach ebpf.AttachType
// BTF is the BTF of the attachment target.
BTF btf.TypeID
// Flags control the attach behaviour.
Flags uint32
}
// RawLinkInfo contains metadata on a link.
type RawLinkInfo struct {
// Info contains metadata on a link.
type Info struct {
Type Type
ID ID
Program ebpf.ProgramID
extra interface{}
}
type TracingInfo sys.TracingLinkInfo
type CgroupInfo sys.CgroupLinkInfo
type NetNsInfo sys.NetNsLinkInfo
type XDPInfo sys.XDPLinkInfo
// Tracing returns tracing type-specific link info.
//
// Returns nil if the type-specific link info isn't available.
func (r Info) Tracing() *TracingInfo {
e, _ := r.extra.(*TracingInfo)
return e
}
// Cgroup returns cgroup type-specific link info.
//
// Returns nil if the type-specific link info isn't available.
func (r Info) Cgroup() *CgroupInfo {
e, _ := r.extra.(*CgroupInfo)
return e
}
// NetNs returns netns type-specific link info.
//
// Returns nil if the type-specific link info isn't available.
func (r Info) NetNs() *NetNsInfo {
e, _ := r.extra.(*NetNsInfo)
return e
}
// XDP returns XDP type-specific link info.
//
// Returns nil if the type-specific link info isn't available.
func (r Info) XDP() *XDPInfo {
e, _ := r.extra.(*XDPInfo)
return e
}
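A caller-side sketch of the new Info accessors; the type-specific getters return nil when no extra info was decoded:
package example

import (
    "fmt"

    "github.com/cilium/ebpf/link"
)

// describe is a sketch: it prints the generic metadata and, when decoded,
// the cgroup-specific details.
func describe(l link.Link) error {
    info, err := l.Info() // may wrap ErrNotSupported for some link kinds
    if err != nil {
        return err
    }
    fmt.Println(info.Type, info.ID, info.Program)
    if cg := info.Cgroup(); cg != nil {
        fmt.Printf("cgroup link info: %+v\n", cg)
    }
    return nil
}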
// RawLink is the low-level API to bpf_link.
@ -66,7 +149,7 @@ type RawLinkInfo struct {
// You should consider using the higher level interfaces in this
// package instead.
type RawLink struct {
fd *internal.FD
fd *sys.FD
pinnedPath string
}
@ -77,21 +160,22 @@ func AttachRawLink(opts RawLinkOptions) (*RawLink, error) {
}
if opts.Target < 0 {
return nil, fmt.Errorf("invalid target: %s", internal.ErrClosedFd)
return nil, fmt.Errorf("invalid target: %s", sys.ErrClosedFd)
}
progFd := opts.Program.FD()
if progFd < 0 {
return nil, fmt.Errorf("invalid program: %s", internal.ErrClosedFd)
return nil, fmt.Errorf("invalid program: %s", sys.ErrClosedFd)
}
attr := bpfLinkCreateAttr{
targetFd: uint32(opts.Target),
progFd: uint32(progFd),
attachType: opts.Attach,
targetBTFID: uint32(opts.BTF),
attr := sys.LinkCreateAttr{
TargetFd: uint32(opts.Target),
ProgFd: uint32(progFd),
AttachType: sys.AttachType(opts.Attach),
TargetBtfId: uint32(opts.BTF),
Flags: opts.Flags,
}
fd, err := bpfLinkCreate(&attr)
fd, err := sys.LinkCreate(&attr)
if err != nil {
return nil, fmt.Errorf("can't create link: %s", err)
}
@ -99,44 +183,23 @@ func AttachRawLink(opts RawLinkOptions) (*RawLink, error) {
return &RawLink{fd, ""}, nil
}
// LoadPinnedRawLink loads a persisted link from a bpffs.
//
// Returns an error if the pinned link type doesn't match linkType. Pass
// UnspecifiedType to disable this behaviour.
func LoadPinnedRawLink(fileName string, linkType Type, opts *ebpf.LoadPinOptions) (*RawLink, error) {
fd, err := internal.BPFObjGet(fileName, opts.Marshal())
func loadPinnedRawLink(fileName string, opts *ebpf.LoadPinOptions) (*RawLink, error) {
fd, err := sys.ObjGet(&sys.ObjGetAttr{
Pathname: sys.NewStringPointer(fileName),
FileFlags: opts.Marshal(),
})
if err != nil {
return nil, fmt.Errorf("load pinned link: %w", err)
}
link := &RawLink{fd, fileName}
if linkType == UnspecifiedType {
return link, nil
}
info, err := link.Info()
if err != nil {
link.Close()
return nil, fmt.Errorf("get pinned link info: %s", err)
}
if info.Type != linkType {
link.Close()
return nil, fmt.Errorf("link type %v doesn't match %v", info.Type, linkType)
}
return link, nil
return &RawLink{fd, fileName}, nil
}
func (l *RawLink) isLink() {}
// FD returns the raw file descriptor.
func (l *RawLink) FD() int {
fd, err := l.fd.Value()
if err != nil {
return -1
}
return int(fd)
return l.fd.Int()
}
// Close breaks the link.
@ -185,49 +248,66 @@ type RawLinkUpdateOptions struct {
func (l *RawLink) UpdateArgs(opts RawLinkUpdateOptions) error {
newFd := opts.New.FD()
if newFd < 0 {
return fmt.Errorf("invalid program: %s", internal.ErrClosedFd)
return fmt.Errorf("invalid program: %s", sys.ErrClosedFd)
}
var oldFd int
if opts.Old != nil {
oldFd = opts.Old.FD()
if oldFd < 0 {
return fmt.Errorf("invalid replacement program: %s", internal.ErrClosedFd)
return fmt.Errorf("invalid replacement program: %s", sys.ErrClosedFd)
}
}
linkFd, err := l.fd.Value()
if err != nil {
return fmt.Errorf("can't update link: %s", err)
attr := sys.LinkUpdateAttr{
LinkFd: l.fd.Uint(),
NewProgFd: uint32(newFd),
OldProgFd: uint32(oldFd),
Flags: opts.Flags,
}
attr := bpfLinkUpdateAttr{
linkFd: linkFd,
newProgFd: uint32(newFd),
oldProgFd: uint32(oldFd),
flags: opts.Flags,
}
return bpfLinkUpdate(&attr)
}
// struct bpf_link_info
type bpfLinkInfo struct {
typ uint32
id uint32
prog_id uint32
return sys.LinkUpdate(&attr)
}
// Info returns metadata about the link.
func (l *RawLink) Info() (*RawLinkInfo, error) {
var info bpfLinkInfo
err := internal.BPFObjGetInfoByFD(l.fd, unsafe.Pointer(&info), unsafe.Sizeof(info))
if err != nil {
func (l *RawLink) Info() (*Info, error) {
var info sys.LinkInfo
if err := sys.ObjInfo(l.fd, &info); err != nil {
return nil, fmt.Errorf("link info: %s", err)
}
return &RawLinkInfo{
Type(info.typ),
ID(info.id),
ebpf.ProgramID(info.prog_id),
var extra interface{}
switch info.Type {
case CgroupType:
extra = &CgroupInfo{}
case IterType:
// not supported
case NetNsType:
extra = &NetNsInfo{}
case RawTracepointType:
// not supported
case TracingType:
extra = &TracingInfo{}
case XDPType:
extra = &XDPInfo{}
case PerfEventType:
// no extra
default:
return nil, fmt.Errorf("unknown link info type: %d", info.Type)
}
if info.Type != RawTracepointType && info.Type != IterType && info.Type != PerfEventType {
buf := bytes.NewReader(info.Extra[:])
err := binary.Read(buf, internal.NativeEndian, extra)
if err != nil {
return nil, fmt.Errorf("can not read extra link info: %w", err)
}
}
return &Info{
info.Type,
info.Id,
ebpf.ProgramID(info.ProgId),
extra,
}, nil
}

View file

@ -6,14 +6,9 @@ import (
"github.com/cilium/ebpf"
)
// NetNsInfo contains metadata about a network namespace link.
type NetNsInfo struct {
RawLinkInfo
}
// NetNsLink is a program attached to a network namespace.
type NetNsLink struct {
*RawLink
RawLink
}
// AttachNetNs attaches a program to a network namespace.
@ -37,24 +32,5 @@ func AttachNetNs(ns int, prog *ebpf.Program) (*NetNsLink, error) {
return nil, err
}
return &NetNsLink{link}, nil
}
// LoadPinnedNetNs loads a network namespace link from bpffs.
func LoadPinnedNetNs(fileName string, opts *ebpf.LoadPinOptions) (*NetNsLink, error) {
link, err := LoadPinnedRawLink(fileName, NetNsType, opts)
if err != nil {
return nil, err
}
return &NetNsLink{link}, nil
}
// Info returns information about the link.
func (nns *NetNsLink) Info() (*NetNsInfo, error) {
info, err := nns.RawLink.Info()
if err != nil {
return nil, err
}
return &NetNsInfo{*info}, nil
return &NetNsLink{*link}, nil
}

View file

@ -6,14 +6,15 @@ import (
"fmt"
"os"
"path/filepath"
"regexp"
"runtime"
"strconv"
"strings"
"unsafe"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
@ -43,11 +44,6 @@ import (
var (
tracefsPath = "/sys/kernel/debug/tracing"
// Trace event groups, names and kernel symbols must adhere to this set
// of characters. Non-empty, first character must not be a number, all
// characters must be alphanumeric or underscore.
rgxTraceEvent = regexp.MustCompile("^[a-zA-Z_][0-9a-zA-Z_]*$")
errInvalidInput = errors.New("invalid input")
)
@ -69,6 +65,8 @@ const (
// can be attached to it. It is created based on a tracefs trace event or a
// Performance Monitoring Unit (PMU).
type perfEvent struct {
// The event type determines the types of programs that can be attached.
typ perfEventType
// Group and name of the tracepoint/kprobe/uprobe.
group string
@ -79,53 +77,15 @@ type perfEvent struct {
// ID of the trace event read from tracefs. Valid IDs are non-zero.
tracefsID uint64
// The event type determines the types of programs that can be attached.
typ perfEventType
// User provided arbitrary value.
cookie uint64
fd *internal.FD
}
func (pe *perfEvent) isLink() {}
func (pe *perfEvent) Pin(string) error {
return fmt.Errorf("pin perf event: %w", ErrNotSupported)
}
func (pe *perfEvent) Unpin() error {
return fmt.Errorf("unpin perf event: %w", ErrNotSupported)
}
// Since 4.15 (e87c6bc3852b "bpf: permit multiple bpf attachments for a single perf event"),
// calling PERF_EVENT_IOC_SET_BPF appends the given program to a prog_array
// owned by the perf event, which means multiple programs can be attached
// simultaneously.
//
// Before 4.15, calling PERF_EVENT_IOC_SET_BPF more than once on a perf event
// returns EEXIST.
//
// Detaching a program from a perf event is currently not possible, so a
// program replacement mechanism cannot be implemented for perf events.
func (pe *perfEvent) Update(prog *ebpf.Program) error {
return fmt.Errorf("can't replace eBPF program in perf event: %w", ErrNotSupported)
// This is the perf event FD.
fd *sys.FD
}
func (pe *perfEvent) Close() error {
if pe.fd == nil {
return nil
}
pfd, err := pe.fd.Value()
if err != nil {
return fmt.Errorf("getting perf event fd: %w", err)
}
err = unix.IoctlSetInt(int(pfd), unix.PERF_EVENT_IOC_DISABLE, 0)
if err != nil {
return fmt.Errorf("disabling perf event: %w", err)
}
err = pe.fd.Close()
if err != nil {
if err := pe.fd.Close(); err != nil {
return fmt.Errorf("closing perf event fd: %w", err)
}
@ -148,49 +108,150 @@ func (pe *perfEvent) Close() error {
return nil
}
// perfEventLink represents a bpf perf link.
type perfEventLink struct {
RawLink
pe *perfEvent
}
func (pl *perfEventLink) isLink() {}
// Pinning requires the underlying perf event FD to stay open.
//
// | PerfEvent FD | BpfLink FD | Works                |
// |--------------|------------|----------------------|
// | Open         | Open       | Yes                  |
// | Closed       | Open       | No                   |
// | Open         | Closed     | No (Pin() -> EINVAL) |
// | Closed       | Closed     | No (Pin() -> EINVAL) |
//
// There is currently no pretty way to recover the perf event FD
// when loading a pinned link, so leave as not supported for now.
func (pl *perfEventLink) Pin(string) error {
return fmt.Errorf("perf event link pin: %w", ErrNotSupported)
}
func (pl *perfEventLink) Unpin() error {
return fmt.Errorf("perf event link unpin: %w", ErrNotSupported)
}
func (pl *perfEventLink) Close() error {
if err := pl.pe.Close(); err != nil {
return fmt.Errorf("perf event link close: %w", err)
}
return pl.fd.Close()
}
func (pl *perfEventLink) Update(prog *ebpf.Program) error {
return fmt.Errorf("perf event link update: %w", ErrNotSupported)
}
// perfEventIoctl implements Link and handles the perf event lifecycle
// via ioctl().
type perfEventIoctl struct {
*perfEvent
}
func (pi *perfEventIoctl) isLink() {}
// Since 4.15 (e87c6bc3852b "bpf: permit multiple bpf attachments for a single perf event"),
// calling PERF_EVENT_IOC_SET_BPF appends the given program to a prog_array
// owned by the perf event, which means multiple programs can be attached
// simultaneously.
//
// Before 4.15, calling PERF_EVENT_IOC_SET_BPF more than once on a perf event
// returns EEXIST.
//
// Detaching a program from a perf event is currently not possible, so a
// program replacement mechanism cannot be implemented for perf events.
func (pi *perfEventIoctl) Update(prog *ebpf.Program) error {
return fmt.Errorf("perf event ioctl update: %w", ErrNotSupported)
}
func (pi *perfEventIoctl) Pin(string) error {
return fmt.Errorf("perf event ioctl pin: %w", ErrNotSupported)
}
func (pi *perfEventIoctl) Unpin() error {
return fmt.Errorf("perf event ioctl unpin: %w", ErrNotSupported)
}
func (pi *perfEventIoctl) Info() (*Info, error) {
return nil, fmt.Errorf("perf event ioctl info: %w", ErrNotSupported)
}
// attach the given eBPF prog to the perf event stored in pe.
// pe must contain a valid perf event fd.
// prog's type must match the program type stored in pe.
func (pe *perfEvent) attach(prog *ebpf.Program) error {
func attachPerfEvent(pe *perfEvent, prog *ebpf.Program) (Link, error) {
if prog == nil {
return errors.New("cannot attach a nil program")
}
if pe.fd == nil {
return errors.New("cannot attach to nil perf event")
return nil, errors.New("cannot attach a nil program")
}
if prog.FD() < 0 {
return fmt.Errorf("invalid program: %w", internal.ErrClosedFd)
return nil, fmt.Errorf("invalid program: %w", sys.ErrClosedFd)
}
switch pe.typ {
case kprobeEvent, kretprobeEvent, uprobeEvent, uretprobeEvent:
if t := prog.Type(); t != ebpf.Kprobe {
return fmt.Errorf("invalid program type (expected %s): %s", ebpf.Kprobe, t)
return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.Kprobe, t)
}
case tracepointEvent:
if t := prog.Type(); t != ebpf.TracePoint {
return fmt.Errorf("invalid program type (expected %s): %s", ebpf.TracePoint, t)
return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.TracePoint, t)
}
default:
return fmt.Errorf("unknown perf event type: %d", pe.typ)
return nil, fmt.Errorf("unknown perf event type: %d", pe.typ)
}
// The ioctl below will fail when the fd is invalid.
kfd, _ := pe.fd.Value()
if err := haveBPFLinkPerfEvent(); err == nil {
return attachPerfEventLink(pe, prog)
}
return attachPerfEventIoctl(pe, prog)
}
func attachPerfEventIoctl(pe *perfEvent, prog *ebpf.Program) (*perfEventIoctl, error) {
if pe.cookie != 0 {
return nil, fmt.Errorf("cookies are not supported: %w", ErrNotSupported)
}
// Assign the eBPF program to the perf event.
err := unix.IoctlSetInt(int(kfd), unix.PERF_EVENT_IOC_SET_BPF, prog.FD())
err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_SET_BPF, prog.FD())
if err != nil {
return fmt.Errorf("setting perf event bpf program: %w", err)
return nil, fmt.Errorf("setting perf event bpf program: %w", err)
}
// PERF_EVENT_IOC_ENABLE and _DISABLE ignore their given values.
if err := unix.IoctlSetInt(int(kfd), unix.PERF_EVENT_IOC_ENABLE, 0); err != nil {
return fmt.Errorf("enable perf event: %s", err)
if err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_ENABLE, 0); err != nil {
return nil, fmt.Errorf("enable perf event: %s", err)
}
pi := &perfEventIoctl{pe}
// Close the perf event when its reference is lost to avoid leaking system resources.
runtime.SetFinalizer(pe, (*perfEvent).Close)
return nil
runtime.SetFinalizer(pi, (*perfEventIoctl).Close)
return pi, nil
}
// Use the bpf api to attach the perf event (BPF_LINK_TYPE_PERF_EVENT, 5.15+).
//
// https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e
func attachPerfEventLink(pe *perfEvent, prog *ebpf.Program) (*perfEventLink, error) {
fd, err := sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{
ProgFd: uint32(prog.FD()),
TargetFd: pe.fd.Uint(),
AttachType: sys.BPF_PERF_EVENT,
BpfCookie: pe.cookie,
})
if err != nil {
return nil, fmt.Errorf("cannot create bpf perf link: %v", err)
}
pl := &perfEventLink{RawLink{fd: fd}, pe}
// Close the perf event when its reference is lost to avoid leaking system resources.
runtime.SetFinalizer(pl, (*perfEventLink).Close)
return pl, nil
}
// unsafeStringPtr returns an unsafe.Pointer to a NUL-terminated copy of str.
@ -203,8 +264,12 @@ func unsafeStringPtr(str string) (unsafe.Pointer, error) {
}
// getTraceEventID reads a trace event's ID from tracefs given its group and name.
// group and name must be alphanumeric or underscore, as required by the kernel.
// The kernel requires group and name to be alphanumeric or underscore.
//
// name automatically has its invalid symbols converted to underscores so the caller
// can pass a raw symbol name, e.g. a kernel symbol containing dots.
func getTraceEventID(group, name string) (uint64, error) {
name = sanitizeSymbol(name)
tid, err := uint64FromFile(tracefsPath, "events", group, name, "id")
if errors.Is(err, os.ErrNotExist) {
return 0, fmt.Errorf("trace event %s/%s: %w", group, name, os.ErrNotExist)
@ -235,7 +300,7 @@ func getPMUEventType(typ probeType) (uint64, error) {
// openTracepointPerfEvent opens a tracepoint-type perf event. System-wide
// [k,u]probes created by writing to <tracefs>/[k,u]probe_events are tracepoints
// behind the scenes, and can be attached to using these perf events.
func openTracepointPerfEvent(tid uint64, pid int) (*internal.FD, error) {
func openTracepointPerfEvent(tid uint64, pid int) (*sys.FD, error) {
attr := unix.PerfEventAttr{
Type: unix.PERF_TYPE_TRACEPOINT,
Config: tid,
@ -249,7 +314,7 @@ func openTracepointPerfEvent(tid uint64, pid int) (*internal.FD, error) {
return nil, fmt.Errorf("opening tracepoint perf event: %w", err)
}
return internal.NewFD(uint32(fd)), nil
return sys.NewFD(fd)
}
// uint64FromFile reads a uint64 from a file. All elements of path are sanitized
@ -270,3 +335,60 @@ func uint64FromFile(base string, path ...string) (uint64, error) {
et := bytes.TrimSpace(data)
return strconv.ParseUint(string(et), 10, 64)
}
// Probe BPF perf link.
//
// https://elixir.bootlin.com/linux/v5.16.8/source/kernel/bpf/syscall.c#L4307
// https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e
var haveBPFLinkPerfEvent = internal.FeatureTest("bpf_link_perf_event", "5.15", func() error {
prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
Name: "probe_bpf_perf_link",
Type: ebpf.Kprobe,
Instructions: asm.Instructions{
asm.Mov.Imm(asm.R0, 0),
asm.Return(),
},
License: "MIT",
})
if err != nil {
return err
}
defer prog.Close()
_, err = sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{
ProgFd: uint32(prog.FD()),
AttachType: sys.BPF_PERF_EVENT,
})
if errors.Is(err, unix.EINVAL) {
return internal.ErrNotSupported
}
if errors.Is(err, unix.EBADF) {
return nil
}
return err
})
// isValidTraceID implements the equivalent of a regex match
// against "^[a-zA-Z_][0-9a-zA-Z_]*$".
//
// Trace event groups, names and kernel symbols must adhere to this set
// of characters. Non-empty, first character must not be a number, all
// characters must be alphanumeric or underscore.
func isValidTraceID(s string) bool {
if len(s) < 1 {
return false
}
for i, c := range []byte(s) {
switch {
case c >= 'a' && c <= 'z':
case c >= 'A' && c <= 'Z':
case c == '_':
case i > 0 && c >= '0' && c <= '9':
default:
return false
}
}
return true
}
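In-package illustration of the character set (isValidTraceID is unexported):
var (
    _ = isValidTraceID("ebpf_1234") // true
    _ = isValidTraceID("1oops")     // false: leading digit
    _ = isValidTraceID("dot.ted")   // false: '.' is not allowed here
)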

View file

@ -4,7 +4,7 @@ import (
"fmt"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
)
type RawAttachProgramOptions struct {
@ -34,7 +34,7 @@ func RawAttachProgram(opts RawAttachProgramOptions) error {
replaceFd = uint32(opts.Replace.FD())
}
attr := internal.BPFProgAttachAttr{
attr := sys.ProgAttachAttr{
TargetFd: uint32(opts.Target),
AttachBpfFd: uint32(opts.Program.FD()),
ReplaceBpfFd: replaceFd,
@ -42,7 +42,7 @@ func RawAttachProgram(opts RawAttachProgramOptions) error {
AttachFlags: uint32(opts.Flags),
}
if err := internal.BPFProgAttach(&attr); err != nil {
if err := sys.ProgAttach(&attr); err != nil {
return fmt.Errorf("can't attach program: %w", err)
}
return nil
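A caller-side sketch of the unchanged public API; cgroupFD is a hypothetical open file descriptor for a cgroup v2 directory and prog a loaded ebpf.CGroupSKB program:
package example

import (
    "github.com/cilium/ebpf"
    "github.com/cilium/ebpf/link"
)

// rawAttach is a sketch only; no Replace program and no flags are set.
func rawAttach(cgroupFD int, prog *ebpf.Program) error {
    return link.RawAttachProgram(link.RawAttachProgramOptions{
        Target:  cgroupFD,
        Program: prog,
        Attach:  ebpf.AttachCGroupInetIngress,
    })
}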
@ -63,12 +63,12 @@ func RawDetachProgram(opts RawDetachProgramOptions) error {
return err
}
attr := internal.BPFProgDetachAttr{
attr := sys.ProgDetachAttr{
TargetFd: uint32(opts.Target),
AttachBpfFd: uint32(opts.Program.FD()),
AttachType: uint32(opts.Attach),
}
if err := internal.BPFProgDetach(&attr); err != nil {
if err := sys.ProgDetach(&attr); err != nil {
return fmt.Errorf("can't detach program: %w", err)
}

View file

@ -1,10 +1,11 @@
package link
import (
"errors"
"fmt"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
)
type RawTracepointOptions struct {
@ -22,40 +23,65 @@ func AttachRawTracepoint(opts RawTracepointOptions) (Link, error) {
return nil, fmt.Errorf("invalid program type %s, expected RawTracepoint(Writable)", t)
}
if opts.Program.FD() < 0 {
return nil, fmt.Errorf("invalid program: %w", internal.ErrClosedFd)
return nil, fmt.Errorf("invalid program: %w", sys.ErrClosedFd)
}
fd, err := bpfRawTracepointOpen(&bpfRawTracepointOpenAttr{
name: internal.NewStringPointer(opts.Name),
fd: uint32(opts.Program.FD()),
fd, err := sys.RawTracepointOpen(&sys.RawTracepointOpenAttr{
Name: sys.NewStringPointer(opts.Name),
ProgFd: uint32(opts.Program.FD()),
})
if err != nil {
return nil, err
}
return &progAttachRawTracepoint{fd: fd}, nil
err = haveBPFLink()
if errors.Is(err, ErrNotSupported) {
// Prior to commit 70ed506c3bbc ("bpf: Introduce pinnable bpf_link abstraction")
// raw_tracepoints are just a plain fd.
return &simpleRawTracepoint{fd}, nil
}
if err != nil {
return nil, err
}
return &rawTracepoint{RawLink{fd: fd}}, nil
}
type progAttachRawTracepoint struct {
fd *internal.FD
type simpleRawTracepoint struct {
fd *sys.FD
}
var _ Link = (*progAttachRawTracepoint)(nil)
var _ Link = (*simpleRawTracepoint)(nil)
func (rt *progAttachRawTracepoint) isLink() {}
func (frt *simpleRawTracepoint) isLink() {}
func (rt *progAttachRawTracepoint) Close() error {
return rt.fd.Close()
func (frt *simpleRawTracepoint) Close() error {
return frt.fd.Close()
}
func (rt *progAttachRawTracepoint) Update(_ *ebpf.Program) error {
return fmt.Errorf("can't update raw_tracepoint: %w", ErrNotSupported)
func (frt *simpleRawTracepoint) Update(_ *ebpf.Program) error {
return fmt.Errorf("update raw_tracepoint: %w", ErrNotSupported)
}
func (rt *progAttachRawTracepoint) Pin(_ string) error {
return fmt.Errorf("can't pin raw_tracepoint: %w", ErrNotSupported)
func (frt *simpleRawTracepoint) Pin(string) error {
return fmt.Errorf("pin raw_tracepoint: %w", ErrNotSupported)
}
func (rt *progAttachRawTracepoint) Unpin() error {
func (frt *simpleRawTracepoint) Unpin() error {
return fmt.Errorf("unpin raw_tracepoint: %w", ErrNotSupported)
}
func (frt *simpleRawTracepoint) Info() (*Info, error) {
return nil, fmt.Errorf("can't get raw_tracepoint info: %w", ErrNotSupported)
}
type rawTracepoint struct {
RawLink
}
var _ Link = (*rawTracepoint)(nil)
func (rt *rawTracepoint) Update(_ *ebpf.Program) error {
return fmt.Errorf("update raw_tracepoint: %w", ErrNotSupported)
}

40 vendor/github.com/cilium/ebpf/link/socket_filter.go generated vendored Normal file
View file

@ -0,0 +1,40 @@
package link
import (
"syscall"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal/unix"
)
// AttachSocketFilter attaches a SocketFilter BPF program to a socket.
func AttachSocketFilter(conn syscall.Conn, program *ebpf.Program) error {
rawConn, err := conn.SyscallConn()
if err != nil {
return err
}
var ssoErr error
err = rawConn.Control(func(fd uintptr) {
ssoErr = syscall.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_ATTACH_BPF, program.FD())
})
if ssoErr != nil {
return ssoErr
}
return err
}
// DetachSocketFilter detaches a SocketFilter BPF program from a socket.
func DetachSocketFilter(conn syscall.Conn) error {
rawConn, err := conn.SyscallConn()
if err != nil {
return err
}
var ssoErr error
err = rawConn.Control(func(fd uintptr) {
ssoErr = syscall.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_DETACH_BPF, 0)
})
if ssoErr != nil {
return ssoErr
}
return err
}
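A minimal usage sketch for the new helpers; prog is assumed to be a loaded ebpf.SocketFilter program, and *net.TCPConn satisfies the syscall.Conn constraint:
package example

import (
    "net"

    "github.com/cilium/ebpf"
    "github.com/cilium/ebpf/link"
)

// filterConn attaches the filter, lets the caller consume the socket, then
// detaches. Sketch only.
func filterConn(conn *net.TCPConn, prog *ebpf.Program) error {
    if err := link.AttachSocketFilter(conn, prog); err != nil {
        return err
    }
    // ... read from conn while the filter is attached ...
    return link.DetachSocketFilter(conn)
}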

View file

@ -2,35 +2,33 @@ package link
import (
"errors"
"unsafe"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
// Type is the kind of link.
type Type uint32
type Type = sys.LinkType
// Valid link types.
//
// Equivalent to enum bpf_link_type.
const (
UnspecifiedType Type = iota
RawTracepointType
TracingType
CgroupType
IterType
NetNsType
XDPType
UnspecifiedType = sys.BPF_LINK_TYPE_UNSPEC
RawTracepointType = sys.BPF_LINK_TYPE_RAW_TRACEPOINT
TracingType = sys.BPF_LINK_TYPE_TRACING
CgroupType = sys.BPF_LINK_TYPE_CGROUP
IterType = sys.BPF_LINK_TYPE_ITER
NetNsType = sys.BPF_LINK_TYPE_NETNS
XDPType = sys.BPF_LINK_TYPE_XDP
PerfEventType = sys.BPF_LINK_TYPE_PERF_EVENT
)
var haveProgAttach = internal.FeatureTest("BPF_PROG_ATTACH", "4.10", func() error {
prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
Type: ebpf.CGroupSKB,
AttachType: ebpf.AttachCGroupInetIngress,
License: "MIT",
Type: ebpf.CGroupSKB,
License: "MIT",
Instructions: asm.Instructions{
asm.Mov.Imm(asm.R0, 0),
asm.Return(),
@ -69,7 +67,7 @@ var haveProgAttachReplace = internal.FeatureTest("BPF_PROG_ATTACH atomic replace
// We know that we have BPF_PROG_ATTACH since we can load CGroupSKB programs.
// If passing BPF_F_REPLACE gives us EINVAL we know that the feature isn't
// present.
attr := internal.BPFProgAttachAttr{
attr := sys.ProgAttachAttr{
// We rely on this being checked after attachFlags.
TargetFd: ^uint32(0),
AttachBpfFd: uint32(prog.FD()),
@ -77,7 +75,7 @@ var haveProgAttachReplace = internal.FeatureTest("BPF_PROG_ATTACH atomic replace
AttachFlags: uint32(flagReplace),
}
err = internal.BPFProgAttach(&attr)
err = sys.ProgAttach(&attr)
if errors.Is(err, unix.EINVAL) {
return internal.ErrNotSupported
}
@ -87,73 +85,14 @@ var haveProgAttachReplace = internal.FeatureTest("BPF_PROG_ATTACH atomic replace
return err
})
type bpfLinkCreateAttr struct {
progFd uint32
targetFd uint32
attachType ebpf.AttachType
flags uint32
targetBTFID uint32
}
func bpfLinkCreate(attr *bpfLinkCreateAttr) (*internal.FD, error) {
ptr, err := internal.BPF(internal.BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
if err != nil {
return nil, err
}
return internal.NewFD(uint32(ptr)), nil
}
type bpfLinkCreateIterAttr struct {
prog_fd uint32
target_fd uint32
attach_type ebpf.AttachType
flags uint32
iter_info internal.Pointer
iter_info_len uint32
}
func bpfLinkCreateIter(attr *bpfLinkCreateIterAttr) (*internal.FD, error) {
ptr, err := internal.BPF(internal.BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
if err != nil {
return nil, err
}
return internal.NewFD(uint32(ptr)), nil
}
type bpfLinkUpdateAttr struct {
linkFd uint32
newProgFd uint32
flags uint32
oldProgFd uint32
}
func bpfLinkUpdate(attr *bpfLinkUpdateAttr) error {
_, err := internal.BPF(internal.BPF_LINK_UPDATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
return err
}
var haveBPFLink = internal.FeatureTest("bpf_link", "5.7", func() error {
prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
Type: ebpf.CGroupSKB,
AttachType: ebpf.AttachCGroupInetIngress,
License: "MIT",
Instructions: asm.Instructions{
asm.Mov.Imm(asm.R0, 0),
asm.Return(),
},
})
if err != nil {
return internal.ErrNotSupported
}
defer prog.Close()
attr := bpfLinkCreateAttr{
attr := sys.LinkCreateAttr{
// This is a hopefully invalid file descriptor, which triggers EBADF.
targetFd: ^uint32(0),
progFd: uint32(prog.FD()),
attachType: ebpf.AttachCGroupInetIngress,
TargetFd: ^uint32(0),
ProgFd: ^uint32(0),
AttachType: sys.AttachType(ebpf.AttachCGroupInetIngress),
}
_, err = bpfLinkCreate(&attr)
_, err := sys.LinkCreate(&attr)
if errors.Is(err, unix.EINVAL) {
return internal.ErrNotSupported
}
@ -162,30 +101,3 @@ var haveBPFLink = internal.FeatureTest("bpf_link", "5.7", func() error {
}
return err
})
type bpfIterCreateAttr struct {
linkFd uint32
flags uint32
}
func bpfIterCreate(attr *bpfIterCreateAttr) (*internal.FD, error) {
ptr, err := internal.BPF(internal.BPF_ITER_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
if err == nil {
return internal.NewFD(uint32(ptr)), nil
}
return nil, err
}
type bpfRawTracepointOpenAttr struct {
name internal.Pointer
fd uint32
_ uint32
}
func bpfRawTracepointOpen(attr *bpfRawTracepointOpenAttr) (*internal.FD, error) {
ptr, err := internal.BPF(internal.BPF_RAW_TRACEPOINT_OPEN, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
if err == nil {
return internal.NewFD(uint32(ptr)), nil
}
return nil, err
}

View file

@ -6,12 +6,22 @@ import (
"github.com/cilium/ebpf"
)
// TracepointOptions defines additional parameters that will be used
// when loading Tracepoints.
type TracepointOptions struct {
// Arbitrary value that can be fetched from an eBPF program
// via `bpf_get_attach_cookie()`.
//
// Needs kernel 5.15+.
Cookie uint64
}
// Tracepoint attaches the given eBPF program to the tracepoint with the given
// group and name. See /sys/kernel/debug/tracing/events to find available
// tracepoints. The top-level directory is the group, the event's subdirectory
// is the name. Example:
//
// tp, err := Tracepoint("syscalls", "sys_enter_fork", prog)
// tp, err := Tracepoint("syscalls", "sys_enter_fork", prog, nil)
//
// Losing the reference to the resulting Link (tp) will close the Tracepoint
// and prevent further execution of prog. The Link must be Closed during
@ -19,14 +29,14 @@ import (
//
// Note that attaching eBPF programs to syscalls (sys_enter_*/sys_exit_*) is
// only possible as of kernel 4.14 (commit cf5f5ce).
func Tracepoint(group, name string, prog *ebpf.Program) (Link, error) {
func Tracepoint(group, name string, prog *ebpf.Program, opts *TracepointOptions) (Link, error) {
if group == "" || name == "" {
return nil, fmt.Errorf("group and name cannot be empty: %w", errInvalidInput)
}
if prog == nil {
return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput)
}
if !rgxTraceEvent.MatchString(group) || !rgxTraceEvent.MatchString(name) {
if !isValidTraceID(group) || !isValidTraceID(name) {
return nil, fmt.Errorf("group and name '%s/%s' must be alphanumeric or underscore: %w", group, name, errInvalidInput)
}
if prog.Type() != ebpf.TracePoint {
@ -43,18 +53,25 @@ func Tracepoint(group, name string, prog *ebpf.Program) (Link, error) {
return nil, err
}
pe := &perfEvent{
fd: fd,
tracefsID: tid,
group: group,
name: name,
typ: tracepointEvent,
var cookie uint64
if opts != nil {
cookie = opts.Cookie
}
if err := pe.attach(prog); err != nil {
pe := &perfEvent{
typ: tracepointEvent,
group: group,
name: name,
tracefsID: tid,
cookie: cookie,
fd: fd,
}
lnk, err := attachPerfEvent(pe, prog)
if err != nil {
pe.Close()
return nil, err
}
return pe, nil
return lnk, nil
}
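A sketch of the new four-argument Tracepoint call; prog is assumed to be a loaded ebpf.TracePoint program, and the cookie needs kernel 5.15+:
package example

import (
    "github.com/cilium/ebpf"
    "github.com/cilium/ebpf/link"
)

// traceOpenat is a sketch only.
func traceOpenat(prog *ebpf.Program) (link.Link, error) {
    return link.Tracepoint("syscalls", "sys_enter_openat", prog,
        &link.TracepointOptions{Cookie: 42})
}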

141 vendor/github.com/cilium/ebpf/link/tracing.go generated vendored Normal file
View file

@ -0,0 +1,141 @@
package link
import (
"fmt"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal/sys"
)
type tracing struct {
RawLink
}
func (f *tracing) Update(new *ebpf.Program) error {
return fmt.Errorf("tracing update: %w", ErrNotSupported)
}
// AttachFreplace attaches the given eBPF program to the function it replaces.
//
// The program and name can either be provided at link time, or can be provided
// at program load time. If they were provided at load time, they should be nil
// and empty respectively here, as they will be ignored by the kernel.
// Examples:
//
// AttachFreplace(dispatcher, "function", replacement)
// AttachFreplace(nil, "", replacement)
func AttachFreplace(targetProg *ebpf.Program, name string, prog *ebpf.Program) (Link, error) {
if (name == "") != (targetProg == nil) {
return nil, fmt.Errorf("must provide both or neither of name and targetProg: %w", errInvalidInput)
}
if prog == nil {
return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput)
}
if prog.Type() != ebpf.Extension {
return nil, fmt.Errorf("eBPF program type %s is not an Extension: %w", prog.Type(), errInvalidInput)
}
var (
target int
typeID btf.TypeID
)
if targetProg != nil {
btfHandle, err := targetProg.Handle()
if err != nil {
return nil, err
}
defer btfHandle.Close()
spec, err := btfHandle.Spec(nil)
if err != nil {
return nil, err
}
var function *btf.Func
if err := spec.TypeByName(name, &function); err != nil {
return nil, err
}
target = targetProg.FD()
typeID, err = spec.TypeID(function)
if err != nil {
return nil, err
}
}
link, err := AttachRawLink(RawLinkOptions{
Target: target,
Program: prog,
Attach: ebpf.AttachNone,
BTF: typeID,
})
if err != nil {
return nil, err
}
return &tracing{*link}, nil
}
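A caller-side sketch of AttachFreplace as relocated here; the function name is hypothetical, dispatcher is a loaded program exposing that BTF function, and repl is a loaded ebpf.Extension program:
package example

import (
    "github.com/cilium/ebpf"
    "github.com/cilium/ebpf/link"
)

// replaceFunc is a sketch only.
func replaceFunc(dispatcher, repl *ebpf.Program) (link.Link, error) {
    return link.AttachFreplace(dispatcher, "handle_packet", repl)
}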
type TracingOptions struct {
// Program must be of type Tracing with attach type
// AttachTraceFEntry/AttachTraceFExit/AttachModifyReturn or
// AttachTraceRawTp.
Program *ebpf.Program
}
type LSMOptions struct {
// Program must be of type LSM with attach type
// AttachLSMMac.
Program *ebpf.Program
}
// attachBTFID links all BPF program types (Tracing/LSM) that attach to a btf_id.
func attachBTFID(program *ebpf.Program) (Link, error) {
if program.FD() < 0 {
return nil, fmt.Errorf("invalid program %w", sys.ErrClosedFd)
}
fd, err := sys.RawTracepointOpen(&sys.RawTracepointOpenAttr{
ProgFd: uint32(program.FD()),
})
if err != nil {
return nil, err
}
raw := RawLink{fd: fd}
info, err := raw.Info()
if err != nil {
raw.Close()
return nil, err
}
if info.Type == RawTracepointType {
// Sadness upon sadness: a Tracing program with AttachRawTp returns
// a raw_tracepoint link. Other types return a tracing link.
return &rawTracepoint{raw}, nil
}
return &tracing{RawLink: RawLink{fd: fd}}, nil
}
// AttachTracing links a tracing (fentry/fexit/fmod_ret) BPF program or
// a BTF-powered raw tracepoint (tp_btf) BPF Program to a BPF hook defined
// in kernel modules.
func AttachTracing(opts TracingOptions) (Link, error) {
if t := opts.Program.Type(); t != ebpf.Tracing {
return nil, fmt.Errorf("invalid program type %s, expected Tracing", t)
}
return attachBTFID(opts.Program)
}
// AttachLSM links a Linux security module (LSM) BPF Program to a BPF
// hook defined in kernel modules.
func AttachLSM(opts LSMOptions) (Link, error) {
if t := opts.Program.Type(); t != ebpf.LSM {
return nil, fmt.Errorf("invalid program type %s, expected LSM", t)
}
return attachBTFID(opts.Program)
}
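A sketch of the two new entry points; fentry is assumed to be a loaded ebpf.Tracing program (e.g. AttachTraceFEntry) and lsm a loaded ebpf.LSM program (AttachLSMMac):
package example

import (
    "github.com/cilium/ebpf"
    "github.com/cilium/ebpf/link"
)

// attachBoth is a sketch; both links are closed on return for brevity.
func attachBoth(fentry, lsm *ebpf.Program) error {
    tr, err := link.AttachTracing(link.TracingOptions{Program: fentry})
    if err != nil {
        return err
    }
    defer tr.Close()

    l, err := link.AttachLSM(link.LSMOptions{Program: lsm})
    if err != nil {
        return err
    }
    defer l.Close()
    return nil
}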

View file

@ -6,7 +6,7 @@ import (
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
"sync"
"github.com/cilium/ebpf"
@ -16,16 +16,23 @@ import (
var (
uprobeEventsPath = filepath.Join(tracefsPath, "uprobe_events")
// rgxUprobeSymbol is used to strip invalid characters from the uprobe symbol
// as they are not allowed to be used as the EVENT token in tracefs.
rgxUprobeSymbol = regexp.MustCompile("[^a-zA-Z0-9]+")
uprobeRetprobeBit = struct {
once sync.Once
value uint64
err error
}{}
uprobeRefCtrOffsetPMUPath = "/sys/bus/event_source/devices/uprobe/format/ref_ctr_offset"
// elixir.bootlin.com/linux/v5.15-rc7/source/kernel/events/core.c#L9799
uprobeRefCtrOffsetShift = 32
haveRefCtrOffsetPMU = internal.FeatureTest("RefCtrOffsetPMU", "4.20", func() error {
_, err := os.Stat(uprobeRefCtrOffsetPMUPath)
if err != nil {
return internal.ErrNotSupported
}
return nil
})
// ErrNoSymbol indicates that the given symbol was not found
// in the ELF symbols table.
ErrNoSymbol = errors.New("not found")
@ -35,24 +42,46 @@ var (
type Executable struct {
// Path of the executable on the filesystem.
path string
// Parsed ELF symbols and dynamic symbols offsets.
offsets map[string]uint64
// Parsed ELF and dynamic symbols' addresses.
addresses map[string]uint64
}
// UprobeOptions defines additional parameters that will be used
// when loading Uprobes.
type UprobeOptions struct {
// Symbol offset. Must be provided in case of external symbols (shared libs).
// If set, overrides the offset eventually parsed from the executable.
// Symbol address. Must be provided in case of external symbols (shared libs).
// If set, overrides the address eventually parsed from the executable.
Address uint64
// The offset relative to given symbol. Useful when tracing an arbitrary point
// inside the frame of given symbol.
//
// Note: this field changed from being an absolute offset to being relative
// to Address.
Offset uint64
// Only set the uprobe on the given process ID. Useful when tracing
// shared library calls or programs that have many running instances.
PID int
// Automatically manage SDT reference counts (semaphores).
//
// If this field is set, the Kernel will increment/decrement the
// semaphore located in the process memory at the provided address on
// probe attach/detach.
//
// See also:
// sourceware.org/systemtap/wiki/UserSpaceProbeImplementation (Semaphore Handling)
// github.com/torvalds/linux/commit/1cc33161a83d
// github.com/torvalds/linux/commit/a6ca88b241d5
RefCtrOffset uint64
// Arbitrary value that can be fetched from an eBPF program
// via `bpf_get_attach_cookie()`.
//
// Needs kernel 5.15+.
Cookie uint64
}
// To open a new Executable, use:
//
// OpenExecutable("/bin/bash")
// OpenExecutable("/bin/bash")
//
// The returned value can then be used to open Uprobe(s).
func OpenExecutable(path string) (*Executable, error) {
@ -77,8 +106,8 @@ func OpenExecutable(path string) (*Executable, error) {
}
ex := Executable{
path: path,
offsets: make(map[string]uint64),
path: path,
addresses: make(map[string]uint64),
}
if err := ex.load(se); err != nil {
@ -107,7 +136,7 @@ func (ex *Executable) load(f *internal.SafeELFFile) error {
continue
}
off := s.Value
address := s.Value
// Loop over ELF segments.
for _, prog := range f.Progs {
@ -123,32 +152,42 @@ func (ex *Executable) load(f *internal.SafeELFFile) error {
// fn symbol offset = fn symbol VA - .text VA + .text offset
//
// stackoverflow.com/a/40249502
off = s.Value - prog.Vaddr + prog.Off
address = s.Value - prog.Vaddr + prog.Off
break
}
}
ex.offsets[s.Name] = off
ex.addresses[s.Name] = address
}
return nil
}
func (ex *Executable) offset(symbol string) (uint64, error) {
if off, ok := ex.offsets[symbol]; ok {
// Symbols with location 0 from section undef are shared library calls and
// are relocated before the binary is executed. Dynamic linking is not
// implemented by the library, so mark this as unsupported for now.
//
// Since only offset values are stored and not elf.Symbol, if the value is 0,
// assume it's an external symbol.
if off == 0 {
return 0, fmt.Errorf("cannot resolve %s library call '%s', "+
"consider providing the offset via options: %w", ex.path, symbol, ErrNotSupported)
}
return off, nil
// address calculates the address of a symbol in the executable.
//
// opts must not be nil.
func (ex *Executable) address(symbol string, opts *UprobeOptions) (uint64, error) {
if opts.Address > 0 {
return opts.Address + opts.Offset, nil
}
return 0, fmt.Errorf("symbol %s: %w", symbol, ErrNoSymbol)
address, ok := ex.addresses[symbol]
if !ok {
return 0, fmt.Errorf("symbol %s: %w", symbol, ErrNoSymbol)
}
// Symbols with location 0 from section undef are shared library calls and
// are relocated before the binary is executed. Dynamic linking is not
// implemented by the library, so mark this as unsupported for now.
//
// Since only offset values are stored and not elf.Symbol, if the value is 0,
// assume it's an external symbol.
if address == 0 {
return 0, fmt.Errorf("cannot resolve %s library call '%s': %w "+
"(consider providing UprobeOptions.Address)", ex.path, symbol, ErrNotSupported)
}
return address + opts.Offset, nil
}
// Uprobe attaches the given eBPF program to a perf event that fires when the
@ -161,7 +200,9 @@ func (ex *Executable) offset(symbol string) (uint64, error) {
// When using symbols which belong to shared libraries,
// an offset must be provided via options:
//
// up, err := ex.Uprobe("main", prog, &UprobeOptions{Offset: 0x123})
// up, err := ex.Uprobe("main", prog, &UprobeOptions{Offset: 0x123})
//
// Note: Setting the Offset field in the options supersedes the symbol's offset.
//
// Losing the reference to the resulting Link (up) will close the Uprobe
// and prevent further execution of prog. The Link must be Closed during
@ -175,13 +216,13 @@ func (ex *Executable) Uprobe(symbol string, prog *ebpf.Program, opts *UprobeOpti
return nil, err
}
err = u.attach(prog)
lnk, err := attachPerfEvent(u, prog)
if err != nil {
u.Close()
return nil, err
}
return u, nil
return lnk, nil
}
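A minimal end-to-end sketch of the attach flow, assuming prog is a loaded Kprobe-type program; the path and symbol are illustrative:
ex, err := OpenExecutable("/bin/bash")
if err != nil {
	return err
}
up, err := ex.Uprobe("readline", prog, nil)
if err != nil {
	return err
}
// Keep the Link referenced; Close detaches the probe.
defer up.Close()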
// Uretprobe attaches the given eBPF program to a perf event that fires right
@ -193,7 +234,9 @@ func (ex *Executable) Uprobe(symbol string, prog *ebpf.Program, opts *UprobeOpti
// When using symbols which belong to shared libraries,
// an offset must be provided via options:
//
// up, err := ex.Uretprobe("main", prog, &UprobeOptions{Offset: 0x123})
// up, err := ex.Uretprobe("main", prog, &UprobeOptions{Offset: 0x123})
//
// Note: Setting the Offset field in the options supersedes the symbol's offset.
//
// Losing the reference to the resulting Link (up) will close the Uprobe
// and prevent further execution of prog. The Link must be Closed during
@ -207,13 +250,13 @@ func (ex *Executable) Uretprobe(symbol string, prog *ebpf.Program, opts *UprobeO
return nil, err
}
err = u.attach(prog)
lnk, err := attachPerfEvent(u, prog)
if err != nil {
u.Close()
return nil, err
}
return u, nil
return lnk, nil
}
// uprobe opens a perf event for the given binary/symbol and attaches prog to it.
@ -225,25 +268,38 @@ func (ex *Executable) uprobe(symbol string, prog *ebpf.Program, opts *UprobeOpti
if prog.Type() != ebpf.Kprobe {
return nil, fmt.Errorf("eBPF program type %s is not Kprobe: %w", prog.Type(), errInvalidInput)
}
var offset uint64
if opts != nil && opts.Offset != 0 {
offset = opts.Offset
} else {
off, err := ex.offset(symbol)
if err != nil {
return nil, err
}
offset = off
if opts == nil {
opts = &UprobeOptions{}
}
pid := perfAllThreads
if opts != nil && opts.PID != 0 {
pid = opts.PID
offset, err := ex.address(symbol, opts)
if err != nil {
return nil, err
}
pid := opts.PID
if pid == 0 {
pid = perfAllThreads
}
if opts.RefCtrOffset != 0 {
if err := haveRefCtrOffsetPMU(); err != nil {
return nil, fmt.Errorf("uprobe ref_ctr_offset: %w", err)
}
}
args := probeArgs{
symbol: symbol,
path: ex.path,
offset: offset,
pid: pid,
refCtrOffset: opts.RefCtrOffset,
ret: ret,
cookie: opts.Cookie,
}
// Use uprobe PMU if the kernel has it available.
tp, err := pmuUprobe(symbol, ex.path, offset, pid, ret)
tp, err := pmuUprobe(args)
if err == nil {
return tp, nil
}
@ -252,7 +308,8 @@ func (ex *Executable) uprobe(symbol string, prog *ebpf.Program, opts *UprobeOpti
}
// Use tracefs if uprobe PMU is missing.
tp, err = tracefsUprobe(uprobeSanitizedSymbol(symbol), ex.path, offset, pid, ret)
args.symbol = sanitizeSymbol(symbol)
tp, err = tracefsUprobe(args)
if err != nil {
return nil, fmt.Errorf("creating trace event '%s:%s' in tracefs: %w", ex.path, symbol, err)
}
@ -261,23 +318,51 @@ func (ex *Executable) uprobe(symbol string, prog *ebpf.Program, opts *UprobeOpti
}
// pmuUprobe opens a perf event based on the uprobe PMU.
func pmuUprobe(symbol, path string, offset uint64, pid int, ret bool) (*perfEvent, error) {
return pmuProbe(uprobeType, symbol, path, offset, pid, ret)
func pmuUprobe(args probeArgs) (*perfEvent, error) {
return pmuProbe(uprobeType, args)
}
// tracefsUprobe creates a Uprobe tracefs entry.
func tracefsUprobe(symbol, path string, offset uint64, pid int, ret bool) (*perfEvent, error) {
return tracefsProbe(uprobeType, symbol, path, offset, pid, ret)
func tracefsUprobe(args probeArgs) (*perfEvent, error) {
return tracefsProbe(uprobeType, args)
}
// uprobeSanitizedSymbol replaces every invalid characted for the tracefs api with an underscore.
func uprobeSanitizedSymbol(symbol string) string {
return rgxUprobeSymbol.ReplaceAllString(symbol, "_")
// sanitizeSymbol replaces every invalid character for the tracefs api with an underscore.
// It is equivalent to calling regexp.MustCompile("[^a-zA-Z0-9]+").ReplaceAllString(s, "_").
func sanitizeSymbol(s string) string {
var b strings.Builder
b.Grow(len(s))
var skip bool
for _, c := range []byte(s) {
switch {
case c >= 'a' && c <= 'z',
c >= 'A' && c <= 'Z',
c >= '0' && c <= '9':
skip = false
b.WriteByte(c)
default:
if !skip {
b.WriteByte('_')
skip = true
}
}
}
return b.String()
}
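For example, runs of invalid characters collapse into a single underscore:
sanitizeSymbol("go.(*Foo).Bar") // "go_Foo_Bar"
sanitizeSymbol("malloc")        // "malloc", unchanged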
// uprobePathOffset creates the PATH:OFFSET token for the tracefs api.
func uprobePathOffset(path string, offset uint64) string {
return fmt.Sprintf("%s:%#x", path, offset)
// uprobeToken creates the PATH:OFFSET(REF_CTR_OFFSET) token for the tracefs api.
func uprobeToken(args probeArgs) string {
po := fmt.Sprintf("%s:%#x", args.path, args.offset)
if args.refCtrOffset != 0 {
// This is not documented in Documentation/trace/uprobetracer.txt.
// elixir.bootlin.com/linux/v5.15-rc7/source/kernel/trace/trace.c#L5564
po += fmt.Sprintf("(%#x)", args.refCtrOffset)
}
return po
}
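Illustrative outputs (the offsets are arbitrary values chosen for the example):
uprobeToken(probeArgs{path: "/bin/bash", offset: 0x4a2c})
// "/bin/bash:0x4a2c"
uprobeToken(probeArgs{path: "/bin/bash", offset: 0x4a2c, refCtrOffset: 0x1000})
// "/bin/bash:0x4a2c(0x1000)"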
func uretprobeBit() (uint64, error) {

54
vendor/github.com/cilium/ebpf/link/xdp.go generated vendored Normal file
View file

@ -0,0 +1,54 @@
package link
import (
"fmt"
"github.com/cilium/ebpf"
)
// XDPAttachFlags represents how an XDP program will be attached to an interface.
type XDPAttachFlags uint32
const (
// XDPGenericMode (SKB) links XDP BPF program for drivers which do
// not yet support native XDP.
XDPGenericMode XDPAttachFlags = 1 << (iota + 1)
// XDPDriverMode links XDP BPF program into the drivers receive path.
XDPDriverMode
// XDPOffloadMode offloads the entire XDP BPF program into hardware.
XDPOffloadMode
)
type XDPOptions struct {
// Program must be an XDP BPF program.
Program *ebpf.Program
// Interface is the interface index to attach the program to.
Interface int
// Flags is one of XDPAttachFlags (optional).
//
// Only one XDP mode should be set; if no flag is given, attachment
// defaults to driver/generic mode (best effort).
Flags XDPAttachFlags
}
// AttachXDP links an XDP BPF program to an XDP hook.
func AttachXDP(opts XDPOptions) (Link, error) {
if t := opts.Program.Type(); t != ebpf.XDP {
return nil, fmt.Errorf("invalid program type %s, expected XDP", t)
}
if opts.Interface < 1 {
return nil, fmt.Errorf("invalid interface index: %d", opts.Interface)
}
rawLink, err := AttachRawLink(RawLinkOptions{
Program: opts.Program,
Attach: ebpf.AttachXDP,
Target: opts.Interface,
Flags: uint32(opts.Flags),
})
return rawLink, err
}
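A usage sketch, assuming prog is a loaded XDP program and an eth0 interface exists on the host:
iface, err := net.InterfaceByName("eth0") // hypothetical interface
if err != nil {
	return err
}
l, err := AttachXDP(XDPOptions{
	Program:   prog,
	Interface: iface.Index,
	Flags:     XDPGenericMode, // force SKB mode; omit for best effort
})
if err != nil {
	return err
}
defer l.Close()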

View file

@ -1,159 +1,238 @@
package ebpf
import (
"errors"
"fmt"
"sync"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/btf"
)
// link resolves bpf-to-bpf calls.
// splitSymbols splits insns into subsections delimited by Symbol Instructions.
// insns cannot be empty and must start with a Symbol Instruction.
//
// Each library may contain multiple functions / labels, and is only linked
// if prog references one of these functions.
// The resulting map is indexed by Symbol name.
func splitSymbols(insns asm.Instructions) (map[string]asm.Instructions, error) {
if len(insns) == 0 {
return nil, errors.New("insns is empty")
}
if insns[0].Symbol() == "" {
return nil, errors.New("insns must start with a Symbol")
}
var name string
progs := make(map[string]asm.Instructions)
for _, ins := range insns {
if sym := ins.Symbol(); sym != "" {
if progs[sym] != nil {
return nil, fmt.Errorf("insns contains duplicate Symbol %s", sym)
}
name = sym
}
progs[name] = append(progs[name], ins)
}
return progs, nil
}
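An input/output sketch using the asm package; the function names are arbitrary:
insns := asm.Instructions{
	asm.Mov.Imm(asm.R0, 0).WithSymbol("fn_a"),
	asm.Return(),
	asm.Mov.Imm(asm.R0, 1).WithSymbol("fn_b"),
	asm.Return(),
}
progs, _ := splitSymbols(insns)
// progs["fn_a"] and progs["fn_b"] each hold two instructions.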
// The linker is responsible for resolving bpf-to-bpf calls between programs
// within an ELF. Each BPF program must be a self-contained binary blob,
// so when an instruction in one ELF program section wants to jump to
// a function in another, the linker needs to pull in the bytecode
// (and BTF info) of the target function and concatenate the instruction
// streams.
//
// Libraries are also linked.
func link(prog *ProgramSpec, libs []*ProgramSpec) error {
var (
linked = make(map[*ProgramSpec]bool)
pending = []asm.Instructions{prog.Instructions}
insns asm.Instructions
)
for len(pending) > 0 {
insns, pending = pending[0], pending[1:]
for _, lib := range libs {
if linked[lib] {
continue
}
// Later on in the pipeline, all call sites are fixed up with relative jumps
// within this newly-created instruction stream to then finally hand off to
// the kernel with BPF_PROG_LOAD.
//
// Each function is denoted by an ELF symbol and the compiler takes care of
// register setup before each jump instruction.
needed, err := needSection(insns, lib.Instructions)
if err != nil {
return fmt.Errorf("linking %s: %w", lib.Name, err)
}
// hasFunctionReferences returns true if insns contains one or more bpf2bpf
// function references.
func hasFunctionReferences(insns asm.Instructions) bool {
for _, i := range insns {
if i.IsFunctionReference() {
return true
}
}
return false
}
if !needed {
continue
}
// applyRelocations collects and applies any CO-RE relocations in insns.
//
// Passing a nil target will relocate against the running kernel. insns are
// modified in place.
func applyRelocations(insns asm.Instructions, local, target *btf.Spec) error {
var relos []*btf.CORERelocation
var reloInsns []*asm.Instruction
iter := insns.Iterate()
for iter.Next() {
if relo := btf.CORERelocationMetadata(iter.Ins); relo != nil {
relos = append(relos, relo)
reloInsns = append(reloInsns, iter.Ins)
}
}
linked[lib] = true
prog.Instructions = append(prog.Instructions, lib.Instructions...)
pending = append(pending, lib.Instructions)
if len(relos) == 0 {
return nil
}
if prog.BTF != nil && lib.BTF != nil {
if err := prog.BTF.Append(lib.BTF); err != nil {
return fmt.Errorf("linking BTF of %s: %w", lib.Name, err)
}
}
target, err := maybeLoadKernelBTF(target)
if err != nil {
return err
}
fixups, err := btf.CORERelocate(local, target, relos)
if err != nil {
return err
}
for i, fixup := range fixups {
if err := fixup.Apply(reloInsns[i]); err != nil {
return fmt.Errorf("apply fixup %s: %w", &fixup, err)
}
}
return nil
}
func needSection(insns, section asm.Instructions) (bool, error) {
// A map of symbols to the libraries which contain them.
symbols, err := section.SymbolOffsets()
if err != nil {
return false, err
// flattenPrograms resolves bpf-to-bpf calls for a set of programs.
//
// Links all programs in names by modifying their ProgramSpec in progs.
func flattenPrograms(progs map[string]*ProgramSpec, names []string) {
// Pre-calculate all function references.
refs := make(map[*ProgramSpec][]string)
for _, prog := range progs {
refs[prog] = prog.Instructions.FunctionReferences()
}
for _, ins := range insns {
if ins.Reference == "" {
continue
}
if ins.OpCode.JumpOp() != asm.Call || ins.Src != asm.PseudoCall {
continue
}
if ins.Constant != -1 {
// This is already a valid call, no need to link again.
continue
}
if _, ok := symbols[ins.Reference]; !ok {
// Symbol isn't available in this section
continue
}
// At this point we know that at least one function in the
// library is called from insns, so we have to link it.
return true, nil
// Create a flattened instruction stream, but don't modify progs yet to
// avoid linking multiple times.
flattened := make([]asm.Instructions, 0, len(names))
for _, name := range names {
flattened = append(flattened, flattenInstructions(name, progs, refs))
}
// None of the functions in the section are called.
return false, nil
// Finally, assign the flattened instructions.
for i, name := range names {
progs[name].Instructions = flattened[i]
}
}
func fixupJumpsAndCalls(insns asm.Instructions) error {
symbolOffsets := make(map[string]asm.RawInstructionOffset)
// flattenInstructions resolves bpf-to-bpf calls for a single program.
//
// Flattens the instructions of prog by concatenating the instructions of all
// direct and indirect dependencies.
//
// progs contains all referenceable programs, while refs contains the direct
// dependencies of each program.
func flattenInstructions(name string, progs map[string]*ProgramSpec, refs map[*ProgramSpec][]string) asm.Instructions {
prog := progs[name]
insns := make(asm.Instructions, len(prog.Instructions))
copy(insns, prog.Instructions)
// Add all direct references of prog to the list of to be linked programs.
pending := make([]string, len(refs[prog]))
copy(pending, refs[prog])
// All references for which we've appended instructions.
linked := make(map[string]bool)
// Iterate all pending references. We can't use a range since pending is
// modified in the body below.
for len(pending) > 0 {
var ref string
ref, pending = pending[0], pending[1:]
if linked[ref] {
// We've already linked this ref, don't append instructions again.
continue
}
progRef := progs[ref]
if progRef == nil {
// We don't have instructions that go with this reference. This
// happens when calling extern functions.
continue
}
insns = append(insns, progRef.Instructions...)
linked[ref] = true
// Make sure we link indirect references.
pending = append(pending, refs[progRef]...)
}
return insns
}
// fixupAndValidate is called by the ELF reader right before marshaling the
// instruction stream. It performs last-minute adjustments to the program and
// runs some sanity checks before sending it off to the kernel.
func fixupAndValidate(insns asm.Instructions) error {
iter := insns.Iterate()
for iter.Next() {
ins := iter.Ins
if ins.Symbol == "" {
continue
// Map load was tagged with a Reference, but does not contain a Map pointer.
if ins.IsLoadFromMap() && ins.Reference() != "" && ins.Map() == nil {
return fmt.Errorf("instruction %d: map %s: %w", iter.Index, ins.Reference(), asm.ErrUnsatisfiedMapReference)
}
if _, ok := symbolOffsets[ins.Symbol]; ok {
return fmt.Errorf("duplicate symbol %s", ins.Symbol)
}
symbolOffsets[ins.Symbol] = iter.Offset
}
iter = insns.Iterate()
for iter.Next() {
i := iter.Index
offset := iter.Offset
ins := iter.Ins
if ins.Reference == "" {
continue
}
switch {
case ins.IsFunctionCall() && ins.Constant == -1:
// Rewrite bpf to bpf call
callOffset, ok := symbolOffsets[ins.Reference]
if !ok {
return fmt.Errorf("call at %d: reference to missing symbol %q", i, ins.Reference)
}
ins.Constant = int64(callOffset - offset - 1)
case ins.OpCode.Class() == asm.JumpClass && ins.Offset == -1:
// Rewrite jump to label
jumpOffset, ok := symbolOffsets[ins.Reference]
if !ok {
return fmt.Errorf("jump at %d: reference to missing symbol %q", i, ins.Reference)
}
ins.Offset = int16(jumpOffset - offset - 1)
case ins.IsLoadFromMap() && ins.MapPtr() == -1:
return fmt.Errorf("map %s: %w", ins.Reference, errUnsatisfiedReference)
}
}
// fixupBPFCalls replaces bpf_probe_read_{kernel,user}[_str] with bpf_probe_read[_str] on older kernels
// https://github.com/libbpf/libbpf/blob/master/src/libbpf.c#L6009
iter = insns.Iterate()
for iter.Next() {
ins := iter.Ins
if !ins.IsBuiltinCall() {
continue
}
switch asm.BuiltinFunc(ins.Constant) {
case asm.FnProbeReadKernel, asm.FnProbeReadUser:
if err := haveProbeReadKernel(); err != nil {
ins.Constant = int64(asm.FnProbeRead)
}
case asm.FnProbeReadKernelStr, asm.FnProbeReadUserStr:
if err := haveProbeReadKernel(); err != nil {
ins.Constant = int64(asm.FnProbeReadStr)
}
}
fixupProbeReadKernel(ins)
}
return nil
}
// fixupProbeReadKernel replaces calls to bpf_probe_read_{kernel,user}(_str)
// with bpf_probe_read(_str) on kernels that don't support it yet.
func fixupProbeReadKernel(ins *asm.Instruction) {
if !ins.IsBuiltinCall() {
return
}
// Kernel supports bpf_probe_read_kernel, nothing to do.
if haveProbeReadKernel() == nil {
return
}
switch asm.BuiltinFunc(ins.Constant) {
case asm.FnProbeReadKernel, asm.FnProbeReadUser:
ins.Constant = int64(asm.FnProbeRead)
case asm.FnProbeReadKernelStr, asm.FnProbeReadUserStr:
ins.Constant = int64(asm.FnProbeReadStr)
}
}
var kernelBTF struct {
sync.Mutex
spec *btf.Spec
}
// maybeLoadKernelBTF loads the current kernel's BTF if spec is nil, otherwise
// it returns spec unchanged.
//
// The kernel BTF is cached for the lifetime of the process.
func maybeLoadKernelBTF(spec *btf.Spec) (*btf.Spec, error) {
if spec != nil {
return spec, nil
}
kernelBTF.Lock()
defer kernelBTF.Unlock()
if kernelBTF.spec != nil {
return kernelBTF.spec, nil
}
var err error
kernelBTF.spec, err = btf.LoadKernelSpec()
return kernelBTF.spec, err
}

547
vendor/github.com/cilium/ebpf/map.go generated vendored
View file

@ -5,12 +5,15 @@ import (
"errors"
"fmt"
"io"
"math/rand"
"path/filepath"
"reflect"
"strings"
"time"
"unsafe"
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/btf"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
@ -19,7 +22,8 @@ var (
ErrKeyNotExist = errors.New("key does not exist")
ErrKeyExist = errors.New("key already exists")
ErrIterationAborted = errors.New("iteration aborted")
ErrMapIncompatible = errors.New("map's spec is incompatible with pinned map")
ErrMapIncompatible = errors.New("map spec is incompatible with existing map")
errMapNoBTFValue = errors.New("map spec does not contain a BTF Value")
)
// MapOptions control loading a map into the kernel.
@ -67,12 +71,15 @@ type MapSpec struct {
InnerMap *MapSpec
// Extra trailing bytes found in the ELF map definition when using structs
// larger than libbpf's bpf_map_def. Must be empty before instantiating
// the MapSpec into a Map.
Extra bytes.Reader
// larger than libbpf's bpf_map_def. nil if no trailing bytes were present.
// Must be nil or empty before instantiating the MapSpec into a Map.
Extra *bytes.Reader
// The key and value type of this map. May be nil.
Key, Value btf.Type
// The BTF associated with this map.
BTF *btf.Map
BTF *btf.Spec
}
func (ms *MapSpec) String() string {
@ -97,6 +104,12 @@ func (ms *MapSpec) Copy() *MapSpec {
return &cpy
}
// hasBTF returns true if the MapSpec has a valid BTF spec and if its
// map type supports associated BTF metadata in the kernel.
func (ms *MapSpec) hasBTF() bool {
return ms.BTF != nil && ms.Type.hasBTF()
}
func (ms *MapSpec) clampPerfEventArraySize() error {
if ms.Type != PerfEventArray {
return nil
@ -114,6 +127,31 @@ func (ms *MapSpec) clampPerfEventArraySize() error {
return nil
}
// dataSection returns the contents and BTF Datasec descriptor of the spec.
func (ms *MapSpec) dataSection() ([]byte, *btf.Datasec, error) {
if ms.Value == nil {
return nil, nil, errMapNoBTFValue
}
ds, ok := ms.Value.(*btf.Datasec)
if !ok {
return nil, nil, fmt.Errorf("map value BTF is a %T, not a *btf.Datasec", ms.Value)
}
if n := len(ms.Contents); n != 1 {
return nil, nil, fmt.Errorf("expected one key, found %d", n)
}
kv := ms.Contents[0]
value, ok := kv.Value.([]byte)
if !ok {
return nil, nil, fmt.Errorf("value at first map key is %T, not []byte", kv.Value)
}
return value, ds, nil
}
// MapKV is used to initialize the contents of a Map.
type MapKV struct {
Key interface{}
@ -131,7 +169,8 @@ func (ms *MapSpec) checkCompatibility(m *Map) error {
case m.valueSize != ms.ValueSize:
return fmt.Errorf("expected value size %v, got %v: %w", ms.ValueSize, m.valueSize, ErrMapIncompatible)
case m.maxEntries != ms.MaxEntries:
case !(ms.Type == PerfEventArray && ms.MaxEntries == 0) &&
m.maxEntries != ms.MaxEntries:
return fmt.Errorf("expected max entries %v, got %v: %w", ms.MaxEntries, m.maxEntries, ErrMapIncompatible)
case m.flags != ms.Flags:
@ -151,7 +190,7 @@ func (ms *MapSpec) checkCompatibility(m *Map) error {
// if you require custom encoding.
type Map struct {
name string
fd *internal.FD
fd *sys.FD
typ MapType
keySize uint32
valueSize uint32
@ -166,18 +205,19 @@ type Map struct {
//
// You should not use fd after calling this function.
func NewMapFromFD(fd int) (*Map, error) {
if fd < 0 {
return nil, errors.New("invalid fd")
f, err := sys.NewFD(fd)
if err != nil {
return nil, err
}
return newMapFromFD(internal.NewFD(uint32(fd)))
return newMapFromFD(f)
}
func newMapFromFD(fd *internal.FD) (*Map, error) {
func newMapFromFD(fd *sys.FD) (*Map, error) {
info, err := newMapInfoFromFd(fd)
if err != nil {
fd.Close()
return nil, fmt.Errorf("get map info: %s", err)
return nil, fmt.Errorf("get map info: %w", err)
}
return newMap(fd, info.Name, info.Type, info.KeySize, info.ValueSize, info.MaxEntries, info.Flags)
@ -209,8 +249,8 @@ func NewMapWithOptions(spec *MapSpec, opts MapOptions) (*Map, error) {
return nil, fmt.Errorf("creating map: %w", err)
}
err = m.finalize(spec)
if err != nil {
if err := m.finalize(spec); err != nil {
m.Close()
return nil, fmt.Errorf("populating map: %w", err)
}
@ -257,7 +297,7 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_
return nil, fmt.Errorf("pin type %d: %w", int(spec.Pinning), ErrNotSupported)
}
var innerFd *internal.FD
var innerFd *sys.FD
if spec.Type == ArrayOfMaps || spec.Type == HashOfMaps {
if spec.InnerMap == nil {
return nil, fmt.Errorf("%s requires InnerMap", spec.Type)
@ -288,7 +328,7 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_
if spec.Pinning == PinByName {
path := filepath.Join(opts.PinPath, spec.Name)
if err := m.Pin(path); err != nil {
return nil, fmt.Errorf("pin map: %s", err)
return nil, fmt.Errorf("pin map: %w", err)
}
}
@ -297,7 +337,7 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_
// createMap validates the spec's properties and creates the map in the kernel
// using the given opts. It does not populate or freeze the map.
func (spec *MapSpec) createMap(inner *internal.FD, opts MapOptions, handles *handleCache) (_ *Map, err error) {
func (spec *MapSpec) createMap(inner *sys.FD, opts MapOptions, handles *handleCache) (_ *Map, err error) {
closeOnError := func(closer io.Closer) {
if err != nil {
closer.Close()
@ -310,8 +350,10 @@ func (spec *MapSpec) createMap(inner *internal.FD, opts MapOptions, handles *han
// additional 'inner_map_idx' and later 'numa_node' fields.
// In order to support loading these definitions, tolerate the presence of
// extra bytes, but require them to be zeroes.
if _, err := io.Copy(internal.DiscardZeroes{}, &spec.Extra); err != nil {
return nil, errors.New("extra contains unhandled non-zero bytes, drain before creating map")
if spec.Extra != nil {
if _, err := io.Copy(internal.DiscardZeroes{}, spec.Extra); err != nil {
return nil, errors.New("extra contains unhandled non-zero bytes, drain before creating map")
}
}
switch spec.Type {
@ -360,51 +402,63 @@ func (spec *MapSpec) createMap(inner *internal.FD, opts MapOptions, handles *han
return nil, fmt.Errorf("map create: %w", err)
}
}
attr := internal.BPFMapCreateAttr{
MapType: uint32(spec.Type),
KeySize: spec.KeySize,
ValueSize: spec.ValueSize,
MaxEntries: spec.MaxEntries,
Flags: spec.Flags,
NumaNode: spec.NumaNode,
}
if inner != nil {
var err error
attr.InnerMapFd, err = inner.Value()
if err != nil {
if spec.Flags&unix.BPF_F_NO_PREALLOC > 0 {
if err := haveNoPreallocMaps(); err != nil {
return nil, fmt.Errorf("map create: %w", err)
}
}
if haveObjName() == nil {
attr.MapName = internal.NewBPFObjName(spec.Name)
attr := sys.MapCreateAttr{
MapType: sys.MapType(spec.Type),
KeySize: spec.KeySize,
ValueSize: spec.ValueSize,
MaxEntries: spec.MaxEntries,
MapFlags: spec.Flags,
NumaNode: spec.NumaNode,
}
var btfDisabled bool
if spec.BTF != nil {
handle, err := handles.btfHandle(spec.BTF.Spec)
btfDisabled = errors.Is(err, btf.ErrNotSupported)
if err != nil && !btfDisabled {
if inner != nil {
attr.InnerMapFd = inner.Uint()
}
if haveObjName() == nil {
attr.MapName = sys.NewObjName(spec.Name)
}
if spec.hasBTF() {
handle, err := handles.btfHandle(spec.BTF)
if err != nil && !errors.Is(err, btf.ErrNotSupported) {
return nil, fmt.Errorf("load BTF: %w", err)
}
if handle != nil {
attr.BTFFd = uint32(handle.FD())
attr.BTFKeyTypeID = uint32(spec.BTF.Key.ID())
attr.BTFValueTypeID = uint32(spec.BTF.Value.ID())
keyTypeID, err := spec.BTF.TypeID(spec.Key)
if err != nil {
return nil, err
}
valueTypeID, err := spec.BTF.TypeID(spec.Value)
if err != nil {
return nil, err
}
attr.BtfFd = uint32(handle.FD())
attr.BtfKeyTypeId = uint32(keyTypeID)
attr.BtfValueTypeId = uint32(valueTypeID)
}
}
fd, err := internal.BPFMapCreate(&attr)
fd, err := sys.MapCreate(&attr)
if err != nil {
if errors.Is(err, unix.EPERM) {
return nil, fmt.Errorf("map create: %w (MEMLOCK bay be too low, consider rlimit.RemoveMemlock)", err)
return nil, fmt.Errorf("map create: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err)
}
if btfDisabled {
if !spec.hasBTF() {
return nil, fmt.Errorf("map create without BTF: %w", err)
}
if errors.Is(err, unix.EINVAL) && attr.MaxEntries == 0 {
return nil, fmt.Errorf("map create: %w (MaxEntries may be incorrectly set to zero)", err)
}
return nil, fmt.Errorf("map create: %w", err)
}
defer closeOnError(fd)
@ -419,7 +473,7 @@ func (spec *MapSpec) createMap(inner *internal.FD, opts MapOptions, handles *han
// newMap allocates and returns a new Map structure.
// Sets the fullValueSize on per-CPU maps.
func newMap(fd *internal.FD, name string, typ MapType, keySize, valueSize, maxEntries, flags uint32) (*Map, error) {
func newMap(fd *sys.FD, name string, typ MapType, keySize, valueSize, maxEntries, flags uint32) (*Map, error) {
m := &Map{
name,
fd,
@ -482,6 +536,12 @@ func (m *Map) Info() (*MapInfo, error) {
return newMapInfoFromFd(m.fd)
}
// MapLookupFlags controls the behaviour of the map lookup calls.
type MapLookupFlags uint64
// LookupLock looks up the value of a spin-locked map.
const LookupLock MapLookupFlags = 4
// Lookup retrieves a value from a Map.
//
// Calls Close() on valueOut if it is of type **Map or **Program,
@ -490,7 +550,26 @@ func (m *Map) Info() (*MapInfo, error) {
// Returns an error if the key doesn't exist, see ErrKeyNotExist.
func (m *Map) Lookup(key, valueOut interface{}) error {
valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize)
if err := m.lookup(key, valuePtr); err != nil {
if err := m.lookup(key, valuePtr, 0); err != nil {
return err
}
return m.unmarshalValue(valueOut, valueBytes)
}
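A usage sketch, assuming m is an open Map with 4-byte keys and 8-byte values:
var value uint64
err := m.Lookup(uint32(1), &value)
if errors.Is(err, ErrKeyNotExist) {
	// key 1 is not present in the map
} else if err != nil {
	return err
}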
// LookupWithFlags retrieves a value from a Map with flags.
//
// Passing LookupLock flag will look up the value of a spin-locked
// map without returning the lock. This must be specified if the
// elements contain a spinlock.
//
// Calls Close() on valueOut if it is of type **Map or **Program,
// and *valueOut is not nil.
//
// Returns an error if the key doesn't exist, see ErrKeyNotExist.
func (m *Map) LookupWithFlags(key, valueOut interface{}, flags MapLookupFlags) error {
valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize)
if err := m.lookup(key, valuePtr, flags); err != nil {
return err
}
@ -501,18 +580,18 @@ func (m *Map) Lookup(key, valueOut interface{}) error {
//
// Returns ErrKeyNotExist if the key doesn't exist.
func (m *Map) LookupAndDelete(key, valueOut interface{}) error {
valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize)
return m.lookupAndDelete(key, valueOut, 0)
}
keyPtr, err := m.marshalKey(key)
if err != nil {
return fmt.Errorf("can't marshal key: %w", err)
}
if err := bpfMapLookupAndDelete(m.fd, keyPtr, valuePtr); err != nil {
return fmt.Errorf("lookup and delete failed: %w", err)
}
return m.unmarshalValue(valueOut, valueBytes)
// LookupAndDeleteWithFlags retrieves and deletes a value from a Map.
//
// Passing LookupLock flag will look up and delete the value of a spin-locked
// map without returning the lock. This must be specified if the elements
// contain a spinlock.
//
// Returns ErrKeyNotExist if the key doesn't exist.
func (m *Map) LookupAndDeleteWithFlags(key, valueOut interface{}, flags MapLookupFlags) error {
return m.lookupAndDelete(key, valueOut, flags)
}
// LookupBytes gets a value from Map.
@ -520,9 +599,9 @@ func (m *Map) LookupAndDelete(key, valueOut interface{}) error {
// Returns a nil value if a key doesn't exist.
func (m *Map) LookupBytes(key interface{}) ([]byte, error) {
valueBytes := make([]byte, m.fullValueSize)
valuePtr := internal.NewSlicePointer(valueBytes)
valuePtr := sys.NewSlicePointer(valueBytes)
err := m.lookup(key, valuePtr)
err := m.lookup(key, valuePtr, 0)
if errors.Is(err, ErrKeyNotExist) {
return nil, nil
}
@ -530,18 +609,47 @@ func (m *Map) LookupBytes(key interface{}) ([]byte, error) {
return valueBytes, err
}
func (m *Map) lookup(key interface{}, valueOut internal.Pointer) error {
func (m *Map) lookup(key interface{}, valueOut sys.Pointer, flags MapLookupFlags) error {
keyPtr, err := m.marshalKey(key)
if err != nil {
return fmt.Errorf("can't marshal key: %w", err)
}
if err = bpfMapLookupElem(m.fd, keyPtr, valueOut); err != nil {
return fmt.Errorf("lookup failed: %w", err)
attr := sys.MapLookupElemAttr{
MapFd: m.fd.Uint(),
Key: keyPtr,
Value: valueOut,
Flags: uint64(flags),
}
if err = sys.MapLookupElem(&attr); err != nil {
return fmt.Errorf("lookup: %w", wrapMapError(err))
}
return nil
}
func (m *Map) lookupAndDelete(key, valueOut interface{}, flags MapLookupFlags) error {
valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize)
keyPtr, err := m.marshalKey(key)
if err != nil {
return fmt.Errorf("can't marshal key: %w", err)
}
attr := sys.MapLookupAndDeleteElemAttr{
MapFd: m.fd.Uint(),
Key: keyPtr,
Value: valuePtr,
Flags: uint64(flags),
}
if err := sys.MapLookupAndDeleteElem(&attr); err != nil {
return fmt.Errorf("lookup and delete: %w", wrapMapError(err))
}
return m.unmarshalValue(valueOut, valueBytes)
}
// MapUpdateFlags controls the behaviour of the Map.Update call.
//
// The exact semantics depend on the specific MapType.
@ -554,6 +662,8 @@ const (
UpdateNoExist MapUpdateFlags = 1 << (iota - 1)
// UpdateExist updates an existing element.
UpdateExist
// UpdateLock updates elements under bpf_spin_lock.
UpdateLock
)
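For example, with a map of 4-byte keys and 8-byte values:
// Fails with ErrKeyExist if key 1 is already present.
err := m.Update(uint32(1), uint64(42), UpdateNoExist)

// Fails with ErrKeyNotExist if key 1 is absent.
err = m.Update(uint32(1), uint64(43), UpdateExist)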
// Put replaces or creates a value in map.
@ -575,8 +685,15 @@ func (m *Map) Update(key, value interface{}, flags MapUpdateFlags) error {
return fmt.Errorf("can't marshal value: %w", err)
}
if err = bpfMapUpdateElem(m.fd, keyPtr, valuePtr, uint64(flags)); err != nil {
return fmt.Errorf("update failed: %w", err)
attr := sys.MapUpdateElemAttr{
MapFd: m.fd.Uint(),
Key: keyPtr,
Value: valuePtr,
Flags: uint64(flags),
}
if err = sys.MapUpdateElem(&attr); err != nil {
return fmt.Errorf("update: %w", wrapMapError(err))
}
return nil
@ -591,8 +708,13 @@ func (m *Map) Delete(key interface{}) error {
return fmt.Errorf("can't marshal key: %w", err)
}
if err = bpfMapDeleteElem(m.fd, keyPtr); err != nil {
return fmt.Errorf("delete failed: %w", err)
attr := sys.MapDeleteElemAttr{
MapFd: m.fd.Uint(),
Key: keyPtr,
}
if err = sys.MapDeleteElem(&attr); err != nil {
return fmt.Errorf("delete: %w", wrapMapError(err))
}
return nil
}
@ -624,7 +746,7 @@ func (m *Map) NextKey(key, nextKeyOut interface{}) error {
// Returns nil if there are no more keys.
func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) {
nextKey := make([]byte, m.keySize)
nextKeyPtr := internal.NewSlicePointer(nextKey)
nextKeyPtr := sys.NewSlicePointer(nextKey)
err := m.nextKey(key, nextKeyPtr)
if errors.Is(err, ErrKeyNotExist) {
@ -634,9 +756,9 @@ func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) {
return nextKey, err
}
func (m *Map) nextKey(key interface{}, nextKeyOut internal.Pointer) error {
func (m *Map) nextKey(key interface{}, nextKeyOut sys.Pointer) error {
var (
keyPtr internal.Pointer
keyPtr sys.Pointer
err error
)
@ -647,12 +769,77 @@ func (m *Map) nextKey(key interface{}, nextKeyOut internal.Pointer) error {
}
}
if err = bpfMapGetNextKey(m.fd, keyPtr, nextKeyOut); err != nil {
return fmt.Errorf("next key failed: %w", err)
attr := sys.MapGetNextKeyAttr{
MapFd: m.fd.Uint(),
Key: keyPtr,
NextKey: nextKeyOut,
}
if err = sys.MapGetNextKey(&attr); err != nil {
// Kernels 4.4.131 and earlier return EFAULT instead of a pointer to the
// first map element when a nil key pointer is specified.
if key == nil && errors.Is(err, unix.EFAULT) {
var guessKey []byte
guessKey, err = m.guessNonExistentKey()
if err != nil {
return err
}
// Retry the syscall with a valid non-existing key.
attr.Key = sys.NewSlicePointer(guessKey)
if err = sys.MapGetNextKey(&attr); err == nil {
return nil
}
}
return fmt.Errorf("next key: %w", wrapMapError(err))
}
return nil
}
// guessNonExistentKey attempts to perform a map lookup that returns ENOENT.
// This is necessary on kernels before 4.4.132, since those don't support
// iterating maps from the start by providing an invalid key pointer.
func (m *Map) guessNonExistentKey() ([]byte, error) {
// Provide an invalid value pointer to prevent a copy on the kernel side.
valuePtr := sys.NewPointer(unsafe.Pointer(^uintptr(0)))
randKey := make([]byte, int(m.keySize))
for i := 0; i < 4; i++ {
switch i {
// For hash maps, the 0 key is less likely to be occupied. They're often
// used for storing data related to pointers, and their access pattern is
// generally scattered across the keyspace.
case 0:
// An all-0xff key is guaranteed to be out of bounds of any array, since
// those have a fixed key size of 4 bytes. The only corner case being
// arrays with 2^32 max entries, but those are prohibitively expensive
// in many environments.
case 1:
for r := range randKey {
randKey[r] = 0xff
}
// Inspired by BCC, 0x55 is an alternating binary pattern (0101), so it
// is unlikely to be taken.
case 2:
for r := range randKey {
randKey[r] = 0x55
}
// Last ditch effort, generate a random key.
case 3:
rand.New(rand.NewSource(time.Now().UnixNano())).Read(randKey)
}
err := m.lookup(randKey, valuePtr, 0)
if errors.Is(err, ErrKeyNotExist) {
return randKey, nil
}
}
return nil, errors.New("couldn't find non-existing key")
}
// BatchLookup looks up many elements in a map at once.
//
// "keysOut" and "valuesOut" must be of type slice, a pointer
@ -664,7 +851,7 @@ func (m *Map) nextKey(key interface{}, nextKeyOut internal.Pointer) error {
// the end of all possible results, even when partial results
// are returned. It should be used to evaluate when lookup is "done".
func (m *Map) BatchLookup(prevKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
return m.batchLookup(internal.BPF_MAP_LOOKUP_BATCH, prevKey, nextKeyOut, keysOut, valuesOut, opts)
return m.batchLookup(sys.BPF_MAP_LOOKUP_BATCH, prevKey, nextKeyOut, keysOut, valuesOut, opts)
}
// BatchLookupAndDelete looks up many elements in a map at once,
@ -679,10 +866,10 @@ func (m *Map) BatchLookup(prevKey, nextKeyOut, keysOut, valuesOut interface{}, o
// the end of all possible results, even when partial results
// are returned. It should be used to evaluate when lookup is "done".
func (m *Map) BatchLookupAndDelete(prevKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
return m.batchLookup(internal.BPF_MAP_LOOKUP_AND_DELETE_BATCH, prevKey, nextKeyOut, keysOut, valuesOut, opts)
return m.batchLookup(sys.BPF_MAP_LOOKUP_AND_DELETE_BATCH, prevKey, nextKeyOut, keysOut, valuesOut, opts)
}
func (m *Map) batchLookup(cmd internal.BPFCmd, startKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
func (m *Map) batchLookup(cmd sys.Cmd, startKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
if err := haveBatchAPI(); err != nil {
return 0, err
}
@ -702,29 +889,36 @@ func (m *Map) batchLookup(cmd internal.BPFCmd, startKey, nextKeyOut, keysOut, va
return 0, fmt.Errorf("keysOut and valuesOut must be the same length")
}
keyBuf := make([]byte, count*int(m.keySize))
keyPtr := internal.NewSlicePointer(keyBuf)
keyPtr := sys.NewSlicePointer(keyBuf)
valueBuf := make([]byte, count*int(m.fullValueSize))
valuePtr := internal.NewSlicePointer(valueBuf)
valuePtr := sys.NewSlicePointer(valueBuf)
nextPtr, nextBuf := makeBuffer(nextKeyOut, int(m.keySize))
var (
startPtr internal.Pointer
err error
retErr error
)
attr := sys.MapLookupBatchAttr{
MapFd: m.fd.Uint(),
Keys: keyPtr,
Values: valuePtr,
Count: uint32(count),
OutBatch: nextPtr,
}
if opts != nil {
attr.ElemFlags = opts.ElemFlags
attr.Flags = opts.Flags
}
var err error
if startKey != nil {
startPtr, err = marshalPtr(startKey, int(m.keySize))
attr.InBatch, err = marshalPtr(startKey, int(m.keySize))
if err != nil {
return 0, err
}
}
nextPtr, nextBuf := makeBuffer(nextKeyOut, int(m.keySize))
ct, err := bpfMapBatch(cmd, m.fd, startPtr, nextPtr, keyPtr, valuePtr, uint32(count), opts)
if err != nil {
if !errors.Is(err, ErrKeyNotExist) {
return 0, err
}
retErr = ErrKeyNotExist
_, sysErr := sys.BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
sysErr = wrapMapError(sysErr)
if sysErr != nil && !errors.Is(sysErr, unix.ENOENT) {
return 0, sysErr
}
err = m.unmarshalKey(nextKeyOut, nextBuf)
@ -737,9 +931,10 @@ func (m *Map) batchLookup(cmd internal.BPFCmd, startKey, nextKeyOut, keysOut, va
}
err = unmarshalBytes(valuesOut, valueBuf)
if err != nil {
retErr = err
return 0, err
}
return int(ct), retErr
return int(attr.Count), sysErr
}
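A paging sketch, assuming batch-API support (roughly kernel 5.6+) and a map of 4-byte keys and 8-byte values:
var cursor uint32
keys := make([]uint32, 8)
values := make([]uint64, 8)
n, err := m.BatchLookup(nil, &cursor, keys, values, nil)
// The first n entries of keys/values are valid; a wrapped ENOENT
// (ErrKeyNotExist) signals that the final batch has been read.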
// BatchUpdate updates the map with multiple keys and values
@ -763,7 +958,7 @@ func (m *Map) BatchUpdate(keys, values interface{}, opts *BatchOptions) (int, er
}
var (
count = keysValue.Len()
valuePtr internal.Pointer
valuePtr sys.Pointer
err error
)
if count != valuesValue.Len() {
@ -777,9 +972,24 @@ func (m *Map) BatchUpdate(keys, values interface{}, opts *BatchOptions) (int, er
if err != nil {
return 0, err
}
var nilPtr internal.Pointer
ct, err := bpfMapBatch(internal.BPF_MAP_UPDATE_BATCH, m.fd, nilPtr, nilPtr, keyPtr, valuePtr, uint32(count), opts)
return int(ct), err
attr := sys.MapUpdateBatchAttr{
MapFd: m.fd.Uint(),
Keys: keyPtr,
Values: valuePtr,
Count: uint32(count),
}
if opts != nil {
attr.ElemFlags = opts.ElemFlags
attr.Flags = opts.Flags
}
err = sys.MapUpdateBatch(&attr)
if err != nil {
return int(attr.Count), fmt.Errorf("batch update: %w", wrapMapError(err))
}
return int(attr.Count), nil
}
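A usage sketch under the same batch-API assumption as above:
keys := []uint32{0, 1, 2}
values := []uint64{10, 20, 30}
n, err := m.BatchUpdate(keys, values, nil)
// n reports how many entries the kernel consumed.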
// BatchDelete batch deletes entries in the map by keys.
@ -800,9 +1010,23 @@ func (m *Map) BatchDelete(keys interface{}, opts *BatchOptions) (int, error) {
if err != nil {
return 0, fmt.Errorf("cannot marshal keys: %v", err)
}
var nilPtr internal.Pointer
ct, err := bpfMapBatch(internal.BPF_MAP_DELETE_BATCH, m.fd, nilPtr, nilPtr, keyPtr, nilPtr, uint32(count), opts)
return int(ct), err
attr := sys.MapDeleteBatchAttr{
MapFd: m.fd.Uint(),
Keys: keyPtr,
Count: uint32(count),
}
if opts != nil {
attr.ElemFlags = opts.ElemFlags
attr.Flags = opts.Flags
}
if err = sys.MapDeleteBatch(&attr); err != nil {
return int(attr.Count), fmt.Errorf("batch delete: %w", wrapMapError(err))
}
return int(attr.Count), nil
}
// Iterate traverses a map.
@ -815,7 +1039,8 @@ func (m *Map) Iterate() *MapIterator {
return newMapIterator(m)
}
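A usage sketch, assuming m holds 4-byte keys and 8-byte values:
var (
	key   uint32
	value uint64
)
iter := m.Iterate()
for iter.Next(&key, &value) {
	// key and value hold the current entry
}
if err := iter.Err(); err != nil {
	return err
}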
// Close removes a Map
// Close the Map's underlying file descriptor, which could unload the
// Map from the kernel if it is not pinned or in use by a loaded Program.
func (m *Map) Close() error {
if m == nil {
// This makes it easier to clean up when iterating maps
@ -830,14 +1055,7 @@ func (m *Map) Close() error {
//
// Calling this function is invalid after Close has been called.
func (m *Map) FD() int {
fd, err := m.fd.Value()
if err != nil {
// Best effort: -1 is the number most likely to be an
// invalid file descriptor.
return -1
}
return int(fd)
return m.fd.Int()
}
// Clone creates a duplicate of the Map.
@ -912,7 +1130,11 @@ func (m *Map) Freeze() error {
return fmt.Errorf("can't freeze map: %w", err)
}
if err := bpfMapFreeze(m.fd); err != nil {
attr := sys.MapFreezeAttr{
MapFd: m.fd.Uint(),
}
if err := sys.MapFreeze(&attr); err != nil {
return fmt.Errorf("can't freeze map: %w", err)
}
return nil
@ -936,13 +1158,13 @@ func (m *Map) finalize(spec *MapSpec) error {
return nil
}
func (m *Map) marshalKey(data interface{}) (internal.Pointer, error) {
func (m *Map) marshalKey(data interface{}) (sys.Pointer, error) {
if data == nil {
if m.keySize == 0 {
// Queues have a key length of zero, so passing nil here is valid.
return internal.NewPointer(nil), nil
return sys.NewPointer(nil), nil
}
return internal.Pointer{}, errors.New("can't use nil as key of map")
return sys.Pointer{}, errors.New("can't use nil as key of map")
}
return marshalPtr(data, int(m.keySize))
@ -957,7 +1179,7 @@ func (m *Map) unmarshalKey(data interface{}, buf []byte) error {
return unmarshalBytes(data, buf)
}
func (m *Map) marshalValue(data interface{}) (internal.Pointer, error) {
func (m *Map) marshalValue(data interface{}) (sys.Pointer, error) {
if m.typ.hasPerCPUValue() {
return marshalPerCPUValue(data, int(m.valueSize))
}
@ -970,13 +1192,13 @@ func (m *Map) marshalValue(data interface{}) (internal.Pointer, error) {
switch value := data.(type) {
case *Map:
if !m.typ.canStoreMap() {
return internal.Pointer{}, fmt.Errorf("can't store map in %s", m.typ)
return sys.Pointer{}, fmt.Errorf("can't store map in %s", m.typ)
}
buf, err = marshalMap(value, int(m.valueSize))
case *Program:
if !m.typ.canStoreProgram() {
return internal.Pointer{}, fmt.Errorf("can't store program in %s", m.typ)
return sys.Pointer{}, fmt.Errorf("can't store program in %s", m.typ)
}
buf, err = marshalProgram(value, int(m.valueSize))
@ -985,10 +1207,10 @@ func (m *Map) marshalValue(data interface{}) (internal.Pointer, error) {
}
if err != nil {
return internal.Pointer{}, err
return sys.Pointer{}, err
}
return internal.NewSlicePointer(buf), nil
return sys.NewSlicePointer(buf), nil
}
func (m *Map) unmarshalValue(value interface{}, buf []byte) error {
@ -1052,7 +1274,10 @@ func (m *Map) unmarshalValue(value interface{}, buf []byte) error {
// LoadPinnedMap loads a Map from a BPF file.
func LoadPinnedMap(fileName string, opts *LoadPinOptions) (*Map, error) {
fd, err := internal.BPFObjGet(fileName, opts.Marshal())
fd, err := sys.ObjGet(&sys.ObjGetAttr{
Pathname: sys.NewStringPointer(fileName),
FileFlags: opts.Marshal(),
})
if err != nil {
return nil, err
}
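A usage sketch; the pin path is illustrative and assumes a mounted bpffs:
m, err := LoadPinnedMap("/sys/fs/bpf/my_map", nil) // nil uses default pin options
if err != nil {
	return err
}
defer m.Close()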
@ -1081,70 +1306,11 @@ func marshalMap(m *Map, length int) ([]byte, error) {
return nil, fmt.Errorf("can't marshal map to %d bytes", length)
}
fd, err := m.fd.Value()
if err != nil {
return nil, err
}
buf := make([]byte, 4)
internal.NativeEndian.PutUint32(buf, fd)
internal.NativeEndian.PutUint32(buf, m.fd.Uint())
return buf, nil
}
func patchValue(value []byte, typ btf.Type, replacements map[string]interface{}) error {
replaced := make(map[string]bool)
replace := func(name string, offset, size int, replacement interface{}) error {
if offset+size > len(value) {
return fmt.Errorf("%s: offset %d(+%d) is out of bounds", name, offset, size)
}
buf, err := marshalBytes(replacement, size)
if err != nil {
return fmt.Errorf("marshal %s: %w", name, err)
}
copy(value[offset:offset+size], buf)
replaced[name] = true
return nil
}
switch parent := typ.(type) {
case *btf.Datasec:
for _, secinfo := range parent.Vars {
name := string(secinfo.Type.(*btf.Var).Name)
replacement, ok := replacements[name]
if !ok {
continue
}
err := replace(name, int(secinfo.Offset), int(secinfo.Size), replacement)
if err != nil {
return err
}
}
default:
return fmt.Errorf("patching %T is not supported", typ)
}
if len(replaced) == len(replacements) {
return nil
}
var missing []string
for name := range replacements {
if !replaced[name] {
missing = append(missing, name)
}
}
if len(missing) == 1 {
return fmt.Errorf("unknown field: %s", missing[0])
}
return fmt.Errorf("unknown fields: %s", strings.Join(missing, ","))
}
// MapIterator iterates a Map.
//
// See Map.Iterate.
@ -1239,29 +1405,20 @@ func (mi *MapIterator) Err() error {
//
// Returns ErrNotExist, if there is no next eBPF map.
func MapGetNextID(startID MapID) (MapID, error) {
id, err := objGetNextID(internal.BPF_MAP_GET_NEXT_ID, uint32(startID))
return MapID(id), err
attr := &sys.MapGetNextIdAttr{Id: uint32(startID)}
return MapID(attr.NextId), sys.MapGetNextId(attr)
}
// NewMapFromID returns the map for a given id.
//
// Returns ErrNotExist, if there is no eBPF map with the given id.
func NewMapFromID(id MapID) (*Map, error) {
fd, err := internal.BPFObjGetFDByID(internal.BPF_MAP_GET_FD_BY_ID, uint32(id))
fd, err := sys.MapGetFdById(&sys.MapGetFdByIdAttr{
Id: uint32(id),
})
if err != nil {
return nil, err
}
return newMapFromFD(fd)
}
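Together these allow walking every map on the system; a sketch:
var id MapID
for {
	next, err := MapGetNextID(id)
	if err != nil {
		break // typically ErrNotExist once the last ID was visited
	}
	id = next
	m, err := NewMapFromID(id)
	if err != nil {
		continue // the map may have disappeared in the meantime
	}
	// ... inspect m via m.Info() ...
	m.Close()
}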
// ID returns the systemwide unique ID of the map.
//
// Deprecated: use MapInfo.ID() instead.
func (m *Map) ID() (MapID, error) {
info, err := bpfGetMapInfoByFD(m.fd)
if err != nil {
return MapID(0), err
}
return MapID(info.id), nil
}

View file

@ -12,6 +12,7 @@ import (
"unsafe"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
)
// marshalPtr converts an arbitrary value into a pointer suitable
@ -19,17 +20,17 @@ import (
//
// As an optimization, it returns the original value if it is an
// unsafe.Pointer.
func marshalPtr(data interface{}, length int) (internal.Pointer, error) {
func marshalPtr(data interface{}, length int) (sys.Pointer, error) {
if ptr, ok := data.(unsafe.Pointer); ok {
return internal.NewPointer(ptr), nil
return sys.NewPointer(ptr), nil
}
buf, err := marshalBytes(data, length)
if err != nil {
return internal.Pointer{}, err
return sys.Pointer{}, err
}
return internal.NewSlicePointer(buf), nil
return sys.NewSlicePointer(buf), nil
}
// marshalBytes converts an arbitrary value into a byte buffer.
@ -73,13 +74,13 @@ func marshalBytes(data interface{}, length int) (buf []byte, err error) {
return buf, nil
}
func makeBuffer(dst interface{}, length int) (internal.Pointer, []byte) {
func makeBuffer(dst interface{}, length int) (sys.Pointer, []byte) {
if ptr, ok := dst.(unsafe.Pointer); ok {
return internal.NewPointer(ptr), nil
return sys.NewPointer(ptr), nil
}
buf := make([]byte, length)
return internal.NewSlicePointer(buf), buf
return sys.NewSlicePointer(buf), buf
}
var bytesReaderPool = sync.Pool{
@ -98,14 +99,7 @@ var bytesReaderPool = sync.Pool{
func unmarshalBytes(data interface{}, buf []byte) error {
switch value := data.(type) {
case unsafe.Pointer:
var dst []byte
// Use unsafe.Slice when we drop support for pre1.17 (https://github.com/golang/go/issues/19367)
// We could opt for removing unsafe.Pointer support in the lib as well
sh := (*reflect.SliceHeader)(unsafe.Pointer(&dst))
sh.Data = uintptr(value)
sh.Len = len(buf)
sh.Cap = len(buf)
dst := unsafe.Slice((*byte)(value), len(buf))
copy(dst, buf)
runtime.KeepAlive(value)
return nil
@ -164,21 +158,21 @@ func unmarshalBytes(data interface{}, buf []byte) error {
// Values are initialized to zero if the slice has less elements than CPUs.
//
// slice must have a type like []elementType.
func marshalPerCPUValue(slice interface{}, elemLength int) (internal.Pointer, error) {
func marshalPerCPUValue(slice interface{}, elemLength int) (sys.Pointer, error) {
sliceType := reflect.TypeOf(slice)
if sliceType.Kind() != reflect.Slice {
return internal.Pointer{}, errors.New("per-CPU value requires slice")
return sys.Pointer{}, errors.New("per-CPU value requires slice")
}
possibleCPUs, err := internal.PossibleCPUs()
if err != nil {
return internal.Pointer{}, err
return sys.Pointer{}, err
}
sliceValue := reflect.ValueOf(slice)
sliceLen := sliceValue.Len()
if sliceLen > possibleCPUs {
return internal.Pointer{}, fmt.Errorf("per-CPU value exceeds number of CPUs")
return sys.Pointer{}, fmt.Errorf("per-CPU value exceeds number of CPUs")
}
alignedElemLength := internal.Align(elemLength, 8)
@ -188,14 +182,14 @@ func marshalPerCPUValue(slice interface{}, elemLength int) (internal.Pointer, er
elem := sliceValue.Index(i).Interface()
elemBytes, err := marshalBytes(elem, elemLength)
if err != nil {
return internal.Pointer{}, err
return sys.Pointer{}, err
}
offset := i * alignedElemLength
copy(buf[offset:offset+elemLength], elemBytes)
}
return internal.NewSlicePointer(buf), nil
return sys.NewSlicePointer(buf), nil
}
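A worked illustration of the resulting layout, assuming 4 possible CPUs, 4-byte elements, and a little-endian host:
ptr, _ := marshalPerCPUValue([]uint32{1, 2}, 4)
// Backing buffer is 4 CPUs * 8-byte aligned stride = 32 bytes:
//   CPU 0: 01 00 00 00 00 00 00 00
//   CPU 1: 02 00 00 00 00 00 00 00
//   CPU 2: 00 00 00 00 00 00 00 00   (zeroed, no slice element)
//   CPU 3: 00 00 00 00 00 00 00 00
_ = ptr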
// unmarshalPerCPUValue decodes a buffer into a slice containing one value per

620
vendor/github.com/cilium/ebpf/prog.go generated vendored
View file

@ -5,23 +5,22 @@ import (
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"path/filepath"
"runtime"
"strings"
"time"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/btf"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
// ErrNotSupported is returned whenever the kernel doesn't support a feature.
var ErrNotSupported = internal.ErrNotSupported
var errUnsatisfiedReference = errors.New("unsatisfied reference")
// ProgramID represents the unique ID of an eBPF program.
type ProgramID uint32
@ -44,12 +43,13 @@ type ProgramOptions struct {
// Controls the output buffer size for the verifier. Defaults to
// DefaultVerifierLogSize.
LogSize int
// An ELF containing the target BTF for this program. It is used both to
// find the correct function to trace and to apply CO-RE relocations.
// Type information used for CO-RE relocations and when attaching to
// kernel functions.
//
// This is useful in environments where the kernel BTF is not available
// (containers) or where it is in a non-standard location. Defaults to
// use the kernel BTF from a well-known location.
TargetBTF io.ReaderAt
// use the kernel BTF from a well-known location if nil.
KernelTypes *btf.Spec
}
// ProgramSpec defines a Program.
@ -59,13 +59,24 @@ type ProgramSpec struct {
Name string
// Type determines at which hook in the kernel a program will run.
Type ProgramType
Type ProgramType
// AttachType of the program, needed to differentiate allowed context
// accesses in some newer program types like CGroupSockAddr.
//
// Available on kernels 4.17 and later.
AttachType AttachType
// Name of a kernel data structure or function to attach to. Its
// interpretation depends on Type and AttachType.
AttachTo string
// The program to attach to. Must be provided manually.
AttachTarget *Program
// The name of the ELF section this program originated from.
SectionName string
Instructions asm.Instructions
// Flags is passed to the kernel and specifies additional program
@ -87,7 +98,7 @@ type ProgramSpec struct {
// The BTF associated with this program. Changing Instructions
// will most likely invalidate the contained data, and may
// result in errors when attempting to load it into the kernel.
BTF *btf.Program
BTF *btf.Spec
// The byte order this program was compiled for, may be nil.
ByteOrder binary.ByteOrder
@ -112,6 +123,8 @@ func (ps *ProgramSpec) Tag() (string, error) {
return ps.Instructions.Tag(internal.NativeEndian)
}
type VerifierError = internal.VerifierError
// Program represents BPF program loaded into the kernel.
//
// It is not safe to close a Program which is used by other goroutines.
@ -120,7 +133,7 @@ type Program struct {
// otherwise it is empty.
VerifierLog string
fd *internal.FD
fd *sys.FD
name string
pinnedPath string
typ ProgramType
@ -128,8 +141,7 @@ type Program struct {
// NewProgram creates a new Program.
//
// Loading a program for the first time will perform
// feature detection by loading small, temporary programs.
// See NewProgramWithOptions for details.
func NewProgram(spec *ProgramSpec) (*Program, error) {
return NewProgramWithOptions(spec, ProgramOptions{})
}
@ -138,12 +150,19 @@ func NewProgram(spec *ProgramSpec) (*Program, error) {
//
// Loading a program for the first time will perform
// feature detection by loading small, temporary programs.
//
// Returns an error wrapping VerifierError if the program or its BTF is rejected
// by the kernel.
func NewProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, error) {
if spec == nil {
return nil, errors.New("can't load a program from a nil spec")
}
handles := newHandleCache()
defer handles.close()
prog, err := newProgramWithOptions(spec, opts, handles)
if errors.Is(err, errUnsatisfiedReference) {
if errors.Is(err, asm.ErrUnsatisfiedMapReference) {
return nil, fmt.Errorf("cannot load program without loading its whole collection: %w", err)
}
return prog, err
@ -154,6 +173,10 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand
return nil, errors.New("instructions cannot be empty")
}
if spec.Type == UnspecifiedProgram {
return nil, errors.New("can't load program of unspecified type")
}
if spec.ByteOrder != nil && spec.ByteOrder != internal.NativeEndian {
return nil, fmt.Errorf("can't load %s program on %s", spec.ByteOrder, internal.NativeEndian)
}
@ -171,114 +194,85 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand
kv = v.Kernel()
}
attr := &internal.BPFProgLoadAttr{
ProgType: uint32(spec.Type),
attr := &sys.ProgLoadAttr{
ProgType: sys.ProgType(spec.Type),
ProgFlags: spec.Flags,
ExpectedAttachType: uint32(spec.AttachType),
License: internal.NewStringPointer(spec.License),
KernelVersion: kv,
ExpectedAttachType: sys.AttachType(spec.AttachType),
License: sys.NewStringPointer(spec.License),
KernVersion: kv,
}
if haveObjName() == nil {
attr.ProgName = internal.NewBPFObjName(spec.Name)
attr.ProgName = sys.NewObjName(spec.Name)
}
var err error
var targetBTF *btf.Spec
if opts.TargetBTF != nil {
targetBTF, err = handles.btfSpec(opts.TargetBTF)
if err != nil {
return nil, fmt.Errorf("load target BTF: %w", err)
}
}
kernelTypes := opts.KernelTypes
insns := make(asm.Instructions, len(spec.Instructions))
copy(insns, spec.Instructions)
var btfDisabled bool
var core btf.COREFixups
if spec.BTF != nil {
core, err = spec.BTF.Fixups(targetBTF)
if err != nil {
return nil, fmt.Errorf("CO-RE relocations: %w", err)
if err := applyRelocations(insns, spec.BTF, kernelTypes); err != nil {
return nil, fmt.Errorf("apply CO-RE relocations: %w", err)
}
handle, err := handles.btfHandle(spec.BTF.Spec())
handle, err := handles.btfHandle(spec.BTF)
btfDisabled = errors.Is(err, btf.ErrNotSupported)
if err != nil && !btfDisabled {
return nil, fmt.Errorf("load BTF: %w", err)
}
if handle != nil {
attr.ProgBTFFd = uint32(handle.FD())
attr.ProgBtfFd = uint32(handle.FD())
recSize, bytes, err := spec.BTF.LineInfos()
fib, lib, err := btf.MarshalExtInfos(insns, spec.BTF.TypeID)
if err != nil {
return nil, fmt.Errorf("get BTF line infos: %w", err)
return nil, err
}
attr.LineInfoRecSize = recSize
attr.LineInfoCnt = uint32(uint64(len(bytes)) / uint64(recSize))
attr.LineInfo = internal.NewSlicePointer(bytes)
recSize, bytes, err = spec.BTF.FuncInfos()
if err != nil {
return nil, fmt.Errorf("get BTF function infos: %w", err)
}
attr.FuncInfoRecSize = recSize
attr.FuncInfoCnt = uint32(uint64(len(bytes)) / uint64(recSize))
attr.FuncInfo = internal.NewSlicePointer(bytes)
attr.FuncInfoRecSize = btf.FuncInfoSize
attr.FuncInfoCnt = uint32(len(fib)) / btf.FuncInfoSize
attr.FuncInfo = sys.NewSlicePointer(fib)
attr.LineInfoRecSize = btf.LineInfoSize
attr.LineInfoCnt = uint32(len(lib)) / btf.LineInfoSize
attr.LineInfo = sys.NewSlicePointer(lib)
}
}
insns, err := core.Apply(spec.Instructions)
if err != nil {
return nil, fmt.Errorf("CO-RE fixup: %w", err)
}
if err := fixupJumpsAndCalls(insns); err != nil {
if err := fixupAndValidate(insns); err != nil {
return nil, err
}
buf := bytes.NewBuffer(make([]byte, 0, len(spec.Instructions)*asm.InstructionSize))
err = insns.Marshal(buf, internal.NativeEndian)
buf := bytes.NewBuffer(make([]byte, 0, insns.Size()))
err := insns.Marshal(buf, internal.NativeEndian)
if err != nil {
return nil, err
}
bytecode := buf.Bytes()
attr.Instructions = internal.NewSlicePointer(bytecode)
attr.InsCount = uint32(len(bytecode) / asm.InstructionSize)
attr.Insns = sys.NewSlicePointer(bytecode)
attr.InsnCnt = uint32(len(bytecode) / asm.InstructionSize)
if spec.AttachTo != "" {
if spec.AttachTarget != nil {
info, err := spec.AttachTarget.Info()
if err != nil {
return nil, fmt.Errorf("load target BTF: %w", err)
}
btfID, ok := info.BTFID()
if !ok {
return nil, fmt.Errorf("load target BTF: no BTF info available")
}
btfHandle, err := btf.NewHandleFromID(btfID)
if err != nil {
return nil, fmt.Errorf("load target BTF: %w", err)
}
defer btfHandle.Close()
targetBTF = btfHandle.Spec()
if err != nil {
return nil, fmt.Errorf("load target BTF: %w", err)
}
}
target, err := resolveBTFType(targetBTF, spec.AttachTo, spec.Type, spec.AttachType)
if spec.AttachTarget != nil {
targetID, err := findTargetInProgram(spec.AttachTarget, spec.AttachTo, spec.Type, spec.AttachType)
if err != nil {
return nil, err
return nil, fmt.Errorf("attach %s/%s: %w", spec.Type, spec.AttachType, err)
}
if target != nil {
attr.AttachBTFID = uint32(target.ID())
}
if spec.AttachTarget != nil {
attr.AttachProgFd = uint32(spec.AttachTarget.FD())
attr.AttachBtfId = uint32(targetID)
attr.AttachProgFd = uint32(spec.AttachTarget.FD())
defer runtime.KeepAlive(spec.AttachTarget)
} else if spec.AttachTo != "" {
targetID, err := findTargetInKernel(kernelTypes, spec.AttachTo, spec.Type, spec.AttachType)
if err != nil && !errors.Is(err, errUnrecognizedAttachType) {
// We ignore errUnrecognizedAttachType since AttachTo may be non-empty
// for programs that don't attach anywhere.
return nil, fmt.Errorf("attach %s/%s: %w", spec.Type, spec.AttachType, err)
}
attr.AttachBtfId = uint32(targetID)
}
logSize := DefaultVerifierLogSize
@ -291,37 +285,44 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand
logBuf = make([]byte, logSize)
attr.LogLevel = opts.LogLevel
attr.LogSize = uint32(len(logBuf))
attr.LogBuf = internal.NewSlicePointer(logBuf)
attr.LogBuf = sys.NewSlicePointer(logBuf)
}
fd, err := internal.BPFProgLoad(attr)
fd, err := sys.ProgLoad(attr)
if err == nil {
return &Program{internal.CString(logBuf), fd, spec.Name, "", spec.Type}, nil
return &Program{unix.ByteSliceToString(logBuf), fd, spec.Name, "", spec.Type}, nil
}
logErr := err
if opts.LogLevel == 0 && opts.LogSize >= 0 {
// Re-run with the verifier enabled to get better error messages.
logBuf = make([]byte, logSize)
attr.LogLevel = 1
attr.LogSize = uint32(len(logBuf))
attr.LogBuf = internal.NewSlicePointer(logBuf)
attr.LogBuf = sys.NewSlicePointer(logBuf)
_, _ = sys.ProgLoad(attr)
}
fd, logErr = internal.BPFProgLoad(attr)
if logErr == nil {
fd.Close()
switch {
case errors.Is(err, unix.EPERM):
if len(logBuf) > 0 && logBuf[0] == 0 {
// EPERM due to RLIMIT_MEMLOCK happens before the verifier, so we can
// check that the log is empty to reduce false positives.
return nil, fmt.Errorf("load program: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err)
}
fallthrough
case errors.Is(err, unix.EINVAL):
if hasFunctionReferences(spec.Instructions) {
if err := haveBPFToBPFCalls(); err != nil {
return nil, fmt.Errorf("load program: %w", err)
}
}
}
if errors.Is(logErr, unix.EPERM) && logBuf[0] == 0 {
// EPERM due to RLIMIT_MEMLOCK happens before the verifier, so we can
// check that the log is empty to reduce false positives.
return nil, fmt.Errorf("load program: %w (MEMLOCK bay be too low, consider rlimit.RemoveMemlock)", logErr)
}
err = internal.ErrorWithLog(err, logBuf, logErr)
err = internal.ErrorWithLog(err, logBuf)
if btfDisabled {
return nil, fmt.Errorf("load program without BTF: %w", err)
return nil, fmt.Errorf("load program: %w (BTF disabled)", err)
}
return nil, fmt.Errorf("load program: %w", err)
}
@ -332,18 +333,21 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand
//
// Requires at least Linux 4.10.
func NewProgramFromFD(fd int) (*Program, error) {
if fd < 0 {
return nil, errors.New("invalid fd")
f, err := sys.NewFD(fd)
if err != nil {
return nil, err
}
return newProgramFromFD(internal.NewFD(uint32(fd)))
return newProgramFromFD(f)
}
// NewProgramFromID returns the program for a given id.
//
// Returns ErrNotExist, if there is no eBPF program with the given id.
func NewProgramFromID(id ProgramID) (*Program, error) {
fd, err := internal.BPFObjGetFDByID(internal.BPF_PROG_GET_FD_BY_ID, uint32(id))
fd, err := sys.ProgGetFdById(&sys.ProgGetFdByIdAttr{
Id: uint32(id),
})
if err != nil {
return nil, fmt.Errorf("get program by id: %w", err)
}
@ -351,7 +355,7 @@ func NewProgramFromID(id ProgramID) (*Program, error) {
return newProgramFromFD(fd)
}
func newProgramFromFD(fd *internal.FD) (*Program, error) {
func newProgramFromFD(fd *sys.FD) (*Program, error) {
info, err := newProgramInfoFromFd(fd)
if err != nil {
fd.Close()
@ -380,18 +384,29 @@ func (p *Program) Info() (*ProgramInfo, error) {
return newProgramInfoFromFd(p.fd)
}
// Handle returns a reference to the program's type information in the kernel.
//
// Returns ErrNotSupported if the kernel has no BTF support, or if there is no
// BTF associated with the program.
func (p *Program) Handle() (*btf.Handle, error) {
info, err := p.Info()
if err != nil {
return nil, err
}
id, ok := info.BTFID()
if !ok {
return nil, fmt.Errorf("program %s: retrieve BTF ID: %w", p, ErrNotSupported)
}
return btf.NewHandleFromID(id)
}
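// A usage sketch for Handle (prog is a hypothetical, previously loaded
// *Program; hedged, since BTF availability depends on the kernel):
//
//	h, err := prog.Handle()
//	if err != nil {
//		// kernel lacks BTF support, or the program has no BTF
//	} else {
//		defer h.Close()
//	}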
// FD gets the file descriptor of the Program.
//
// It is invalid to call this function after Close has been called.
func (p *Program) FD() int {
fd, err := p.fd.Value()
if err != nil {
// Best effort: -1 is the number most likely to be an
// invalid file descriptor.
return -1
}
return int(fd)
return p.fd.Int()
}
// Clone creates a duplicate of the Program.
@ -445,7 +460,9 @@ func (p *Program) IsPinned() bool {
return p.pinnedPath != ""
}
// Close unloads the program from the kernel.
// Close the Program's underlying file descriptor, which could unload
// the program from the kernel if it is not pinned or attached to a
// kernel hook.
func (p *Program) Close() error {
if p == nil {
return nil
@ -454,6 +471,28 @@ func (p *Program) Close() error {
return p.fd.Close()
}
// Various options for Run'ing a Program
type RunOptions struct {
// Program's data input. Required field.
Data []byte
// Program's data after Program has run. Caller must allocate. Optional field.
DataOut []byte
// Program's context input. Optional field.
Context interface{}
// Program's context after Program has run. Must be a pointer or slice. Optional field.
ContextOut interface{}
// Number of times to run Program. Optional field. Defaults to 1.
Repeat uint32
// Optional flags.
Flags uint32
// CPU to run Program on. Optional field.
// Note not all program types support this field.
CPU uint32
// Called whenever the syscall is interrupted, and should be set to testing.B.ResetTimer
// or similar. Typically used during benchmarking. Optional field.
Reset func()
}
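// A minimal sketch of RunOptions in use, assuming a loaded *Program named
// prog whose type supports BPF_PROG_TEST_RUN (prog is hypothetical):
//
//	in := make([]byte, 14) // most program types need at least 14 bytes of input
//	ret, err := prog.Run(&RunOptions{Data: in, Repeat: 1})
//	if err != nil {
//		// handle error
//	}
//	_ = ret // the eBPF program's return value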
// Test runs the Program in the kernel with the given input and returns the
// value returned by the eBPF program. outLen may be zero.
//
@ -462,11 +501,38 @@ func (p *Program) Close() error {
//
// This function requires at least Linux 4.12.
func (p *Program) Test(in []byte) (uint32, []byte, error) {
ret, out, _, err := p.testRun(in, 1, nil)
// Older kernels ignore the dataSizeOut argument when copying to user space.
// Combined with things like bpf_xdp_adjust_head() we don't really know what the final
// size will be. Hence we allocate an output buffer which we hope will always be large
// enough, and panic if the kernel wrote past the end of the allocation.
// See https://patchwork.ozlabs.org/cover/1006822/
var out []byte
if len(in) > 0 {
out = make([]byte, len(in)+outputPad)
}
opts := RunOptions{
Data: in,
DataOut: out,
Repeat: 1,
}
ret, _, err := p.testRun(&opts)
if err != nil {
return ret, nil, fmt.Errorf("can't test program: %w", err)
}
return ret, out, nil
return ret, opts.DataOut, nil
}
// Run runs the Program in kernel with given RunOptions.
//
// Note: the same restrictions from Test apply.
func (p *Program) Run(opts *RunOptions) (uint32, error) {
ret, _, err := p.testRun(opts)
if err != nil {
return ret, fmt.Errorf("can't test program: %w", err)
}
return ret, nil
}
// Benchmark runs the Program with the given input for a number of times
@ -481,7 +547,17 @@ func (p *Program) Test(in []byte) (uint32, []byte, error) {
//
// This function requires at least Linux 4.12.
func (p *Program) Benchmark(in []byte, repeat int, reset func()) (uint32, time.Duration, error) {
ret, _, total, err := p.testRun(in, repeat, reset)
if uint(repeat) > math.MaxUint32 {
return 0, 0, fmt.Errorf("repeat is too high")
}
opts := RunOptions{
Data: in,
Repeat: uint32(repeat),
Reset: reset,
}
ret, total, err := p.testRun(&opts)
if err != nil {
return ret, total, fmt.Errorf("can't benchmark program: %w", err)
}
@ -490,6 +566,7 @@ func (p *Program) Benchmark(in []byte, repeat int, reset func()) (uint32, time.D
var haveProgTestRun = internal.FeatureTest("BPF_PROG_TEST_RUN", "4.12", func() error {
prog, err := NewProgram(&ProgramSpec{
// SocketFilter does not require privileges on newer kernels.
Type: SocketFilter,
Instructions: asm.Instructions{
asm.LoadImm(asm.R0, 0, asm.DWord),
@ -505,88 +582,109 @@ var haveProgTestRun = internal.FeatureTest("BPF_PROG_TEST_RUN", "4.12", func() e
// Programs require at least 14 bytes input
in := make([]byte, 14)
attr := bpfProgTestRunAttr{
fd: uint32(prog.FD()),
dataSizeIn: uint32(len(in)),
dataIn: internal.NewSlicePointer(in),
attr := sys.ProgRunAttr{
ProgFd: uint32(prog.FD()),
DataSizeIn: uint32(len(in)),
DataIn: sys.NewSlicePointer(in),
}
err = bpfProgTestRun(&attr)
if errors.Is(err, unix.EINVAL) {
err = sys.ProgRun(&attr)
switch {
case errors.Is(err, unix.EINVAL):
// Check for EINVAL specifically, rather than err != nil since we
// otherwise misdetect due to insufficient permissions.
return internal.ErrNotSupported
}
if errors.Is(err, unix.EINTR) {
case errors.Is(err, unix.EINTR):
// We know that PROG_TEST_RUN is supported if we get EINTR.
return nil
case errors.Is(err, unix.ENOTSUPP):
// The first PROG_TEST_RUN patches shipped in 4.12 didn't include
// a test runner for SocketFilter. ENOTSUPP means PROG_TEST_RUN is
// supported, but not for the program type used in the probe.
return nil
}
return err
})
func (p *Program) testRun(in []byte, repeat int, reset func()) (uint32, []byte, time.Duration, error) {
if uint(repeat) > math.MaxUint32 {
return 0, nil, 0, fmt.Errorf("repeat is too high")
}
if len(in) == 0 {
return 0, nil, 0, fmt.Errorf("missing input")
}
if uint(len(in)) > math.MaxUint32 {
return 0, nil, 0, fmt.Errorf("input is too long")
func (p *Program) testRun(opts *RunOptions) (uint32, time.Duration, error) {
if uint(len(opts.Data)) > math.MaxUint32 {
return 0, 0, fmt.Errorf("input is too long")
}
if err := haveProgTestRun(); err != nil {
return 0, nil, 0, err
return 0, 0, err
}
// Older kernels ignore the dataSizeOut argument when copying to user space.
// Combined with things like bpf_xdp_adjust_head() we don't really know what the final
// size will be. Hence we allocate an output buffer which we hope will always be large
// enough, and panic if the kernel wrote past the end of the allocation.
// See https://patchwork.ozlabs.org/cover/1006822/
out := make([]byte, len(in)+outputPad)
fd, err := p.fd.Value()
if err != nil {
return 0, nil, 0, err
var ctxBytes []byte
if opts.Context != nil {
ctx := new(bytes.Buffer)
if err := binary.Write(ctx, internal.NativeEndian, opts.Context); err != nil {
return 0, 0, fmt.Errorf("cannot serialize context: %v", err)
}
ctxBytes = ctx.Bytes()
}
attr := bpfProgTestRunAttr{
fd: fd,
dataSizeIn: uint32(len(in)),
dataSizeOut: uint32(len(out)),
dataIn: internal.NewSlicePointer(in),
dataOut: internal.NewSlicePointer(out),
repeat: uint32(repeat),
var ctxOut []byte
if opts.ContextOut != nil {
ctxOut = make([]byte, binary.Size(opts.ContextOut))
}
attr := sys.ProgRunAttr{
ProgFd: p.fd.Uint(),
DataSizeIn: uint32(len(opts.Data)),
DataSizeOut: uint32(len(opts.DataOut)),
DataIn: sys.NewSlicePointer(opts.Data),
DataOut: sys.NewSlicePointer(opts.DataOut),
Repeat: uint32(opts.Repeat),
CtxSizeIn: uint32(len(ctxBytes)),
CtxSizeOut: uint32(len(ctxOut)),
CtxIn: sys.NewSlicePointer(ctxBytes),
CtxOut: sys.NewSlicePointer(ctxOut),
Flags: opts.Flags,
Cpu: opts.CPU,
}
for {
err = bpfProgTestRun(&attr)
err := sys.ProgRun(&attr)
if err == nil {
break
}
if errors.Is(err, unix.EINTR) {
if reset != nil {
reset()
if opts.Reset != nil {
opts.Reset()
}
continue
}
return 0, nil, 0, fmt.Errorf("can't run test: %w", err)
if errors.Is(err, unix.ENOTSUPP) {
return 0, 0, fmt.Errorf("kernel doesn't support testing program type %s: %w", p.Type(), ErrNotSupported)
}
return 0, 0, fmt.Errorf("can't run test: %w", err)
}
if int(attr.dataSizeOut) > cap(out) {
// Houston, we have a problem. The program created more data than we allocated,
// and the kernel wrote past the end of our buffer.
panic("kernel wrote past end of output buffer")
if opts.DataOut != nil {
if int(attr.DataSizeOut) > cap(opts.DataOut) {
// Houston, we have a problem. The program created more data than we allocated,
// and the kernel wrote past the end of our buffer.
panic("kernel wrote past end of output buffer")
}
opts.DataOut = opts.DataOut[:int(attr.DataSizeOut)]
}
out = out[:int(attr.dataSizeOut)]
total := time.Duration(attr.duration) * time.Nanosecond
return attr.retval, out, total, nil
if len(ctxOut) != 0 {
b := bytes.NewReader(ctxOut)
if err := binary.Read(b, internal.NativeEndian, opts.ContextOut); err != nil {
return 0, 0, fmt.Errorf("failed to decode ContextOut: %v", err)
}
}
total := time.Duration(attr.Duration) * time.Nanosecond
return attr.Retval, total, nil
}
func unmarshalProgram(buf []byte) (*Program, error) {
@ -605,70 +703,19 @@ func marshalProgram(p *Program, length int) ([]byte, error) {
return nil, fmt.Errorf("can't marshal program to %d bytes", length)
}
value, err := p.fd.Value()
if err != nil {
return nil, err
}
buf := make([]byte, 4)
internal.NativeEndian.PutUint32(buf, value)
internal.NativeEndian.PutUint32(buf, p.fd.Uint())
return buf, nil
}
// Attach a Program.
//
// Deprecated: use link.RawAttachProgram instead.
func (p *Program) Attach(fd int, typ AttachType, flags AttachFlags) error {
if fd < 0 {
return errors.New("invalid fd")
}
pfd, err := p.fd.Value()
if err != nil {
return err
}
attr := internal.BPFProgAttachAttr{
TargetFd: uint32(fd),
AttachBpfFd: pfd,
AttachType: uint32(typ),
AttachFlags: uint32(flags),
}
return internal.BPFProgAttach(&attr)
}
// Detach a Program.
//
// Deprecated: use link.RawDetachProgram instead.
func (p *Program) Detach(fd int, typ AttachType, flags AttachFlags) error {
if fd < 0 {
return errors.New("invalid fd")
}
if flags != 0 {
return errors.New("flags must be zero")
}
pfd, err := p.fd.Value()
if err != nil {
return err
}
attr := internal.BPFProgDetachAttr{
TargetFd: uint32(fd),
AttachBpfFd: pfd,
AttachType: uint32(typ),
}
return internal.BPFProgDetach(&attr)
}
// LoadPinnedProgram loads a Program from a BPF file.
//
// Requires at least Linux 4.11.
func LoadPinnedProgram(fileName string, opts *LoadPinOptions) (*Program, error) {
fd, err := internal.BPFObjGet(fileName, opts.Marshal())
fd, err := sys.ObjGet(&sys.ObjGetAttr{
Pathname: sys.NewStringPointer(fileName),
FileFlags: opts.Marshal(),
})
if err != nil {
return nil, err
}
@ -702,28 +749,42 @@ func SanitizeName(name string, replacement rune) string {
//
// Returns ErrNotExist, if there is no next eBPF program.
func ProgramGetNextID(startID ProgramID) (ProgramID, error) {
id, err := objGetNextID(internal.BPF_PROG_GET_NEXT_ID, uint32(startID))
return ProgramID(id), err
attr := &sys.ProgGetNextIdAttr{Id: uint32(startID)}
return ProgramID(attr.NextId), sys.ProgGetNextId(attr)
}
// ID returns the systemwide unique ID of the program.
// BindMap binds a map to the program; the map is only released once the program is released.
//
// Deprecated: use ProgramInfo.ID() instead.
func (p *Program) ID() (ProgramID, error) {
info, err := bpfGetProgInfoByFD(p.fd, nil)
if err != nil {
return ProgramID(0), err
// This may be used in cases where metadata should be associated with the program
// which otherwise does not contain any references to the map.
func (p *Program) BindMap(m *Map) error {
attr := &sys.ProgBindMapAttr{
ProgFd: uint32(p.FD()),
MapFd: uint32(m.FD()),
}
return ProgramID(info.id), nil
return sys.ProgBindMap(attr)
}
func resolveBTFType(spec *btf.Spec, name string, progType ProgramType, attachType AttachType) (btf.Type, error) {
var errUnrecognizedAttachType = errors.New("unrecognized attach type")
// find an attach target type in the kernel.
//
// spec may be nil and defaults to the canonical kernel BTF. name together with
// progType and attachType determine which type we need to attach to.
//
// Returns errUnrecognizedAttachType.
func findTargetInKernel(spec *btf.Spec, name string, progType ProgramType, attachType AttachType) (btf.TypeID, error) {
type match struct {
p ProgramType
a AttachType
}
var typeName, featureName string
var (
typeName, featureName string
isBTFTypeFunc = true
)
switch (match{progType, attachType}) {
case match{LSM, AttachLSMMac}:
typeName = "bpf_lsm_" + name
@ -731,31 +792,84 @@ func resolveBTFType(spec *btf.Spec, name string, progType ProgramType, attachTyp
case match{Tracing, AttachTraceIter}:
typeName = "bpf_iter_" + name
featureName = name + " iterator"
case match{Tracing, AttachTraceFEntry}:
typeName = name
featureName = fmt.Sprintf("fentry %s", name)
case match{Tracing, AttachTraceFExit}:
typeName = name
featureName = fmt.Sprintf("fexit %s", name)
case match{Tracing, AttachModifyReturn}:
typeName = name
featureName = fmt.Sprintf("fmod_ret %s", name)
case match{Tracing, AttachTraceRawTp}:
typeName = fmt.Sprintf("btf_trace_%s", name)
featureName = fmt.Sprintf("raw_tp %s", name)
isBTFTypeFunc = false
default:
return 0, errUnrecognizedAttachType
}
spec, err := maybeLoadKernelBTF(spec)
if err != nil {
return 0, fmt.Errorf("load kernel spec: %w", err)
}
var target btf.Type
if isBTFTypeFunc {
var targetFunc *btf.Func
err = spec.TypeByName(typeName, &targetFunc)
target = targetFunc
} else {
var targetTypedef *btf.Typedef
err = spec.TypeByName(typeName, &targetTypedef)
target = targetTypedef
}
if err != nil {
if errors.Is(err, btf.ErrNotFound) {
return 0, &internal.UnsupportedFeatureError{
Name: featureName,
}
}
return 0, fmt.Errorf("find target for %s: %w", featureName, err)
}
return spec.TypeID(target)
}
// find an attach target type in a program.
//
// Returns errUnrecognizedAttachType.
func findTargetInProgram(prog *Program, name string, progType ProgramType, attachType AttachType) (btf.TypeID, error) {
type match struct {
p ProgramType
a AttachType
}
var typeName string
switch (match{progType, attachType}) {
case match{Extension, AttachNone}:
typeName = name
featureName = fmt.Sprintf("freplace %s", name)
default:
return nil, nil
return 0, errUnrecognizedAttachType
}
if spec == nil {
var err error
spec, err = btf.LoadKernelSpec()
if err != nil {
return nil, fmt.Errorf("load kernel spec: %w", err)
}
}
var target *btf.Func
err := spec.FindType(typeName, &target)
if errors.Is(err, btf.ErrNotFound) {
return nil, &internal.UnsupportedFeatureError{
Name: featureName,
}
}
btfHandle, err := prog.Handle()
if err != nil {
return nil, fmt.Errorf("resolve BTF for %s: %w", featureName, err)
return 0, fmt.Errorf("load target BTF: %w", err)
}
defer btfHandle.Close()
spec, err := btfHandle.Spec(nil)
if err != nil {
return 0, err
}
return target, nil
var targetFunc *btf.Func
err = spec.TypeByName(typeName, &targetFunc)
if err != nil {
return 0, fmt.Errorf("find target %s: %w", typeName, err)
}
return spec.TypeID(targetFunc)
}

View file

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
# Test the current package under a different kernel.
# Requires virtme and qemu to be installed.
# Examples:
@ -48,21 +48,31 @@ if [[ "${1:-}" = "--exec-vm" ]]; then
rm "${output}/fake-stdin"
fi
if ! $sudo virtme-run --kimg "${input}/bzImage" --memory 768M --pwd \
--rwdir="${testdir}=${testdir}" \
--rodir=/run/input="${input}" \
--rwdir=/run/output="${output}" \
--script-sh "PATH=\"$PATH\" \"$script\" --exec-test $cmd" \
--kopt possible_cpus=2; then # need at least two CPUs for some tests
exit 23
fi
for ((i = 0; i < 3; i++)); do
if ! $sudo virtme-run --kimg "${input}/bzImage" --memory 768M --pwd \
--rwdir="${testdir}=${testdir}" \
--rodir=/run/input="${input}" \
--rwdir=/run/output="${output}" \
--script-sh "PATH=\"$PATH\" CI_MAX_KERNEL_VERSION="${CI_MAX_KERNEL_VERSION:-}" \"$script\" --exec-test $cmd" \
--kopt possible_cpus=2; then # need at least two CPUs for some tests
exit 23
fi
if [[ -e "${output}/status" ]]; then
break
fi
if [[ -v CI ]]; then
echo "Retrying test run due to qemu crash"
continue
fi
if [[ ! -e "${output}/success" ]]; then
exit 42
fi
done
rc=$(<"${output}/status")
$sudo rm -r "$output"
exit 0
exit $rc
elif [[ "${1:-}" = "--exec-test" ]]; then
shift
@ -73,13 +83,16 @@ elif [[ "${1:-}" = "--exec-test" ]]; then
export KERNEL_SELFTESTS="/run/input/bpf"
fi
dmesg -C
if ! "$@"; then
dmesg
exit 1 # this return code is "swallowed" by qemu
if [[ -f "/run/input/bpf/bpf_testmod/bpf_testmod.ko" ]]; then
insmod "/run/input/bpf/bpf_testmod/bpf_testmod.ko"
fi
touch "/run/output/success"
exit 0
dmesg --clear
rc=0
"$@" || rc=$?
dmesg
echo $rc > "/run/output/status"
exit $rc # this return code is "swallowed" by qemu
fi
readonly kernel_version="${1:-}"
@ -90,22 +103,27 @@ fi
shift
readonly kernel="linux-${kernel_version}.bz"
readonly selftests="linux-${kernel_version}-selftests-bpf.bz"
readonly selftests="linux-${kernel_version}-selftests-bpf.tgz"
readonly input="$(mktemp -d)"
readonly tmp_dir="${TMPDIR:-/tmp}"
readonly branch="${BRANCH:-master}"
fetch() {
echo Fetching "${1}"
wget -nv -N -P "${tmp_dir}" "https://github.com/cilium/ci-kernels/raw/${branch}/${1}"
pushd "${tmp_dir}" > /dev/null
curl -s -L -O --fail --etag-compare "${1}.etag" --etag-save "${1}.etag" "https://github.com/cilium/ci-kernels/raw/${branch}/${1}"
local ret=$?
popd > /dev/null
return $ret
}
fetch "${kernel}"
cp "${tmp_dir}/${kernel}" "${input}/bzImage"
if fetch "${selftests}"; then
echo "Decompressing selftests"
mkdir "${input}/bpf"
tar --strip-components=4 -xjf "${tmp_dir}/${selftests}" -C "${input}/bpf"
tar --strip-components=4 -xf "${tmp_dir}/${selftests}" -C "${input}/bpf"
else
echo "No selftests found, disabling"
fi
@ -117,6 +135,8 @@ fi
export GOFLAGS=-mod=readonly
export CGO_ENABLED=0
# LINUX_VERSION_CODE test compares this to discovered value.
export KERNEL_VERSION="${kernel_version}"
echo Testing on "${kernel_version}"
go test -exec "$script --exec-vm $input" "${args[@]}"

View file

@ -4,19 +4,13 @@ import (
"bytes"
"errors"
"fmt"
"os"
"unsafe"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
// ErrNotExist is returned when loading a non-existing map or program.
//
// Deprecated: use os.ErrNotExist instead.
var ErrNotExist = os.ErrNotExist
// invalidBPFObjNameChar returns true if char may not appear in
// a BPF object name.
func invalidBPFObjNameChar(char rune) bool {
@ -38,108 +32,24 @@ func invalidBPFObjNameChar(char rune) bool {
}
}
type bpfMapOpAttr struct {
mapFd uint32
padding uint32
key internal.Pointer
value internal.Pointer
flags uint64
}
func progLoad(insns asm.Instructions, typ ProgramType, license string) (*sys.FD, error) {
buf := bytes.NewBuffer(make([]byte, 0, insns.Size()))
if err := insns.Marshal(buf, internal.NativeEndian); err != nil {
return nil, err
}
bytecode := buf.Bytes()
type bpfBatchMapOpAttr struct {
inBatch internal.Pointer
outBatch internal.Pointer
keys internal.Pointer
values internal.Pointer
count uint32
mapFd uint32
elemFlags uint64
flags uint64
}
type bpfMapInfo struct {
map_type uint32 // since 4.12 1e2709769086
id uint32
key_size uint32
value_size uint32
max_entries uint32
map_flags uint32
name internal.BPFObjName // since 4.15 ad5b177bd73f
ifindex uint32 // since 4.16 52775b33bb50
btf_vmlinux_value_type_id uint32 // since 5.6 85d33df357b6
netns_dev uint64 // since 4.16 52775b33bb50
netns_ino uint64
btf_id uint32 // since 4.18 78958fca7ead
btf_key_type_id uint32 // since 4.18 9b2cf328b2ec
btf_value_type_id uint32
}
type bpfProgInfo struct {
prog_type uint32
id uint32
tag [unix.BPF_TAG_SIZE]byte
jited_prog_len uint32
xlated_prog_len uint32
jited_prog_insns internal.Pointer
xlated_prog_insns internal.Pointer
load_time uint64 // since 4.15 cb4d2b3f03d8
created_by_uid uint32
nr_map_ids uint32 // since 4.15 cb4d2b3f03d8
map_ids internal.Pointer
name internal.BPFObjName // since 4.15 067cae47771c
ifindex uint32
gpl_compatible uint32
netns_dev uint64
netns_ino uint64
nr_jited_ksyms uint32
nr_jited_func_lens uint32
jited_ksyms internal.Pointer
jited_func_lens internal.Pointer
btf_id uint32
func_info_rec_size uint32
func_info internal.Pointer
nr_func_info uint32
nr_line_info uint32
line_info internal.Pointer
jited_line_info internal.Pointer
nr_jited_line_info uint32
line_info_rec_size uint32
jited_line_info_rec_size uint32
nr_prog_tags uint32
prog_tags internal.Pointer
run_time_ns uint64
run_cnt uint64
}
type bpfProgTestRunAttr struct {
fd uint32
retval uint32
dataSizeIn uint32
dataSizeOut uint32
dataIn internal.Pointer
dataOut internal.Pointer
repeat uint32
duration uint32
}
type bpfMapFreezeAttr struct {
mapFd uint32
}
type bpfObjGetNextIDAttr struct {
startID uint32
nextID uint32
openFlags uint32
}
func bpfProgTestRun(attr *bpfProgTestRunAttr) error {
_, err := internal.BPF(internal.BPF_PROG_TEST_RUN, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
return err
return sys.ProgLoad(&sys.ProgLoadAttr{
ProgType: sys.ProgType(typ),
License: sys.NewStringPointer(license),
Insns: sys.NewSlicePointer(bytecode),
InsnCnt: uint32(len(bytecode) / asm.InstructionSize),
})
}
var haveNestedMaps = internal.FeatureTest("nested maps", "4.12", func() error {
_, err := internal.BPFMapCreate(&internal.BPFMapCreateAttr{
MapType: uint32(ArrayOfMaps),
_, err := sys.MapCreate(&sys.MapCreateAttr{
MapType: sys.MapType(ArrayOfMaps),
KeySize: 4,
ValueSize: 4,
MaxEntries: 1,
@ -158,12 +68,12 @@ var haveNestedMaps = internal.FeatureTest("nested maps", "4.12", func() error {
var haveMapMutabilityModifiers = internal.FeatureTest("read- and write-only maps", "5.2", func() error {
// This checks BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG. Since
// BPF_MAP_FREEZE appeared in 5.2 as well we don't do a separate check.
m, err := internal.BPFMapCreate(&internal.BPFMapCreateAttr{
MapType: uint32(Array),
m, err := sys.MapCreate(&sys.MapCreateAttr{
MapType: sys.MapType(Array),
KeySize: 4,
ValueSize: 4,
MaxEntries: 1,
Flags: unix.BPF_F_RDONLY_PROG,
MapFlags: unix.BPF_F_RDONLY_PROG,
})
if err != nil {
return internal.ErrNotSupported
@ -174,12 +84,12 @@ var haveMapMutabilityModifiers = internal.FeatureTest("read- and write-only maps
var haveMmapableMaps = internal.FeatureTest("mmapable maps", "5.5", func() error {
// This checks BPF_F_MMAPABLE, which appeared in 5.5 for array maps.
m, err := internal.BPFMapCreate(&internal.BPFMapCreateAttr{
MapType: uint32(Array),
m, err := sys.MapCreate(&sys.MapCreateAttr{
MapType: sys.MapType(Array),
KeySize: 4,
ValueSize: 4,
MaxEntries: 1,
Flags: unix.BPF_F_MMAPABLE,
MapFlags: unix.BPF_F_MMAPABLE,
})
if err != nil {
return internal.ErrNotSupported
@ -190,12 +100,12 @@ var haveMmapableMaps = internal.FeatureTest("mmapable maps", "5.5", func() error
var haveInnerMaps = internal.FeatureTest("inner maps", "5.10", func() error {
// This checks BPF_F_INNER_MAP, which appeared in 5.10.
m, err := internal.BPFMapCreate(&internal.BPFMapCreateAttr{
MapType: uint32(Array),
m, err := sys.MapCreate(&sys.MapCreateAttr{
MapType: sys.MapType(Array),
KeySize: 4,
ValueSize: 4,
MaxEntries: 1,
Flags: unix.BPF_F_INNER_MAP,
MapFlags: unix.BPF_F_INNER_MAP,
})
if err != nil {
return internal.ErrNotSupported
@ -204,111 +114,21 @@ var haveInnerMaps = internal.FeatureTest("inner maps", "5.10", func() error {
return nil
})
func bpfMapLookupElem(m *internal.FD, key, valueOut internal.Pointer) error {
fd, err := m.Value()
var haveNoPreallocMaps = internal.FeatureTest("prealloc maps", "4.6", func() error {
// This checks BPF_F_NO_PREALLOC, which appeared in 4.6.
m, err := sys.MapCreate(&sys.MapCreateAttr{
MapType: sys.MapType(Hash),
KeySize: 4,
ValueSize: 4,
MaxEntries: 1,
MapFlags: unix.BPF_F_NO_PREALLOC,
})
if err != nil {
return err
return internal.ErrNotSupported
}
attr := bpfMapOpAttr{
mapFd: fd,
key: key,
value: valueOut,
}
_, err = internal.BPF(internal.BPF_MAP_LOOKUP_ELEM, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
return wrapMapError(err)
}
func bpfMapLookupAndDelete(m *internal.FD, key, valueOut internal.Pointer) error {
fd, err := m.Value()
if err != nil {
return err
}
attr := bpfMapOpAttr{
mapFd: fd,
key: key,
value: valueOut,
}
_, err = internal.BPF(internal.BPF_MAP_LOOKUP_AND_DELETE_ELEM, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
return wrapMapError(err)
}
func bpfMapUpdateElem(m *internal.FD, key, valueOut internal.Pointer, flags uint64) error {
fd, err := m.Value()
if err != nil {
return err
}
attr := bpfMapOpAttr{
mapFd: fd,
key: key,
value: valueOut,
flags: flags,
}
_, err = internal.BPF(internal.BPF_MAP_UPDATE_ELEM, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
return wrapMapError(err)
}
func bpfMapDeleteElem(m *internal.FD, key internal.Pointer) error {
fd, err := m.Value()
if err != nil {
return err
}
attr := bpfMapOpAttr{
mapFd: fd,
key: key,
}
_, err = internal.BPF(internal.BPF_MAP_DELETE_ELEM, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
return wrapMapError(err)
}
func bpfMapGetNextKey(m *internal.FD, key, nextKeyOut internal.Pointer) error {
fd, err := m.Value()
if err != nil {
return err
}
attr := bpfMapOpAttr{
mapFd: fd,
key: key,
value: nextKeyOut,
}
_, err = internal.BPF(internal.BPF_MAP_GET_NEXT_KEY, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
return wrapMapError(err)
}
func objGetNextID(cmd internal.BPFCmd, start uint32) (uint32, error) {
attr := bpfObjGetNextIDAttr{
startID: start,
}
_, err := internal.BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
return attr.nextID, err
}
func bpfMapBatch(cmd internal.BPFCmd, m *internal.FD, inBatch, outBatch, keys, values internal.Pointer, count uint32, opts *BatchOptions) (uint32, error) {
fd, err := m.Value()
if err != nil {
return 0, err
}
attr := bpfBatchMapOpAttr{
inBatch: inBatch,
outBatch: outBatch,
keys: keys,
values: values,
count: count,
mapFd: fd,
}
if opts != nil {
attr.elemFlags = opts.ElemFlags
attr.flags = opts.Flags
}
_, err = internal.BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
// always return count even on an error, as things like update might be partially fulfilled.
return attr.count, wrapMapError(err)
}
_ = m.Close()
return nil
})
func wrapMapError(err error) error {
if err == nil {
@ -316,15 +136,15 @@ func wrapMapError(err error) error {
}
if errors.Is(err, unix.ENOENT) {
return internal.SyscallError(ErrKeyNotExist, unix.ENOENT)
return sys.Error(ErrKeyNotExist, unix.ENOENT)
}
if errors.Is(err, unix.EEXIST) {
return internal.SyscallError(ErrKeyExist, unix.EEXIST)
return sys.Error(ErrKeyExist, unix.EEXIST)
}
if errors.Is(err, unix.ENOTSUPP) {
return internal.SyscallError(ErrNotSupported, unix.ENOTSUPP)
return sys.Error(ErrNotSupported, unix.ENOTSUPP)
}
if errors.Is(err, unix.E2BIG) {
@ -334,51 +154,16 @@ func wrapMapError(err error) error {
return err
}
func bpfMapFreeze(m *internal.FD) error {
fd, err := m.Value()
if err != nil {
return err
}
attr := bpfMapFreezeAttr{
mapFd: fd,
}
_, err = internal.BPF(internal.BPF_MAP_FREEZE, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
return err
}
func bpfGetProgInfoByFD(fd *internal.FD, ids []MapID) (*bpfProgInfo, error) {
var info bpfProgInfo
if len(ids) > 0 {
info.nr_map_ids = uint32(len(ids))
info.map_ids = internal.NewPointer(unsafe.Pointer(&ids[0]))
}
if err := internal.BPFObjGetInfoByFD(fd, unsafe.Pointer(&info), unsafe.Sizeof(info)); err != nil {
return nil, fmt.Errorf("can't get program info: %w", err)
}
return &info, nil
}
func bpfGetMapInfoByFD(fd *internal.FD) (*bpfMapInfo, error) {
var info bpfMapInfo
err := internal.BPFObjGetInfoByFD(fd, unsafe.Pointer(&info), unsafe.Sizeof(info))
if err != nil {
return nil, fmt.Errorf("can't get map info: %w", err)
}
return &info, nil
}
var haveObjName = internal.FeatureTest("object names", "4.15", func() error {
attr := internal.BPFMapCreateAttr{
MapType: uint32(Array),
attr := sys.MapCreateAttr{
MapType: sys.MapType(Array),
KeySize: 4,
ValueSize: 4,
MaxEntries: 1,
MapName: internal.NewBPFObjName("feature_test"),
MapName: sys.NewObjName("feature_test"),
}
fd, err := internal.BPFMapCreate(&attr)
fd, err := sys.MapCreate(&attr)
if err != nil {
return internal.ErrNotSupported
}
@ -392,15 +177,15 @@ var objNameAllowsDot = internal.FeatureTest("dot in object names", "5.2", func()
return err
}
attr := internal.BPFMapCreateAttr{
MapType: uint32(Array),
attr := sys.MapCreateAttr{
MapType: sys.MapType(Array),
KeySize: 4,
ValueSize: 4,
MaxEntries: 1,
MapName: internal.NewBPFObjName(".test"),
MapName: sys.NewObjName(".test"),
}
fd, err := internal.BPFMapCreate(&attr)
fd, err := sys.MapCreate(&attr)
if err != nil {
return internal.ErrNotSupported
}
@ -411,24 +196,30 @@ var objNameAllowsDot = internal.FeatureTest("dot in object names", "5.2", func()
var haveBatchAPI = internal.FeatureTest("map batch api", "5.6", func() error {
var maxEntries uint32 = 2
attr := internal.BPFMapCreateAttr{
MapType: uint32(Hash),
attr := sys.MapCreateAttr{
MapType: sys.MapType(Hash),
KeySize: 4,
ValueSize: 4,
MaxEntries: maxEntries,
}
fd, err := internal.BPFMapCreate(&attr)
fd, err := sys.MapCreate(&attr)
if err != nil {
return internal.ErrNotSupported
}
defer fd.Close()
keys := []uint32{1, 2}
values := []uint32{3, 4}
kp, _ := marshalPtr(keys, 8)
vp, _ := marshalPtr(values, 8)
nilPtr := internal.NewPointer(nil)
_, err = bpfMapBatch(internal.BPF_MAP_UPDATE_BATCH, fd, nilPtr, nilPtr, kp, vp, maxEntries, nil)
err = sys.MapUpdateBatch(&sys.MapUpdateBatchAttr{
MapFd: fd.Uint(),
Keys: kp,
Values: vp,
Count: maxEntries,
})
if err != nil {
return internal.ErrNotSupported
}
@ -444,21 +235,30 @@ var haveProbeReadKernel = internal.FeatureTest("bpf_probe_read_kernel", "5.5", f
asm.FnProbeReadKernel.Call(),
asm.Return(),
}
buf := bytes.NewBuffer(make([]byte, 0, len(insns)*asm.InstructionSize))
if err := insns.Marshal(buf, internal.NativeEndian); err != nil {
return err
}
bytecode := buf.Bytes()
fd, err := internal.BPFProgLoad(&internal.BPFProgLoadAttr{
ProgType: uint32(Kprobe),
License: internal.NewStringPointer("GPL"),
Instructions: internal.NewSlicePointer(bytecode),
InsCount: uint32(len(bytecode) / asm.InstructionSize),
})
fd, err := progLoad(insns, Kprobe, "GPL")
if err != nil {
return internal.ErrNotSupported
}
_ = fd.Close()
return nil
})
var haveBPFToBPFCalls = internal.FeatureTest("bpf2bpf calls", "4.16", func() error {
insns := asm.Instructions{
asm.Call.Label("prog2").WithSymbol("prog1"),
asm.Return(),
asm.Mov.Imm(asm.R0, 0).WithSymbol("prog2"),
asm.Return(),
}
fd, err := progLoad(insns, SocketFilter, "MIT")
if errors.Is(err, unix.EINVAL) {
return internal.ErrNotSupported
}
if err != nil {
return err
}
_ = fd.Close()
return nil
})

View file

@ -11,7 +11,7 @@ import (
type MapType uint32
// Max returns the latest supported MapType.
func (_ MapType) Max() MapType {
func (MapType) Max() MapType {
return maxMapType - 1
}
@ -103,12 +103,6 @@ const (
maxMapType
)
// Deprecated: StructOpts was a typo, use StructOpsMap instead.
//
// Declared as a variable to prevent stringer from picking it up
// as an enum value.
var StructOpts MapType = StructOpsMap
// hasPerCPUValue returns true if the Map stores a value per CPU.
func (mt MapType) hasPerCPUValue() bool {
return mt == PerCPUHash || mt == PerCPUArray || mt == LRUCPUHash || mt == PerCPUCGroupStorage
@ -126,11 +120,22 @@ func (mt MapType) canStoreProgram() bool {
return mt == ProgramArray
}
// hasBTF returns true if the map type supports BTF key/value metadata.
func (mt MapType) hasBTF() bool {
switch mt {
case PerfEventArray, CGroupArray, StackTrace, ArrayOfMaps, HashOfMaps, DevMap,
DevMapHash, CPUMap, XSKMap, SockMap, SockHash, Queue, Stack, RingBuf:
return false
default:
return true
}
}
// ProgramType of the eBPF program
type ProgramType uint32
// Max return the latest supported ProgramType.
func (_ ProgramType) Max() ProgramType {
func (ProgramType) Max() ProgramType {
return maxProgramType - 1
}
@ -167,6 +172,7 @@ const (
Extension
LSM
SkLookup
Syscall
maxProgramType
)

View file

@ -86,12 +86,13 @@ func _() {
_ = x[Extension-28]
_ = x[LSM-29]
_ = x[SkLookup-30]
_ = x[maxProgramType-31]
_ = x[Syscall-31]
_ = x[maxProgramType-32]
}
const _ProgramType_name = "UnspecifiedProgramSocketFilterKprobeSchedCLSSchedACTTracePointXDPPerfEventCGroupSKBCGroupSockLWTInLWTOutLWTXmitSockOpsSkSKBCGroupDeviceSkMsgRawTracepointCGroupSockAddrLWTSeg6LocalLircMode2SkReuseportFlowDissectorCGroupSysctlRawTracepointWritableCGroupSockoptTracingStructOpsExtensionLSMSkLookupmaxProgramType"
const _ProgramType_name = "UnspecifiedProgramSocketFilterKprobeSchedCLSSchedACTTracePointXDPPerfEventCGroupSKBCGroupSockLWTInLWTOutLWTXmitSockOpsSkSKBCGroupDeviceSkMsgRawTracepointCGroupSockAddrLWTSeg6LocalLircMode2SkReuseportFlowDissectorCGroupSysctlRawTracepointWritableCGroupSockoptTracingStructOpsExtensionLSMSkLookupSyscallmaxProgramType"
var _ProgramType_index = [...]uint16{0, 18, 30, 36, 44, 52, 62, 65, 74, 83, 93, 98, 104, 111, 118, 123, 135, 140, 153, 167, 179, 188, 199, 212, 224, 245, 258, 265, 274, 283, 286, 294, 308}
var _ProgramType_index = [...]uint16{0, 18, 30, 36, 44, 52, 62, 65, 74, 83, 93, 98, 104, 111, 118, 123, 135, 140, 153, 167, 179, 188, 199, 212, 224, 245, 258, 265, 274, 283, 286, 294, 301, 315}
func (i ProgramType) String() string {
if i >= ProgramType(len(_ProgramType_index)-1) {

View file

@ -1,46 +0,0 @@
version = "unstable"
generator = "gogoctrd"
plugins = ["grpc"]
# Control protoc include paths. Below are usually some good defaults, but feel
# free to try it without them if it works for your project.
[includes]
# Include paths that will be added before all others. Typically, you want to
# treat the root of the project as an include, but this may not be necessary.
# before = ["."]
# Paths that should be treated as include roots in relation to the vendor
# directory. These will be calculated with the vendor directory nearest the
# target package.
# vendored = ["github.com/gogo/protobuf"]
packages = ["github.com/gogo/protobuf"]
# Paths that will be added untouched to the end of the includes. We use
# `/usr/local/include` to pickup the common install location of protobuf.
# This is the default.
after = ["/usr/local/include", "/usr/include"]
# This section maps protobuf imports to Go packages. These will become
# `-M` directives in the call to the go protobuf generator.
[packages]
"gogoproto/gogo.proto" = "github.com/gogo/protobuf/gogoproto"
"google/protobuf/any.proto" = "github.com/gogo/protobuf/types"
"google/protobuf/descriptor.proto" = "github.com/gogo/protobuf/protoc-gen-gogo/descriptor"
"google/protobuf/field_mask.proto" = "github.com/gogo/protobuf/types"
"google/protobuf/timestamp.proto" = "github.com/gogo/protobuf/types"
# Aggregate the API descriptors to lock down API changes.
[[descriptors]]
prefix = "github.com/containerd/cgroups/stats/v1"
target = "stats/v1/metrics.pb.txt"
ignore_files = [
"google/protobuf/descriptor.proto",
"gogoproto/gogo.proto"
]
[[descriptors]]
prefix = "github.com/containerd/cgroups/v2/stats"
target = "v2/stats/metrics.pb.txt"
ignore_files = [
"google/protobuf/descriptor.proto",
"gogoproto/gogo.proto"
]

View file

@ -1,46 +0,0 @@
# -*- mode: ruby -*-
# vi: set ft=ruby :
Vagrant.configure("2") do |config|
# Fedora box is used for testing cgroup v2 support
config.vm.box = "fedora/35-cloud-base"
config.vm.provider :virtualbox do |v|
v.memory = 4096
v.cpus = 2
end
config.vm.provider :libvirt do |v|
v.memory = 4096
v.cpus = 2
end
config.vm.provision "shell", inline: <<-SHELL
set -eux -o pipefail
# configuration
GO_VERSION="1.17.7"
# install gcc and Golang
dnf -y install gcc
curl -fsSL "https://dl.google.com/go/go${GO_VERSION}.linux-amd64.tar.gz" | tar Cxz /usr/local
# setup env vars
cat >> /etc/profile.d/sh.local <<EOF
PATH=/usr/local/go/bin:$PATH
GO111MODULE=on
export PATH GO111MODULE
EOF
source /etc/profile.d/sh.local
# enter /root/go/src/github.com/containerd/cgroups
mkdir -p /root/go/src/github.com/containerd
ln -s /vagrant /root/go/src/github.com/containerd/cgroups
cd /root/go/src/github.com/containerd/cgroups
# create /test.sh
cat > /test.sh <<EOF
#!/bin/bash
set -eux -o pipefail
cd /root/go/src/github.com/containerd/cgroups
go test -v ./...
EOF
chmod +x /test.sh
SHELL
end

File diff suppressed because it is too large

vendor/github.com/containerd/cgroups/v3/LICENSE (generated, vendored)
View file

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View file

@ -22,3 +22,5 @@ cgutil:
proto:
protobuild --quiet ${PACKAGES}
# Keep them Go-idiomatic and backward-compatible with the gogo/protobuf era.
go-fix-acronym -w -a '(Cpu|Tcp|Rss)' $(shell find cgroup1/stats/ cgroup2/stats/ -name '*.pb.go')

View file

@ -0,0 +1,31 @@
version = "2"
generators = ["go"]
# Control protoc include paths. Below are usually some good defaults, but feel
# free to try it without them if it works for your project.
[includes]
# Include paths that will be added before all others. Typically, you want to
# treat the root of the project as an include, but this may not be necessary.
# before = ["."]
# Paths that will be added untouched to the end of the includes. We use
# `/usr/local/include` to pickup the common install location of protobuf.
# This is the default.
after = ["/usr/local/include", "/usr/include"]
# Aggregate the API descriptors to lock down API changes.
[[descriptors]]
prefix = "github.com/containerd/cgroups/cgroup1/stats"
target = "cgroup1/stats/metrics.pb.txt"
ignore_files = [
"google/protobuf/descriptor.proto",
]
[[descriptors]]
prefix = "github.com/containerd/cgroups/cgroup2/stats"
target = "cgroup2/stats/metrics.pb.txt"
ignore_files = [
"google/protobuf/descriptor.proto",
]
[parameters.go]
paths = "source_relative"

View file

@ -9,7 +9,7 @@ Go package for creating, managing, inspecting, and destroying cgroups.
The resources format for settings on the cgroup uses the OCI runtime-spec found
[here](https://github.com/opencontainers/runtime-spec).
## Examples
## Examples (v1)
### Create a new cgroup
@ -25,7 +25,7 @@ uses the v1 implementation of cgroups.
```go
shares := uint64(100)
control, err := cgroups.New(cgroups.V1, cgroups.StaticPath("/test"), &specs.LinuxResources{
control, err := cgroup1.New(cgroup1.StaticPath("/test"), &specs.LinuxResources{
CPU: &specs.LinuxCPU{
Shares: &shares,
},
@ -37,7 +37,7 @@ defer control.Delete()
```go
control, err := cgroups.New(cgroups.Systemd, cgroups.Slice("system.slice", "runc-test"), &specs.LinuxResources{
control, err := cgroup1.New(cgroup1.Systemd, cgroup1.Slice("system.slice", "runc-test"), &specs.LinuxResources{
CPU: &specs.CPU{
Shares: &shares,
},
@ -48,17 +48,17 @@ control, err := cgroups.New(cgroups.Systemd, cgroups.Slice("system.slice", "runc
### Load an existing cgroup
```go
control, err = cgroups.Load(cgroups.V1, cgroups.StaticPath("/test"))
control, err = cgroup1.Load(cgroup1.StaticPath("/test"))
```
### Add a process to the cgroup
```go
if err := control.Add(cgroups.Process{Pid:1234}); err != nil {
if err := control.Add(cgroup1.Process{Pid:1234}); err != nil {
}
```
### Update the cgroup
To update the resources applied in the cgroup
@ -84,7 +84,7 @@ if err := control.Thaw(); err != nil {
### List all processes in the cgroup or recursively
```go
processes, err := control.Processes(cgroups.Devices, recursive)
processes, err := control.Processes(cgroup1.Devices, recursive)
```
### Get Stats on the cgroup
@ -95,7 +95,7 @@ stats, err := control.Stat()
By adding `cgroup1.IgnoreNotExist`, all non-existent files are ignored, e.g. swap memory stats when swap is not enabled
```go
stats, err := control.Stat(cgroups.IgnoreNotExist)
stats, err := control.Stat(cgroup1.IgnoreNotExist)
```
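The returned metrics are typed structs; as a sketch (field names assume the vendored `cgroup1/stats` types), memory usage can be read directly:

```go
stats, err := control.Stat(cgroup1.IgnoreNotExist)
if err == nil && stats.Memory != nil {
	fmt.Println("memory usage (bytes):", stats.Memory.Usage.Usage)
}
```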
### Move process across cgroups
@ -117,22 +117,90 @@ subCgroup, err := control.New("child", resources)
This allows you to get notified by an eventfd for v1 memory cgroups events.
```go
event := cgroups.MemoryThresholdEvent(50 * 1024 * 1024, false)
event := cgroup1.MemoryThresholdEvent(50 * 1024 * 1024, false)
efd, err := control.RegisterMemoryEvent(event)
```
```go
event := cgroups.MemoryPressureEvent(cgroups.MediumPressure, cgroups.DefaultMode)
event := cgroup1.MemoryPressureEvent(cgroup1.MediumPressure, cgroup1.DefaultMode)
efd, err := control.RegisterMemoryEvent(event)
```
```go
efd, err := control.OOMEventFD()
// or by using RegisterMemoryEvent
event := cgroups.OOMEvent()
event := cgroup1.OOMEvent()
efd, err := control.RegisterMemoryEvent(event)
```
## Examples (v2/unified)
### Check that the current system is running cgroups v2
```go
var cgroupV2 bool
if cgroups.Mode() == cgroups.Unified {
cgroupV2 = true
}
```
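`cgroups.Mode()` also distinguishes legacy and hybrid layouts; a sketch of the full switch (constant names as exported by the root package):

```go
switch cgroups.Mode() {
case cgroups.Legacy:
	// cgroup v1 only
case cgroups.Hybrid:
	// v1 controllers mounted alongside a unified hierarchy
case cgroups.Unified:
	// cgroup v2 only
case cgroups.Unavailable:
	// no cgroup mount found
}
```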
### Create a new cgroup
This creates a new systemd v2 cgroup slice. Systemd slices treat ["-" as a special character](https://www.freedesktop.org/software/systemd/man/systemd.slice.html),
so the resulting slice would be located here on disk:
* /sys/fs/cgroup/my.slice/my-cgroup.slice/my-cgroup-abc.slice
```go
import (
"github.com/containerd/cgroups/v3/cgroup2"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
res := cgroup2.Resources{}
// dummy PID of -1 is used for creating a "general slice" to be used as a parent cgroup.
// see https://github.com/containerd/cgroups/blob/1df78138f1e1e6ee593db155c6b369466f577651/v2/manager.go#L732-L735
m, err := cgroup2.NewSystemd("/", "my-cgroup-abc.slice", -1, &res)
if err != nil {
return err
}
```
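Processes can then be attached to the new slice; a sketch using the `m` from above (assumes `AddProc` on the v2 manager):

```go
// Move the current process into the new cgroup.
if err := m.AddProc(uint64(os.Getpid())); err != nil {
	return err
}
```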
### Load an existing cgroup
```go
m, err := cgroup2.LoadSystemd("/", "my-cgroup-abc.slice")
if err != nil {
return err
}
```
### Delete a cgroup
```go
m, err := cgroup2.LoadSystemd("/", "my-cgroup-abc.slice")
if err != nil {
return err
}
err = m.DeleteSystemd()
if err != nil {
return err
}
```
### Kill all processes in a cgroup
```go
m, err := cgroup2.LoadSystemd("/", "my-cgroup-abc.slice")
if err != nil {
return err
}
err = m.Kill()
if err != nil {
return err
}
```
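Freezing and thawing work the same way on a loaded v2 manager; a sketch mirroring the v1 Freeze/Thaw flow shown earlier:

```go
m, err := cgroup2.LoadSystemd("/", "my-cgroup-abc.slice")
if err != nil {
	return err
}
if err := m.Freeze(); err != nil {
	return err
}
// ... later
if err := m.Thaw(); err != nil {
	return err
}
```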
### Attention
All static paths should not include the `/sys/fs/cgroup/` prefix; they should start with your own cgroup name

View file

@ -14,7 +14,7 @@
limitations under the License.
*/
package cgroups
package cgroup1
import (
"bufio"
@ -25,7 +25,8 @@ import (
"strconv"
"strings"
v1 "github.com/containerd/cgroups/stats/v1"
v1 "github.com/containerd/cgroups/v3/cgroup1/stats"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
@ -71,7 +72,7 @@ func (b *blkioController) Create(path string, resources *specs.LinuxResources) e
}
for _, t := range createBlkioSettings(resources.BlockIO) {
if t.value != nil {
if err := retryingWriteFile(
if err := os.WriteFile(
filepath.Join(b.Path(path), "blkio."+t.name),
t.format(t.value),
defaultFilePerm,

View file

@ -14,31 +14,34 @@
limitations under the License.
*/
package cgroups
package cgroup1
import (
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"syscall"
"time"
v1 "github.com/containerd/cgroups/stats/v1"
v1 "github.com/containerd/cgroups/v3/cgroup1/stats"
"github.com/opencontainers/runtime-spec/specs-go"
)
// New returns a new control via the cgroups v1 interface
func New(hierarchy Hierarchy, path Path, resources *specs.LinuxResources, opts ...InitOpts) (Cgroup, error) {
func New(path Path, resources *specs.LinuxResources, opts ...InitOpts) (Cgroup, error) {
config := newInitConfig()
for _, o := range opts {
if err := o(config); err != nil {
return nil, err
}
}
subsystems, err := hierarchy()
subsystems, err := config.hiearchy()
if err != nil {
return nil, err
}
@ -68,7 +71,7 @@ func New(hierarchy Hierarchy, path Path, resources *specs.LinuxResources, opts .
// Load will load an existing cgroup and allow it to be controlled
// Static paths should not include the `/sys/fs/cgroup/` prefix; they should start with your own cgroup name
func Load(hierarchy Hierarchy, path Path, opts ...InitOpts) (Cgroup, error) {
func Load(path Path, opts ...InitOpts) (Cgroup, error) {
config := newInitConfig()
for _, o := range opts {
if err := o(config); err != nil {
@ -76,7 +79,7 @@ func Load(hierarchy Hierarchy, path Path, opts ...InitOpts) (Cgroup, error) {
}
}
var activeSubsystems []Subsystem
subsystems, err := hierarchy()
subsystems, err := config.hiearchy()
if err != nil {
return nil, err
}
@ -84,7 +87,7 @@ func Load(hierarchy Hierarchy, path Path, opts ...InitOpts) (Cgroup, error) {
for _, s := range pathers(subsystems) {
p, err := path(s.Name())
if err != nil {
if errors.Is(err, os.ErrNotExist) {
return nil, ErrCgroupDeleted
}
if err == ErrControllerNotActive {
@ -193,6 +196,31 @@ func (c *cgroup) AddTask(process Process, subsystems ...Name) error {
return c.add(process, cgroupTasks, subsystems...)
}
// writeCgroupProcs writes to the file, but retries on EINVAL.
func writeCgroupProcs(path string, content []byte, perm fs.FileMode) error {
f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY, perm)
if err != nil {
return err
}
defer f.Close()
for i := 0; i < 5; i++ {
_, err = f.Write(content)
if err == nil {
return nil
}
// If the process's associated task's state is TASK_NEW, the kernel
// returns EINVAL. The function retries on that error, as runc does.
// https://github.com/torvalds/linux/blob/v6.0/kernel/sched/core.c#L10308-L10337
// https://github.com/opencontainers/runc/pull/1950
if !errors.Is(err, syscall.EINVAL) {
return err
}
time.Sleep(30 * time.Millisecond)
}
return err
}
func (c *cgroup) add(process Process, pType procType, subsystems ...Name) error {
if process.Pid <= 0 {
return ErrInvalidPid
@ -207,7 +235,7 @@ func (c *cgroup) add(process Process, pType procType, subsystems ...Name) error
if err != nil {
return err
}
err = retryingWriteFile(
err = writeCgroupProcs(
filepath.Join(s.Path(p), pType),
[]byte(strconv.Itoa(process.Pid)),
defaultFilePerm,
@ -228,6 +256,15 @@ func (c *cgroup) Delete() error {
}
var errs []string
for _, s := range c.subsystems {
// the kernel prevents cgroups with running processes from being removed; check that the tree is empty
procs, err := c.processes(s.Name(), true, cgroupProcs)
if err != nil {
return err
}
if len(procs) > 0 {
errs = append(errs, fmt.Sprintf("%s (contains running processes)", string(s.Name())))
continue
}
if d, ok := s.(deleter); ok {
sp, err := c.path(s.Name())
if err != nil {
@ -247,6 +284,7 @@ func (c *cgroup) Delete() error {
if err := remove(path); err != nil {
errs = append(errs, path)
}
continue
}
}
if len(errs) > 0 {

Some files were not shown because too many files have changed in this diff