vendor: github.com/cilium/ebpf v0.11.0

full diff: https://github.com/cilium/ebpf/compare/v0.9.1...v0.11.0

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
This commit is contained in:
Sebastiaan van Stijn 2024-01-05 13:14:49 +01:00
parent c3c1ee0150
commit 12128189eb
No known key found for this signature in database
GPG key ID: 76698F39D527CE8C
96 changed files with 7185 additions and 2417 deletions

View file

@ -130,7 +130,7 @@ require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cenkalti/backoff/v4 v4.2.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/cilium/ebpf v0.9.1 // indirect
github.com/cilium/ebpf v0.11.0 // indirect
github.com/container-storage-interface/spec v1.5.0 // indirect
github.com/containerd/cgroups v1.1.0 // indirect
github.com/containerd/console v1.0.3 // indirect
@ -203,6 +203,7 @@ require (
go.uber.org/multierr v1.8.0 // indirect
go.uber.org/zap v1.21.0 // indirect
golang.org/x/crypto v0.17.0 // indirect
golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 // indirect
golang.org/x/oauth2 v0.10.0 // indirect
golang.org/x/tools v0.10.0 // indirect
google.golang.org/api v0.126.0 // indirect

View file

@ -260,8 +260,8 @@ github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5P
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/cilium/ebpf v0.0.0-20200110133405-4032b1d8aae3/go.mod h1:MA5e5Lr8slmEg9bt0VpxxWqJlO4iwu3FBdHUzV7wQVg=
github.com/cilium/ebpf v0.0.0-20200702112145-1c8d4c9ef775/go.mod h1:7cR51M8ViRLIdUjrmSXlK9pkrsDlLHbO8jiB8X8JnOc=
github.com/cilium/ebpf v0.9.1 h1:64sn2K3UKw8NbP/blsixRpF3nXuyhz/VjRlRzvlBRu4=
github.com/cilium/ebpf v0.9.1/go.mod h1:+OhNOIXx/Fnu1IE8bJz2dzOA+VSfyTfdNUVdlQnxUFY=
github.com/cilium/ebpf v0.11.0 h1:V8gS/bTCCjX9uUnkUFUpPsksM8n1lXBAvHcpiFk1X2Y=
github.com/cilium/ebpf v0.11.0/go.mod h1:WE7CZAnqOL2RouJ4f1uyNhqr2P4CCvXFIqdRDUgWsVs=
github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=
github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
@ -456,7 +456,7 @@ github.com/fluent/fluent-logger-golang v1.9.0/go.mod h1:2/HCT/jTy78yGyeNGQLGQsjF
github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc=
github.com/fortytw2/leaktest v1.2.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
github.com/frankban/quicktest v1.14.0 h1:+cqqvzZV87b4adx/5ayVOaYZ2CrvM4ejQvUdBzPPUss=
github.com/frankban/quicktest v1.14.5 h1:dfYrrRyLtiqT9GyKXgdh+k4inNeTvmGbuSgZ3lx3GhA=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
@ -1408,6 +1408,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0
golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 h1:Jvc7gsqn21cJHCmAWx0LiimpP18LZmUxkT5Mp7EZ1mI=
golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc=
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=

View file

@ -14,4 +14,6 @@ KeepEmptyLinesAtTheStartOfBlocks: false
TabWidth: 4
UseTab: ForContinuationAndIndentation
ColumnLimit: 1000
# Go compiler comments need to stay unindented.
CommentPragmas: '^go:.*'
...

View file

@ -9,7 +9,6 @@ issues:
linters:
disable-all: true
enable:
- deadcode
- errcheck
- goimports
- gosimple
@ -17,10 +16,9 @@ linters:
- ineffassign
- misspell
- staticcheck
- structcheck
- typecheck
- unused
- varcheck
- gofmt
# Could be enabled later:
# - gocyclo

View file

@ -1,7 +1,21 @@
Architecture of the library
===
ELF -> Specifications -> Objects -> Links
```mermaid
graph RL
Program --> ProgramSpec --> ELF
btf.Spec --> ELF
Map --> MapSpec --> ELF
Links --> Map & Program
ProgramSpec -.-> btf.Spec
MapSpec -.-> btf.Spec
subgraph Collection
Program & Map
end
subgraph CollectionSpec
ProgramSpec & MapSpec & btf.Spec
end
```
ELF
---
@ -11,7 +25,7 @@ an ELF file which contains program byte code (aka BPF), but also metadata for
maps used by the program. The metadata follows the conventions set by libbpf
shipped with the kernel. Certain ELF sections have special meaning
and contain structures defined by libbpf. Newer versions of clang emit
additional metadata in BPF Type Format (aka BTF).
additional metadata in [BPF Type Format](#BTF).
The library aims to be compatible with libbpf so that moving from a C toolchain
to a Go one creates little friction. To that end, the [ELF reader](elf_reader.go)
@ -20,41 +34,33 @@ if possible.
The output of the ELF reader is a `CollectionSpec` which encodes
all of the information contained in the ELF in a form that is easy to work with
in Go.
### BTF
The BPF Type Format describes more than just the types used by a BPF program. It
includes debug aids like which source line corresponds to which instructions and
what global variables are used.
[BTF parsing](internal/btf/) lives in a separate internal package since exposing
it would mean an additional maintenance burden, and because the API still
has sharp corners. The most important concept is the `btf.Type` interface, which
also describes things that aren't really types like `.rodata` or `.bss` sections.
`btf.Type`s can form cyclical graphs, which can easily lead to infinite loops if
one is not careful. Hopefully a safe pattern to work with `btf.Type` emerges as
we write more code that deals with it.
in Go. The returned `CollectionSpec` should be deterministic: reading the same ELF
file on different systems must produce the same output.
As a corollary, any changes that depend on the runtime environment like the
current kernel version must happen when creating [Objects](#Objects).
Specifications
---
`CollectionSpec`, `ProgramSpec` and `MapSpec` are blueprints for in-kernel
objects and contain everything necessary to execute the relevant `bpf(2)`
syscalls. Since the ELF reader outputs a `CollectionSpec` it's possible to
modify clang-compiled BPF code, for example to rewrite constants. At the same
time the [asm](asm/) package provides an assembler that can be used to generate
`ProgramSpec` on the fly.
`CollectionSpec` is a very simple container for `ProgramSpec`, `MapSpec` and
`btf.Spec`. Avoid adding functionality to it if possible.
Creating a spec should never require any privileges or be restricted in any way,
for example by only allowing programs in native endianness. This ensures that
the library stays flexible.
`ProgramSpec` and `MapSpec` are blueprints for in-kernel
objects and contain everything necessary to execute the relevant `bpf(2)`
syscalls. They refer to `btf.Spec` for type information such as `Map` key and
value types.
The [asm](asm/) package provides an assembler that can be used to generate
`ProgramSpec` on the fly.
Objects
---
`Program` and `Map` are the result of loading specs into the kernel. Sometimes
loading a spec will fail because the kernel is too old, or a feature is not
`Program` and `Map` are the result of loading specifications into the kernel.
Features that depend on knowledge of the current system (e.g. kernel version)
are implemented at this point.
Sometimes loading a spec will fail because the kernel is too old, or a feature is not
enabled. There are multiple ways the library deals with that:
* Fallback: older kernels don't allow naming programs and maps. The library
@ -73,7 +79,7 @@ useful when our higher-level API doesn't support a particular use case.
Links
---
BPF can be attached to many different points in the kernel and newer BPF hooks
Programs can be attached to many different points in the kernel and newer BPF hooks
tend to use bpf_link to do so. Older hooks unfortunately use a combination of
syscalls, netlink messages, etc. Adding support for a new link type should not
pull in large dependencies like netlink, so XDP programs or tracepoints are

View file

@ -5,15 +5,23 @@ the form of pull requests and issues reporting bugs or suggesting new features
are welcome. Please take a look at [the architecture](ARCHITECTURE.md) to get
a better understanding for the high-level goals.
New features must be accompanied by tests. Before starting work on any large
feature, please [join](https://ebpf.io/slack) the
[#ebpf-go](https://cilium.slack.com/messages/ebpf-go) channel on Slack to
discuss the design first.
## Adding a new feature
When submitting pull requests, consider writing details about what problem you
are solving and why the proposed approach solves that problem in commit messages
and/or pull request description to help future library users and maintainers to
reason about the proposed changes.
1. [Join](https://ebpf.io/slack) the
[#ebpf-go](https://cilium.slack.com/messages/ebpf-go) channel to discuss your requirements and how the feature can be implemented. The most important part is figuring out how much new exported API is necessary. **The less new API is required the easier it will be to land the feature.**
2. (*optional*) Create a draft PR if you want to discuss the implementation or have hit a problem. It's fine if this doesn't compile or contains debug statements.
3. Create a PR that is ready to merge. This must pass CI and have tests.
### API stability
The library doesn't guarantee the stability of its API at the moment.
1. If possible avoid breakage by introducing new API and deprecating the old one
at the same time. If an API was deprecated in v0.x it can be removed in v0.x+1.
2. Breaking API in a way that causes compilation failures is acceptable but must
have good reasons.
3. Changing the semantics of the API without causing compilation failures is
heavily discouraged.
## Running the tests
@ -35,6 +43,6 @@ Examples:
./run-tests.sh 5.4
# Run a subset of tests:
./run-tests.sh 5.4 go test ./link
./run-tests.sh 5.4 ./link
```

View file

@ -1,8 +1,3 @@
# Maintainers
* [Lorenz Bauer]
* [Timo Beckers] (Isovalent)
[Lorenz Bauer]: https://github.com/lmb
[Timo Beckers]: https://github.com/ti-mo
Maintainers can be found in the [Cilium Maintainers file](https://github.com/cilium/community/blob/main/roles/Maintainers.md)

View file

@ -28,6 +28,7 @@ TARGETS := \
testdata/loader-clang-7 \
testdata/loader-clang-9 \
testdata/loader-$(CLANG) \
testdata/manyprogs \
testdata/btf_map_init \
testdata/invalid_map \
testdata/raw_tracepoint \
@ -39,9 +40,15 @@ TARGETS := \
testdata/map_spin_lock \
testdata/subprog_reloc \
testdata/fwd_decl \
testdata/kconfig \
testdata/kconfig_config \
testdata/kfunc \
testdata/invalid-kfunc \
testdata/kfunc-kmod \
btf/testdata/relocs \
btf/testdata/relocs_read \
btf/testdata/relocs_read_tgt
btf/testdata/relocs_read_tgt \
cmd/bpf2go/testdata/minimal
.PHONY: all clean container-all container-shell generate
@ -49,12 +56,12 @@ TARGETS := \
# Build all ELF binaries using a containerized LLVM toolchain.
container-all:
${CONTAINER_ENGINE} run --rm ${CONTAINER_RUN_ARGS} \
+${CONTAINER_ENGINE} run --rm -ti ${CONTAINER_RUN_ARGS} \
-v "${REPODIR}":/ebpf -w /ebpf --env MAKEFLAGS \
--env CFLAGS="-fdebug-prefix-map=/ebpf=." \
--env HOME="/tmp" \
"${IMAGE}:${VERSION}" \
$(MAKE) all
make all
# (debug) Drop the user into a shell inside the container as root.
container-shell:
@ -77,9 +84,7 @@ all: format $(addsuffix -el.elf,$(TARGETS)) $(addsuffix -eb.elf,$(TARGETS)) gene
generate: export BPF_CLANG := $(CLANG)
generate: export BPF_CFLAGS := $(CFLAGS)
generate:
go generate ./cmd/bpf2go/test
go generate ./internal/sys
cd examples/ && go generate ./...
go generate ./...
testdata/loader-%-el.elf: testdata/loader.c
$* $(CFLAGS) -target bpfel -c $< -o $@
@ -98,11 +103,11 @@ testdata/loader-%-eb.elf: testdata/loader.c
$(STRIP) -g $@
.PHONY: generate-btf
generate-btf: KERNEL_VERSION?=5.18
generate-btf: KERNEL_VERSION?=5.19
generate-btf:
$(eval TMP := $(shell mktemp -d))
curl -fL "$(CI_KERNEL_URL)/linux-$(KERNEL_VERSION).bz" -o "$(TMP)/bzImage"
./testdata/extract-vmlinux "$(TMP)/bzImage" > "$(TMP)/vmlinux"
/lib/modules/$(uname -r)/build/scripts/extract-vmlinux "$(TMP)/bzImage" > "$(TMP)/vmlinux"
$(OBJCOPY) --dump-section .BTF=/dev/stdout "$(TMP)/vmlinux" /dev/null | gzip > "btf/testdata/vmlinux.btf.gz"
curl -fL "$(CI_KERNEL_URL)/linux-$(KERNEL_VERSION)-selftests-bpf.tgz" -o "$(TMP)/selftests.tgz"
tar -xf "$(TMP)/selftests.tgz" --to-stdout tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.ko | \

View file

@ -4,33 +4,37 @@
![HoneyGopher](.github/images/cilium-ebpf.png)
eBPF is a pure Go library that provides utilities for loading, compiling, and
ebpf-go is a pure Go library that provides utilities for loading, compiling, and
debugging eBPF programs. It has minimal external dependencies and is intended to
be used in long running processes.
The library is maintained by [Cloudflare](https://www.cloudflare.com) and
[Cilium](https://www.cilium.io).
See [ebpf.io](https://ebpf.io) for other projects from the eBPF ecosystem.
See [ebpf.io](https://ebpf.io) for complementary projects from the wider eBPF
ecosystem.
## Getting Started
A small collection of Go and eBPF programs that serve as examples for building
your own tools can be found under [examples/](examples/).
Contributions are highly encouraged, as they highlight certain use cases of
[Contributions](CONTRIBUTING.md) are highly encouraged, as they highlight certain use cases of
eBPF and the library, and help shape the future of the project.
## Getting Help
Please
[join](https://ebpf.io/slack) the
The community actively monitors our [GitHub Discussions](https://github.com/cilium/ebpf/discussions) page.
Please search for existing threads before starting a new one. Refrain from
opening issues on the bug tracker if you're just starting out or if you're not
sure if something is a bug in the library code.
Alternatively, [join](https://ebpf.io/slack) the
[#ebpf-go](https://cilium.slack.com/messages/ebpf-go) channel on Slack if you
have questions regarding the library.
have other questions regarding the project. Note that this channel is ephemeral
and has its history erased past a certain point, which is less helpful for
others running into the same problem later.
## Packages
This library includes the following packages:
This library includes the following packages:
* [asm](https://pkg.go.dev/github.com/cilium/ebpf/asm) contains a basic
assembler, allowing you to write eBPF assembly instructions directly
@ -38,7 +42,7 @@ This library includes the following packages:
* [cmd/bpf2go](https://pkg.go.dev/github.com/cilium/ebpf/cmd/bpf2go) allows
compiling and embedding eBPF programs written in C within Go code. As well as
compiling the C code, it auto-generates Go code for loading and manipulating
the eBPF program and map objects.
the eBPF program and map objects.
* [link](https://pkg.go.dev/github.com/cilium/ebpf/link) allows attaching eBPF
to various hooks
* [perf](https://pkg.go.dev/github.com/cilium/ebpf/perf) allows reading from a
@ -49,6 +53,7 @@ This library includes the following packages:
of `bpftool feature probe` for discovering BPF-related kernel features using native Go.
* [rlimit](https://pkg.go.dev/github.com/cilium/ebpf/rlimit) provides a convenient API to lift
the `RLIMIT_MEMLOCK` constraint on kernels before 5.11.
* [btf](https://pkg.go.dev/github.com/cilium/ebpf/btf) allows reading the BPF Type Format.
## Requirements

View file

@ -4,10 +4,10 @@ package asm
// Source of ALU / ALU64 / Branch operations
//
// msb lsb
// +----+-+---+
// |op |S|cls|
// +----+-+---+
// msb lsb
// +----+-+---+
// |op |S|cls|
// +----+-+---+
type Source uint8
const sourceMask OpCode = 0x08
@ -39,10 +39,10 @@ const (
// ALUOp are ALU / ALU64 operations
//
// msb lsb
// +----+-+---+
// |OP |s|cls|
// +----+-+---+
// msb lsb
// +----+-+---+
// |OP |s|cls|
// +----+-+---+
type ALUOp uint8
const aluMask OpCode = 0xf0

View file

@ -13,15 +13,15 @@ func (_ BuiltinFunc) Max() BuiltinFunc {
//
// You can regenerate this list using the following gawk script:
//
// /FN\(.+\),/ {
// match($1, /\((.+)\)/, r)
// split(r[1], p, "_")
// printf "Fn"
// for (i in p) {
// printf "%s%s", toupper(substr(p[i], 1, 1)), substr(p[i], 2)
// }
// print ""
// }
// /FN\(.+\),/ {
// match($1, /\(([a-z_0-9]+),/, r)
// split(r[1], p, "_")
// printf "Fn"
// for (i in p) {
// printf "%s%s", toupper(substr(p[i], 1, 1)), substr(p[i], 2)
// }
// print ""
// }
//
// The script expects include/uapi/linux/bpf.h as its input.
const (
@ -229,6 +229,14 @@ const (
FnDynptrRead
FnDynptrWrite
FnDynptrData
FnTcpRawGenSyncookieIpv4
FnTcpRawGenSyncookieIpv6
FnTcpRawCheckSyncookieIpv4
FnTcpRawCheckSyncookieIpv6
FnKtimeGetTaiNs
FnUserRingbufDrain
FnCgrpStorageGet
FnCgrpStorageDelete
maxBuiltinFunc
)

View file

@ -212,12 +212,20 @@ func _() {
_ = x[FnDynptrRead-201]
_ = x[FnDynptrWrite-202]
_ = x[FnDynptrData-203]
_ = x[maxBuiltinFunc-204]
_ = x[FnTcpRawGenSyncookieIpv4-204]
_ = x[FnTcpRawGenSyncookieIpv6-205]
_ = x[FnTcpRawCheckSyncookieIpv4-206]
_ = x[FnTcpRawCheckSyncookieIpv6-207]
_ = x[FnKtimeGetTaiNs-208]
_ = x[FnUserRingbufDrain-209]
_ = x[FnCgrpStorageGet-210]
_ = x[FnCgrpStorageDelete-211]
_ = x[maxBuiltinFunc-212]
}
const _BuiltinFunc_name = "FnUnspecFnMapLookupElemFnMapUpdateElemFnMapDeleteElemFnProbeReadFnKtimeGetNsFnTracePrintkFnGetPrandomU32FnGetSmpProcessorIdFnSkbStoreBytesFnL3CsumReplaceFnL4CsumReplaceFnTailCallFnCloneRedirectFnGetCurrentPidTgidFnGetCurrentUidGidFnGetCurrentCommFnGetCgroupClassidFnSkbVlanPushFnSkbVlanPopFnSkbGetTunnelKeyFnSkbSetTunnelKeyFnPerfEventReadFnRedirectFnGetRouteRealmFnPerfEventOutputFnSkbLoadBytesFnGetStackidFnCsumDiffFnSkbGetTunnelOptFnSkbSetTunnelOptFnSkbChangeProtoFnSkbChangeTypeFnSkbUnderCgroupFnGetHashRecalcFnGetCurrentTaskFnProbeWriteUserFnCurrentTaskUnderCgroupFnSkbChangeTailFnSkbPullDataFnCsumUpdateFnSetHashInvalidFnGetNumaNodeIdFnSkbChangeHeadFnXdpAdjustHeadFnProbeReadStrFnGetSocketCookieFnGetSocketUidFnSetHashFnSetsockoptFnSkbAdjustRoomFnRedirectMapFnSkRedirectMapFnSockMapUpdateFnXdpAdjustMetaFnPerfEventReadValueFnPerfProgReadValueFnGetsockoptFnOverrideReturnFnSockOpsCbFlagsSetFnMsgRedirectMapFnMsgApplyBytesFnMsgCorkBytesFnMsgPullDataFnBindFnXdpAdjustTailFnSkbGetXfrmStateFnGetStackFnSkbLoadBytesRelativeFnFibLookupFnSockHashUpdateFnMsgRedirectHashFnSkRedirectHashFnLwtPushEncapFnLwtSeg6StoreBytesFnLwtSeg6AdjustSrhFnLwtSeg6ActionFnRcRepeatFnRcKeydownFnSkbCgroupIdFnGetCurrentCgroupIdFnGetLocalStorageFnSkSelectReuseportFnSkbAncestorCgroupIdFnSkLookupTcpFnSkLookupUdpFnSkReleaseFnMapPushElemFnMapPopElemFnMapPeekElemFnMsgPushDataFnMsgPopDataFnRcPointerRelFnSpinLockFnSpinUnlockFnSkFullsockFnTcpSockFnSkbEcnSetCeFnGetListenerSockFnSkcLookupTcpFnTcpCheckSyncookieFnSysctlGetNameFnSysctlGetCurrentValueFnSysctlGetNewValueFnSysctlSetNewValueFnStrtolFnStrtoulFnSkStorageGetFnSkStorageDeleteFnSendSignalFnTcpGenSyncookieFnSkbOutputFnProbeReadUserFnProbeReadKernelFnProbeReadUserStrFnProbeReadKernelStrFnTcpSendAckFnSendSignalThreadFnJiffies64FnReadBranchRecordsFnGetNsCurrentPidTgidFnXdpOutputFnGetNetnsCookieFnGetCurrentAncestorCgroupIdFnSkAssignFnKtimeGetBootNsFnSeqPrintfFnSeqWriteFnSkCgroupIdFnSkAncestorCgroupIdFnRingbufOutputFnRingbufReserveFnRingbufSubmi
tFnRingbufDiscardFnRingbufQueryFnCsumLevelFnSkcToTcp6SockFnSkcToTcpSockFnSkcToTcpTimewaitSockFnSkcToTcpRequestSockFnSkcToUdp6SockFnGetTaskStackFnLoadHdrOptFnStoreHdrOptFnReserveHdrOptFnInodeStorageGetFnInodeStorageDeleteFnDPathFnCopyFromUserFnSnprintfBtfFnSeqPrintfBtfFnSkbCgroupClassidFnRedirectNeighFnPerCpuPtrFnThisCpuPtrFnRedirectPeerFnTaskStorageGetFnTaskStorageDeleteFnGetCurrentTaskBtfFnBprmOptsSetFnKtimeGetCoarseNsFnImaInodeHashFnSockFromFileFnCheckMtuFnForEachMapElemFnSnprintfFnSysBpfFnBtfFindByNameKindFnSysCloseFnTimerInitFnTimerSetCallbackFnTimerStartFnTimerCancelFnGetFuncIpFnGetAttachCookieFnTaskPtRegsFnGetBranchSnapshotFnTraceVprintkFnSkcToUnixSockFnKallsymsLookupNameFnFindVmaFnLoopFnStrncmpFnGetFuncArgFnGetFuncRetFnGetFuncArgCntFnGetRetvalFnSetRetvalFnXdpGetBuffLenFnXdpLoadBytesFnXdpStoreBytesFnCopyFromUserTaskFnSkbSetTstampFnImaFileHashFnKptrXchgFnMapLookupPercpuElemFnSkcToMptcpSockFnDynptrFromMemFnRingbufReserveDynptrFnRingbufSubmitDynptrFnRingbufDiscardDynptrFnDynptrReadFnDynptrWriteFnDynptrDatamaxBuiltinFunc"
const _BuiltinFunc_name = "FnUnspecFnMapLookupElemFnMapUpdateElemFnMapDeleteElemFnProbeReadFnKtimeGetNsFnTracePrintkFnGetPrandomU32FnGetSmpProcessorIdFnSkbStoreBytesFnL3CsumReplaceFnL4CsumReplaceFnTailCallFnCloneRedirectFnGetCurrentPidTgidFnGetCurrentUidGidFnGetCurrentCommFnGetCgroupClassidFnSkbVlanPushFnSkbVlanPopFnSkbGetTunnelKeyFnSkbSetTunnelKeyFnPerfEventReadFnRedirectFnGetRouteRealmFnPerfEventOutputFnSkbLoadBytesFnGetStackidFnCsumDiffFnSkbGetTunnelOptFnSkbSetTunnelOptFnSkbChangeProtoFnSkbChangeTypeFnSkbUnderCgroupFnGetHashRecalcFnGetCurrentTaskFnProbeWriteUserFnCurrentTaskUnderCgroupFnSkbChangeTailFnSkbPullDataFnCsumUpdateFnSetHashInvalidFnGetNumaNodeIdFnSkbChangeHeadFnXdpAdjustHeadFnProbeReadStrFnGetSocketCookieFnGetSocketUidFnSetHashFnSetsockoptFnSkbAdjustRoomFnRedirectMapFnSkRedirectMapFnSockMapUpdateFnXdpAdjustMetaFnPerfEventReadValueFnPerfProgReadValueFnGetsockoptFnOverrideReturnFnSockOpsCbFlagsSetFnMsgRedirectMapFnMsgApplyBytesFnMsgCorkBytesFnMsgPullDataFnBindFnXdpAdjustTailFnSkbGetXfrmStateFnGetStackFnSkbLoadBytesRelativeFnFibLookupFnSockHashUpdateFnMsgRedirectHashFnSkRedirectHashFnLwtPushEncapFnLwtSeg6StoreBytesFnLwtSeg6AdjustSrhFnLwtSeg6ActionFnRcRepeatFnRcKeydownFnSkbCgroupIdFnGetCurrentCgroupIdFnGetLocalStorageFnSkSelectReuseportFnSkbAncestorCgroupIdFnSkLookupTcpFnSkLookupUdpFnSkReleaseFnMapPushElemFnMapPopElemFnMapPeekElemFnMsgPushDataFnMsgPopDataFnRcPointerRelFnSpinLockFnSpinUnlockFnSkFullsockFnTcpSockFnSkbEcnSetCeFnGetListenerSockFnSkcLookupTcpFnTcpCheckSyncookieFnSysctlGetNameFnSysctlGetCurrentValueFnSysctlGetNewValueFnSysctlSetNewValueFnStrtolFnStrtoulFnSkStorageGetFnSkStorageDeleteFnSendSignalFnTcpGenSyncookieFnSkbOutputFnProbeReadUserFnProbeReadKernelFnProbeReadUserStrFnProbeReadKernelStrFnTcpSendAckFnSendSignalThreadFnJiffies64FnReadBranchRecordsFnGetNsCurrentPidTgidFnXdpOutputFnGetNetnsCookieFnGetCurrentAncestorCgroupIdFnSkAssignFnKtimeGetBootNsFnSeqPrintfFnSeqWriteFnSkCgroupIdFnSkAncestorCgroupIdFnRingbufOutputFnRingbufReserveFnRingbufSubmi
tFnRingbufDiscardFnRingbufQueryFnCsumLevelFnSkcToTcp6SockFnSkcToTcpSockFnSkcToTcpTimewaitSockFnSkcToTcpRequestSockFnSkcToUdp6SockFnGetTaskStackFnLoadHdrOptFnStoreHdrOptFnReserveHdrOptFnInodeStorageGetFnInodeStorageDeleteFnDPathFnCopyFromUserFnSnprintfBtfFnSeqPrintfBtfFnSkbCgroupClassidFnRedirectNeighFnPerCpuPtrFnThisCpuPtrFnRedirectPeerFnTaskStorageGetFnTaskStorageDeleteFnGetCurrentTaskBtfFnBprmOptsSetFnKtimeGetCoarseNsFnImaInodeHashFnSockFromFileFnCheckMtuFnForEachMapElemFnSnprintfFnSysBpfFnBtfFindByNameKindFnSysCloseFnTimerInitFnTimerSetCallbackFnTimerStartFnTimerCancelFnGetFuncIpFnGetAttachCookieFnTaskPtRegsFnGetBranchSnapshotFnTraceVprintkFnSkcToUnixSockFnKallsymsLookupNameFnFindVmaFnLoopFnStrncmpFnGetFuncArgFnGetFuncRetFnGetFuncArgCntFnGetRetvalFnSetRetvalFnXdpGetBuffLenFnXdpLoadBytesFnXdpStoreBytesFnCopyFromUserTaskFnSkbSetTstampFnImaFileHashFnKptrXchgFnMapLookupPercpuElemFnSkcToMptcpSockFnDynptrFromMemFnRingbufReserveDynptrFnRingbufSubmitDynptrFnRingbufDiscardDynptrFnDynptrReadFnDynptrWriteFnDynptrDataFnTcpRawGenSyncookieIpv4FnTcpRawGenSyncookieIpv6FnTcpRawCheckSyncookieIpv4FnTcpRawCheckSyncookieIpv6FnKtimeGetTaiNsFnUserRingbufDrainFnCgrpStorageGetFnCgrpStorageDeletemaxBuiltinFunc"
var _BuiltinFunc_index = [...]uint16{0, 8, 23, 38, 53, 64, 76, 89, 104, 123, 138, 153, 168, 178, 193, 212, 230, 246, 264, 277, 289, 306, 323, 338, 348, 363, 380, 394, 406, 416, 433, 450, 466, 481, 497, 512, 528, 544, 568, 583, 596, 608, 624, 639, 654, 669, 683, 700, 714, 723, 735, 750, 763, 778, 793, 808, 828, 847, 859, 875, 894, 910, 925, 939, 952, 958, 973, 990, 1000, 1022, 1033, 1049, 1066, 1082, 1096, 1115, 1133, 1148, 1158, 1169, 1182, 1202, 1219, 1238, 1259, 1272, 1285, 1296, 1309, 1321, 1334, 1347, 1359, 1373, 1383, 1395, 1407, 1416, 1429, 1446, 1460, 1479, 1494, 1517, 1536, 1555, 1563, 1572, 1586, 1603, 1615, 1632, 1643, 1658, 1675, 1693, 1713, 1725, 1743, 1754, 1773, 1794, 1805, 1821, 1849, 1859, 1875, 1886, 1896, 1908, 1928, 1943, 1959, 1974, 1990, 2004, 2015, 2030, 2044, 2066, 2087, 2102, 2116, 2128, 2141, 2156, 2173, 2193, 2200, 2214, 2227, 2241, 2259, 2274, 2285, 2297, 2311, 2327, 2346, 2365, 2378, 2396, 2410, 2424, 2434, 2450, 2460, 2468, 2487, 2497, 2508, 2526, 2538, 2551, 2562, 2579, 2591, 2610, 2624, 2639, 2659, 2668, 2674, 2683, 2695, 2707, 2722, 2733, 2744, 2759, 2773, 2788, 2806, 2820, 2833, 2843, 2864, 2880, 2895, 2917, 2938, 2960, 2972, 2985, 2997, 3011}
var _BuiltinFunc_index = [...]uint16{0, 8, 23, 38, 53, 64, 76, 89, 104, 123, 138, 153, 168, 178, 193, 212, 230, 246, 264, 277, 289, 306, 323, 338, 348, 363, 380, 394, 406, 416, 433, 450, 466, 481, 497, 512, 528, 544, 568, 583, 596, 608, 624, 639, 654, 669, 683, 700, 714, 723, 735, 750, 763, 778, 793, 808, 828, 847, 859, 875, 894, 910, 925, 939, 952, 958, 973, 990, 1000, 1022, 1033, 1049, 1066, 1082, 1096, 1115, 1133, 1148, 1158, 1169, 1182, 1202, 1219, 1238, 1259, 1272, 1285, 1296, 1309, 1321, 1334, 1347, 1359, 1373, 1383, 1395, 1407, 1416, 1429, 1446, 1460, 1479, 1494, 1517, 1536, 1555, 1563, 1572, 1586, 1603, 1615, 1632, 1643, 1658, 1675, 1693, 1713, 1725, 1743, 1754, 1773, 1794, 1805, 1821, 1849, 1859, 1875, 1886, 1896, 1908, 1928, 1943, 1959, 1974, 1990, 2004, 2015, 2030, 2044, 2066, 2087, 2102, 2116, 2128, 2141, 2156, 2173, 2193, 2200, 2214, 2227, 2241, 2259, 2274, 2285, 2297, 2311, 2327, 2346, 2365, 2378, 2396, 2410, 2424, 2434, 2450, 2460, 2468, 2487, 2497, 2508, 2526, 2538, 2551, 2562, 2579, 2591, 2610, 2624, 2639, 2659, 2668, 2674, 2683, 2695, 2707, 2722, 2733, 2744, 2759, 2773, 2788, 2806, 2820, 2833, 2843, 2864, 2880, 2895, 2917, 2938, 2960, 2972, 2985, 2997, 3021, 3045, 3071, 3097, 3112, 3130, 3146, 3165, 3179}
func (i BuiltinFunc) String() string {
if i < 0 || i >= BuiltinFunc(len(_BuiltinFunc_index)-1) {

View file

@ -226,6 +226,13 @@ func (ins *Instruction) IsFunctionCall() bool {
return ins.OpCode.JumpOp() == Call && ins.Src == PseudoCall
}
// IsKfuncCall returns true if the instruction calls a kfunc.
//
// This is not the same thing as a BPF helper call.
func (ins *Instruction) IsKfuncCall() bool {
return ins.OpCode.JumpOp() == Call && ins.Src == PseudoKfuncCall
}
// IsLoadOfFunctionPointer returns true if the instruction loads a function pointer.
func (ins *Instruction) IsLoadOfFunctionPointer() bool {
return ins.OpCode.IsDWordLoad() && ins.Src == PseudoFunc
@ -318,10 +325,14 @@ func (ins Instruction) Format(f fmt.State, c rune) {
case cls.IsJump():
switch jop := op.JumpOp(); jop {
case Call:
if ins.Src == PseudoCall {
switch ins.Src {
case PseudoCall:
// bpf-to-bpf call
fmt.Fprint(f, ins.Constant)
} else {
case PseudoKfuncCall:
// kfunc call
fmt.Fprintf(f, "Kfunc(%d)", ins.Constant)
default:
fmt.Fprint(f, BuiltinFunc(ins.Constant))
}
@ -354,6 +365,13 @@ func (ins Instruction) Size() uint64 {
return uint64(InstructionSize * ins.OpCode.rawInstructions())
}
// WithMetadata sets the given Metadata on the Instruction. e.g. to copy
// Metadata from another Instruction when replacing it.
func (ins Instruction) WithMetadata(meta Metadata) Instruction {
ins.Metadata = meta
return ins
}
type symbolMeta struct{}
// WithSymbol marks the Instruction as a Symbol, which other Instructions

View file

@ -4,10 +4,10 @@ package asm
// JumpOp affect control flow.
//
// msb lsb
// +----+-+---+
// |OP |s|cls|
// +----+-+---+
// msb lsb
// +----+-+---+
// |OP |s|cls|
// +----+-+---+
type JumpOp uint8
const jumpMask OpCode = aluMask

View file

@ -4,10 +4,10 @@ package asm
// Mode for load and store operations
//
// msb lsb
// +---+--+---+
// |MDE|sz|cls|
// +---+--+---+
// msb lsb
// +---+--+---+
// |MDE|sz|cls|
// +---+--+---+
type Mode uint8
const modeMask OpCode = 0xe0
@ -30,10 +30,10 @@ const (
// Size of load and store operations
//
// msb lsb
// +---+--+---+
// |mde|SZ|cls|
// +---+--+---+
// msb lsb
// +---+--+---+
// |mde|SZ|cls|
// +---+--+---+
type Size uint8
const sizeMask OpCode = 0x18

View file

@ -9,10 +9,10 @@ import (
// Class of operations
//
// msb lsb
// +---+--+---+
// | ?? |CLS|
// +---+--+---+
// msb lsb
// +---+--+---+
// | ?? |CLS|
// +---+--+---+
type Class uint8
const classMask OpCode = 0x07
@ -70,10 +70,10 @@ func (cls Class) isJumpOrALU() bool {
//
// Its encoding is defined by a Class value:
//
// msb lsb
// +----+-+---+
// | ???? |CLS|
// +----+-+---+
// msb lsb
// +----+-+---+
// | ???? |CLS|
// +----+-+---+
type OpCode uint8
// InvalidOpCode is returned by setters on OpCode

View file

@ -35,10 +35,11 @@ const (
// Pseudo registers used by 64bit loads and jumps
const (
PseudoMapFD = R1 // BPF_PSEUDO_MAP_FD
PseudoMapValue = R2 // BPF_PSEUDO_MAP_VALUE
PseudoCall = R1 // BPF_PSEUDO_CALL
PseudoFunc = R4 // BPF_PSEUDO_FUNC
PseudoMapFD = R1 // BPF_PSEUDO_MAP_FD
PseudoMapValue = R2 // BPF_PSEUDO_MAP_VALUE
PseudoCall = R1 // BPF_PSEUDO_CALL
PseudoFunc = R4 // BPF_PSEUDO_FUNC
PseudoKfuncCall = R2 // BPF_PSEUDO_KFUNC_CALL
)
func (r Register) String() string {

View file

@ -51,11 +51,12 @@ func _() {
_ = x[AttachSkReuseportSelect-39]
_ = x[AttachSkReuseportSelectOrMigrate-40]
_ = x[AttachPerfEvent-41]
_ = x[AttachTraceKprobeMulti-42]
}
const _AttachType_name = "NoneCGroupInetEgressCGroupInetSockCreateCGroupSockOpsSkSKBStreamParserSkSKBStreamVerdictCGroupDeviceSkMsgVerdictCGroupInet4BindCGroupInet6BindCGroupInet4ConnectCGroupInet6ConnectCGroupInet4PostBindCGroupInet6PostBindCGroupUDP4SendmsgCGroupUDP6SendmsgLircMode2FlowDissectorCGroupSysctlCGroupUDP4RecvmsgCGroupUDP6RecvmsgCGroupGetsockoptCGroupSetsockoptTraceRawTpTraceFEntryTraceFExitModifyReturnLSMMacTraceIterCgroupInet4GetPeernameCgroupInet6GetPeernameCgroupInet4GetSocknameCgroupInet6GetSocknameXDPDevMapCgroupInetSockReleaseXDPCPUMapSkLookupXDPSkSKBVerdictSkReuseportSelectSkReuseportSelectOrMigratePerfEvent"
const _AttachType_name = "NoneCGroupInetEgressCGroupInetSockCreateCGroupSockOpsSkSKBStreamParserSkSKBStreamVerdictCGroupDeviceSkMsgVerdictCGroupInet4BindCGroupInet6BindCGroupInet4ConnectCGroupInet6ConnectCGroupInet4PostBindCGroupInet6PostBindCGroupUDP4SendmsgCGroupUDP6SendmsgLircMode2FlowDissectorCGroupSysctlCGroupUDP4RecvmsgCGroupUDP6RecvmsgCGroupGetsockoptCGroupSetsockoptTraceRawTpTraceFEntryTraceFExitModifyReturnLSMMacTraceIterCgroupInet4GetPeernameCgroupInet6GetPeernameCgroupInet4GetSocknameCgroupInet6GetSocknameXDPDevMapCgroupInetSockReleaseXDPCPUMapSkLookupXDPSkSKBVerdictSkReuseportSelectSkReuseportSelectOrMigratePerfEventTraceKprobeMulti"
var _AttachType_index = [...]uint16{0, 4, 20, 40, 53, 70, 88, 100, 112, 127, 142, 160, 178, 197, 216, 233, 250, 259, 272, 284, 301, 318, 334, 350, 360, 371, 381, 393, 399, 408, 430, 452, 474, 496, 505, 526, 535, 543, 546, 558, 575, 601, 610}
var _AttachType_index = [...]uint16{0, 4, 20, 40, 53, 70, 88, 100, 112, 127, 142, 160, 178, 197, 216, 233, 250, 259, 272, 284, 301, 318, 334, 350, 360, 371, 381, 393, 399, 408, 430, 452, 474, 496, 505, 526, 535, 543, 546, 558, 575, 601, 610, 626}
func (i AttachType) String() string {
if i >= AttachType(len(_AttachType_index)-1) {

View file

@ -2,7 +2,6 @@ package btf
import (
"bufio"
"bytes"
"debug/elf"
"encoding/binary"
"errors"
@ -11,6 +10,7 @@ import (
"math"
"os"
"reflect"
"sync"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
@ -21,34 +21,41 @@ const btfMagic = 0xeB9F
// Errors returned by BTF functions.
var (
ErrNotSupported = internal.ErrNotSupported
ErrNotFound = errors.New("not found")
ErrNoExtendedInfo = errors.New("no extended info")
ErrNotSupported = internal.ErrNotSupported
ErrNotFound = errors.New("not found")
ErrNoExtendedInfo = errors.New("no extended info")
ErrMultipleMatches = errors.New("multiple matching types")
)
// ID represents the unique ID of a BTF object.
type ID = sys.BTFID
// Spec represents decoded BTF.
// Spec allows querying a set of Types and loading the set into the
// kernel.
type Spec struct {
// Data from .BTF.
rawTypes []rawType
strings *stringTable
// All types contained by the spec. For the base type, the position of
// a type in the slice is its ID.
types types
// All types contained by the spec, not including types from the base in
// case the spec was parsed from split BTF.
types []Type
// Type IDs indexed by type.
typeIDs map[Type]TypeID
// The ID of the first type in types.
firstTypeID TypeID
// Types indexed by essential name.
// Includes all struct flavors and types with the same name.
namedTypes map[essentialName][]Type
// String table from ELF, may be nil.
strings *stringTable
// Byte order of the ELF we decoded the spec from, may be nil.
byteOrder binary.ByteOrder
}
var btfHeaderLen = binary.Size(&btfHeader{})
type btfHeader struct {
Magic uint16
Version uint8
@ -73,6 +80,18 @@ func (h *btfHeader) stringStart() int64 {
return int64(h.HdrLen + h.StringOff)
}
// newSpec creates a Spec containing only Void.
//
// Type ID 0 is always Void, so the returned Spec starts with a single
// (*Void)(nil) entry and a firstTypeID of 0.
func newSpec() *Spec {
	// Use keyed fields instead of a positional literal so that adding or
	// reordering fields in Spec cannot silently misassign them.
	return &Spec{
		types:       []Type{(*Void)(nil)},
		typeIDs:     map[Type]TypeID{(*Void)(nil): 0},
		firstTypeID: 0,
		namedTypes:  make(map[essentialName][]Type),
		strings:     nil,
		byteOrder:   nil,
	}
}
// LoadSpec opens file and calls LoadSpecFromReader on it.
func LoadSpec(file string) (*Spec, error) {
fh, err := os.Open(file)
@ -92,10 +111,7 @@ func LoadSpecFromReader(rd io.ReaderAt) (*Spec, error) {
file, err := internal.NewSafeELFFile(rd)
if err != nil {
if bo := guessRawBTFByteOrder(rd); bo != nil {
// Try to parse a naked BTF blob. This will return an error if
// we encounter a Datasec, since we can't fix it up.
spec, err := loadRawSpec(io.NewSectionReader(rd, 0, math.MaxInt64), bo, nil, nil)
return spec, err
return loadRawSpec(io.NewSectionReader(rd, 0, math.MaxInt64), bo, nil)
}
return nil, err
@ -106,7 +122,7 @@ func LoadSpecFromReader(rd io.ReaderAt) (*Spec, error) {
// LoadSpecAndExtInfosFromReader reads from an ELF.
//
// ExtInfos may be nil if the ELF doesn't contain section metadta.
// ExtInfos may be nil if the ELF doesn't contain section metadata.
// Returns ErrNotFound if the ELF contains no BTF.
func LoadSpecAndExtInfosFromReader(rd io.ReaderAt) (*Spec, *ExtInfos, error) {
file, err := internal.NewSafeELFFile(rd)
@ -119,7 +135,7 @@ func LoadSpecAndExtInfosFromReader(rd io.ReaderAt) (*Spec, *ExtInfos, error) {
return nil, nil, err
}
extInfos, err := loadExtInfosFromELF(file, spec.types, spec.strings)
extInfos, err := loadExtInfosFromELF(file, spec)
if err != nil && !errors.Is(err, ErrNotFound) {
return nil, nil, err
}
@ -127,40 +143,40 @@ func LoadSpecAndExtInfosFromReader(rd io.ReaderAt) (*Spec, *ExtInfos, error) {
return spec, extInfos, nil
}
// variableOffsets extracts all symbols offsets from an ELF and indexes them by
// symbolOffsets extracts all symbols offsets from an ELF and indexes them by
// section and variable name.
//
// References to variables in BTF data sections carry unsigned 32-bit offsets.
// Some ELF symbols (e.g. in vmlinux) may point to virtual memory that is well
// beyond this range. Since these symbols cannot be described by BTF info,
// ignore them here.
func variableOffsets(file *internal.SafeELFFile) (map[variable]uint32, error) {
func symbolOffsets(file *internal.SafeELFFile) (map[symbol]uint32, error) {
symbols, err := file.Symbols()
if err != nil {
return nil, fmt.Errorf("can't read symbols: %v", err)
}
variableOffsets := make(map[variable]uint32)
for _, symbol := range symbols {
if idx := symbol.Section; idx >= elf.SHN_LORESERVE && idx <= elf.SHN_HIRESERVE {
offsets := make(map[symbol]uint32)
for _, sym := range symbols {
if idx := sym.Section; idx >= elf.SHN_LORESERVE && idx <= elf.SHN_HIRESERVE {
// Ignore things like SHN_ABS
continue
}
if symbol.Value > math.MaxUint32 {
if sym.Value > math.MaxUint32 {
// VarSecinfo offset is u32, cannot reference symbols in higher regions.
continue
}
if int(symbol.Section) >= len(file.Sections) {
return nil, fmt.Errorf("symbol %s: invalid section %d", symbol.Name, symbol.Section)
if int(sym.Section) >= len(file.Sections) {
return nil, fmt.Errorf("symbol %s: invalid section %d", sym.Name, sym.Section)
}
secName := file.Sections[symbol.Section].Name
variableOffsets[variable{secName, symbol.Name}] = uint32(symbol.Value)
secName := file.Sections[sym.Section].Name
offsets[symbol{secName, sym.Name}] = uint32(sym.Value)
}
return variableOffsets, nil
return offsets, nil
}
func loadSpecFromELF(file *internal.SafeELFFile) (*Spec, error) {
@ -190,7 +206,7 @@ func loadSpecFromELF(file *internal.SafeELFFile) (*Spec, error) {
return nil, fmt.Errorf("btf: %w", ErrNotFound)
}
vars, err := variableOffsets(file)
offsets, err := symbolOffsets(file)
if err != nil {
return nil, err
}
@ -199,51 +215,66 @@ func loadSpecFromELF(file *internal.SafeELFFile) (*Spec, error) {
return nil, fmt.Errorf("compressed BTF is not supported")
}
rawTypes, rawStrings, err := parseBTF(btfSection.ReaderAt, file.ByteOrder, nil)
spec, err := loadRawSpec(btfSection.ReaderAt, file.ByteOrder, nil)
if err != nil {
return nil, err
}
err = fixupDatasec(rawTypes, rawStrings, sectionSizes, vars)
err = fixupDatasec(spec.types, sectionSizes, offsets)
if err != nil {
return nil, err
}
return inflateSpec(rawTypes, rawStrings, file.ByteOrder, nil)
return spec, nil
}
func loadRawSpec(btf io.ReaderAt, bo binary.ByteOrder,
baseTypes types, baseStrings *stringTable) (*Spec, error) {
func loadRawSpec(btf io.ReaderAt, bo binary.ByteOrder, base *Spec) (*Spec, error) {
var (
baseStrings *stringTable
firstTypeID TypeID
err error
)
if base != nil {
if base.firstTypeID != 0 {
return nil, fmt.Errorf("can't use split BTF as base")
}
if base.strings == nil {
return nil, fmt.Errorf("parse split BTF: base must be loaded from an ELF")
}
baseStrings = base.strings
firstTypeID, err = base.nextTypeID()
if err != nil {
return nil, err
}
}
rawTypes, rawStrings, err := parseBTF(btf, bo, baseStrings)
if err != nil {
return nil, err
}
return inflateSpec(rawTypes, rawStrings, bo, baseTypes)
}
func inflateSpec(rawTypes []rawType, rawStrings *stringTable, bo binary.ByteOrder,
baseTypes types) (*Spec, error) {
types, err := inflateRawTypes(rawTypes, baseTypes, rawStrings)
types, err := inflateRawTypes(rawTypes, rawStrings, base)
if err != nil {
return nil, err
}
typeIDs, typesByName := indexTypes(types, TypeID(len(baseTypes)))
typeIDs, typesByName := indexTypes(types, firstTypeID)
return &Spec{
rawTypes: rawTypes,
namedTypes: typesByName,
typeIDs: typeIDs,
types: types,
strings: rawStrings,
byteOrder: bo,
namedTypes: typesByName,
typeIDs: typeIDs,
types: types,
firstTypeID: firstTypeID,
strings: rawStrings,
byteOrder: bo,
}, nil
}
func indexTypes(types []Type, typeIDOffset TypeID) (map[Type]TypeID, map[essentialName][]Type) {
func indexTypes(types []Type, firstTypeID TypeID) (map[Type]TypeID, map[essentialName][]Type) {
namedTypes := 0
for _, typ := range types {
if typ.TypeName() != "" {
@ -261,7 +292,7 @@ func indexTypes(types []Type, typeIDOffset TypeID) (map[Type]TypeID, map[essenti
if name := newEssentialName(typ.TypeName()); name != "" {
typesByName[name] = append(typesByName[name], typ)
}
typeIDs[typ] = TypeID(i) + typeIDOffset
typeIDs[typ] = firstTypeID + TypeID(i)
}
return typeIDs, typesByName
@ -272,20 +303,70 @@ func indexTypes(types []Type, typeIDOffset TypeID) (map[Type]TypeID, map[essenti
// Defaults to /sys/kernel/btf/vmlinux and falls back to scanning the file system
// for vmlinux ELFs. Returns an error wrapping ErrNotSupported if BTF is not enabled.
func LoadKernelSpec() (*Spec, error) {
	cached, _, err := kernelSpec()
	if err != nil {
		return nil, err
	}
	// Hand out a copy so callers can't mutate the shared cached spec.
	return cached.Copy(), nil
}
// kernelBTF caches the parsed kernel BTF so it is only loaded once.
// It is read and written by kernelSpec() and cleared by FlushKernelSpec(),
// both of which take the embedded RWMutex.
var kernelBTF struct {
	sync.RWMutex
	// Cached spec; nil until the first successful load.
	spec *Spec
	// True if the spec was read from an ELF instead of raw BTF in /sys.
	fallback bool
}
// FlushKernelSpec removes any cached kernel type information, forcing the
// next kernelSpec() call to reload it.
func FlushKernelSpec() {
	kernelBTF.Lock()
	kernelBTF.spec = nil
	kernelBTF.fallback = false
	kernelBTF.Unlock()
}
// kernelSpec returns the cached kernel BTF spec, loading and caching it on
// first use. The bool result mirrors kernelBTF.fallback: true if the spec
// was read from an ELF instead of raw BTF in /sys.
func kernelSpec() (*Spec, bool, error) {
	// Fast path: read the cache under the read lock.
	kernelBTF.RLock()
	spec, fallback := kernelBTF.spec, kernelBTF.fallback
	kernelBTF.RUnlock()
	if spec == nil {
		// Slow path: take the write lock and re-read, since another
		// goroutine may have filled the cache between RUnlock and Lock.
		// The deferred Unlock keeps the lock held across loadKernelSpec
		// below, so the kernel BTF is parsed at most once.
		kernelBTF.Lock()
		defer kernelBTF.Unlock()
		spec, fallback = kernelBTF.spec, kernelBTF.fallback
	}
	if spec != nil {
		return spec, fallback, nil
	}
	// Cache miss: load and publish while still holding the write lock.
	spec, fallback, err := loadKernelSpec()
	if err != nil {
		return nil, false, err
	}
	kernelBTF.spec, kernelBTF.fallback = spec, fallback
	return spec, fallback, nil
}
func loadKernelSpec() (_ *Spec, fallback bool, _ error) {
fh, err := os.Open("/sys/kernel/btf/vmlinux")
if err == nil {
defer fh.Close()
return loadRawSpec(fh, internal.NativeEndian, nil, nil)
spec, err := loadRawSpec(fh, internal.NativeEndian, nil)
return spec, false, err
}
file, err := findVMLinux()
if err != nil {
return nil, err
return nil, false, err
}
defer file.Close()
return loadSpecFromELF(file)
spec, err := loadSpecFromELF(file)
return spec, true, err
}
// findVMLinux scans multiple well-known paths for vmlinux kernel images.
@ -388,140 +469,122 @@ func parseBTF(btf io.ReaderAt, bo binary.ByteOrder, baseStrings *stringTable) ([
return rawTypes, rawStrings, nil
}
type variable struct {
type symbol struct {
section string
name string
}
func fixupDatasec(rawTypes []rawType, rawStrings *stringTable, sectionSizes map[string]uint32, variableOffsets map[variable]uint32) error {
for i, rawType := range rawTypes {
if rawType.Kind() != kindDatasec {
// fixupDatasec attempts to patch up missing info in Datasecs and its members by
// supplementing them with information from the ELF headers and symbol table.
func fixupDatasec(types []Type, sectionSizes map[string]uint32, offsets map[symbol]uint32) error {
for _, typ := range types {
ds, ok := typ.(*Datasec)
if !ok {
continue
}
name, err := rawStrings.Lookup(rawType.NameOff)
if err != nil {
return err
}
name := ds.Name
if name == ".kconfig" || name == ".ksyms" {
return fmt.Errorf("reference to %s: %w", name, ErrNotSupported)
}
// Some Datasecs are virtual and don't have corresponding ELF sections.
switch name {
case ".ksyms":
// .ksyms describes forward declarations of kfunc signatures.
// Nothing to fix up, all sizes and offsets are 0.
for _, vsi := range ds.Vars {
_, ok := vsi.Type.(*Func)
if !ok {
// Only Funcs are supported in the .ksyms Datasec.
return fmt.Errorf("data section %s: expected *btf.Func, not %T: %w", name, vsi.Type, ErrNotSupported)
}
}
continue
case ".kconfig":
// .kconfig has a size of 0 and has all members' offsets set to 0.
// Fix up all offsets and set the Datasec's size.
if err := fixupDatasecLayout(ds); err != nil {
return err
}
// Fix up extern to global linkage to avoid a BTF verifier error.
for _, vsi := range ds.Vars {
vsi.Type.(*Var).Linkage = GlobalVar
}
if rawTypes[i].SizeType != 0 {
continue
}
size, ok := sectionSizes[name]
if ds.Size != 0 {
continue
}
ds.Size, ok = sectionSizes[name]
if !ok {
return fmt.Errorf("data section %s: missing size", name)
}
rawTypes[i].SizeType = size
secinfos := rawType.data.([]btfVarSecinfo)
for j, secInfo := range secinfos {
id := int(secInfo.Type - 1)
if id >= len(rawTypes) {
return fmt.Errorf("data section %s: invalid type id %d for variable %d", name, id, j)
}
varName, err := rawStrings.Lookup(rawTypes[id].NameOff)
if err != nil {
return fmt.Errorf("data section %s: can't get name for type %d: %w", name, id, err)
}
offset, ok := variableOffsets[variable{name, varName}]
for i := range ds.Vars {
symName := ds.Vars[i].Type.TypeName()
ds.Vars[i].Offset, ok = offsets[symbol{name, symName}]
if !ok {
return fmt.Errorf("data section %s: missing offset for variable %s", name, varName)
return fmt.Errorf("data section %s: missing offset for symbol %s", name, symName)
}
secinfos[j].Offset = offset
}
}
return nil
}
// fixupDatasecLayout computes the offset of every member of ds as well as
// ds.Size, using the size and natural alignment of each member's type.
func fixupDatasecLayout(ds *Datasec) error {
	var cursor uint32
	for i := range ds.Vars {
		v, ok := ds.Vars[i].Type.(*Var)
		if !ok {
			return fmt.Errorf("member %d: unsupported type %T", i, ds.Vars[i].Type)
		}
		size, err := Sizeof(v.Type)
		if err != nil {
			return fmt.Errorf("variable %s: getting size: %w", v.Name, err)
		}
		align, err := alignof(v.Type)
		if err != nil {
			return fmt.Errorf("variable %s: getting alignment: %w", v.Name, err)
		}
		// Bump the cursor to the next offset satisfying this member's
		// alignment, place the member there, then advance past it.
		cursor = internal.Align(cursor, uint32(align))
		ds.Vars[i].Offset = cursor
		cursor += uint32(size)
	}
	ds.Size = cursor
	return nil
}
// Copy creates a copy of Spec.
func (s *Spec) Copy() *Spec {
types := copyTypes(s.types, nil)
typeIDOffset := TypeID(0)
if len(s.types) != 0 {
typeIDOffset = s.typeIDs[s.types[0]]
}
typeIDs, typesByName := indexTypes(types, typeIDOffset)
typeIDs, typesByName := indexTypes(types, s.firstTypeID)
// NB: Other parts of spec are not copied since they are immutable.
return &Spec{
s.rawTypes,
s.strings,
types,
typeIDs,
s.firstTypeID,
typesByName,
s.strings,
s.byteOrder,
}
}
type marshalOpts struct {
ByteOrder binary.ByteOrder
StripFuncLinkage bool
}
func (s *Spec) marshal(opts marshalOpts) ([]byte, error) {
var (
buf bytes.Buffer
header = new(btfHeader)
headerLen = binary.Size(header)
)
// Reserve space for the header. We have to write it last since
// we don't know the size of the type section yet.
_, _ = buf.Write(make([]byte, headerLen))
// Write type section, just after the header.
for _, raw := range s.rawTypes {
switch {
case opts.StripFuncLinkage && raw.Kind() == kindFunc:
raw.SetLinkage(StaticFunc)
}
if err := raw.Marshal(&buf, opts.ByteOrder); err != nil {
return nil, fmt.Errorf("can't marshal BTF: %w", err)
}
}
typeLen := uint32(buf.Len() - headerLen)
// Write string section after type section.
stringsLen := s.strings.Length()
buf.Grow(stringsLen)
if err := s.strings.Marshal(&buf); err != nil {
return nil, err
}
// Fill out the header, and write it out.
header = &btfHeader{
Magic: btfMagic,
Version: 1,
Flags: 0,
HdrLen: uint32(headerLen),
TypeOff: 0,
TypeLen: typeLen,
StringOff: typeLen,
StringLen: uint32(stringsLen),
}
raw := buf.Bytes()
err := binary.Write(sliceWriter(raw[:headerLen]), opts.ByteOrder, header)
if err != nil {
return nil, fmt.Errorf("can't write header: %v", err)
}
return raw, nil
}
type sliceWriter []byte
func (sw sliceWriter) Write(p []byte) (int, error) {
@ -532,12 +595,31 @@ func (sw sliceWriter) Write(p []byte) (int, error) {
return copy(sw, p), nil
}
// nextTypeID returns the next unallocated type ID or an error if there are no
// more type IDs.
func (s *Spec) nextTypeID() (TypeID, error) {
	next := s.firstTypeID + TypeID(len(s.types))
	// Detect overflow of the TypeID space.
	if next < s.firstTypeID {
		return 0, fmt.Errorf("no more type IDs")
	}
	return next, nil
}
// TypeByID returns the BTF Type with the given type ID.
//
// Returns an error wrapping ErrNotFound if a Type with the given ID
// does not exist in the Spec.
func (s *Spec) TypeByID(id TypeID) (Type, error) {
return s.types.ByID(id)
if id < s.firstTypeID {
return nil, fmt.Errorf("look up type with ID %d (first ID is %d): %w", id, s.firstTypeID, ErrNotFound)
}
index := int(id - s.firstTypeID)
if index >= len(s.types) {
return nil, fmt.Errorf("look up type with ID %d: %w", id, ErrNotFound)
}
return s.types[index], nil
}
// TypeID returns the ID for a given Type.
@ -598,17 +680,19 @@ func (s *Spec) AnyTypeByName(name string) (Type, error) {
return types[0], nil
}
// TypeByName searches for a Type with a specific name. Since multiple
// Types with the same name can exist, the parameter typ is taken to
// narrow down the search in case of a clash.
// TypeByName searches for a Type with a specific name. Since multiple Types
// with the same name can exist, the parameter typ is taken to narrow down the
// search in case of a clash.
//
// typ must be a non-nil pointer to an implementation of a Type.
// On success, the address of the found Type will be copied to typ.
// typ must be a non-nil pointer to an implementation of a Type. On success, the
// address of the found Type will be copied to typ.
//
// Returns an error wrapping ErrNotFound if no matching
// Type exists in the Spec. If multiple candidates are found,
// an error is returned.
// Returns an error wrapping ErrNotFound if no matching Type exists in the Spec.
// Returns an error wrapping ErrMultipleTypes if multiple candidates are found.
func (s *Spec) TypeByName(name string, typ interface{}) error {
typeInterface := reflect.TypeOf((*Type)(nil)).Elem()
// typ may be **T or *Type
typValue := reflect.ValueOf(typ)
if typValue.Kind() != reflect.Ptr {
return fmt.Errorf("%T is not a pointer", typ)
@ -620,7 +704,12 @@ func (s *Spec) TypeByName(name string, typ interface{}) error {
}
wanted := typPtr.Type()
if !wanted.AssignableTo(reflect.TypeOf((*Type)(nil)).Elem()) {
if wanted == typeInterface {
// This is *Type. Unwrap the value's type.
wanted = typPtr.Elem().Type()
}
if !wanted.AssignableTo(typeInterface) {
return fmt.Errorf("%T does not satisfy Type interface", typ)
}
@ -636,14 +725,14 @@ func (s *Spec) TypeByName(name string, typ interface{}) error {
}
if candidate != nil {
return fmt.Errorf("type %s: multiple candidates for %T", name, typ)
return fmt.Errorf("type %s(%T): %w", name, typ, ErrMultipleMatches)
}
candidate = typ
}
if candidate == nil {
return fmt.Errorf("type %s: %w", name, ErrNotFound)
return fmt.Errorf("%s %s: %w", wanted, name, ErrNotFound)
}
typPtr.Set(reflect.ValueOf(candidate))
@ -656,12 +745,12 @@ func (s *Spec) TypeByName(name string, typ interface{}) error {
// Types from base are used to resolve references in the split BTF.
// The returned Spec only contains types from the split BTF, not from the base.
func LoadSplitSpecFromReader(r io.ReaderAt, base *Spec) (*Spec, error) {
return loadRawSpec(r, internal.NativeEndian, base.types, base.strings)
return loadRawSpec(r, internal.NativeEndian, base)
}
// TypesIterator iterates over types of a given spec.
type TypesIterator struct {
spec *Spec
types []Type
index int
// The last visited type in the spec.
Type Type
@ -669,229 +758,112 @@ type TypesIterator struct {
// Iterate returns the types iterator.
func (s *Spec) Iterate() *TypesIterator {
return &TypesIterator{spec: s, index: 0}
// We share the backing array of types with the Spec. This is safe since
// we don't allow deletion or shuffling of types.
return &TypesIterator{types: s.types, index: 0}
}
// Next returns true as long as there are any remaining types.
func (iter *TypesIterator) Next() bool {
if len(iter.spec.types) <= iter.index {
if len(iter.types) <= iter.index {
return false
}
iter.Type = iter.spec.types[iter.index]
iter.Type = iter.types[iter.index]
iter.index++
return true
}
// Handle is a reference to BTF loaded into the kernel.
type Handle struct {
fd *sys.FD
// Size of the raw BTF in bytes.
size uint32
}
// NewHandle loads BTF into the kernel.
//
// Returns ErrNotSupported if BTF is not supported.
func NewHandle(spec *Spec) (*Handle, error) {
if err := haveBTF(); err != nil {
return nil, err
}
if spec.byteOrder != internal.NativeEndian {
return nil, fmt.Errorf("can't load %s BTF on %s", spec.byteOrder, internal.NativeEndian)
}
btf, err := spec.marshal(marshalOpts{
ByteOrder: internal.NativeEndian,
StripFuncLinkage: haveFuncLinkage() != nil,
})
if err != nil {
return nil, fmt.Errorf("can't marshal BTF: %w", err)
}
if uint64(len(btf)) > math.MaxUint32 {
return nil, errors.New("BTF exceeds the maximum size")
}
attr := &sys.BtfLoadAttr{
Btf: sys.NewSlicePointer(btf),
BtfSize: uint32(len(btf)),
}
fd, err := sys.BtfLoad(attr)
if err != nil {
logBuf := make([]byte, 64*1024)
attr.BtfLogBuf = sys.NewSlicePointer(logBuf)
attr.BtfLogSize = uint32(len(logBuf))
attr.BtfLogLevel = 1
// NB: The syscall will never return ENOSPC as of 5.18-rc4.
_, _ = sys.BtfLoad(attr)
return nil, internal.ErrorWithLog(err, logBuf)
}
return &Handle{fd, attr.BtfSize}, nil
}
// NewHandleFromID returns the BTF handle for a given id.
//
// Prefer calling [ebpf.Program.Handle] or [ebpf.Map.Handle] if possible.
//
// Returns ErrNotExist, if there is no BTF with the given id.
//
// Requires CAP_SYS_ADMIN.
func NewHandleFromID(id ID) (*Handle, error) {
fd, err := sys.BtfGetFdById(&sys.BtfGetFdByIdAttr{
Id: uint32(id),
})
if err != nil {
return nil, fmt.Errorf("get FD for ID %d: %w", id, err)
}
info, err := newHandleInfoFromFD(fd)
if err != nil {
_ = fd.Close()
return nil, err
}
return &Handle{fd, info.size}, nil
}
// Spec parses the kernel BTF into Go types.
//
// base is used to decode split BTF and may be nil.
func (h *Handle) Spec(base *Spec) (*Spec, error) {
var btfInfo sys.BtfInfo
btfBuffer := make([]byte, h.size)
btfInfo.Btf, btfInfo.BtfSize = sys.NewSlicePointerLen(btfBuffer)
if err := sys.ObjInfo(h.fd, &btfInfo); err != nil {
return nil, err
}
var baseTypes types
var baseStrings *stringTable
if base != nil {
baseTypes = base.types
baseStrings = base.strings
}
return loadRawSpec(bytes.NewReader(btfBuffer), internal.NativeEndian, baseTypes, baseStrings)
}
// Close destroys the handle.
//
// Subsequent calls to FD will return an invalid value.
func (h *Handle) Close() error {
if h == nil {
return nil
}
return h.fd.Close()
}
// FD returns the file descriptor for the handle.
func (h *Handle) FD() int {
return h.fd.Int()
}
// Info returns metadata about the handle.
func (h *Handle) Info() (*HandleInfo, error) {
return newHandleInfoFromFD(h.fd)
}
// marshalBTF renders a minimal BTF blob (header, type section, string
// section) in the given byte order. types must have a fixed binary size;
// it is used by the feature probes below.
func marshalBTF(types interface{}, strings []byte, bo binary.ByteOrder) []byte {
	const minHeaderLength = 24

	typesLen := uint32(binary.Size(types))

	var buf bytes.Buffer
	// binary.Write errors are deliberately ignored: writes to a
	// bytes.Buffer cannot fail and the inputs have fixed sizes.
	_ = binary.Write(&buf, bo, &btfHeader{
		Magic:     btfMagic,
		Version:   1,
		HdrLen:    minHeaderLength,
		TypeOff:   0,
		TypeLen:   typesLen,
		StringOff: typesLen,
		StringLen: uint32(len(strings)),
	})
	_ = binary.Write(&buf, bo, types)
	buf.Write(strings)

	return buf.Bytes()
}
var haveBTF = internal.FeatureTest("BTF", "5.1", func() error {
var (
types struct {
Integer btfType
Var btfType
btfVar struct{ Linkage uint32 }
}
strings = []byte{0, 'a', 0}
)
// We use a BTF_KIND_VAR here, to make sure that
// the kernel understands BTF at least as well as we
// do. BTF_KIND_VAR was introduced ~5.1.
types.Integer.SetKind(kindPointer)
types.Var.NameOff = 1
types.Var.SetKind(kindVar)
types.Var.SizeType = 1
btf := marshalBTF(&types, strings, internal.NativeEndian)
fd, err := sys.BtfLoad(&sys.BtfLoadAttr{
Btf: sys.NewSlicePointer(btf),
BtfSize: uint32(len(btf)),
})
// haveBTF attempts to load a BTF blob containing an Int. It should pass on any
// kernel that supports BPF_BTF_LOAD.
var haveBTF = internal.NewFeatureTest("BTF", "4.18", func() error {
// 0-length anonymous integer
err := probeBTF(&Int{})
if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) {
// Treat both EINVAL and EPERM as not supported: loading the program
// might still succeed without BTF.
return internal.ErrNotSupported
}
if err != nil {
return err
}
fd.Close()
return nil
return err
})
var haveFuncLinkage = internal.FeatureTest("BTF func linkage", "5.6", func() error {
// haveMapBTF attempts to load a minimal BTF blob containing a Var. It is
// used as a proxy for .bss, .data and .rodata map support, which generally
// come with a Var and Datasec. These were introduced in Linux 5.2.
var haveMapBTF = internal.NewFeatureTest("Map BTF (Var/Datasec)", "5.2", func() error {
if err := haveBTF(); err != nil {
return err
}
var (
types struct {
FuncProto btfType
Func btfType
}
strings = []byte{0, 'a', 0}
)
v := &Var{
Name: "a",
Type: &Pointer{(*Void)(nil)},
}
types.FuncProto.SetKind(kindFuncProto)
types.Func.SetKind(kindFunc)
types.Func.SizeType = 1 // aka FuncProto
types.Func.NameOff = 1
types.Func.SetLinkage(GlobalFunc)
err := probeBTF(v)
if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) {
// Treat both EINVAL and EPERM as not supported: creating the map may still
// succeed without Btf* attrs.
return internal.ErrNotSupported
}
return err
})
btf := marshalBTF(&types, strings, internal.NativeEndian)
// haveProgBTF attempts to load a BTF blob containing a Func and FuncProto. It
// is used as a proxy for ext_info (func_info) support, which depends on
// Func(Proto) by definition.
var haveProgBTF = internal.NewFeatureTest("Program BTF (func/line_info)", "5.0", func() error {
if err := haveBTF(); err != nil {
return err
}
fd, err := sys.BtfLoad(&sys.BtfLoadAttr{
Btf: sys.NewSlicePointer(btf),
BtfSize: uint32(len(btf)),
})
fn := &Func{
Name: "a",
Type: &FuncProto{Return: (*Void)(nil)},
}
err := probeBTF(fn)
if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) {
return internal.ErrNotSupported
}
return err
})
var haveFuncLinkage = internal.NewFeatureTest("BTF func linkage", "5.6", func() error {
if err := haveProgBTF(); err != nil {
return err
}
fn := &Func{
Name: "a",
Type: &FuncProto{Return: (*Void)(nil)},
Linkage: GlobalFunc,
}
err := probeBTF(fn)
if errors.Is(err, unix.EINVAL) {
return internal.ErrNotSupported
}
return err
})
func probeBTF(typ Type) error {
b, err := NewBuilder([]Type{typ})
if err != nil {
return err
}
fd.Close()
return nil
})
buf, err := b.Marshal(nil, nil)
if err != nil {
return err
}
fd, err := sys.BtfLoad(&sys.BtfLoadAttr{
Btf: sys.NewSlicePointer(buf),
BtfSize: uint32(len(buf)),
})
if err == nil {
fd.Close()
}
return err
}

View file

@ -4,35 +4,41 @@ import (
"encoding/binary"
"fmt"
"io"
"unsafe"
)
//go:generate stringer -linecomment -output=btf_types_string.go -type=FuncLinkage,VarLinkage
//go:generate stringer -linecomment -output=btf_types_string.go -type=FuncLinkage,VarLinkage,btfKind
// btfKind describes a Type.
type btfKind uint8
// Equivalents of the BTF_KIND_* constants.
const (
kindUnknown btfKind = iota
kindInt
kindPointer
kindArray
kindStruct
kindUnion
kindEnum
kindForward
kindTypedef
kindVolatile
kindConst
kindRestrict
kindUnknown btfKind = iota // Unknown
kindInt // Int
kindPointer // Pointer
kindArray // Array
kindStruct // Struct
kindUnion // Union
kindEnum // Enum
kindForward // Forward
kindTypedef // Typedef
kindVolatile // Volatile
kindConst // Const
kindRestrict // Restrict
// Added ~4.20
kindFunc
kindFuncProto
kindFunc // Func
kindFuncProto // FuncProto
// Added ~5.1
kindVar
kindDatasec
kindVar // Var
kindDatasec // Datasec
// Added ~5.13
kindFloat
kindFloat // Float
// Added 5.16
kindDeclTag // DeclTag
kindTypeTag // TypeTag
// Added 6.0
kindEnum64 // Enum64
)
// FuncLinkage describes BTF function linkage metadata.
@ -63,6 +69,8 @@ const (
btfTypeKindFlagMask = 1
)
var btfTypeLen = binary.Size(btfType{})
// btfType is equivalent to struct btf_type in Documentation/bpf/btf.rst.
type btfType struct {
NameOff uint32
@ -85,47 +93,6 @@ type btfType struct {
SizeType uint32
}
// String returns a human-readable name for the kind, or "Unknown (n)" for
// values outside the known range.
func (k btfKind) String() string {
	// The kind constants are a contiguous iota sequence starting at
	// kindUnknown == 0, so a positional table lookup is equivalent to the
	// exhaustive switch it replaces.
	names := [...]string{
		kindUnknown:   "Unknown",
		kindInt:       "Integer",
		kindPointer:   "Pointer",
		kindArray:     "Array",
		kindStruct:    "Struct",
		kindUnion:     "Union",
		kindEnum:      "Enumeration",
		kindForward:   "Forward",
		kindTypedef:   "Typedef",
		kindVolatile:  "Volatile",
		kindConst:     "Const",
		kindRestrict:  "Restrict",
		kindFunc:      "Function",
		kindFuncProto: "Function Proto",
		kindVar:       "Variable",
		kindDatasec:   "Section",
		kindFloat:     "Float",
	}
	if int(k) < len(names) {
		return names[k]
	}
	return fmt.Sprintf("Unknown (%d)", k)
}
// mask returns a value with the lowest width bits set, e.g. mask(3) == 0b111.
func mask(width uint32) uint32 {
	// Renamed from "len" to avoid shadowing the builtin.
	return (1 << width) - 1
}
@ -164,10 +131,43 @@ func (bt *btfType) SetVlen(vlen int) {
bt.setInfo(uint32(vlen), btfTypeVlenMask, btfTypeVlenShift)
}
func (bt *btfType) KindFlag() bool {
func (bt *btfType) kindFlagBool() bool {
return bt.info(btfTypeKindFlagMask, btfTypeKindFlagShift) == 1
}
// setKindFlagBool stores set into the type's single kind_flag bit.
func (bt *btfType) setKindFlagBool(set bool) {
	flag := uint32(0)
	if set {
		flag = 1
	}
	bt.setInfo(flag, btfTypeKindFlagMask, btfTypeKindFlagShift)
}
// Bitfield returns true if the struct or union contains a bitfield.
// The value is stored in the type's kind_flag bit (see kindFlagBool).
func (bt *btfType) Bitfield() bool {
	return bt.kindFlagBool()
}
// SetBitfield records in the kind_flag bit whether the struct or union
// contains a bitfield.
func (bt *btfType) SetBitfield(isBitfield bool) {
	bt.setKindFlagBool(isBitfield)
}
// FwdKind returns the forward declaration's kind, which is encoded in the
// kind_flag bit.
func (bt *btfType) FwdKind() FwdKind {
	return FwdKind(bt.info(btfTypeKindFlagMask, btfTypeKindFlagShift))
}
// SetFwdKind stores the forward declaration's kind in the kind_flag bit.
func (bt *btfType) SetFwdKind(kind FwdKind) {
	bt.setInfo(uint32(kind), btfTypeKindFlagMask, btfTypeKindFlagShift)
}
// Signed returns the kind_flag bit interpreted as a signedness marker.
func (bt *btfType) Signed() bool {
	return bt.kindFlagBool()
}
// SetSigned stores the signedness marker in the kind_flag bit.
func (bt *btfType) SetSigned(signed bool) {
	bt.setKindFlagBool(signed)
}
// Linkage returns the function linkage, which reuses the vlen bits of Info.
func (bt *btfType) Linkage() FuncLinkage {
	return FuncLinkage(bt.info(btfTypeVlenMask, btfTypeVlenShift))
}
@ -181,6 +181,10 @@ func (bt *btfType) Type() TypeID {
return TypeID(bt.SizeType)
}
// SetType stores a referenced type ID. It shares the SizeType field with
// Size/SetSize; which interpretation applies depends on the kind.
func (bt *btfType) SetType(id TypeID) {
	bt.SizeType = uint32(id)
}
func (bt *btfType) Size() uint32 {
// TODO: Panic here if wrong kind?
return bt.SizeType
@ -190,13 +194,22 @@ func (bt *btfType) SetSize(size uint32) {
bt.SizeType = size
}
// Marshal writes the wire representation of bt (three little fixed uint32
// fields: NameOff, Info, SizeType) to w in the given byte order. The fields
// are encoded by hand to avoid binary.Write's reflection overhead.
func (bt *btfType) Marshal(w io.Writer, bo binary.ByteOrder) error {
	buf := make([]byte, unsafe.Sizeof(*bt))
	for i, field := range [...]uint32{bt.NameOff, bt.Info, bt.SizeType} {
		bo.PutUint32(buf[i*4:], field)
	}
	_, err := w.Write(buf)
	return err
}
type rawType struct {
btfType
data interface{}
}
func (rt *rawType) Marshal(w io.Writer, bo binary.ByteOrder) error {
if err := binary.Write(w, bo, &rt.btfType); err != nil {
if err := rt.btfType.Marshal(w, bo); err != nil {
return err
}
@ -209,11 +222,11 @@ func (rt *rawType) Marshal(w io.Writer, bo binary.ByteOrder) error {
// btfInt encodes additional data for integers.
//
// ? ? ? ? e e e e o o o o o o o o ? ? ? ? ? ? ? ? b b b b b b b b
// ? = undefined
// e = encoding
// o = offset (bitfields?)
// b = bits (bitfields)
// ? ? ? ? e e e e o o o o o o o o ? ? ? ? ? ? ? ? b b b b b b b b
// ? = undefined
// e = encoding
// o = offset (bitfields?)
// b = bits (bitfields)
type btfInt struct {
Raw uint32
}
@ -275,7 +288,13 @@ type btfVariable struct {
type btfEnum struct {
NameOff uint32
Val int32
Val uint32
}
type btfEnum64 struct {
NameOff uint32
ValLo32 uint32
ValHi32 uint32
}
type btfParam struct {
@ -283,12 +302,16 @@ type btfParam struct {
Type TypeID
}
type btfDeclTag struct {
ComponentIdx uint32
}
func readTypes(r io.Reader, bo binary.ByteOrder, typeLen uint32) ([]rawType, error) {
var header btfType
// because of the interleaving between types and struct members it is difficult to
// precompute the numbers of raw types this will parse
// this "guess" is a good first estimation
sizeOfbtfType := uintptr(binary.Size(btfType{}))
sizeOfbtfType := uintptr(btfTypeLen)
tyMaxCount := uintptr(typeLen) / sizeOfbtfType / 2
types := make([]rawType, 0, tyMaxCount)
@ -325,6 +348,11 @@ func readTypes(r io.Reader, bo binary.ByteOrder, typeLen uint32) ([]rawType, err
case kindDatasec:
data = make([]btfVarSecinfo, header.Vlen())
case kindFloat:
case kindDeclTag:
data = new(btfDeclTag)
case kindTypeTag:
case kindEnum64:
data = make([]btfEnum64, header.Vlen())
default:
return nil, fmt.Errorf("type id %v: unknown kind: %v", id, header.Kind())
}

View file

@ -1,4 +1,4 @@
// Code generated by "stringer -linecomment -output=btf_types_string.go -type=FuncLinkage,VarLinkage"; DO NOT EDIT.
// Code generated by "stringer -linecomment -output=btf_types_string.go -type=FuncLinkage,VarLinkage,btfKind"; DO NOT EDIT.
package btf
@ -42,3 +42,39 @@ func (i VarLinkage) String() string {
}
return _VarLinkage_name[_VarLinkage_index[i]:_VarLinkage_index[i+1]]
}
// NOTE(review): stringer-generated compile-time guard; do not edit by
// hand. If any btfKind constant changes value, one of the array indices
// below becomes negative or out of range and the build fails.
func _() {
	// An "invalid array index" compiler error signifies that the constant values have changed.
	// Re-run the stringer command to generate them again.
	var x [1]struct{}
	_ = x[kindUnknown-0]
	_ = x[kindInt-1]
	_ = x[kindPointer-2]
	_ = x[kindArray-3]
	_ = x[kindStruct-4]
	_ = x[kindUnion-5]
	_ = x[kindEnum-6]
	_ = x[kindForward-7]
	_ = x[kindTypedef-8]
	_ = x[kindVolatile-9]
	_ = x[kindConst-10]
	_ = x[kindRestrict-11]
	_ = x[kindFunc-12]
	_ = x[kindFuncProto-13]
	_ = x[kindVar-14]
	_ = x[kindDatasec-15]
	_ = x[kindFloat-16]
	_ = x[kindDeclTag-17]
	_ = x[kindTypeTag-18]
	_ = x[kindEnum64-19]
}
// Stringer-generated lookup tables: _btfKind_name concatenates every
// kind name, _btfKind_index holds the start offset of each name within
// it. Do not edit by hand.
const _btfKind_name = "UnknownIntPointerArrayStructUnionEnumForwardTypedefVolatileConstRestrictFuncFuncProtoVarDatasecFloatDeclTagTypeTagEnum64"

var _btfKind_index = [...]uint8{0, 7, 10, 17, 22, 28, 33, 37, 44, 51, 59, 64, 72, 76, 85, 88, 95, 100, 107, 114, 120}
// String returns the human-readable name of the kind, or "btfKind(N)"
// for values outside the known range. Stringer-generated; do not edit
// by hand.
func (i btfKind) String() string {
	if i >= btfKind(len(_btfKind_index)-1) {
		return "btfKind(" + strconv.FormatInt(int64(i), 10) + ")"
	}
	return _btfKind_name[_btfKind_index[i]:_btfKind_index[i+1]]
}

View file

@ -156,16 +156,25 @@ func (k coreKind) String() string {
}
}
// CORERelocate calculates the difference in types between local and target.
// CORERelocate calculates changes needed to adjust eBPF instructions for differences
// in types.
//
// Returns a list of fixups which can be applied to instructions to make them
// match the target type(s).
//
// Fixups are returned in the order of relos, e.g. fixup[i] is the solution
// for relos[i].
func CORERelocate(local, target *Spec, relos []*CORERelocation) ([]COREFixup, error) {
if local.byteOrder != target.byteOrder {
return nil, fmt.Errorf("can't relocate %s against %s", local.byteOrder, target.byteOrder)
func CORERelocate(relos []*CORERelocation, target *Spec, bo binary.ByteOrder) ([]COREFixup, error) {
if target == nil {
var err error
target, _, err = kernelSpec()
if err != nil {
return nil, fmt.Errorf("load kernel spec: %w", err)
}
}
if bo != target.byteOrder {
return nil, fmt.Errorf("can't relocate %s against %s", bo, target.byteOrder)
}
type reloGroup struct {
@ -185,15 +194,14 @@ func CORERelocate(local, target *Spec, relos []*CORERelocation) ([]COREFixup, er
return nil, fmt.Errorf("%s: unexpected accessor %v", relo.kind, relo.accessor)
}
id, err := local.TypeID(relo.typ)
if err != nil {
return nil, fmt.Errorf("%s: %w", relo.kind, err)
}
result[i] = COREFixup{
kind: relo.kind,
local: uint32(id),
target: uint32(id),
kind: relo.kind,
local: uint32(relo.id),
// NB: Using relo.id as the target here is incorrect, since
// it doesn't match the BTF we generate on the fly. This isn't
// too bad for now since there are no uses of the local type ID
// in the kernel, yet.
target: uint32(relo.id),
}
continue
}
@ -214,7 +222,7 @@ func CORERelocate(local, target *Spec, relos []*CORERelocation) ([]COREFixup, er
}
targets := target.namedTypes[newEssentialName(localTypeName)]
fixups, err := coreCalculateFixups(local, target, localType, targets, group.relos)
fixups, err := coreCalculateFixups(group.relos, target, targets, bo)
if err != nil {
return nil, fmt.Errorf("relocate %s: %w", localType, err)
}
@ -229,34 +237,29 @@ func CORERelocate(local, target *Spec, relos []*CORERelocation) ([]COREFixup, er
var errAmbiguousRelocation = errors.New("ambiguous relocation")
var errImpossibleRelocation = errors.New("impossible relocation")
var errIncompatibleTypes = errors.New("incompatible types")
// coreCalculateFixups calculates the fixups for the given relocations using
// the "best" target.
// coreCalculateFixups finds the target type that best matches all relocations.
//
// All relos must target the same type.
//
// The best target is determined by scoring: the less poisoning we have to do
// the better the target is.
func coreCalculateFixups(localSpec, targetSpec *Spec, local Type, targets []Type, relos []*CORERelocation) ([]COREFixup, error) {
localID, err := localSpec.TypeID(local)
if err != nil {
return nil, fmt.Errorf("local type ID: %w", err)
}
local = Copy(local, UnderlyingType)
func coreCalculateFixups(relos []*CORERelocation, targetSpec *Spec, targets []Type, bo binary.ByteOrder) ([]COREFixup, error) {
bestScore := len(relos)
var bestFixups []COREFixup
for i := range targets {
targetID, err := targetSpec.TypeID(targets[i])
for _, target := range targets {
targetID, err := targetSpec.TypeID(target)
if err != nil {
return nil, fmt.Errorf("target type ID: %w", err)
}
target := Copy(targets[i], UnderlyingType)
score := 0 // lower is better
fixups := make([]COREFixup, 0, len(relos))
for _, relo := range relos {
fixup, err := coreCalculateFixup(localSpec.byteOrder, local, localID, target, targetID, relo)
fixup, err := coreCalculateFixup(relo, target, targetID, bo)
if err != nil {
return nil, fmt.Errorf("target %s: %w", target, err)
return nil, fmt.Errorf("target %s: %s: %w", target, relo.kind, err)
}
if fixup.poison || fixup.isNonExistant() {
score++
@ -303,9 +306,11 @@ func coreCalculateFixups(localSpec, targetSpec *Spec, local Type, targets []Type
return bestFixups, nil
}
var errNoSignedness = errors.New("no signedness")
// coreCalculateFixup calculates the fixup for a single local type, target type
// and relocation.
func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID, target Type, targetID TypeID, relo *CORERelocation) (COREFixup, error) {
func coreCalculateFixup(relo *CORERelocation, target Type, targetID TypeID, bo binary.ByteOrder) (COREFixup, error) {
fixup := func(local, target uint32) (COREFixup, error) {
return COREFixup{kind: relo.kind, local: local, target: target}, nil
}
@ -320,18 +325,20 @@ func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID,
}
zero := COREFixup{}
local := relo.typ
switch relo.kind {
case reloTypeIDTarget, reloTypeSize, reloTypeExists:
if len(relo.accessor) > 1 || relo.accessor[0] != 0 {
return zero, fmt.Errorf("%s: unexpected accessor %v", relo.kind, relo.accessor)
return zero, fmt.Errorf("unexpected accessor %v", relo.accessor)
}
err := coreAreTypesCompatible(local, target)
if errors.Is(err, errImpossibleRelocation) {
if errors.Is(err, errIncompatibleTypes) {
return poison()
}
if err != nil {
return zero, fmt.Errorf("relocation %s: %w", relo.kind, err)
return zero, err
}
switch relo.kind {
@ -339,7 +346,7 @@ func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID,
return fixup(1, 1)
case reloTypeIDTarget:
return fixup(uint32(localID), uint32(targetID))
return fixup(uint32(relo.id), uint32(targetID))
case reloTypeSize:
localSize, err := Sizeof(local)
@ -361,7 +368,7 @@ func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID,
return poison()
}
if err != nil {
return zero, fmt.Errorf("relocation %s: %w", relo.kind, err)
return zero, err
}
switch relo.kind {
@ -372,21 +379,8 @@ func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID,
return fixup(uint32(localValue.Value), uint32(targetValue.Value))
}
case reloFieldSigned:
switch local.(type) {
case *Enum:
return fixup(1, 1)
case *Int:
return fixup(
uint32(local.(*Int).Encoding&Signed),
uint32(target.(*Int).Encoding&Signed),
)
default:
return fixupWithoutValidation(0, 0)
}
case reloFieldByteOffset, reloFieldByteSize, reloFieldExists, reloFieldLShiftU64, reloFieldRShiftU64:
if _, ok := target.(*Fwd); ok {
case reloFieldByteOffset, reloFieldByteSize, reloFieldExists, reloFieldLShiftU64, reloFieldRShiftU64, reloFieldSigned:
if _, ok := as[*Fwd](target); ok {
// We can't relocate fields using a forward declaration, so
// skip it. If a non-forward declaration is present in the BTF
// we'll find it in one of the other iterations.
@ -398,7 +392,7 @@ func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID,
return poison()
}
if err != nil {
return zero, fmt.Errorf("target %s: %w", target, err)
return zero, err
}
maybeSkipValidation := func(f COREFixup, err error) (COREFixup, error) {
@ -427,7 +421,7 @@ func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID,
case reloFieldLShiftU64:
var target uint32
if byteOrder == binary.LittleEndian {
if bo == binary.LittleEndian {
targetSize, err := targetField.sizeBits()
if err != nil {
return zero, err
@ -451,10 +445,40 @@ func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID,
}
return fixupWithoutValidation(0, uint32(64-targetSize))
case reloFieldSigned:
switch local := UnderlyingType(localField.Type).(type) {
case *Enum:
target, ok := as[*Enum](targetField.Type)
if !ok {
return zero, fmt.Errorf("target isn't *Enum but %T", targetField.Type)
}
return fixup(boolToUint32(local.Signed), boolToUint32(target.Signed))
case *Int:
target, ok := as[*Int](targetField.Type)
if !ok {
return zero, fmt.Errorf("target isn't *Int but %T", targetField.Type)
}
return fixup(
uint32(local.Encoding&Signed),
uint32(target.Encoding&Signed),
)
default:
return zero, fmt.Errorf("type %T: %w", local, errNoSignedness)
}
}
}
return zero, fmt.Errorf("relocation %s: %w", relo.kind, ErrNotSupported)
return zero, ErrNotSupported
}
// boolToUint32 maps false to 0 and true to 1.
func boolToUint32(val bool) uint32 {
	var n uint32
	if val {
		n = 1
	}
	return n
}
/* coreAccessor contains a path through a struct. It contains at least one index.
@ -516,7 +540,7 @@ func (ca coreAccessor) String() string {
}
func (ca coreAccessor) enumValue(t Type) (*EnumValue, error) {
e, ok := t.(*Enum)
e, ok := as[*Enum](t)
if !ok {
return nil, fmt.Errorf("not an enum: %s", t)
}
@ -536,9 +560,9 @@ func (ca coreAccessor) enumValue(t Type) (*EnumValue, error) {
// coreField represents the position of a "child" of a composite type from the
// start of that type.
//
// /- start of composite
// | offset * 8 | bitfieldOffset | bitfieldSize | ... |
// \- start of field end of field -/
// /- start of composite
// | offset * 8 | bitfieldOffset | bitfieldSize | ... |
// \- start of field end of field -/
type coreField struct {
Type Type
@ -555,6 +579,10 @@ type coreField struct {
}
func (cf *coreField) adjustOffsetToNthElement(n int) error {
if n == 0 {
return nil
}
size, err := Sizeof(cf.Type)
if err != nil {
return err
@ -597,7 +625,7 @@ func (cf *coreField) sizeBits() (Bits, error) {
// between kernel versions. Synthesise the size to make the shifts work.
size, err := Sizeof(cf.Type)
if err != nil {
return 0, nil
return 0, err
}
return Bits(size * 8), nil
}
@ -611,6 +639,10 @@ func coreFindField(localT Type, localAcc coreAccessor, targetT Type) (coreField,
local := coreField{Type: localT}
target := coreField{Type: targetT}
if err := coreAreMembersCompatible(local.Type, target.Type); err != nil {
return coreField{}, coreField{}, fmt.Errorf("fields: %w", err)
}
// The first index is used to offset a pointer of the base type like
// when accessing an array.
if err := local.adjustOffsetToNthElement(localAcc[0]); err != nil {
@ -621,13 +653,9 @@ func coreFindField(localT Type, localAcc coreAccessor, targetT Type) (coreField,
return coreField{}, coreField{}, err
}
if err := coreAreMembersCompatible(local.Type, target.Type); err != nil {
return coreField{}, coreField{}, fmt.Errorf("fields: %w", err)
}
var localMaybeFlex, targetMaybeFlex bool
for i, acc := range localAcc[1:] {
switch localType := local.Type.(type) {
switch localType := UnderlyingType(local.Type).(type) {
case composite:
// For composite types acc is used to find the field in the local type,
// and then we try to find a field in target with the same name.
@ -638,21 +666,21 @@ func coreFindField(localT Type, localAcc coreAccessor, targetT Type) (coreField,
localMember := localMembers[acc]
if localMember.Name == "" {
_, ok := localMember.Type.(composite)
localMemberType, ok := as[composite](localMember.Type)
if !ok {
return coreField{}, coreField{}, fmt.Errorf("unnamed field with type %s: %s", localMember.Type, ErrNotSupported)
}
// This is an anonymous struct or union, ignore it.
local = coreField{
Type: localMember.Type,
Type: localMemberType,
offset: local.offset + localMember.Offset.Bytes(),
}
localMaybeFlex = false
continue
}
targetType, ok := target.Type.(composite)
targetType, ok := as[composite](target.Type)
if !ok {
return coreField{}, coreField{}, fmt.Errorf("target not composite: %w", errImpossibleRelocation)
}
@ -698,7 +726,7 @@ func coreFindField(localT Type, localAcc coreAccessor, targetT Type) (coreField,
case *Array:
// For arrays, acc is the index in the target.
targetType, ok := target.Type.(*Array)
targetType, ok := as[*Array](target.Type)
if !ok {
return coreField{}, coreField{}, fmt.Errorf("target not array: %w", errImpossibleRelocation)
}
@ -792,7 +820,7 @@ func coreFindMember(typ composite, name string) (Member, bool, error) {
continue
}
comp, ok := member.Type.(composite)
comp, ok := as[composite](member.Type)
if !ok {
return Member{}, false, fmt.Errorf("anonymous non-composite type %T not allowed", member.Type)
}
@ -811,7 +839,7 @@ func coreFindEnumValue(local Type, localAcc coreAccessor, target Type) (localVal
return nil, nil, err
}
targetEnum, ok := target.(*Enum)
targetEnum, ok := as[*Enum](target)
if !ok {
return nil, nil, errImpossibleRelocation
}
@ -828,6 +856,13 @@ func coreFindEnumValue(local Type, localAcc coreAccessor, target Type) (localVal
return nil, nil, errImpossibleRelocation
}
// CheckTypeCompatibility checks local and target types for Compatibility according to CO-RE rules.
//
// Only layout compatibility is checked, ignoring names of the root type.
func CheckTypeCompatibility(localType Type, targetType Type) error {
return coreAreTypesCompatible(localType, targetType)
}
/* The comment below is from bpf_core_types_are_compat in libbpf.c:
*
* Check local and target types for compatibility. This check is used for
@ -849,25 +884,26 @@ func coreFindEnumValue(local Type, localAcc coreAccessor, target Type) (localVal
* These rules are not set in stone and probably will be adjusted as we get
* more experience with using BPF CO-RE relocations.
*
* Returns errImpossibleRelocation if types are not compatible.
* Returns errIncompatibleTypes if types are not compatible.
*/
func coreAreTypesCompatible(localType Type, targetType Type) error {
var (
localTs, targetTs typeDeque
l, t = &localType, &targetType
depth = 0
)
for ; l != nil && t != nil; l, t = localTs.shift(), targetTs.shift() {
for ; l != nil && t != nil; l, t = localTs.Shift(), targetTs.Shift() {
if depth >= maxTypeDepth {
return errors.New("types are nested too deep")
}
localType = *l
targetType = *t
localType = UnderlyingType(*l)
targetType = UnderlyingType(*t)
if reflect.TypeOf(localType) != reflect.TypeOf(targetType) {
return fmt.Errorf("type mismatch: %w", errImpossibleRelocation)
return fmt.Errorf("type mismatch: %w", errIncompatibleTypes)
}
switch lv := (localType).(type) {
@ -876,18 +912,18 @@ func coreAreTypesCompatible(localType Type, targetType Type) error {
case *Pointer, *Array:
depth++
localType.walk(&localTs)
targetType.walk(&targetTs)
walkType(localType, localTs.Push)
walkType(targetType, targetTs.Push)
case *FuncProto:
tv := targetType.(*FuncProto)
if len(lv.Params) != len(tv.Params) {
return fmt.Errorf("function param mismatch: %w", errImpossibleRelocation)
return fmt.Errorf("function param mismatch: %w", errIncompatibleTypes)
}
depth++
localType.walk(&localTs)
targetType.walk(&targetTs)
walkType(localType, localTs.Push)
walkType(targetType, targetTs.Push)
default:
return fmt.Errorf("unsupported type %T", localType)
@ -931,6 +967,9 @@ func coreAreTypesCompatible(localType Type, targetType Type) error {
* Returns errImpossibleRelocation if the members are not compatible.
*/
func coreAreMembersCompatible(localType Type, targetType Type) error {
localType = UnderlyingType(localType)
targetType = UnderlyingType(targetType)
doNamesMatch := func(a, b string) error {
if a == "" || b == "" {
// allow anonymous and named type to match

View file

@ -24,7 +24,7 @@ type ExtInfos struct {
// loadExtInfosFromELF parses ext infos from the .BTF.ext section in an ELF.
//
// Returns an error wrapping ErrNotFound if no ext infos are present.
func loadExtInfosFromELF(file *internal.SafeELFFile, ts types, strings *stringTable) (*ExtInfos, error) {
func loadExtInfosFromELF(file *internal.SafeELFFile, spec *Spec) (*ExtInfos, error) {
section := file.Section(".BTF.ext")
if section == nil {
return nil, fmt.Errorf("btf ext infos: %w", ErrNotFound)
@ -34,11 +34,11 @@ func loadExtInfosFromELF(file *internal.SafeELFFile, ts types, strings *stringTa
return nil, fmt.Errorf("compressed ext_info is not supported")
}
return loadExtInfos(section.ReaderAt, file.ByteOrder, ts, strings)
return loadExtInfos(section.ReaderAt, file.ByteOrder, spec, spec.strings)
}
// loadExtInfos parses bare ext infos.
func loadExtInfos(r io.ReaderAt, bo binary.ByteOrder, ts types, strings *stringTable) (*ExtInfos, error) {
func loadExtInfos(r io.ReaderAt, bo binary.ByteOrder, spec *Spec, strings *stringTable) (*ExtInfos, error) {
// Open unbuffered section reader. binary.Read() calls io.ReadFull on
// the header structs, resulting in one syscall per header.
headerRd := io.NewSectionReader(r, 0, math.MaxInt64)
@ -60,7 +60,7 @@ func loadExtInfos(r io.ReaderAt, bo binary.ByteOrder, ts types, strings *stringT
funcInfos := make(map[string][]funcInfo, len(btfFuncInfos))
for section, bfis := range btfFuncInfos {
funcInfos[section], err = newFuncInfos(bfis, ts)
funcInfos[section], err = newFuncInfos(bfis, spec)
if err != nil {
return nil, fmt.Errorf("section %s: func infos: %w", section, err)
}
@ -93,7 +93,7 @@ func loadExtInfos(r io.ReaderAt, bo binary.ByteOrder, ts types, strings *stringT
coreRelos := make(map[string][]coreRelocationInfo, len(btfCORERelos))
for section, brs := range btfCORERelos {
coreRelos[section], err = newRelocationInfos(brs, ts, strings)
coreRelos[section], err = newRelocationInfos(brs, spec, strings)
if err != nil {
return nil, fmt.Errorf("section %s: CO-RE relocations: %w", section, err)
}
@ -114,7 +114,7 @@ func (ei *ExtInfos) Assign(insns asm.Instructions, section string) {
iter := insns.Iterate()
for iter.Next() {
if len(funcInfos) > 0 && funcInfos[0].offset == iter.Offset {
iter.Ins.Metadata.Set(funcInfoMeta{}, funcInfos[0].fn)
*iter.Ins = WithFuncMetadata(*iter.Ins, funcInfos[0].fn)
funcInfos = funcInfos[1:]
}
@ -132,17 +132,37 @@ func (ei *ExtInfos) Assign(insns asm.Instructions, section string) {
// MarshalExtInfos encodes function and line info embedded in insns into kernel
// wire format.
func MarshalExtInfos(insns asm.Instructions, typeID func(Type) (TypeID, error)) (funcInfos, lineInfos []byte, _ error) {
//
// Returns ErrNotSupported if the kernel doesn't support BTF-associated programs.
func MarshalExtInfos(insns asm.Instructions) (_ *Handle, funcInfos, lineInfos []byte, _ error) {
// Bail out early if the kernel doesn't support Func(Proto). If this is the
// case, func_info will also be unsupported.
if err := haveProgBTF(); err != nil {
return nil, nil, nil, err
}
iter := insns.Iterate()
var fiBuf, liBuf bytes.Buffer
for iter.Next() {
_, ok := iter.Ins.Source().(*Line)
fn := FuncMetadata(iter.Ins)
if ok || fn != nil {
goto marshal
}
}
return nil, nil, nil, nil
marshal:
var b Builder
var fiBuf, liBuf bytes.Buffer
for {
if fn := FuncMetadata(iter.Ins); fn != nil {
fi := &funcInfo{
fn: fn,
offset: iter.Offset,
}
if err := fi.marshal(&fiBuf, typeID); err != nil {
return nil, nil, fmt.Errorf("write func info: %w", err)
if err := fi.marshal(&fiBuf, &b); err != nil {
return nil, nil, nil, fmt.Errorf("write func info: %w", err)
}
}
@ -151,12 +171,18 @@ func MarshalExtInfos(insns asm.Instructions, typeID func(Type) (TypeID, error))
line: line,
offset: iter.Offset,
}
if err := li.marshal(&liBuf); err != nil {
return nil, nil, fmt.Errorf("write line info: %w", err)
if err := li.marshal(&liBuf, &b); err != nil {
return nil, nil, nil, fmt.Errorf("write line info: %w", err)
}
}
if !iter.Next() {
break
}
}
return fiBuf.Bytes(), liBuf.Bytes(), nil
handle, err := NewHandle(&b)
return handle, fiBuf.Bytes(), liBuf.Bytes(), err
}
// btfExtHeader is found at the start of the .BTF.ext section.
@ -311,8 +337,8 @@ type bpfFuncInfo struct {
TypeID TypeID
}
func newFuncInfo(fi bpfFuncInfo, ts types) (*funcInfo, error) {
typ, err := ts.ByID(fi.TypeID)
func newFuncInfo(fi bpfFuncInfo, spec *Spec) (*funcInfo, error) {
typ, err := spec.TypeByID(fi.TypeID)
if err != nil {
return nil, err
}
@ -333,10 +359,10 @@ func newFuncInfo(fi bpfFuncInfo, ts types) (*funcInfo, error) {
}, nil
}
func newFuncInfos(bfis []bpfFuncInfo, ts types) ([]funcInfo, error) {
func newFuncInfos(bfis []bpfFuncInfo, spec *Spec) ([]funcInfo, error) {
fis := make([]funcInfo, 0, len(bfis))
for _, bfi := range bfis {
fi, err := newFuncInfo(bfi, ts)
fi, err := newFuncInfo(bfi, spec)
if err != nil {
return nil, fmt.Errorf("offset %d: %w", bfi.InsnOff, err)
}
@ -349,8 +375,8 @@ func newFuncInfos(bfis []bpfFuncInfo, ts types) ([]funcInfo, error) {
}
// marshal into the BTF wire format.
func (fi *funcInfo) marshal(w io.Writer, typeID func(Type) (TypeID, error)) error {
id, err := typeID(fi.fn)
func (fi *funcInfo) marshal(w *bytes.Buffer, b *Builder) error {
id, err := b.Add(fi.fn)
if err != nil {
return err
}
@ -358,10 +384,14 @@ func (fi *funcInfo) marshal(w io.Writer, typeID func(Type) (TypeID, error)) erro
InsnOff: uint32(fi.offset),
TypeID: id,
}
return binary.Write(w, internal.NativeEndian, &bfi)
buf := make([]byte, FuncInfoSize)
internal.NativeEndian.PutUint32(buf, bfi.InsnOff)
internal.NativeEndian.PutUint32(buf[4:], uint32(bfi.TypeID))
_, err = w.Write(buf)
return err
}
// parseLineInfos parses a func_info sub-section within .BTF.ext ito a map of
// parseFuncInfos parses a func_info sub-section within .BTF.ext ito a map of
// func infos indexed by section name.
func parseFuncInfos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map[string][]bpfFuncInfo, error) {
recordSize, err := parseExtInfoRecordSize(r, bo)
@ -428,12 +458,6 @@ type Line struct {
line string
lineNumber uint32
lineColumn uint32
// TODO: We should get rid of the fields below, but for that we need to be
// able to write BTF.
fileNameOff uint32
lineOff uint32
}
func (li *Line) FileName() string {
@ -496,8 +520,6 @@ func newLineInfo(li bpfLineInfo, strings *stringTable) (*lineInfo, error) {
line,
lineNumber,
lineColumn,
li.FileNameOff,
li.LineOff,
},
asm.RawInstructionOffset(li.InsnOff),
}, nil
@ -519,7 +541,7 @@ func newLineInfos(blis []bpfLineInfo, strings *stringTable) ([]lineInfo, error)
}
// marshal writes the binary representation of the LineInfo to w.
func (li *lineInfo) marshal(w io.Writer) error {
func (li *lineInfo) marshal(w *bytes.Buffer, b *Builder) error {
line := li.line
if line.lineNumber > bpfLineMax {
return fmt.Errorf("line %d exceeds %d", line.lineNumber, bpfLineMax)
@ -529,13 +551,30 @@ func (li *lineInfo) marshal(w io.Writer) error {
return fmt.Errorf("column %d exceeds %d", line.lineColumn, bpfColumnMax)
}
fileNameOff, err := b.addString(line.fileName)
if err != nil {
return fmt.Errorf("file name %q: %w", line.fileName, err)
}
lineOff, err := b.addString(line.line)
if err != nil {
return fmt.Errorf("line %q: %w", line.line, err)
}
bli := bpfLineInfo{
uint32(li.offset),
line.fileNameOff,
line.lineOff,
fileNameOff,
lineOff,
(line.lineNumber << bpfLineShift) | line.lineColumn,
}
return binary.Write(w, internal.NativeEndian, &bli)
buf := make([]byte, LineInfoSize)
internal.NativeEndian.PutUint32(buf, bli.InsnOff)
internal.NativeEndian.PutUint32(buf[4:], bli.FileNameOff)
internal.NativeEndian.PutUint32(buf[8:], bli.LineOff)
internal.NativeEndian.PutUint32(buf[12:], bli.LineCol)
_, err = w.Write(buf)
return err
}
// parseLineInfos parses a line_info sub-section within .BTF.ext ito a map of
@ -605,9 +644,16 @@ type bpfCORERelo struct {
}
// CORERelocation describes a single CO-RE relocation record parsed from
// the .BTF.ext section: which local type and accessor path an
// instruction refers to, and what kind of adjustment it needs.
type CORERelocation struct {
	// The local type of the relocation, stripped of typedefs and qualifiers.
	typ Type
	// accessor is the path of indices through typ (see coreAccessor).
	accessor coreAccessor
	// kind selects the quantity being relocated (field offset, type
	// size, enum value, ...).
	kind coreKind
	// The ID of the local type in the source BTF.
	id TypeID
}
// String renders the relocation for debugging, showing its kind, local
// type, accessor path and local type ID.
func (cr *CORERelocation) String() string {
	const layout = "CORERelocation(%s, %s[%s], local_id=%d)"
	return fmt.Sprintf(layout, cr.kind, cr.typ, cr.accessor, cr.id)
}
func CORERelocationMetadata(ins *asm.Instruction) *CORERelocation {
@ -620,8 +666,8 @@ type coreRelocationInfo struct {
offset asm.RawInstructionOffset
}
func newRelocationInfo(relo bpfCORERelo, ts types, strings *stringTable) (*coreRelocationInfo, error) {
typ, err := ts.ByID(relo.TypeID)
func newRelocationInfo(relo bpfCORERelo, spec *Spec, strings *stringTable) (*coreRelocationInfo, error) {
typ, err := spec.TypeByID(relo.TypeID)
if err != nil {
return nil, err
}
@ -641,15 +687,16 @@ func newRelocationInfo(relo bpfCORERelo, ts types, strings *stringTable) (*coreR
typ,
accessor,
relo.Kind,
relo.TypeID,
},
asm.RawInstructionOffset(relo.InsnOff),
}, nil
}
func newRelocationInfos(brs []bpfCORERelo, ts types, strings *stringTable) ([]coreRelocationInfo, error) {
func newRelocationInfos(brs []bpfCORERelo, spec *Spec, strings *stringTable) ([]coreRelocationInfo, error) {
rs := make([]coreRelocationInfo, 0, len(brs))
for _, br := range brs {
relo, err := newRelocationInfo(br, ts, strings)
relo, err := newRelocationInfo(br, spec, strings)
if err != nil {
return nil, fmt.Errorf("offset %d: %w", br.InsnOff, err)
}

View file

@ -56,54 +56,40 @@ func (gf *GoFormatter) enumIdentifier(name, element string) string {
//
// It encodes https://golang.org/ref/spec#Type_declarations:
//
// type foo struct { bar uint32; }
// type bar int32
// type foo struct { bar uint32; }
// type bar int32
func (gf *GoFormatter) writeTypeDecl(name string, typ Type) error {
if name == "" {
return fmt.Errorf("need a name for type %s", typ)
}
switch v := skipQualifiers(typ).(type) {
case *Enum:
fmt.Fprintf(&gf.w, "type %s ", name)
switch v.Size {
case 1:
gf.w.WriteString("int8")
case 2:
gf.w.WriteString("int16")
case 4:
gf.w.WriteString("int32")
case 8:
gf.w.WriteString("int64")
default:
return fmt.Errorf("%s: invalid enum size %d", typ, v.Size)
}
if len(v.Values) == 0 {
return nil
}
gf.w.WriteString("; const ( ")
for _, ev := range v.Values {
id := gf.enumIdentifier(name, ev.Name)
fmt.Fprintf(&gf.w, "%s %s = %d; ", id, name, ev.Value)
}
gf.w.WriteString(")")
return nil
default:
fmt.Fprintf(&gf.w, "type %s ", name)
return gf.writeTypeLit(v, 0)
typ = skipQualifiers(typ)
fmt.Fprintf(&gf.w, "type %s ", name)
if err := gf.writeTypeLit(typ, 0); err != nil {
return err
}
e, ok := typ.(*Enum)
if !ok || len(e.Values) == 0 {
return nil
}
gf.w.WriteString("; const ( ")
for _, ev := range e.Values {
id := gf.enumIdentifier(name, ev.Name)
fmt.Fprintf(&gf.w, "%s %s = %d; ", id, name, ev.Value)
}
gf.w.WriteString(")")
return nil
}
// writeType outputs the name of a named type or a literal describing the type.
//
// It encodes https://golang.org/ref/spec#Types.
//
// foo (if foo is a named type)
// uint32
// foo (if foo is a named type)
// uint32
func (gf *GoFormatter) writeType(typ Type, depth int) error {
typ = skipQualifiers(typ)
@ -122,8 +108,8 @@ func (gf *GoFormatter) writeType(typ Type, depth int) error {
//
// It encodes https://golang.org/ref/spec#TypeLit.
//
// struct { bar uint32; }
// uint32
// struct { bar uint32; }
// uint32
func (gf *GoFormatter) writeTypeLit(typ Type, depth int) error {
depth++
if depth > maxTypeDepth {
@ -133,10 +119,24 @@ func (gf *GoFormatter) writeTypeLit(typ Type, depth int) error {
var err error
switch v := skipQualifiers(typ).(type) {
case *Int:
gf.writeIntLit(v)
err = gf.writeIntLit(v)
case *Enum:
gf.w.WriteString("int32")
if !v.Signed {
gf.w.WriteRune('u')
}
switch v.Size {
case 1:
gf.w.WriteString("int8")
case 2:
gf.w.WriteString("int16")
case 4:
gf.w.WriteString("int32")
case 8:
gf.w.WriteString("int64")
default:
err = fmt.Errorf("invalid enum size %d", v.Size)
}
case *Typedef:
err = gf.writeType(v.Type, depth)
@ -166,19 +166,36 @@ func (gf *GoFormatter) writeTypeLit(typ Type, depth int) error {
return nil
}
func (gf *GoFormatter) writeIntLit(i *Int) {
// NB: Encoding.IsChar is ignored.
if i.Encoding.IsBool() && i.Size == 1 {
gf.w.WriteString("bool")
return
}
func (gf *GoFormatter) writeIntLit(i *Int) error {
bits := i.Size * 8
if i.Encoding.IsSigned() {
fmt.Fprintf(&gf.w, "int%d", bits)
} else {
fmt.Fprintf(&gf.w, "uint%d", bits)
switch i.Encoding {
case Bool:
if i.Size != 1 {
return fmt.Errorf("bool with size %d", i.Size)
}
gf.w.WriteString("bool")
case Char:
if i.Size != 1 {
return fmt.Errorf("char with size %d", i.Size)
}
// BTF doesn't have a way to specify the signedness of a char. Assume
// we are dealing with unsigned, since this works nicely with []byte
// in Go code.
fallthrough
case Unsigned, Signed:
stem := "uint"
if i.Encoding == Signed {
stem = "int"
}
if i.Size > 8 {
fmt.Fprintf(&gf.w, "[%d]byte /* %s%d */", i.Size, stem, i.Size*8)
} else {
fmt.Fprintf(&gf.w, "%s%d", stem, bits)
}
default:
return fmt.Errorf("can't encode %s", i.Encoding)
}
return nil
}
func (gf *GoFormatter) writeStructLit(size uint32, members []Member, depth int) error {
@ -199,11 +216,15 @@ func (gf *GoFormatter) writeStructLit(size uint32, members []Member, depth int)
gf.writePadding(n)
}
size, err := Sizeof(m.Type)
fieldSize, err := Sizeof(m.Type)
if err != nil {
return fmt.Errorf("field %d: %w", i, err)
}
prevOffset = offset + uint32(size)
prevOffset = offset + uint32(fieldSize)
if prevOffset > size {
return fmt.Errorf("field %d of size %d exceeds type size %d", i, fieldSize, size)
}
if err := gf.writeStructField(m, depth); err != nil {
return fmt.Errorf("field %d: %w", i, err)
@ -272,7 +293,11 @@ func (gf *GoFormatter) writeDatasecLit(ds *Datasec, depth int) error {
prevOffset := uint32(0)
for i, vsi := range ds.Vars {
v := vsi.Type.(*Var)
v, ok := vsi.Type.(*Var)
if !ok {
return fmt.Errorf("can't format %s as part of data section", vsi.Type)
}
if v.Linkage != GlobalVar {
// Ignore static, extern, etc. for now.
continue

View file

@ -1,14 +1,142 @@
package btf
import (
"bytes"
"errors"
"fmt"
"math"
"os"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
// Handle is a reference to BTF loaded into the kernel.
type Handle struct {
	fd *sys.FD

	// Size of the raw BTF in bytes.
	size uint32

	// needsKernelBase is set when this BTF belongs to a kernel module
	// (see NewHandleFromID), in which case Spec requires vmlinux base
	// types to parse it.
	needsKernelBase bool
}
// NewHandle loads the contents of a [Builder] into the kernel.
//
// Returns an error wrapping ErrNotSupported if the kernel doesn't support BTF.
func NewHandle(b *Builder) (*Handle, error) {
	// Borrow a pooled scratch buffer for marshaling.
	scratch := getByteSlice()
	defer putByteSlice(scratch)

	raw, err := b.Marshal(*scratch, KernelMarshalOptions())
	if err != nil {
		return nil, fmt.Errorf("marshal BTF: %w", err)
	}

	return NewHandleFromRawBTF(raw)
}
// NewHandleFromRawBTF loads raw BTF into the kernel.
//
// Returns an error wrapping ErrNotSupported if the kernel doesn't support BTF.
func NewHandleFromRawBTF(btf []byte) (*Handle, error) {
	// BtfSize is a uint32, so larger blobs can't be expressed at all.
	if uint64(len(btf)) > math.MaxUint32 {
		return nil, errors.New("BTF exceeds the maximum size")
	}

	attr := &sys.BtfLoadAttr{
		Btf: sys.NewSlicePointer(btf),
		BtfSize: uint32(len(btf)),
	}

	// Fast path: attempt the load without a verifier log buffer.
	fd, err := sys.BtfLoad(attr)
	if err == nil {
		// false: BTF loaded directly is never kernel-module BTF, so it
		// doesn't need vmlinux base types.
		return &Handle{fd, attr.BtfSize, false}, nil
	}

	// Distinguish "kernel doesn't support BTF at all" from a genuine
	// verification failure before retrying.
	if err := haveBTF(); err != nil {
		return nil, err
	}

	logBuf := make([]byte, 64*1024)
	attr.BtfLogBuf = sys.NewSlicePointer(logBuf)
	attr.BtfLogSize = uint32(len(logBuf))
	attr.BtfLogLevel = 1

	// Retry purely to capture the verifier log; the error reported to
	// the caller is still the original one (err), annotated with logBuf.
	//
	// Up until at least kernel 6.0, the BTF verifier does not return ENOSPC
	// if there are other verification errors. ENOSPC is only returned when
	// the BTF blob is correct, a log was requested, and the provided buffer
	// is too small.
	_, ve := sys.BtfLoad(attr)
	return nil, internal.ErrorWithLog("load btf", err, logBuf, errors.Is(ve, unix.ENOSPC))
}
// NewHandleFromID returns the BTF handle for a given id.
//
// Prefer calling [ebpf.Program.Handle] or [ebpf.Map.Handle] if possible.
//
// Returns ErrNotExist, if there is no BTF with the given id.
//
// Requires CAP_SYS_ADMIN.
func NewHandleFromID(id ID) (*Handle, error) {
	fd, err := sys.BtfGetFdById(&sys.BtfGetFdByIdAttr{Id: uint32(id)})
	if err != nil {
		return nil, fmt.Errorf("get FD for ID %d: %w", id, err)
	}

	// Query metadata up front: the handle needs to know the blob size
	// and whether it is kernel-module BTF.
	hi, err := newHandleInfoFromFD(fd)
	if err != nil {
		_ = fd.Close()
		return nil, err
	}

	return &Handle{fd, hi.size, hi.IsModule()}, nil
}
// Spec parses the kernel BTF into Go types.
//
// base must contain type information for vmlinux if the handle is for
// a kernel module. It may be nil otherwise.
func (h *Handle) Spec(base *Spec) (*Spec, error) {
	raw := make([]byte, h.size)

	var info sys.BtfInfo
	info.Btf, info.BtfSize = sys.NewSlicePointerLen(raw)

	if err := sys.ObjInfo(h.fd, &info); err != nil {
		return nil, err
	}

	if h.needsKernelBase && base == nil {
		return nil, fmt.Errorf("missing base types")
	}

	return loadRawSpec(bytes.NewReader(raw), internal.NativeEndian, base)
}
// Close destroys the handle.
//
// Subsequent calls to FD will return an invalid value.
func (h *Handle) Close() error {
	// Calling Close on a nil *Handle is a no-op.
	if h != nil {
		return h.fd.Close()
	}
	return nil
}
// FD returns the file descriptor for the handle.
//
// The fd is only valid until Close is called.
func (h *Handle) FD() int {
	return h.fd.Int()
}
// Info returns metadata about the handle.
//
// Each call performs a fresh query against the kernel.
func (h *Handle) Info() (*HandleInfo, error) {
	return newHandleInfoFromFD(h.fd)
}
// HandleInfo describes a Handle.
type HandleInfo struct {
// ID of this handle in the kernel. The ID is only valid as long as the
@ -59,7 +187,7 @@ func newHandleInfoFromFD(fd *sys.FD) (*HandleInfo, error) {
}, nil
}
// IsModule returns true if the BTF is for the kernel itself.
// IsVmlinux returns true if the BTF is for the kernel itself.
func (i *HandleInfo) IsVmlinux() bool {
return i.IsKernel && i.Name == "vmlinux"
}
@ -71,51 +199,89 @@ func (i *HandleInfo) IsModule() bool {
// HandleIterator allows enumerating BTF blobs loaded into the kernel.
type HandleIterator struct {
// The ID of the last retrieved handle. Only valid after a call to Next.
ID ID
err error
// The ID of the current handle. Only valid after a call to Next.
ID ID
// The current Handle. Only valid until a call to Next.
// See Take if you want to retain the handle.
Handle *Handle
err error
}
// Next retrieves a handle for the next BTF blob.
// Next retrieves a handle for the next BTF object.
//
// [Handle.Close] is called if *handle is non-nil to avoid leaking fds.
//
// Returns true if another BTF blob was found. Call [HandleIterator.Err] after
// Returns true if another BTF object was found. Call [HandleIterator.Err] after
// the function returns false.
func (it *HandleIterator) Next(handle **Handle) bool {
if *handle != nil {
(*handle).Close()
*handle = nil
}
func (it *HandleIterator) Next() bool {
id := it.ID
for {
attr := &sys.BtfGetNextIdAttr{Id: id}
err := sys.BtfGetNextId(attr)
if errors.Is(err, os.ErrNotExist) {
// There are no more BTF objects.
return false
break
} else if err != nil {
it.err = fmt.Errorf("get next BTF ID: %w", err)
return false
break
}
id = attr.NextId
*handle, err = NewHandleFromID(id)
handle, err := NewHandleFromID(id)
if errors.Is(err, os.ErrNotExist) {
// Try again with the next ID.
continue
} else if err != nil {
it.err = fmt.Errorf("retrieve handle for ID %d: %w", id, err)
return false
break
}
it.ID = id
it.Handle.Close()
it.ID, it.Handle = id, handle
return true
}
// No more handles or we encountered an error.
it.Handle.Close()
it.Handle = nil
return false
}
// Take the ownership of the current handle.
//
// It's the callers responsibility to close the handle.
func (it *HandleIterator) Take() *Handle {
	handle := it.Handle
	// Clear the field so a subsequent Next (or the iterator's cleanup)
	// doesn't close the handle the caller now owns.
	it.Handle = nil
	return handle
}
// Err returns an error if iteration failed for some reason.
//
// Check it after Next returns false.
func (it *HandleIterator) Err() error {
	return it.err
}
// FindHandle returns the first handle for which predicate returns true.
//
// Requires CAP_SYS_ADMIN.
//
// Returns an error wrapping ErrNotFound if predicate never returns true or if
// there is no BTF loaded into the kernel.
func FindHandle(predicate func(info *HandleInfo) bool) (*Handle, error) {
	it := new(HandleIterator)
	// Close whichever handle is current when FindHandle returns. The receiver
	// must be re-evaluated at return time: a plain `defer it.Handle.Close()`
	// binds the nil Handle present at defer time, so an early error return
	// would leak the handle held at that point. Close on nil is a no-op, and
	// Take sets it.Handle to nil, so the success path is unaffected.
	defer func() { it.Handle.Close() }()

	for it.Next() {
		info, err := it.Handle.Info()
		if err != nil {
			return nil, fmt.Errorf("info for ID %d: %w", it.ID, err)
		}

		if predicate(info) {
			// Transfer ownership of the handle to the caller.
			return it.Take(), nil
		}
	}
	if err := it.Err(); err != nil {
		return nil, fmt.Errorf("iterate handles: %w", err)
	}

	return nil, fmt.Errorf("find handle: %w", ErrNotFound)
}

543
vendor/github.com/cilium/ebpf/btf/marshal.go generated vendored Normal file
View file

@ -0,0 +1,543 @@
package btf
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"math"
"sync"
"github.com/cilium/ebpf/internal"
"golang.org/x/exp/slices"
)
// MarshalOptions controls how types are turned into raw BTF wire format.
type MarshalOptions struct {
	// Target byte order. Defaults to the system's native endianness.
	Order binary.ByteOrder
	// Remove function linkage information for compatibility with <5.6 kernels.
	StripFuncLinkage bool
}
// KernelMarshalOptions will generate BTF suitable for the current kernel.
//
// Func linkage is stripped only when the probe says the running kernel
// doesn't support it.
func KernelMarshalOptions() *MarshalOptions {
	return &MarshalOptions{
		Order:            internal.NativeEndian,
		StripFuncLinkage: haveFuncLinkage() != nil,
	}
}
// encoder turns Types into raw BTF.
//
// It holds the mutable state of a single Builder.Marshal call.
type encoder struct {
	MarshalOptions

	// Types queued for deflation, in ID order.
	pending internal.Deque[Type]
	// Destination for the encoded wire format.
	buf *bytes.Buffer
	// String table under construction for this encode.
	strings *stringTableBuilder
	// Mapping of every encountered type to its assigned ID.
	ids map[Type]TypeID
	// The last allocated ID; the next allocation is lastID+1.
	lastID TypeID
}
// bufferPool recycles scratch byte slices used while marshaling BTF,
// pre-sized to hold at least a header plus a small payload.
var bufferPool = sync.Pool{
	New: func() any {
		buf := make([]byte, btfHeaderLen+128)
		return &buf
	},
}

// getByteSlice borrows a scratch slice from the pool.
func getByteSlice() *[]byte {
	return bufferPool.Get().(*[]byte)
}

// putByteSlice returns a slice to the pool, truncating it so the next
// borrower starts from an empty slice while keeping the capacity.
func putByteSlice(buf *[]byte) {
	*buf = (*buf)[:0]
	bufferPool.Put(buf)
}
// Builder turns Types into raw BTF.
//
// The default value may be used and represents an empty BTF blob. Void is
// added implicitly if necessary.
type Builder struct {
	// Explicitly added types.
	types []Type
	// IDs for all added types which the user knows about.
	stableIDs map[Type]TypeID
	// Explicitly added strings.
	strings *stringTableBuilder
}
// NewBuilder creates a Builder from a list of types.
//
// It is more efficient than calling [Add] individually.
//
// Returns an error if adding any of the types fails.
func NewBuilder(types []Type) (*Builder, error) {
	// Pre-size both containers, since every input type will be added.
	b := &Builder{
		types:     make([]Type, 0, len(types)),
		stableIDs: make(map[Type]TypeID, len(types)),
	}

	for _, typ := range types {
		if _, err := b.Add(typ); err != nil {
			return nil, fmt.Errorf("add %s: %w", typ, err)
		}
	}

	return b, nil
}
// Add a Type and allocate a stable ID for it.
//
// Adding the identical Type multiple times is valid and will return the same ID.
//
// See [Type] for details on identity.
func (b *Builder) Add(typ Type) (TypeID, error) {
	if b.stableIDs == nil {
		b.stableIDs = make(map[Type]TypeID)
	}

	if _, ok := typ.(*Void); ok {
		// Equality is weird for void, since it is a zero sized type.
		return 0, nil
	}

	if ds, ok := typ.(*Datasec); ok {
		if err := datasecResolveWorkaround(b, ds); err != nil {
			return 0, err
		}
	}

	// Identical (pointer-equal) types map to their previously assigned ID.
	id, ok := b.stableIDs[typ]
	if ok {
		return id, nil
	}

	b.types = append(b.types, typ)

	// IDs are 1-based: Void implicitly occupies ID 0.
	id = TypeID(len(b.types))
	if int(id) != len(b.types) {
		// TypeID wrapped around; we've run out of IDs.
		return 0, fmt.Errorf("no more type IDs")
	}

	b.stableIDs[typ] = id
	return id, nil
}
// Marshal encodes all types in the Marshaler into BTF wire format.
//
// The result is appended to buf, which may be nil. opts may be nil, in which
// case native byte order is used.
func (b *Builder) Marshal(buf []byte, opts *MarshalOptions) ([]byte, error) {
	stb := b.strings
	if stb == nil {
		// Assume that most types are named. This makes encoding large BTF like
		// vmlinux a lot cheaper.
		stb = newStringTableBuilder(len(b.types))
	} else {
		// Avoid modifying the Builder's string table.
		stb = b.strings.Copy()
	}

	if opts == nil {
		opts = &MarshalOptions{Order: internal.NativeEndian}
	}

	// Reserve space for the BTF header.
	buf = slices.Grow(buf, btfHeaderLen)[:btfHeaderLen]

	w := internal.NewBuffer(buf)
	defer internal.PutBuffer(w)

	e := encoder{
		MarshalOptions: *opts,
		buf:            w,
		strings:        stb,
		lastID:         TypeID(len(b.types)),
		ids:            make(map[Type]TypeID, len(b.types)),
	}

	// Ensure that types are marshaled in the exact order they were Add()ed.
	// Otherwise the ID returned from Add() won't match.
	e.pending.Grow(len(b.types))
	for _, typ := range b.types {
		e.pending.Push(typ)
		e.ids[typ] = b.stableIDs[typ]
	}

	if err := e.deflatePending(); err != nil {
		return nil, err
	}

	// The type section starts right after the header; the string section
	// follows the types.
	length := e.buf.Len()
	typeLen := uint32(length - btfHeaderLen)

	stringLen := e.strings.Length()
	buf = e.strings.AppendEncoded(e.buf.Bytes())

	// Fill out the header, and write it out.
	header := &btfHeader{
		Magic:     btfMagic,
		Version:   1,
		Flags:     0,
		HdrLen:    uint32(btfHeaderLen),
		TypeOff:   0,
		TypeLen:   typeLen,
		StringOff: typeLen,
		StringLen: uint32(stringLen),
	}

	err := binary.Write(sliceWriter(buf[:btfHeaderLen]), e.Order, header)
	if err != nil {
		return nil, fmt.Errorf("write header: %v", err)
	}

	return buf, nil
}
// addString adds a string to the resulting BTF.
//
// Adding the same string multiple times will return the same result.
//
// Returns an identifier into the string table or an error if the string
// contains invalid characters.
func (b *Builder) addString(str string) (uint32, error) {
	if b.strings == nil {
		// Lazily allocate, so a zero Builder stays cheap.
		b.strings = newStringTableBuilder(0)
	}

	return b.strings.Add(str)
}
// allocateID assigns the next free type ID to typ and queues it for
// deflation. Returns an error if the TypeID space is exhausted.
func (e *encoder) allocateID(typ Type) error {
	id := e.lastID + 1
	// Detect uint32 wrap-around of the ID counter.
	if id < e.lastID {
		return errors.New("type ID overflow")
	}

	e.pending.Push(typ)
	e.ids[typ] = id
	e.lastID = id
	return nil
}
// id returns the ID for the given type or panics with an error.
//
// Void always maps to ID 0; any other type must have been allocated an
// ID beforehand.
func (e *encoder) id(typ Type) TypeID {
	if _, isVoid := typ.(*Void); isVoid {
		return 0
	}

	if id, ok := e.ids[typ]; ok {
		return id
	}

	panic(fmt.Errorf("no ID for type %v", typ))
}
// deflatePending encodes all queued types, allocating IDs for any of their
// transitive dependencies along the way.
func (e *encoder) deflatePending() error {
	// Declare root outside of the loop to avoid repeated heap allocations.
	var root Type
	skip := func(t Type) (skip bool) {
		if t == root {
			// Force descending into the current root type even if it already
			// has an ID. Otherwise we miss children of types that have their
			// ID pre-allocated via Add.
			return false
		}

		_, isVoid := t.(*Void)
		_, alreadyEncoded := e.ids[t]
		return isVoid || alreadyEncoded
	}

	// Pushing inside the loop (via allocateID) grows pending, so drain
	// until truly empty rather than iterating a snapshot.
	for !e.pending.Empty() {
		root = e.pending.Shift()

		// Allocate IDs for all children of typ, including transitive dependencies.
		iter := postorderTraversal(root, skip)
		for iter.Next() {
			if iter.Type == root {
				// The iterator yields root at the end, do not allocate another ID.
				break
			}

			if err := e.allocateID(iter.Type); err != nil {
				return err
			}
		}

		if err := e.deflateType(root); err != nil {
			id := e.ids[root]
			return fmt.Errorf("deflate %v with ID %d: %w", root, id, err)
		}
	}

	return nil
}
// deflateType writes the wire-format representation of a single Type into
// the encoder's buffer. All referenced types must already have IDs; a
// missing ID panics in e.id and is converted to an error here.
func (e *encoder) deflateType(typ Type) (err error) {
	// e.id panics on unknown types; recover turns that into a returned error.
	defer func() {
		if r := recover(); r != nil {
			var ok bool
			err, ok = r.(error)
			if !ok {
				panic(r)
			}
		}
	}()

	var raw rawType
	raw.NameOff, err = e.strings.Add(typ.TypeName())
	if err != nil {
		return err
	}

	switch v := typ.(type) {
	case *Void:
		// ID 0 is implicit; encoding it would corrupt the blob.
		return errors.New("Void is implicit in BTF wire format")

	case *Int:
		raw.SetKind(kindInt)
		raw.SetSize(v.Size)

		var bi btfInt
		bi.SetEncoding(v.Encoding)
		// We need to set bits in addition to size, since btf_type_int_is_regular
		// otherwise flags this as a bitfield.
		bi.SetBits(byte(v.Size) * 8)
		raw.data = bi

	case *Pointer:
		raw.SetKind(kindPointer)
		raw.SetType(e.id(v.Target))

	case *Array:
		raw.SetKind(kindArray)
		raw.data = &btfArray{
			e.id(v.Type),
			e.id(v.Index),
			v.Nelems,
		}

	case *Struct:
		raw.SetKind(kindStruct)
		raw.SetSize(v.Size)
		raw.data, err = e.convertMembers(&raw.btfType, v.Members)

	case *Union:
		raw.SetKind(kindUnion)
		raw.SetSize(v.Size)
		raw.data, err = e.convertMembers(&raw.btfType, v.Members)

	case *Enum:
		raw.SetSize(v.size())
		raw.SetVlen(len(v.Values))
		raw.SetSigned(v.Signed)

		// Only emit ENUM64 when a value actually needs 64 bits, for
		// compatibility with kernels that don't know the kind.
		if v.has64BitValues() {
			raw.SetKind(kindEnum64)
			raw.data, err = e.deflateEnum64Values(v.Values)
		} else {
			raw.SetKind(kindEnum)
			raw.data, err = e.deflateEnumValues(v.Values)
		}

	case *Fwd:
		raw.SetKind(kindForward)
		raw.SetFwdKind(v.Kind)

	case *Typedef:
		raw.SetKind(kindTypedef)
		raw.SetType(e.id(v.Type))

	case *Volatile:
		raw.SetKind(kindVolatile)
		raw.SetType(e.id(v.Type))

	case *Const:
		raw.SetKind(kindConst)
		raw.SetType(e.id(v.Type))

	case *Restrict:
		raw.SetKind(kindRestrict)
		raw.SetType(e.id(v.Type))

	case *Func:
		raw.SetKind(kindFunc)
		raw.SetType(e.id(v.Type))
		// Linkage is stripped when targeting kernels that reject it.
		if !e.StripFuncLinkage {
			raw.SetLinkage(v.Linkage)
		}

	case *FuncProto:
		raw.SetKind(kindFuncProto)
		raw.SetType(e.id(v.Return))
		raw.SetVlen(len(v.Params))
		raw.data, err = e.deflateFuncParams(v.Params)

	case *Var:
		raw.SetKind(kindVar)
		raw.SetType(e.id(v.Type))
		raw.data = btfVariable{uint32(v.Linkage)}

	case *Datasec:
		raw.SetKind(kindDatasec)
		raw.SetSize(v.Size)
		raw.SetVlen(len(v.Vars))
		raw.data = e.deflateVarSecinfos(v.Vars)

	case *Float:
		raw.SetKind(kindFloat)
		raw.SetSize(v.Size)

	case *declTag:
		raw.SetKind(kindDeclTag)
		raw.SetType(e.id(v.Type))
		raw.data = &btfDeclTag{uint32(v.Index)}
		// declTag's name is its Value, not the (empty) TypeName.
		raw.NameOff, err = e.strings.Add(v.Value)

	case *typeTag:
		raw.SetKind(kindTypeTag)
		raw.SetType(e.id(v.Type))
		// typeTag's name is its Value, not the (empty) TypeName.
		raw.NameOff, err = e.strings.Add(v.Value)

	default:
		return fmt.Errorf("don't know how to deflate %T", v)
	}

	if err != nil {
		return err
	}

	return raw.Marshal(e.buf, e.Order)
}
// convertMembers encodes struct/union members, packing bitfield size into
// the upper byte of the offset when any member is a bitfield, and updates
// header's vlen and bitfield flag accordingly.
func (e *encoder) convertMembers(header *btfType, members []Member) ([]btfMember, error) {
	bms := make([]btfMember, 0, len(members))
	isBitfield := false
	for _, member := range members {
		// Once any member is a bitfield, all subsequent offsets use the
		// packed size<<24 | offset encoding.
		isBitfield = isBitfield || member.BitfieldSize > 0

		offset := member.Offset
		if isBitfield {
			offset = member.BitfieldSize<<24 | (member.Offset & 0xffffff)
		}

		nameOff, err := e.strings.Add(member.Name)
		if err != nil {
			return nil, err
		}

		bms = append(bms, btfMember{
			nameOff,
			e.id(member.Type),
			uint32(offset),
		})
	}

	header.SetVlen(len(members))
	header.SetBitfield(isBitfield)
	return bms, nil
}
// deflateEnumValues encodes enum members as 32-bit ENUM entries.
// Values that do not fit into 32 bits are rejected (use ENUM64 instead).
func (e *encoder) deflateEnumValues(values []EnumValue) ([]btfEnum, error) {
	bes := make([]btfEnum, 0, len(values))
	for _, value := range values {
		nameOff, err := e.strings.Add(value.Name)
		if err != nil {
			return nil, err
		}

		if value.Value > math.MaxUint32 {
			return nil, fmt.Errorf("value of enum %q exceeds 32 bits", value.Name)
		}

		bes = append(bes, btfEnum{
			nameOff,
			uint32(value.Value),
		})
	}

	return bes, nil
}
// deflateEnum64Values encodes enum members as ENUM64 entries, splitting
// each 64-bit value into its low and high 32-bit halves as required by
// the wire format.
func (e *encoder) deflateEnum64Values(values []EnumValue) ([]btfEnum64, error) {
	out := make([]btfEnum64, 0, len(values))
	for _, ev := range values {
		off, err := e.strings.Add(ev.Name)
		if err != nil {
			return nil, err
		}

		lo, hi := uint32(ev.Value), uint32(ev.Value>>32)
		out = append(out, btfEnum64{off, lo, hi})
	}

	return out, nil
}
// deflateFuncParams encodes the parameter list of a FuncProto, interning
// each parameter name in the string table.
func (e *encoder) deflateFuncParams(params []FuncParam) ([]btfParam, error) {
	bps := make([]btfParam, 0, len(params))
	for _, param := range params {
		nameOff, err := e.strings.Add(param.Name)
		if err != nil {
			return nil, err
		}

		bps = append(bps, btfParam{
			nameOff,
			e.id(param.Type),
		})
	}
	return bps, nil
}
// deflateVarSecinfos encodes the variable entries of a Datasec. It cannot
// fail: types are referenced by ID only and no strings are interned.
func (e *encoder) deflateVarSecinfos(vars []VarSecinfo) []btfVarSecinfo {
	out := make([]btfVarSecinfo, 0, len(vars))
	for i := range vars {
		vsi := &vars[i]
		out = append(out, btfVarSecinfo{
			vsi.Offset,
			vsi.Size,
		})
		// e.id must be called before constructing the literal above would be
		// clearer; keep Type first to match the wire layout.
		out[len(out)-1] = btfVarSecinfo{
			e.id(vsi.Type),
			vsi.Offset,
			vsi.Size,
		}
	}
	return out
}
// MarshalMapKV creates a BTF object containing a map key and value.
//
// The function is intended for the use of the ebpf package and may be removed
// at any point in time.
//
// key and value may each be nil, in which case the corresponding returned ID
// is zero.
func MarshalMapKV(key, value Type) (_ *Handle, keyID, valueID TypeID, err error) {
	var b Builder

	if key != nil {
		keyID, err = b.Add(key)
		if err != nil {
			return nil, 0, 0, fmt.Errorf("add key type: %w", err)
		}
	}

	if value != nil {
		valueID, err = b.Add(value)
		if err != nil {
			return nil, 0, 0, fmt.Errorf("add value type: %w", err)
		}
	}

	handle, err := NewHandle(&b)
	if err != nil {
		// Check for 'full' map BTF support, since kernels between 4.18 and 5.2
		// already support BTF blobs for maps without Var or Datasec just fine.
		if err := haveMapBTF(); err != nil {
			return nil, 0, 0, err
		}
	}
	// On other load failures the (nil) handle and original err are returned
	// together so the caller can decide how to proceed.
	return handle, keyID, valueID, err
}

View file

@ -6,6 +6,9 @@ import (
"errors"
"fmt"
"io"
"strings"
"golang.org/x/exp/maps"
)
type stringTable struct {
@ -88,11 +91,6 @@ func (st *stringTable) lookup(offset uint32) (string, error) {
return st.strings[i], nil
}
func (st *stringTable) Length() int {
last := len(st.offsets) - 1
return int(st.offsets[last]) + len(st.strings[last]) + 1
}
func (st *stringTable) Marshal(w io.Writer) error {
for _, str := range st.strings {
_, err := io.WriteString(w, str)
@ -107,6 +105,11 @@ func (st *stringTable) Marshal(w io.Writer) error {
return nil
}
// Num returns the number of strings in the table.
func (st *stringTable) Num() int {
return len(st.strings)
}
// search is a copy of sort.Search specialised for uint32.
//
// Licensed under https://go.dev/LICENSE
@ -126,3 +129,86 @@ func search(ints []uint32, needle uint32) int {
// i == j, f(i-1) == false, and f(j) (= f(i)) == true => answer is i.
return i
}
// stringTableBuilder builds BTF string tables.
type stringTableBuilder struct {
	// Total encoded length in bytes, including each string's NUL terminator.
	length uint32
	// Maps each interned string to its byte offset in the table.
	strings map[string]uint32
}
// newStringTableBuilder creates a builder with the given capacity.
//
// capacity may be zero.
func newStringTableBuilder(capacity int) *stringTableBuilder {
	var stb stringTableBuilder

	if capacity == 0 {
		// Use the runtime's small default size.
		stb.strings = make(map[string]uint32)
	} else {
		stb.strings = make(map[string]uint32, capacity)
	}

	// Ensure that the empty string is at index 0.
	stb.append("")
	return &stb
}
// Add a string to the table.
//
// Adding the same string multiple times will only store it once.
//
// Returns the string's offset, or an error if it contains a NUL byte
// (which would terminate the string early in the wire format).
func (stb *stringTableBuilder) Add(str string) (uint32, error) {
	if strings.IndexByte(str, 0) != -1 {
		return 0, fmt.Errorf("string contains null: %q", str)
	}

	offset, ok := stb.strings[str]
	if ok {
		return offset, nil
	}

	return stb.append(str), nil
}
// append unconditionally stores str at the current end of the table and
// returns its offset. Callers must have deduplicated and validated str.
func (stb *stringTableBuilder) append(str string) uint32 {
	offset := stb.length
	// +1 accounts for the NUL terminator in the encoded table.
	stb.length += uint32(len(str)) + 1
	stb.strings[str] = offset
	return offset
}
// Lookup finds the offset of a string in the table.
//
// Returns an error if str hasn't been added yet.
func (stb *stringTableBuilder) Lookup(str string) (uint32, error) {
	if offset, ok := stb.strings[str]; ok {
		return offset, nil
	}

	return 0, fmt.Errorf("string %q is not in table", str)
}
// Length returns the length in bytes of the encoded table, including
// every string's NUL terminator.
func (stb *stringTableBuilder) Length() int {
	return int(stb.length)
}
// AppendEncoded appends the string table to the end of the provided buffer.
//
// Each string is written at its recorded offset; the NUL terminators come
// from the zeroed bytes reserved up front.
func (stb *stringTableBuilder) AppendEncoded(buf []byte) []byte {
	n := len(buf)
	buf = append(buf, make([]byte, stb.Length())...)
	strings := buf[n:]
	// Map iteration order doesn't matter: offsets are fixed and disjoint.
	for str, offset := range stb.strings {
		copy(strings[offset:], str)
	}
	return buf
}
// Copy the string table builder.
//
// The clone shares no state with the original, so either may be mutated
// independently.
func (stb *stringTableBuilder) Copy() *stringTableBuilder {
	return &stringTableBuilder{
		stb.length,
		maps.Clone(stb.strings),
	}
}

141
vendor/github.com/cilium/ebpf/btf/traversal.go generated vendored Normal file
View file

@ -0,0 +1,141 @@
package btf
import (
"fmt"
"github.com/cilium/ebpf/internal"
)
// Functions to traverse a cyclic graph of types. The below was very useful:
// https://eli.thegreenplace.net/2015/directed-graph-traversal-orderings-and-applications-to-data-flow-analysis/#post-order-and-reverse-post-order
// postorderIterator yields the types reachable from a root, children before
// parents, without recursing (the graph may be cyclic and deep).
type postorderIterator struct {
	// Iteration skips types for which this function returns true.
	skip func(Type) bool
	// The root type. May be nil if skip(root) is true.
	root Type

	// Contains types which need to be either walked or yielded.
	types typeDeque
	// Contains a boolean whether the type has been walked or not.
	// Kept in lockstep with types (parallel stacks).
	walked internal.Deque[bool]
	// The set of types which has been pushed onto types.
	pushed map[Type]struct{}

	// The current type. Only valid after a call to Next().
	Type Type
}
// postorderTraversal iterates all types reachable from root by visiting the
// leaves of the graph first.
//
// Types for which skip returns true are ignored. skip may be nil.
func postorderTraversal(root Type, skip func(Type) (skip bool)) postorderIterator {
	// Avoid allocations for the common case of a skipped root.
	if skip != nil && skip(root) {
		return postorderIterator{}
	}

	po := postorderIterator{root: root, skip: skip}
	// Seed the stack with root's direct children; root itself is yielded last.
	walkType(root, po.push)

	return po
}
// push queues a child type for traversal unless it was already pushed, is
// the root (yielded separately at the end), or is skipped.
func (po *postorderIterator) push(t *Type) {
	if _, ok := po.pushed[*t]; ok || *t == po.root {
		return
	}

	if po.skip != nil && po.skip(*t) {
		return
	}

	if po.pushed == nil {
		// Lazily allocate pushed to avoid an allocation for Types without children.
		po.pushed = make(map[Type]struct{})
	}

	po.pushed[*t] = struct{}{}
	po.types.Push(t)
	// Mark as "not yet walked": its children still need to be expanded.
	po.walked.Push(false)
}
// Next returns true if there is another Type to traverse.
//
// It implements iterative post-order: each stacked type is visited twice,
// first to expand its children, then (once walked) to be yielded.
func (po *postorderIterator) Next() bool {
	for !po.types.Empty() {
		t := po.types.Pop()

		if !po.walked.Pop() {
			// Push the type again, so that we re-evaluate it in done state
			// after all children have been handled.
			po.types.Push(t)
			po.walked.Push(true)

			// Add all direct children to todo.
			walkType(*t, po.push)
		} else {
			// We've walked this type previously, so we now know that all
			// children have been handled.
			po.Type = *t
			return true
		}
	}

	// Only return root once.
	po.Type, po.root = po.root, nil
	return po.Type != nil
}
// walkType calls fn on each child of typ.
//
// fn receives a pointer into typ so callers may rewrite edges in place.
func walkType(typ Type, fn func(*Type)) {
	// Explicitly type switch on the most common types to allow the inliner to
	// do its work. This avoids allocating intermediate slices from walk() on
	// the heap.
	switch v := typ.(type) {
	case *Void, *Int, *Enum, *Fwd, *Float:
		// No children to traverse.
	case *Pointer:
		fn(&v.Target)
	case *Array:
		fn(&v.Index)
		fn(&v.Type)
	case *Struct:
		for i := range v.Members {
			fn(&v.Members[i].Type)
		}
	case *Union:
		for i := range v.Members {
			fn(&v.Members[i].Type)
		}
	case *Typedef:
		fn(&v.Type)
	case *Volatile:
		fn(&v.Type)
	case *Const:
		fn(&v.Type)
	case *Restrict:
		fn(&v.Type)
	case *Func:
		fn(&v.Type)
	case *FuncProto:
		fn(&v.Return)
		for i := range v.Params {
			fn(&v.Params[i].Type)
		}
	case *Var:
		fn(&v.Type)
	case *Datasec:
		for i := range v.Vars {
			fn(&v.Vars[i].Type)
		}
	case *declTag:
		fn(&v.Type)
	case *typeTag:
		fn(&v.Type)
	case *cycle:
		// cycle has children, but we ignore them deliberately.
	default:
		// New Type implementations must be added here (see the Type interface).
		panic(fmt.Sprintf("don't know how to walk Type %T", v))
	}
}

View file

@ -1,6 +1,7 @@
package btf
import (
"errors"
"fmt"
"io"
"math"
@ -8,14 +9,27 @@ import (
"strings"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
)
const maxTypeDepth = 32
// TypeID identifies a type in a BTF section.
type TypeID uint32
type TypeID = sys.TypeID
// Type represents a type described by BTF.
//
// Identity of Type follows the [Go specification]: two Types are considered
// equal if they have the same concrete type and the same dynamic value, aka
// they point at the same location in memory. This means that the following
// Types are considered distinct even though they have the same "shape".
//
// a := &Int{Size: 1}
// b := &Int{Size: 1}
// a != b
//
// [Go specification]: https://go.dev/ref/spec#Comparison_operators
type Type interface {
// Type can be formatted using the %s and %v verbs. %s outputs only the
// identity of the type, without any detail. %v outputs additional detail.
@ -35,9 +49,7 @@ type Type interface {
// Make a copy of the type, without copying Type members.
copy() Type
// Enumerate all nested Types. Repeated calls must visit nested
// types in the same order.
walk(*typeDeque)
// New implementations must update walkType.
}
var (
@ -51,20 +63,11 @@ var (
_ Type = (*Var)(nil)
_ Type = (*Datasec)(nil)
_ Type = (*Float)(nil)
_ Type = (*declTag)(nil)
_ Type = (*typeTag)(nil)
_ Type = (*cycle)(nil)
)
// types is a list of Type.
//
// The order determines the ID of a type.
type types []Type
func (ts types) ByID(id TypeID) (Type, error) {
if int(id) > len(ts) {
return nil, fmt.Errorf("type ID %d: %w", id, ErrNotFound)
}
return ts[id], nil
}
// Void is the unit type of BTF.
type Void struct{}
@ -72,40 +75,32 @@ func (v *Void) Format(fs fmt.State, verb rune) { formatType(fs, verb, v) }
func (v *Void) TypeName() string { return "" }
func (v *Void) size() uint32 { return 0 }
func (v *Void) copy() Type { return (*Void)(nil) }
func (v *Void) walk(*typeDeque) {}
type IntEncoding byte
// Valid IntEncodings.
//
// These may look like they are flags, but they aren't.
const (
Signed IntEncoding = 1 << iota
Char
Bool
Unsigned IntEncoding = 0
Signed IntEncoding = 1
Char IntEncoding = 2
Bool IntEncoding = 4
)
func (ie IntEncoding) IsSigned() bool {
return ie&Signed != 0
}
func (ie IntEncoding) IsChar() bool {
return ie&Char != 0
}
func (ie IntEncoding) IsBool() bool {
return ie&Bool != 0
}
func (ie IntEncoding) String() string {
switch {
case ie.IsChar() && ie.IsSigned():
switch ie {
case Char:
// NB: There is no way to determine signedness for char.
return "char"
case ie.IsChar() && !ie.IsSigned():
return "uchar"
case ie.IsBool():
case Bool:
return "bool"
case ie.IsSigned():
case Signed:
return "signed"
default:
case Unsigned:
return "unsigned"
default:
return fmt.Sprintf("IntEncoding(%d)", byte(ie))
}
}
@ -126,7 +121,6 @@ func (i *Int) Format(fs fmt.State, verb rune) {
func (i *Int) TypeName() string { return i.Name }
func (i *Int) size() uint32 { return i.Size }
func (i *Int) walk(*typeDeque) {}
func (i *Int) copy() Type {
cpy := *i
return &cpy
@ -141,9 +135,8 @@ func (p *Pointer) Format(fs fmt.State, verb rune) {
formatType(fs, verb, p, "target=", p.Target)
}
func (p *Pointer) TypeName() string { return "" }
func (p *Pointer) size() uint32 { return 8 }
func (p *Pointer) walk(tdq *typeDeque) { tdq.push(&p.Target) }
func (p *Pointer) TypeName() string { return "" }
func (p *Pointer) size() uint32 { return 8 }
func (p *Pointer) copy() Type {
cpy := *p
return &cpy
@ -162,11 +155,6 @@ func (arr *Array) Format(fs fmt.State, verb rune) {
func (arr *Array) TypeName() string { return "" }
func (arr *Array) walk(tdq *typeDeque) {
tdq.push(&arr.Index)
tdq.push(&arr.Type)
}
func (arr *Array) copy() Type {
cpy := *arr
return &cpy
@ -188,12 +176,6 @@ func (s *Struct) TypeName() string { return s.Name }
func (s *Struct) size() uint32 { return s.Size }
func (s *Struct) walk(tdq *typeDeque) {
for i := range s.Members {
tdq.push(&s.Members[i].Type)
}
}
func (s *Struct) copy() Type {
cpy := *s
cpy.Members = copyMembers(s.Members)
@ -220,12 +202,6 @@ func (u *Union) TypeName() string { return u.Name }
func (u *Union) size() uint32 { return u.Size }
func (u *Union) walk(tdq *typeDeque) {
for i := range u.Members {
tdq.push(&u.Members[i].Type)
}
}
func (u *Union) copy() Type {
cpy := *u
cpy.Members = copyMembers(u.Members)
@ -243,6 +219,7 @@ func copyMembers(orig []Member) []Member {
}
type composite interface {
Type
members() []Member
}
@ -273,7 +250,9 @@ type Member struct {
type Enum struct {
Name string
// Size of the enum value in bytes.
Size uint32
Size uint32
// True if the values should be interpreted as signed integers.
Signed bool
Values []EnumValue
}
@ -288,11 +267,10 @@ func (e *Enum) TypeName() string { return e.Name }
// Is is not a valid Type
type EnumValue struct {
Name string
Value int32
Value uint64
}
func (e *Enum) size() uint32 { return e.Size }
func (e *Enum) walk(*typeDeque) {}
func (e *Enum) size() uint32 { return e.Size }
func (e *Enum) copy() Type {
cpy := *e
cpy.Values = make([]EnumValue, len(e.Values))
@ -300,6 +278,21 @@ func (e *Enum) copy() Type {
return &cpy
}
// has64BitValues returns true if the Enum contains a value larger than 32 bits.
// Kernels before 6.0 have enum values that overrun u32 replaced with zeroes.
//
// 64-bit enums have their Enum.Size attributes correctly set to 8, but if we
// use the size attribute as a heuristic during BTF marshaling, we'll emit
// ENUM64s to kernels that don't support them.
func (e *Enum) has64BitValues() bool {
	// Inspect the actual values rather than trusting Size (see above).
	for _, v := range e.Values {
		if v.Value > math.MaxUint32 {
			return true
		}
	}
	return false
}
// FwdKind is the type of forward declaration.
type FwdKind int
@ -332,7 +325,6 @@ func (f *Fwd) Format(fs fmt.State, verb rune) {
func (f *Fwd) TypeName() string { return f.Name }
func (f *Fwd) walk(*typeDeque) {}
func (f *Fwd) copy() Type {
cpy := *f
return &cpy
@ -350,7 +342,6 @@ func (td *Typedef) Format(fs fmt.State, verb rune) {
func (td *Typedef) TypeName() string { return td.Name }
func (td *Typedef) walk(tdq *typeDeque) { tdq.push(&td.Type) }
func (td *Typedef) copy() Type {
cpy := *td
return &cpy
@ -367,8 +358,7 @@ func (v *Volatile) Format(fs fmt.State, verb rune) {
func (v *Volatile) TypeName() string { return "" }
func (v *Volatile) qualify() Type { return v.Type }
func (v *Volatile) walk(tdq *typeDeque) { tdq.push(&v.Type) }
func (v *Volatile) qualify() Type { return v.Type }
func (v *Volatile) copy() Type {
cpy := *v
return &cpy
@ -385,8 +375,7 @@ func (c *Const) Format(fs fmt.State, verb rune) {
func (c *Const) TypeName() string { return "" }
func (c *Const) qualify() Type { return c.Type }
func (c *Const) walk(tdq *typeDeque) { tdq.push(&c.Type) }
func (c *Const) qualify() Type { return c.Type }
func (c *Const) copy() Type {
cpy := *c
return &cpy
@ -403,8 +392,7 @@ func (r *Restrict) Format(fs fmt.State, verb rune) {
func (r *Restrict) TypeName() string { return "" }
func (r *Restrict) qualify() Type { return r.Type }
func (r *Restrict) walk(tdq *typeDeque) { tdq.push(&r.Type) }
func (r *Restrict) qualify() Type { return r.Type }
func (r *Restrict) copy() Type {
cpy := *r
return &cpy
@ -422,13 +410,18 @@ func FuncMetadata(ins *asm.Instruction) *Func {
return fn
}
// WithFuncMetadata adds a btf.Func to the Metadata of asm.Instruction.
func WithFuncMetadata(ins asm.Instruction, fn *Func) asm.Instruction {
ins.Metadata.Set(funcInfoMeta{}, fn)
return ins
}
func (f *Func) Format(fs fmt.State, verb rune) {
formatType(fs, verb, f, f.Linkage, "proto=", f.Type)
}
func (f *Func) TypeName() string { return f.Name }
func (f *Func) walk(tdq *typeDeque) { tdq.push(&f.Type) }
func (f *Func) copy() Type {
cpy := *f
return &cpy
@ -446,13 +439,6 @@ func (fp *FuncProto) Format(fs fmt.State, verb rune) {
func (fp *FuncProto) TypeName() string { return "" }
func (fp *FuncProto) walk(tdq *typeDeque) {
tdq.push(&fp.Return)
for i := range fp.Params {
tdq.push(&fp.Params[i].Type)
}
}
func (fp *FuncProto) copy() Type {
cpy := *fp
cpy.Params = make([]FuncParam, len(fp.Params))
@ -478,7 +464,6 @@ func (v *Var) Format(fs fmt.State, verb rune) {
func (v *Var) TypeName() string { return v.Name }
func (v *Var) walk(tdq *typeDeque) { tdq.push(&v.Type) }
func (v *Var) copy() Type {
cpy := *v
return &cpy
@ -499,12 +484,6 @@ func (ds *Datasec) TypeName() string { return ds.Name }
func (ds *Datasec) size() uint32 { return ds.Size }
func (ds *Datasec) walk(tdq *typeDeque) {
for i := range ds.Vars {
tdq.push(&ds.Vars[i].Type)
}
}
func (ds *Datasec) copy() Type {
cpy := *ds
cpy.Vars = make([]VarSecinfo, len(ds.Vars))
@ -516,6 +495,7 @@ func (ds *Datasec) copy() Type {
//
// It is not a valid Type.
type VarSecinfo struct {
// Var or Func.
Type Type
Offset uint32
Size uint32
@ -535,12 +515,48 @@ func (f *Float) Format(fs fmt.State, verb rune) {
func (f *Float) TypeName() string { return f.Name }
func (f *Float) size() uint32 { return f.Size }
func (f *Float) walk(*typeDeque) {}
func (f *Float) copy() Type {
cpy := *f
return &cpy
}
// declTag associates metadata with a declaration.
type declTag struct {
Type Type
Value string
// The index this tag refers to in the target type. For composite types,
// a value of -1 indicates that the tag refers to the whole type. Otherwise
// it indicates which member or argument the tag applies to.
Index int
}
func (dt *declTag) Format(fs fmt.State, verb rune) {
formatType(fs, verb, dt, "type=", dt.Type, "value=", dt.Value, "index=", dt.Index)
}
func (dt *declTag) TypeName() string { return "" }
func (dt *declTag) copy() Type {
cpy := *dt
return &cpy
}
// typeTag associates metadata with a type.
type typeTag struct {
Type Type
Value string
}
func (tt *typeTag) Format(fs fmt.State, verb rune) {
formatType(fs, verb, tt, "type=", tt.Type, "value=", tt.Value)
}
func (tt *typeTag) TypeName() string { return "" }
func (tt *typeTag) qualify() Type { return tt.Type }
func (tt *typeTag) copy() Type {
cpy := *tt
return &cpy
}
// cycle is a type which had to be elided since it exceeded maxTypeDepth.
type cycle struct {
root Type
@ -549,7 +565,6 @@ type cycle struct {
func (c *cycle) ID() TypeID { return math.MaxUint32 }
func (c *cycle) Format(fs fmt.State, verb rune) { formatType(fs, verb, c, "root=", c.root) }
func (c *cycle) TypeName() string { return "" }
func (c *cycle) walk(*typeDeque) {}
func (c *cycle) copy() Type {
cpy := *c
return &cpy
@ -576,8 +591,11 @@ var (
_ qualifier = (*Const)(nil)
_ qualifier = (*Restrict)(nil)
_ qualifier = (*Volatile)(nil)
_ qualifier = (*typeTag)(nil)
)
var errUnsizedType = errors.New("type is unsized")
// Sizeof returns the size of a type in bytes.
//
// Returns an error if the size can't be computed.
@ -612,7 +630,7 @@ func Sizeof(typ Type) (int, error) {
continue
default:
return 0, fmt.Errorf("unsized type %T", typ)
return 0, fmt.Errorf("type %T: %w", typ, errUnsizedType)
}
if n > 0 && elem > math.MaxInt64/n {
@ -632,16 +650,33 @@ func Sizeof(typ Type) (int, error) {
// alignof returns the alignment of a type.
//
// Currently only supports the subset of types necessary for bitfield relocations.
// Returns an error if the Type can't be aligned, like an integer with an uneven
// size. Currently only supports the subset of types necessary for bitfield
// relocations.
func alignof(typ Type) (int, error) {
var n int
switch t := UnderlyingType(typ).(type) {
case *Enum:
return int(t.size()), nil
n = int(t.size())
case *Int:
return int(t.Size), nil
n = int(t.Size)
case *Array:
return alignof(t.Type)
default:
return 0, fmt.Errorf("can't calculate alignment of %T", t)
}
if !pow(n) {
return 0, fmt.Errorf("alignment value %d is not a power of two", n)
}
return n, nil
}
// pow reports whether n is a positive power of two.
//
// The bit trick n&(n-1) == 0 clears the lowest set bit; it is zero only
// when n has at most one bit set. On its own the trick wrongly accepts
// the minimum (most negative) int, whose two's-complement representation
// has exactly one bit set, so we require n > 0 rather than n != 0.
func pow(n int) bool {
	return n > 0 && (n&(n-1)) == 0
}
// Transformer modifies a given Type and returns the result.
@ -655,7 +690,7 @@ type Transformer func(Type) Type
// typ may form a cycle. If transform is not nil, it is called with the
// to be copied type, and the returned value is copied instead.
func Copy(typ Type, transform Transformer) Type {
copies := make(copier)
copies := copier{copies: make(map[Type]Type)}
copies.copy(&typ, transform)
return typ
}
@ -667,7 +702,7 @@ func copyTypes(types []Type, transform Transformer) []Type {
result := make([]Type, len(types))
copy(result, types)
copies := make(copier)
copies := copier{copies: make(map[Type]Type, len(types))}
for i := range result {
copies.copy(&result[i], transform)
}
@ -675,13 +710,15 @@ func copyTypes(types []Type, transform Transformer) []Type {
return result
}
type copier map[Type]Type
type copier struct {
copies map[Type]Type
work typeDeque
}
func (c copier) copy(typ *Type, transform Transformer) {
var work typeDeque
for t := typ; t != nil; t = work.pop() {
func (c *copier) copy(typ *Type, transform Transformer) {
for t := typ; t != nil; t = c.work.Pop() {
// *t is the identity of the type.
if cpy := c[*t]; cpy != nil {
if cpy := c.copies[*t]; cpy != nil {
*t = cpy
continue
}
@ -693,108 +730,41 @@ func (c copier) copy(typ *Type, transform Transformer) {
cpy = (*t).copy()
}
c[*t] = cpy
c.copies[*t] = cpy
*t = cpy
// Mark any nested types for copying.
cpy.walk(&work)
walkType(cpy, c.work.Push)
}
}
// typeDeque keeps track of pointers to types which still
// need to be visited.
type typeDeque struct {
types []*Type
read, write uint64
mask uint64
}
func (dq *typeDeque) empty() bool {
return dq.read == dq.write
}
// push adds a type to the stack.
func (dq *typeDeque) push(t *Type) {
if dq.write-dq.read < uint64(len(dq.types)) {
dq.types[dq.write&dq.mask] = t
dq.write++
return
}
new := len(dq.types) * 2
if new == 0 {
new = 8
}
types := make([]*Type, new)
pivot := dq.read & dq.mask
n := copy(types, dq.types[pivot:])
n += copy(types[n:], dq.types[:pivot])
types[n] = t
dq.types = types
dq.mask = uint64(new) - 1
dq.read, dq.write = 0, uint64(n+1)
}
// shift returns the first element or null.
func (dq *typeDeque) shift() *Type {
if dq.empty() {
return nil
}
index := dq.read & dq.mask
t := dq.types[index]
dq.types[index] = nil
dq.read++
return t
}
// pop returns the last element or null.
func (dq *typeDeque) pop() *Type {
if dq.empty() {
return nil
}
dq.write--
index := dq.write & dq.mask
t := dq.types[index]
dq.types[index] = nil
return t
}
// all returns all elements.
//
// The deque is empty after calling this method.
func (dq *typeDeque) all() []*Type {
length := dq.write - dq.read
types := make([]*Type, 0, length)
for t := dq.shift(); t != nil; t = dq.shift() {
types = append(types, t)
}
return types
}
type typeDeque = internal.Deque[*Type]
// inflateRawTypes takes a list of raw btf types linked via type IDs, and turns
// it into a graph of Types connected via pointers.
//
// If baseTypes are provided, then the raw types are
// considered to be of a split BTF (e.g., a kernel module).
// If base is provided, then the raw types are considered to be of a split BTF
// (e.g., a kernel module).
//
// Returns a slice of types indexed by TypeID. Since BTF ignores compilation
// Returns a slice of types indexed by TypeID. Since BTF ignores compilation
// units, multiple types may share the same name. A Type may form a cyclic graph
// by pointing at itself.
func inflateRawTypes(rawTypes []rawType, baseTypes types, rawStrings *stringTable) ([]Type, error) {
func inflateRawTypes(rawTypes []rawType, rawStrings *stringTable, base *Spec) ([]Type, error) {
types := make([]Type, 0, len(rawTypes)+1) // +1 for Void added to base types
typeIDOffset := TypeID(1) // Void is TypeID(0), so the rest starts from TypeID(1)
// Void is defined to always be type ID 0, and is thus omitted from BTF.
types = append(types, (*Void)(nil))
if baseTypes == nil {
// Void is defined to always be type ID 0, and is thus omitted from BTF.
types = append(types, (*Void)(nil))
} else {
// For split BTF, the next ID is max base BTF type ID + 1
typeIDOffset = TypeID(len(baseTypes))
firstTypeID := TypeID(0)
if base != nil {
var err error
firstTypeID, err = base.nextTypeID()
if err != nil {
return nil, err
}
// Split BTF doesn't contain Void.
types = types[:0]
}
type fixupDef struct {
@ -803,39 +773,42 @@ func inflateRawTypes(rawTypes []rawType, baseTypes types, rawStrings *stringTabl
}
var fixups []fixupDef
fixup := func(id TypeID, typ *Type) {
if id < TypeID(len(baseTypes)) {
*typ = baseTypes[id]
return
fixup := func(id TypeID, typ *Type) bool {
if id < firstTypeID {
if baseType, err := base.TypeByID(id); err == nil {
*typ = baseType
return true
}
}
idx := id
if baseTypes != nil {
idx = id - TypeID(len(baseTypes))
}
if idx < TypeID(len(types)) {
idx := int(id - firstTypeID)
if idx < len(types) {
// We've already inflated this type, fix it up immediately.
*typ = types[idx]
return
return true
}
fixups = append(fixups, fixupDef{id, typ})
return false
}
type assertion struct {
id TypeID
typ *Type
want reflect.Type
}
var assertions []assertion
assert := func(typ *Type, want reflect.Type) error {
if *typ != nil {
// The type has already been fixed up, check the type immediately.
if reflect.TypeOf(*typ) != want {
return fmt.Errorf("expected %s, got %T", want, *typ)
}
fixupAndAssert := func(id TypeID, typ *Type, want reflect.Type) error {
if !fixup(id, typ) {
assertions = append(assertions, assertion{id, typ, want})
return nil
}
assertions = append(assertions, assertion{typ, want})
// The type has already been fixed up, check the type immediately.
if reflect.TypeOf(*typ) != want {
return fmt.Errorf("type ID %d: expected %s, got %T", id, want, *typ)
}
return nil
}
@ -903,12 +876,17 @@ func inflateRawTypes(rawTypes []rawType, baseTypes types, rawStrings *stringTabl
return members, nil
}
for i, raw := range rawTypes {
var declTags []*declTag
for _, raw := range rawTypes {
var (
id = typeIDOffset + TypeID(i)
id = firstTypeID + TypeID(len(types))
typ Type
)
if id < firstTypeID {
return nil, fmt.Errorf("no more type IDs")
}
name, err := rawStrings.Lookup(raw.NameOff)
if err != nil {
return nil, fmt.Errorf("get name for type id %d: %w", id, err)
@ -936,14 +914,14 @@ func inflateRawTypes(rawTypes []rawType, baseTypes types, rawStrings *stringTabl
typ = arr
case kindStruct:
members, err := convertMembers(raw.data.([]btfMember), raw.KindFlag())
members, err := convertMembers(raw.data.([]btfMember), raw.Bitfield())
if err != nil {
return nil, fmt.Errorf("struct %s (id %d): %w", name, id, err)
}
typ = &Struct{name, raw.Size(), members}
case kindUnion:
members, err := convertMembers(raw.data.([]btfMember), raw.KindFlag())
members, err := convertMembers(raw.data.([]btfMember), raw.Bitfield())
if err != nil {
return nil, fmt.Errorf("union %s (id %d): %w", name, id, err)
}
@ -952,24 +930,23 @@ func inflateRawTypes(rawTypes []rawType, baseTypes types, rawStrings *stringTabl
case kindEnum:
rawvals := raw.data.([]btfEnum)
vals := make([]EnumValue, 0, len(rawvals))
signed := raw.Signed()
for i, btfVal := range rawvals {
name, err := rawStrings.Lookup(btfVal.NameOff)
if err != nil {
return nil, fmt.Errorf("get name for enum value %d: %s", i, err)
}
vals = append(vals, EnumValue{
Name: name,
Value: btfVal.Val,
})
value := uint64(btfVal.Val)
if signed {
// Sign extend values to 64 bit.
value = uint64(int32(btfVal.Val))
}
vals = append(vals, EnumValue{name, value})
}
typ = &Enum{name, raw.Size(), vals}
typ = &Enum{name, raw.Size(), signed, vals}
case kindForward:
if raw.KindFlag() {
typ = &Fwd{name, FwdUnion}
} else {
typ = &Fwd{name, FwdStruct}
}
typ = &Fwd{name, raw.FwdKind()}
case kindTypedef:
typedef := &Typedef{name, nil}
@ -993,8 +970,7 @@ func inflateRawTypes(rawTypes []rawType, baseTypes types, rawStrings *stringTabl
case kindFunc:
fn := &Func{name, nil, raw.Linkage()}
fixup(raw.Type(), &fn.Type)
if err := assert(&fn.Type, reflect.TypeOf((*FuncProto)(nil))); err != nil {
if err := fixupAndAssert(raw.Type(), &fn.Type, reflect.TypeOf((*FuncProto)(nil))); err != nil {
return nil, err
}
typ = fn
@ -1036,15 +1012,42 @@ func inflateRawTypes(rawTypes []rawType, baseTypes types, rawStrings *stringTabl
}
for i := range vars {
fixup(btfVars[i].Type, &vars[i].Type)
if err := assert(&vars[i].Type, reflect.TypeOf((*Var)(nil))); err != nil {
return nil, err
}
}
typ = &Datasec{name, raw.SizeType, vars}
typ = &Datasec{name, raw.Size(), vars}
case kindFloat:
typ = &Float{name, raw.Size()}
case kindDeclTag:
btfIndex := raw.data.(*btfDeclTag).ComponentIdx
if uint64(btfIndex) > math.MaxInt {
return nil, fmt.Errorf("type id %d: index exceeds int", id)
}
dt := &declTag{nil, name, int(int32(btfIndex))}
fixup(raw.Type(), &dt.Type)
typ = dt
declTags = append(declTags, dt)
case kindTypeTag:
tt := &typeTag{nil, name}
fixup(raw.Type(), &tt.Type)
typ = tt
case kindEnum64:
rawvals := raw.data.([]btfEnum64)
vals := make([]EnumValue, 0, len(rawvals))
for i, btfVal := range rawvals {
name, err := rawStrings.Lookup(btfVal.NameOff)
if err != nil {
return nil, fmt.Errorf("get name for enum64 value %d: %s", i, err)
}
value := (uint64(btfVal.ValHi32) << 32) | uint64(btfVal.ValLo32)
vals = append(vals, EnumValue{name, value})
}
typ = &Enum{name, raw.Size(), raw.Signed(), vals}
default:
return nil, fmt.Errorf("type id %d: unknown kind: %v", id, raw.Kind())
}
@ -1053,19 +1056,20 @@ func inflateRawTypes(rawTypes []rawType, baseTypes types, rawStrings *stringTabl
}
for _, fixup := range fixups {
i := int(fixup.id)
if i >= len(types)+len(baseTypes) {
return nil, fmt.Errorf("reference to invalid type id: %d", fixup.id)
}
if i < len(baseTypes) {
return nil, fmt.Errorf("fixup for base type id %d is not expected", i)
if fixup.id < firstTypeID {
return nil, fmt.Errorf("fixup for base type id %d is not expected", fixup.id)
}
*fixup.typ = types[i-len(baseTypes)]
idx := int(fixup.id - firstTypeID)
if idx >= len(types) {
return nil, fmt.Errorf("reference to invalid type id: %d", fixup.id)
}
*fixup.typ = types[idx]
}
for _, bitfieldFixup := range bitfieldFixups {
if bitfieldFixup.id < TypeID(len(baseTypes)) {
if bitfieldFixup.id < firstTypeID {
return nil, fmt.Errorf("bitfield fixup from split to base types is not expected")
}
@ -1079,7 +1083,29 @@ func inflateRawTypes(rawTypes []rawType, baseTypes types, rawStrings *stringTabl
for _, assertion := range assertions {
if reflect.TypeOf(*assertion.typ) != assertion.want {
return nil, fmt.Errorf("expected %s, got %T", assertion.want, *assertion.typ)
return nil, fmt.Errorf("type ID %d: expected %s, got %T", assertion.id, assertion.want, *assertion.typ)
}
}
for _, dt := range declTags {
switch t := dt.Type.(type) {
case *Var, *Typedef:
if dt.Index != -1 {
return nil, fmt.Errorf("type %s: index %d is not -1", dt, dt.Index)
}
case composite:
if dt.Index >= len(t.members()) {
return nil, fmt.Errorf("type %s: index %d exceeds members of %s", dt, dt.Index, t)
}
case *Func:
if dt.Index >= len(t.Type.(*FuncProto).Params) {
return nil, fmt.Errorf("type %s: index %d exceeds params of %s", dt, dt.Index, t)
}
default:
return nil, fmt.Errorf("type %s: decl tag for type %s is not supported", dt, t)
}
}
@ -1123,6 +1149,29 @@ func UnderlyingType(typ Type) Type {
return &cycle{typ}
}
// as returns typ if it is of type T. Otherwise it peels qualifiers and
// Typedefs until it finds a T.
//
// Returns the zero value and false if there is no T or if the type is
// nested too deeply.
func as[T Type](typ Type) (T, bool) {
	var zero T
	// Bound the walk so that malformed, cyclic type chains terminate.
	for depth := 0; depth <= maxTypeDepth; depth++ {
		switch v := typ.(type) {
		case T:
			return v, true
		case qualifier:
			typ = v.qualify()
		case *Typedef:
			typ = v.Type
		default:
			return zero, false
		}
	}
	return zero, false
}
type formatState struct {
fmt.State
depth int
@ -1145,10 +1194,7 @@ func formatType(f fmt.State, verb rune, t formattableType, extra ...interface{})
return
}
// This is the same as %T, but elides the package name. Assumes that
// formattableType is implemented by a pointer receiver.
goTypeName := reflect.TypeOf(t).Elem().Name()
_, _ = io.WriteString(f, goTypeName)
_, _ = io.WriteString(f, internal.GoTypeName(t))
if name := t.TypeName(); name != "" {
// Output BTF type name if present.

26
vendor/github.com/cilium/ebpf/btf/workarounds.go generated vendored Normal file
View file

@ -0,0 +1,26 @@
package btf
// datasecResolveWorkaround ensures that certain vars in a Datasec are added
// to a Spec before the Datasec. This avoids a bug in kernel BTF validation.
//
// See https://lore.kernel.org/bpf/20230302123440.1193507-1-lmb@isovalent.com/
func datasecResolveWorkaround(b *Builder, ds *Datasec) error {
	for _, vsi := range ds.Vars {
		v, ok := vsi.Type.(*Var)
		if !ok {
			continue
		}

		switch v.Type.(type) {
		case *Typedef, *Volatile, *Const, *Restrict, *typeTag:
			// NB: We must never call Add on a Datasec, otherwise we risk
			// infinite recursion.
			if _, err := b.Add(v.Type); err != nil {
				return err
			}
		}
	}

	return nil
}

View file

@ -9,6 +9,8 @@ import (
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/kconfig"
)
// CollectionOptions control loading a collection into the kernel.
@ -107,12 +109,22 @@ func (cs *CollectionSpec) RewriteMaps(maps map[string]*Map) error {
return nil
}
// MissingConstantsError is returned by [CollectionSpec.RewriteConstants].
type MissingConstantsError struct {
	// The constants missing from .rodata.
	Constants []string
}

// Error implements the error interface, listing every missing constant.
func (m *MissingConstantsError) Error() string {
	return fmt.Sprintf("some constants are missing from .rodata: %s", strings.Join(m.Constants, ", "))
}
// RewriteConstants replaces the value of multiple constants.
//
// The constant must be defined like so in the C program:
//
// volatile const type foobar;
// volatile const type foobar = default;
// volatile const type foobar;
// volatile const type foobar = default;
//
// Replacement values must be of the same length as the C sizeof(type).
// If necessary, they are marshalled according to the same rules as
@ -120,7 +132,7 @@ func (cs *CollectionSpec) RewriteMaps(maps map[string]*Map) error {
//
// From Linux 5.5 the verifier will use constants to eliminate dead code.
//
// Returns an error if a constant doesn't exist.
// Returns an error wrapping [MissingConstantsError] if a constant doesn't exist.
func (cs *CollectionSpec) RewriteConstants(consts map[string]interface{}) error {
replaced := make(map[string]bool)
@ -151,6 +163,10 @@ func (cs *CollectionSpec) RewriteConstants(consts map[string]interface{}) error
continue
}
if _, ok := v.Type.(*btf.Var); !ok {
return fmt.Errorf("section %s: unexpected type %T for variable %s", name, v.Type, vname)
}
if replaced[vname] {
return fmt.Errorf("section %s: duplicate variable %s", name, vname)
}
@ -180,7 +196,7 @@ func (cs *CollectionSpec) RewriteConstants(consts map[string]interface{}) error
}
if len(missing) != 0 {
return fmt.Errorf("spec is missing one or more constants: %s", strings.Join(missing, ","))
return fmt.Errorf("rewrite constants: %w", &MissingConstantsError{Constants: missing})
}
return nil
@ -198,11 +214,11 @@ func (cs *CollectionSpec) RewriteConstants(consts map[string]interface{}) error
// The tag's value specifies the name of the program or map as
// found in the CollectionSpec.
//
// struct {
// Foo *ebpf.ProgramSpec `ebpf:"xdp_foo"`
// Bar *ebpf.MapSpec `ebpf:"bar_map"`
// Ignored int
// }
// struct {
// Foo *ebpf.ProgramSpec `ebpf:"xdp_foo"`
// Bar *ebpf.MapSpec `ebpf:"bar_map"`
// Ignored int
// }
//
// Returns an error if any of the eBPF objects can't be found, or
// if the same MapSpec or ProgramSpec is assigned multiple times.
@ -249,11 +265,11 @@ func (cs *CollectionSpec) Assign(to interface{}) error {
// dependent resources are loaded into the kernel and populated with values if
// specified.
//
// struct {
// Foo *ebpf.Program `ebpf:"xdp_foo"`
// Bar *ebpf.Map `ebpf:"bar_map"`
// Ignored int
// }
// struct {
// Foo *ebpf.Program `ebpf:"xdp_foo"`
// Bar *ebpf.Map `ebpf:"bar_map"`
// Ignored int
// }
//
// opts may be nil.
//
@ -386,42 +402,11 @@ func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (*Co
}, nil
}
type handleCache struct {
btfHandles map[*btf.Spec]*btf.Handle
}
func newHandleCache() *handleCache {
return &handleCache{
btfHandles: make(map[*btf.Spec]*btf.Handle),
}
}
func (hc handleCache) btfHandle(spec *btf.Spec) (*btf.Handle, error) {
if hc.btfHandles[spec] != nil {
return hc.btfHandles[spec], nil
}
handle, err := btf.NewHandle(spec)
if err != nil {
return nil, err
}
hc.btfHandles[spec] = handle
return handle, nil
}
func (hc handleCache) close() {
for _, handle := range hc.btfHandles {
handle.Close()
}
}
type collectionLoader struct {
coll *CollectionSpec
opts *CollectionOptions
maps map[string]*Map
programs map[string]*Program
handles *handleCache
}
func newCollectionLoader(coll *CollectionSpec, opts *CollectionOptions) (*collectionLoader, error) {
@ -436,7 +421,7 @@ func newCollectionLoader(coll *CollectionSpec, opts *CollectionOptions) (*collec
return nil, fmt.Errorf("replacement map %s not found in CollectionSpec", name)
}
if err := spec.checkCompatibility(m); err != nil {
if err := spec.Compatible(m); err != nil {
return nil, fmt.Errorf("using replacement map %s: %w", spec.Name, err)
}
}
@ -446,13 +431,11 @@ func newCollectionLoader(coll *CollectionSpec, opts *CollectionOptions) (*collec
opts,
make(map[string]*Map),
make(map[string]*Program),
newHandleCache(),
}, nil
}
// close all resources left over in the collectionLoader.
func (cl *collectionLoader) close() {
cl.handles.close()
for _, m := range cl.maps {
m.Close()
}
@ -471,10 +454,6 @@ func (cl *collectionLoader) loadMap(mapName string) (*Map, error) {
return nil, fmt.Errorf("missing map %s", mapName)
}
if mapSpec.BTF != nil && cl.coll.Types != mapSpec.BTF {
return nil, fmt.Errorf("map %s: BTF doesn't match collection", mapName)
}
if replaceMap, ok := cl.opts.MapReplacements[mapName]; ok {
// Clone the map to avoid closing user's map later on.
m, err := replaceMap.Clone()
@ -486,7 +465,7 @@ func (cl *collectionLoader) loadMap(mapName string) (*Map, error) {
return m, nil
}
m, err := newMapWithOptions(mapSpec, cl.opts.Maps, cl.handles)
m, err := newMapWithOptions(mapSpec, cl.opts.Maps)
if err != nil {
return nil, fmt.Errorf("map %s: %w", mapName, err)
}
@ -511,10 +490,6 @@ func (cl *collectionLoader) loadProgram(progName string) (*Program, error) {
return nil, fmt.Errorf("cannot load program %s: program type is unspecified", progName)
}
if progSpec.BTF != nil && cl.coll.Types != progSpec.BTF {
return nil, fmt.Errorf("program %s: BTF doesn't match collection", progName)
}
progSpec = progSpec.Copy()
// Rewrite any reference to a valid map in the program's instructions,
@ -543,7 +518,7 @@ func (cl *collectionLoader) loadProgram(progName string) (*Program, error) {
}
}
prog, err := newProgramWithOptions(progSpec, cl.opts.Programs, cl.handles)
prog, err := newProgramWithOptions(progSpec, cl.opts.Programs)
if err != nil {
return nil, fmt.Errorf("program %s: %w", progName, err)
}
@ -559,17 +534,22 @@ func (cl *collectionLoader) populateMaps() error {
return fmt.Errorf("missing map spec %s", mapName)
}
mapSpec = mapSpec.Copy()
// MapSpecs that refer to inner maps or programs within the same
// CollectionSpec do so using strings. These strings are used as the key
// to look up the respective object in the Maps or Programs fields.
// Resolve those references to actual Map or Program resources that
// have been loaded into the kernel.
for i, kv := range mapSpec.Contents {
if objName, ok := kv.Value.(string); ok {
switch mapSpec.Type {
case ProgramArray:
if mapSpec.Type.canStoreMap() || mapSpec.Type.canStoreProgram() {
mapSpec = mapSpec.Copy()
for i, kv := range mapSpec.Contents {
objName, ok := kv.Value.(string)
if !ok {
continue
}
switch t := mapSpec.Type; {
case t.canStoreProgram():
// loadProgram is idempotent and could return an existing Program.
prog, err := cl.loadProgram(objName)
if err != nil {
@ -577,7 +557,7 @@ func (cl *collectionLoader) populateMaps() error {
}
mapSpec.Contents[i] = MapKV{kv.Key, prog}
case ArrayOfMaps, HashOfMaps:
case t.canStoreMap():
// loadMap is idempotent and could return an existing Map.
innerMap, err := cl.loadMap(objName)
if err != nil {
@ -597,6 +577,95 @@ func (cl *collectionLoader) populateMaps() error {
return nil
}
// resolveKconfig resolves all variables declared in .kconfig and populates
// m.Contents. Does nothing if the given m.Contents is non-empty.
//
// NOTE(review): no early return for a non-empty m.Contents is visible in
// this body — presumably the caller guards before invoking; confirm.
func resolveKconfig(m *MapSpec) error {
	ds, ok := m.Value.(*btf.Datasec)
	if !ok {
		return errors.New("map value is not a Datasec")
	}

	// configInfo records where in the map value a CONFIG_* variable lives
	// and what BTF type its value must be marshaled as.
	type configInfo struct {
		offset uint32
		typ    btf.Type
	}

	configs := make(map[string]configInfo)

	// data becomes the single value of the .kconfig array map, sized to
	// hold the entire Datasec.
	data := make([]byte, ds.Size)
	for _, vsi := range ds.Vars {
		v := vsi.Type.(*btf.Var)
		n := v.TypeName()

		switch n {
		// Two well-known symbols are synthesized locally instead of being
		// read from the kernel's kconfig file.
		case "LINUX_KERNEL_VERSION":
			if integer, ok := v.Type.(*btf.Int); !ok || integer.Size != 4 {
				return fmt.Errorf("variable %s must be a 32 bits integer, got %s", n, v.Type)
			}

			kv, err := internal.KernelVersion()
			if err != nil {
				return fmt.Errorf("getting kernel version: %w", err)
			}
			internal.NativeEndian.PutUint32(data[vsi.Offset:], kv.Kernel())

		case "LINUX_HAS_SYSCALL_WRAPPER":
			if integer, ok := v.Type.(*btf.Int); !ok || integer.Size != 4 {
				return fmt.Errorf("variable %s must be a 32 bits integer, got %s", n, v.Type)
			}
			var value uint32 = 1
			// Probe support at load time; only a definitive "not supported"
			// result downgrades the value to 0.
			if err := haveSyscallWrapper(); errors.Is(err, ErrNotSupported) {
				value = 0
			} else if err != nil {
				return fmt.Errorf("unable to derive a value for LINUX_HAS_SYSCALL_WRAPPER: %w", err)
			}

			internal.NativeEndian.PutUint32(data[vsi.Offset:], value)

		default: // Catch CONFIG_*.
			configs[n] = configInfo{
				offset: vsi.Offset,
				typ:    v.Type,
			}
		}
	}

	// We only parse kconfig file if a CONFIG_* variable was found.
	if len(configs) > 0 {
		f, err := kconfig.Find()
		if err != nil {
			return fmt.Errorf("cannot find a kconfig file: %w", err)
		}
		defer f.Close()

		// Restrict parsing to the options we actually need.
		filter := make(map[string]struct{}, len(configs))
		for config := range configs {
			filter[config] = struct{}{}
		}

		kernelConfig, err := kconfig.Parse(f, filter)
		if err != nil {
			return fmt.Errorf("cannot parse kconfig file: %w", err)
		}

		for n, info := range configs {
			value, ok := kernelConfig[n]
			if !ok {
				return fmt.Errorf("config option %q does not exists for this kernel", n)
			}

			err := kconfig.PutValue(data[info.offset:], info.typ, value)
			if err != nil {
				return fmt.Errorf("problem adding value for %s: %w", n, err)
			}
		}
	}

	// The whole Datasec is stored as the single entry of the array map.
	m.Contents = []MapKV{{uint32(0), data}}

	return nil
}
// LoadCollection reads an object file and creates and loads its declared
// resources into the kernel.
//

View file

@ -18,6 +18,15 @@ import (
"github.com/cilium/ebpf/internal/unix"
)
// kconfigMetaKey is the instruction metadata key under which a
// *kconfigMeta is stored (see relocateInstruction).
type kconfigMetaKey struct{}

// kconfigMeta records which .kconfig map and offset within it an
// instruction refers to.
type kconfigMeta struct {
	Map    *MapSpec
	Offset uint32
}

// kfuncMeta is the instruction metadata key marking a call to a kernel
// function (kfunc); the stored value is the kfunc's *btf.Func.
type kfuncMeta struct{}
// elfCode is a convenience to reduce the amount of arguments that have to
// be passed around explicitly. You should treat its contents as immutable.
type elfCode struct {
@ -27,6 +36,9 @@ type elfCode struct {
version uint32
btf *btf.Spec
extInfo *btf.ExtInfos
maps map[string]*MapSpec
kfuncs map[string]*btf.Func
kconfig *MapSpec
}
// LoadCollectionSpec parses an ELF file into a CollectionSpec.
@ -51,6 +63,12 @@ func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) {
return nil, err
}
// Checks if the ELF file is for BPF data.
// Old LLVM versions set e_machine to EM_NONE.
if f.File.Machine != unix.EM_NONE && f.File.Machine != elf.EM_BPF {
return nil, fmt.Errorf("unexpected machine type for BPF ELF: %s", f.File.Machine)
}
var (
licenseSection *elf.Section
versionSection *elf.Section
@ -107,6 +125,8 @@ func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) {
version: version,
btf: btfSpec,
extInfo: btfExtInfo,
maps: make(map[string]*MapSpec),
kfuncs: make(map[string]*btf.Func),
}
symbols, err := f.Symbols()
@ -120,27 +140,33 @@ func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) {
return nil, fmt.Errorf("load relocations: %w", err)
}
// Collect all the various ways to define maps.
maps := make(map[string]*MapSpec)
if err := ec.loadMaps(maps); err != nil {
if err := ec.loadMaps(); err != nil {
return nil, fmt.Errorf("load maps: %w", err)
}
if err := ec.loadBTFMaps(maps); err != nil {
if err := ec.loadBTFMaps(); err != nil {
return nil, fmt.Errorf("load BTF maps: %w", err)
}
if err := ec.loadDataSections(maps); err != nil {
if err := ec.loadDataSections(); err != nil {
return nil, fmt.Errorf("load data sections: %w", err)
}
if err := ec.loadKconfigSection(); err != nil {
return nil, fmt.Errorf("load virtual .kconfig section: %w", err)
}
if err := ec.loadKsymsSection(); err != nil {
return nil, fmt.Errorf("load virtual .ksyms section: %w", err)
}
// Finally, collect programs and link them.
progs, err := ec.loadProgramSections()
if err != nil {
return nil, fmt.Errorf("load programs: %w", err)
}
return &CollectionSpec{maps, progs, btfSpec, ec.ByteOrder}, nil
return &CollectionSpec{ec.maps, progs, btfSpec, ec.ByteOrder}, nil
}
func loadLicense(sec *elf.Section) (string, error) {
@ -261,10 +287,6 @@ func (ec *elfCode) loadRelocations(relSections map[elf.SectionIndex]*elf.Section
return fmt.Errorf("section %q: reference to %q in section %s: %w", section.Name, rel.Name, rel.Section, ErrNotSupported)
}
if target.Flags&elf.SHF_STRINGS > 0 {
return fmt.Errorf("section %q: string is not stack allocated: %w", section.Name, ErrNotSupported)
}
target.references++
}
@ -312,7 +334,6 @@ func (ec *elfCode) loadProgramSections() (map[string]*ProgramSpec, error) {
KernelVersion: ec.version,
Instructions: insns,
ByteOrder: ec.ByteOrder,
BTF: ec.btf,
}
// Function names must be unique within a single ELF blob.
@ -565,6 +586,10 @@ func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) err
return fmt.Errorf("neither a call nor a load instruction: %v", ins)
}
// The Undefined section is used for 'virtual' symbols that aren't backed by
// an ELF section. This includes symbol references from inline asm, forward
// function declarations, as well as extern kfunc declarations using __ksym
// and extern kconfig variables declared using __kconfig.
case undefSection:
if bind != elf.STB_GLOBAL {
return fmt.Errorf("asm relocation: %s: unsupported binding: %s", name, bind)
@ -574,7 +599,36 @@ func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) err
return fmt.Errorf("asm relocation: %s: unsupported type %s", name, typ)
}
// There is nothing to do here but set ins.Reference.
kf := ec.kfuncs[name]
switch {
// If a Call instruction is found and the datasec has a btf.Func with a Name
// that matches the symbol name we mark the instruction as a call to a kfunc.
case kf != nil && ins.OpCode.JumpOp() == asm.Call:
ins.Metadata.Set(kfuncMeta{}, kf)
ins.Src = asm.PseudoKfuncCall
ins.Constant = -1
// If no kconfig map is found, this must be a symbol reference from inline
// asm (see testdata/loader.c:asm_relocation()) or a call to a forward
// function declaration (see testdata/fwd_decl.c). Don't interfere, These
// remain standard symbol references.
// extern __kconfig reads are represented as dword loads that need to be
// rewritten to pseudo map loads from .kconfig. If the map is present,
// require it to contain the symbol to disambiguate between inline asm
// relos and kconfigs.
case ec.kconfig != nil && ins.OpCode.IsDWordLoad():
for _, vsi := range ec.kconfig.Value.(*btf.Datasec).Vars {
if vsi.Type.(*btf.Var).Name != rel.Name {
continue
}
ins.Src = asm.PseudoMapValue
ins.Metadata.Set(kconfigMetaKey{}, &kconfigMeta{ec.kconfig, vsi.Offset})
return nil
}
return fmt.Errorf("kconfig %s not found in .kconfig", rel.Name)
}
default:
return fmt.Errorf("relocation to %q: %w", target.Name, ErrNotSupported)
@ -584,7 +638,7 @@ func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) err
return nil
}
func (ec *elfCode) loadMaps(maps map[string]*MapSpec) error {
func (ec *elfCode) loadMaps() error {
for _, sec := range ec.sections {
if sec.kind != mapSection {
continue
@ -610,7 +664,7 @@ func (ec *elfCode) loadMaps(maps map[string]*MapSpec) error {
}
mapName := mapSym.Name
if maps[mapName] != nil {
if ec.maps[mapName] != nil {
return fmt.Errorf("section %v: map %v already exists", sec.Name, mapSym)
}
@ -644,7 +698,7 @@ func (ec *elfCode) loadMaps(maps map[string]*MapSpec) error {
return fmt.Errorf("map %s: %w", mapName, err)
}
maps[mapName] = &spec
ec.maps[mapName] = &spec
}
}
@ -654,7 +708,7 @@ func (ec *elfCode) loadMaps(maps map[string]*MapSpec) error {
// loadBTFMaps iterates over all ELF sections marked as BTF map sections
// (like .maps) and parses them into MapSpecs. Dump the .maps section and
// any relocations with `readelf -x .maps -r <elf_file>`.
func (ec *elfCode) loadBTFMaps(maps map[string]*MapSpec) error {
func (ec *elfCode) loadBTFMaps() error {
for _, sec := range ec.sections {
if sec.kind != btfMapSection {
continue
@ -693,7 +747,7 @@ func (ec *elfCode) loadBTFMaps(maps map[string]*MapSpec) error {
return fmt.Errorf("section %v: map %s: initializing BTF map definitions: %w", sec.Name, name, internal.ErrNotSupported)
}
if maps[name] != nil {
if ec.maps[name] != nil {
return fmt.Errorf("section %v: map %s already exists", sec.Name, name)
}
@ -712,7 +766,7 @@ func (ec *elfCode) loadBTFMaps(maps map[string]*MapSpec) error {
return fmt.Errorf("map %v: %w", name, err)
}
maps[name] = mapSpec
ec.maps[name] = mapSpec
}
// Drain the ELF section reader to make sure all bytes are accounted for
@ -901,13 +955,6 @@ func mapSpecFromBTF(es *elfSection, vs *btf.VarSecinfo, def *btf.Struct, spec *b
}
}
if key == nil {
key = &btf.Void{}
}
if value == nil {
value = &btf.Void{}
}
return &MapSpec{
Name: SanitizeName(name, -1),
Type: MapType(mapType),
@ -917,7 +964,6 @@ func mapSpecFromBTF(es *elfSection, vs *btf.VarSecinfo, def *btf.Struct, spec *b
Flags: flags,
Key: key,
Value: value,
BTF: spec,
Pinning: pinType,
InnerMap: innerMapSpec,
Contents: contents,
@ -1008,14 +1054,14 @@ func resolveBTFValuesContents(es *elfSection, vs *btf.VarSecinfo, member btf.Mem
case elf.STT_OBJECT:
contents = append(contents, MapKV{uint32(k), r.Name})
default:
return nil, fmt.Errorf("unknown relocation type %v", t)
return nil, fmt.Errorf("unknown relocation type %v for symbol %s", t, r.Name)
}
}
return contents, nil
}
func (ec *elfCode) loadDataSections(maps map[string]*MapSpec) error {
func (ec *elfCode) loadDataSections() error {
for _, sec := range ec.sections {
if sec.kind != dataSection {
continue
@ -1027,22 +1073,33 @@ func (ec *elfCode) loadDataSections(maps map[string]*MapSpec) error {
continue
}
data, err := sec.Data()
if err != nil {
return fmt.Errorf("data section %s: can't get contents: %w", sec.Name, err)
}
if uint64(len(data)) > math.MaxUint32 {
return fmt.Errorf("data section %s: contents exceed maximum size", sec.Name)
}
mapSpec := &MapSpec{
Name: SanitizeName(sec.Name, -1),
Type: Array,
KeySize: 4,
ValueSize: uint32(len(data)),
ValueSize: uint32(sec.Size),
MaxEntries: 1,
Contents: []MapKV{{uint32(0), data}},
}
switch sec.Type {
// Only open the section if we know there's actual data to be read.
case elf.SHT_PROGBITS:
data, err := sec.Data()
if err != nil {
return fmt.Errorf("data section %s: can't get contents: %w", sec.Name, err)
}
if uint64(len(data)) > math.MaxUint32 {
return fmt.Errorf("data section %s: contents exceed maximum size", sec.Name)
}
mapSpec.Contents = []MapKV{{uint32(0), data}}
case elf.SHT_NOBITS:
// NOBITS sections like .bss contain only zeroes, and since data sections
// are Arrays, the kernel already preallocates them. Skip reading zeroes
// from the ELF.
default:
return fmt.Errorf("data section %s: unknown section type %s", sec.Name, sec.Type)
}
// It is possible for a data section to exist without a corresponding BTF Datasec
@ -1051,23 +1108,78 @@ func (ec *elfCode) loadDataSections(maps map[string]*MapSpec) error {
var ds *btf.Datasec
if ec.btf.TypeByName(sec.Name, &ds) == nil {
// Assign the spec's key and BTF only if the Datasec lookup was successful.
mapSpec.BTF = ec.btf
mapSpec.Key = &btf.Void{}
mapSpec.Value = ds
}
}
switch n := sec.Name; {
case strings.HasPrefix(n, ".rodata"):
if strings.HasPrefix(sec.Name, ".rodata") {
mapSpec.Flags = unix.BPF_F_RDONLY_PROG
mapSpec.Freeze = true
case n == ".bss":
// The kernel already zero-initializes the map
mapSpec.Contents = nil
}
maps[sec.Name] = mapSpec
ec.maps[sec.Name] = mapSpec
}
return nil
}
// loadKconfigSection handles the 'virtual' Datasec .kconfig that doesn't
// have a corresponding ELF section and exist purely in BTF.
//
// On success, ec.kconfig holds a single-entry array MapSpec whose value
// layout is described by the .kconfig Datasec. Returns nil without side
// effects when the object has no BTF or no .kconfig Datasec.
func (ec *elfCode) loadKconfigSection() error {
	// Without BTF there can be no .kconfig Datasec.
	if ec.btf == nil {
		return nil
	}

	var ds *btf.Datasec
	err := ec.btf.TypeByName(".kconfig", &ds)
	if errors.Is(err, btf.ErrNotFound) {
		// The object doesn't use .kconfig; nothing to do.
		return nil
	}
	if err != nil {
		return err
	}

	if ds.Size == 0 {
		return errors.New("zero-length .kconfig")
	}

	// Single-entry array keyed by a 4-byte integer. The flags mark the map
	// read-only from the program side and mmapable; Freeze is set so the
	// contents can't be modified after load.
	ec.kconfig = &MapSpec{
		Name:       ".kconfig",
		Type:       Array,
		KeySize:    uint32(4),
		ValueSize:  ds.Size,
		MaxEntries: 1,
		Flags:      unix.BPF_F_RDONLY_PROG | unix.BPF_F_MMAPABLE,
		Freeze:     true,
		Key:        &btf.Int{Size: 4},
		Value:      ds,
	}

	return nil
}
// loadKsymsSection handles the 'virtual' Datasec .ksyms that doesn't
// have a corresponding ELF section and exist purely in BTF.
//
// Populates ec.kfuncs with the Func types declared in the Datasec, keyed by
// type name. Returns nil without side effects when the object has no BTF or
// no .ksyms Datasec.
func (ec *elfCode) loadKsymsSection() error {
	// Without BTF there can be no .ksyms Datasec.
	if ec.btf == nil {
		return nil
	}

	var ds *btf.Datasec
	err := ec.btf.TypeByName(".ksyms", &ds)
	if errors.Is(err, btf.ErrNotFound) {
		// The object doesn't declare any ksyms; nothing to collect.
		return nil
	}
	if err != nil {
		return err
	}

	for _, v := range ds.Vars {
		// we have already checked the .ksyms Datasec to only contain Func Vars.
		ec.kfuncs[v.Type.TypeName()] = v.Type.(*btf.Func)
	}

	return nil
}
@ -1107,9 +1219,13 @@ func getProgType(sectionName string) (ProgramType, AttachType, uint32, string) {
{"lsm/", LSM, AttachLSMMac, 0},
{"lsm.s/", LSM, AttachLSMMac, unix.BPF_F_SLEEPABLE},
{"iter/", Tracing, AttachTraceIter, 0},
{"iter.s/", Tracing, AttachTraceIter, unix.BPF_F_SLEEPABLE},
{"syscall", Syscall, AttachNone, 0},
{"xdp.frags_devmap/", XDP, AttachXDPDevMap, unix.BPF_F_XDP_HAS_FRAGS},
{"xdp_devmap/", XDP, AttachXDPDevMap, 0},
{"xdp.frags_cpumap/", XDP, AttachXDPCPUMap, unix.BPF_F_XDP_HAS_FRAGS},
{"xdp_cpumap/", XDP, AttachXDPCPUMap, 0},
{"xdp.frags", XDP, AttachNone, unix.BPF_F_XDP_HAS_FRAGS},
{"xdp", XDP, AttachNone, 0},
{"perf_event", PerfEvent, AttachNone, 0},
{"lwt_in", LWTIn, AttachNone, 0},
@ -1149,8 +1265,9 @@ func getProgType(sectionName string) (ProgramType, AttachType, uint32, string) {
{"cgroup/setsockopt", CGroupSockopt, AttachCGroupSetsockopt, 0},
{"struct_ops+", StructOps, AttachNone, 0},
{"sk_lookup/", SkLookup, AttachSkLookup, 0},
{"seccomp", SocketFilter, AttachNone, 0},
{"kprobe.multi", Kprobe, AttachTraceKprobeMulti, 0},
{"kretprobe.multi", Kprobe, AttachTraceKprobeMulti, 0},
}
for _, t := range types {

View file

@ -48,7 +48,7 @@ func newMapInfoFromFd(fd *sys.FD) (*MapInfo, error) {
info.KeySize,
info.ValueSize,
info.MaxEntries,
info.MapFlags,
uint32(info.MapFlags),
unix.ByteSliceToString(info.Name[:]),
}, nil
}
@ -94,8 +94,10 @@ type ProgramInfo struct {
// Name as supplied by user space at load time. Available from 4.15.
Name string
btf btf.ID
stats *programStats
createdByUID uint32
haveCreatedByUID bool
btf btf.ID
stats *programStats
maps []MapID
insns []byte
@ -130,6 +132,18 @@ func newProgramInfoFromFd(fd *sys.FD) (*ProgramInfo, error) {
pi.maps = make([]MapID, info.NrMapIds)
info2.NrMapIds = info.NrMapIds
info2.MapIds = sys.NewPointer(unsafe.Pointer(&pi.maps[0]))
} else if haveProgramInfoMapIDs() == nil {
// This program really has no associated maps.
pi.maps = make([]MapID, 0)
} else {
// The kernel doesn't report associated maps.
pi.maps = nil
}
// createdByUID and NrMapIds were introduced in the same kernel version.
if pi.maps != nil {
pi.createdByUID = info.CreatedByUid
pi.haveCreatedByUID = true
}
if info.XlatedProgLen > 0 {
@ -175,6 +189,15 @@ func (pi *ProgramInfo) ID() (ProgramID, bool) {
return pi.id, pi.id > 0
}
// CreatedByUID returns the Uid that created the program.
//
// Available from 4.15.
//
// The bool return value indicates whether this optional field is available.
func (pi *ProgramInfo) CreatedByUID() (uint32, bool) {
return pi.createdByUID, pi.haveCreatedByUID
}
// BTFID returns the BTF ID associated with the program.
//
// The ID is only valid as long as the associated program is kept alive.
@ -321,3 +344,30 @@ func EnableStats(which uint32) (io.Closer, error) {
}
return fd, nil
}
// haveProgramInfoMapIDs probes whether BPF_OBJ_GET_INFO_BY_FD reports the
// map IDs associated with a program (NrMapIds/MapIds), which was added in
// Linux 4.15. The result is cached by the feature-test machinery.
var haveProgramInfoMapIDs = internal.NewFeatureTest("map IDs in program info", "4.15", func() error {
	// Load a minimal program ("return 0") to have something to query.
	prog, err := progLoad(asm.Instructions{
		asm.LoadImm(asm.R0, 0, asm.DWord),
		asm.Return(),
	}, SocketFilter, "MIT")
	if err != nil {
		return err
	}
	defer prog.Close()

	err = sys.ObjInfo(prog, &sys.ProgInfo{
		// NB: Don't need to allocate MapIds since the program isn't using
		// any maps.
		NrMapIds: 1,
	})
	if errors.Is(err, unix.EINVAL) {
		// Most likely the syscall doesn't exist.
		return internal.ErrNotSupported
	}
	if errors.Is(err, unix.E2BIG) {
		// We've hit check_uarg_tail_zero on older kernels.
		return internal.ErrNotSupported
	}

	return err
})

View file

@ -1,6 +1,8 @@
package internal
import "golang.org/x/exp/constraints"
// Align returns 'n' updated to 'alignment' boundary.
func Align(n, alignment int) int {
return (int(n) + alignment - 1) / alignment * alignment
func Align[I constraints.Integer](n, alignment I) I {
return (n + alignment - 1) / alignment * alignment
}

31
vendor/github.com/cilium/ebpf/internal/buffer.go generated vendored Normal file
View file

@ -0,0 +1,31 @@
package internal
import (
"bytes"
"sync"
)
// bytesBufferPool recycles bytes.Buffer structs across NewBuffer/PutBuffer
// calls to avoid allocating a fresh struct per use.
var bytesBufferPool = sync.Pool{
	New: func() interface{} { return new(bytes.Buffer) },
}

// NewBuffer retrieves a [bytes.Buffer] from a pool and re-initialises it so
// that it contains buf.
//
// The returned buffer should be passed to [PutBuffer].
func NewBuffer(buf []byte) *bytes.Buffer {
	b := bytesBufferPool.Get().(*bytes.Buffer)

	// Swap in a fresh backing slice: the previous one may still be referenced
	// by an earlier caller via Bytes(). Pooling is still a win because the
	// bytes.Buffer struct allocation itself is reused; only the backing slice
	// is replaced.
	*b = *bytes.NewBuffer(buf)
	return b
}

// PutBuffer releases a buffer to the pool.
func PutBuffer(buf *bytes.Buffer) {
	// Drop the reference to the backing array so it can be collected while
	// the struct sits in the pool.
	*buf = *bytes.NewBuffer(nil)
	bytesBufferPool.Put(buf)
}

View file

@ -4,24 +4,13 @@ import (
"fmt"
"os"
"strings"
"sync"
)
var sysCPU struct {
once sync.Once
err error
num int
}
// PossibleCPUs returns the max number of CPUs a system may possibly have
// Logical CPU numbers must be of the form 0-n
func PossibleCPUs() (int, error) {
sysCPU.once.Do(func() {
sysCPU.num, sysCPU.err = parseCPUsFromFile("/sys/devices/system/cpu/possible")
})
return sysCPU.num, sysCPU.err
}
var PossibleCPUs = Memoize(func() (int, error) {
return parseCPUsFromFile("/sys/devices/system/cpu/possible")
})
func parseCPUsFromFile(path string) (int, error) {
spec, err := os.ReadFile(path)

91
vendor/github.com/cilium/ebpf/internal/deque.go generated vendored Normal file
View file

@ -0,0 +1,91 @@
package internal
import "math/bits"
// Deque implements a double ended queue.
//
// The zero value is an empty deque ready for use. Elements live in a
// power-of-two sized ring buffer; read and write are free-running cursors
// masked into the buffer.
type Deque[T any] struct {
	elems       []T
	read, write uint64
	mask        uint64
}

// Reset clears the contents of the deque while retaining the backing buffer.
func (dq *Deque[T]) Reset() {
	var zero T
	// Zero only the live slots so retained references can be collected.
	for pos := dq.read; pos != dq.write; pos++ {
		dq.elems[pos&dq.mask] = zero
	}

	dq.read, dq.write = 0, 0
}

// Empty reports whether the deque contains no elements.
func (dq *Deque[T]) Empty() bool {
	return dq.read == dq.write
}

// Push adds an element to the end.
func (dq *Deque[T]) Push(e T) {
	dq.Grow(1)
	dq.elems[dq.write&dq.mask] = e
	dq.write++
}

// Shift returns the first element or the zero value.
func (dq *Deque[T]) Shift() T {
	var zero T
	if dq.Empty() {
		return zero
	}

	idx := dq.read & dq.mask
	elem := dq.elems[idx]
	dq.elems[idx] = zero // release the slot's reference
	dq.read++
	return elem
}

// Pop returns the last element or the zero value.
func (dq *Deque[T]) Pop() T {
	var zero T
	if dq.Empty() {
		return zero
	}

	dq.write--
	idx := dq.write & dq.mask
	elem := dq.elems[idx]
	dq.elems[idx] = zero // release the slot's reference
	return elem
}

// Grow the deque's capacity, if necessary, to guarantee space for another n
// elements.
func (dq *Deque[T]) Grow(n int) {
	have := dq.write - dq.read
	need := have + uint64(n)
	if need < have {
		panic("overflow")
	}
	if uint64(len(dq.elems)) >= need {
		return
	}

	// Round up to the new power of two which is at least 8.
	// See https://jameshfisher.com/2018/03/30/round-up-power-2/
	capacity := 1 << (64 - bits.LeadingZeros64(need-1))
	if capacity < 8 {
		capacity = 8
	}

	// Linearise the live elements into the front of a fresh buffer.
	fresh := make([]T, have, capacity)
	head := dq.read & dq.mask
	moved := copy(fresh, dq.elems[head:])
	copy(fresh[moved:], dq.elems[:head])

	dq.elems = fresh[:capacity]
	dq.mask = uint64(capacity) - 1
	dq.read, dq.write = 0, have
}

View file

@ -1,5 +1,4 @@
//go:build armbe || arm64be || mips || mips64 || mips64p32 || ppc64 || s390 || s390x || sparc || sparc64
// +build armbe arm64be mips mips64 mips64p32 ppc64 s390 s390x sparc sparc64
package internal

View file

@ -1,5 +1,4 @@
//go:build 386 || amd64 || amd64p32 || arm || arm64 || mipsle || mips64le || mips64p32le || ppc64le || riscv64
// +build 386 amd64 amd64p32 arm arm64 mipsle mips64le mips64p32le ppc64le riscv64
//go:build 386 || amd64 || amd64p32 || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || ppc64le || riscv64
package internal

View file

@ -7,32 +7,25 @@ import (
"strings"
)
// ErrorWithLog returns an error which includes logs from the kernel verifier.
// ErrorWithLog wraps err in a VerifierError that includes the parsed verifier
// log buffer.
//
// The default error output is a summary of the full log. The latter can be
// accessed via VerifierError.Log or by formatting the error, see Format.
//
// A set of heuristics is used to determine whether the log has been truncated.
func ErrorWithLog(err error, log []byte) *VerifierError {
func ErrorWithLog(source string, err error, log []byte, truncated bool) *VerifierError {
const whitespace = "\t\r\v\n "
// Convert verifier log C string by truncating it on the first 0 byte
// and trimming trailing whitespace before interpreting as a Go string.
truncated := false
if i := bytes.IndexByte(log, 0); i != -1 {
if i == len(log)-1 && !bytes.HasSuffix(log[:i], []byte{'\n'}) {
// The null byte is at the end of the buffer and it's not preceded
// by a newline character. Most likely the buffer was too short.
truncated = true
}
log = log[:i]
} else if len(log) > 0 {
// No null byte? Dodgy!
truncated = true
}
log = bytes.Trim(log, whitespace)
if len(log) == 0 {
return &VerifierError{source, err, nil, truncated}
}
logLines := bytes.Split(log, []byte{'\n'})
lines := make([]string, 0, len(logLines))
for _, line := range logLines {
@ -41,13 +34,14 @@ func ErrorWithLog(err error, log []byte) *VerifierError {
lines = append(lines, string(bytes.TrimRight(line, whitespace)))
}
return &VerifierError{err, lines, truncated}
return &VerifierError{source, err, lines, truncated}
}
// VerifierError includes information from the eBPF verifier.
//
// It summarises the log output, see Format if you want to output the full contents.
type VerifierError struct {
source string
// The error which caused this error.
Cause error
// The verifier output split into lines.
@ -67,9 +61,12 @@ func (le *VerifierError) Error() string {
log = log[:n-1]
}
var b strings.Builder
fmt.Fprintf(&b, "%s: %s", le.source, le.Cause.Error())
n := len(log)
if n == 0 {
return le.Cause.Error()
return b.String()
}
lines := log[n-1:]
@ -78,14 +75,9 @@ func (le *VerifierError) Error() string {
lines = log[n-2:]
}
var b strings.Builder
fmt.Fprintf(&b, "%s: ", le.Cause.Error())
for i, line := range lines {
for _, line := range lines {
b.WriteString(": ")
b.WriteString(strings.TrimSpace(line))
if i != len(lines)-1 {
b.WriteString(": ")
}
}
omitted := len(le.Log) - len(lines)
@ -143,8 +135,8 @@ func includePreviousLine(line string) bool {
// Understood verbs are %s and %v, which are equivalent to calling Error(). %v
// allows outputting additional information using the following flags:
//
// + Output the first <width> lines, or all lines if no width is given.
// - Output the last <width> lines, or all lines if no width is given.
// %+<width>v: Output the first <width> lines, or all lines if no width is given.
// %-<width>v: Output the last <width> lines, or all lines if no width is given.
//
// Use width to specify how many lines to output. Use the '-' flag to output
// lines from the end of the log instead of the beginning.
@ -174,7 +166,7 @@ func (le *VerifierError) Format(f fmt.State, verb rune) {
return
}
fmt.Fprintf(f, "%s:", le.Cause.Error())
fmt.Fprintf(f, "%s: %s:", le.source, le.Cause.Error())
omitted := len(le.Log) - n
lines := le.Log[:n]

View file

@ -31,10 +31,20 @@ func (ufe *UnsupportedFeatureError) Is(target error) bool {
return target == ErrNotSupported
}
type featureTest struct {
sync.RWMutex
successful bool
result error
// FeatureTest caches the result of a [FeatureTestFn].
//
// Fields should not be modified after creation.
type FeatureTest struct {
// The name of the feature being detected.
Name string
// Version in in the form Major.Minor[.Patch].
Version string
// The feature test itself.
Fn FeatureTestFn
mu sync.RWMutex
done bool
result error
}
// FeatureTestFn is used to determine whether the kernel supports
@ -42,59 +52,133 @@ type featureTest struct {
//
// The return values have the following semantics:
//
// err == ErrNotSupported: the feature is not available
// err == nil: the feature is available
// err != nil: the test couldn't be executed
// err == ErrNotSupported: the feature is not available
// err == nil: the feature is available
// err != nil: the test couldn't be executed
type FeatureTestFn func() error
// FeatureTest wraps a function so that it is run at most once.
// NewFeatureTest is a convenient way to create a single [FeatureTest].
func NewFeatureTest(name, version string, fn FeatureTestFn) func() error {
ft := &FeatureTest{
Name: name,
Version: version,
Fn: fn,
}
return ft.execute
}
// execute the feature test.
//
// name should identify the tested feature, while version must be in the
// form Major.Minor[.Patch].
// The result is cached if the test is conclusive.
//
// Returns an error wrapping ErrNotSupported if the feature is not supported.
func FeatureTest(name, version string, fn FeatureTestFn) func() error {
ft := new(featureTest)
return func() error {
ft.RLock()
if ft.successful {
defer ft.RUnlock()
return ft.result
}
ft.RUnlock()
ft.Lock()
defer ft.Unlock()
// check one more time on the off
// chance that two go routines
// were able to call into the write
// lock
if ft.successful {
return ft.result
}
err := fn()
switch {
case errors.Is(err, ErrNotSupported):
v, err := NewVersion(version)
// See [FeatureTestFn] for the meaning of the returned error.
func (ft *FeatureTest) execute() error {
ft.mu.RLock()
result, done := ft.result, ft.done
ft.mu.RUnlock()
if done {
return result
}
ft.mu.Lock()
defer ft.mu.Unlock()
// The test may have been executed by another caller while we were
// waiting to acquire ft.mu.
if ft.done {
return ft.result
}
err := ft.Fn()
if err == nil {
ft.done = true
return nil
}
if errors.Is(err, ErrNotSupported) {
var v Version
if ft.Version != "" {
v, err = NewVersion(ft.Version)
if err != nil {
return err
return fmt.Errorf("feature %s: %w", ft.Name, err)
}
}
ft.result = &UnsupportedFeatureError{
MinimumVersion: v,
Name: name,
}
fallthrough
case err == nil:
ft.successful = true
default:
// We couldn't execute the feature test to a point
// where it could make a determination.
// Don't cache the result, just return it.
return fmt.Errorf("detect support for %s: %w", name, err)
ft.done = true
ft.result = &UnsupportedFeatureError{
MinimumVersion: v,
Name: ft.Name,
}
return ft.result
}
// We couldn't execute the feature test to a point
// where it could make a determination.
// Don't cache the result, just return it.
return fmt.Errorf("detect support for %s: %w", ft.Name, err)
}
// FeatureMatrix groups multiple related feature tests into a map.
//
// Useful when there is a small number of discrete features which are known
// at compile time.
//
// It must not be modified concurrently with calling [FeatureMatrix.Result].
type FeatureMatrix[K comparable] map[K]*FeatureTest

// Result returns the outcome of the feature test for the given key.
//
// It's safe to call this function concurrently.
func (fm FeatureMatrix[K]) Result(key K) error {
	if ft, ok := fm[key]; ok {
		return ft.execute()
	}
	return fmt.Errorf("no feature probe for %v", key)
}
// FeatureCache caches a potentially unlimited number of feature probes.
//
// Useful when there is a high cardinality for a feature test.
type FeatureCache[K comparable] struct {
	mu       sync.RWMutex
	newTest  func(K) *FeatureTest
	features map[K]*FeatureTest
}

// NewFeatureCache creates a cache which lazily constructs probes via newTest.
func NewFeatureCache[K comparable](newTest func(K) *FeatureTest) *FeatureCache[K] {
	return &FeatureCache[K]{
		newTest:  newTest,
		features: make(map[K]*FeatureTest),
	}
}

// Result returns the outcome of the feature test for the given key.
func (fc *FeatureCache[K]) Result(key K) error {
	// NB: Executing the feature test happens without fc.mu taken.
	return fc.retrieve(key).execute()
}

// retrieve returns the cached probe for key, creating it on first use.
func (fc *FeatureCache[K]) retrieve(key K) *FeatureTest {
	// Fast path: the probe already exists.
	fc.mu.RLock()
	probe := fc.features[key]
	fc.mu.RUnlock()
	if probe != nil {
		return probe
	}

	// Slow path: create it under the write lock, re-checking in case another
	// goroutine won the race while we were waiting.
	fc.mu.Lock()
	defer fc.mu.Unlock()

	if probe := fc.features[key]; probe != nil {
		return probe
	}

	probe = fc.newTest(key)
	fc.features[key] = probe
	return probe
}

View file

@ -2,10 +2,14 @@ package internal
import (
"bufio"
"bytes"
"compress/gzip"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"sync"
)
// NewBufferedSectionReader wraps an io.ReaderAt in an appropriately-sized
@ -60,3 +64,65 @@ func ReadAllCompressed(file string) ([]byte, error) {
return io.ReadAll(gz)
}
// ReadUint64FromFile reads a uint64 from a file.
//
// format specifies the contents of the file in fmt.Scanf syntax.
func ReadUint64FromFile(format string, path ...string) (uint64, error) {
filename := filepath.Join(path...)
data, err := os.ReadFile(filename)
if err != nil {
return 0, fmt.Errorf("reading file %q: %w", filename, err)
}
var value uint64
n, err := fmt.Fscanf(bytes.NewReader(data), format, &value)
if err != nil {
return 0, fmt.Errorf("parsing file %q: %w", filename, err)
}
if n != 1 {
return 0, fmt.Errorf("parsing file %q: expected 1 item, got %d", filename, n)
}
return value, nil
}
type uint64FromFileKey struct {
format, path string
}
var uint64FromFileCache = struct {
sync.RWMutex
values map[uint64FromFileKey]uint64
}{
values: map[uint64FromFileKey]uint64{},
}
// ReadUint64FromFileOnce is like readUint64FromFile but memoizes the result.
func ReadUint64FromFileOnce(format string, path ...string) (uint64, error) {
filename := filepath.Join(path...)
key := uint64FromFileKey{format, filename}
uint64FromFileCache.RLock()
if value, ok := uint64FromFileCache.values[key]; ok {
uint64FromFileCache.RUnlock()
return value, nil
}
uint64FromFileCache.RUnlock()
value, err := ReadUint64FromFile(format, filename)
if err != nil {
return 0, err
}
uint64FromFileCache.Lock()
defer uint64FromFileCache.Unlock()
if value, ok := uint64FromFileCache.values[key]; ok {
// Someone else got here before us, use what is cached.
return value, nil
}
uint64FromFileCache.values[key] = value
return value, nil
}

View file

@ -0,0 +1,267 @@
package kconfig
import (
"bufio"
"bytes"
"compress/gzip"
"fmt"
"io"
"math"
"os"
"strconv"
"strings"
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal"
)
// Find locates a kconfig file on the host.
//
// It first tries /boot/config-<release> for the currently running kernel and
// falls back to /proc/config.gz. If neither file can be opened, an error is
// returned. The caller is responsible for closing the returned file.
func Find() (*os.File, error) {
	release, err := internal.KernelRelease()
	if err != nil {
		return nil, fmt.Errorf("cannot get kernel release: %w", err)
	}

	bootPath := "/boot/config-" + release
	for _, candidate := range []string{bootPath, "/proc/config.gz"} {
		if f, err := os.Open(candidate); err == nil {
			return f, nil
		}
	}

	return nil, fmt.Errorf("neither %s nor /proc/config.gz provide a kconfig", bootPath)
}
// Parse parses the kconfig file for which a reader is given.
// All the CONFIG_* which are in filter and which are set set will be
// put in the returned map as key with their corresponding value as map value.
// If filter is nil, no filtering will occur.
// If the kconfig file is not valid, error will be returned.
func Parse(source io.ReaderAt, filter map[string]struct{}) (map[string]string, error) {
var r io.Reader
zr, err := gzip.NewReader(io.NewSectionReader(source, 0, math.MaxInt64))
if err != nil {
r = io.NewSectionReader(source, 0, math.MaxInt64)
} else {
// Source is gzip compressed, transparently decompress.
r = zr
}
ret := make(map[string]string, len(filter))
s := bufio.NewScanner(r)
for s.Scan() {
line := s.Bytes()
err = processKconfigLine(line, ret, filter)
if err != nil {
return nil, fmt.Errorf("cannot parse line: %w", err)
}
if filter != nil && len(ret) == len(filter) {
break
}
}
if err := s.Err(); err != nil {
return nil, fmt.Errorf("cannot parse: %w", err)
}
if zr != nil {
return ret, zr.Close()
}
return ret, nil
}
// Golang translation of libbpf bpf_object__process_kconfig_line():
// https://github.com/libbpf/libbpf/blob/fbd60dbff51c870f5e80a17c4f2fd639eb80af90/src/libbpf.c#L1874
// It does the same checks but does not put the data inside the BPF map.
func processKconfigLine(line []byte, m map[string]string, filter map[string]struct{}) error {
// Ignore empty lines and "# CONFIG_* is not set".
if !bytes.HasPrefix(line, []byte("CONFIG_")) {
return nil
}
key, value, found := bytes.Cut(line, []byte{'='})
if !found {
return fmt.Errorf("line %q does not contain separator '='", line)
}
if len(value) == 0 {
return fmt.Errorf("line %q has no value", line)
}
if filter != nil {
// NB: map[string(key)] gets special optimisation help from the compiler
// and doesn't allocate. Don't turn this into a variable.
_, ok := filter[string(key)]
if !ok {
return nil
}
}
// This can seem odd, but libbpf only sets the value the first time the key is
// met:
// https://github.com/torvalds/linux/blob/0d85b27b0cc6/tools/lib/bpf/libbpf.c#L1906-L1908
_, ok := m[string(key)]
if !ok {
m[string(key)] = string(value)
}
return nil
}
// PutValue translates the value given as parameter depending on the BTF
// type, the translated value is then written to the byte array.
func PutValue(data []byte, typ btf.Type, value string) error {
	typ = btf.UnderlyingType(typ)

	// Dispatch on the textual form of the value: tristate ("y"/"n"/"m"),
	// quoted string, or number.
	if value == "y" || value == "n" || value == "m" {
		return putValueTri(data, typ, value)
	}
	if strings.HasPrefix(value, `"`) {
		return putValueString(data, typ, value)
	}
	return putValueNumber(data, typ, value)
}
// Golang translation of libbpf_tristate enum:
// https://github.com/libbpf/libbpf/blob/fbd60dbff51c870f5e80a17c4f2fd639eb80af90/src/bpf_helpers.h#L169
type triState int

const (
	TriNo     triState = 0
	TriYes    triState = 1
	TriModule triState = 2
)

// putValueTri writes a tristate kconfig value ("y", "n" or "m") into data.
//
// The destination type must be either a 1-byte btf.Bool (only "y"/"n" are
// accepted) or the libbpf_tristate enum. Any other type is an error.
func putValueTri(data []byte, typ btf.Type, value string) error {
	switch v := typ.(type) {
	case *btf.Int:
		if v.Encoding != btf.Bool {
			return fmt.Errorf("cannot add tri value, expected btf.Bool, got: %v", v.Encoding)
		}

		if v.Size != 1 {
			return fmt.Errorf("cannot add tri value, expected size of 1 byte, got: %d", v.Size)
		}

		switch value {
		case "y":
			data[0] = 1
		case "n":
			data[0] = 0
		default:
			// "m" has no representation in a plain bool.
			return fmt.Errorf("cannot use %q for btf.Bool", value)
		}
	case *btf.Enum:
		if v.Name != "libbpf_tristate" {
			return fmt.Errorf("cannot use enum %q, only libbpf_tristate is supported", v.Name)
		}

		var tri triState
		switch value {
		case "y":
			tri = TriYes
		case "m":
			tri = TriModule
		case "n":
			tri = TriNo
		default:
			return fmt.Errorf("value %q is not support for libbpf_tristate", value)
		}

		// NOTE(review): always writes 8 bytes regardless of the enum's
		// declared size — assumes the destination slot is uint64-sized;
		// confirm against the caller's Datasec layout.
		internal.NativeEndian.PutUint64(data, uint64(tri))
	default:
		return fmt.Errorf("cannot add number value, expected btf.Int or btf.Enum, got: %T", v)
	}

	return nil
}
// putValueString writes a double-quoted kconfig string value into data,
// which must be described by a BTF array of 1-byte non-bool integers
// (i.e. a char array). The string is truncated if the array is smaller.
func putValueString(data []byte, typ btf.Type, value string) error {
	array, ok := typ.(*btf.Array)
	if !ok {
		return fmt.Errorf("cannot add string value, expected btf.Array, got %T", array)
	}

	contentType, ok := btf.UnderlyingType(array.Type).(*btf.Int)
	if !ok {
		return fmt.Errorf("cannot add string value, expected array of btf.Int, got %T", contentType)
	}

	// Any Int, which is not bool, of one byte could be used to store char:
	// https://github.com/torvalds/linux/blob/1a5304fecee5/tools/lib/bpf/libbpf.c#L3637-L3638
	if contentType.Size != 1 && contentType.Encoding != btf.Bool {
		return fmt.Errorf("cannot add string value, expected array of btf.Int of size 1, got array of btf.Int of size: %v", contentType.Size)
	}

	if !strings.HasPrefix(value, `"`) || !strings.HasSuffix(value, `"`) {
		return fmt.Errorf(`value %q must start and finish with '"'`, value)
	}

	// NOTE(review): Trim strips *all* leading/trailing quote characters, not
	// just the outermost pair — confirm this matches libbpf for values whose
	// content begins or ends with a quote.
	str := strings.Trim(value, `"`)

	// We need to trim string if the bpf array is smaller.
	if uint32(len(str)) >= array.Nelems {
		str = str[:array.Nelems]
	}

	// Write the string content to .kconfig.
	copy(data, str)

	return nil
}
// putValueNumber parses value as an integer literal and writes it into data
// in native endianness, sized according to the destination btf.Int (1, 2, 4
// or 8 bytes). Signed destinations are range-checked as signed.
func putValueNumber(data []byte, typ btf.Type, value string) error {
	integer, ok := typ.(*btf.Int)
	if !ok {
		return fmt.Errorf("cannot add number value, expected *btf.Int, got: %T", integer)
	}

	size := integer.Size
	bitSize := int(size * 8)

	// Parse with base auto-detection (0) and the destination's bit width so
	// out-of-range literals are rejected.
	var n uint64
	var err error
	if integer.Encoding == btf.Signed {
		var signed int64
		signed, err = strconv.ParseInt(value, 0, bitSize)
		n = uint64(signed)
	} else {
		n, err = strconv.ParseUint(value, 0, bitSize)
	}
	if err != nil {
		return fmt.Errorf("cannot parse value: %w", err)
	}

	switch size {
	case 1:
		data[0] = byte(n)
	case 2:
		internal.NativeEndian.PutUint16(data, uint16(n))
	case 4:
		internal.NativeEndian.PutUint32(data, uint32(n))
	case 8:
		internal.NativeEndian.PutUint64(data, n)
	default:
		return fmt.Errorf("size (%d) is not valid, expected: 1, 2, 4 or 8", size)
	}

	return nil
}

26
vendor/github.com/cilium/ebpf/internal/memoize.go generated vendored Normal file
View file

@ -0,0 +1,26 @@
package internal
import (
"sync"
)
// memoizedFunc wraps fn and remembers the result and error of its first
// invocation.
type memoizedFunc[T any] struct {
	fn     func() (T, error)
	once   sync.Once
	result T
	err    error
}

func (m *memoizedFunc[T]) do() (T, error) {
	m.once.Do(func() { m.result, m.err = m.fn() })
	return m.result, m.err
}

// Memoize the result of a function call.
//
// fn is only ever called once, even if it returns an error.
func Memoize[T any](fn func() (T, error)) func() (T, error) {
	memo := &memoizedFunc[T]{fn: fn}
	return memo.do
}

View file

@ -6,6 +6,7 @@ import (
"go/format"
"go/scanner"
"io"
"reflect"
"strings"
"unicode"
)
@ -82,3 +83,15 @@ func WriteFormatted(src []byte, out io.Writer) error {
return nel
}
// GoTypeName is like %T, but elides the package name.
//
// Pointers to a type are peeled off.
func GoTypeName(t any) string {
	typ := reflect.TypeOf(t)
	for typ.Kind() == reflect.Pointer {
		typ = typ.Elem()
	}
	// Doesn't return the correct Name for generic types due to https://github.com/golang/go/issues/55924
	return typ.Name()
}

View file

@ -6,15 +6,12 @@ import (
"os"
"path/filepath"
"runtime"
"unsafe"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
func Pin(currentPath, newPath string, fd *sys.FD) error {
const bpfFSType = 0xcafe4a11
if newPath == "" {
return errors.New("given pinning path cannot be empty")
}
@ -22,20 +19,11 @@ func Pin(currentPath, newPath string, fd *sys.FD) error {
return nil
}
var statfs unix.Statfs_t
if err := unix.Statfs(filepath.Dir(newPath), &statfs); err != nil {
fsType, err := FSType(filepath.Dir(newPath))
if err != nil {
return err
}
fsType := int64(statfs.Type)
if unsafe.Sizeof(statfs.Type) == 4 {
// We're on a 32 bit arch, where statfs.Type is int32. bpfFSType is a
// negative number when interpreted as int32 so we need to cast via
// uint32 to avoid sign extension.
fsType = int64(uint32(statfs.Type))
}
if fsType != bpfFSType {
if fsType != unix.BPF_FS_MAGIC {
return fmt.Errorf("%s is not on a bpf filesystem", newPath)
}
@ -50,7 +38,7 @@ func Pin(currentPath, newPath string, fd *sys.FD) error {
// Renameat2 is used instead of os.Rename to disallow the new path replacing
// an existing path.
err := unix.Renameat2(unix.AT_FDCWD, currentPath, unix.AT_FDCWD, newPath, unix.RENAME_NOREPLACE)
err = unix.Renameat2(unix.AT_FDCWD, currentPath, unix.AT_FDCWD, newPath, unix.RENAME_NOREPLACE)
if err == nil {
// Object is now moved to the new pinning path.
return nil

43
vendor/github.com/cilium/ebpf/internal/platform.go generated vendored Normal file
View file

@ -0,0 +1,43 @@
package internal
import (
"runtime"
)
// PlatformPrefix returns the platform-dependent syscall wrapper prefix used by
// the linux kernel.
//
// Based on https://github.com/golang/go/blob/master/src/go/build/syslist.go
// and https://github.com/libbpf/libbpf/blob/master/src/libbpf.c#L10047
func PlatformPrefix() string {
switch runtime.GOARCH {
case "386":
return "__ia32_"
case "amd64", "amd64p32":
return "__x64_"
case "arm", "armbe":
return "__arm_"
case "arm64", "arm64be":
return "__arm64_"
case "mips", "mipsle", "mips64", "mips64le", "mips64p32", "mips64p32le":
return "__mips_"
case "s390":
return "__s390_"
case "s390x":
return "__s390x_"
case "riscv", "riscv64":
return "__riscv_"
case "ppc":
return "__powerpc_"
case "ppc64", "ppc64le":
return "__powerpc64_"
default:
return ""
}
}

11
vendor/github.com/cilium/ebpf/internal/prog.go generated vendored Normal file
View file

@ -0,0 +1,11 @@
package internal
// EmptyBPFContext is the smallest-possible BPF input context to be used for
// invoking `Program.{Run,Benchmark,Test}`.
//
// Programs require a context input buffer of at least 15 bytes. Looking in
// net/bpf/test_run.c, bpf_test_init() requires that the input is at least
// ETH_HLEN (14) bytes. As of Linux commit fd18942 ("bpf: Don't redirect packets
// with invalid pkt_len"), it also requires the skb to be non-empty after
// removing the Layer 2 header.
var EmptyBPFContext = make([]byte, 15)

23
vendor/github.com/cilium/ebpf/internal/statfs.go generated vendored Normal file
View file

@ -0,0 +1,23 @@
package internal
import (
"unsafe"
"github.com/cilium/ebpf/internal/unix"
)
// FSType returns the filesystem magic number of the filesystem containing
// path, widened to an int64 regardless of platform.
func FSType(path string) (int64, error) {
	var stat unix.Statfs_t
	if err := unix.Statfs(path, &stat); err != nil {
		return 0, err
	}

	if unsafe.Sizeof(stat.Type) == 4 {
		// We're on a 32 bit arch, where statfs.Type is int32. Magic numbers
		// like bpfFSType are negative when interpreted as int32, so cast via
		// uint32 to avoid sign extension.
		return int64(uint32(stat.Type)), nil
	}
	return int64(stat.Type), nil
}

View file

@ -17,11 +17,39 @@ type FD struct {
}
func newFD(value int) *FD {
if onLeakFD != nil {
// Attempt to store the caller's stack for the given fd value.
// Panic if fds contains an existing stack for the fd.
old, exist := fds.LoadOrStore(value, callersFrames())
if exist {
f := old.(*runtime.Frames)
panic(fmt.Sprintf("found existing stack for fd %d:\n%s", value, FormatFrames(f)))
}
}
fd := &FD{value}
runtime.SetFinalizer(fd, (*FD).Close)
runtime.SetFinalizer(fd, (*FD).finalize)
return fd
}
// finalize is set as the FD's runtime finalizer and
// sends a leak trace before calling FD.Close().
func (fd *FD) finalize() {
	// A negative raw value means the fd was already closed or disowned;
	// nothing to report or release.
	if fd.raw < 0 {
		return
	}

	// Invoke the fd leak callback. Calls LoadAndDelete to guarantee the callback
	// is invoked at most once for one sys.FD allocation, runtime.Frames can only
	// be unwound once.
	f, ok := fds.LoadAndDelete(fd.Int())
	if ok && onLeakFD != nil {
		onLeakFD(f.(*runtime.Frames))
	}

	// Best-effort close; a finalizer has no way to report the error.
	_ = fd.Close()
}
// NewFD wraps a raw fd with a finalizer.
//
// You must not use the raw fd after calling this function, since the underlying
@ -64,15 +92,16 @@ func (fd *FD) Close() error {
return nil
}
value := int(fd.raw)
fd.raw = -1
fd.Forget()
return unix.Close(value)
return unix.Close(fd.disown())
}
func (fd *FD) Forget() {
func (fd *FD) disown() int {
value := int(fd.raw)
fds.Delete(int(value))
fd.raw = -1
runtime.SetFinalizer(fd, nil)
return value
}
func (fd *FD) Dup() (*FD, error) {
@ -90,7 +119,15 @@ func (fd *FD) Dup() (*FD, error) {
return newFD(dup), nil
}
// File takes ownership of FD and turns it into an [*os.File].
//
// You must not use the FD after the call returns.
//
// Returns nil if the FD is not valid.
func (fd *FD) File(name string) *os.File {
fd.Forget()
return os.NewFile(uintptr(fd.raw), name)
if fd.raw < 0 {
return nil
}
return os.NewFile(uintptr(fd.disown()), name)
}

93
vendor/github.com/cilium/ebpf/internal/sys/fd_trace.go generated vendored Normal file
View file

@ -0,0 +1,93 @@
package sys
import (
"bytes"
"fmt"
"runtime"
"sync"
)
// OnLeakFD controls tracing [FD] lifetime to detect resources that are not
// closed by Close().
//
// If fn is not nil, tracing is enabled for all FDs created going forward. fn is
// invoked for all FDs that are closed by the garbage collector instead of an
// explicit Close() by a caller. Calling OnLeakFD twice with a non-nil fn
// (without disabling tracing in the meantime) will cause a panic.
//
// If fn is nil, tracing will be disabled. Any FDs that have not been closed are
// considered to be leaked, fn will be invoked for them, and the process will be
// terminated.
//
// fn will be invoked at most once for every unique sys.FD allocation since a
// runtime.Frames can only be unwound once.
func OnLeakFD(fn func(*runtime.Frames)) {
// Enable leak tracing if new fn is provided.
if fn != nil {
if onLeakFD != nil {
panic("OnLeakFD called twice with non-nil fn")
}
onLeakFD = fn
return
}
// fn is nil past this point.
if onLeakFD == nil {
return
}
// Call onLeakFD for all open fds.
if fs := flushFrames(); len(fs) != 0 {
for _, f := range fs {
onLeakFD(f)
}
}
onLeakFD = nil
}
var onLeakFD func(*runtime.Frames)
// fds is a registry of all file descriptors wrapped into sys.fds that were
// created while an fd tracer was active.
var fds sync.Map // map[int]*runtime.Frames
// flushFrames removes all elements from fds and returns them as a slice. This
// deals with the fact that a runtime.Frames can only be unwound once using
// Next().
func flushFrames() []*runtime.Frames {
var frames []*runtime.Frames
fds.Range(func(key, value any) bool {
frames = append(frames, value.(*runtime.Frames))
fds.Delete(key)
return true
})
return frames
}
func callersFrames() *runtime.Frames {
c := make([]uintptr, 32)
// Skip runtime.Callers and this function.
i := runtime.Callers(2, c)
if i == 0 {
return nil
}
return runtime.CallersFrames(c)
}
// FormatFrames formats a runtime.Frames as a human-readable string.
func FormatFrames(fs *runtime.Frames) string {
var b bytes.Buffer
for {
f, more := fs.Next()
b.WriteString(fmt.Sprintf("\t%s+%#x\n\t\t%s:%d\n", f.Function, f.PC-f.Entry, f.File, f.Line))
if !more {
break
}
}
return b.String()
}

View file

@ -0,0 +1,49 @@
// Code generated by "stringer -type MapFlags"; DO NOT EDIT.
package sys
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[BPF_F_NO_PREALLOC-1]
_ = x[BPF_F_NO_COMMON_LRU-2]
_ = x[BPF_F_NUMA_NODE-4]
_ = x[BPF_F_RDONLY-8]
_ = x[BPF_F_WRONLY-16]
_ = x[BPF_F_STACK_BUILD_ID-32]
_ = x[BPF_F_ZERO_SEED-64]
_ = x[BPF_F_RDONLY_PROG-128]
_ = x[BPF_F_WRONLY_PROG-256]
_ = x[BPF_F_CLONE-512]
_ = x[BPF_F_MMAPABLE-1024]
_ = x[BPF_F_PRESERVE_ELEMS-2048]
_ = x[BPF_F_INNER_MAP-4096]
}
const _MapFlags_name = "BPF_F_NO_PREALLOCBPF_F_NO_COMMON_LRUBPF_F_NUMA_NODEBPF_F_RDONLYBPF_F_WRONLYBPF_F_STACK_BUILD_IDBPF_F_ZERO_SEEDBPF_F_RDONLY_PROGBPF_F_WRONLY_PROGBPF_F_CLONEBPF_F_MMAPABLEBPF_F_PRESERVE_ELEMSBPF_F_INNER_MAP"
var _MapFlags_map = map[MapFlags]string{
1: _MapFlags_name[0:17],
2: _MapFlags_name[17:36],
4: _MapFlags_name[36:51],
8: _MapFlags_name[51:63],
16: _MapFlags_name[63:75],
32: _MapFlags_name[75:95],
64: _MapFlags_name[95:110],
128: _MapFlags_name[110:127],
256: _MapFlags_name[127:144],
512: _MapFlags_name[144:155],
1024: _MapFlags_name[155:169],
2048: _MapFlags_name[169:189],
4096: _MapFlags_name[189:204],
}
func (i MapFlags) String() string {
if str, ok := _MapFlags_map[i]; ok {
return str
}
return "MapFlags(" + strconv.FormatInt(int64(i), 10) + ")"
}

View file

@ -20,7 +20,7 @@ func NewSlicePointer(buf []byte) Pointer {
return Pointer{ptr: unsafe.Pointer(&buf[0])}
}
// NewSlicePointer creates a 64-bit pointer from a byte slice.
// NewSlicePointerLen creates a 64-bit pointer from a byte slice.
//
// Useful to assign both the pointer and the length in one go.
func NewSlicePointerLen(buf []byte) (Pointer, uint32) {
@ -36,3 +36,17 @@ func NewStringPointer(str string) Pointer {
return Pointer{ptr: unsafe.Pointer(p)}
}
// NewStringSlicePointer allocates an array of Pointers to each string in the
// given slice of strings and returns a 64-bit pointer to the start of the
// resulting array.
//
// Use this function to pass arrays of strings as syscall arguments.
func NewStringSlicePointer(strings []string) Pointer {
sp := make([]Pointer, 0, len(strings))
for _, s := range strings {
sp = append(sp, NewStringPointer(s))
}
return Pointer{ptr: unsafe.Pointer(&sp[0])}
}

View file

@ -1,5 +1,4 @@
//go:build armbe || mips || mips64p32
// +build armbe mips mips64p32
package sys

View file

@ -1,5 +1,4 @@
//go:build 386 || amd64p32 || arm || mipsle || mips64p32le
// +build 386 amd64p32 arm mipsle mips64p32le
package sys

View file

@ -1,5 +1,4 @@
//go:build !386 && !amd64p32 && !arm && !mipsle && !mips64p32le && !armbe && !mips && !mips64p32
// +build !386,!amd64p32,!arm,!mipsle,!mips64p32le,!armbe,!mips,!mips64p32
package sys

83
vendor/github.com/cilium/ebpf/internal/sys/signals.go generated vendored Normal file
View file

@ -0,0 +1,83 @@
package sys
import (
"fmt"
"runtime"
"unsafe"
"github.com/cilium/ebpf/internal/unix"
)
// A sigset containing only SIGPROF.
var profSet unix.Sigset_t
func init() {
// See sigsetAdd for details on the implementation. Open coded here so
// that the compiler will check the constant calculations for us.
profSet.Val[sigprofBit/wordBits] |= 1 << (sigprofBit % wordBits)
}
// maskProfilerSignal locks the calling goroutine to its underlying OS thread
// and adds SIGPROF to the thread's signal mask. This prevents pprof from
// interrupting expensive syscalls like e.g. BPF_PROG_LOAD.
//
// The caller must defer unmaskProfilerSignal() to reverse the operation.
func maskProfilerSignal() {
runtime.LockOSThread()
if err := unix.PthreadSigmask(unix.SIG_BLOCK, &profSet, nil); err != nil {
runtime.UnlockOSThread()
panic(fmt.Errorf("masking profiler signal: %w", err))
}
}
// unmaskProfilerSignal removes SIGPROF from the underlying thread's signal
// mask, allowing it to be interrupted for profiling once again.
//
// It also unlocks the current goroutine from its underlying OS thread.
func unmaskProfilerSignal() {
defer runtime.UnlockOSThread()
if err := unix.PthreadSigmask(unix.SIG_UNBLOCK, &profSet, nil); err != nil {
panic(fmt.Errorf("unmasking profiler signal: %w", err))
}
}
const (
// Signal is the nth bit in the bitfield.
sigprofBit = int(unix.SIGPROF - 1)
// The number of bits in one Sigset_t word.
wordBits = int(unsafe.Sizeof(unix.Sigset_t{}.Val[0])) * 8
)
// sigsetAdd adds signal to set.
//
// Note: Sigset_t.Val's value type is uint32 or uint64 depending on the arch.
// This function must be able to deal with both and so must avoid any direct
// references to u32 or u64 types.
func sigsetAdd(set *unix.Sigset_t, signal unix.Signal) error {
if signal < 1 {
return fmt.Errorf("signal %d must be larger than 0", signal)
}
// For amd64, runtime.sigaddset() performs the following operation:
// set[(signal-1)/32] |= 1 << ((uint32(signal) - 1) & 31)
//
// This trick depends on sigset being two u32's, causing a signal in the the
// bottom 31 bits to be written to the low word if bit 32 is low, or the high
// word if bit 32 is high.
// Signal is the nth bit in the bitfield.
bit := int(signal - 1)
// Word within the sigset the bit needs to be written to.
word := bit / wordBits
if word >= len(set.Val) {
return fmt.Errorf("signal %d does not fit within unix.Sigset_t", signal)
}
// Write the signal bit into its corresponding word at the corrected offset.
set.Val[word] |= 1 << (bit % wordBits)
return nil
}

View file

@ -8,10 +8,22 @@ import (
"github.com/cilium/ebpf/internal/unix"
)
// ENOTSUPP is a Linux internal error code that has leaked into UAPI.
//
// It is not the same as ENOTSUP or EOPNOTSUPP.
var ENOTSUPP = syscall.Errno(524)
// BPF wraps SYS_BPF.
//
// Any pointers contained in attr must use the Pointer type from this package.
func BPF(cmd Cmd, attr unsafe.Pointer, size uintptr) (uintptr, error) {
// Prevent the Go profiler from repeatedly interrupting the verifier,
// which could otherwise lead to a livelock due to receiving EAGAIN.
if cmd == BPF_PROG_LOAD || cmd == BPF_PROG_RUN {
maskProfilerSignal()
defer unmaskProfilerSignal()
}
for {
r1, _, errNo := unix.Syscall(unix.SYS_BPF, uintptr(cmd), uintptr(attr), size)
runtime.KeepAlive(attr)
@ -33,10 +45,10 @@ func BPF(cmd Cmd, attr unsafe.Pointer, size uintptr) (uintptr, error) {
// Info is implemented by all structs that can be passed to the ObjInfo syscall.
//
// MapInfo
// ProgInfo
// LinkInfo
// BtfInfo
// MapInfo
// ProgInfo
// LinkInfo
// BtfInfo
type Info interface {
info() (unsafe.Pointer, uint32)
}
@ -90,12 +102,45 @@ func NewObjName(name string) ObjName {
return result
}
// LogLevel controls the verbosity of the kernel's eBPF program verifier.
type LogLevel uint32
const (
BPF_LOG_LEVEL1 LogLevel = 1 << iota
BPF_LOG_LEVEL2
BPF_LOG_STATS
)
// LinkID uniquely identifies a bpf_link.
type LinkID uint32
// BTFID uniquely identifies a BTF blob loaded into the kernel.
type BTFID uint32
// TypeID identifies a type in a BTF blob.
type TypeID uint32
// MapFlags control map behaviour.
type MapFlags uint32
//go:generate stringer -type MapFlags
const (
BPF_F_NO_PREALLOC MapFlags = 1 << iota
BPF_F_NO_COMMON_LRU
BPF_F_NUMA_NODE
BPF_F_RDONLY
BPF_F_WRONLY
BPF_F_STACK_BUILD_ID
BPF_F_ZERO_SEED
BPF_F_RDONLY_PROG
BPF_F_WRONLY_PROG
BPF_F_CLONE
BPF_F_MMAPABLE
BPF_F_PRESERVE_ELEMS
BPF_F_INNER_MAP
)
// wrappedErrno wraps syscall.Errno to prevent direct comparisons with
// syscall.E* or unix.E* constants.
//
@ -108,6 +153,13 @@ func (we wrappedErrno) Unwrap() error {
return we.Errno
}
func (we wrappedErrno) Error() string {
if we.Errno == ENOTSUPP {
return "operation not supported"
}
return we.Errno.Error()
}
type syscallError struct {
error
errno syscall.Errno

View file

@ -6,14 +6,14 @@ import (
"unsafe"
)
type AdjRoomMode int32
type AdjRoomMode uint32
const (
BPF_ADJ_ROOM_NET AdjRoomMode = 0
BPF_ADJ_ROOM_MAC AdjRoomMode = 1
)
type AttachType int32
type AttachType uint32
const (
BPF_CGROUP_INET_INGRESS AttachType = 0
@ -62,7 +62,7 @@ const (
__MAX_BPF_ATTACH_TYPE AttachType = 43
)
type Cmd int32
type Cmd uint32
const (
BPF_MAP_CREATE Cmd = 0
@ -104,7 +104,7 @@ const (
BPF_PROG_BIND_MAP Cmd = 35
)
type FunctionId int32
type FunctionId uint32
const (
BPF_FUNC_unspec FunctionId = 0
@ -301,17 +301,27 @@ const (
BPF_FUNC_copy_from_user_task FunctionId = 191
BPF_FUNC_skb_set_tstamp FunctionId = 192
BPF_FUNC_ima_file_hash FunctionId = 193
__BPF_FUNC_MAX_ID FunctionId = 194
BPF_FUNC_kptr_xchg FunctionId = 194
BPF_FUNC_map_lookup_percpu_elem FunctionId = 195
BPF_FUNC_skc_to_mptcp_sock FunctionId = 196
BPF_FUNC_dynptr_from_mem FunctionId = 197
BPF_FUNC_ringbuf_reserve_dynptr FunctionId = 198
BPF_FUNC_ringbuf_submit_dynptr FunctionId = 199
BPF_FUNC_ringbuf_discard_dynptr FunctionId = 200
BPF_FUNC_dynptr_read FunctionId = 201
BPF_FUNC_dynptr_write FunctionId = 202
BPF_FUNC_dynptr_data FunctionId = 203
__BPF_FUNC_MAX_ID FunctionId = 204
)
type HdrStartOff int32
type HdrStartOff uint32
const (
BPF_HDR_START_MAC HdrStartOff = 0
BPF_HDR_START_NET HdrStartOff = 1
)
type LinkType int32
type LinkType uint32
const (
BPF_LINK_TYPE_UNSPEC LinkType = 0
@ -323,10 +333,11 @@ const (
BPF_LINK_TYPE_XDP LinkType = 6
BPF_LINK_TYPE_PERF_EVENT LinkType = 7
BPF_LINK_TYPE_KPROBE_MULTI LinkType = 8
MAX_BPF_LINK_TYPE LinkType = 9
BPF_LINK_TYPE_STRUCT_OPS LinkType = 9
MAX_BPF_LINK_TYPE LinkType = 10
)
type MapType int32
type MapType uint32
const (
BPF_MAP_TYPE_UNSPEC MapType = 0
@ -362,7 +373,7 @@ const (
BPF_MAP_TYPE_BLOOM_FILTER MapType = 30
)
type ProgType int32
type ProgType uint32
const (
BPF_PROG_TYPE_UNSPEC ProgType = 0
@ -399,7 +410,7 @@ const (
BPF_PROG_TYPE_SYSCALL ProgType = 31
)
type RetCode int32
type RetCode uint32
const (
BPF_OK RetCode = 0
@ -408,14 +419,14 @@ const (
BPF_LWT_REROUTE RetCode = 128
)
type SkAction int32
type SkAction uint32
const (
SK_DROP SkAction = 0
SK_PASS SkAction = 1
)
type StackBuildIdStatus int32
type StackBuildIdStatus uint32
const (
BPF_STACK_BUILD_ID_EMPTY StackBuildIdStatus = 0
@ -423,13 +434,13 @@ const (
BPF_STACK_BUILD_ID_IP StackBuildIdStatus = 2
)
type StatsType int32
type StatsType uint32
const (
BPF_STATS_RUN_TIME StatsType = 0
)
type XdpAction int32
type XdpAction uint32
const (
XDP_ABORTED XdpAction = 0
@ -474,15 +485,15 @@ type MapInfo struct {
KeySize uint32
ValueSize uint32
MaxEntries uint32
MapFlags uint32
MapFlags MapFlags
Name ObjName
Ifindex uint32
BtfVmlinuxValueTypeId uint32
BtfVmlinuxValueTypeId TypeID
NetnsDev uint64
NetnsIno uint64
BtfId uint32
BtfKeyTypeId uint32
BtfValueTypeId uint32
BtfKeyTypeId TypeID
BtfValueTypeId TypeID
_ [4]byte
MapExtra uint64
}
@ -508,7 +519,7 @@ type ProgInfo struct {
NrJitedFuncLens uint32
JitedKsyms uint64
JitedFuncLens uint64
BtfId uint32
BtfId BTFID
FuncInfoRecSize uint32
FuncInfo uint64
NrFuncInfo uint32
@ -616,7 +627,7 @@ type LinkCreateAttr struct {
TargetFd uint32
AttachType AttachType
Flags uint32
TargetBtfId uint32
TargetBtfId TypeID
_ [28]byte
}
@ -646,6 +657,26 @@ func LinkCreateIter(attr *LinkCreateIterAttr) (*FD, error) {
return NewFD(int(fd))
}
type LinkCreateKprobeMultiAttr struct {
ProgFd uint32
TargetFd uint32
AttachType AttachType
Flags uint32
KprobeMultiFlags uint32
Count uint32
Syms Pointer
Addrs Pointer
Cookies Pointer
}
func LinkCreateKprobeMulti(attr *LinkCreateKprobeMultiAttr) (*FD, error) {
fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
if err != nil {
return nil, err
}
return NewFD(int(fd))
}
type LinkCreatePerfEventAttr struct {
ProgFd uint32
TargetFd uint32
@ -663,6 +694,25 @@ func LinkCreatePerfEvent(attr *LinkCreatePerfEventAttr) (*FD, error) {
return NewFD(int(fd))
}
type LinkCreateTracingAttr struct {
ProgFd uint32
TargetFd uint32
AttachType AttachType
Flags uint32
TargetBtfId BTFID
_ [4]byte
Cookie uint64
_ [16]byte
}
func LinkCreateTracing(attr *LinkCreateTracingAttr) (*FD, error) {
fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
if err != nil {
return nil, err
}
return NewFD(int(fd))
}
type LinkUpdateAttr struct {
LinkFd uint32
NewProgFd uint32
@ -680,15 +730,15 @@ type MapCreateAttr struct {
KeySize uint32
ValueSize uint32
MaxEntries uint32
MapFlags uint32
MapFlags MapFlags
InnerMapFd uint32
NumaNode uint32
MapName ObjName
MapIfindex uint32
BtfFd uint32
BtfKeyTypeId uint32
BtfValueTypeId uint32
BtfVmlinuxValueTypeId uint32
BtfKeyTypeId TypeID
BtfValueTypeId TypeID
BtfVmlinuxValueTypeId TypeID
MapExtra uint64
}
@ -951,7 +1001,7 @@ type ProgLoadAttr struct {
InsnCnt uint32
Insns Pointer
License Pointer
LogLevel uint32
LogLevel LogLevel
LogSize uint32
LogBuf Pointer
KernVersion uint32
@ -966,8 +1016,8 @@ type ProgLoadAttr struct {
LineInfoRecSize uint32
LineInfo Pointer
LineInfoCnt uint32
AttachBtfId uint32
AttachProgFd uint32
AttachBtfId TypeID
AttachBtfObjFd uint32
CoreReloCnt uint32
FdArray Pointer
CoreRelos Pointer
@ -983,6 +1033,21 @@ func ProgLoad(attr *ProgLoadAttr) (*FD, error) {
return NewFD(int(fd))
}
type ProgQueryAttr struct {
TargetFd uint32
AttachType AttachType
QueryFlags uint32
AttachFlags uint32
ProgIds Pointer
ProgCount uint32
_ [4]byte
}
func ProgQuery(attr *ProgQueryAttr) error {
_, err := BPF(BPF_PROG_QUERY, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
return err
}
type ProgRunAttr struct {
ProgFd uint32
Retval uint32
@ -1046,7 +1111,7 @@ type RawTracepointLinkInfo struct {
type TracingLinkInfo struct {
AttachType AttachType
TargetObjId uint32
TargetBtfId uint32
TargetBtfId TypeID
}
type XDPLinkInfo struct{ Ifindex uint32 }

View file

@ -0,0 +1,359 @@
package tracefs
import (
"crypto/rand"
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
"strings"
"syscall"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/unix"
)
var (
ErrInvalidInput = errors.New("invalid input")
ErrInvalidMaxActive = errors.New("can only set maxactive on kretprobes")
)
//go:generate stringer -type=ProbeType -linecomment
type ProbeType uint8
const (
Kprobe ProbeType = iota // kprobe
Uprobe // uprobe
)
func (pt ProbeType) eventsFile() (*os.File, error) {
path, err := sanitizeTracefsPath(fmt.Sprintf("%s_events", pt.String()))
if err != nil {
return nil, err
}
return os.OpenFile(path, os.O_APPEND|os.O_WRONLY, 0666)
}
type ProbeArgs struct {
Type ProbeType
Symbol, Group, Path string
Offset, RefCtrOffset, Cookie uint64
Pid, RetprobeMaxActive int
Ret bool
}
// RandomGroup generates a pseudorandom string for use as a tracefs group name.
// Returns an error when the output string would exceed 63 characters (kernel
// limitation), when rand.Read() fails or when prefix contains characters not
// allowed by IsValidTraceID.
func RandomGroup(prefix string) (string, error) {
if !validIdentifier(prefix) {
return "", fmt.Errorf("prefix '%s' must be alphanumeric or underscore: %w", prefix, ErrInvalidInput)
}
b := make([]byte, 8)
if _, err := rand.Read(b); err != nil {
return "", fmt.Errorf("reading random bytes: %w", err)
}
group := fmt.Sprintf("%s_%x", prefix, b)
if len(group) > 63 {
return "", fmt.Errorf("group name '%s' cannot be longer than 63 characters: %w", group, ErrInvalidInput)
}
return group, nil
}
// validIdentifier implements the equivalent of a regex match
// against "^[a-zA-Z_][0-9a-zA-Z_]*$".
//
// Trace event groups, names and kernel symbols must adhere to this set
// of characters. Non-empty, first character must not be a number, all
// characters must be alphanumeric or underscore.
func validIdentifier(s string) bool {
if len(s) < 1 {
return false
}
for i, c := range []byte(s) {
switch {
case c >= 'a' && c <= 'z':
case c >= 'A' && c <= 'Z':
case c == '_':
case i > 0 && c >= '0' && c <= '9':
default:
return false
}
}
return true
}
func sanitizeTracefsPath(path ...string) (string, error) {
base, err := getTracefsPath()
if err != nil {
return "", err
}
l := filepath.Join(path...)
p := filepath.Join(base, l)
if !strings.HasPrefix(p, base) {
return "", fmt.Errorf("path '%s' attempts to escape base path '%s': %w", l, base, ErrInvalidInput)
}
return p, nil
}
// getTracefsPath will return a correct path to the tracefs mount point.
// Since kernel 4.1 tracefs should be mounted by default at /sys/kernel/tracing,
// but may be also be available at /sys/kernel/debug/tracing if debugfs is mounted.
// The available tracefs paths will depends on distribution choices.
var getTracefsPath = internal.Memoize(func() (string, error) {
for _, p := range []struct {
path string
fsType int64
}{
{"/sys/kernel/tracing", unix.TRACEFS_MAGIC},
{"/sys/kernel/debug/tracing", unix.TRACEFS_MAGIC},
// RHEL/CentOS
{"/sys/kernel/debug/tracing", unix.DEBUGFS_MAGIC},
} {
if fsType, err := internal.FSType(p.path); err == nil && fsType == p.fsType {
return p.path, nil
}
}
return "", errors.New("neither debugfs nor tracefs are mounted")
})
// sanitizeIdentifier replaces every invalid character for the tracefs api with an underscore.
//
// It is equivalent to calling regexp.MustCompile("[^a-zA-Z0-9]+").ReplaceAllString("_").
func sanitizeIdentifier(s string) string {
var skip bool
return strings.Map(func(c rune) rune {
switch {
case c >= 'a' && c <= 'z',
c >= 'A' && c <= 'Z',
c >= '0' && c <= '9':
skip = false
return c
case skip:
return -1
default:
skip = true
return '_'
}
}, s)
}
// EventID reads a trace event's ID from tracefs given its group and name.
// The kernel requires group and name to be alphanumeric or underscore.
func EventID(group, name string) (uint64, error) {
if !validIdentifier(group) {
return 0, fmt.Errorf("invalid tracefs group: %q", group)
}
if !validIdentifier(name) {
return 0, fmt.Errorf("invalid tracefs name: %q", name)
}
path, err := sanitizeTracefsPath("events", group, name, "id")
if err != nil {
return 0, err
}
tid, err := internal.ReadUint64FromFile("%d\n", path)
if errors.Is(err, os.ErrNotExist) {
return 0, err
}
if err != nil {
return 0, fmt.Errorf("reading trace event ID of %s/%s: %w", group, name, err)
}
return tid, nil
}
func probePrefix(ret bool, maxActive int) string {
if ret {
if maxActive > 0 {
return fmt.Sprintf("r%d", maxActive)
}
return "r"
}
return "p"
}
// Event represents an entry in a tracefs probe events file.
type Event struct {
typ ProbeType
group, name string
// event id allocated by the kernel. 0 if the event has already been removed.
id uint64
}
// NewEvent creates a new ephemeral trace event.
//
// Returns os.ErrNotExist if symbol is not a valid
// kernel symbol, or if it is not traceable with kprobes. Returns os.ErrExist
// if a probe with the same group and symbol already exists. Returns an error if
// args.RetprobeMaxActive is used on non kprobe types. Returns ErrNotSupported if
// the kernel is too old to support kretprobe maxactive.
func NewEvent(args ProbeArgs) (*Event, error) {
// Before attempting to create a trace event through tracefs,
// check if an event with the same group and name already exists.
// Kernels 4.x and earlier don't return os.ErrExist on writing a duplicate
// entry, so we need to rely on reads for detecting uniqueness.
eventName := sanitizeIdentifier(args.Symbol)
_, err := EventID(args.Group, eventName)
if err == nil {
return nil, fmt.Errorf("trace event %s/%s: %w", args.Group, eventName, os.ErrExist)
}
if err != nil && !errors.Is(err, os.ErrNotExist) {
return nil, fmt.Errorf("checking trace event %s/%s: %w", args.Group, eventName, err)
}
// Open the kprobe_events file in tracefs.
f, err := args.Type.eventsFile()
if err != nil {
return nil, err
}
defer f.Close()
var pe, token string
switch args.Type {
case Kprobe:
// The kprobe_events syntax is as follows (see Documentation/trace/kprobetrace.txt):
// p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe
// r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe
// -:[GRP/]EVENT : Clear a probe
//
// Some examples:
// r:ebpf_1234/r_my_kretprobe nf_conntrack_destroy
// p:ebpf_5678/p_my_kprobe __x64_sys_execve
//
// Leaving the kretprobe's MAXACTIVE set to 0 (or absent) will make the
// kernel default to NR_CPUS. This is desired in most eBPF cases since
// subsampling or rate limiting logic can be more accurately implemented in
// the eBPF program itself.
// See Documentation/kprobes.txt for more details.
if args.RetprobeMaxActive != 0 && !args.Ret {
return nil, ErrInvalidMaxActive
}
token = KprobeToken(args)
pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.Ret, args.RetprobeMaxActive), args.Group, eventName, token)
case Uprobe:
// The uprobe_events syntax is as follows:
// p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a probe
// r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return probe
// -:[GRP/]EVENT : Clear a probe
//
// Some examples:
// r:ebpf_1234/readline /bin/bash:0x12345
// p:ebpf_5678/main_mySymbol /bin/mybin:0x12345(0x123)
//
// See Documentation/trace/uprobetracer.txt for more details.
if args.RetprobeMaxActive != 0 {
return nil, ErrInvalidMaxActive
}
token = UprobeToken(args)
pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.Ret, 0), args.Group, eventName, token)
}
_, err = f.WriteString(pe)
// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
// when trying to create a retprobe for a missing symbol.
if errors.Is(err, os.ErrNotExist) {
return nil, fmt.Errorf("token %s: not found: %w", token, err)
}
// Since commit ab105a4fb894, EILSEQ is returned when a kprobe sym+offset is resolved
// to an invalid insn boundary. The exact conditions that trigger this error are
// arch specific however.
if errors.Is(err, syscall.EILSEQ) {
return nil, fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist)
}
// ERANGE is returned when the `SYM[+offs]` token is too big and cannot
// be resolved.
if errors.Is(err, syscall.ERANGE) {
return nil, fmt.Errorf("token %s: offset too big: %w", token, os.ErrNotExist)
}
if err != nil {
return nil, fmt.Errorf("token %s: writing '%s': %w", token, pe, err)
}
// Get the newly-created trace event's id.
tid, err := EventID(args.Group, eventName)
if args.RetprobeMaxActive != 0 && errors.Is(err, os.ErrNotExist) {
// Kernels < 4.12 don't support maxactive and therefore auto generate
// group and event names from the symbol and offset. The symbol is used
// without any sanitization.
// See https://elixir.bootlin.com/linux/v4.10/source/kernel/trace/trace_kprobe.c#L712
event := fmt.Sprintf("kprobes/r_%s_%d", args.Symbol, args.Offset)
if err := removeEvent(args.Type, event); err != nil {
return nil, fmt.Errorf("failed to remove spurious maxactive event: %s", err)
}
return nil, fmt.Errorf("create trace event with non-default maxactive: %w", internal.ErrNotSupported)
}
if err != nil {
return nil, fmt.Errorf("get trace event id: %w", err)
}
evt := &Event{args.Type, args.Group, eventName, tid}
runtime.SetFinalizer(evt, (*Event).Close)
return evt, nil
}
// Close removes the event from tracefs.
//
// Returns os.ErrClosed if the event has already been closed before.
func (evt *Event) Close() error {
if evt.id == 0 {
return os.ErrClosed
}
evt.id = 0
runtime.SetFinalizer(evt, nil)
pe := fmt.Sprintf("%s/%s", evt.group, evt.name)
return removeEvent(evt.typ, pe)
}
func removeEvent(typ ProbeType, pe string) error {
f, err := typ.eventsFile()
if err != nil {
return err
}
defer f.Close()
// See [k,u]probe_events syntax above. The probe type does not need to be specified
// for removals.
if _, err = f.WriteString("-:" + pe); err != nil {
return fmt.Errorf("remove event %q from %s: %w", pe, f.Name(), err)
}
return nil
}
// ID returns the tracefs ID associated with the event.
func (evt *Event) ID() uint64 {
return evt.id
}
// Group returns the tracefs group used by the event.
func (evt *Event) Group() string {
return evt.group
}
// KprobeToken creates the SYM[+offs] token for the tracefs api.
func KprobeToken(args ProbeArgs) string {
po := args.Symbol
if args.Offset != 0 {
po += fmt.Sprintf("+%#x", args.Offset)
}
return po
}

View file

@ -0,0 +1,24 @@
// Code generated by "stringer -type=ProbeType -linecomment"; DO NOT EDIT.
package tracefs
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[Kprobe-0]
_ = x[Uprobe-1]
}
const _ProbeType_name = "kprobeuprobe"
var _ProbeType_index = [...]uint8{0, 6, 12}
func (i ProbeType) String() string {
if i >= ProbeType(len(_ProbeType_index)-1) {
return "ProbeType(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _ProbeType_name[_ProbeType_index[i]:_ProbeType_index[i+1]]
}

View file

@ -0,0 +1,16 @@
package tracefs
import "fmt"
// UprobeToken creates the PATH:OFFSET(REF_CTR_OFFSET) token for the tracefs api.
func UprobeToken(args ProbeArgs) string {
po := fmt.Sprintf("%s:%#x", args.Path, args.Offset)
if args.RefCtrOffset != 0 {
// This is not documented in Documentation/trace/uprobetracer.txt.
// elixir.bootlin.com/linux/v5.15-rc7/source/kernel/trace/trace.c#L5564
po += fmt.Sprintf("(%#x)", args.RefCtrOffset)
}
return po
}

11
vendor/github.com/cilium/ebpf/internal/unix/doc.go generated vendored Normal file
View file

@ -0,0 +1,11 @@
// Package unix re-exports Linux specific parts of golang.org/x/sys/unix.
//
// It avoids breaking compilation on other OS by providing stubs as follows:
// - Invoking a function always returns an error.
// - Errnos have distinct, non-zero values.
// - Constants have distinct but meaningless values.
// - Types use the same names for members, but may or may not follow the
// Linux layout.
package unix
// Note: please don't add any custom API to this package. Use internal/sys instead.

View file

@ -1,5 +1,4 @@
//go:build linux
// +build linux
package unix
@ -10,189 +9,178 @@ import (
)
const (
ENOENT = linux.ENOENT
EEXIST = linux.EEXIST
EAGAIN = linux.EAGAIN
ENOSPC = linux.ENOSPC
EINVAL = linux.EINVAL
EPOLLIN = linux.EPOLLIN
EINTR = linux.EINTR
EPERM = linux.EPERM
ESRCH = linux.ESRCH
ENODEV = linux.ENODEV
EBADF = linux.EBADF
E2BIG = linux.E2BIG
EFAULT = linux.EFAULT
EACCES = linux.EACCES
// ENOTSUPP is not the same as ENOTSUP or EOPNOTSUP
ENOTSUPP = syscall.Errno(0x20c)
BPF_F_NO_PREALLOC = linux.BPF_F_NO_PREALLOC
BPF_F_NUMA_NODE = linux.BPF_F_NUMA_NODE
BPF_F_RDONLY = linux.BPF_F_RDONLY
BPF_F_WRONLY = linux.BPF_F_WRONLY
BPF_F_RDONLY_PROG = linux.BPF_F_RDONLY_PROG
BPF_F_WRONLY_PROG = linux.BPF_F_WRONLY_PROG
BPF_F_SLEEPABLE = linux.BPF_F_SLEEPABLE
BPF_F_MMAPABLE = linux.BPF_F_MMAPABLE
BPF_F_INNER_MAP = linux.BPF_F_INNER_MAP
BPF_OBJ_NAME_LEN = linux.BPF_OBJ_NAME_LEN
BPF_TAG_SIZE = linux.BPF_TAG_SIZE
BPF_RINGBUF_BUSY_BIT = linux.BPF_RINGBUF_BUSY_BIT
BPF_RINGBUF_DISCARD_BIT = linux.BPF_RINGBUF_DISCARD_BIT
BPF_RINGBUF_HDR_SZ = linux.BPF_RINGBUF_HDR_SZ
SYS_BPF = linux.SYS_BPF
F_DUPFD_CLOEXEC = linux.F_DUPFD_CLOEXEC
EPOLL_CTL_ADD = linux.EPOLL_CTL_ADD
EPOLL_CLOEXEC = linux.EPOLL_CLOEXEC
O_CLOEXEC = linux.O_CLOEXEC
O_NONBLOCK = linux.O_NONBLOCK
PROT_READ = linux.PROT_READ
PROT_WRITE = linux.PROT_WRITE
MAP_SHARED = linux.MAP_SHARED
PERF_ATTR_SIZE_VER1 = linux.PERF_ATTR_SIZE_VER1
PERF_TYPE_SOFTWARE = linux.PERF_TYPE_SOFTWARE
PERF_TYPE_TRACEPOINT = linux.PERF_TYPE_TRACEPOINT
PERF_COUNT_SW_BPF_OUTPUT = linux.PERF_COUNT_SW_BPF_OUTPUT
PERF_EVENT_IOC_DISABLE = linux.PERF_EVENT_IOC_DISABLE
PERF_EVENT_IOC_ENABLE = linux.PERF_EVENT_IOC_ENABLE
PERF_EVENT_IOC_SET_BPF = linux.PERF_EVENT_IOC_SET_BPF
PerfBitWatermark = linux.PerfBitWatermark
PERF_SAMPLE_RAW = linux.PERF_SAMPLE_RAW
PERF_FLAG_FD_CLOEXEC = linux.PERF_FLAG_FD_CLOEXEC
RLIM_INFINITY = linux.RLIM_INFINITY
RLIMIT_MEMLOCK = linux.RLIMIT_MEMLOCK
BPF_STATS_RUN_TIME = linux.BPF_STATS_RUN_TIME
PERF_RECORD_LOST = linux.PERF_RECORD_LOST
PERF_RECORD_SAMPLE = linux.PERF_RECORD_SAMPLE
AT_FDCWD = linux.AT_FDCWD
RENAME_NOREPLACE = linux.RENAME_NOREPLACE
SO_ATTACH_BPF = linux.SO_ATTACH_BPF
SO_DETACH_BPF = linux.SO_DETACH_BPF
SOL_SOCKET = linux.SOL_SOCKET
ENOENT = linux.ENOENT
EEXIST = linux.EEXIST
EAGAIN = linux.EAGAIN
ENOSPC = linux.ENOSPC
EINVAL = linux.EINVAL
EPOLLIN = linux.EPOLLIN
EINTR = linux.EINTR
EPERM = linux.EPERM
ESRCH = linux.ESRCH
ENODEV = linux.ENODEV
EBADF = linux.EBADF
E2BIG = linux.E2BIG
EFAULT = linux.EFAULT
EACCES = linux.EACCES
EILSEQ = linux.EILSEQ
EOPNOTSUPP = linux.EOPNOTSUPP
)
const (
BPF_F_NO_PREALLOC = linux.BPF_F_NO_PREALLOC
BPF_F_NUMA_NODE = linux.BPF_F_NUMA_NODE
BPF_F_RDONLY = linux.BPF_F_RDONLY
BPF_F_WRONLY = linux.BPF_F_WRONLY
BPF_F_RDONLY_PROG = linux.BPF_F_RDONLY_PROG
BPF_F_WRONLY_PROG = linux.BPF_F_WRONLY_PROG
BPF_F_SLEEPABLE = linux.BPF_F_SLEEPABLE
BPF_F_XDP_HAS_FRAGS = linux.BPF_F_XDP_HAS_FRAGS
BPF_F_MMAPABLE = linux.BPF_F_MMAPABLE
BPF_F_INNER_MAP = linux.BPF_F_INNER_MAP
BPF_F_KPROBE_MULTI_RETURN = linux.BPF_F_KPROBE_MULTI_RETURN
BPF_OBJ_NAME_LEN = linux.BPF_OBJ_NAME_LEN
BPF_TAG_SIZE = linux.BPF_TAG_SIZE
BPF_RINGBUF_BUSY_BIT = linux.BPF_RINGBUF_BUSY_BIT
BPF_RINGBUF_DISCARD_BIT = linux.BPF_RINGBUF_DISCARD_BIT
BPF_RINGBUF_HDR_SZ = linux.BPF_RINGBUF_HDR_SZ
SYS_BPF = linux.SYS_BPF
F_DUPFD_CLOEXEC = linux.F_DUPFD_CLOEXEC
EPOLL_CTL_ADD = linux.EPOLL_CTL_ADD
EPOLL_CLOEXEC = linux.EPOLL_CLOEXEC
O_CLOEXEC = linux.O_CLOEXEC
O_NONBLOCK = linux.O_NONBLOCK
PROT_NONE = linux.PROT_NONE
PROT_READ = linux.PROT_READ
PROT_WRITE = linux.PROT_WRITE
MAP_ANON = linux.MAP_ANON
MAP_SHARED = linux.MAP_SHARED
MAP_PRIVATE = linux.MAP_PRIVATE
PERF_ATTR_SIZE_VER1 = linux.PERF_ATTR_SIZE_VER1
PERF_TYPE_SOFTWARE = linux.PERF_TYPE_SOFTWARE
PERF_TYPE_TRACEPOINT = linux.PERF_TYPE_TRACEPOINT
PERF_COUNT_SW_BPF_OUTPUT = linux.PERF_COUNT_SW_BPF_OUTPUT
PERF_EVENT_IOC_DISABLE = linux.PERF_EVENT_IOC_DISABLE
PERF_EVENT_IOC_ENABLE = linux.PERF_EVENT_IOC_ENABLE
PERF_EVENT_IOC_SET_BPF = linux.PERF_EVENT_IOC_SET_BPF
PerfBitWatermark = linux.PerfBitWatermark
PerfBitWriteBackward = linux.PerfBitWriteBackward
PERF_SAMPLE_RAW = linux.PERF_SAMPLE_RAW
PERF_FLAG_FD_CLOEXEC = linux.PERF_FLAG_FD_CLOEXEC
RLIM_INFINITY = linux.RLIM_INFINITY
RLIMIT_MEMLOCK = linux.RLIMIT_MEMLOCK
BPF_STATS_RUN_TIME = linux.BPF_STATS_RUN_TIME
PERF_RECORD_LOST = linux.PERF_RECORD_LOST
PERF_RECORD_SAMPLE = linux.PERF_RECORD_SAMPLE
AT_FDCWD = linux.AT_FDCWD
RENAME_NOREPLACE = linux.RENAME_NOREPLACE
SO_ATTACH_BPF = linux.SO_ATTACH_BPF
SO_DETACH_BPF = linux.SO_DETACH_BPF
SOL_SOCKET = linux.SOL_SOCKET
SIGPROF = linux.SIGPROF
SIG_BLOCK = linux.SIG_BLOCK
SIG_UNBLOCK = linux.SIG_UNBLOCK
EM_NONE = linux.EM_NONE
EM_BPF = linux.EM_BPF
BPF_FS_MAGIC = linux.BPF_FS_MAGIC
TRACEFS_MAGIC = linux.TRACEFS_MAGIC
DEBUGFS_MAGIC = linux.DEBUGFS_MAGIC
)
// Statfs_t is a wrapper
type Statfs_t = linux.Statfs_t
type Stat_t = linux.Stat_t
// Rlimit is a wrapper
type Rlimit = linux.Rlimit
type Signal = linux.Signal
type Sigset_t = linux.Sigset_t
type PerfEventMmapPage = linux.PerfEventMmapPage
type EpollEvent = linux.EpollEvent
type PerfEventAttr = linux.PerfEventAttr
type Utsname = linux.Utsname
// Syscall is a wrapper
func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
return linux.Syscall(trap, a1, a2, a3)
}
// FcntlInt is a wrapper
func PthreadSigmask(how int, set, oldset *Sigset_t) error {
return linux.PthreadSigmask(how, set, oldset)
}
func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
return linux.FcntlInt(fd, cmd, arg)
}
// IoctlSetInt is a wrapper
func IoctlSetInt(fd int, req uint, value int) error {
return linux.IoctlSetInt(fd, req, value)
}
// Statfs is a wrapper
func Statfs(path string, buf *Statfs_t) (err error) {
return linux.Statfs(path, buf)
}
// Close is a wrapper
func Close(fd int) (err error) {
return linux.Close(fd)
}
// EpollEvent is a wrapper
type EpollEvent = linux.EpollEvent
// EpollWait is a wrapper
func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
return linux.EpollWait(epfd, events, msec)
}
// EpollCtl is a wrapper
func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) {
return linux.EpollCtl(epfd, op, fd, event)
}
// Eventfd is a wrapper
func Eventfd(initval uint, flags int) (fd int, err error) {
return linux.Eventfd(initval, flags)
}
// Write is a wrapper
func Write(fd int, p []byte) (n int, err error) {
return linux.Write(fd, p)
}
// EpollCreate1 is a wrapper
func EpollCreate1(flag int) (fd int, err error) {
return linux.EpollCreate1(flag)
}
// PerfEventMmapPage is a wrapper
type PerfEventMmapPage linux.PerfEventMmapPage
// SetNonblock is a wrapper
func SetNonblock(fd int, nonblocking bool) (err error) {
return linux.SetNonblock(fd, nonblocking)
}
// Mmap is a wrapper
func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
return linux.Mmap(fd, offset, length, prot, flags)
}
// Munmap is a wrapper
func Munmap(b []byte) (err error) {
return linux.Munmap(b)
}
// PerfEventAttr is a wrapper
type PerfEventAttr = linux.PerfEventAttr
// PerfEventOpen is a wrapper
func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
return linux.PerfEventOpen(attr, pid, cpu, groupFd, flags)
}
// Utsname is a wrapper
type Utsname = linux.Utsname
// Uname is a wrapper
func Uname(buf *Utsname) (err error) {
return linux.Uname(buf)
}
// Getpid is a wrapper
func Getpid() int {
return linux.Getpid()
}
// Gettid is a wrapper
func Gettid() int {
return linux.Gettid()
}
// Tgkill is a wrapper
func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) {
return linux.Tgkill(tgid, tid, sig)
}
// BytePtrFromString is a wrapper
func BytePtrFromString(s string) (*byte, error) {
return linux.BytePtrFromString(s)
}
// ByteSliceToString is a wrapper
func ByteSliceToString(s []byte) string {
return linux.ByteSliceToString(s)
}
// Renameat2 is a wrapper
func Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags uint) error {
return linux.Renameat2(olddirfd, oldpath, newdirfd, newpath, flags)
}
@ -208,3 +196,7 @@ func Open(path string, mode int, perm uint32) (int, error) {
func Fstat(fd int, stat *Stat_t) error {
return linux.Fstat(fd, stat)
}
func SetsockoptInt(fd, level, opt, value int) error {
return linux.SetsockoptInt(fd, level, opt, value)
}

View file

@ -1,5 +1,4 @@
//go:build !linux
// +build !linux
package unix
@ -11,70 +10,87 @@ import (
var errNonLinux = fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
// Errnos are distinct and non-zero.
const (
ENOENT = syscall.ENOENT
EEXIST = syscall.EEXIST
EAGAIN = syscall.EAGAIN
ENOSPC = syscall.ENOSPC
EINVAL = syscall.EINVAL
EINTR = syscall.EINTR
EPERM = syscall.EPERM
ESRCH = syscall.ESRCH
ENODEV = syscall.ENODEV
EBADF = syscall.Errno(0)
E2BIG = syscall.Errno(0)
EFAULT = syscall.EFAULT
EACCES = syscall.Errno(0)
// ENOTSUPP is not the same as ENOTSUP or EOPNOTSUP
ENOTSUPP = syscall.Errno(0x20c)
BPF_F_NO_PREALLOC = 0
BPF_F_NUMA_NODE = 0
BPF_F_RDONLY = 0
BPF_F_WRONLY = 0
BPF_F_RDONLY_PROG = 0
BPF_F_WRONLY_PROG = 0
BPF_F_SLEEPABLE = 0
BPF_F_MMAPABLE = 0
BPF_F_INNER_MAP = 0
BPF_OBJ_NAME_LEN = 0x10
BPF_TAG_SIZE = 0x8
BPF_RINGBUF_BUSY_BIT = 0
BPF_RINGBUF_DISCARD_BIT = 0
BPF_RINGBUF_HDR_SZ = 0
SYS_BPF = 321
F_DUPFD_CLOEXEC = 0x406
EPOLLIN = 0x1
EPOLL_CTL_ADD = 0x1
EPOLL_CLOEXEC = 0x80000
O_CLOEXEC = 0x80000
O_NONBLOCK = 0x800
PROT_READ = 0x1
PROT_WRITE = 0x2
MAP_SHARED = 0x1
PERF_ATTR_SIZE_VER1 = 0
PERF_TYPE_SOFTWARE = 0x1
PERF_TYPE_TRACEPOINT = 0
PERF_COUNT_SW_BPF_OUTPUT = 0xa
PERF_EVENT_IOC_DISABLE = 0
PERF_EVENT_IOC_ENABLE = 0
PERF_EVENT_IOC_SET_BPF = 0
PerfBitWatermark = 0x4000
PERF_SAMPLE_RAW = 0x400
PERF_FLAG_FD_CLOEXEC = 0x8
RLIM_INFINITY = 0x7fffffffffffffff
RLIMIT_MEMLOCK = 8
BPF_STATS_RUN_TIME = 0
PERF_RECORD_LOST = 2
PERF_RECORD_SAMPLE = 9
AT_FDCWD = -0x2
RENAME_NOREPLACE = 0x1
SO_ATTACH_BPF = 0x32
SO_DETACH_BPF = 0x1b
SOL_SOCKET = 0x1
ENOENT syscall.Errno = iota + 1
EEXIST
EAGAIN
ENOSPC
EINVAL
EINTR
EPERM
ESRCH
ENODEV
EBADF
E2BIG
EFAULT
EACCES
EILSEQ
EOPNOTSUPP
)
// Constants are distinct to avoid breaking switch statements.
const (
BPF_F_NO_PREALLOC = iota
BPF_F_NUMA_NODE
BPF_F_RDONLY
BPF_F_WRONLY
BPF_F_RDONLY_PROG
BPF_F_WRONLY_PROG
BPF_F_SLEEPABLE
BPF_F_MMAPABLE
BPF_F_INNER_MAP
BPF_F_KPROBE_MULTI_RETURN
BPF_F_XDP_HAS_FRAGS
BPF_OBJ_NAME_LEN
BPF_TAG_SIZE
BPF_RINGBUF_BUSY_BIT
BPF_RINGBUF_DISCARD_BIT
BPF_RINGBUF_HDR_SZ
SYS_BPF
F_DUPFD_CLOEXEC
EPOLLIN
EPOLL_CTL_ADD
EPOLL_CLOEXEC
O_CLOEXEC
O_NONBLOCK
PROT_NONE
PROT_READ
PROT_WRITE
MAP_ANON
MAP_SHARED
MAP_PRIVATE
PERF_ATTR_SIZE_VER1
PERF_TYPE_SOFTWARE
PERF_TYPE_TRACEPOINT
PERF_COUNT_SW_BPF_OUTPUT
PERF_EVENT_IOC_DISABLE
PERF_EVENT_IOC_ENABLE
PERF_EVENT_IOC_SET_BPF
PerfBitWatermark
PerfBitWriteBackward
PERF_SAMPLE_RAW
PERF_FLAG_FD_CLOEXEC
RLIM_INFINITY
RLIMIT_MEMLOCK
BPF_STATS_RUN_TIME
PERF_RECORD_LOST
PERF_RECORD_SAMPLE
AT_FDCWD
RENAME_NOREPLACE
SO_ATTACH_BPF
SO_DETACH_BPF
SOL_SOCKET
SIGPROF
SIG_BLOCK
SIG_UNBLOCK
EM_NONE
EM_BPF
BPF_FS_MAGIC
TRACEFS_MAGIC
DEBUGFS_MAGIC
)
// Statfs_t is a wrapper
type Statfs_t struct {
Type int64
Bsize int64
@ -90,72 +106,81 @@ type Statfs_t struct {
Spare [4]int64
}
type Stat_t struct{}
type Stat_t struct {
Dev uint64
Ino uint64
Nlink uint64
Mode uint32
Uid uint32
Gid uint32
_ int32
Rdev uint64
Size int64
Blksize int64
Blocks int64
}
// Rlimit is a wrapper
type Rlimit struct {
Cur uint64
Max uint64
}
// Syscall is a wrapper
func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
return 0, 0, syscall.Errno(1)
type Signal int
type Sigset_t struct {
Val [4]uint64
}
func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
return 0, 0, syscall.ENOTSUP
}
func PthreadSigmask(how int, set, oldset *Sigset_t) error {
return errNonLinux
}
// FcntlInt is a wrapper
func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
return -1, errNonLinux
}
// IoctlSetInt is a wrapper
func IoctlSetInt(fd int, req uint, value int) error {
return errNonLinux
}
// Statfs is a wrapper
func Statfs(path string, buf *Statfs_t) error {
return errNonLinux
}
// Close is a wrapper
func Close(fd int) (err error) {
return errNonLinux
}
// EpollEvent is a wrapper
type EpollEvent struct {
Events uint32
Fd int32
Pad int32
}
// EpollWait is a wrapper
func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
return 0, errNonLinux
}
// EpollCtl is a wrapper
func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) {
return errNonLinux
}
// Eventfd is a wrapper
func Eventfd(initval uint, flags int) (fd int, err error) {
return 0, errNonLinux
}
// Write is a wrapper
func Write(fd int, p []byte) (n int, err error) {
return 0, errNonLinux
}
// EpollCreate1 is a wrapper
func EpollCreate1(flag int) (fd int, err error) {
return 0, errNonLinux
}
// PerfEventMmapPage is a wrapper
type PerfEventMmapPage struct {
Version uint32
Compat_version uint32
@ -182,22 +207,18 @@ type PerfEventMmapPage struct {
Aux_size uint64
}
// SetNonblock is a wrapper
func SetNonblock(fd int, nonblocking bool) (err error) {
return errNonLinux
}
// Mmap is a wrapper
func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
return []byte{}, errNonLinux
}
// Munmap is a wrapper
func Munmap(b []byte) (err error) {
return errNonLinux
}
// PerfEventAttr is a wrapper
type PerfEventAttr struct {
Type uint32
Size uint32
@ -219,48 +240,39 @@ type PerfEventAttr struct {
Sample_max_stack uint16
}
// PerfEventOpen is a wrapper
func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
return 0, errNonLinux
}
// Utsname is a wrapper
type Utsname struct {
Release [65]byte
Version [65]byte
}
// Uname is a wrapper
func Uname(buf *Utsname) (err error) {
return errNonLinux
}
// Getpid is a wrapper
func Getpid() int {
return -1
}
// Gettid is a wrapper
func Gettid() int {
return -1
}
// Tgkill is a wrapper
func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) {
return errNonLinux
}
// BytePtrFromString is a wrapper
func BytePtrFromString(s string) (*byte, error) {
return nil, errNonLinux
}
// ByteSliceToString is a wrapper
func ByteSliceToString(s []byte) string {
return ""
}
// Renameat2 is a wrapper
func Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags uint) error {
return errNonLinux
}
@ -276,3 +288,7 @@ func Open(path string, mode int, perm uint32) (int, error) {
func Fstat(fd int, stat *Stat_t) error {
return errNonLinux
}
func SetsockoptInt(fd, level, opt, value int) error {
return errNonLinux
}

View file

@ -23,6 +23,9 @@ func vdsoVersion() (uint32, error) {
// to the process. Go does not expose that data, so we must read it from procfs.
// https://man7.org/linux/man-pages/man3/getauxval.3.html
av, err := os.Open("/proc/self/auxv")
if errors.Is(err, unix.EACCES) {
return 0, fmt.Errorf("opening auxv: %w (process may not be dumpable due to file capabilities)", err)
}
if err != nil {
return 0, fmt.Errorf("opening auxv: %w", err)
}
@ -117,7 +120,7 @@ func vdsoLinuxVersionCode(r io.ReaderAt) (uint32, error) {
var name string
if n.NameSize > 0 {
// Read the note name, aligned to 4 bytes.
buf := make([]byte, Align(int(n.NameSize), 4))
buf := make([]byte, Align(n.NameSize, 4))
if err := binary.Read(sr, hdr.ByteOrder, &buf); err != nil {
return 0, fmt.Errorf("reading note name: %w", err)
}
@ -139,7 +142,7 @@ func vdsoLinuxVersionCode(r io.ReaderAt) (uint32, error) {
}
// Discard the note descriptor if it exists but we're not interested in it.
if _, err := io.CopyN(io.Discard, sr, int64(Align(int(n.DescSize), 4))); err != nil {
if _, err := io.CopyN(io.Discard, sr, int64(Align(n.DescSize, 4))); err != nil {
return 0, err
}
}

View file

@ -2,7 +2,6 @@ package internal
import (
"fmt"
"sync"
"github.com/cilium/ebpf/internal/unix"
)
@ -15,14 +14,6 @@ const (
MagicKernelVersion = 0xFFFFFFFE
)
var (
kernelVersion = struct {
once sync.Once
version Version
err error
}{}
)
// A Version in the form Major.Minor.Patch.
type Version [3]uint16
@ -88,16 +79,9 @@ func (v Version) Kernel() uint32 {
}
// KernelVersion returns the version of the currently running kernel.
func KernelVersion() (Version, error) {
kernelVersion.once.Do(func() {
kernelVersion.version, kernelVersion.err = detectKernelVersion()
})
if kernelVersion.err != nil {
return Version{}, kernelVersion.err
}
return kernelVersion.version, nil
}
var KernelVersion = Memoize(func() (Version, error) {
return detectKernelVersion()
})
// detectKernelVersion returns the version of the running kernel.
func detectKernelVersion() (Version, error) {

View file

@ -10,10 +10,15 @@ import (
type cgroupAttachFlags uint32
// cgroup attach flags
const (
// Allow programs attached to sub-cgroups to override the verdict of this
// program.
flagAllowOverride cgroupAttachFlags = 1 << iota
// Allow attaching multiple programs to the cgroup. Only works if the cgroup
// has zero or more programs attached using the Multi flag. Implies override.
flagAllowMulti
// Set automatically by progAttachCgroup.Update(). Used for updating a
// specific given program attached in multi-mode.
flagReplace
)
@ -27,29 +32,39 @@ type CgroupOptions struct {
}
// AttachCgroup links a BPF program to a cgroup.
func AttachCgroup(opts CgroupOptions) (Link, error) {
//
// If the running kernel doesn't support bpf_link, attempts to emulate its
// semantics using the legacy PROG_ATTACH mechanism. If bpf_link is not
// available, the returned [Link] will not support pinning to bpffs.
//
// If you need more control over attachment flags or the attachment mechanism
// used, look at [RawAttachProgram] and [AttachRawLink] instead.
func AttachCgroup(opts CgroupOptions) (cg Link, err error) {
cgroup, err := os.Open(opts.Path)
if err != nil {
return nil, fmt.Errorf("can't open cgroup: %s", err)
}
clone, err := opts.Program.Clone()
if err != nil {
defer func() {
if _, ok := cg.(*progAttachCgroup); ok {
// Skip closing the cgroup handle if we return a valid progAttachCgroup,
// where the handle is retained to implement Update().
return
}
cgroup.Close()
return nil, err
}()
cg, err = newLinkCgroup(cgroup, opts.Attach, opts.Program)
if err == nil {
return cg, nil
}
var cg Link
cg, err = newLinkCgroup(cgroup, opts.Attach, clone)
if errors.Is(err, ErrNotSupported) {
cg, err = newProgAttachCgroup(cgroup, opts.Attach, clone, flagAllowMulti)
cg, err = newProgAttachCgroup(cgroup, opts.Attach, opts.Program, flagAllowMulti)
}
if errors.Is(err, ErrNotSupported) {
cg, err = newProgAttachCgroup(cgroup, opts.Attach, clone, flagAllowOverride)
cg, err = newProgAttachCgroup(cgroup, opts.Attach, opts.Program, flagAllowOverride)
}
if err != nil {
cgroup.Close()
clone.Close()
return nil, err
}
@ -67,6 +82,8 @@ var _ Link = (*progAttachCgroup)(nil)
func (cg *progAttachCgroup) isLink() {}
// newProgAttachCgroup attaches prog to cgroup using BPF_PROG_ATTACH.
// cgroup and prog are retained by [progAttachCgroup].
func newProgAttachCgroup(cgroup *os.File, attach ebpf.AttachType, prog *ebpf.Program, flags cgroupAttachFlags) (*progAttachCgroup, error) {
if flags&flagAllowMulti > 0 {
if err := haveProgAttachReplace(); err != nil {
@ -74,17 +91,24 @@ func newProgAttachCgroup(cgroup *os.File, attach ebpf.AttachType, prog *ebpf.Pro
}
}
err := RawAttachProgram(RawAttachProgramOptions{
// Use a program handle that cannot be closed by the caller.
clone, err := prog.Clone()
if err != nil {
return nil, err
}
err = RawAttachProgram(RawAttachProgramOptions{
Target: int(cgroup.Fd()),
Program: prog,
Program: clone,
Flags: uint32(flags),
Attach: attach,
})
if err != nil {
clone.Close()
return nil, fmt.Errorf("cgroup: %w", err)
}
return &progAttachCgroup{cgroup, prog, attach, flags}, nil
return &progAttachCgroup{cgroup, clone, attach, flags}, nil
}
func (cg *progAttachCgroup) Close() error {
@ -138,7 +162,7 @@ func (cg *progAttachCgroup) Pin(string) error {
}
func (cg *progAttachCgroup) Unpin() error {
return fmt.Errorf("can't pin cgroup: %w", ErrNotSupported)
return fmt.Errorf("can't unpin cgroup: %w", ErrNotSupported)
}
func (cg *progAttachCgroup) Info() (*Info, error) {
@ -151,6 +175,7 @@ type linkCgroup struct {
var _ Link = (*linkCgroup)(nil)
// newLinkCgroup attaches prog to cgroup using BPF_LINK_CREATE.
func newLinkCgroup(cgroup *os.File, attach ebpf.AttachType, prog *ebpf.Program) (*linkCgroup, error) {
link, err := AttachRawLink(RawLinkOptions{
Target: int(cgroup.Fd()),

View file

@ -1,42 +1,20 @@
package link
import (
"bytes"
"crypto/rand"
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
"strings"
"sync"
"syscall"
"unsafe"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/tracefs"
"github.com/cilium/ebpf/internal/unix"
)
var (
kprobeEventsPath = filepath.Join(tracefsPath, "kprobe_events")
kprobeRetprobeBit = struct {
once sync.Once
value uint64
err error
}{}
)
type probeType uint8
type probeArgs struct {
symbol, group, path string
offset, refCtrOffset, cookie uint64
pid int
ret bool
}
// KprobeOptions defines additional parameters that will be used
// when loading Kprobes.
type KprobeOptions struct {
@ -49,45 +27,23 @@ type KprobeOptions struct {
// Can be used to insert kprobes at arbitrary offsets in kernel functions,
// e.g. in places where functions have been inlined.
Offset uint64
// Increase the maximum number of concurrent invocations of a kretprobe.
// Required when tracing some long running functions in the kernel.
//
// Deprecated: this setting forces the use of an outdated kernel API and is not portable
// across kernel versions.
RetprobeMaxActive int
// Prefix used for the event name if the kprobe must be attached using tracefs.
// The group name will be formatted as `<prefix>_<randomstr>`.
// The default empty string is equivalent to "ebpf" as the prefix.
TraceFSPrefix string
}
const (
kprobeType probeType = iota
uprobeType
)
func (pt probeType) String() string {
if pt == kprobeType {
return "kprobe"
func (ko *KprobeOptions) cookie() uint64 {
if ko == nil {
return 0
}
return "uprobe"
}
func (pt probeType) EventsPath() string {
if pt == kprobeType {
return kprobeEventsPath
}
return uprobeEventsPath
}
func (pt probeType) PerfEventType(ret bool) perfEventType {
if pt == kprobeType {
if ret {
return kretprobeEvent
}
return kprobeEvent
}
if ret {
return uretprobeEvent
}
return uprobeEvent
}
func (pt probeType) RetprobeBit() (uint64, error) {
if pt == kprobeType {
return kretprobeBit()
}
return uretprobeBit()
return ko.Cookie
}
// Kprobe attaches the given eBPF program to a perf event that fires when the
@ -99,13 +55,17 @@ func (pt probeType) RetprobeBit() (uint64, error) {
// Losing the reference to the resulting Link (kp) will close the Kprobe
// and prevent further execution of prog. The Link must be Closed during
// program shutdown to avoid leaking system resources.
//
// If attaching to symbol fails, automatically retries with the running
// platform's syscall prefix (e.g. __x64_) to support attaching to syscalls
// in a portable fashion.
func Kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) {
k, err := kprobe(symbol, prog, opts, false)
if err != nil {
return nil, err
}
lnk, err := attachPerfEvent(k, prog)
lnk, err := attachPerfEvent(k, prog, opts.cookie())
if err != nil {
k.Close()
return nil, err
@ -123,13 +83,20 @@ func Kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error
// Losing the reference to the resulting Link (kp) will close the Kretprobe
// and prevent further execution of prog. The Link must be Closed during
// program shutdown to avoid leaking system resources.
//
// If attaching to symbol fails, automatically retries with the running
// platform's syscall prefix (e.g. __x64_) to support attaching to syscalls
// in a portable fashion.
//
// On kernels 5.10 and earlier, setting a kretprobe on a nonexistent symbol
// incorrectly returns unix.EINVAL instead of os.ErrNotExist.
func Kretprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) {
k, err := kprobe(symbol, prog, opts, true)
if err != nil {
return nil, err
}
lnk, err := attachPerfEvent(k, prog)
lnk, err := attachPerfEvent(k, prog, opts.cookie())
if err != nil {
k.Close()
return nil, err
@ -181,50 +148,51 @@ func kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions, ret bool) (*
return nil, fmt.Errorf("eBPF program type %s is not a Kprobe: %w", prog.Type(), errInvalidInput)
}
args := probeArgs{
pid: perfAllThreads,
symbol: symbol,
ret: ret,
args := tracefs.ProbeArgs{
Type: tracefs.Kprobe,
Pid: perfAllThreads,
Symbol: symbol,
Ret: ret,
}
if opts != nil {
args.cookie = opts.Cookie
args.offset = opts.Offset
args.RetprobeMaxActive = opts.RetprobeMaxActive
args.Cookie = opts.Cookie
args.Offset = opts.Offset
args.Group = opts.TraceFSPrefix
}
// Use kprobe PMU if the kernel has it available.
tp, err := pmuKprobe(args)
if errors.Is(err, os.ErrNotExist) {
args.symbol = platformPrefix(symbol)
tp, err = pmuKprobe(args)
tp, err := pmuProbe(args)
if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) {
if prefix := internal.PlatformPrefix(); prefix != "" {
args.Symbol = prefix + symbol
tp, err = pmuProbe(args)
}
}
if err == nil {
return tp, nil
}
if err != nil && !errors.Is(err, ErrNotSupported) {
return nil, fmt.Errorf("creating perf_kprobe PMU: %w", err)
return nil, fmt.Errorf("creating perf_kprobe PMU (arch-specific fallback for %q): %w", symbol, err)
}
// Use tracefs if kprobe PMU is missing.
args.symbol = symbol
tp, err = tracefsKprobe(args)
if errors.Is(err, os.ErrNotExist) {
args.symbol = platformPrefix(symbol)
tp, err = tracefsKprobe(args)
args.Symbol = symbol
tp, err = tracefsProbe(args)
if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) {
if prefix := internal.PlatformPrefix(); prefix != "" {
args.Symbol = prefix + symbol
tp, err = tracefsProbe(args)
}
}
if err != nil {
return nil, fmt.Errorf("creating trace event '%s' in tracefs: %w", symbol, err)
return nil, fmt.Errorf("creating tracefs event (arch-specific fallback for %q): %w", symbol, err)
}
return tp, nil
}
// pmuKprobe opens a perf event based on the kprobe PMU.
// Returns os.ErrNotExist if the given symbol does not exist in the kernel.
func pmuKprobe(args probeArgs) (*perfEvent, error) {
return pmuProbe(kprobeType, args)
}
// pmuProbe opens a perf event based on a Performance Monitoring Unit.
//
// Requires at least a 4.17 kernel.
@ -232,17 +200,25 @@ func pmuKprobe(args probeArgs) (*perfEvent, error) {
// 33ea4b24277b "perf/core: Implement the 'perf_uprobe' PMU"
//
// Returns ErrNotSupported if the kernel doesn't support perf_[k,u]probe PMU
func pmuProbe(typ probeType, args probeArgs) (*perfEvent, error) {
func pmuProbe(args tracefs.ProbeArgs) (*perfEvent, error) {
// Getting the PMU type will fail if the kernel doesn't support
// the perf_[k,u]probe PMU.
et, err := getPMUEventType(typ)
eventType, err := internal.ReadUint64FromFileOnce("%d\n", "/sys/bus/event_source/devices", args.Type.String(), "type")
if errors.Is(err, os.ErrNotExist) {
return nil, fmt.Errorf("%s: %w", args.Type, ErrNotSupported)
}
if err != nil {
return nil, err
}
// Use tracefs if we want to set kretprobe's retprobeMaxActive.
if args.RetprobeMaxActive != 0 {
return nil, fmt.Errorf("pmu probe: non-zero retprobeMaxActive: %w", ErrNotSupported)
}
var config uint64
if args.ret {
bit, err := typ.RetprobeBit()
if args.Ret {
bit, err := internal.ReadUint64FromFileOnce("config:%d\n", "/sys/bus/event_source/devices", args.Type.String(), "/format/retprobe")
if err != nil {
return nil, err
}
@ -250,75 +226,81 @@ func pmuProbe(typ probeType, args probeArgs) (*perfEvent, error) {
}
var (
attr unix.PerfEventAttr
sp unsafe.Pointer
attr unix.PerfEventAttr
sp unsafe.Pointer
token string
)
switch typ {
case kprobeType:
switch args.Type {
case tracefs.Kprobe:
// Create a pointer to a NUL-terminated string for the kernel.
sp, err = unsafeStringPtr(args.symbol)
sp, err = unsafeStringPtr(args.Symbol)
if err != nil {
return nil, err
}
token = tracefs.KprobeToken(args)
attr = unix.PerfEventAttr{
// The minimum size required for PMU kprobes is PERF_ATTR_SIZE_VER1,
// since it added the config2 (Ext2) field. Use Ext2 as probe_offset.
Size: unix.PERF_ATTR_SIZE_VER1,
Type: uint32(et), // PMU event type read from sysfs
Type: uint32(eventType), // PMU event type read from sysfs
Ext1: uint64(uintptr(sp)), // Kernel symbol to trace
Ext2: args.offset, // Kernel symbol offset
Ext2: args.Offset, // Kernel symbol offset
Config: config, // Retprobe flag
}
case uprobeType:
sp, err = unsafeStringPtr(args.path)
case tracefs.Uprobe:
sp, err = unsafeStringPtr(args.Path)
if err != nil {
return nil, err
}
if args.refCtrOffset != 0 {
config |= args.refCtrOffset << uprobeRefCtrOffsetShift
if args.RefCtrOffset != 0 {
config |= args.RefCtrOffset << uprobeRefCtrOffsetShift
}
token = tracefs.UprobeToken(args)
attr = unix.PerfEventAttr{
// The minimum size required for PMU uprobes is PERF_ATTR_SIZE_VER1,
// since it added the config2 (Ext2) field. The Size field controls the
// size of the internal buffer the kernel allocates for reading the
// perf_event_attr argument from userspace.
Size: unix.PERF_ATTR_SIZE_VER1,
Type: uint32(et), // PMU event type read from sysfs
Type: uint32(eventType), // PMU event type read from sysfs
Ext1: uint64(uintptr(sp)), // Uprobe path
Ext2: args.offset, // Uprobe offset
Ext2: args.Offset, // Uprobe offset
Config: config, // RefCtrOffset, Retprobe flag
}
}
rawFd, err := unix.PerfEventOpen(&attr, args.pid, 0, -1, unix.PERF_FLAG_FD_CLOEXEC)
rawFd, err := unix.PerfEventOpen(&attr, args.Pid, 0, -1, unix.PERF_FLAG_FD_CLOEXEC)
// On some old kernels, kprobe PMU doesn't allow `.` in symbol names and
// return -EINVAL. Return ErrNotSupported to allow falling back to tracefs.
// https://github.com/torvalds/linux/blob/94710cac0ef4/kernel/trace/trace_kprobe.c#L340-L343
if errors.Is(err, unix.EINVAL) && strings.Contains(args.symbol, ".") {
return nil, fmt.Errorf("symbol '%s+%#x': older kernels don't accept dots: %w", args.symbol, args.offset, ErrNotSupported)
if errors.Is(err, unix.EINVAL) && strings.Contains(args.Symbol, ".") {
return nil, fmt.Errorf("token %s: older kernels don't accept dots: %w", token, ErrNotSupported)
}
// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
// when trying to create a kretprobe for a missing symbol. Make sure ENOENT
// is returned to the caller.
if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) {
return nil, fmt.Errorf("symbol '%s+%#x' not found: %w", args.symbol, args.offset, os.ErrNotExist)
// when trying to create a retprobe for a missing symbol.
if errors.Is(err, os.ErrNotExist) {
return nil, fmt.Errorf("token %s: not found: %w", token, err)
}
// Since commit ab105a4fb894, -EILSEQ is returned when a kprobe sym+offset is resolved
// to an invalid insn boundary.
if errors.Is(err, syscall.EILSEQ) {
return nil, fmt.Errorf("symbol '%s+%#x' not found (bad insn boundary): %w", args.symbol, args.offset, os.ErrNotExist)
// Since commit ab105a4fb894, EILSEQ is returned when a kprobe sym+offset is resolved
// to an invalid insn boundary. The exact conditions that trigger this error are
// arch specific however.
if errors.Is(err, unix.EILSEQ) {
return nil, fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist)
}
// Since at least commit cb9a19fe4aa51, ENOTSUPP is returned
// when attempting to set a uprobe on a trap instruction.
if errors.Is(err, unix.ENOTSUPP) {
return nil, fmt.Errorf("failed setting uprobe on offset %#x (possible trap insn): %w", args.offset, err)
if errors.Is(err, sys.ENOTSUPP) {
return nil, fmt.Errorf("token %s: failed setting uprobe on offset %#x (possible trap insn): %w", token, args.Offset, err)
}
if err != nil {
return nil, fmt.Errorf("opening perf event: %w", err)
return nil, fmt.Errorf("token %s: opening perf event: %w", token, err)
}
// Ensure the string pointer is not collected before PerfEventOpen returns.
@ -330,18 +312,7 @@ func pmuProbe(typ probeType, args probeArgs) (*perfEvent, error) {
}
// Kernel has perf_[k,u]probe PMU available, initialize perf event.
return &perfEvent{
typ: typ.PerfEventType(args.ret),
name: args.symbol,
pmuID: et,
cookie: args.cookie,
fd: fd,
}, nil
}
// tracefsKprobe creates a Kprobe tracefs entry.
func tracefsKprobe(args probeArgs) (*perfEvent, error) {
return tracefsProbe(kprobeType, args)
return newPerfEvent(fd, nil), nil
}
// tracefsProbe creates a trace event by writing an entry to <tracefs>/[k,u]probe_events.
@ -350,219 +321,37 @@ func tracefsKprobe(args probeArgs) (*perfEvent, error) {
// Path and offset are only set in the case of uprobe(s) and are used to set
// the executable/library path on the filesystem and the offset where the probe is inserted.
// A perf event is then opened on the newly-created trace event and returned to the caller.
func tracefsProbe(typ probeType, args probeArgs) (_ *perfEvent, err error) {
func tracefsProbe(args tracefs.ProbeArgs) (*perfEvent, error) {
groupPrefix := "ebpf"
if args.Group != "" {
groupPrefix = args.Group
}
// Generate a random string for each trace event we attempt to create.
// This value is used as the 'group' token in tracefs to allow creating
// multiple kprobe trace events with the same name.
group, err := randomGroup("ebpf")
group, err := tracefs.RandomGroup(groupPrefix)
if err != nil {
return nil, fmt.Errorf("randomizing group name: %w", err)
}
args.group = group
// Before attempting to create a trace event through tracefs,
// check if an event with the same group and name already exists.
// Kernels 4.x and earlier don't return os.ErrExist on writing a duplicate
// entry, so we need to rely on reads for detecting uniqueness.
_, err = getTraceEventID(group, args.symbol)
if err == nil {
return nil, fmt.Errorf("trace event already exists: %s/%s", group, args.symbol)
}
if err != nil && !errors.Is(err, os.ErrNotExist) {
return nil, fmt.Errorf("checking trace event %s/%s: %w", group, args.symbol, err)
}
args.Group = group
// Create the [k,u]probe trace event using tracefs.
if err := createTraceFSProbeEvent(typ, args); err != nil {
return nil, fmt.Errorf("creating probe entry on tracefs: %w", err)
}
defer func() {
if err != nil {
// Make sure we clean up the created tracefs event when we return error.
// If a livepatch handler is already active on the symbol, the write to
// tracefs will succeed, a trace event will show up, but creating the
// perf event will fail with EBUSY.
_ = closeTraceFSProbeEvent(typ, args.group, args.symbol)
}
}()
// Get the newly-created trace event's id.
tid, err := getTraceEventID(group, args.symbol)
evt, err := tracefs.NewEvent(args)
if err != nil {
return nil, fmt.Errorf("getting trace event id: %w", err)
return nil, fmt.Errorf("creating probe entry on tracefs: %w", err)
}
// Kprobes are ephemeral tracepoints and share the same perf event type.
fd, err := openTracepointPerfEvent(tid, args.pid)
fd, err := openTracepointPerfEvent(evt.ID(), args.Pid)
if err != nil {
// Make sure we clean up the created tracefs event when we return error.
// If a livepatch handler is already active on the symbol, the write to
// tracefs will succeed, a trace event will show up, but creating the
// perf event will fail with EBUSY.
_ = evt.Close()
return nil, err
}
return &perfEvent{
typ: typ.PerfEventType(args.ret),
group: group,
name: args.symbol,
tracefsID: tid,
cookie: args.cookie,
fd: fd,
}, nil
}
// createTraceFSProbeEvent creates a new ephemeral trace event by writing to
// <tracefs>/[k,u]probe_events. Returns os.ErrNotExist if symbol is not a valid
// kernel symbol, or if it is not traceable with kprobes. Returns os.ErrExist
// if a probe with the same group and symbol already exists.
func createTraceFSProbeEvent(typ probeType, args probeArgs) error {
	// Probe definitions are appended to the [k,u]probe_events file in tracefs.
	events, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666)
	if err != nil {
		return fmt.Errorf("error opening '%s': %w", typ.EventsPath(), err)
	}
	defer events.Close()

	var line, token string
	switch typ {
	case kprobeType:
		// kprobe_events syntax (see Documentation/trace/kprobetrace.txt):
		//   p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : set a probe
		//   r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : set a return probe
		//   -:[GRP/]EVENT                                        : clear a probe
		//
		// Examples:
		//   r:ebpf_1234/r_my_kretprobe nf_conntrack_destroy
		//   p:ebpf_5678/p_my_kprobe __x64_sys_execve
		//
		// MAXACTIVE is deliberately left at its default (NR_CPUS): subsampling
		// or rate limiting is more accurately implemented in the eBPF program
		// itself. See Documentation/kprobes.txt.
		token = kprobeToken(args)
		line = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.ret), args.group, sanitizeSymbol(args.symbol), token)
	case uprobeType:
		// uprobe_events syntax (see Documentation/trace/uprobetracer.txt):
		//   p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : set a probe
		//   r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : set a return probe
		//   -:[GRP/]EVENT                           : clear a probe
		//
		// Examples:
		//   r:ebpf_1234/readline /bin/bash:0x12345
		//   p:ebpf_5678/main_mySymbol /bin/mybin:0x12345(0x123)
		token = uprobeToken(args)
		line = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.ret), args.group, args.symbol, token)
	}

	_, err = events.WriteString(line)
	switch {
	// Since commit 97c753e62e6c, ENOENT is correctly returned instead of
	// EINVAL when creating a kretprobe for a missing symbol. EINVAL is also
	// returned on pre-5.2 kernels when the SYM[+offs] token resolves to an
	// invalid insn boundary. Normalize both to os.ErrNotExist.
	case errors.Is(err, os.ErrNotExist), errors.Is(err, unix.EINVAL):
		return fmt.Errorf("token %s: %w", token, os.ErrNotExist)
	// Since commit ab105a4fb894, -EILSEQ is returned when a kprobe sym+offset
	// resolves to an invalid insn boundary.
	case errors.Is(err, syscall.EILSEQ):
		return fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist)
	// ERANGE is returned when the SYM[+offs] token is too big to resolve.
	case errors.Is(err, syscall.ERANGE):
		return fmt.Errorf("token %s: offset too big: %w", token, os.ErrNotExist)
	case err != nil:
		return fmt.Errorf("writing '%s' to '%s': %w", line, typ.EventsPath(), err)
	}

	return nil
}
// closeTraceFSProbeEvent removes the [k,u]probe with the given type, group and symbol
// from <tracefs>/[k,u]probe_events.
func closeTraceFSProbeEvent(typ probeType, group, symbol string) error {
	events, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666)
	if err != nil {
		return fmt.Errorf("error opening %s: %w", typ.EventsPath(), err)
	}
	defer events.Close()

	// Removal uses the "-:[GRP/]EVENT" form; the probe type prefix does not
	// need to be repeated.
	cmd := fmt.Sprintf("-:%s/%s", group, sanitizeSymbol(symbol))
	if _, err := events.WriteString(cmd); err != nil {
		return fmt.Errorf("writing '%s' to '%s': %w", cmd, typ.EventsPath(), err)
	}
	return nil
}
// randomGroup generates a pseudorandom string for use as a tracefs group name.
// Returns an error when the output string would exceed 63 characters (kernel
// limitation), when rand.Read() fails or when prefix contains characters not
// allowed by isValidTraceID.
func randomGroup(prefix string) (string, error) {
	if !isValidTraceID(prefix) {
		return "", fmt.Errorf("prefix '%s' must be alphanumeric or underscore: %w", prefix, errInvalidInput)
	}

	var entropy [8]byte
	if _, err := rand.Read(entropy[:]); err != nil {
		return "", fmt.Errorf("reading random bytes: %w", err)
	}

	name := fmt.Sprintf("%s_%x", prefix, entropy)
	if len(name) > 63 {
		return "", fmt.Errorf("group name '%s' cannot be longer than 63 characters: %w", name, errInvalidInput)
	}

	return name, nil
}
// probePrefix returns the tracefs probe-definition prefix: "r" for return
// probes (kretprobe/uretprobe), "p" for entry probes.
func probePrefix(ret bool) string {
	prefix := "p"
	if ret {
		prefix = "r"
	}
	return prefix
}
// determineRetprobeBit reads a Performance Monitoring Unit's retprobe bit
// from /sys/bus/event_source/devices/<pmu>/format/retprobe.
func determineRetprobeBit(typ probeType) (uint64, error) {
	path := filepath.Join("/sys/bus/event_source/devices/", typ.String(), "/format/retprobe")

	data, err := os.ReadFile(path)
	if err != nil {
		return 0, err
	}

	// The file contains a single "config:<bit>" entry.
	var bit uint64
	n, err := fmt.Sscanf(string(bytes.TrimSpace(data)), "config:%d", &bit)
	if err != nil {
		return 0, fmt.Errorf("parse retprobe bit: %w", err)
	}
	if n != 1 {
		return 0, fmt.Errorf("parse retprobe bit: expected 1 item, got %d", n)
	}

	return bit, nil
}
// kretprobeBit returns the kprobe PMU's retprobe bit, reading it via
// determineRetprobeBit on first use. The result (value or error) is cached
// in the package-level kprobeRetprobeBit and returned on every later call.
func kretprobeBit() (uint64, error) {
	kprobeRetprobeBit.once.Do(func() {
		kprobeRetprobeBit.value, kprobeRetprobeBit.err = determineRetprobeBit(kprobeType)
	})
	return kprobeRetprobeBit.value, kprobeRetprobeBit.err
}
// kprobeToken creates the SYM[+offs] token for the tracefs api.
//
// The offset, when non-zero, is appended in hex (e.g. "my_sym+0x10").
// Fix: the original span contained a stray, unreachable
// `return newPerfEvent(fd, evt), nil` spliced in from another function
// (a diff artifact), which made the body invalid; it is removed here.
func kprobeToken(args probeArgs) string {
	po := args.symbol

	if args.offset != 0 {
		po += fmt.Sprintf("+%#x", args.offset)
	}

	return po
}

180
vendor/github.com/cilium/ebpf/link/kprobe_multi.go generated vendored Normal file
View file

@ -0,0 +1,180 @@
package link
import (
"errors"
"fmt"
"os"
"unsafe"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
// KprobeMultiOptions defines additional parameters that will be used
// when opening a KprobeMulti Link.
//
// Exactly one of Symbols or Addresses must be set.
type KprobeMultiOptions struct {
	// Symbols takes a list of kernel symbol names to attach an ebpf program to.
	//
	// Mutually exclusive with Addresses.
	Symbols []string

	// Addresses takes a list of kernel symbol addresses in case they can not
	// be referred to by name.
	//
	// Note that only start addresses can be specified, since the fprobe API
	// limits the attach point to the function entry or return.
	//
	// Mutually exclusive with Symbols.
	Addresses []uintptr

	// Cookies specifies arbitrary values that can be fetched from an eBPF
	// program via `bpf_get_attach_cookie()`.
	//
	// If set, its length should be equal to the length of Symbols or Addresses.
	// Each Cookie is assigned to the Symbol or Address specified at the
	// corresponding slice index.
	Cookies []uint64
}
// KprobeMulti attaches the given eBPF program to the entry point of a given set
// of kernel symbols.
//
// The difference with Kprobe() is that multi-kprobe accomplishes this in a
// single system call, making it significantly faster than attaching many
// probes one at a time.
//
// Requires at least Linux 5.18.
func KprobeMulti(prog *ebpf.Program, opts KprobeMultiOptions) (Link, error) {
	// Zero flags selects entry attachment; KretprobeMulti passes the
	// return-probe flag instead.
	return kprobeMulti(prog, opts, 0)
}
// KretprobeMulti attaches the given eBPF program to the return point of a given
// set of kernel symbols.
//
// The difference with Kretprobe() is that multi-kprobe accomplishes this in a
// single system call, making it significantly faster than attaching many
// probes one at a time.
//
// Requires at least Linux 5.18.
func KretprobeMulti(prog *ebpf.Program, opts KprobeMultiOptions) (Link, error) {
	// Same machinery as KprobeMulti, with the return-probe flag set.
	return kprobeMulti(prog, opts, unix.BPF_F_KPROBE_MULTI_RETURN)
}
// kprobeMulti validates opts, probes for kernel support and creates a
// BPF_TRACE_KPROBE_MULTI link attaching prog to the requested symbols or
// addresses. flags selects entry vs. return attachment.
func kprobeMulti(prog *ebpf.Program, opts KprobeMultiOptions, flags uint32) (Link, error) {
	if prog == nil {
		return nil, errors.New("cannot attach a nil program")
	}

	nSyms := uint32(len(opts.Symbols))
	nAddrs := uint32(len(opts.Addresses))
	nCookies := uint32(len(opts.Cookies))

	switch {
	case nSyms == 0 && nAddrs == 0:
		return nil, fmt.Errorf("one of Symbols or Addresses is required: %w", errInvalidInput)
	case nSyms != 0 && nAddrs != 0:
		return nil, fmt.Errorf("Symbols and Addresses are mutually exclusive: %w", errInvalidInput)
	case nCookies > 0 && nCookies != nSyms && nCookies != nAddrs:
		return nil, fmt.Errorf("Cookies must be exactly Symbols or Addresses in length: %w", errInvalidInput)
	}

	if err := haveBPFLinkKprobeMulti(); err != nil {
		return nil, err
	}

	attr := &sys.LinkCreateKprobeMultiAttr{
		ProgFd:           uint32(prog.FD()),
		AttachType:       sys.BPF_TRACE_KPROBE_MULTI,
		KprobeMultiFlags: flags,
	}

	// Exactly one of the two target lists is populated at this point.
	if nSyms != 0 {
		attr.Count = nSyms
		attr.Syms = sys.NewStringSlicePointer(opts.Symbols)
	} else {
		attr.Count = nAddrs
		attr.Addrs = sys.NewPointer(unsafe.Pointer(&opts.Addresses[0]))
	}

	if nCookies != 0 {
		attr.Cookies = sys.NewPointer(unsafe.Pointer(&opts.Cookies[0]))
	}

	fd, err := sys.LinkCreateKprobeMulti(attr)
	switch {
	case errors.Is(err, unix.ESRCH):
		return nil, fmt.Errorf("couldn't find one or more symbols: %w", os.ErrNotExist)
	case errors.Is(err, unix.EINVAL):
		return nil, fmt.Errorf("%w (missing kernel symbol or prog's AttachType not AttachTraceKprobeMulti?)", err)
	case err != nil:
		return nil, err
	}

	return &kprobeMultiLink{RawLink{fd, ""}}, nil
}
// kprobeMultiLink is a Link backed by a kernel kprobe_multi link object.
type kprobeMultiLink struct {
	RawLink
}

// Compile-time check that kprobeMultiLink satisfies the Link interface.
var _ Link = (*kprobeMultiLink)(nil)

// Update always returns ErrNotSupported: swapping the program of a
// kprobe_multi link is not implemented by this package.
func (kml *kprobeMultiLink) Update(prog *ebpf.Program) error {
	return fmt.Errorf("update kprobe_multi: %w", ErrNotSupported)
}

// Pin always returns ErrNotSupported for kprobe_multi links.
func (kml *kprobeMultiLink) Pin(string) error {
	return fmt.Errorf("pin kprobe_multi: %w", ErrNotSupported)
}

// Unpin always returns ErrNotSupported for kprobe_multi links.
func (kml *kprobeMultiLink) Unpin() error {
	return fmt.Errorf("unpin kprobe_multi: %w", ErrNotSupported)
}
// haveBPFLinkKprobeMulti probes for BPF_LINK_TYPE_KPROBE_MULTI support
// (multi-kprobe links, Linux 5.18+) by loading a minimal kprobe program
// and attempting to create a multi-kprobe link against a well-known symbol.
var haveBPFLinkKprobeMulti = internal.NewFeatureTest("bpf_link_kprobe_multi", "5.18", func() error {
	// Minimal "return 0" kprobe program declaring the multi-kprobe attach type.
	prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
		Name: "probe_kpm_link",
		Type: ebpf.Kprobe,
		Instructions: asm.Instructions{
			asm.Mov.Imm(asm.R0, 0),
			asm.Return(),
		},
		AttachType: ebpf.AttachTraceKprobeMulti,
		License:    "MIT",
	})
	if errors.Is(err, unix.E2BIG) {
		// Kernel doesn't support AttachType field.
		return internal.ErrNotSupported
	}
	if err != nil {
		return err
	}
	defer prog.Close()

	fd, err := sys.LinkCreateKprobeMulti(&sys.LinkCreateKprobeMultiAttr{
		ProgFd:     uint32(prog.FD()),
		AttachType: sys.BPF_TRACE_KPROBE_MULTI,
		Count:      1,
		Syms:       sys.NewStringSlicePointer([]string{"vprintk"}),
	})
	switch {
	// EINVAL: the link type is unknown to this kernel.
	case errors.Is(err, unix.EINVAL):
		return internal.ErrNotSupported
	// If CONFIG_FPROBE isn't set.
	case errors.Is(err, unix.EOPNOTSUPP):
		return internal.ErrNotSupported
	case err != nil:
		return err
	}

	fd.Close()

	return nil
})

View file

@ -46,6 +46,18 @@ type Link interface {
isLink()
}
// NewLinkFromFD creates a link from a raw fd.
//
// You should not use fd after calling this function.
func NewLinkFromFD(fd int) (Link, error) {
	// Wrap the raw fd in a managed *sys.FD; ownership transfers here.
	sysFD, err := sys.NewFD(fd)
	if err != nil {
		return nil, err
	}

	// wrapRawLink upgrades the raw link to a concrete Link type where
	// possible and takes ownership, closing it on error.
	return wrapRawLink(&RawLink{fd: sysFD})
}
// LoadPinnedLink loads a link that was persisted into a bpffs.
func LoadPinnedLink(fileName string, opts *ebpf.LoadPinOptions) (Link, error) {
raw, err := loadPinnedRawLink(fileName, opts)
@ -59,10 +71,15 @@ func LoadPinnedLink(fileName string, opts *ebpf.LoadPinOptions) (Link, error) {
// wrap a RawLink in a more specific type if possible.
//
// The function takes ownership of raw and closes it on error.
func wrapRawLink(raw *RawLink) (Link, error) {
func wrapRawLink(raw *RawLink) (_ Link, err error) {
defer func() {
if err != nil {
raw.Close()
}
}()
info, err := raw.Info()
if err != nil {
raw.Close()
return nil, err
}
@ -77,6 +94,10 @@ func wrapRawLink(raw *RawLink) (Link, error) {
return &Iter{*raw}, nil
case NetNsType:
return &NetNsLink{*raw}, nil
case KprobeMultiType:
return &kprobeMultiLink{*raw}, nil
case PerfEventType:
return nil, fmt.Errorf("recovering perf event fd: %w", ErrNotSupported)
default:
return raw, nil
}
@ -172,12 +193,12 @@ func AttachRawLink(opts RawLinkOptions) (*RawLink, error) {
TargetFd: uint32(opts.Target),
ProgFd: uint32(progFd),
AttachType: sys.AttachType(opts.Attach),
TargetBtfId: uint32(opts.BTF),
TargetBtfId: opts.BTF,
Flags: opts.Flags,
}
fd, err := sys.LinkCreate(&attr)
if err != nil {
return nil, fmt.Errorf("can't create link: %s", err)
return nil, fmt.Errorf("create link: %w", err)
}
return &RawLink{fd, ""}, nil
@ -230,6 +251,11 @@ func (l *RawLink) Unpin() error {
return nil
}
// IsPinned reports whether the Link has a non-empty pinned path.
func (l *RawLink) IsPinned() bool {
	return l.pinnedPath != ""
}
// Update implements the Link interface.
func (l *RawLink) Update(new *ebpf.Program) error {
return l.UpdateArgs(RawLinkUpdateOptions{
@ -280,27 +306,24 @@ func (l *RawLink) Info() (*Info, error) {
switch info.Type {
case CgroupType:
extra = &CgroupInfo{}
case IterType:
// not supported
case NetNsType:
extra = &NetNsInfo{}
case RawTracepointType:
// not supported
case TracingType:
extra = &TracingInfo{}
case XDPType:
extra = &XDPInfo{}
case PerfEventType:
// no extra
case RawTracepointType, IterType,
PerfEventType, KprobeMultiType:
// Extra metadata not supported.
default:
return nil, fmt.Errorf("unknown link info type: %d", info.Type)
}
if info.Type != RawTracepointType && info.Type != IterType && info.Type != PerfEventType {
if extra != nil {
buf := bytes.NewReader(info.Extra[:])
err := binary.Read(buf, internal.NativeEndian, extra)
if err != nil {
return nil, fmt.Errorf("can not read extra link info: %w", err)
return nil, fmt.Errorf("cannot read extra link info: %w", err)
}
}

View file

@ -1,20 +1,16 @@
package link
import (
"bytes"
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"unsafe"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/tracefs"
"github.com/cilium/ebpf/internal/unix"
)
@ -42,67 +38,41 @@ import (
// stops any further invocations of the attached eBPF program.
var (
tracefsPath = "/sys/kernel/debug/tracing"
errInvalidInput = errors.New("invalid input")
errInvalidInput = tracefs.ErrInvalidInput
)
const (
	// perfAllThreads (-1) is used as the pid argument of perf_event_open;
	// per perf_event_open(2), pid -1 measures all processes/threads on the
	// selected cpu. NOTE(review): the call sites are not fully visible in
	// this chunk — confirm against openTracepointPerfEvent.
	perfAllThreads = -1
)
// perfEventType classifies the tracefs object backing a perfEvent. It is
// used to select the program-type check on attach and the tracefs cleanup
// performed on close.
type perfEventType uint8

const (
	// tracepointEvent: a static kernel tracepoint; no extra tracefs cleanup.
	tracepointEvent perfEventType = iota
	// kprobeEvent / kretprobeEvent: kernel entry/return probes.
	kprobeEvent
	kretprobeEvent
	// uprobeEvent / uretprobeEvent: userspace entry/return probes.
	uprobeEvent
	uretprobeEvent
)
// A perfEvent represents a perf event kernel object. Exactly one eBPF program
// can be attached to it. It is created based on a tracefs trace event or a
// Performance Monitoring Unit (PMU).
//
// NOTE(review): this struct appears to splice together two revisions — the
// legacy metadata fields (typ/group/name/pmuID/tracefsID/cookie) and the
// newer tracefsEvent handle. Confirm which field set the rest of the file
// actually uses before relying on either.
type perfEvent struct {
	// The event type determines the types of programs that can be attached.
	typ perfEventType

	// Group and name of the tracepoint/kprobe/uprobe.
	group string
	name  string

	// PMU event ID read from sysfs. Valid IDs are non-zero.
	pmuID uint64
	// ID of the trace event read from tracefs. Valid IDs are non-zero.
	tracefsID uint64

	// User provided arbitrary value.
	cookie uint64

	// Trace event backing this perfEvent. May be nil.
	tracefsEvent *tracefs.Event

	// This is the perf event FD.
	fd *sys.FD
}
// newPerfEvent wraps a perf event fd and its (optional) backing tracefs
// event, installing a finalizer so a leaked perfEvent is eventually closed.
//
// NOTE(review): the positional literal below only compiles if perfEvent has
// exactly the fields {tracefsEvent, fd} — confirm against the struct
// definition, which in this chunk shows additional fields.
func newPerfEvent(fd *sys.FD, event *tracefs.Event) *perfEvent {
	pe := &perfEvent{event, fd}
	// Both event and fd have their own finalizer, but we want to
	// guarantee that they are closed in a certain order.
	runtime.SetFinalizer(pe, (*perfEvent).Close)
	return pe
}
func (pe *perfEvent) Close() error {
runtime.SetFinalizer(pe, nil)
if err := pe.fd.Close(); err != nil {
return fmt.Errorf("closing perf event fd: %w", err)
}
switch pe.typ {
case kprobeEvent, kretprobeEvent:
// Clean up kprobe tracefs entry.
if pe.tracefsID != 0 {
return closeTraceFSProbeEvent(kprobeType, pe.group, pe.name)
}
case uprobeEvent, uretprobeEvent:
// Clean up uprobe tracefs entry.
if pe.tracefsID != 0 {
return closeTraceFSProbeEvent(uprobeType, pe.group, pe.name)
}
case tracepointEvent:
// Tracepoint trace events don't hold any extra resources.
return nil
if pe.tracefsEvent != nil {
return pe.tracefsEvent.Close()
}
return nil
@ -136,10 +106,14 @@ func (pl *perfEventLink) Unpin() error {
}
func (pl *perfEventLink) Close() error {
if err := pl.pe.Close(); err != nil {
return fmt.Errorf("perf event link close: %w", err)
if err := pl.fd.Close(); err != nil {
return fmt.Errorf("perf link close: %w", err)
}
return pl.fd.Close()
if err := pl.pe.Close(); err != nil {
return fmt.Errorf("perf event close: %w", err)
}
return nil
}
func (pl *perfEventLink) Update(prog *ebpf.Program) error {
@ -183,7 +157,7 @@ func (pi *perfEventIoctl) Info() (*Info, error) {
// attach the given eBPF prog to the perf event stored in pe.
// pe must contain a valid perf event fd.
// prog's type must match the program type stored in pe.
func attachPerfEvent(pe *perfEvent, prog *ebpf.Program) (Link, error) {
func attachPerfEvent(pe *perfEvent, prog *ebpf.Program, cookie uint64) (Link, error) {
if prog == nil {
return nil, errors.New("cannot attach a nil program")
}
@ -191,30 +165,18 @@ func attachPerfEvent(pe *perfEvent, prog *ebpf.Program) (Link, error) {
return nil, fmt.Errorf("invalid program: %w", sys.ErrClosedFd)
}
switch pe.typ {
case kprobeEvent, kretprobeEvent, uprobeEvent, uretprobeEvent:
if t := prog.Type(); t != ebpf.Kprobe {
return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.Kprobe, t)
}
case tracepointEvent:
if t := prog.Type(); t != ebpf.TracePoint {
return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.TracePoint, t)
}
default:
return nil, fmt.Errorf("unknown perf event type: %d", pe.typ)
if err := haveBPFLinkPerfEvent(); err == nil {
return attachPerfEventLink(pe, prog, cookie)
}
if err := haveBPFLinkPerfEvent(); err == nil {
return attachPerfEventLink(pe, prog)
if cookie != 0 {
return nil, fmt.Errorf("cookies are not supported: %w", ErrNotSupported)
}
return attachPerfEventIoctl(pe, prog)
}
func attachPerfEventIoctl(pe *perfEvent, prog *ebpf.Program) (*perfEventIoctl, error) {
if pe.cookie != 0 {
return nil, fmt.Errorf("cookies are not supported: %w", ErrNotSupported)
}
// Assign the eBPF program to the perf event.
err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_SET_BPF, prog.FD())
if err != nil {
@ -226,32 +188,24 @@ func attachPerfEventIoctl(pe *perfEvent, prog *ebpf.Program) (*perfEventIoctl, e
return nil, fmt.Errorf("enable perf event: %s", err)
}
pi := &perfEventIoctl{pe}
// Close the perf event when its reference is lost to avoid leaking system resources.
runtime.SetFinalizer(pi, (*perfEventIoctl).Close)
return pi, nil
return &perfEventIoctl{pe}, nil
}
// Use the bpf api to attach the perf event (BPF_LINK_TYPE_PERF_EVENT, 5.15+).
//
// https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e
func attachPerfEventLink(pe *perfEvent, prog *ebpf.Program) (*perfEventLink, error) {
func attachPerfEventLink(pe *perfEvent, prog *ebpf.Program, cookie uint64) (*perfEventLink, error) {
fd, err := sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{
ProgFd: uint32(prog.FD()),
TargetFd: pe.fd.Uint(),
AttachType: sys.BPF_PERF_EVENT,
BpfCookie: pe.cookie,
BpfCookie: cookie,
})
if err != nil {
return nil, fmt.Errorf("cannot create bpf perf link: %v", err)
}
pl := &perfEventLink{RawLink{fd: fd}, pe}
// Close the perf event when its reference is lost to avoid leaking system resources.
runtime.SetFinalizer(pl, (*perfEventLink).Close)
return pl, nil
return &perfEventLink{RawLink{fd: fd}, pe}, nil
}
// unsafeStringPtr returns an unsafe.Pointer to a NUL-terminated copy of str.
@ -263,40 +217,6 @@ func unsafeStringPtr(str string) (unsafe.Pointer, error) {
return unsafe.Pointer(p), nil
}
// getTraceEventID reads a trace event's ID from tracefs given its group and name.
// The kernel requires group and name to be alphanumeric or underscore.
//
// name automatically has its invalid symbols converted to underscores so the caller
// can pass a raw symbol name, e.g. a kernel symbol containing dots.
func getTraceEventID(group, name string) (uint64, error) {
	name = sanitizeSymbol(name)

	id, err := uint64FromFile(tracefsPath, "events", group, name, "id")
	switch {
	case errors.Is(err, os.ErrNotExist):
		// The event directory is missing entirely.
		return 0, fmt.Errorf("trace event %s/%s: %w", group, name, os.ErrNotExist)
	case err != nil:
		return 0, fmt.Errorf("reading trace event ID of %s/%s: %w", group, name, err)
	}

	return id, nil
}
// getPMUEventType reads a Performance Monitoring Unit's type (numeric identifier)
// from /sys/bus/event_source/devices/<pmu>/type.
//
// Returns ErrNotSupported if the pmu type is not supported.
func getPMUEventType(typ probeType) (uint64, error) {
	pmuType, err := uint64FromFile("/sys/bus/event_source/devices", typ.String(), "type")
	switch {
	case errors.Is(err, os.ErrNotExist):
		// Missing sysfs entry means this kernel has no such PMU.
		return 0, fmt.Errorf("pmu type %s: %w", typ, ErrNotSupported)
	case err != nil:
		return 0, fmt.Errorf("reading pmu type %s: %w", typ, err)
	}

	return pmuType, nil
}
// openTracepointPerfEvent opens a tracepoint-type perf event. System-wide
// [k,u]probes created by writing to <tracefs>/[k,u]probe_events are tracepoints
// behind the scenes, and can be attached to using these perf events.
@ -317,30 +237,11 @@ func openTracepointPerfEvent(tid uint64, pid int) (*sys.FD, error) {
return sys.NewFD(fd)
}
// uint64FromFile reads a uint64 from a file. All elements of path are sanitized
// and joined onto base. Returns error if base no longer prefixes the path after
// joining all components.
func uint64FromFile(base string, path ...string) (uint64, error) {
	rel := filepath.Join(path...)
	full := filepath.Join(base, rel)

	// Joining cleans "..", so a result outside base indicates an escape attempt.
	if !strings.HasPrefix(full, base) {
		return 0, fmt.Errorf("path '%s' attempts to escape base path '%s': %w", rel, base, errInvalidInput)
	}

	raw, err := os.ReadFile(full)
	if err != nil {
		return 0, fmt.Errorf("reading file %s: %w", full, err)
	}

	return strconv.ParseUint(string(bytes.TrimSpace(raw)), 10, 64)
}
// Probe BPF perf link.
//
// https://elixir.bootlin.com/linux/v5.16.8/source/kernel/bpf/syscall.c#L4307
// https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e
var haveBPFLinkPerfEvent = internal.FeatureTest("bpf_link_perf_event", "5.15", func() error {
var haveBPFLinkPerfEvent = internal.NewFeatureTest("bpf_link_perf_event", "5.15", func() error {
prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
Name: "probe_bpf_perf_link",
Type: ebpf.Kprobe,
@ -367,28 +268,3 @@ var haveBPFLinkPerfEvent = internal.FeatureTest("bpf_link_perf_event", "5.15", f
}
return err
})
// isValidTraceID implements the equivalent of a regex match
// against "^[a-zA-Z_][0-9a-zA-Z_]*$".
//
// Trace event groups, names and kernel symbols must adhere to this set
// of characters. Non-empty, first character must not be a number, all
// characters must be alphanumeric or underscore.
func isValidTraceID(s string) bool {
	if s == "" {
		return false
	}
	for i := 0; i < len(s); i++ {
		c := s[i]
		isLetter := (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
		isDigit := c >= '0' && c <= '9'
		// Digits are only permitted after the first character.
		if !isLetter && c != '_' && !(isDigit && i > 0) {
			return false
		}
	}
	return true
}

View file

@ -1,25 +0,0 @@
package link
import (
"fmt"
"runtime"
)
// platformPrefix prepends the architecture-specific syscall prefix
// ("__<arch>_") to symbol, mirroring how the kernel names its syscall entry
// points. Architectures without a known prefix return symbol unchanged.
func platformPrefix(symbol string) string {
	// per https://github.com/golang/go/blob/master/src/go/build/syslist.go
	var arch string
	switch runtime.GOARCH {
	case "386":
		arch = "ia32"
	case "amd64", "amd64p32":
		arch = "x64"
	case "arm64", "arm64be":
		arch = "arm64"
	default:
		return symbol
	}
	return fmt.Sprintf("__%s_%s", arch, symbol)
}

63
vendor/github.com/cilium/ebpf/link/query.go generated vendored Normal file
View file

@ -0,0 +1,63 @@
package link
import (
"fmt"
"os"
"unsafe"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal/sys"
)
// QueryOptions defines additional parameters when querying for programs.
type QueryOptions struct {
	// Path can be a path to a cgroup, netns or LIRC2 device.
	Path string
	// Attach specifies the AttachType of the programs queried for.
	Attach ebpf.AttachType
	// QueryFlags are flags for BPF_PROG_QUERY, e.g. BPF_F_QUERY_EFFECTIVE.
	QueryFlags uint32
}
// QueryPrograms retrieves ProgramIDs associated with the AttachType.
//
// Returns (nil, nil) if there are no programs attached to the queried kernel
// resource. Calling QueryPrograms on a kernel missing PROG_QUERY will result in
// ErrNotSupported.
func QueryPrograms(opts QueryOptions) ([]ebpf.ProgramID, error) {
	if haveProgQuery() != nil {
		return nil, fmt.Errorf("can't query program IDs: %w", ErrNotSupported)
	}

	f, err := os.Open(opts.Path)
	if err != nil {
		return nil, fmt.Errorf("can't open file: %s", err)
	}
	defer f.Close()

	// First pass: ask the kernel only for the number of attached programs
	// so the result slice can be sized exactly.
	attr := sys.ProgQueryAttr{
		TargetFd:   uint32(f.Fd()),
		AttachType: sys.AttachType(opts.Attach),
		QueryFlags: opts.QueryFlags,
	}
	if err := sys.ProgQuery(&attr); err != nil {
		return nil, fmt.Errorf("can't query program count: %w", err)
	}

	// Nothing attached: report (nil, nil) as documented.
	if attr.ProgCount == 0 {
		return nil, nil
	}

	// Second pass: fetch the IDs themselves.
	ids := make([]ebpf.ProgramID, attr.ProgCount)
	attr.ProgIds = sys.NewPointer(unsafe.Pointer(&ids[0]))
	attr.ProgCount = uint32(len(ids))
	if err := sys.ProgQuery(&attr); err != nil {
		return nil, fmt.Errorf("can't query program IDs: %w", err)
	}

	return ids, nil
}

View file

@ -15,7 +15,7 @@ func AttachSocketFilter(conn syscall.Conn, program *ebpf.Program) error {
}
var ssoErr error
err = rawConn.Control(func(fd uintptr) {
ssoErr = syscall.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_ATTACH_BPF, program.FD())
ssoErr = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_ATTACH_BPF, program.FD())
})
if ssoErr != nil {
return ssoErr
@ -31,7 +31,7 @@ func DetachSocketFilter(conn syscall.Conn) error {
}
var ssoErr error
err = rawConn.Control(func(fd uintptr) {
ssoErr = syscall.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_DETACH_BPF, 0)
ssoErr = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_DETACH_BPF, 0)
})
if ssoErr != nil {
return ssoErr

View file

@ -23,9 +23,10 @@ const (
NetNsType = sys.BPF_LINK_TYPE_NETNS
XDPType = sys.BPF_LINK_TYPE_XDP
PerfEventType = sys.BPF_LINK_TYPE_PERF_EVENT
KprobeMultiType = sys.BPF_LINK_TYPE_KPROBE_MULTI
)
var haveProgAttach = internal.FeatureTest("BPF_PROG_ATTACH", "4.10", func() error {
var haveProgAttach = internal.NewFeatureTest("BPF_PROG_ATTACH", "4.10", func() error {
prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
Type: ebpf.CGroupSKB,
License: "MIT",
@ -45,7 +46,7 @@ var haveProgAttach = internal.FeatureTest("BPF_PROG_ATTACH", "4.10", func() erro
return nil
})
var haveProgAttachReplace = internal.FeatureTest("BPF_PROG_ATTACH atomic replacement", "5.5", func() error {
var haveProgAttachReplace = internal.NewFeatureTest("BPF_PROG_ATTACH atomic replacement of MULTI progs", "5.5", func() error {
if err := haveProgAttach(); err != nil {
return err
}
@ -85,7 +86,7 @@ var haveProgAttachReplace = internal.FeatureTest("BPF_PROG_ATTACH atomic replace
return err
})
var haveBPFLink = internal.FeatureTest("bpf_link", "5.7", func() error {
var haveBPFLink = internal.NewFeatureTest("bpf_link", "5.7", func() error {
attr := sys.LinkCreateAttr{
// This is a hopefully invalid file descriptor, which triggers EBADF.
TargetFd: ^uint32(0),
@ -101,3 +102,22 @@ var haveBPFLink = internal.FeatureTest("bpf_link", "5.7", func() error {
}
return err
})
// haveProgQuery probes for BPF_PROG_QUERY support (Linux 4.15+) by issuing
// the command with a deliberately invalid target fd and inspecting the errno.
var haveProgQuery = internal.NewFeatureTest("BPF_PROG_QUERY", "4.15", func() error {
	attr := sys.ProgQueryAttr{
		// We rely on this being checked during the syscall.
		// With an otherwise correct payload we expect EBADF here
		// as an indication that the feature is present.
		TargetFd:   ^uint32(0),
		AttachType: sys.AttachType(ebpf.AttachCGroupInetIngress),
	}

	err := sys.ProgQuery(&attr)
	// EINVAL: the command itself was rejected — feature absent.
	if errors.Is(err, unix.EINVAL) {
		return internal.ErrNotSupported
	}
	// EBADF: the command was recognised and the bogus fd rejected — feature present.
	if errors.Is(err, unix.EBADF) {
		return nil
	}
	return err
})

View file

@ -4,6 +4,7 @@ import (
"fmt"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal/tracefs"
)
// TracepointOptions defines additional parameters that will be used
@ -17,7 +18,7 @@ type TracepointOptions struct {
}
// Tracepoint attaches the given eBPF program to the tracepoint with the given
// group and name. See /sys/kernel/debug/tracing/events to find available
// group and name. See /sys/kernel/tracing/events to find available
// tracepoints. The top-level directory is the group, the event's subdirectory
// is the name. Example:
//
@ -36,14 +37,11 @@ func Tracepoint(group, name string, prog *ebpf.Program, opts *TracepointOptions)
if prog == nil {
return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput)
}
if !isValidTraceID(group) || !isValidTraceID(name) {
return nil, fmt.Errorf("group and name '%s/%s' must be alphanumeric or underscore: %w", group, name, errInvalidInput)
}
if prog.Type() != ebpf.TracePoint {
return nil, fmt.Errorf("eBPF program type %s is not a Tracepoint: %w", prog.Type(), errInvalidInput)
}
tid, err := getTraceEventID(group, name)
tid, err := tracefs.EventID(group, name)
if err != nil {
return nil, err
}
@ -58,16 +56,9 @@ func Tracepoint(group, name string, prog *ebpf.Program, opts *TracepointOptions)
cookie = opts.Cookie
}
pe := &perfEvent{
typ: tracepointEvent,
group: group,
name: name,
tracefsID: tid,
cookie: cookie,
fd: fd,
}
pe := newPerfEvent(fd, nil)
lnk, err := attachPerfEvent(pe, prog)
lnk, err := attachPerfEvent(pe, prog, cookie)
if err != nil {
pe.Close()
return nil, err

View file

@ -1,11 +1,13 @@
package link
import (
"errors"
"fmt"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
type tracing struct {
@ -70,6 +72,10 @@ func AttachFreplace(targetProg *ebpf.Program, name string, prog *ebpf.Program) (
Attach: ebpf.AttachNone,
BTF: typeID,
})
if errors.Is(err, sys.ENOTSUPP) {
// This may be returned by bpf_tracing_prog_attach via bpf_arch_text_poke.
return nil, fmt.Errorf("create raw tracepoint: %w", ErrNotSupported)
}
if err != nil {
return nil, err
}
@ -82,25 +88,71 @@ type TracingOptions struct {
// AttachTraceFEntry/AttachTraceFExit/AttachModifyReturn or
// AttachTraceRawTp.
Program *ebpf.Program
// Program attach type. Can be one of:
// - AttachTraceFEntry
// - AttachTraceFExit
// - AttachModifyReturn
// - AttachTraceRawTp
// This field is optional.
AttachType ebpf.AttachType
// Arbitrary value that can be fetched from an eBPF program
// via `bpf_get_attach_cookie()`.
Cookie uint64
}
// LSMOptions defines additional parameters used when attaching a Linux
// security module (LSM) program via AttachLSM.
type LSMOptions struct {
	// Program must be of type LSM with attach type
	// AttachLSMMac.
	Program *ebpf.Program

	// Arbitrary value that can be fetched from an eBPF program
	// via `bpf_get_attach_cookie()`.
	Cookie uint64
}
// attachBTFID links all BPF program types (Tracing/LSM) that they attach to a btf_id.
func attachBTFID(program *ebpf.Program) (Link, error) {
func attachBTFID(program *ebpf.Program, at ebpf.AttachType, cookie uint64) (Link, error) {
if program.FD() < 0 {
return nil, fmt.Errorf("invalid program %w", sys.ErrClosedFd)
}
fd, err := sys.RawTracepointOpen(&sys.RawTracepointOpenAttr{
ProgFd: uint32(program.FD()),
})
if err != nil {
return nil, err
var (
fd *sys.FD
err error
)
switch at {
case ebpf.AttachTraceFEntry, ebpf.AttachTraceFExit, ebpf.AttachTraceRawTp,
ebpf.AttachModifyReturn, ebpf.AttachLSMMac:
// Attach via BPF link
fd, err = sys.LinkCreateTracing(&sys.LinkCreateTracingAttr{
ProgFd: uint32(program.FD()),
AttachType: sys.AttachType(at),
Cookie: cookie,
})
if err == nil {
break
}
if !errors.Is(err, unix.EINVAL) && !errors.Is(err, sys.ENOTSUPP) {
return nil, fmt.Errorf("create tracing link: %w", err)
}
fallthrough
case ebpf.AttachNone:
// Attach via RawTracepointOpen
if cookie > 0 {
return nil, fmt.Errorf("create raw tracepoint with cookie: %w", ErrNotSupported)
}
fd, err = sys.RawTracepointOpen(&sys.RawTracepointOpenAttr{
ProgFd: uint32(program.FD()),
})
if errors.Is(err, sys.ENOTSUPP) {
// This may be returned by bpf_tracing_prog_attach via bpf_arch_text_poke.
return nil, fmt.Errorf("create raw tracepoint: %w", ErrNotSupported)
}
if err != nil {
return nil, fmt.Errorf("create raw tracepoint: %w", err)
}
default:
return nil, fmt.Errorf("invalid attach type: %s", at.String())
}
raw := RawLink{fd: fd}
@ -115,8 +167,7 @@ func attachBTFID(program *ebpf.Program) (Link, error) {
// a raw_tracepoint link. Other types return a tracing link.
return &rawTracepoint{raw}, nil
}
return &tracing{RawLink: RawLink{fd: fd}}, nil
return &tracing{raw}, nil
}
// AttachTracing links a tracing (fentry/fexit/fmod_ret) BPF program or
@ -127,7 +178,14 @@ func AttachTracing(opts TracingOptions) (Link, error) {
return nil, fmt.Errorf("invalid program type %s, expected Tracing", t)
}
return attachBTFID(opts.Program)
switch opts.AttachType {
case ebpf.AttachTraceFEntry, ebpf.AttachTraceFExit, ebpf.AttachModifyReturn,
ebpf.AttachTraceRawTp, ebpf.AttachNone:
default:
return nil, fmt.Errorf("invalid attach type: %s", opts.AttachType.String())
}
return attachBTFID(opts.Program, opts.AttachType, opts.Cookie)
}
// AttachLSM links a Linux security module (LSM) BPF Program to a BPF
@ -137,5 +195,5 @@ func AttachLSM(opts LSMOptions) (Link, error) {
return nil, fmt.Errorf("invalid program type %s, expected LSM", t)
}
return attachBTFID(opts.Program)
return attachBTFID(opts.Program, ebpf.AttachLSMMac, opts.Cookie)
}

View file

@ -5,27 +5,18 @@ import (
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/tracefs"
)
var (
uprobeEventsPath = filepath.Join(tracefsPath, "uprobe_events")
uprobeRetprobeBit = struct {
once sync.Once
value uint64
err error
}{}
uprobeRefCtrOffsetPMUPath = "/sys/bus/event_source/devices/uprobe/format/ref_ctr_offset"
// elixir.bootlin.com/linux/v5.15-rc7/source/kernel/events/core.c#L9799
uprobeRefCtrOffsetShift = 32
haveRefCtrOffsetPMU = internal.FeatureTest("RefCtrOffsetPMU", "4.20", func() error {
haveRefCtrOffsetPMU = internal.NewFeatureTest("RefCtrOffsetPMU", "4.20", func() error {
_, err := os.Stat(uprobeRefCtrOffsetPMUPath)
if err != nil {
return internal.ErrNotSupported
@ -44,6 +35,8 @@ type Executable struct {
path string
// Parsed ELF and dynamic symbols' addresses.
addresses map[string]uint64
// Keep track of symbol table lazy load.
addressesOnce sync.Once
}
// UprobeOptions defines additional parameters that will be used
@ -77,11 +70,22 @@ type UprobeOptions struct {
//
// Needs kernel 5.15+.
Cookie uint64
// Prefix used for the event name if the uprobe must be attached using tracefs.
// The group name will be formatted as `<prefix>_<randomstr>`.
// The default empty string is equivalent to "ebpf" as the prefix.
TraceFSPrefix string
}
// cookie returns the configured perf-event cookie, treating a nil
// options struct as "no cookie" (zero).
func (uo *UprobeOptions) cookie() uint64 {
	if uo != nil {
		return uo.Cookie
	}
	return 0
}
// To open a new Executable, use:
//
// OpenExecutable("/bin/bash")
// OpenExecutable("/bin/bash")
//
// The returned value can then be used to open Uprobe(s).
func OpenExecutable(path string) (*Executable, error) {
@ -89,32 +93,21 @@ func OpenExecutable(path string) (*Executable, error) {
return nil, fmt.Errorf("path cannot be empty")
}
f, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("open file '%s': %w", path, err)
}
defer f.Close()
se, err := internal.NewSafeELFFile(f)
f, err := internal.OpenSafeELFFile(path)
if err != nil {
return nil, fmt.Errorf("parse ELF file: %w", err)
}
defer f.Close()
if se.Type != elf.ET_EXEC && se.Type != elf.ET_DYN {
if f.Type != elf.ET_EXEC && f.Type != elf.ET_DYN {
// ELF is not an executable or a shared object.
return nil, errors.New("the given file is not an executable or a shared object")
}
ex := Executable{
return &Executable{
path: path,
addresses: make(map[string]uint64),
}
if err := ex.load(se); err != nil {
return nil, err
}
return &ex, nil
}, nil
}
func (ex *Executable) load(f *internal.SafeELFFile) error {
@ -171,6 +164,22 @@ func (ex *Executable) address(symbol string, opts *UprobeOptions) (uint64, error
return opts.Address + opts.Offset, nil
}
var err error
ex.addressesOnce.Do(func() {
var f *internal.SafeELFFile
f, err = internal.OpenSafeELFFile(ex.path)
if err != nil {
err = fmt.Errorf("parse ELF file: %w", err)
return
}
defer f.Close()
err = ex.load(f)
})
if err != nil {
return 0, fmt.Errorf("lazy load symbols: %w", err)
}
address, ok := ex.addresses[symbol]
if !ok {
return 0, fmt.Errorf("symbol %s: %w", symbol, ErrNoSymbol)
@ -194,13 +203,13 @@ func (ex *Executable) address(symbol string, opts *UprobeOptions) (uint64, error
// given symbol starts executing in the given Executable.
// For example, /bin/bash::main():
//
// ex, _ = OpenExecutable("/bin/bash")
// ex.Uprobe("main", prog, nil)
// ex, _ = OpenExecutable("/bin/bash")
// ex.Uprobe("main", prog, nil)
//
// When using symbols which belongs to shared libraries,
// an offset must be provided via options:
//
// up, err := ex.Uprobe("main", prog, &UprobeOptions{Offset: 0x123})
// up, err := ex.Uprobe("main", prog, &UprobeOptions{Offset: 0x123})
//
// Note: Setting the Offset field in the options supersedes the symbol's offset.
//
@ -216,7 +225,7 @@ func (ex *Executable) Uprobe(symbol string, prog *ebpf.Program, opts *UprobeOpti
return nil, err
}
lnk, err := attachPerfEvent(u, prog)
lnk, err := attachPerfEvent(u, prog, opts.cookie())
if err != nil {
u.Close()
return nil, err
@ -228,13 +237,13 @@ func (ex *Executable) Uprobe(symbol string, prog *ebpf.Program, opts *UprobeOpti
// Uretprobe attaches the given eBPF program to a perf event that fires right
// before the given symbol exits. For example, /bin/bash::main():
//
// ex, _ = OpenExecutable("/bin/bash")
// ex.Uretprobe("main", prog, nil)
// ex, _ = OpenExecutable("/bin/bash")
// ex.Uretprobe("main", prog, nil)
//
// When using symbols which belongs to shared libraries,
// an offset must be provided via options:
//
// up, err := ex.Uretprobe("main", prog, &UprobeOptions{Offset: 0x123})
// up, err := ex.Uretprobe("main", prog, &UprobeOptions{Offset: 0x123})
//
// Note: Setting the Offset field in the options supersedes the symbol's offset.
//
@ -250,7 +259,7 @@ func (ex *Executable) Uretprobe(symbol string, prog *ebpf.Program, opts *UprobeO
return nil, err
}
lnk, err := attachPerfEvent(u, prog)
lnk, err := attachPerfEvent(u, prog, opts.cookie())
if err != nil {
u.Close()
return nil, err
@ -288,18 +297,20 @@ func (ex *Executable) uprobe(symbol string, prog *ebpf.Program, opts *UprobeOpti
}
}
args := probeArgs{
symbol: symbol,
path: ex.path,
offset: offset,
pid: pid,
refCtrOffset: opts.RefCtrOffset,
ret: ret,
cookie: opts.Cookie,
args := tracefs.ProbeArgs{
Type: tracefs.Uprobe,
Symbol: symbol,
Path: ex.path,
Offset: offset,
Pid: pid,
RefCtrOffset: opts.RefCtrOffset,
Ret: ret,
Cookie: opts.Cookie,
Group: opts.TraceFSPrefix,
}
// Use uprobe PMU if the kernel has it available.
tp, err := pmuUprobe(args)
tp, err := pmuProbe(args)
if err == nil {
return tp, nil
}
@ -308,66 +319,10 @@ func (ex *Executable) uprobe(symbol string, prog *ebpf.Program, opts *UprobeOpti
}
// Use tracefs if uprobe PMU is missing.
args.symbol = sanitizeSymbol(symbol)
tp, err = tracefsUprobe(args)
tp, err = tracefsProbe(args)
if err != nil {
return nil, fmt.Errorf("creating trace event '%s:%s' in tracefs: %w", ex.path, symbol, err)
}
return tp, nil
}
// pmuUprobe opens a perf event based on the uprobe PMU.
func pmuUprobe(args probeArgs) (*perfEvent, error) {
return pmuProbe(uprobeType, args)
}
// tracefsUprobe creates a Uprobe tracefs entry.
func tracefsUprobe(args probeArgs) (*perfEvent, error) {
return tracefsProbe(uprobeType, args)
}
// sanitizeSymbol replaces every invalid character for the tracefs api with an underscore.
// Runs of consecutive invalid characters collapse into a single underscore; it is
// equivalent to calling regexp.MustCompile("[^a-zA-Z0-9]+").ReplaceAllString("_").
func sanitizeSymbol(s string) string {
	var out strings.Builder
	out.Grow(len(s))

	// Tracks whether the previous byte was already replaced, so a run of
	// invalid bytes yields only one underscore.
	prevWasReplacement := false
	for i := 0; i < len(s); i++ {
		c := s[i]
		isAlnum := ('a' <= c && c <= 'z') ||
			('A' <= c && c <= 'Z') ||
			('0' <= c && c <= '9')
		if isAlnum {
			out.WriteByte(c)
			prevWasReplacement = false
		} else if !prevWasReplacement {
			out.WriteByte('_')
			prevWasReplacement = true
		}
	}

	return out.String()
}
// uprobeToken creates the PATH:OFFSET(REF_CTR_OFFSET) token for the tracefs api.
func uprobeToken(args probeArgs) string {
	token := fmt.Sprintf("%s:%#x", args.path, args.offset)
	if args.refCtrOffset != 0 {
		// The ref_ctr_offset suffix is not documented in
		// Documentation/trace/uprobetracer.txt, but see
		// elixir.bootlin.com/linux/v5.15-rc7/source/kernel/trace/trace.c#L5564
		token = fmt.Sprintf("%s(%#x)", token, args.refCtrOffset)
	}
	return token
}
// uretprobeBit returns the retprobe bit mask for uprobe PMU events.
// The bit is determined at most once per process via determineRetprobeBit
// and cached (value and error alike) in the package-level uprobeRetprobeBit
// struct guarded by its sync.Once.
func uretprobeBit() (uint64, error) {
	uprobeRetprobeBit.once.Do(func() {
		uprobeRetprobeBit.value, uprobeRetprobeBit.err = determineRetprobeBit(uprobeType)
	})
	return uprobeRetprobeBit.value, uprobeRetprobeBit.err
}

View file

@ -1,14 +1,51 @@
package ebpf
import (
"encoding/binary"
"errors"
"fmt"
"sync"
"io"
"math"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal"
)
// handles stores BTF handle objects so they are kept alive (not garbage
// collected) while their file descriptors are still referenced.
type handles []*btf.Handle

// add appends h and returns its 1-based position in the slice. Index 0 is
// reserved (a nil handle maps to it), matching the layout built by fdArray.
func (hs *handles) add(h *btf.Handle) (int, error) {
	if h == nil {
		// No module handle; callers encode this as index 0.
		return 0, nil
	}

	if next := len(*hs) + 1; next > math.MaxInt16 {
		return 0, fmt.Errorf("can't add more than %d module FDs to fdArray", math.MaxInt16)
	}

	*hs = append(*hs, h)

	// Length after append equals the 1-based index of h.
	return len(*hs), nil
}
// fdArray returns the module BTF FDs as an int32 slice whose first element
// is a reserved 0, so the 1-based indexes handed out by add line up.
func (hs handles) fdArray() []int32 {
	fds := make([]int32, 0, len(hs)+1)
	// Slot 0 is reserved: no module can be indexed with 0.
	fds = append(fds, 0)
	for _, handle := range hs {
		fds = append(fds, int32(handle.FD()))
	}
	return fds
}
// close releases every BTF handle held by the slice. Close errors are
// deliberately ignored; this is best-effort cleanup.
func (hs handles) close() {
	for i := range hs {
		hs[i].Close()
	}
}
// splitSymbols splits insns into subsections delimited by Symbol Instructions.
// insns cannot be empty and must start with a Symbol Instruction.
//
@ -67,7 +104,7 @@ func hasFunctionReferences(insns asm.Instructions) bool {
//
// Passing a nil target will relocate against the running kernel. insns are
// modified in place.
func applyRelocations(insns asm.Instructions, local, target *btf.Spec) error {
func applyRelocations(insns asm.Instructions, target *btf.Spec, bo binary.ByteOrder) error {
var relos []*btf.CORERelocation
var reloInsns []*asm.Instruction
iter := insns.Iterate()
@ -82,19 +119,18 @@ func applyRelocations(insns asm.Instructions, local, target *btf.Spec) error {
return nil
}
target, err := maybeLoadKernelBTF(target)
if err != nil {
return err
if bo == nil {
bo = internal.NativeEndian
}
fixups, err := btf.CORERelocate(local, target, relos)
fixups, err := btf.CORERelocate(relos, target, bo)
if err != nil {
return err
}
for i, fixup := range fixups {
if err := fixup.Apply(reloInsns[i]); err != nil {
return fmt.Errorf("apply fixup %s: %w", &fixup, err)
return fmt.Errorf("fixup for %s: %w", relos[i], err)
}
}
@ -181,8 +217,9 @@ func fixupAndValidate(insns asm.Instructions) error {
ins := iter.Ins
// Map load was tagged with a Reference, but does not contain a Map pointer.
if ins.IsLoadFromMap() && ins.Reference() != "" && ins.Map() == nil {
return fmt.Errorf("instruction %d: map %s: %w", iter.Index, ins.Reference(), asm.ErrUnsatisfiedMapReference)
needsMap := ins.Reference() != "" || ins.Metadata.Get(kconfigMetaKey{}) != nil
if ins.IsLoadFromMap() && needsMap && ins.Map() == nil {
return fmt.Errorf("instruction %d: %w", iter.Index, asm.ErrUnsatisfiedMapReference)
}
fixupProbeReadKernel(ins)
@ -191,6 +228,88 @@ func fixupAndValidate(insns asm.Instructions) error {
return nil
}
// fixupKfuncs loops over all instructions in search of kfunc calls. If at
// least one is found, the current kernel's BTF and module BTFs are searched
// to set Instruction.Constant (the target's BTF type ID) and
// Instruction.Offset (the 1-based index of the module's BTF FD, 0 for
// vmlinux) to the correct values.
//
// The returned handles keep any module BTF alive; callers are expected to
// close them when done (see handles.close).
func fixupKfuncs(insns asm.Instructions) (handles, error) {
	iter := insns.Iterate()
	for iter.Next() {
		ins := iter.Ins
		if ins.IsKfuncCall() {
			goto fixups
		}
	}
	// No kfunc calls at all: nothing to do, no kernel BTF needed.
	return nil, nil

fixups:
	// only load the kernel spec if we found at least one kfunc call
	kernelSpec, err := btf.LoadKernelSpec()
	if err != nil {
		return nil, err
	}

	fdArray := make(handles, 0)
	// Resume iteration from the first kfunc call found above; iter keeps
	// its position across the goto.
	for {
		ins := iter.Ins

		if !ins.IsKfuncCall() {
			if !iter.Next() {
				// break loop if this was the last instruction in the stream.
				break
			}
			continue
		}

		// Every kfunc call must carry a *btf.Func in its metadata; a missing
		// entry indicates a malformed instruction stream.
		kfm, _ := ins.Metadata.Get(kfuncMeta{}).(*btf.Func)
		if kfm == nil {
			return nil, fmt.Errorf("kfunc call has no kfuncMeta")
		}

		target := btf.Type((*btf.Func)(nil))
		spec, module, err := findTargetInKernel(kernelSpec, kfm.Name, &target)
		if errors.Is(err, btf.ErrNotFound) {
			// The running kernel doesn't know this kfunc: surface as
			// ErrNotSupported so callers can degrade gracefully.
			return nil, fmt.Errorf("kfunc %q: %w", kfm.Name, ErrNotSupported)
		}
		if err != nil {
			return nil, err
		}

		// The program's declared signature must match the kernel's.
		if err := btf.CheckTypeCompatibility(kfm.Type, target.(*btf.Func).Type); err != nil {
			return nil, &incompatibleKfuncError{kfm.Name, err}
		}

		id, err := spec.TypeID(target)
		if err != nil {
			return nil, err
		}

		// module is nil for vmlinux kfuncs; add then returns index 0.
		idx, err := fdArray.add(module)
		if err != nil {
			return nil, err
		}

		ins.Constant = int64(id)
		ins.Offset = int16(idx)

		if !iter.Next() {
			break
		}
	}

	return fdArray, nil
}
type incompatibleKfuncError struct {
name string
err error
}
func (ike *incompatibleKfuncError) Error() string {
return fmt.Sprintf("kfunc %q: %s", ike.name, ike.err)
}
// fixupProbeReadKernel replaces calls to bpf_probe_read_{kernel,user}(_str)
// with bpf_probe_read(_str) on kernels that don't support it yet.
func fixupProbeReadKernel(ins *asm.Instruction) {
@ -211,28 +330,62 @@ func fixupProbeReadKernel(ins *asm.Instruction) {
}
}
var kernelBTF struct {
sync.Mutex
spec *btf.Spec
}
// maybeLoadKernelBTF loads the current kernel's BTF if spec is nil, otherwise
// it returns spec unchanged.
// resolveKconfigReferences creates and populates a .kconfig map if necessary.
//
// The kernel BTF is cached for the lifetime of the process.
func maybeLoadKernelBTF(spec *btf.Spec) (*btf.Spec, error) {
if spec != nil {
return spec, nil
// Returns a nil Map and no error if no references exist.
func resolveKconfigReferences(insns asm.Instructions) (_ *Map, err error) {
closeOnError := func(c io.Closer) {
if err != nil {
c.Close()
}
}
kernelBTF.Lock()
defer kernelBTF.Unlock()
if kernelBTF.spec != nil {
return kernelBTF.spec, nil
var spec *MapSpec
iter := insns.Iterate()
for iter.Next() {
meta, _ := iter.Ins.Metadata.Get(kconfigMetaKey{}).(*kconfigMeta)
if meta != nil {
spec = meta.Map
break
}
}
var err error
kernelBTF.spec, err = btf.LoadKernelSpec()
return kernelBTF.spec, err
if spec == nil {
return nil, nil
}
cpy := spec.Copy()
if err := resolveKconfig(cpy); err != nil {
return nil, err
}
kconfig, err := NewMap(cpy)
if err != nil {
return nil, err
}
defer closeOnError(kconfig)
// Resolve all instructions which load from .kconfig map with actual map
// and offset inside it.
iter = insns.Iterate()
for iter.Next() {
meta, _ := iter.Ins.Metadata.Get(kconfigMetaKey{}).(*kconfigMeta)
if meta == nil {
continue
}
if meta.Map != spec {
return nil, fmt.Errorf("instruction %d: reference to multiple .kconfig maps is not allowed", iter.Index)
}
if err := iter.Ins.AssociateMap(kconfig); err != nil {
return nil, fmt.Errorf("instruction %d: %w", iter.Index, err)
}
// Encode a map read at the offset of the var in the datasec.
iter.Ins.Constant = int64(uint64(meta.Offset) << 32)
iter.Ins.Metadata.Set(kconfigMetaKey{}, nil)
}
return kconfig, nil
}

206
vendor/github.com/cilium/ebpf/map.go generated vendored
View file

@ -6,6 +6,7 @@ import (
"fmt"
"io"
"math/rand"
"os"
"path/filepath"
"reflect"
"time"
@ -77,9 +78,6 @@ type MapSpec struct {
// The key and value type of this map. May be nil.
Key, Value btf.Type
// The BTF associated with this map.
BTF *btf.Spec
}
func (ms *MapSpec) String() string {
@ -104,12 +102,6 @@ func (ms *MapSpec) Copy() *MapSpec {
return &cpy
}
// hasBTF returns true if the MapSpec has a valid BTF spec and if its
// map type supports associated BTF metadata in the kernel.
func (ms *MapSpec) hasBTF() bool {
return ms.BTF != nil && ms.Type.hasBTF()
}
func (ms *MapSpec) clampPerfEventArraySize() error {
if ms.Type != PerfEventArray {
return nil
@ -158,7 +150,11 @@ type MapKV struct {
Value interface{}
}
func (ms *MapSpec) checkCompatibility(m *Map) error {
// Compatible returns nil if an existing map may be used instead of creating
// one from the spec.
//
// Returns an error wrapping [ErrMapIncompatible] otherwise.
func (ms *MapSpec) Compatible(m *Map) error {
switch {
case m.typ != ms.Type:
return fmt.Errorf("expected type %v, got %v: %w", ms.Type, m.typ, ErrMapIncompatible)
@ -173,7 +169,10 @@ func (ms *MapSpec) checkCompatibility(m *Map) error {
m.maxEntries != ms.MaxEntries:
return fmt.Errorf("expected max entries %v, got %v: %w", ms.MaxEntries, m.maxEntries, ErrMapIncompatible)
case m.flags != ms.Flags:
// BPF_F_RDONLY_PROG is set unconditionally for devmaps. Explicitly allow
// this mismatch.
case !((ms.Type == DevMap || ms.Type == DevMapHash) && m.flags^ms.Flags == unix.BPF_F_RDONLY_PROG) &&
m.flags != ms.Flags:
return fmt.Errorf("expected flags %v, got %v: %w", ms.Flags, m.flags, ErrMapIncompatible)
}
return nil
@ -241,10 +240,7 @@ func NewMap(spec *MapSpec) (*Map, error) {
//
// May return an error wrapping ErrMapIncompatible.
func NewMapWithOptions(spec *MapSpec, opts MapOptions) (*Map, error) {
handles := newHandleCache()
defer handles.close()
m, err := newMapWithOptions(spec, opts, handles)
m, err := newMapWithOptions(spec, opts)
if err != nil {
return nil, fmt.Errorf("creating map: %w", err)
}
@ -257,7 +253,7 @@ func NewMapWithOptions(spec *MapSpec, opts MapOptions) (*Map, error) {
return m, nil
}
func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_ *Map, err error) {
func newMapWithOptions(spec *MapSpec, opts MapOptions) (_ *Map, err error) {
closeOnError := func(c io.Closer) {
if err != nil {
c.Close()
@ -284,7 +280,7 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_
}
defer closeOnError(m)
if err := spec.checkCompatibility(m); err != nil {
if err := spec.Compatible(m); err != nil {
return nil, fmt.Errorf("use pinned map %s: %w", spec.Name, err)
}
@ -307,7 +303,7 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_
return nil, errors.New("inner maps cannot be pinned")
}
template, err := spec.InnerMap.createMap(nil, opts, handles)
template, err := spec.InnerMap.createMap(nil, opts)
if err != nil {
return nil, fmt.Errorf("inner map: %w", err)
}
@ -319,7 +315,7 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_
innerFd = template.fd
}
m, err := spec.createMap(innerFd, opts, handles)
m, err := spec.createMap(innerFd, opts)
if err != nil {
return nil, err
}
@ -328,7 +324,7 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_
if spec.Pinning == PinByName {
path := filepath.Join(opts.PinPath, spec.Name)
if err := m.Pin(path); err != nil {
return nil, fmt.Errorf("pin map: %w", err)
return nil, fmt.Errorf("pin map to %s: %w", path, err)
}
}
@ -337,15 +333,13 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_
// createMap validates the spec's properties and creates the map in the kernel
// using the given opts. It does not populate or freeze the map.
func (spec *MapSpec) createMap(inner *sys.FD, opts MapOptions, handles *handleCache) (_ *Map, err error) {
func (spec *MapSpec) createMap(inner *sys.FD, opts MapOptions) (_ *Map, err error) {
closeOnError := func(closer io.Closer) {
if err != nil {
closer.Close()
}
}
spec = spec.Copy()
// Kernels 4.13 through 5.4 used a struct bpf_map_def that contained
// additional 'inner_map_idx' and later 'numa_node' fields.
// In order to support loading these definitions, tolerate the presence of
@ -365,17 +359,21 @@ func (spec *MapSpec) createMap(inner *sys.FD, opts MapOptions, handles *handleCa
if spec.ValueSize != 0 && spec.ValueSize != 4 {
return nil, errors.New("ValueSize must be zero or four for map of map")
}
spec = spec.Copy()
spec.ValueSize = 4
case PerfEventArray:
if spec.KeySize != 0 && spec.KeySize != 4 {
return nil, errors.New("KeySize must be zero or four for perf event array")
}
spec.KeySize = 4
if spec.ValueSize != 0 && spec.ValueSize != 4 {
return nil, errors.New("ValueSize must be zero or four for perf event array")
}
spec = spec.Copy()
spec.KeySize = 4
spec.ValueSize = 4
if spec.MaxEntries == 0 {
@ -413,7 +411,7 @@ func (spec *MapSpec) createMap(inner *sys.FD, opts MapOptions, handles *handleCa
KeySize: spec.KeySize,
ValueSize: spec.ValueSize,
MaxEntries: spec.MaxEntries,
MapFlags: spec.Flags,
MapFlags: sys.MapFlags(spec.Flags),
NumaNode: spec.NumaNode,
}
@ -425,40 +423,43 @@ func (spec *MapSpec) createMap(inner *sys.FD, opts MapOptions, handles *handleCa
attr.MapName = sys.NewObjName(spec.Name)
}
if spec.hasBTF() {
handle, err := handles.btfHandle(spec.BTF)
if spec.Key != nil || spec.Value != nil {
handle, keyTypeID, valueTypeID, err := btf.MarshalMapKV(spec.Key, spec.Value)
if err != nil && !errors.Is(err, btf.ErrNotSupported) {
return nil, fmt.Errorf("load BTF: %w", err)
}
if handle != nil {
keyTypeID, err := spec.BTF.TypeID(spec.Key)
if err != nil {
return nil, err
}
valueTypeID, err := spec.BTF.TypeID(spec.Value)
if err != nil {
return nil, err
}
defer handle.Close()
// Use BTF k/v during map creation.
attr.BtfFd = uint32(handle.FD())
attr.BtfKeyTypeId = uint32(keyTypeID)
attr.BtfValueTypeId = uint32(valueTypeID)
attr.BtfKeyTypeId = keyTypeID
attr.BtfValueTypeId = valueTypeID
}
}
fd, err := sys.MapCreate(&attr)
// Some map types don't support BTF k/v in earlier kernel versions.
// Remove BTF metadata and retry map creation.
if (errors.Is(err, sys.ENOTSUPP) || errors.Is(err, unix.EINVAL)) && attr.BtfFd != 0 {
attr.BtfFd, attr.BtfKeyTypeId, attr.BtfValueTypeId = 0, 0, 0
fd, err = sys.MapCreate(&attr)
}
if err != nil {
if errors.Is(err, unix.EPERM) {
return nil, fmt.Errorf("map create: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err)
}
if !spec.hasBTF() {
return nil, fmt.Errorf("map create without BTF: %w", err)
}
if errors.Is(err, unix.EINVAL) && attr.MaxEntries == 0 {
return nil, fmt.Errorf("map create: %w (MaxEntries may be incorrectly set to zero)", err)
}
if errors.Is(err, unix.EINVAL) && spec.Type == UnspecifiedMap {
return nil, fmt.Errorf("map create: cannot use type %s", UnspecifiedMap)
}
if attr.BtfFd == 0 {
return nil, fmt.Errorf("map create: %w (without BTF k/v)", err)
}
return nil, fmt.Errorf("map create: %w", err)
}
defer closeOnError(fd)
@ -495,7 +496,7 @@ func newMap(fd *sys.FD, name string, typ MapType, keySize, valueSize, maxEntries
return nil, err
}
m.fullValueSize = internal.Align(int(valueSize), 8) * possibleCPUs
m.fullValueSize = int(internal.Align(valueSize, 8)) * possibleCPUs
return m, nil
}
@ -549,12 +550,7 @@ const LookupLock MapLookupFlags = 4
//
// Returns an error if the key doesn't exist, see ErrKeyNotExist.
func (m *Map) Lookup(key, valueOut interface{}) error {
valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize)
if err := m.lookup(key, valuePtr, 0); err != nil {
return err
}
return m.unmarshalValue(valueOut, valueBytes)
return m.LookupWithFlags(key, valueOut, 0)
}
// LookupWithFlags retrieves a value from a Map with flags.
@ -568,6 +564,10 @@ func (m *Map) Lookup(key, valueOut interface{}) error {
//
// Returns an error if the key doesn't exist, see ErrKeyNotExist.
func (m *Map) LookupWithFlags(key, valueOut interface{}, flags MapLookupFlags) error {
if m.typ.hasPerCPUValue() {
return m.lookupPerCPU(key, valueOut, flags)
}
valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize)
if err := m.lookup(key, valuePtr, flags); err != nil {
return err
@ -580,7 +580,7 @@ func (m *Map) LookupWithFlags(key, valueOut interface{}, flags MapLookupFlags) e
//
// Returns ErrKeyNotExist if the key doesn't exist.
func (m *Map) LookupAndDelete(key, valueOut interface{}) error {
return m.lookupAndDelete(key, valueOut, 0)
return m.LookupAndDeleteWithFlags(key, valueOut, 0)
}
// LookupAndDeleteWithFlags retrieves and deletes a value from a Map.
@ -591,7 +591,15 @@ func (m *Map) LookupAndDelete(key, valueOut interface{}) error {
//
// Returns ErrKeyNotExist if the key doesn't exist.
func (m *Map) LookupAndDeleteWithFlags(key, valueOut interface{}, flags MapLookupFlags) error {
return m.lookupAndDelete(key, valueOut, flags)
if m.typ.hasPerCPUValue() {
return m.lookupAndDeletePerCPU(key, valueOut, flags)
}
valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize)
if err := m.lookupAndDelete(key, valuePtr, flags); err != nil {
return err
}
return m.unmarshalValue(valueOut, valueBytes)
}
// LookupBytes gets a value from Map.
@ -609,6 +617,14 @@ func (m *Map) LookupBytes(key interface{}) ([]byte, error) {
return valueBytes, err
}
// lookupPerCPU performs a lookup on a per-CPU map. The kernel fills a single
// fullValueSize scratch buffer (one aligned valueSize-sized element per
// possible CPU), which unmarshalPerCPUValue then decodes into valueOut.
func (m *Map) lookupPerCPU(key, valueOut any, flags MapLookupFlags) error {
	valueBytes := make([]byte, m.fullValueSize)
	if err := m.lookup(key, sys.NewSlicePointer(valueBytes), flags); err != nil {
		return err
	}
	return unmarshalPerCPUValue(valueOut, int(m.valueSize), valueBytes)
}
func (m *Map) lookup(key interface{}, valueOut sys.Pointer, flags MapLookupFlags) error {
keyPtr, err := m.marshalKey(key)
if err != nil {
@ -628,9 +644,15 @@ func (m *Map) lookup(key interface{}, valueOut sys.Pointer, flags MapLookupFlags
return nil
}
func (m *Map) lookupAndDelete(key, valueOut interface{}, flags MapLookupFlags) error {
valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize)
// lookupAndDeletePerCPU is the per-CPU variant of lookupAndDelete: it
// atomically retrieves and removes the entry for key, receiving one value
// per possible CPU into a fullValueSize scratch buffer which
// unmarshalPerCPUValue decodes into valueOut.
func (m *Map) lookupAndDeletePerCPU(key, valueOut any, flags MapLookupFlags) error {
	valueBytes := make([]byte, m.fullValueSize)
	if err := m.lookupAndDelete(key, sys.NewSlicePointer(valueBytes), flags); err != nil {
		return err
	}
	return unmarshalPerCPUValue(valueOut, int(m.valueSize), valueBytes)
}
func (m *Map) lookupAndDelete(key any, valuePtr sys.Pointer, flags MapLookupFlags) error {
keyPtr, err := m.marshalKey(key)
if err != nil {
return fmt.Errorf("can't marshal key: %w", err)
@ -647,7 +669,7 @@ func (m *Map) lookupAndDelete(key, valueOut interface{}, flags MapLookupFlags) e
return fmt.Errorf("lookup and delete: %w", wrapMapError(err))
}
return m.unmarshalValue(valueOut, valueBytes)
return nil
}
// MapUpdateFlags controls the behaviour of the Map.Update call.
@ -674,15 +696,32 @@ func (m *Map) Put(key, value interface{}) error {
}
// Update changes the value of a key.
func (m *Map) Update(key, value interface{}, flags MapUpdateFlags) error {
keyPtr, err := m.marshalKey(key)
if err != nil {
return fmt.Errorf("can't marshal key: %w", err)
func (m *Map) Update(key, value any, flags MapUpdateFlags) error {
if m.typ.hasPerCPUValue() {
return m.updatePerCPU(key, value, flags)
}
valuePtr, err := m.marshalValue(value)
if err != nil {
return fmt.Errorf("can't marshal value: %w", err)
return fmt.Errorf("marshal value: %w", err)
}
return m.update(key, valuePtr, flags)
}
// updatePerCPU writes value to a per-CPU map, encoding one element per
// possible CPU via marshalPerCPUValue before delegating to the generic
// update path.
func (m *Map) updatePerCPU(key, value any, flags MapUpdateFlags) error {
	valuePtr, err := marshalPerCPUValue(value, int(m.valueSize))
	if err != nil {
		return fmt.Errorf("marshal value: %w", err)
	}

	return m.update(key, valuePtr, flags)
}
func (m *Map) update(key any, valuePtr sys.Pointer, flags MapUpdateFlags) error {
keyPtr, err := m.marshalKey(key)
if err != nil {
return fmt.Errorf("marshal key: %w", err)
}
attr := sys.MapUpdateElemAttr{
@ -798,12 +837,22 @@ func (m *Map) nextKey(key interface{}, nextKeyOut sys.Pointer) error {
return nil
}
// mmapProtectedPage lazily maps one anonymous PROT_NONE page and caches it
// for the lifetime of the process. guessNonExistentKey points the kernel's
// lookup output at this page to avoid copying out values it doesn't need;
// presumably the kernel returns the error before writing — TODO confirm
// against kernel behavior.
var mmapProtectedPage = internal.Memoize(func() ([]byte, error) {
	return unix.Mmap(-1, 0, os.Getpagesize(), unix.PROT_NONE, unix.MAP_ANON|unix.MAP_SHARED)
})
// guessNonExistentKey attempts to perform a map lookup that returns ENOENT.
// This is necessary on kernels before 4.4.132, since those don't support
// iterating maps from the start by providing an invalid key pointer.
func (m *Map) guessNonExistentKey() ([]byte, error) {
// Provide an invalid value pointer to prevent a copy on the kernel side.
valuePtr := sys.NewPointer(unsafe.Pointer(^uintptr(0)))
// Map a protected page and use that as the value pointer. This saves some
// work copying out the value, which we're not interested in.
page, err := mmapProtectedPage()
if err != nil {
return nil, err
}
valuePtr := sys.NewSlicePointer(page)
randKey := make([]byte, int(m.keySize))
for i := 0; i < 4; i++ {
@ -1095,7 +1144,8 @@ func (m *Map) Clone() (*Map, error) {
// the new path already exists. Re-pinning across filesystems is not supported.
// You can Clone a map to pin it to a different path.
//
// This requires bpffs to be mounted above fileName. See https://docs.cilium.io/en/k8s-doc/admin/#admin-mount-bpffs
// This requires bpffs to be mounted above fileName.
// See https://docs.cilium.io/en/stable/network/kubernetes/configuration/#mounting-bpffs-with-systemd
func (m *Map) Pin(fileName string) error {
if err := internal.Pin(m.pinnedPath, fileName, m.fd); err != nil {
return err
@ -1180,10 +1230,6 @@ func (m *Map) unmarshalKey(data interface{}, buf []byte) error {
}
func (m *Map) marshalValue(data interface{}) (sys.Pointer, error) {
if m.typ.hasPerCPUValue() {
return marshalPerCPUValue(data, int(m.valueSize))
}
var (
buf []byte
err error
@ -1316,8 +1362,7 @@ func marshalMap(m *Map, length int) ([]byte, error) {
// See Map.Iterate.
type MapIterator struct {
target *Map
prevKey interface{}
prevBytes []byte
curKey []byte
count, maxEntries uint32
done bool
err error
@ -1327,7 +1372,6 @@ func newMapIterator(target *Map) *MapIterator {
return &MapIterator{
target: target,
maxEntries: target.maxEntries,
prevBytes: make([]byte, target.keySize),
}
}
@ -1349,26 +1393,35 @@ func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool {
// For array-like maps NextKeyBytes returns nil only on after maxEntries
// iterations.
for mi.count <= mi.maxEntries {
var nextBytes []byte
nextBytes, mi.err = mi.target.NextKeyBytes(mi.prevKey)
var nextKey []byte
if mi.curKey == nil {
// Pass nil interface to NextKeyBytes to make sure the Map's first key
// is returned. If we pass an uninitialized []byte instead, it'll see a
// non-nil interface and try to marshal it.
nextKey, mi.err = mi.target.NextKeyBytes(nil)
mi.curKey = make([]byte, mi.target.keySize)
} else {
nextKey, mi.err = mi.target.NextKeyBytes(mi.curKey)
}
if mi.err != nil {
mi.err = fmt.Errorf("get next key: %w", mi.err)
return false
}
if nextBytes == nil {
if nextKey == nil {
mi.done = true
return false
}
// The user can get access to nextBytes since unmarshalBytes
// The user can get access to nextKey since unmarshalBytes
// does not copy when unmarshaling into a []byte.
// Make a copy to prevent accidental corruption of
// iterator state.
copy(mi.prevBytes, nextBytes)
mi.prevKey = mi.prevBytes
copy(mi.curKey, nextKey)
mi.count++
mi.err = mi.target.Lookup(nextBytes, valueOut)
mi.err = mi.target.Lookup(nextKey, valueOut)
if errors.Is(mi.err, ErrKeyNotExist) {
// Even though the key should be valid, we couldn't look up
// its value. If we're iterating a hash map this is probably
@ -1381,10 +1434,11 @@ func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool {
continue
}
if mi.err != nil {
mi.err = fmt.Errorf("look up next key: %w", mi.err)
return false
}
mi.err = mi.target.unmarshalKey(keyOut, nextBytes)
mi.err = mi.target.unmarshalKey(keyOut, nextKey)
return mi.err == nil
}

View file

@ -57,8 +57,10 @@ func marshalBytes(data interface{}, length int) (buf []byte, err error) {
case Map, *Map, Program, *Program:
err = fmt.Errorf("can't marshal %T", value)
default:
var wr bytes.Buffer
err = binary.Write(&wr, internal.NativeEndian, value)
wr := internal.NewBuffer(make([]byte, 0, length))
defer internal.PutBuffer(wr)
err = binary.Write(wr, internal.NativeEndian, value)
if err != nil {
err = fmt.Errorf("encoding %T: %v", value, err)
}

391
vendor/github.com/cilium/ebpf/prog.go generated vendored
View file

@ -10,6 +10,7 @@ import (
"runtime"
"strings"
"time"
"unsafe"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/btf"
@ -35,16 +36,44 @@ const (
// verifier log.
const DefaultVerifierLogSize = 64 * 1024
// maxVerifierLogSize is the maximum size of verifier log buffer the kernel
// will accept before returning EINVAL. Equals math.MaxUint32 >> 2, i.e. one
// quarter of the full u32 range.
const maxVerifierLogSize = math.MaxUint32 >> 2
// ProgramOptions control loading a program into the kernel.
type ProgramOptions struct {
// Controls the detail emitted by the kernel verifier. Set to non-zero
// to enable logging.
LogLevel uint32
// Controls the output buffer size for the verifier. Defaults to
// DefaultVerifierLogSize.
// Bitmap controlling the detail emitted by the kernel's eBPF verifier log.
// LogLevel-type values can be ORed together to request specific kinds of
// verifier output. See the documentation on [ebpf.LogLevel] for details.
//
// opts.LogLevel = (ebpf.LogLevelBranch | ebpf.LogLevelStats)
//
// If left to its default value, the program will first be loaded without
// verifier output enabled. Upon error, the program load will be repeated
// with LogLevelBranch and the given (or default) LogSize value.
//
// Setting this to a non-zero value will unconditionally enable the verifier
// log, populating the [ebpf.Program.VerifierLog] field on successful loads
// and including detailed verifier errors if the program is rejected. This
// will always allocate an output buffer, but will result in only a single
// attempt at loading the program.
LogLevel LogLevel
// Controls the output buffer size for the verifier log, in bytes. See the
// documentation on ProgramOptions.LogLevel for details about how this value
// is used.
//
// If this value is set too low to fit the verifier log, the resulting
// [ebpf.VerifierError]'s Truncated flag will be true, and the error string
// will also contain a hint to that effect.
//
// Defaults to DefaultVerifierLogSize.
LogSize int
// Type information used for CO-RE relocations and when attaching to
// kernel functions.
// Disables the verifier log completely, regardless of other options.
LogDisabled bool
// Type information used for CO-RE relocations.
//
// This is useful in environments where the kernel BTF is not available
// (containers) or where it is in a non-standard location. Defaults to
@ -74,7 +103,7 @@ type ProgramSpec struct {
// The program to attach to. Must be provided manually.
AttachTarget *Program
// The name of the ELF section this program orininated from.
// The name of the ELF section this program originated from.
SectionName string
Instructions asm.Instructions
@ -95,11 +124,6 @@ type ProgramSpec struct {
// detect this value automatically.
KernelVersion uint32
// The BTF associated with this program. Changing Instructions
// will most likely invalidate the contained data, and may
// result in errors when attempting to load it into the kernel.
BTF *btf.Spec
// The byte order this program was compiled for, may be nil.
ByteOrder binary.ByteOrder
}
@ -123,6 +147,10 @@ func (ps *ProgramSpec) Tag() (string, error) {
return ps.Instructions.Tag(internal.NativeEndian)
}
// VerifierError is returned by [NewProgram] and [NewProgramWithOptions] if a
// program is rejected by the verifier.
//
// Use [errors.As] to access the error.
type VerifierError = internal.VerifierError
// Program represents BPF program loaded into the kernel.
@ -141,7 +169,10 @@ type Program struct {
// NewProgram creates a new Program.
//
// See NewProgramWithOptions for details.
// See [NewProgramWithOptions] for details.
//
// Returns a [VerifierError] containing the full verifier log if the program is
// rejected by the kernel.
func NewProgram(spec *ProgramSpec) (*Program, error) {
return NewProgramWithOptions(spec, ProgramOptions{})
}
@ -151,24 +182,21 @@ func NewProgram(spec *ProgramSpec) (*Program, error) {
// Loading a program for the first time will perform
// feature detection by loading small, temporary programs.
//
// Returns an error wrapping VerifierError if the program or its BTF is rejected
// by the kernel.
// Returns a [VerifierError] containing the full verifier log if the program is
// rejected by the kernel.
func NewProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, error) {
if spec == nil {
return nil, errors.New("can't load a program from a nil spec")
}
handles := newHandleCache()
defer handles.close()
prog, err := newProgramWithOptions(spec, opts, handles)
prog, err := newProgramWithOptions(spec, opts)
if errors.Is(err, asm.ErrUnsatisfiedMapReference) {
return nil, fmt.Errorf("cannot load program without loading its whole collection: %w", err)
}
return prog, err
}
func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *handleCache) (*Program, error) {
func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, error) {
if len(spec.Instructions) == 0 {
return nil, errors.New("instructions cannot be empty")
}
@ -181,6 +209,10 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand
return nil, fmt.Errorf("can't load %s program on %s", spec.ByteOrder, internal.NativeEndian)
}
if opts.LogSize < 0 {
return nil, errors.New("ProgramOptions.LogSize must be a positive value; disable verifier logs using ProgramOptions.LogDisabled")
}
// Kernels before 5.0 (6c4fc209fcf9 "bpf: remove useless version check for prog load")
// require the version field to be set to the value of the KERNEL_VERSION
// macro for kprobe-type programs.
@ -206,47 +238,54 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand
attr.ProgName = sys.NewObjName(spec.Name)
}
kernelTypes := opts.KernelTypes
insns := make(asm.Instructions, len(spec.Instructions))
copy(insns, spec.Instructions)
var btfDisabled bool
if spec.BTF != nil {
if err := applyRelocations(insns, spec.BTF, kernelTypes); err != nil {
return nil, fmt.Errorf("apply CO-RE relocations: %w", err)
}
handle, err := handles.btfHandle(spec.BTF)
btfDisabled = errors.Is(err, btf.ErrNotSupported)
if err != nil && !btfDisabled {
return nil, fmt.Errorf("load BTF: %w", err)
}
if handle != nil {
attr.ProgBtfFd = uint32(handle.FD())
fib, lib, err := btf.MarshalExtInfos(insns, spec.BTF.TypeID)
if err != nil {
return nil, err
}
attr.FuncInfoRecSize = btf.FuncInfoSize
attr.FuncInfoCnt = uint32(len(fib)) / btf.FuncInfoSize
attr.FuncInfo = sys.NewSlicePointer(fib)
attr.LineInfoRecSize = btf.LineInfoSize
attr.LineInfoCnt = uint32(len(lib)) / btf.LineInfoSize
attr.LineInfo = sys.NewSlicePointer(lib)
}
handle, fib, lib, err := btf.MarshalExtInfos(insns)
if err != nil && !errors.Is(err, btf.ErrNotSupported) {
return nil, fmt.Errorf("load ext_infos: %w", err)
}
if handle != nil {
defer handle.Close()
attr.ProgBtfFd = uint32(handle.FD())
attr.FuncInfoRecSize = btf.FuncInfoSize
attr.FuncInfoCnt = uint32(len(fib)) / btf.FuncInfoSize
attr.FuncInfo = sys.NewSlicePointer(fib)
attr.LineInfoRecSize = btf.LineInfoSize
attr.LineInfoCnt = uint32(len(lib)) / btf.LineInfoSize
attr.LineInfo = sys.NewSlicePointer(lib)
}
if err := applyRelocations(insns, opts.KernelTypes, spec.ByteOrder); err != nil {
return nil, fmt.Errorf("apply CO-RE relocations: %w", err)
}
kconfig, err := resolveKconfigReferences(insns)
if err != nil {
return nil, fmt.Errorf("resolve .kconfig: %w", err)
}
defer kconfig.Close()
if err := fixupAndValidate(insns); err != nil {
return nil, err
}
handles, err := fixupKfuncs(insns)
if err != nil {
return nil, fmt.Errorf("fixing up kfuncs: %w", err)
}
defer handles.close()
if len(handles) > 0 {
fdArray := handles.fdArray()
attr.FdArray = sys.NewPointer(unsafe.Pointer(&fdArray[0]))
}
buf := bytes.NewBuffer(make([]byte, 0, insns.Size()))
err := insns.Marshal(buf, internal.NativeEndian)
err = insns.Marshal(buf, internal.NativeEndian)
if err != nil {
return nil, err
}
@ -261,28 +300,32 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand
return nil, fmt.Errorf("attach %s/%s: %w", spec.Type, spec.AttachType, err)
}
attr.AttachBtfId = uint32(targetID)
attr.AttachProgFd = uint32(spec.AttachTarget.FD())
attr.AttachBtfId = targetID
attr.AttachBtfObjFd = uint32(spec.AttachTarget.FD())
defer runtime.KeepAlive(spec.AttachTarget)
} else if spec.AttachTo != "" {
targetID, err := findTargetInKernel(kernelTypes, spec.AttachTo, spec.Type, spec.AttachType)
module, targetID, err := findProgramTargetInKernel(spec.AttachTo, spec.Type, spec.AttachType)
if err != nil && !errors.Is(err, errUnrecognizedAttachType) {
// We ignore errUnrecognizedAttachType since AttachTo may be non-empty
// for programs that don't attach anywhere.
return nil, fmt.Errorf("attach %s/%s: %w", spec.Type, spec.AttachType, err)
}
attr.AttachBtfId = uint32(targetID)
attr.AttachBtfId = targetID
if module != nil {
attr.AttachBtfObjFd = uint32(module.FD())
defer module.Close()
}
}
logSize := DefaultVerifierLogSize
if opts.LogSize > 0 {
logSize = opts.LogSize
if opts.LogSize == 0 {
opts.LogSize = DefaultVerifierLogSize
}
// The caller requested a specific verifier log level. Set up the log buffer.
var logBuf []byte
if opts.LogLevel > 0 {
logBuf = make([]byte, logSize)
if !opts.LogDisabled && opts.LogLevel != 0 {
logBuf = make([]byte, opts.LogSize)
attr.LogLevel = opts.LogLevel
attr.LogSize = uint32(len(logBuf))
attr.LogBuf = sys.NewSlicePointer(logBuf)
@ -293,13 +336,19 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand
return &Program{unix.ByteSliceToString(logBuf), fd, spec.Name, "", spec.Type}, nil
}
if opts.LogLevel == 0 && opts.LogSize >= 0 {
// Re-run with the verifier enabled to get better error messages.
logBuf = make([]byte, logSize)
attr.LogLevel = 1
// An error occurred loading the program, but the caller did not explicitly
// enable the verifier log. Re-run with branch-level verifier logs enabled to
// obtain more info. Preserve the original error to return it to the caller.
// An undersized log buffer will result in ENOSPC regardless of the underlying
// cause.
var err2 error
if !opts.LogDisabled && opts.LogLevel == 0 {
logBuf = make([]byte, opts.LogSize)
attr.LogLevel = LogLevelBranch
attr.LogSize = uint32(len(logBuf))
attr.LogBuf = sys.NewSlicePointer(logBuf)
_, _ = sys.ProgLoad(attr)
_, err2 = sys.ProgLoad(attr)
}
switch {
@ -318,13 +367,14 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand
return nil, fmt.Errorf("load program: %w", err)
}
}
if opts.LogSize > maxVerifierLogSize {
return nil, fmt.Errorf("load program: %w (ProgramOptions.LogSize exceeds maximum value of %d)", err, maxVerifierLogSize)
}
}
err = internal.ErrorWithLog(err, logBuf)
if btfDisabled {
return nil, fmt.Errorf("load program: %w (BTF disabled)", err)
}
return nil, fmt.Errorf("load program: %w", err)
truncated := errors.Is(err, unix.ENOSPC) || errors.Is(err2, unix.ENOSPC)
return nil, internal.ErrorWithLog("load program", err, logBuf, truncated)
}
// NewProgramFromFD creates a program from a raw fd.
@ -362,7 +412,7 @@ func newProgramFromFD(fd *sys.FD) (*Program, error) {
return nil, fmt.Errorf("discover program type: %w", err)
}
return &Program{"", fd, "", "", info.Type}, nil
return &Program{"", fd, info.Name, "", info.Type}, nil
}
func (p *Program) String() string {
@ -433,7 +483,8 @@ func (p *Program) Clone() (*Program, error) {
// Calling Pin on a previously pinned program will overwrite the path, except when
// the new path already exists. Re-pinning across filesystems is not supported.
//
// This requires bpffs to be mounted above fileName. See https://docs.cilium.io/en/k8s-doc/admin/#admin-mount-bpffs
// This requires bpffs to be mounted above fileName.
// See https://docs.cilium.io/en/stable/network/kubernetes/configuration/#mounting-bpffs-with-systemd
func (p *Program) Pin(fileName string) error {
if err := internal.Pin(p.pinnedPath, fileName, p.fd); err != nil {
return err
@ -474,6 +525,9 @@ func (p *Program) Close() error {
// Various options for Run'ing a Program
type RunOptions struct {
// Program's data input. Required field.
//
// The kernel expects at least 14 bytes input for an ethernet header for
// XDP and SKB programs.
Data []byte
// Program's data after Program has run. Caller must allocate. Optional field.
DataOut []byte
@ -481,7 +535,10 @@ type RunOptions struct {
Context interface{}
// Program's context after Program has run. Must be a pointer or slice. Optional field.
ContextOut interface{}
// Number of times to run Program. Optional field. Defaults to 1.
// Minimum number of times to run Program. Optional field. Defaults to 1.
//
// The program may be executed more often than this due to interruptions, e.g.
// when runtime.AllThreadsSyscall is invoked.
Repeat uint32
// Optional flags.
Flags uint32
@ -490,6 +547,8 @@ type RunOptions struct {
CPU uint32
// Called whenever the syscall is interrupted, and should be set to testing.B.ResetTimer
// or similar. Typically used during benchmarking. Optional field.
//
// Deprecated: use [testing.B.ReportMetric] with unit "ns/op" instead.
Reset func()
}
@ -517,9 +576,9 @@ func (p *Program) Test(in []byte) (uint32, []byte, error) {
Repeat: 1,
}
ret, _, err := p.testRun(&opts)
ret, _, err := p.run(&opts)
if err != nil {
return ret, nil, fmt.Errorf("can't test program: %w", err)
return ret, nil, fmt.Errorf("test program: %w", err)
}
return ret, opts.DataOut, nil
}
@ -528,9 +587,9 @@ func (p *Program) Test(in []byte) (uint32, []byte, error) {
//
// Note: the same restrictions from Test apply.
func (p *Program) Run(opts *RunOptions) (uint32, error) {
ret, _, err := p.testRun(opts)
ret, _, err := p.run(opts)
if err != nil {
return ret, fmt.Errorf("can't test program: %w", err)
return ret, fmt.Errorf("run program: %w", err)
}
return ret, nil
}
@ -542,9 +601,6 @@ func (p *Program) Run(opts *RunOptions) (uint32, error) {
// run or an error. reset is called whenever the benchmark syscall is
// interrupted, and should be set to testing.B.ResetTimer or similar.
//
// Note: profiling a call to this function will skew it's results, see
// https://github.com/cilium/ebpf/issues/24
//
// This function requires at least Linux 4.12.
func (p *Program) Benchmark(in []byte, repeat int, reset func()) (uint32, time.Duration, error) {
if uint(repeat) > math.MaxUint32 {
@ -557,14 +613,14 @@ func (p *Program) Benchmark(in []byte, repeat int, reset func()) (uint32, time.D
Reset: reset,
}
ret, total, err := p.testRun(&opts)
ret, total, err := p.run(&opts)
if err != nil {
return ret, total, fmt.Errorf("can't benchmark program: %w", err)
return ret, total, fmt.Errorf("benchmark program: %w", err)
}
return ret, total, nil
}
var haveProgTestRun = internal.FeatureTest("BPF_PROG_TEST_RUN", "4.12", func() error {
var haveProgRun = internal.NewFeatureTest("BPF_PROG_RUN", "4.12", func() error {
prog, err := NewProgram(&ProgramSpec{
// SocketFilter does not require privileges on newer kernels.
Type: SocketFilter,
@ -580,8 +636,7 @@ var haveProgTestRun = internal.FeatureTest("BPF_PROG_TEST_RUN", "4.12", func() e
}
defer prog.Close()
// Programs require at least 14 bytes input
in := make([]byte, 14)
in := internal.EmptyBPFContext
attr := sys.ProgRunAttr{
ProgFd: uint32(prog.FD()),
DataSizeIn: uint32(len(in)),
@ -599,7 +654,7 @@ var haveProgTestRun = internal.FeatureTest("BPF_PROG_TEST_RUN", "4.12", func() e
// We know that PROG_TEST_RUN is supported if we get EINTR.
return nil
case errors.Is(err, unix.ENOTSUPP):
case errors.Is(err, sys.ENOTSUPP):
// The first PROG_TEST_RUN patches shipped in 4.12 didn't include
// a test runner for SocketFilter. ENOTSUPP means PROG_TEST_RUN is
// supported, but not for the program type used in the probe.
@ -609,12 +664,12 @@ var haveProgTestRun = internal.FeatureTest("BPF_PROG_TEST_RUN", "4.12", func() e
return err
})
func (p *Program) testRun(opts *RunOptions) (uint32, time.Duration, error) {
func (p *Program) run(opts *RunOptions) (uint32, time.Duration, error) {
if uint(len(opts.Data)) > math.MaxUint32 {
return 0, 0, fmt.Errorf("input is too long")
}
if err := haveProgTestRun(); err != nil {
if err := haveProgRun(); err != nil {
return 0, 0, err
}
@ -647,24 +702,45 @@ func (p *Program) testRun(opts *RunOptions) (uint32, time.Duration, error) {
Cpu: opts.CPU,
}
if attr.Repeat == 0 {
attr.Repeat = 1
}
retry:
for {
err := sys.ProgRun(&attr)
if err == nil {
break
break retry
}
if errors.Is(err, unix.EINTR) {
if attr.Repeat == 1 {
// Older kernels check whether enough repetitions have been
// executed only after checking for pending signals.
//
// run signal? done? run ...
//
// As a result we can get EINTR for repeat==1 even though
// the program was run exactly once. Treat this as a
// successful run instead.
//
// Since commit 607b9cc92bd7 ("bpf: Consolidate shared test timing code")
// the conditions are reversed:
// run done? signal? ...
break retry
}
if opts.Reset != nil {
opts.Reset()
}
continue
continue retry
}
if errors.Is(err, unix.ENOTSUPP) {
return 0, 0, fmt.Errorf("kernel doesn't support testing program type %s: %w", p.Type(), ErrNotSupported)
if errors.Is(err, sys.ENOTSUPP) {
return 0, 0, fmt.Errorf("kernel doesn't support running %s: %w", p.Type(), ErrNotSupported)
}
return 0, 0, fmt.Errorf("can't run test: %w", err)
return 0, 0, err
}
if opts.DataOut != nil {
@ -726,7 +802,14 @@ func LoadPinnedProgram(fileName string, opts *LoadPinOptions) (*Program, error)
return nil, fmt.Errorf("info for %s: %w", fileName, err)
}
return &Program{"", fd, filepath.Base(fileName), fileName, info.Type}, nil
var progName string
if haveObjName() == nil {
progName = info.Name
} else {
progName = filepath.Base(fileName)
}
return &Program{"", fd, progName, fileName, info.Type}, nil
}
// SanitizeName replaces all invalid characters in name with replacement.
@ -770,11 +853,15 @@ var errUnrecognizedAttachType = errors.New("unrecognized attach type")
// find an attach target type in the kernel.
//
// spec may be nil and defaults to the canonical kernel BTF. name together with
// progType and attachType determine which type we need to attach to.
// name, progType and attachType determine which type we need to attach to.
//
// Returns errUnrecognizedAttachType.
func findTargetInKernel(spec *btf.Spec, name string, progType ProgramType, attachType AttachType) (btf.TypeID, error) {
// The attach target may be in a loaded kernel module.
// In that case the returned handle will be non-nil.
// The caller is responsible for closing the handle.
//
// Returns errUnrecognizedAttachType if the combination of progType and attachType
// is not recognised.
func findProgramTargetInKernel(name string, progType ProgramType, attachType AttachType) (*btf.Handle, btf.TypeID, error) {
type match struct {
p ProgramType
a AttachType
@ -782,59 +869,123 @@ func findTargetInKernel(spec *btf.Spec, name string, progType ProgramType, attac
var (
typeName, featureName string
isBTFTypeFunc = true
target btf.Type
)
switch (match{progType, attachType}) {
case match{LSM, AttachLSMMac}:
typeName = "bpf_lsm_" + name
featureName = name + " LSM hook"
target = (*btf.Func)(nil)
case match{Tracing, AttachTraceIter}:
typeName = "bpf_iter_" + name
featureName = name + " iterator"
target = (*btf.Func)(nil)
case match{Tracing, AttachTraceFEntry}:
typeName = name
featureName = fmt.Sprintf("fentry %s", name)
target = (*btf.Func)(nil)
case match{Tracing, AttachTraceFExit}:
typeName = name
featureName = fmt.Sprintf("fexit %s", name)
target = (*btf.Func)(nil)
case match{Tracing, AttachModifyReturn}:
typeName = name
featureName = fmt.Sprintf("fmod_ret %s", name)
target = (*btf.Func)(nil)
case match{Tracing, AttachTraceRawTp}:
typeName = fmt.Sprintf("btf_trace_%s", name)
featureName = fmt.Sprintf("raw_tp %s", name)
isBTFTypeFunc = false
target = (*btf.Typedef)(nil)
default:
return 0, errUnrecognizedAttachType
return nil, 0, errUnrecognizedAttachType
}
spec, err := maybeLoadKernelBTF(spec)
spec, err := btf.LoadKernelSpec()
if err != nil {
return 0, fmt.Errorf("load kernel spec: %w", err)
return nil, 0, fmt.Errorf("load kernel spec: %w", err)
}
var target btf.Type
if isBTFTypeFunc {
var targetFunc *btf.Func
err = spec.TypeByName(typeName, &targetFunc)
target = targetFunc
} else {
var targetTypedef *btf.Typedef
err = spec.TypeByName(typeName, &targetTypedef)
target = targetTypedef
spec, module, err := findTargetInKernel(spec, typeName, &target)
if errors.Is(err, btf.ErrNotFound) {
return nil, 0, &internal.UnsupportedFeatureError{Name: featureName}
}
// See cilium/ebpf#894. Until we can disambiguate between equally-named kernel
// symbols, we should explicitly refuse program loads. They will not reliably
// do what the caller intended.
if errors.Is(err, btf.ErrMultipleMatches) {
return nil, 0, fmt.Errorf("attaching to ambiguous kernel symbol is not supported: %w", err)
}
if err != nil {
if errors.Is(err, btf.ErrNotFound) {
return 0, &internal.UnsupportedFeatureError{
Name: featureName,
}
return nil, 0, fmt.Errorf("find target for %s: %w", featureName, err)
}
id, err := spec.TypeID(target)
return module, id, err
}
// findTargetInKernel attempts to find a named type in the current kernel.
//
// target will point at the found type after a successful call. Searches both
// vmlinux and any loaded modules.
//
// Returns a non-nil handle if the type was found in a module, or btf.ErrNotFound
// if the type wasn't found at all.
func findTargetInKernel(kernelSpec *btf.Spec, typeName string, target *btf.Type) (*btf.Spec, *btf.Handle, error) {
err := kernelSpec.TypeByName(typeName, target)
if errors.Is(err, btf.ErrNotFound) {
spec, module, err := findTargetInModule(kernelSpec, typeName, target)
if err != nil {
return nil, nil, fmt.Errorf("find target in modules: %w", err)
}
return 0, fmt.Errorf("find target for %s: %w", featureName, err)
return spec, module, nil
}
if err != nil {
return nil, nil, fmt.Errorf("find target in vmlinux: %w", err)
}
return kernelSpec, nil, err
}
// findTargetInModule attempts to find a named type in any loaded module.
//
// base must contain the kernel's types and is used to parse kmod BTF. Modules
// are searched in the order they were loaded.
//
// Returns btf.ErrNotFound if the target can't be found in any module.
func findTargetInModule(base *btf.Spec, typeName string, target *btf.Type) (*btf.Spec, *btf.Handle, error) {
it := new(btf.HandleIterator)
defer it.Handle.Close()
for it.Next() {
info, err := it.Handle.Info()
if err != nil {
return nil, nil, fmt.Errorf("get info for BTF ID %d: %w", it.ID, err)
}
if !info.IsModule() {
continue
}
spec, err := it.Handle.Spec(base)
if err != nil {
return nil, nil, fmt.Errorf("parse types for module %s: %w", info.Name, err)
}
err = spec.TypeByName(typeName, target)
if errors.Is(err, btf.ErrNotFound) {
continue
}
if err != nil {
return nil, nil, fmt.Errorf("lookup type in module %s: %w", info.Name, err)
}
return spec, it.Take(), nil
}
if err := it.Err(); err != nil {
return nil, nil, fmt.Errorf("iterate modules: %w", err)
}
return spec.TypeID(target)
return nil, nil, btf.ErrNotFound
}
// find an attach target type in a program.

View file

@ -6,6 +6,8 @@
# $ ./run-tests.sh 5.4
# Run a subset of tests:
# $ ./run-tests.sh 5.4 ./link
# Run using a local kernel image
# $ ./run-tests.sh /path/to/bzImage
set -euo pipefail
@ -95,38 +97,45 @@ elif [[ "${1:-}" = "--exec-test" ]]; then
exit $rc # this return code is "swallowed" by qemu
fi
readonly kernel_version="${1:-}"
if [[ -z "${kernel_version}" ]]; then
echo "Expecting kernel version as first argument"
if [[ -z "${1:-}" ]]; then
echo "Expecting kernel version or path as first argument"
exit 1
fi
shift
readonly kernel="linux-${kernel_version}.bz"
readonly selftests="linux-${kernel_version}-selftests-bpf.tgz"
readonly input="$(mktemp -d)"
readonly tmp_dir="${TMPDIR:-/tmp}"
readonly branch="${BRANCH:-master}"
fetch() {
echo Fetching "${1}"
pushd "${tmp_dir}" > /dev/null
curl -s -L -O --fail --etag-compare "${1}.etag" --etag-save "${1}.etag" "https://github.com/cilium/ci-kernels/raw/${branch}/${1}"
curl --no-progress-meter -L -O --fail --etag-compare "${1}.etag" --etag-save "${1}.etag" "https://github.com/cilium/ci-kernels/raw/${BRANCH:-master}/${1}"
local ret=$?
popd > /dev/null
return $ret
}
fetch "${kernel}"
cp "${tmp_dir}/${kernel}" "${input}/bzImage"
if fetch "${selftests}"; then
echo "Decompressing selftests"
mkdir "${input}/bpf"
tar --strip-components=4 -xf "${tmp_dir}/${selftests}" -C "${input}/bpf"
if [[ -f "${1}" ]]; then
readonly kernel="${1}"
cp "${1}" "${input}/bzImage"
else
echo "No selftests found, disabling"
# LINUX_VERSION_CODE test compares this to discovered value.
export KERNEL_VERSION="${1}"
readonly kernel="linux-${1}.bz"
readonly selftests="linux-${1}-selftests-bpf.tgz"
fetch "${kernel}"
cp "${tmp_dir}/${kernel}" "${input}/bzImage"
if fetch "${selftests}"; then
echo "Decompressing selftests"
mkdir "${input}/bpf"
tar --strip-components=4 -xf "${tmp_dir}/${selftests}" -C "${input}/bpf"
else
echo "No selftests found, disabling"
fi
fi
shift
args=(-short -coverpkg=./... -coverprofile=coverage.out -count 1 ./...)
if (( $# > 0 )); then
@ -135,11 +144,9 @@ fi
export GOFLAGS=-mod=readonly
export CGO_ENABLED=0
# LINUX_VERSION_CODE test compares this to discovered value.
export KERNEL_VERSION="${kernel_version}"
echo Testing on "${kernel_version}"
echo Testing on "${kernel}"
go test -exec "$script --exec-vm $input" "${args[@]}"
echo "Test successful on ${kernel_version}"
echo "Test successful on ${kernel}"
rm -r "${input}"

View file

@ -4,13 +4,25 @@ import (
"bytes"
"errors"
"fmt"
"os"
"runtime"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/tracefs"
"github.com/cilium/ebpf/internal/unix"
)
var (
// pre-allocating these here since they may
// get called in hot code paths and cause
// unnecessary memory allocations
sysErrKeyNotExist = sys.Error(ErrKeyNotExist, unix.ENOENT)
sysErrKeyExist = sys.Error(ErrKeyExist, unix.EEXIST)
sysErrNotSupported = sys.Error(ErrNotSupported, sys.ENOTSUPP)
)
// invalidBPFObjNameChar returns true if char may not appear in
// a BPF object name.
func invalidBPFObjNameChar(char rune) bool {
@ -47,7 +59,7 @@ func progLoad(insns asm.Instructions, typ ProgramType, license string) (*sys.FD,
})
}
var haveNestedMaps = internal.FeatureTest("nested maps", "4.12", func() error {
var haveNestedMaps = internal.NewFeatureTest("nested maps", "4.12", func() error {
_, err := sys.MapCreate(&sys.MapCreateAttr{
MapType: sys.MapType(ArrayOfMaps),
KeySize: 4,
@ -65,7 +77,7 @@ var haveNestedMaps = internal.FeatureTest("nested maps", "4.12", func() error {
return err
})
var haveMapMutabilityModifiers = internal.FeatureTest("read- and write-only maps", "5.2", func() error {
var haveMapMutabilityModifiers = internal.NewFeatureTest("read- and write-only maps", "5.2", func() error {
// This checks BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG. Since
// BPF_MAP_FREEZE appeared in 5.2 as well we don't do a separate check.
m, err := sys.MapCreate(&sys.MapCreateAttr{
@ -82,7 +94,7 @@ var haveMapMutabilityModifiers = internal.FeatureTest("read- and write-only maps
return nil
})
var haveMmapableMaps = internal.FeatureTest("mmapable maps", "5.5", func() error {
var haveMmapableMaps = internal.NewFeatureTest("mmapable maps", "5.5", func() error {
// This checks BPF_F_MMAPABLE, which appeared in 5.5 for array maps.
m, err := sys.MapCreate(&sys.MapCreateAttr{
MapType: sys.MapType(Array),
@ -98,7 +110,7 @@ var haveMmapableMaps = internal.FeatureTest("mmapable maps", "5.5", func() error
return nil
})
var haveInnerMaps = internal.FeatureTest("inner maps", "5.10", func() error {
var haveInnerMaps = internal.NewFeatureTest("inner maps", "5.10", func() error {
// This checks BPF_F_INNER_MAP, which appeared in 5.10.
m, err := sys.MapCreate(&sys.MapCreateAttr{
MapType: sys.MapType(Array),
@ -114,7 +126,7 @@ var haveInnerMaps = internal.FeatureTest("inner maps", "5.10", func() error {
return nil
})
var haveNoPreallocMaps = internal.FeatureTest("prealloc maps", "4.6", func() error {
var haveNoPreallocMaps = internal.NewFeatureTest("prealloc maps", "4.6", func() error {
// This checks BPF_F_NO_PREALLOC, which appeared in 4.6.
m, err := sys.MapCreate(&sys.MapCreateAttr{
MapType: sys.MapType(Hash),
@ -136,15 +148,15 @@ func wrapMapError(err error) error {
}
if errors.Is(err, unix.ENOENT) {
return sys.Error(ErrKeyNotExist, unix.ENOENT)
return sysErrKeyNotExist
}
if errors.Is(err, unix.EEXIST) {
return sys.Error(ErrKeyExist, unix.EEXIST)
return sysErrKeyExist
}
if errors.Is(err, unix.ENOTSUPP) {
return sys.Error(ErrNotSupported, unix.ENOTSUPP)
if errors.Is(err, sys.ENOTSUPP) {
return sysErrNotSupported
}
if errors.Is(err, unix.E2BIG) {
@ -154,7 +166,7 @@ func wrapMapError(err error) error {
return err
}
var haveObjName = internal.FeatureTest("object names", "4.15", func() error {
var haveObjName = internal.NewFeatureTest("object names", "4.15", func() error {
attr := sys.MapCreateAttr{
MapType: sys.MapType(Array),
KeySize: 4,
@ -172,7 +184,7 @@ var haveObjName = internal.FeatureTest("object names", "4.15", func() error {
return nil
})
var objNameAllowsDot = internal.FeatureTest("dot in object names", "5.2", func() error {
var objNameAllowsDot = internal.NewFeatureTest("dot in object names", "5.2", func() error {
if err := haveObjName(); err != nil {
return err
}
@ -194,7 +206,7 @@ var objNameAllowsDot = internal.FeatureTest("dot in object names", "5.2", func()
return nil
})
var haveBatchAPI = internal.FeatureTest("map batch api", "5.6", func() error {
var haveBatchAPI = internal.NewFeatureTest("map batch api", "5.6", func() error {
var maxEntries uint32 = 2
attr := sys.MapCreateAttr{
MapType: sys.MapType(Hash),
@ -226,7 +238,7 @@ var haveBatchAPI = internal.FeatureTest("map batch api", "5.6", func() error {
return nil
})
var haveProbeReadKernel = internal.FeatureTest("bpf_probe_read_kernel", "5.5", func() error {
var haveProbeReadKernel = internal.NewFeatureTest("bpf_probe_read_kernel", "5.5", func() error {
insns := asm.Instructions{
asm.Mov.Reg(asm.R1, asm.R10),
asm.Add.Imm(asm.R1, -8),
@ -244,7 +256,7 @@ var haveProbeReadKernel = internal.FeatureTest("bpf_probe_read_kernel", "5.5", f
return nil
})
var haveBPFToBPFCalls = internal.FeatureTest("bpf2bpf calls", "4.16", func() error {
var haveBPFToBPFCalls = internal.NewFeatureTest("bpf2bpf calls", "4.16", func() error {
insns := asm.Instructions{
asm.Call.Label("prog2").WithSymbol("prog1"),
asm.Return(),
@ -262,3 +274,32 @@ var haveBPFToBPFCalls = internal.FeatureTest("bpf2bpf calls", "4.16", func() err
_ = fd.Close()
return nil
})
var haveSyscallWrapper = internal.NewFeatureTest("syscall wrapper", "4.17", func() error {
prefix := internal.PlatformPrefix()
if prefix == "" {
return fmt.Errorf("unable to find the platform prefix for (%s)", runtime.GOARCH)
}
args := tracefs.ProbeArgs{
Type: tracefs.Kprobe,
Symbol: prefix + "sys_bpf",
Pid: -1,
}
var err error
args.Group, err = tracefs.RandomGroup("ebpf_probe")
if err != nil {
return err
}
evt, err := tracefs.NewEvent(args)
if errors.Is(err, os.ErrNotExist) {
return internal.ErrNotSupported
}
if err != nil {
return err
}
return evt.Close()
})

View file

@ -1,6 +1,7 @@
package ebpf
import (
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
@ -10,11 +11,6 @@ import (
// that will be initialized in the kernel.
type MapType uint32
// Max returns the latest supported MapType.
func (MapType) Max() MapType {
return maxMapType - 1
}
// All the various map types that can be created
const (
UnspecifiedMap MapType = iota
@ -99,8 +95,6 @@ const (
InodeStorage
// TaskStorage - Specialized local storage map for task_struct.
TaskStorage
// maxMapType - Bound enum of MapTypes, has to be last in enum.
maxMapType
)
// hasPerCPUValue returns true if the Map stores a value per CPU.
@ -120,25 +114,9 @@ func (mt MapType) canStoreProgram() bool {
return mt == ProgramArray
}
// hasBTF returns true if the map type supports BTF key/value metadata.
func (mt MapType) hasBTF() bool {
switch mt {
case PerfEventArray, CGroupArray, StackTrace, ArrayOfMaps, HashOfMaps, DevMap,
DevMapHash, CPUMap, XSKMap, SockMap, SockHash, Queue, Stack, RingBuf:
return false
default:
return true
}
}
// ProgramType of the eBPF program
type ProgramType uint32
// Max return the latest supported ProgramType.
func (ProgramType) Max() ProgramType {
return maxProgramType - 1
}
// eBPF program types
const (
UnspecifiedProgram ProgramType = iota
@ -173,7 +151,6 @@ const (
LSM
SkLookup
Syscall
maxProgramType
)
// AttachType of the eBPF program, needed to differentiate allowed context accesses in
@ -229,6 +206,7 @@ const (
AttachSkReuseportSelect
AttachSkReuseportSelectOrMigrate
AttachPerfEvent
AttachTraceKprobeMulti
)
// AttachFlags of the eBPF program used in BPF_PROG_ATTACH command
@ -282,3 +260,20 @@ type BatchOptions struct {
ElemFlags uint64
Flags uint64
}
// LogLevel controls the verbosity of the kernel's eBPF program verifier.
// These constants can be used for the ProgramOptions.LogLevel field.
type LogLevel = sys.LogLevel
const (
// Print verifier state at branch points.
LogLevelBranch = sys.BPF_LOG_LEVEL1
// Print verifier state for every instruction.
// Available since Linux v5.2.
LogLevelInstruction = sys.BPF_LOG_LEVEL2
// Print verifier errors and stats at the end of the verification process.
// Available since Linux v5.2.
LogLevelStats = sys.BPF_LOG_STATS
)

View file

@ -38,12 +38,11 @@ func _() {
_ = x[RingBuf-27]
_ = x[InodeStorage-28]
_ = x[TaskStorage-29]
_ = x[maxMapType-30]
}
const _MapType_name = "UnspecifiedMapHashArrayProgramArrayPerfEventArrayPerCPUHashPerCPUArrayStackTraceCGroupArrayLRUHashLRUCPUHashLPMTrieArrayOfMapsHashOfMapsDevMapSockMapCPUMapXSKMapSockHashCGroupStorageReusePortSockArrayPerCPUCGroupStorageQueueStackSkStorageDevMapHashStructOpsMapRingBufInodeStorageTaskStoragemaxMapType"
const _MapType_name = "UnspecifiedMapHashArrayProgramArrayPerfEventArrayPerCPUHashPerCPUArrayStackTraceCGroupArrayLRUHashLRUCPUHashLPMTrieArrayOfMapsHashOfMapsDevMapSockMapCPUMapXSKMapSockHashCGroupStorageReusePortSockArrayPerCPUCGroupStorageQueueStackSkStorageDevMapHashStructOpsMapRingBufInodeStorageTaskStorage"
var _MapType_index = [...]uint16{0, 14, 18, 23, 35, 49, 59, 70, 80, 91, 98, 108, 115, 126, 136, 142, 149, 155, 161, 169, 182, 200, 219, 224, 229, 238, 248, 260, 267, 279, 290, 300}
var _MapType_index = [...]uint16{0, 14, 18, 23, 35, 49, 59, 70, 80, 91, 98, 108, 115, 126, 136, 142, 149, 155, 161, 169, 182, 200, 219, 224, 229, 238, 248, 260, 267, 279, 290}
func (i MapType) String() string {
if i >= MapType(len(_MapType_index)-1) {
@ -87,12 +86,11 @@ func _() {
_ = x[LSM-29]
_ = x[SkLookup-30]
_ = x[Syscall-31]
_ = x[maxProgramType-32]
}
const _ProgramType_name = "UnspecifiedProgramSocketFilterKprobeSchedCLSSchedACTTracePointXDPPerfEventCGroupSKBCGroupSockLWTInLWTOutLWTXmitSockOpsSkSKBCGroupDeviceSkMsgRawTracepointCGroupSockAddrLWTSeg6LocalLircMode2SkReuseportFlowDissectorCGroupSysctlRawTracepointWritableCGroupSockoptTracingStructOpsExtensionLSMSkLookupSyscallmaxProgramType"
const _ProgramType_name = "UnspecifiedProgramSocketFilterKprobeSchedCLSSchedACTTracePointXDPPerfEventCGroupSKBCGroupSockLWTInLWTOutLWTXmitSockOpsSkSKBCGroupDeviceSkMsgRawTracepointCGroupSockAddrLWTSeg6LocalLircMode2SkReuseportFlowDissectorCGroupSysctlRawTracepointWritableCGroupSockoptTracingStructOpsExtensionLSMSkLookupSyscall"
var _ProgramType_index = [...]uint16{0, 18, 30, 36, 44, 52, 62, 65, 74, 83, 93, 98, 104, 111, 118, 123, 135, 140, 153, 167, 179, 188, 199, 212, 224, 245, 258, 265, 274, 283, 286, 294, 301, 315}
var _ProgramType_index = [...]uint16{0, 18, 30, 36, 44, 52, 62, 65, 74, 83, 93, 98, 104, 111, 118, 123, 135, 140, 153, 167, 179, 188, 199, 212, 224, 245, 258, 265, 274, 283, 286, 294, 301}
func (i ProgramType) String() string {
if i >= ProgramType(len(_ProgramType_index)-1) {

27
vendor/golang.org/x/exp/LICENSE generated vendored Normal file
View file

@ -0,0 +1,27 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

22
vendor/golang.org/x/exp/PATENTS generated vendored Normal file
View file

@ -0,0 +1,22 @@
Additional IP Rights Grant (Patents)
"This implementation" means the copyrightable works distributed by
Google as part of the Go project.
Google hereby grants to You a perpetual, worldwide, non-exclusive,
no-charge, royalty-free, irrevocable (except as stated in this section)
patent license to make, have made, use, offer to sell, sell, import,
transfer and otherwise run, modify and propagate the contents of this
implementation of Go, where such license applies only to those patent
claims, both currently owned or controlled by Google and acquired in
the future, licensable by Google that are necessarily infringed by this
implementation of Go. This grant does not include claims that would be
infringed only as a consequence of further modification of this
implementation. If you or your agent or exclusive licensee institute or
order or agree to the institution of patent litigation against any
entity (including a cross-claim or counterclaim in a lawsuit) alleging
that this implementation of Go or any code incorporated within this
implementation of Go constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any patent
rights granted to you under this License for this implementation of Go
shall terminate as of the date such litigation is filed.

50
vendor/golang.org/x/exp/constraints/constraints.go generated vendored Normal file
View file

@ -0,0 +1,50 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package constraints defines a set of useful constraints to be used
// with type parameters.
package constraints

// Signed is a constraint that permits any signed integer type.
// If future releases of Go add new predeclared signed integer types,
// this constraint will be modified to include them.
//
// The ~T form permits any type whose underlying type is T, so named
// types defined over these integers also satisfy the constraint.
type Signed interface {
	~int | ~int8 | ~int16 | ~int32 | ~int64
}

// Unsigned is a constraint that permits any unsigned integer type.
// If future releases of Go add new predeclared unsigned integer types,
// this constraint will be modified to include them.
type Unsigned interface {
	~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr
}

// Integer is a constraint that permits any integer type.
// If future releases of Go add new predeclared integer types,
// this constraint will be modified to include them.
type Integer interface {
	Signed | Unsigned
}

// Float is a constraint that permits any floating-point type.
// If future releases of Go add new predeclared floating-point types,
// this constraint will be modified to include them.
type Float interface {
	~float32 | ~float64
}

// Complex is a constraint that permits any complex numeric type.
// If future releases of Go add new predeclared complex numeric types,
// this constraint will be modified to include them.
type Complex interface {
	~complex64 | ~complex128
}

// Ordered is a constraint that permits any ordered type: any type
// that supports the operators < <= >= >.
// If future releases of Go add new ordered types,
// this constraint will be modified to include them.
type Ordered interface {
	Integer | Float | ~string
}

94
vendor/golang.org/x/exp/maps/maps.go generated vendored Normal file
View file

@ -0,0 +1,94 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package maps defines various functions useful with maps of any type.
package maps
// Keys returns the keys of the map m.
// The keys will be in an indeterminate order.
func Keys[M ~map[K]V, K comparable, V any](m M) []K {
	out := make([]K, 0, len(m)) // pre-size: one allocation
	for key := range m {
		out = append(out, key)
	}
	return out
}
// Values returns the values of the map m.
// The values will be in an indeterminate order.
func Values[M ~map[K]V, K comparable, V any](m M) []V {
	out := make([]V, 0, len(m)) // pre-size: one allocation
	for _, val := range m {
		out = append(out, val)
	}
	return out
}
// Equal reports whether two maps contain the same key/value pairs.
// Values are compared using ==.
func Equal[M1, M2 ~map[K]V, K, V comparable](m1 M1, m2 M2) bool {
	if len(m1) != len(m2) {
		return false
	}
	for key, want := range m1 {
		got, present := m2[key]
		if !present || got != want {
			return false
		}
	}
	return true
}
// EqualFunc is like Equal, but compares values using eq.
// Keys are still compared with ==.
func EqualFunc[M1 ~map[K]V1, M2 ~map[K]V2, K comparable, V1, V2 any](m1 M1, m2 M2, eq func(V1, V2) bool) bool {
	if len(m1) != len(m2) {
		return false
	}
	for key, v1 := range m1 {
		v2, present := m2[key]
		if !present || !eq(v1, v2) {
			return false
		}
	}
	return true
}
// Clear removes all entries from m, leaving it empty.
func Clear[M ~map[K]V, K comparable, V any](m M) {
	// Deleting while ranging is well defined in Go and removes every key.
	for key := range m {
		delete(m, key)
	}
}
// Clone returns a copy of m. This is a shallow clone:
// the new keys and values are set using ordinary assignment.
func Clone[M ~map[K]V, K comparable, V any](m M) M {
	if m == nil {
		return nil // preserve nil rather than returning an empty map
	}
	out := make(M, len(m))
	for key, val := range m {
		out[key] = val
	}
	return out
}
// Copy copies all key/value pairs in src adding them to dst.
// When a key in src is already present in dst,
// the value in dst will be overwritten by the value associated
// with the key in src.
func Copy[M1 ~map[K]V, M2 ~map[K]V, K comparable, V any](dst M1, src M2) {
	for key, val := range src {
		dst[key] = val
	}
}
// DeleteFunc deletes any key/value pairs from m for which del returns true.
func DeleteFunc[M ~map[K]V, K comparable, V any](m M, del func(K, V) bool) {
	for key, val := range m {
		if del(key, val) {
			delete(m, key)
		}
	}
}

258
vendor/golang.org/x/exp/slices/slices.go generated vendored Normal file
View file

@ -0,0 +1,258 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package slices defines various functions useful with slices of any type.
// Unless otherwise specified, these functions all apply to the elements
// of a slice at index 0 <= i < len(s).
//
// Note that the less function in IsSortedFunc, SortFunc, SortStableFunc requires a
// strict weak ordering (https://en.wikipedia.org/wiki/Weak_ordering#Strict_weak_orderings),
// or the sorting may fail to sort correctly. A common case is when sorting slices of
// floating-point numbers containing NaN values.
package slices
import "golang.org/x/exp/constraints"
// Equal reports whether two slices are equal: the same length and all
// elements equal. If the lengths are different, Equal returns false.
// Otherwise, the elements are compared in increasing index order, and the
// comparison stops at the first unequal pair.
// Floating point NaNs are not considered equal.
func Equal[E comparable](s1, s2 []E) bool {
	if len(s1) != len(s2) {
		return false
	}
	for idx, a := range s1 {
		if a != s2[idx] {
			return false
		}
	}
	return true
}
// EqualFunc reports whether two slices are equal using a comparison
// function on each pair of elements. If the lengths are different,
// EqualFunc returns false. Otherwise, the elements are compared in
// increasing index order, and the comparison stops at the first index
// for which eq returns false.
func EqualFunc[E1, E2 any](s1 []E1, s2 []E2, eq func(E1, E2) bool) bool {
	if len(s1) != len(s2) {
		return false
	}
	for idx, a := range s1 {
		if !eq(a, s2[idx]) {
			return false
		}
	}
	return true
}
// Compare compares the elements of s1 and s2.
// The elements are compared sequentially, starting at index 0,
// until one element is not equal to the other.
// The result of comparing the first non-matching elements is returned.
// If both slices are equal until one of them ends, the shorter slice is
// considered less than the longer one.
// The result is 0 if s1 == s2, -1 if s1 < s2, and +1 if s1 > s2.
// Comparisons involving floating point NaNs are ignored.
func Compare[E constraints.Ordered](s1, s2 []E) int {
	for idx, a := range s1 {
		if idx >= len(s2) {
			return +1 // s2 is a proper prefix of s1
		}
		b := s2[idx]
		if a < b {
			return -1
		}
		if a > b {
			return +1
		}
	}
	if len(s1) < len(s2) {
		return -1 // s1 is a proper prefix of s2
	}
	return 0
}
// CompareFunc is like Compare but uses a comparison function
// on each pair of elements. The elements are compared in increasing
// index order, and the comparisons stop after the first time cmp
// returns non-zero.
// The result is the first non-zero result of cmp; if cmp always
// returns 0 the result is 0 if len(s1) == len(s2), -1 if len(s1) < len(s2),
// and +1 if len(s1) > len(s2).
func CompareFunc[E1, E2 any](s1 []E1, s2 []E2, cmp func(E1, E2) int) int {
	for idx, a := range s1 {
		if idx >= len(s2) {
			return +1 // s2 is a proper prefix of s1
		}
		if c := cmp(a, s2[idx]); c != 0 {
			return c
		}
	}
	if len(s1) < len(s2) {
		return -1 // s1 is a proper prefix of s2
	}
	return 0
}
// Index returns the index of the first occurrence of v in s,
// or -1 if not present.
func Index[E comparable](s []E, v E) int {
	for idx := range s {
		if s[idx] == v {
			return idx
		}
	}
	return -1
}
// IndexFunc returns the first index i satisfying f(s[i]),
// or -1 if none do.
func IndexFunc[E any](s []E, f func(E) bool) int {
	for idx := range s {
		if f(s[idx]) {
			return idx
		}
	}
	return -1
}
// Contains reports whether v is present in s.
func Contains[E comparable](s []E, v E) bool {
	return Index(s, v) != -1
}
// ContainsFunc reports whether at least one
// element e of s satisfies f(e).
func ContainsFunc[E any](s []E, f func(E) bool) bool {
	return IndexFunc(s, f) != -1
}
// Insert inserts the values v... into s at index i,
// returning the modified slice.
// In the returned slice r, r[i] == v[0].
// Insert panics if i is out of range.
// This function is O(len(s) + len(v)).
func Insert[S ~[]E, E any](s S, i int, v ...E) S {
	total := len(s) + len(v)
	if total <= cap(s) {
		// Enough capacity: extend in place, shift the tail right,
		// then drop v into the gap. copy handles the overlap.
		grown := s[:total]
		copy(grown[i+len(v):], s[i:])
		copy(grown[i:], v)
		return grown
	}
	// Not enough capacity: assemble a fresh slice from the three parts.
	out := make(S, total)
	copy(out, s[:i])
	copy(out[i:], v)
	copy(out[i+len(v):], s[i:])
	return out
}
// Delete removes the elements s[i:j] from s, returning the modified slice.
// Delete panics if s[i:j] is not a valid slice of s.
// Delete modifies the contents of the slice s; it does not create a new slice.
// Delete is O(len(s)-j), so if many items must be deleted, it is better to
// make a single call deleting them all together than to delete one at a time.
// Delete might not modify the elements s[len(s)-(j-i):len(s)]. If those
// elements contain pointers you might consider zeroing those elements so that
// objects they reference can be garbage collected.
func Delete[S ~[]E, E any](s S, i, j int) S {
	_ = s[i:j] // bounds check: panics exactly as the slice expression would
	// Shift the tail left over the deleted window; reuses the backing array.
	return append(s[:i], s[j:]...)
}
// Replace replaces the elements s[i:j] by the given v, and returns the
// modified slice. Replace panics if s[i:j] is not a valid slice of s.
func Replace[S ~[]E, E any](s S, i, j int, v ...E) S {
	_ = s[i:j] // verify that i:j is a valid subslice
	total := len(s[:i]) + len(v) + len(s[j:])
	if total <= cap(s) {
		// Enough capacity: move the tail into position first (copy
		// tolerates overlap), then write v over the window.
		grown := s[:total]
		copy(grown[i+len(v):], s[j:])
		copy(grown[i:], v)
		return grown
	}
	// Not enough capacity: build the result from the three parts.
	out := make(S, total)
	copy(out, s[:i])
	copy(out[i:], v)
	copy(out[i+len(v):], s[j:])
	return out
}
// Clone returns a copy of the slice.
// The elements are copied using assignment, so this is a shallow clone.
func Clone[S ~[]E, E any](s S) S {
	if s == nil {
		return nil // preserve nil in case it matters
	}
	out := make(S, 0, len(s))
	return append(out, s...)
}
// Compact replaces consecutive runs of equal elements with a single copy.
// This is like the uniq command found on Unix.
// Compact modifies the contents of the slice s; it does not create a new slice.
// When Compact discards m elements in total, it might not modify the elements
// s[len(s)-m:len(s)]. If those elements contain pointers you might consider
// zeroing those elements so that objects they reference can be garbage collected.
func Compact[S ~[]E, E comparable](s S) S {
	if len(s) < 2 {
		return s
	}
	w := 1 // write cursor: everything before w is deduplicated
	prev := s[0]
	for _, v := range s[1:] {
		if v == prev {
			continue // duplicate of the previous kept element
		}
		s[w] = v
		w++
		prev = v
	}
	return s[:w]
}
// CompactFunc is like Compact but uses a comparison function.
func CompactFunc[S ~[]E, E any](s S, eq func(E, E) bool) S {
	if len(s) < 2 {
		return s
	}
	w := 1 // write cursor: everything before w is deduplicated
	prev := s[0]
	for _, v := range s[1:] {
		if eq(v, prev) {
			continue // eq-duplicate of the previous kept element
		}
		s[w] = v
		w++
		prev = v
	}
	return s[:w]
}
// Grow increases the slice's capacity, if necessary, to guarantee space for
// another n elements. After Grow(n), at least n elements can be appended
// to the slice without another allocation. If n is negative or too large to
// allocate the memory, Grow panics.
func Grow[S ~[]E, E any](s S, n int) S {
	if n < 0 {
		panic("cannot be negative")
	}
	need := n - (cap(s) - len(s)) // extra capacity still missing
	if need > 0 {
		// TODO(https://go.dev/issue/53888): Make using []E instead of S
		// to workaround a compiler bug where the runtime.growslice optimization
		// does not take effect. Revert when the compiler is fixed.
		s = append([]E(s)[:cap(s)], make([]E, need)...)[:len(s)]
	}
	return s
}
// Clip removes unused capacity from the slice, returning s[:len(s):len(s)].
func Clip[S ~[]E, E any](s S) S {
	n := len(s)
	return s[:n:n] // three-index slice: capacity pinned to length
}

126
vendor/golang.org/x/exp/slices/sort.go generated vendored Normal file
View file

@ -0,0 +1,126 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package slices
import (
"math/bits"
"golang.org/x/exp/constraints"
)
// Sort sorts a slice of any ordered type in ascending order.
// Sort may fail to sort correctly when sorting slices of floating-point
// numbers containing Not-a-number (NaN) values.
// Use slices.SortFunc(x, func(a, b float64) bool {return a < b || (math.IsNaN(a) && !math.IsNaN(b))})
// instead if the input may contain NaNs.
func Sort[E constraints.Ordered](x []E) {
	n := len(x)
	// bits.Len(uint(n)) ≈ ceil(log2(n)) is the bad-pivot budget: pdqsort
	// falls back to heapsort once it is exhausted, keeping O(n log n).
	pdqsortOrdered(x, 0, n, bits.Len(uint(n)))
}
// SortFunc sorts the slice x in ascending order as determined by the less function.
// This sort is not guaranteed to be stable.
//
// SortFunc requires that less is a strict weak ordering.
// See https://en.wikipedia.org/wiki/Weak_ordering#Strict_weak_orderings.
func SortFunc[E any](x []E, less func(a, b E) bool) {
	n := len(x)
	// bits.Len(uint(n)) ≈ ceil(log2(n)) is the bad-pivot budget before
	// pdqsort falls back to heapsort (see pdqsortLessFunc).
	pdqsortLessFunc(x, 0, n, bits.Len(uint(n)), less)
}
// SortStableFunc sorts the slice x while keeping the original order of equal
// elements, using less to compare elements.
func SortStableFunc[E any](x []E, less func(a, b E) bool) {
	// Delegates to a block insertion sort followed by symmetric merging
	// (stableLessFunc), which is stable and works in place.
	stableLessFunc(x, len(x), less)
}
// IsSorted reports whether x is sorted in ascending order.
func IsSorted[E constraints.Ordered](x []E) bool {
	// Scan forward; any adjacent inversion disproves sortedness.
	for i := 1; i < len(x); i++ {
		if x[i] < x[i-1] {
			return false
		}
	}
	return true
}
// IsSortedFunc reports whether x is sorted in ascending order, with less as the
// comparison function.
func IsSortedFunc[E any](x []E, less func(a, b E) bool) bool {
	// Scan forward; any adjacent inversion disproves sortedness.
	for i := 1; i < len(x); i++ {
		if less(x[i], x[i-1]) {
			return false
		}
	}
	return true
}
// BinarySearch searches for target in a sorted slice and returns the position
// where target is found, or the position where target would appear in the
// sort order; it also returns a bool saying whether the target is really found
// in the slice. The slice must be sorted in increasing order.
func BinarySearch[E constraints.Ordered](x []E, target E) (int, bool) {
	// Invariant: x[lo-1] < target and x[hi] >= target
	// (with virtual sentinels x[-1] < target, x[len(x)] >= target).
	lo, hi := 0, len(x)
	for lo < hi {
		mid := int(uint(lo+hi) >> 1) // overflow-safe midpoint
		if x[mid] < target {
			lo = mid + 1
		} else {
			hi = mid
		}
	}
	// lo == hi is the first position with x[lo] >= target.
	return lo, lo < len(x) && x[lo] == target
}
// BinarySearchFunc works like BinarySearch, but uses a custom comparison
// function. The slice must be sorted in increasing order, where "increasing" is
// defined by cmp. cmp(a, b) is expected to return an integer comparing the two
// parameters: 0 if a == b, a negative number if a < b and a positive number if
// a > b.
func BinarySearchFunc[E, T any](x []E, target T, cmp func(E, T) int) (int, bool) {
	// Invariant: cmp(x[lo-1], target) < 0 and cmp(x[hi], target) >= 0
	// (with virtual sentinels at -1 and len(x)).
	lo, hi := 0, len(x)
	for lo < hi {
		mid := int(uint(lo+hi) >> 1) // overflow-safe midpoint
		if cmp(x[mid], target) < 0 {
			lo = mid + 1
		} else {
			hi = mid
		}
	}
	// lo == hi is the first position with cmp(x[lo], target) >= 0.
	return lo, lo < len(x) && cmp(x[lo], target) == 0
}
type sortedHint int // hint for pdqsort when choosing the pivot

const (
	unknownHint sortedHint = iota
	increasingHint
	decreasingHint
)

// xorshift is a tiny pseudo-random generator used only to scatter elements
// in breakPatterns*; it is deterministic and not cryptographically secure.
// xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf
type xorshift uint64

// Next advances the generator state in place and returns the new value.
func (r *xorshift) Next() uint64 {
	*r ^= *r << 13
	*r ^= *r >> 17
	*r ^= *r << 5
	return uint64(*r)
}

// nextPowerOfTwo returns 1 << bits.Len(uint(length)), a power of two greater
// than length; breakPatterns* uses it (minus one) as a bit mask.
func nextPowerOfTwo(length int) uint {
	return 1 << bits.Len(uint(length))
}

479
vendor/golang.org/x/exp/slices/zsortfunc.go generated vendored Normal file
View file

@ -0,0 +1,479 @@
// Code generated by gen_sort_variants.go; DO NOT EDIT.
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package slices
// insertionSortLessFunc sorts data[a:b] using insertion sort.
func insertionSortLessFunc[E any](data []E, a, b int, less func(a, b E) bool) {
	for i := a + 1; i < b; i++ {
		for j := i; j > a && less(data[j], data[j-1]); j-- {
			data[j], data[j-1] = data[j-1], data[j]
		}
	}
}

// siftDownLessFunc implements the heap property on data[lo:hi].
// first is an offset into the array where the root of the heap lies.
func siftDownLessFunc[E any](data []E, lo, hi, first int, less func(a, b E) bool) {
	root := lo
	for {
		child := 2*root + 1
		if child >= hi {
			break
		}
		if child+1 < hi && less(data[first+child], data[first+child+1]) {
			child++
		}
		if !less(data[first+root], data[first+child]) {
			return
		}
		data[first+root], data[first+child] = data[first+child], data[first+root]
		root = child
	}
}

// heapSortLessFunc sorts data[a:b] using heapsort.
func heapSortLessFunc[E any](data []E, a, b int, less func(a, b E) bool) {
	first := a
	lo := 0
	hi := b - a

	// Build heap with greatest element at top.
	for i := (hi - 1) / 2; i >= 0; i-- {
		siftDownLessFunc(data, i, hi, first, less)
	}

	// Pop elements, largest first, into end of data.
	for i := hi - 1; i >= 0; i-- {
		data[first], data[first+i] = data[first+i], data[first]
		siftDownLessFunc(data, lo, i, first, less)
	}
}

// pdqsortLessFunc sorts data[a:b].
// The algorithm based on pattern-defeating quicksort(pdqsort), but without the optimizations from BlockQuicksort.
// pdqsort paper: https://arxiv.org/pdf/2106.05123.pdf
// C++ implementation: https://github.com/orlp/pdqsort
// Rust implementation: https://docs.rs/pdqsort/latest/pdqsort/
// limit is the number of allowed bad (very unbalanced) pivots before falling back to heapsort.
func pdqsortLessFunc[E any](data []E, a, b, limit int, less func(a, b E) bool) {
	const maxInsertion = 12

	var (
		wasBalanced    = true // whether the last partitioning was reasonably balanced
		wasPartitioned = true // whether the slice was already partitioned
	)

	for {
		length := b - a

		if length <= maxInsertion {
			insertionSortLessFunc(data, a, b, less)
			return
		}

		// Fall back to heapsort if too many bad choices were made.
		if limit == 0 {
			heapSortLessFunc(data, a, b, less)
			return
		}

		// If the last partitioning was imbalanced, we need to break patterns.
		if !wasBalanced {
			breakPatternsLessFunc(data, a, b, less)
			limit--
		}

		pivot, hint := choosePivotLessFunc(data, a, b, less)
		if hint == decreasingHint {
			reverseRangeLessFunc(data, a, b, less)
			// The chosen pivot was pivot-a elements after the start of the array.
			// After reversing it is pivot-a elements before the end of the array.
			// The idea came from Rust's implementation.
			pivot = (b - 1) - (pivot - a)
			hint = increasingHint
		}

		// The slice is likely already sorted.
		if wasBalanced && wasPartitioned && hint == increasingHint {
			if partialInsertionSortLessFunc(data, a, b, less) {
				return
			}
		}

		// Probably the slice contains many duplicate elements, partition the slice into
		// elements equal to and elements greater than the pivot.
		if a > 0 && !less(data[a-1], data[pivot]) {
			mid := partitionEqualLessFunc(data, a, b, pivot, less)
			a = mid
			continue
		}

		mid, alreadyPartitioned := partitionLessFunc(data, a, b, pivot, less)
		wasPartitioned = alreadyPartitioned

		leftLen, rightLen := mid-a, b-mid
		balanceThreshold := length / 8
		if leftLen < rightLen {
			wasBalanced = leftLen >= balanceThreshold
			// Recurse into the smaller side, iterate on the larger one:
			// bounds recursion depth to O(log n).
			pdqsortLessFunc(data, a, mid, limit, less)
			a = mid + 1
		} else {
			wasBalanced = rightLen >= balanceThreshold
			pdqsortLessFunc(data, mid+1, b, limit, less)
			b = mid
		}
	}
}
// partitionLessFunc does one quicksort partition.
// Let p = data[pivot]
// Moves elements in data[a:b] around, so that data[i]<p and data[j]>=p for i<newpivot and j>newpivot.
// On return, data[newpivot] = p
func partitionLessFunc[E any](data []E, a, b, pivot int, less func(a, b E) bool) (newpivot int, alreadyPartitioned bool) {
	data[a], data[pivot] = data[pivot], data[a]
	i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned

	for i <= j && less(data[i], data[a]) {
		i++
	}
	for i <= j && !less(data[j], data[a]) {
		j--
	}
	if i > j {
		// No out-of-place pair was found: the range was already partitioned.
		data[j], data[a] = data[a], data[j]
		return j, true
	}
	data[i], data[j] = data[j], data[i]
	i++
	j--

	for {
		for i <= j && less(data[i], data[a]) {
			i++
		}
		for i <= j && !less(data[j], data[a]) {
			j--
		}
		if i > j {
			break
		}
		data[i], data[j] = data[j], data[i]
		i++
		j--
	}
	data[j], data[a] = data[a], data[j]
	return j, false
}

// partitionEqualLessFunc partitions data[a:b] into elements equal to data[pivot] followed by elements greater than data[pivot].
// It assumes that data[a:b] does not contain elements smaller than the data[pivot].
func partitionEqualLessFunc[E any](data []E, a, b, pivot int, less func(a, b E) bool) (newpivot int) {
	data[a], data[pivot] = data[pivot], data[a]
	i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned

	for {
		for i <= j && !less(data[a], data[i]) {
			i++
		}
		for i <= j && less(data[a], data[j]) {
			j--
		}
		if i > j {
			break
		}
		data[i], data[j] = data[j], data[i]
		i++
		j--
	}
	return i
}

// partialInsertionSortLessFunc partially sorts a slice, returns true if the slice is sorted at the end.
func partialInsertionSortLessFunc[E any](data []E, a, b int, less func(a, b E) bool) bool {
	const (
		maxSteps         = 5  // maximum number of adjacent out-of-order pairs that will get shifted
		shortestShifting = 50 // don't shift any elements on short arrays
	)
	i := a + 1
	for j := 0; j < maxSteps; j++ {
		for i < b && !less(data[i], data[i-1]) {
			i++
		}

		if i == b {
			return true
		}

		if b-a < shortestShifting {
			return false
		}

		data[i], data[i-1] = data[i-1], data[i]

		// Shift the smaller one to the left.
		if i-a >= 2 {
			for j := i - 1; j >= 1; j-- {
				if !less(data[j], data[j-1]) {
					break
				}
				data[j], data[j-1] = data[j-1], data[j]
			}
		}
		// Shift the greater one to the right.
		if b-i >= 2 {
			for j := i + 1; j < b; j++ {
				if !less(data[j], data[j-1]) {
					break
				}
				data[j], data[j-1] = data[j-1], data[j]
			}
		}
	}
	return false
}

// breakPatternsLessFunc scatters some elements around in an attempt to break some patterns
// that might cause imbalanced partitions in quicksort.
func breakPatternsLessFunc[E any](data []E, a, b int, less func(a, b E) bool) {
	length := b - a
	if length >= 8 {
		random := xorshift(length)
		modulus := nextPowerOfTwo(length)

		// Swap three elements around the midpoint with pseudo-random partners.
		for idx := a + (length/4)*2 - 1; idx <= a+(length/4)*2+1; idx++ {
			other := int(uint(random.Next()) & (modulus - 1))
			if other >= length {
				other -= length
			}
			data[idx], data[a+other] = data[a+other], data[idx]
		}
	}
}
// choosePivotLessFunc chooses a pivot in data[a:b].
//
// [0,8): chooses a static pivot.
// [8,shortestNinther): uses the simple median-of-three method.
// [shortestNinther,∞): uses the Tukey ninther method.
func choosePivotLessFunc[E any](data []E, a, b int, less func(a, b E) bool) (pivot int, hint sortedHint) {
	const (
		shortestNinther = 50
		maxSwaps        = 4 * 3
	)

	l := b - a

	var (
		swaps int
		i     = a + l/4*1
		j     = a + l/4*2
		k     = a + l/4*3
	)

	if l >= 8 {
		if l >= shortestNinther {
			// Tukey ninther method, the idea came from Rust's implementation.
			i = medianAdjacentLessFunc(data, i, &swaps, less)
			j = medianAdjacentLessFunc(data, j, &swaps, less)
			k = medianAdjacentLessFunc(data, k, &swaps, less)
		}
		// Find the median among i, j, k and stores it into j.
		j = medianLessFunc(data, i, j, k, &swaps, less)
	}

	// The swap count doubles as a sortedness probe: zero swaps means the
	// samples were already in order; the maximum means reverse order.
	switch swaps {
	case 0:
		return j, increasingHint
	case maxSwaps:
		return j, decreasingHint
	default:
		return j, unknownHint
	}
}

// order2LessFunc returns x,y where data[x] <= data[y], where x,y=a,b or x,y=b,a.
func order2LessFunc[E any](data []E, a, b int, swaps *int, less func(a, b E) bool) (int, int) {
	if less(data[b], data[a]) {
		*swaps++
		return b, a
	}
	return a, b
}

// medianLessFunc returns x where data[x] is the median of data[a],data[b],data[c], where x is a, b, or c.
func medianLessFunc[E any](data []E, a, b, c int, swaps *int, less func(a, b E) bool) int {
	a, b = order2LessFunc(data, a, b, swaps, less)
	b, c = order2LessFunc(data, b, c, swaps, less)
	a, b = order2LessFunc(data, a, b, swaps, less)
	return b
}

// medianAdjacentLessFunc finds the median of data[a - 1], data[a], data[a + 1] and stores the index into a.
func medianAdjacentLessFunc[E any](data []E, a int, swaps *int, less func(a, b E) bool) int {
	return medianLessFunc(data, a-1, a, a+1, swaps, less)
}

// reverseRangeLessFunc reverses data[a:b] in place. The less parameter is
// unused; it is kept for signature uniformity with the other generated helpers.
func reverseRangeLessFunc[E any](data []E, a, b int, less func(a, b E) bool) {
	i := a
	j := b - 1
	for i < j {
		data[i], data[j] = data[j], data[i]
		i++
		j--
	}
}

// swapRangeLessFunc swaps the n-element blocks starting at a and b. The less
// parameter is unused; it is kept for signature uniformity.
func swapRangeLessFunc[E any](data []E, a, b, n int, less func(a, b E) bool) {
	for i := 0; i < n; i++ {
		data[a+i], data[b+i] = data[b+i], data[a+i]
	}
}

// stableLessFunc sorts data[:n] stably: it insertion-sorts fixed-size blocks,
// then repeatedly merges adjacent sorted blocks with symMergeLessFunc.
func stableLessFunc[E any](data []E, n int, less func(a, b E) bool) {
	blockSize := 20 // must be > 0
	a, b := 0, blockSize
	for b <= n {
		insertionSortLessFunc(data, a, b, less)
		a = b
		b += blockSize
	}
	insertionSortLessFunc(data, a, n, less)

	for blockSize < n {
		a, b = 0, 2*blockSize
		for b <= n {
			symMergeLessFunc(data, a, a+blockSize, b, less)
			a = b
			b += 2 * blockSize
		}
		if m := a + blockSize; m < n {
			symMergeLessFunc(data, a, m, n, less)
		}
		blockSize *= 2
	}
}

// symMergeLessFunc merges the two sorted subsequences data[a:m] and data[m:b] using
// the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
// Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
// Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
// Computer Science, pages 714-723. Springer, 2004.
//
// Let M = m-a and N = b-m. Wolog M < N.
// The recursion depth is bound by ceil(log(N+M)).
// The algorithm needs O(M*log(N/M + 1)) calls to data.Less.
// The algorithm needs O((M+N)*log(M)) calls to data.Swap.
//
// The paper gives O((M+N)*log(M)) as the number of assignments assuming a
// rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation
// in the paper carries through for Swap operations, especially as the block
// swapping rotate uses only O(M+N) Swaps.
//
// symMerge assumes non-degenerate arguments: a < m && m < b.
// Having the caller check this condition eliminates many leaf recursion calls,
// which improves performance.
func symMergeLessFunc[E any](data []E, a, m, b int, less func(a, b E) bool) {
	// Avoid unnecessary recursions of symMerge
	// by direct insertion of data[a] into data[m:b]
	// if data[a:m] only contains one element.
	if m-a == 1 {
		// Use binary search to find the lowest index i
		// such that data[i] >= data[a] for m <= i < b.
		// Exit the search loop with i == b in case no such index exists.
		i := m
		j := b
		for i < j {
			h := int(uint(i+j) >> 1)
			if less(data[h], data[a]) {
				i = h + 1
			} else {
				j = h
			}
		}
		// Swap values until data[a] reaches the position before i.
		for k := a; k < i-1; k++ {
			data[k], data[k+1] = data[k+1], data[k]
		}
		return
	}

	// Avoid unnecessary recursions of symMerge
	// by direct insertion of data[m] into data[a:m]
	// if data[m:b] only contains one element.
	if b-m == 1 {
		// Use binary search to find the lowest index i
		// such that data[i] > data[m] for a <= i < m.
		// Exit the search loop with i == m in case no such index exists.
		i := a
		j := m
		for i < j {
			h := int(uint(i+j) >> 1)
			if !less(data[m], data[h]) {
				i = h + 1
			} else {
				j = h
			}
		}
		// Swap values until data[m] reaches the position i.
		for k := m; k > i; k-- {
			data[k], data[k-1] = data[k-1], data[k]
		}
		return
	}

	mid := int(uint(a+b) >> 1)
	n := mid + m
	var start, r int
	if m > mid {
		start = n - b
		r = mid
	} else {
		start = a
		r = m
	}
	p := n - 1

	for start < r {
		c := int(uint(start+r) >> 1)
		if !less(data[p-c], data[c]) {
			start = c + 1
		} else {
			r = c
		}
	}

	end := n - start
	if start < m && m < end {
		rotateLessFunc(data, start, m, end, less)
	}
	if a < start && start < mid {
		symMergeLessFunc(data, a, start, mid, less)
	}
	if mid < end && end < b {
		symMergeLessFunc(data, mid, end, b, less)
	}
}

// rotateLessFunc rotates two consecutive blocks u = data[a:m] and v = data[m:b] in data:
// Data of the form 'x u v y' is changed to 'x v u y'.
// rotate performs at most b-a many calls to data.Swap,
// and it assumes non-degenerate arguments: a < m && m < b.
func rotateLessFunc[E any](data []E, a, m, b int, less func(a, b E) bool) {
	i := m - a
	j := b - m

	for i != j {
		if i > j {
			swapRangeLessFunc(data, m-i, m, j, less)
			i -= j
		} else {
			swapRangeLessFunc(data, m-i, m+j-i, i, less)
			j -= i
		}
	}
	// i == j
	swapRangeLessFunc(data, m-i, m, i, less)
}

481
vendor/golang.org/x/exp/slices/zsortordered.go generated vendored Normal file
View file

@ -0,0 +1,481 @@
// Code generated by gen_sort_variants.go; DO NOT EDIT.
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package slices
import "golang.org/x/exp/constraints"
// insertionSortOrdered sorts data[a:b] in ascending order using insertion sort.
func insertionSortOrdered[E constraints.Ordered](data []E, a, b int) {
	for i := a + 1; i < b; i++ {
		// Sift data[i] leftwards until the prefix data[a:i+1] is sorted.
		j := i
		for j > a && data[j] < data[j-1] {
			data[j], data[j-1] = data[j-1], data[j]
			j--
		}
	}
}
// siftDownOrdered implements the heap property on data[lo:hi].
// first is an offset into the array where the root of the heap lies.
func siftDownOrdered[E constraints.Ordered](data []E, lo, hi, first int) {
	node := lo
	for {
		kid := 2*node + 1
		if kid >= hi {
			return
		}
		// Compare against the larger of the two children.
		if kid+1 < hi && data[first+kid] < data[first+kid+1] {
			kid++
		}
		// Heap property already holds here; nothing more to do.
		if !(data[first+node] < data[first+kid]) {
			return
		}
		data[first+node], data[first+kid] = data[first+kid], data[first+node]
		node = kid
	}
}
// heapSortOrdered sorts data[a:b] in ascending order using heapsort.
func heapSortOrdered[E constraints.Ordered](data []E, a, b int) {
	first := a
	hi := b - a

	// Build heap with greatest element at top.
	for root := (hi - 1) / 2; root >= 0; root-- {
		siftDownOrdered(data, root, hi, first)
	}

	// Pop elements, largest first, into end of data.
	for end := hi - 1; end >= 0; end-- {
		data[first], data[first+end] = data[first+end], data[first]
		siftDownOrdered(data, 0, end, first)
	}
}
// pdqsortOrdered sorts data[a:b].
// The algorithm based on pattern-defeating quicksort(pdqsort), but without the optimizations from BlockQuicksort.
// pdqsort paper: https://arxiv.org/pdf/2106.05123.pdf
// C++ implementation: https://github.com/orlp/pdqsort
// Rust implementation: https://docs.rs/pdqsort/latest/pdqsort/
// limit is the number of allowed bad (very unbalanced) pivots before falling back to heapsort.
func pdqsortOrdered[E constraints.Ordered](data []E, a, b, limit int) {
	const maxInsertion = 12 // below this length, insertion sort is fastest

	var (
		wasBalanced    = true // whether the last partitioning was reasonably balanced
		wasPartitioned = true // whether the slice was already partitioned
	)

	for {
		length := b - a

		if length <= maxInsertion {
			insertionSortOrdered(data, a, b)
			return
		}

		// Fall back to heapsort if too many bad choices were made.
		if limit == 0 {
			heapSortOrdered(data, a, b)
			return
		}

		// If the last partitioning was imbalanced, we need to break patterns.
		if !wasBalanced {
			breakPatternsOrdered(data, a, b)
			limit--
		}

		pivot, hint := choosePivotOrdered(data, a, b)
		if hint == decreasingHint {
			reverseRangeOrdered(data, a, b)
			// The chosen pivot was pivot-a elements after the start of the array.
			// After reversing it is pivot-a elements before the end of the array.
			// The idea came from Rust's implementation.
			pivot = (b - 1) - (pivot - a)
			hint = increasingHint
		}

		// The slice is likely already sorted.
		if wasBalanced && wasPartitioned && hint == increasingHint {
			if partialInsertionSortOrdered(data, a, b) {
				return
			}
		}

		// Probably the slice contains many duplicate elements, partition the slice into
		// elements equal to and elements greater than the pivot.
		if a > 0 && !(data[a-1] < data[pivot]) {
			mid := partitionEqualOrdered(data, a, b, pivot)
			a = mid
			continue
		}

		mid, alreadyPartitioned := partitionOrdered(data, a, b, pivot)
		wasPartitioned = alreadyPartitioned

		// Recurse into the shorter side and iterate (tail-call style) on the
		// longer one, keeping stack depth logarithmic.
		leftLen, rightLen := mid-a, b-mid
		balanceThreshold := length / 8
		if leftLen < rightLen {
			wasBalanced = leftLen >= balanceThreshold
			pdqsortOrdered(data, a, mid, limit)
			a = mid + 1
		} else {
			wasBalanced = rightLen >= balanceThreshold
			pdqsortOrdered(data, mid+1, b, limit)
			b = mid
		}
	}
}
// partitionOrdered does one quicksort partition.
// Let p = data[pivot]
// Moves elements in data[a:b] around, so that data[i]<p and data[j]>=p for i<newpivot and j>newpivot.
// On return, data[newpivot] = p
func partitionOrdered[E constraints.Ordered](data []E, a, b, pivot int) (newpivot int, alreadyPartitioned bool) {
	// Park the pivot at data[a] so the scans compare against a fixed slot.
	data[a], data[pivot] = data[pivot], data[a]
	i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned

	// The first pass is unrolled so we can detect a range that is already
	// partitioned: if i and j cross before any swap is needed, report true.
	for i <= j && (data[i] < data[a]) {
		i++
	}
	for i <= j && !(data[j] < data[a]) {
		j--
	}
	if i > j {
		// Restore the pivot into its final position j.
		data[j], data[a] = data[a], data[j]
		return j, true
	}
	data[i], data[j] = data[j], data[i]
	i++
	j--

	// Hoare-style scan: advance i over elements < pivot, retreat j over
	// elements >= pivot, swapping each out-of-place pair.
	for {
		for i <= j && (data[i] < data[a]) {
			i++
		}
		for i <= j && !(data[j] < data[a]) {
			j--
		}
		if i > j {
			break
		}
		data[i], data[j] = data[j], data[i]
		i++
		j--
	}
	// Move the pivot from data[a] into its final position j.
	data[j], data[a] = data[a], data[j]
	return j, false
}
// partitionEqualOrdered partitions data[a:b] into elements equal to data[pivot] followed by elements greater than data[pivot].
// It is assumed that data[a:b] does not contain elements smaller than the data[pivot].
func partitionEqualOrdered[E constraints.Ordered](data []E, a, b, pivot int) (newpivot int) {
	data[a], data[pivot] = data[pivot], data[a]
	i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned

	for {
		// Advance i over elements not greater than the pivot (i.e. equal,
		// given the precondition that nothing is smaller).
		for i <= j && !(data[a] < data[i]) {
			i++
		}
		// Retreat j over elements strictly greater than the pivot.
		for i <= j && (data[a] < data[j]) {
			j--
		}
		if i > j {
			break
		}
		data[i], data[j] = data[j], data[i]
		i++
		j--
	}
	// i is the first index of the "greater than pivot" suffix.
	return i
}
// partialInsertionSortOrdered partially sorts a slice, returns true if the slice is sorted at the end.
func partialInsertionSortOrdered[E constraints.Ordered](data []E, a, b int) bool {
	const (
		maxSteps         = 5  // maximum number of adjacent out-of-order pairs that will get shifted
		shortestShifting = 50 // don't shift any elements on short arrays
	)
	i := a + 1
	for j := 0; j < maxSteps; j++ {
		// Skip over the sorted prefix up to the next inversion.
		for i < b && !(data[i] < data[i-1]) {
			i++
		}

		// Reached the end without finding an inversion: fully sorted.
		if i == b {
			return true
		}

		// Short ranges are cheap to re-sort outright; don't bother shifting.
		if b-a < shortestShifting {
			return false
		}

		// Repair the inversion, then push both elements toward their places.
		data[i], data[i-1] = data[i-1], data[i]

		// Shift the smaller one to the left.
		if i-a >= 2 {
			for j := i - 1; j >= 1; j-- {
				if !(data[j] < data[j-1]) {
					break
				}
				data[j], data[j-1] = data[j-1], data[j]
			}
		}
		// Shift the greater one to the right.
		if b-i >= 2 {
			for j := i + 1; j < b; j++ {
				if !(data[j] < data[j-1]) {
					break
				}
				data[j], data[j-1] = data[j-1], data[j]
			}
		}
	}
	// Used up maxSteps repairs without reaching the end: give up.
	return false
}
// breakPatternsOrdered scatters some elements around in an attempt to break some patterns
// that might cause imbalanced partitions in quicksort.
func breakPatternsOrdered[E constraints.Ordered](data []E, a, b int) {
	length := b - a
	if length < 8 {
		return
	}
	random := xorshift(length)
	modulus := nextPowerOfTwo(length)
	// Swap the three elements around the middle with pseudo-random positions.
	idx := a + (length/4)*2 - 1
	for off := 0; off < 3; off++ {
		other := int(uint(random.Next()) & (modulus - 1))
		if other >= length {
			other -= length
		}
		data[idx+off], data[a+other] = data[a+other], data[idx+off]
	}
}
// choosePivotOrdered chooses a pivot in data[a:b].
//
// [0,8): chooses a static pivot.
// [8,shortestNinther): uses the simple median-of-three method.
// [shortestNinther,∞): uses the Tukey ninther method.
func choosePivotOrdered[E constraints.Ordered](data []E, a, b int) (pivot int, hint sortedHint) {
	const (
		shortestNinther = 50
		maxSwaps        = 4 * 3
	)

	length := b - a
	quarter := length / 4

	// Three evenly spaced sample positions.
	i, j, k := a+quarter, a+2*quarter, a+3*quarter
	swaps := 0

	if length >= 8 {
		if length >= shortestNinther {
			// Tukey ninther method, the idea came from Rust's implementation.
			i = medianAdjacentOrdered(data, i, &swaps)
			j = medianAdjacentOrdered(data, j, &swaps)
			k = medianAdjacentOrdered(data, k, &swaps)
		}
		// Find the median among i, j, k and stores it into j.
		j = medianOrdered(data, i, j, k, &swaps)
	}

	// The swap count is a cheap sortedness signal: zero swaps suggests an
	// increasing run, the maximum suggests a decreasing one.
	switch swaps {
	case 0:
		return j, increasingHint
	case maxSwaps:
		return j, decreasingHint
	default:
		return j, unknownHint
	}
}
// order2Ordered returns x,y where data[x] <= data[y], where x,y=a,b or x,y=b,a.
func order2Ordered[E constraints.Ordered](data []E, a, b int, swaps *int) (int, int) {
	if !(data[b] < data[a]) {
		// Already in order; no swap recorded.
		return a, b
	}
	*swaps++
	return b, a
}
// medianOrdered returns x where data[x] is the median of data[a],data[b],data[c], where x is a, b, or c.
func medianOrdered[E constraints.Ordered](data []E, a, b, c int, swaps *int) int {
	// Three-comparator sorting network; the middle slot ends up holding
	// the index of the median element.
	lo, mid := order2Ordered(data, a, b, swaps)
	mid, _ = order2Ordered(data, mid, c, swaps)
	_, mid = order2Ordered(data, lo, mid, swaps)
	return mid
}
// medianAdjacentOrdered finds the median of data[a - 1], data[a], data[a + 1] and stores the index into a.
func medianAdjacentOrdered[E constraints.Ordered](data []E, a int, swaps *int) int {
	left, right := a-1, a+1
	return medianOrdered(data, left, a, right, swaps)
}
// reverseRangeOrdered reverses data[a:b] in place.
func reverseRangeOrdered[E constraints.Ordered](data []E, a, b int) {
	// Two-pointer sweep: swap symmetric pairs until the pointers meet.
	for lo, hi := a, b-1; lo < hi; lo, hi = lo+1, hi-1 {
		data[lo], data[hi] = data[hi], data[lo]
	}
}
// swapRangeOrdered exchanges the n-element ranges starting at a and at b,
// element by element, front to back.
func swapRangeOrdered[E constraints.Ordered](data []E, a, b, n int) {
	for off := 0; off < n; off++ {
		data[a+off], data[b+off] = data[b+off], data[a+off]
	}
}
// stableOrdered sorts data[:n] stably: equal elements keep their
// original relative order.
func stableOrdered[E constraints.Ordered](data []E, n int) {
	// Phase 1: insertion-sort fixed-size blocks.
	blockSize := 20 // must be > 0
	lo, hi := 0, blockSize
	for hi <= n {
		insertionSortOrdered(data, lo, hi)
		lo, hi = hi, hi+blockSize
	}
	insertionSortOrdered(data, lo, n)

	// Phase 2: merge adjacent sorted blocks, doubling the block size
	// each pass until one block covers the whole range.
	for blockSize < n {
		lo, hi = 0, 2*blockSize
		for hi <= n {
			symMergeOrdered(data, lo, lo+blockSize, hi)
			lo, hi = hi, hi+2*blockSize
		}
		// Merge the trailing partial block, if any.
		if mid := lo + blockSize; mid < n {
			symMergeOrdered(data, lo, mid, n)
		}
		blockSize *= 2
	}
}
// symMergeOrdered merges the two sorted subsequences data[a:m] and data[m:b] using
// the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
// Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
// Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
// Computer Science, pages 714-723. Springer, 2004.
//
// Let M = m-a and N = b-m. Wolog M < N.
// The recursion depth is bound by ceil(log(N+M)).
// The algorithm needs O(M*log(N/M + 1)) calls to data.Less.
// The algorithm needs O((M+N)*log(M)) calls to data.Swap.
//
// The paper gives O((M+N)*log(M)) as the number of assignments assuming a
// rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation
// in the paper carries through for Swap operations, especially as the block
// swapping rotate uses only O(M+N) Swaps.
//
// symMerge assumes non-degenerate arguments: a < m && m < b.
// Having the caller check this condition eliminates many leaf recursion calls,
// which improves performance.
func symMergeOrdered[E constraints.Ordered](data []E, a, m, b int) {
	// Avoid unnecessary recursions of symMerge
	// by direct insertion of data[a] into data[m:b]
	// if data[a:m] only contains one element.
	if m-a == 1 {
		// Use binary search to find the lowest index i
		// such that data[i] >= data[a] for m <= i < b.
		// Exit the search loop with i == b in case no such index exists.
		i := m
		j := b
		for i < j {
			h := int(uint(i+j) >> 1)
			if data[h] < data[a] {
				i = h + 1
			} else {
				j = h
			}
		}
		// Swap values until data[a] reaches the position before i.
		for k := a; k < i-1; k++ {
			data[k], data[k+1] = data[k+1], data[k]
		}
		return
	}

	// Avoid unnecessary recursions of symMerge
	// by direct insertion of data[m] into data[a:m]
	// if data[m:b] only contains one element.
	if b-m == 1 {
		// Use binary search to find the lowest index i
		// such that data[i] > data[m] for a <= i < m.
		// Exit the search loop with i == m in case no such index exists.
		i := a
		j := m
		for i < j {
			h := int(uint(i+j) >> 1)
			if !(data[m] < data[h]) {
				i = h + 1
			} else {
				j = h
			}
		}
		// Swap values until data[m] reaches the position i.
		for k := m; k > i; k-- {
			data[k], data[k-1] = data[k-1], data[k]
		}
		return
	}

	mid := int(uint(a+b) >> 1)
	n := mid + m
	var start, r int
	if m > mid {
		start = n - b
		r = mid
	} else {
		start = a
		r = m
	}
	p := n - 1

	// Symmetric binary search: the comparison data[p-c] < data[c] pairs
	// positions mirrored around the midpoint to locate the split point.
	for start < r {
		c := int(uint(start+r) >> 1)
		if !(data[p-c] < data[c]) {
			start = c + 1
		} else {
			r = c
		}
	}

	end := n - start
	// Rotate the two middle pieces into place, then recursively merge the
	// two halves; degenerate sub-ranges are skipped per the precondition.
	if start < m && m < end {
		rotateOrdered(data, start, m, end)
	}
	if a < start && start < mid {
		symMergeOrdered(data, a, start, mid)
	}
	if mid < end && end < b {
		symMergeOrdered(data, mid, end, b)
	}
}
// rotateOrdered rotates two consecutive blocks u = data[a:m] and v = data[m:b] in data:
// Data of the form 'x u v y' is changed to 'x v u y'.
// rotate performs at most b-a many calls to data.Swap,
// and it assumes non-degenerate arguments: a < m && m < b.
func rotateOrdered[E constraints.Ordered](data []E, a, m, b int) {
	// GCD-style rotation: repeatedly swap the shorter block across the
	// boundary m until both remaining blocks have equal length.
	left, right := m-a, b-m
	for left != right {
		if left > right {
			swapRangeOrdered(data, m-left, m, right)
			left -= right
		} else {
			swapRangeOrdered(data, m-left, m+right-left, left)
			right -= left
		}
	}
	// left == right: one final block swap completes the rotation.
	swapRangeOrdered(data, m-left, m, left)
}

11
vendor/modules.txt vendored
View file

@ -192,13 +192,15 @@ github.com/cenkalti/backoff/v4
# github.com/cespare/xxhash/v2 v2.2.0
## explicit; go 1.11
github.com/cespare/xxhash/v2
# github.com/cilium/ebpf v0.9.1
## explicit; go 1.17
# github.com/cilium/ebpf v0.11.0
## explicit; go 1.19
github.com/cilium/ebpf
github.com/cilium/ebpf/asm
github.com/cilium/ebpf/btf
github.com/cilium/ebpf/internal
github.com/cilium/ebpf/internal/kconfig
github.com/cilium/ebpf/internal/sys
github.com/cilium/ebpf/internal/tracefs
github.com/cilium/ebpf/internal/unix
github.com/cilium/ebpf/link
# github.com/cloudflare/cfssl v1.6.4
@ -1240,6 +1242,11 @@ golang.org/x/crypto/pkcs12/internal/rc2
golang.org/x/crypto/salsa20/salsa
golang.org/x/crypto/ssh
golang.org/x/crypto/ssh/internal/bcrypt_pbkdf
# golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2
## explicit; go 1.18
golang.org/x/exp/constraints
golang.org/x/exp/maps
golang.org/x/exp/slices
# golang.org/x/mod v0.11.0
## explicit; go 1.17
golang.org/x/mod/internal/lazyregexp