0ct0pu5
/
moby


			
				
					
						
						
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007
01702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026
							package ebpf

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"math"
	"path/filepath"
	"runtime"
	"strings"
	"time"
	"unsafe"

	"github.com/cilium/ebpf/asm"
	"github.com/cilium/ebpf/btf"
	"github.com/cilium/ebpf/internal"
	"github.com/cilium/ebpf/internal/sys"
	"github.com/cilium/ebpf/internal/unix"
)

// ErrNotSupported is returned whenever the kernel doesn't support a feature.
//
// It is the same error value as internal.ErrNotSupported, re-exported so that
// callers can match on it with errors.Is without importing internal packages.
var ErrNotSupported = internal.ErrNotSupported

// ProgramID represents the unique ID of an eBPF program.
//
// IDs are accepted by NewProgramFromID and enumerated by ProgramGetNextID.
type ProgramID uint32

const (
	// Number of bytes to pad the output buffer for BPF_PROG_TEST_RUN.
	// This is currently the maximum of spare space allocated for SKB
	// and XDP programs, and equal to XDP_PACKET_HEADROOM + NET_IP_ALIGN.
	//
	// Program.Test adds this amount to the input length when sizing the
	// output buffer it allocates.
	outputPad = 256 + 2
)

// DefaultVerifierLogSize is the default number of bytes allocated for the
// verifier log. It is used whenever ProgramOptions.LogSize is left at zero.
const DefaultVerifierLogSize = 64 * 1024

// maxVerifierLogSize is the maximum size of verifier log buffer the kernel
// will accept before returning EINVAL.
//
// The value is math.MaxUint32 >> 2, i.e. 0x3FFFFFFF bytes. It is only used to
// produce a more helpful error message after a failed load.
const maxVerifierLogSize = math.MaxUint32 >> 2

// ProgramOptions control loading a program into the kernel.
//
// The zero value is valid and is what NewProgram uses.
type ProgramOptions struct {
	// Bitmap controlling the detail emitted by the kernel's eBPF verifier log.
	// LogLevel-type values can be ORed together to request specific kinds of
	// verifier output. See the documentation on [ebpf.LogLevel] for details.
	//
	//  opts.LogLevel = (ebpf.LogLevelBranch | ebpf.LogLevelStats)
	//
	// If left to its default value, the program will first be loaded without
	// verifier output enabled. Upon error, the program load will be repeated
	// with LogLevelBranch and the given (or default) LogSize value.
	//
	// Setting this to a non-zero value will unconditionally enable the verifier
	// log, populating the [ebpf.Program.VerifierLog] field on successful loads
	// and including detailed verifier errors if the program is rejected. This
	// will always allocate an output buffer, but will result in only a single
	// attempt at loading the program.
	LogLevel LogLevel

	// Controls the output buffer size for the verifier log, in bytes. See the
	// documentation on ProgramOptions.LogLevel for details about how this value
	// is used.
	//
	// If this value is set too low to fit the verifier log, the resulting
	// [ebpf.VerifierError]'s Truncated flag will be true, and the error string
	// will also contain a hint to that effect.
	//
	// Negative values are rejected when the program is loaded.
	//
	// Defaults to DefaultVerifierLogSize.
	LogSize int

	// Disables the verifier log completely, regardless of other options.
	//
	// When set, no log buffer is allocated and a failed load is not repeated
	// to collect verifier output.
	LogDisabled bool

	// Type information used for CO-RE relocations.
	//
	// This is useful in environments where the kernel BTF is not available
	// (containers) or where it is in a non-standard location. Defaults to
	// use the kernel BTF from a well-known location if nil.
	KernelTypes *btf.Spec
}

// ProgramSpec defines a Program.
//
// A spec is a description only; nothing is loaded into the kernel until it is
// passed to NewProgram or NewProgramWithOptions.
type ProgramSpec struct {
	// Name is passed to the kernel as a debug aid. Must only contain
	// alpha numeric and '_' characters.
	Name string

	// Type determines at which hook in the kernel a program will run.
	//
	// Loading a spec whose Type is UnspecifiedProgram fails.
	Type ProgramType

	// AttachType of the program, needed to differentiate allowed context
	// accesses in some newer program types like CGroupSockAddr.
	//
	// Available on kernels 4.17 and later.
	AttachType AttachType

	// Name of a kernel data structure or function to attach to. Its
	// interpretation depends on Type and AttachType.
	AttachTo string

	// The program to attach to. Must be provided manually.
	AttachTarget *Program

	// The name of the ELF section this program originated from.
	SectionName string

	// The instructions making up the program. Loading a spec without any
	// instructions fails.
	Instructions asm.Instructions

	// Flags is passed to the kernel and specifies additional program
	// load attributes.
	Flags uint32

	// License of the program. Some helpers are only available if
	// the license is deemed compatible with the GPL.
	//
	// See https://www.kernel.org/doc/html/latest/process/license-rules.html#id1
	License string

	// Version used by Kprobe programs.
	//
	// Deprecated on kernels 5.0 and later. Leave empty to let the library
	// detect this value automatically.
	KernelVersion uint32

	// The byte order this program was compiled for, may be nil.
	//
	// If non-nil it has to match the native byte order of the host, otherwise
	// loading the program fails.
	ByteOrder binary.ByteOrder
}

// Copy duplicates the spec.
//
// The returned spec has its own Instructions slice, so adding, removing or
// replacing instructions in the copy leaves the original untouched. All other
// fields are copied as-is, which means pointer fields such as AttachTarget
// still refer to the same object.
//
// Copying a nil spec yields nil.
func (ps *ProgramSpec) Copy() *ProgramSpec {
	if ps == nil {
		return nil
	}

	dup := new(ProgramSpec)
	*dup = *ps

	// Detach the instruction stream from the original's backing array.
	insns := make(asm.Instructions, len(ps.Instructions))
	copy(insns, ps.Instructions)
	dup.Instructions = insns

	return dup
}

// Tag computes the kernel tag of the spec's instructions, using the native
// byte order of the host.
//
// Call asm.Instructions.Tag directly to compute the tag for a different
// endianness.
func (ps *ProgramSpec) Tag() (string, error) {
	insns := ps.Instructions
	return insns.Tag(internal.NativeEndian)
}

// VerifierError is returned by [NewProgram] and [NewProgramWithOptions] if a
// program is rejected by the verifier.
//
// Use [errors.As] to access the error.
//
// This is an alias for the type of the same name in the internal package.
type VerifierError = internal.VerifierError

// Program represents BPF program loaded into the kernel.
//
// It is not safe to close a Program which is used by other goroutines.
//
// NOTE: several constructors in this file build a Program using an unkeyed
// composite literal, so the order of the fields below must not be changed
// without updating them.
type Program struct {
	// Contains the output of the kernel verifier if enabled,
	// otherwise it is empty.
	VerifierLog string

	// fd is the file descriptor referring to the loaded program.
	fd *sys.FD
	// name is the program name, used by String.
	name string
	// pinnedPath is the bpffs path the program is pinned at, or "" if it is
	// not known to be pinned. Maintained by Pin, Unpin and LoadPinnedProgram.
	pinnedPath string
	// typ is the program type, as returned by Type.
	typ ProgramType
}

// NewProgram loads spec into the kernel using default options.
//
// It is shorthand for calling [NewProgramWithOptions] with a zero
// ProgramOptions; see that function for details.
//
// Returns a [VerifierError] containing the full verifier log if the program is
// rejected by the kernel.
func NewProgram(spec *ProgramSpec) (*Program, error) {
	var defaults ProgramOptions
	return NewProgramWithOptions(spec, defaults)
}

// NewProgramWithOptions loads spec into the kernel, honouring opts.
//
// Loading a program for the first time will perform
// feature detection by loading small, temporary programs.
//
// A nil spec is rejected. If the program refers to a map that hasn't been
// resolved, the error explains that the whole collection has to be loaded.
//
// Returns a [VerifierError] containing the full verifier log if the program is
// rejected by the kernel.
func NewProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, error) {
	if spec == nil {
		return nil, errors.New("can't load a program from a nil spec")
	}

	prog, err := newProgramWithOptions(spec, opts)
	if !errors.Is(err, asm.ErrUnsatisfiedMapReference) {
		return prog, err
	}

	// Unresolved map references can only be fixed by the collection loader.
	return nil, fmt.Errorf("cannot load program without loading its whole collection: %w", err)
}

// newProgramWithOptions validates spec, prepares a private copy of its
// instructions (BTF ext infos, CO-RE relocations, kconfig references, kfunc
// fixups), resolves the attach target and finally asks the kernel to load the
// program.
//
// spec must not be nil. spec.Instructions is never modified; all fixups are
// applied to a copy.
//
// If the load fails and the caller neither requested nor disabled the verifier
// log, the load is repeated once with LogLevelBranch purely to collect
// diagnostics. The error of the first attempt is the one reported.
func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, error) {
	if len(spec.Instructions) == 0 {
		return nil, errors.New("instructions cannot be empty")
	}

	if spec.Type == UnspecifiedProgram {
		return nil, errors.New("can't load program of unspecified type")
	}

	if spec.ByteOrder != nil && spec.ByteOrder != internal.NativeEndian {
		return nil, fmt.Errorf("can't load %s program on %s", spec.ByteOrder, internal.NativeEndian)
	}

	if opts.LogSize < 0 {
		return nil, errors.New("ProgramOptions.LogSize must be a positive value; disable verifier logs using ProgramOptions.LogDisabled")
	}

	// Kernels before 5.0 (6c4fc209fcf9 "bpf: remove useless version check for prog load")
	// require the version field to be set to the value of the KERNEL_VERSION
	// macro for kprobe-type programs.
	// Overwrite Kprobe program version if set to zero or the magic version constant.
	kv := spec.KernelVersion
	if spec.Type == Kprobe && (kv == 0 || kv == internal.MagicKernelVersion) {
		v, err := internal.KernelVersion()
		if err != nil {
			return nil, fmt.Errorf("detecting kernel version: %w", err)
		}
		kv = v.Kernel()
	}

	attr := &sys.ProgLoadAttr{
		ProgType:           sys.ProgType(spec.Type),
		ProgFlags:          spec.Flags,
		ExpectedAttachType: sys.AttachType(spec.AttachType),
		License:            sys.NewStringPointer(spec.License),
		KernVersion:        kv,
	}

	// Only pass a name if the kernel understands object names.
	if haveObjName() == nil {
		attr.ProgName = sys.NewObjName(spec.Name)
	}

	// Work on a copy so that the fixups below don't leak into the spec.
	insns := make(asm.Instructions, len(spec.Instructions))
	copy(insns, spec.Instructions)

	// Missing BTF support is not fatal, the program is simply loaded
	// without func and line infos.
	handle, fib, lib, err := btf.MarshalExtInfos(insns)
	if err != nil && !errors.Is(err, btf.ErrNotSupported) {
		return nil, fmt.Errorf("load ext_infos: %w", err)
	}
	if handle != nil {
		defer handle.Close()

		attr.ProgBtfFd = uint32(handle.FD())

		attr.FuncInfoRecSize = btf.FuncInfoSize
		attr.FuncInfoCnt = uint32(len(fib)) / btf.FuncInfoSize
		attr.FuncInfo = sys.NewSlicePointer(fib)

		attr.LineInfoRecSize = btf.LineInfoSize
		attr.LineInfoCnt = uint32(len(lib)) / btf.LineInfoSize
		attr.LineInfo = sys.NewSlicePointer(lib)
	}

	if err := applyRelocations(insns, opts.KernelTypes, spec.ByteOrder); err != nil {
		return nil, fmt.Errorf("apply CO-RE relocations: %w", err)
	}

	kconfig, err := resolveKconfigReferences(insns)
	if err != nil {
		return nil, fmt.Errorf("resolve .kconfig: %w", err)
	}
	defer kconfig.Close()

	if err := fixupAndValidate(insns); err != nil {
		return nil, err
	}

	handles, err := fixupKfuncs(insns)
	if err != nil {
		return nil, fmt.Errorf("fixing up kfuncs: %w", err)
	}
	defer handles.close()

	if len(handles) > 0 {
		fdArray := handles.fdArray()
		attr.FdArray = sys.NewPointer(unsafe.Pointer(&fdArray[0]))
	}

	buf := bytes.NewBuffer(make([]byte, 0, insns.Size()))
	err = insns.Marshal(buf, internal.NativeEndian)
	if err != nil {
		return nil, err
	}

	bytecode := buf.Bytes()
	attr.Insns = sys.NewSlicePointer(bytecode)
	attr.InsnCnt = uint32(len(bytecode) / asm.InstructionSize)

	if spec.AttachTarget != nil {
		targetID, err := findTargetInProgram(spec.AttachTarget, spec.AttachTo, spec.Type, spec.AttachType)
		if err != nil {
			return nil, fmt.Errorf("attach %s/%s: %w", spec.Type, spec.AttachType, err)
		}

		attr.AttachBtfId = targetID
		attr.AttachBtfObjFd = uint32(spec.AttachTarget.FD())
		// The raw fd stored in attr must stay valid until the syscall is done.
		defer runtime.KeepAlive(spec.AttachTarget)
	} else if spec.AttachTo != "" {
		module, targetID, err := findProgramTargetInKernel(spec.AttachTo, spec.Type, spec.AttachType)
		if err != nil && !errors.Is(err, errUnrecognizedAttachType) {
			// We ignore errUnrecognizedAttachType since AttachTo may be non-empty
			// for programs that don't attach anywhere.
			return nil, fmt.Errorf("attach %s/%s: %w", spec.Type, spec.AttachType, err)
		}

		attr.AttachBtfId = targetID
		if module != nil {
			attr.AttachBtfObjFd = uint32(module.FD())
			defer module.Close()
		}
	}

	if opts.LogSize == 0 {
		opts.LogSize = DefaultVerifierLogSize
	}

	// The caller requested a specific verifier log level. Set up the log buffer.
	var logBuf []byte
	if !opts.LogDisabled && opts.LogLevel != 0 {
		logBuf = make([]byte, opts.LogSize)
		attr.LogLevel = opts.LogLevel
		attr.LogSize = uint32(len(logBuf))
		attr.LogBuf = sys.NewSlicePointer(logBuf)
	}

	fd, err := sys.ProgLoad(attr)
	if err == nil {
		return &Program{unix.ByteSliceToString(logBuf), fd, spec.Name, "", spec.Type}, nil
	}

	// An error occurred loading the program, but the caller did not explicitly
	// enable the verifier log. Re-run with branch-level verifier logs enabled to
	// obtain more info. Preserve the original error to return it to the caller.
	// An undersized log buffer will result in ENOSPC regardless of the underlying
	// cause.
	var err2 error
	if !opts.LogDisabled && opts.LogLevel == 0 {
		logBuf = make([]byte, opts.LogSize)
		attr.LogLevel = LogLevelBranch
		attr.LogSize = uint32(len(logBuf))
		attr.LogBuf = sys.NewSlicePointer(logBuf)

		// This attempt only exists to fill logBuf. Should it unexpectedly
		// succeed, close the resulting fd instead of leaking it; the error
		// of the first attempt is still what gets reported below.
		retryFd, retryErr := sys.ProgLoad(attr)
		if retryErr == nil {
			_ = retryFd.Close()
		}
		err2 = retryErr
	}

	switch {
	case errors.Is(err, unix.EPERM):
		if len(logBuf) > 0 && logBuf[0] == 0 {
			// EPERM due to RLIMIT_MEMLOCK happens before the verifier, so we can
			// check that the log is empty to reduce false positives.
			return nil, fmt.Errorf("load program: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err)
		}

		fallthrough

	case errors.Is(err, unix.EINVAL):
		if hasFunctionReferences(spec.Instructions) {
			if err := haveBPFToBPFCalls(); err != nil {
				return nil, fmt.Errorf("load program: %w", err)
			}
		}

		if opts.LogSize > maxVerifierLogSize {
			return nil, fmt.Errorf("load program: %w (ProgramOptions.LogSize exceeds maximum value of %d)", err, maxVerifierLogSize)
		}
	}

	// ENOSPC from either attempt means the log didn't fit into logBuf.
	truncated := errors.Is(err, unix.ENOSPC) || errors.Is(err2, unix.ENOSPC)
	return nil, internal.ErrorWithLog("load program", err, logBuf, truncated)
}

// NewProgramFromFD wraps a raw file descriptor in a Program.
//
// Ownership of fd passes to the returned Program: you should not use fd
// after calling this function.
//
// Requires at least Linux 4.10.
func NewProgramFromFD(fd int) (*Program, error) {
	wrapped, err := sys.NewFD(fd)
	if err != nil {
		return nil, err
	}

	prog, err := newProgramFromFD(wrapped)
	return prog, err
}

// NewProgramFromID opens the program identified by id.
//
// Returns ErrNotExist, if there is no eBPF program with the given id.
func NewProgramFromID(id ProgramID) (*Program, error) {
	attr := sys.ProgGetFdByIdAttr{
		Id: uint32(id),
	}

	fd, err := sys.ProgGetFdById(&attr)
	if err != nil {
		return nil, fmt.Errorf("get program by id: %w", err)
	}

	return newProgramFromFD(fd)
}

// newProgramFromFD builds a Program around fd, querying the kernel for the
// program's name and type.
//
// fd is closed if that information can't be retrieved.
func newProgramFromFD(fd *sys.FD) (*Program, error) {
	info, err := newProgramInfoFromFd(fd)
	if err != nil {
		fd.Close()
		return nil, fmt.Errorf("discover program type: %w", err)
	}

	prog := &Program{
		fd:   fd,
		name: info.Name,
		typ:  info.Type,
	}
	return prog, nil
}

// String returns a human readable description of the program, made up of its
// type, its name (if it has one) and its file descriptor.
func (p *Program) String() string {
	if p.name == "" {
		return fmt.Sprintf("%s(%v)", p.typ, p.fd)
	}
	return fmt.Sprintf("%s(%s)#%v", p.typ, p.name, p.fd)
}

// Type reports the program type recorded when the Program was created.
func (p *Program) Type() ProgramType {
	typ := p.typ
	return typ
}

// Info queries the kernel for metadata about the program.
//
// Requires at least 4.10.
func (p *Program) Info() (*ProgramInfo, error) {
	info, err := newProgramInfoFromFd(p.fd)
	return info, err
}

// Handle returns a reference to the program's type information in the kernel.
//
// The BTF ID is taken from the program's Info. Returns ErrNotSupported if the
// kernel has no BTF support, or if there is no BTF associated with the
// program.
func (p *Program) Handle() (*btf.Handle, error) {
	info, err := p.Info()
	if err != nil {
		return nil, err
	}

	if id, ok := info.BTFID(); ok {
		return btf.NewHandleFromID(id)
	}

	return nil, fmt.Errorf("program %s: retrieve BTF ID: %w", p, ErrNotSupported)
}

// FD returns the raw file descriptor backing the Program.
//
// It is invalid to call this function after Close has been called.
func (p *Program) FD() int {
	fd := p.fd
	return fd.Int()
}

// Clone creates a duplicate of the Program backed by a duplicated file
// descriptor.
//
// Closing the duplicate does not affect the original, and vice versa. The
// duplicate carries over the verifier log, name and type, but not the pinned
// path.
//
// Cloning a nil Program returns nil.
func (p *Program) Clone() (*Program, error) {
	if p == nil {
		return nil, nil
	}

	dup, err := p.fd.Dup()
	if err != nil {
		return nil, fmt.Errorf("can't clone program: %w", err)
	}

	clone := &Program{
		VerifierLog: p.VerifierLog,
		fd:          dup,
		name:        p.name,
		typ:         p.typ,
	}
	return clone, nil
}

// Pin persists the Program on the BPF virtual file system past the lifetime of
// the process that created it.
//
// Calling Pin on a previously pinned program will overwrite the path, except when
// the new path already exists. Re-pinning across filesystems is not supported.
//
// The recorded pinned path is only updated if pinning succeeds.
//
// This requires bpffs to be mounted above fileName.
// See https://docs.cilium.io/en/stable/network/kubernetes/configuration/#mounting-bpffs-with-systemd
func (p *Program) Pin(fileName string) error {
	err := internal.Pin(p.pinnedPath, fileName, p.fd)
	if err == nil {
		p.pinnedPath = fileName
	}
	return err
}

// Unpin removes the persisted state for the Program from the BPF virtual filesystem.
//
// The recorded pinned path is only cleared on success, so failed calls to
// Unpin will not alter the state returned by IsPinned.
//
// Unpinning an unpinned Program returns nil.
func (p *Program) Unpin() error {
	err := internal.Unpin(p.pinnedPath)
	if err == nil {
		p.pinnedPath = ""
	}
	return err
}

// IsPinned reports whether the Program has a pinned path recorded, i.e.
// whether that path is non-empty.
func (p *Program) IsPinned() bool {
	return len(p.pinnedPath) > 0
}

// Close the Program's underlying file descriptor, which could unload
// the program from the kernel if it is not pinned or attached to a
// kernel hook.
//
// Closing a nil Program is a no-op.
func (p *Program) Close() error {
	if p != nil {
		return p.fd.Close()
	}
	return nil
}

// RunOptions holds the various options for Run'ing a Program.
type RunOptions struct {
	// Program's data input. Required field.
	//
	// The kernel expects at least 14 bytes input for an ethernet header for
	// XDP and SKB programs.
	Data []byte
	// Program's data after Program has run. Caller must allocate. Optional field.
	//
	// After a successful run the slice is resliced to the output size
	// reported by the kernel.
	DataOut []byte
	// Program's context input. Optional field.
	//
	// The value is serialized using encoding/binary in native byte order.
	Context interface{}
	// Program's context after Program has run. Must be a pointer or slice. Optional field.
	//
	// The value is decoded using encoding/binary in native byte order.
	ContextOut interface{}
	// Minimum number of times to run Program. Optional field. Defaults to 1.
	//
	// The program may be executed more often than this due to interruptions, e.g.
	// when runtime.AllThreadsSyscall is invoked.
	Repeat uint32
	// Optional flags.
	Flags uint32
	// CPU to run Program on. Optional field.
	// Note not all program types support this field.
	CPU uint32
	// Called whenever the syscall is interrupted, and should be set to testing.B.ResetTimer
	// or similar. Typically used during benchmarking. Optional field.
	//
	// Deprecated: use [testing.B.ReportMetric] with unit "ns/op" instead.
	Reset func()
}

// Test runs the Program once in the kernel with the given input. It returns
// the value returned by the eBPF program together with the output data.
//
// No output buffer is allocated if in is empty, in which case the returned
// slice is nil.
//
// Note: the kernel expects at least 14 bytes input for an ethernet header for
// XDP and SKB programs.
//
// This function requires at least Linux 4.12.
func (p *Program) Test(in []byte) (uint32, []byte, error) {
	opts := RunOptions{
		Data:   in,
		Repeat: 1,
	}

	// Older kernels ignore the dataSizeOut argument when copying to user space.
	// Combined with things like bpf_xdp_adjust_head() we don't really know what the final
	// size will be. Hence we allocate an output buffer which we hope will always be large
	// enough, and panic if the kernel wrote past the end of the allocation.
	// See https://patchwork.ozlabs.org/cover/1006822/
	if len(in) > 0 {
		opts.DataOut = make([]byte, len(in)+outputPad)
	}

	ret, _, err := p.run(&opts)
	if err == nil {
		// run has trimmed DataOut to what the kernel reported.
		return ret, opts.DataOut, nil
	}
	return ret, nil, fmt.Errorf("test program: %w", err)
}

// Run executes the Program in the kernel as described by opts and returns
// the program's return value.
//
// Note: the same restrictions from Test apply.
func (p *Program) Run(opts *RunOptions) (uint32, error) {
	ret, _, err := p.run(opts)
	if err == nil {
		return ret, nil
	}
	return ret, fmt.Errorf("run program: %w", err)
}

// Benchmark runs the Program with the given input for a number of times
// and returns the time taken per iteration.
//
// Returns the result of the last execution of the program and the time per
// run or an error. reset is called whenever the benchmark syscall is
// interrupted, and should be set to testing.B.ResetTimer or similar.
//
// repeat has to fit into 32 bits.
//
// This function requires at least Linux 4.12.
func (p *Program) Benchmark(in []byte, repeat int, reset func()) (uint32, time.Duration, error) {
	if uint(repeat) > math.MaxUint32 {
		return 0, 0, fmt.Errorf("repeat is too high")
	}

	var opts RunOptions
	opts.Data = in
	opts.Repeat = uint32(repeat)
	opts.Reset = reset

	ret, elapsed, err := p.run(&opts)
	if err == nil {
		return ret, elapsed, nil
	}
	return ret, elapsed, fmt.Errorf("benchmark program: %w", err)
}

// haveProgRun probes whether the kernel supports BPF_PROG_RUN by loading a
// trivial socket filter and trying to run it.
var haveProgRun = internal.NewFeatureTest("BPF_PROG_RUN", "4.12", func() error {
	probe := &ProgramSpec{
		// SocketFilter does not require privileges on newer kernels.
		Type: SocketFilter,
		Instructions: asm.Instructions{
			asm.LoadImm(asm.R0, 0, asm.DWord),
			asm.Return(),
		},
		License: "MIT",
	}

	prog, err := NewProgram(probe)
	if err != nil {
		// This may be because we lack sufficient permissions, etc.
		return err
	}
	defer prog.Close()

	input := internal.EmptyBPFContext
	attr := sys.ProgRunAttr{
		ProgFd:     uint32(prog.FD()),
		DataSizeIn: uint32(len(input)),
		DataIn:     sys.NewSlicePointer(input),
	}

	err = sys.ProgRun(&attr)

	// Check for EINVAL specifically, rather than err != nil since we
	// otherwise misdetect due to insufficient permissions.
	if errors.Is(err, unix.EINVAL) {
		return internal.ErrNotSupported
	}

	// We know that PROG_TEST_RUN is supported if we get EINTR.
	//
	// The first PROG_TEST_RUN patches shipped in 4.12 didn't include
	// a test runner for SocketFilter. ENOTSUPP means PROG_TEST_RUN is
	// supported, but not for the program type used in the probe.
	if errors.Is(err, unix.EINTR) || errors.Is(err, sys.ENOTSUPP) {
		return nil
	}

	return err
})

// run executes the program via BPF_PROG_RUN and is the shared implementation
// behind Test, Run and Benchmark.
//
// It returns the program's return value and the duration reported by the
// kernel. opts must not be nil. On success opts.DataOut is resliced to the
// output size reported by the kernel, and opts.ContextOut (if set) is filled
// in from the context returned by the kernel.
//
// The syscall is retried when interrupted (EINTR), calling opts.Reset before
// each retry. Returns an error wrapping ErrNotSupported if the kernel can't
// run programs of this type.
func (p *Program) run(opts *RunOptions) (uint32, time.Duration, error) {
	if uint(len(opts.Data)) > math.MaxUint32 {
		return 0, 0, fmt.Errorf("input is too long")
	}

	if err := haveProgRun(); err != nil {
		return 0, 0, err
	}

	var ctxBytes []byte
	if opts.Context != nil {
		ctx := new(bytes.Buffer)
		if err := binary.Write(ctx, internal.NativeEndian, opts.Context); err != nil {
			return 0, 0, fmt.Errorf("cannot serialize context: %w", err)
		}
		ctxBytes = ctx.Bytes()
	}

	var ctxOut []byte
	if opts.ContextOut != nil {
		// binary.Size reports -1 for anything that isn't fixed-size data.
		// Passing that on to make would panic, so reject it up front.
		size := binary.Size(opts.ContextOut)
		if size < 0 {
			return 0, 0, fmt.Errorf("cannot determine size of ContextOut: %T is not a fixed-size value", opts.ContextOut)
		}
		ctxOut = make([]byte, size)
	}

	attr := sys.ProgRunAttr{
		ProgFd:      p.fd.Uint(),
		DataSizeIn:  uint32(len(opts.Data)),
		DataSizeOut: uint32(len(opts.DataOut)),
		DataIn:      sys.NewSlicePointer(opts.Data),
		DataOut:     sys.NewSlicePointer(opts.DataOut),
		Repeat:      uint32(opts.Repeat),
		CtxSizeIn:   uint32(len(ctxBytes)),
		CtxSizeOut:  uint32(len(ctxOut)),
		CtxIn:       sys.NewSlicePointer(ctxBytes),
		CtxOut:      sys.NewSlicePointer(ctxOut),
		Flags:       opts.Flags,
		Cpu:         opts.CPU,
	}

	// A zero Repeat means "run once".
	if attr.Repeat == 0 {
		attr.Repeat = 1
	}

retry:
	for {
		err := sys.ProgRun(&attr)
		if err == nil {
			break retry
		}

		if errors.Is(err, unix.EINTR) {
			if attr.Repeat == 1 {
				// Older kernels check whether enough repetitions have been
				// executed only after checking for pending signals.
				//
				//     run signal? done? run ...
				//
				// As a result we can get EINTR for repeat==1 even though
				// the program was run exactly once. Treat this as a
				// successful run instead.
				//
				// Since commit 607b9cc92bd7 ("bpf: Consolidate shared test timing code")
				// the conditions are reversed:
				//     run done? signal? ...
				break retry
			}

			if opts.Reset != nil {
				opts.Reset()
			}
			continue retry
		}

		if errors.Is(err, sys.ENOTSUPP) {
			return 0, 0, fmt.Errorf("kernel doesn't support running %s: %w", p.Type(), ErrNotSupported)
		}

		return 0, 0, err
	}

	if opts.DataOut != nil {
		if int(attr.DataSizeOut) > cap(opts.DataOut) {
			// Houston, we have a problem. The program created more data than we allocated,
			// and the kernel wrote past the end of our buffer.
			panic("kernel wrote past end of output buffer")
		}
		opts.DataOut = opts.DataOut[:int(attr.DataSizeOut)]
	}

	if len(ctxOut) != 0 {
		b := bytes.NewReader(ctxOut)
		if err := binary.Read(b, internal.NativeEndian, opts.ContextOut); err != nil {
			return 0, 0, fmt.Errorf("failed to decode ContextOut: %w", err)
		}
	}

	total := time.Duration(attr.Duration) * time.Nanosecond
	return attr.Retval, total, nil
}

// unmarshalProgram turns a 4 byte, native endian program ID into a Program
// by opening the program with that ID.
func unmarshalProgram(buf []byte) (*Program, error) {
	const idSize = 4
	if len(buf) != idSize {
		return nil, errors.New("program id requires 4 byte value")
	}

	// Looking up an entry in a nested map or prog array returns an id,
	// not an fd.
	return NewProgramFromID(ProgramID(internal.NativeEndian.Uint32(buf)))
}

// marshalProgram encodes the file descriptor of p as a 4 byte, native endian
// value. length is the size of the destination and must be 4.
func marshalProgram(p *Program, length int) ([]byte, error) {
	if length != 4 {
		return nil, fmt.Errorf("can't marshal program to %d bytes", length)
	}

	var raw [4]byte
	internal.NativeEndian.PutUint32(raw[:], p.fd.Uint())
	return raw[:], nil
}

// LoadPinnedProgram opens a Program that was pinned at fileName.
//
// The returned Program remembers fileName as its pinned path. Its name is
// taken from the kernel if object names are supported, otherwise the base
// name of fileName is used.
//
// Requires at least Linux 4.11.
func LoadPinnedProgram(fileName string, opts *LoadPinOptions) (*Program, error) {
	attr := sys.ObjGetAttr{
		Pathname:  sys.NewStringPointer(fileName),
		FileFlags: opts.Marshal(),
	}

	fd, err := sys.ObjGet(&attr)
	if err != nil {
		return nil, err
	}

	info, err := newProgramInfoFromFd(fd)
	if err != nil {
		// The info error is what matters to the caller.
		_ = fd.Close()
		return nil, fmt.Errorf("info for %s: %w", fileName, err)
	}

	// Fall back to the file name when the kernel can't tell us the name.
	progName := filepath.Base(fileName)
	if haveObjName() == nil {
		progName = info.Name
	}

	prog := &Program{
		fd:         fd,
		name:       progName,
		pinnedPath: fileName,
		typ:        info.Type,
	}
	return prog, nil
}

// SanitizeName returns name with every character that is not valid in a BPF
// object name substituted by replacement.
//
// Passing a negative value for replacement will delete characters instead
// of replacing them. Use this to automatically generate valid names for maps
// and programs at runtime.
//
// The set of allowed characters depends on the running kernel version.
// Dots are only allowed as of kernel 5.2.
func SanitizeName(name string, replacement rune) string {
	scrub := func(r rune) rune {
		if !invalidBPFObjNameChar(r) {
			return r
		}
		// strings.Map drops the character if this is negative.
		return replacement
	}
	return strings.Map(scrub, name)
}

// ProgramGetNextID returns the ID of the next eBPF program after startID.
//
// Returns ErrNotExist, if there is no next eBPF program. The returned ID is
// zero whenever an error is returned.
func ProgramGetNextID(startID ProgramID) (ProgramID, error) {
	attr := &sys.ProgGetNextIdAttr{Id: uint32(startID)}

	// The syscall has to complete before NextId is read. Doing both in a
	// single return statement leaves the order of the field read relative to
	// the call unspecified.
	if err := sys.ProgGetNextId(attr); err != nil {
		return 0, err
	}
	return ProgramID(attr.NextId), nil
}

// BindMap binds map to the program and is only released once program is released.
//
// This may be used in cases where metadata should be associated with the program
// which otherwise does not contain any references to the map.
func (p *Program) BindMap(m *Map) error {
	return sys.ProgBindMap(&sys.ProgBindMapAttr{
		ProgFd: uint32(p.FD()),
		MapFd:  uint32(m.FD()),
	})
}

// errUnrecognizedAttachType is returned by the attach target lookups in this
// file when the combination of program type and attach type isn't one they
// know how to resolve.
var errUnrecognizedAttachType = errors.New("unrecognized attach type")

// findProgramTargetInKernel finds an attach target type in the kernel.
//
// name, progType and attachType determine which type we need to attach to.
//
// The attach target may be in a loaded kernel module.
// In that case the returned handle will be non-nil.
// The caller is responsible for closing the handle. A handle is never
// returned together with an error.
//
// Returns errUnrecognizedAttachType if the combination of progType and attachType
// is not recognised, and an internal.UnsupportedFeatureError if the target
// can't be found.
func findProgramTargetInKernel(name string, progType ProgramType, attachType AttachType) (*btf.Handle, btf.TypeID, error) {
	type match struct {
		p ProgramType
		a AttachType
	}

	var (
		typeName, featureName string
		target                btf.Type
	)

	// Derive the BTF type name to look up, a description for error messages
	// and the kind of BTF type expected.
	switch (match{progType, attachType}) {
	case match{LSM, AttachLSMMac}:
		typeName = "bpf_lsm_" + name
		featureName = name + " LSM hook"
		target = (*btf.Func)(nil)
	case match{Tracing, AttachTraceIter}:
		typeName = "bpf_iter_" + name
		featureName = name + " iterator"
		target = (*btf.Func)(nil)
	case match{Tracing, AttachTraceFEntry}:
		typeName = name
		featureName = fmt.Sprintf("fentry %s", name)
		target = (*btf.Func)(nil)
	case match{Tracing, AttachTraceFExit}:
		typeName = name
		featureName = fmt.Sprintf("fexit %s", name)
		target = (*btf.Func)(nil)
	case match{Tracing, AttachModifyReturn}:
		typeName = name
		featureName = fmt.Sprintf("fmod_ret %s", name)
		target = (*btf.Func)(nil)
	case match{Tracing, AttachTraceRawTp}:
		typeName = fmt.Sprintf("btf_trace_%s", name)
		featureName = fmt.Sprintf("raw_tp %s", name)
		target = (*btf.Typedef)(nil)
	default:
		return nil, 0, errUnrecognizedAttachType
	}

	spec, err := btf.LoadKernelSpec()
	if err != nil {
		return nil, 0, fmt.Errorf("load kernel spec: %w", err)
	}

	spec, module, err := findTargetInKernel(spec, typeName, &target)
	if errors.Is(err, btf.ErrNotFound) {
		return nil, 0, &internal.UnsupportedFeatureError{Name: featureName}
	}
	// See cilium/ebpf#894. Until we can disambiguate between equally-named kernel
	// symbols, we should explicitly refuse program loads. They will not reliably
	// do what the caller intended.
	if errors.Is(err, btf.ErrMultipleMatches) {
		return nil, 0, fmt.Errorf("attaching to ambiguous kernel symbol is not supported: %w", err)
	}
	if err != nil {
		return nil, 0, fmt.Errorf("find target for %s: %w", featureName, err)
	}

	id, err := spec.TypeID(target)
	if err != nil {
		// Don't hand a module handle back together with an error: the
		// caller bails out on error without closing it, which would leak
		// the handle.
		if module != nil {
			module.Close()
		}
		return nil, 0, err
	}
	return module, id, nil
}

// findTargetInKernel attempts to find a named type in the current kernel.
//
// vmlinux (kernelSpec) is consulted first; loaded modules are only searched
// if the type isn't found there. target will point at the found type after a
// successful call, and the returned spec is the one the type was found in.
//
// Returns a non-nil handle if the type was found in a module, or btf.ErrNotFound
// if the type wasn't found at all.
func findTargetInKernel(kernelSpec *btf.Spec, typeName string, target *btf.Type) (*btf.Spec, *btf.Handle, error) {
	err := kernelSpec.TypeByName(typeName, target)
	switch {
	case err == nil:
		// Found in vmlinux, there is no module handle to return.
		return kernelSpec, nil, nil

	case errors.Is(err, btf.ErrNotFound):
		modSpec, module, err := findTargetInModule(kernelSpec, typeName, target)
		if err != nil {
			return nil, nil, fmt.Errorf("find target in modules: %w", err)
		}
		return modSpec, module, nil

	default:
		return nil, nil, fmt.Errorf("find target in vmlinux: %w", err)
	}
}

// findTargetInModule attempts to find a named type in any loaded module.
//
// base must contain the kernel's types and is used to parse kmod BTF. Modules
// are searched in the order they were loaded.
//
// On success target points at the found type, and the returned spec and
// handle belong to the module containing it. The caller owns the handle.
//
// Returns btf.ErrNotFound if the target can't be found in any module.
func findTargetInModule(base *btf.Spec, typeName string, target *btf.Type) (*btf.Spec, *btf.Handle, error) {
	it := new(btf.HandleIterator)
	// Close whatever handle the iterator holds when we return. This has to be
	// a closure: a plain `defer it.Handle.Close()` binds the receiver when the
	// defer statement executes, at which point it.Handle is still nil, so the
	// handle of the current iteration would leak on the early returns below.
	//
	// NOTE(review): relies on it.Take() clearing it.Handle, so that a handle
	// handed to the caller isn't closed here. Confirm against
	// btf.HandleIterator.
	defer func() { it.Handle.Close() }()

	for it.Next() {
		info, err := it.Handle.Info()
		if err != nil {
			return nil, nil, fmt.Errorf("get info for BTF ID %d: %w", it.ID, err)
		}

		// Only BTF belonging to kernel modules is of interest.
		if !info.IsModule() {
			continue
		}

		spec, err := it.Handle.Spec(base)
		if err != nil {
			return nil, nil, fmt.Errorf("parse types for module %s: %w", info.Name, err)
		}

		err = spec.TypeByName(typeName, target)
		if errors.Is(err, btf.ErrNotFound) {
			continue
		}
		if err != nil {
			return nil, nil, fmt.Errorf("lookup type in module %s: %w", info.Name, err)
		}

		// Found it: take the handle out of the iterator and pass it on.
		return spec, it.Take(), nil
	}
	if err := it.Err(); err != nil {
		return nil, nil, fmt.Errorf("iterate modules: %w", err)
	}

	return nil, nil, btf.ErrNotFound
}

// findTargetInProgram finds an attach target type in a program.
//
// name is looked up as a function in the BTF associated with prog, and the
// ID of that function is returned.
//
// Returns errUnrecognizedAttachType unless progType is Extension and
// attachType is AttachNone.
func findTargetInProgram(prog *Program, name string, progType ProgramType, attachType AttachType) (btf.TypeID, error) {
	// Extension programs are the only ones which attach to another program.
	if progType != Extension || attachType != AttachNone {
		return 0, errUnrecognizedAttachType
	}
	typeName := name

	btfHandle, err := prog.Handle()
	if err != nil {
		return 0, fmt.Errorf("load target BTF: %w", err)
	}
	defer btfHandle.Close()

	spec, err := btfHandle.Spec(nil)
	if err != nil {
		return 0, err
	}

	var targetFunc *btf.Func
	if err := spec.TypeByName(typeName, &targetFunc); err != nil {
		return 0, fmt.Errorf("find target %s: %w", typeName, err)
	}

	return spec.TypeID(targetFunc)
}