elf_reader.go 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197
  1. package ebpf
  2. import (
  3. "bufio"
  4. "bytes"
  5. "debug/elf"
  6. "encoding/binary"
  7. "errors"
  8. "fmt"
  9. "io"
  10. "math"
  11. "os"
  12. "strings"
  13. "github.com/cilium/ebpf/asm"
  14. "github.com/cilium/ebpf/btf"
  15. "github.com/cilium/ebpf/internal"
  16. "github.com/cilium/ebpf/internal/unix"
  17. )
// elfCode is a convenience to reduce the amount of arguments that have to
// be passed around explicitly. You should treat its contents as immutable.
type elfCode struct {
	// The ELF file being parsed. It is embedded, so members such as
	// ByteOrder are reachable directly on elfCode.
	*internal.SafeELFFile
	// Sections of interest, keyed by their index in the ELF section table.
	sections map[elf.SectionIndex]*elfSection
	// Contents of the license section, "" if the ELF has none.
	license string
	// Contents of the version section, 0 if the ELF has none.
	version uint32
	// BTF type information. Checked against nil before use, since an ELF
	// without BTF is tolerated by the loader.
	btf *btf.Spec
	// BTF extended information that is assigned to decoded instructions.
	// Checked against nil before use.
	extInfo *btf.ExtInfos
}
  28. // LoadCollectionSpec parses an ELF file into a CollectionSpec.
  29. func LoadCollectionSpec(file string) (*CollectionSpec, error) {
  30. f, err := os.Open(file)
  31. if err != nil {
  32. return nil, err
  33. }
  34. defer f.Close()
  35. spec, err := LoadCollectionSpecFromReader(f)
  36. if err != nil {
  37. return nil, fmt.Errorf("file %s: %w", file, err)
  38. }
  39. return spec, nil
  40. }
  41. // LoadCollectionSpecFromReader parses an ELF file into a CollectionSpec.
  42. func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) {
  43. f, err := internal.NewSafeELFFile(rd)
  44. if err != nil {
  45. return nil, err
  46. }
  47. var (
  48. licenseSection *elf.Section
  49. versionSection *elf.Section
  50. sections = make(map[elf.SectionIndex]*elfSection)
  51. relSections = make(map[elf.SectionIndex]*elf.Section)
  52. )
  53. // This is the target of relocations generated by inline assembly.
  54. sections[elf.SHN_UNDEF] = newElfSection(new(elf.Section), undefSection)
  55. // Collect all the sections we're interested in. This includes relocations
  56. // which we parse later.
  57. for i, sec := range f.Sections {
  58. idx := elf.SectionIndex(i)
  59. switch {
  60. case strings.HasPrefix(sec.Name, "license"):
  61. licenseSection = sec
  62. case strings.HasPrefix(sec.Name, "version"):
  63. versionSection = sec
  64. case strings.HasPrefix(sec.Name, "maps"):
  65. sections[idx] = newElfSection(sec, mapSection)
  66. case sec.Name == ".maps":
  67. sections[idx] = newElfSection(sec, btfMapSection)
  68. case sec.Name == ".bss" || sec.Name == ".data" || strings.HasPrefix(sec.Name, ".rodata"):
  69. sections[idx] = newElfSection(sec, dataSection)
  70. case sec.Type == elf.SHT_REL:
  71. // Store relocations under the section index of the target
  72. relSections[elf.SectionIndex(sec.Info)] = sec
  73. case sec.Type == elf.SHT_PROGBITS && (sec.Flags&elf.SHF_EXECINSTR) != 0 && sec.Size > 0:
  74. sections[idx] = newElfSection(sec, programSection)
  75. }
  76. }
  77. license, err := loadLicense(licenseSection)
  78. if err != nil {
  79. return nil, fmt.Errorf("load license: %w", err)
  80. }
  81. version, err := loadVersion(versionSection, f.ByteOrder)
  82. if err != nil {
  83. return nil, fmt.Errorf("load version: %w", err)
  84. }
  85. btfSpec, btfExtInfo, err := btf.LoadSpecAndExtInfosFromReader(rd)
  86. if err != nil && !errors.Is(err, btf.ErrNotFound) {
  87. return nil, fmt.Errorf("load BTF: %w", err)
  88. }
  89. ec := &elfCode{
  90. SafeELFFile: f,
  91. sections: sections,
  92. license: license,
  93. version: version,
  94. btf: btfSpec,
  95. extInfo: btfExtInfo,
  96. }
  97. symbols, err := f.Symbols()
  98. if err != nil {
  99. return nil, fmt.Errorf("load symbols: %v", err)
  100. }
  101. ec.assignSymbols(symbols)
  102. if err := ec.loadRelocations(relSections, symbols); err != nil {
  103. return nil, fmt.Errorf("load relocations: %w", err)
  104. }
  105. // Collect all the various ways to define maps.
  106. maps := make(map[string]*MapSpec)
  107. if err := ec.loadMaps(maps); err != nil {
  108. return nil, fmt.Errorf("load maps: %w", err)
  109. }
  110. if err := ec.loadBTFMaps(maps); err != nil {
  111. return nil, fmt.Errorf("load BTF maps: %w", err)
  112. }
  113. if err := ec.loadDataSections(maps); err != nil {
  114. return nil, fmt.Errorf("load data sections: %w", err)
  115. }
  116. // Finally, collect programs and link them.
  117. progs, err := ec.loadProgramSections()
  118. if err != nil {
  119. return nil, fmt.Errorf("load programs: %w", err)
  120. }
  121. return &CollectionSpec{maps, progs, btfSpec, ec.ByteOrder}, nil
  122. }
  123. func loadLicense(sec *elf.Section) (string, error) {
  124. if sec == nil {
  125. return "", nil
  126. }
  127. data, err := sec.Data()
  128. if err != nil {
  129. return "", fmt.Errorf("section %s: %v", sec.Name, err)
  130. }
  131. return string(bytes.TrimRight(data, "\000")), nil
  132. }
  133. func loadVersion(sec *elf.Section, bo binary.ByteOrder) (uint32, error) {
  134. if sec == nil {
  135. return 0, nil
  136. }
  137. var version uint32
  138. if err := binary.Read(sec.Open(), bo, &version); err != nil {
  139. return 0, fmt.Errorf("section %s: %v", sec.Name, err)
  140. }
  141. return version, nil
  142. }
// elfSectionKind classifies an ELF section by the role it plays while
// loading a collection.
type elfSectionKind int

const (
	// undefSection is the placeholder registered under elf.SHN_UNDEF; it is
	// the target of relocations generated by inline assembly.
	undefSection elfSectionKind = iota
	// mapSection is a section whose name starts with "maps".
	mapSection
	// btfMapSection is the section named ".maps".
	btfMapSection
	// programSection is a non-empty, executable SHT_PROGBITS section.
	programSection
	// dataSection is ".bss", ".data" or a section whose name starts with
	// ".rodata".
	dataSection
)
// elfSection pairs an ELF section with its kind and with the symbols and
// relocations that were found for it.
type elfSection struct {
	*elf.Section
	// Role of the section, see elfSectionKind.
	kind elfSectionKind
	// Offset from the start of the section to a symbol
	symbols map[uint64]elf.Symbol
	// Offset from the start of the section to a relocation, which points at
	// a symbol in another section.
	relocations map[uint64]elf.Symbol
	// The number of relocations pointing at this section.
	references int
}
  162. func newElfSection(section *elf.Section, kind elfSectionKind) *elfSection {
  163. return &elfSection{
  164. section,
  165. kind,
  166. make(map[uint64]elf.Symbol),
  167. make(map[uint64]elf.Symbol),
  168. 0,
  169. }
  170. }
  171. // assignSymbols takes a list of symbols and assigns them to their
  172. // respective sections, indexed by name.
  173. func (ec *elfCode) assignSymbols(symbols []elf.Symbol) {
  174. for _, symbol := range symbols {
  175. symType := elf.ST_TYPE(symbol.Info)
  176. symSection := ec.sections[symbol.Section]
  177. if symSection == nil {
  178. continue
  179. }
  180. // Anonymous symbols only occur in debug sections which we don't process
  181. // relocations for. Anonymous symbols are not referenced from other sections.
  182. if symbol.Name == "" {
  183. continue
  184. }
  185. // Older versions of LLVM don't tag symbols correctly, so keep
  186. // all NOTYPE ones.
  187. switch symSection.kind {
  188. case mapSection, btfMapSection, dataSection:
  189. if symType != elf.STT_NOTYPE && symType != elf.STT_OBJECT {
  190. continue
  191. }
  192. case programSection:
  193. if symType != elf.STT_NOTYPE && symType != elf.STT_FUNC {
  194. continue
  195. }
  196. // LLVM emits LBB_ (Local Basic Block) symbols that seem to be jump
  197. // targets within sections, but BPF has no use for them.
  198. if symType == elf.STT_NOTYPE && elf.ST_BIND(symbol.Info) == elf.STB_LOCAL &&
  199. strings.HasPrefix(symbol.Name, "LBB") {
  200. continue
  201. }
  202. // Only collect symbols that occur in program/maps/data sections.
  203. default:
  204. continue
  205. }
  206. symSection.symbols[symbol.Value] = symbol
  207. }
  208. }
  209. // loadRelocations iterates .rel* sections and extracts relocation entries for
  210. // sections of interest. Makes sure relocations point at valid sections.
  211. func (ec *elfCode) loadRelocations(relSections map[elf.SectionIndex]*elf.Section, symbols []elf.Symbol) error {
  212. for idx, relSection := range relSections {
  213. section := ec.sections[idx]
  214. if section == nil {
  215. continue
  216. }
  217. rels, err := ec.loadSectionRelocations(relSection, symbols)
  218. if err != nil {
  219. return fmt.Errorf("relocation for section %q: %w", section.Name, err)
  220. }
  221. for _, rel := range rels {
  222. target := ec.sections[rel.Section]
  223. if target == nil {
  224. return fmt.Errorf("section %q: reference to %q in section %s: %w", section.Name, rel.Name, rel.Section, ErrNotSupported)
  225. }
  226. if target.Flags&elf.SHF_STRINGS > 0 {
  227. return fmt.Errorf("section %q: string is not stack allocated: %w", section.Name, ErrNotSupported)
  228. }
  229. target.references++
  230. }
  231. section.relocations = rels
  232. }
  233. return nil
  234. }
  235. // loadProgramSections iterates ec's sections and emits a ProgramSpec
  236. // for each function it finds.
  237. //
  238. // The resulting map is indexed by function name.
  239. func (ec *elfCode) loadProgramSections() (map[string]*ProgramSpec, error) {
  240. progs := make(map[string]*ProgramSpec)
  241. // Generate a ProgramSpec for each function found in each program section.
  242. var export []string
  243. for _, sec := range ec.sections {
  244. if sec.kind != programSection {
  245. continue
  246. }
  247. if len(sec.symbols) == 0 {
  248. return nil, fmt.Errorf("section %v: missing symbols", sec.Name)
  249. }
  250. funcs, err := ec.loadFunctions(sec)
  251. if err != nil {
  252. return nil, fmt.Errorf("section %v: %w", sec.Name, err)
  253. }
  254. progType, attachType, progFlags, attachTo := getProgType(sec.Name)
  255. for name, insns := range funcs {
  256. spec := &ProgramSpec{
  257. Name: name,
  258. Type: progType,
  259. Flags: progFlags,
  260. AttachType: attachType,
  261. AttachTo: attachTo,
  262. SectionName: sec.Name,
  263. License: ec.license,
  264. KernelVersion: ec.version,
  265. Instructions: insns,
  266. ByteOrder: ec.ByteOrder,
  267. BTF: ec.btf,
  268. }
  269. // Function names must be unique within a single ELF blob.
  270. if progs[name] != nil {
  271. return nil, fmt.Errorf("duplicate program name %s", name)
  272. }
  273. progs[name] = spec
  274. if spec.SectionName != ".text" {
  275. export = append(export, name)
  276. }
  277. }
  278. }
  279. flattenPrograms(progs, export)
  280. // Hide programs (e.g. library functions) that were not explicitly emitted
  281. // to an ELF section. These could be exposed in a separate CollectionSpec
  282. // field later to allow them to be modified.
  283. for n, p := range progs {
  284. if p.SectionName == ".text" {
  285. delete(progs, n)
  286. }
  287. }
  288. return progs, nil
  289. }
  290. // loadFunctions extracts instruction streams from the given program section
  291. // starting at each symbol in the section. The section's symbols must already
  292. // be narrowed down to STT_NOTYPE (emitted by clang <8) or STT_FUNC.
  293. //
  294. // The resulting map is indexed by function name.
  295. func (ec *elfCode) loadFunctions(section *elfSection) (map[string]asm.Instructions, error) {
  296. r := bufio.NewReader(section.Open())
  297. // Decode the section's instruction stream.
  298. var insns asm.Instructions
  299. if err := insns.Unmarshal(r, ec.ByteOrder); err != nil {
  300. return nil, fmt.Errorf("decoding instructions for section %s: %w", section.Name, err)
  301. }
  302. if len(insns) == 0 {
  303. return nil, fmt.Errorf("no instructions found in section %s", section.Name)
  304. }
  305. iter := insns.Iterate()
  306. for iter.Next() {
  307. ins := iter.Ins
  308. offset := iter.Offset.Bytes()
  309. // Tag Symbol Instructions.
  310. if sym, ok := section.symbols[offset]; ok {
  311. *ins = ins.WithSymbol(sym.Name)
  312. }
  313. // Apply any relocations for the current instruction.
  314. // If no relocation is present, resolve any section-relative function calls.
  315. if rel, ok := section.relocations[offset]; ok {
  316. if err := ec.relocateInstruction(ins, rel); err != nil {
  317. return nil, fmt.Errorf("offset %d: relocating instruction: %w", offset, err)
  318. }
  319. } else {
  320. if err := referenceRelativeJump(ins, offset, section.symbols); err != nil {
  321. return nil, fmt.Errorf("offset %d: resolving relative jump: %w", offset, err)
  322. }
  323. }
  324. }
  325. if ec.extInfo != nil {
  326. ec.extInfo.Assign(insns, section.Name)
  327. }
  328. return splitSymbols(insns)
  329. }
  330. // referenceRelativeJump turns a relative jump to another bpf subprogram within
  331. // the same ELF section into a Reference Instruction.
  332. //
  333. // Up to LLVM 9, calls to subprograms within the same ELF section are sometimes
  334. // encoded using relative jumps instead of relocation entries. These jumps go
  335. // out of bounds of the current program, so their targets must be memoized
  336. // before the section's instruction stream is split.
  337. //
  338. // The relative jump Constant is blinded to -1 and the target Symbol is set as
  339. // the Instruction's Reference so it can be resolved by the linker.
  340. func referenceRelativeJump(ins *asm.Instruction, offset uint64, symbols map[uint64]elf.Symbol) error {
  341. if !ins.IsFunctionReference() || ins.Constant == -1 {
  342. return nil
  343. }
  344. tgt := jumpTarget(offset, *ins)
  345. sym := symbols[tgt].Name
  346. if sym == "" {
  347. return fmt.Errorf("no jump target found at offset %d", tgt)
  348. }
  349. *ins = ins.WithReference(sym)
  350. ins.Constant = -1
  351. return nil
  352. }
  353. // jumpTarget takes ins' offset within an instruction stream (in bytes)
  354. // and returns its absolute jump destination (in bytes) within the
  355. // instruction stream.
  356. func jumpTarget(offset uint64, ins asm.Instruction) uint64 {
  357. // A relative jump instruction describes the amount of raw BPF instructions
  358. // to jump, convert the offset into bytes.
  359. dest := ins.Constant * asm.InstructionSize
  360. // The starting point of the jump is the end of the current instruction.
  361. dest += int64(offset + asm.InstructionSize)
  362. if dest < 0 {
  363. return 0
  364. }
  365. return uint64(dest)
  366. }
  367. func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) error {
  368. var (
  369. typ = elf.ST_TYPE(rel.Info)
  370. bind = elf.ST_BIND(rel.Info)
  371. name = rel.Name
  372. )
  373. target := ec.sections[rel.Section]
  374. switch target.kind {
  375. case mapSection, btfMapSection:
  376. if bind != elf.STB_GLOBAL {
  377. return fmt.Errorf("possible erroneous static qualifier on map definition: found reference to %q", name)
  378. }
  379. if typ != elf.STT_OBJECT && typ != elf.STT_NOTYPE {
  380. // STT_NOTYPE is generated on clang < 8 which doesn't tag
  381. // relocations appropriately.
  382. return fmt.Errorf("map load: incorrect relocation type %v", typ)
  383. }
  384. ins.Src = asm.PseudoMapFD
  385. case dataSection:
  386. var offset uint32
  387. switch typ {
  388. case elf.STT_SECTION:
  389. if bind != elf.STB_LOCAL {
  390. return fmt.Errorf("direct load: %s: unsupported section relocation %s", name, bind)
  391. }
  392. // This is really a reference to a static symbol, which clang doesn't
  393. // emit a symbol table entry for. Instead it encodes the offset in
  394. // the instruction itself.
  395. offset = uint32(uint64(ins.Constant))
  396. case elf.STT_OBJECT:
  397. // LLVM 9 emits OBJECT-LOCAL symbols for anonymous constants.
  398. if bind != elf.STB_GLOBAL && bind != elf.STB_LOCAL {
  399. return fmt.Errorf("direct load: %s: unsupported object relocation %s", name, bind)
  400. }
  401. offset = uint32(rel.Value)
  402. case elf.STT_NOTYPE:
  403. // LLVM 7 emits NOTYPE-LOCAL symbols for anonymous constants.
  404. if bind != elf.STB_LOCAL {
  405. return fmt.Errorf("direct load: %s: unsupported untyped relocation %s", name, bind)
  406. }
  407. offset = uint32(rel.Value)
  408. default:
  409. return fmt.Errorf("incorrect relocation type %v for direct map load", typ)
  410. }
  411. // We rely on using the name of the data section as the reference. It
  412. // would be nicer to keep the real name in case of an STT_OBJECT, but
  413. // it's not clear how to encode that into Instruction.
  414. name = target.Name
  415. // The kernel expects the offset in the second basic BPF instruction.
  416. ins.Constant = int64(uint64(offset) << 32)
  417. ins.Src = asm.PseudoMapValue
  418. case programSection:
  419. switch opCode := ins.OpCode; {
  420. case opCode.JumpOp() == asm.Call:
  421. if ins.Src != asm.PseudoCall {
  422. return fmt.Errorf("call: %s: incorrect source register", name)
  423. }
  424. switch typ {
  425. case elf.STT_NOTYPE, elf.STT_FUNC:
  426. if bind != elf.STB_GLOBAL {
  427. return fmt.Errorf("call: %s: unsupported binding: %s", name, bind)
  428. }
  429. case elf.STT_SECTION:
  430. if bind != elf.STB_LOCAL {
  431. return fmt.Errorf("call: %s: unsupported binding: %s", name, bind)
  432. }
  433. // The function we want to call is in the indicated section,
  434. // at the offset encoded in the instruction itself. Reverse
  435. // the calculation to find the real function we're looking for.
  436. // A value of -1 references the first instruction in the section.
  437. offset := int64(int32(ins.Constant)+1) * asm.InstructionSize
  438. sym, ok := target.symbols[uint64(offset)]
  439. if !ok {
  440. return fmt.Errorf("call: no symbol at offset %d", offset)
  441. }
  442. name = sym.Name
  443. ins.Constant = -1
  444. default:
  445. return fmt.Errorf("call: %s: invalid symbol type %s", name, typ)
  446. }
  447. case opCode.IsDWordLoad():
  448. switch typ {
  449. case elf.STT_FUNC:
  450. if bind != elf.STB_GLOBAL {
  451. return fmt.Errorf("load: %s: unsupported binding: %s", name, bind)
  452. }
  453. case elf.STT_SECTION:
  454. if bind != elf.STB_LOCAL {
  455. return fmt.Errorf("load: %s: unsupported binding: %s", name, bind)
  456. }
  457. // ins.Constant already contains the offset in bytes from the
  458. // start of the section. This is different than a call to a
  459. // static function.
  460. default:
  461. return fmt.Errorf("load: %s: invalid symbol type %s", name, typ)
  462. }
  463. sym, ok := target.symbols[uint64(ins.Constant)]
  464. if !ok {
  465. return fmt.Errorf("load: no symbol at offset %d", ins.Constant)
  466. }
  467. name = sym.Name
  468. ins.Constant = -1
  469. ins.Src = asm.PseudoFunc
  470. default:
  471. return fmt.Errorf("neither a call nor a load instruction: %v", ins)
  472. }
  473. case undefSection:
  474. if bind != elf.STB_GLOBAL {
  475. return fmt.Errorf("asm relocation: %s: unsupported binding: %s", name, bind)
  476. }
  477. if typ != elf.STT_NOTYPE {
  478. return fmt.Errorf("asm relocation: %s: unsupported type %s", name, typ)
  479. }
  480. // There is nothing to do here but set ins.Reference.
  481. default:
  482. return fmt.Errorf("relocation to %q: %w", target.Name, ErrNotSupported)
  483. }
  484. *ins = ins.WithReference(name)
  485. return nil
  486. }
  487. func (ec *elfCode) loadMaps(maps map[string]*MapSpec) error {
  488. for _, sec := range ec.sections {
  489. if sec.kind != mapSection {
  490. continue
  491. }
  492. nSym := len(sec.symbols)
  493. if nSym == 0 {
  494. return fmt.Errorf("section %v: no symbols", sec.Name)
  495. }
  496. if sec.Size%uint64(nSym) != 0 {
  497. return fmt.Errorf("section %v: map descriptors are not of equal size", sec.Name)
  498. }
  499. var (
  500. r = bufio.NewReader(sec.Open())
  501. size = sec.Size / uint64(nSym)
  502. )
  503. for i, offset := 0, uint64(0); i < nSym; i, offset = i+1, offset+size {
  504. mapSym, ok := sec.symbols[offset]
  505. if !ok {
  506. return fmt.Errorf("section %s: missing symbol for map at offset %d", sec.Name, offset)
  507. }
  508. mapName := mapSym.Name
  509. if maps[mapName] != nil {
  510. return fmt.Errorf("section %v: map %v already exists", sec.Name, mapSym)
  511. }
  512. lr := io.LimitReader(r, int64(size))
  513. spec := MapSpec{
  514. Name: SanitizeName(mapName, -1),
  515. }
  516. switch {
  517. case binary.Read(lr, ec.ByteOrder, &spec.Type) != nil:
  518. return fmt.Errorf("map %s: missing type", mapName)
  519. case binary.Read(lr, ec.ByteOrder, &spec.KeySize) != nil:
  520. return fmt.Errorf("map %s: missing key size", mapName)
  521. case binary.Read(lr, ec.ByteOrder, &spec.ValueSize) != nil:
  522. return fmt.Errorf("map %s: missing value size", mapName)
  523. case binary.Read(lr, ec.ByteOrder, &spec.MaxEntries) != nil:
  524. return fmt.Errorf("map %s: missing max entries", mapName)
  525. case binary.Read(lr, ec.ByteOrder, &spec.Flags) != nil:
  526. return fmt.Errorf("map %s: missing flags", mapName)
  527. }
  528. extra, err := io.ReadAll(lr)
  529. if err != nil {
  530. return fmt.Errorf("map %s: reading map tail: %w", mapName, err)
  531. }
  532. if len(extra) > 0 {
  533. spec.Extra = bytes.NewReader(extra)
  534. }
  535. if err := spec.clampPerfEventArraySize(); err != nil {
  536. return fmt.Errorf("map %s: %w", mapName, err)
  537. }
  538. maps[mapName] = &spec
  539. }
  540. }
  541. return nil
  542. }
  543. // loadBTFMaps iterates over all ELF sections marked as BTF map sections
  544. // (like .maps) and parses them into MapSpecs. Dump the .maps section and
  545. // any relocations with `readelf -x .maps -r <elf_file>`.
  546. func (ec *elfCode) loadBTFMaps(maps map[string]*MapSpec) error {
  547. for _, sec := range ec.sections {
  548. if sec.kind != btfMapSection {
  549. continue
  550. }
  551. if ec.btf == nil {
  552. return fmt.Errorf("missing BTF")
  553. }
  554. // Each section must appear as a DataSec in the ELF's BTF blob.
  555. var ds *btf.Datasec
  556. if err := ec.btf.TypeByName(sec.Name, &ds); err != nil {
  557. return fmt.Errorf("cannot find section '%s' in BTF: %w", sec.Name, err)
  558. }
  559. // Open a Reader to the ELF's raw section bytes so we can assert that all
  560. // of them are zero on a per-map (per-Var) basis. For now, the section's
  561. // sole purpose is to receive relocations, so all must be zero.
  562. rs := sec.Open()
  563. for _, vs := range ds.Vars {
  564. // BPF maps are declared as and assigned to global variables,
  565. // so iterate over each Var in the DataSec and validate their types.
  566. v, ok := vs.Type.(*btf.Var)
  567. if !ok {
  568. return fmt.Errorf("section %v: unexpected type %s", sec.Name, vs.Type)
  569. }
  570. name := string(v.Name)
  571. // The BTF metadata for each Var contains the full length of the map
  572. // declaration, so read the corresponding amount of bytes from the ELF.
  573. // This way, we can pinpoint which map declaration contains unexpected
  574. // (and therefore unsupported) data.
  575. _, err := io.Copy(internal.DiscardZeroes{}, io.LimitReader(rs, int64(vs.Size)))
  576. if err != nil {
  577. return fmt.Errorf("section %v: map %s: initializing BTF map definitions: %w", sec.Name, name, internal.ErrNotSupported)
  578. }
  579. if maps[name] != nil {
  580. return fmt.Errorf("section %v: map %s already exists", sec.Name, name)
  581. }
  582. // Each Var representing a BTF map definition contains a Struct.
  583. mapStruct, ok := v.Type.(*btf.Struct)
  584. if !ok {
  585. return fmt.Errorf("expected struct, got %s", v.Type)
  586. }
  587. mapSpec, err := mapSpecFromBTF(sec, &vs, mapStruct, ec.btf, name, false)
  588. if err != nil {
  589. return fmt.Errorf("map %v: %w", name, err)
  590. }
  591. if err := mapSpec.clampPerfEventArraySize(); err != nil {
  592. return fmt.Errorf("map %v: %w", name, err)
  593. }
  594. maps[name] = mapSpec
  595. }
  596. // Drain the ELF section reader to make sure all bytes are accounted for
  597. // with BTF metadata.
  598. i, err := io.Copy(io.Discard, rs)
  599. if err != nil {
  600. return fmt.Errorf("section %v: unexpected error reading remainder of ELF section: %w", sec.Name, err)
  601. }
  602. if i > 0 {
  603. return fmt.Errorf("section %v: %d unexpected remaining bytes in ELF section, invalid BTF?", sec.Name, i)
  604. }
  605. }
  606. return nil
  607. }
  608. // mapSpecFromBTF produces a MapSpec based on a btf.Struct def representing
  609. // a BTF map definition. The name and spec arguments will be copied to the
  610. // resulting MapSpec, and inner must be true on any resursive invocations.
  611. func mapSpecFromBTF(es *elfSection, vs *btf.VarSecinfo, def *btf.Struct, spec *btf.Spec, name string, inner bool) (*MapSpec, error) {
  612. var (
  613. key, value btf.Type
  614. keySize, valueSize uint32
  615. mapType MapType
  616. flags, maxEntries uint32
  617. pinType PinType
  618. innerMapSpec *MapSpec
  619. contents []MapKV
  620. err error
  621. )
  622. for i, member := range def.Members {
  623. switch member.Name {
  624. case "type":
  625. mt, err := uintFromBTF(member.Type)
  626. if err != nil {
  627. return nil, fmt.Errorf("can't get type: %w", err)
  628. }
  629. mapType = MapType(mt)
  630. case "map_flags":
  631. flags, err = uintFromBTF(member.Type)
  632. if err != nil {
  633. return nil, fmt.Errorf("can't get BTF map flags: %w", err)
  634. }
  635. case "max_entries":
  636. maxEntries, err = uintFromBTF(member.Type)
  637. if err != nil {
  638. return nil, fmt.Errorf("can't get BTF map max entries: %w", err)
  639. }
  640. case "key":
  641. if keySize != 0 {
  642. return nil, errors.New("both key and key_size given")
  643. }
  644. pk, ok := member.Type.(*btf.Pointer)
  645. if !ok {
  646. return nil, fmt.Errorf("key type is not a pointer: %T", member.Type)
  647. }
  648. key = pk.Target
  649. size, err := btf.Sizeof(pk.Target)
  650. if err != nil {
  651. return nil, fmt.Errorf("can't get size of BTF key: %w", err)
  652. }
  653. keySize = uint32(size)
  654. case "value":
  655. if valueSize != 0 {
  656. return nil, errors.New("both value and value_size given")
  657. }
  658. vk, ok := member.Type.(*btf.Pointer)
  659. if !ok {
  660. return nil, fmt.Errorf("value type is not a pointer: %T", member.Type)
  661. }
  662. value = vk.Target
  663. size, err := btf.Sizeof(vk.Target)
  664. if err != nil {
  665. return nil, fmt.Errorf("can't get size of BTF value: %w", err)
  666. }
  667. valueSize = uint32(size)
  668. case "key_size":
  669. // Key needs to be nil and keySize needs to be 0 for key_size to be
  670. // considered a valid member.
  671. if key != nil || keySize != 0 {
  672. return nil, errors.New("both key and key_size given")
  673. }
  674. keySize, err = uintFromBTF(member.Type)
  675. if err != nil {
  676. return nil, fmt.Errorf("can't get BTF key size: %w", err)
  677. }
  678. case "value_size":
  679. // Value needs to be nil and valueSize needs to be 0 for value_size to be
  680. // considered a valid member.
  681. if value != nil || valueSize != 0 {
  682. return nil, errors.New("both value and value_size given")
  683. }
  684. valueSize, err = uintFromBTF(member.Type)
  685. if err != nil {
  686. return nil, fmt.Errorf("can't get BTF value size: %w", err)
  687. }
  688. case "pinning":
  689. if inner {
  690. return nil, errors.New("inner maps can't be pinned")
  691. }
  692. pinning, err := uintFromBTF(member.Type)
  693. if err != nil {
  694. return nil, fmt.Errorf("can't get pinning: %w", err)
  695. }
  696. pinType = PinType(pinning)
  697. case "values":
  698. // The 'values' field in BTF map definitions is used for declaring map
  699. // value types that are references to other BPF objects, like other maps
  700. // or programs. It is always expected to be an array of pointers.
  701. if i != len(def.Members)-1 {
  702. return nil, errors.New("'values' must be the last member in a BTF map definition")
  703. }
  704. if valueSize != 0 && valueSize != 4 {
  705. return nil, errors.New("value_size must be 0 or 4")
  706. }
  707. valueSize = 4
  708. valueType, err := resolveBTFArrayMacro(member.Type)
  709. if err != nil {
  710. return nil, fmt.Errorf("can't resolve type of member 'values': %w", err)
  711. }
  712. switch t := valueType.(type) {
  713. case *btf.Struct:
  714. // The values member pointing to an array of structs means we're expecting
  715. // a map-in-map declaration.
  716. if mapType != ArrayOfMaps && mapType != HashOfMaps {
  717. return nil, errors.New("outer map needs to be an array or a hash of maps")
  718. }
  719. if inner {
  720. return nil, fmt.Errorf("nested inner maps are not supported")
  721. }
  722. // This inner map spec is used as a map template, but it needs to be
  723. // created as a traditional map before it can be used to do so.
  724. // libbpf names the inner map template '<outer_name>.inner', but we
  725. // opted for _inner to simplify validation logic. (dots only supported
  726. // on kernels 5.2 and up)
  727. // Pass the BTF spec from the parent object, since both parent and
  728. // child must be created from the same BTF blob (on kernels that support BTF).
  729. innerMapSpec, err = mapSpecFromBTF(es, vs, t, spec, name+"_inner", true)
  730. if err != nil {
  731. return nil, fmt.Errorf("can't parse BTF map definition of inner map: %w", err)
  732. }
  733. case *btf.FuncProto:
  734. // The values member contains an array of function pointers, meaning an
  735. // autopopulated PROG_ARRAY.
  736. if mapType != ProgramArray {
  737. return nil, errors.New("map needs to be a program array")
  738. }
  739. default:
  740. return nil, fmt.Errorf("unsupported value type %q in 'values' field", t)
  741. }
  742. contents, err = resolveBTFValuesContents(es, vs, member)
  743. if err != nil {
  744. return nil, fmt.Errorf("resolving values contents: %w", err)
  745. }
  746. default:
  747. return nil, fmt.Errorf("unrecognized field %s in BTF map definition", member.Name)
  748. }
  749. }
  750. if key == nil {
  751. key = &btf.Void{}
  752. }
  753. if value == nil {
  754. value = &btf.Void{}
  755. }
  756. return &MapSpec{
  757. Name: SanitizeName(name, -1),
  758. Type: MapType(mapType),
  759. KeySize: keySize,
  760. ValueSize: valueSize,
  761. MaxEntries: maxEntries,
  762. Flags: flags,
  763. Key: key,
  764. Value: value,
  765. BTF: spec,
  766. Pinning: pinType,
  767. InnerMap: innerMapSpec,
  768. Contents: contents,
  769. }, nil
  770. }
  771. // uintFromBTF resolves the __uint macro, which is a pointer to a sized
  772. // array, e.g. for int (*foo)[10], this function will return 10.
  773. func uintFromBTF(typ btf.Type) (uint32, error) {
  774. ptr, ok := typ.(*btf.Pointer)
  775. if !ok {
  776. return 0, fmt.Errorf("not a pointer: %v", typ)
  777. }
  778. arr, ok := ptr.Target.(*btf.Array)
  779. if !ok {
  780. return 0, fmt.Errorf("not a pointer to array: %v", typ)
  781. }
  782. return arr.Nelems, nil
  783. }
  784. // resolveBTFArrayMacro resolves the __array macro, which declares an array
  785. // of pointers to a given type. This function returns the target Type of
  786. // the pointers in the array.
  787. func resolveBTFArrayMacro(typ btf.Type) (btf.Type, error) {
  788. arr, ok := typ.(*btf.Array)
  789. if !ok {
  790. return nil, fmt.Errorf("not an array: %v", typ)
  791. }
  792. ptr, ok := arr.Type.(*btf.Pointer)
  793. if !ok {
  794. return nil, fmt.Errorf("not an array of pointers: %v", typ)
  795. }
  796. return ptr.Target, nil
  797. }
  798. // resolveBTFValuesContents resolves relocations into ELF sections belonging
  799. // to btf.VarSecinfo's. This can be used on the 'values' member in BTF map
  800. // definitions to extract static declarations of map contents.
  801. func resolveBTFValuesContents(es *elfSection, vs *btf.VarSecinfo, member btf.Member) ([]MapKV, error) {
  802. // The elements of a .values pointer array are not encoded in BTF.
  803. // Instead, relocations are generated into each array index.
  804. // However, it's possible to leave certain array indices empty, so all
  805. // indices' offsets need to be checked for emitted relocations.
  806. // The offset of the 'values' member within the _struct_ (in bits)
  807. // is the starting point of the array. Convert to bytes. Add VarSecinfo
  808. // offset to get the absolute position in the ELF blob.
  809. start := member.Offset.Bytes() + vs.Offset
  810. // 'values' is encoded in BTF as a zero (variable) length struct
  811. // member, and its contents run until the end of the VarSecinfo.
  812. // Add VarSecinfo offset to get the absolute position in the ELF blob.
  813. end := vs.Size + vs.Offset
  814. // The size of an address in this section. This determines the width of
  815. // an index in the array.
  816. align := uint32(es.SectionHeader.Addralign)
  817. // Check if variable-length section is aligned.
  818. if (end-start)%align != 0 {
  819. return nil, errors.New("unaligned static values section")
  820. }
  821. elems := (end - start) / align
  822. if elems == 0 {
  823. return nil, nil
  824. }
  825. contents := make([]MapKV, 0, elems)
  826. // k is the array index, off is its corresponding ELF section offset.
  827. for k, off := uint32(0), start; k < elems; k, off = k+1, off+align {
  828. r, ok := es.relocations[uint64(off)]
  829. if !ok {
  830. continue
  831. }
  832. // Relocation exists for the current offset in the ELF section.
  833. // Emit a value stub based on the type of relocation to be replaced by
  834. // a real fd later in the pipeline before populating the map.
  835. // Map keys are encoded in MapKV entries, so empty array indices are
  836. // skipped here.
  837. switch t := elf.ST_TYPE(r.Info); t {
  838. case elf.STT_FUNC:
  839. contents = append(contents, MapKV{uint32(k), r.Name})
  840. case elf.STT_OBJECT:
  841. contents = append(contents, MapKV{uint32(k), r.Name})
  842. default:
  843. return nil, fmt.Errorf("unknown relocation type %v", t)
  844. }
  845. }
  846. return contents, nil
  847. }
  848. func (ec *elfCode) loadDataSections(maps map[string]*MapSpec) error {
  849. for _, sec := range ec.sections {
  850. if sec.kind != dataSection {
  851. continue
  852. }
  853. if sec.references == 0 {
  854. // Prune data sections which are not referenced by any
  855. // instructions.
  856. continue
  857. }
  858. data, err := sec.Data()
  859. if err != nil {
  860. return fmt.Errorf("data section %s: can't get contents: %w", sec.Name, err)
  861. }
  862. if uint64(len(data)) > math.MaxUint32 {
  863. return fmt.Errorf("data section %s: contents exceed maximum size", sec.Name)
  864. }
  865. mapSpec := &MapSpec{
  866. Name: SanitizeName(sec.Name, -1),
  867. Type: Array,
  868. KeySize: 4,
  869. ValueSize: uint32(len(data)),
  870. MaxEntries: 1,
  871. Contents: []MapKV{{uint32(0), data}},
  872. }
  873. // It is possible for a data section to exist without a corresponding BTF Datasec
  874. // if it only contains anonymous values like macro-defined arrays.
  875. if ec.btf != nil {
  876. var ds *btf.Datasec
  877. if ec.btf.TypeByName(sec.Name, &ds) == nil {
  878. // Assign the spec's key and BTF only if the Datasec lookup was successful.
  879. mapSpec.BTF = ec.btf
  880. mapSpec.Key = &btf.Void{}
  881. mapSpec.Value = ds
  882. }
  883. }
  884. switch n := sec.Name; {
  885. case strings.HasPrefix(n, ".rodata"):
  886. mapSpec.Flags = unix.BPF_F_RDONLY_PROG
  887. mapSpec.Freeze = true
  888. case n == ".bss":
  889. // The kernel already zero-initializes the map
  890. mapSpec.Contents = nil
  891. }
  892. maps[sec.Name] = mapSpec
  893. }
  894. return nil
  895. }
  896. func getProgType(sectionName string) (ProgramType, AttachType, uint32, string) {
  897. types := []struct {
  898. prefix string
  899. progType ProgramType
  900. attachType AttachType
  901. progFlags uint32
  902. }{
  903. // Please update the types from libbpf.c and follow the order of it.
  904. // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/bpf/libbpf.c
  905. {"socket", SocketFilter, AttachNone, 0},
  906. {"sk_reuseport/migrate", SkReuseport, AttachSkReuseportSelectOrMigrate, 0},
  907. {"sk_reuseport", SkReuseport, AttachSkReuseportSelect, 0},
  908. {"kprobe/", Kprobe, AttachNone, 0},
  909. {"uprobe/", Kprobe, AttachNone, 0},
  910. {"kretprobe/", Kprobe, AttachNone, 0},
  911. {"uretprobe/", Kprobe, AttachNone, 0},
  912. {"tc", SchedCLS, AttachNone, 0},
  913. {"classifier", SchedCLS, AttachNone, 0},
  914. {"action", SchedACT, AttachNone, 0},
  915. {"tracepoint/", TracePoint, AttachNone, 0},
  916. {"tp/", TracePoint, AttachNone, 0},
  917. {"raw_tracepoint/", RawTracepoint, AttachNone, 0},
  918. {"raw_tp/", RawTracepoint, AttachNone, 0},
  919. {"raw_tracepoint.w/", RawTracepointWritable, AttachNone, 0},
  920. {"raw_tp.w/", RawTracepointWritable, AttachNone, 0},
  921. {"tp_btf/", Tracing, AttachTraceRawTp, 0},
  922. {"fentry/", Tracing, AttachTraceFEntry, 0},
  923. {"fmod_ret/", Tracing, AttachModifyReturn, 0},
  924. {"fexit/", Tracing, AttachTraceFExit, 0},
  925. {"fentry.s/", Tracing, AttachTraceFEntry, unix.BPF_F_SLEEPABLE},
  926. {"fmod_ret.s/", Tracing, AttachModifyReturn, unix.BPF_F_SLEEPABLE},
  927. {"fexit.s/", Tracing, AttachTraceFExit, unix.BPF_F_SLEEPABLE},
  928. {"freplace/", Extension, AttachNone, 0},
  929. {"lsm/", LSM, AttachLSMMac, 0},
  930. {"lsm.s/", LSM, AttachLSMMac, unix.BPF_F_SLEEPABLE},
  931. {"iter/", Tracing, AttachTraceIter, 0},
  932. {"syscall", Syscall, AttachNone, 0},
  933. {"xdp_devmap/", XDP, AttachXDPDevMap, 0},
  934. {"xdp_cpumap/", XDP, AttachXDPCPUMap, 0},
  935. {"xdp", XDP, AttachNone, 0},
  936. {"perf_event", PerfEvent, AttachNone, 0},
  937. {"lwt_in", LWTIn, AttachNone, 0},
  938. {"lwt_out", LWTOut, AttachNone, 0},
  939. {"lwt_xmit", LWTXmit, AttachNone, 0},
  940. {"lwt_seg6local", LWTSeg6Local, AttachNone, 0},
  941. {"cgroup_skb/ingress", CGroupSKB, AttachCGroupInetIngress, 0},
  942. {"cgroup_skb/egress", CGroupSKB, AttachCGroupInetEgress, 0},
  943. {"cgroup/skb", CGroupSKB, AttachNone, 0},
  944. {"cgroup/sock_create", CGroupSock, AttachCGroupInetSockCreate, 0},
  945. {"cgroup/sock_release", CGroupSock, AttachCgroupInetSockRelease, 0},
  946. {"cgroup/sock", CGroupSock, AttachCGroupInetSockCreate, 0},
  947. {"cgroup/post_bind4", CGroupSock, AttachCGroupInet4PostBind, 0},
  948. {"cgroup/post_bind6", CGroupSock, AttachCGroupInet6PostBind, 0},
  949. {"cgroup/dev", CGroupDevice, AttachCGroupDevice, 0},
  950. {"sockops", SockOps, AttachCGroupSockOps, 0},
  951. {"sk_skb/stream_parser", SkSKB, AttachSkSKBStreamParser, 0},
  952. {"sk_skb/stream_verdict", SkSKB, AttachSkSKBStreamVerdict, 0},
  953. {"sk_skb", SkSKB, AttachNone, 0},
  954. {"sk_msg", SkMsg, AttachSkMsgVerdict, 0},
  955. {"lirc_mode2", LircMode2, AttachLircMode2, 0},
  956. {"flow_dissector", FlowDissector, AttachFlowDissector, 0},
  957. {"cgroup/bind4", CGroupSockAddr, AttachCGroupInet4Bind, 0},
  958. {"cgroup/bind6", CGroupSockAddr, AttachCGroupInet6Bind, 0},
  959. {"cgroup/connect4", CGroupSockAddr, AttachCGroupInet4Connect, 0},
  960. {"cgroup/connect6", CGroupSockAddr, AttachCGroupInet6Connect, 0},
  961. {"cgroup/sendmsg4", CGroupSockAddr, AttachCGroupUDP4Sendmsg, 0},
  962. {"cgroup/sendmsg6", CGroupSockAddr, AttachCGroupUDP6Sendmsg, 0},
  963. {"cgroup/recvmsg4", CGroupSockAddr, AttachCGroupUDP4Recvmsg, 0},
  964. {"cgroup/recvmsg6", CGroupSockAddr, AttachCGroupUDP6Recvmsg, 0},
  965. {"cgroup/getpeername4", CGroupSockAddr, AttachCgroupInet4GetPeername, 0},
  966. {"cgroup/getpeername6", CGroupSockAddr, AttachCgroupInet6GetPeername, 0},
  967. {"cgroup/getsockname4", CGroupSockAddr, AttachCgroupInet4GetSockname, 0},
  968. {"cgroup/getsockname6", CGroupSockAddr, AttachCgroupInet6GetSockname, 0},
  969. {"cgroup/sysctl", CGroupSysctl, AttachCGroupSysctl, 0},
  970. {"cgroup/getsockopt", CGroupSockopt, AttachCGroupGetsockopt, 0},
  971. {"cgroup/setsockopt", CGroupSockopt, AttachCGroupSetsockopt, 0},
  972. {"struct_ops+", StructOps, AttachNone, 0},
  973. {"sk_lookup/", SkLookup, AttachSkLookup, 0},
  974. {"seccomp", SocketFilter, AttachNone, 0},
  975. }
  976. for _, t := range types {
  977. if !strings.HasPrefix(sectionName, t.prefix) {
  978. continue
  979. }
  980. if !strings.HasSuffix(t.prefix, "/") {
  981. return t.progType, t.attachType, t.progFlags, ""
  982. }
  983. return t.progType, t.attachType, t.progFlags, sectionName[len(t.prefix):]
  984. }
  985. return UnspecifiedProgram, AttachNone, 0, ""
  986. }
  987. func (ec *elfCode) loadSectionRelocations(sec *elf.Section, symbols []elf.Symbol) (map[uint64]elf.Symbol, error) {
  988. rels := make(map[uint64]elf.Symbol)
  989. if sec.Entsize < 16 {
  990. return nil, fmt.Errorf("section %s: relocations are less than 16 bytes", sec.Name)
  991. }
  992. r := bufio.NewReader(sec.Open())
  993. for off := uint64(0); off < sec.Size; off += sec.Entsize {
  994. ent := io.LimitReader(r, int64(sec.Entsize))
  995. var rel elf.Rel64
  996. if binary.Read(ent, ec.ByteOrder, &rel) != nil {
  997. return nil, fmt.Errorf("can't parse relocation at offset %v", off)
  998. }
  999. symNo := int(elf.R_SYM64(rel.Info) - 1)
  1000. if symNo >= len(symbols) {
  1001. return nil, fmt.Errorf("offset %d: symbol %d doesn't exist", off, symNo)
  1002. }
  1003. symbol := symbols[symNo]
  1004. rels[rel.Off] = symbol
  1005. }
  1006. return rels, nil
  1007. }