binutils.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325
  1. # Copyright 2024 Google LLC
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os.path
  15. import re
  16. import sh
  17. import subprocess
  18. import sys
  19. import tempfile
# Matches one line of `arm-none-eabi-nm -l -S` output (see
# _nm_generator_fast). Capture groups:
#   1: address (hex)       2: size (hex)
#   3: section type letter 4: symbol name
#   5: optional "file:line" suffix, 6: its file part, 7: its line part
# Lines whose section letter is not one of [dDbBtTrR] do not match.
NM_LINE_PATTERN = re.compile(r"""([0-9a-f]+)\s+ # address
([0-9a-f]+)\s+ # size
([dDbBtTrR])\s+ # section type
(\S+) # name
\s*((\S+)\:([0-9]+))?$ # filename + line
""", flags=re.VERBOSE)
  26. class Symbol(object):
  27. def __init__(self, name, size):
  28. self.name = name
  29. self.size = size
  30. def __str__(self):
  31. return '<Symbol %s: %u>' % (self.name, self.size)
  32. class FileInfo(object):
  33. def __init__(self, filename):
  34. self.filename = filename
  35. self.size = 0
  36. self.symbols = {}
  37. def add_entry(self, symbol_name, size):
  38. if symbol_name in self.symbols:
  39. return
  40. self.size += size
  41. self.symbols[symbol_name] = Symbol(symbol_name, size)
  42. def remove_entry(self, symbol_name):
  43. result = self.symbols.pop(symbol_name, None)
  44. if result is not None:
  45. self.size -= result.size
  46. return result
  47. def pprint(self, verbose):
  48. print(' %s: size %u' % (self.filename, self.size))
  49. if verbose:
  50. l = sorted(self.symbols.itervalues(), key=lambda x: -x.size)
  51. for s in l:
  52. print(' %6u %-36s' % (s.size, s.name))
  53. def __str__(self):
  54. return '<FileInfo %s: %u>' % (self.filename, self.size)
  55. class SectionInfo(object):
  56. def __init__(self, name):
  57. self.name = name
  58. self.count = 0
  59. self.size = 0
  60. self.files = {}
  61. def add_entry(self, name, filename, size):
  62. self.count += 1
  63. self.size += size
  64. if filename not in self.files:
  65. self.files[filename] = FileInfo(filename)
  66. self.files[filename].add_entry(name, size)
  67. def remove_unknown_entry(self, name):
  68. if 'Unknown' not in self.files:
  69. return
  70. result = self.files['Unknown'].remove_entry(name)
  71. if result is not None:
  72. self.size -= result.size
  73. return result
  74. def get_files(self):
  75. return self.files.values()
  76. def pprint(self, summary, verbose):
  77. print('%s: count %u size %u' % (self.name, self.count, self.size))
  78. if not summary:
  79. l = self.files.values()
  80. l = sorted(l, key=lambda f: -f.size)
  81. for f in l:
  82. f.pprint(verbose)
  83. def analyze_elf(elf_file_path, sections_letters, use_fast_nm):
  84. """ Analyzes the elf file, using binutils.
  85. section_letters -- string of letters representing the sections to
  86. analyze, e.g. 'tbd' => text, bss and data.
  87. use_fast_nm -- If False, a slow lookup method is used to avoid a bug in
  88. `nm`. If True, the faster `nm -S -l` is used.
  89. Returns a dictionary with SectionInfo objects for each section.
  90. """
  91. def make_sections_dict(sections_letters):
  92. sections = {}
  93. for s in sections_letters:
  94. if s == 'b':
  95. sections['b'] = SectionInfo('.bss')
  96. elif s == 'd':
  97. sections['d'] = SectionInfo('.data')
  98. elif s == 't':
  99. sections['t'] = SectionInfo('.text')
  100. else:
  101. raise Exception('Invalid section <%s>, must be a combination'
  102. ' of [bdt] characters\n' % s)
  103. return sections
  104. sections = make_sections_dict(sections_letters)
  105. generator = nm_generator(elf_file_path, use_fast_nm)
  106. for (_, section, symbol_name, filename, line, size) in generator:
  107. if not filename:
  108. filename = 'Unknown'
  109. if section in sections:
  110. sections[section].add_entry(symbol_name, filename, size)
  111. return sections
  112. def nm_generator(elf_path, use_fast_nm=True):
  113. if use_fast_nm:
  114. return _nm_generator_fast(elf_path)
  115. else:
  116. return _nm_generator_slow(elf_path)
  117. def _get_symbols_table(f):
  118. # NOTE: nm crashes when we pass in the -l command line option. As a
  119. # workaround, we use readelf to get the symbol to address mappings and then
  120. # we use addr2line to get file/lines from the addresses.
  121. infile = sh.arm_none_eabi_readelf('-s', '-W', f)
  122. line_pattern = re.compile(r"""\s+([0-9]+\:)\s+ # number
  123. ([0-9a-f]+)\s+ # address
  124. ([0-9]+)\s+ # size
  125. (\S+)\s+ # type
  126. (\S+)\s+ # Bind
  127. (\S+)\s+ # Visibility
  128. (\S+)\s+ # Ndx
  129. (\S+) # symbol name
  130. """, flags=re.VERBOSE)
  131. def create_addr2line_process():
  132. return subprocess.Popen(['arm-none-eabi-addr2line', '-e', f],
  133. stdin=subprocess.PIPE,
  134. stdout=subprocess.PIPE,
  135. stderr=subprocess.PIPE)
  136. addr2line = create_addr2line_process()
  137. symbols = {}
  138. for line_num, line in enumerate(infile):
  139. if (line_num % 300) == 0:
  140. sys.stdout.write(".")
  141. sys.stdout.flush()
  142. match = line_pattern.match(line)
  143. if match is None:
  144. continue
  145. type = match.group(4)
  146. if type not in ['FUNC', 'OBJECT']:
  147. continue
  148. addr = match.group(2)
  149. symbol_name = match.group(8)
  150. success = False
  151. while not success:
  152. try:
  153. addr2line.stdin.write("0x%s\n" % addr)
  154. success = True
  155. except IOError:
  156. # This happens if the previous iteration caused an error
  157. addr2line = create_addr2line_process()
  158. src_file_line = addr2line.stdout.readline().strip()
  159. if src_file_line:
  160. # Some Bluetopia paths start with 'C:\...'
  161. components = src_file_line.split(':')
  162. src_file = ":".join(components[:-1])
  163. line = components[-1:][0]
  164. else:
  165. (src_file, line) = ('?', '0')
  166. symbols[symbol_name] = (src_file, line)
  167. addr2line.kill()
  168. return symbols
  169. # This method is quite slow, but works around a bug in nm.
  170. def _nm_generator_slow(f):
  171. print("Getting list of symbols...")
  172. symbols = _get_symbols_table(f)
  173. print("Aggregating...")
  174. infile = sh.arm_none_eabi_nm('-S', f)
  175. line_pattern = re.compile(r"""([0-9a-f]+)\s+ # address
  176. ([0-9a-f]+)\s+ # size
  177. ([dDbBtTrR])\s+ # section type
  178. (\S+) # name
  179. """, flags=re.VERBOSE)
  180. for line in infile:
  181. match = line_pattern.match(line)
  182. if match is None:
  183. continue
  184. addr = int(match.group(1), 16)
  185. size = int(match.group(2), 16)
  186. section = match.group(3).lower()
  187. if section == 'r':
  188. section = 't'
  189. symbol_name = match.group(4)
  190. if symbol_name not in symbols:
  191. continue
  192. rel_file_path, line = symbols[symbol_name]
  193. if rel_file_path:
  194. rel_file_path = os.path.relpath(rel_file_path)
  195. yield (addr, section, symbol_name, rel_file_path, line, size)
  196. # This method is much faster, and *should* work, but as of 2014-08-01, we get
  197. # exceptions when we try to run nm -l on the tintin ELF file. So, the
  198. # _nm_generator_slow() method above can be used as a workaround.
  199. def _nm_generator_fast(f):
  200. """ Given a path to an .elf, generates tuples:
  201. (section, symbol_name, rel_file_path, line, size)
  202. Note, rel_file_path and line can be None.
  203. """
  204. infile = sh.arm_none_eabi_nm('-l', '-S', f)
  205. for line in infile:
  206. match = NM_LINE_PATTERN.match(line)
  207. if match is None:
  208. continue
  209. addr = int(match.group(1), 16)
  210. size = int(match.group(2), 16)
  211. section = match.group(3).lower()
  212. if section == 'r':
  213. section = 't'
  214. symbol_name = match.group(4)
  215. rel_file_path = match.group(6)
  216. if rel_file_path:
  217. rel_file_path = os.path.relpath(rel_file_path)
  218. line = match.group(7)
  219. if line:
  220. line = int(line)
  221. yield (addr, section, symbol_name, rel_file_path, line, size)
  222. def size(elf_path):
  223. """ Returns size (text, data, bss)
  224. """
  225. output = subprocess.check_output(["arm-none-eabi-size", elf_path])
  226. lines = output.decode("utf8").splitlines()
  227. if len(lines) < 2:
  228. return 0
  229. match = re.match(r"^\s*([0-9]+)\s+([0-9]+)\s+([0-9]+)", lines[1])
  230. if not match:
  231. return 0
  232. # text, data, bss
  233. return (int(match.groups()[0]),
  234. int(match.groups()[1]),
  235. int(match.groups()[2]))
  236. def strip(elf_path):
  237. """ Strip debug info from specified .elf file
  238. """
  239. sh.arm_none_eabi_strip(elf_path)
  240. def copy_elf_section(in_elf_path, out_elf_path, section_name_list):
  241. """ Creates out_elf_path containing only sections in 'section name list'
  242. """
  243. args = []
  244. for name in section_name_list:
  245. args.append('-j')
  246. args.append(name)
  247. args.append(in_elf_path)
  248. args.append(out_elf_path)
  249. sh.arm_none_eabi_objcopy(args)
  250. def section_bytes(elf_path, section_name):
  251. """ Returns the bytes in a section of a given .elf file
  252. """
  253. with tempfile.NamedTemporaryFile() as temp:
  254. sh.arm_none_eabi_objcopy(['-j', section_name, '-O', 'binary',
  255. elf_path, temp.name])
  256. with open(temp.name, "rb") as f:
  257. return f.read()