# Copyright 2020 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Reads data from ELF sections.

This module provides tools for dumping the contents of an ELF section. It can
also be used to read values at a particular address. A command line interface
for both of these features is provided.

This module supports any ELF-format file, including .o and .so files. This
module also has basic support for archive (.a) files. All ELF files in an
archive are read as one unit.
"""
import argparse
from pathlib import Path
import re
import struct
import sys
from typing import BinaryIO, Dict, Iterable, NamedTuple, Optional
from typing import Pattern, Tuple, Union
# Bytes expected at the very start of an archive (.a) file.
ARCHIVE_MAGIC = b'!<arch>\n'

# Bytes expected at the very start of an ELF file.
ELF_MAGIC = b'\x7fELF'
def _check_next_bytes(fd: BinaryIO, expected: bytes, what: str) -> None:
    """Reads len(expected) bytes from fd and checks that they match.

    The file position is advanced past the bytes that were read, whether or
    not they match.

    Args:
      fd: binary file object to read from
      expected: the exact bytes that must come next in the file
      what: short description of the bytes, used in the error message

    Raises:
      FileDecodeError: the bytes read from fd differ from expected
    """
    actual = fd.read(len(expected))
    if expected != actual:
        # Not every file object has a name (e.g. io.BytesIO), so fall back to
        # a placeholder in the message.
        raise FileDecodeError(
            f'Invalid {what}: expected {expected!r}, found {actual!r} in file '
            f'{getattr(fd, "name", "(unknown)")}')
def files_in_archive(fd: BinaryIO) -> Iterable[int]:
    """Seeks to each file in an archive and yields its size.

    Each time a size is yielded, fd is positioned at the first byte of that
    file's contents. The caller may read from fd between iterations; the
    position is restored from a saved offset before moving to the next file.

    Args:
      fd: binary file object positioned at the start of an archive

    Yields:
      The size in bytes of each file in the archive, in order.

    Raises:
      FileDecodeError: the archive magic number, a file size, or a header
          ending is invalid
    """

    _check_next_bytes(fd, ARCHIVE_MAGIC, 'archive magic number')

    while True:
        # In some archives, the first file ends with an additional \n. If that
        # is present, skip it.
        if fd.read(1) != b'\n':
            fd.seek(-1, 1)  # Not padding; step back over the byte just read.

        # Each file in an archive is prefixed with an ASCII header:
        #
        #   16 B - file identifier (text)
        #   12 B - file modification timestamp (decimal)
        #    6 B - owner ID (decimal)
        #    6 B - group ID (decimal)
        #    8 B - file mode (octal)
        #   10 B - file size in bytes (decimal)
        #    2 B - ending characters (`\n)
        #
        # Skip the unused portions of the file header, then read the size.
        fd.seek(16 + 12 + 6 + 6 + 8, 1)
        size_str = fd.read(10)
        if not size_str:
            return  # Reached the end of the archive.

        try:
            size = int(size_str, 10)
        except ValueError as exc:
            raise FileDecodeError(
                'Archive file sizes must be decimal integers') from exc

        _check_next_bytes(fd, b'`\n', 'archive file header ending')
        offset = fd.tell()  # Store offset in case the caller reads the file.

        yield size

        fd.seek(offset + size)
def _elf_files_in_archive(fd: BinaryIO):
    """Yields once for each ELF file found in fd.

    If fd is itself positioned at an ELF file, yields exactly once. Otherwise
    fd is treated as an archive, and this yields once for every archive member
    that starts with the ELF magic number. In both cases fd is positioned at
    the start of the ELF file when control is handed to the caller.
    """
    if _bytes_match(fd, ELF_MAGIC):
        yield  # The value isn't used, so just yield None.
    else:
        for _ in files_in_archive(fd):
            if _bytes_match(fd, ELF_MAGIC):
                yield
class Field(NamedTuple):
    """A field in an ELF file.

    Fields refer to a particular piece of data in an ELF file or section header.
    Each field records where it lives and how large it is for both 32-bit and
    64-bit ELF files.
    """

    name: str  # Name of the field, e.g. 'e_shoff'.
    offset_32: int  # Offset of the field in 32-bit ELF files.
    offset_64: int  # Offset of the field in 64-bit ELF files.
    size_32: int  # Size of the field in bytes in 32-bit ELF files.
    size_64: int  # Size of the field in bytes in 64-bit ELF files.
class _FileHeader(NamedTuple):
    """Fields in the ELF file header."""

    # Each Field lists the name, the 32-bit and 64-bit offsets, and the 32-bit
    # and 64-bit sizes, in that order.
    section_header_offset: Field = Field('e_shoff', 0x20, 0x28, 4, 8)
    section_count: Field = Field('e_shnum', 0x30, 0x3C, 2, 2)
    section_names_index: Field = Field('e_shstrndx', 0x32, 0x3E, 2, 2)


# Shared instance that provides the file header fields by name.
FILE_HEADER = _FileHeader()
class _SectionHeader(NamedTuple):
    """Fields in an ELF section header."""

    # Each Field lists the name, the 32-bit and 64-bit offsets, and the 32-bit
    # and 64-bit sizes, in that order.
    section_name_offset: Field = Field('sh_name', 0x00, 0x00, 4, 4)
    section_address: Field = Field('sh_addr', 0x0C, 0x10, 4, 8)
    section_offset: Field = Field('sh_offset', 0x10, 0x18, 4, 8)
    section_size: Field = Field('sh_size', 0x14, 0x20, 4, 8)

    # section_header_end records the size of the header. Only its offsets are
    # meaningful; it has zero size, so it is never read as a value.
    section_header_end: Field = Field('section end', 0x28, 0x40, 0, 0)


# Shared instance that provides the section header fields by name.
SECTION_HEADER = _SectionHeader()
def read_c_string(fd: BinaryIO) -> bytes:
    """Reads a null-terminated string from the provided file descriptor.

    Reading stops at the first null byte or at the end of the file, whichever
    comes first. The null terminator is consumed but is not included in the
    returned bytes.
    """
    string = bytearray()
    while True:
        byte = fd.read(1)
        # An empty read means the end of the file was reached.
        if not byte or byte == b'\0':
            return bytes(string)
        string += byte
def _bytes_match(fd: BinaryIO, expected: bytes) -> bool:
    """Peeks at the next bytes to see if they match the expected.

    The file position is restored after reading, so a successful call leaves
    fd where it was. Returns False if the file cannot be read or repositioned.
    """
    try:
        offset = fd.tell()
        data = fd.read(len(expected))
        fd.seek(offset)
        return data == expected
    except OSError:  # IOError is an alias of OSError.
        return False
def compatible_file(file: Union[BinaryIO, str, Path]) -> bool:
    """True if the file type is supported (ELF or archive).

    Args:
      file: a path to open, or an already-open binary file object. A file
          object is left open and its position is restored after the check.

    Returns:
      True if the file starts with the ELF or archive magic number.
    """
    if isinstance(file, (str, Path)):
        # This function owns the file it opens, so it must also close it. The
        # with statement closes it even if the check raises, and nothing needs
        # cleaning up if open() itself fails.
        with open(file, 'rb') as fd:
            return compatible_file(fd)

    # The magic number is at the very start of the file, so check there and
    # then put the caller's position back.
    offset = file.tell()
    file.seek(0)
    result = _bytes_match(file, ELF_MAGIC) or _bytes_match(file, ARCHIVE_MAGIC)
    file.seek(offset)

    return result
class FileDecodeError(Exception):
    """Invalid data was read from an ELF file."""
class FieldReader:
    """Reads ELF fields defined with a Field tuple from an ELF file.

    The reader inspects the start of the ELF file to find out whether it is a
    32-bit or 64-bit file and which byte order it uses, then decodes fields
    accordingly.

    Attributes:
      file_offset: position of the file object when the reader was created,
          which is where this ELF file starts
      offset: function that returns the offset of a Field for this file's
          address size
    """
    def __init__(self, elf: BinaryIO):
        """Prepares to read fields from the ELF file at elf's position.

        Raises:
          FileDecodeError: the ELF magic number, the address size byte, or the
              endianness byte is invalid
        """
        self._elf = elf
        self.file_offset = self._elf.tell()

        _check_next_bytes(self._elf, ELF_MAGIC, 'ELF file header')
        size_field = self._elf.read(1)  # e_ident[EI_CLASS] (address size)

        # The endianness byte directly follows the address size byte, so it
        # has to be read before the address size is acted on.
        int_unpacker = self._determine_integer_format()

        if size_field == b'\x01':
            self.offset = lambda field: field.offset_32
            self._size = lambda field: field.size_32
            self._decode = lambda f, d: int_unpacker[f.size_32].unpack(d)[0]
        elif size_field == b'\x02':
            self.offset = lambda field: field.offset_64
            self._size = lambda field: field.size_64
            self._decode = lambda f, d: int_unpacker[f.size_64].unpack(d)[0]
        else:
            raise FileDecodeError('Unknown size {!r}'.format(size_field))

    def _determine_integer_format(self) -> Dict[int, struct.Struct]:
        """Returns a dict of structs used for converting bytes to integers.

        The dict maps an integer size in bytes to a struct that unpacks an
        unsigned integer of that size in the file's byte order.
        """
        endianness_byte = self._elf.read(1)  # e_ident[EI_DATA] (endianness)
        if endianness_byte == b'\x01':
            endianness = '<'
        elif endianness_byte == b'\x02':
            endianness = '>'
        else:
            raise FileDecodeError(
                'Unknown endianness {!r}'.format(endianness_byte))

        return {
            1: struct.Struct(endianness + 'B'),
            2: struct.Struct(endianness + 'H'),
            4: struct.Struct(endianness + 'I'),
            8: struct.Struct(endianness + 'Q'),
        }

    def read(self, field: Field, base: int = 0) -> int:
        """Reads the integer value of a field.

        Args:
          field: the field to read
          base: offset of the structure that contains the field, relative to
              the start of this ELF file
        """
        self._elf.seek(self.file_offset + base + self.offset(field))
        data = self._elf.read(self._size(field))
        return self._decode(field, data)

    def read_string(self, offset: int) -> str:
        """Reads the null-terminated string at offset within this ELF file."""
        self._elf.seek(self.file_offset + offset)
        return read_c_string(self._elf).decode()
class Elf:
    """Represents an ELF file and the sections in it.

    Attributes:
      sections: every section found in the file; for an archive, the sections
          of all ELF files in it
    """
    class Section(NamedTuple):
        """Info about a section in an ELF file."""
        name: str
        address: int
        offset: int
        size: int

        file_offset: int  # Starting place in the file; 0 unless in an archive.

        def range(self) -> range:
            """Returns the range of addresses that this section covers."""
            return range(self.address, self.address + self.size)

        def __lt__(self, other) -> bool:
            # Order sections by address only, rather than by the whole tuple.
            return self.address < other.address

    def __init__(self, elf: BinaryIO):
        """Reads the section headers from the provided binary file object."""
        self._elf = elf
        self.sections: Tuple[Elf.Section, ...] = tuple(self._list_sections())

    def _list_sections(self) -> Iterable['Elf.Section']:
        """Reads the section headers to enumerate all ELF sections."""
        for _ in _elf_files_in_archive(self._elf):
            reader = FieldReader(self._elf)
            base = reader.read(FILE_HEADER.section_header_offset)
            section_header_size = reader.offset(
                SECTION_HEADER.section_header_end)

            # Find the section with the section names in it.
            names_section_header_base = (
                base + section_header_size *
                reader.read(FILE_HEADER.section_names_index))
            names_table_base = reader.read(SECTION_HEADER.section_offset,
                                           names_section_header_base)

            # Walk the section headers, starting from the first one at base.
            for _ in range(reader.read(FILE_HEADER.section_count)):
                name_offset = reader.read(SECTION_HEADER.section_name_offset,
                                          base)

                yield self.Section(
                    reader.read_string(names_table_base + name_offset),
                    reader.read(SECTION_HEADER.section_address, base),
                    reader.read(SECTION_HEADER.section_offset, base),
                    reader.read(SECTION_HEADER.section_size, base),
                    reader.file_offset)

                base += section_header_size

    def section_by_address(self, address: int) -> Optional['Elf.Section']:
        """Returns the section that contains the provided address, if any."""
        # Iterate in reverse to give priority to sections with nonzero addresses
        for section in sorted(self.sections, reverse=True):
            if address in section.range():
                return section

        return None

    def sections_with_name(self, name: str) -> Iterable['Elf.Section']:
        """Yields every section whose name is exactly the provided name."""
        for section in self.sections:
            if section.name == name:
                yield section

    def read_value(self,
                   address: int,
                   size: Optional[int] = None) -> Union[None, bytes, int]:
        """Reads specified bytes or null-terminated string at address.

        Args:
          address: the address to read from
          size: number of bytes to read; if None, reads up to a null terminator

        Returns:
          The bytes read, or None if no section contains the address.
        """
        section = self.section_by_address(address)
        if section is None:
            return None

        assert section.address <= address
        # Convert the address to a position in the file: the section's start
        # in the file plus the distance of the address into the section.
        self._elf.seek(section.file_offset + section.offset + address -
                       section.address)

        if size is None:
            return read_c_string(self._elf)

        return self._elf.read(size)

    def dump_sections(self, name: Union[str,
                                        Pattern[str]]) -> Dict[str, bytes]:
        """Dumps a binary string containing the sections matching the regex.

        Returns:
          A dict that maps the name of each matching section to its contents.
          If several matching sections share a name, the last one is kept.
        """
        name_regex = re.compile(name)

        sections: Dict[str, bytes] = {}
        for section in self.sections:
            if name_regex.match(section.name):
                self._elf.seek(section.file_offset + section.offset)
                sections[section.name] = self._elf.read(section.size)

        return sections

    def dump_section_contents(
            self, name: Union[str, Pattern[str]]) -> Optional[bytes]:
        """Returns the joined contents of matching sections, or None if none."""
        sections = self.dump_sections(name)
        return b''.join(sections.values()) if sections else None

    def summary(self) -> str:
        """Returns one line per section: index, address, offset, size, name."""
        return '\n'.join(
            '[{0:2}] {1.address:08x} {1.offset:08x} {1.size:08x} {1.name}'.
            format(i, section) for i, section in enumerate(self.sections))

    def __str__(self) -> str:
        return 'Elf({}\n)'.format(''.join('\n  {},'.format(s)
                                          for s in self.sections))
def _read_addresses(elf, size: int, output, address: Iterable[int]) -> None:
    """Reads the value at each address and passes it to output.

    Args:
      elf: object with a read_value(address, size) method
      size: number of bytes to read at each address
      output: function called with each value that is read
      address: the addresses to read

    Raises:
      ValueError: nothing could be read at one of the addresses
    """
    for addr in address:
        value = elf.read_value(addr, size)

        if value is None:
            raise ValueError('Invalid address 0x{:08x}'.format(addr))

        output(value)
def _dump_sections(elf: Elf, output, sections: Iterable[Pattern[str]]) -> None:
    """Outputs the contents of the sections that match each pattern.

    If no patterns are provided, outputs a summary of the sections in the ELF
    file instead.

    Args:
      elf: the ELF file to read from
      output: function called with each value to write
      sections: regular expressions for the names of the sections to dump
    """
    if not sections:
        output(elf.summary().encode())
        return

    for section_pattern in sections:
        output(elf.dump_section_contents(section_pattern))
def _parse_args() -> argparse.Namespace:
    """Parses and returns command line arguments.

    The returned namespace has a handler attribute holding the function to
    call. The remaining attributes are named after the keyword arguments the
    handlers accept.
    """
    parser = argparse.ArgumentParser(description=__doc__)

    def hex_int(arg):
        return int(arg, 16)

    parser.add_argument('-e',
                        '--elf',
                        type=argparse.FileType('rb'),
                        help='the ELF file to examine',
                        required=True)

    # The delimiter is stored as an integer byte value so that it can be
    # written to the binary output stream.
    parser.add_argument(
        '-d',
        '--delimiter',
        default=ord('\n'),
        type=ord,
        help=r'delimiter to write after each value; \n by default')

    # Without a subcommand there is nothing to do, so print the help text.
    parser.set_defaults(handler=lambda **_: parser.print_help())

    subparsers = parser.add_subparsers(
        help='select whether to work with addresses or whole sections')

    section_parser = subparsers.add_parser('section')
    section_parser.set_defaults(handler=_dump_sections)
    section_parser.add_argument(
        'sections',
        metavar='section_regex',
        nargs='*',
        type=re.compile,  # type: ignore
        help='section name regular expression')

    address_parser = subparsers.add_parser('address')
    address_parser.set_defaults(handler=_read_addresses)
    address_parser.add_argument(
        '--size',
        type=int,
        help='the size to read; reads until a null terminator by default')
    address_parser.add_argument('address',
                                nargs='+',
                                type=hex_int,
                                help='hexadecimal addresses to read')

    return parser.parse_args()
def _main(args):
    """Calls the appropriate handler for the command line options."""
    # The handler and delimiter are consumed here. Remove them so that the
    # remaining attributes can be passed to the handler as keyword arguments.
    handler = args.handler
    del args.handler

    delim = args.delimiter
    del args.delimiter

    def output(value):
        """Writes a value to stdout as raw bytes, followed by the delimiter."""
        if value is not None:
            sys.stdout.buffer.write(value)
            sys.stdout.buffer.write(bytearray([delim]))
            sys.stdout.flush()

    args.output = output
    args.elf = Elf(args.elf)

    handler(**vars(args))


if __name__ == '__main__':
    _main(_parse_args())