tools/binman/elf.py

   1 # SPDX-License-Identifier: GPL-2.0+
   2 # Copyright (c) 2016 Google, Inc
   3 # Written by Simon Glass <sjg@chromium.org>
   4 #
   5 # Handle various things related to ELF images
   6 #
   7
   8 from collections import namedtuple, OrderedDict
   9 import io
  10 import os
  11 import re
  12 import shutil
  13 import struct
  14 import tempfile
  15
  16 from patman import command
  17 from patman import tools
  18 from patman import tout
  19
  20 ELF_TOOLS = True
  21 try:
  22     from elftools.elf.elffile import ELFFile
  23     from elftools.elf.sections import SymbolTableSection
  24 except:  # pragma: no cover
  25     ELF_TOOLS = False
  26
# Information about an ELF symbol:
# section (str): Name of the section containing this symbol
# address (int): Address of the symbol (its value)
# size (int): Size of the symbol in bytes
# weak (bool): True if the symbol is weak
# offset (int or None): Offset of the symbol's data in the ELF file, or None if
#   not known
Symbol = namedtuple('Symbol', ['section', 'address', 'size', 'weak', 'offset'])
  35
# Information about an ELF file:
#    data: Extracted program contents of ELF file (this would be loaded by an
#           ELF loader when reading this file)
#    load: Load address of code
#    entry: Entry address of code
#    memsize: Number of bytes in memory occupied by loading this ELF file
ElfInfo = namedtuple('ElfInfo', ['data', 'load', 'entry', 'memsize'])
  43
  44
  45 def GetSymbols(fname, patterns):
  46     """Get the symbols from an ELF file
  47
  48     Args:
  49         fname: Filename of the ELF file to read
  50         patterns: List of regex patterns to search for, each a string
  51
  52     Returns:
  53         None, if the file does not exist, or Dict:
  54           key: Name of symbol
  55           value: Hex value of symbol
  56     """
  57     stdout = tools.run('objdump', '-t', fname)
  58     lines = stdout.splitlines()
  59     if patterns:
  60         re_syms = re.compile('|'.join(patterns))
  61     else:
  62         re_syms = None
  63     syms = {}
  64     syms_started = False
  65     for line in lines:
  66         if not line or not syms_started:
  67             if 'SYMBOL TABLE' in line:
  68                 syms_started = True
  69             line = None  # Otherwise code coverage complains about 'continue'
  70             continue
  71         if re_syms and not re_syms.search(line):
  72             continue
  73
  74         space_pos = line.find(' ')
  75         value, rest = line[:space_pos], line[space_pos + 1:]
  76         flags = rest[:7]
  77         parts = rest[7:].split()
  78         section, size =  parts[:2]
  79         if len(parts) > 2:
  80             name = parts[2] if parts[2] != '.hidden' else parts[3]
  81             syms[name] = Symbol(section, int(value, 16), int(size, 16),
  82                                 flags[1] == 'w', None)
  83
  84     # Sort dict by address
  85     return OrderedDict(sorted(syms.items(), key=lambda x: x[1].address))
  86
  87 def GetSymbolFileOffset(fname, patterns):
  88     """Get the symbols from an ELF file
  89
  90     Args:
  91         fname: Filename of the ELF file to read
  92         patterns: List of regex patterns to search for, each a string
  93
  94     Returns:
  95         None, if the file does not exist, or Dict:
  96           key: Name of symbol
  97           value: Hex value of symbol
  98     """
  99     def _GetFileOffset(elf, addr):
 100         for seg in elf.iter_segments():
 101             seg_end = seg['p_vaddr'] + seg['p_filesz']
 102             if seg.header['p_type'] == 'PT_LOAD':
 103                 if addr >= seg['p_vaddr'] and addr < seg_end:
 104                     return addr - seg['p_vaddr'] + seg['p_offset']
 105
 106     if not ELF_TOOLS:
 107         raise ValueError('Python elftools package is not available')
 108
 109     syms = {}
 110     with open(fname, 'rb') as fd:
 111         elf = ELFFile(fd)
 112
 113         re_syms = re.compile('|'.join(patterns))
 114         for section in elf.iter_sections():
 115             if isinstance(section, SymbolTableSection):
 116                 for symbol in section.iter_symbols():
 117                     if not re_syms or re_syms.search(symbol.name):
 118                         addr = symbol.entry['st_value']
 119                         syms[symbol.name] = Symbol(
 120                             section.name, addr, symbol.entry['st_size'],
 121                             symbol.entry['st_info']['bind'] == 'STB_WEAK',
 122                             _GetFileOffset(elf, addr))
 123
 124     # Sort dict by address
 125     return OrderedDict(sorted(syms.items(), key=lambda x: x[1].address))
 126
 127 def GetSymbolAddress(fname, sym_name):
 128     """Get a value of a symbol from an ELF file
 129
 130     Args:
 131         fname: Filename of the ELF file to read
 132         patterns: List of regex patterns to search for, each a string
 133
 134     Returns:
 135         Symbol value (as an integer) or None if not found
 136     """
 137     syms = GetSymbols(fname, [sym_name])
 138     sym = syms.get(sym_name)
 139     if not sym:
 140         return None
 141     return sym.address
 142
 143 def LookupAndWriteSymbols(elf_fname, entry, section):
 144     """Replace all symbols in an entry with their correct values
 145
 146     The entry contents is updated so that values for referenced symbols will be
 147     visible at run time. This is done by finding out the symbols offsets in the
 148     entry (using the ELF file) and replacing them with values from binman's data
 149     structures.
 150
 151     Args:
 152         elf_fname: Filename of ELF image containing the symbol information for
 153             entry
 154         entry: Entry to process
 155         section: Section which can be used to lookup symbol values
 156     """
 157     fname = tools.get_input_filename(elf_fname)
 158     syms = GetSymbols(fname, ['image', 'binman'])
 159     if not syms:
 160         return
 161     base = syms.get('__image_copy_start')
 162     if not base:
 163         return
 164     for name, sym in syms.items():
 165         if name.startswith('_binman'):
 166             msg = ("Section '%s': Symbol '%s'\n   in entry '%s'" %
 167                    (section.GetPath(), name, entry.GetPath()))
 168             offset = sym.address - base.address
 169             if offset < 0 or offset + sym.size > entry.contents_size:
 170                 raise ValueError('%s has offset %x (size %x) but the contents '
 171                                  'size is %x' % (entry.GetPath(), offset,
 172                                                  sym.size, entry.contents_size))
 173             if sym.size == 4:
 174                 pack_string = '<I'
 175             elif sym.size == 8:
 176                 pack_string = '<Q'
 177             else:
 178                 raise ValueError('%s has size %d: only 4 and 8 are supported' %
 179                                  (msg, sym.size))
 180
 181             # Look up the symbol in our entry tables.
 182             value = section.GetImage().LookupImageSymbol(name, sym.weak, msg,
 183                                                          base.address)
 184             if value is None:
 185                 value = -1
 186                 pack_string = pack_string.lower()
 187             value_bytes = struct.pack(pack_string, value)
 188             tout.Debug('%s:\n   insert %s, offset %x, value %x, length %d' %
 189                        (msg, name, offset, value, len(value_bytes)))
 190             entry.data = (entry.data[:offset] + value_bytes +
 191                         entry.data[offset + sym.size:])
 192
 193 def MakeElf(elf_fname, text, data):
 194     """Make an elf file with the given data in a single section
 195
 196     The output file has a several section including '.text' and '.data',
 197     containing the info provided in arguments.
 198
 199     Args:
 200         elf_fname: Output filename
 201         text: Text (code) to put in the file's .text section
 202         data: Data to put in the file's .data section
 203     """
 204     outdir = tempfile.mkdtemp(prefix='binman.elf.')
 205     s_file = os.path.join(outdir, 'elf.S')
 206
 207     # Spilt the text into two parts so that we can make the entry point two
 208     # bytes after the start of the text section
 209     text_bytes1 = ['\t.byte\t%#x' % byte for byte in text[:2]]
 210     text_bytes2 = ['\t.byte\t%#x' % byte for byte in text[2:]]
 211     data_bytes = ['\t.byte\t%#x' % byte for byte in data]
 212     with open(s_file, 'w') as fd:
 213         print('''/* Auto-generated C program to produce an ELF file for testing */
 214
 215 .section .text
 216 .code32
 217 .globl _start
 218 .type _start, @function
 219 %s
 220 _start:
 221 %s
 222 .ident "comment"
 223
 224 .comm fred,8,4
 225
 226 .section .empty
 227 .globl _empty
 228 _empty:
 229 .byte 1
 230
 231 .globl ernie
 232 .data
 233 .type ernie, @object
 234 .size ernie, 4
 235 ernie:
 236 %s
 237 ''' % ('\n'.join(text_bytes1), '\n'.join(text_bytes2), '\n'.join(data_bytes)),
 238         file=fd)
 239     lds_file = os.path.join(outdir, 'elf.lds')
 240
 241     # Use a linker script to set the alignment and text address.
 242     with open(lds_file, 'w') as fd:
 243         print('''/* Auto-generated linker script to produce an ELF file for testing */
 244
 245 PHDRS
 246 {
 247     text PT_LOAD ;
 248     data PT_LOAD ;
 249     empty PT_LOAD FLAGS ( 6 ) ;
 250     note PT_NOTE ;
 251 }
 252
 253 SECTIONS
 254 {
 255     . = 0xfef20000;
 256     ENTRY(_start)
 257     .text . : SUBALIGN(0)
 258     {
 259         *(.text)
 260     } :text
 261     .data : {
 262         *(.data)
 263     } :data
 264     _bss_start = .;
 265     .empty : {
 266         *(.empty)
 267     } :empty
 268     /DISCARD/ : {
 269         *(.note.gnu.property)
 270     }
 271     .note : {
 272         *(.comment)
 273     } :note
 274     .bss _bss_start  (OVERLAY) : {
 275         *(.bss)
 276     }
 277 }
 278 ''', file=fd)
 279     # -static: Avoid requiring any shared libraries
 280     # -nostdlib: Don't link with C library
 281     # -Wl,--build-id=none: Don't generate a build ID, so that we just get the
 282     #   text section at the start
 283     # -m32: Build for 32-bit x86
 284     # -T...: Specifies the link script, which sets the start address
 285     cc, args = tools.get_target_compile_tool('cc')
 286     args += ['-static', '-nostdlib', '-Wl,--build-id=none', '-m32', '-T',
 287             lds_file, '-o', elf_fname, s_file]
 288     stdout = command.Output(cc, *args)
 289     shutil.rmtree(outdir)
 290
 291 def DecodeElf(data, location):
 292     """Decode an ELF file and return information about it
 293
 294     Args:
 295         data: Data from ELF file
 296         location: Start address of data to return
 297
 298     Returns:
 299         ElfInfo object containing information about the decoded ELF file
 300     """
 301     file_size = len(data)
 302     with io.BytesIO(data) as fd:
 303         elf = ELFFile(fd)
 304         data_start = 0xffffffff;
 305         data_end = 0;
 306         mem_end = 0;
 307         virt_to_phys = 0;
 308
 309         for i in range(elf.num_segments()):
 310             segment = elf.get_segment(i)
 311             if segment['p_type'] != 'PT_LOAD' or not segment['p_memsz']:
 312                 skipped = 1  # To make code-coverage see this line
 313                 continue
 314             start = segment['p_paddr']
 315             mend = start + segment['p_memsz']
 316             rend = start + segment['p_filesz']
 317             data_start = min(data_start, start)
 318             data_end = max(data_end, rend)
 319             mem_end = max(mem_end, mend)
 320             if not virt_to_phys:
 321                 virt_to_phys = segment['p_paddr'] - segment['p_vaddr']
 322
 323         output = bytearray(data_end - data_start)
 324         for i in range(elf.num_segments()):
 325             segment = elf.get_segment(i)
 326             if segment['p_type'] != 'PT_LOAD' or not segment['p_memsz']:
 327                 skipped = 1  # To make code-coverage see this line
 328                 continue
 329             start = segment['p_paddr']
 330             offset = 0
 331             if start < location:
 332                 offset = location - start
 333                 start = location
 334             # A legal ELF file can have a program header with non-zero length
 335             # but zero-length file size and a non-zero offset which, added
 336             # together, are greater than input->size (i.e. the total file size).
 337             #  So we need to not even test in the case that p_filesz is zero.
 338             # Note: All of this code is commented out since we don't have a test
 339             # case for it.
 340             size = segment['p_filesz']
 341             #if not size:
 342                 #continue
 343             #end = segment['p_offset'] + segment['p_filesz']
 344             #if end > file_size:
 345                 #raise ValueError('Underflow copying out the segment. File has %#x bytes left, segment end is %#x\n',
 346                                  #file_size, end)
 347             output[start - data_start:start - data_start + size] = (
 348                 segment.data()[offset:])
 349     return ElfInfo(output, data_start, elf.header['e_entry'] + virt_to_phys,
 350                    mem_end - data_start)
 351
 352 def UpdateFile(infile, outfile, start_sym, end_sym, insert):
 353     tout.Notice("Creating file '%s' with data length %#x (%d) between symbols '%s' and '%s'" %
 354                 (outfile, len(insert), len(insert), start_sym, end_sym))
 355     syms = GetSymbolFileOffset(infile, [start_sym, end_sym])
 356     if len(syms) != 2:
 357         raise ValueError("Expected two symbols '%s' and '%s': got %d: %s" %
 358                          (start_sym, end_sym, len(syms),
 359                           ','.join(syms.keys())))
 360
 361     size = syms[end_sym].offset - syms[start_sym].offset
 362     if len(insert) > size:
 363         raise ValueError("Not enough space in '%s' for data length %#x (%d); size is %#x (%d)" %
 364                          (infile, len(insert), len(insert), size, size))
 365
 366     data = tools.read_file(infile)
 367     newdata = data[:syms[start_sym].offset]
 368     newdata += insert + tools.get_bytes(0, size - len(insert))
 369     newdata += data[syms[end_sym].offset:]
 370     tools.write_file(outfile, newdata)
 371     tout.Info('Written to offset %#x' % syms[start_sym].offset)