// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
5 #include "courgette/disassembler_elf_32.h"
10 #include "base/functional/bind.h"
11 #include "base/logging.h"
12 #include "courgette/assembly_program.h"
13 #include "courgette/courgette.h"
19 // Sorts |section_headers| by file offset and stores the resulting permutation
20 // of section ids in |order|.
21 std::vector<Elf32_Half> GetSectionHeaderFileOffsetOrder(
22 const std::vector<Elf32_Shdr>& section_headers) {
23 size_t size = section_headers.size();
24 std::vector<Elf32_Half> order(size);
25 for (size_t i = 0; i < size; ++i)
26 order[i] = static_cast<Elf32_Half>(i);
28 auto comp = [&](int idx1, int idx2) {
29 return section_headers[idx1].sh_offset < section_headers[idx2].sh_offset;
31 std::stable_sort(order.begin(), order.end(), comp);
37 DisassemblerElf32::Elf32RvaVisitor_Rel32::Elf32RvaVisitor_Rel32(
38 const std::vector<std::unique_ptr<TypedRVA>>& rva_locations)
39 : VectorRvaVisitor<std::unique_ptr<TypedRVA>>(rva_locations) {
42 RVA DisassemblerElf32::Elf32RvaVisitor_Rel32::Get() const {
43 return (*it_)->rva() + (*it_)->relative_target();
46 DisassemblerElf32::DisassemblerElf32(const uint8_t* start, size_t length)
47 : Disassembler(start, length),
49 section_header_table_size_(0),
50 program_header_table_(nullptr),
51 program_header_table_size_(0),
52 default_string_section_(nullptr) {}
54 RVA DisassemblerElf32::FileOffsetToRVA(FileOffset offset) const {
55 // File offsets can be 64-bit values, but we are dealing with 32-bit
56 // executables and so only need to support 32-bit file sizes.
57 uint32_t offset32 = static_cast<uint32_t>(offset);
59 // Visit section headers ordered by file offset.
60 for (Elf32_Half section_id : section_header_file_offset_order_) {
61 const Elf32_Shdr* section_header = SectionHeader(section_id);
62 // These can appear to have a size in the file, but don't.
63 if (section_header->sh_type == SHT_NOBITS)
66 Elf32_Off section_begin = section_header->sh_offset;
67 Elf32_Off section_end = section_begin + section_header->sh_size;
69 if (offset32 >= section_begin && offset32 < section_end) {
70 return section_header->sh_addr + (offset32 - section_begin);
77 FileOffset DisassemblerElf32::RVAToFileOffset(RVA rva) const {
78 for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
80 const Elf32_Shdr* section_header = SectionHeader(section_id);
81 // These can appear to have a size in the file, but don't.
82 if (section_header->sh_type == SHT_NOBITS)
84 Elf32_Addr begin = section_header->sh_addr;
85 Elf32_Addr end = begin + section_header->sh_size;
87 if (rva >= begin && rva < end)
88 return section_header->sh_offset + (rva - begin);
93 RVA DisassemblerElf32::PointerToTargetRVA(const uint8_t* p) const {
94 // TODO(huangs): Add check (e.g., IsValidTargetRVA(), but more efficient).
95 return Read32LittleEndian(p);
98 bool DisassemblerElf32::ParseHeader() {
99 if (length() < sizeof(Elf32_Ehdr))
100 return Bad("Too small");
102 header_ = reinterpret_cast<const Elf32_Ehdr*>(start());
104 // Perform DisassemblerElf32::QuickDetect() checks (with error messages).
106 // Have magic for ELF header?
107 if (header_->e_ident[EI_MAG0] != 0x7F || header_->e_ident[EI_MAG1] != 'E' ||
108 header_->e_ident[EI_MAG2] != 'L' || header_->e_ident[EI_MAG3] != 'F') {
109 return Bad("No Magic Number");
112 if (header_->e_ident[EI_CLASS] != ELFCLASS32 ||
113 header_->e_ident[EI_DATA] != ELFDATA2LSB ||
114 header_->e_machine != ElfEM()) {
115 return Bad("Not a supported architecture");
118 if (header_->e_type != ET_EXEC && header_->e_type != ET_DYN)
119 return Bad("Not an executable file or shared library");
121 if (header_->e_version != 1 || header_->e_ident[EI_VERSION] != 1)
122 return Bad("Unknown file version");
124 if (header_->e_shentsize != sizeof(Elf32_Shdr))
125 return Bad("Unexpected section header size");
127 // Perform more complex checks, while extracting data.
129 if (header_->e_shoff < sizeof(Elf32_Ehdr) ||
130 !IsArrayInBounds(header_->e_shoff, header_->e_shnum,
131 sizeof(Elf32_Shdr))) {
132 return Bad("Out of bounds section header table");
135 // Extract |section_header_table_|, ordered by section id.
136 const Elf32_Shdr* section_header_table_raw =
137 reinterpret_cast<const Elf32_Shdr*>(
138 FileOffsetToPointer(header_->e_shoff));
139 section_header_table_size_ = header_->e_shnum;
140 section_header_table_.assign(section_header_table_raw,
141 section_header_table_raw + section_header_table_size_);
142 if (!CheckSectionRanges())
143 return Bad("Out of bound section");
144 section_header_file_offset_order_ =
145 GetSectionHeaderFileOffsetOrder(section_header_table_);
146 if (header_->e_phoff < sizeof(Elf32_Ehdr) ||
147 !IsArrayInBounds(header_->e_phoff, header_->e_phnum,
148 sizeof(Elf32_Phdr))) {
149 return Bad("Out of bounds program header table");
152 // Extract |program_header_table_|.
153 program_header_table_size_ = header_->e_phnum;
154 program_header_table_ = reinterpret_cast<const Elf32_Phdr*>(
155 FileOffsetToPointer(header_->e_phoff));
156 if (!CheckProgramSegmentRanges())
157 return Bad("Out of bound segment");
159 // Extract |default_string_section_|.
160 Elf32_Half string_section_id = header_->e_shstrndx;
161 if (string_section_id == SHN_UNDEF)
162 return Bad("Missing string section");
163 if (string_section_id >= header_->e_shnum)
164 return Bad("Out of bounds string section index");
165 if (SectionHeader(string_section_id)->sh_type != SHT_STRTAB)
166 return Bad("Invalid string section");
167 default_string_section_size_ = SectionHeader(string_section_id)->sh_size;
168 default_string_section_ =
169 reinterpret_cast<const char*>(SectionBody(string_section_id));
170 // String section may be empty. If nonempty, then last byte must be null.
171 if (default_string_section_size_ > 0) {
172 if (default_string_section_[default_string_section_size_ - 1] != '\0')
173 return Bad("String section does not terminate");
181 CheckBool DisassemblerElf32::IsValidTargetRVA(RVA rva) const {
182 if (rva == kUnassignedRVA)
185 // |rva| is valid if it's contained in any program segment.
186 for (Elf32_Half segment_id = 0; segment_id < ProgramSegmentHeaderCount();
188 const Elf32_Phdr* segment_header = ProgramSegmentHeader(segment_id);
190 if (segment_header->p_type != PT_LOAD)
193 Elf32_Addr begin = segment_header->p_vaddr;
194 Elf32_Addr end = segment_header->p_vaddr + segment_header->p_memsz;
196 if (rva >= begin && rva < end)
204 bool DisassemblerElf32::QuickDetect(const uint8_t* start,
206 e_machine_values elf_em) {
207 if (length < sizeof(Elf32_Ehdr))
210 const Elf32_Ehdr* header = reinterpret_cast<const Elf32_Ehdr*>(start);
212 // Have magic for ELF header?
213 if (header->e_ident[EI_MAG0] != 0x7F || header->e_ident[EI_MAG1] != 'E' ||
214 header->e_ident[EI_MAG2] != 'L' || header->e_ident[EI_MAG3] != 'F') {
217 if (header->e_ident[EI_CLASS] != ELFCLASS32 ||
218 header->e_ident[EI_DATA] != ELFDATA2LSB || header->e_machine != elf_em) {
221 if (header->e_type != ET_EXEC && header->e_type != ET_DYN)
223 if (header->e_version != 1 || header->e_ident[EI_VERSION] != 1)
225 if (header->e_shentsize != sizeof(Elf32_Shdr))
231 bool DisassemblerElf32::CheckSectionRanges() {
232 for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
234 const Elf32_Shdr* section_header = SectionHeader(section_id);
235 if (section_header->sh_type == SHT_NOBITS) // E.g., .bss.
237 if (!IsRangeInBounds(section_header->sh_offset, section_header->sh_size))
243 bool DisassemblerElf32::CheckProgramSegmentRanges() {
244 for (Elf32_Half segment_id = 0; segment_id < ProgramSegmentHeaderCount();
246 const Elf32_Phdr* segment_header = ProgramSegmentHeader(segment_id);
247 if (!IsRangeInBounds(segment_header->p_offset, segment_header->p_filesz))
253 void DisassemblerElf32::UpdateLength() {
254 Elf32_Off result = 0;
256 // Find the end of the last section.
257 for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
259 const Elf32_Shdr* section_header = SectionHeader(section_id);
260 if (section_header->sh_type == SHT_NOBITS)
262 DCHECK(IsRangeInBounds(section_header->sh_offset, section_header->sh_size));
263 Elf32_Off section_end = section_header->sh_offset + section_header->sh_size;
264 result = std::max(result, section_end);
267 // Find the end of the last segment.
268 for (Elf32_Half segment_id = 0; segment_id < ProgramSegmentHeaderCount();
270 const Elf32_Phdr* segment_header = ProgramSegmentHeader(segment_id);
271 DCHECK(IsRangeInBounds(segment_header->p_offset, segment_header->p_filesz));
272 Elf32_Off segment_end = segment_header->p_offset + segment_header->p_filesz;
273 result = std::max(result, segment_end);
276 Elf32_Off section_table_end =
277 header_->e_shoff + (header_->e_shnum * sizeof(Elf32_Shdr));
278 result = std::max(result, section_table_end);
280 Elf32_Off segment_table_end =
281 header_->e_phoff + (header_->e_phnum * sizeof(Elf32_Phdr));
282 result = std::max(result, segment_table_end);
284 ReduceLength(result);
287 CheckBool DisassemblerElf32::SectionName(const Elf32_Shdr& shdr,
288 std::string* name) const {
290 size_t string_pos = shdr.sh_name;
291 if (string_pos == 0) {
292 // Empty string by convention. Valid even if string section is empty.
295 if (string_pos >= default_string_section_size_)
297 // Safe because string section must terminate with null.
298 *name = default_string_section_ + string_pos;
303 CheckBool DisassemblerElf32::RVAsToFileOffsets(
304 const std::vector<RVA>& rvas,
305 std::vector<FileOffset>* file_offsets) const {
306 file_offsets->clear();
307 file_offsets->reserve(rvas.size());
308 for (RVA rva : rvas) {
309 FileOffset file_offset = RVAToFileOffset(rva);
310 if (file_offset == kNoFileOffset)
312 file_offsets->push_back(file_offset);
317 CheckBool DisassemblerElf32::RVAsToFileOffsets(
318 std::vector<std::unique_ptr<TypedRVA>>* typed_rvas) const {
319 for (auto& typed_rva : *typed_rvas) {
320 FileOffset file_offset = RVAToFileOffset(typed_rva->rva());
321 if (file_offset == kNoFileOffset)
323 typed_rva->set_file_offset(file_offset);
328 bool DisassemblerElf32::ExtractAbs32Locations() {
329 abs32_locations_.clear();
331 // Loop through sections for relocation sections
332 for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
334 const Elf32_Shdr* section_header = SectionHeader(section_id);
336 if (section_header->sh_type == SHT_REL) {
337 const Elf32_Rel* relocs_table =
338 reinterpret_cast<const Elf32_Rel*>(SectionBody(section_id));
339 // Reject if malformed.
340 if (section_header->sh_entsize != sizeof(Elf32_Rel))
342 if (section_header->sh_size % section_header->sh_entsize != 0)
345 int relocs_table_count =
346 section_header->sh_size / section_header->sh_entsize;
348 // Elf32_Word relocation_section_id = section_header->sh_info;
350 // Loop through relocation objects in the relocation section
351 for (int rel_id = 0; rel_id < relocs_table_count; ++rel_id) {
354 // Quite a few of these conversions fail, and we simply skip
355 // them, that's okay.
356 if (RelToRVA(relocs_table[rel_id], &rva) && CheckSection(rva))
357 abs32_locations_.push_back(rva);
362 std::sort(abs32_locations_.begin(), abs32_locations_.end());
363 DCHECK(abs32_locations_.empty() || abs32_locations_.back() != kUnassignedRVA);
367 bool DisassemblerElf32::ExtractRel32Locations() {
368 rel32_locations_.clear();
369 bool found_rel32 = false;
371 // Loop through sections for relocation sections
372 for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
374 const Elf32_Shdr* section_header = SectionHeader(section_id);
376 // Some debug sections can have sh_type=SHT_PROGBITS but sh_addr=0.
377 if (section_header->sh_type != SHT_PROGBITS || section_header->sh_addr == 0)
380 // Heuristic: Only consider ".text" section.
381 std::string section_name;
382 if (!SectionName(*section_header, §ion_name))
384 if (section_name != ".text")
388 if (!ParseRel32RelocsFromSection(section_header))
392 VLOG(1) << "Warning: Found no rel32 addresses. Missing .text section?";
394 std::sort(rel32_locations_.begin(), rel32_locations_.end(),
395 TypedRVA::IsLessThanByRVA);
396 DCHECK(rel32_locations_.empty() ||
397 rel32_locations_.back()->rva() != kUnassignedRVA);
402 RvaVisitor* DisassemblerElf32::CreateAbs32TargetRvaVisitor() {
403 return new RvaVisitor_Abs32(abs32_locations_, *this);
406 RvaVisitor* DisassemblerElf32::CreateRel32TargetRvaVisitor() {
407 return new Elf32RvaVisitor_Rel32(rel32_locations_);
410 void DisassemblerElf32::RemoveUnusedRel32Locations(AssemblyProgram* program) {
411 auto tail_it = rel32_locations_.begin();
412 for (auto head_it = rel32_locations_.begin();
413 head_it != rel32_locations_.end(); ++head_it) {
414 RVA target_rva = (*head_it)->rva() + (*head_it)->relative_target();
415 if (program->FindRel32Label(target_rva) == nullptr) {
416 // If address does not match a Label (because it was removed), deallocate.
417 (*head_it).reset(nullptr);
419 // Else squeeze nullptr to end to compactify.
420 if (tail_it != head_it)
421 (*tail_it).swap(*head_it);
425 rel32_locations_.resize(std::distance(rel32_locations_.begin(), tail_it));
428 InstructionGenerator DisassemblerElf32::GetInstructionGenerator(
429 AssemblyProgram* program) {
430 return base::BindRepeating(&DisassemblerElf32::ParseFile,
431 base::Unretained(this), program);
434 CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program,
435 InstructionReceptor* receptor) const {
436 // Walk all the bytes in the file, whether or not in a section.
437 FileOffset file_offset = 0;
439 // File parsing follows file offset order, and we visit abs32 and rel32
440 // locations in lockstep. Therefore we need to extract and sort file offsets
441 // of all abs32 and rel32 locations. For abs32, we copy the offsets to a new
443 std::vector<FileOffset> abs_offsets;
444 if (!RVAsToFileOffsets(abs32_locations_, &abs_offsets))
446 std::sort(abs_offsets.begin(), abs_offsets.end());
448 // For rel32, TypedRVA (rather than raw offset) is stored, so sort-by-offset
449 // is performed in place to save memory. At the end of function we will
451 if (!RVAsToFileOffsets(&rel32_locations_))
453 std::sort(rel32_locations_.begin(),
454 rel32_locations_.end(),
455 TypedRVA::IsLessThanByFileOffset);
457 std::vector<FileOffset>::iterator current_abs_offset = abs_offsets.begin();
458 std::vector<FileOffset>::iterator end_abs_offset = abs_offsets.end();
460 std::vector<std::unique_ptr<TypedRVA>>::iterator current_rel =
461 rel32_locations_.begin();
462 std::vector<std::unique_ptr<TypedRVA>>::iterator end_rel =
463 rel32_locations_.end();
465 // Visit section headers ordered by file offset.
466 for (Elf32_Half section_id : section_header_file_offset_order_) {
467 const Elf32_Shdr* section_header = SectionHeader(section_id);
469 if (section_header->sh_type == SHT_NOBITS)
472 if (!ParseSimpleRegion(file_offset, section_header->sh_offset, receptor))
475 file_offset = section_header->sh_offset;
477 switch (section_header->sh_type) {
479 if (!ParseRelocationSection(section_header, receptor))
481 file_offset = section_header->sh_offset + section_header->sh_size;
484 if (!ParseProgbitsSection(section_header, ¤t_abs_offset,
485 end_abs_offset, ¤t_rel, end_rel,
486 program, receptor)) {
489 file_offset = section_header->sh_offset + section_header->sh_size;
494 while (current_abs_offset != end_abs_offset &&
495 *current_abs_offset >= section_header->sh_offset &&
496 *current_abs_offset <
497 section_header->sh_offset + section_header->sh_size) {
498 // Skip any abs_offsets appear in the unsupported INIT_ARRAY section
499 VLOG(1) << "Skipping relocation entry for unsupported section: "
500 << section_header->sh_type;
501 ++current_abs_offset;
505 if (current_abs_offset != end_abs_offset &&
506 *current_abs_offset >= section_header->sh_offset &&
507 *current_abs_offset <
508 section_header->sh_offset + section_header->sh_size) {
509 VLOG(1) << "Relocation address in unrecognized ELF section: "
510 << section_header->sh_type;
516 // Rest of the file past the last section
517 if (!ParseSimpleRegion(file_offset, length(), receptor))
520 // Restore original rel32 location order and sort by RVA order.
521 std::sort(rel32_locations_.begin(), rel32_locations_.end(),
522 TypedRVA::IsLessThanByRVA);
524 // Make certain we consume all of the relocations as expected
525 return (current_abs_offset == end_abs_offset);
528 CheckBool DisassemblerElf32::ParseProgbitsSection(
529 const Elf32_Shdr* section_header,
530 std::vector<FileOffset>::iterator* current_abs_offset,
531 std::vector<FileOffset>::iterator end_abs_offset,
532 std::vector<std::unique_ptr<TypedRVA>>::iterator* current_rel,
533 std::vector<std::unique_ptr<TypedRVA>>::iterator end_rel,
534 AssemblyProgram* program,
535 InstructionReceptor* receptor) const {
536 // Walk all the bytes in the file, whether or not in a section.
537 FileOffset file_offset = section_header->sh_offset;
538 FileOffset section_end = section_header->sh_offset + section_header->sh_size;
540 Elf32_Addr origin = section_header->sh_addr;
541 FileOffset origin_offset = section_header->sh_offset;
542 if (!receptor->EmitOrigin(origin))
545 while (file_offset < section_end) {
546 if (*current_abs_offset != end_abs_offset &&
547 file_offset > **current_abs_offset)
550 while (*current_rel != end_rel &&
551 file_offset > (**current_rel)->file_offset()) {
555 FileOffset next_relocation = section_end;
557 if (*current_abs_offset != end_abs_offset &&
558 next_relocation > **current_abs_offset)
559 next_relocation = **current_abs_offset;
561 // Rel offsets are heuristically derived, and might (incorrectly) overlap
562 // an Abs value, or the end of the section, so +3 to make sure there is
563 // room for the full 4 byte value.
564 if (*current_rel != end_rel &&
565 next_relocation > ((**current_rel)->file_offset() + 3))
566 next_relocation = (**current_rel)->file_offset();
568 if (next_relocation > file_offset) {
569 if (!ParseSimpleRegion(file_offset, next_relocation, receptor))
572 file_offset = next_relocation;
576 if (*current_abs_offset != end_abs_offset &&
577 file_offset == **current_abs_offset) {
578 RVA target_rva = PointerToTargetRVA(FileOffsetToPointer(file_offset));
579 DCHECK_NE(kNoRVA, target_rva);
581 Label* label = program->FindAbs32Label(target_rva);
583 if (!receptor->EmitAbs32(label))
585 file_offset += sizeof(RVA);
586 ++(*current_abs_offset);
590 if (*current_rel != end_rel &&
591 file_offset == (**current_rel)->file_offset()) {
592 uint32_t relative_target = (**current_rel)->relative_target();
593 CHECK_EQ(RVA(origin + (file_offset - origin_offset)),
594 (**current_rel)->rva());
595 // This cast is for 64 bit systems, and is only safe because we
596 // are working on 32 bit executables.
597 RVA target_rva = (RVA)(origin + (file_offset - origin_offset) +
600 Label* label = program->FindRel32Label(target_rva);
603 if (!(**current_rel)->EmitInstruction(label, receptor))
605 file_offset += (**current_rel)->op_size();
611 // Rest of the section (if any)
612 return ParseSimpleRegion(file_offset, section_end, receptor);
615 CheckBool DisassemblerElf32::ParseSimpleRegion(
616 FileOffset start_file_offset,
617 FileOffset end_file_offset,
618 InstructionReceptor* receptor) const {
619 // Callers don't guarantee start < end
620 if (start_file_offset >= end_file_offset)
623 const size_t len = end_file_offset - start_file_offset;
625 if (!receptor->EmitMultipleBytes(FileOffsetToPointer(start_file_offset),
633 CheckBool DisassemblerElf32::CheckSection(RVA rva) {
634 // Handle 32-bit references only.
635 constexpr uint8_t kWidth = 4;
637 FileOffset file_offset = RVAToFileOffset(rva);
638 if (file_offset == kNoFileOffset)
641 for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
643 const Elf32_Shdr* section_header = SectionHeader(section_id);
644 // Take account of pointer |kWidth|, and reject pointers that start within
645 // the section but whose span lies outside.
646 FileOffset start_offset = section_header->sh_offset;
647 if (file_offset < start_offset || section_header->sh_size < kWidth)
649 FileOffset end_offset = start_offset + section_header->sh_size - kWidth + 1;
650 if (file_offset >= end_offset)
653 switch (section_header->sh_type) {
654 case SHT_REL: // Falls through.
663 } // namespace courgette