1 // Copyright 2011 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "courgette/encoded_program.h"
16 #include "base/environment.h"
17 #include "base/logging.h"
18 #include "base/memory/raw_ptr.h"
19 #include "base/numerics/safe_conversions.h"
20 #include "base/numerics/safe_math.h"
21 #include "base/strings/string_number_conversions.h"
22 #include "base/strings/string_util.h"
23 #include "courgette/label_manager.h"
24 #include "courgette/streams.h"
30 // Serializes a vector of integral values using Varint32 coding.
32 CheckBool WriteVector(const V& items, SinkStream* buffer) {
33 size_t count = items.size();
34 bool ok = buffer->WriteSizeVarint32(count);
35 for (size_t i = 0; ok && i < count; ++i) {
36 ok = buffer->WriteSizeVarint32(items[i]);
42 bool ReadVector(V* items, SourceStream* buffer) {
44 if (!buffer->ReadVarint32(&count))
49 bool ok = items->reserve(count);
50 for (size_t i = 0; ok && i < count; ++i) {
52 ok = buffer->ReadVarint32(&item);
54 ok = items->push_back(static_cast<typename V::value_type>(item));
60 // Serializes a vector, using delta coding followed by Varint32Signed coding.
62 CheckBool WriteSigned32Delta(const V& set, SinkStream* buffer) {
63 size_t count = set.size();
64 bool ok = buffer->WriteSizeVarint32(count);
66 for (size_t i = 0; ok && i < count; ++i) {
67 uint32_t current = set[i];
68 int32_t delta = current - prev;
69 ok = buffer->WriteVarint32Signed(delta);
76 static CheckBool ReadSigned32Delta(V* set, SourceStream* buffer) {
79 if (!buffer->ReadVarint32(&count))
83 bool ok = set->reserve(count);
85 for (size_t i = 0; ok && i < count; ++i) {
87 ok = buffer->ReadVarint32Signed(&delta);
89 uint32_t current = static_cast<uint32_t>(prev + delta);
90 ok = set->push_back(current);
97 // Write a vector as the byte representation of the contents.
99 // (This only really makes sense for a type T that has sizeof(T)==1, otherwise
100 // serialized representation is not endian-agnostic. But it is useful to keep
101 // the possibility of a greater size for experiments comparing Varint32 encoding
102 // of a vector of larger integrals vs a plain form.)
105 CheckBool WriteVectorU8(const V& items, SinkStream* buffer) {
106 size_t count = items.size();
107 bool ok = buffer->WriteSizeVarint32(count);
108 if (count != 0 && ok) {
109 size_t byte_count = count * sizeof(typename V::value_type);
110 ok = buffer->Write(static_cast<const void*>(&items[0]), byte_count);
116 bool ReadVectorU8(V* items, SourceStream* buffer) {
118 if (!buffer->ReadVarint32(&count))
122 bool ok = items->resize(count, 0);
123 if (ok && count != 0) {
124 size_t byte_count = count * sizeof(typename V::value_type);
125 return buffer->Read(static_cast<void*>(&((*items)[0])), byte_count);
130 /******** InstructionStoreReceptor ********/
132 // An InstructionReceptor that stores emitted instructions.
133 class InstructionStoreReceptor : public InstructionReceptor {
135 explicit InstructionStoreReceptor(ExecutableType exe_type,
136 EncodedProgram* encoded)
137 : exe_type_(exe_type), encoded_(encoded) {
141 InstructionStoreReceptor(const InstructionStoreReceptor&) = delete;
142 InstructionStoreReceptor& operator=(const InstructionStoreReceptor&) = delete;
144 CheckBool EmitPeRelocs() override {
145 return encoded_->AddPeMakeRelocs(exe_type_);
147 CheckBool EmitElfRelocation() override {
148 return encoded_->AddElfMakeRelocs();
150 CheckBool EmitOrigin(RVA rva) override { return encoded_->AddOrigin(rva); }
151 CheckBool EmitSingleByte(uint8_t byte) override {
152 return encoded_->AddCopy(1, &byte);
154 CheckBool EmitMultipleBytes(const uint8_t* bytes, size_t len) override {
155 return encoded_->AddCopy(len, bytes);
157 CheckBool EmitRel32(Label* label) override {
158 return encoded_->AddRel32(label->index_);
160 CheckBool EmitAbs32(Label* label) override {
161 return encoded_->AddAbs32(label->index_);
163 CheckBool EmitAbs64(Label* label) override {
164 return encoded_->AddAbs64(label->index_);
168 ExecutableType exe_type_;
169 raw_ptr<EncodedProgram> encoded_;
174 ////////////////////////////////////////////////////////////////////////////////
176 // Constructor is here rather than in the header. Although the constructor
177 // appears to do nothing it is fact quite large because of the implicit calls to
178 // field constructors. Ditto for the destructor.
179 EncodedProgram::EncodedProgram() = default;
180 EncodedProgram::~EncodedProgram() = default;
182 CheckBool EncodedProgram::ImportLabels(
183 const LabelManager& abs32_label_manager,
184 const LabelManager& rel32_label_manager) {
185 if (!WriteRvasToList(abs32_label_manager, &abs32_rva_) ||
186 !WriteRvasToList(rel32_label_manager, &rel32_rva_)) {
189 FillUnassignedRvaSlots(&abs32_rva_);
190 FillUnassignedRvaSlots(&rel32_rva_);
194 CheckBool EncodedProgram::AddOrigin(RVA origin) {
195 return ops_.push_back(ORIGIN) && origins_.push_back(origin);
198 CheckBool EncodedProgram::AddCopy(size_t count, const void* bytes) {
199 const uint8_t* source = static_cast<const uint8_t*>(bytes);
203 // Fold adjacent COPY instructions into one. This nearly halves the size of
204 // an EncodedProgram with only COPY1 instructions since there are approx plain
205 // 16 bytes per reloc. This has a working-set benefit during decompression.
206 // For compression of files with large differences this makes a small (4%)
207 // improvement in size. For files with small differences this degrades the
208 // compressed size by 1.3%
210 if (ops_.back() == COPY1) {
212 ok = copy_counts_.push_back(1);
214 if (ok && ops_.back() == COPY) {
215 copy_counts_.back() += count;
216 for (size_t i = 0; ok && i < count; ++i) {
217 ok = copy_bytes_.push_back(source[i]);
225 ok = ops_.push_back(COPY1) && copy_bytes_.push_back(source[0]);
227 ok = ops_.push_back(COPY) && copy_counts_.push_back(count);
228 for (size_t i = 0; ok && i < count; ++i) {
229 ok = copy_bytes_.push_back(source[i]);
237 CheckBool EncodedProgram::AddAbs32(int label_index) {
238 return ops_.push_back(ABS32) && abs32_ix_.push_back(label_index);
241 CheckBool EncodedProgram::AddAbs64(int label_index) {
242 return ops_.push_back(ABS64) && abs32_ix_.push_back(label_index);
245 CheckBool EncodedProgram::AddRel32(int label_index) {
246 return ops_.push_back(REL32) && rel32_ix_.push_back(label_index);
249 CheckBool EncodedProgram::AddPeMakeRelocs(ExecutableType kind) {
250 if (kind == EXE_WIN_32_X86)
251 return ops_.push_back(MAKE_PE_RELOCATION_TABLE);
252 return ops_.push_back(MAKE_PE64_RELOCATION_TABLE);
255 CheckBool EncodedProgram::AddElfMakeRelocs() {
256 return ops_.push_back(MAKE_ELF_RELOCATION_TABLE);
259 void EncodedProgram::DebuggingSummary() {
260 VLOG(1) << "EncodedProgram Summary"
261 << "\n image base " << image_base_
262 << "\n abs32 rvas " << abs32_rva_.size()
263 << "\n rel32 rvas " << rel32_rva_.size()
264 << "\n ops " << ops_.size()
265 << "\n origins " << origins_.size()
266 << "\n copy_counts " << copy_counts_.size()
267 << "\n copy_bytes " << copy_bytes_.size()
268 << "\n abs32_ix " << abs32_ix_.size()
269 << "\n rel32_ix " << rel32_ix_.size();
272 ////////////////////////////////////////////////////////////////////////////////
274 // For algorithm refinement purposes it is useful to write subsets of the file
275 // format. This gives us the ability to estimate the entropy of the
276 // differential compression of the individual streams, which can provide
277 // invaluable insights. The default, of course, is to include all the streams.
280 INCLUDE_ABS32_ADDRESSES = 0x0001,
281 INCLUDE_REL32_ADDRESSES = 0x0002,
282 INCLUDE_ABS32_INDEXES = 0x0010,
283 INCLUDE_REL32_INDEXES = 0x0020,
284 INCLUDE_OPS = 0x0100,
285 INCLUDE_BYTES = 0x0200,
286 INCLUDE_COPY_COUNTS = 0x0400,
287 INCLUDE_MISC = 0x1000
290 static FieldSelect GetFieldSelect() {
291 // TODO(sra): Use better configuration.
292 std::unique_ptr<base::Environment> env(base::Environment::Create());
294 env->GetVar("A_FIELDS", &s);
296 if (!base::StringToUint64(s, &fields))
297 return static_cast<FieldSelect>(~0);
298 return static_cast<FieldSelect>(fields);
301 CheckBool EncodedProgram::WriteTo(SinkStreamSet* streams) {
302 FieldSelect select = GetFieldSelect();
304 // The order of fields must be consistent in WriteTo and ReadFrom, regardless
305 // of the streams used. The code can be configured with all kStreamXXX
306 // constants the same.
308 // If we change the code to pipeline reading with assembly (to avoid temporary
309 // storage vectors by consuming operands directly from the stream) then we
310 // need to read the base address and the random access address tables first,
311 // the rest can be interleaved.
313 if (select & INCLUDE_MISC) {
314 uint32_t high = static_cast<uint32_t>(image_base_ >> 32);
315 uint32_t low = static_cast<uint32_t>(image_base_ & 0xffffffffU);
317 if (!streams->stream(kStreamMisc)->WriteVarint32(high) ||
318 !streams->stream(kStreamMisc)->WriteVarint32(low)) {
325 if (select & INCLUDE_ABS32_ADDRESSES) {
326 success &= WriteSigned32Delta(abs32_rva_,
327 streams->stream(kStreamAbs32Addresses));
330 if (select & INCLUDE_REL32_ADDRESSES) {
331 success &= WriteSigned32Delta(rel32_rva_,
332 streams->stream(kStreamRel32Addresses));
335 if (select & INCLUDE_MISC)
336 success &= WriteVector(origins_, streams->stream(kStreamOriginAddresses));
338 if (select & INCLUDE_OPS) {
340 success &= streams->stream(kStreamOps)->Reserve(ops_.size() + 5);
341 success &= WriteVector(ops_, streams->stream(kStreamOps));
344 if (select & INCLUDE_COPY_COUNTS)
345 success &= WriteVector(copy_counts_, streams->stream(kStreamCopyCounts));
347 if (select & INCLUDE_BYTES)
348 success &= WriteVectorU8(copy_bytes_, streams->stream(kStreamBytes));
350 if (select & INCLUDE_ABS32_INDEXES)
351 success &= WriteVector(abs32_ix_, streams->stream(kStreamAbs32Indexes));
353 if (select & INCLUDE_REL32_INDEXES)
354 success &= WriteVector(rel32_ix_, streams->stream(kStreamRel32Indexes));
359 bool EncodedProgram::ReadFrom(SourceStreamSet* streams) {
363 if (!streams->stream(kStreamMisc)->ReadVarint32(&high) ||
364 !streams->stream(kStreamMisc)->ReadVarint32(&low)) {
367 image_base_ = (static_cast<uint64_t>(high) << 32) | low;
369 if (!ReadSigned32Delta(&abs32_rva_, streams->stream(kStreamAbs32Addresses)))
371 if (!ReadSigned32Delta(&rel32_rva_, streams->stream(kStreamRel32Addresses)))
373 if (!ReadVector(&origins_, streams->stream(kStreamOriginAddresses)))
375 if (!ReadVector(&ops_, streams->stream(kStreamOps)))
377 if (!ReadVector(©_counts_, streams->stream(kStreamCopyCounts)))
379 if (!ReadVectorU8(©_bytes_, streams->stream(kStreamBytes)))
381 if (!ReadVector(&abs32_ix_, streams->stream(kStreamAbs32Indexes)))
383 if (!ReadVector(&rel32_ix_, streams->stream(kStreamRel32Indexes)))
386 // Check that streams have been completely consumed.
387 for (int i = 0; i < kStreamLimit; ++i) {
388 if (streams->stream(i)->Remaining() > 0)
395 // Safe, non-throwing version of std::vector::at(). Returns 'true' for success,
396 // 'false' for out-of-bounds index error.
397 template<typename V, typename T>
398 bool VectorAt(const V& v, size_t index, T* output) {
399 if (index >= v.size())
405 CheckBool EncodedProgram::AssembleTo(SinkStream* final_buffer) {
406 // For the most part, the assembly process walks the various tables.
407 // ix_mumble is the index into the mumble table.
408 size_t ix_origins = 0;
409 size_t ix_copy_counts = 0;
410 size_t ix_copy_bytes = 0;
411 size_t ix_abs32_ix = 0;
412 size_t ix_rel32_ix = 0;
416 bool pending_pe_relocation_table = false;
417 uint8_t pending_pe_relocation_table_type = 0x03; // IMAGE_REL_BASED_HIGHLOW
418 Elf32_Word pending_elf_relocation_table_type = 0;
419 SinkStream bytes_following_relocation_table;
421 SinkStream* output = final_buffer;
423 for (size_t ix_ops = 0; ix_ops < ops_.size(); ++ix_ops) {
424 OP op = ops_[ix_ops];
432 if (!VectorAt(origins_, ix_origins, §ion_rva))
435 current_rva = section_rva;
441 if (!VectorAt(copy_counts_, ix_copy_counts, &count))
444 for (size_t i = 0; i < count; ++i) {
446 if (!VectorAt(copy_bytes_, ix_copy_bytes, &b))
449 if (!output->Write(&b, 1))
452 current_rva += static_cast<RVA>(count);
458 if (!VectorAt(copy_bytes_, ix_copy_bytes, &b))
461 if (!output->Write(&b, 1))
469 if (!VectorAt(rel32_ix_, ix_rel32_ix, &index))
473 if (!VectorAt(rel32_rva_, index, &rva))
475 uint32_t offset = (rva - (current_rva + 4));
476 if (!output->Write(&offset, 4))
485 if (!VectorAt(abs32_ix_, ix_abs32_ix, &index))
489 if (!VectorAt(abs32_rva_, index, &rva))
492 base::CheckedNumeric<uint32_t> abs32 = image_base_;
494 uint32_t safe_abs32 = abs32.ValueOrDie();
495 if (!abs32_relocs_.push_back(current_rva) ||
496 !output->Write(&safe_abs32, 4)) {
501 base::CheckedNumeric<uint64_t> abs64 = image_base_;
503 uint64_t safe_abs64 = abs64.ValueOrDie();
504 if (!abs32_relocs_.push_back(current_rva) ||
505 !output->Write(&safe_abs64, 8)) {
513 case MAKE_PE_RELOCATION_TABLE: {
514 // We can see the base relocation anywhere, but we only have the
515 // information to generate it at the very end. So we divert the bytes
516 // we are generating to a temporary stream.
517 if (pending_pe_relocation_table)
518 return false; // Can't have two base relocation tables.
520 pending_pe_relocation_table = true;
521 output = &bytes_following_relocation_table;
523 // There is a potential problem *if* the instruction stream contains
524 // some REL32 relocations following the base relocation and in the same
525 // section. We don't know the size of the table, so 'current_rva' will
526 // be wrong, causing REL32 offsets to be miscalculated. This never
527 // happens; the base relocation table is usually in a section of its
528 // own, a data-only section, and following everything else in the
529 // executable except some padding zero bytes. We could fix this by
530 // emitting an ORIGIN after the MAKE_BASE_RELOCATION_TABLE.
533 case MAKE_PE64_RELOCATION_TABLE: {
534 if (pending_pe_relocation_table)
535 return false; // Can't have two base relocation tables.
537 pending_pe_relocation_table = true;
538 pending_pe_relocation_table_type = 0x0A; // IMAGE_REL_BASED_DIR64
539 output = &bytes_following_relocation_table;
543 case MAKE_ELF_RELOCATION_TABLE: {
544 // We can see the base relocation anywhere, but we only have the
545 // information to generate it at the very end. So we divert the bytes
546 // we are generating to a temporary stream.
547 if (pending_elf_relocation_table_type)
548 return false; // Can't have two base relocation tables.
550 pending_elf_relocation_table_type = R_386_RELATIVE;
551 output = &bytes_following_relocation_table;
557 if (pending_pe_relocation_table) {
558 if (!GeneratePeRelocations(final_buffer,
559 pending_pe_relocation_table_type) ||
560 !final_buffer->Append(&bytes_following_relocation_table))
564 if (pending_elf_relocation_table_type) {
565 if (!GenerateElfRelocations(pending_elf_relocation_table_type,
567 !final_buffer->Append(&bytes_following_relocation_table))
571 // Final verification check: did we consume all lists?
572 if (ix_copy_counts != copy_counts_.size())
574 if (ix_copy_bytes != copy_bytes_.size())
576 if (ix_abs32_ix != abs32_ix_.size())
578 if (ix_rel32_ix != rel32_ix_.size())
584 CheckBool EncodedProgram::GenerateInstructions(
585 ExecutableType exe_type,
586 const InstructionGenerator& gen) {
587 InstructionStoreReceptor store_receptor(exe_type, this);
588 return gen.Run(&store_receptor);
591 // RelocBlock has the layout of a block of relocations in the base relocation
592 // table file format.
593 struct RelocBlockPOD {
596 uint16_t relocs[4096]; // Allow up to one relocation per byte of a 4k page.
599 static_assert(offsetof(RelocBlockPOD, relocs) == 8, "reloc block header size");
604 pod.page_rva = 0xFFFFFFFF;
608 void Add(uint16_t item) {
609 pod.relocs[(pod.block_size-8)/2] = item;
613 [[nodiscard]] CheckBool Flush(SinkStream* buffer) {
615 if (pod.block_size != 8) {
616 if (pod.block_size % 4 != 0) { // Pad to make size multiple of 4 bytes.
619 ok = buffer->Write(&pod, pod.block_size);
628 // Updates |rvas| so |rvas[label.index_] == label.rva_| for each |label| in
629 // |label_manager|, assuming |label.index_| is properly assigned. Takes care of
630 // |rvas| resizing. Unused slots in |rvas| are assigned |kUnassignedRVA|.
631 // Returns true on success, and false otherwise.
632 CheckBool EncodedProgram::WriteRvasToList(const LabelManager& label_manager,
635 int index_bound = LabelManager::GetLabelIndexBound(label_manager.Labels());
636 if (!rvas->resize(index_bound, kUnassignedRVA))
639 // For each Label, write its RVA to assigned index.
640 for (const Label& label : label_manager.Labels()) {
641 DCHECK_NE(label.index_, Label::kNoIndex);
642 DCHECK_EQ((*rvas)[label.index_], kUnassignedRVA)
643 << "ExportToList() double assigned " << label.index_;
644 (*rvas)[label.index_] = label.rva_;
650 // Replaces all unassigned slots in |rvas| with the value at the previous index
651 // so they delta-encode to zero. (There might be better values than zero. The
652 // way to get that is have the higher level assembly program assign the
653 // unassigned slots.)
654 void EncodedProgram::FillUnassignedRvaSlots(RvaVector* rvas) {
656 for (RVA& rva : *rvas) {
657 if (rva == kUnassignedRVA)
664 CheckBool EncodedProgram::GeneratePeRelocations(SinkStream* buffer,
666 std::sort(abs32_relocs_.begin(), abs32_relocs_.end());
667 DCHECK(abs32_relocs_.empty() || abs32_relocs_.back() != kUnassignedRVA);
672 for (size_t i = 0; ok && i < abs32_relocs_.size(); ++i) {
673 uint32_t rva = abs32_relocs_[i];
674 uint32_t page_rva = rva & ~0xFFF;
675 if (page_rva != block.pod.page_rva) {
676 ok &= block.Flush(buffer);
677 block.pod.page_rva = page_rva;
680 block.Add(((static_cast<uint16_t>(type)) << 12) | (rva & 0xFFF));
682 ok &= block.Flush(buffer);
686 CheckBool EncodedProgram::GenerateElfRelocations(Elf32_Word r_info,
687 SinkStream* buffer) {
688 std::sort(abs32_relocs_.begin(), abs32_relocs_.end());
689 DCHECK(abs32_relocs_.empty() || abs32_relocs_.back() != kUnassignedRVA);
691 Elf32_Rel relocation_block;
693 relocation_block.r_info = r_info;
696 for (size_t i = 0; ok && i < abs32_relocs_.size(); ++i) {
697 relocation_block.r_offset = abs32_relocs_[i];
698 ok = buffer->Write(&relocation_block, sizeof(Elf32_Rel));
703 ////////////////////////////////////////////////////////////////////////////////
705 Status WriteEncodedProgram(EncodedProgram* encoded, SinkStreamSet* sink) {
706 if (!encoded->WriteTo(sink))
707 return C_STREAM_ERROR;
711 Status ReadEncodedProgram(SourceStreamSet* streams,
712 std::unique_ptr<EncodedProgram>* output) {
714 std::unique_ptr<EncodedProgram> encoded(new EncodedProgram());
715 if (!encoded->ReadFrom(streams))
716 return C_DESERIALIZATION_FAILED;
718 *output = std::move(encoded);
722 Status Assemble(EncodedProgram* encoded, SinkStream* buffer) {
723 bool assembled = encoded->AssembleTo(buffer);
726 return C_ASSEMBLY_FAILED;
729 } // namespace courgette