[WebAssembly] Add option to remove LEB padding at relocate sites

author Sam Clegg <sbc@chromium.org>

Fri, 18 May 2018 23:28:05 +0000 (23:28 +0000)

committer Sam Clegg <sbc@chromium.org>

Fri, 18 May 2018 23:28:05 +0000 (23:28 +0000)
author Sam Clegg <sbc@chromium.org>
Fri, 18 May 2018 23:28:05 +0000 (23:28 +0000)
committer Sam Clegg <sbc@chromium.org>
Fri, 18 May 2018 23:28:05 +0000 (23:28 +0000)
diff --git a/lld/test/wasm/compress-relocs.ll b/lld/test/wasm/compress-relocs.ll

new file mode 100644 (file)

index 0000000..b137d5a
--- /dev/null
+++ b/lld/test/wasm/compress-relocs.ll
@@ -0,0 +1,22 @@
+; RUN: llc -filetype=obj %p/Inputs/call-indirect.ll -o %t2.o
+; RUN: llc -filetype=obj %s -o %t.o
+; RUN: wasm-ld -o %t.wasm %t2.o %t.o
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+; RUN: wasm-ld -O2 -o %t-compressed.wasm %t2.o %t.o
+; RUN: obj2yaml %t-compressed.wasm | FileCheck %s -check-prefix=COMPRESS
+
+target triple = "wasm32-unknown-unknown-wasm"
+
+define i32 @foo() {
+entry:
+  ret i32 2
+}
+
+define void @_start() local_unnamed_addr {
+entry:
+  ret void
+}
+
+; CHECK:    Body:            4100280284888080002100410028028088808000118080808000001A2000118180808000001A0B
+; COMPRESS: Body:            41002802840821004100280280081100001A20001101001A0B
diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h

index da0da17..08355b5 100644 (file)
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -19,6 +19,7 @@ namespace wasm {
  
  struct Configuration {
    bool AllowUndefined;
+  bool CompressRelocTargets;
    bool Demangle;
    bool ExportTable;
    bool GcSections;
@@ -33,6 +34,7 @@ struct Configuration {
    uint32_t GlobalBase;
    uint32_t InitialMemory;
    uint32_t MaxMemory;
+  uint32_t Optimize;
    uint32_t ZStackSize;
    llvm::StringRef Entry;
    llvm::StringRef OutputFile;
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp

index 7a836c5..840ebce 100644 (file)
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -290,6 +290,7 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
        Args.hasFlag(OPT_fatal_warnings, OPT_no_fatal_warnings, false);
    Config->ImportMemory = Args.hasArg(OPT_import_memory);
    Config->ImportTable = Args.hasArg(OPT_import_table);
+  Config->Optimize = args::getInteger(Args, OPT_O, 0);
    Config->OutputFile = Args.getLastArgValue(OPT_o);
    Config->Relocatable = Args.hasArg(OPT_relocatable);
    Config->GcSections =
@@ -312,6 +313,8 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
    Config->ZStackSize =
        args::getZOptionValue(Args, OPT_z, "stack-size", WasmPageSize);
  
+  Config->CompressRelocTargets = Config->Optimize > 0 && !Config->Relocatable;
+
    if (auto *Arg = Args.getLastArg(OPT_allow_undefined_file))
      readImportFile(Arg->getValue());
  
diff --git a/lld/wasm/InputChunks.cpp b/lld/wasm/InputChunks.cpp

index 925c623..d38d67c 100644 (file)
--- a/lld/wasm/InputChunks.cpp
+++ b/lld/wasm/InputChunks.cpp
@@ -47,7 +47,7 @@ void InputChunk::copyRelocations(const WasmSection &Section) {
    if (Section.Relocations.empty())
      return;
    size_t Start = getInputSectionOffset();
-  size_t Size = getSize();
+  size_t Size = getInputSize();
    for (const WasmRelocation &R : Section.Relocations)
      if (R.Offset >= Start && R.Offset < Start + Size)
        Relocations.push_back(R);
@@ -179,3 +179,123 @@ void InputFunction::setTableIndex(uint32_t Index) {
    assert(!hasTableIndex());
    TableIndex = Index;
  }
+
+// Write a relocation value without padding and return the number of bytes
+// witten.
+static unsigned writeCompressedReloc(uint8_t *Buf, const WasmRelocation &Rel,
+                                     uint32_t Value) {
+  switch (Rel.Type) {
+  case R_WEBASSEMBLY_TYPE_INDEX_LEB:
+  case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
+  case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
+  case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
+    return encodeULEB128(Value, Buf);
+  case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
+  case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
+    return encodeSLEB128(static_cast<int32_t>(Value), Buf);
+  case R_WEBASSEMBLY_TABLE_INDEX_I32:
+  case R_WEBASSEMBLY_MEMORY_ADDR_I32:
+    return 4;
+  default:
+    llvm_unreachable("unknown relocation type");
+  }
+}
+
+// Return the number of bytes occupied by the target of `Rel` in its padded
+// form: LEB/SLEB targets take 5 bytes, I32 targets take 4 bytes.
+static unsigned getRelocWidthPadded(const WasmRelocation &Rel) {
+  const unsigned PaddedLEBWidth = 5;
+  const unsigned FixedI32Width = 4;
+
+  switch (Rel.Type) {
+  case R_WEBASSEMBLY_TABLE_INDEX_I32:
+  case R_WEBASSEMBLY_MEMORY_ADDR_I32:
+    return FixedI32Width;
+  case R_WEBASSEMBLY_TYPE_INDEX_LEB:
+  case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
+  case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
+  case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
+  case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
+  case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
+    return PaddedLEBWidth;
+  default:
+    llvm_unreachable("unknown relocation type");
+  }
+}
+
+// Return how many bytes `Value` occupies when written unpadded for `Rel`, by
+// encoding it into a throwaway buffer and reporting the length.
+static unsigned getRelocWidth(const WasmRelocation &Rel, uint32_t Value) {
+  uint8_t Scratch[5];
+  const unsigned Width = writeCompressedReloc(Scratch, Rel, Value);
+  return Width;
+}
+
+// Relocations of type LEB and SLEB in the code section are padded to 5 bytes
+// so that a fast linker can blindly overwrite them without needing to worry
+// about the number of bytes needed to encode the values.
+// However, for optimal output the code section can be compressed to remove
+// the padding then outputting non-relocatable files.
+// In this case we need to perform a size calculation based on the value at each
+// relocation.  At best we end up saving 4 bytes for each relocation entry.
+//
+// This function only computes the final output size.  It must be called
+// before getSize() is used to calculate of layout of the code section.
+void InputFunction::calculateSize() {
+  if (!File || !Config->CompressRelocTargets)
+    return;
+
+  DEBUG(dbgs() << "calculateSize: " << getName() << "\n");
+
+  const uint8_t *SecStart = File->CodeSection->Content.data();
+  const uint8_t *FuncStart = SecStart + getInputSectionOffset();
+  uint32_t FunctionSizeLength;
+  decodeULEB128(FuncStart, &FunctionSizeLength);
+
+  uint32_t Start = getInputSectionOffset();
+  uint32_t End = Start + Function->Size;
+
+  uint32_t LastRelocEnd = Start + FunctionSizeLength;
+  for (WasmRelocation &Rel : Relocations) {
+    DEBUG(dbgs() << "  region: " << (Rel.Offset - LastRelocEnd) << "\n");
+    CompressedFuncSize += Rel.Offset - LastRelocEnd;
+    CompressedFuncSize += getRelocWidth(Rel, File->calcNewValue(Rel));
+    LastRelocEnd = Rel.Offset + getRelocWidthPadded(Rel);
+  }
+  DEBUG(dbgs() << "  final region: " << (End - LastRelocEnd) << "\n");
+  CompressedFuncSize += End - LastRelocEnd;
+
+  // Now we know how long the resulting function is we can add the encoding
+  // of its length
+  uint8_t Buf[5];
+  CompressedSize = CompressedFuncSize + encodeULEB128(CompressedFuncSize, Buf);
+
+  DEBUG(dbgs() << "  calculateSize orig: " << Function->Size << "\n");
+  DEBUG(dbgs() << "  calculateSize  new: " << CompressedSize << "\n");
+}
+
+// Override the default writeTo method so that we can (optionally) write the
+// compressed version of the function.
+void InputFunction::writeTo(uint8_t *Buf) const {
+  if (!File || !Config->CompressRelocTargets)
+    return InputChunk::writeTo(Buf);
+
+  Buf += OutputOffset;
+  uint8_t *Orig = Buf;
+
+  const uint8_t *SecStart = File->CodeSection->Content.data();
+  const uint8_t *FuncStart = SecStart + getInputSectionOffset();
+  const uint8_t *End = FuncStart + Function->Size;
+  uint32_t Count;
+  decodeULEB128(Buf, &Count);
+  FuncStart += Count;
+
+  DEBUG(dbgs() << "write func: " << getName() << "\n");
+  Buf += encodeULEB128(CompressedFuncSize, Buf);
+  const uint8_t *LastRelocEnd = FuncStart;
+  for (const WasmRelocation &Rel : Relocations) {
+    unsigned ChunkSize = (SecStart + Rel.Offset) - LastRelocEnd;
+    DEBUG(dbgs() << "  write chunk: " << ChunkSize << "\n");
+    memcpy(Buf, LastRelocEnd, ChunkSize);
+    Buf += ChunkSize;
+    Buf += writeCompressedReloc(Buf, Rel, File->calcNewValue(Rel));
+    LastRelocEnd = SecStart + Rel.Offset + getRelocWidthPadded(Rel);
+  }
+
+  unsigned ChunkSize = End - LastRelocEnd;
+  DEBUG(dbgs() << "  write final chunk: " << ChunkSize << "\n");
+  memcpy(Buf, LastRelocEnd, ChunkSize);
+  DEBUG(dbgs() << "  total: " << (Buf + ChunkSize - Orig) << "\n");
+}
diff --git a/lld/wasm/InputChunks.h b/lld/wasm/InputChunks.h

index 80a8ab4..526e298 100644 (file)
--- a/lld/wasm/InputChunks.h
+++ b/lld/wasm/InputChunks.h
@@ -48,11 +48,11 @@ public:
  
    Kind kind() const { return SectionKind; }
  
-  uint32_t getSize() const { return data().size(); }
+  virtual uint32_t getSize() const { return data().size(); }
  
    void copyRelocations(const WasmSection &Section);
  
-  void writeTo(uint8_t *SectionStart) const;
+  virtual void writeTo(uint8_t *SectionStart) const;
  
    ArrayRef<WasmRelocation> getRelocations() const { return Relocations; }
  
@@ -78,6 +78,7 @@ protected:
    virtual ~InputChunk() = default;
    virtual ArrayRef<uint8_t> data() const = 0;
    virtual uint32_t getInputSectionOffset() const = 0;
+  // Size of this chunk in the input; by default the same as getSize().
+  virtual uint32_t getInputSize() const { return getSize(); }
  
    // Verifies the existing data at relocation targets matches our expectations.
    // This is performed only debug builds as an extra sanity check.
@@ -131,11 +132,19 @@ public:
             C->kind() == InputChunk::SyntheticFunction;
    }
  
+  void writeTo(uint8_t *SectionStart) const override;
    StringRef getName() const override { return Function->SymbolName; }
    StringRef getDebugName() const override { return Function->DebugName; }
    uint32_t getComdat() const override { return Function->Comdat; }
    uint32_t getFunctionInputOffset() const { return getInputSectionOffset(); }
    uint32_t getFunctionCodeOffset() const { return Function->CodeOffset; }
+  // Size of this function as reported to callers: the compressed size when
+  // relocation targets are being compressed and the function has a File,
+  // otherwise the size of data().  The assert checks that a non-zero
+  // compressed size has already been computed.
+  uint32_t getSize() const override {
+    const bool UseCompressed = Config->CompressRelocTargets && File;
+    if (!UseCompressed)
+      return data().size();
+    assert(CompressedSize);
+    return CompressedSize;
+  }
    uint32_t getFunctionIndex() const { return FunctionIndex.getValue(); }
    bool hasFunctionIndex() const { return FunctionIndex.hasValue(); }
    void setFunctionIndex(uint32_t Index);
@@ -143,13 +152,23 @@ public:
    bool hasTableIndex() const { return TableIndex.hasValue(); }
    void setTableIndex(uint32_t Index);
  
+  // The size of a given input function can depend on the values of the
+  // LEB relocations within it.  This calculateSize method must be called
+  // after all the symbol values have been calculated but before getSize() is
+  // ever called.
+  void calculateSize();
+
    const WasmSignature &Signature;
  
  protected:
    ArrayRef<uint8_t> data() const override {
+    assert(!Config->CompressRelocTargets);
      return File->CodeSection->Content.slice(getInputSectionOffset(),
                                              Function->Size);
    }
+
+  uint32_t getInputSize() const override { return Function->Size; }
+
    uint32_t getInputSectionOffset() const override {
      return Function->CodeSectionOffset;
    }
@@ -157,6 +176,8 @@ protected:
    const WasmFunction *Function;
    llvm::Optional<uint32_t> FunctionIndex;
    llvm::Optional<uint32_t> TableIndex;
+  uint32_t CompressedFuncSize = 0;
+  uint32_t CompressedSize = 0;
  };
  
  class SyntheticFunction : public InputFunction {
diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td

index 8f7eed6..7fb6881 100644 (file)
--- a/lld/wasm/Options.td
+++ b/lld/wasm/Options.td
@@ -65,6 +65,8 @@ def no_fatal_warnings: F<"no-fatal-warnings">;
  def o: JoinedOrSeparate<["-"], "o">, MetaVarName<"<path>">,
    HelpText<"Path to file to write output">;
  
+def O: JoinedOrSeparate<["-"], "O">, HelpText<"Optimize output file size">;
+
  defm print_gc_sections: B<"print-gc-sections",
      "List removed unused sections",
      "Do not list removed unused sections">;
diff --git a/lld/wasm/OutputSections.cpp b/lld/wasm/OutputSections.cpp

index 05d5dda..256a988 100644 (file)
--- a/lld/wasm/OutputSections.cpp
+++ b/lld/wasm/OutputSections.cpp
@@ -85,8 +85,9 @@ CodeSection::CodeSection(ArrayRef<InputFunction *> Functions)
    OS.flush();
    BodySize = CodeSectionHeader.size();
  
-  for (InputChunk *Func : Functions) {
+  for (InputFunction *Func : Functions) {
      Func->OutputOffset = BodySize;
+    Func->calculateSize();
      BodySize += Func->getSize();
    }
author	Sam Clegg <sbc@chromium.org>
	Fri, 18 May 2018 23:28:05 +0000 (23:28 +0000)
committer	Sam Clegg <sbc@chromium.org>
	Fri, 18 May 2018 23:28:05 +0000 (23:28 +0000)
lld/test/wasm/compress-relocs.ll	[new file with mode: 0644]	patch \| blob
lld/wasm/Config.h		patch \| blob \| history
lld/wasm/Driver.cpp		patch \| blob \| history
lld/wasm/InputChunks.cpp		patch \| blob \| history
lld/wasm/InputChunks.h		patch \| blob \| history
lld/wasm/Options.td		patch \| blob \| history
lld/wasm/OutputSections.cpp		patch \| blob \| history