[WebAssembly][Objcopy] Write output section headers identically to inputs
authorDerek Schuff <dschuff@chromium.org>
Tue, 25 Jul 2023 21:13:05 +0000 (14:13 -0700)
committerTobias Hieta <tobias@hieta.se>
Thu, 17 Aug 2023 06:25:01 +0000 (08:25 +0200)
Previously when objcopy generated section headers, it padded the LEB
that encodes the section size out to 5 bytes, matching the behavior of
clang. This is correct, but results in a binary that differs from the
input. This can sometimes have undesirable consequences (e.g. breaking
source maps).

This change makes the object reader remember the size of the LEB
encoding in the section header, so that llvm-objcopy can reproduce it
exactly. For sections not read from an object file (e.g. that
llvm-objcopy is adding itself), pad to 5 bytes.

Reviewed By: jhenderson

Differential Revision: https://reviews.llvm.org/D155535

(cherry picked from commit 1b21067cf247c62c2442daa7ee2d3915249d1ee2)

llvm/include/llvm/Object/Wasm.h
llvm/include/llvm/ObjectYAML/WasmYAML.h
llvm/lib/ObjCopy/wasm/WasmObject.h
llvm/lib/ObjCopy/wasm/WasmReader.cpp
llvm/lib/ObjCopy/wasm/WasmWriter.cpp
llvm/lib/Object/WasmObjectFile.cpp
llvm/lib/ObjectYAML/WasmEmitter.cpp
llvm/lib/ObjectYAML/WasmYAML.cpp
llvm/test/ObjectYAML/wasm/section_header_size.yaml [new file with mode: 0644]
llvm/test/tools/llvm-objcopy/wasm/section-header-size.test [new file with mode: 0644]
llvm/tools/obj2yaml/wasm2yaml.cpp

index 8dd8918..dfab4c6 100644 (file)
@@ -104,12 +104,14 @@ public:
 struct WasmSection {
   WasmSection() = default;
 
-  uint32_t Type = 0;         // Section type (See below)
-  uint32_t Offset = 0;       // Offset with in the file
+  uint32_t Type = 0;
+  uint32_t Offset = 0;       // Offset within the file
   StringRef Name;            // Section name (User-defined sections only)
   uint32_t Comdat = UINT32_MAX; // From the "comdat info" section
-  ArrayRef<uint8_t> Content; // Section content
-  std::vector<wasm::WasmRelocation> Relocations; // Relocations for this section
+  ArrayRef<uint8_t> Content;
+  std::vector<wasm::WasmRelocation> Relocations;
+  // Length of the LEB encoding of the section header's size field
+  std::optional<uint8_t> HeaderSecSizeEncodingLen;
 };
 
 struct WasmSegment {
index 0f6c4f0..94ecc2f 100644 (file)
@@ -189,6 +189,7 @@ struct Section {
 
   SectionType Type;
   std::vector<Relocation> Relocations;
+  std::optional<uint8_t> HeaderSecSizeEncodingLen;
 };
 
 struct CustomSection : Section {
index 9bc5831..f860ec6 100644 (file)
@@ -23,6 +23,7 @@ struct Section {
   // For now, each section is only an opaque binary blob with no distinction
   // between custom and known sections.
   uint8_t SectionType;
+  std::optional<uint8_t> HeaderSecSizeEncodingLen;
   StringRef Name;
   ArrayRef<uint8_t> Contents;
 };
index 6e7d8b5..578e789 100644 (file)
@@ -22,8 +22,8 @@ Expected<std::unique_ptr<Object>> Reader::create() const {
   Obj->Sections.reserve(WasmObj.getNumSections());
   for (const SectionRef &Sec : WasmObj.sections()) {
     const WasmSection &WS = WasmObj.getWasmSection(Sec);
-    Obj->Sections.push_back(
-        {static_cast<uint8_t>(WS.Type), WS.Name, WS.Content});
+    Obj->Sections.push_back({static_cast<uint8_t>(WS.Type),
+                             WS.HeaderSecSizeEncodingLen, WS.Name, WS.Content});
     // Give known sections standard names to allow them to be selected. (Custom
     // sections already have their names filled in by the parser).
     Section &ReaderSec = Obj->Sections.back();
index fdcd441..bfab25c 100644 (file)
@@ -29,16 +29,19 @@ Writer::SectionHeader Writer::createSectionHeader(const Section &S,
   SectionSize = S.Contents.size();
   if (HasName)
     SectionSize += getULEB128Size(S.Name.size()) + S.Name.size();
-  // Pad the LEB value out to 5 bytes to make it a predictable size, and
-  // match the behavior of clang.
-  encodeULEB128(SectionSize, OS, 5);
+  // If we read this section from an object file, use its original size for the
+  // padding of the LEB value to avoid changing the file size. Otherwise, pad
+  // out to 5 bytes to make it predictable, and match the behavior of clang.
+  unsigned HeaderSecSizeEncodingLen =
+      S.HeaderSecSizeEncodingLen ? *S.HeaderSecSizeEncodingLen : 5;
+  encodeULEB128(SectionSize, OS, HeaderSecSizeEncodingLen);
   if (HasName) {
     encodeULEB128(S.Name.size(), OS);
     OS << S.Name;
   }
   // Total section size is the content size plus 1 for the section type and
-  // 5 for the LEB-encoded size.
-  SectionSize = SectionSize + 1 + 5;
+  // the LEB-encoded size.
+  SectionSize = SectionSize + 1 + HeaderSecSizeEncodingLen;
   return Header;
 }
 
index a72242b..11b9b57 100644 (file)
@@ -268,7 +268,11 @@ static Error readSection(WasmSection &Section, WasmObjectFile::ReadContext &Ctx,
   Section.Offset = Ctx.Ptr - Ctx.Start;
   Section.Type = readUint8(Ctx);
   LLVM_DEBUG(dbgs() << "readSection type=" << Section.Type << "\n");
+  // When reading the section's size, store the size of the LEB used to encode
+  // it. This allows objcopy/strip to reproduce the binary identically.
+  const uint8_t *PreSizePtr = Ctx.Ptr;
   uint32_t Size = readVaruint32(Ctx);
+  Section.HeaderSecSizeEncodingLen = Ctx.Ptr - PreSizePtr;
   if (Size == 0)
     return make_error<StringError>("zero length section",
                                    object_error::parse_failed);
index 6230312..9b8fd11 100644 (file)
@@ -646,8 +646,18 @@ bool WasmWriter::writeWasm(raw_ostream &OS) {
 
     StringStream.flush();
 
+    unsigned HeaderSecSizeEncodingLen =
+        Sec->HeaderSecSizeEncodingLen ? *Sec->HeaderSecSizeEncodingLen : 5;
+    unsigned RequiredLen = getULEB128Size(OutString.size());
+    // Wasm spec does not allow LEBs larger than 5 bytes
+    assert(RequiredLen <= 5);
+    if (HeaderSecSizeEncodingLen < RequiredLen) {
+      reportError("section header length can't be encoded in a LEB of size " +
+                  Twine(HeaderSecSizeEncodingLen));
+      return false;
+    }
     // Write the section size followed by the content
-    encodeULEB128(OutString.size(), OS);
+    encodeULEB128(OutString.size(), OS, HeaderSecSizeEncodingLen);
     OS << OutString;
   }
 
index 7ca4224..ef47766 100644 (file)
@@ -45,6 +45,7 @@ void MappingTraits<WasmYAML::Object>::mapping(IO &IO,
 static void commonSectionMapping(IO &IO, WasmYAML::Section &Section) {
   IO.mapRequired("Type", Section.Type);
   IO.mapOptional("Relocations", Section.Relocations);
+  IO.mapOptional("HeaderSecSizeEncodingLen", Section.HeaderSecSizeEncodingLen);
 }
 
 static void sectionMapping(IO &IO, WasmYAML::DylinkSection &Section) {
diff --git a/llvm/test/ObjectYAML/wasm/section_header_size.yaml b/llvm/test/ObjectYAML/wasm/section_header_size.yaml
new file mode 100644 (file)
index 0000000..b632588
--- /dev/null
@@ -0,0 +1,91 @@
+## Test that obj2yaml output includes the section header size encoding length
+## only when the length isn't padded to 5 bytes.
+# RUN: yaml2obj --docnum=1 %s | obj2yaml | FileCheck %s
+
+--- !WASM
+FileHeader:
+  Version:         0x1
+Sections:
+  - Type:            TYPE
+    HeaderSecSizeEncodingLen:   3
+    Signatures:
+      - Index:           0
+        ParamTypes:
+          - I32
+          - I32
+        ReturnTypes:
+          - I32
+  - Type:            FUNCTION
+    HeaderSecSizeEncodingLen:  4
+    FunctionTypes:             [ 0 ]
+  - Type:            MEMORY
+    HeaderSecSizeEncodingLen:  1
+    Memories:
+      - Flags:           [ HAS_MAX ]
+        Minimum:         0x100
+        Maximum:         0x100
+  - Type:            EXPORT
+    HeaderSecSizeEncodingLen:  5
+    Exports:
+      - Name:            add
+        Kind:            FUNCTION
+        Index:           0
+  - Type:            CODE
+    HeaderSecSizeEncodingLen:  2
+    Functions:
+      - Index:           0
+        Locals:          []
+        Body:            200020016A0B
+...
+# CHECK: --- !WASM
+# CHECK-NEXT: FileHeader:
+# CHECK-NEXT:   Version:         0x1
+# CHECK-NEXT: Sections:
+# CHECK-NEXT:   - Type:            TYPE
+# CHECK-NEXT:     HeaderSecSizeEncodingLen:   3
+# CHECK-NEXT:     Signatures:
+# CHECK-NEXT:       - Index:           0
+# CHECK-NEXT:         ParamTypes:
+# CHECK-NEXT:           - I32
+# CHECK-NEXT:           - I32
+# CHECK-NEXT:         ReturnTypes:
+# CHECK-NEXT:           - I32
+# CHECK-NEXT:   - Type:            FUNCTION
+# CHECK-NEXT:     HeaderSecSizeEncodingLen:  4
+# CHECK-NEXT:     FunctionTypes:             [ 0 ]
+# CHECK-NEXT:   - Type:            MEMORY
+# CHECK-NEXT:     Memories:
+# CHECK-NEXT:       - Flags:           [ HAS_MAX ]
+# CHECK-NEXT:         Minimum:         0x100
+# CHECK-NEXT:         Maximum:         0x100
+# CHECK-NEXT:   - Type:            EXPORT
+# CHECK-NEXT:     Exports:
+# CHECK-NEXT:       - Name:            add
+# CHECK-NEXT:         Kind:            FUNCTION
+# CHECK-NEXT:         Index:           0
+# CHECK-NEXT:   - Type:            CODE
+# CHECK-NEXT:     HeaderSecSizeEncodingLen:  2
+# CHECK-NEXT:     Functions:
+# CHECK-NEXT:       - Index:           0
+# CHECK-NEXT:         Locals:          []
+# CHECK-NEXT:         Body:            200020016A0B
+
+## Test if we correctly error out if the provided section header size is less
+## than the size required.
+# RUN: not yaml2obj --docnum=2 %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=INVALID
+# INVALID: yaml2obj: error: section header length can't be encoded in a LEB of size 0
+
+--- !WASM
+FileHeader:
+  Version:         0x1
+Sections:
+  - Type:            TYPE
+    HeaderSecSizeEncodingLen:   0
+    Signatures:
+      - Index:           0
+        ParamTypes:
+          - I32
+          - I32
+        ReturnTypes:
+          - I32
+...
diff --git a/llvm/test/tools/llvm-objcopy/wasm/section-header-size.test b/llvm/test/tools/llvm-objcopy/wasm/section-header-size.test
new file mode 100644 (file)
index 0000000..4ea73f6
--- /dev/null
@@ -0,0 +1,41 @@
+## Test that objcopy generates section headers that are identical to those from
+## the input binary, including the encoded size of the LEB that represents the
+## section size.
+
+# RUN: yaml2obj %s -o %t.wasm
+# RUN: llvm-objcopy %t.wasm %t.wasm.copy
+# RUN: diff %t.wasm %t.wasm.copy
+
+--- !WASM
+FileHeader:
+  Version:         0x1
+Sections:
+  - Type:            TYPE
+    HeaderSecSizeEncodingLen:   3
+    Signatures:
+      - Index:           0
+        ParamTypes:
+          - I32
+          - I32
+        ReturnTypes:
+          - I32
+  - Type:            FUNCTION
+    HeaderSecSizeEncodingLen:  4
+    FunctionTypes:             [ 0 ]
+  - Type:            MEMORY
+    HeaderSecSizeEncodingLen:  1
+    Memories:
+      - Flags:           [ HAS_MAX ]
+        Minimum:         0x100
+        Maximum:         0x100
+  - Type:            EXPORT
+    HeaderSecSizeEncodingLen:  5
+    Exports:
+      - Name:            add
+        Kind:            FUNCTION
+        Index:           0
+  - Type:            CODE
+    Functions:
+      - Index:           0
+        Locals:          []
+        Body:            200020016A0B
index 813d03d..c450c02 100644 (file)
@@ -10,6 +10,7 @@
 #include "llvm/Object/COFF.h"
 #include "llvm/ObjectYAML/WasmYAML.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Support/YAMLTraits.h"
 
 using namespace llvm;
@@ -392,6 +393,17 @@ ErrorOr<WasmYAML::Object *> WasmDumper::dump() {
       llvm_unreachable("Unknown section type");
       break;
     }
+
+    // Only propagate the section size encoding length if it's not the minimal
+    // size or 5 (the default "padded" value). This is to avoid having every
+    // YAML output polluted with this value when we usually don't care about it
+    // (and avoid rewriting all the test expectations).
+    if (WasmSec.HeaderSecSizeEncodingLen &&
+        WasmSec.HeaderSecSizeEncodingLen !=
+            getULEB128Size(WasmSec.Content.size()) &&
+        WasmSec.HeaderSecSizeEncodingLen != 5)
+      S->HeaderSecSizeEncodingLen = WasmSec.HeaderSecSizeEncodingLen;
+
     for (const wasm::WasmRelocation &Reloc : WasmSec.Relocations) {
       WasmYAML::Relocation R;
       R.Type = Reloc.Type;