Previously when objcopy generated section headers, it padded the LEB
that encodes the section size out to 5 bytes, matching the behavior of
clang. This is correct, but results in a binary that differs from the
input. This can sometimes have undesirable consequences (e.g. breaking
source maps).
This change makes the object reader remember the size of the LEB
encoding in the section header, so that llvm-objcopy can reproduce it
exactly. For sections not read from an object file (e.g. that
llvm-objcopy is adding itself), pad to 5 bytes.
Reviewed By: jhenderson
Differential Revision: https://reviews.llvm.org/D155535
(cherry picked from commit
1b21067cf247c62c2442daa7ee2d3915249d1ee2)
struct WasmSection {
WasmSection() = default;
- uint32_t Type = 0; // Section type (See below)
- uint32_t Offset = 0; // Offset with in the file
+ uint32_t Type = 0;
+ uint32_t Offset = 0; // Offset within the file
StringRef Name; // Section name (User-defined sections only)
uint32_t Comdat = UINT32_MAX; // From the "comdat info" section
- ArrayRef<uint8_t> Content; // Section content
- std::vector<wasm::WasmRelocation> Relocations; // Relocations for this section
+ ArrayRef<uint8_t> Content;
+ std::vector<wasm::WasmRelocation> Relocations;
+ // Length of the LEB encoding of the section header's size field
+ std::optional<uint8_t> HeaderSecSizeEncodingLen;
};
struct WasmSegment {
SectionType Type;
std::vector<Relocation> Relocations;
+ std::optional<uint8_t> HeaderSecSizeEncodingLen;
};
struct CustomSection : Section {
// For now, each section is only an opaque binary blob with no distinction
// between custom and known sections.
uint8_t SectionType;
+ std::optional<uint8_t> HeaderSecSizeEncodingLen;
StringRef Name;
ArrayRef<uint8_t> Contents;
};
Obj->Sections.reserve(WasmObj.getNumSections());
for (const SectionRef &Sec : WasmObj.sections()) {
const WasmSection &WS = WasmObj.getWasmSection(Sec);
- Obj->Sections.push_back(
- {static_cast<uint8_t>(WS.Type), WS.Name, WS.Content});
+ Obj->Sections.push_back({static_cast<uint8_t>(WS.Type),
+ WS.HeaderSecSizeEncodingLen, WS.Name, WS.Content});
// Give known sections standard names to allow them to be selected. (Custom
// sections already have their names filled in by the parser).
Section &ReaderSec = Obj->Sections.back();
SectionSize = S.Contents.size();
if (HasName)
SectionSize += getULEB128Size(S.Name.size()) + S.Name.size();
- // Pad the LEB value out to 5 bytes to make it a predictable size, and
- // match the behavior of clang.
- encodeULEB128(SectionSize, OS, 5);
+ // If we read this section from an object file, use its original size for the
+ // padding of the LEB value to avoid changing the file size. Otherwise, pad
+ // out to 5 bytes to make it predictable, and match the behavior of clang.
+ unsigned HeaderSecSizeEncodingLen =
+ S.HeaderSecSizeEncodingLen ? *S.HeaderSecSizeEncodingLen : 5;
+ encodeULEB128(SectionSize, OS, HeaderSecSizeEncodingLen);
if (HasName) {
encodeULEB128(S.Name.size(), OS);
OS << S.Name;
}
// Total section size is the content size plus 1 for the section type and
- // 5 for the LEB-encoded size.
- SectionSize = SectionSize + 1 + 5;
+ // the LEB-encoded size.
+ SectionSize = SectionSize + 1 + HeaderSecSizeEncodingLen;
return Header;
}
Section.Offset = Ctx.Ptr - Ctx.Start;
Section.Type = readUint8(Ctx);
LLVM_DEBUG(dbgs() << "readSection type=" << Section.Type << "\n");
+ // When reading the section's size, store the size of the LEB used to encode
+ // it. This allows objcopy/strip to reproduce the binary identically.
+ const uint8_t *PreSizePtr = Ctx.Ptr;
uint32_t Size = readVaruint32(Ctx);
+ Section.HeaderSecSizeEncodingLen = Ctx.Ptr - PreSizePtr;
if (Size == 0)
return make_error<StringError>("zero length section",
object_error::parse_failed);
StringStream.flush();
+ unsigned HeaderSecSizeEncodingLen =
+ Sec->HeaderSecSizeEncodingLen ? *Sec->HeaderSecSizeEncodingLen : 5;
+ unsigned RequiredLen = getULEB128Size(OutString.size());
+ // Wasm spec does not allow LEBs larger than 5 bytes
+ assert(RequiredLen <= 5);
+ if (HeaderSecSizeEncodingLen < RequiredLen) {
+ reportError("section header length can't be encoded in a LEB of size " +
+ Twine(HeaderSecSizeEncodingLen));
+ return false;
+ }
// Write the section size followed by the content
- encodeULEB128(OutString.size(), OS);
+ encodeULEB128(OutString.size(), OS, HeaderSecSizeEncodingLen);
OS << OutString;
}
static void commonSectionMapping(IO &IO, WasmYAML::Section &Section) {
IO.mapRequired("Type", Section.Type);
IO.mapOptional("Relocations", Section.Relocations);
+ IO.mapOptional("HeaderSecSizeEncodingLen", Section.HeaderSecSizeEncodingLen);
}
static void sectionMapping(IO &IO, WasmYAML::DylinkSection &Section) {
--- /dev/null
+## Test that obj2yaml output includes the section header size encoding length
+## only when the length isn't padded to 5 bytes.
+# RUN: yaml2obj --docnum=1 %s | obj2yaml | FileCheck %s
+
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ HeaderSecSizeEncodingLen: 3
+ Signatures:
+ - Index: 0
+ ParamTypes:
+ - I32
+ - I32
+ ReturnTypes:
+ - I32
+ - Type: FUNCTION
+ HeaderSecSizeEncodingLen: 4
+ FunctionTypes: [ 0 ]
+ - Type: MEMORY
+ HeaderSecSizeEncodingLen: 1
+ Memories:
+ - Flags: [ HAS_MAX ]
+ Minimum: 0x100
+ Maximum: 0x100
+ - Type: EXPORT
+ HeaderSecSizeEncodingLen: 5
+ Exports:
+ - Name: add
+ Kind: FUNCTION
+ Index: 0
+ - Type: CODE
+ HeaderSecSizeEncodingLen: 2
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 200020016A0B
+...
+# CHECK: --- !WASM
+# CHECK-NEXT: FileHeader:
+# CHECK-NEXT: Version: 0x1
+# CHECK-NEXT: Sections:
+# CHECK-NEXT: - Type: TYPE
+# CHECK-NEXT: HeaderSecSizeEncodingLen: 3
+# CHECK-NEXT: Signatures:
+# CHECK-NEXT: - Index: 0
+# CHECK-NEXT: ParamTypes:
+# CHECK-NEXT: - I32
+# CHECK-NEXT: - I32
+# CHECK-NEXT: ReturnTypes:
+# CHECK-NEXT: - I32
+# CHECK-NEXT: - Type: FUNCTION
+# CHECK-NEXT: HeaderSecSizeEncodingLen: 4
+# CHECK-NEXT: FunctionTypes: [ 0 ]
+# CHECK-NEXT: - Type: MEMORY
+# CHECK-NEXT: Memories:
+# CHECK-NEXT: - Flags: [ HAS_MAX ]
+# CHECK-NEXT: Minimum: 0x100
+# CHECK-NEXT: Maximum: 0x100
+# CHECK-NEXT: - Type: EXPORT
+# CHECK-NEXT: Exports:
+# CHECK-NEXT: - Name: add
+# CHECK-NEXT: Kind: FUNCTION
+# CHECK-NEXT: Index: 0
+# CHECK-NEXT: - Type: CODE
+# CHECK-NEXT: HeaderSecSizeEncodingLen: 2
+# CHECK-NEXT: Functions:
+# CHECK-NEXT: - Index: 0
+# CHECK-NEXT: Locals: []
+# CHECK-NEXT: Body: 200020016A0B
+
+## Test if we correctly error out if the provided section header size is less
+## than the size required.
+# RUN: not yaml2obj --docnum=2 %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=INVALID
+# INVALID: yaml2obj: error: section header length can't be encoded in a LEB of size 0
+
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ HeaderSecSizeEncodingLen: 0
+ Signatures:
+ - Index: 0
+ ParamTypes:
+ - I32
+ - I32
+ ReturnTypes:
+ - I32
+...
--- /dev/null
+## Test that objcopy generates section headers that are identical to those from
+## the input binary, including the encoded size of the LEB that represents the
+## section size.
+
+# RUN: yaml2obj %s -o %t.wasm
+# RUN: llvm-objcopy %t.wasm %t.wasm.copy
+# RUN: diff %t.wasm %t.wasm.copy
+
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ HeaderSecSizeEncodingLen: 3
+ Signatures:
+ - Index: 0
+ ParamTypes:
+ - I32
+ - I32
+ ReturnTypes:
+ - I32
+ - Type: FUNCTION
+ HeaderSecSizeEncodingLen: 4
+ FunctionTypes: [ 0 ]
+ - Type: MEMORY
+ HeaderSecSizeEncodingLen: 1
+ Memories:
+ - Flags: [ HAS_MAX ]
+ Minimum: 0x100
+ Maximum: 0x100
+ - Type: EXPORT
+ HeaderSecSizeEncodingLen: 5
+ Exports:
+ - Name: add
+ Kind: FUNCTION
+ Index: 0
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 200020016A0B
#include "llvm/Object/COFF.h"
#include "llvm/ObjectYAML/WasmYAML.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/YAMLTraits.h"
using namespace llvm;
llvm_unreachable("Unknown section type");
break;
}
+
+ // Only propagate the section size encoding length if it's not the minimal
+ // size or 5 (the default "padded" value). This is to avoid having every
+ // YAML output polluted with this value when we usually don't care about it
+ // (and avoid rewriting all the test expectations).
+ if (WasmSec.HeaderSecSizeEncodingLen &&
+ WasmSec.HeaderSecSizeEncodingLen !=
+ getULEB128Size(WasmSec.Content.size()) &&
+ WasmSec.HeaderSecSizeEncodingLen != 5)
+ S->HeaderSecSizeEncodingLen = WasmSec.HeaderSecSizeEncodingLen;
+
for (const wasm::WasmRelocation &Reloc : WasmSec.Relocations) {
WasmYAML::Relocation R;
R.Type = Reloc.Type;