From c7b89f0f626b733b631e30b73f8aa35c1122190e Mon Sep 17 00:00:00 2001 From: Wouter van Oortmerssen Date: Mon, 3 Dec 2018 20:30:28 +0000 Subject: [PATCH] [WebAssembly] Enforce assembler emits to streamer in order. Summary: The assembler processes directives and instructions in whatever order they are in the file, then directly emits them to the streamer. This could cause badly written (or generated) .s files to produce incorrect binaries. It now has state that tracks what it has most recently seen, to enforce they are emitted in a given order that always produces correct wasm binaries. Also added a new test that compares obj2yaml output from llc (the backend) to that going via .s and the assembler to ensure both paths generate the same binaries. The features this test covers could be extended. Passes all wasm Lit tests. Fixes: https://bugs.llvm.org/show_bug.cgi?id=39557 Reviewers: sbc100, dschuff, aheejin Subscribers: jgravelle-google, sunfish, llvm-commits Differential Revision: https://reviews.llvm.org/D55149 llvm-svn: 348185 --- .../WebAssembly/AsmParser/WebAssemblyAsmParser.cpp | 47 +++++++++++ llvm/test/MC/WebAssembly/assembler-binary.ll | 92 ++++++++++++++++++++++ 2 files changed, 139 insertions(+) create mode 100644 llvm/test/MC/WebAssembly/assembler-binary.ll diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp index bc2ec11..c93b215 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp @@ -136,6 +136,24 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser { // Much like WebAssemblyAsmPrinter in the backend, we have to own these. std::vector> Signatures; + // Order of labels, directives and instructions in a .s file have no + // syntactical enforcement. This class is a callback from the actual parser, + // and yet we have to be feeding data to the streamer in a very particular + // order to ensure a correct binary encoding that matches the regular backend + // (the streamer does not enforce this). This "state machine" enum helps + // guarantee that correct order. + enum ParserState { + FileStart, + Label, + FunctionStart, + FunctionLocals, + Instructions, + } CurrentState = FileStart; + + // We track this to see if a .functype following a label is the same, + // as this is how we recognize the start of a function. + MCSymbol *LastLabel = nullptr; + public: WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) @@ -334,6 +352,11 @@ public: return false; } + void onLabelParsed(MCSymbol *Symbol) override { + LastLabel = Symbol; + CurrentState = Label; + } + // This function processes wasm-specific directives streamed to // WebAssemblyTargetStreamer, all others go to the generic parser // (see WasmAsmParser). @@ -370,10 +393,19 @@ public: TOut.emitGlobalType(WasmSym); return Expect(AsmToken::EndOfStatement, "EOL"); } else if (DirectiveID.getString() == ".functype") { + // This code has to send things to the streamer similar to + // WebAssemblyAsmPrinter::EmitFunctionBodyStart. + // TODO: would be good to factor this into a common function, but the + // assembler and backend really don't share any common code, and this code + // parses the locals seperately. auto SymName = ExpectIdent(); if (SymName.empty()) return true; auto WasmSym = cast( TOut.getStreamer().getContext().getOrCreateSymbol(SymName)); + if (CurrentState == Label && WasmSym == LastLabel) { + // This .functype indicates a start of a function. + CurrentState = FunctionStart; + } auto Signature = make_unique(); if (Expect(AsmToken::LParen, "(")) return true; if (ParseRegTypeList(Signature->Params)) return true; @@ -386,11 +418,16 @@ public: addSignature(std::move(Signature)); WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); TOut.emitFunctionType(WasmSym); + // TODO: backend also calls TOut.emitIndIdx, but that is not implemented. return Expect(AsmToken::EndOfStatement, "EOL"); } else if (DirectiveID.getString() == ".local") { + if (CurrentState != FunctionStart) + return Error(".local directive should follow the start of a function", + Lexer.getTok()); SmallVector Locals; if (ParseRegTypeList(Locals)) return true; TOut.emitLocal(Locals); + CurrentState = FunctionLocals; return Expect(AsmToken::EndOfStatement, "EOL"); } return true; // We didn't process this directive. @@ -405,6 +442,16 @@ public: MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); switch (MatchResult) { case Match_Success: { + if (CurrentState == FunctionStart) { + // This is the first instruction in a function, but we haven't seen + // a .local directive yet. The streamer requires locals to be encoded + // as a prelude to the instructions, so emit an empty list of locals + // here. + auto &TOut = reinterpret_cast( + *Out.getTargetStreamer()); + TOut.emitLocal(SmallVector()); + } + CurrentState = Instructions; Out.EmitInstruction(Inst, getSTI()); return false; } diff --git a/llvm/test/MC/WebAssembly/assembler-binary.ll b/llvm/test/MC/WebAssembly/assembler-binary.ll new file mode 100644 index 0000000..3667a52 --- /dev/null +++ b/llvm/test/MC/WebAssembly/assembler-binary.ll @@ -0,0 +1,92 @@ +; RUN: llc -filetype=asm -asm-verbose=false %s -o %t.s +; RUN: FileCheck -check-prefix=ASM -input-file %t.s %s +; RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=asm %t.s -o - | FileCheck -check-prefix=ASM %s +; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s +; RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj %t.s -o - | obj2yaml | FileCheck %s + +; This specifically tests that we can generate a binary from the assembler +; that produces the same binary as the backend would. + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +declare void @bar() + +define void @foo(i32 %n) { +entry: + call void @bar() + ret void +} + +; Checking assembly is not the point of this test, but if something breaks +; it is easier to spot it here than in the yaml output. + +; ASM: .text +; ASM: .file "assembler-binary.ll" +; ASM: .globl foo +; ASM: foo: +; ASM-NEXT: .functype foo (i32) -> () +; ASM-NEXT: call bar@FUNCTION +; ASM-NEXT: end_function +; ASM-NEXT: .Lfunc_end0: +; ASM-NEXT: .size foo, .Lfunc_end0-foo +; ASM: .functype bar () -> () + + +; CHECK: --- !WASM +; CHECK-NEXT: FileHeader: +; CHECK-NEXT: Version: 0x00000001 +; CHECK-NEXT: Sections: +; CHECK-NEXT: - Type: TYPE +; CHECK-NEXT: Signatures: +; CHECK-NEXT: - Index: 0 +; CHECK-NEXT: ReturnType: NORESULT +; CHECK-NEXT: ParamTypes: +; CHECK-NEXT: - I32 +; CHECK-NEXT: - Index: 1 +; CHECK-NEXT: ReturnType: NORESULT +; CHECK-NEXT: ParamTypes: [] +; CHECK-NEXT: - Type: IMPORT +; CHECK-NEXT: Imports: +; CHECK-NEXT: - Module: env +; CHECK-NEXT: Field: __linear_memory +; CHECK-NEXT: Kind: MEMORY +; CHECK-NEXT: Memory: +; CHECK-NEXT: Initial: 0x00000000 +; CHECK-NEXT: - Module: env +; CHECK-NEXT: Field: __indirect_function_table +; CHECK-NEXT: Kind: TABLE +; CHECK-NEXT: Table: +; CHECK-NEXT: ElemType: ANYFUNC +; CHECK-NEXT: Limits: +; CHECK-NEXT: Initial: 0x00000000 +; CHECK-NEXT: - Module: env +; CHECK-NEXT: Field: bar +; CHECK-NEXT: Kind: FUNCTION +; CHECK-NEXT: SigIndex: 1 +; CHECK-NEXT: - Type: FUNCTION +; CHECK-NEXT: FunctionTypes: [ 0 ] +; CHECK-NEXT: - Type: CODE +; CHECK-NEXT: Relocations: +; CHECK-NEXT: - Type: R_WEBASSEMBLY_FUNCTION_INDEX_LEB +; CHECK-NEXT: Index: 1 +; CHECK-NEXT: Offset: 0x00000004 +; CHECK-NEXT: Functions: +; CHECK-NEXT: - Index: 1 +; CHECK-NEXT: Locals: [] +; CHECK-NEXT: Body: 1080808080000B +; CHECK-NEXT: - Type: CUSTOM +; CHECK-NEXT: Name: linking +; CHECK-NEXT: Version: 1 +; CHECK-NEXT: SymbolTable: +; CHECK-NEXT: - Index: 0 +; CHECK-NEXT: Kind: FUNCTION +; CHECK-NEXT: Name: foo +; CHECK-NEXT: Flags: [ ] +; CHECK-NEXT: Function: 1 +; CHECK-NEXT: - Index: 1 +; CHECK-NEXT: Kind: FUNCTION +; CHECK-NEXT: Name: bar +; CHECK-NEXT: Flags: [ UNDEFINED ] +; CHECK-NEXT: Function: 0 +; CHECK-NEXT: ... -- 2.7.4