From 98f9389f65e630a7243f9c99717009107758d492 Mon Sep 17 00:00:00 2001 From: Shiva Chen Date: Wed, 25 Apr 2018 14:18:55 +0000 Subject: [PATCH] [RISCV] Support "call" pseudoinstruction in the MC layer To do this: 1. Add PseudoCALLIndirect to match indirect function calls. 2. Add PseudoCALL to support parsing and printing the pseudo `call` in assembly. 3. Expand PseudoCALL to the following form with the R_RISCV_CALL relocation type while encoding: auipc ra, func jalr ra, ra, 0 If we expanded PseudoCALL before emitting assembly, we would see an auipc and jalr pair when compiling with -S. It is hard for the assembly parser to parse this pair, identify that its semantics are a function call, and then insert the R_RISCV_CALL relocation type. We could insert the R_RISCV_PCREL_HI20 and R_RISCV_PCREL_LO12_I relocation types instead of R_RISCV_CALL, but due to the RISCV relocation design, an auipc and jalr pair can only be relaxed to jal with the R_RISCV_CALL + R_RISCV_RELAX relocation types. We expand PseudoCALL as late as encoding (RISCVMCCodeEmitter) instead of before emitting assembly (RISCVAsmPrinter) because we want to preserve the call pseudoinstruction in assembly code. It is more readable, and the assembly parser can identify the call assembly and insert the R_RISCV_CALL relocation type. 
Differential Revision: https://reviews.llvm.org/D45859 llvm-svn: 330826 --- llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 14 ++++++ .../RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp | 11 +++++ .../Target/RISCV/MCTargetDesc/RISCVFixupKinds.h | 3 ++ .../RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp | 50 ++++++++++++++++++++++ llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp | 6 ++- llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h | 1 + llvm/lib/Target/RISCV/RISCVInstrInfo.td | 29 ++++++++++++- llvm/test/MC/RISCV/function-call-invalid.s | 11 +++++ llvm/test/MC/RISCV/function-call.s | 19 ++++++++ 9 files changed, 140 insertions(+), 4 deletions(-) create mode 100644 llvm/test/MC/RISCV/function-call-invalid.s create mode 100644 llvm/test/MC/RISCV/function-call.s diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 521d72b..02f075a 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -171,6 +171,16 @@ public: // Predicate methods for AsmOperands defined in RISCVInstrInfo.td + bool isBareSymbol() const { + int64_t Imm; + RISCVMCExpr::VariantKind VK; + // Must be of 'immediate' type but not a constant. + if (!isImm() || evaluateConstantImm(Imm, VK)) + return false; + return RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm) && + VK == RISCVMCExpr::VK_RISCV_None; + } + /// Return true if the operand is a valid for the fence instruction e.g. /// ('iorw'). 
bool isFenceArg() const { @@ -703,6 +713,10 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, ErrorLoc, "operand must be a valid floating point rounding mode mnemonic"); } + case Match_InvalidBareSymbol: { + SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); + return Error(ErrorLoc, "operand must be a bare symbol name"); + } } llvm_unreachable("Unknown match type detected!"); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp index 0dc371c..c0a005c 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp @@ -23,6 +23,15 @@ public: ~RISCVELFObjectWriter() override; + // Return true if the given relocation must be with a symbol rather than + // section plus offset. + bool needsRelocateWithSymbol(const MCSymbol &Sym, + unsigned Type) const override { + // TODO: this is very conservative, update once RISC-V psABI requirements + // are clarified. 
+ return true; + } + protected: unsigned getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; @@ -67,6 +76,8 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_RISCV_RVC_JUMP; case RISCV::fixup_riscv_rvc_branch: return ELF::R_RISCV_RVC_BRANCH; + case RISCV::fixup_riscv_call: + return ELF::R_RISCV_CALL; } } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h index 604c3dc..7d9f8fc 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h @@ -47,6 +47,9 @@ enum Fixups { // fixup_riscv_rvc_branch - 8-bit fixup for symbol references in the // compressed branch instruction fixup_riscv_rvc_branch, + // fixup_riscv_call - A fixup representing a call attached to the auipc + // instruction in a pair composed of adjacent auipc+jalr instructions. + fixup_riscv_call, // fixup_riscv_invalid - used as a sentinel and a marker, must be last fixup fixup_riscv_invalid, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index a1ecb3e..58275fa 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -21,6 +21,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSymbol.h" @@ -52,6 +53,10 @@ public: SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const override; + void expandFunctionCall(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + /// TableGen'erated function for getting the binary encoding for an /// instruction. 
uint64_t getBinaryCodeForInstr(const MCInst &MI, @@ -80,6 +85,42 @@ MCCodeEmitter *llvm::createRISCVMCCodeEmitter(const MCInstrInfo &MCII, return new RISCVMCCodeEmitter(Ctx, MCII); } +// Expand PseudoCALL to AUIPC and JALR with relocation types. +// We expand PseudoCALL while encoding, meaning AUIPC and JALR won't go through +// RISCV MC to MC compressed instruction transformation. This is acceptable +// because AUIPC has no 16-bit form and C_JALR have no immediate operand field. +// We let linker relaxation deal with it. When linker relaxation enabled, +// AUIPC and JALR have chance relax to JAL. If C extension is enabled, +// JAL has chance relax to C_JAL. +void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + MCInst TmpInst; + MCOperand Func = MI.getOperand(0); + unsigned Ra = RISCV::X1; + uint32_t Binary; + + assert(Func.isExpr() && "Expected expression"); + + const MCExpr *Expr = Func.getExpr(); + + // Create function call expression CallExpr for AUIPC. + const MCExpr *CallExpr = + RISCVMCExpr::create(Expr, RISCVMCExpr::VK_RISCV_CALL, Ctx); + + // Emit AUIPC Ra, Func with R_RISCV_CALL relocation type. + TmpInst = MCInstBuilder(RISCV::AUIPC) + .addReg(Ra) + .addOperand(MCOperand::createExpr(CallExpr)); + Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); + support::endian::Writer(OS).write(Binary); + + // Emit JALR Ra, Ra, 0 + TmpInst = MCInstBuilder(RISCV::JALR).addReg(Ra).addReg(Ra).addImm(0); + Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); + support::endian::Writer(OS).write(Binary); +} + void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { @@ -87,6 +128,12 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, // Get byte count of instruction. 
unsigned Size = Desc.getSize(); + if (MI.getOpcode() == RISCV::PseudoCALL) { + expandFunctionCall(MI, OS, Fixups, STI); + MCNumEmitted += 2; + return; + } + switch (Size) { default: llvm_unreachable("Unhandled encodeInstruction length!"); @@ -183,6 +230,9 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo, case RISCVMCExpr::VK_RISCV_PCREL_HI: FixupKind = RISCV::fixup_riscv_pcrel_hi20; break; + case RISCVMCExpr::VK_RISCV_CALL: + FixupKind = RISCV::fixup_riscv_call; + break; } } else if (Kind == MCExpr::SymbolRef && cast(Expr)->getKind() == MCSymbolRefExpr::VK_None) { diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp index 4d1573a..844039f 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp @@ -32,7 +32,8 @@ const RISCVMCExpr *RISCVMCExpr::create(const MCExpr *Expr, VariantKind Kind, } void RISCVMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { - bool HasVariant = getKind() != VK_RISCV_None; + bool HasVariant = + ((getKind() != VK_RISCV_None) && (getKind() != VK_RISCV_CALL)); if (HasVariant) OS << '%' << getVariantKindName(getKind()) << '('; Expr->print(OS, MAI); @@ -77,7 +78,8 @@ StringRef RISCVMCExpr::getVariantKindName(VariantKind Kind) { bool RISCVMCExpr::evaluateAsConstant(int64_t &Res) const { MCValue Value; - if (Kind == VK_RISCV_PCREL_HI || Kind == VK_RISCV_PCREL_LO) + if (Kind == VK_RISCV_PCREL_HI || Kind == VK_RISCV_PCREL_LO || + Kind == VK_RISCV_CALL) return false; if (!getSubExpr()->evaluateAsRelocatable(Value, nullptr, nullptr)) diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h index e428b0d..d2e0f6b 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h @@ -29,6 +29,7 @@ public: VK_RISCV_HI, VK_RISCV_PCREL_LO, VK_RISCV_PCREL_HI, + VK_RISCV_CALL, 
VK_RISCV_Invalid }; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 00f0342..0309fe9 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -153,6 +153,20 @@ def simm21_lsb0 : Operand { }]; } +def BareSymbol : AsmOperandClass { + let Name = "BareSymbol"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "InvalidBareSymbol"; +} + +// A bare symbol. +def bare_symbol : Operand { + let ParserMatchClass = BareSymbol; + let MCOperandPredicate = [{ + return MCOp.isBareSymbolRef(); + }]; +} + // A parameterized register class alternative to i32imm/i64imm from Target.td. def ixlenimm : Operand; @@ -621,9 +635,20 @@ def : Pat<(brind GPR:$rs1), (PseudoBRIND GPR:$rs1, 0)>; def : Pat<(brind (add GPR:$rs1, simm12:$imm12)), (PseudoBRIND GPR:$rs1, simm12:$imm12)>; +// PseudoCALL is a pseudo instruction which will eventually expand to auipc +// and jalr. Define AsmString because we want assembler could print "call" +// when compile with -S. Define isCodeGenOnly = 0 because we want parser +// could parsing assembly "call" instruction. 
+let isCall = 1, Defs = [X1], isCodeGenOnly = 0, + hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def PseudoCALL : Pseudo<(outs), (ins bare_symbol:$func), + []> { + let AsmString = "call\t$func"; +} + let isCall = 1, Defs = [X1] in -def PseudoCALL : Pseudo<(outs), (ins GPR:$rs1), [(Call GPR:$rs1)]>, - PseudoInstExpansion<(JALR X1, GPR:$rs1, 0)>; +def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rs1), [(Call GPR:$rs1)]>, + PseudoInstExpansion<(JALR X1, GPR:$rs1, 0)>; let isBarrier = 1, isReturn = 1, isTerminator = 1 in def PseudoRET : Pseudo<(outs), (ins), [(RetFlag)]>, diff --git a/llvm/test/MC/RISCV/function-call-invalid.s b/llvm/test/MC/RISCV/function-call-invalid.s new file mode 100644 index 0000000..1453249 --- /dev/null +++ b/llvm/test/MC/RISCV/function-call-invalid.s @@ -0,0 +1,11 @@ +# RUN: not llvm-mc -triple riscv32 < %s 2>&1 | FileCheck %s + +call 1234 # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name +call %pcrel_hi(1234) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name +call %pcrel_lo(1234) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name +call %pcrel_hi(foo) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name +call %pcrel_lo(foo) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name +call %hi(1234) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name +call %lo(1234) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name +call %hi(foo) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name +call %lo(foo) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name diff --git a/llvm/test/MC/RISCV/function-call.s b/llvm/test/MC/RISCV/function-call.s new file mode 100644 index 0000000..d52623e --- /dev/null +++ b/llvm/test/MC/RISCV/function-call.s @@ -0,0 +1,19 @@ +# RUN: llvm-mc -filetype=obj -triple riscv32 < %s \ +# RUN: | llvm-objdump -d - | FileCheck -check-prefix=INSTR %s +# RUN: llvm-mc -filetype=obj -triple riscv32 < %s \ +# RUN: | 
llvm-readobj -r | FileCheck -check-prefix=RELOC %s +# RUN: llvm-mc -triple riscv32 < %s -show-encoding \ +# RUN: | FileCheck -check-prefix=FIXUP %s + +.long foo + +call foo +# RELOC: R_RISCV_CALL foo 0x0 +# INSTR: auipc ra, 0 +# INSTR: jalr ra +# FIXUP: fixup A - offset: 0, value: foo, kind: +call bar +# RELOC: R_RISCV_CALL bar 0x0 +# INSTR: auipc ra, 0 +# INSTR: jalr ra +# FIXUP: fixup A - offset: 0, value: bar, kind: -- 2.7.4