From: Craig Topper
Date: Fri, 16 Jul 2021 16:23:17 +0000 (-0700)
Subject: [RISCV] Teach constant materialization that it can use zext.w at the end with Zba...
X-Git-Tag: llvmorg-14-init~1234
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=4dbb78806871b158c9ec23196f075c070baa2909;p=platform%2Fupstream%2Fllvm.git

[RISCV] Teach constant materialization that it can use zext.w at the end
with Zba to reduce the number of instructions.

If the upper 32 bits are zero and bit 31 is set, we might be able to use
zext.w to fill in the zeros after using an lui and/or addi.

Most of this patch is plumbing the subtarget features into the constant
materialization.

Reviewed By: luismarques

Differential Revision: https://reviews.llvm.org/D105509
---

diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 4127dd7..87496e0 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -2210,13 +2210,19 @@ void RISCVAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) {
 
 void RISCVAsmParser::emitLoadImm(MCRegister DestReg, int64_t Value,
                                  MCStreamer &Out) {
-  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Value, isRV64());
+  RISCVMatInt::InstSeq Seq =
+      RISCVMatInt::generateInstSeq(Value, getSTI().getFeatureBits());
 
   MCRegister SrcReg = RISCV::X0;
   for (RISCVMatInt::Inst &Inst : Seq) {
     if (Inst.Opc == RISCV::LUI) {
       emitToStreamer(
           Out, MCInstBuilder(RISCV::LUI).addReg(DestReg).addImm(Inst.Imm));
+    } else if (Inst.Opc == RISCV::ADDUW) {
+      emitToStreamer(Out, MCInstBuilder(RISCV::ADDUW)
+                              .addReg(DestReg)
+                              .addReg(SrcReg)
+                              .addReg(RISCV::X0));
     } else {
       emitToStreamer(
           Out, MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addImm(
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index 72f7faf..aac35f8 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -13,8 +13,11 @@ using namespace llvm;
 
 // Recursively generate a sequence for materializing an integer.
-static void generateInstSeqImpl(int64_t Val, bool IsRV64,
+static void generateInstSeqImpl(int64_t Val,
+                                const FeatureBitset &ActiveFeatures,
                                 RISCVMatInt::InstSeq &Res) {
+  bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
+
   if (isInt<32>(Val)) {
     // Depending on the active bits in the immediate Value v, the following
     // instruction sequences are emitted:
@@ -66,7 +69,7 @@ static void generateInstSeqImpl(int64_t Val, bool IsRV64,
   int ShiftAmount = 12 + findFirstSet((uint64_t)Hi52);
   Hi52 = SignExtend64(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
 
-  generateInstSeqImpl(Hi52, IsRV64, Res);
+  generateInstSeqImpl(Hi52, ActiveFeatures, Res);
 
   Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount));
   if (Lo12)
@@ -75,44 +78,73 @@ static void generateInstSeqImpl(int64_t Val, bool IsRV64,
 
 namespace llvm {
 namespace RISCVMatInt {
-InstSeq generateInstSeq(int64_t Val, bool IsRV64) {
+InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
   RISCVMatInt::InstSeq Res;
-  generateInstSeqImpl(Val, IsRV64, Res);
+  generateInstSeqImpl(Val, ActiveFeatures, Res);
 
   // If the constant is positive we might be able to generate a shifted constant
   // with no leading zeros and use a final SRLI to restore them.
   if (Val > 0 && Res.size() > 2) {
-    assert(IsRV64 && "Expected RV32 to only need 2 instructions");
-    unsigned ShiftAmount = countLeadingZeros((uint64_t)Val);
-    Val <<= ShiftAmount;
+    assert(ActiveFeatures[RISCV::Feature64Bit] &&
+           "Expected RV32 to only need 2 instructions");
+    unsigned LeadingZeros = countLeadingZeros((uint64_t)Val);
+    uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
     // Fill in the bits that will be shifted out with 1s. An example where this
     // helps is trailing one masks with 32 or more ones. This will generate
     // ADDI -1 and an SRLI.
-    Val |= maskTrailingOnes<uint64_t>(ShiftAmount);
+    ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);
 
     RISCVMatInt::InstSeq TmpSeq;
-    generateInstSeqImpl(Val, IsRV64, TmpSeq);
-    TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, ShiftAmount));
+    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+    TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, LeadingZeros));
 
     // Keep the new sequence if it is an improvement.
-    if (TmpSeq.size() < Res.size())
+    if (TmpSeq.size() < Res.size()) {
       Res = TmpSeq;
+      // A 2 instruction sequence is the best we can do.
+      if (Res.size() <= 2)
+        return Res;
+    }
 
     // Some cases can benefit from filling the lower bits with zeros instead.
-    Val &= maskTrailingZeros<uint64_t>(ShiftAmount);
+    ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
     TmpSeq.clear();
-    generateInstSeqImpl(Val, IsRV64, TmpSeq);
-    TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, ShiftAmount));
+    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+    TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, LeadingZeros));
 
     // Keep the new sequence if it is an improvement.
-    if (TmpSeq.size() < Res.size())
+    if (TmpSeq.size() < Res.size()) {
       Res = TmpSeq;
+      // A 2 instruction sequence is the best we can do.
+      if (Res.size() <= 2)
+        return Res;
+    }
+
+    // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
+    // the end of the sequence.
+    if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureExtZba]) {
+      // Try replacing upper bits with 1.
+      uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
+      TmpSeq.clear();
+      generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
+      TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDUW, 0));
+
+      // Keep the new sequence if it is an improvement.
+      if (TmpSeq.size() < Res.size()) {
+        Res = TmpSeq;
+        // A 2 instruction sequence is the best we can do.
+        if (Res.size() <= 2)
+          return Res;
+      }
+    }
   }
 
   return Res;
 }
 
-int getIntMatCost(const APInt &Val, unsigned Size, bool IsRV64) {
+int getIntMatCost(const APInt &Val, unsigned Size,
+                  const FeatureBitset &ActiveFeatures) {
+  bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
   int PlatRegSize = IsRV64 ? 64 : 32;
 
   // Split the constant into platform register sized chunks, and calculate cost
@@ -120,7 +152,7 @@ int getIntMatCost(const APInt &Val, unsigned Size, bool IsRV64) {
   int Cost = 0;
   for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) {
     APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize);
-    InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), IsRV64);
+    InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), ActiveFeatures);
     Cost += MatSeq.size();
   }
   return std::max(1, Cost);
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
index 322b634..c5e04af 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
@@ -10,10 +10,12 @@
 #define LLVM_LIB_TARGET_RISCV_MATINT_H
 
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/SubtargetFeature.h"
 #include <cstdint>
 
 namespace llvm {
 class APInt;
+class MCSubtargetInfo;
 
 namespace RISCVMatInt {
 struct Inst {
@@ -29,15 +31,16 @@ using InstSeq = SmallVector<Inst, 8>;
 // simple struct is produced rather than directly emitting the instructions in
 // order to allow this helper to be used from both the MC layer and during
 // instruction selection.
-InstSeq generateInstSeq(int64_t Val, bool IsRV64);
+InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures);
 
 // Helper to estimate the number of instructions required to materialise the
 // given immediate value into a register. This estimate does not account for
 // `Val` possibly fitting into an immediate, and so may over-estimate.
 //
 // This will attempt to produce instructions to materialise `Val` as an
-// `Size`-bit immediate. `IsRV64` should match the target architecture.
-int getIntMatCost(const APInt &Val, unsigned Size, bool IsRV64);
+// `Size`-bit immediate.
+int getIntMatCost(const APInt &Val, unsigned Size,
+                  const FeatureBitset &ActiveFeatures);
 } // namespace RISCVMatInt
 } // namespace llvm
 
 #endif
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 9b7f135..ca301dcc 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -112,8 +112,10 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
 }
 
 static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm,
-                         MVT XLenVT) {
-  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, XLenVT == MVT::i64);
+                         const RISCVSubtarget &Subtarget) {
+  MVT XLenVT = Subtarget.getXLenVT();
+  RISCVMatInt::InstSeq Seq =
+      RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
 
   SDNode *Result = nullptr;
   SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);
@@ -121,6 +123,9 @@ static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm,
     SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT);
     if (Inst.Opc == RISCV::LUI)
       Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm);
+    else if (Inst.Opc == RISCV::ADDUW)
+      Result = CurDAG->getMachineNode(RISCV::ADDUW, DL, XLenVT, SrcReg,
+                                      CurDAG->getRegister(RISCV::X0, XLenVT));
     else
       Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm);
 
@@ -454,7 +459,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       ReplaceNode(Node, New.getNode());
       return;
     }
-    ReplaceNode(Node, selectImm(CurDAG, DL, ConstNode->getSExtValue(), XLenVT));
+    ReplaceNode(Node,
+                selectImm(CurDAG, DL, ConstNode->getSExtValue(), *Subtarget));
     return;
   }
   case ISD::FrameIndex: {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7273a3a..5df79ab 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6239,9 +6239,9 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
   // Neither constant will fit into an immediate, so find materialisation
   // costs.
   int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
-                                          Subtarget.is64Bit());
+                                          Subtarget.getFeatureBits());
   int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
-      ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
+      ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits());
 
   // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
   // combine should be prevented.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index f6cac90..5c27036 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -432,16 +432,16 @@ void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
                             MachineInstr::MIFlag Flag) const {
   MachineFunction *MF = MBB.getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
-  bool IsRV64 = MF->getSubtarget<RISCVSubtarget>().is64Bit();
   Register SrcReg = RISCV::X0;
   Register Result = MRI.createVirtualRegister(&RISCV::GPRRegClass);
   unsigned Num = 0;
 
-  if (!IsRV64 && !isInt<32>(Val))
+  if (!STI.is64Bit() && !isInt<32>(Val))
     report_fatal_error("Should only materialize 32-bit constants for RV32");
 
-  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, IsRV64);
-  assert(Seq.size() > 0);
+  RISCVMatInt::InstSeq Seq =
+      RISCVMatInt::generateInstSeq(Val, STI.getFeatureBits());
+  assert(!Seq.empty());
 
   for (RISCVMatInt::Inst &Inst : Seq) {
     // Write the final result to DstReg if it's the last instruction in the Seq.
@@ -453,6 +453,11 @@ void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
       BuildMI(MBB, MBBI, DL, get(RISCV::LUI), Result)
           .addImm(Inst.Imm)
           .setMIFlag(Flag);
+    } else if (Inst.Opc == RISCV::ADDUW) {
+      BuildMI(MBB, MBBI, DL, get(RISCV::ADDUW), Result)
+          .addReg(SrcReg, RegState::Kill)
+          .addReg(RISCV::X0)
+          .setMIFlag(Flag);
     } else {
       BuildMI(MBB, MBBI, DL, get(Inst.Opc), Result)
           .addReg(SrcReg, RegState::Kill)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 6605951..fd110db 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -27,7 +27,7 @@ InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
   // Otherwise, we check how many instructions it will take to materialise.
   const DataLayout &DL = getDataLayout();
   return RISCVMatInt::getIntMatCost(Imm, DL.getTypeSizeInBits(Ty),
-                                    getST()->is64Bit());
+                                    getST()->getFeatureBits());
 }
 
 InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index 3a49e24..f47f6b3 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -886,3 +886,50 @@ define i64 @mul264(i64 %a) {
   %c = mul i64 %a, 264
   ret i64 %c
 }
+
+define i64 @imm_zextw() nounwind {
+; RV64I-LABEL: imm_zextw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a0, zero, 1
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    addi a0, a0, -2
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: imm_zextw:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    addi a0, zero, -2
+; RV64IB-NEXT:    zext.w a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBA-LABEL: imm_zextw:
+; RV64IBA:       # %bb.0:
+; RV64IBA-NEXT:    addi a0, zero, -2
+; RV64IBA-NEXT:    zext.w a0, a0
+; RV64IBA-NEXT:    ret
+  ret i64 4294967294 ; -2 in 32 bits.
+}
+
+define i64 @imm_zextw2() nounwind {
+; RV64I-LABEL: imm_zextw2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 171
+; RV64I-NEXT:    addiw a0, a0, -1365
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    addi a0, a0, -1366
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: imm_zextw2:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    lui a0, 699051
+; RV64IB-NEXT:    addiw a0, a0, -1366
+; RV64IB-NEXT:    zext.w a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBA-LABEL: imm_zextw2:
+; RV64IBA:       # %bb.0:
+; RV64IBA-NEXT:    lui a0, 699051
+; RV64IBA-NEXT:    addiw a0, a0, -1366
+; RV64IBA-NEXT:    zext.w a0, a0
+; RV64IBA-NEXT:    ret
+  ret i64 2863311530 ; 0xAAAAAAAA
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
index cce4b50..d797fcd 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
@@ -240,9 +240,8 @@ define signext i32 @rol_i32_neg_constant_rhs(i32 signext %a) nounwind {
 ;
 ; RV64IB-LABEL: rol_i32_neg_constant_rhs:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    addi a1, zero, 1
-; RV64IB-NEXT:    slli a1, a1, 32
-; RV64IB-NEXT:    addi a1, a1, -2
+; RV64IB-NEXT:    addi a1, zero, -2
+; RV64IB-NEXT:    zext.w a1, a1
 ; RV64IB-NEXT:    rolw a0, a1, a0
 ; RV64IB-NEXT:    ret
 ;
@@ -370,9 +369,8 @@ define signext i32 @ror_i32_neg_constant_rhs(i32 signext %a) nounwind {
 ;
 ; RV64IB-LABEL: ror_i32_neg_constant_rhs:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    addi a1, zero, 1
-; RV64IB-NEXT:    slli a1, a1, 32
-; RV64IB-NEXT:    addi a1, a1, -2
+; RV64IB-NEXT:    addi a1, zero, -2
+; RV64IB-NEXT:    zext.w a1, a1
 ; RV64IB-NEXT:    rorw a0, a1, a0
 ; RV64IB-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
index 1fd89bd..6ac3717 100644
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -420,9 +420,8 @@ define zeroext i1 @uaddo.i32.constant(i32 %v1, i32* %res) {
 ;
 ; RV64ZBA-LABEL: uaddo.i32.constant:
 ; RV64ZBA:       # %bb.0: # %entry
-; RV64ZBA-NEXT:    addi a2, zero, 1
-; RV64ZBA-NEXT:    slli a2, a2, 32
-; RV64ZBA-NEXT:    addi a3, a2, -2
+; RV64ZBA-NEXT:    addi a2, zero, -2
+; RV64ZBA-NEXT:    zext.w a3, a2
 ; RV64ZBA-NEXT:    addw a2, a0, a3
 ; RV64ZBA-NEXT:    sext.w a4, a0
 ; RV64ZBA-NEXT:    sltu a2, a2, a4
@@ -758,9 +757,8 @@ define zeroext i1 @usubo.i32.constant.lhs(i32 %v1, i32* %res) {
 ;
 ; RV64ZBA-LABEL: usubo.i32.constant.lhs:
 ; RV64ZBA:       # %bb.0: # %entry
-; RV64ZBA-NEXT:    addi a2, zero, 1
-; RV64ZBA-NEXT:    slli a2, a2, 32
-; RV64ZBA-NEXT:    addi a3, a2, -2
+; RV64ZBA-NEXT:    addi a2, zero, -2
+; RV64ZBA-NEXT:    zext.w a3, a2
 ; RV64ZBA-NEXT:    subw a2, a3, a0
 ; RV64ZBA-NEXT:    addi a2, a2, 1
 ; RV64ZBA-NEXT:    seqz a2, a2
diff --git a/llvm/test/MC/RISCV/rv64b-aliases-valid.s b/llvm/test/MC/RISCV/rv64b-aliases-valid.s
index ab9521d..97585d3 100644
--- a/llvm/test/MC/RISCV/rv64b-aliases-valid.s
+++ b/llvm/test/MC/RISCV/rv64b-aliases-valid.s
@@ -362,3 +362,17 @@ grevw x5, x6, 13
 # CHECK-S-OBJ-NOALIAS: gorciw t0, t1, 13
 # CHECK-S-OBJ: gorciw t0, t1, 13
 gorcw x5, x6, 13
+
+# CHECK-S-OBJ-NOALIAS: addi t1, zero, -2
+# CHECK-S-OBJ-NOALIAS-NEXT: add.uw t1, t1, zero
+# CHECK-S-OBJ: addi t1, zero, -2
+# CHECK-S-OBJ-NEXT: zext.w t1, t1
li x6, 0xfffffffe
+
+# CHECK-S-OBJ-NOALIAS: lui t2, 699051
+# CHECK-S-OBJ-NOALIAS-NEXT: addiw t2, t2, -1366
+# CHECK-S-OBJ-NOALIAS-NEXT: add.uw t2, t2, zero
+# CHECK-S-OBJ: lui t2, 699051
+# CHECK-S-OBJ-NEXT: addiw t2, t2, -1366
+# CHECK-S-OBJ-NEXT: zext.w t2, t2
+li x7, 0xaaaaaaaa
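
For illustration, the effect of the new special case is easy to reproduce
outside of LLVM. The sketch below is standalone C++, not the RISCVMatInt
implementation from the patch: the materializeWithZba helper, its lui/addiw
immediate math, and the fixed destination register a0 are assumptions made
for this example. It prints the same RV64IBA sequences that the imm_zextw
and imm_zextw2 tests above check for.

  // Standalone sketch of the "upper 32 bits zero, bit 31 set" case.
  // Illustrative only: the helper name, the immediate math, and the fixed
  // register a0 are not taken from LLVM.
  #include <cstdint>
  #include <cstdio>
  #include <string>
  #include <vector>

  // Returns a textual RV64+Zba sequence for Val, or an empty vector when
  // the pattern does not apply (real code would fall back to the generic
  // lui/addi(w)/slli recursion).
  static std::vector<std::string> materializeWithZba(uint64_t Val) {
    std::vector<std::string> Seq;
    if ((Val >> 32) != 0 || ((Val >> 31) & 1) == 0)
      return Seq;

    // Materialize the low 32 bits sign-extended, so the upper 32 bits
    // become all ones, then clear them with zext.w (an alias of
    // add.uw rd, rs, x0).
    int64_t SExt = (int64_t)(int32_t)(uint32_t)Val;
    int64_t Lo12 = SExt & 0xFFF;
    if (Lo12 >= 0x800)
      Lo12 -= 0x1000; // addi/addiw take a sign-extended 12-bit immediate.
    int64_t Hi20 = ((SExt - Lo12) >> 12) & 0xFFFFF;

    if (Hi20)
      Seq.push_back("lui a0, " + std::to_string(Hi20));
    if (Lo12 != 0 || Hi20 == 0)
      Seq.push_back((Hi20 ? "addiw a0, a0, " : "addi a0, zero, ") +
                    std::to_string(Lo12));
    Seq.push_back("zext.w a0, a0");
    return Seq;
  }

  int main() {
    const uint64_t Vals[] = {0xFFFFFFFE, 0xAAAAAAAA};
    for (uint64_t Val : Vals) {
      std::printf("0x%llX:\n", (unsigned long long)Val);
      for (const std::string &I : materializeWithZba(Val))
        std::printf("  %s\n", I.c_str());
    }
    // Expected output, matching the RV64IBA checks in rv64zba.ll:
    //   0xFFFFFFFE -> addi a0, zero, -2 ; zext.w a0, a0
    //   0xAAAAAAAA -> lui a0, 699051 ; addiw a0, a0, -1366 ; zext.w a0, a0
    return 0;
  }

In both cases the low 32 bits are materialized sign-extended first, and a
single zext.w then replaces what would otherwise be an addi+slli (or longer)
fixup of the upper bits.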