void RISCVAsmParser::emitLoadImm(MCRegister DestReg, int64_t Value,
MCStreamer &Out) {
- RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Value, isRV64());
+ RISCVMatInt::InstSeq Seq =
+ RISCVMatInt::generateInstSeq(Value, getSTI().getFeatureBits());
MCRegister SrcReg = RISCV::X0;
for (RISCVMatInt::Inst &Inst : Seq) {
if (Inst.Opc == RISCV::LUI) {
emitToStreamer(
Out, MCInstBuilder(RISCV::LUI).addReg(DestReg).addImm(Inst.Imm));
+ } else if (Inst.Opc == RISCV::ADDUW) {
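+      // ADD.UW rd, rs1, x0 zero-extends the low 32 bits of rs1 (this is the
+      // zext.w alias), so the last operand is a register, not an immediate.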
+ emitToStreamer(Out, MCInstBuilder(RISCV::ADDUW)
+ .addReg(DestReg)
+ .addReg(SrcReg)
+ .addReg(RISCV::X0));
} else {
emitToStreamer(
Out, MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addImm(
         Inst.Imm));
using namespace llvm;
// Recursively generate a sequence for materializing an integer.
-static void generateInstSeqImpl(int64_t Val, bool IsRV64,
+static void generateInstSeqImpl(int64_t Val,
+ const FeatureBitset &ActiveFeatures,
RISCVMatInt::InstSeq &Res) {
+ bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
+
if (isInt<32>(Val)) {
// Depending on the active bits in the immediate Value v, the following
// instruction sequences are emitted:
int ShiftAmount = 12 + findFirstSet((uint64_t)Hi52);
Hi52 = SignExtend64(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
- generateInstSeqImpl(Hi52, IsRV64, Res);
+ generateInstSeqImpl(Hi52, ActiveFeatures, Res);
Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount));
if (Lo12)
namespace llvm {
namespace RISCVMatInt {
-InstSeq generateInstSeq(int64_t Val, bool IsRV64) {
+InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
RISCVMatInt::InstSeq Res;
- generateInstSeqImpl(Val, IsRV64, Res);
+ generateInstSeqImpl(Val, ActiveFeatures, Res);
// If the constant is positive we might be able to generate a shifted constant
// with no leading zeros and use a final SRLI to restore them.
if (Val > 0 && Res.size() > 2) {
- assert(IsRV64 && "Expected RV32 to only need 2 instructions");
- unsigned ShiftAmount = countLeadingZeros((uint64_t)Val);
- Val <<= ShiftAmount;
+ assert(ActiveFeatures[RISCV::Feature64Bit] &&
+ "Expected RV32 to only need 2 instructions");
+ unsigned LeadingZeros = countLeadingZeros((uint64_t)Val);
+ uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
// Fill in the bits that will be shifted out with 1s. An example where this
// helps is a trailing-ones mask with 32 or more ones. This will generate an
// ADDI -1 and an SRLI.
- Val |= maskTrailingOnes<uint64_t>(ShiftAmount);
+ ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);
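+    // e.g. 0x00000000FFFFFFFF shifts to 0xFFFFFFFF00000000; filling the low
+    // 32 bits with 1s yields -1, materialized as ADDI -1 then SRLI 32.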
RISCVMatInt::InstSeq TmpSeq;
- generateInstSeqImpl(Val, IsRV64, TmpSeq);
- TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, ShiftAmount));
+ generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+ TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, LeadingZeros));
// Keep the new sequence if it is an improvement.
- if (TmpSeq.size() < Res.size())
+ if (TmpSeq.size() < Res.size()) {
Res = TmpSeq;
+ // A 2 instruction sequence is the best we can do.
+ if (Res.size() <= 2)
+ return Res;
+ }
// Some cases can benefit from filling the lower bits with zeros instead.
- Val &= maskTrailingZeros<uint64_t>(ShiftAmount);
+ ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
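+    // e.g. 0x0000FFFFFFFFF001 shifts to 0xFFFFFFFFF0010000, which a single
+    // sign-extended LUI produces, so LUI then SRLI 16 suffices.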
TmpSeq.clear();
- generateInstSeqImpl(Val, IsRV64, TmpSeq);
- TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, ShiftAmount));
+ generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+ TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, LeadingZeros));
// Keep the new sequence if it is an improvement.
- if (TmpSeq.size() < Res.size())
+ if (TmpSeq.size() < Res.size()) {
Res = TmpSeq;
+ // A 2 instruction sequence is the best we can do.
+ if (Res.size() <= 2)
+ return Res;
+ }
+
+ // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
+ // the end of the sequence.
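+    // e.g. 0xFFFFFFFE: ADDI -2 then ADD.UW clears bits 63:32, beating the
+    // 3-instruction ADDI/SLLI/ADDI sequence.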
+ if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureExtZba]) {
+      // Try replacing the upper 32 bits with 1s; zext.w clears them anyway,
+      // and an all-ones upper half often sign-extends from a cheap low part.
+ uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
+ TmpSeq.clear();
+ generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
+ TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDUW, 0));
+
+ // Keep the new sequence if it is an improvement.
+ if (TmpSeq.size() < Res.size()) {
+ Res = TmpSeq;
+ // A 2 instruction sequence is the best we can do.
+ if (Res.size() <= 2)
+ return Res;
+ }
+ }
}
return Res;
}
-int getIntMatCost(const APInt &Val, unsigned Size, bool IsRV64) {
+int getIntMatCost(const APInt &Val, unsigned Size,
+ const FeatureBitset &ActiveFeatures) {
+ bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
int PlatRegSize = IsRV64 ? 64 : 32;
// Split the constant into platform register sized chunks, and calculate cost
// of each chunk.
int Cost = 0;
for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) {
APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize);
- InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), IsRV64);
+ InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), ActiveFeatures);
Cost += MatSeq.size();
}
return std::max(1, Cost);
#define LLVM_LIB_TARGET_RISCV_MATINT_H
#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/SubtargetFeature.h"
#include <cstdint>
namespace llvm {
class APInt;
+class MCSubtargetInfo;
namespace RISCVMatInt {
struct Inst {
// simple struct is produced rather than directly emitting the instructions in
// order to allow this helper to be used from both the MC layer and during
// instruction selection.
-InstSeq generateInstSeq(int64_t Val, bool IsRV64);
+InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures);
// Helper to estimate the number of instructions required to materialise the
// given immediate value into a register. This estimate does not account for
// `Val` possibly fitting into an immediate, and so may over-estimate.
//
// This will attempt to produce instructions to materialise `Val` as an
-// `Size`-bit immediate. `IsRV64` should match the target architecture.
-int getIntMatCost(const APInt &Val, unsigned Size, bool IsRV64);
+// `Size`-bit immediate.
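+// `ActiveFeatures` is the subtarget's feature bitset; it selects RV32 vs
+// RV64 chunking and any extensions (e.g. Zba) usable for materialisation.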
+int getIntMatCost(const APInt &Val, unsigned Size,
+ const FeatureBitset &ActiveFeatures);
} // namespace RISCVMatInt
} // namespace llvm
#endif
}
static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm,
- MVT XLenVT) {
- RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, XLenVT == MVT::i64);
+ const RISCVSubtarget &Subtarget) {
+ MVT XLenVT = Subtarget.getXLenVT();
+ RISCVMatInt::InstSeq Seq =
+ RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
SDNode *Result = nullptr;
SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);
SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT);
if (Inst.Opc == RISCV::LUI)
Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm);
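+    // For ADDUW the addend is X0, so the node reduces to zext.w of SrcReg.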
+ else if (Inst.Opc == RISCV::ADDUW)
+ Result = CurDAG->getMachineNode(RISCV::ADDUW, DL, XLenVT, SrcReg,
+ CurDAG->getRegister(RISCV::X0, XLenVT));
else
Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm);
ReplaceNode(Node, New.getNode());
return;
}
- ReplaceNode(Node, selectImm(CurDAG, DL, ConstNode->getSExtValue(), XLenVT));
+ ReplaceNode(Node,
+ selectImm(CurDAG, DL, ConstNode->getSExtValue(), *Subtarget));
return;
}
case ISD::FrameIndex: {
// Neither constant will fit into an immediate, so find materialisation
// costs.
int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
- Subtarget.is64Bit());
+ Subtarget.getFeatureBits());
int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
- ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
+ ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits());
// Materialising `c1` is cheaper than materialising `c1 << c2`, so the
// combine should be prevented.
MachineInstr::MIFlag Flag) const {
MachineFunction *MF = MBB.getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- bool IsRV64 = MF->getSubtarget<RISCVSubtarget>().is64Bit();
Register SrcReg = RISCV::X0;
Register Result = MRI.createVirtualRegister(&RISCV::GPRRegClass);
unsigned Num = 0;
- if (!IsRV64 && !isInt<32>(Val))
+ if (!STI.is64Bit() && !isInt<32>(Val))
report_fatal_error("Should only materialize 32-bit constants for RV32");
- RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, IsRV64);
- assert(Seq.size() > 0);
+ RISCVMatInt::InstSeq Seq =
+ RISCVMatInt::generateInstSeq(Val, STI.getFeatureBits());
+ assert(!Seq.empty());
for (RISCVMatInt::Inst &Inst : Seq) {
// Write the final result to DstReg if it's the last instruction in the Seq.
BuildMI(MBB, MBBI, DL, get(RISCV::LUI), Result)
.addImm(Inst.Imm)
.setMIFlag(Flag);
+ } else if (Inst.Opc == RISCV::ADDUW) {
+ BuildMI(MBB, MBBI, DL, get(RISCV::ADDUW), Result)
+ .addReg(SrcReg, RegState::Kill)
+ .addReg(RISCV::X0)
+ .setMIFlag(Flag);
} else {
BuildMI(MBB, MBBI, DL, get(Inst.Opc), Result)
.addReg(SrcReg, RegState::Kill)
// Otherwise, we check how many instructions it will take to materialise.
const DataLayout &DL = getDataLayout();
return RISCVMatInt::getIntMatCost(Imm, DL.getTypeSizeInBits(Ty),
- getST()->is64Bit());
+ getST()->getFeatureBits());
}
InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
%c = mul i64 %a, 264
ret i64 %c
}
+
+define i64 @imm_zextw() nounwind {
+; RV64I-LABEL: imm_zextw:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi a0, zero, 1
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: addi a0, a0, -2
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: imm_zextw:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: addi a0, zero, -2
+; RV64IB-NEXT: zext.w a0, a0
+; RV64IB-NEXT: ret
+;
+; RV64IBA-LABEL: imm_zextw:
+; RV64IBA: # %bb.0:
+; RV64IBA-NEXT: addi a0, zero, -2
+; RV64IBA-NEXT: zext.w a0, a0
+; RV64IBA-NEXT: ret
+  ret i64 4294967294 ; 0xFFFFFFFE: -2 zero-extended from 32 bits.
+}
+
+define i64 @imm_zextw2() nounwind {
+; RV64I-LABEL: imm_zextw2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a0, 171
+; RV64I-NEXT: addiw a0, a0, -1365
+; RV64I-NEXT: slli a0, a0, 12
+; RV64I-NEXT: addi a0, a0, -1366
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: imm_zextw2:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: lui a0, 699051
+; RV64IB-NEXT: addiw a0, a0, -1366
+; RV64IB-NEXT: zext.w a0, a0
+; RV64IB-NEXT: ret
+;
+; RV64IBA-LABEL: imm_zextw2:
+; RV64IBA: # %bb.0:
+; RV64IBA-NEXT: lui a0, 699051
+; RV64IBA-NEXT: addiw a0, a0, -1366
+; RV64IBA-NEXT: zext.w a0, a0
+; RV64IBA-NEXT: ret
+ ret i64 2863311530 ; 0xAAAAAAAA
+}
;
; RV64IB-LABEL: rol_i32_neg_constant_rhs:
; RV64IB: # %bb.0:
-; RV64IB-NEXT: addi a1, zero, 1
-; RV64IB-NEXT: slli a1, a1, 32
-; RV64IB-NEXT: addi a1, a1, -2
+; RV64IB-NEXT: addi a1, zero, -2
+; RV64IB-NEXT: zext.w a1, a1
; RV64IB-NEXT: rolw a0, a1, a0
; RV64IB-NEXT: ret
;
;
; RV64IB-LABEL: ror_i32_neg_constant_rhs:
; RV64IB: # %bb.0:
-; RV64IB-NEXT: addi a1, zero, 1
-; RV64IB-NEXT: slli a1, a1, 32
-; RV64IB-NEXT: addi a1, a1, -2
+; RV64IB-NEXT: addi a1, zero, -2
+; RV64IB-NEXT: zext.w a1, a1
; RV64IB-NEXT: rorw a0, a1, a0
; RV64IB-NEXT: ret
;
;
; RV64ZBA-LABEL: uaddo.i32.constant:
; RV64ZBA: # %bb.0: # %entry
-; RV64ZBA-NEXT: addi a2, zero, 1
-; RV64ZBA-NEXT: slli a2, a2, 32
-; RV64ZBA-NEXT: addi a3, a2, -2
+; RV64ZBA-NEXT: addi a2, zero, -2
+; RV64ZBA-NEXT: zext.w a3, a2
; RV64ZBA-NEXT: addw a2, a0, a3
; RV64ZBA-NEXT: sext.w a4, a0
; RV64ZBA-NEXT: sltu a2, a2, a4
;
; RV64ZBA-LABEL: usubo.i32.constant.lhs:
; RV64ZBA: # %bb.0: # %entry
-; RV64ZBA-NEXT: addi a2, zero, 1
-; RV64ZBA-NEXT: slli a2, a2, 32
-; RV64ZBA-NEXT: addi a3, a2, -2
+; RV64ZBA-NEXT: addi a2, zero, -2
+; RV64ZBA-NEXT: zext.w a3, a2
; RV64ZBA-NEXT: subw a2, a3, a0
; RV64ZBA-NEXT: addi a2, a2, 1
; RV64ZBA-NEXT: seqz a2, a2
# CHECK-S-OBJ-NOALIAS: gorciw t0, t1, 13
# CHECK-S-OBJ: gorciw t0, t1, 13
gorcw x5, x6, 13
+
+# CHECK-S-OBJ-NOALIAS: addi t1, zero, -2
+# CHECK-S-OBJ-NOALIAS-NEXT: add.uw t1, t1, zero
+# CHECK-S-OBJ: addi t1, zero, -2
+# CHECK-S-OBJ-NEXT: zext.w t1, t1
+li x6, 0xfffffffe
+
+# CHECK-S-OBJ-NOALIAS: lui t2, 699051
+# CHECK-S-OBJ-NOALIAS-NEXT: addiw t2, t2, -1366
+# CHECK-S-OBJ-NOALIAS-NEXT: add.uw t2, t2, zero
+# CHECK-S-OBJ: lui t2, 699051
+# CHECK-S-OBJ-NEXT: addiw t2, t2, -1366
+# CHECK-S-OBJ-NEXT: zext.w t2, t2
+li x7, 0xaaaaaaaa