void foldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI, MachineInstr &Tail,
int64_t Offset);
bool matchLargeOffset(MachineInstr &TailAdd, Register GSReg, int64_t &Offset);
- bool matchShiftedOffset(MachineInstr &TailShXAdd, Register GSReg,
- int64_t &Offset);
-
RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}
MachineFunctionProperties getRequiredProperties() const override {
return false;
}
-// Detect patterns for offsets that are passed into a SHXADD instruction.
-// The offset has 1, 2, or 3 trailing zeros and fits in simm13, simm14, or
-// simm15 respectively. The constant is created with addi voff, x0, C, and
-// shXadd is used to insert the trailing zeros and do the addition.
-//
-// HiLUI: lui vreg1, %hi(s)
-// LoADDI: addi vreg2, vreg1, %lo(s)
-// OffsetTail: addi voff, x0, C
-// TailShXAdd: shXadd vreg4, voff, vreg2
-bool RISCVMergeBaseOffsetOpt::matchShiftedOffset(MachineInstr &TailShXAdd,
- Register GAReg,
- int64_t &Offset) { // Offset is written only after every check below passes.
- assert((TailShXAdd.getOpcode() == RISCV::SH1ADD ||
- TailShXAdd.getOpcode() == RISCV::SH2ADD ||
- TailShXAdd.getOpcode() == RISCV::SH3ADD) &&
- "Expected SHXADD instruction!");
- // The first source (operand 1) is the operand that shXadd shifts.
- Register Rs1 = TailShXAdd.getOperand(1).getReg();
-
- if (GAReg != TailShXAdd.getOperand(2).getReg()) // The un-shifted source must be the address register passed in.
- return false;
-
- // Can't fold if the register has more than one use: its defining
- if (!MRI->hasOneUse(Rs1)) // instruction is added to DeadInstrs below.
- return false;
- // The shifted operand must be defined by ADDI X0, C; anything else is rejected.
- MachineInstr &OffsetTail = *MRI->getVRegDef(Rs1);
- if (OffsetTail.getOpcode() != RISCV::ADDI)
- return false;
- if (!OffsetTail.getOperand(1).isReg() ||
- OffsetTail.getOperand(1).getReg() != RISCV::X0 ||
- !OffsetTail.getOperand(2).isImm())
- return false;
-
- Offset = OffsetTail.getOperand(2).getImm(); // The raw constant C, before shifting.
- assert(isInt<12>(Offset) && "Unexpected offset");
-
- switch (TailShXAdd.getOpcode()) { // Apply the shift amount implied by the opcode.
- default: llvm_unreachable("Unexpected opcode");
- case RISCV::SH1ADD: Offset <<= 1; break;
- case RISCV::SH2ADD: Offset <<= 2; break;
- case RISCV::SH3ADD: Offset <<= 3; break;
- }
-
- LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
- DeadInstrs.insert(&OffsetTail); // Record the constant-materializing ADDI in DeadInstrs.
- return true;
-}
-
bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI,
MachineInstr &LoADDI) {
Register DestReg = LoADDI.getOperand(0).getReg();
foldOffset(HiLUI, LoADDI, Tail, Offset);
return true;
}
- case RISCV::SH1ADD:
- case RISCV::SH2ADD:
- case RISCV::SH3ADD: {
- // The offset is too large to fit in the immediate field of ADDI.
- // It may be encoded as (SH1ADD (ADDI X0, C), DestReg),
- // (SH2ADD (ADDI X0, C), DestReg) or (SH3ADD (ADDI X0, C), DestReg).
- int64_t Offset;
- if (!matchShiftedOffset(Tail, DestReg, Offset))
- return false;
- foldOffset(HiLUI, LoADDI, Tail, Offset);
- return true;
- }
case RISCV::LB:
case RISCV::LH:
case RISCV::LW:
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32I
-; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64I
-; RUN: llc -mtriple=riscv32 -mattr=+zba < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZBA
-; RUN: llc -mtriple=riscv64 -mattr=+zba < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZBA
+; RUN: llc -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=CHECK,RV64
%struct.S = type { [40 x i32], i32, i32, i32, [4100 x i32], i32, i32, i32 }
@s = common dso_local global %struct.S zeroinitializer, align 4
; CHECK-NEXT: ret
ret i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 -4000)
}
-
-; With Zba the constant 6424 is created with LI+SH2ADD.
-define i8* @offset_sh2add() {
-; CHECK-LABEL: offset_sh2add:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(bar+6424)
-; CHECK-NEXT: addi a0, a0, %lo(bar+6424)
-; CHECK-NEXT: ret
- ret i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 6424)
-}
-
-; With Zba the constant 12848 is created with LI+SH3ADD.
-define i8* @offset_sh3add() {
-; CHECK-LABEL: offset_sh3add:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(bar+12848)
-; CHECK-NEXT: addi a0, a0, %lo(bar+12848)
-; CHECK-NEXT: ret
- ret i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 12848)
-}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; RV32I: {{.*}}
-; RV32ZBA: {{.*}}
-; RV64I: {{.*}}
-; RV64ZBA: {{.*}}