#include "llvm/Support/MathExtras.h"
using namespace llvm;
+static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) {
+ if (!HasRVC)
+ return Res.size();
+
+ int Cost = 0;
+ for (auto Instr : Res) {
+ bool Compressed;
+ switch (Instr.Opc) {
+ default: llvm_unreachable("Unexpected opcode");
+ case RISCV::SLLI:
+ case RISCV::SRLI:
+ Compressed = true;
+ break;
+ case RISCV::ADDI:
+ case RISCV::ADDIW:
+ case RISCV::LUI:
+ Compressed = isInt<6>(Instr.Imm);
+ break;
+ case RISCV::ADDUW:
+ Compressed = false;
+ break;
+ }
+ // Two RVC instructions take the same space as one RVI instruction, but
+ // can take longer to execute than the single RVI instruction. Thus, we
+ // consider that two RVC instructions are slightly more costly than one
+ // RVI instruction. For longer sequences of RVC instructions the space
+ // savings can be worth it, though. The costs below try to model that.
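+ // For example, two RVC instructions (70 + 70 = 140) cost more than a single
+ // RVI instruction (100), but four RVC instructions (280) cost less than
+ // three RVI instructions (300), so a longer all-compressible sequence can
+ // still come out ahead.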
+ if (!Compressed)
+ Cost += 100; // Baseline cost of one RVI instruction: 100%.
+ else
+ Cost += 70; // 70% cost of baseline.
+ }
+ return Cost;
+}
+
// Recursively generate a sequence for materializing an integer.
static void generateInstSeqImpl(int64_t Val,
const FeatureBitset &ActiveFeatures,
int ShiftAmount = 12 + findFirstSet((uint64_t)Hi52);
Hi52 = SignExtend64(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
+ // If the remaining bits don't fit in 12 bits, we might be able to reduce the
+ // shift amount in order to use LUI which will zero the lower 12 bits.
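+ // For example, for 0x100004000 we get Hi52 = 0x40001 with ShiftAmount = 14;
+ // 0x40001 does not fit in 12 bits, but (0x40001 << 12) fits in 32 bits, so
+ // we can emit LUI 0x40001 followed by SLLI 2 and avoid an ADDIW.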
+ if (ShiftAmount > 12 && !isInt<12>(Hi52) && isInt<32>((uint64_t)Hi52 << 12)) {
+ // Reduce the shift amount and add zeros to the LSBs so it will match LUI.
+ ShiftAmount -= 12;
+ Hi52 = (uint64_t)Hi52 << 12;
+ }
+
generateInstSeqImpl(Hi52, ActiveFeatures, Res);
Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount));
}
int getIntMatCost(const APInt &Val, unsigned Size,
- const FeatureBitset &ActiveFeatures) {
+ const FeatureBitset &ActiveFeatures,
+ bool CompressionCost) {
bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
+ bool HasRVC = CompressionCost && ActiveFeatures[RISCV::FeatureStdExtC];
int PlatRegSize = IsRV64 ? 64 : 32;
// Split the constant into platform register sized chunks, and calculate cost
// of each chunk.
int Cost = 0;
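+ // For example, on RV32 a 64-bit constant is split into two 32-bit chunks and
+ // the cost of materializing each chunk is added to Cost.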
for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) {
APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize);
InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), ActiveFeatures);
- Cost += MatSeq.size();
+ Cost += getInstSeqCost(MatSeq, HasRVC);
}
return std::max(1, Cost);
}
//
// This will attempt to produce instructions to materialise `Val` as a
// `Size`-bit immediate.
+//
+// If CompressionCost is true, it will use a different cost calculation when
+// RVC is enabled. This should be used to compare two different sequences to
+// determine which is more compressible.
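+// For example, a three-instruction sequence whose instructions all have
+// compressed forms is reported as cheaper than a three-instruction sequence
+// that cannot be compressed, even though both have the same length.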
int getIntMatCost(const APInt &Val, unsigned Size,
- const FeatureBitset &ActiveFeatures);
+ const FeatureBitset &ActiveFeatures,
+ bool CompressionCost = false);
} // namespace RISCVMatInt
} // namespace llvm
#endif
// Neither constant will fit into an immediate, so find materialisation
// costs.
int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
- Subtarget.getFeatureBits());
+ Subtarget.getFeatureBits(),
+ /*CompressionCost*/true);
int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
- ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits());
+ ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits(),
+ /*CompressionCost*/true);
// Materialising `c1` is cheaper than materialising `c1 << c2`, so the
// combine should be prevented.
;
; RV64I-LABEL: add_large_const:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a1, 1
-; RV64I-NEXT: addiw a1, a1, -1
-; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: lui a1, 4095
+; RV64I-NEXT: slli a1, a1, 36
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: jalr zero, 0(ra)
;
;
; RV64I-LABEL: add_huge_const:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a1, 8
-; RV64I-NEXT: addiw a1, a1, -1
-; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: lui a1, 32767
+; RV64I-NEXT: slli a1, a1, 36
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: jalr zero, 0(ra)
;
;
; RV32C-LABEL: add_wide_operand:
; RV32C: # %bb.0:
-; RV32C-NEXT: c.lw a2, 0(a1)
-; RV32C-NEXT: c.lw a3, 4(a1)
-; RV32C-NEXT: lw a6, 12(a1)
+; RV32C-NEXT: c.lw a2, 4(a1)
+; RV32C-NEXT: c.lw a3, 12(a1)
+; RV32C-NEXT: c.lw a4, 0(a1)
; RV32C-NEXT: c.lw a1, 8(a1)
-; RV32C-NEXT: srli a5, a2, 29
-; RV32C-NEXT: slli a4, a3, 3
-; RV32C-NEXT: c.or a4, a5
-; RV32C-NEXT: c.srli a3, 29
-; RV32C-NEXT: slli a5, a1, 3
+; RV32C-NEXT: c.lui a5, 16
+; RV32C-NEXT: c.add a3, a5
+; RV32C-NEXT: c.slli a3, 3
+; RV32C-NEXT: srli a5, a1, 29
+; RV32C-NEXT: or a6, a3, a5
+; RV32C-NEXT: srli a5, a4, 29
+; RV32C-NEXT: slli a3, a2, 3
; RV32C-NEXT: c.or a3, a5
-; RV32C-NEXT: c.srli a1, 29
-; RV32C-NEXT: slli a5, a6, 3
-; RV32C-NEXT: c.or a1, a5
-; RV32C-NEXT: c.slli a2, 3
-; RV32C-NEXT: lui a5, 128
-; RV32C-NEXT: c.add a1, a5
+; RV32C-NEXT: c.srli a2, 29
+; RV32C-NEXT: c.slli a1, 3
+; RV32C-NEXT: c.or a1, a2
+; RV32C-NEXT: slli a2, a4, 3
; RV32C-NEXT: c.sw a2, 0(a0)
-; RV32C-NEXT: c.sw a3, 8(a0)
-; RV32C-NEXT: c.sw a4, 4(a0)
-; RV32C-NEXT: c.sw a1, 12(a0)
+; RV32C-NEXT: c.sw a1, 8(a0)
+; RV32C-NEXT: c.sw a3, 4(a0)
+; RV32C-NEXT: sw a6, 12(a0)
; RV32C-NEXT: c.jr ra
;
; RV64C-LABEL: add_wide_operand:
;
; RV64IFD-LABEL: fld_fsd_constant:
; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: lui a1, 56
-; RV64IFD-NEXT: addiw a1, a1, -1353
-; RV64IFD-NEXT: slli a1, a1, 14
+; RV64IFD-NEXT: lui a1, 228023
+; RV64IFD-NEXT: slli a1, a1, 2
; RV64IFD-NEXT: fld ft0, -273(a1)
; RV64IFD-NEXT: fmv.d.x ft1, a0
; RV64IFD-NEXT: fadd.d ft0, ft1, ft0
;
; RV64IF-LABEL: flw_fsw_constant:
; RV64IF: # %bb.0:
-; RV64IF-NEXT: lui a1, 56
-; RV64IF-NEXT: addiw a1, a1, -1353
-; RV64IF-NEXT: slli a1, a1, 14
+; RV64IF-NEXT: lui a1, 228023
+; RV64IF-NEXT: slli a1, a1, 2
; RV64IF-NEXT: flw ft0, -273(a1)
; RV64IF-NEXT: fmv.w.x ft1, a0
; RV64IF-NEXT: fadd.s ft0, ft1, ft0
;
; RV64IZFH-LABEL: flh_fsh_constant:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: lui a0, 56
-; RV64IZFH-NEXT: addiw a0, a0, -1353
-; RV64IZFH-NEXT: slli a0, a0, 14
+; RV64IZFH-NEXT: lui a0, 228023
+; RV64IZFH-NEXT: slli a0, a0, 2
; RV64IZFH-NEXT: flh ft0, -273(a0)
; RV64IZFH-NEXT: fadd.h fa0, fa0, ft0
; RV64IZFH-NEXT: fsh fa0, -273(a0)
;
; RV64I-LABEL: imm_left_shifted_lui_1:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a0, 64
-; RV64I-NEXT: addiw a0, a0, 1
-; RV64I-NEXT: slli a0, a0, 13
+; RV64I-NEXT: lui a0, 262145
+; RV64I-NEXT: slli a0, a0, 1
; RV64I-NEXT: ret
ret i64 2147491840 ; 0x8000_2000
}
;
; RV64I-LABEL: imm_left_shifted_lui_2:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a0, 64
-; RV64I-NEXT: addiw a0, a0, 1
-; RV64I-NEXT: slli a0, a0, 14
+; RV64I-NEXT: lui a0, 262145
+; RV64I-NEXT: slli a0, a0, 2
; RV64I-NEXT: ret
ret i64 4294983680 ; 0x1_0000_4000
}
;
; RV64I-LABEL: imm_left_shifted_lui_3:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, 1
-; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: lui a0, 4097
+; RV64I-NEXT: slli a0, a0, 20
; RV64I-NEXT: ret
ret i64 17596481011712 ; 0x1001_0000_0000
}
;
; RV64I-LABEL: imm_right_shifted_lui_2:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a0, 65536
-; RV64I-NEXT: addiw a0, a0, -1
+; RV64I-NEXT: lui a0, 1044481
; RV64I-NEXT: slli a0, a0, 12
-; RV64I-NEXT: addi a0, a0, 1
+; RV64I-NEXT: srli a0, a0, 24
; RV64I-NEXT: ret
ret i64 1099511623681 ; 0xFF_FFFF_F001
}
;
; RV64I-LABEL: imm_decoupled_lui_addi:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, 1
-; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: lui a0, 4097
+; RV64I-NEXT: slli a0, a0, 20
; RV64I-NEXT: addi a0, a0, -3
; RV64I-NEXT: ret
ret i64 17596481011709 ; 0x1000_FFFF_FFFD
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -2032
; CHECK-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
-; CHECK-NEXT: lui a0, 95
-; CHECK-NEXT: addiw a0, a0, 1505
-; CHECK-NEXT: slli a0, a0, 13
+; CHECK-NEXT: lui a0, 390625
+; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: addi a0, a0, -2000
; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: call baz@plt
-; CHECK-NEXT: lui a0, 95
-; CHECK-NEXT: addiw a0, a0, 1505
-; CHECK-NEXT: slli a0, a0, 13
+; CHECK-NEXT: lui a0, 390625
+; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: addi a0, a0, -2000
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64I-LABEL: gorc16_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 16
-; RV64I-NEXT: lui a2, 1048560
-; RV64I-NEXT: addiw a2, a2, 1
-; RV64I-NEXT: slli a3, a2, 16
+; RV64I-NEXT: lui a2, 983041
+; RV64I-NEXT: slli a3, a2, 4
; RV64I-NEXT: addi a3, a3, -1
; RV64I-NEXT: slli a3, a3, 16
; RV64I-NEXT: and a1, a1, a3
; RV64I-NEXT: srli a3, a0, 16
-; RV64I-NEXT: slli a2, a2, 32
+; RV64I-NEXT: slli a2, a2, 20
; RV64I-NEXT: addi a2, a2, -1
; RV64I-NEXT: srli a2, a2, 16
; RV64I-NEXT: and a2, a3, a2
; RV64I-LABEL: grev16_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 16
-; RV64I-NEXT: lui a2, 1048560
-; RV64I-NEXT: addiw a2, a2, 1
-; RV64I-NEXT: slli a3, a2, 16
+; RV64I-NEXT: lui a2, 983041
+; RV64I-NEXT: slli a3, a2, 4
; RV64I-NEXT: addi a3, a3, -1
; RV64I-NEXT: slli a3, a3, 16
; RV64I-NEXT: and a1, a1, a3
; RV64I-NEXT: srli a0, a0, 16
-; RV64I-NEXT: slli a2, a2, 32
+; RV64I-NEXT: slli a2, a2, 20
; RV64I-NEXT: addi a2, a2, -1
; RV64I-NEXT: srli a2, a2, 16
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: addi a1, a1, 963
; RV64I-NEXT: and a1, a0, a1
; RV64I-NEXT: slli a2, a0, 2
-; RV64I-NEXT: lui a3, 48
-; RV64I-NEXT: addiw a3, a3, 771
-; RV64I-NEXT: slli a3, a3, 16
+; RV64I-NEXT: lui a3, 197379
+; RV64I-NEXT: slli a3, a3, 4
; RV64I-NEXT: addi a3, a3, 771
; RV64I-NEXT: slli a4, a3, 16
; RV64I-NEXT: addi a4, a4, 771
define i64 @shfl4_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: shfl4_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a1, 1048560
-; RV64I-NEXT: addiw a1, a1, 255
-; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: lui a1, 983295
+; RV64I-NEXT: slli a1, a1, 4
; RV64I-NEXT: addi a1, a1, 255
; RV64I-NEXT: slli a1, a1, 16
; RV64I-NEXT: addi a1, a1, 255
; RV64I-NEXT: addi a1, a1, 15
; RV64I-NEXT: and a1, a0, a1
; RV64I-NEXT: slli a2, a0, 4
+; RV64I-NEXT: lui a3, 983055
+; RV64I-NEXT: slli a3, a3, 4
+; RV64I-NEXT: addi a3, a3, 15
+; RV64I-NEXT: slli a3, a3, 16
+; RV64I-NEXT: addi a3, a3, 15
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: srli a3, a3, 4
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: srli a0, a0, 4
; RV64I-NEXT: lui a3, 240
; RV64I-NEXT: addiw a3, a3, 15
; RV64I-NEXT: slli a3, a3, 16
; RV64I-NEXT: addi a3, a3, 15
-; RV64I-NEXT: slli a4, a3, 12
-; RV64I-NEXT: addi a4, a4, 1
-; RV64I-NEXT: slli a4, a4, 12
-; RV64I-NEXT: addi a4, a4, -256
-; RV64I-NEXT: and a2, a2, a4
-; RV64I-NEXT: srli a0, a0, 4
; RV64I-NEXT: slli a3, a3, 20
; RV64I-NEXT: addi a3, a3, 240
; RV64I-NEXT: and a0, a0, a3
define i64 @shfl8_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: shfl8_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a1, 1048560
-; RV64I-NEXT: addiw a1, a1, 1
-; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: lui a1, 983041
+; RV64I-NEXT: slli a1, a1, 4
; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: slli a1, a1, 24
; RV64I-NEXT: addi a1, a1, 255
; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: and a1, a0, a1
; RV64I-NEXT: slli a2, a0, 16
-; RV64I-NEXT: lui a3, 16
-; RV64I-NEXT: addiw a3, a3, -1
-; RV64I-NEXT: slli a4, a3, 32
+; RV64I-NEXT: lui a3, 65535
+; RV64I-NEXT: slli a4, a3, 20
; RV64I-NEXT: and a2, a2, a4
; RV64I-NEXT: or a1, a2, a1
; RV64I-NEXT: srli a0, a0, 16
-; RV64I-NEXT: slli a2, a3, 16
+; RV64I-NEXT: slli a2, a3, 4
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV64-NEXT: vsrl.vi v25, v8, 16
-; RV64-NEXT: lui a0, 32
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: slli a0, a0, 15
+; RV64-NEXT: lui a0, 131071
+; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: vand.vx v25, v25, a0
; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; RV64-NEXT: vnsrl.wi v8, v25, 0
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 32
; RV64IM-NEXT: srli a1, a1, 32
-; RV64IM-NEXT: lui a2, 62
-; RV64IM-NEXT: addiw a2, a2, -711
-; RV64IM-NEXT: slli a2, a2, 14
+; RV64IM-NEXT: lui a2, 253241
+; RV64IM-NEXT: slli a2, a2, 2
; RV64IM-NEXT: addi a2, a2, -61
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 42
li t4, 0x123456789abcdef0
# CHECK-EXPAND: addi t5, zero, -1
li t5, 0xFFFFFFFFFFFFFFFF
-# CHECK-EXPAND: lui t6, 64
-# CHECK-EXPAND-NEXT: addiw t6, t6, 1
-# CHECK-EXPAND-NEXT: slli t6, t6, 13
+# CHECK-EXPAND: lui t6, 262145
+# CHECK-EXPAND-NEXT: slli t6, t6, 1
li t6, 0x80002000
-# CHECK-EXPAND: lui t0, 64
-# CHECK-EXPAND-NEXT: addiw t0, t0, 1
-# CHECK-EXPAND-NEXT: slli t0, t0, 14
+# CHECK-EXPAND: lui t0, 262145
+# CHECK-EXPAND-NEXT: slli t0, t0, 2
li x5, 0x100004000
-# CHECK-EXPAND: lui t1, 1
-# CHECK-EXPAND-NEXT: addiw t1, t1, 1
-# CHECK-EXPAND-NEXT: slli t1, t1, 32
+# CHECK-EXPAND: lui t1, 4097
+# CHECK-EXPAND-NEXT: slli t1, t1, 20
li x6, 0x100100000000
# CHECK-EXPAND: lui t2, 983056
# CHECK-EXPAND-NEXT: srli t2, t2, 16
li x7, 0xFFFFFFFFF001
-# CHECK-EXPAND: lui s0, 65536
-# CHECK-EXPAND-NEXT: addiw s0, s0, -1
+# CHECK-EXPAND: lui s0, 1044481
# CHECK-EXPAND-NEXT: slli s0, s0, 12
-# CHECK-EXPAND-NEXT: addi s0, s0, 1
+# CHECK-EXPAND-NEXT: srli s0, s0, 24
li x8, 0xFFFFFFF001
-# CHECK-EXPAND: lui s1, 1
-# CHECK-EXPAND-NEXT: addiw s1, s1, 1
-# CHECK-EXPAND-NEXT: slli s1, s1, 32
+# CHECK-EXPAND: lui s1, 4097
+# CHECK-EXPAND-NEXT: slli s1, s1, 20
# CHECK-EXPAND-NEXT: addi s1, s1, -3
li x9, 0x1000FFFFFFFD
# CHECK-EXPAND: addi a0, zero, -1