From 86e604c4d68528478333a8901d7c79c09ca16fa8 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 15 Jan 2021 10:54:26 -0800 Subject: [PATCH] [RISCV] Add implementation of targetShrinkDemandedConstant to optimize AND immediates. SimplifyDemandedBits can remove set bits from immediates from instructions like AND/OR/XOR. This can prevent them from being efficiently codegened on RISCV. This adds an initial version that tries to keep or form 12 bit sign extended immediates for AND operations to enable use of ANDI. If that doesn't work we'll try to create a 32 bit sign extended immediate to use LUI+ADDIW. More optimizations are possible for different size immediates or different operations. But this is a good starting point that already has test coverage. Reviewed By: frasercrmck Differential Revision: https://reviews.llvm.org/D94628 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 65 ++++++ llvm/lib/Target/RISCV/RISCVISelLowering.h | 4 + llvm/test/CodeGen/RISCV/copysign-casts.ll | 8 +- llvm/test/CodeGen/RISCV/frame-info.ll | 10 +- .../RISCV/half-bitmanip-dagcombines.ll | 5 +- llvm/test/CodeGen/RISCV/rv64Zbp.ll | 200 +++++++----------- llvm/test/CodeGen/RISCV/srem-lkk.ll | 22 +- llvm/test/CodeGen/RISCV/srem-vector-lkk.ll | 98 ++++----- ...realignment-with-variable-sized-objects.ll | 5 +- llvm/test/CodeGen/RISCV/vararg.ll | 10 +- 10 files changed, 196 insertions(+), 231 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 9b034e594f63..16e3a94839c6 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1900,6 +1900,71 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift( return true; } +bool RISCVTargetLowering::targetShrinkDemandedConstant( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + TargetLoweringOpt &TLO) const { + // Delay this optimization as late as possible. 
+ if (!TLO.LegalOps) + return false; + + EVT VT = Op.getValueType(); + if (VT.isVector()) + return false; + + // Only handle AND for now. + if (Op.getOpcode() != ISD::AND) + return false; + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + if (!C) + return false; + + const APInt &Mask = C->getAPIntValue(); + + // Clear all non-demanded bits initially. + APInt ShrunkMask = Mask & DemandedBits; + + // If the shrunk mask fits in sign extended 12 bits, let the target + // independent code apply it. + if (ShrunkMask.isSignedIntN(12)) + return false; + + // Try to make a smaller immediate by setting undemanded bits. + + // We need to be able to make a negative number through a combination of mask + // and undemanded bits. + APInt ExpandedMask = Mask | ~DemandedBits; + if (!ExpandedMask.isNegative()) + return false; + + // What is the fewest number of bits we need to represent the negative number. + unsigned MinSignedBits = ExpandedMask.getMinSignedBits(); + + // Try to make a 12 bit negative immediate. If that fails try to make a 32 + // bit negative immediate unless the shrunk immediate already fits in 32 bits. + APInt NewMask = ShrunkMask; + if (MinSignedBits <= 12) + NewMask.setBitsFrom(11); + else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) + NewMask.setBitsFrom(31); + else + return false; + + // Sanity check that our new mask is a subset of the demanded mask. + assert(NewMask.isSubsetOf(ExpandedMask)); + + // If we aren't changing the mask, just return true to keep it and prevent + // the caller from optimizing. + if (NewMask == Mask) + return true; + + // Replace the constant with the new mask. 
+ SDLoc DL(Op); + SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT); + SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); + return TLO.CombineTo(Op, NewOp); +} + void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 081d62a33ef0..b1f6e55766ff 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -131,6 +131,10 @@ public: SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, + const APInt &DemandedElts, + TargetLoweringOpt &TLO) const override; + void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, diff --git a/llvm/test/CodeGen/RISCV/copysign-casts.ll b/llvm/test/CodeGen/RISCV/copysign-casts.ll index ad80ae63ca9b..f19eca7d4cac 100644 --- a/llvm/test/CodeGen/RISCV/copysign-casts.ll +++ b/llvm/test/CodeGen/RISCV/copysign-casts.ll @@ -42,8 +42,7 @@ define double @fold_promote_d_s(double %a, float %b) nounwind { ; RV64I-NEXT: slli a2, a2, 63 ; RV64I-NEXT: addi a2, a2, -1 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: addi a2, zero, 1 -; RV64I-NEXT: slli a2, a2, 31 +; RV64I-NEXT: lui a2, 524288 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a0, a0, a1 @@ -188,10 +187,7 @@ define float @fold_promote_f_h(float %a, half %b) nounwind { ; RV64I-NEXT: lui a2, 524288 ; RV64I-NEXT: addiw a2, a2, -1 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: addi a2, zero, 1 -; RV64I-NEXT: slli a2, a2, 33 -; RV64I-NEXT: addi a2, a2, -1 -; RV64I-NEXT: slli a2, a2, 15 +; RV64I-NEXT: lui a2, 1048568 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: slli a1, a1, 16 ; RV64I-NEXT: or a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/frame-info.ll b/llvm/test/CodeGen/RISCV/frame-info.ll index 
358b075ee75b..a793e2c190b9 100644 --- a/llvm/test/CodeGen/RISCV/frame-info.ll +++ b/llvm/test/CodeGen/RISCV/frame-info.ll @@ -82,10 +82,7 @@ define void @stack_alloc(i32 signext %size) { ; RV64-NEXT: slli a0, a0, 32 ; RV64-NEXT: srli a0, a0, 32 ; RV64-NEXT: addi a0, a0, 15 -; RV64-NEXT: addi a1, zero, 1 -; RV64-NEXT: slli a1, a1, 33 -; RV64-NEXT: addi a1, a1, -16 -; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: andi a0, a0, -16 ; RV64-NEXT: sub a0, sp, a0 ; RV64-NEXT: mv sp, a0 ; RV64-NEXT: call callee_with_args@plt @@ -129,10 +126,7 @@ define void @stack_alloc(i32 signext %size) { ; RV64-WITHFP-NEXT: slli a0, a0, 32 ; RV64-WITHFP-NEXT: srli a0, a0, 32 ; RV64-WITHFP-NEXT: addi a0, a0, 15 -; RV64-WITHFP-NEXT: addi a1, zero, 1 -; RV64-WITHFP-NEXT: slli a1, a1, 33 -; RV64-WITHFP-NEXT: addi a1, a1, -16 -; RV64-WITHFP-NEXT: and a0, a0, a1 +; RV64-WITHFP-NEXT: andi a0, a0, -16 ; RV64-WITHFP-NEXT: sub a0, sp, a0 ; RV64-WITHFP-NEXT: mv sp, a0 ; RV64-WITHFP-NEXT: call callee_with_args@plt diff --git a/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll index 5fddeea99725..8f4cce9f77d6 100644 --- a/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll +++ b/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll @@ -122,10 +122,7 @@ define half @fcopysign_fneg(half %a, half %b) nounwind { ; RV64I-NEXT: lui a2, 524288 ; RV64I-NEXT: addiw a2, a2, -1 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: addi a2, zero, 1 -; RV64I-NEXT: slli a2, a2, 33 -; RV64I-NEXT: addi a2, a2, -1 -; RV64I-NEXT: slli a2, a2, 15 +; RV64I-NEXT: lui a2, 1048568 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: slli a1, a1, 16 ; RV64I-NEXT: or a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rv64Zbp.ll b/llvm/test/CodeGen/RISCV/rv64Zbp.ll index 9f0918c16314..c040261dd990 100644 --- a/llvm/test/CodeGen/RISCV/rv64Zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv64Zbp.ll @@ -10,10 +10,8 @@ define signext i32 @gorc1_i32(i32 signext %a) nounwind { ; RV64I-LABEL: gorc1_i32: ; RV64I: # 
%bb.0: ; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 171 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 +; RV64I-NEXT: lui a2, 699051 +; RV64I-NEXT: addiw a2, a2, -1366 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a2, a0, 1 ; RV64I-NEXT: lui a3, 349525 @@ -91,10 +89,8 @@ define signext i32 @gorc2_i32(i32 signext %a) nounwind { ; RV64I-LABEL: gorc2_i32: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 205 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 +; RV64I-NEXT: lui a2, 838861 +; RV64I-NEXT: addiw a2, a2, -820 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a2, a0, 2 ; RV64I-NEXT: lui a3, 209715 @@ -182,10 +178,8 @@ define signext i32 @gorc3_i32(i32 signext %a) nounwind { ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 205 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 +; RV64I-NEXT: lui a2, 838861 +; RV64I-NEXT: addiw a2, a2, -820 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a2, a0, 2 ; RV64I-NEXT: lui a3, 209715 @@ -297,10 +291,8 @@ define signext i32 @gorc4_i32(i32 signext %a) nounwind { ; RV64I-LABEL: gorc4_i32: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 241 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 +; RV64I-NEXT: lui a2, 986895 +; RV64I-NEXT: addiw a2, a2, 240 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a2, a0, 4 ; RV64I-NEXT: lui a3, 61681 @@ -388,10 +380,8 @@ define signext i32 @gorc5_i32(i32 signext %a) nounwind { ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 241 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 +; RV64I-NEXT: lui a2, 986895 +; RV64I-NEXT: addiw a2, a2, 240 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: 
srli a2, a0, 4 ; RV64I-NEXT: lui a3, 61681 @@ -513,10 +503,8 @@ define signext i32 @gorc6_i32(i32 signext %a) nounwind { ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 241 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 +; RV64I-NEXT: lui a2, 986895 +; RV64I-NEXT: addiw a2, a2, 240 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a2, a0, 4 ; RV64I-NEXT: lui a3, 61681 @@ -648,10 +636,8 @@ define signext i32 @gorc7_i32(i32 signext %a) nounwind { ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 241 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 +; RV64I-NEXT: lui a2, 986895 +; RV64I-NEXT: addiw a2, a2, 240 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a2, a0, 4 ; RV64I-NEXT: lui a3, 61681 @@ -797,10 +783,8 @@ define signext i32 @gorc8_i32(i32 signext %a) nounwind { ; RV64I-LABEL: gorc8_i32: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 8 -; RV64I-NEXT: lui a2, 16 -; RV64I-NEXT: addiw a2, a2, -255 -; RV64I-NEXT: slli a2, a2, 16 -; RV64I-NEXT: addi a2, a2, -256 +; RV64I-NEXT: lui a2, 1044496 +; RV64I-NEXT: addiw a2, a2, -256 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a2, a0, 8 ; RV64I-NEXT: lui a3, 4080 @@ -1015,20 +999,16 @@ define signext i32 @gorc2b_i32(i32 signext %a) nounwind { ; RV64I-NEXT: lui a2, 838861 ; RV64I-NEXT: addiw a2, a2, -820 ; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 2 -; RV64I-NEXT: lui a3, 209715 -; RV64I-NEXT: addiw a3, a3, 819 -; RV64I-NEXT: and a2, a2, a3 -; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: srli a3, a0, 2 +; RV64I-NEXT: lui a4, 209715 +; RV64I-NEXT: addiw a4, a4, 819 +; RV64I-NEXT: and a3, a3, a4 +; RV64I-NEXT: or a0, a3, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 205 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 ; 
RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a2, a0, 2 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: and a2, a2, a4 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: sext.w a0, a0 @@ -1124,30 +1104,26 @@ define signext i32 @gorc3b_i32(i32 signext %a) nounwind { ; RV64I-NEXT: lui a2, 699051 ; RV64I-NEXT: addiw a2, a2, -1366 ; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 1 -; RV64I-NEXT: lui a3, 349525 -; RV64I-NEXT: addiw a3, a3, 1365 -; RV64I-NEXT: and a2, a2, a3 -; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: lui a4, 349525 +; RV64I-NEXT: addiw a4, a4, 1365 +; RV64I-NEXT: and a3, a3, a4 +; RV64I-NEXT: or a0, a3, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 838861 -; RV64I-NEXT: addiw a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 2 -; RV64I-NEXT: lui a4, 209715 -; RV64I-NEXT: addiw a4, a4, 819 -; RV64I-NEXT: and a2, a2, a4 -; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: lui a3, 838861 +; RV64I-NEXT: addiw a3, a3, -820 +; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: srli a3, a0, 2 +; RV64I-NEXT: lui a5, 209715 +; RV64I-NEXT: addiw a5, a5, 819 +; RV64I-NEXT: and a3, a3, a5 +; RV64I-NEXT: or a0, a3, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 171 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a2, a0, 1 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: and a2, a2, a4 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: sext.w a0, a0 @@ -1319,10 +1295,8 @@ define signext i32 @grev1_i32(i32 signext %a) nounwind { ; RV64I-LABEL: grev1_i32: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 171 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 +; RV64I-NEXT: lui a2, 699051 +; RV64I-NEXT: addiw a2, a2, -1366 ; RV64I-NEXT: and a1, a1, a2 ; 
RV64I-NEXT: srli a0, a0, 1 ; RV64I-NEXT: lui a2, 349525 @@ -1396,10 +1370,8 @@ define signext i32 @grev2_i32(i32 signext %a) nounwind { ; RV64I-LABEL: grev2_i32: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 205 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 +; RV64I-NEXT: lui a2, 838861 +; RV64I-NEXT: addiw a2, a2, -820 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a0, a0, 2 ; RV64I-NEXT: lui a2, 209715 @@ -1482,10 +1454,8 @@ define signext i32 @grev3_i32(i32 signext %a) nounwind { ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 205 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 +; RV64I-NEXT: lui a2, 838861 +; RV64I-NEXT: addiw a2, a2, -820 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a0, a0, 2 ; RV64I-NEXT: lui a2, 209715 @@ -1590,10 +1560,8 @@ define signext i32 @grev4_i32(i32 signext %a) nounwind { ; RV64I-LABEL: grev4_i32: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 241 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 +; RV64I-NEXT: lui a2, 986895 +; RV64I-NEXT: addiw a2, a2, 240 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a0, a0, 4 ; RV64I-NEXT: lui a2, 61681 @@ -1676,10 +1644,8 @@ define signext i32 @grev5_i32(i32 signext %a) nounwind { ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 241 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 +; RV64I-NEXT: lui a2, 986895 +; RV64I-NEXT: addiw a2, a2, 240 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a0, a0, 4 ; RV64I-NEXT: lui a2, 61681 @@ -1794,10 +1760,8 @@ define signext i32 @grev6_i32(i32 signext %a) nounwind { ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 241 -; RV64I-NEXT: 
addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 +; RV64I-NEXT: lui a2, 986895 +; RV64I-NEXT: addiw a2, a2, 240 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a0, a0, 4 ; RV64I-NEXT: lui a2, 61681 @@ -1920,10 +1884,8 @@ define signext i32 @grev7_i32(i32 signext %a) nounwind { ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 241 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 +; RV64I-NEXT: lui a2, 986895 +; RV64I-NEXT: addiw a2, a2, 240 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a0, a0, 4 ; RV64I-NEXT: lui a2, 61681 @@ -2059,10 +2021,8 @@ define signext i32 @grev8_i32(i32 signext %a) nounwind { ; RV64I-LABEL: grev8_i32: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 8 -; RV64I-NEXT: lui a2, 16 -; RV64I-NEXT: addiw a2, a2, -255 -; RV64I-NEXT: slli a2, a2, 16 -; RV64I-NEXT: addi a2, a2, -256 +; RV64I-NEXT: lui a2, 1044496 +; RV64I-NEXT: addiw a2, a2, -256 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a0, a0, 8 ; RV64I-NEXT: lui a2, 4080 @@ -2273,10 +2233,8 @@ define signext i32 @grev3b_i32(i32 signext %a) nounwind { ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 171 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 +; RV64I-NEXT: lui a2, 699051 +; RV64I-NEXT: addiw a2, a2, -1366 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a0, a0, 1 ; RV64I-NEXT: lui a2, 349525 @@ -2386,27 +2344,23 @@ define signext i32 @grev2b_i32(i32 signext %a) nounwind { ; RV64I-NEXT: addiw a2, a2, -1366 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: lui a2, 349525 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a3, 349525 +; RV64I-NEXT: addiw a3, a3, 1365 +; RV64I-NEXT: and a0, a0, a3 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a3, 838861 -; 
RV64I-NEXT: addiw a3, a3, -820 -; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: lui a4, 838861 +; RV64I-NEXT: addiw a4, a4, -820 +; RV64I-NEXT: and a1, a1, a4 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: lui a3, 209715 -; RV64I-NEXT: addiw a3, a3, 819 -; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: lui a4, 209715 +; RV64I-NEXT: addiw a4, a4, 819 +; RV64I-NEXT: and a0, a0, a4 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a3, 171 -; RV64I-NEXT: addiw a3, a3, -1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -1366 -; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: and a0, a0, a3 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret @@ -2536,9 +2490,9 @@ define signext i32 @grev0_i32(i32 signext %a) nounwind { ; RV64I-NEXT: addiw a4, a4, -820 ; RV64I-NEXT: and a1, a1, a4 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: lui a4, 209715 -; RV64I-NEXT: addiw a4, a4, 819 -; RV64I-NEXT: and a0, a0, a4 +; RV64I-NEXT: lui a5, 209715 +; RV64I-NEXT: addiw a5, a5, 819 +; RV64I-NEXT: and a0, a0, a5 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: slli a1, a0, 1 ; RV64I-NEXT: and a1, a1, a2 @@ -2546,13 +2500,9 @@ define signext i32 @grev0_i32(i32 signext %a) nounwind { ; RV64I-NEXT: and a0, a0, a3 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 205 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a1, a1, a4 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a4 +; RV64I-NEXT: and a0, a0, a5 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret @@ -3196,10 +3146,7 @@ define i32 @bswap_rotr_i32(i32 %a) { ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: addi a2, zero, 1 -; RV64I-NEXT: slli a2, a2, 32 -; RV64I-NEXT: addi a2, a2, -1 -; RV64I-NEXT: slli a2, 
a2, 16 +; RV64I-NEXT: lui a2, 1048560 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: or a0, a0, a1 @@ -3252,10 +3199,7 @@ define i32 @bswap_rotl_i32(i32 %a) { ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: addi a2, zero, 1 -; RV64I-NEXT: slli a2, a2, 32 -; RV64I-NEXT: addi a2, a2, -1 -; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: lui a2, 1048560 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: or a0, a1, a0 diff --git a/llvm/test/CodeGen/RISCV/srem-lkk.ll b/llvm/test/CodeGen/RISCV/srem-lkk.ll index 0030e6d0b8c3..eaf3db84e18b 100644 --- a/llvm/test/CodeGen/RISCV/srem-lkk.ll +++ b/llvm/test/CodeGen/RISCV/srem-lkk.ll @@ -360,10 +360,7 @@ define i32 @dont_fold_srem_power_of_two(i32 %x) nounwind { ; RV64I-NEXT: srli a1, a1, 57 ; RV64I-NEXT: andi a1, a1, 63 ; RV64I-NEXT: add a1, a0, a1 -; RV64I-NEXT: addi a2, zero, 1 -; RV64I-NEXT: slli a2, a2, 32 -; RV64I-NEXT: addi a2, a2, -64 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: andi a1, a1, -64 ; RV64I-NEXT: subw a0, a0, a1 ; RV64I-NEXT: ret ; @@ -373,10 +370,7 @@ define i32 @dont_fold_srem_power_of_two(i32 %x) nounwind { ; RV64IM-NEXT: srli a1, a1, 57 ; RV64IM-NEXT: andi a1, a1, 63 ; RV64IM-NEXT: add a1, a0, a1 -; RV64IM-NEXT: addi a2, zero, 1 -; RV64IM-NEXT: slli a2, a2, 32 -; RV64IM-NEXT: addi a2, a2, -64 -; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: andi a1, a1, -64 ; RV64IM-NEXT: subw a0, a0, a1 ; RV64IM-NEXT: ret %1 = srem i32 %x, 64 @@ -420,11 +414,9 @@ define i32 @dont_fold_srem_i32_smax(i32 %x) nounwind { ; RV64I-NEXT: sext.w a1, a0 ; RV64I-NEXT: srli a1, a1, 32 ; RV64I-NEXT: lui a2, 524288 -; RV64I-NEXT: addiw a2, a2, -1 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: addiw a3, a2, -1 +; RV64I-NEXT: and a1, a1, a3 ; RV64I-NEXT: add a1, a0, a1 -; RV64I-NEXT: addi a2, zero, 1 -; RV64I-NEXT: slli a2, a2, 31 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret @@ -434,11 +426,9 @@ define i32 
@dont_fold_srem_i32_smax(i32 %x) nounwind { ; RV64IM-NEXT: sext.w a1, a0 ; RV64IM-NEXT: srli a1, a1, 32 ; RV64IM-NEXT: lui a2, 524288 -; RV64IM-NEXT: addiw a2, a2, -1 -; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: addiw a3, a2, -1 +; RV64IM-NEXT: and a1, a1, a3 ; RV64IM-NEXT: add a1, a0, a1 -; RV64IM-NEXT: addi a2, zero, 1 -; RV64IM-NEXT: slli a2, a2, 31 ; RV64IM-NEXT: and a1, a1, a2 ; RV64IM-NEXT: addw a0, a0, a1 ; RV64IM-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll index 9cf570e3fc03..e415825cd073 100644 --- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll @@ -677,19 +677,15 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind { ; RV32I-NEXT: lh a1, 4(a1) ; RV32I-NEXT: srli a4, a2, 26 ; RV32I-NEXT: add a4, a2, a4 -; RV32I-NEXT: lui a6, 16 -; RV32I-NEXT: addi a5, a6, -64 -; RV32I-NEXT: and a4, a4, a5 +; RV32I-NEXT: andi a4, a4, -64 ; RV32I-NEXT: sub s2, a2, a4 ; RV32I-NEXT: srli a2, a1, 27 ; RV32I-NEXT: add a2, a1, a2 -; RV32I-NEXT: addi a4, a6, -32 -; RV32I-NEXT: and a2, a2, a4 +; RV32I-NEXT: andi a2, a2, -32 ; RV32I-NEXT: sub s3, a1, a2 ; RV32I-NEXT: srli a1, a3, 29 ; RV32I-NEXT: add a1, a3, a1 -; RV32I-NEXT: addi a2, a6, -8 -; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: andi a1, a1, -8 ; RV32I-NEXT: sub s1, a3, a1 ; RV32I-NEXT: addi a1, zero, 95 ; RV32I-NEXT: call __modsi3@plt @@ -707,7 +703,7 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind { ; ; RV32IM-LABEL: dont_fold_srem_power_of_two: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lh a6, 8(a1) +; RV32IM-NEXT: lh a2, 8(a1) ; RV32IM-NEXT: lh a3, 4(a1) ; RV32IM-NEXT: lh a4, 12(a1) ; RV32IM-NEXT: lh a1, 0(a1) @@ -715,32 +711,28 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind { ; RV32IM-NEXT: addi a5, a5, 389 ; RV32IM-NEXT: mulh a5, a4, a5 ; RV32IM-NEXT: add a5, a5, a4 -; RV32IM-NEXT: srli a2, a5, 31 +; RV32IM-NEXT: srli a6, a5, 31 ; RV32IM-NEXT: srli 
a5, a5, 6 -; RV32IM-NEXT: add a2, a5, a2 +; RV32IM-NEXT: add a6, a5, a6 ; RV32IM-NEXT: addi a5, zero, 95 -; RV32IM-NEXT: mul a2, a2, a5 -; RV32IM-NEXT: sub a7, a4, a2 -; RV32IM-NEXT: srli a4, a1, 26 -; RV32IM-NEXT: add a4, a1, a4 -; RV32IM-NEXT: lui a5, 16 -; RV32IM-NEXT: addi a2, a5, -64 -; RV32IM-NEXT: and a2, a4, a2 -; RV32IM-NEXT: sub a1, a1, a2 -; RV32IM-NEXT: srli a2, a3, 27 -; RV32IM-NEXT: add a2, a3, a2 -; RV32IM-NEXT: addi a4, a5, -32 -; RV32IM-NEXT: and a2, a2, a4 -; RV32IM-NEXT: sub a2, a3, a2 -; RV32IM-NEXT: srli a3, a6, 29 -; RV32IM-NEXT: add a3, a6, a3 -; RV32IM-NEXT: addi a4, a5, -8 -; RV32IM-NEXT: and a3, a3, a4 -; RV32IM-NEXT: sub a3, a6, a3 -; RV32IM-NEXT: sh a3, 4(a0) -; RV32IM-NEXT: sh a2, 2(a0) +; RV32IM-NEXT: mul a5, a6, a5 +; RV32IM-NEXT: sub a4, a4, a5 +; RV32IM-NEXT: srli a5, a1, 26 +; RV32IM-NEXT: add a5, a1, a5 +; RV32IM-NEXT: andi a5, a5, -64 +; RV32IM-NEXT: sub a1, a1, a5 +; RV32IM-NEXT: srli a5, a3, 27 +; RV32IM-NEXT: add a5, a3, a5 +; RV32IM-NEXT: andi a5, a5, -32 +; RV32IM-NEXT: sub a3, a3, a5 +; RV32IM-NEXT: srli a5, a2, 29 +; RV32IM-NEXT: add a5, a2, a5 +; RV32IM-NEXT: andi a5, a5, -8 +; RV32IM-NEXT: sub a2, a2, a5 +; RV32IM-NEXT: sh a2, 4(a0) +; RV32IM-NEXT: sh a3, 2(a0) ; RV32IM-NEXT: sh a1, 0(a0) -; RV32IM-NEXT: sh a7, 6(a0) +; RV32IM-NEXT: sh a4, 6(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: dont_fold_srem_power_of_two: @@ -758,19 +750,15 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind { ; RV64I-NEXT: lh a1, 8(a1) ; RV64I-NEXT: srli a4, a2, 58 ; RV64I-NEXT: add a4, a2, a4 -; RV64I-NEXT: lui a6, 16 -; RV64I-NEXT: addiw a5, a6, -64 -; RV64I-NEXT: and a4, a4, a5 +; RV64I-NEXT: andi a4, a4, -64 ; RV64I-NEXT: sub s2, a2, a4 ; RV64I-NEXT: srli a2, a1, 59 ; RV64I-NEXT: add a2, a1, a2 -; RV64I-NEXT: addiw a4, a6, -32 -; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: andi a2, a2, -32 ; RV64I-NEXT: sub s3, a1, a2 ; RV64I-NEXT: srli a1, a3, 61 ; RV64I-NEXT: add a1, a3, a1 -; RV64I-NEXT: addiw a2, a6, -8 -; RV64I-NEXT: and 
a1, a1, a2 +; RV64I-NEXT: andi a1, a1, -8 ; RV64I-NEXT: sub s1, a3, a1 ; RV64I-NEXT: addi a1, zero, 95 ; RV64I-NEXT: call __moddi3@plt @@ -807,27 +795,23 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind { ; RV64IM-NEXT: add a2, a5, a2 ; RV64IM-NEXT: addi a5, zero, 95 ; RV64IM-NEXT: mul a2, a2, a5 -; RV64IM-NEXT: sub a7, a1, a2 +; RV64IM-NEXT: sub a1, a1, a2 ; RV64IM-NEXT: srli a2, a4, 58 ; RV64IM-NEXT: add a2, a4, a2 -; RV64IM-NEXT: lui a5, 16 -; RV64IM-NEXT: addiw a1, a5, -64 -; RV64IM-NEXT: and a1, a2, a1 -; RV64IM-NEXT: sub a1, a4, a1 -; RV64IM-NEXT: srli a2, a3, 59 -; RV64IM-NEXT: add a2, a3, a2 -; RV64IM-NEXT: addiw a4, a5, -32 -; RV64IM-NEXT: and a2, a2, a4 -; RV64IM-NEXT: sub a2, a3, a2 -; RV64IM-NEXT: srli a3, a6, 61 -; RV64IM-NEXT: add a3, a6, a3 -; RV64IM-NEXT: addiw a4, a5, -8 -; RV64IM-NEXT: and a3, a3, a4 -; RV64IM-NEXT: sub a3, a6, a3 -; RV64IM-NEXT: sh a3, 4(a0) -; RV64IM-NEXT: sh a2, 2(a0) -; RV64IM-NEXT: sh a1, 0(a0) -; RV64IM-NEXT: sh a7, 6(a0) +; RV64IM-NEXT: andi a2, a2, -64 +; RV64IM-NEXT: sub a2, a4, a2 +; RV64IM-NEXT: srli a4, a3, 59 +; RV64IM-NEXT: add a4, a3, a4 +; RV64IM-NEXT: andi a4, a4, -32 +; RV64IM-NEXT: sub a3, a3, a4 +; RV64IM-NEXT: srli a4, a6, 61 +; RV64IM-NEXT: add a4, a6, a4 +; RV64IM-NEXT: andi a4, a4, -8 +; RV64IM-NEXT: sub a4, a6, a4 +; RV64IM-NEXT: sh a4, 4(a0) +; RV64IM-NEXT: sh a3, 2(a0) +; RV64IM-NEXT: sh a2, 0(a0) +; RV64IM-NEXT: sh a1, 6(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95> ret <4 x i16> %1 diff --git a/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll b/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll index 7f7e1c4c7e13..b7f29b18c641 100644 --- a/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll +++ b/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll @@ -51,10 +51,7 @@ define void @caller(i32 %n) { ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: addi a0, a0, 15 -; 
RV64I-NEXT: addi a1, zero, 1 -; RV64I-NEXT: slli a1, a1, 33 -; RV64I-NEXT: addi a1, a1, -16 -; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: andi a0, a0, -16 ; RV64I-NEXT: sub a0, sp, a0 ; RV64I-NEXT: mv sp, a0 ; RV64I-NEXT: mv a1, s1 diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll index 7c50fa3883bb..0f2001b2746a 100644 --- a/llvm/test/CodeGen/RISCV/vararg.ll +++ b/llvm/test/CodeGen/RISCV/vararg.ll @@ -361,10 +361,7 @@ define i32 @va1_va_arg_alloca(i8* %fmt, ...) nounwind { ; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a1, 32 ; LP64-LP64F-LP64D-FPELIM-NEXT: srli a0, a0, 32 ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 15 -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, zero, 1 -; LP64-LP64F-LP64D-FPELIM-NEXT: slli a1, a1, 33 -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, a1, -16 -; LP64-LP64F-LP64D-FPELIM-NEXT: and a0, a0, a1 +; LP64-LP64F-LP64D-FPELIM-NEXT: andi a0, a0, -16 ; LP64-LP64F-LP64D-FPELIM-NEXT: sub a0, sp, a0 ; LP64-LP64F-LP64D-FPELIM-NEXT: mv sp, a0 ; LP64-LP64F-LP64D-FPELIM-NEXT: call notdead@plt @@ -396,10 +393,7 @@ define i32 @va1_va_arg_alloca(i8* %fmt, ...) nounwind { ; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a1, 32 ; LP64-LP64F-LP64D-WITHFP-NEXT: srli a0, a0, 32 ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 15 -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, zero, 1 -; LP64-LP64F-LP64D-WITHFP-NEXT: slli a1, a1, 33 -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, a1, -16 -; LP64-LP64F-LP64D-WITHFP-NEXT: and a0, a0, a1 +; LP64-LP64F-LP64D-WITHFP-NEXT: andi a0, a0, -16 ; LP64-LP64F-LP64D-WITHFP-NEXT: sub a0, sp, a0 ; LP64-LP64F-LP64D-WITHFP-NEXT: mv sp, a0 ; LP64-LP64F-LP64D-WITHFP-NEXT: call notdead@plt -- 2.34.1