From: Fraser Cormack
Date: Fri, 16 Jul 2021 14:12:46 +0000 (+0100)
Subject: [RISCV] Lower more BUILD_VECTOR sequences to RVV's VID
X-Git-Tag: llvmorg-14-init~649
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=7b3a69bc16515b0b28c520fba0fe0f51fcd3ea33;p=platform%2Fupstream%2Fllvm.git

[RISCV] Lower more BUILD_VECTOR sequences to RVV's VID

This relands a6ca88e908b5befcd9b0f8c8cb40f53095cc17bc which was originally
reverted due to overflow bugs in e3fa2b1eab60342dc882b7b888658b03c472fa2b.

This patch teaches the compiler to identify a wider variety of
`BUILD_VECTOR`s which form integer arithmetic sequences, and to lower
them to `vid.v` with modifications for non-unit steps and non-zero
addends.

The sequences handled by this optimization must be either monotonically
increasing or decreasing.

Consecutive elements holding the same value indicate a fractional step
which, while simple mathematically, becomes more complex to handle both
in the realm of lossy integer division and in the presence of `undef`s.
For example, a common "interleaving" shuffle index will be lowered by
LLVM to both `<0,u,1,u,2,...>` and `<u,0,u,1,u,...>` `BUILD_VECTOR`
nodes. Either of these would ideally be lowered to `vid.v` shifted right
by 1. Detection of this sequence in the presence of general `undef`
values is more complicated, however: `<0,u,u,1,...>` could match either
`<0,0,0,1,...>` or `<0,0,1,1,...>` depending on later values in the
sequence. Both are possible, so backtracking or multiple passes are
inevitable.

Sticking to monotonic sequences keeps the logic simpler as it can be
done in one pass. Fractional steps will likely be a separate
optimization in a future patch.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D104921
---

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a8f804e..c3b3a2f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1385,6 +1385,78 @@ static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
   return convertFromScalableVector(VT, Splat, DAG, Subtarget);
 }
 
+struct VIDSequence {
+  int64_t Step;
+  int64_t Addend;
+};
+
+// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
+// to the (non-zero) step S and start value X. This can then be lowered as the
+// RVV sequence (VID * S) + X, for example.
+// Note that this method will also match potentially unappealing index
+// sequences; however, it is left to the caller to determine whether this is
+// worth generating code for.
+static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
+  unsigned NumElts = Op.getNumOperands();
+  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
+  if (!Op.getValueType().isInteger())
+    return None;
+
+  Optional<int64_t> SeqStep, SeqAddend;
+  Optional<std::pair<uint64_t, unsigned>> PrevElt;
+  unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
+  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
+    // Assume undef elements match the sequence; we just have to be careful
+    // when interpolating across them.
+    if (Op.getOperand(Idx).isUndef())
+      continue;
+    // The BUILD_VECTOR must be all constants.
+    if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
+      return None;
+
+    uint64_t Val = Op.getConstantOperandVal(Idx) &
+                   maskTrailingOnes<uint64_t>(EltSizeInBits);
+
+    if (PrevElt) {
+      // Calculate the step since the last non-undef element, and ensure
+      // it's consistent across the entire sequence.
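+      // For instance (illustrative): when matching <0, undef, undef, 6>, the
+      // value 6 at index 3 is paired against the previous non-undef element 0
+      // at index 0, giving a difference of 6 over a span of 3 indices and
+      // therefore a step of 2.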
+      int64_t Diff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
+      // The difference must cleanly divide the element span.
+      if (Diff % (Idx - PrevElt->second) != 0)
+        return None;
+      int64_t Step = Diff / (Idx - PrevElt->second);
+      // A zero step indicates that we're either not an index sequence, or
+      // that we have a fractional step. This must be handled by more complex
+      // pattern recognition (undefs complicate things here).
+      if (Step == 0)
+        return None;
+      if (!SeqStep)
+        SeqStep = Step;
+      else if (Step != SeqStep)
+        return None;
+    }
+
+    // Record and/or check any addend.
+    if (SeqStep) {
+      int64_t Addend =
+          SignExtend64(Val - (Idx * (uint64_t)*SeqStep), EltSizeInBits);
+      if (!SeqAddend)
+        SeqAddend = Addend;
+      else if (SeqAddend != Addend)
+        return None;
+    }
+
+    // Record this non-undef element for later.
+    PrevElt = std::make_pair(Val, Idx);
+  }
+  // We need to have logged both a step and an addend for this to count as
+  // a legal index sequence.
+  if (!SeqStep || !SeqAddend)
+    return None;
+
+  return VIDSequence{*SeqStep, *SeqAddend};
+}
+
 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
   MVT VT = Op.getSimpleValueType();
@@ -1512,18 +1584,41 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
   }
 
-  // Try and match an index sequence, which we can lower directly to the vid
-  // instruction. An all-undef vector is matched by getSplatValue, above.
-  if (VT.isInteger()) {
-    bool IsVID = true;
-    for (unsigned I = 0; I < NumElts && IsVID; I++)
-      IsVID &= Op.getOperand(I).isUndef() ||
-               (isa<ConstantSDNode>(Op.getOperand(I)) &&
-                Op.getConstantOperandVal(I) == I);
-
-    if (IsVID) {
+  // Try and match index sequences, which we can lower to the vid instruction
+  // with optional modifications. An all-undef vector is matched by
+  // getSplatValue, above.
+  if (auto SimpleVID = isSimpleVIDSequence(Op)) {
+    int64_t Step = SimpleVID->Step;
+    int64_t Addend = SimpleVID->Addend;
+    // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
+    // threshold since it's the immediate value many RVV instructions accept.
+    if (isInt<5>(Step) && isInt<5>(Addend)) {
       SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
-      return convertFromScalableVector(VT, VID, DAG, Subtarget);
+      // Convert right out of the scalable type so we can use standard ISD
+      // nodes for the rest of the computation. If we used scalable types with
+      // these, we'd lose the fixed-length vector info and generate worse
+      // vsetvli code.
+      VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
+      assert(Step != 0 && "Invalid step");
+      bool Negate = false;
+      if (Step != 1) {
+        int64_t SplatStepVal = Step;
+        unsigned Opcode = ISD::MUL;
+        if (isPowerOf2_64(std::abs(Step))) {
+          Negate = Step < 0;
+          Opcode = ISD::SHL;
+          SplatStepVal = Log2_64(std::abs(Step));
+        }
+        SDValue SplatStep = DAG.getSplatVector(
+            VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
+        VID = DAG.getNode(Opcode, DL, VT, VID, SplatStep);
+      }
+      if (Addend != 0 || Negate) {
+        SDValue SplatAddend =
+            DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT));
+        VID = DAG.getNode(Negate ?
ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID); + } + return VID; } } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll index 554ab97..5393c7a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll @@ -175,34 +175,29 @@ define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) { ; RV32-NEXT: addi a0, zero, 12 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; RV32-NEXT: vmv.s.x v0, a0 +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; RV32-NEXT: vid.v v25 +; RV32-NEXT: vrsub.vi v25, v25, 4 ; RV32-NEXT: lui a0, %hi(.LCPI7_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI7_0) -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v26, (a0), zero -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, 2 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; RV32-NEXT: vmv.v.x v25, a0 -; RV32-NEXT: vsetivli zero, 4, e64, m2, tu, mu +; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, mu ; RV32-NEXT: vrgatherei16.vv v26, v8, v25, v0.t ; RV32-NEXT: vmv2r.v v8, v26 ; RV32-NEXT: ret ; ; RV64-LABEL: vrgather_shuffle_xv_v4f64: ; RV64: # %bb.0: -; RV64-NEXT: addi a0, zero, 2 -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; RV64-NEXT: vmv.s.x v26, a0 -; RV64-NEXT: vmv.v.i v28, 1 -; RV64-NEXT: vsetivli zero, 3, e64, m2, tu, mu -; RV64-NEXT: vslideup.vi v28, v26, 2 ; RV64-NEXT: addi a0, zero, 12 ; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; RV64-NEXT: vmv.s.x v0, a0 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV64-NEXT: lui a0, %hi(.LCPI7_0) ; RV64-NEXT: addi a0, a0, %lo(.LCPI7_0) -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV64-NEXT: vlse64.v v26, (a0), zero +; RV64-NEXT: vid.v v28 +; RV64-NEXT: vrsub.vi v28, v28, 4 ; RV64-NEXT: vsetvli zero, zero, e64, m2, tu, mu ; RV64-NEXT: vrgather.vv v26, v8, v28, v0.t ; RV64-NEXT: vmv2r.v v8, v26 @@ -214,17 +209,17 @@ define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) { define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) { ; RV32-LABEL: vrgather_shuffle_vx_v4f64: ; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; RV32-NEXT: vid.v v25 ; RV32-NEXT: addi a0, zero, 3 +; RV32-NEXT: vmul.vx v25, v25, a0 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; RV32-NEXT: vmv.s.x v0, a0 ; RV32-NEXT: lui a0, %hi(.LCPI8_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI8_0) ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v26, (a0), zero -; RV32-NEXT: lui a0, 48 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; RV32-NEXT: vmv.v.x v25, a0 -; RV32-NEXT: vsetivli zero, 4, e64, m2, tu, mu +; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, mu ; RV32-NEXT: vrgatherei16.vv v26, v8, v25, v0.t ; RV32-NEXT: vmv2r.v v8, v26 ; RV32-NEXT: ret @@ -232,10 +227,9 @@ define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) { ; RV64-LABEL: vrgather_shuffle_vx_v4f64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; RV64-NEXT: vmv.v.i v28, 3 -; RV64-NEXT: vsetvli zero, zero, e64, m2, tu, mu -; RV64-NEXT: vmv.s.x v28, zero +; RV64-NEXT: vid.v v26 ; RV64-NEXT: addi a0, zero, 3 +; RV64-NEXT: vmul.vx v28, v26, a0 ; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; RV64-NEXT: vmv.s.x v0, a0 ; RV64-NEXT: lui a0, %hi(.LCPI8_0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index ac03d0e..1b36ada 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -38,34 +38,275 @@ define void @buildvec_notquite_vid_v16i8(<16 x i8>* %x) { ret void } -; TODO: Could do VID then add a constant splat define void @buildvec_vid_plus_imm_v16i8(<16 x i8>* %x) { ; CHECK-LABEL: buildvec_vid_plus_imm_v16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI3_0) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI3_0) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; CHECK-NEXT: vle8.v v25, (a1) +; CHECK-NEXT: vid.v v25 +; CHECK-NEXT: vadd.vi v25, v25, 2 ; CHECK-NEXT: vse8.v v25, (a0) ; CHECK-NEXT: ret store <16 x i8> , <16 x i8>* %x ret void } -; TODO: Could do VID then multiply by a constant splat define void @buildvec_vid_mpy_imm_v16i8(<16 x i8>* %x) { ; CHECK-LABEL: buildvec_vid_mpy_imm_v16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI4_0) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI4_0) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; CHECK-NEXT: vle8.v v25, (a1) +; CHECK-NEXT: vid.v v25 +; CHECK-NEXT: addi a1, zero, 3 +; CHECK-NEXT: vmul.vx v25, v25, a1 ; CHECK-NEXT: vse8.v v25, (a0) ; CHECK-NEXT: ret store <16 x i8> , <16 x i8>* %x ret void } +define void @buildvec_vid_step2_add0_v4i8(<4 x i8>* %z0, <4 x i8>* %z1, <4 x i8>* %z2, <4 x i8>* %z3) { +; CHECK-LABEL: buildvec_vid_step2_add0_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vid.v v25 +; CHECK-NEXT: vsll.vi v25, v25, 1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: vse8.v v25, (a1) +; CHECK-NEXT: vse8.v v25, (a2) +; CHECK-NEXT: vse8.v v25, (a3) +; CHECK-NEXT: ret + store <4 x i8> , <4 x i8>* %z0 + store <4 x i8> , <4 x i8>* %z1 + store <4 x i8> , <4 x i8>* %z2 + store <4 x i8> , <4 x i8>* %z3 + ret void +} + +define void @buildvec_vid_step2_add1_v4i8(<4 x i8>* %z0, <4 x i8>* %z1, <4 x i8>* %z2, <4 x i8>* %z3) { +; CHECK-LABEL: buildvec_vid_step2_add1_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vid.v v25 +; CHECK-NEXT: vsll.vi v25, v25, 1 +; CHECK-NEXT: vadd.vi v25, v25, 1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: vse8.v v25, (a1) +; CHECK-NEXT: vse8.v v25, (a2) +; CHECK-NEXT: vse8.v v25, (a3) +; CHECK-NEXT: ret + store <4 x i8> , <4 x i8>* %z0 + store <4 x i8> , <4 x i8>* %z1 + store <4 x i8> , <4 x i8>* %z2 + store <4 x i8> , <4 x i8>* %z3 + ret void +} + +; FIXME: This could generate vrsub.vi but the (ISD::MUL X, -1) we generate +; while lowering ISD::BUILD_VECTOR is custom-lowered to RISCVISD::MUL_VL before +; being combined. 
+define void @buildvec_vid_stepn1_add0_v4i8(<4 x i8>* %z0, <4 x i8>* %z1, <4 x +; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vid.v v25 +; CHECK-NEXT: vrsub.vi v25, v25, 0 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: vse8.v v25, (a1) +; CHECK-NEXT: vse8.v v25, (a2) +; CHECK-NEXT: vse8.v v25, (a3) +; CHECK-NEXT: ret +i8>* %z2, <4 x i8>* %z3) { + store <4 x i8> , <4 x i8>* %z0 + store <4 x i8> , <4 x i8>* %z1 + store <4 x i8> , <4 x i8>* %z2 + store <4 x i8> , <4 x i8>* %z3 + ret void +} + +define void @buildvec_vid_stepn2_add0_v4i8(<4 x i8>* %z0, <4 x i8>* %z1, <4 x i8>* %z2, <4 x i8>* %z3) { +; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vid.v v25 +; CHECK-NEXT: vsll.vi v25, v25, 1 +; CHECK-NEXT: vrsub.vi v25, v25, 0 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: vse8.v v25, (a1) +; CHECK-NEXT: vse8.v v25, (a2) +; CHECK-NEXT: vse8.v v25, (a3) +; CHECK-NEXT: ret + store <4 x i8> , <4 x i8>* %z0 + store <4 x i8> , <4 x i8>* %z1 + store <4 x i8> , <4 x i8>* %z2 + store <4 x i8> , <4 x i8>* %z3 + ret void +} + +define void @buildvec_vid_stepn2_add3_v4i8(<4 x i8>* %z0, <4 x i8>* %z1, <4 x i8>* %z2, <4 x i8>* %z3) { +; CHECK-LABEL: buildvec_vid_stepn2_add3_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vid.v v25 +; CHECK-NEXT: vsll.vi v25, v25, 1 +; CHECK-NEXT: vrsub.vi v25, v25, 3 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + store <4 x i8> , <4 x i8>* %z0 + ret void +} + +define void @buildvec_vid_stepn3_add3_v4i8(<4 x i8>* %z0, <4 x i8>* %z1, <4 x i8>* %z2, <4 x i8>* %z3) { +; CHECK-LABEL: buildvec_vid_stepn3_add3_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.i v25, 3 +; CHECK-NEXT: vid.v v26 +; CHECK-NEXT: addi a1, zero, -3 +; CHECK-NEXT: vmadd.vx v26, a1, v25 +; CHECK-NEXT: vse8.v v26, (a0) +; CHECK-NEXT: ret + store <4 x i8> , <4 x i8>* %z0 + ret void +} + +define void @buildvec_vid_stepn3_addn3_v4i32(<4 x i32>* %z0, <4 x i32>* %z1, <4 x i32>* %z2, <4 x i32>* %z3) { +; CHECK-LABEL: buildvec_vid_stepn3_addn3_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.i v25, -3 +; CHECK-NEXT: vid.v v26 +; CHECK-NEXT: addi a4, zero, -3 +; CHECK-NEXT: vmadd.vx v26, a4, v25 +; CHECK-NEXT: vse32.v v26, (a0) +; CHECK-NEXT: vse32.v v26, (a1) +; CHECK-NEXT: vse32.v v26, (a2) +; CHECK-NEXT: vse32.v v26, (a3) +; CHECK-NEXT: ret + store <4 x i32> , <4 x i32>* %z0 + store <4 x i32> , <4 x i32>* %z1 + store <4 x i32> , <4 x i32>* %z2 + store <4 x i32> , <4 x i32>* %z3 + ret void +} + +; FIXME: RV32 doesn't catch this pattern due to BUILD_VECTOR legalization. 
+define <4 x i64> @buildvec_vid_step1_add0_v4i64() { +; RV32-LABEL: buildvec_vid_step1_add0_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi a0, zero, 1 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV32-NEXT: vmv.s.x v25, a0 +; RV32-NEXT: vmv.v.i v8, 0 +; RV32-NEXT: vsetivli zero, 3, e32, m1, tu, mu +; RV32-NEXT: vslideup.vi v8, v25, 2 +; RV32-NEXT: lui a0, %hi(.LCPI12_0) +; RV32-NEXT: addi a0, a0, %lo(.LCPI12_0) +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV32-NEXT: vle32.v v9, (a0) +; RV32-NEXT: ret +; +; RV64-LABEL: buildvec_vid_step1_add0_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV64-NEXT: vid.v v8 +; RV64-NEXT: vadd.vi v9, v8, 2 +; RV64-NEXT: ret + ret <4 x i64> +} + +define <4 x i64> @buildvec_vid_step2_add0_v4i64() { +; RV32-LABEL: buildvec_vid_step2_add0_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi a0, zero, 2 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV32-NEXT: vmv.s.x v25, a0 +; RV32-NEXT: vmv.v.i v8, 0 +; RV32-NEXT: vsetivli zero, 3, e32, m1, tu, mu +; RV32-NEXT: vslideup.vi v8, v25, 2 +; RV32-NEXT: lui a0, %hi(.LCPI13_0) +; RV32-NEXT: addi a0, a0, %lo(.LCPI13_0) +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV32-NEXT: vle32.v v9, (a0) +; RV32-NEXT: ret +; +; RV64-LABEL: buildvec_vid_step2_add0_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV64-NEXT: vid.v v25 +; RV64-NEXT: vsll.vi v8, v25, 1 +; RV64-NEXT: vadd.vi v9, v8, 4 +; RV64-NEXT: ret + ret <4 x i64> +} + +define void @buildvec_no_vid_v4i8(<4 x i8>* %z0, <4 x i8>* %z1, <4 x i8>* %z2, <4 x i8>* %z3, <4 x i8>* %z4, <4 x i8>* %z5) { +; RV32-LABEL: buildvec_no_vid_v4i8: +; RV32: # %bb.0: +; RV32-NEXT: lui a6, %hi(.LCPI14_0) +; RV32-NEXT: addi a6, a6, %lo(.LCPI14_0) +; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; RV32-NEXT: vle8.v v25, (a6) +; RV32-NEXT: lui a6, %hi(.LCPI14_1) +; RV32-NEXT: addi a6, a6, %lo(.LCPI14_1) +; RV32-NEXT: vle8.v v26, (a6) +; RV32-NEXT: vse8.v v25, (a0) +; RV32-NEXT: vse8.v v26, (a1) +; RV32-NEXT: lui a0, 1 +; RV32-NEXT: addi a0, a0, -2048 +; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; RV32-NEXT: vmv.v.x v25, a0 +; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; RV32-NEXT: vse8.v v25, (a2) +; RV32-NEXT: addi a0, zero, 2047 +; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; RV32-NEXT: vmv.v.x v25, a0 +; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; RV32-NEXT: lui a0, %hi(.LCPI14_2) +; RV32-NEXT: addi a0, a0, %lo(.LCPI14_2) +; RV32-NEXT: vle8.v v26, (a0) +; RV32-NEXT: vse8.v v25, (a3) +; RV32-NEXT: vmv.v.i v25, -2 +; RV32-NEXT: vse8.v v25, (a4) +; RV32-NEXT: vse8.v v26, (a5) +; RV32-NEXT: ret +; +; RV64-LABEL: buildvec_no_vid_v4i8: +; RV64: # %bb.0: +; RV64-NEXT: lui a6, %hi(.LCPI14_0) +; RV64-NEXT: addi a6, a6, %lo(.LCPI14_0) +; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; RV64-NEXT: vle8.v v25, (a6) +; RV64-NEXT: lui a6, %hi(.LCPI14_1) +; RV64-NEXT: addi a6, a6, %lo(.LCPI14_1) +; RV64-NEXT: vle8.v v26, (a6) +; RV64-NEXT: vse8.v v25, (a0) +; RV64-NEXT: vse8.v v26, (a1) +; RV64-NEXT: lui a0, 1 +; RV64-NEXT: addiw a0, a0, -2048 +; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; RV64-NEXT: vse8.v v25, (a2) +; RV64-NEXT: addi a0, zero, 2047 +; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; RV64-NEXT: lui a0, %hi(.LCPI14_2) +; RV64-NEXT: addi a0, a0, %lo(.LCPI14_2) +; RV64-NEXT: vle8.v v26, (a0) +; RV64-NEXT: vse8.v v25, (a3) +; RV64-NEXT: vmv.v.i v25, 
-2 +; RV64-NEXT: vse8.v v25, (a4) +; RV64-NEXT: vse8.v v26, (a5) +; RV64-NEXT: ret + store <4 x i8> , <4 x i8>* %z0 + store <4 x i8> , <4 x i8>* %z1 + store <4 x i8> , <4 x i8>* %z2 + store <4 x i8> , <4 x i8>* %z3 + store <4 x i8> , <4 x i8>* %z4 + store <4 x i8> , <4 x i8>* %z5 + ret void +} + define void @buildvec_dominant0_v8i16(<8 x i16>* %x) { ; CHECK-LABEL: buildvec_dominant0_v8i16: ; CHECK: # %bb.0: @@ -115,10 +356,8 @@ define void @buildvec_dominant2_v2i8(<2 x i8>* %x) { ; CHECK-LABEL: buildvec_dominant2_v2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu -; CHECK-NEXT: vmv.v.i v25, -1 -; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, mu -; CHECK-NEXT: vmv.s.x v25, zero -; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu +; CHECK-NEXT: vid.v v25 +; CHECK-NEXT: vrsub.vi v25, v25, 0 ; CHECK-NEXT: vse8.v v25, (a0) ; CHECK-NEXT: ret store <2 x i8> , <2 x i8>* %x @@ -128,8 +367,8 @@ define void @buildvec_dominant2_v2i8(<2 x i8>* %x) { define void @buildvec_dominant0_v2i32(<2 x i64>* %x) { ; RV32-LABEL: buildvec_dominant0_v2i32: ; RV32: # %bb.0: -; RV32-NEXT: lui a1, %hi(.LCPI10_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI10_0) +; RV32-NEXT: lui a1, %hi(.LCPI20_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI20_0) ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; RV32-NEXT: vle32.v v25, (a1) ; RV32-NEXT: vse32.v v25, (a0) @@ -159,8 +398,8 @@ define void @buildvec_dominant0_v2i32(<2 x i64>* %x) { define void @buildvec_dominant1_optsize_v2i32(<2 x i64>* %x) optsize { ; RV32-LABEL: buildvec_dominant1_optsize_v2i32: ; RV32: # %bb.0: -; RV32-NEXT: lui a1, %hi(.LCPI11_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI11_0) +; RV32-NEXT: lui a1, %hi(.LCPI21_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI21_0) ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; RV32-NEXT: vle32.v v25, (a1) ; RV32-NEXT: vse32.v v25, (a0) @@ -168,8 +407,8 @@ define void @buildvec_dominant1_optsize_v2i32(<2 x i64>* %x) optsize { ; ; RV64-LABEL: buildvec_dominant1_optsize_v2i32: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI11_0) -; RV64-NEXT: addi a1, a1, %lo(.LCPI11_0) +; RV64-NEXT: lui a1, %hi(.LCPI21_0) +; RV64-NEXT: addi a1, a1, %lo(.LCPI21_0) ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: vle64.v v25, (a1) ; RV64-NEXT: vse64.v v25, (a0) @@ -218,8 +457,8 @@ define void @buildvec_seq_v8i8_v2i32(<8 x i8>* %x) { define void @buildvec_seq_v16i8_v2i64(<16 x i8>* %x) { ; RV32-LABEL: buildvec_seq_v16i8_v2i64: ; RV32: # %bb.0: -; RV32-NEXT: lui a1, %hi(.LCPI14_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI14_0) +; RV32-NEXT: lui a1, %hi(.LCPI24_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI24_0) ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV32-NEXT: vle8.v v25, (a1) ; RV32-NEXT: vse8.v v25, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index 280dc01..634b989 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -104,37 +104,19 @@ define <4 x i16> @vrgather_shuffle_vv_v4i16(<4 x i16> %x, <4 x i16> %y) { } define <4 x i16> @vrgather_shuffle_xv_v4i16(<4 x i16> %x) { -; RV32-LABEL: vrgather_shuffle_xv_v4i16: -; RV32: # %bb.0: -; RV32-NEXT: addi a0, zero, 12 -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu -; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, 2 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; RV32-NEXT: vmv.v.x v26, a0 -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; RV32-NEXT: vmv.v.i v25, 5 -; RV32-NEXT: vsetvli 
zero, zero, e16, mf2, tu, mu -; RV32-NEXT: vrgather.vv v25, v8, v26, v0.t -; RV32-NEXT: vmv1r.v v8, v25 -; RV32-NEXT: ret -; -; RV64-LABEL: vrgather_shuffle_xv_v4i16: -; RV64: # %bb.0: -; RV64-NEXT: addi a0, zero, 12 -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu -; RV64-NEXT: vmv.s.x v0, a0 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, 2 -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; RV64-NEXT: vmv.v.x v26, a0 -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; RV64-NEXT: vmv.v.i v25, 5 -; RV64-NEXT: vsetvli zero, zero, e16, mf2, tu, mu -; RV64-NEXT: vrgather.vv v25, v8, v26, v0.t -; RV64-NEXT: vmv1r.v v8, v25 -; RV64-NEXT: ret +; CHECK-LABEL: vrgather_shuffle_xv_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 12 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vid.v v25 +; CHECK-NEXT: vrsub.vi v26, v25, 4 +; CHECK-NEXT: vmv.v.i v25, 5 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, tu, mu +; CHECK-NEXT: vrgather.vv v25, v8, v26, v0.t +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret %s = shufflevector <4 x i16> , <4 x i16> %x, <4 x i32> ret <4 x i16> %s } @@ -142,12 +124,12 @@ define <4 x i16> @vrgather_shuffle_xv_v4i16(<4 x i16> %x) { define <4 x i16> @vrgather_shuffle_vx_v4i16(<4 x i16> %x) { ; CHECK-LABEL: vrgather_shuffle_vx_v4i16: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vid.v v25 ; CHECK-NEXT: addi a0, zero, 3 +; CHECK-NEXT: vmul.vx v26, v25, a0 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: lui a0, 48 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; CHECK-NEXT: vmv.v.x v26, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmv.v.i v25, 5 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, tu, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 616ac70..8501cda 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1034,10 +1034,6 @@ define void @mulhu_v2i64(<2 x i64>* %x) { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: vle64.v v25, (a0) -; RV64-NEXT: vmv.v.i v26, 2 -; RV64-NEXT: addi a1, zero, 1 -; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; RV64-NEXT: vmv.s.x v26, a1 ; RV64-NEXT: lui a1, 1035469 ; RV64-NEXT: addiw a1, a1, -819 ; RV64-NEXT: slli a1, a1, 12 @@ -1046,8 +1042,7 @@ define void @mulhu_v2i64(<2 x i64>* %x) { ; RV64-NEXT: addi a1, a1, -819 ; RV64-NEXT: slli a1, a1, 12 ; RV64-NEXT: addi a1, a1, -819 -; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV64-NEXT: vmv.v.x v27, a1 +; RV64-NEXT: vmv.v.x v26, a1 ; RV64-NEXT: lui a1, 1026731 ; RV64-NEXT: addiw a1, a1, -1365 ; RV64-NEXT: slli a1, a1, 12 @@ -1057,9 +1052,11 @@ define void @mulhu_v2i64(<2 x i64>* %x) { ; RV64-NEXT: slli a1, a1, 12 ; RV64-NEXT: addi a1, a1, -1365 ; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; RV64-NEXT: vmv.s.x v27, a1 +; RV64-NEXT: vmv.s.x v26, a1 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV64-NEXT: vmulhu.vv v25, v25, v27 +; RV64-NEXT: vmulhu.vv v25, v25, v26 +; RV64-NEXT: vid.v v26 +; RV64-NEXT: vadd.vi v26, v26, 1 ; RV64-NEXT: vsrl.vv v25, v25, v26 ; RV64-NEXT: vse64.v v25, (a0) ; RV64-NEXT: ret @@ -1252,9 +1249,6 @@ define void @mulhs_v2i64(<2 x i64>* %x) { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: vle64.v v25, (a0) -; RV64-NEXT: vmv.v.i v26, -1 -; RV64-NEXT: vsetvli 
zero, zero, e64, m1, tu, mu -; RV64-NEXT: vmv.s.x v26, zero ; RV64-NEXT: lui a1, 21845 ; RV64-NEXT: addiw a1, a1, 1365 ; RV64-NEXT: slli a1, a1, 12 @@ -1263,18 +1257,18 @@ define void @mulhs_v2i64(<2 x i64>* %x) { ; RV64-NEXT: addi a1, a1, 1365 ; RV64-NEXT: slli a1, a1, 12 ; RV64-NEXT: addi a2, a1, 1365 -; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV64-NEXT: vmv.v.x v27, a2 +; RV64-NEXT: vmv.v.x v26, a2 ; RV64-NEXT: addi a1, a1, 1366 ; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; RV64-NEXT: vmv.s.x v27, a1 +; RV64-NEXT: vmv.s.x v26, a1 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV64-NEXT: vmulh.vv v27, v25, v27 -; RV64-NEXT: vmacc.vv v27, v25, v26 +; RV64-NEXT: vmulh.vv v26, v25, v26 +; RV64-NEXT: vid.v v27 +; RV64-NEXT: vrsub.vi v28, v27, 0 +; RV64-NEXT: vmadd.vv v28, v25, v26 ; RV64-NEXT: addi a1, zero, 63 -; RV64-NEXT: vsrl.vx v25, v27, a1 -; RV64-NEXT: vid.v v26 -; RV64-NEXT: vsra.vv v26, v27, v26 +; RV64-NEXT: vsrl.vx v25, v28, a1 +; RV64-NEXT: vsra.vv v26, v28, v27 ; RV64-NEXT: vadd.vv v25, v26, v25 ; RV64-NEXT: vse64.v v25, (a0) ; RV64-NEXT: ret @@ -4411,16 +4405,9 @@ define void @mulhu_v4i64(<4 x i64>* %x) { ; LMULMAX1-RV64-NEXT: vsub.vv v26, v26, v28 ; LMULMAX1-RV64-NEXT: vmulhu.vv v26, v26, v27 ; LMULMAX1-RV64-NEXT: vadd.vv v26, v26, v28 -; LMULMAX1-RV64-NEXT: vmv.v.i v27, 3 -; LMULMAX1-RV64-NEXT: addi a2, zero, 2 -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; LMULMAX1-RV64-NEXT: vmv.s.x v27, a2 -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; LMULMAX1-RV64-NEXT: vsrl.vv v26, v26, v27 -; LMULMAX1-RV64-NEXT: vmv.v.i v27, 2 -; LMULMAX1-RV64-NEXT: addi a2, zero, 1 -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; LMULMAX1-RV64-NEXT: vmv.s.x v27, a2 +; LMULMAX1-RV64-NEXT: vid.v v27 +; LMULMAX1-RV64-NEXT: vadd.vi v28, v27, 2 +; LMULMAX1-RV64-NEXT: vsrl.vv v26, v26, v28 ; LMULMAX1-RV64-NEXT: lui a2, 1035469 ; LMULMAX1-RV64-NEXT: addiw a2, a2, -819 ; LMULMAX1-RV64-NEXT: slli a2, a2, 12 @@ -4429,7 +4416,6 @@ define void @mulhu_v4i64(<4 x i64>* %x) { ; LMULMAX1-RV64-NEXT: addi a2, a2, -819 ; LMULMAX1-RV64-NEXT: slli a2, a2, 12 ; LMULMAX1-RV64-NEXT: addi a2, a2, -819 -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vmv.v.x v28, a2 ; LMULMAX1-RV64-NEXT: lui a2, 1026731 ; LMULMAX1-RV64-NEXT: addiw a2, a2, -1365 @@ -4443,6 +4429,7 @@ define void @mulhu_v4i64(<4 x i64>* %x) { ; LMULMAX1-RV64-NEXT: vmv.s.x v28, a2 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vmulhu.vv v25, v25, v28 +; LMULMAX1-RV64-NEXT: vadd.vi v27, v27, 1 ; LMULMAX1-RV64-NEXT: vsrl.vv v25, v25, v27 ; LMULMAX1-RV64-NEXT: vse64.v v25, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v26, (a1) @@ -4795,9 +4782,6 @@ define void @mulhs_v4i64(<4 x i64>* %x) { ; LMULMAX1-RV64-NEXT: vle64.v v25, (a0) ; LMULMAX1-RV64-NEXT: addi a1, a0, 16 ; LMULMAX1-RV64-NEXT: vle64.v v26, (a1) -; LMULMAX1-RV64-NEXT: vmv.v.i v27, -1 -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; LMULMAX1-RV64-NEXT: vmv.s.x v27, zero ; LMULMAX1-RV64-NEXT: lui a2, 21845 ; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365 ; LMULMAX1-RV64-NEXT: slli a2, a2, 12 @@ -4806,23 +4790,23 @@ define void @mulhs_v4i64(<4 x i64>* %x) { ; LMULMAX1-RV64-NEXT: addi a2, a2, 1365 ; LMULMAX1-RV64-NEXT: slli a2, a2, 12 ; LMULMAX1-RV64-NEXT: addi a3, a2, 1365 -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; LMULMAX1-RV64-NEXT: vmv.v.x v28, a3 +; LMULMAX1-RV64-NEXT: vmv.v.x v27, a3 ; LMULMAX1-RV64-NEXT: addi a2, a2, 1366 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, 
e64, m1, tu, mu -; LMULMAX1-RV64-NEXT: vmv.s.x v28, a2 +; LMULMAX1-RV64-NEXT: vmv.s.x v27, a2 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; LMULMAX1-RV64-NEXT: vmulh.vv v29, v26, v28 -; LMULMAX1-RV64-NEXT: vmacc.vv v29, v27, v26 +; LMULMAX1-RV64-NEXT: vmulh.vv v28, v26, v27 +; LMULMAX1-RV64-NEXT: vid.v v29 +; LMULMAX1-RV64-NEXT: vrsub.vi v30, v29, 0 +; LMULMAX1-RV64-NEXT: vmacc.vv v28, v30, v26 ; LMULMAX1-RV64-NEXT: addi a2, zero, 63 -; LMULMAX1-RV64-NEXT: vsrl.vx v26, v29, a2 -; LMULMAX1-RV64-NEXT: vid.v v30 -; LMULMAX1-RV64-NEXT: vsra.vv v29, v29, v30 -; LMULMAX1-RV64-NEXT: vadd.vv v26, v29, v26 -; LMULMAX1-RV64-NEXT: vmulh.vv v28, v25, v28 -; LMULMAX1-RV64-NEXT: vmacc.vv v28, v25, v27 -; LMULMAX1-RV64-NEXT: vsrl.vx v25, v28, a2 -; LMULMAX1-RV64-NEXT: vsra.vv v27, v28, v30 +; LMULMAX1-RV64-NEXT: vsrl.vx v26, v28, a2 +; LMULMAX1-RV64-NEXT: vsra.vv v28, v28, v29 +; LMULMAX1-RV64-NEXT: vadd.vv v26, v28, v26 +; LMULMAX1-RV64-NEXT: vmulh.vv v27, v25, v27 +; LMULMAX1-RV64-NEXT: vmacc.vv v27, v25, v30 +; LMULMAX1-RV64-NEXT: vsrl.vx v25, v27, a2 +; LMULMAX1-RV64-NEXT: vsra.vv v27, v27, v29 ; LMULMAX1-RV64-NEXT: vadd.vv v25, v27, v25 ; LMULMAX1-RV64-NEXT: vse64.v v25, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v26, (a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv32.ll index b09a8dc..129cbd1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv32.ll @@ -91,11 +91,9 @@ declare <16 x i16> @llvm.experimental.stepvector.v16i16() define <16 x i16> @stepvector_v16i16() { ; LMULMAX1-LABEL: stepvector_v16i16: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: lui a0, %hi(.LCPI7_0) -; LMULMAX1-NEXT: addi a0, a0, %lo(.LCPI7_0) ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX1-NEXT: vle16.v v9, (a0) ; LMULMAX1-NEXT: vid.v v8 +; LMULMAX1-NEXT: vadd.vi v9, v8, 8 ; LMULMAX1-NEXT: ret ; ; LMULMAX2-LABEL: stepvector_v16i16: @@ -136,11 +134,9 @@ declare <8 x i32> @llvm.experimental.stepvector.v8i32() define <8 x i32> @stepvector_v8i32() { ; LMULMAX1-LABEL: stepvector_v8i32: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: lui a0, %hi(.LCPI10_0) -; LMULMAX1-NEXT: addi a0, a0, %lo(.LCPI10_0) ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-NEXT: vle32.v v9, (a0) ; LMULMAX1-NEXT: vid.v v8 +; LMULMAX1-NEXT: vadd.vi v9, v8, 4 ; LMULMAX1-NEXT: ret ; ; LMULMAX2-LABEL: stepvector_v8i32: @@ -157,26 +153,18 @@ declare <16 x i32> @llvm.experimental.stepvector.v16i32() define <16 x i32> @stepvector_v16i32() { ; LMULMAX1-LABEL: stepvector_v16i32: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: lui a0, %hi(.LCPI11_0) -; LMULMAX1-NEXT: addi a0, a0, %lo(.LCPI11_0) ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: lui a0, %hi(.LCPI11_1) -; LMULMAX1-NEXT: addi a0, a0, %lo(.LCPI11_1) -; LMULMAX1-NEXT: vle32.v v10, (a0) -; LMULMAX1-NEXT: lui a0, %hi(.LCPI11_2) -; LMULMAX1-NEXT: addi a0, a0, %lo(.LCPI11_2) -; LMULMAX1-NEXT: vle32.v v11, (a0) ; LMULMAX1-NEXT: vid.v v8 +; LMULMAX1-NEXT: vadd.vi v9, v8, 4 +; LMULMAX1-NEXT: vadd.vi v10, v8, 8 +; LMULMAX1-NEXT: vadd.vi v11, v8, 12 ; LMULMAX1-NEXT: ret ; ; LMULMAX2-LABEL: stepvector_v16i32: ; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: lui a0, %hi(.LCPI11_0) -; LMULMAX2-NEXT: addi a0, a0, %lo(.LCPI11_0) ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; LMULMAX2-NEXT: vle32.v v10, (a0) ; LMULMAX2-NEXT: vid.v v8 +; LMULMAX2-NEXT: vadd.vi v10, v8, 8 ; LMULMAX2-NEXT: ret %v 
= call <16 x i32> @llvm.experimental.stepvector.v16i32() ret <16 x i32> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv64.ll index 632f9f2..3c3e70c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv64.ll @@ -91,11 +91,9 @@ declare <16 x i16> @llvm.experimental.stepvector.v16i16() define <16 x i16> @stepvector_v16i16() { ; LMULMAX1-LABEL: stepvector_v16i16: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: lui a0, %hi(.LCPI7_0) -; LMULMAX1-NEXT: addi a0, a0, %lo(.LCPI7_0) ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX1-NEXT: vle16.v v9, (a0) ; LMULMAX1-NEXT: vid.v v8 +; LMULMAX1-NEXT: vadd.vi v9, v8, 8 ; LMULMAX1-NEXT: ret ; ; LMULMAX2-LABEL: stepvector_v16i16: @@ -136,11 +134,9 @@ declare <8 x i32> @llvm.experimental.stepvector.v8i32() define <8 x i32> @stepvector_v8i32() { ; LMULMAX1-LABEL: stepvector_v8i32: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: lui a0, %hi(.LCPI10_0) -; LMULMAX1-NEXT: addi a0, a0, %lo(.LCPI10_0) ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-NEXT: vle32.v v9, (a0) ; LMULMAX1-NEXT: vid.v v8 +; LMULMAX1-NEXT: vadd.vi v9, v8, 4 ; LMULMAX1-NEXT: ret ; ; LMULMAX2-LABEL: stepvector_v8i32: @@ -157,26 +153,18 @@ declare <16 x i32> @llvm.experimental.stepvector.v16i32() define <16 x i32> @stepvector_v16i32() { ; LMULMAX1-LABEL: stepvector_v16i32: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: lui a0, %hi(.LCPI11_0) -; LMULMAX1-NEXT: addi a0, a0, %lo(.LCPI11_0) ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: lui a0, %hi(.LCPI11_1) -; LMULMAX1-NEXT: addi a0, a0, %lo(.LCPI11_1) -; LMULMAX1-NEXT: vle32.v v10, (a0) -; LMULMAX1-NEXT: lui a0, %hi(.LCPI11_2) -; LMULMAX1-NEXT: addi a0, a0, %lo(.LCPI11_2) -; LMULMAX1-NEXT: vle32.v v11, (a0) ; LMULMAX1-NEXT: vid.v v8 +; LMULMAX1-NEXT: vadd.vi v9, v8, 4 +; LMULMAX1-NEXT: vadd.vi v10, v8, 8 +; LMULMAX1-NEXT: vadd.vi v11, v8, 12 ; LMULMAX1-NEXT: ret ; ; LMULMAX2-LABEL: stepvector_v16i32: ; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: lui a0, %hi(.LCPI11_0) -; LMULMAX2-NEXT: addi a0, a0, %lo(.LCPI11_0) ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; LMULMAX2-NEXT: vle32.v v10, (a0) ; LMULMAX2-NEXT: vid.v v8 +; LMULMAX2-NEXT: vadd.vi v10, v8, 8 ; LMULMAX2-NEXT: ret %v = call <16 x i32> @llvm.experimental.stepvector.v16i32() ret <16 x i32> %v @@ -200,12 +188,8 @@ define <4 x i64> @stepvector_v4i64() { ; LMULMAX1-LABEL: stepvector_v4i64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v9, 3 -; LMULMAX1-NEXT: addi a0, zero, 2 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; LMULMAX1-NEXT: vmv.s.x v9, a0 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; LMULMAX1-NEXT: vid.v v8 +; LMULMAX1-NEXT: vadd.vi v9, v8, 2 ; LMULMAX1-NEXT: ret ; ; LMULMAX2-LABEL: stepvector_v4i64: @@ -223,31 +207,17 @@ define <8 x i64> @stepvector_v8i64() { ; LMULMAX1-LABEL: stepvector_v8i64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v9, 3 -; LMULMAX1-NEXT: addi a0, zero, 2 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; LMULMAX1-NEXT: vmv.s.x v9, a0 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v10, 5 -; LMULMAX1-NEXT: addi a0, zero, 4 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; LMULMAX1-NEXT: vmv.s.x v10, a0 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; 
LMULMAX1-NEXT: vmv.v.i v11, 7 -; LMULMAX1-NEXT: addi a0, zero, 6 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; LMULMAX1-NEXT: vmv.s.x v11, a0 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; LMULMAX1-NEXT: vid.v v8 +; LMULMAX1-NEXT: vadd.vi v9, v8, 2 +; LMULMAX1-NEXT: vadd.vi v10, v8, 4 +; LMULMAX1-NEXT: vadd.vi v11, v8, 6 ; LMULMAX1-NEXT: ret ; ; LMULMAX2-LABEL: stepvector_v8i64: ; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: lui a0, %hi(.LCPI14_0) -; LMULMAX2-NEXT: addi a0, a0, %lo(.LCPI14_0) ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; LMULMAX2-NEXT: vle64.v v10, (a0) ; LMULMAX2-NEXT: vid.v v8 +; LMULMAX2-NEXT: vadd.vi v10, v8, 4 ; LMULMAX2-NEXT: ret %v = call <8 x i64> @llvm.experimental.stepvector.v8i64() ret <8 x i64> %v @@ -259,57 +229,23 @@ define <16 x i64> @stepvector_v16i64() { ; LMULMAX1-LABEL: stepvector_v16i64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v9, 3 -; LMULMAX1-NEXT: addi a0, zero, 2 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; LMULMAX1-NEXT: vmv.s.x v9, a0 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v10, 5 -; LMULMAX1-NEXT: addi a0, zero, 4 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; LMULMAX1-NEXT: vmv.s.x v10, a0 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v11, 7 -; LMULMAX1-NEXT: addi a0, zero, 6 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; LMULMAX1-NEXT: vmv.s.x v11, a0 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v12, 9 -; LMULMAX1-NEXT: addi a0, zero, 8 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; LMULMAX1-NEXT: vmv.s.x v12, a0 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v13, 11 -; LMULMAX1-NEXT: addi a0, zero, 10 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; LMULMAX1-NEXT: vmv.s.x v13, a0 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v14, 13 -; LMULMAX1-NEXT: addi a0, zero, 12 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; LMULMAX1-NEXT: vmv.s.x v14, a0 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v15, 15 -; LMULMAX1-NEXT: addi a0, zero, 14 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; LMULMAX1-NEXT: vmv.s.x v15, a0 -; LMULMAX1-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; LMULMAX1-NEXT: vid.v v8 +; LMULMAX1-NEXT: vadd.vi v9, v8, 2 +; LMULMAX1-NEXT: vadd.vi v10, v8, 4 +; LMULMAX1-NEXT: vadd.vi v11, v8, 6 +; LMULMAX1-NEXT: vadd.vi v12, v8, 8 +; LMULMAX1-NEXT: vadd.vi v13, v8, 10 +; LMULMAX1-NEXT: vadd.vi v14, v8, 12 +; LMULMAX1-NEXT: vadd.vi v15, v8, 14 ; LMULMAX1-NEXT: ret ; ; LMULMAX2-LABEL: stepvector_v16i64: ; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: lui a0, %hi(.LCPI15_0) -; LMULMAX2-NEXT: addi a0, a0, %lo(.LCPI15_0) ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; LMULMAX2-NEXT: vle64.v v10, (a0) -; LMULMAX2-NEXT: lui a0, %hi(.LCPI15_1) -; LMULMAX2-NEXT: addi a0, a0, %lo(.LCPI15_1) -; LMULMAX2-NEXT: vle64.v v12, (a0) -; LMULMAX2-NEXT: lui a0, %hi(.LCPI15_2) -; LMULMAX2-NEXT: addi a0, a0, %lo(.LCPI15_2) -; LMULMAX2-NEXT: vle64.v v14, (a0) ; LMULMAX2-NEXT: vid.v v8 +; LMULMAX2-NEXT: vadd.vi v10, v8, 4 +; LMULMAX2-NEXT: vadd.vi v12, v8, 8 +; LMULMAX2-NEXT: vadd.vi v14, v8, 12 ; LMULMAX2-NEXT: ret %v = call <16 x i64> @llvm.experimental.stepvector.v16i64() ret <16 x i64> %v
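
As an illustrative complement to the tests above (a sketch, not part of the patch itself): a BUILD_VECTOR with step 3 and addend 1, such as <1,4,7,10>, should now be synthesized from vid.v plus a scalar multiply and an immediate add rather than being loaded from the constant pool. The function name below is hypothetical, and the exact register allocation and vsetvli configuration may differ from what the backend actually emits.

define void @buildvec_vid_step3_add1_v4i32_example(<4 x i32>* %x) {
; Expected shape of the generated code (illustrative only):
;   vsetivli zero, 4, e32, m1, ta, mu
;   vid.v    v25                ; v25 = <0,1,2,3>
;   addi     a1, zero, 3
;   vmul.vx  v25, v25, a1       ; v25 = <0,3,6,9>
;   vadd.vi  v25, v25, 1        ; v25 = <1,4,7,10>
;   vse32.v  v25, (a0)
;   ret
  store <4 x i32> <i32 1, i32 4, i32 7, i32 10>, <4 x i32>* %x
  ret void
}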