From: Craig Topper
Date: Wed, 26 Oct 2022 16:26:58 +0000 (-0700)
Subject: [RISCV] Use vslide1down for i64 insertelt on RV32.
X-Git-Tag: upstream/17.0.6~29443
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a61b74889f54925949e94e68fd7548a0aa3cf709;p=platform%2Fupstream%2Fllvm.git

[RISCV] Use vslide1down for i64 insertelt on RV32.

Instead of using vslide1up, use vslide1down and build in the other
direction. This avoids the early-clobber overlap constraint of
vslide1up.

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D136735
---
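A minimal scalar sketch of the two vslide1down.vx steps, to illustrate why
building from the low half works (illustrative only, not LLVM or RVV API;
the helper name slide1down_vl2 is made up). Each slide shifts the active
elements down by one and writes the scalar into the highest active element,
and, unlike vslide1up, the destination may overlap the source, so no zero
splat is needed first.

#include <array>
#include <cstdint>
#include <cstdio>

// Scalar model of vslide1down.vx with VL=2: elements shift down by one and
// the scalar enters at the highest active element. The destination may alias
// the source, which is what lets the lowering drop the old vmv.v.i step.
static std::array<uint32_t, 2> slide1down_vl2(std::array<uint32_t, 2> v,
                                              uint32_t x) {
  v[0] = v[1];
  v[1] = x;
  return v;
}

int main() {
  uint32_t lo = 0x89abcdef, hi = 0x01234567; // i64 value split into i32 halves
  std::array<uint32_t, 2> v{};               // prior contents are don't-care
  v = slide1down_vl2(v, lo);                 // {0, lo}
  v = slide1down_vl2(v, hi);                 // {lo, hi}: the i64 sits at element 0
  std::printf("v = { 0x%08x, 0x%08x }\n", (unsigned)v[0], (unsigned)v[1]);
  return 0;
}
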
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 2948fa9..5ed8192 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4947,7 +4947,7 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
         DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Val, VL);
   } else {
     // On RV32, i64-element vectors must be specially handled to place the
-    // value at element 0, by using two vslide1up instructions in sequence on
+    // value at element 0, by using two vslide1down instructions in sequence on
     // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
     // this.
     SDValue One = DAG.getConstant(1, DL, XLenVT);
@@ -4959,16 +4959,14 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
         getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
     // Limit the active VL to two.
     SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
-    // Note: We can't pass a UNDEF to the first VSLIDE1UP_VL since an untied
-    // undef doesn't obey the earlyclobber constraint. Just splat a zero value.
-    ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT,
-                           DAG.getUNDEF(I32ContainerVT), Zero, InsertI64VL);
-    // First slide in the hi value, then the lo in underneath it.
-    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT,
-                           DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
+    // First slide in the lo value, then the hi in above it. We use slide1down
+    // to avoid the register group overlap constraint of vslide1up.
+    ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
+                           DAG.getUNDEF(I32ContainerVT),
+                           DAG.getUNDEF(I32ContainerVT), ValLo,
                            I32Mask, InsertI64VL);
-    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT,
-                           DAG.getUNDEF(I32ContainerVT), ValInVec, ValLo,
+    ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
+                           DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
                            I32Mask, InsertI64VL);
     // Bitcast back to the right container type.
     ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
index 7619c48..1d5715d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
@@ -508,11 +508,10 @@ define <4 x i16> @bitcast_i64_v4i16(i64 %a) {
 ; RV32-LABEL: bitcast_i64_v4i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vslide1up.vx v9, v8, a1
-; RV32-NEXT:    vslide1up.vx v10, v9, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v9, v8, a1
 ; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT:    vslideup.vi v8, v10, 0
+; RV32-NEXT:    vslideup.vi v8, v9, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: bitcast_i64_v4i16:
@@ -547,11 +546,10 @@ define <2 x i32> @bitcast_i64_v2i32(i64 %a) {
 ; RV32-LABEL: bitcast_i64_v2i32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vslide1up.vx v9, v8, a1
-; RV32-NEXT:    vslide1up.vx v10, v9, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v9, v8, a1
 ; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT:    vslideup.vi v8, v10, 0
+; RV32-NEXT:    vslideup.vi v8, v9, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: bitcast_i64_v2i32:
@@ -586,11 +584,10 @@ define <1 x i64> @bitcast_i64_v1i64(i64 %a) {
 ; RV32-LABEL: bitcast_i64_v1i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vslide1up.vx v9, v8, a1
-; RV32-NEXT:    vslide1up.vx v10, v9, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v9, v8, a1
 ; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT:    vslideup.vi v8, v10, 0
+; RV32-NEXT:    vslideup.vi v8, v9, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: bitcast_i64_v1i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll
index 9d4f3d6..b316c71 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll
@@ -199,11 +199,10 @@ define <4 x half> @bitcast_i64_v4f16(i64 %a) {
 ; RV32-FP-LABEL: bitcast_i64_v4f16:
 ; RV32-FP:       # %bb.0:
 ; RV32-FP-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; RV32-FP-NEXT:    vmv.v.i v8, 0
-; RV32-FP-NEXT:    vslide1up.vx v9, v8, a1
-; RV32-FP-NEXT:    vslide1up.vx v10, v9, a0
+; RV32-FP-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-FP-NEXT:    vslide1down.vx v9, v8, a1
 ; RV32-FP-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV32-FP-NEXT:    vslideup.vi v8, v10, 0
+; RV32-FP-NEXT:    vslideup.vi v8, v9, 0
 ; RV32-FP-NEXT:    ret
 ;
 ; RV64-FP-LABEL: bitcast_i64_v4f16:
@@ -219,11 +218,10 @@ define <2 x float> @bitcast_i64_v2f32(i64 %a) {
 ; RV32-FP-LABEL: bitcast_i64_v2f32:
 ; RV32-FP:       # %bb.0:
 ; RV32-FP-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; RV32-FP-NEXT:    vmv.v.i v8, 0
-; RV32-FP-NEXT:    vslide1up.vx v9, v8, a1
-; RV32-FP-NEXT:    vslide1up.vx v10, v9, a0
+; RV32-FP-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-FP-NEXT:    vslide1down.vx v9, v8, a1
 ; RV32-FP-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV32-FP-NEXT:    vslideup.vi v8, v10, 0
+; RV32-FP-NEXT:    vslideup.vi v8, v9, 0
 ; RV32-FP-NEXT:    ret
 ;
 ; RV64-FP-LABEL: bitcast_i64_v2f32:
@@ -239,11 +237,10 @@ define <1 x double> @bitcast_i64_v1f64(i64 %a) {
 ; RV32-FP-LABEL: bitcast_i64_v1f64:
 ; RV32-FP:       # %bb.0:
 ; RV32-FP-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; RV32-FP-NEXT:    vmv.v.i v8, 0
-; RV32-FP-NEXT:    vslide1up.vx v9, v8, a1
-; RV32-FP-NEXT:    vslide1up.vx v10, v9, a0
+; RV32-FP-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-FP-NEXT:    vslide1down.vx v9, v8, a1
 ; RV32-FP-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV32-FP-NEXT:    vslideup.vi v8, v10, 0
+; RV32-FP-NEXT:    vslideup.vi v8, v9, 0
 ; RV32-FP-NEXT:    ret
 ;
 ; RV64-FP-LABEL: bitcast_i64_v1f64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index d86535f..fe728c0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -11,9 +11,8 @@ define void @insertelt_v4i64(<4 x i64>* %x, i64 %y) {
 ; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-NEXT:    vle64.v v8, (a0)
 ; RV32-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.i v10, 0
-; RV32-NEXT:    vslide1up.vx v12, v10, a2
-; RV32-NEXT:    vslide1up.vx v10, v12, a1
+; RV32-NEXT:    vslide1down.vx v10, v8, a1
+; RV32-NEXT:    vslide1down.vx v10, v10, a2
 ; RV32-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
 ; RV32-NEXT:    vslideup.vi v8, v10, 3
 ; RV32-NEXT:    vse64.v v8, (a0)
@@ -52,9 +51,8 @@ define void @insertelt_v3i64(<3 x i64>* %x, i64 %y) {
 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, tu, ma
 ; RV32-NEXT:    vslideup.vi v8, v10, 2
 ; RV32-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.i v10, 0
-; RV32-NEXT:    vslide1up.vx v12, v10, a2
-; RV32-NEXT:    vslide1up.vx v10, v12, a1
+; RV32-NEXT:    vslide1down.vx v10, v8, a1
+; RV32-NEXT:    vslide1down.vx v10, v10, a2
 ; RV32-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
 ; RV32-NEXT:    vslideup.vi v8, v10, 2
 ; RV32-NEXT:    sw a1, 16(a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
index f890241..dde5bfb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -132,8 +132,6 @@ define <2 x i64> @mgather_v2i64_align4(<2 x i64*> %ptrs, <2 x i1> %m, <2 x i64>
 ; RV32-NEXT:    vsetivli zero, 0, e8, mf8, ta, ma
 ; RV32-NEXT:    vmv.x.s a0, v0
 ; RV32-NEXT:    andi a1, a0, 1
-; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.i v10, 0
 ; RV32-NEXT:    bnez a1, .LBB5_3
 ; RV32-NEXT:  # %bb.1: # %else
 ; RV32-NEXT:    andi a0, a0, 2
@@ -142,26 +140,28 @@ define <2 x i64> @mgather_v2i64_align4(<2 x i64*> %ptrs, <2 x i1> %m, <2 x i64>
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ; RV32-NEXT:  .LBB5_3: # %cond.load
+; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; RV32-NEXT:    vmv.x.s a1, v8
-; RV32-NEXT:    lw a2, 4(a1)
-; RV32-NEXT:    lw a1, 0(a1)
-; RV32-NEXT:    vslide1up.vx v11, v10, a2
-; RV32-NEXT:    vslide1up.vx v12, v11, a1
+; RV32-NEXT:    lw a2, 0(a1)
+; RV32-NEXT:    lw a1, 4(a1)
+; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
+; RV32-NEXT:    vslide1down.vx v10, v8, a2
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
 ; RV32-NEXT:    vsetivli zero, 1, e64, m1, tu, ma
-; RV32-NEXT:    vslideup.vi v9, v12, 0
+; RV32-NEXT:    vslideup.vi v9, v10, 0
 ; RV32-NEXT:    andi a0, a0, 2
 ; RV32-NEXT:    beqz a0, .LBB5_2
 ; RV32-NEXT:  .LBB5_4: # %cond.load1
 ; RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
 ; RV32-NEXT:    vslidedown.vi v8, v8, 1
 ; RV32-NEXT:    vmv.x.s a0, v8
-; RV32-NEXT:    lw a1, 4(a0)
-; RV32-NEXT:    lw a0, 0(a0)
+; RV32-NEXT:    lw a1, 0(a0)
+; RV32-NEXT:    lw a0, 4(a0)
 ; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; RV32-NEXT:    vslide1up.vx v8, v10, a1
-; RV32-NEXT:    vslide1up.vx v10, v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
-; RV32-NEXT:    vslideup.vi v9, v10, 1
+; RV32-NEXT:    vslideup.vi v9, v8, 1
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
index 7edba04..6328add 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
@@ -636,9 +636,8 @@ define <vscale x 1 x i64> @insertelt_nxv1i64_0(<vscale x 1 x i64> %v, i64 %elt)
 ; CHECK-LABEL: insertelt_nxv1i64_0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    vslide1up.vx v10, v9, a1
-; CHECK-NEXT:    vslide1up.vx v9, v10, a0
+; CHECK-NEXT:    vslide1down.vx v9, v8, a0
+; CHECK-NEXT:    vslide1down.vx v9, v9, a1
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m1, tu, ma
 ; CHECK-NEXT:    vslideup.vi v8, v9, 0
 ; CHECK-NEXT:    ret
@@ -650,9 +649,8 @@ define <vscale x 1 x i64> @insertelt_nxv1i64_imm(<vscale x 1 x i64> %v, i64 %elt
 ; CHECK-LABEL: insertelt_nxv1i64_imm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    vslide1up.vx v10, v9, a1
-; CHECK-NEXT:    vslide1up.vx v9, v10, a0
+; CHECK-NEXT:    vslide1down.vx v9, v8, a0
+; CHECK-NEXT:    vslide1down.vx v9, v9, a1
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m1, tu, ma
 ; CHECK-NEXT:    vslideup.vi v8, v9, 3
 ; CHECK-NEXT:    ret
@@ -664,9 +662,8 @@ define <vscale x 1 x i64> @insertelt_nxv1i64_idx(<vscale x 1 x i64> %v, i64 %elt
 ; CHECK-LABEL: insertelt_nxv1i64_idx:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    vslide1up.vx v10, v9, a1
-; CHECK-NEXT:    vslide1up.vx v9, v10, a0
+; CHECK-NEXT:    vslide1down.vx v9, v8, a0
+; CHECK-NEXT:    vslide1down.vx v9, v9, a1
 ; CHECK-NEXT:    addi a0, a2, 1
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, ma
 ; CHECK-NEXT:    vslideup.vx v8, v9, a2
@@ -679,9 +676,8 @@ define <vscale x 2 x i64> @insertelt_nxv2i64_0(<vscale x 2 x i64> %v, i64 %elt)
 ; CHECK-LABEL: insertelt_nxv2i64_0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
-; CHECK-NEXT:    vmv.v.i v10, 0
-; CHECK-NEXT:    vslide1up.vx v12, v10, a1
-; CHECK-NEXT:    vslide1up.vx v10, v12, a0
+; CHECK-NEXT:    vslide1down.vx v10, v8, a0
+; CHECK-NEXT:    vslide1down.vx v10, v10, a1
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m2, tu, ma
 ; CHECK-NEXT:    vslideup.vi v8, v10, 0
 ; CHECK-NEXT:    ret
@@ -693,9 +689,8 @@ define <vscale x 2 x i64> @insertelt_nxv2i64_imm(<vscale x 2 x i64> %v, i64 %elt
 ; CHECK-LABEL: insertelt_nxv2i64_imm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
-; CHECK-NEXT:    vmv.v.i v10, 0
-; CHECK-NEXT:    vslide1up.vx v12, v10, a1
-; CHECK-NEXT:    vslide1up.vx v10, v12, a0
+; CHECK-NEXT:    vslide1down.vx v10, v8, a0
+; CHECK-NEXT:    vslide1down.vx v10, v10, a1
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
 ; CHECK-NEXT:    vslideup.vi v8, v10, 3
 ; CHECK-NEXT:    ret
@@ -707,9 +702,8 @@ define <vscale x 2 x i64> @insertelt_nxv2i64_idx(<vscale x 2 x i64> %v, i64 %elt
 ; CHECK-LABEL: insertelt_nxv2i64_idx:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
-; CHECK-NEXT:    vmv.v.i v10, 0
-; CHECK-NEXT:    vslide1up.vx v12, v10, a1
-; CHECK-NEXT:    vslide1up.vx v10, v12, a0
+; CHECK-NEXT:    vslide1down.vx v10, v8, a0
+; CHECK-NEXT:    vslide1down.vx v10, v10, a1
 ; CHECK-NEXT:    addi a0, a2, 1
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, ma
 ; CHECK-NEXT:    vslideup.vx v8, v10, a2
@@ -722,9 +716,8 @@ define <vscale x 4 x i64> @insertelt_nxv4i64_0(<vscale x 4 x i64> %v, i64 %elt)
 ; CHECK-LABEL: insertelt_nxv4i64_0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m4, ta, ma
-; CHECK-NEXT:    vmv.v.i v12, 0
-; CHECK-NEXT:    vslide1up.vx v16, v12, a1
-; CHECK-NEXT:    vslide1up.vx v12, v16, a0
+; CHECK-NEXT:    vslide1down.vx v12, v8, a0
+; CHECK-NEXT:    vslide1down.vx v12, v12, a1
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m4, tu, ma
 ; CHECK-NEXT:    vslideup.vi v8, v12, 0
 ; CHECK-NEXT:    ret
@@ -736,9 +729,8 @@ define <vscale x 4 x i64> @insertelt_nxv4i64_imm(<vscale x 4 x i64> %v, i64 %elt
 ; CHECK-LABEL: insertelt_nxv4i64_imm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m4, ta, ma
-; CHECK-NEXT:    vmv.v.i v12, 0
-; CHECK-NEXT:    vslide1up.vx v16, v12, a1
-; CHECK-NEXT:    vslide1up.vx v12, v16, a0
+; CHECK-NEXT:    vslide1down.vx v12, v8, a0
+; CHECK-NEXT:    vslide1down.vx v12, v12, a1
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m4, tu, ma
 ; CHECK-NEXT:    vslideup.vi v8, v12, 3
 ; CHECK-NEXT:    ret
@@ -750,9 +742,8 @@ define <vscale x 4 x i64> @insertelt_nxv4i64_idx(<vscale x 4 x i64> %v, i64 %elt
 ; CHECK-LABEL: insertelt_nxv4i64_idx:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m4, ta, ma
-; CHECK-NEXT:    vmv.v.i v12, 0
-; CHECK-NEXT:    vslide1up.vx v16, v12, a1
-; CHECK-NEXT:    vslide1up.vx v12, v16, a0
+; CHECK-NEXT:    vslide1down.vx v12, v8, a0
+; CHECK-NEXT:    vslide1down.vx v12, v12, a1
 ; CHECK-NEXT:    addi a0, a2, 1
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, ma
 ; CHECK-NEXT:    vslideup.vx v8, v12, a2
@@ -765,9 +756,8 @@ define <vscale x 8 x i64> @insertelt_nxv8i64_0(<vscale x 8 x i64> %v, i64 %elt)
 ; CHECK-LABEL: insertelt_nxv8i64_0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m8, ta, ma
-; CHECK-NEXT:    vmv.v.i v16, 0
-; CHECK-NEXT:    vslide1up.vx v24, v16, a1
-; CHECK-NEXT:    vslide1up.vx v16, v24, a0
+; CHECK-NEXT:    vslide1down.vx v16, v8, a0
+; CHECK-NEXT:    vslide1down.vx v16, v16, a1
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m8, tu, ma
 ; CHECK-NEXT:    vslideup.vi v8, v16, 0
 ; CHECK-NEXT:    ret
@@ -779,9 +769,8 @@ define <vscale x 8 x i64> @insertelt_nxv8i64_imm(<vscale x 8 x i64> %v, i64 %elt
 ; CHECK-LABEL: insertelt_nxv8i64_imm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m8, ta, ma
-; CHECK-NEXT:    vmv.v.i v16, 0
-; CHECK-NEXT:    vslide1up.vx v24, v16, a1
-; CHECK-NEXT:    vslide1up.vx v16, v24, a0
+; CHECK-NEXT:    vslide1down.vx v16, v8, a0
+; CHECK-NEXT:    vslide1down.vx v16, v16, a1
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m8, tu, ma
 ; CHECK-NEXT:    vslideup.vi v8, v16, 3
 ; CHECK-NEXT:    ret
@@ -793,9 +782,8 @@ define <vscale x 8 x i64> @insertelt_nxv8i64_idx(<vscale x 8 x i64> %v, i64 %elt
 ; CHECK-LABEL: insertelt_nxv8i64_idx:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m8, ta, ma
-; CHECK-NEXT:    vmv.v.i v16, 0
-; CHECK-NEXT:    vslide1up.vx v24, v16, a1
-; CHECK-NEXT:    vslide1up.vx v16, v24, a0
+; CHECK-NEXT:    vslide1down.vx v16, v8, a0
+; CHECK-NEXT:    vslide1down.vx v16, v16, a1
 ; CHECK-NEXT:    addi a0, a2, 1
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, ma
 ; CHECK-NEXT:    vslideup.vx v8, v16, a2