getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
// Set the vector length to only the number of elements we care about. Note
// that for slideup this includes the offset.
- SDValue VL =
- getVLOp(OrigIdx + SubVecVT.getVectorNumElements(), DL, DAG, Subtarget);
+ unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
+ SDValue VL = getVLOp(EndIndex, DL, DAG, Subtarget);
SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
- // Use tail agnostic policy if OrigIdx is the last index of Vec.
+ // Use tail agnostic policy if we're inserting over Vec's tail.
unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
- if (VecVT.isFixedLengthVector() &&
- OrigIdx + 1 == VecVT.getVectorNumElements())
+ if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
Policy = RISCVII::TAIL_AGNOSTIC;
SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
SlideupAmt, Mask, VL, Policy);
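For reference, a minimal hypothetical IR example (not one of the tests touched by this patch; the function name and types are illustrative only) of an insert whose end index equals the destination's element count, which the condition above should now lower with a tail agnostic vslideup:

declare <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32>, <4 x i32>, i64)

; EndIndex = 4 + 4 == 8, the number of elements in %vec, so no element past the
; slideup's VL needs to be preserved and the tail can be agnostic.
define <8 x i32> @insert_covers_tail(<8 x i32> %vec, <4 x i32> %sub) {
  %r = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %sub, i64 4)
  ret <8 x i32> %r
}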
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vsaddu.vx v8, v8, a1
; CHECK-NEXT: vmsltu.vx v0, v8, a2
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, tu, ma
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v0, v16, 2
; CHECK-NEXT: ret
%mask = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 %index, i64 %tc)
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsaddu.vx v8, v8, a1
; CHECK-NEXT: vmsltu.vx v16, v8, a2
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v0, v16, 6
; CHECK-NEXT: ret
%mask = call <64 x i1> @llvm.get.active.lane.mask.v64i1.i64(i64 %index, i64 %tc)
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsaddu.vx v8, v8, a1
; CHECK-NEXT: vmsltu.vx v16, v8, a2
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, tu, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vslideup.vi v0, v16, 14
; CHECK-NEXT: ret
%mask = call <128 x i1> @llvm.get.active.lane.mask.v128i1.i64(i64 %index, i64 %tc)
; CHECK-NEXT: vmv1r.v v0, v28
; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t
; CHECK-NEXT: li a0, 32
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vslideup.vi v16, v24, 16
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%v = call <32 x float> @llvm.vp.fptrunc.v32f32.v32f64(<32 x double> %a, <32 x i1> %m, i32 %vl)
ret <32 x float> %v
; CHECK-NEXT: vmv1r.v v0, v28
; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
; CHECK-NEXT: li a0, 128
-; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vslideup.vx v16, v24, a1
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%v = call <128 x i7> @llvm.vp.trunc.v128i7.v128i16(<128 x i16> %a, <128 x i1> %m, i32 %vl)
ret <128 x i7> %v
; CHECK-NEXT: # %bb.11:
; CHECK-NEXT: li a7, 32
; CHECK-NEXT: .LBB16_12:
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, tu, ma
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a4, 48
; CHECK-NEXT: mul a1, a1, a4
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, tu, ma
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: vslideup.vi v16, v8, 16
; CHECK-NEXT: vse32.v v16, (a0)
; CHECK-NEXT: addi a1, a0, 256
; CHECK-NEXT: vmv1r.v v0, v28
; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
; CHECK-NEXT: li a0, 32
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vslideup.vi v16, v24, 16
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%v = call <32 x i32> @llvm.vp.trunc.v32i32.v32i64(<32 x i64> %a, <32 x i1> %m, i32 %vl)
ret <32 x i32> %v
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-NEXT: vslideup.vi v8, v9, 8
; LMULMAX1-NEXT: vse8.v v8, (a0)
; LMULMAX1-NEXT: ret
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
; LMULMAX1-NEXT: vse8.v v8, (a0)
; LMULMAX1-NEXT: ret
; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
; LMULMAX1-NEXT: vse16.v v8, (a0)
; LMULMAX1-NEXT: ret
; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-NEXT: vslideup.vi v8, v9, 12
; LMULMAX1-NEXT: vse8.v v8, (a0)
; LMULMAX1-NEXT: ret
; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT: vslideup.vi v10, v9, 4
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse16.v v10, (a1)
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
; LMULMAX1-NEXT: vse8.v v8, (a0)
; LMULMAX1-NEXT: ret
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
; LMULMAX1-NEXT: vse16.v v8, (a0)
; LMULMAX1-NEXT: ret
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
; LMULMAX1-NEXT: vse32.v v8, (a0)
; LMULMAX1-NEXT: ret
; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT: vslideup.vi v8, v9, 6
; LMULMAX1-NEXT: vse8.v v8, (a0)
; LMULMAX1-NEXT: ret
; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT: vslideup.vi v8, v9, 6
; LMULMAX1-NEXT: vse16.v v8, (a0)
; LMULMAX1-NEXT: ret
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT: vslideup.vi v10, v9, 2
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse32.v v10, (a1)
; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-NEXT: vslideup.vi v8, v9, 14
; LMULMAX1-NEXT: vse8.v v8, (a0)
; LMULMAX1-NEXT: ret
; LMULMAX4-NEXT: vnsrl.wi v8, v14, 0
; LMULMAX4-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; LMULMAX4-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX4-NEXT: vsetivli zero, 16, e8, m1, tu, ma
+; LMULMAX4-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX4-NEXT: vslideup.vi v8, v12, 8
; LMULMAX4-NEXT: vse8.v v8, (a0)
; LMULMAX4-NEXT: ret
; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT: vslideup.vi v8, v9, 6
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v9, v13, 0
; LMULMAX1-NEXT: vnsrl.wi v9, v15, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT: vslideup.vi v10, v9, 6
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse16.v v10, (a1)
; LMULMAX4-NEXT: vnsrl.wi v14, v8, 0
; LMULMAX4-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; LMULMAX4-NEXT: vnsrl.wi v8, v14, 0
-; LMULMAX4-NEXT: vsetivli zero, 16, e16, m2, tu, ma
+; LMULMAX4-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX4-NEXT: vslideup.vi v8, v12, 8
; LMULMAX4-NEXT: vse16.v v8, (a0)
; LMULMAX4-NEXT: ret
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT: vslideup.vi v10, v9, 2
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v9, v13, 0
; LMULMAX1-NEXT: vnsrl.wi v11, v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT: vslideup.vi v11, v9, 2
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v9, v15, 0
; LMULMAX1-NEXT: vnsrl.wi v12, v14, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT: vslideup.vi v12, v9, 2
; LMULMAX1-NEXT: addi a1, a0, 48
; LMULMAX1-NEXT: vse32.v v12, (a1)
; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX4-NEXT: vnsrl.wi v16, v12, 0
; LMULMAX4-NEXT: vnsrl.wi v12, v8, 0
-; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, tu, ma
+; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; LMULMAX4-NEXT: vslideup.vi v12, v16, 8
; LMULMAX4-NEXT: vse32.v v12, (a0)
; LMULMAX4-NEXT: ret
; LMULMAX1-NEXT: vle32.v v9, (a2)
; LMULMAX1-NEXT: vfncvt.f.f.w v10, v8
; LMULMAX1-NEXT: vfncvt.f.f.w v8, v9
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT: vslideup.vi v10, v8, 4
; LMULMAX1-NEXT: vse16.v v10, (a1)
; LMULMAX1-NEXT: ret
; LMULMAX1-NEXT: vfncvt.rod.f.f.w v10, v8
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; LMULMAX1-NEXT: vfncvt.f.f.w v8, v10
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT: vslideup.vi v9, v8, 6
; LMULMAX1-NEXT: vse16.v v9, (a1)
; LMULMAX1-NEXT: ret
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; LMULMAX1-NEXT: vmv.v.i v9, 0
; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
-; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; LMULMAX1-NEXT: vmsne.vi v0, v8, 0
; LMULMAX1-NEXT: ret
%z = fptosi <8 x float> %x to <8 x i1>
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; LMULMAX1-NEXT: vmv.v.i v9, 0
; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
-; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; LMULMAX1-NEXT: vmsne.vi v0, v8, 0
; LMULMAX1-NEXT: ret
%z = fptoui <8 x float> %x to <8 x i1>
; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT: vslideup.vi v9, v8, 6
; LMULMAX1-NEXT: vse8.v v9, (a1)
; LMULMAX1-NEXT: ret
; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT: vslideup.vi v9, v8, 6
; LMULMAX1-NEXT: vse8.v v9, (a1)
; LMULMAX1-NEXT: ret
; LMULMAX1-NEXT: vmsne.vi v0, v10, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT: vslideup.vi v8, v9, 6
-; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; LMULMAX1-NEXT: vmsne.vi v0, v8, 0
; LMULMAX1-NEXT: ret
%z = fptosi <8 x double> %x to <8 x i1>
; LMULMAX1-NEXT: vmsne.vi v0, v10, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT: vslideup.vi v8, v9, 6
-; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; LMULMAX1-NEXT: vmsne.vi v0, v8, 0
; LMULMAX1-NEXT: ret
%z = fptoui <8 x double> %x to <8 x i1>
; LMULMAX1-NEXT: vfncvt.f.x.w v10, v8
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; LMULMAX1-NEXT: vfncvt.f.f.w v8, v10
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT: vslideup.vi v9, v8, 6
; LMULMAX1-NEXT: vse16.v v9, (a1)
; LMULMAX1-NEXT: ret
; LMULMAX1-NEXT: vfncvt.f.xu.w v10, v8
; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; LMULMAX1-NEXT: vfncvt.f.f.w v8, v10
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT: vslideup.vi v9, v8, 6
; LMULMAX1-NEXT: vse16.v v9, (a1)
; LMULMAX1-NEXT: ret
; CHECK-NEXT: vle32.v v8, (a1)
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v9, (a0)
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
; CHECK-NEXT: vslideup.vi v9, v8, 2
; CHECK-NEXT: vse32.v v9, (a0)
; CHECK-NEXT: ret
; LMULMAX1-NEXT: vle32.v v8, (a1)
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT: vle32.v v9, (a0)
-; LMULMAX1-NEXT: vsetvli zero, zero, e32, m1, tu, ma
; LMULMAX1-NEXT: vslideup.vi v9, v8, 2
; LMULMAX1-NEXT: vse32.v v9, (a0)
; LMULMAX1-NEXT: ret
; LMULMAX2-NEXT: vle32.v v8, (a1)
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vle32.v v10, (a0)
-; LMULMAX2-NEXT: vsetvli zero, zero, e32, m2, tu, ma
; LMULMAX2-NEXT: vslideup.vi v10, v8, 6
; LMULMAX2-NEXT: vse32.v v10, (a0)
; LMULMAX2-NEXT: ret
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-NEXT: vle32.v v9, (a0)
-; LMULMAX1-NEXT: vsetvli zero, zero, e32, m1, tu, ma
; LMULMAX1-NEXT: vslideup.vi v9, v8, 2
; LMULMAX1-NEXT: vse32.v v9, (a0)
; LMULMAX1-NEXT: ret
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vle16.v v9, (a1)
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 2
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v9, v8, 4
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmsne.vi v8, v9, 0
; CHECK-NEXT: vsm.v v8, (a0)
; CHECK-NEXT: ret
; RV32-NEXT: vlse32.v v10, (a4), zero
; RV32-NEXT: vsetvli zero, zero, e32, m1, tu, ma
; RV32-NEXT: vmv.s.x v10, a3
-; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, ma
+; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT: vslideup.vi v8, v10, 2
; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma
; RV32-NEXT: vslide1down.vx v10, v8, a1
; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
+; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
; LMULMAX1-NEXT: vse8.v v8, (a1)
; LMULMAX1-NEXT: ret
; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v16, v0.t
; RV64V-NEXT: li a0, 32
-; RV64V-NEXT: vsetvli zero, a0, e8, m2, tu, ma
+; RV64V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; RV64V-NEXT: vslideup.vi v12, v10, 16
-; RV64V-NEXT: vmv2r.v v8, v12
+; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v32i8:
; RV32-NEXT: vid.v v24
; RV32-NEXT: vmsltu.vx v12, v24, a1
; RV32-NEXT: vmsltu.vx v13, v16, a1
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vslideup.vi v12, v13, 4
; RV32-NEXT: li a0, 64
; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64-NEXT: vid.v v24
; RV64-NEXT: vmsltu.vx v12, v24, a1
; RV64-NEXT: vmsltu.vx v13, v16, a1
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: vslideup.vi v12, v13, 4
; RV64-NEXT: li a0, 64
; RV64-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmfeq.vv v16, v8, v24, v0.t
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, tu, ma
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vslideup.vi v16, v1, 8
-; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vmv.v.v v0, v16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmfeq.vv v16, v8, v24, v0.t
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, tu, ma
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v16, v1, 2
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v16, v1, 4
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v16, v25, 4
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v16, v25, 4
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
; RV64-NEXT: vmv1r.v v0, v10
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsetvli zero, a0, e8, m2, tu, ma
+; RV64-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; RV64-NEXT: vslideup.vi v8, v12, 16
; RV64-NEXT: ret
%ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
; RV32-NEXT: vnsrl.wi v24, v16, 0
; RV32-NEXT: vnsrl.wi v16, v8, 0
; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsetvli zero, a2, e32, m8, tu, ma
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vslideup.vi v16, v24, 16
-; RV32-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; RV32-NEXT: li a3, 16
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: mv a2, a1
; CHECK-NEXT: vslidedown.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 1
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, tu, ma
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: ret
%v4i8 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: vslidedown.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 1
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: ret
%v4i16 = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: vslidedown.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 1
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: ret
%v4i32 = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: vslidedown.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 1
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: ret
%v4i64 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: vslidedown.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 1
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: ret
%v4f16 = shufflevector <2 x half> %a, <2 x half> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: vslidedown.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 1
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: ret
%v4f32 = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: vslidedown.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 1
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: ret
%v4f64 = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
define <4 x i64> @vector_interleave_v4i64_v2i64(<2 x i64> %a, <2 x i64> %b) {
; RV32-LABEL: vector_interleave_v4i64_v2i64:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v10, v9
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vmv.v.i v12, 0
; RV32-NEXT: vsetivli zero, 2, e64, m2, tu, ma
; RV32-NEXT: vslideup.vi v12, v8, 0
-; RV32-NEXT: vsetivli zero, 4, e64, m2, tu, ma
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vslideup.vi v12, v10, 2
; RV32-NEXT: lui a0, %hi(.LCPI3_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI3_0)
-; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT: vle16.v v10, (a0)
; RV32-NEXT: vrgatherei16.vv v8, v12, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vector_interleave_v4i64_v2i64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v10, v9
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vmv.v.i v12, 0
; RV64-NEXT: vsetivli zero, 2, e64, m2, tu, ma
; RV64-NEXT: vslideup.vi v12, v8, 0
-; RV64-NEXT: vsetivli zero, 4, e64, m2, tu, ma
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vslideup.vi v12, v10, 2
; RV64-NEXT: lui a0, %hi(.LCPI3_0)
; RV64-NEXT: addi a0, a0, %lo(.LCPI3_0)
-; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT: vle64.v v10, (a0)
; RV64-NEXT: vrgather.vv v8, v12, v10
; RV64-NEXT: ret
%res = call <4 x i64> @llvm.experimental.vector.interleave2.v4i64(<2 x i64> %a, <2 x i64> %b)
define <4 x double> @vector_interleave_v4f64_v2f64(<2 x double> %a, <2 x double> %b) {
; RV32-LABEL: vector_interleave_v4f64_v2f64:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v10, v9
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vmv.v.i v12, 0
; RV32-NEXT: vsetivli zero, 2, e64, m2, tu, ma
; RV32-NEXT: vslideup.vi v12, v8, 0
-; RV32-NEXT: vsetivli zero, 4, e64, m2, tu, ma
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vslideup.vi v12, v10, 2
; RV32-NEXT: lui a0, %hi(.LCPI9_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI9_0)
-; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT: vle16.v v10, (a0)
; RV32-NEXT: vrgatherei16.vv v8, v12, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vector_interleave_v4f64_v2f64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v10, v9
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vmv.v.i v12, 0
; RV64-NEXT: vsetivli zero, 2, e64, m2, tu, ma
; RV64-NEXT: vslideup.vi v12, v8, 0
-; RV64-NEXT: vsetivli zero, 4, e64, m2, tu, ma
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vslideup.vi v12, v10, 2
; RV64-NEXT: lui a0, %hi(.LCPI9_0)
; RV64-NEXT: addi a0, a0, %lo(.LCPI9_0)
-; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT: vle64.v v10, (a0)
; RV64-NEXT: vrgather.vv v8, v12, v10
; RV64-NEXT: ret
%res = call <4 x double> @llvm.experimental.vector.interleave2.v4f64(<2 x double> %a, <2 x double> %b)