From 65ffcc099c7f91a15e57b013e2713fa1a49540d2 Mon Sep 17 00:00:00 2001
From: LiaoChunyu
Date: Tue, 18 Jul 2023 15:10:53 +0800
Subject: [PATCH] [RISCV] Lower VP_CTLZ_ZERO_UNDEF/VP_CTTZ_ZERO_UNDEF/VP_CTLZ
 by converting to FP and extracting the exponent.

D111904 and D141585 made RISC-V custom-lower the vector
ISD::CTLZ_ZERO_UNDEF/CTTZ_ZERO_UNDEF/CTLZ nodes by converting to float and
using the float result.

VP_CTLZ_ZERO_UNDEF/VP_CTTZ_ZERO_UNDEF/VP_CTLZ can use the same lowering.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D155150
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |   76 +-
 llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll      | 7592 ++++-----------------------
 llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll      | 3892 ++++----------
 3 files changed, 1988 insertions(+), 9572 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8384f29..9ad9d93 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -765,9 +765,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       // range of f32.
       EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
       if (isTypeLegal(FloatVT)) {
-        setOperationAction(
-            {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
-            Custom);
+        setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
+                            ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
+                            ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
+                           VT, Custom);
       }
     }
   }
@@ -4285,6 +4286,16 @@ RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
   unsigned EltSize = VT.getScalarSizeInBits();
   SDValue Src = Op.getOperand(0);
   SDLoc DL(Op);
+  MVT ContainerVT = VT;
+
+  SDValue Mask, VL;
+  if (Op->isVPOpcode()) {
+    Mask = Op.getOperand(1);
+    if (VT.isFixedLengthVector())
+      Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
+                                     Subtarget);
+    VL = Op.getOperand(2);
+  }
 
   // We choose FP type that can represent the value if possible. Otherwise, we
   // use rounding to zero conversion for correct exponent of the result.
@@ -4305,21 +4316,27 @@
   if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
     SDValue Neg = DAG.getNegative(Src, DL, VT);
     Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
+  } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
+    SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
+                              Src, Mask, VL);
+    Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
   }
 
   // We have a legal FP type, convert to it.
   SDValue FloatVal;
   if (FloatVT.bitsGT(VT)) {
-    FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
+    if (Op->isVPOpcode())
+      FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
+    else
+      FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
   } else {
     // Use RTZ to avoid rounding influencing exponent of FloatVal.
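(Aside, not part of the diff: to make the trick concrete, here is a minimal
scalar C++ sketch of what this lowering computes for one i8 element. It
assumes the value converts exactly to f32, which any uint8_t does, so the
RTZ path is not needed; the helper name clz8_via_f32 is illustrative only,
not an LLVM API.

    #include <algorithm>
    #include <cstdint>
    #include <cstring>

    // ctlz for one i8 element, the way lowerCTLZ_CTTZ_ZERO_UNDEF does it:
    // convert to float, bitcast, shift out the mantissa, then rebias.
    unsigned clz8_via_f32(uint8_t X) {
      float F = static_cast<float>(X); // exact for 0..255
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof(Bits)); // the DAG.getBitcast step
      unsigned Exp = Bits >> 23;             // biased exponent; ShiftAmt = 23
      unsigned Res = (127 + 7) - Exp;        // Adjust = Bias + (EltSize - 1)
      return std::min(Res, 8u);              // ISD::CTLZ: map X == 0 to 8
    }

The masked VP form below computes the same thing lane by lane, under the
mask and vector length it is handed.)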
-    MVT ContainerVT = VT;
     if (VT.isFixedLengthVector()) {
       ContainerVT = getContainerForFixedLengthVector(VT);
       Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
     }
-
-    auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+    if (!Op->isVPOpcode())
+      std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
     SDValue RTZRM =
         DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
     MVT ContainerFloatVT =
@@ -4333,30 +4350,49 @@
   EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
   SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
   unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
-  SDValue Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
-                            DAG.getConstant(ShiftAmt, DL, IntVT));
+
+  SDValue Exp;
   // Restore back to original type. Truncation after SRL is to generate vnsrl.
-  if (IntVT.bitsLT(VT))
-    Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
-  else if (IntVT.bitsGT(VT))
-    Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
+  if (Op->isVPOpcode()) {
+    Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
+                      DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
+    Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
+  } else {
+    Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
+                      DAG.getConstant(ShiftAmt, DL, IntVT));
+    if (IntVT.bitsLT(VT))
+      Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
+    else if (IntVT.bitsGT(VT))
+      Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
+  }
+
   // The exponent contains log2 of the value in biased form.
   unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
-
   // For trailing zeros, we just need to subtract the bias.
   if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
     return DAG.getNode(ISD::SUB, DL, VT, Exp,
                        DAG.getConstant(ExponentBias, DL, VT));
+  if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
+    return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
+                       DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
 
   // For leading zeros, we need to remove the bias and convert from log2 to
   // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
   unsigned Adjust = ExponentBias + (EltSize - 1);
-  SDValue Res =
-      DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
+  SDValue Res;
+  if (Op->isVPOpcode())
+    Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
+                      Mask, VL);
+  else
+    Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
+
   // The above result with zero input equals to Adjust which is greater than
   // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
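(Aside, not part of the diff: the constants in the regenerated tests below
drop straight out of Adjust = ExponentBias + (EltSize - 1): i8 via f32 gives
127 + 7 = 134, i16 via f32 gives 127 + 15 = 142, and i32 via f64 gives
1023 + 31 = 1054, which is exactly where the li a0, 134 / li a0, 142 /
li a0, 1054 materializations come from, with the following vminu.vx clamping
to EltSize (8, 16, or 32) for the plain VP_CTLZ case. The trailing-zero path
reuses the same exponent extraction after isolating the lowest set bit; a
scalar C++ sketch, with a hypothetical helper name:

    #include <cstdint>
    #include <cstring>

    // cttz_zero_undef for one i8 element: X & -X keeps only the lowest set
    // bit, whose biased exponent encodes the trailing-zero count.
    unsigned cttz8_via_f32(uint8_t X) { // result undefined for X == 0
      uint8_t Lowest = X & static_cast<uint8_t>(-X);
      float F = static_cast<float>(Lowest);
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof(Bits));
      return (Bits >> 23) - 127; // e.g. X = 12: Lowest = 4, exponent 129, so 2
    })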
if (Op.getOpcode() == ISD::CTLZ) Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT)); + else if (Op.getOpcode() == ISD::VP_CTLZ) + Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res, + DAG.getConstant(EltSize, DL, VT), Mask, VL); return Res; } @@ -5440,10 +5476,14 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return lowerVPOp(Op, DAG, RISCVISD::BSWAP_VL, /*HasMergeOp*/ true); case ISD::VP_CTLZ: case ISD::VP_CTLZ_ZERO_UNDEF: - return lowerVPOp(Op, DAG, RISCVISD::CTLZ_VL, /*HasMergeOp*/ true); + if (Subtarget.hasStdExtZvbb()) + return lowerVPOp(Op, DAG, RISCVISD::CTLZ_VL, /*HasMergeOp*/ true); + return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); case ISD::VP_CTTZ: case ISD::VP_CTTZ_ZERO_UNDEF: - return lowerVPOp(Op, DAG, RISCVISD::CTTZ_VL, /*HasMergeOp*/ true); + if (Subtarget.hasStdExtZvbb()) + return lowerVPOp(Op, DAG, RISCVISD::CTTZ_VL, /*HasMergeOp*/ true); + return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); case ISD::VP_CTPOP: return lowerVPOp(Op, DAG, RISCVISD::CTPOP_VL, /*HasMergeOp*/ true); case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll index 5ee4cb1..5dc5ced 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll @@ -13,26 +13,19 @@ declare @llvm.vp.ctlz.nxv1i8(, i1 immarg, @vp_ctlz_nxv1i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i8: @@ -47,26 +40,16 @@ define @vp_ctlz_nxv1i8( %va, @vp_ctlz_nxv1i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_nxv1i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; 
CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9 +; CHECK-NEXT: vnsrl.wi v8, v8, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i8_unmasked: @@ -85,26 +68,19 @@ declare @llvm.vp.ctlz.nxv2i8(, i1 immarg, @vp_ctlz_nxv2i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i8: @@ -119,26 +95,16 @@ define @vp_ctlz_nxv2i8( %va, @vp_ctlz_nxv2i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_nxv2i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9 +; CHECK-NEXT: vnsrl.wi v8, v8, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i8_unmasked: @@ -157,26 +123,19 @@ declare @llvm.vp.ctlz.nxv4i8(, i1 immarg, @vp_ctlz_nxv4i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: 
vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v10, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i8: @@ -191,26 +150,16 @@ define @vp_ctlz_nxv4i8( %va, @vp_ctlz_nxv4i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_nxv4i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v10, v9 +; CHECK-NEXT: vnsrl.wi v8, v10, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i8_unmasked: @@ -229,26 +178,19 @@ declare @llvm.vp.ctlz.nxv8i8(, i1 immarg, @vp_ctlz_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v12, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, 
e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v12, 0, v0.t +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i8: @@ -263,26 +205,16 @@ define @vp_ctlz_nxv8i8( %va, @vp_ctlz_nxv8i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_nxv8i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v12, v10 +; CHECK-NEXT: vnsrl.wi v8, v12, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0 +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v10, a0 +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i8_unmasked: @@ -301,26 +233,19 @@ declare @llvm.vp.ctlz.nxv16i8(, i1 immarg, define @vp_ctlz_nxv16i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_nxv16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t -; CHECK-NEXT: vor.vv v8, v8, v10, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v8, v10, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t -; CHECK-NEXT: vor.vv v8, v8, v10, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v10, v10, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v10, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v12, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v16, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i8: @@ -335,26 +260,16 @@ define @vp_ctlz_nxv16i8( %va, @vp_ctlz_nxv16i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_nxv16i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vsrl.vi v10, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v10 -; CHECK-NEXT: vsrl.vi v10, v8, 2 -; CHECK-NEXT: vor.vv 
v8, v8, v10 -; CHECK-NEXT: vsrl.vi v10, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v10 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v10, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v10, v10, a0 -; CHECK-NEXT: vsub.vv v8, v8, v10 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v10, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v10, v8 -; CHECK-NEXT: vsrl.vi v10, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v10 -; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v12, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v16, v12 +; CHECK-NEXT: vnsrl.wi v8, v16, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0 +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v12, a0 +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i8_unmasked: @@ -515,71 +430,19 @@ define @vp_ctlz_nxv64i8_unmasked( %va, i32 declare @llvm.vp.ctlz.nxv1i16(, i1 immarg, , i32) define @vp_ctlz_nxv1i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv1i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv1i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v8, 
v9, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i16: ; CHECK-ZVBB: # %bb.0: @@ -591,71 +454,16 @@ define @vp_ctlz_nxv1i16( %va, @vp_ctlz_nxv1i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv1i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv1i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-NEXT: vnsrl.wi v8, v9, 23 +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -671,71 +479,19 @@ define @vp_ctlz_nxv1i16_unmasked( %va, i32 declare @llvm.vp.ctlz.nxv2i16(, i1 immarg, , i32) define @vp_ctlz_nxv2i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv2i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; 
RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv2i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i16: ; CHECK-ZVBB: # %bb.0: @@ -747,71 +503,16 @@ define @vp_ctlz_nxv2i16( %va, @vp_ctlz_nxv2i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv2i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv2i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi 
v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-NEXT: vnsrl.wi v8, v9, 23 +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -827,71 +528,19 @@ define @vp_ctlz_nxv2i16_unmasked( %va, i32 declare @llvm.vp.ctlz.nxv4i16(, i1 immarg, , i32) define @vp_ctlz_nxv4i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv4i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv4i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: 
vp_ctlz_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v10, a0, v0.t +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i16: ; CHECK-ZVBB: # %bb.0: @@ -903,71 +552,16 @@ define @vp_ctlz_nxv4i16( %va, @vp_ctlz_nxv4i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv4i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv4i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v10, v8 +; CHECK-NEXT: vnsrl.wi v8, v10, 23 +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -983,71 +577,19 @@ define @vp_ctlz_nxv4i16_unmasked( %va, i32 declare @llvm.vp.ctlz.nxv8i16(, i1 immarg, , i32) define @vp_ctlz_nxv8i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv8i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 8, 
v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v12, a0, v0.t +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i16: ; CHECK-ZVBB: # %bb.0: @@ -1059,71 +601,16 @@ define @vp_ctlz_nxv8i16( %va, @vp_ctlz_nxv8i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv8i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; 
RV64-LABEL: vp_ctlz_nxv8i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v12, v8 +; CHECK-NEXT: vnsrl.wi v8, v12, 23 +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -1139,71 +626,19 @@ define @vp_ctlz_nxv8i16_unmasked( %va, i32 declare @llvm.vp.ctlz.nxv16i16(, i1 immarg, , i32) define @vp_ctlz_nxv16i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv16i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv16i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: 
vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v16, a0, v0.t +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i16: ; CHECK-ZVBB: # %bb.0: @@ -1215,71 +650,16 @@ define @vp_ctlz_nxv16i16( %va, @vp_ctlz_nxv16i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv16i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv16i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v16, v8 +; CHECK-NEXT: vnsrl.wi v8, v16, 23 +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -1451,77 +831,20 @@ define @vp_ctlz_nxv32i16_unmasked( %va, i declare @llvm.vp.ctlz.nxv1i32(, i1 immarg, , i32) define @vp_ctlz_nxv1i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: 
vp_ctlz_nxv1i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv1i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vsrl.vx v8, v9, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i32: ; CHECK-ZVBB: # %bb.0: @@ -1533,77 +856,17 @@ define @vp_ctlz_nxv1i32( %va, @vp_ctlz_nxv1i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv1i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 16 -; RV32-NEXT: vor.vv 
v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv1i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vnsrl.wx v8, v9, a0 +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i32_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -1619,77 +882,20 @@ define @vp_ctlz_nxv1i32_unmasked( %va, i32 declare @llvm.vp.ctlz.nxv2i32(, i1 immarg, , i32) define @vp_ctlz_nxv2i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx 
v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vsrl.vx v8, v10, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v10, a0, v0.t
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i32:
 ; CHECK-ZVBB: # %bb.0:
@@ -1701,77 +907,17 @@ define <vscale x 2 x i32> @vp_ctlz_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x
 define <vscale x 2 x i32> @vp_ctlz_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv2i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv2i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vnsrl.wx v8, v10, a0
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i32_unmasked:
 ; CHECK-ZVBB: # %bb.0:
@@ -1787,77 +933,20 @@ define <vscale x 2 x i32> @vp_ctlz_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32
 
 declare <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32)
 
 define <vscale x 4 x i32> @vp_ctlz_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vsrl.vx v8, v12, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v12, a0, v0.t
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i32:
 ; CHECK-ZVBB: # %bb.0:
@@ -1869,77 +958,17 @@ define <vscale x 4 x i32> @vp_ctlz_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x
 define <vscale x 4 x i32> @vp_ctlz_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv4i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v10, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv4i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vnsrl.wx v8, v12, a0
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i32_unmasked:
 ; CHECK-ZVBB: # %bb.0:
@@ -1955,77 +984,20 @@ define <vscale x 4 x i32> @vp_ctlz_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32
 
 declare <vscale x 8 x i32> @llvm.vp.ctlz.nxv8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32)
 
 define <vscale x 8 x i32> @vp_ctlz_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT: vsrl.vx v8, v16, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v16, a0, v0.t
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i32:
 ; CHECK-ZVBB: # %bb.0:
@@ -2037,77 +1009,17 @@ define <vscale x 8 x i32> @vp_ctlz_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x
 define <vscale x 8 x i32> @vp_ctlz_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv8i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v12, v12, a0
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v12, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv8i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vnsrl.wx v8, v16, a0
+; CHECK-NEXT: li a0, 1054
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i32_unmasked:
 ; CHECK-ZVBB: # %bb.0:
@@ -2123,77 +1035,18 @@ define <vscale x 8 x i32> @vp_ctlz_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32
 
 declare <vscale x 16 x i32> @llvm.vp.ctlz.nxv16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)
 
 define <vscale x 16 x i32> @vp_ctlz_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
+; CHECK-NEXT: li a0, 158
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i32:
 ; CHECK-ZVBB: # %bb.0:
@@ -2205,77 +1058,18 @@ define <vscale x 16 x i32> @vp_ctlz_nxv16i32(<vscale x 16 x i32> %va, <vscale x
 define <vscale x 16 x i32> @vp_ctlz_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv16i32_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v16, v16, a0
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v16, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: lui a0, 4112
-; RV32-NEXT: addi a0, a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv16i32_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 24
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: vsrl.vi v8, v8, 23
+; CHECK-NEXT: li a0, 158
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i32_unmasked:
 ; CHECK-ZVBB: # %bb.0:
@@ -2291,120 +1085,19 @@ define <vscale x 16 x i32> @vp_ctlz_nxv16i32_unmasked(<vscale x 16 x i32> %va, i
 
 declare <vscale x 1 x i64> @llvm.vp.ctlz.nxv1i64(<vscale x 1 x i64>, i1 immarg, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x i64> @vp_ctlz_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv1i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v9, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v9, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v9, v9, v10, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v10, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v9, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv1i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v9, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v9, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i64:
 ; CHECK-ZVBB: # %bb.0:
@@ -2416,120 +1109,19 @@ define <vscale x 1 x i64> @vp_ctlz_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x
 define <vscale x 1 x i64> @vp_ctlz_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv1i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v9, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v9, v9, v10
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v10, v8, v9
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vadd.vv v8, v10, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v9
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv1i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v9, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv1i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i64_unmasked:
 ; CHECK-ZVBB: # %bb.0:
@@ -2545,120 +1137,19 @@ define <vscale x 1 x i64> @vp_ctlz_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32
 
 declare <vscale x 2 x i64> @llvm.vp.ctlz.nxv2i64(<vscale x 2 x i64>, i1 immarg, <vscale x 2 x i1>, i32)
 
 define <vscale x 2 x i64> @vp_ctlz_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv2i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v10, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v10, v10, v12, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v12, v8, v10, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v10, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv2i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v10, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v10, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v10, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i64:
 ; CHECK-ZVBB: # %bb.0:
@@ -2670,120 +1161,19 @@ define <vscale x 2 x i64> @vp_ctlz_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x
 define <vscale x 2 x i64> @vp_ctlz_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv2i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vsrl.vi v10, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v10, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v10, v10, v12
-; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v12, v8, v10
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: vadd.vv v8, v12, v8
-; RV32-NEXT: vsrl.vi v10, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v10
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv2i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vi v10, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v10, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v10, v10, a0
-; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v10, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v10, v8
-; RV64-NEXT: vsrl.vi v10, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i64_unmasked:
 ; CHECK-ZVBB: # %bb.0:
@@ -2799,120 +1189,19 @@ define <vscale x 2 x i64> @vp_ctlz_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32
 
 declare <vscale x 4 x i64> @llvm.vp.ctlz.nxv4i64(<vscale x 4 x i64>, i1 immarg, <vscale x 4 x i1>, i32)
 
 define <vscale x 4 x i64> @vp_ctlz_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv4i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v12, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v12, v12, v16, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v16, v8, v12, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v12, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv4i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v12, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i64:
 ; CHECK-ZVBB: # %bb.0:
@@ -2924,120 +1213,19 @@ define <vscale x 4 x i64> @vp_ctlz_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x
 define <vscale x 4 x i64> @vp_ctlz_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv4i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v12, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v12, v12, v16
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v16, v8, v12
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v12
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv4i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v12, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i64_unmasked:
 ; CHECK-ZVBB: # %bb.0:
@@ -3053,120 +1241,19 @@ define <vscale x 4 x i64> @vp_ctlz_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32
 
 declare <vscale x 7 x i64> @llvm.vp.ctlz.nxv7i64(<vscale x 7 x i64>, i1 immarg, <vscale x 7 x i1>, i32)
 
 define <vscale x 7 x i64> @vp_ctlz_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv7i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv7i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv7i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv7i64:
 ; CHECK-ZVBB: # %bb.0:
@@ -3178,120 +1265,19 @@ define <vscale x 7 x i64> @vp_ctlz_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x
 define <vscale x 7 x i64> @vp_ctlz_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv7i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v16, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vadd.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv7i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v16, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv7i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv7i64_unmasked:
 ; CHECK-ZVBB: # %bb.0:
@@ -3307,120 +1293,19 @@ define <vscale x 7 x i64> @vp_ctlz_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32
 
 declare <vscale x 8 x i64> @llvm.vp.ctlz.nxv8i64(<vscale x 8 x i64>, i1 immarg, <vscale x 8 x i1>, i32)
 
 define <vscale x 8 x i64> @vp_ctlz_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv8i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i64:
 ; CHECK-ZVBB: # %bb.0:
@@ -3432,120 +1317,19 @@ define <vscale x 8 x i64> @vp_ctlz_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x
 define <vscale x 8 x i64> @vp_ctlz_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv8i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v16, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vadd.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_nxv8i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v16, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i64_unmasked:
 ; CHECK-ZVBB: # %bb.0:
@@ -3561,339 +1345,40 @@ define <vscale x 8 x i64> @vp_ctlz_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32
 
 declare <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64>, i1 immarg, <vscale x 16 x i1>, i32)
 
 define <vscale x 16 x i64> @vp_ctlz_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_nxv16i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -48
-; RV32-NEXT: .cfi_def_cfa_offset 48
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 40
-; RV32-NEXT: mul a1, a1, a2
-; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92,
0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb -; RV32-NEXT: vmv1r.v v1, v0 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: srli a2, a1, 3 -; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vx v0, v0, a2 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: sw a2, 44(sp) -; RV32-NEXT: sw a2, 40(sp) -; RV32-NEXT: lui a2, 209715 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: sw a2, 36(sp) -; RV32-NEXT: sw a2, 32(sp) -; RV32-NEXT: lui a2, 61681 -; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: sw a2, 28(sp) -; RV32-NEXT: sw a2, 24(sp) -; RV32-NEXT: lui a2, 4112 -; RV32-NEXT: addi a2, a2, 257 -; RV32-NEXT: sw a2, 20(sp) -; RV32-NEXT: sw a2, 16(sp) -; RV32-NEXT: sub a2, a0, a1 -; RV32-NEXT: sltu a3, a0, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a3, a3, a2 -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vsrl.vi v24, v16, 2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vsrl.vi v24, v16, 8, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vsrl.vi v24, v16, 16, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsrl.vx v24, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vnot.v v16, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t -; RV32-NEXT: addi a4, sp, 40 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: addi a4, sp, 32 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 3 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded 
Reload -; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 3 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v16, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t -; RV32-NEXT: vadd.vv v24, v16, v24, v0.t -; RV32-NEXT: addi a4, sp, 24 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vmul.vv v16, v24, v16, v0.t -; RV32-NEXT: li a3, 56 -; RV32-NEXT: vsrl.vx v16, v16, a3, v0.t -; RV32-NEXT: addi a4, sp, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: bltu a0, a1, .LBB46_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: .LBB46_2: -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16, v0.t 
-; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a3, v0.t -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 48 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv16i64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV64-NEXT: vmv1r.v v24, v0 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: srli a2, a1, 3 -; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v0, a2 -; RV64-NEXT: sub a2, a0, a1 -; RV64-NEXT: sltu a3, a0, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: li a2, 32 -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vnot.v v16, v8, v0.t -; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV64-NEXT: lui a3, 349525 -; RV64-NEXT: addiw a3, a3, 1365 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vsub.vv v16, v16, v8, v0.t -; RV64-NEXT: lui a4, 209715 -; RV64-NEXT: addiw a4, a4, 819 -; RV64-NEXT: slli a5, a4, 32 -; RV64-NEXT: add a4, a4, a5 -; RV64-NEXT: vand.vx v8, v16, a4, v0.t -; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV64-NEXT: vand.vx v16, v16, a4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a5, 61681 -; RV64-NEXT: addiw a5, a5, -241 -; RV64-NEXT: slli a6, a5, 32 -; RV64-NEXT: add a5, a5, a6 -; RV64-NEXT: vand.vx v8, v8, a5, v0.t -; RV64-NEXT: lui a6, 4112 -; RV64-NEXT: addiw a6, a6, 257 -; RV64-NEXT: slli a7, a6, 32 -; RV64-NEXT: add a6, a6, a7 -; RV64-NEXT: vmul.vx v8, v8, a6, v0.t -; RV64-NEXT: li a7, 56 -; RV64-NEXT: vsrl.vx v8, v8, a7, v0.t -; RV64-NEXT: addi t0, sp, 16 -; RV64-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill -; RV64-NEXT: bltu a0, a1, .LBB46_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB46_2: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, 
v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a3, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a4, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a4, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a5, v0.t -; RV64-NEXT: vmul.vx v8, v8, a6, v0.t -; RV64-NEXT: vsrl.vx v8, v8, a7, v0.t -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_nxv16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a2, a1, 3 +; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: fsrmi a3, 1 +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v16, v16, v0.t +; CHECK-NEXT: fsrm a3 +; CHECK-NEXT: li a2, 52 +; CHECK-NEXT: vsrl.vx v16, v16, a2, v0.t +; CHECK-NEXT: li a3, 1086 +; CHECK-NEXT: vrsub.vx v16, v16, a3, v0.t +; CHECK-NEXT: li a4, 64 +; CHECK-NEXT: vminu.vx v16, v16, a4, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB46_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB46_2: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t +; CHECK-NEXT: vrsub.vx v8, v8, a3, v0.t +; CHECK-NEXT: vminu.vx v8, v8, a4, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i64: ; CHECK-ZVBB: # %bb.0: @@ -3921,233 +1406,35 @@ define @vp_ctlz_nxv16i64( %va, @vp_ctlz_nxv16i64_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv16i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: sub a2, a0, a1 -; RV32-NEXT: sltu a3, a0, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a3, a3, a2 -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v16, 1 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vsrl.vi v24, v16, 2 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vsrl.vi v24, v16, 4 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vsrl.vi v24, v16, 8 -; RV32-NEXT: vor.vv v16, v16, 
v24 -; RV32-NEXT: vsrl.vi v24, v16, 16 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsrl.vx v24, v16, a2 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vnot.v v16, v16 -; RV32-NEXT: vsrl.vi v24, v16, 1 -; RV32-NEXT: addi a4, sp, 40 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a4), zero -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v24, v0 -; RV32-NEXT: vsub.vv v16, v16, v24 -; RV32-NEXT: addi a4, sp, 32 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a4), zero -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v16, v0 -; RV32-NEXT: vsrl.vi v16, v16, 2 -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vadd.vv v16, v24, v16 -; RV32-NEXT: vsrl.vi v24, v16, 4 -; RV32-NEXT: vadd.vv v16, v16, v24 -; RV32-NEXT: addi a4, sp, 24 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a4), zero -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v16, v24 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vmul.vv v24, v24, v16 -; RV32-NEXT: li a3, 56 -; RV32-NEXT: vsrl.vx v16, v24, a3 -; RV32-NEXT: addi a4, sp, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: bltu a0, a1, .LBB47_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: .LBB47_2: -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vi v24, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vi v24, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vx v24, v8, a2 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vsub.vv v8, v8, v24 -; RV32-NEXT: vand.vv v24, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsrl.vx v8, v8, a3 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # 
Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 48 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv16i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: sub a2, a0, a1 -; RV64-NEXT: sltu a3, a0, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v24, v16, 1 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vsrl.vi v24, v16, 2 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vsrl.vi v24, v16, 4 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vsrl.vi v24, v16, 8 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vsrl.vi v24, v16, 16 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: li a2, 32 -; RV64-NEXT: vsrl.vx v24, v16, a2 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vnot.v v16, v16 -; RV64-NEXT: vsrl.vi v24, v16, 1 -; RV64-NEXT: lui a3, 349525 -; RV64-NEXT: addiw a3, a3, 1365 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: vand.vx v24, v24, a3 -; RV64-NEXT: vsub.vv v16, v16, v24 -; RV64-NEXT: lui a4, 209715 -; RV64-NEXT: addiw a4, a4, 819 -; RV64-NEXT: slli a5, a4, 32 -; RV64-NEXT: add a4, a4, a5 -; RV64-NEXT: vand.vx v24, v16, a4 -; RV64-NEXT: vsrl.vi v16, v16, 2 -; RV64-NEXT: vand.vx v16, v16, a4 -; RV64-NEXT: vadd.vv v16, v24, v16 -; RV64-NEXT: vsrl.vi v24, v16, 4 -; RV64-NEXT: vadd.vv v16, v16, v24 -; RV64-NEXT: lui a5, 61681 -; RV64-NEXT: addiw a5, a5, -241 -; RV64-NEXT: slli a6, a5, 32 -; RV64-NEXT: add a5, a5, a6 -; RV64-NEXT: vand.vx v16, v16, a5 -; RV64-NEXT: lui a6, 4112 -; RV64-NEXT: addiw a6, a6, 257 -; RV64-NEXT: slli a7, a6, 32 -; RV64-NEXT: add a6, a6, a7 -; RV64-NEXT: vmul.vx v16, v16, a6 -; RV64-NEXT: li a7, 56 -; RV64-NEXT: vsrl.vx v16, v16, a7 -; RV64-NEXT: bltu a0, a1, .LBB47_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB47_2: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v24, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vsrl.vi v24, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vsrl.vi v24, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vsrl.vi v24, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vsrl.vi v24, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vsrl.vx v24, v8, a2 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v24, v8, 1 -; RV64-NEXT: vand.vx v24, v24, a3 -; RV64-NEXT: vsub.vv v8, v8, v24 -; RV64-NEXT: vand.vx v24, v8, a4 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a4 -; RV64-NEXT: vadd.vv v8, v24, v8 -; RV64-NEXT: vsrl.vi v24, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v24 -; RV64-NEXT: vand.vx v8, v8, a5 -; RV64-NEXT: vmul.vx v8, v8, a6 -; RV64-NEXT: vsrl.vx v8, v8, a7 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_nxv16i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: fsrmi a3, 1 +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v16, v16 +; CHECK-NEXT: fsrm a3 +; CHECK-NEXT: li a2, 52 +; CHECK-NEXT: vsrl.vx v16, v16, a2 +; CHECK-NEXT: li a3, 1086 +; CHECK-NEXT: vrsub.vx v16, v16, a3 +; CHECK-NEXT: li a4, 64 +; CHECK-NEXT: vminu.vx v16, v16, a4 +; CHECK-NEXT: bltu a0, a1, .LBB47_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB47_2: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: 
vfcvt.f.xu.v v8, v8
+; CHECK-NEXT:    vsrl.vx v8, v8, a2
+; CHECK-NEXT:    vrsub.vx v8, v8, a3
+; CHECK-NEXT:    vminu.vx v8, v8, a4
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i64_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
@@ -4174,26 +1461,17 @@ define <vscale x 16 x i64> @vp_ctlz_nxv16i64_unmasked(<vscale x 16 x i64> %va, i
 define <vscale x 1 x i8> @vp_ctlz_zero_undef_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT:    vnot.v v8, v8, v0.t
-; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT:    li a0, 85
-; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
-; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
-; CHECK-NEXT:    li a0, 51
-; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
-; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
-; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vzext.vf2 v9, v8, v0.t
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsrl.vi v8, v8, 23, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT:    li a0, 134
+; CHECK-NEXT:    vrsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i8:
@@ -4208,26 +1486,14 @@ define <vscale x 1 x i8> @vp_ctlz_zero_undef_nxv1i8(<vscale x 1 x i8> %va, <vsc
 define <vscale x 1 x i8> @vp_ctlz_zero_undef_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i8_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT:    vsrl.vi v9, v8, 1
-; CHECK-NEXT:    vor.vv v8, v8, v9
-; CHECK-NEXT:    vsrl.vi v9, v8, 2
-; CHECK-NEXT:    vor.vv v8, v8, v9
-; CHECK-NEXT:    vsrl.vi v9, v8, 4
-; CHECK-NEXT:    vor.vv v8, v8, v9
-; CHECK-NEXT:    vnot.v v8, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 1
-; CHECK-NEXT:    li a0, 85
-; CHECK-NEXT:    vand.vx v9, v9, a0
-; CHECK-NEXT:    vsub.vv v8, v8, v9
-; CHECK-NEXT:    li a0, 51
-; CHECK-NEXT:    vand.vx v9, v8, a0
-; CHECK-NEXT:    vsrl.vi v8, v8, 2
-; CHECK-NEXT:    vand.vx v8, v8, a0
-; CHECK-NEXT:    vadd.vv v8, v9, v8
-; CHECK-NEXT:    vsrl.vi v9, v8, 4
-; CHECK-NEXT:    vadd.vv v8, v8, v9
-; CHECK-NEXT:    vand.vi v8, v8, 15
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vzext.vf2 v9, v8
+; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT:    vnsrl.wi v8, v8, 23
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    li a0, 134
+; CHECK-NEXT:    vrsub.vx v8, v8, a0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i8_unmasked:
@@ -4245,26 +1511,17 @@ define <vscale x 1 x i8> @vp_ctlz_zero_undef_nxv1i8_unmasked(<vscale x 1 x i8> %
 define <vscale x 2 x i8> @vp_ctlz_zero_undef_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
-; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
-; CHECK-NEXT:    vnot.v v8, v8, v0.t
-; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT:    li a0, 85
-; 
CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i8: @@ -4279,26 +1536,14 @@ define @vp_ctlz_zero_undef_nxv2i8( %va, @vp_ctlz_zero_undef_nxv2i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v8, v9 +; CHECK-NEXT: vnsrl.wi v8, v8, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i8_unmasked: @@ -4316,26 +1561,17 @@ define @vp_ctlz_zero_undef_nxv2i8_unmasked( % define @vp_ctlz_zero_undef_nxv4i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v10, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t +; CHECK-NEXT: li a0, 134 +; 
CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i8: @@ -4350,26 +1586,14 @@ define @vp_ctlz_zero_undef_nxv4i8( %va, @vp_ctlz_zero_undef_nxv4i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v10, v9 +; CHECK-NEXT: vnsrl.wi v8, v10, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i8_unmasked: @@ -4387,26 +1611,17 @@ define @vp_ctlz_zero_undef_nxv4i8_unmasked( % define @vp_ctlz_zero_undef_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v12, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v12, 0, v0.t +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i8: @@ -4421,26 +1636,14 @@ define @vp_ctlz_zero_undef_nxv8i8( %va, @vp_ctlz_zero_undef_nxv8i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v9, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, 
v8, a0 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v12, v10 +; CHECK-NEXT: vnsrl.wi v8, v12, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0 +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v10, a0 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i8_unmasked: @@ -4458,26 +1661,17 @@ define @vp_ctlz_zero_undef_nxv8i8_unmasked( % define @vp_ctlz_zero_undef_nxv16i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t -; CHECK-NEXT: vor.vv v8, v8, v10, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v8, v10, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t -; CHECK-NEXT: vor.vv v8, v8, v10, v0.t -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v10, v10, a0, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v10, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t -; CHECK-NEXT: vand.vi v8, v8, 15, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v12, v8, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v16, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i8: @@ -4492,26 +1686,14 @@ define @vp_ctlz_zero_undef_nxv16i8( %va, @vp_ctlz_zero_undef_nxv16i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vsrl.vi v10, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v10 -; CHECK-NEXT: vsrl.vi v10, v8, 2 -; CHECK-NEXT: vor.vv v8, v8, v10 -; CHECK-NEXT: vsrl.vi v10, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v10 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v10, v8, 1 -; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vx v10, v10, a0 -; CHECK-NEXT: vsub.vv v8, v8, v10 -; CHECK-NEXT: li a0, 51 -; CHECK-NEXT: vand.vx v10, v8, a0 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vadd.vv v8, v10, v8 -; CHECK-NEXT: vsrl.vi v10, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v10 -; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v12, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v16, v12 +; CHECK-NEXT: vnsrl.wi v8, v16, 23 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0 +; CHECK-NEXT: li a0, 134 +; CHECK-NEXT: vrsub.vx v8, v12, a0 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i8_unmasked: @@ -4669,71 +1851,17 @@ define @vp_ctlz_zero_undef_nxv64i8_unmasked( @vp_ctlz_zero_undef_nxv1i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv1i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, 
v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv1i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i16: ; CHECK-ZVBB: # %bb.0: @@ -4745,71 +1873,14 @@ define @vp_ctlz_zero_undef_nxv1i16( %va, @vp_ctlz_zero_undef_nxv1i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv1i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; 
RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv1i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-NEXT: vnsrl.wi v8, v9, 23 +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -4824,71 +1895,17 @@ define @vp_ctlz_zero_undef_nxv1i16_unmasked( @vp_ctlz_zero_undef_nxv2i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv2i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv2i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, 
v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i16: ; CHECK-ZVBB: # %bb.0: @@ -4900,71 +1917,14 @@ define @vp_ctlz_zero_undef_nxv2i16( %va, @vp_ctlz_zero_undef_nxv2i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv2i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv2i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-NEXT: vnsrl.wi v8, v9, 23 +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -4979,71 +1939,17 @@ define @vp_ctlz_zero_undef_nxv2i16_unmasked( @vp_ctlz_zero_undef_nxv4i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv4i16: -; 
RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv4i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v10, a0, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i16: ; CHECK-ZVBB: # %bb.0: @@ -5055,71 +1961,14 @@ define @vp_ctlz_zero_undef_nxv4i16( %va, @vp_ctlz_zero_undef_nxv4i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv4i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: 
vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv4i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v10, v8 +; CHECK-NEXT: vnsrl.wi v8, v10, 23 +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -5130,151 +1979,40 @@ define @vp_ctlz_zero_undef_nxv4i16_unmasked( %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv4i16( %va, i1 true, %m, i32 %evl) ret %v -} - - -define @vp_ctlz_zero_undef_nxv8i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv8i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; 
RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret -; -; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i16: -; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t -; CHECK-ZVBB-NEXT: ret - %v = call @llvm.vp.ctlz.nxv8i16( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_ctlz_zero_undef_nxv8i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv8i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv8i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +} + + +define @vp_ctlz_zero_undef_nxv8i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v12, a0, v0.t +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: 
vp_ctlz_zero_undef_nxv8i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret + %v = call @llvm.vp.ctlz.nxv8i16( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_ctlz_zero_undef_nxv8i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v12, v8 +; CHECK-NEXT: vnsrl.wi v8, v12, 23 +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -5289,71 +2027,17 @@ define @vp_ctlz_zero_undef_nxv8i16_unmasked( @vp_ctlz_zero_undef_nxv16i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv16i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv16i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v16, a0, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: 
vp_ctlz_zero_undef_nxv16i16: ; CHECK-ZVBB: # %bb.0: @@ -5365,71 +2049,14 @@ define @vp_ctlz_zero_undef_nxv16i16( %va, } define @vp_ctlz_zero_undef_nxv16i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv16i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv16i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v16, v8 +; CHECK-NEXT: vnsrl.wi v8, v16, 23 +; CHECK-NEXT: li a0, 142 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -5599,77 +2226,18 @@ define @vp_ctlz_zero_undef_nxv32i16_unmasked( @vp_ctlz_zero_undef_nxv1i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv1i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, 
v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv1i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vsrl.vx v8, v9, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i32: ; CHECK-ZVBB: # %bb.0: @@ -5681,77 +2249,15 @@ define @vp_ctlz_zero_undef_nxv1i32( %va, @vp_ctlz_zero_undef_nxv1i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv1i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv1i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, 
mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vnsrl.wx v8, v9, a0 +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i32_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -5766,77 +2272,18 @@ define @vp_ctlz_zero_undef_nxv1i32_unmasked( @vp_ctlz_zero_undef_nxv2i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv2i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: 
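; NOTE: for i32 elements the lowering widens to f64, which represents every
; u32 exactly; the exponent field then starts at bit 52 and the bias is 1023.
; A minimal scalar sketch (hypothetical name, nonzero inputs only):
define i32 @ctlz32_via_f64_sketch(i32 %x) {
  %f = uitofp i32 %x to double      ; exact: every u32 is representable in f64
  %bits = bitcast double %f to i64
  %exp = lshr i64 %bits, 52         ; biased exponent field of the f64
  %sub = sub i64 1054, %exp         ; 1054 = 1023 (f64 bias) + 31 (EltSize - 1)
  %res = trunc i64 %sub to i32
  ret i32 %res
}
; This is where the li a0, 52 / li a0, 1054 pairs in the CHECK lines come
; from; e.g. x = 1 converts to 1.0 with biased exponent 1023, so the result
; is 1054 - 1023 = 31.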
vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vsrl.vx v8, v10, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v10, a0, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i32: ; CHECK-ZVBB: # %bb.0: @@ -5848,77 +2295,15 @@ define @vp_ctlz_zero_undef_nxv2i32( %va, @vp_ctlz_zero_undef_nxv2i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv2i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv2i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v10, v8 +; CHECK-NEXT: li a0, 52 +; 
CHECK-NEXT: vnsrl.wx v8, v10, a0 +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i32_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -5933,77 +2318,18 @@ define @vp_ctlz_zero_undef_nxv2i32_unmasked( @vp_ctlz_zero_undef_nxv4i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv4i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vsrl.vx v8, v12, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v12, a0, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i32: ; CHECK-ZVBB: # %bb.0: @@ -6015,162 +2341,41 @@ define @vp_ctlz_zero_undef_nxv4i32( %va, @vp_ctlz_zero_undef_nxv4i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: 
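; NOTE: the masked and unmasked i32 variants differ only in how the narrowing
; is scheduled: the masked form keeps v0.t on every operation, so it shifts at
; e64 (vsrl.vx ..., v0.t), narrows with a separate vnsrl.wi ..., 0, v0.t, and
; needs vsetvli toggles between the two element widths; the unmasked form
; folds the shift and the narrow into one vnsrl.wx.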
vp_ctlz_zero_undef_nxv4i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv4i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret -; -; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i32_unmasked: -; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-ZVBB-NEXT: vclz.v v8, v8 -; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv4i32( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_ctlz_zero_undef_nxv8i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv8i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, 
v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v12, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vnsrl.wx v8, v12, a0 +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.ctlz.nxv4i32( %va, i1 true, %m, i32 %evl) + ret %v +} + + +define @vp_ctlz_zero_undef_nxv8i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vsrl.vx v8, v16, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v16, a0, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i32: ; CHECK-ZVBB: # %bb.0: @@ -6182,77 +2387,15 @@ define @vp_ctlz_zero_undef_nxv8i32( %va, @vp_ctlz_zero_undef_nxv8i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv8i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vnot.v v8, 
v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv8i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfwcvt.f.xu.v v16, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vnsrl.wx v8, v16, a0 +; CHECK-NEXT: li a0, 1054 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i32_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -6267,77 +2410,16 @@ define @vp_ctlz_zero_undef_nxv8i32_unmasked( @vp_ctlz_zero_undef_nxv16i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv16i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, 
v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv16i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t +; CHECK-NEXT: li a0, 158 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i32: ; CHECK-ZVBB: # %bb.0: @@ -6349,77 +2431,16 @@ define @vp_ctlz_zero_undef_nxv16i32( %va, } define @vp_ctlz_zero_undef_nxv16i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv16i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv16i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; 
RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: vsrl.vi v8, v8, 23 +; CHECK-NEXT: li a0, 158 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i32_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -6434,120 +2455,17 @@ define @vp_ctlz_zero_undef_nxv16i32_unmasked( @vp_ctlz_zero_undef_nxv1i64( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv1i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v9, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v9, v9, v10, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v10, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v9, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv1i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; 
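; NOTE: nxv16i32 already occupies LMUL=8, so there is no register group large
; enough to widen to f64. The lowering instead converts in place to f32 under
; round-toward-zero, which keeps a large input from rounding up into the next
; power of two and disturbing the exponent: fsrmi a1, 1 saves frm and sets it
; to RTZ (encoding 1), and fsrm a1 restores it afterwards. A scalar sketch
; using the constrained conversion intrinsic (the declaration below reflects
; my reading of its signature; hypothetical function name, nonzero inputs
; only):
declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata)

define i32 @ctlz32_via_f32_rtz_sketch(i32 %x) {
  %f = call float @llvm.experimental.constrained.uitofp.f32.i32(
           i32 %x, metadata !"round.towardzero", metadata !"fpexcept.ignore")
  %bits = bitcast float %f to i32
  %exp = lshr i32 %bits, 23         ; biased exponent field of the f32
  %res = sub i32 158, %exp          ; 158 = 127 (f32 bias) + 31 (EltSize - 1)
  ret i32 %res
}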
RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v9, v8, a0, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i64: ; CHECK-ZVBB: # %bb.0: @@ -6559,120 +2477,17 @@ define @vp_ctlz_zero_undef_nxv1i64( %va, @vp_ctlz_zero_undef_nxv1i64_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv1i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v9, v8, a1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v9, v9, v10 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, 
e64, m1, ta, ma -; RV32-NEXT: vand.vv v10, v8, v9 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v9 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv1i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v9, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0 +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i64_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -6687,120 +2502,17 @@ define @vp_ctlz_zero_undef_nxv1i64_unmasked( @vp_ctlz_zero_undef_nxv2i64( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv2i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, 
v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v10, v10, v12, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v12, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v10, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v10, v8, a0, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret ; ; 
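; NOTE: i64 elements convert to f64 of the same width, again under RTZ since
; f64 carries only 53 significant bits; the constants become 52 for the
; exponent shift and 1086 = 1023 (f64 bias) + 63 (EltSize - 1) for the vrsub,
; e.g. x = 1 has biased exponent 1023, giving 1086 - 1023 = 63 leading zeros.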
CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i64: ; CHECK-ZVBB: # %bb.0: @@ -6812,120 +2524,17 @@ define @vp_ctlz_zero_undef_nxv2i64( %va, @vp_ctlz_zero_undef_nxv2i64_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv2i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v10, v8, a1 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v10, v10, v12 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v12, v8, v10 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v10 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv2i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v10, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: 
lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a0 +; CHECK-NEXT: li a0, 1086 +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i64_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -6940,120 +2549,17 @@ define @vp_ctlz_zero_undef_nxv2i64_unmasked( @vp_ctlz_zero_undef_nxv4i64( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv4i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v12, v12, v16, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v16, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v12, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv4i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, 
v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v12, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v12, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i64:
 ; CHECK-ZVBB: # %bb.0:
@@ -7062,123 +2568,20 @@ define <vscale x 4 x i64> @vp_ctlz_zero_undef_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
   %v = call <vscale x 4 x i64> @llvm.vp.ctlz.nxv4i64(<vscale x 4 x i64> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
   ret %v
-}
-
-define <vscale x 4 x i64> @vp_ctlz_zero_undef_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv4i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vi v12, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v12, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v12, v12, v16
-; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v16, v8, v12
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v12, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v12
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv4i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vsrl.vi v12, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v12, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v12
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v12, a0
-; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v12, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v12, v8
-; RV64-NEXT: vsrl.vi v12, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+}
+
+define <vscale x 4 x i64> @vp_ctlz_zero_undef_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i64_unmasked:
 ; CHECK-ZVBB: # %bb.0:
@@ -7193,120 +2596,17 @@ define <vscale x 4 x i64> @vp_ctlz_zero_undef_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
 define <vscale x 7 x i64> @vp_ctlz_zero_undef_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv7i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv7i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv7i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv7i64:
 ; CHECK-ZVBB: # %bb.0:
@@ -7318,120 +2618,17 @@ define <vscale x 7 x i64> @vp_ctlz_zero_undef_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 define <vscale x 7 x i64> @vp_ctlz_zero_undef_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv7i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v16, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vadd.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv7i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v16, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv7i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv7i64_unmasked:
 ; CHECK-ZVBB: # %bb.0:
@@ -7446,120 +2643,17 @@ define <vscale x 7 x i64> @vp_ctlz_zero_undef_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
 define <vscale x 8 x i64> @vp_ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv8i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i64:
 ; CHECK-ZVBB: # %bb.0:
@@ -7571,120 +2665,17 @@ define <vscale x 8 x i64> @vp_ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 define <vscale x 8 x i64> @vp_ctlz_zero_undef_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv8i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v16, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v16, v16, v24
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v8, v16
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vadd.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: li a0, 56
-; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv8i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vsrl.vi v16, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsrl.vx v16, v8, a0
-; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: lui a0, 349525
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v16, a0
-; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: lui a0, 209715
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v16, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v16, v8
-; RV64-NEXT: vsrl.vi v16, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: lui a0, 61681
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: lui a0, 4112
-; RV64-NEXT: addiw a0, a0, 257
-; RV64-NEXT: slli a1, a0, 32
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: li a0, 56
-; RV64-NEXT: vsrl.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: li a0, 52
+; CHECK-NEXT: vsrl.vx v8, v8, a0
+; CHECK-NEXT: li a0, 1086
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i64_unmasked:
 ; CHECK-ZVBB: # %bb.0:
@@ -7698,339 +2689,37 @@ define <vscale x 8 x i64> @vp_ctlz_zero_undef_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
 define <vscale x 16 x i64> @vp_ctlz_zero_undef_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv16i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -48
-; RV32-NEXT: .cfi_def_cfa_offset 48
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 40
-; RV32-NEXT: mul a1, a1, a2
-; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb
-; RV32-NEXT: vmv1r.v v1, v0
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: srli a2, a1, 3
-; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vx v0, v0, a2
-; RV32-NEXT: lui a2, 349525
-; RV32-NEXT: addi a2, a2, 1365
-; RV32-NEXT: sw a2, 44(sp)
-; RV32-NEXT: sw a2, 40(sp)
-; RV32-NEXT: lui a2, 209715
-; RV32-NEXT: addi a2, a2, 819
-; RV32-NEXT: sw a2, 36(sp)
-; RV32-NEXT: sw a2, 32(sp)
-; RV32-NEXT: lui a2, 61681
-; RV32-NEXT: addi a2, a2, -241
-; RV32-NEXT: sw a2, 28(sp)
-; RV32-NEXT: sw a2, 24(sp)
-; RV32-NEXT: lui a2, 4112
-; RV32-NEXT: addi a2, a2, 257
-; RV32-NEXT: sw a2, 20(sp)
-; RV32-NEXT: sw a2, 16(sp)
-; RV32-NEXT: sub a2, a0, a1
-; RV32-NEXT: sltu a3, a0, a2
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: and a3, a3, a2
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
-; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsrl.vi v24, v16, 2, v0.t
-; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t
-; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsrl.vi v24, v16, 8, v0.t
-; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: vsrl.vi v24, v16, 16, v0.t
-; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v24, v16, a2, v0.t
-; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: vnot.v v16, v16, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
-; RV32-NEXT: addi a4, sp, 40
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v24, v16, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vsub.vv v16, v16, v24, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: addi a4, sp, 32
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 3
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v24, v16, v0.t
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 3
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vadd.vv v16, v24, v16, v0.t
-; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t
-; RV32-NEXT: vadd.vv v24, v16, v24, v0.t
-; RV32-NEXT: addi a4, sp, 24
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v24, v16, v0.t
-; RV32-NEXT: addi a4, sp, 16
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 3
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vmul.vv v16, v24, v16, v0.t
-; RV32-NEXT: li a3, 56
-; RV32-NEXT: vsrl.vx v16, v16, a3, v0.t
-; RV32-NEXT: addi a4, sp, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: bltu a0, a1, .LBB94_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a0, a1
-; RV32-NEXT: .LBB94_2:
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmv1r.v v0, v1
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t
-; RV32-NEXT: vor.vv v8, v8, v16, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v24, v16, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 5
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 24
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 3
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vx v8, v8, a3, v0.t
-; RV32-NEXT: addi a0, sp, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 40
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: addi sp, sp, 48
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv16i64:
-; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
-; RV64-NEXT: sub sp, sp, a1
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; RV64-NEXT: vmv1r.v v24, v0
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: srli a2, a1, 3
-; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vx v0, v0, a2
-; RV64-NEXT: sub a2, a0, a1
-; RV64-NEXT: sltu a3, a0, a2
-; RV64-NEXT: addi a3, a3, -1
-; RV64-NEXT: and a2, a3, a2
-; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t
-; RV64-NEXT: vor.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: li a2, 32
-; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t
-; RV64-NEXT: lui a3, 349525
-; RV64-NEXT: addiw a3, a3, 1365
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
-; RV64-NEXT: vand.vx v8, v8, a3, v0.t
-; RV64-NEXT: vsub.vv v16, v16, v8, v0.t
-; RV64-NEXT: lui a4, 209715
-; RV64-NEXT: addiw a4, a4, 819
-; RV64-NEXT: slli a5, a4, 32
-; RV64-NEXT: add a4, a4, a5
-; RV64-NEXT: vand.vx v8, v16, a4, v0.t
-; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t
-; RV64-NEXT: vand.vx v16, v16, a4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: lui a5, 61681
-; RV64-NEXT: addiw a5, a5, -241
-; RV64-NEXT: slli a6, a5, 32
-; RV64-NEXT: add a5, a5, a6
-; RV64-NEXT: vand.vx v8, v8, a5, v0.t
-; RV64-NEXT: lui a6, 4112
-; RV64-NEXT: addiw a6, a6, 257
-; RV64-NEXT: slli a7, a6, 32
-; RV64-NEXT: add a6, a6, a7
-; RV64-NEXT: vmul.vx v8, v8, a6, v0.t
-; RV64-NEXT: li a7, 56
-; RV64-NEXT: vsrl.vx v8, v8, a7, v0.t
-; RV64-NEXT: addi t0, sp, 16
-; RV64-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill
-; RV64-NEXT: bltu a0, a1, .LBB94_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a0, a1
-; RV64-NEXT: .LBB94_2:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vmv1r.v v0, v24
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 3
-; RV64-NEXT: add a0, sp, a0
-; RV64-NEXT: addi a0, a0, 16
-; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t
-; RV64-NEXT: vor.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t
-; RV64-NEXT: vor.vv v8, v8, v16, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT: vand.vx v16, v16, a3, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT: vand.vx v16, v8, a4, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a4, v0.t
-; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT: vand.vx v8, v8, a5, v0.t
-; RV64-NEXT: vmul.vx v8, v8, a6, v0.t
-; RV64-NEXT: vsrl.vx v8, v8, a7, v0.t
-; RV64-NEXT: addi a0, sp, 16
-; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 4
-; RV64-NEXT: add sp, sp, a0
-; RV64-NEXT: addi sp, sp, 16
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a2, a1, 3
+; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: fsrmi a3, 1
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v16, v16, v0.t
+; CHECK-NEXT: fsrm a3
+; CHECK-NEXT: li a2, 52
+; CHECK-NEXT: vsrl.vx v16, v16, a2, v0.t
+; CHECK-NEXT: li a3, 1086
+; CHECK-NEXT: vrsub.vx v16, v16, a3, v0.t
+; CHECK-NEXT: bltu a0, a1, .LBB94_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB94_2:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t
+; CHECK-NEXT: vrsub.vx v8, v8, a3, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i64:
 ; CHECK-ZVBB: # %bb.0:
@@ -8058,233 +2747,32 @@ define <vscale x 16 x i64> @vp_ctlz_zero_undef_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 }
 
 define <vscale x 16 x i64> @vp_ctlz_zero_undef_nxv16i64_unmasked(<vscale x 16 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_ctlz_zero_undef_nxv16i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -48
-; RV32-NEXT: .cfi_def_cfa_offset 48
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
-; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
-; RV32-NEXT: sw a1, 40(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
-; RV32-NEXT: sw a1, 32(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: sub a2, a0, a1
-; RV32-NEXT: sltu a3, a0, a2
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: and a3, a3, a2
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v24, v16, 1
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vsrl.vi v24, v16, 2
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vsrl.vi v24, v16, 4
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vsrl.vi v24, v16, 8
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vsrl.vi v24, v16, 16
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsrl.vx v24, v16, a2
-; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vnot.v v16, v16
-; RV32-NEXT: vsrl.vi v24, v16, 1
-; RV32-NEXT: addi a4, sp, 40
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v0, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
-; RV32-NEXT: mul a4, a4, a5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v24, v0
-; RV32-NEXT: vsub.vv v16, v16, v24
-; RV32-NEXT: addi a4, sp, 32
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v0, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v16, v0
-; RV32-NEXT: vsrl.vi v16, v16, 2
-; RV32-NEXT: vand.vv v16, v16, v0
-; RV32-NEXT: vadd.vv v16, v24, v16
-; RV32-NEXT: vsrl.vi v24, v16, 4
-; RV32-NEXT: vadd.vv v16, v16, v24
-; RV32-NEXT: addi a4, sp, 24
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v16, v24
-; RV32-NEXT: addi a4, sp, 16
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 3
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vmul.vv v24, v24, v16
-; RV32-NEXT: li a3, 56
-; RV32-NEXT: vsrl.vx v16, v24, a3
-; RV32-NEXT: addi a4, sp, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: bltu a0, a1, .LBB95_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a0, a1
-; RV32-NEXT: .LBB95_2:
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v24, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vsrl.vi v24, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vsrl.vi v24, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vsrl.vi v24, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vsrl.vi v24, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vsrl.vx v24, v8, a2
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vsrl.vi v24, v8, 1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 24
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v24, v24, v16
-; RV32-NEXT: vsub.vv v8, v8, v24
-; RV32-NEXT: vand.vv v24, v8, v0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v0
-; RV32-NEXT: vadd.vv v8, v24, v8
-; RV32-NEXT: vsrl.vi v24, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v24
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 3
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: vsrl.vx v8, v8, a3
-; RV32-NEXT: addi a0, sp, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 5
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: addi sp, sp, 48
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_nxv16i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: sub a2, a0, a1
-; RV64-NEXT: sltu a3, a0, a2
-; RV64-NEXT: addi a3, a3, -1
-; RV64-NEXT: and a2, a3, a2
-; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v24, v16, 1
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vsrl.vi v24, v16, 2
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vsrl.vi v24, v16, 4
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vsrl.vi v24, v16, 8
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vsrl.vi v24, v16, 16
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: li a2, 32
-; RV64-NEXT: vsrl.vx v24, v16, a2
-; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vnot.v v16, v16
-; RV64-NEXT: vsrl.vi v24, v16, 1
-; RV64-NEXT: lui a3, 349525
-; RV64-NEXT: addiw a3, a3, 1365
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
-; RV64-NEXT: vand.vx v24, v24, a3
-; RV64-NEXT: vsub.vv v16, v16, v24
-; RV64-NEXT: lui a4, 209715
-; RV64-NEXT: addiw a4, a4, 819
-; RV64-NEXT: slli a5, a4, 32
-; RV64-NEXT: add a4, a4, a5
-; RV64-NEXT: vand.vx v24, v16, a4
-; RV64-NEXT: vsrl.vi v16, v16, 2
-; RV64-NEXT: vand.vx v16, v16, a4
-; RV64-NEXT: vadd.vv v16, v24, v16
-; RV64-NEXT: vsrl.vi v24, v16, 4
-; RV64-NEXT: vadd.vv v16, v16, v24
-; RV64-NEXT: lui a5, 61681
-; RV64-NEXT: addiw a5, a5, -241
-; RV64-NEXT: slli a6, a5, 32
-; RV64-NEXT: add a5, a5, a6
-; RV64-NEXT: vand.vx v16, v16, a5
-; RV64-NEXT: lui a6, 4112
-; RV64-NEXT: addiw a6, a6, 257
-; RV64-NEXT: slli a7, a6, 32
-; RV64-NEXT: add a6, a6, a7
-; RV64-NEXT: vmul.vx v16, v16, a6
-; RV64-NEXT: li a7, 56
-; RV64-NEXT: vsrl.vx v16, v16, a7
-; RV64-NEXT: bltu a0, a1, .LBB95_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a0, a1
-; RV64-NEXT: .LBB95_2:
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vi v24, v8, 1
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vsrl.vi v24, v8, 2
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vsrl.vi v24, v8, 4
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vsrl.vi v24, v8, 8
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vsrl.vi v24, v8, 16
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vsrl.vx v24, v8, a2
-; RV64-NEXT: vor.vv v8, v8, v24
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vsrl.vi v24, v8, 1
-; RV64-NEXT: vand.vx v24, v24, a3
-; RV64-NEXT: vsub.vv v8, v8, v24
-; RV64-NEXT: vand.vx v24, v8, a4
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a4
-; RV64-NEXT: vadd.vv v8, v24, v8
-; RV64-NEXT: vsrl.vi v24, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v24
-; RV64-NEXT: vand.vx v8, v8, a5
-; RV64-NEXT: vmul.vx v8, v8, a6
-; RV64-NEXT: vsrl.vx v8, v8, a7
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: fsrmi a3, 1
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v16, v16
+; CHECK-NEXT: fsrm a3
+; CHECK-NEXT: li a2, 52
+; CHECK-NEXT: vsrl.vx v16, v16, a2
+; CHECK-NEXT: li a3, 1086
+; CHECK-NEXT: vrsub.vx v16, v16, a3
+; CHECK-NEXT: bltu a0, a1, .LBB95_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB95_2:
+; CHECK-NEXT: fsrmi a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: vsrl.vx v8, v8, a2
+; CHECK-NEXT: vrsub.vx v8, v8, a3
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i64_unmasked:
 ; CHECK-ZVBB: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
index 38647bb..afe2d5c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
@@ -3577,23 +3577,20 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64_unmasked(<vscale x 16 x i64> %va, i
 define <vscale x 1 x i8> @vp_cttz_zero_undef_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_cttz_zero_undef_nxv1i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
-; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i8:
@@ -3608,23 +3605,17 @@ define <vscale x 1 x i8> @vp_cttz_zero_undef_nxv1i8(<vscale x 1 x i8> %va, <vsc
 define <vscale x 1 x i8> @vp_cttz_zero_undef_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_cttz_zero_undef_nxv1i8_unmasked:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1
-; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vrsub.vi v9, v8, 0
 ; CHECK-NEXT: vand.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: vsub.vv v8, v8, v9
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vadd.vv v8, v9, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vadd.vv v8, v8, v9
-; CHECK-NEXT: vand.vi v8, v8, 15
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT: vnsrl.wi v8, v8, 23
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i8_unmasked:
@@ -3642,23 +3633,20 @@ define <vscale x 1 x i8> @vp_cttz_zero_undef_nxv1i8_unmasked(<vscale x 1 x i8> %
 define <vscale x 2 x i8> @vp_cttz_zero_undef_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_cttz_zero_undef_nxv2i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
-; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i8:
@@ -3673,23 +3661,17 @@ define <vscale x 2 x i8> @vp_cttz_zero_undef_nxv2i8(<vscale x 2 x i8> %va, <vsc
 define <vscale x 2 x i8> @vp_cttz_zero_undef_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_cttz_zero_undef_nxv2i8_unmasked:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1
-; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vrsub.vi v9, v8, 0
 ; CHECK-NEXT: vand.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: vsub.vv v8, v8, v9
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vadd.vv v8, v9, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vadd.vv v8, v8, v9
-; CHECK-NEXT: vand.vi v8, v8, 15
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9
+; CHECK-NEXT: vnsrl.wi v8, v8, 23
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i8_unmasked:
@@ -3707,23 +3689,20 @@ define <vscale x 2 x i8> @vp_cttz_zero_undef_nxv2i8_unmasked(<vscale x 2 x i8> %
 define <vscale x 4 x i8> @vp_cttz_zero_undef_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_cttz_zero_undef_nxv4i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
-; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i8:
@@ -3738,23 +3717,17 @@ define <vscale x 4 x i8> @vp_cttz_zero_undef_nxv4i8(<vscale x 4 x i8> %va, <vsc
 define <vscale x 4 x i8> @vp_cttz_zero_undef_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_cttz_zero_undef_nxv4i8_unmasked:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1
-; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vrsub.vi v9, v8, 0
 ; CHECK-NEXT: vand.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: vsub.vv v8, v8, v9
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vadd.vv v8, v9, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vadd.vv v8, v8, v9
-; CHECK-NEXT: vand.vi v8, v8, 15
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vzext.vf2 v9, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v9
+; CHECK-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i8_unmasked:
@@ -3772,23 +3745,20 @@ define <vscale x 4 x i8> @vp_cttz_zero_undef_nxv4i8_unmasked(<vscale x 4 x i8> %
 define <vscale x 8 x i8> @vp_cttz_zero_undef_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_cttz_zero_undef_nxv8i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
-; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vzext.vf2 v10, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v10, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v12, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i8:
@@ -3803,23 +3773,17 @@ define <vscale x 8 x i8> @vp_cttz_zero_undef_nxv8i8(<vscale x 8 x i8> %va, <vsc
 define <vscale x 8 x i8> @vp_cttz_zero_undef_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_cttz_zero_undef_nxv8i8_unmasked:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1
-; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vrsub.vi v9, v8, 0
 ; CHECK-NEXT: vand.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v9, v8, 1
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: vsub.vv v8, v8, v9
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v9, v8, a0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vadd.vv v8, v9, v8
-; CHECK-NEXT: vsrl.vi v9, v8, 4
-; CHECK-NEXT: vadd.vv v8, v8, v9
-; CHECK-NEXT: vand.vi v8, v8, 15
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vzext.vf2 v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v10
+; CHECK-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v10, a0
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i8_unmasked:
@@ -3837,23 +3801,20 @@ define <vscale x 8 x i8> @vp_cttz_zero_undef_nxv8i8_unmasked(<vscale x 8 x i8> %
 define <vscale x 16 x i8> @vp_cttz_zero_undef_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_cttz_zero_undef_nxv16i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t
-; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
 ; CHECK-NEXT: vand.vv v8, v8, v10, v0.t
-; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v10, v10, a0, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v10, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t
-; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vzext.vf2 v12, v8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v12, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i8:
@@ -3868,23 +3829,17 @@ define <vscale x 16 x i8> @vp_cttz_zero_undef_nxv16i8(<vscale x 16 x i8> %va, <
 define <vscale x 16 x i8> @vp_cttz_zero_undef_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_cttz_zero_undef_nxv16i8_unmasked:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vsub.vx v10, v8, a1
-; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vrsub.vi v10, v8, 0
 ; CHECK-NEXT: vand.vv v8, v8, v10
-; CHECK-NEXT: vsrl.vi v10, v8, 1
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vx v10, v10, a0
-; CHECK-NEXT: vsub.vv v8, v8, v10
-; CHECK-NEXT: li a0, 51
-; CHECK-NEXT: vand.vx v10, v8, a0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vadd.vv v8, v10, v8
-; CHECK-NEXT: vsrl.vi v10, v8, 4
-; CHECK-NEXT: vadd.vv v8, v8, v10
-; CHECK-NEXT: vand.vi v8, v8, 15
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vzext.vf2 v12, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v12
+; CHECK-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v12, a0
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i8_unmasked:
@@ -4030,61 +3985,19 @@ define <vscale x 64 x i8> @vp_cttz_zero_undef_nxv64i8_unmasked(<vscale x 64 x i8
 define <vscale x 1 x i16> @vp_cttz_zero_undef_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv1i16:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv1i16:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: vnot.v v8, v8, v0.t
-; RV64-NEXT: vand.vv v8, v8, v9, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
-; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0, v0.t
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i16:
 ; CHECK-ZVBB: # %bb.0:
@@ -4096,61 +4009,16 @@ define <vscale x 1 x i16> @vp_cttz_zero_undef_nxv1i16(<vscale x 1 x i16> %va, <
 define <vscale x 1 x i16> @vp_cttz_zero_undef_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv1i16_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1
-; RV32-NEXT: vnot.v v8, v8
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: lui a0, 5
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: lui a0, 3
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: vand.vx v9, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: lui a0, 1
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: li a0, 257
-; RV32-NEXT: vmul.vx v8, v8, a0
-; RV32-NEXT: vsrl.vi v8, v8, 8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv1i16_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: vnot.v v8, v8
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: lui a0, 5
-; RV64-NEXT: addiw a0, a0, 1365
-; RV64-NEXT: vand.vx v9, v9, a0
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: lui a0, 3
-; RV64-NEXT: addiw a0, a0, 819
-; RV64-NEXT: vand.vx v9, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vadd.vv v8, v9, v8
-; RV64-NEXT: vsrl.vi v9, v8, 4
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: lui a0, 1
-; RV64-NEXT: addiw a0, a0, -241
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: li a0, 257
-; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsrl.vi v8, v8, 8
-; RV64-NEXT: ret
+; CHECK-LABEL: vp_cttz_zero_undef_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-NEXT: vnsrl.wi v8, v9, 23
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i16_unmasked:
 ; CHECK-ZVBB: # %bb.0:
@@ -4165,26 +4033,218 @@ define <vscale x 1 x i16> @vp_cttz_zero_undef_nxv1i16_unmasked(<vscale x 1 x i16
 define <vscale x 2 x i16> @vp_cttz_zero_undef_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv2i16:
+; CHECK-LABEL: vp_cttz_zero_undef_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i16:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
+  %v = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
+  ret %v
+}
+
+define <vscale x 2 x i16> @vp_cttz_zero_undef_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-NEXT: vnsrl.wi v8, v9, 23
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i16_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
+  ret %v
+}
+
+
+define <vscale x 4 x i16> @vp_cttz_zero_undef_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v10, a0, v0.t
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i16:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
+  %v = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
+  ret %v
+}
+
+define <vscale x 4 x i16> @vp_cttz_zero_undef_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
+; CHECK-NEXT: vnsrl.wi v8, v10, 23
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i16_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
+  ret %v
+}
+
+
+define <vscale x 8 x i16> @vp_cttz_zero_undef_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v12, a0, v0.t
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i16:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
+  %v = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
+  ret %v
+}
+
+define <vscale x 8 x i16> @vp_cttz_zero_undef_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vrsub.vi v10, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v10
+; CHECK-NEXT: vfwcvt.f.xu.v v12, v8
+; CHECK-NEXT: vnsrl.wi v8, v12, 23
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i16_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
+  ret %v
+}
+
+
+define <vscale x 16 x i16> @vp_cttz_zero_undef_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vrsub.vi v12, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v12, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v16, a0, v0.t
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i16:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
+  %v = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
+  ret %v
+}
+
+define <vscale x 16 x i16> @vp_cttz_zero_undef_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vrsub.vi v12, v8, 0
+; CHECK-NEXT: vand.vv v8, v8, v12
+; CHECK-NEXT: vfwcvt.f.xu.v v16, v8
+; CHECK-NEXT: vnsrl.wi v8, v16, 23
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i16_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
+  ret %v
+}
+
+
+define <vscale x 32 x i16> @vp_cttz_zero_undef_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_cttz_zero_undef_nxv32i16:
 ; RV32: # %bb.0:
 ; RV32-NEXT: li a1, 1
-; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
+; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; RV32-NEXT: vsub.vx v16, v8, a1, v0.t
 ; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
+; RV32-NEXT: vand.vv v8, v8, v16, v0.t
+; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
 ; RV32-NEXT: lui a0, 5
 ; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: vand.vx v9, v9, a0, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
+; RV32-NEXT: vand.vx v16, v16, a0, v0.t
+; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
 ;
RV32-NEXT: lui a0, 3 ; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t +; RV32-NEXT: vand.vx v16, v8, a0, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: lui a0, 1 ; RV32-NEXT: addi a0, a0, -241 ; RV32-NEXT: vand.vx v8, v8, a0, v0.t @@ -4193,26 +4253,26 @@ define @vp_cttz_zero_undef_nxv2i16( %va, @vp_cttz_zero_undef_nxv2i16( %va, @llvm.vp.cttz.nxv2i16( %va, i1 true, %m, i32 %evl) - ret %v + %v = call @llvm.vp.cttz.nxv32i16( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv2i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv2i16_unmasked: +define @vp_cttz_zero_undef_nxv32i16_unmasked( %va, i32 zeroext %evl) { +; RV32-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked: ; RV32: # %bb.0: ; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 +; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; RV32-NEXT: vsub.vx v16, v8, a1 ; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: lui a0, 5 ; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 +; RV32-NEXT: vand.vx v16, v16, a0 +; RV32-NEXT: vsub.vv v8, v8, v16 ; RV32-NEXT: lui a0, 3 ; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vand.vx v16, v8, a0 ; RV32-NEXT: vsrl.vi v8, v8, 2 ; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 +; RV32-NEXT: vadd.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v16 ; RV32-NEXT: lui a0, 1 ; RV32-NEXT: addi a0, a0, -241 ; RV32-NEXT: vand.vx v8, v8, a0 @@ -4259,26 +4319,26 @@ define @vp_cttz_zero_undef_nxv2i16_unmasked( @vp_cttz_zero_undef_nxv2i16_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv2i16( %va, i1 true, %m, i32 %evl) - ret %v + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.cttz.nxv32i16( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv4i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv4i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv4i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli 
zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +define @vp_cttz_zero_undef_nxv1i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vsrl.vx v8, v9, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret ; -; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i16: +; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i32: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t ; CHECK-ZVBB-NEXT: ret - %v = call @llvm.vp.cttz.nxv4i16( %va, i1 true, %m, i32 %evl) - ret %v + %v = call @llvm.vp.cttz.nxv1i32( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv4i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv4i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv4i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; 
RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +define @vp_cttz_zero_undef_nxv1i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vrsub.vi v9, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vnsrl.wx v8, v9, a0 +; CHECK-NEXT: li a0, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret ; -; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i16_unmasked: +; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i32_unmasked: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv4i16( %va, i1 true, %m, i32 %evl) - ret %v + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.cttz.nxv1i32( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv8i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv8i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv8i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +define @vp_cttz_zero_undef_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vsrl.vx v8, v10, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vnsrl.wi 
v10, v8, 0, v0.t +; CHECK-NEXT: li a0, 1023 +; CHECK-NEXT: vsub.vx v8, v10, a0, v0.t +; CHECK-NEXT: ret ; -; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i16: +; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i32: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t ; CHECK-ZVBB-NEXT: ret - %v = call @llvm.vp.cttz.nxv8i16( %va, i1 true, %m, i32 %evl) - ret %v + %v = call @llvm.vp.cttz.nxv2i32( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv8i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv8i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv8i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +define @vp_cttz_zero_undef_nxv2i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vrsub.vi v9, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vfwcvt.f.xu.v v10, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vnsrl.wx v8, v10, a0 +; CHECK-NEXT: li a0, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret ; -; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i16_unmasked: +; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i32_unmasked: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv8i16( %va, i1 true, %m, i32 %evl) - ret %v + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.cttz.nxv2i32( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv16i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv16i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 
-; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret +define @vp_cttz_zero_undef_nxv4i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v10, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vsrl.vx v8, v12, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t +; CHECK-NEXT: li a0, 1023 +; CHECK-NEXT: vsub.vx v8, v12, a0, v0.t +; CHECK-NEXT: ret ; -; RV64-LABEL: vp_cttz_zero_undef_nxv16i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret + %v = call @llvm.vp.cttz.nxv4i32( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_cttz_zero_undef_nxv4i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vrsub.vi v10, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v10 +; CHECK-NEXT: vfwcvt.f.xu.v v12, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vnsrl.wx v8, v12, a0 +; CHECK-NEXT: li a0, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret ; -; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i16: +; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i32_unmasked: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8 +; CHECK-ZVBB-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.cttz.nxv4i32( %va, i1 true, %m, i32 %evl) + ret %v 
+} + + +define @vp_cttz_zero_undef_nxv8i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vrsub.vi v12, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v12, v0.t +; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vsrl.vx v8, v16, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: li a0, 1023 +; CHECK-NEXT: vsub.vx v8, v16, a0, v0.t +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t ; CHECK-ZVBB-NEXT: ret - %v = call @llvm.vp.cttz.nxv16i16( %va, i1 true, %m, i32 %evl) - ret %v + %v = call @llvm.vp.cttz.nxv8i32( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv16i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv16i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret +define @vp_cttz_zero_undef_nxv8i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vrsub.vi v12, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v12 +; CHECK-NEXT: vfwcvt.f.xu.v v16, v8 +; CHECK-NEXT: li a0, 52 +; CHECK-NEXT: vnsrl.wx v8, v16, a0 +; CHECK-NEXT: li a0, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret ; -; RV64-LABEL: vp_cttz_zero_undef_nxv16i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8 +; CHECK-ZVBB-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.cttz.nxv8i32( %va, i1 true, %m, i32 %evl) + ret %v +} + + +define 
@vp_cttz_zero_undef_nxv16i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v16, v0.t +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t +; CHECK-NEXT: li a1, 127 +; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret ; -; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i16_unmasked: +; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i32: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret + %v = call @llvm.vp.cttz.nxv16i32( %va, i1 true, %m, i32 %evl) + ret %v +} + +define @vp_cttz_zero_undef_nxv16i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vrsub.vi v16, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v16 +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: vsrl.vi v8, v8, 23 +; CHECK-NEXT: li a1, 127 +; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv16i16( %va, i1 true, %m, i32 %evl) - ret %v + %v = call @llvm.vp.cttz.nxv16i32( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv32i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv32i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv32i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, 
v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +define @vp_cttz_zero_undef_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: li a1, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t +; CHECK-NEXT: li a1, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret ; -; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i16: +; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i64: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t ; CHECK-ZVBB-NEXT: ret - %v = call @llvm.vp.cttz.nxv32i16( %va, i1 true, %m, i32 %evl) - ret %v + %v = call @llvm.vp.cttz.nxv1i64( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv32i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +define @vp_cttz_zero_undef_nxv1i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vrsub.vi v9, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: li a1, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a1 +; CHECK-NEXT: li a1, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret ; -; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked: +; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i64_unmasked: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: 
vsetvli zero, a0, e64, m1, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv32i16( %va, i1 true, %m, i32 %evl) - ret %v + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.cttz.nxv1i64( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv1i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv1i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv1i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +define @vp_cttz_zero_undef_nxv2i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v10, v0.t +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: li a1, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t +; CHECK-NEXT: li a1, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret ; -; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i32: +; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i64: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t ; CHECK-ZVBB-NEXT: ret - %v = call @llvm.vp.cttz.nxv1i32( %va, i1 true, %m, i32 %evl) - ret %v + %v = call @llvm.vp.cttz.nxv2i64( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv1i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv1i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; 
RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv1i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +define @vp_cttz_zero_undef_nxv2i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vrsub.vi v10, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v10 +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: li a1, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a1 +; CHECK-NEXT: li a1, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret ; -; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i32_unmasked: +; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i64_unmasked: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv1i32( %va, i1 true, %m, i32 %evl) - ret %v + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.cttz.nxv2i64( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv2i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv2i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, 
v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret +define @vp_cttz_zero_undef_nxv4i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vrsub.vi v12, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v12, v0.t +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: li a1, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t +; CHECK-NEXT: li a1, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret ; -; RV64-LABEL: vp_cttz_zero_undef_nxv2i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret -; -; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i32: -; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t -; CHECK-ZVBB-NEXT: ret - %v = call @llvm.vp.cttz.nxv2i32( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_cttz_zero_undef_nxv2i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv2i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv2i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 
-; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret -; -; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i32_unmasked: -; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-ZVBB-NEXT: vctz.v v8, v8 -; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv2i32( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_cttz_zero_undef_nxv4i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv4i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv4i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret -; -; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i32: +; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i64: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t ; CHECK-ZVBB-NEXT: ret - %v = call @llvm.vp.cttz.nxv4i32( %va, i1 true, %m, i32 %evl) - ret %v + %v = call @llvm.vp.cttz.nxv4i64( %va, i1 true, %m, i32 %evl) + ret %v } -define @vp_cttz_zero_undef_nxv4i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv4i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 349525 -; 
RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv4i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +define @vp_cttz_zero_undef_nxv4i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vrsub.vi v12, v8, 0 +; CHECK-NEXT: vand.vv v8, v8, v12 +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: li a1, 52 +; CHECK-NEXT: vsrl.vx v8, v8, a1 +; CHECK-NEXT: li a1, 1023 +; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret ; -; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i32_unmasked: +; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i64_unmasked: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 -; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv4i32( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_cttz_zero_undef_nxv8i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv8i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv8i32: 
-; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret -; -; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i32: -; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t -; CHECK-ZVBB-NEXT: ret - %v = call @llvm.vp.cttz.nxv8i32( %va, i1 true, %m, i32 %evl) - ret %v -} - -define @vp_cttz_zero_undef_nxv8i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv8i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv8i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret -; -; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i32_unmasked: -; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-ZVBB-NEXT: vctz.v v8, v8 -; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv8i32( %va, i1 true, %m, i32 %evl) - ret %v -} - - -define @vp_cttz_zero_undef_nxv16i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: 
vp_cttz_zero_undef_nxv16i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv16i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 24, v0.t
-; RV64-NEXT:    ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i32:
-; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
-; CHECK-ZVBB-NEXT:    ret
-  %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
-  ret <vscale x 16 x i32> %v
-}
-
-define <vscale x 16 x i32> @vp_cttz_zero_undef_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv16i32_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vand.vx v16, v16, a0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vand.vx v16, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 61681
-; RV32-NEXT:    addi a0, a0, -241
-; RV32-NEXT:    vand.vx v8, v8, a0
-; RV32-NEXT:    lui a0, 4112
-; RV32-NEXT:    addi a0, a0, 257
-; RV32-NEXT:    vmul.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vi v8, v8, 24
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv16i32_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 24
-; RV64-NEXT:    ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i32_unmasked:
-; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-ZVBB-NEXT:    vctz.v v8, v8
-; CHECK-ZVBB-NEXT:    ret
-  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
-  %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
-  ret <vscale x 16 x i32> %v
-}
-
-
-define <vscale x 1 x i64> @vp_cttz_zero_undef_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv1i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v10, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
-; RV32-NEXT:    li a0, 56
-; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv1i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT:    ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i64:
-; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
-; CHECK-ZVBB-NEXT:    ret
-  %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
-  ret <vscale x 1 x i64> %v
-}
-
-define <vscale x 1 x i64> @vp_cttz_zero_undef_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv1i64_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v9, v9, v10
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v10, v8, v9
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v9, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v9
-; RV32-NEXT:    li a0, 56
-; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv1i64_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a0
-; RV64-NEXT:    ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i64_unmasked:
-; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vctz.v v8, v8
-; CHECK-ZVBB-NEXT:    ret
-  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
-  ret <vscale x 1 x i64> %v
-}
-
-
-define <vscale x 2 x i64> @vp_cttz_zero_undef_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv2i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vsub.vx v10, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v12, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
-; RV32-NEXT:    li a0, 56
-; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv2i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT:    vsub.vx v10, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT:    ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i64:
-; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
-; CHECK-ZVBB-NEXT:    ret
-  %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
-  ret <vscale x 2 x i64> %v
-}
-
-define <vscale x 2 x i64> @vp_cttz_zero_undef_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv2i64_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vsub.vx v10, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v10, v10, v12
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v12, v8, v10
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vadd.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v10
-; RV32-NEXT:    li a0, 56
-; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv2i64_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT:    vsub.vx v10, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a0
-; RV64-NEXT:    ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i64_unmasked:
-; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT:    vctz.v v8, v8
-; CHECK-ZVBB-NEXT:    ret
-  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
-  %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
-  ret <vscale x 2 x i64> %v
-}
-
-
-define <vscale x 4 x i64> @vp_cttz_zero_undef_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv4i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vsub.vx v12, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v12, v0.t
-; RV32-NEXT:    li a0, 56
-; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv4i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT:    vsub.vx v12, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT:    ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i64:
-; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
-; CHECK-ZVBB-NEXT:    ret
-  %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
-  ret <vscale x 4 x i64> %v
-}
-
-define <vscale x 4 x i64> @vp_cttz_zero_undef_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv4i64_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vsub.vx v12, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v12, v12, v16
-; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v12
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT:    vlse64.v v12, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v12
-; RV32-NEXT:    li a0, 56
-; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv4i64_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT:    vsub.vx v12, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsub.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a0
-; RV64-NEXT:    ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i64_unmasked:
-; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-ZVBB-NEXT:    vctz.v v8, v8
-; CHECK-ZVBB-NEXT:    ret
-  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
-  %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
-  ret <vscale x 4 x i64> %v
-}
-
-
-define <vscale x 7 x i64> @vp_cttz_zero_undef_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv7i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT:    li a0, 56
-; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv7i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT:    ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv7i64:
-; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
-; CHECK-ZVBB-NEXT:    ret
-  %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 true, <vscale x 7 x i1> %m, i32 %evl)
-  ret <vscale x 7 x i64> %v
-}
-
-define <vscale x 7 x i64> @vp_cttz_zero_undef_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv7i64_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v16
-; RV32-NEXT:    li a0, 56
-; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv7i64_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a0
-; RV64-NEXT:    ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv7i64_unmasked:
-; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-ZVBB-NEXT:    vctz.v v8, v8
-; CHECK-ZVBB-NEXT:    ret
-  %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
-  %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 true, <vscale x 7 x i1> %m, i32 %evl)
-  ret <vscale x 7 x i64> %v
-}
-
-
-define <vscale x 8 x i64> @vp_cttz_zero_undef_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv8i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT:    li a0, 56
-; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv8i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV64-NEXT:    ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i64:
-; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
-; CHECK-ZVBB-NEXT:    ret
-  %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
-  ret <vscale x 8 x i64> %v
-}
-
-define <vscale x 8 x i64> @vp_cttz_zero_undef_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv8i64_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v16
-; RV32-NEXT:    li a0, 56
-; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv8i64_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v16
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    li a0, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a0
-; RV64-NEXT:    ret
-;
-; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i64_unmasked:
-; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-ZVBB-NEXT:    vctz.v v8, v8
-; CHECK-ZVBB-NEXT:    ret
-  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
-  %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
-  ret <vscale x 8 x i64> %v
-}
-
-define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv16i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -48
-; RV32-NEXT:    .cfi_def_cfa_offset 48
-; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 40
-; RV32-NEXT:    mul a1, a1, a2
-; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb
-; RV32-NEXT:    vmv1r.v v1, v0
-; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    srli a2, a1, 3
-; RV32-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; RV32-NEXT:    vslidedown.vx v0, v0, a2
-; RV32-NEXT:    lui a2, 349525
-; RV32-NEXT:    addi a2, a2, 1365
-; RV32-NEXT:    sw a2, 44(sp)
-; RV32-NEXT:    sw a2, 40(sp)
-; RV32-NEXT:    lui a2, 209715
-; RV32-NEXT:    addi a2, a2, 819
-; RV32-NEXT:    sw a2, 36(sp)
-; RV32-NEXT:    sw a2, 32(sp)
-; RV32-NEXT:    lui a2, 61681
-; RV32-NEXT:    addi a2, a2, -241
-; RV32-NEXT:    sw a2, 28(sp)
-; RV32-NEXT:    sw a2, 24(sp)
-; RV32-NEXT:    lui a2, 4112
-; RV32-NEXT:    addi a2, a2, 257
-; RV32-NEXT:    sw a2, 20(sp)
-; RV32-NEXT:    sw a2, 16(sp)
-; RV32-NEXT:    sub a2, a0, a1
-; RV32-NEXT:    sltu a3, a0, a2
-; RV32-NEXT:    addi a3, a3, -1
-; RV32-NEXT:    and a3, a3, a2
-; RV32-NEXT:    li a2, 1
-; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v24, v16, a2, v0.t
-; RV32-NEXT:    vnot.v v16, v16, v0.t
-; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 4
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 48
-; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    vsrl.vi v24, v16, 1, v0.t
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 5
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 48
-; RV32-NEXT:    vs8r.v v24, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    addi a4, sp, 40
-; RV32-NEXT:    vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a4), zero
-; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a5, 24
-; RV32-NEXT:    mul a4, a4, a5
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 48
-; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 5
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 48
-; RV32-NEXT:    vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v24, v24, v16, v0.t
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 4
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 48
-; RV32-NEXT:    vl8r.v v16, (a4) # Unknown-size Folded Reload
-; RV32-NEXT:    vsub.vv v16, v16, v24, v0.t
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 5
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 48
-; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    addi a4, sp, 32
-; RV32-NEXT:    vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a4), zero
-; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 5
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 48
-; RV32-NEXT:    vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v24, v24, v16, v0.t
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 4
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 48
-; RV32-NEXT:    vs8r.v v24, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 5
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 48
-; RV32-NEXT:    vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT:    vsrl.vi v24, v24, 2, v0.t
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 5
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 48
-; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    vand.vv v16, v24, v16, v0.t
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 4
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 48
-; RV32-NEXT:    vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT:    vadd.vv v16, v24, v16, v0.t
-; RV32-NEXT:    vsrl.vi v24, v16, 4, v0.t
-; RV32-NEXT:    vadd.vv v24, v16, v24, v0.t
-; RV32-NEXT:    addi a4, sp, 24
-; RV32-NEXT:    vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a4), zero
-; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 4
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 48
-; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    vand.vv v24, v24, v16, v0.t
-; RV32-NEXT:    addi a4, sp, 16
-; RV32-NEXT:    vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a4), zero
-; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 3
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vmul.vv v16, v24, v16, v0.t
-; RV32-NEXT:    li a3, 56
-; RV32-NEXT:    vsrl.vx v16, v16, a3, v0.t
-; RV32-NEXT:    addi a4, sp, 48
-; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    bltu a0, a1, .LBB94_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a0, a1
-; RV32-NEXT:  .LBB94_2:
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vmv1r.v v0, v1
-; RV32-NEXT:    vsub.vx v16, v8, a2, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v24, v8, 1, v0.t
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 24
-; RV32-NEXT:    mul a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v24, v16, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vx v8, v8, a3, v0.t
-; RV32-NEXT:    addi a0, sp, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 40
-; RV32-NEXT:    mul a0, a0, a1
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    addi sp, sp, 48
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv16i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -16
-; RV64-NEXT:    .cfi_def_cfa_offset 16
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
-; RV64-NEXT:    sub sp, sp, a1
-; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; RV64-NEXT:    vmv1r.v v24, v0
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    srli a2, a1, 3
-; RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; RV64-NEXT:    vslidedown.vx v0, v0, a2
-; RV64-NEXT:    sub a2, a0, a1
-; RV64-NEXT:    sltu a3, a0, a2
-; RV64-NEXT:    addi a3, a3, -1
-; RV64-NEXT:    and a3, a3, a2
-; RV64-NEXT:    li a2, 1
-; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v8, v16, a2, v0.t
-; RV64-NEXT:    vnot.v v16, v16, v0.t
-; RV64-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v8, v16, 1, v0.t
-; RV64-NEXT:    lui a3, 349525
-; RV64-NEXT:    addiw a3, a3, 1365
-; RV64-NEXT:    slli a4, a3, 32
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
-; RV64-NEXT:    vsub.vv v16, v16, v8, v0.t
-; RV64-NEXT:    lui a4, 209715
-; RV64-NEXT:    addiw a4, a4, 819
-; RV64-NEXT:    slli a5, a4, 32
-; RV64-NEXT:    add a4, a4, a5
-; RV64-NEXT:    vand.vx v8, v16, a4, v0.t
-; RV64-NEXT:    vsrl.vi v16, v16, 2, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a5, 61681
-; RV64-NEXT:    addiw a5, a5, -241
-; RV64-NEXT:    slli a6, a5, 32
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    vand.vx v8, v8, a5, v0.t
-; RV64-NEXT:    lui a6, 4112
-; RV64-NEXT:    addiw a6, a6, 257
-; RV64-NEXT:    slli a7, a6, 32
-; RV64-NEXT:    add a6, a6, a7
-; RV64-NEXT:    vmul.vx v8, v8, a6, v0.t
-; RV64-NEXT:    li a7, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a7, v0.t
-; RV64-NEXT:    addi t0, sp, 16
-; RV64-NEXT:    vs8r.v v8, (t0) # Unknown-size Folded Spill
-; RV64-NEXT:    bltu a0, a1, .LBB94_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a0, a1
-; RV64-NEXT:  .LBB94_2:
-; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vmv1r.v v0, v24
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 16
-; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT:    vsub.vx v16, v8, a2, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a3, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vand.vx v16, v8, a4, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
-; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a5, v0.t
-; RV64-NEXT:    vmul.vx v8, v8, a6, v0.t
-; RV64-NEXT:    vsrl.vx v8, v8, a7, v0.t
-; RV64-NEXT:    addi a0, sp, 16
-; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add sp, sp, a0
-; RV64-NEXT:    addi sp, sp, 16
-; RV64-NEXT:    ret
+; CHECK-ZVBB-NEXT:    ret
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+
+define <vscale x 7 x i64> @vp_cttz_zero_undef_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv7i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vrsub.vi v16, v8, 0, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    fsrmi a0, 1
+; CHECK-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT:    li a1, 52
+; CHECK-NEXT:    vsrl.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    li a1, 1023
+; CHECK-NEXT:    vsub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    fsrm a0
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv7i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
+  %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 true, <vscale x 7 x i1> %m, i32 %evl)
+  ret <vscale x 7 x i64> %v
+}
+
+define <vscale x 7 x i64> @vp_cttz_zero_undef_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv7i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vrsub.vi v16, v8, 0
+; CHECK-NEXT:    vand.vv v8, v8, v16
+; CHECK-NEXT:    fsrmi a0, 1
+; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
+; CHECK-NEXT:    li a1, 52
+; CHECK-NEXT:    vsrl.vx v8, v8, a1
+; CHECK-NEXT:    li a1, 1023
+; CHECK-NEXT:    vsub.vx v8, v8, a1
+; CHECK-NEXT:    fsrm a0
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv7i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
+  %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
+  %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 true, <vscale x 7 x i1> %m, i32 %evl)
+  ret <vscale x 7 x i64> %v
+}
+
+
+define <vscale x 8 x i64> @vp_cttz_zero_undef_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vrsub.vi v16, v8, 0, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    fsrmi a0, 1
+; CHECK-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT:    li a1, 52
+; CHECK-NEXT:    vsrl.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    li a1, 1023
+; CHECK-NEXT:    vsub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    fsrm a0
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
+  %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vp_cttz_zero_undef_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vrsub.vi v16, v8, 0
+; CHECK-NEXT:    vand.vv v8, v8, v16
+; CHECK-NEXT:    fsrmi a0, 1
+; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
+; CHECK-NEXT:    li a1, 52
+; CHECK-NEXT:    vsrl.vx v8, v8, a1
+; CHECK-NEXT:    li a1, 1023
+; CHECK-NEXT:    vsub.vx v8, v8, a1
+; CHECK-NEXT:    fsrm a0
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_cttz_zero_undef_nxv16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    srli a2, a1, 3
+; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vslidedown.vx v0, v0, a2
+; CHECK-NEXT:    sub a2, a0, a1
+; CHECK-NEXT:    sltu a3, a0, a2
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a2, a3, a2
+; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT:    vrsub.vi v8, v16, 0, v0.t
+; CHECK-NEXT:    vand.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    fsrmi a2, 1
+; CHECK-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT:    fsrm a2
+; CHECK-NEXT:    li a2, 52
+; CHECK-NEXT:    vsrl.vx v8, v8, a2, v0.t
+; CHECK-NEXT:    li a3, 1023
+; CHECK-NEXT:    vsub.vx v8, v8, a3, v0.t
+; CHECK-NEXT:    addi a4, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT:    bltu a0, a1, .LBB94_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:  .LBB94_2:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vrsub.vi v16, v8, 0, v0.t
+; CHECK-NEXT:    vand.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    fsrmi a0, 1
+; CHECK-NEXT:    vfcvt.f.xu.v v8, v8, v0.t
+; CHECK-NEXT:    vsrl.vx v8, v8, a2, v0.t
+; CHECK-NEXT:    vsub.vx v8, v8, a3, v0.t
+; CHECK-NEXT:    fsrm a0
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i64:
 ; CHECK-ZVBB:       # %bb.0:
@@ -6904,193 +4949,36 @@ define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64(<vscale x 16 x i64> %va,
 }
 
 define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64_unmasked(<vscale x 16 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vp_cttz_zero_undef_nxv16i64_unmasked:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -48
-; RV32-NEXT:    .cfi_def_cfa_offset 48
-; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 5
-; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 44(sp)
-; RV32-NEXT:    sw a1, 40(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 36(sp)
-; RV32-NEXT:    sw a1, 32(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    sub a2, a0, a1
-; RV32-NEXT:    sltu a3, a0, a2
-; RV32-NEXT:    addi a3, a3, -1
-; RV32-NEXT:    and a3, a3, a2
-; RV32-NEXT:    li a2, 1
-; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v24, v16, a2
-; RV32-NEXT:    vnot.v v16, v16
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsrl.vi v24, v16, 1
-; RV32-NEXT:    addi a4, sp, 40
-; RV32-NEXT:    vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v0, (a4), zero
-; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a5, 24
-; RV32-NEXT:    mul a4, a4, a5
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 48
-; RV32-NEXT:    vs8r.v v0, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    vand.vv v24, v24, v0
-; RV32-NEXT:    vsub.vv v16, v16, v24
-; RV32-NEXT:    addi a4, sp, 32
-; RV32-NEXT:    vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v0, (a4), zero
-; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v16, v0
-; RV32-NEXT:    vsrl.vi v16, v16, 2
-; RV32-NEXT:    vand.vv v16, v16, v0
-; RV32-NEXT:    vadd.vv v16, v24, v16
-; RV32-NEXT:    vsrl.vi v24, v16, 4
-; RV32-NEXT:    vadd.vv v16, v16, v24
-; RV32-NEXT:    addi a4, sp, 24
-; RV32-NEXT:    vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a4), zero
-; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 4
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 48
-; RV32-NEXT:    vs8r.v v24, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    vand.vv v24, v16, v24
-; RV32-NEXT:    addi a4, sp, 16
-; RV32-NEXT:    vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a4), zero
-; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 3
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vmul.vv v24, v24, v16
-; RV32-NEXT:    li a3, 56
-; RV32-NEXT:    vsrl.vx v16, v24, a3
-; RV32-NEXT:    addi a4, sp, 48
-; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    bltu a0, a1, .LBB95_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a0, a1
-; RV32-NEXT:  .LBB95_2:
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v24, v8, a2
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v24
-; RV32-NEXT:    vsrl.vi v24, v8, 1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 24
-; RV32-NEXT:    mul a0, a0, a1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v24, v24, v16
-; RV32-NEXT:    vsub.vv v8, v8, v24
-; RV32-NEXT:    vand.vv v24, v8, v0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v0
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v24
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vmul.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vx v8, v8, a3
-; RV32-NEXT:    addi a0, sp, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    addi sp, sp, 48
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vp_cttz_zero_undef_nxv16i64_unmasked:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    sub a2, a0, a1
-; RV64-NEXT:    sltu a3, a0, a2
-; RV64-NEXT:    addi a3, a3, -1
-; RV64-NEXT:    and a3, a3, a2
-; RV64-NEXT:    li a2, 1
-; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v24, v16, a2
-; RV64-NEXT:    vnot.v v16, v16
-; RV64-NEXT:    vand.vv v16, v16, v24
-; RV64-NEXT:    vsrl.vi v24, v16, 1
-; RV64-NEXT:    lui a3, 349525
-; RV64-NEXT:    addiw a3, a3, 1365
-; RV64-NEXT:    slli a4, a3, 32
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    vand.vx v24, v24, a3
-; RV64-NEXT:    vsub.vv v16, v16, v24
-; RV64-NEXT:    lui a4, 209715
-; RV64-NEXT:    addiw a4, a4, 819
-; RV64-NEXT:    slli a5, a4, 32
-; RV64-NEXT:    add a4, a4, a5
-; RV64-NEXT:    vand.vx v24, v16, a4
-; RV64-NEXT:    vsrl.vi v16, v16, 2
-; RV64-NEXT:    vand.vx v16, v16, a4
-; RV64-NEXT:    vadd.vv v16, v24, v16
-; RV64-NEXT:    vsrl.vi v24, v16, 4
-; RV64-NEXT:    vadd.vv v16, v16, v24
-; RV64-NEXT:    lui a5, 61681
-; RV64-NEXT:    addiw a5, a5, -241
-; RV64-NEXT:    slli a6, a5, 32
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    vand.vx v16, v16, a5
-; RV64-NEXT:    lui a6, 4112
-; RV64-NEXT:    addiw a6, a6, 257
-; RV64-NEXT:    slli a7, a6, 32
-; RV64-NEXT:    add a6, a6, a7
-; RV64-NEXT:    vmul.vx v16, v16, a6
-; RV64-NEXT:    li a7, 56
-; RV64-NEXT:    vsrl.vx v16, v16, a7
-; RV64-NEXT:    bltu a0, a1, .LBB95_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a0, a1
-; RV64-NEXT:  .LBB95_2:
-; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v24, v8, a2
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v24
-; RV64-NEXT:    vsrl.vi v24, v8, 1
-; RV64-NEXT:    vand.vx v24, v24, a3
-; RV64-NEXT:    vsub.vv v8, v8, v24
-; RV64-NEXT:    vand.vx v24, v8, a4
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a4
-; RV64-NEXT:    vadd.vv v8, v24, v8
-; RV64-NEXT:    vsrl.vi v24, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v24
-; RV64-NEXT:    vand.vx v8, v8, a5
-; RV64-NEXT:    vmul.vx v8, v8, a6
-; RV64-NEXT:    vsrl.vx v8, v8, a7
-; RV64-NEXT:    ret
+; CHECK-LABEL: vp_cttz_zero_undef_nxv16i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    sub a2, a0, a1
+; CHECK-NEXT:    sltu a3, a0, a2
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a2, a3, a2
+; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT:    vrsub.vi v24, v16, 0
+; CHECK-NEXT:    vand.vv v16, v16, v24
+; CHECK-NEXT:    fsrmi a2, 1
+; CHECK-NEXT:    vfcvt.f.xu.v v16, v16
+; CHECK-NEXT:    fsrm a2
+; CHECK-NEXT:    li a2, 52
+; CHECK-NEXT:    vsrl.vx v16, v16, a2
+; CHECK-NEXT:    li a3, 1023
+; CHECK-NEXT:    vsub.vx v16, v16, a3
+; CHECK-NEXT:    bltu a0, a1, .LBB95_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:  .LBB95_2:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vrsub.vi v24, v8, 0
+; CHECK-NEXT:    vand.vv v8, v8, v24
+; CHECK-NEXT:    fsrmi a0, 1
+; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
+; CHECK-NEXT:    vsrl.vx v8, v8, a2
+; CHECK-NEXT:    vsub.vx v8, v8, a3
+; CHECK-NEXT:    fsrm a0
+; CHECK-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i64_unmasked:
 ; CHECK-ZVBB:       # %bb.0:
-- 
2.7.4