def int_riscv_sm3p0 : ScalarCryptoGprIntrinsicAny;
def int_riscv_sm3p1 : ScalarCryptoGprIntrinsicAny;
} // TargetPrefix = "riscv"
+
+//===----------------------------------------------------------------------===//
+// Vendor extensions
+//===----------------------------------------------------------------------===//
+include "llvm/IR/IntrinsicsRISCVXTHead.td"
--- /dev/null
+let TargetPrefix = "riscv" in {
+
+ class TH_VdotTernaryWideMasked
+ : DefaultAttrsIntrinsic< [llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_any_ty, llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<2, llvm_i1_ty>,
+ llvm_anyint_ty, LLVMMatchType<3>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic {
+ let ScalarOperand = 1;
+ let VLOperand = 4;
+ }
+
+ multiclass TH_VdotTernaryWide {
+ def "int_riscv_" # NAME : RISCVTernaryWideUnMasked;
+ def "int_riscv_" # NAME # "_mask" : TH_VdotTernaryWideMasked;
+ }
+
+ defm th_vmaqa : TH_VdotTernaryWide;
+ defm th_vmaqau : TH_VdotTernaryWide;
+ defm th_vmaqasu : TH_VdotTernaryWide;
+ defm th_vmaqaus : TH_VdotTernaryWide;
+}
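+
+// For reference, a sketch of the intrinsic signatures the records above are
+// expected to produce, following the usual RVV operand layout (accumulator,
+// op1, op2, [mask,] vl, policy immediate); the manglings are illustrative:
+//   <wty> llvm.riscv.th.vmaqa.<wty>.<ty>(<wty> acc, <ty> or i8 op1, <ty> op2,
+//                                        iXLen vl, iXLen policy)
+//   <wty> llvm.riscv.th.vmaqa.mask.<wty>.<ty>(<wty> acc, <ty> or i8 op1,
+//                                             <ty> op2, <mask> m,
+//                                             iXLen vl, iXLen policy)
+// ScalarOperand = 1 marks op1 as the operand that may be a GPR scalar (the
+// .vx forms); VLOperand = 4 points at vl in the masked variant.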
defm THVdotVMAQASU : THVdotVMAQA<"th.vmaqasu", 0b100100>;
defm THVdotVMAQAUS : THVdotVMAQA_VX<"th.vmaqaus", 0b100110>;
}
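+
+// The th.vmaqa family accumulates four 8-bit products into each 32-bit
+// element of vd. A rough scalar model (the signedness split of the *su/*us
+// forms is assumed to follow the vwmaccsu/vwmaccus convention; see the
+// XTHeadVdot spec for the authoritative definition):
+//   for each destination element i (up to vl):
+//     for (j = 0; j < 4; ++j)
+//       vd[i] += ext(vs1[4*i + j] or rs1) * ext(vs2[4*i + j]);
+//   th.vmaqa   - both operands sign-extended
+//   th.vmaqau  - both operands zero-extended
+//   th.vmaqasu - vs1/rs1 sign-extended, vs2 zero-extended
+//   th.vmaqaus - rs1 zero-extended, vs2 sign-extended (.vx only)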
+
+// Associate LMUL with tablegen records of register classes.
+def THVdotV_M1 : LMULInfo<0b000, 8, VR, VR, VR, VR, VR, "M1">;
+def THVdotV_M2 : LMULInfo<0b001, 16, VRM2, VRM2, VR, VR, VR, "M2">;
+def THVdotV_M4 : LMULInfo<0b010, 32, VRM4, VRM4, VRM2, VR, VR, "M4">;
+def THVdotV_M8 : LMULInfo<0b011, 64, VRM8, VRM8, VRM4, VRM2, VR, "M8">;
+
+defvar MxListTHVdot = [V_MF2, THVdotV_M1, THVdotV_M2, THVdotV_M4, THVdotV_M8];
+
+defset list<VTypeInfoToWide> AllQuadWidenableInt8NoVLMulVectors = {
+ def : VTypeInfoToWide<VI8MF2, VI32MF2>;
+ def : VTypeInfoToWide<VI8M1, VI32M1>;
+ def : VTypeInfoToWide<VI8M2, VI32M2>;
+ def : VTypeInfoToWide<VI8M4, VI32M4>;
+ def : VTypeInfoToWide<VI8M8, VI32M8>;
+}
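+
+// In this quad-widening pairing the destination keeps the register-group size
+// of the i8 source: SEW grows 8 -> 32 while the element count drops 4:1, so
+// both sides of each pair share the same LMUL (e.g. nxv4i8 at MF2 pairs with
+// nxv1i32 at MF2, nxv8i8 at M1 with nxv2i32 at M1, and so on).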
+
+//===----------------------------------------------------------------------===//
+// Combination of instruction classes.
+// Use these multiclasses to define instructions more easily.
+//===----------------------------------------------------------------------===//
+multiclass VPseudoVMAQA_VV_VX {
+ foreach m = MxListTHVdot in {
+ defm "" : VPseudoTernaryW_VV<m>;
+ defm "" : VPseudoTernaryW_VX<m>;
+ }
+}
+
+multiclass VPseudoVMAQA_VX {
+ foreach m = MxListTHVdot in {
+ defm "" : VPseudoTernaryW_VX<m>;
+ }
+}
+
+multiclass VPatTernaryVMAQA_VV<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach vtiToWti = vtilist in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ defm : VPatTernaryWithPolicy<intrinsic, instruction, "VV",
+ wti.Vector, vti.Vector, vti.Vector,
+ vti.Mask, wti.Log2SEW, vti.LMul,
+ wti.RegClass, vti.RegClass, vti.RegClass>;
+ }
+}
+
+multiclass VPatTernaryVMAQA_VX<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach vtiToWti = vtilist in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ defm : VPatTernaryWithPolicy<intrinsic, instruction,
+ "V"#vti.ScalarSuffix,
+ wti.Vector, vti.Scalar, vti.Vector,
+ vti.Mask, wti.Log2SEW, vti.LMul,
+ wti.RegClass, vti.ScalarRegClass, vti.RegClass>;
+ }
+}
+
+multiclass VPatTernaryVMAQA_VV_VX<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist>
+ : VPatTernaryVMAQA_VV<intrinsic, instruction, vtilist>,
+ VPatTernaryVMAQA_VX<intrinsic, instruction, vtilist>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo-instructions and codegen patterns
+//===----------------------------------------------------------------------===//
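+// Each defm below creates one pseudo per LMUL in MxListTHVdot (VV and VX
+// variants, VX only for VMAQAUS); the VPat multiclasses then select those
+// pseudos for the corresponding llvm.riscv.th.* intrinsics, threading the
+// mask, vl and policy operands through.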
+defm PseudoTHVdotVMAQA : VPseudoVMAQA_VV_VX;
+defm PseudoTHVdotVMAQAU : VPseudoVMAQA_VV_VX;
+defm PseudoTHVdotVMAQASU : VPseudoVMAQA_VV_VX;
+defm PseudoTHVdotVMAQAUS : VPseudoVMAQA_VX;
+
+let Predicates = [HasVendorXTHeadVdot] in {
+defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqa",   "PseudoTHVdotVMAQA",   AllQuadWidenableInt8NoVLMulVectors>;
+defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqau",  "PseudoTHVdotVMAQAU",  AllQuadWidenableInt8NoVLMulVectors>;
+defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqasu", "PseudoTHVdotVMAQASU", AllQuadWidenableInt8NoVLMulVectors>;
+defm : VPatTernaryVMAQA_VX<"int_riscv_th_vmaqaus",    "PseudoTHVdotVMAQAUS", AllQuadWidenableInt8NoVLMulVectors>;
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xtheadvdot \
+; RUN: -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xtheadvdot \
+; RUN: -verify-machineinstrs | FileCheck %s
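+
+; The functions below exercise the unmasked and masked .vv and .vx forms of
+; th.vmaqa across several LMULs. The trailing "iXLen 0" argument is the policy
+; operand (0 = tail and mask undisturbed), which is why the emitted vsetvli
+; uses "tu", and "mu" in the masked cases.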
+declare <vscale x 1 x i32> @llvm.riscv.th.vmaqa.nxv1i32.nxv4i8(
+ <vscale x 1 x i32>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x i32> @intrinsic_th_vmaqa_vv_nxv1i32_nxv4i8_nxv4i8(<vscale x 1 x i32> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqa_vv_nxv1i32_nxv4i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma
+; CHECK-NEXT: th.vmaqa.vv v8, v9, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.th.vmaqa.nxv1i32.nxv4i8(
+ <vscale x 1 x i32> %0,
+ <vscale x 4 x i8> %1,
+ <vscale x 4 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.th.vmaqa.mask.nxv1i32.nxv4i8(
+ <vscale x 1 x i32>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i32> @intrinsic_th_vmaqa_mask_vv_nxv1i32_nxv4i8_nxv4i8(<vscale x 1 x i32> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqa_mask_vv_nxv1i32_nxv4i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT: th.vmaqa.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.th.vmaqa.mask.nxv1i32.nxv4i8(
+ <vscale x 1 x i32> %0,
+ <vscale x 4 x i8> %1,
+ <vscale x 4 x i8> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.th.vmaqa.nxv2i32.nxv8i8(
+ <vscale x 2 x i32>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x i32> @intrinsic_th_vmaqa_vv_nxv2i32_nxv8i8_nxv8i8(<vscale x 2 x i32> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqa_vv_nxv2i32_nxv8i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT: th.vmaqa.vv v8, v9, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.th.vmaqa.nxv2i32.nxv8i8(
+ <vscale x 2 x i32> %0,
+ <vscale x 8 x i8> %1,
+ <vscale x 8 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.th.vmaqa.mask.nxv2i32.nxv8i8(
+ <vscale x 2 x i32>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i32> @intrinsic_th_vmaqa_mask_vv_nxv2i32_nxv8i8_nxv8i8(<vscale x 2 x i32> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqa_mask_vv_nxv2i32_nxv8i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT: th.vmaqa.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.th.vmaqa.mask.nxv2i32.nxv8i8(
+ <vscale x 2 x i32> %0,
+ <vscale x 8 x i8> %1,
+ <vscale x 8 x i8> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.th.vmaqa.nxv4i32.nxv16i8(
+ <vscale x 4 x i32>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x i32> @intrinsic_th_vmaqa_vv_nxv4i32_nxv16i8_nxv16i8(<vscale x 4 x i32> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqa_vv_nxv4i32_nxv16i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; CHECK-NEXT: th.vmaqa.vv v8, v10, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.th.vmaqa.nxv4i32.nxv16i8(
+ <vscale x 4 x i32> %0,
+ <vscale x 16 x i8> %1,
+ <vscale x 16 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.th.vmaqa.mask.nxv4i32.nxv16i8(
+ <vscale x 4 x i32>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 4 x i32> @intrinsic_th_vmaqa_mask_vv_nxv4i32_nxv16i8_nxv16i8(<vscale x 4 x i32> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqa_mask_vv_nxv4i32_nxv16i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT: th.vmaqa.vv v8, v10, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.th.vmaqa.mask.nxv4i32.nxv16i8(
+ <vscale x 4 x i32> %0,
+ <vscale x 16 x i8> %1,
+ <vscale x 16 x i8> %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.th.vmaqa.nxv8i32.nxv32i8(
+ <vscale x 8 x i32>,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x i32> @intrinsic_th_vmaqa_vv_nxv8i32_nxv32i8_nxv32i8(<vscale x 8 x i32> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqa_vv_nxv8i32_nxv32i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma
+; CHECK-NEXT: th.vmaqa.vv v8, v12, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.th.vmaqa.nxv8i32.nxv32i8(
+ <vscale x 8 x i32> %0,
+ <vscale x 32 x i8> %1,
+ <vscale x 32 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.th.vmaqa.mask.nxv8i32.nxv32i8(
+ <vscale x 8 x i32>,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 8 x i32> @intrinsic_th_vmaqa_mask_vv_nxv8i32_nxv32i8_nxv32i8(<vscale x 8 x i32> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqa_mask_vv_nxv8i32_nxv32i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT: th.vmaqa.vv v8, v12, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.th.vmaqa.mask.nxv8i32.nxv32i8(
+ <vscale x 8 x i32> %0,
+ <vscale x 32 x i8> %1,
+ <vscale x 32 x i8> %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x i32> %a
+}
+
+
+declare <vscale x 1 x i32> @llvm.riscv.th.vmaqa.nxv1i32.i8(
+ <vscale x 1 x i32>,
+ i8,
+ <vscale x 4 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x i32> @intrinsic_th_vmaqa_vx_nxv1i32_i8_nxv4i8(<vscale x 1 x i32> %0, i8 %1, <vscale x 4 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqa_vx_nxv1i32_i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
+; CHECK-NEXT: th.vmaqa.vx v8, a0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.th.vmaqa.nxv1i32.i8(
+ <vscale x 1 x i32> %0,
+ i8 %1,
+ <vscale x 4 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.th.vmaqa.mask.nxv1i32.i8(
+ <vscale x 1 x i32>,
+ i8,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i32> @intrinsic_th_vmaqa_mask_vx_nxv1i32_i8_nxv4i8(<vscale x 1 x i32> %0, i8 %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqa_mask_vx_nxv1i32_i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu
+; CHECK-NEXT: th.vmaqa.vx v8, a0, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.th.vmaqa.mask.nxv1i32.i8(
+ <vscale x 1 x i32> %0,
+ i8 %1,
+ <vscale x 4 x i8> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.th.vmaqa.nxv2i32.i8(
+ <vscale x 2 x i32>,
+ i8,
+ <vscale x 8 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x i32> @intrinsic_th_vmaqa_vx_nxv2i32_i8_nxv8i8(<vscale x 2 x i32> %0, i8 %1, <vscale x 8 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqa_vx_nxv2i32_i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
+; CHECK-NEXT: th.vmaqa.vx v8, a0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.th.vmaqa.nxv2i32.i8(
+ <vscale x 2 x i32> %0,
+ i8 %1,
+ <vscale x 8 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.th.vmaqa.mask.nxv2i32.i8(
+ <vscale x 2 x i32>,
+ i8,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i32> @intrinsic_th_vmaqa_mask_vx_nxv2i32_i8_nxv8i8(<vscale x 2 x i32> %0, i8 %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqa_mask_vx_nxv2i32_i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu
+; CHECK-NEXT: th.vmaqa.vx v8, a0, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.th.vmaqa.mask.nxv2i32.i8(
+ <vscale x 2 x i32> %0,
+ i8 %1,
+ <vscale x 8 x i8> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.th.vmaqa.nxv4i32.i8(
+ <vscale x 4 x i32>,
+ i8,
+ <vscale x 16 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x i32> @intrinsic_th_vmaqa_vx_nxv4i32_i8_nxv16i8(<vscale x 4 x i32> %0, i8 %1, <vscale x 16 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqa_vx_nxv4i32_i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; CHECK-NEXT: th.vmaqa.vx v8, a0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.th.vmaqa.nxv4i32.i8(
+ <vscale x 4 x i32> %0,
+ i8 %1,
+ <vscale x 16 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.th.vmaqa.mask.nxv4i32.i8(
+ <vscale x 4 x i32>,
+ i8,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 4 x i32> @intrinsic_th_vmaqa_mask_vx_nxv4i32_i8_nxv16i8(<vscale x 4 x i32> %0, i8 %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqa_mask_vx_nxv4i32_i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu
+; CHECK-NEXT: th.vmaqa.vx v8, a0, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.th.vmaqa.mask.nxv4i32.i8(
+ <vscale x 4 x i32> %0,
+ i8 %1,
+ <vscale x 16 x i8> %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.th.vmaqa.nxv8i32.i8(
+ <vscale x 8 x i32>,
+ i8,
+ <vscale x 32 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x i32> @intrinsic_th_vmaqa_vx_nxv8i32_i8_nxv32i8(<vscale x 8 x i32> %0, i8 %1, <vscale x 32 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqa_vx_nxv8i32_i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, ma
+; CHECK-NEXT: th.vmaqa.vx v8, a0, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.th.vmaqa.nxv8i32.i8(
+ <vscale x 8 x i32> %0,
+ i8 %1,
+ <vscale x 32 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.th.vmaqa.mask.nxv8i32.i8(
+ <vscale x 8 x i32>,
+ i8,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 8 x i32> @intrinsic_th_vmaqa_mask_vx_nxv8i32_i8_nxv32i8(<vscale x 8 x i32> %0, i8 %1, <vscale x 32 x i8> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqa_mask_vx_nxv8i32_i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu
+; CHECK-NEXT: th.vmaqa.vx v8, a0, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.th.vmaqa.mask.nxv8i32.i8(
+ <vscale x 8 x i32> %0,
+ i8 %1,
+ <vscale x 32 x i8> %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x i32> %a
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xtheadvdot \
+; RUN: -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xtheadvdot \
+; RUN: -verify-machineinstrs | FileCheck %s
+declare <vscale x 1 x i32> @llvm.riscv.th.vmaqasu.nxv1i32.nxv4i8(
+ <vscale x 1 x i32>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x i32> @intrinsic_th_vmaqasu_vv_nxv1i32_nxv4i8_nxv4i8(<vscale x 1 x i32> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqasu_vv_nxv1i32_nxv4i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma
+; CHECK-NEXT: th.vmaqasu.vv v8, v9, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.th.vmaqasu.nxv1i32.nxv4i8(
+ <vscale x 1 x i32> %0,
+ <vscale x 4 x i8> %1,
+ <vscale x 4 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.th.vmaqasu.mask.nxv1i32.nxv4i8(
+ <vscale x 1 x i32>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i32> @intrinsic_th_vmaqasu_mask_vv_nxv1i32_nxv4i8_nxv4i8(<vscale x 1 x i32> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqasu_mask_vv_nxv1i32_nxv4i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT: th.vmaqasu.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.th.vmaqasu.mask.nxv1i32.nxv4i8(
+ <vscale x 1 x i32> %0,
+ <vscale x 4 x i8> %1,
+ <vscale x 4 x i8> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.th.vmaqasu.nxv2i32.nxv8i8(
+ <vscale x 2 x i32>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x i32> @intrinsic_th_vmaqasu_vv_nxv2i32_nxv8i8_nxv8i8(<vscale x 2 x i32> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqasu_vv_nxv2i32_nxv8i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT: th.vmaqasu.vv v8, v9, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.th.vmaqasu.nxv2i32.nxv8i8(
+ <vscale x 2 x i32> %0,
+ <vscale x 8 x i8> %1,
+ <vscale x 8 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.th.vmaqasu.mask.nxv2i32.nxv8i8(
+ <vscale x 2 x i32>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i32> @intrinsic_th_vmaqasu_mask_vv_nxv2i32_nxv8i8_nxv8i8(<vscale x 2 x i32> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqasu_mask_vv_nxv2i32_nxv8i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT: th.vmaqasu.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.th.vmaqasu.mask.nxv2i32.nxv8i8(
+ <vscale x 2 x i32> %0,
+ <vscale x 8 x i8> %1,
+ <vscale x 8 x i8> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.th.vmaqasu.nxv4i32.nxv16i8(
+ <vscale x 4 x i32>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x i32> @intrinsic_th_vmaqasu_vv_nxv4i32_nxv16i8_nxv16i8(<vscale x 4 x i32> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqasu_vv_nxv4i32_nxv16i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; CHECK-NEXT: th.vmaqasu.vv v8, v10, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.th.vmaqasu.nxv4i32.nxv16i8(
+ <vscale x 4 x i32> %0,
+ <vscale x 16 x i8> %1,
+ <vscale x 16 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.th.vmaqasu.mask.nxv4i32.nxv16i8(
+ <vscale x 4 x i32>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 4 x i32> @intrinsic_th_vmaqasu_mask_vv_nxv4i32_nxv16i8_nxv16i8(<vscale x 4 x i32> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqasu_mask_vv_nxv4i32_nxv16i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT: th.vmaqasu.vv v8, v10, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.th.vmaqasu.mask.nxv4i32.nxv16i8(
+ <vscale x 4 x i32> %0,
+ <vscale x 16 x i8> %1,
+ <vscale x 16 x i8> %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.th.vmaqasu.nxv8i32.nxv32i8(
+ <vscale x 8 x i32>,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x i32> @intrinsic_th_vmaqasu_vv_nxv8i32_nxv32i8_nxv32i8(<vscale x 8 x i32> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqasu_vv_nxv8i32_nxv32i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma
+; CHECK-NEXT: th.vmaqasu.vv v8, v12, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.th.vmaqasu.nxv8i32.nxv32i8(
+ <vscale x 8 x i32> %0,
+ <vscale x 32 x i8> %1,
+ <vscale x 32 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.th.vmaqasu.mask.nxv8i32.nxv32i8(
+ <vscale x 8 x i32>,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 8 x i32> @intrinsic_th_vmaqasu_mask_vv_nxv8i32_nxv32i8_nxv32i8(<vscale x 8 x i32> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqasu_mask_vv_nxv8i32_nxv32i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT: th.vmaqasu.vv v8, v12, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.th.vmaqasu.mask.nxv8i32.nxv32i8(
+ <vscale x 8 x i32> %0,
+ <vscale x 32 x i8> %1,
+ <vscale x 32 x i8> %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x i32> %a
+}
+
+
+declare <vscale x 1 x i32> @llvm.riscv.th.vmaqasu.nxv1i32.i8(
+ <vscale x 1 x i32>,
+ i8,
+ <vscale x 4 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x i32> @intrinsic_th_vmaqasu_vx_nxv1i32_i8_nxv4i8(<vscale x 1 x i32> %0, i8 %1, <vscale x 4 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqasu_vx_nxv1i32_i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
+; CHECK-NEXT: th.vmaqasu.vx v8, a0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.th.vmaqasu.nxv1i32.i8(
+ <vscale x 1 x i32> %0,
+ i8 %1,
+ <vscale x 4 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.th.vmaqasu.mask.nxv1i32.i8(
+ <vscale x 1 x i32>,
+ i8,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i32> @intrinsic_th_vmaqasu_mask_vx_nxv1i32_i8_nxv4i8(<vscale x 1 x i32> %0, i8 %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqasu_mask_vx_nxv1i32_i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu
+; CHECK-NEXT: th.vmaqasu.vx v8, a0, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.th.vmaqasu.mask.nxv1i32.i8(
+ <vscale x 1 x i32> %0,
+ i8 %1,
+ <vscale x 4 x i8> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.th.vmaqasu.nxv2i32.i8(
+ <vscale x 2 x i32>,
+ i8,
+ <vscale x 8 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x i32> @intrinsic_th_vmaqasu_vx_nxv2i32_i8_nxv8i8(<vscale x 2 x i32> %0, i8 %1, <vscale x 8 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqasu_vx_nxv2i32_i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
+; CHECK-NEXT: th.vmaqasu.vx v8, a0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.th.vmaqasu.nxv2i32.i8(
+ <vscale x 2 x i32> %0,
+ i8 %1,
+ <vscale x 8 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.th.vmaqasu.mask.nxv2i32.i8(
+ <vscale x 2 x i32>,
+ i8,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i32> @intrinsic_th_vmaqasu_mask_vx_nxv2i32_i8_nxv8i8(<vscale x 2 x i32> %0, i8 %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqasu_mask_vx_nxv2i32_i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu
+; CHECK-NEXT: th.vmaqasu.vx v8, a0, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.th.vmaqasu.mask.nxv2i32.i8(
+ <vscale x 2 x i32> %0,
+ i8 %1,
+ <vscale x 8 x i8> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.th.vmaqasu.nxv4i32.i8(
+ <vscale x 4 x i32>,
+ i8,
+ <vscale x 16 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x i32> @intrinsic_th_vmaqasu_vx_nxv4i32_i8_nxv16i8(<vscale x 4 x i32> %0, i8 %1, <vscale x 16 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqasu_vx_nxv4i32_i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; CHECK-NEXT: th.vmaqasu.vx v8, a0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.th.vmaqasu.nxv4i32.i8(
+ <vscale x 4 x i32> %0,
+ i8 %1,
+ <vscale x 16 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.th.vmaqasu.mask.nxv4i32.i8(
+ <vscale x 4 x i32>,
+ i8,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 4 x i32> @intrinsic_th_vmaqasu_mask_vx_nxv4i32_i8_nxv16i8(<vscale x 4 x i32> %0, i8 %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqasu_mask_vx_nxv4i32_i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu
+; CHECK-NEXT: th.vmaqasu.vx v8, a0, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.th.vmaqasu.mask.nxv4i32.i8(
+ <vscale x 4 x i32> %0,
+ i8 %1,
+ <vscale x 16 x i8> %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.th.vmaqasu.nxv8i32.i8(
+ <vscale x 8 x i32>,
+ i8,
+ <vscale x 32 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x i32> @intrinsic_th_vmaqasu_vx_nxv8i32_i8_nxv32i8(<vscale x 8 x i32> %0, i8 %1, <vscale x 32 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqasu_vx_nxv8i32_i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, ma
+; CHECK-NEXT: th.vmaqasu.vx v8, a0, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.th.vmaqasu.nxv8i32.i8(
+ <vscale x 8 x i32> %0,
+ i8 %1,
+ <vscale x 32 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.th.vmaqasu.mask.nxv8i32.i8(
+ <vscale x 8 x i32>,
+ i8,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 8 x i32> @intrinsic_th_vmaqasu_mask_vx_nxv8i32_i8_nxv32i8(<vscale x 8 x i32> %0, i8 %1, <vscale x 32 x i8> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqasu_mask_vx_nxv8i32_i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu
+; CHECK-NEXT: th.vmaqasu.vx v8, a0, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.th.vmaqasu.mask.nxv8i32.i8(
+ <vscale x 8 x i32> %0,
+ i8 %1,
+ <vscale x 32 x i8> %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x i32> %a
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xtheadvdot \
+; RUN: -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xtheadvdot \
+; RUN: -verify-machineinstrs | FileCheck %s
+declare <vscale x 1 x i32> @llvm.riscv.th.vmaqau.nxv1i32.nxv4i8(
+ <vscale x 1 x i32>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x i32> @intrinsic_th_vmaqau_vv_nxv1i32_nxv4i8_nxv4i8(<vscale x 1 x i32> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqau_vv_nxv1i32_nxv4i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma
+; CHECK-NEXT: th.vmaqau.vv v8, v9, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.th.vmaqau.nxv1i32.nxv4i8(
+ <vscale x 1 x i32> %0,
+ <vscale x 4 x i8> %1,
+ <vscale x 4 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.th.vmaqau.mask.nxv1i32.nxv4i8(
+ <vscale x 1 x i32>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i32> @intrinsic_th_vmaqau_mask_vv_nxv1i32_nxv4i8_nxv4i8(<vscale x 1 x i32> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqau_mask_vv_nxv1i32_nxv4i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT: th.vmaqau.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.th.vmaqau.mask.nxv1i32.nxv4i8(
+ <vscale x 1 x i32> %0,
+ <vscale x 4 x i8> %1,
+ <vscale x 4 x i8> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.th.vmaqau.nxv2i32.nxv8i8(
+ <vscale x 2 x i32>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x i32> @intrinsic_th_vmaqau_vv_nxv2i32_nxv8i8_nxv8i8(<vscale x 2 x i32> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqau_vv_nxv2i32_nxv8i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT: th.vmaqau.vv v8, v9, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.th.vmaqau.nxv2i32.nxv8i8(
+ <vscale x 2 x i32> %0,
+ <vscale x 8 x i8> %1,
+ <vscale x 8 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.th.vmaqau.mask.nxv2i32.nxv8i8(
+ <vscale x 2 x i32>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i32> @intrinsic_th_vmaqau_mask_vv_nxv2i32_nxv8i8_nxv8i8(<vscale x 2 x i32> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqau_mask_vv_nxv2i32_nxv8i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT: th.vmaqau.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.th.vmaqau.mask.nxv2i32.nxv8i8(
+ <vscale x 2 x i32> %0,
+ <vscale x 8 x i8> %1,
+ <vscale x 8 x i8> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.th.vmaqau.nxv4i32.nxv16i8(
+ <vscale x 4 x i32>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x i32> @intrinsic_th_vmaqau_vv_nxv4i32_nxv16i8_nxv16i8(<vscale x 4 x i32> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqau_vv_nxv4i32_nxv16i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; CHECK-NEXT: th.vmaqau.vv v8, v10, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.th.vmaqau.nxv4i32.nxv16i8(
+ <vscale x 4 x i32> %0,
+ <vscale x 16 x i8> %1,
+ <vscale x 16 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.th.vmaqau.mask.nxv4i32.nxv16i8(
+ <vscale x 4 x i32>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 4 x i32> @intrinsic_th_vmaqau_mask_vv_nxv4i32_nxv16i8_nxv16i8(<vscale x 4 x i32> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqau_mask_vv_nxv4i32_nxv16i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT: th.vmaqau.vv v8, v10, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.th.vmaqau.mask.nxv4i32.nxv16i8(
+ <vscale x 4 x i32> %0,
+ <vscale x 16 x i8> %1,
+ <vscale x 16 x i8> %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.th.vmaqau.nxv8i32.nxv32i8(
+ <vscale x 8 x i32>,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x i32> @intrinsic_th_vmaqau_vv_nxv8i32_nxv32i8_nxv32i8(<vscale x 8 x i32> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqau_vv_nxv8i32_nxv32i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma
+; CHECK-NEXT: th.vmaqau.vv v8, v12, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.th.vmaqau.nxv8i32.nxv32i8(
+ <vscale x 8 x i32> %0,
+ <vscale x 32 x i8> %1,
+ <vscale x 32 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.th.vmaqau.mask.nxv8i32.nxv32i8(
+ <vscale x 8 x i32>,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 8 x i32> @intrinsic_th_vmaqau_mask_vv_nxv8i32_nxv32i8_nxv32i8(<vscale x 8 x i32> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqau_mask_vv_nxv8i32_nxv32i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT: th.vmaqau.vv v8, v12, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.th.vmaqau.mask.nxv8i32.nxv32i8(
+ <vscale x 8 x i32> %0,
+ <vscale x 32 x i8> %1,
+ <vscale x 32 x i8> %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x i32> %a
+}
+
+
+declare <vscale x 1 x i32> @llvm.riscv.th.vmaqau.nxv1i32.i8(
+ <vscale x 1 x i32>,
+ i8,
+ <vscale x 4 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x i32> @intrinsic_th_vmaqau_vx_nxv1i32_i8_nxv4i8(<vscale x 1 x i32> %0, i8 %1, <vscale x 4 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqau_vx_nxv1i32_i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
+; CHECK-NEXT: th.vmaqau.vx v8, a0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.th.vmaqau.nxv1i32.i8(
+ <vscale x 1 x i32> %0,
+ i8 %1,
+ <vscale x 4 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.th.vmaqau.mask.nxv1i32.i8(
+ <vscale x 1 x i32>,
+ i8,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i32> @intrinsic_th_vmaqau_mask_vx_nxv1i32_i8_nxv4i8(<vscale x 1 x i32> %0, i8 %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqau_mask_vx_nxv1i32_i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu
+; CHECK-NEXT: th.vmaqau.vx v8, a0, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.th.vmaqau.mask.nxv1i32.i8(
+ <vscale x 1 x i32> %0,
+ i8 %1,
+ <vscale x 4 x i8> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.th.vmaqau.nxv2i32.i8(
+ <vscale x 2 x i32>,
+ i8,
+ <vscale x 8 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x i32> @intrinsic_th_vmaqau_vx_nxv2i32_i8_nxv8i8(<vscale x 2 x i32> %0, i8 %1, <vscale x 8 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqau_vx_nxv2i32_i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
+; CHECK-NEXT: th.vmaqau.vx v8, a0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.th.vmaqau.nxv2i32.i8(
+ <vscale x 2 x i32> %0,
+ i8 %1,
+ <vscale x 8 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.th.vmaqau.mask.nxv2i32.i8(
+ <vscale x 2 x i32>,
+ i8,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i32> @intrinsic_th_vmaqau_mask_vx_nxv2i32_i8_nxv8i8(<vscale x 2 x i32> %0, i8 %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqau_mask_vx_nxv2i32_i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu
+; CHECK-NEXT: th.vmaqau.vx v8, a0, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.th.vmaqau.mask.nxv2i32.i8(
+ <vscale x 2 x i32> %0,
+ i8 %1,
+ <vscale x 8 x i8> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.th.vmaqau.nxv4i32.i8(
+ <vscale x 4 x i32>,
+ i8,
+ <vscale x 16 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x i32> @intrinsic_th_vmaqau_vx_nxv4i32_i8_nxv16i8(<vscale x 4 x i32> %0, i8 %1, <vscale x 16 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqau_vx_nxv4i32_i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; CHECK-NEXT: th.vmaqau.vx v8, a0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.th.vmaqau.nxv4i32.i8(
+ <vscale x 4 x i32> %0,
+ i8 %1,
+ <vscale x 16 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.th.vmaqau.mask.nxv4i32.i8(
+ <vscale x 4 x i32>,
+ i8,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 4 x i32> @intrinsic_th_vmaqau_mask_vx_nxv4i32_i8_nxv16i8(<vscale x 4 x i32> %0, i8 %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqau_mask_vx_nxv4i32_i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu
+; CHECK-NEXT: th.vmaqau.vx v8, a0, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.th.vmaqau.mask.nxv4i32.i8(
+ <vscale x 4 x i32> %0,
+ i8 %1,
+ <vscale x 16 x i8> %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.th.vmaqau.nxv8i32.i8(
+ <vscale x 8 x i32>,
+ i8,
+ <vscale x 32 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x i32> @intrinsic_th_vmaqau_vx_nxv8i32_i8_nxv32i8(<vscale x 8 x i32> %0, i8 %1, <vscale x 32 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqau_vx_nxv8i32_i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, ma
+; CHECK-NEXT: th.vmaqau.vx v8, a0, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.th.vmaqau.nxv8i32.i8(
+ <vscale x 8 x i32> %0,
+ i8 %1,
+ <vscale x 32 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.th.vmaqau.mask.nxv8i32.i8(
+ <vscale x 8 x i32>,
+ i8,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 8 x i32> @intrinsic_th_vmaqau_mask_vx_nxv8i32_i8_nxv32i8(<vscale x 8 x i32> %0, i8 %1, <vscale x 32 x i8> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqau_mask_vx_nxv8i32_i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu
+; CHECK-NEXT: th.vmaqau.vx v8, a0, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.th.vmaqau.mask.nxv8i32.i8(
+ <vscale x 8 x i32> %0,
+ i8 %1,
+ <vscale x 32 x i8> %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x i32> %a
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xtheadvdot \
+; RUN: -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xtheadvdot \
+; RUN: -verify-machineinstrs | FileCheck %s
+declare <vscale x 1 x i32> @llvm.riscv.th.vmaqaus.nxv1i32.i8(
+ <vscale x 1 x i32>,
+ i8,
+ <vscale x 4 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x i32> @intrinsic_th_vmaqaus_vx_nxv1i32_i8_nxv4i8(<vscale x 1 x i32> %0, i8 %1, <vscale x 4 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqaus_vx_nxv1i32_i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
+; CHECK-NEXT: th.vmaqaus.vx v8, a0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.th.vmaqaus.nxv1i32.i8(
+ <vscale x 1 x i32> %0,
+ i8 %1,
+ <vscale x 4 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.th.vmaqaus.mask.nxv1i32.i8(
+ <vscale x 1 x i32>,
+ i8,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i32> @intrinsic_th_vmaqaus_mask_vx_nxv1i32_i8_nxv4i8(<vscale x 1 x i32> %0, i8 %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqaus_mask_vx_nxv1i32_i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu
+; CHECK-NEXT: th.vmaqaus.vx v8, a0, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.th.vmaqaus.mask.nxv1i32.i8(
+ <vscale x 1 x i32> %0,
+ i8 %1,
+ <vscale x 4 x i8> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.th.vmaqaus.nxv2i32.i8(
+ <vscale x 2 x i32>,
+ i8,
+ <vscale x 8 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x i32> @intrinsic_th_vmaqaus_vx_nxv2i32_i8_nxv8i8(<vscale x 2 x i32> %0, i8 %1, <vscale x 8 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqaus_vx_nxv2i32_i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
+; CHECK-NEXT: th.vmaqaus.vx v8, a0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.th.vmaqaus.nxv2i32.i8(
+ <vscale x 2 x i32> %0,
+ i8 %1,
+ <vscale x 8 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.th.vmaqaus.mask.nxv2i32.i8(
+ <vscale x 2 x i32>,
+ i8,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i32> @intrinsic_th_vmaqaus_mask_vx_nxv2i32_i8_nxv8i8(<vscale x 2 x i32> %0, i8 %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqaus_mask_vx_nxv2i32_i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu
+; CHECK-NEXT: th.vmaqaus.vx v8, a0, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.th.vmaqaus.mask.nxv2i32.i8(
+ <vscale x 2 x i32> %0,
+ i8 %1,
+ <vscale x 8 x i8> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.th.vmaqaus.nxv4i32.i8(
+ <vscale x 4 x i32>,
+ i8,
+ <vscale x 16 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x i32> @intrinsic_th_vmaqaus_vx_nxv4i32_i8_nxv16i8(<vscale x 4 x i32> %0, i8 %1, <vscale x 16 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqaus_vx_nxv4i32_i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; CHECK-NEXT: th.vmaqaus.vx v8, a0, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.th.vmaqaus.nxv4i32.i8(
+ <vscale x 4 x i32> %0,
+ i8 %1,
+ <vscale x 16 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.th.vmaqaus.mask.nxv4i32.i8(
+ <vscale x 4 x i32>,
+ i8,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 4 x i32> @intrinsic_th_vmaqaus_mask_vx_nxv4i32_i8_nxv16i8(<vscale x 4 x i32> %0, i8 %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqaus_mask_vx_nxv4i32_i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu
+; CHECK-NEXT: th.vmaqaus.vx v8, a0, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.th.vmaqaus.mask.nxv4i32.i8(
+ <vscale x 4 x i32> %0,
+ i8 %1,
+ <vscale x 16 x i8> %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.th.vmaqaus.nxv8i32.i8(
+ <vscale x 8 x i32>,
+ i8,
+ <vscale x 32 x i8>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x i32> @intrinsic_th_vmaqaus_vx_nxv8i32_i8_nxv32i8(<vscale x 8 x i32> %0, i8 %1, <vscale x 32 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqaus_vx_nxv8i32_i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, ma
+; CHECK-NEXT: th.vmaqaus.vx v8, a0, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.th.vmaqaus.nxv8i32.i8(
+ <vscale x 8 x i32> %0,
+ i8 %1,
+ <vscale x 32 x i8> %2,
+ iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.th.vmaqaus.mask.nxv8i32.i8(
+ <vscale x 8 x i32>,
+ i8,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 8 x i32> @intrinsic_th_vmaqaus_mask_vx_nxv8i32_i8_nxv32i8(<vscale x 8 x i32> %0, i8 %1, <vscale x 32 x i8> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_th_vmaqaus_mask_vx_nxv8i32_i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu
+; CHECK-NEXT: th.vmaqaus.vx v8, a0, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.th.vmaqaus.mask.nxv8i32.i8(
+ <vscale x 8 x i32> %0,
+ i8 %1,
+ <vscale x 32 x i8> %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x i32> %a
+}