From c5ed93f975830b4ed52f1899bfc9d8c89bf81c38 Mon Sep 17 00:00:00 2001
From: Jolanta Jensen
Date: Mon, 12 Jun 2023 09:14:13 +0000
Subject: [PATCH] [SVE ACLE] Remove DAG combines that are no longer relevant.

This patch removes DAG combines that are no longer relevant because
equivalent IR combines have been added.

Differential Revision: https://reviews.llvm.org/D153445
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp    |  46 --
 llvm/test/CodeGen/AArch64/sve-aba.ll               |  26 +-
 .../AArch64/sve-intrinsics-int-arith-imm.ll        | 483 +++++++++++----------
 .../CodeGen/AArch64/sve-intrinsics-logical-imm.ll  | 140 +++---
 .../CodeGen/AArch64/sve-intrinsics-unpred-form.ll  | 472 ++++++++++----------
 llvm/test/CodeGen/AArch64/sve-saba.ll              |  16 +-
 llvm/test/CodeGen/AArch64/sve2-int-mul.ll          |  48 +-
 .../AArch64/sve2-intrinsics-int-arith-imm.ll       | 116 ++---
 llvm/test/CodeGen/AArch64/sve2-sra.ll              |  26 +-
 9 files changed, 673 insertions(+), 700 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3488f26..fd226aa 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18921,58 +18921,36 @@ static SDValue performIntrinsicCombine(SDNode *N,
                        N->getOperand(1));
   case Intrinsic::aarch64_sve_ext:
     return LowerSVEIntrinsicEXT(N, DAG);
-  case Intrinsic::aarch64_sve_mul:
-    return convertMergedOpToPredOp(N, AArch64ISD::MUL_PRED, DAG);
   case Intrinsic::aarch64_sve_mul_u:
     return DAG.getNode(AArch64ISD::MUL_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_smulh:
-    return convertMergedOpToPredOp(N, AArch64ISD::MULHS_PRED, DAG);
   case Intrinsic::aarch64_sve_smulh_u:
     return DAG.getNode(AArch64ISD::MULHS_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_umulh:
-    return convertMergedOpToPredOp(N, AArch64ISD::MULHU_PRED, DAG);
   case Intrinsic::aarch64_sve_umulh_u:
     return DAG.getNode(AArch64ISD::MULHU_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_smin:
-    return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG);
   case Intrinsic::aarch64_sve_smin_u:
     return DAG.getNode(AArch64ISD::SMIN_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_umin:
-    return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG);
   case Intrinsic::aarch64_sve_umin_u:
     return DAG.getNode(AArch64ISD::UMIN_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_smax:
-    return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG);
   case Intrinsic::aarch64_sve_smax_u:
     return DAG.getNode(AArch64ISD::SMAX_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_umax:
-    return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);
   case Intrinsic::aarch64_sve_umax_u:
     return DAG.getNode(AArch64ISD::UMAX_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_lsl:
-    return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);
   case Intrinsic::aarch64_sve_lsl_u:
     return DAG.getNode(AArch64ISD::SHL_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_lsr:
-    return convertMergedOpToPredOp(N,
AArch64ISD::SRL_PRED, DAG); case Intrinsic::aarch64_sve_lsr_u: return DAG.getNode(AArch64ISD::SRL_PRED, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); - case Intrinsic::aarch64_sve_asr: - return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG); case Intrinsic::aarch64_sve_asr_u: return DAG.getNode(AArch64ISD::SRA_PRED, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); - case Intrinsic::aarch64_sve_fadd: - return convertMergedOpToPredOp(N, AArch64ISD::FADD_PRED, DAG); case Intrinsic::aarch64_sve_fadd_u: return DAG.getNode(AArch64ISD::FADD_PRED, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); @@ -18995,55 +18973,35 @@ static SDValue performIntrinsicCombine(SDNode *N, case Intrinsic::aarch64_sve_fminnm_u: return DAG.getNode(AArch64ISD::FMINNM_PRED, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); - case Intrinsic::aarch64_sve_fmul: - return convertMergedOpToPredOp(N, AArch64ISD::FMUL_PRED, DAG); case Intrinsic::aarch64_sve_fmul_u: return DAG.getNode(AArch64ISD::FMUL_PRED, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); - case Intrinsic::aarch64_sve_fsub: - return convertMergedOpToPredOp(N, AArch64ISD::FSUB_PRED, DAG); case Intrinsic::aarch64_sve_fsub_u: return DAG.getNode(AArch64ISD::FSUB_PRED, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); - case Intrinsic::aarch64_sve_add: - return convertMergedOpToPredOp(N, ISD::ADD, DAG, true); case Intrinsic::aarch64_sve_add_u: return DAG.getNode(ISD::ADD, SDLoc(N), N->getValueType(0), N->getOperand(2), N->getOperand(3)); - case Intrinsic::aarch64_sve_sub: - return convertMergedOpToPredOp(N, ISD::SUB, DAG, true); case Intrinsic::aarch64_sve_sub_u: return DAG.getNode(ISD::SUB, SDLoc(N), N->getValueType(0), N->getOperand(2), N->getOperand(3)); case Intrinsic::aarch64_sve_subr: return convertMergedOpToPredOp(N, ISD::SUB, DAG, true, true); - case Intrinsic::aarch64_sve_and: - return convertMergedOpToPredOp(N, ISD::AND, DAG, true); case Intrinsic::aarch64_sve_and_u: return DAG.getNode(ISD::AND, SDLoc(N), N->getValueType(0), N->getOperand(2), N->getOperand(3)); - case Intrinsic::aarch64_sve_bic: - return convertMergedOpToPredOp(N, AArch64ISD::BIC, DAG, true); case Intrinsic::aarch64_sve_bic_u: return DAG.getNode(AArch64ISD::BIC, SDLoc(N), N->getValueType(0), N->getOperand(2), N->getOperand(3)); - case Intrinsic::aarch64_sve_eor: - return convertMergedOpToPredOp(N, ISD::XOR, DAG, true); case Intrinsic::aarch64_sve_eor_u: return DAG.getNode(ISD::XOR, SDLoc(N), N->getValueType(0), N->getOperand(2), N->getOperand(3)); - case Intrinsic::aarch64_sve_orr: - return convertMergedOpToPredOp(N, ISD::OR, DAG, true); case Intrinsic::aarch64_sve_orr_u: return DAG.getNode(ISD::OR, SDLoc(N), N->getValueType(0), N->getOperand(2), N->getOperand(3)); - case Intrinsic::aarch64_sve_sabd: - return convertMergedOpToPredOp(N, ISD::ABDS, DAG, true); case Intrinsic::aarch64_sve_sabd_u: return DAG.getNode(ISD::ABDS, SDLoc(N), N->getValueType(0), N->getOperand(2), N->getOperand(3)); - case Intrinsic::aarch64_sve_uabd: - return convertMergedOpToPredOp(N, ISD::ABDU, DAG, true); case Intrinsic::aarch64_sve_uabd_u: return DAG.getNode(ISD::ABDU, SDLoc(N), N->getValueType(0), N->getOperand(2), N->getOperand(3)); @@ -19055,15 +19013,11 @@ static SDValue performIntrinsicCombine(SDNode *N, N->getOperand(1), N->getOperand(2), N->getOperand(3)); case 
Intrinsic::aarch64_sve_sqadd: return convertMergedOpToPredOp(N, ISD::SADDSAT, DAG, true); - case Intrinsic::aarch64_sve_sqsub: - return convertMergedOpToPredOp(N, ISD::SSUBSAT, DAG, true); case Intrinsic::aarch64_sve_sqsub_u: return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0), N->getOperand(2), N->getOperand(3)); case Intrinsic::aarch64_sve_uqadd: return convertMergedOpToPredOp(N, ISD::UADDSAT, DAG, true); - case Intrinsic::aarch64_sve_uqsub: - return convertMergedOpToPredOp(N, ISD::USUBSAT, DAG, true); case Intrinsic::aarch64_sve_uqsub_u: return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0), N->getOperand(2), N->getOperand(3)); diff --git a/llvm/test/CodeGen/AArch64/sve-aba.ll b/llvm/test/CodeGen/AArch64/sve-aba.ll index b1298e0..6859f7d 100644 --- a/llvm/test/CodeGen/AArch64/sve-aba.ll +++ b/llvm/test/CodeGen/AArch64/sve-aba.ll @@ -42,7 +42,7 @@ define @saba_b_from_sabd( %a, @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %2 = call @llvm.aarch64.sve.sabd.nxv16i8( %1, %b, %c) + %2 = call @llvm.aarch64.sve.sabd.u.nxv16i8( %1, %b, %c) %3 = add %2, %a ret %3 } @@ -93,7 +93,7 @@ define @saba_h_from_sabd( %a, @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %2 = call @llvm.aarch64.sve.sabd.nxv8i16( %1, %b, %c) + %2 = call @llvm.aarch64.sve.sabd.u.nxv8i16( %1, %b, %c) %3 = add %2, %a ret %3 } @@ -144,7 +144,7 @@ define @saba_s_from_sabd( %a, @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %2 = call @llvm.aarch64.sve.sabd.nxv4i32( %1, %b, %c) + %2 = call @llvm.aarch64.sve.sabd.u.nxv4i32( %1, %b, %c) %3 = add %2, %a ret %3 } @@ -195,7 +195,7 @@ define @saba_d_from_sabd( %a, @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %2 = call @llvm.aarch64.sve.sabd.nxv2i64( %1, %b, %c) + %2 = call @llvm.aarch64.sve.sabd.u.nxv2i64( %1, %b, %c) %3 = add %2, %a ret %3 } @@ -249,7 +249,7 @@ define @uaba_b_from_uabd( %a, @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %2 = call @llvm.aarch64.sve.uabd.nxv16i8( %1, %b, %c) + %2 = call @llvm.aarch64.sve.uabd.u.nxv16i8( %1, %b, %c) %3 = add %2, %a ret %3 } @@ -299,7 +299,7 @@ define @uaba_h_from_uabd( %a, @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %2 = call @llvm.aarch64.sve.uabd.nxv8i16( %1, %b, %c) + %2 = call @llvm.aarch64.sve.uabd.u.nxv8i16( %1, %b, %c) %3 = add %2, %a ret %3 } @@ -349,7 +349,7 @@ define @uaba_s_from_uabd( %a, @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %2 = call @llvm.aarch64.sve.uabd.nxv4i32( %1, %b, %c) + %2 = call @llvm.aarch64.sve.uabd.u.nxv4i32( %1, %b, %c) %3 = add %2, %a ret %3 } @@ -399,7 +399,7 @@ define @uaba_d_from_uabd( %a, @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %2 = call @llvm.aarch64.sve.uabd.nxv2i64( %1, %b, %c) + %2 = call @llvm.aarch64.sve.uabd.u.nxv2i64( %1, %b, %c) %3 = add %2, %a ret %3 } @@ -443,21 +443,11 @@ declare @llvm.aarch64.sve.ptrue.nxv4i1(i32) declare @llvm.aarch64.sve.ptrue.nxv8i1(i32) declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) -declare @llvm.aarch64.sve.sabd.nxv16i8(, , ) -declare @llvm.aarch64.sve.sabd.nxv8i16(, , ) -declare @llvm.aarch64.sve.sabd.nxv4i32(, , ) -declare @llvm.aarch64.sve.sabd.nxv2i64(, , ) - declare @llvm.aarch64.sve.sabd.u.nxv16i8(, , ) declare @llvm.aarch64.sve.sabd.u.nxv8i16(, , ) declare @llvm.aarch64.sve.sabd.u.nxv4i32(, , ) declare @llvm.aarch64.sve.sabd.u.nxv2i64(, , ) -declare @llvm.aarch64.sve.uabd.nxv16i8(, , ) -declare @llvm.aarch64.sve.uabd.nxv8i16(, , ) -declare @llvm.aarch64.sve.uabd.nxv4i32(, , ) -declare @llvm.aarch64.sve.uabd.nxv2i64(, , ) - declare @llvm.aarch64.sve.uabd.u.nxv16i8(, , ) declare @llvm.aarch64.sve.uabd.u.nxv8i16(, , ) declare @llvm.aarch64.sve.uabd.u.nxv4i32(, , ) diff --git 
a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll index 58e9137..59d52b0 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll @@ -11,9 +11,9 @@ define @add_i8( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 127, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.add.nxv16i8( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.add.u.nxv16i8( %pg, + %a, + %splat) ret %out } @@ -25,9 +25,9 @@ define @add_i16( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 127, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.add.nxv8i16( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.add.u.nxv8i16( %pg, + %a, + %splat) ret %out } @@ -40,9 +40,9 @@ define @add_i16_out_of_range( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 257, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.add.nxv8i16( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.add.u.nxv8i16( %pg, + %a, + %splat) ret %out } @@ -54,9 +54,9 @@ define @add_i32( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 127, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.add.nxv4i32( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.add.u.nxv4i32( %pg, + %a, + %splat) ret %out } @@ -70,9 +70,9 @@ define @add_i32_out_of_range( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 257, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.add.nxv4i32( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.add.u.nxv4i32( %pg, + %a, + %splat) ret %out } @@ -84,9 +84,9 @@ define @add_i64( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 127, i64 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.add.nxv2i64( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.add.u.nxv2i64( %pg, + %a, + %splat) ret %out } @@ -100,9 +100,9 @@ define @add_i64_out_of_range( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 257, i64 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.add.nxv2i64( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.add.u.nxv2i64( %pg, + %a, + %splat) ret %out } @@ -116,9 +116,9 @@ define @sub_i8( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 127, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.sub.nxv16i8( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.sub.u.nxv16i8( %pg, + %a, + %splat) ret %out } @@ -130,9 +130,9 @@ define @sub_i16( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 127, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.sub.nxv8i16( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.sub.u.nxv8i16( %pg, + %a, + %splat) ret %out } @@ -145,9 +145,9 @@ define @sub_i16_out_of_range( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 257, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.sub.nxv8i16( %pg, - 
%a, - %splat) + %out = call @llvm.aarch64.sve.sub.u.nxv8i16( %pg, + %a, + %splat) ret %out } @@ -159,9 +159,9 @@ define @sub_i32( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 127, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.sub.nxv4i32( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.sub.u.nxv4i32( %pg, + %a, + %splat) ret %out } @@ -175,9 +175,9 @@ define @sub_i32_out_of_range( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 257, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.sub.nxv4i32( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.sub.u.nxv4i32( %pg, + %a, + %splat) ret %out } @@ -189,9 +189,9 @@ define @sub_i64( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 127, i64 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.sub.nxv2i64( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.sub.u.nxv2i64( %pg, + %a, + %splat) ret %out } @@ -205,9 +205,9 @@ define @sub_i64_out_of_range( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 257, i64 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.sub.nxv2i64( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.sub.u.nxv2i64( %pg, + %a, + %splat) ret %out } @@ -220,9 +220,9 @@ define @sub_i32_ptrue_all_b( %a) #0 { %pg.b = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) %b = tail call @llvm.aarch64.sve.dup.x.nxv4i32(i32 1) - %out = tail call @llvm.aarch64.sve.sub.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.sub.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -236,9 +236,9 @@ define @sub_i32_ptrue_all_h( %a) #0 { %pg.b = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %pg.h) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) %b = tail call @llvm.aarch64.sve.dup.x.nxv4i32(i32 1) - %out = tail call @llvm.aarch64.sve.sub.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.sub.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -426,9 +426,9 @@ define @smax_i8( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 -128, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.smax.nxv16i8( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.smax.u.nxv16i8( %pg, + %a, + %splat) ret %out } @@ -440,9 +440,9 @@ define @smax_i16( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 127, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.smax.nxv8i16( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.smax.u.nxv8i16( %pg, + %a, + %splat) ret %out } @@ -457,9 +457,9 @@ define @smax_i16_out_of_range( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 129, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.smax.nxv8i16( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.smax.u.nxv8i16( %pg, + %a, + %splat) ret %out } @@ -471,9 +471,9 @@ define @smax_i32( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 -128, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.smax.nxv4i32( %pg, - %a, - %splat) + %out = call 
@llvm.aarch64.sve.smax.u.nxv4i32( %pg, + %a, + %splat) ret %out } @@ -487,9 +487,9 @@ define @smax_i32_out_of_range( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 -129, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.smax.nxv4i32( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.smax.u.nxv4i32( %pg, + %a, + %splat) ret %out } @@ -501,9 +501,9 @@ define @smax_i64( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 127, i64 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.smax.nxv2i64( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.smax.u.nxv2i64( %pg, + %a, + %splat) ret %out } @@ -517,9 +517,9 @@ define @smax_i64_out_of_range( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 65535, i64 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.smax.nxv2i64( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.smax.u.nxv2i64( %pg, + %a, + %splat) ret %out } @@ -532,9 +532,9 @@ define @smax_i32_ptrue_all_b( %a) #0 { %pg.b = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) %b = tail call @llvm.aarch64.sve.dup.x.nxv4i32(i32 1) - %out = tail call @llvm.aarch64.sve.smax.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.smax.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -548,9 +548,9 @@ define @smax_i32_ptrue_all_h( %a) #0 { %pg.b = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %pg.h) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) %b = tail call @llvm.aarch64.sve.dup.x.nxv4i32(i32 1) - %out = tail call @llvm.aarch64.sve.smax.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.smax.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -583,9 +583,9 @@ define @smin_i8( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 127, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.smin.nxv16i8( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.smin.u.nxv16i8( %pg, + %a, + %splat) ret %out } @@ -597,9 +597,9 @@ define @smin_i16( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 -128, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.smin.nxv8i16( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.smin.u.nxv8i16( %pg, + %a, + %splat) ret %out } @@ -613,9 +613,9 @@ define @smin_i16_out_of_range( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 -129, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.smin.nxv8i16( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.smin.u.nxv8i16( %pg, + %a, + %splat) ret %out } @@ -627,9 +627,9 @@ define @smin_i32( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 127, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.smin.nxv4i32( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.smin.u.nxv4i32( %pg, + %a, + %splat) ret %out } @@ -644,9 +644,9 @@ define @smin_i32_out_of_range( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 257, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.smin.nxv4i32( %pg, - %a, - %splat) + %out = call 
@llvm.aarch64.sve.smin.u.nxv4i32( %pg, + %a, + %splat) ret %out } @@ -659,9 +659,9 @@ define @smin_i64( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 -128, i64 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.smin.nxv2i64( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.smin.u.nxv2i64( %pg, + %a, + %splat) ret %out } @@ -675,9 +675,9 @@ define @smin_i64_out_of_range( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 -256, i64 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.smin.nxv2i64( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.smin.u.nxv2i64( %pg, + %a, + %splat) ret %out } @@ -690,9 +690,9 @@ define @smin_i32_ptrue_all_b( %a) #0 { %pg.b = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) %b = tail call @llvm.aarch64.sve.dup.x.nxv4i32(i32 1) - %out = tail call @llvm.aarch64.sve.smin.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.smin.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -706,9 +706,9 @@ define @smin_i32_ptrue_all_h( %a) #0 { %pg.b = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %pg.h) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) %b = tail call @llvm.aarch64.sve.dup.x.nxv4i32(i32 1) - %out = tail call @llvm.aarch64.sve.smin.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.smin.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -741,9 +741,9 @@ define @umax_i8( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 0, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.umax.nxv16i8( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.umax.u.nxv16i8( %pg, + %a, + %splat) ret %out } @@ -755,9 +755,9 @@ define @umax_i16( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 255, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.umax.nxv8i16( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.umax.u.nxv8i16( %pg, + %a, + %splat) ret %out } @@ -771,9 +771,9 @@ define @umax_i16_out_of_range( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 257, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.umax.nxv8i16( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.umax.u.nxv8i16( %pg, + %a, + %splat) ret %out } @@ -785,9 +785,9 @@ define @umax_i32( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 0, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.umax.nxv4i32( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.umax.u.nxv4i32( %pg, + %a, + %splat) ret %out } @@ -802,9 +802,9 @@ define @umax_i32_out_of_range( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 257, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.umax.nxv4i32( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.umax.u.nxv4i32( %pg, + %a, + %splat) ret %out } @@ -816,9 +816,9 @@ define @umax_i64( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 255, i64 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.umax.nxv2i64( %pg, - %a, - %splat) + %out = call 
@llvm.aarch64.sve.umax.u.nxv2i64( %pg, + %a, + %splat) ret %out } @@ -832,9 +832,9 @@ define @umax_i64_out_of_range( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 65535, i64 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.umax.nxv2i64( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.umax.u.nxv2i64( %pg, + %a, + %splat) ret %out } @@ -847,9 +847,9 @@ define @umax_i32_ptrue_all_b( %a) #0 { %pg.b = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) %b = tail call @llvm.aarch64.sve.dup.x.nxv4i32(i32 1) - %out = tail call @llvm.aarch64.sve.umax.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.umax.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -863,9 +863,9 @@ define @umax_i32_ptrue_all_h( %a) #0 { %pg.b = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %pg.h) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) %b = tail call @llvm.aarch64.sve.dup.x.nxv4i32(i32 1) - %out = tail call @llvm.aarch64.sve.umax.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.umax.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -898,9 +898,9 @@ define @umin_i8( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 255, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.umin.nxv16i8( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.umin.u.nxv16i8( %pg, + %a, + %splat) ret %out } @@ -912,9 +912,9 @@ define @umin_i16( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 0, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.umin.nxv8i16( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.umin.u.nxv8i16( %pg, + %a, + %splat) ret %out } @@ -928,9 +928,9 @@ define @umin_i16_out_of_range( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 257, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.umin.nxv8i16( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.umin.u.nxv8i16( %pg, + %a, + %splat) ret %out } @@ -942,9 +942,9 @@ define @umin_i32( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 255, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.umin.nxv4i32( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.umin.u.nxv4i32( %pg, + %a, + %splat) ret %out } @@ -959,9 +959,9 @@ define @umin_i32_out_of_range( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 257, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.umin.nxv4i32( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.umin.u.nxv4i32( %pg, + %a, + %splat) ret %out } @@ -973,9 +973,9 @@ define @umin_i64( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 0, i64 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.umin.nxv2i64( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.umin.u.nxv2i64( %pg, + %a, + %splat) ret %out } @@ -989,9 +989,9 @@ define @umin_i64_out_of_range( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 65535, i64 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.umin.nxv2i64( %pg, - %a, - %splat) + %out = call 
@llvm.aarch64.sve.umin.u.nxv2i64( %pg, + %a, + %splat) ret %out } @@ -1004,9 +1004,9 @@ define @umin_i32_ptrue_all_b( %a) #0 { %pg.b = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) %b = tail call @llvm.aarch64.sve.dup.x.nxv4i32(i32 1) - %out = tail call @llvm.aarch64.sve.umin.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.umin.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -1020,9 +1020,9 @@ define @umin_i32_ptrue_all_h( %a) #0 { %pg.b = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %pg.h) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) %b = tail call @llvm.aarch64.sve.dup.x.nxv4i32(i32 1) - %out = tail call @llvm.aarch64.sve.umin.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.umin.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -1412,9 +1412,9 @@ define @asr_i8_all_active( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 8, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.asr.nxv16i8( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.asr.u.nxv16i8( %pg, + %a, + %splat) ret %out } @@ -1452,9 +1452,9 @@ define @asr_i16_all_active( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 16, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.asr.nxv8i16( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.asr.u.nxv8i16( %pg, + %a, + %splat) ret %out } @@ -1492,9 +1492,9 @@ define @asr_i32_all_active( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 32, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.asr.nxv4i32( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.asr.u.nxv4i32( %pg, + %a, + %splat) ret %out } @@ -1532,9 +1532,9 @@ define @asr_i64_all_active( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 64, i64 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.asr.nxv2i64( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.asr.u.nxv2i64( %pg, + %a, + %splat) ret %out } @@ -1574,9 +1574,9 @@ define @lsl_i8_all_active( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 7, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.lsl.nxv16i8( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.lsl.u.nxv16i8( %pg, + %a, + %splat) ret %out } @@ -1627,9 +1627,9 @@ define @lsl_i16_all_active( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 15, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.lsl.nxv8i16( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.lsl.u.nxv8i16( %pg, + %a, + %splat) ret %out } @@ -1680,9 +1680,9 @@ define @lsl_i32_all_active( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 31, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.lsl.nxv4i32( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.lsl.u.nxv4i32( %pg, + %a, + %splat) ret %out } @@ -1733,9 +1733,9 @@ define @lsl_i64_all_active( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 63, i64 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.lsl.nxv2i64( %pg, - %a, - %splat) + %out 
= call @llvm.aarch64.sve.lsl.u.nxv2i64( %pg, + %a, + %splat) ret %out } @@ -1788,9 +1788,9 @@ define @lsr_i8_all_active( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 8, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.lsr.nxv16i8( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.lsr.u.nxv16i8( %pg, + %a, + %splat) ret %out } @@ -1828,9 +1828,9 @@ define @lsr_i16_all_active( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 16, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.lsr.nxv8i16( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.lsr.u.nxv8i16( %pg, + %a, + %splat) ret %out } @@ -1868,9 +1868,9 @@ define @lsr_i32_all_active( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 32, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.lsr.nxv4i32( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.lsr.u.nxv4i32( %pg, + %a, + %splat) ret %out } @@ -1908,9 +1908,9 @@ define @lsr_i64_all_active( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 64, i64 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.lsr.nxv2i64( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.lsr.u.nxv2i64( %pg, + %a, + %splat) ret %out } @@ -1936,9 +1936,9 @@ define @lsr_i32_ptrue_all_b( %a) #0 { %pg.b = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) %b = tail call @llvm.aarch64.sve.dup.x.nxv4i32(i32 1) - %out = tail call @llvm.aarch64.sve.lsr.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.lsr.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -1952,9 +1952,9 @@ define @lsr_i32_ptrue_all_h( %a) #0 { %pg.b = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %pg.h) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) %b = tail call @llvm.aarch64.sve.dup.x.nxv4i32(i32 1) - %out = tail call @llvm.aarch64.sve.lsr.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.lsr.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -1989,9 +1989,9 @@ define @mul_i32_ptrue_all_b( %a) #0 { %pg.b = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) %b = tail call @llvm.aarch64.sve.dup.x.nxv4i32(i32 1) - %out = tail call @llvm.aarch64.sve.mul.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.mul.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -2005,9 +2005,9 @@ define @mul_i32_ptrue_all_h( %a) #0 { %pg.b = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %pg.h) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) %b = tail call @llvm.aarch64.sve.dup.x.nxv4i32(i32 1) - %out = tail call @llvm.aarch64.sve.mul.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.mul.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -2025,20 +2025,22 @@ define @mul_i32_ptrue_all_d( %a) #0 { %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) %b = tail call @llvm.aarch64.sve.dup.x.nxv4i32(i32 1) %out = tail call @llvm.aarch64.sve.mul.nxv4i32( %pg.s, - %a, - %b) + %a, + %b) ret %out } -declare @llvm.aarch64.sve.add.nxv16i8(, , ) -declare @llvm.aarch64.sve.add.nxv8i16(, , ) -declare @llvm.aarch64.sve.add.nxv4i32(, , ) -declare @llvm.aarch64.sve.add.nxv2i64(, , ) +declare @llvm.aarch64.sve.add.u.nxv16i8(, , ) 
+declare @llvm.aarch64.sve.add.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.add.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.add.u.nxv2i64(, , ) -declare @llvm.aarch64.sve.sub.nxv16i8(, , ) -declare @llvm.aarch64.sve.sub.nxv8i16(, , ) declare @llvm.aarch64.sve.sub.nxv4i32(, , ) -declare @llvm.aarch64.sve.sub.nxv2i64(, , ) + +declare @llvm.aarch64.sve.sub.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.sub.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.sub.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.sub.u.nxv2i64(, , ) declare @llvm.aarch64.sve.subr.nxv16i8(, , ) declare @llvm.aarch64.sve.subr.nxv8i16(, , ) @@ -2065,45 +2067,70 @@ declare @llvm.aarch64.sve.uqsub.x.nxv8i16(, declare @llvm.aarch64.sve.uqsub.x.nxv4i32(, ) declare @llvm.aarch64.sve.uqsub.x.nxv2i64(, ) -declare @llvm.aarch64.sve.smax.nxv16i8(, , ) -declare @llvm.aarch64.sve.smax.nxv8i16(, , ) declare @llvm.aarch64.sve.smax.nxv4i32(, , ) -declare @llvm.aarch64.sve.smax.nxv2i64(, , ) -declare @llvm.aarch64.sve.smin.nxv16i8(, , ) -declare @llvm.aarch64.sve.smin.nxv8i16(, , ) +declare @llvm.aarch64.sve.smax.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.smax.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.smax.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.smax.u.nxv2i64(, , ) + declare @llvm.aarch64.sve.smin.nxv4i32(, , ) -declare @llvm.aarch64.sve.smin.nxv2i64(, , ) -declare @llvm.aarch64.sve.umax.nxv16i8(, , ) -declare @llvm.aarch64.sve.umax.nxv8i16(, , ) +declare @llvm.aarch64.sve.smin.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.smin.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.smin.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.smin.u.nxv2i64(, , ) + declare @llvm.aarch64.sve.umax.nxv4i32(, , ) -declare @llvm.aarch64.sve.umax.nxv2i64(, , ) -declare @llvm.aarch64.sve.umin.nxv16i8(, , ) -declare @llvm.aarch64.sve.umin.nxv8i16(, , ) +declare @llvm.aarch64.sve.umax.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.umax.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.umax.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.umax.u.nxv2i64(, , ) + declare @llvm.aarch64.sve.umin.nxv4i32(, , ) -declare @llvm.aarch64.sve.umin.nxv2i64(, , ) + +declare @llvm.aarch64.sve.umin.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.umin.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.umin.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.umin.u.nxv2i64(, , ) declare @llvm.aarch64.sve.asr.nxv16i8(, , ) declare @llvm.aarch64.sve.asr.nxv8i16(, , ) declare @llvm.aarch64.sve.asr.nxv4i32(, , ) declare @llvm.aarch64.sve.asr.nxv2i64(, , ) +declare @llvm.aarch64.sve.asr.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.asr.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.asr.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.asr.u.nxv2i64(, , ) + declare @llvm.aarch64.sve.lsl.nxv16i8(, , ) declare @llvm.aarch64.sve.lsl.nxv8i16(, , ) declare @llvm.aarch64.sve.lsl.nxv4i32(, , ) declare @llvm.aarch64.sve.lsl.nxv2i64(, , ) +declare @llvm.aarch64.sve.lsl.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.lsl.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.lsl.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.lsl.u.nxv2i64(, , ) + declare @llvm.aarch64.sve.lsr.nxv16i8(, , ) declare @llvm.aarch64.sve.lsr.nxv8i16(, , ) declare @llvm.aarch64.sve.lsr.nxv4i32(, , ) declare @llvm.aarch64.sve.lsr.nxv2i64(, , ) -declare @llvm.aarch64.sve.mul.nxv16i8(, , ) -declare @llvm.aarch64.sve.mul.nxv8i16(, , ) +declare @llvm.aarch64.sve.lsr.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.lsr.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.lsr.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.lsr.u.nxv2i64(, , ) + declare @llvm.aarch64.sve.mul.nxv4i32(, , ) -declare @llvm.aarch64.sve.mul.nxv2i64(, , ) + 
+declare @llvm.aarch64.sve.mul.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.mul.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.mul.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.mul.u.nxv2i64(, , ) declare @llvm.aarch64.sve.convert.from.svbool.nxv4i1() declare @llvm.aarch64.sve.convert.from.svbool.nxv8i1() diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll index 85eace3..7cdedee 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll @@ -12,9 +12,9 @@ define @and_i8( %a) { ; CHECK-NEXT: ret %pg = shufflevector insertelement ( undef, i1 true, i32 0), undef, zeroinitializer %b = shufflevector insertelement ( undef, i8 7, i32 0), undef, zeroinitializer - %out = call @llvm.aarch64.sve.and.nxv16i8( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.and.u.nxv16i8( %pg, + %a, + %b) ret %out } @@ -25,9 +25,9 @@ define @and_i16( %a) { ; CHECK-NEXT: ret %pg = shufflevector insertelement ( undef, i1 true, i32 0), undef, zeroinitializer %b = shufflevector insertelement ( undef, i16 240, i32 0), undef, zeroinitializer - %out = call @llvm.aarch64.sve.and.nxv8i16( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.and.u.nxv8i16( %pg, + %a, + %b) ret %out } @@ -38,9 +38,9 @@ define @and_i32( %a) { ; CHECK-NEXT: ret %pg = shufflevector insertelement ( undef, i1 true, i32 0), undef, zeroinitializer %b = shufflevector insertelement ( undef, i32 16776960, i32 0), undef, zeroinitializer - %out = call @llvm.aarch64.sve.and.nxv4i32( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.and.u.nxv4i32( %pg, + %a, + %b) ret %out } @@ -51,9 +51,9 @@ define @and_i64( %a) { ; CHECK-NEXT: ret %pg = shufflevector insertelement ( undef, i1 true, i32 0), undef, zeroinitializer %b = shufflevector insertelement ( undef, i64 18445618173802708992, i32 0), undef, zeroinitializer - %out = call @llvm.aarch64.sve.and.nxv2i64( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.and.u.nxv2i64( %pg, + %a, + %b) ret %out } @@ -68,9 +68,9 @@ define @bic_i8( %a) { ; CHECK-NEXT: ret %pg = shufflevector insertelement ( undef, i1 true, i32 0), undef, zeroinitializer %b = shufflevector insertelement ( undef, i8 254, i32 0), undef, zeroinitializer - %out = call @llvm.aarch64.sve.bic.nxv16i8( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.bic.u.nxv16i8( %pg, + %a, + %b) ret %out } @@ -81,9 +81,9 @@ define @bic_i16( %a) { ; CHECK-NEXT: ret %pg = shufflevector insertelement ( undef, i1 true, i32 0), undef, zeroinitializer %b = shufflevector insertelement ( undef, i16 65534, i32 0), undef, zeroinitializer - %out = call @llvm.aarch64.sve.bic.nxv8i16( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.bic.u.nxv8i16( %pg, + %a, + %b) ret %out } @@ -94,9 +94,9 @@ define @bic_i32( %a) { ; CHECK-NEXT: ret %pg = shufflevector insertelement ( undef, i1 true, i32 0), undef, zeroinitializer %b = shufflevector insertelement ( undef, i32 16776960, i32 0), undef, zeroinitializer - %out = call @llvm.aarch64.sve.bic.nxv4i32( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.bic.u.nxv4i32( %pg, + %a, + %b) ret %out } @@ -107,9 +107,9 @@ define @bic_i64( %a) { ; CHECK-NEXT: ret %pg = shufflevector insertelement ( undef, i1 true, i32 0), undef, zeroinitializer %b = shufflevector insertelement ( undef, i64 18445618173802708992, i32 0), undef, zeroinitializer - %out = call @llvm.aarch64.sve.bic.nxv2i64( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.bic.u.nxv2i64( %pg, + %a, + %b) ret %out } @@ -124,9 +124,9 @@ define @eor_i8( %a) { ; 
CHECK-NEXT: ret %pg = shufflevector insertelement ( undef, i1 true, i32 0), undef, zeroinitializer %b = shufflevector insertelement ( undef, i8 15, i32 0), undef, zeroinitializer - %out = call @llvm.aarch64.sve.eor.nxv16i8( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.eor.u.nxv16i8( %pg, + %a, + %b) ret %out } @@ -137,9 +137,9 @@ define @eor_i16( %a) { ; CHECK-NEXT: ret %pg = shufflevector insertelement ( undef, i1 true, i32 0), undef, zeroinitializer %b = shufflevector insertelement ( undef, i16 64519, i32 0), undef, zeroinitializer - %out = call @llvm.aarch64.sve.eor.nxv8i16( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.eor.u.nxv8i16( %pg, + %a, + %b) ret %out } @@ -150,9 +150,9 @@ define @eor_i32( %a) { ; CHECK-NEXT: ret %pg = shufflevector insertelement ( undef, i1 true, i32 0), undef, zeroinitializer %b = shufflevector insertelement ( undef, i32 16776960, i32 0), undef, zeroinitializer - %out = call @llvm.aarch64.sve.eor.nxv4i32( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.eor.u.nxv4i32( %pg, + %a, + %b) ret %out } @@ -163,9 +163,9 @@ define @eor_i64( %a) { ; CHECK-NEXT: ret %pg = shufflevector insertelement ( undef, i1 true, i32 0), undef, zeroinitializer %b = shufflevector insertelement ( undef, i64 281474976710656, i32 0), undef, zeroinitializer - %out = call @llvm.aarch64.sve.eor.nxv2i64( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.eor.u.nxv2i64( %pg, + %a, + %b) ret %out } @@ -180,9 +180,9 @@ define @orr_i8( %a) { ; CHECK-NEXT: ret %pg = shufflevector insertelement ( undef, i1 true, i32 0), undef, zeroinitializer %b = shufflevector insertelement ( undef, i8 6, i32 0), undef, zeroinitializer - %out = call @llvm.aarch64.sve.orr.nxv16i8( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.orr.u.nxv16i8( %pg, + %a, + %b) ret %out } @@ -193,9 +193,9 @@ define @orr_i16( %a) { ; CHECK-NEXT: ret %pg = shufflevector insertelement ( undef, i1 true, i32 0), undef, zeroinitializer %b = shufflevector insertelement ( undef, i16 32769, i32 0), undef, zeroinitializer - %out = call @llvm.aarch64.sve.orr.nxv8i16( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.orr.u.nxv8i16( %pg, + %a, + %b) ret %out } @@ -206,9 +206,9 @@ define @orr_i32( %a) { ; CHECK-NEXT: ret %pg = shufflevector insertelement ( undef, i1 true, i32 0), undef, zeroinitializer %b = shufflevector insertelement ( undef, i32 65535, i32 0), undef, zeroinitializer - %out = call @llvm.aarch64.sve.orr.nxv4i32( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.orr.u.nxv4i32( %pg, + %a, + %b) ret %out } @@ -219,9 +219,9 @@ define @orr_i64( %a) { ; CHECK-NEXT: ret %pg = shufflevector insertelement ( undef, i1 true, i32 0), undef, zeroinitializer %b = shufflevector insertelement ( undef, i64 9222246136947933184, i32 0), undef, zeroinitializer - %out = call @llvm.aarch64.sve.orr.nxv2i64( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.orr.u.nxv2i64( %pg, + %a, + %b) ret %out } @@ -234,9 +234,9 @@ define @orr_i32_ptrue_all_b( %a) { %pg.b = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) %b = tail call @llvm.aarch64.sve.dup.x.nxv4i32(i32 65535) - %out = tail call @llvm.aarch64.sve.orr.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.orr.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -250,9 +250,9 @@ define @orr_i32_ptrue_all_h( %a) { %pg.b = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %pg.h) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) %b = tail call @llvm.aarch64.sve.dup.x.nxv4i32(i32 65535) - %out = tail 
call @llvm.aarch64.sve.orr.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.orr.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -275,25 +275,27 @@ define @orr_i32_ptrue_all_d( %a) { ret %out } -declare @llvm.aarch64.sve.and.nxv16i8(, , ) -declare @llvm.aarch64.sve.and.nxv8i16(, , ) -declare @llvm.aarch64.sve.and.nxv4i32(, , ) -declare @llvm.aarch64.sve.and.nxv2i64(, , ) +declare @llvm.aarch64.sve.and.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.and.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.and.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.and.u.nxv2i64(, , ) -declare @llvm.aarch64.sve.bic.nxv16i8(, , ) -declare @llvm.aarch64.sve.bic.nxv8i16(, , ) -declare @llvm.aarch64.sve.bic.nxv4i32(, , ) -declare @llvm.aarch64.sve.bic.nxv2i64(, , ) +declare @llvm.aarch64.sve.bic.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.bic.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.bic.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.bic.u.nxv2i64(, , ) -declare @llvm.aarch64.sve.eor.nxv16i8(, , ) -declare @llvm.aarch64.sve.eor.nxv8i16(, , ) -declare @llvm.aarch64.sve.eor.nxv4i32(, , ) -declare @llvm.aarch64.sve.eor.nxv2i64(, , ) +declare @llvm.aarch64.sve.eor.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.eor.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.eor.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.eor.u.nxv2i64(, , ) -declare @llvm.aarch64.sve.orr.nxv16i8(, , ) -declare @llvm.aarch64.sve.orr.nxv8i16(, , ) declare @llvm.aarch64.sve.orr.nxv4i32(, , ) -declare @llvm.aarch64.sve.orr.nxv2i64(, , ) + +declare @llvm.aarch64.sve.orr.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.orr.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.orr.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.orr.u.nxv2i64(, , ) declare @llvm.aarch64.sve.convert.from.svbool.nxv4i1() declare @llvm.aarch64.sve.convert.from.svbool.nxv8i1() diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-unpred-form.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-unpred-form.ll index 6d240b9..baef30b 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-unpred-form.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-unpred-form.ll @@ -11,9 +11,9 @@ define @add_i8( %a, %b) ; CHECK-NEXT: add z0.b, z0.b, z1.b ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %out = call @llvm.aarch64.sve.add.nxv16i8( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.add.u.nxv16i8( %pg, + %a, + %b) ret %out } @@ -23,9 +23,9 @@ define @add_i16( %a, %b) ; CHECK-NEXT: add z0.h, z0.h, z1.h ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %out = call @llvm.aarch64.sve.add.nxv8i16( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.add.u.nxv8i16( %pg, + %a, + %b) ret %out } @@ -35,9 +35,9 @@ define @add_i32( %a, %b) ; CHECK-NEXT: add z0.s, z0.s, z1.s ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %out = call @llvm.aarch64.sve.add.nxv4i32( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.add.u.nxv4i32( %pg, + %a, + %b) ret %out } @@ -47,9 +47,9 @@ define @add_i64( %a, %b) ; CHECK-NEXT: add z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %out = call @llvm.aarch64.sve.add.nxv2i64( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.add.u.nxv2i64( %pg, + %a, + %b) ret %out } @@ -63,9 +63,9 @@ define @sub_i8( %a, %b) ; CHECK-NEXT: sub z0.b, z0.b, z1.b ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %out = call @llvm.aarch64.sve.sub.nxv16i8( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.sub.u.nxv16i8( %pg, + %a, + %b) ret %out } @@ -75,9 +75,9 @@ define @sub_i16( %a, %b) ; CHECK-NEXT: sub z0.h, 
z0.h, z1.h ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %out = call @llvm.aarch64.sve.sub.nxv8i16( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.sub.u.nxv8i16( %pg, + %a, + %b) ret %out } @@ -87,9 +87,9 @@ define @sub_i32( %a, %b) ; CHECK-NEXT: sub z0.s, z0.s, z1.s ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %out = call @llvm.aarch64.sve.sub.nxv4i32( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.sub.u.nxv4i32( %pg, + %a, + %b) ret %out } @@ -99,9 +99,9 @@ define @sub_i64( %a, %b) ; CHECK-NEXT: sub z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %out = call @llvm.aarch64.sve.sub.nxv2i64( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.sub.u.nxv2i64( %pg, + %a, + %b) ret %out } @@ -113,9 +113,9 @@ define @sub_i32_ptrue_all_b( %a, @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) - %out = tail call @llvm.aarch64.sve.sub.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.sub.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -128,9 +128,9 @@ define @sub_i32_ptrue_all_h( %a, @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %pg.b = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %pg.h) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) - %out = tail call @llvm.aarch64.sve.sub.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.sub.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -161,9 +161,9 @@ define @mul_i8( %a, %b) ; CHECK-NEXT: mul z0.b, z0.b, z1.b ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %out = call @llvm.aarch64.sve.mul.nxv16i8( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.mul.u.nxv16i8( %pg, + %a, + %b) ret %out } @@ -173,9 +173,9 @@ define @mul_i16( %a, %b) ; CHECK-NEXT: mul z0.h, z0.h, z1.h ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %out = call @llvm.aarch64.sve.mul.nxv8i16( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.mul.u.nxv8i16( %pg, + %a, + %b) ret %out } @@ -185,9 +185,9 @@ define @mul_i32( %a, %b) ; CHECK-NEXT: mul z0.s, z0.s, z1.s ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %out = call @llvm.aarch64.sve.mul.nxv4i32( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.mul.u.nxv4i32( %pg, + %a, + %b) ret %out } @@ -197,9 +197,9 @@ define @mul_i64( %a, %b) ; CHECK-NEXT: mul z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %out = call @llvm.aarch64.sve.mul.nxv2i64( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.mul.u.nxv2i64( %pg, + %a, + %b) ret %out } @@ -213,9 +213,9 @@ define @smulh_i8( %a, %b ; CHECK-NEXT: smulh z0.b, z0.b, z1.b ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %out = call @llvm.aarch64.sve.smulh.nxv16i8( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.smulh.u.nxv16i8( %pg, + %a, + %b) ret %out } @@ -225,9 +225,9 @@ define @smulh_i16( %a, % ; CHECK-NEXT: smulh z0.h, z0.h, z1.h ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %out = call @llvm.aarch64.sve.smulh.nxv8i16( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.smulh.u.nxv8i16( %pg, + %a, + %b) ret %out } @@ -237,9 +237,9 @@ define @smulh_i32( %a, % ; CHECK-NEXT: smulh z0.s, z0.s, z1.s ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %out = call @llvm.aarch64.sve.smulh.nxv4i32( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.smulh.u.nxv4i32( %pg, + %a, + %b) ret %out } @@ -249,9 +249,9 @@ define @smulh_i64( %a, % ; CHECK-NEXT: 
smulh z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %out = call @llvm.aarch64.sve.smulh.nxv2i64( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.smulh.u.nxv2i64( %pg, + %a, + %b) ret %out } @@ -265,9 +265,9 @@ define @umulh_i8( %a, %b ; CHECK-NEXT: umulh z0.b, z0.b, z1.b ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %out = call @llvm.aarch64.sve.umulh.nxv16i8( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.umulh.u.nxv16i8( %pg, + %a, + %b) ret %out } @@ -277,9 +277,9 @@ define @umulh_i16( %a, % ; CHECK-NEXT: umulh z0.h, z0.h, z1.h ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %out = call @llvm.aarch64.sve.umulh.nxv8i16( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.umulh.u.nxv8i16( %pg, + %a, + %b) ret %out } @@ -289,9 +289,9 @@ define @umulh_i32( %a, % ; CHECK-NEXT: umulh z0.s, z0.s, z1.s ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %out = call @llvm.aarch64.sve.umulh.nxv4i32( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.umulh.u.nxv4i32( %pg, + %a, + %b) ret %out } @@ -301,9 +301,9 @@ define @umulh_i64( %a, % ; CHECK-NEXT: umulh z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %out = call @llvm.aarch64.sve.umulh.nxv2i64( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.umulh.u.nxv2i64( %pg, + %a, + %b) ret %out } @@ -315,9 +315,9 @@ define @umulh_i32_ptrue_all_b( %a, @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) - %out = tail call @llvm.aarch64.sve.umulh.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.umulh.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -330,9 +330,9 @@ define @umulh_i32_ptrue_all_h( %a, @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %pg.b = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %pg.h) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) - %out = tail call @llvm.aarch64.sve.umulh.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.umulh.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -363,9 +363,9 @@ define @and_i8( %a, %b) ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %out = call @llvm.aarch64.sve.and.nxv16i8( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.and.u.nxv16i8( %pg, + %a, + %b) ret %out } @@ -375,9 +375,9 @@ define @and_i16( %a, %b) ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %out = call @llvm.aarch64.sve.and.nxv8i16( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.and.u.nxv8i16( %pg, + %a, + %b) ret %out } @@ -387,9 +387,9 @@ define @and_i32( %a, %b) ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %out = call @llvm.aarch64.sve.and.nxv4i32( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.and.u.nxv4i32( %pg, + %a, + %b) ret %out } @@ -399,9 +399,9 @@ define @and_i64( %a, %b) ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %out = call @llvm.aarch64.sve.and.nxv2i64( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.and.u.nxv2i64( %pg, + %a, + %b) ret %out } @@ -415,9 +415,9 @@ define @bic_i8( %a, %b) ; CHECK-NEXT: bic z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %out = call @llvm.aarch64.sve.bic.nxv16i8( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.bic.u.nxv16i8( %pg, + %a, + %b) ret %out } @@ -427,9 +427,9 @@ 
define @bic_i16( %a, %b) ; CHECK-NEXT: bic z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %out = call @llvm.aarch64.sve.bic.nxv8i16( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.bic.u.nxv8i16( %pg, + %a, + %b) ret %out } @@ -439,9 +439,9 @@ define @bic_i32( %a, %b) ; CHECK-NEXT: bic z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %out = call @llvm.aarch64.sve.bic.nxv4i32( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.bic.u.nxv4i32( %pg, + %a, + %b) ret %out } @@ -451,9 +451,9 @@ define @bic_i64( %a, %b) ; CHECK-NEXT: bic z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %out = call @llvm.aarch64.sve.bic.nxv2i64( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.bic.u.nxv2i64( %pg, + %a, + %b) ret %out } @@ -467,9 +467,9 @@ define @eor_i8( %a, %b) ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %out = call @llvm.aarch64.sve.eor.nxv16i8( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.eor.u.nxv16i8( %pg, + %a, + %b) ret %out } @@ -479,9 +479,9 @@ define @eor_i16( %a, %b) ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %out = call @llvm.aarch64.sve.eor.nxv8i16( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.eor.u.nxv8i16( %pg, + %a, + %b) ret %out } @@ -491,9 +491,9 @@ define @eor_i32( %a, %b) ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %out = call @llvm.aarch64.sve.eor.nxv4i32( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.eor.u.nxv4i32( %pg, + %a, + %b) ret %out } @@ -503,9 +503,9 @@ define @eor_i64( %a, %b) ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %out = call @llvm.aarch64.sve.eor.nxv2i64( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.eor.u.nxv2i64( %pg, + %a, + %b) ret %out } @@ -519,9 +519,9 @@ define @orr_i8( %a, %b) ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %out = call @llvm.aarch64.sve.orr.nxv16i8( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.orr.u.nxv16i8( %pg, + %a, + %b) ret %out } @@ -531,9 +531,9 @@ define @orr_i16( %a, %b) ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %out = call @llvm.aarch64.sve.orr.nxv8i16( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.orr.u.nxv8i16( %pg, + %a, + %b) ret %out } @@ -543,9 +543,9 @@ define @orr_i32( %a, %b) ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %out = call @llvm.aarch64.sve.orr.nxv4i32( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.orr.u.nxv4i32( %pg, + %a, + %b) ret %out } @@ -555,9 +555,9 @@ define @orr_i64( %a, %b) ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %out = call @llvm.aarch64.sve.orr.nxv2i64( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.orr.u.nxv2i64( %pg, + %a, + %b) ret %out } @@ -569,9 +569,9 @@ define @orr_i32_ptrue_all_b( %a, @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) - %out = tail call @llvm.aarch64.sve.orr.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.orr.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -584,9 +584,9 @@ define @orr_i32_ptrue_all_h( %a, @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %pg.b = tail call 
@llvm.aarch64.sve.convert.to.svbool.nxv8i1( %pg.h) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) - %out = tail call @llvm.aarch64.sve.orr.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.orr.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -669,9 +669,9 @@ define @sqsub_i8( %a, %b ; CHECK-NEXT: sqsub z0.b, z0.b, z1.b ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %out = call @llvm.aarch64.sve.sqsub.nxv16i8( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.sqsub.u.nxv16i8( %pg, + %a, + %b) ret %out } @@ -681,9 +681,9 @@ define @sqsub_i16( %a, % ; CHECK-NEXT: sqsub z0.h, z0.h, z1.h ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %out = call @llvm.aarch64.sve.sqsub.nxv8i16( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.sqsub.u.nxv8i16( %pg, + %a, + %b) ret %out } @@ -693,9 +693,9 @@ define @sqsub_i32( %a, % ; CHECK-NEXT: sqsub z0.s, z0.s, z1.s ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %out = call @llvm.aarch64.sve.sqsub.nxv4i32( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.sqsub.u.nxv4i32( %pg, + %a, + %b) ret %out } @@ -705,9 +705,9 @@ define @sqsub_i64( %a, % ; CHECK-NEXT: sqsub z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %out = call @llvm.aarch64.sve.sqsub.nxv2i64( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.sqsub.u.nxv2i64( %pg, + %a, + %b) ret %out } @@ -773,9 +773,9 @@ define @uqsub_i8( %a, %b ; CHECK-NEXT: uqsub z0.b, z0.b, z1.b ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %out = call @llvm.aarch64.sve.uqsub.nxv16i8( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.uqsub.u.nxv16i8( %pg, + %a, + %b) ret %out } @@ -785,9 +785,9 @@ define @uqsub_i16( %a, % ; CHECK-NEXT: uqsub z0.h, z0.h, z1.h ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %out = call @llvm.aarch64.sve.uqsub.nxv8i16( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.uqsub.u.nxv8i16( %pg, + %a, + %b) ret %out } @@ -797,9 +797,9 @@ define @uqsub_i32( %a, % ; CHECK-NEXT: uqsub z0.s, z0.s, z1.s ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %out = call @llvm.aarch64.sve.uqsub.nxv4i32( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.uqsub.u.nxv4i32( %pg, + %a, + %b) ret %out } @@ -809,9 +809,9 @@ define @uqsub_i64( %a, % ; CHECK-NEXT: uqsub z0.d, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %out = call @llvm.aarch64.sve.uqsub.nxv2i64( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.uqsub.u.nxv2i64( %pg, + %a, + %b) ret %out } @@ -823,9 +823,9 @@ define @uqsub_i32_ptrue_all_b( %a, @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) - %out = tail call @llvm.aarch64.sve.uqsub.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.uqsub.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -838,9 +838,9 @@ define @uqsub_i32_ptrue_all_h( %a, @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %pg.b = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %pg.h) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) - %out = tail call @llvm.aarch64.sve.uqsub.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.uqsub.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -1037,9 +1037,9 @@ define @fadd_half( %a, @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %out = call @llvm.aarch64.sve.fadd.nxv8f16( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.fadd.u.nxv8f16( %pg, + %a, + %b) ret %out } @@ -1049,9 +1049,9 
@@ define @fadd_float( %a, @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %out = call @llvm.aarch64.sve.fadd.nxv4f32( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.fadd.u.nxv4f32( %pg, + %a, + %b) ret %out } @@ -1061,9 +1061,9 @@ define @fadd_double( %a, @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %out = call @llvm.aarch64.sve.fadd.nxv2f64( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.fadd.u.nxv2f64( %pg, + %a, + %b) ret %out } @@ -1077,9 +1077,9 @@ define @fsub_half( %a, @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %out = call @llvm.aarch64.sve.fsub.nxv8f16( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.fsub.u.nxv8f16( %pg, + %a, + %b) ret %out } @@ -1089,9 +1089,9 @@ define @fsub_float( %a, @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %out = call @llvm.aarch64.sve.fsub.nxv4f32( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.fsub.u.nxv4f32( %pg, + %a, + %b) ret %out } @@ -1101,9 +1101,9 @@ define @fsub_double( %a, @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %out = call @llvm.aarch64.sve.fsub.nxv2f64( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.fsub.u.nxv2f64( %pg, + %a, + %b) ret %out } @@ -1117,9 +1117,9 @@ define @fmul_half( %a, @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %out = call @llvm.aarch64.sve.fmul.nxv8f16( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.fmul.u.nxv8f16( %pg, + %a, + %b) ret %out } @@ -1129,9 +1129,9 @@ define @fmul_float( %a, @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %out = call @llvm.aarch64.sve.fmul.nxv4f32( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.fmul.u.nxv4f32( %pg, + %a, + %b) ret %out } @@ -1141,76 +1141,84 @@ define @fmul_double( %a, @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %out = call @llvm.aarch64.sve.fmul.nxv2f64( %pg, - %a, - %b) + %out = call @llvm.aarch64.sve.fmul.u.nxv2f64( %pg, + %a, + %b) ret %out } -declare @llvm.aarch64.sve.add.nxv16i8(, , ) -declare @llvm.aarch64.sve.add.nxv8i16(, , ) -declare @llvm.aarch64.sve.add.nxv4i32(, , ) -declare @llvm.aarch64.sve.add.nxv2i64(, , ) +declare @llvm.aarch64.sve.add.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.add.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.add.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.add.u.nxv2i64(, , ) -declare @llvm.aarch64.sve.sub.nxv16i8(, , ) -declare @llvm.aarch64.sve.sub.nxv8i16(, , ) declare @llvm.aarch64.sve.sub.nxv4i32(, , ) -declare @llvm.aarch64.sve.sub.nxv2i64(, , ) -declare @llvm.aarch64.sve.mul.nxv16i8(, , ) -declare @llvm.aarch64.sve.mul.nxv8i16(, , ) -declare @llvm.aarch64.sve.mul.nxv4i32(, , ) -declare @llvm.aarch64.sve.mul.nxv2i64(, , ) +declare @llvm.aarch64.sve.sub.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.sub.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.sub.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.sub.u.nxv2i64(, , ) + +declare @llvm.aarch64.sve.mul.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.mul.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.mul.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.mul.u.nxv2i64(, , ) -declare @llvm.aarch64.sve.smulh.nxv16i8(, , ) -declare @llvm.aarch64.sve.smulh.nxv8i16(, , ) -declare @llvm.aarch64.sve.smulh.nxv4i32(, , ) -declare @llvm.aarch64.sve.smulh.nxv2i64(, , ) +declare @llvm.aarch64.sve.smulh.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.smulh.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.smulh.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.smulh.u.nxv2i64(, , ) -declare @llvm.aarch64.sve.umulh.nxv16i8(, , ) -declare @llvm.aarch64.sve.umulh.nxv8i16(, , ) declare @llvm.aarch64.sve.umulh.nxv4i32(, , ) -declare @llvm.aarch64.sve.umulh.nxv2i64(, , ) -declare @llvm.aarch64.sve.and.nxv16i8(, , ) -declare @llvm.aarch64.sve.and.nxv8i16(, , ) -declare 
@llvm.aarch64.sve.and.nxv4i32(, , ) -declare @llvm.aarch64.sve.and.nxv2i64(, , ) +declare @llvm.aarch64.sve.umulh.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.umulh.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.umulh.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.umulh.u.nxv2i64(, , ) -declare @llvm.aarch64.sve.bic.nxv16i8(, , ) -declare @llvm.aarch64.sve.bic.nxv8i16(, , ) -declare @llvm.aarch64.sve.bic.nxv4i32(, , ) -declare @llvm.aarch64.sve.bic.nxv2i64(, , ) +declare @llvm.aarch64.sve.and.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.and.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.and.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.and.u.nxv2i64(, , ) -declare @llvm.aarch64.sve.eor.nxv16i8(, , ) -declare @llvm.aarch64.sve.eor.nxv8i16(, , ) -declare @llvm.aarch64.sve.eor.nxv4i32(, , ) -declare @llvm.aarch64.sve.eor.nxv2i64(, , ) +declare @llvm.aarch64.sve.bic.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.bic.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.bic.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.bic.u.nxv2i64(, , ) + +declare @llvm.aarch64.sve.eor.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.eor.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.eor.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.eor.u.nxv2i64(, , ) -declare @llvm.aarch64.sve.orr.nxv16i8(, , ) -declare @llvm.aarch64.sve.orr.nxv8i16(, , ) declare @llvm.aarch64.sve.orr.nxv4i32(, , ) -declare @llvm.aarch64.sve.orr.nxv2i64(, , ) + +declare @llvm.aarch64.sve.orr.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.orr.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.orr.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.orr.u.nxv2i64(, , ) declare @llvm.aarch64.sve.sqadd.nxv16i8(, , ) declare @llvm.aarch64.sve.sqadd.nxv8i16(, , ) declare @llvm.aarch64.sve.sqadd.nxv4i32(, , ) declare @llvm.aarch64.sve.sqadd.nxv2i64(, , ) -declare @llvm.aarch64.sve.sqsub.nxv16i8(, , ) -declare @llvm.aarch64.sve.sqsub.nxv8i16(, , ) -declare @llvm.aarch64.sve.sqsub.nxv4i32(, , ) -declare @llvm.aarch64.sve.sqsub.nxv2i64(, , ) +declare @llvm.aarch64.sve.sqsub.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.sqsub.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.sqsub.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.sqsub.u.nxv2i64(, , ) declare @llvm.aarch64.sve.uqadd.nxv16i8(, , ) declare @llvm.aarch64.sve.uqadd.nxv8i16(, , ) declare @llvm.aarch64.sve.uqadd.nxv4i32(, , ) declare @llvm.aarch64.sve.uqadd.nxv2i64(, , ) -declare @llvm.aarch64.sve.uqsub.nxv16i8(, , ) -declare @llvm.aarch64.sve.uqsub.nxv8i16(, , ) declare @llvm.aarch64.sve.uqsub.nxv4i32(, , ) -declare @llvm.aarch64.sve.uqsub.nxv2i64(, , ) + +declare @llvm.aarch64.sve.uqsub.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.uqsub.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.uqsub.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.uqsub.u.nxv2i64(, , ) declare @llvm.aarch64.sve.asr.wide.nxv16i8(, , ) declare @llvm.aarch64.sve.asr.wide.nxv8i16(, , ) @@ -1224,17 +1232,17 @@ declare @llvm.aarch64.sve.lsr.wide.nxv16i8( @llvm.aarch64.sve.lsr.wide.nxv8i16(, , ) declare @llvm.aarch64.sve.lsr.wide.nxv4i32(, , ) -declare @llvm.aarch64.sve.fadd.nxv8f16(, , ) -declare @llvm.aarch64.sve.fadd.nxv4f32(, , ) -declare @llvm.aarch64.sve.fadd.nxv2f64(, , ) +declare @llvm.aarch64.sve.fadd.u.nxv8f16(, , ) +declare @llvm.aarch64.sve.fadd.u.nxv4f32(, , ) +declare @llvm.aarch64.sve.fadd.u.nxv2f64(, , ) -declare @llvm.aarch64.sve.fsub.nxv8f16(, , ) -declare @llvm.aarch64.sve.fsub.nxv4f32(, , ) -declare @llvm.aarch64.sve.fsub.nxv2f64(, , ) +declare @llvm.aarch64.sve.fsub.u.nxv8f16(, , ) +declare @llvm.aarch64.sve.fsub.u.nxv4f32(, , ) +declare @llvm.aarch64.sve.fsub.u.nxv2f64(, , ) -declare 
@llvm.aarch64.sve.fmul.nxv8f16(, , ) -declare @llvm.aarch64.sve.fmul.nxv4f32(, , ) -declare @llvm.aarch64.sve.fmul.nxv2f64(, , ) +declare @llvm.aarch64.sve.fmul.u.nxv8f16(, , ) +declare @llvm.aarch64.sve.fmul.u.nxv4f32(, , ) +declare @llvm.aarch64.sve.fmul.u.nxv2f64(, , ) declare @llvm.aarch64.sve.convert.from.svbool.nxv4i1() declare @llvm.aarch64.sve.convert.from.svbool.nxv8i1() diff --git a/llvm/test/CodeGen/AArch64/sve-saba.ll b/llvm/test/CodeGen/AArch64/sve-saba.ll index 0022f1b..43a5804 100644 --- a/llvm/test/CodeGen/AArch64/sve-saba.ll +++ b/llvm/test/CodeGen/AArch64/sve-saba.ll @@ -55,7 +55,7 @@ define @saba_sabd_d( %a, ; CHECK-NEXT: saba z0.d, z1.d, z2.d ; CHECK-NEXT: ret %true = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %sabd = call @llvm.aarch64.sve.sabd.nxv2i64( %true, %b, %c) + %sabd = call @llvm.aarch64.sve.sabd.u.nxv2i64( %true, %b, %c) %add = add %sabd, %a ret %add } @@ -66,7 +66,7 @@ define @saba_sabd_s( %a, ; CHECK-NEXT: saba z0.s, z1.s, z2.s ; CHECK-NEXT: ret %true = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %sabd = call @llvm.aarch64.sve.sabd.nxv4i32( %true, %b, %c) + %sabd = call @llvm.aarch64.sve.sabd.u.nxv4i32( %true, %b, %c) %add = add %sabd, %a ret %add } @@ -77,7 +77,7 @@ define @saba_sabd_h( %a, ; CHECK-NEXT: saba z0.h, z1.h, z2.h ; CHECK-NEXT: ret %true = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %sabd = call @llvm.aarch64.sve.sabd.nxv8i16( %true, %b, %c) + %sabd = call @llvm.aarch64.sve.sabd.u.nxv8i16( %true, %b, %c) %add = add %sabd, %a ret %add } @@ -88,7 +88,7 @@ define @saba_sabd_b( %a, ; CHECK-NEXT: saba z0.b, z1.b, z2.b ; CHECK-NEXT: ret %true = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %sabd = call @llvm.aarch64.sve.sabd.nxv16i8( %true, %b, %c) + %sabd = call @llvm.aarch64.sve.sabd.u.nxv16i8( %true, %b, %c) %add = add %sabd, %a ret %add } @@ -103,7 +103,7 @@ declare @llvm.aarch64.sve.ptrue.nxv4i1(i32) declare @llvm.aarch64.sve.ptrue.nxv8i1(i32) declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) -declare @llvm.aarch64.sve.sabd.nxv2i64(, , ) -declare @llvm.aarch64.sve.sabd.nxv4i32(, , ) -declare @llvm.aarch64.sve.sabd.nxv8i16(, , ) -declare @llvm.aarch64.sve.sabd.nxv16i8(, , ) +declare @llvm.aarch64.sve.sabd.u.nxv2i64(, , ) +declare @llvm.aarch64.sve.sabd.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.sabd.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.sabd.u.nxv16i8(, , ) diff --git a/llvm/test/CodeGen/AArch64/sve2-int-mul.ll b/llvm/test/CodeGen/AArch64/sve2-int-mul.ll index 0d1743d..800888b 100644 --- a/llvm/test/CodeGen/AArch64/sve2-int-mul.ll +++ b/llvm/test/CodeGen/AArch64/sve2-int-mul.ll @@ -132,8 +132,8 @@ define @smulh_i8( %a, ; CHECK-NEXT: ret %b) { %sel = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %res = call @llvm.aarch64.sve.smulh.nxv16i8( %sel, %a, - %b) + %res = call @llvm.aarch64.sve.smulh.u.nxv16i8( %sel, %a, + %b) ret %res } @@ -144,8 +144,8 @@ define @smulh_i16( %a, ; CHECK-NEXT: ret %b) { %sel = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %res = call @llvm.aarch64.sve.smulh.nxv8i16( %sel, %a, - %b) + %res = call @llvm.aarch64.sve.smulh.u.nxv8i16( %sel, %a, + %b) ret %res } @@ -156,8 +156,8 @@ define @smulh_i32( %a, ; CHECK-NEXT: ret %b) { %sel = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %res = call @llvm.aarch64.sve.smulh.nxv4i32( %sel, %a, - %b) + %res = call @llvm.aarch64.sve.smulh.u.nxv4i32( %sel, %a, + %b) ret %res } @@ -168,8 +168,8 @@ define @smulh_i64( %a, ; CHECK-NEXT: ret %b) { %sel = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %res = call @llvm.aarch64.sve.smulh.nxv2i64( %sel, %a, - %b) + %res = call 
@llvm.aarch64.sve.smulh.u.nxv2i64( %sel, %a, + %b) ret %res } @@ -183,8 +183,8 @@ define @umulh_i8( %a, ; CHECK-NEXT: ret %b) { %sel = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %res = call @llvm.aarch64.sve.umulh.nxv16i8( %sel, %a, - %b) + %res = call @llvm.aarch64.sve.umulh.u.nxv16i8( %sel, %a, + %b) ret %res } @@ -195,8 +195,8 @@ define @umulh_i16( %a, ; CHECK-NEXT: ret %b) { %sel = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %res = call @llvm.aarch64.sve.umulh.nxv8i16( %sel, %a, - %b) + %res = call @llvm.aarch64.sve.umulh.u.nxv8i16( %sel, %a, + %b) ret %res } @@ -207,8 +207,8 @@ define @umulh_i32( %a, ; CHECK-NEXT: ret %b) { %sel = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %res = call @llvm.aarch64.sve.umulh.nxv4i32( %sel, %a, - %b) + %res = call @llvm.aarch64.sve.umulh.u.nxv4i32( %sel, %a, + %b) ret %res } @@ -219,8 +219,8 @@ define @umulh_i64( %a, ; CHECK-NEXT: ret %b) { %sel = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %res = call @llvm.aarch64.sve.umulh.nxv2i64( %sel, %a, - %b) + %res = call @llvm.aarch64.sve.umulh.u.nxv2i64( %sel, %a, + %b) ret %res } @@ -336,14 +336,14 @@ declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) declare @llvm.aarch64.sve.ptrue.nxv8i1(i32) declare @llvm.aarch64.sve.ptrue.nxv4i1(i32) declare @llvm.aarch64.sve.ptrue.nxv2i1(i32) -declare @llvm.aarch64.sve.smulh.nxv16i8(, , ) -declare @llvm.aarch64.sve.smulh.nxv8i16(, , ) -declare @llvm.aarch64.sve.smulh.nxv4i32(, , ) -declare @llvm.aarch64.sve.smulh.nxv2i64(, , ) -declare @llvm.aarch64.sve.umulh.nxv16i8(, , ) -declare @llvm.aarch64.sve.umulh.nxv8i16(, , ) -declare @llvm.aarch64.sve.umulh.nxv4i32(, , ) -declare @llvm.aarch64.sve.umulh.nxv2i64(, , ) +declare @llvm.aarch64.sve.smulh.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.smulh.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.smulh.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.smulh.u.nxv2i64(, , ) +declare @llvm.aarch64.sve.umulh.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.umulh.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.umulh.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.umulh.u.nxv2i64(, , ) declare @llvm.aarch64.sve.pmul.nxv16i8(, ) declare @llvm.aarch64.sve.sqdmulh.nxv16i8(, ) declare @llvm.aarch64.sve.sqdmulh.nxv8i16(, ) diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-int-arith-imm.ll index 6a8f53d..8bfcd08 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-int-arith-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-int-arith-imm.ll @@ -111,9 +111,9 @@ define @sqsub_b_lowimm( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 27, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.sqsub.nxv16i8( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.sqsub.u.nxv16i8( %pg, + %a, + %splat) ret %out } @@ -125,9 +125,9 @@ define @sqsub_h_lowimm( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 43, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.sqsub.nxv8i16( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.sqsub.u.nxv8i16( %pg, + %a, + %splat) ret %out } @@ -139,9 +139,9 @@ define @sqsub_h_highimm( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 2048, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.sqsub.nxv8i16( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.sqsub.u.nxv8i16( %pg, + %a, + %splat) ret %out } @@ -153,9 +153,9 @@ define 
@sqsub_s_lowimm( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 1, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.sqsub.nxv4i32( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.sqsub.u.nxv4i32( %pg, + %a, + %splat) ret %out } @@ -167,9 +167,9 @@ define @sqsub_s_highimm( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 8192, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.sqsub.nxv4i32( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.sqsub.u.nxv4i32( %pg, + %a, + %splat) ret %out } @@ -181,9 +181,9 @@ define @sqsub_d_lowimm( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 255, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.sqsub.nxv2i64( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.sqsub.u.nxv2i64( %pg, + %a, + %splat) ret %out } @@ -195,9 +195,9 @@ define @sqsub_d_highimm( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 65280, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.sqsub.nxv2i64( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.sqsub.u.nxv2i64( %pg, + %a, + %splat) ret %out } @@ -311,9 +311,9 @@ define @uqsub_b_lowimm( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 27, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.uqsub.nxv16i8( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.uqsub.u.nxv16i8( %pg, + %a, + %splat) ret %out } @@ -325,9 +325,9 @@ define @uqsub_h_lowimm( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 43, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.uqsub.nxv8i16( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.uqsub.u.nxv8i16( %pg, + %a, + %splat) ret %out } @@ -339,9 +339,9 @@ define @uqsub_h_highimm( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 2048, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.uqsub.nxv8i16( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.uqsub.u.nxv8i16( %pg, + %a, + %splat) ret %out } @@ -353,9 +353,9 @@ define @uqsub_s_lowimm( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 1, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.uqsub.nxv4i32( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.uqsub.u.nxv4i32( %pg, + %a, + %splat) ret %out } @@ -367,9 +367,9 @@ define @uqsub_s_highimm( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 8192, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.uqsub.nxv4i32( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.uqsub.u.nxv4i32( %pg, + %a, + %splat) ret %out } @@ -381,9 +381,9 @@ define @uqsub_d_lowimm( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 255, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.uqsub.nxv2i64( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.uqsub.u.nxv2i64( %pg, + %a, + %splat) ret %out } @@ -395,9 +395,9 @@ define @uqsub_d_highimm( %a) { %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 
65280, i32 0 %splat = shufflevector %elt, undef, zeroinitializer - %out = call @llvm.aarch64.sve.uqsub.nxv2i64( %pg, - %a, - %splat) + %out = call @llvm.aarch64.sve.uqsub.u.nxv2i64( %pg, + %a, + %splat) ret %out } @@ -410,9 +410,9 @@ define @uqsub_i32_ptrue_all_b( %a) #0 { %pg.b = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) %b = tail call @llvm.aarch64.sve.dup.x.nxv4i32(i32 1) - %out = tail call @llvm.aarch64.sve.uqsub.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.uqsub.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -426,9 +426,9 @@ define @uqsub_i32_ptrue_all_h( %a) #0 { %pg.b = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %pg.h) %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) %b = tail call @llvm.aarch64.sve.dup.x.nxv4i32(i32 1) - %out = tail call @llvm.aarch64.sve.uqsub.nxv4i32( %pg.s, - %a, - %b) + %out = tail call @llvm.aarch64.sve.uqsub.u.nxv4i32( %pg.s, + %a, + %b) ret %out } @@ -446,8 +446,8 @@ define @uqsub_i32_ptrue_all_d( %a) #0 { %pg.s = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.b) %b = tail call @llvm.aarch64.sve.dup.x.nxv4i32(i32 1) %out = tail call @llvm.aarch64.sve.uqsub.nxv4i32( %pg.s, - %a, - %b) + %a, + %b) ret %out } @@ -456,20 +456,22 @@ declare @llvm.aarch64.sve.sqadd.nxv8i16(, @llvm.aarch64.sve.sqadd.nxv4i32(, , ) declare @llvm.aarch64.sve.sqadd.nxv2i64(, , ) -declare @llvm.aarch64.sve.sqsub.nxv16i8(, , ) -declare @llvm.aarch64.sve.sqsub.nxv8i16(, , ) -declare @llvm.aarch64.sve.sqsub.nxv4i32(, , ) -declare @llvm.aarch64.sve.sqsub.nxv2i64(, , ) +declare @llvm.aarch64.sve.sqsub.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.sqsub.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.sqsub.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.sqsub.u.nxv2i64(, , ) declare @llvm.aarch64.sve.uqadd.nxv16i8(, , ) declare @llvm.aarch64.sve.uqadd.nxv8i16(, , ) declare @llvm.aarch64.sve.uqadd.nxv4i32(, , ) declare @llvm.aarch64.sve.uqadd.nxv2i64(, , ) -declare @llvm.aarch64.sve.uqsub.nxv16i8(, , ) -declare @llvm.aarch64.sve.uqsub.nxv8i16(, , ) declare @llvm.aarch64.sve.uqsub.nxv4i32(, , ) -declare @llvm.aarch64.sve.uqsub.nxv2i64(, , ) + +declare @llvm.aarch64.sve.uqsub.u.nxv16i8(, , ) +declare @llvm.aarch64.sve.uqsub.u.nxv8i16(, , ) +declare @llvm.aarch64.sve.uqsub.u.nxv4i32(, , ) +declare @llvm.aarch64.sve.uqsub.u.nxv2i64(, , ) declare @llvm.aarch64.sve.convert.from.svbool.nxv4i1() declare @llvm.aarch64.sve.convert.from.svbool.nxv8i1() diff --git a/llvm/test/CodeGen/AArch64/sve2-sra.ll b/llvm/test/CodeGen/AArch64/sve2-sra.ll index afa8dd5..3de49c9 100644 --- a/llvm/test/CodeGen/AArch64/sve2-sra.ll +++ b/llvm/test/CodeGen/AArch64/sve2-sra.ll @@ -61,7 +61,7 @@ define @usra_intr_i8( %a, @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %ins = insertelement poison, i8 1, i32 0 %splat = shufflevector %ins, poison, zeroinitializer - %shift = call @llvm.aarch64.sve.lsr.nxv16i8( %pg, %b, %splat) + %shift = call @llvm.aarch64.sve.lsr.u.nxv16i8( %pg, %b, %splat) %add = add %a, %shift ret %add } @@ -74,7 +74,7 @@ define @usra_intr_i16( %a, @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %ins = insertelement poison, i16 2, i32 0 %splat = shufflevector %ins, poison, zeroinitializer - %shift = call @llvm.aarch64.sve.lsr.nxv8i16( %pg, %b, %splat) + %shift = call @llvm.aarch64.sve.lsr.u.nxv8i16( %pg, %b, %splat) %add = add %a, %shift ret %add } @@ -87,7 +87,7 @@ define @usra_intr_i32( %a, @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %ins = insertelement poison, i32 3, i32 0 %splat = shufflevector 
%ins, poison, zeroinitializer - %shift = call @llvm.aarch64.sve.lsr.nxv4i32( %pg, %b, %splat) + %shift = call @llvm.aarch64.sve.lsr.u.nxv4i32( %pg, %b, %splat) %add = add %a, %shift ret %add } @@ -100,7 +100,7 @@ define @usra_intr_i64( %a, @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %ins = insertelement poison, i64 4, i32 0 %splat = shufflevector %ins, poison, zeroinitializer - %shift = call @llvm.aarch64.sve.lsr.nxv2i64( %pg, %b, %splat) + %shift = call @llvm.aarch64.sve.lsr.u.nxv2i64( %pg, %b, %splat) %add = add %a, %shift ret %add } @@ -211,7 +211,7 @@ define @ssra_intr_i8( %a, @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %ins = insertelement poison, i8 1, i32 0 %splat = shufflevector %ins, poison, zeroinitializer - %shift = call @llvm.aarch64.sve.asr.nxv16i8( %pg, %b, %splat) + %shift = call @llvm.aarch64.sve.asr.u.nxv16i8( %pg, %b, %splat) %add = add %a, %shift ret %add } @@ -224,7 +224,7 @@ define @ssra_intr_i16( %a, @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %ins = insertelement poison, i16 2, i32 0 %splat = shufflevector %ins, poison, zeroinitializer - %shift = call @llvm.aarch64.sve.asr.nxv8i16( %pg, %b, %splat) + %shift = call @llvm.aarch64.sve.asr.u.nxv8i16( %pg, %b, %splat) %add = add %a, %shift ret %add } @@ -237,7 +237,7 @@ define @ssra_intr_i32( %a, @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %ins = insertelement poison, i32 3, i32 0 %splat = shufflevector %ins, poison, zeroinitializer - %shift = call @llvm.aarch64.sve.asr.nxv4i32( %pg, %b, %splat) + %shift = call @llvm.aarch64.sve.asr.u.nxv4i32( %pg, %b, %splat) %add = add %a, %shift ret %add } @@ -250,7 +250,7 @@ define @ssra_intr_i64( %a, @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %ins = insertelement poison, i64 4, i32 0 %splat = shufflevector %ins, poison, zeroinitializer - %shift = call @llvm.aarch64.sve.asr.nxv2i64( %pg, %b, %splat) + %shift = call @llvm.aarch64.sve.asr.u.nxv2i64( %pg, %b, %splat) %add = add %a, %shift ret %add } @@ -308,21 +308,11 @@ declare @llvm.aarch64.sve.ptrue.nxv8i1(i32 immarg) declare @llvm.aarch64.sve.ptrue.nxv4i1(i32 immarg) declare @llvm.aarch64.sve.ptrue.nxv2i1(i32 immarg) -declare @llvm.aarch64.sve.lsr.nxv16i8(, , ) -declare @llvm.aarch64.sve.lsr.nxv8i16(, , ) -declare @llvm.aarch64.sve.lsr.nxv4i32(, , ) -declare @llvm.aarch64.sve.lsr.nxv2i64(, , ) - declare @llvm.aarch64.sve.lsr.u.nxv16i8(, , ) declare @llvm.aarch64.sve.lsr.u.nxv8i16(, , ) declare @llvm.aarch64.sve.lsr.u.nxv4i32(, , ) declare @llvm.aarch64.sve.lsr.u.nxv2i64(, , ) -declare @llvm.aarch64.sve.asr.nxv16i8(, , ) -declare @llvm.aarch64.sve.asr.nxv8i16(, , ) -declare @llvm.aarch64.sve.asr.nxv4i32(, , ) -declare @llvm.aarch64.sve.asr.nxv2i64(, , ) - declare @llvm.aarch64.sve.asr.u.nxv16i8(, , ) declare @llvm.aarch64.sve.asr.u.nxv8i16(, , ) declare @llvm.aarch64.sve.asr.u.nxv4i32(, , ) -- 2.7.4
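For reference, the test updates above all follow one pattern: calls to the merged SVE intrinsics are replaced with their undef-variant (_u) counterparts, which is the form the new IR-level combines produce when the governing predicate is all active. Below is a minimal illustrative sketch of that equivalence, assuming the standard SVE intrinsic signatures; the function names @mul_merged/@mul_undef and the choice of llvm.aarch64.sve.mul are for illustration only and do not come from the patch.

; Both functions use an all-active predicate (ptrue pattern 31 = SV_ALL), so the
; merged intrinsic and its undef (_u) counterpart compute the same value for
; every lane; the IR combine canonicalises the first form into the second,
; which is why the corresponding DAG combines are redundant.
define <vscale x 4 x i32> @mul_merged(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; The form the updated tests now call directly.
define <vscale x 4 x i32> @mul_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

The sketch can be fed through llc (for example with -mtriple=aarch64 -mattr=+sve2) to compare the code generated for the two forms. By switching the test inputs to the _u intrinsics, the files keep exercising the unpredicated instruction-selection patterns directly, without depending on the DAG combines this patch deletes.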