From cf4df61688385af46de58562890dae335f96694b Mon Sep 17 00:00:00 2001
From: Paul Walker
Date: Fri, 10 Feb 2023 12:25:26 +0000
Subject: [PATCH] [SVE] Add intrinsics for floating-point operations that explicitly undefine the result for inactive lanes.

This patch is the floating-point equivalent of D141937.

Depends on D143764.

Differential Revision: https://reviews.llvm.org/D143765
---
 llvm/include/llvm/IR/IntrinsicsAArch64.td          |  16 +-
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp    |  32 +-
 llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td     |  15 +-
 .../AArch64/sve-intrinsics-fp-arith-undef.ll       | 822 +++++++++++++++++++++
 4 files changed, 877 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-undef.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index edf6fdd..b2ae369 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1931,31 +1931,44 @@ def int_aarch64_sve_whilehi : AdvSIMD_SVE_WHILE_Intrinsic;
 //
 def int_aarch64_sve_fabd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fabd_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fabs : AdvSIMD_Merged1VectorArg_Intrinsic;
 def int_aarch64_sve_fadd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fadd_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fcadd : AdvSIMD_SVE_CADD_Intrinsic;
 def int_aarch64_sve_fcmla : AdvSIMD_SVE_CMLA_Intrinsic;
 def int_aarch64_sve_fcmla_lane : AdvSIMD_SVE_CMLA_LANE_Intrinsic;
 def int_aarch64_sve_fdiv : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fdiv_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fdivr : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fexpa_x : AdvSIMD_SVE_EXPA_Intrinsic;
 def int_aarch64_sve_fmad : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_fmax : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmax_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fmaxnm : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmaxnm_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fmin : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmin_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fminnm : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fminnm_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fmla : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_fmla_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_fmla_u : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_fmls : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_fmls_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_fmls_u : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_fmsb : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_fmul : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmul_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_fmul_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fmulx : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmulx_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fneg : AdvSIMD_Merged1VectorArg_Intrinsic;
-def int_aarch64_sve_fmul_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
 def int_aarch64_sve_fnmad : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_fnmla : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_fnmla_u : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_fnmls : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_fnmls_u : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_fnmsb : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_frecpe_x : AdvSIMD_1VectorArg_Intrinsic;
 def int_aarch64_sve_frecps_x : AdvSIMD_2VectorArg_Intrinsic;
@@ -1972,6 +1985,7 @@ def int_aarch64_sve_frsqrts_x : AdvSIMD_2VectorArg_Intrinsic;
 def int_aarch64_sve_fscale : AdvSIMD_SVE_SCALE_Intrinsic;
 def int_aarch64_sve_fsqrt : AdvSIMD_Merged1VectorArg_Intrinsic;
 def int_aarch64_sve_fsub : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fsub_u : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_ftmad_x : AdvSIMD_2VectorArgIndexed_Intrinsic;
 def int_aarch64_sve_ftsmul_x : AdvSIMD_SVE_TSMUL_Intrinsic;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c69b164..3a9372e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18337,10 +18337,38 @@ static SDValue performIntrinsicCombine(SDNode *N,
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
   case Intrinsic::aarch64_sve_fadd:
     return convertMergedOpToPredOp(N, AArch64ISD::FADD_PRED, DAG);
-  case Intrinsic::aarch64_sve_fsub:
-    return convertMergedOpToPredOp(N, AArch64ISD::FSUB_PRED, DAG);
+  case Intrinsic::aarch64_sve_fadd_u:
+    return DAG.getNode(AArch64ISD::FADD_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
+  case Intrinsic::aarch64_sve_fdiv_u:
+    return DAG.getNode(AArch64ISD::FDIV_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
+  case Intrinsic::aarch64_sve_fmax_u:
+    return DAG.getNode(AArch64ISD::FMAX_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
+  case Intrinsic::aarch64_sve_fmaxnm_u:
+    return DAG.getNode(AArch64ISD::FMAXNM_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
+  case Intrinsic::aarch64_sve_fmla_u:
+    return DAG.getNode(AArch64ISD::FMA_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(3), N->getOperand(4),
+                       N->getOperand(2));
+  case Intrinsic::aarch64_sve_fmin_u:
+    return DAG.getNode(AArch64ISD::FMIN_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
+  case Intrinsic::aarch64_sve_fminnm_u:
+    return DAG.getNode(AArch64ISD::FMINNM_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
   case Intrinsic::aarch64_sve_fmul:
     return convertMergedOpToPredOp(N, AArch64ISD::FMUL_PRED, DAG);
+  case Intrinsic::aarch64_sve_fmul_u:
+    return DAG.getNode(AArch64ISD::FMUL_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
+  case Intrinsic::aarch64_sve_fsub:
+    return convertMergedOpToPredOp(N, AArch64ISD::FSUB_PRED, DAG);
+  case Intrinsic::aarch64_sve_fsub_u:
+    return DAG.getNode(AArch64ISD::FSUB_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
   case Intrinsic::aarch64_sve_add:
     return convertMergedOpToPredOp(N, ISD::ADD, DAG, true);
   case Intrinsic::aarch64_sve_add_u:
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c298736..8ac7559 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -352,24 +352,28 @@ def AArch64fmul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
 }]>;

-def AArch64fabd_p
: PatFrag<(ops node:$pg, node:$op1, node:$op2), - (AArch64fabs_mt node:$pg, (AArch64fsub_p node:$pg, node:$op1, node:$op2), undef)>; +def AArch64fabd_p : PatFrags<(ops node:$pg, node:$op1, node:$op2), + [(int_aarch64_sve_fabd_u node:$pg, node:$op1, node:$op2), + (AArch64fabs_mt node:$pg, (AArch64fsub_p node:$pg, node:$op1, node:$op2), undef)]>; def AArch64fmla_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm), [(AArch64fma_p node:$pg, node:$zn, node:$zm, node:$za), (vselect node:$pg, (AArch64fma_p (AArch64ptrue 31), node:$zn, node:$zm, node:$za), node:$za)]>; def AArch64fmls_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm), - [(AArch64fma_p node:$pg, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$zm, node:$za), + [(int_aarch64_sve_fmls_u node:$pg, node:$za, node:$zn, node:$zm), + (AArch64fma_p node:$pg, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$zm, node:$za), (AArch64fma_p node:$pg, node:$zm, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$za), (vselect node:$pg, (AArch64fma_p (AArch64ptrue 31), (AArch64fneg_mt (AArch64ptrue 31), node:$zn, (undef)), node:$zm, node:$za), node:$za)]>; def AArch64fnmla_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm), - [(AArch64fma_p node:$pg, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$zm, (AArch64fneg_mt node:$pg, node:$za, (undef))), + [(int_aarch64_sve_fnmla_u node:$pg, node:$za, node:$zn, node:$zm), + (AArch64fma_p node:$pg, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$zm, (AArch64fneg_mt node:$pg, node:$za, (undef))), (AArch64fneg_mt_nsz node:$pg, (AArch64fma_p node:$pg, node:$zn, node:$zm, node:$za), (undef))]>; def AArch64fnmls_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm), - [(AArch64fma_p node:$pg, node:$zn, node:$zm, (AArch64fneg_mt node:$pg, node:$za, (undef)))]>; + [(int_aarch64_sve_fnmls_u node:$pg, node:$za, node:$zn, node:$zm), + (AArch64fma_p node:$pg, node:$zn, node:$zm, (AArch64fneg_mt node:$pg, node:$za, (undef)))]>; def AArch64fsubr_p : PatFrag<(ops node:$pg, node:$op1, node:$op2), (AArch64fsub_p node:$pg, node:$op2, node:$op1)>; @@ -623,6 +627,7 @@ let Predicates = [HasSVEorSME] in { defm FMAX_ZPZZ : sve_fp_bin_pred_hfd; defm FMIN_ZPZZ : sve_fp_bin_pred_hfd; defm FABD_ZPZZ : sve_fp_bin_pred_hfd; + defm FMULX_ZPZZ : sve_fp_bin_pred_hfd; defm FDIV_ZPZZ : sve_fp_bin_pred_hfd; } // End HasSVEorSME diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-undef.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-undef.ll new file mode 100644 index 0000000..a77bcdf --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-undef.ll @@ -0,0 +1,822 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; FABD +; + +define @fabd_h( %pg, %a, %b) { +; CHECK-LABEL: fabd_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fabd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fabd.u.nxv8f16( %pg, + %a, + %b) + ret %out +} + +define @fabd_s( %pg, %a, %b) { +; CHECK-LABEL: fabd_s: +; CHECK: // %bb.0: +; CHECK-NEXT: fabd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fabd.u.nxv4f32( %pg, + %a, + %b) + ret %out +} + +define @fabd_d( %pg, %a, %b) { +; CHECK-LABEL: fabd_d: +; CHECK: // %bb.0: +; CHECK-NEXT: fabd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fabd.u.nxv2f64( %pg, + %a, + %b) + ret %out +} + +; +; FADD +; + +define @fadd_h( %pg, %a, %b) { +; CHECK-LABEL: fadd_h: +; CHECK: // %bb.0: +; 
CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fadd.u.nxv8f16( %pg, + %a, + %b) + ret %out +} + +define @fadd_s( %pg, %a, %b) { +; CHECK-LABEL: fadd_s: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fadd.u.nxv4f32( %pg, + %a, + %b) + ret %out +} + +define @fadd_d( %pg, %a, %b) { +; CHECK-LABEL: fadd_d: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fadd.u.nxv2f64( %pg, + %a, + %b) + ret %out +} + +; +; FDIV +; + +define @fdiv_h( %pg, %a, %b) { +; CHECK-LABEL: fdiv_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fdiv.u.nxv8f16( %pg, + %a, + %b) + ret %out +} + +define @fdiv_s( %pg, %a, %b) { +; CHECK-LABEL: fdiv_s: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fdiv.u.nxv4f32( %pg, + %a, + %b) + ret %out +} + +define @fdiv_d( %pg, %a, %b) { +; CHECK-LABEL: fdiv_d: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fdiv.u.nxv2f64( %pg, + %a, + %b) + ret %out +} + +; +; FDIVR +; + +define @fdivr_h( %pg, %a, %b) { +; CHECK-LABEL: fdivr_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fdivr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fdiv.u.nxv8f16( %pg, + %b, + %a) + ret %out +} + +define @fdivr_s( %pg, %a, %b) { +; CHECK-LABEL: fdivr_s: +; CHECK: // %bb.0: +; CHECK-NEXT: fdivr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fdiv.u.nxv4f32( %pg, + %b, + %a) + ret %out +} + +define @fdivr_d( %pg, %a, %b) { +; CHECK-LABEL: fdivr_d: +; CHECK: // %bb.0: +; CHECK-NEXT: fdivr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fdiv.u.nxv2f64( %pg, + %b, + %a) + ret %out +} + +; +; FMAD +; + +define @fmad_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fmad_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmla.u.nxv8f16( %pg, + %c, + %a, + %b) + ret %out +} + +define @fmad_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fmad_s: +; CHECK: // %bb.0: +; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmla.u.nxv4f32( %pg, + %c, + %a, + %b) + ret %out +} + +define @fmad_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fmad_d: +; CHECK: // %bb.0: +; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmla.u.nxv2f64( %pg, + %c, + %a, + %b) + ret %out +} + +; +; FMAX +; + +define @fmax_h( %pg, %a, %b) { +; CHECK-LABEL: fmax_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmax.u.nxv8f16( %pg, + %a, + %b) + ret %out +} + +define @fmax_s( %pg, %a, %b) { +; CHECK-LABEL: fmax_s: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmax.u.nxv4f32( %pg, + %a, + %b) + ret %out +} + +define @fmax_d( %pg, %a, %b) { +; CHECK-LABEL: fmax_d: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmax.u.nxv2f64( %pg, + %a, + %b) + ret %out +} + +; +; FMAXNM +; + +define @fmaxnm_h( %pg, %a, %b) { +; CHECK-LABEL: fmaxnm_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmaxnm.u.nxv8f16( %pg, + %a, + %b) + ret %out +} + 
+define @fmaxnm_s( %pg, %a, %b) { +; CHECK-LABEL: fmaxnm_s: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmaxnm.u.nxv4f32( %pg, + %a, + %b) + ret %out +} + +define @fmaxnm_d( %pg, %a, %b) { +; CHECK-LABEL: fmaxnm_d: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmaxnm.u.nxv2f64( %pg, + %a, + %b) + ret %out +} + +; +; FMIN +; + +define @fmin_h( %pg, %a, %b) { +; CHECK-LABEL: fmin_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmin.u.nxv8f16( %pg, + %a, + %b) + ret %out +} + +define @fmin_s( %pg, %a, %b) { +; CHECK-LABEL: fmin_s: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmin.u.nxv4f32( %pg, + %a, + %b) + ret %out +} + +define @fmin_d( %pg, %a, %b) { +; CHECK-LABEL: fmin_d: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmin.u.nxv2f64( %pg, + %a, + %b) + ret %out +} + +; +; FMINNM +; + +define @fminnm_h( %pg, %a, %b) { +; CHECK-LABEL: fminnm_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fminnm.u.nxv8f16( %pg, + %a, + %b) + ret %out +} + +define @fminnm_s( %pg, %a, %b) { +; CHECK-LABEL: fminnm_s: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fminnm.u.nxv4f32( %pg, + %a, + %b) + ret %out +} + +define @fminnm_d( %pg, %a, %b) { +; CHECK-LABEL: fminnm_d: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fminnm.u.nxv2f64( %pg, + %a, + %b) + ret %out +} + +; +; FMLA +; + +define @fmla_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fmla_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fmla z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmla.u.nxv8f16( %pg, + %a, + %b, + %c) + ret %out +} + +define @fmla_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fmla_s: +; CHECK: // %bb.0: +; CHECK-NEXT: fmla z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmla.u.nxv4f32( %pg, + %a, + %b, + %c) + ret %out +} + +define @fmla_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fmla_d: +; CHECK: // %bb.0: +; CHECK-NEXT: fmla z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmla.u.nxv2f64( %pg, + %a, + %b, + %c) + ret %out +} + +; +; FMLS +; + +define @fmls_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fmls_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fmls z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmls.u.nxv8f16( %pg, + %a, + %b, + %c) + ret %out +} + +define @fmls_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fmls_s: +; CHECK: // %bb.0: +; CHECK-NEXT: fmls z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmls.u.nxv4f32( %pg, + %a, + %b, + %c) + ret %out +} + +define @fmls_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fmls_d: +; CHECK: // %bb.0: +; CHECK-NEXT: fmls z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmls.u.nxv2f64( %pg, + %a, + %b, + %c) + ret %out +} + +; +; FMSB +; + +define @fmsb_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fmsb_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fmsb z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmls.u.nxv8f16( %pg, + %c, + %a, + %b) + ret %out +} + +define @fmsb_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fmsb_s: +; CHECK: // 
%bb.0: +; CHECK-NEXT: fmsb z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmls.u.nxv4f32( %pg, + %c, + %a, + %b) + ret %out +} + +define @fmsb_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fmsb_d: +; CHECK: // %bb.0: +; CHECK-NEXT: fmsb z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmls.u.nxv2f64( %pg, + %c, + %a, + %b) + ret %out +} + +; +; FMUL +; + +define @fmul_h( %pg, %a, %b) { +; CHECK-LABEL: fmul_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmul.u.nxv8f16( %pg, + %a, + %b) + ret %out +} + +define @fmul_s( %pg, %a, %b) { +; CHECK-LABEL: fmul_s: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmul.u.nxv4f32( %pg, + %a, + %b) + ret %out +} + +define @fmul_d( %pg, %a, %b) { +; CHECK-LABEL: fmul_d: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmul.u.nxv2f64( %pg, + %a, + %b) + ret %out +} + +; +; FMULX +; + +define @fmulx_h( %pg, %a, %b) { +; CHECK-LABEL: fmulx_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fmulx z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmulx.u.nxv8f16( %pg, + %a, + %b) + ret %out +} + +define @fmulx_s( %pg, %a, %b) { +; CHECK-LABEL: fmulx_s: +; CHECK: // %bb.0: +; CHECK-NEXT: fmulx z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmulx.u.nxv4f32( %pg, + %a, + %b) + ret %out +} + +define @fmulx_d( %pg, %a, %b) { +; CHECK-LABEL: fmulx_d: +; CHECK: // %bb.0: +; CHECK-NEXT: fmulx z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmulx.u.nxv2f64( %pg, + %a, + %b) + ret %out +} + +; +; FNMAD +; + +define @fnmad_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmad_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmad z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmla.u.nxv8f16( %pg, + %c, + %a, + %b) + ret %out +} + +define @fnmad_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmad_s: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmad z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmla.u.nxv4f32( %pg, + %c, + %a, + %b) + ret %out +} + +define @fnmad_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmad_d: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmad z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmla.u.nxv2f64( %pg, + %c, + %a, + %b) + ret %out +} + +; +; FNMLA +; + +define @fnmla_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmla_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmla z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmla.u.nxv8f16( %pg, + %a, + %b, + %c) + ret %out +} + +define @fnmla_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmla_s: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmla z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmla.u.nxv4f32( %pg, + %a, + %b, + %c) + ret %out +} + +define @fnmla_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmla_d: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmla z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmla.u.nxv2f64( %pg, + %a, + %b, + %c) + ret %out +} + +; +; FNMLS +; + +define @fnmls_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmls_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmls z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmls.u.nxv8f16( %pg, + %a, + %b, + %c) + ret %out +} + +define @fnmls_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmls_s: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmls z0.s, p0/m, z1.s, z2.s +; 
CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmls.u.nxv4f32( %pg, + %a, + %b, + %c) + ret %out +} + +define @fnmls_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmls_d: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmls z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmls.u.nxv2f64( %pg, + %a, + %b, + %c) + ret %out +} + +; +; FNMSB +; + +define @fnmsb_h( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmsb_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmsb z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmls.u.nxv8f16( %pg, + %c, + %a, + %b) + ret %out +} + +define @fnmsb_s( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmsb_s: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmsb z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmls.u.nxv4f32( %pg, + %c, + %a, + %b) + ret %out +} + +define @fnmsb_d( %pg, %a, %b, %c) { +; CHECK-LABEL: fnmsb_d: +; CHECK: // %bb.0: +; CHECK-NEXT: fnmsb z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fnmls.u.nxv2f64( %pg, + %c, + %a, + %b) + ret %out +} + +; +; FSUB +; + +define @fsub_h( %pg, %a, %b) { +; CHECK-LABEL: fsub_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fsub.u.nxv8f16( %pg, + %a, + %b) + ret %out +} + +define @fsub_s( %pg, %a, %b) { +; CHECK-LABEL: fsub_s: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fsub.u.nxv4f32( %pg, + %a, + %b) + ret %out +} + +define @fsub_d( %pg, %a, %b) { +; CHECK-LABEL: fsub_d: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fsub.u.nxv2f64( %pg, + %a, + %b) + ret %out +} + +; +; FSUBR +; + +define @fsubr_h( %pg, %a, %b) { +; CHECK-LABEL: fsubr_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fsubr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fsub.u.nxv8f16( %pg, + %b, + %a) + ret %out +} + +define @fsubr_s( %pg, %a, %b) { +; CHECK-LABEL: fsubr_s: +; CHECK: // %bb.0: +; CHECK-NEXT: fsubr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fsub.u.nxv4f32( %pg, + %b, + %a) + ret %out +} + +define @fsubr_d( %pg, %a, %b) { +; CHECK-LABEL: fsubr_d: +; CHECK: // %bb.0: +; CHECK-NEXT: fsubr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fsub.u.nxv2f64( %pg, + %b, + %a) + ret %out +} + +declare @llvm.aarch64.sve.fabd.u.nxv8f16(, , ) +declare @llvm.aarch64.sve.fabd.u.nxv4f32(, , ) +declare @llvm.aarch64.sve.fabd.u.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fadd.u.nxv8f16(, , ) +declare @llvm.aarch64.sve.fadd.u.nxv4f32(, , ) +declare @llvm.aarch64.sve.fadd.u.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fdiv.u.nxv8f16(, , ) +declare @llvm.aarch64.sve.fdiv.u.nxv4f32(, , ) +declare @llvm.aarch64.sve.fdiv.u.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fmax.u.nxv8f16(, , ) +declare @llvm.aarch64.sve.fmax.u.nxv4f32(, , ) +declare @llvm.aarch64.sve.fmax.u.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fmaxnm.u.nxv8f16(, , ) +declare @llvm.aarch64.sve.fmaxnm.u.nxv4f32(, , ) +declare @llvm.aarch64.sve.fmaxnm.u.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fmin.u.nxv8f16(, , ) +declare @llvm.aarch64.sve.fmin.u.nxv4f32(, , ) +declare @llvm.aarch64.sve.fmin.u.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fminnm.u.nxv8f16(, , ) +declare @llvm.aarch64.sve.fminnm.u.nxv4f32(, , ) +declare @llvm.aarch64.sve.fminnm.u.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fmla.u.nxv8f16(, , , ) +declare @llvm.aarch64.sve.fmla.u.nxv4f32(, , , ) +declare 
<vscale x 2 x double> @llvm.aarch64.sve.fmla.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmls.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmls.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmls.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmulx.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmulx.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmulx.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fnmla.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fnmla.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fnmla.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fnmls.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fnmls.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fnmls.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
--
2.7.4
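
Illustrative sketch (not applied by the patch): the IR below contrasts an existing merging intrinsic with the corresponding "_u" intrinsic added here, assuming the nxv4f32 element types used throughout the new test file. For these arithmetic intrinsics the merging form defines the inactive lanes of the result (they follow the first data operand), which constrains instruction selection; the "_u" form leaves inactive lanes undefined, so performIntrinsicCombine can lower it straight to the corresponding predicated node (AArch64ISD::FADD_PRED here) and selection is free to swap operands or pick reversed/accumulator forms, as the fdivr, fsubr, fmad, fmsb, fnmad and fnmsb tests above demonstrate.

; Sketch only -- uses the nxv4f32 signatures exercised by the test file.
define <vscale x 4 x float> @fadd_merging(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
  ; Merging form: inactive lanes of %r take the value of %a, so %a must be preserved.
  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %r
}

define <vscale x 4 x float> @fadd_undef_inactive(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
  ; New "_u" form: inactive lanes of %r are undefined, matching the
  ; AArch64ISD::FADD_PRED node it is combined into.
  %r = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %r
}

declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)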