From: Stam Markianos-Wright Date: Thu, 16 Jan 2020 14:20:48 +0000 (+0000) Subject: [GCC][PATCH][AArch64]Add ACLE intrinsics for dot product (usdot - vector, dot - by element) for AArch64 AdvSIMD ARMv8.6 Extension gcc/ChangeLog: 2020-01-16 Stam Markianos-Wright * config/aarch64/aarch64-builtins.c: (enum aarch64_type_qualifiers): New qualifier_lane_quadtup_index, TYPES_TERNOP_SSUS, TYPES_QUADOPSSUS_LANE_QUADTUP, TYPES_QUADOPSSSU_LANE_QUADTUP. (aarch64_simd_expand_args): Add case SIMD_ARG_LANE_QUADTUP_INDEX. (aarch64_simd_expand_builtin): Add qualifier_lane_quadtup_index. * config/aarch64/aarch64-simd-builtins.def (usdot, usdot_lane, usdot_laneq, sudot_lane,sudot_laneq): New. * config/aarch64/aarch64-simd.md (aarch64_usdot): New. (aarch64_dot_lane): New. * config/aarch64/arm_neon.h (vusdot_s32): New. (vusdotq_s32): New. (vusdot_lane_s32): New. (vsudot_lane_s32): New. * config/aarch64/iterators.md (DOTPROD_I8MM): New iterator. (UNSPEC_USDOT, UNSPEC_SUDOT): New unspecs. gcc/testsuite/ChangeLog: 2020-01-16 Stam Markianos-Wright * gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-1.c: New test. * gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-2.c: New test. * gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-3.c: New test. * gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-4.c: New test. --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9a94998..49dcecb 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,21 @@ +2020-01-16 Stam Markianos-Wright + + * config/aarch64/aarch64-builtins.c: (enum aarch64_type_qualifiers): + New qualifier_lane_quadtup_index, TYPES_TERNOP_SSUS, + TYPES_QUADOPSSUS_LANE_QUADTUP, TYPES_QUADOPSSSU_LANE_QUADTUP. + (aarch64_simd_expand_args): Add case SIMD_ARG_LANE_QUADTUP_INDEX. + (aarch64_simd_expand_builtin): Add qualifier_lane_quadtup_index. + * config/aarch64/aarch64-simd-builtins.def (usdot, usdot_lane, + usdot_laneq, sudot_lane,sudot_laneq): New. + * config/aarch64/aarch64-simd.md (aarch64_usdot): New. + (aarch64_dot_lane): New. + * config/aarch64/arm_neon.h (vusdot_s32): New. + (vusdotq_s32): New. + (vusdot_lane_s32): New. + (vsudot_lane_s32): New. + * config/aarch64/iterators.md (DOTPROD_I8MM): New iterator. + (UNSPEC_USDOT, UNSPEC_SUDOT): New unspecs. + 2020-01-16 Martin Liska * value-prof.c (dump_histogram_value): Fix diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index f0e0461..f50c485 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -110,6 +110,9 @@ enum aarch64_type_qualifiers /* Lane indices selected in pairs. - must be in range, and flipped for bigendian. */ qualifier_lane_pair_index = 0x800, + /* Lane indices selected in quadtuplets. - must be in range, and flipped for + bigendian. */ + qualifier_lane_quadtup_index = 0x1000, }; typedef struct @@ -176,6 +179,10 @@ aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, qualifier_immediate }; #define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_ternop_ssus_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_none }; +#define TYPES_TERNOP_SSUS (aarch64_types_ternop_ssus_qualifiers) static enum aarch64_type_qualifiers @@ -195,6 +202,19 @@ aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] #define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers) static enum aarch64_type_qualifiers +aarch64_types_quadopssus_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_unsigned, + qualifier_none, qualifier_lane_quadtup_index }; +#define TYPES_QUADOPSSUS_LANE_QUADTUP \ + (aarch64_types_quadopssus_lane_quadtup_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_quadopsssu_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, + qualifier_unsigned, qualifier_lane_quadtup_index }; +#define TYPES_QUADOPSSSU_LANE_QUADTUP \ + (aarch64_types_quadopsssu_lane_quadtup_qualifiers) + +static enum aarch64_type_qualifiers aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, qualifier_immediate }; @@ -1288,6 +1308,7 @@ typedef enum SIMD_ARG_LANE_INDEX, SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX, SIMD_ARG_LANE_PAIR_INDEX, + SIMD_ARG_LANE_QUADTUP_INDEX, SIMD_ARG_STOP } builtin_simd_arg; @@ -1377,9 +1398,25 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval, op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode); } - /* Fall through - if the lane index isn't a constant then - the next case will error. */ - /* FALLTHRU */ + /* If the lane index isn't a constant then error out. */ + goto constant_arg; + case SIMD_ARG_LANE_QUADTUP_INDEX: + /* Must be a previous operand into which this is an index and + index is restricted to nunits / 4. */ + gcc_assert (opc > 0); + if (CONST_INT_P (op[opc])) + { + machine_mode vmode = insn_data[icode].operand[opc - 1].mode; + unsigned int nunits + = GET_MODE_NUNITS (vmode).to_constant (); + aarch64_simd_lane_bounds (op[opc], 0, nunits / 4, exp); + /* Keep to GCC-vector-extension lane indices in the RTL. */ + int lane = INTVAL (op[opc]); + op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), + SImode); + } + /* If the lane index isn't a constant then error out. */ + goto constant_arg; case SIMD_ARG_CONSTANT: constant_arg: if (!(*insn_data[icode].operand[opc].predicate) @@ -1492,6 +1529,8 @@ aarch64_simd_expand_builtin (int fcode, tree exp, rtx target) args[k] = SIMD_ARG_LANE_INDEX; else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index) args[k] = SIMD_ARG_LANE_PAIR_INDEX; + else if (d->qualifiers[qualifiers_k] & qualifier_lane_quadtup_index) + args[k] = SIMD_ARG_LANE_QUADTUP_INDEX; else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index) args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX; else if (d->qualifiers[qualifiers_k] & qualifier_immediate) diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 57fc593..4744dd1 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -212,10 +212,15 @@ /* Implemented by aarch64_{_lane}{q}. */ BUILTIN_VB (TERNOP, sdot, 0) BUILTIN_VB (TERNOPU, udot, 0) + BUILTIN_VB (TERNOP_SSUS, usdot, 0) BUILTIN_VB (QUADOP_LANE, sdot_lane, 0) BUILTIN_VB (QUADOPU_LANE, udot_lane, 0) BUILTIN_VB (QUADOP_LANE, sdot_laneq, 0) BUILTIN_VB (QUADOPU_LANE, udot_laneq, 0) + BUILTIN_VB (QUADOPSSUS_LANE_QUADTUP, usdot_lane, 0) + BUILTIN_VB (QUADOPSSUS_LANE_QUADTUP, usdot_laneq, 0) + BUILTIN_VB (QUADOPSSSU_LANE_QUADTUP, sudot_lane, 0) + BUILTIN_VB (QUADOPSSSU_LANE_QUADTUP, sudot_laneq, 0) /* Implemented by aarch64_fcadd. */ BUILTIN_VHSDF (BINOP, fcadd90, 0) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 2989096..9e56e8c 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -506,6 +506,20 @@ [(set_attr "type" "neon_dot")] ) +;; These instructions map to the __builtins for the armv8.6a I8MM usdot +;; (vector) Dot Product operation. +(define_insn "aarch64_usdot" + [(set (match_operand:VS 0 "register_operand" "=w") + (plus:VS + (unspec:VS [(match_operand: 2 "register_operand" "w") + (match_operand: 3 "register_operand" "w")] + UNSPEC_USDOT) + (match_operand:VS 1 "register_operand" "0")))] + "TARGET_I8MM" + "usdot\\t%0., %2., %3." + [(set_attr "type" "neon_dot")] +) + ;; These expands map to the Dot Product optab the vectorizer checks for. ;; The auto-vectorizer expects a dot product builtin that also does an ;; accumulation into the provided register. @@ -573,6 +587,26 @@ [(set_attr "type" "neon_dot")] ) +;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot +;; (by element) Dot Product operations. +(define_insn "aarch64_dot_lane" + [(set (match_operand:VS 0 "register_operand" "=w") + (plus:VS + (unspec:VS [(match_operand: 2 "register_operand" "w") + (match_operand:VB 3 "register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + DOTPROD_I8MM) + (match_operand:VS 1 "register_operand" "0")))] + "TARGET_I8MM" + { + int nunits = GET_MODE_NUNITS (mode).to_constant (); + int lane = INTVAL (operands[4]); + operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode); + return "dot\\t%0., %2., %3.4b[%4]"; + } + [(set_attr "type" "neon_dot")] +) + (define_expand "copysign3" [(match_operand:VHSDF 0 "register_operand") (match_operand:VHSDF 1 "register_operand") diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index eaba156..c962140 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -34611,6 +34611,89 @@ vrnd64xq_f64 (float64x2_t __a) #include "arm_bf16.h" +/* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics. */ + +#pragma GCC push_options +#pragma GCC target ("arch=armv8.2-a+i8mm") + +__extension__ extern __inline int32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vusdot_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b) +{ + return __builtin_aarch64_usdotv8qi_ssus (__r, __a, __b); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vusdotq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b) +{ + return __builtin_aarch64_usdotv16qi_ssus (__r, __a, __b); +} + +__extension__ extern __inline int32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vusdot_lane_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b, const int __index) +{ + return __builtin_aarch64_usdot_lanev8qi_ssuss (__r, __a, __b, __index); +} + +__extension__ extern __inline int32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vusdot_laneq_s32 (int32x2_t __r, uint8x8_t __a, int8x16_t __b, + const int __index) +{ + return __builtin_aarch64_usdot_laneqv8qi_ssuss (__r, __a, __b, __index); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vusdotq_lane_s32 (int32x4_t __r, uint8x16_t __a, int8x8_t __b, + const int __index) +{ + return __builtin_aarch64_usdot_lanev16qi_ssuss (__r, __a, __b, __index); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vusdotq_laneq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b, + const int __index) +{ + return __builtin_aarch64_usdot_laneqv16qi_ssuss (__r, __a, __b, __index); +} + +__extension__ extern __inline int32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vsudot_lane_s32 (int32x2_t __r, int8x8_t __a, uint8x8_t __b, const int __index) +{ + return __builtin_aarch64_sudot_lanev8qi_sssus (__r, __a, __b, __index); +} + +__extension__ extern __inline int32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vsudot_laneq_s32 (int32x2_t __r, int8x8_t __a, uint8x16_t __b, + const int __index) +{ + return __builtin_aarch64_sudot_laneqv8qi_sssus (__r, __a, __b, __index); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vsudotq_lane_s32 (int32x4_t __r, int8x16_t __a, uint8x8_t __b, + const int __index) +{ + return __builtin_aarch64_sudot_lanev16qi_sssus (__r, __a, __b, __index); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vsudotq_laneq_s32 (int32x4_t __r, int8x16_t __a, uint8x16_t __b, + const int __index) +{ + return __builtin_aarch64_sudot_laneqv16qi_sssus (__r, __a, __b, __index); +} + +#pragma GCC pop_options + #undef __aarch64_vget_lane_any #undef __aarch64_vdup_lane_any diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index b9843b8..83720d9 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -799,6 +799,8 @@ UNSPEC_USUBLT ; Used in aarch64-sve2.md. UNSPEC_USUBWB ; Used in aarch64-sve2.md. UNSPEC_USUBWT ; Used in aarch64-sve2.md. + UNSPEC_USDOT ; Used in aarch64-simd.md. + UNSPEC_SUDOT ; Used in aarch64-simd.md. ]) ;; ------------------------------------------------------------------ @@ -1463,6 +1465,8 @@ (define_mode_attr f16quad [(V2SF "") (V4SF "q")]) +(define_mode_attr isquadop [(V8QI "") (V16QI "q")]) + (define_code_attr f16mac [(plus "a") (minus "s")]) ;; Map smax to smin and umax to umin. @@ -2045,6 +2049,8 @@ (define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT]) +(define_int_iterator DOTPROD_I8MM [UNSPEC_USDOT UNSPEC_SUDOT]) + (define_int_iterator ADDSUBHN [UNSPEC_ADDHN UNSPEC_RADDHN UNSPEC_SUBHN UNSPEC_RSUBHN]) @@ -2738,6 +2744,7 @@ (UNSPEC_URSHL "ur") (UNSPEC_SRSHL "sr") (UNSPEC_UQRSHL "u") (UNSPEC_SQRSHL "s") (UNSPEC_SDOT "s") (UNSPEC_UDOT "u") + (UNSPEC_USDOT "us") (UNSPEC_SUDOT "su") ]) (define_int_attr r [(UNSPEC_SQDMULH "") (UNSPEC_SQRDMULH "r") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 0d8aa60..8b01aa0 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2020-01-16 Stam Markianos-Wright + + * gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-1.c: New test. + * gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-2.c: New test. + * gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-3.c: New test. + * gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-4.c: New test. + 2020-01-16 Andre Vieira PR tree-optimization/92429 diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c new file mode 100755 index 0000000..ac4f821 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c @@ -0,0 +1,136 @@ +/* { dg-do assemble { target { aarch64*-*-* } } } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */ +/* { dg-add-options arm_v8_2a_i8mm } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ + +#include + +/* Unsigned-Signed Dot Product instructions. */ + +/* +**ufoo: +** usdot v0\.2s, v1\.8b, v2\.8b +** ret +*/ +int32x2_t ufoo (int32x2_t r, uint8x8_t x, int8x8_t y) +{ + return vusdot_s32 (r, x, y); +} + +/* +**ufooq: +** usdot v0\.4s, v1\.16b, v2\.16b +** ret +*/ +int32x4_t ufooq (int32x4_t r, uint8x16_t x, int8x16_t y) +{ + return vusdotq_s32 (r, x, y); +} + +/* +**ufoo_lane: +** usdot v0\.2s, v1\.8b, v2\.4b\[0\] +** ret +*/ +int32x2_t ufoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y) +{ + return vusdot_lane_s32 (r, x, y, 0); +} + +/* +**ufoo_laneq: +** usdot v0\.2s, v1\.8b, v2\.4b\[2\] +** ret +*/ +int32x2_t ufoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y) +{ + return vusdot_laneq_s32 (r, x, y, 2); +} + +/* +**ufooq_lane: +** usdot v0\.4s, v1\.16b, v2\.4b\[1\] +** ret +*/ +int32x4_t ufooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y) +{ + return vusdotq_lane_s32 (r, x, y, 1); +} + +/* +**ufooq_laneq: +** usdot v0\.4s, v1\.16b, v2\.4b\[3\] +** ret +*/ +int32x4_t ufooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y) +{ + return vusdotq_laneq_s32 (r, x, y, 3); +} + + +/* Signed-Unsigned Dot Product instructions. */ + +/* +**sfoo_lane: +** sudot v0\.2s, v1\.8b, v2\.4b\[0\] +** ret +*/ +int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y) +{ + return vsudot_lane_s32 (r, x, y, 0); +} + +/* +**sfoo_laneq: +** sudot v0\.2s, v1\.8b, v2\.4b\[2\] +** ret +*/ +int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y) +{ + return vsudot_laneq_s32 (r, x, y, 2); +} + +/* +**sfooq_lane: +** sudot v0\.4s, v1\.16b, v2\.4b\[1\] +** ret +*/ +int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y) +{ + return vsudotq_lane_s32 (r, x, y, 1); +} + +/* +**sfooq_laneq: +** sudot v0\.4s, v1\.16b, v2\.4b\[3\] +** ret +*/ +int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y) +{ + return vsudotq_laneq_s32 (r, x, y, 3); +} + +/* +**ufoo_untied: +** mov v0\.8b, v1\.8b +** usdot v0\.2s, v2\.8b, v3\.8b +** ret +*/ +int32x2_t ufoo_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y) +{ + return vusdot_s32 (r, x, y); +} + +/* +**ufooq_laneq_untied: +** mov v0\.16b, v1\.16b +** usdot v0\.4s, v2\.16b, v3\.4b\[3\] +** ret +*/ +int32x4_t ufooq_laneq_untied (int32x2_t unused, int32x4_t r, uint8x16_t x, int8x16_t y) +{ + return vusdotq_laneq_s32 (r, x, y, 3); +} + diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c new file mode 100755 index 0000000..96bca23 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c @@ -0,0 +1,137 @@ +/* { dg-do assemble { target { aarch64*-*-* } } } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */ +/* { dg-add-options arm_v8_2a_i8mm } */ +/* { dg-additional-options "-mbig-endian -save-temps" } */ +/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ + +#include + +/* Unsigned-Signed Dot Product instructions. */ + +/* +**ufoo: +** usdot v0\.2s, v1\.8b, v2\.8b +** ret +*/ +int32x2_t ufoo (int32x2_t r, uint8x8_t x, int8x8_t y) +{ + return vusdot_s32 (r, x, y); +} + +/* +**ufooq: +** usdot v0\.4s, v1\.16b, v2\.16b +** ret +*/ +int32x4_t ufooq (int32x4_t r, uint8x16_t x, int8x16_t y) +{ + return vusdotq_s32 (r, x, y); +} + +/* +**ufoo_lane: +** usdot v0\.2s, v1\.8b, v2\.4b\[0\] +** ret +*/ +int32x2_t ufoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y) +{ + return vusdot_lane_s32 (r, x, y, 0); +} + +/* +**ufoo_laneq: +** usdot v0\.2s, v1\.8b, v2\.4b\[2\] +** ret +*/ +int32x2_t ufoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y) +{ + return vusdot_laneq_s32 (r, x, y, 2); +} + +/* +**ufooq_lane: +** usdot v0\.4s, v1\.16b, v2\.4b\[1\] +** ret +*/ +int32x4_t ufooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y) +{ + return vusdotq_lane_s32 (r, x, y, 1); +} + +/* +**ufooq_laneq: +** usdot v0\.4s, v1\.16b, v2\.4b\[3\] +** ret +*/ +int32x4_t ufooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y) +{ + return vusdotq_laneq_s32 (r, x, y, 3); +} + + +/* Signed-Unsigned Dot Product instructions. */ + +/* +**sfoo_lane: +** sudot v0\.2s, v1\.8b, v2\.4b\[0\] +** ret +*/ +int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y) +{ + return vsudot_lane_s32 (r, x, y, 0); +} + +/* +**sfoo_laneq: +** sudot v0\.2s, v1\.8b, v2\.4b\[2\] +** ret +*/ +int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y) +{ + return vsudot_laneq_s32 (r, x, y, 2); +} + +/* +**sfooq_lane: +** sudot v0\.4s, v1\.16b, v2\.4b\[1\] +** ret +*/ +int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y) +{ + return vsudotq_lane_s32 (r, x, y, 1); +} + +/* +**sfooq_laneq: +** sudot v0\.4s, v1\.16b, v2\.4b\[3\] +** ret +*/ +int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y) +{ + return vsudotq_laneq_s32 (r, x, y, 3); +} + +/* +**ufoo_untied: +** mov v0\.8b, v1\.8b +** usdot v0\.2s, v2\.8b, v3\.8b +** ret +*/ +int32x2_t ufoo_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y) +{ + return vusdot_s32 (r, x, y); +} + +/* +**ufooq_laneq_untied: +** mov v0\.16b, v1\.16b +** usdot v0\.4s, v2\.16b, v3\.4b\[3\] +** ret +*/ +int32x4_t ufooq_laneq_untied (int32x2_t unused, int32x4_t r, uint8x16_t x, int8x16_t y) +{ + return vusdotq_laneq_s32 (r, x, y, 3); +} + + diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-3.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-3.c new file mode 100755 index 0000000..18ecabe --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-3.c @@ -0,0 +1,31 @@ +/* { dg-do assemble { target { aarch64*-*-* } } } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */ +/* { dg-add-options arm_v8_2a_i8mm } */ +/* { dg-additional-options "--save-temps" } */ +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ + +#include + +int32x2_t ufoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y) +{ + /* { dg-error "lane -1 out of range 0 - 1" "" { target *-*-* } 0 } */ + return vusdot_lane_s32 (r, x, y, -1); +} + +int32x2_t ufoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y) +{ + /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */ + return vusdot_laneq_s32 (r, x, y, -1); +} + +int32x4_t ufooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y) +{ + /* { dg-error "lane 2 out of range 0 - 1" "" { target *-*-* } 0 } */ + return vusdotq_lane_s32 (r, x, y, 2); +} + +int32x4_t ufooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y) +{ + /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */ + return vusdotq_laneq_s32 (r, x, y, 4); +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-4.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-4.c new file mode 100755 index 0000000..66c87d4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-4.c @@ -0,0 +1,31 @@ +/* { dg-do assemble { target { aarch64*-*-* } } } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */ +/* { dg-add-options arm_v8_2a_i8mm } */ +/* { dg-additional-options "--save-temps" } */ +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ + +#include + +int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y) +{ + /* { dg-error "lane -1 out of range 0 - 1" "" { target *-*-* } 0 } */ + return vsudot_lane_s32 (r, x, y, -1); +} + +int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y) +{ + /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */ + return vsudot_laneq_s32 (r, x, y, -1); +} + +int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y) +{ + /* { dg-error "lane 2 out of range 0 - 1" "" { target *-*-* } 0 } */ + return vsudotq_lane_s32 (r, x, y, 2); +} + +int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y) +{ + /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */ + return vsudotq_laneq_s32 (r, x, y, 4); +}