AArch64: Add support for sign differing dot-product usdot for NEON and SVE.

author Tamar Christina <tamar.christina@arm.com>

Wed, 14 Jul 2021 14:19:32 +0000 (15:19 +0100)

committer Tamar Christina <tamar.christina@arm.com>

Wed, 14 Jul 2021 14:19:32 +0000 (15:19 +0100)
author Tamar Christina <tamar.christina@arm.com>
Wed, 14 Jul 2021 14:19:32 +0000 (15:19 +0100)
committer Tamar Christina <tamar.christina@arm.com>
Wed, 14 Jul 2021 14:19:32 +0000 (15:19 +0100)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def

index ac5d4fc..063f503 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -374,10 +374,11 @@
    BUILTIN_VSDQ_I_DI (BINOP, srshl, 0, NONE)
    BUILTIN_VSDQ_I_DI (BINOP_UUS, urshl, 0, NONE)
  
-  /* Implemented by aarch64_<sur><dotprod>{_lane}{q}<dot_mode>.  */
+  /* Implemented by <sur><dotprod>_prod<dot_mode>.  */
    BUILTIN_VB (TERNOP, sdot, 0, NONE)
    BUILTIN_VB (TERNOPU, udot, 0, NONE)
-  BUILTIN_VB (TERNOP_SSUS, usdot, 0, NONE)
+  BUILTIN_VB (TERNOP_SSUS, usdot_prod, 10, NONE)
+  /* Implemented by aarch64_<sur><dotprod>_lane{q}<dot_mode>.  */
    BUILTIN_VB (QUADOP_LANE, sdot_lane, 0, NONE)
    BUILTIN_VB (QUADOPU_LANE, udot_lane, 0, NONE)
    BUILTIN_VB (QUADOP_LANE, sdot_laneq, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md

index 540244c..7489098 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -601,7 +601,7 @@
  
  ;; These instructions map to the __builtins for the armv8.6a I8MM usdot
  ;; (vector) Dot Product operation.
-(define_insn "aarch64_usdot<vsi2qi>"
+(define_insn "usdot_prod<vsi2qi>"
    [(set (match_operand:VS 0 "register_operand" "=w")
         (plus:VS
           (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc

index 8fd6d3f..02e42a7 100644 (file)
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -2366,7 +2366,7 @@ public:
         Hence we do the same rotation on arguments as svdot_impl does.  */
      e.rotate_inputs_left (0, 3);
      machine_mode mode = e.vector_mode (0);
-    insn_code icode = code_for_aarch64_dot_prod (UNSPEC_USDOT, mode);
+    insn_code icode = code_for_dot_prod (UNSPEC_USDOT, mode);
      return e.use_exact_insn (icode);
    }
  
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md

index 9e48c0e..359fe0e 100644 (file)
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -6870,7 +6870,7 @@
    [(set_attr "movprfx" "*,yes")]
  )
  
-(define_insn "@aarch64_<sur>dot_prod<vsi2qi>"
+(define_insn "@<sur>dot_prod<vsi2qi>"
    [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
          (plus:VNx4SI_ONLY
           (unspec:VNx4SI_ONLY
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h

index 17e059e..00d76ea 100644 (file)
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -34039,14 +34039,14 @@ __extension__ extern __inline int32x2_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  vusdot_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b)
  {
-  return __builtin_aarch64_usdotv8qi_ssus (__r, __a, __b);
+  return __builtin_aarch64_usdot_prodv8qi_ssus (__r, __a, __b);
  }
  
  __extension__ extern __inline int32x4_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  vusdotq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b)
  {
-  return __builtin_aarch64_usdotv16qi_ssus (__r, __a, __b);
+  return __builtin_aarch64_usdot_prodv16qi_ssus (__r, __a, __b);
  }
  
  __extension__ extern __inline int32x2_t
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c

new file mode 100644 (file)

index 0000000..b99a945
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */
+
+#define N 480
+#define SIGNEDNESS_1 unsigned
+#define SIGNEDNESS_2 signed
+#define SIGNEDNESS_3 signed
+#define SIGNEDNESS_4 unsigned
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a,
+   SIGNEDNESS_4 char *restrict b)
+{
+  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+    {
+      int av = a[i];
+      int bv = b[i];
+      SIGNEDNESS_2 short mult = av * bv;
+      res += mult;
+    }
+  return res;
+}
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b,
+   SIGNEDNESS_4 char *restrict a)
+{
+  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+    {
+      int av = a[i];
+      int bv = b[i];
+      SIGNEDNESS_2 short mult = av * bv;
+      res += mult;
+    }
+  return res;
+}
+
+/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c

new file mode 100644 (file)

index 0000000..094dd51
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a+i8mm+sve" } */
+
+#define N 480
+#define SIGNEDNESS_1 unsigned
+#define SIGNEDNESS_2 signed
+#define SIGNEDNESS_3 signed
+#define SIGNEDNESS_4 unsigned
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a,
+   SIGNEDNESS_4 char *restrict b)
+{
+  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+    {
+      int av = a[i];
+      int bv = b[i];
+      SIGNEDNESS_2 short mult = av * bv;
+      res += mult;
+    }
+  return res;
+}
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b,
+   SIGNEDNESS_4 char *restrict a)
+{
+  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+    {
+      int av = a[i];
+      int bv = b[i];
+      SIGNEDNESS_2 short mult = av * bv;
+      res += mult;
+    }
+  return res;
+}
+
+/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */
author	Tamar Christina <tamar.christina@arm.com>
	Wed, 14 Jul 2021 14:19:32 +0000 (15:19 +0100)
committer	Tamar Christina <tamar.christina@arm.com>
	Wed, 14 Jul 2021 14:19:32 +0000 (15:19 +0100)
gcc/config/aarch64/aarch64-simd-builtins.def		patch | blob | history
gcc/config/aarch64/aarch64-simd.md		patch | blob | history
gcc/config/aarch64/aarch64-sve-builtins-base.cc		patch | blob | history
gcc/config/aarch64/aarch64-sve.md		patch | blob | history
gcc/config/aarch64/arm_neon.h		patch | blob | history
gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c	[new file with mode: 0644]	patch | blob