[AArch64] Implement usadv16qi and ssadv16qi standard names

author Kyrylo Tkachov <kyrylo.tkachov@arm.com>

Mon, 21 May 2018 11:21:07 +0000 (11:21 +0000)

committer Kyrylo Tkachov <ktkachov@gcc.gnu.org>

Mon, 21 May 2018 11:21:07 +0000 (11:21 +0000)
author Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Mon, 21 May 2018 11:21:07 +0000 (11:21 +0000)
committer Kyrylo Tkachov <ktkachov@gcc.gnu.org>
Mon, 21 May 2018 11:21:07 +0000 (11:21 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index b1a9017..b247c1f 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2018-05-21  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+       * config/aarch64/aarch64.md ("unspec"): Define UNSPEC_SABAL,
+       UNSPEC_SABDL2, UNSPEC_SADALP, UNSPEC_UABAL, UNSPEC_UABDL2,
+       UNSPEC_UADALP values.
+       * config/aarch64/iterators.md (ABAL): New int iterator.
+       (ABDL2): Likewise.
+       (ADALP): Likewise.
+       (sur): Add mappings for the above.
+       * config/aarch64/aarch64-simd.md (aarch64_<sur>abdl2<mode>_3):
+       New define_insn.
+       (aarch64_<sur>abal<mode>_4): Likewise.
+       (aarch64_<sur>adalp<mode>_3): Likewise.
+       (<sur>sadv16qi): New define_expand.
+
  2018-05-21  Alexander Nesterovskiy  <alexander.nesterovskiy@intel.com>
  
         * config/i386/i386.md (*movsf_internal): AVX falsedep fix.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md

index c53a774..fd971bf 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -612,6 +612,67 @@
    [(set_attr "type" "neon_abd<q>")]
  )
  
+(define_insn "aarch64_<sur>abdl2<mode>_3" ;; Emits <sur>abdl2; result has mode <VDBLW>, both inputs the iterated VDQV_S mode.
+  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
+       (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
+                         (match_operand:VDQV_S 2 "register_operand" "w")]
+       ABDL2))] ;; Kept as an unspec; the ABDL2 iterator supplies the signed and unsigned codes.
+  "TARGET_SIMD"
+  "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" ;; Inputs are printed with their full <Vtype> arrangement.
+  [(set_attr "type" "neon_abd<q>")]
+)
+;; Emits <sur>abal.  Operand 3 is the incoming accumulator and shares a register with the result.
+(define_insn "aarch64_<sur>abal<mode>_4"
+  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
+       (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
+                         (match_operand:VDQV_S 2 "register_operand" "w")
+                        (match_operand:<VDBLW> 3 "register_operand" "0")] ;; "0" ties operand 3 to operand 0.
+       ABAL))]
+  "TARGET_SIMD"
+  "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>" ;; Inputs use <Vhalftype>, not <Vtype>; operand 3 is implicit in %0.
+  [(set_attr "type" "neon_arith_acc<q>")]
+)
+;; Emits <sur>adalp.  Operand 2 is the incoming accumulator and shares a register with the result.
+(define_insn "aarch64_<sur>adalp<mode>_3"
+  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
+       (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
+                         (match_operand:<VDBLW> 2 "register_operand" "0")] ;; "0" ties operand 2 to operand 0.
+       ADALP))]
+  "TARGET_SIMD"
+  "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>" ;; Only the result and operand 1 are named; the accumulator is implicit in %0.
+  [(set_attr "type" "neon_reduc_add<q>")]
+)
+
+;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
+;; inputs in operands 1 and 2.  The sequence also has to perform a widening
+;; reduction of the difference into a V4SI vector and accumulate that into
+;; operand 3 before copying that into the result operand 0.
+;; Perform that with a sequence of:
+;; UABDL2      tmp.8h, op1.16b, op2.16b
+;; UABAL       tmp.8h, op1.8b, op2.8b
+;; UADALP      op3.4s, tmp.8h
+;; MOV         op0, op3 // should be eliminated in later passes.
+;; The signed version just uses the signed variants of the above instructions.
+
+(define_expand "<sur>sadv16qi" ;; Operands: 0 = V4SI result, 1 and 2 = V16QI inputs, 3 = V4SI accumulator.
+  [(use (match_operand:V4SI 0 "register_operand"))
+   (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
+                 (use (match_operand:V16QI 2 "register_operand"))] ABAL)
+   (use (match_operand:V4SI 3 "register_operand"))]
+  "TARGET_SIMD"
+  { /* The whole sequence is emitted by hand below; ABAL in the template provides the <sur> variants.  */
+    rtx reduc = gen_reg_rtx (V8HImode); /* Fresh V8HI pseudo holding the intermediate value.  */
+    emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
+                                              operands[2])); /* First write to reduc.  */
+    emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
+                                             operands[2], reduc)); /* reduc is both accumulator input and output.  */
+    emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
+                                             operands[3])); /* Note: operand 3 itself is overwritten here.  */
+    emit_move_insn (operands[0], operands[3]); /* The updated accumulator becomes the result.  */
+    DONE; /* Only the insns emitted above are generated for this expansion.  */
+  }
+)
+
  (define_insn "aba<mode>_3"
    [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
         (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md

index 6556303..7437971 100644 (file)
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -141,6 +141,9 @@
      UNSPEC_PRLG_STK
      UNSPEC_REV
      UNSPEC_RBIT
+    UNSPEC_SABAL
+    UNSPEC_SABDL2
+    UNSPEC_SADALP
      UNSPEC_SCVTF
      UNSPEC_SISD_NEG
      UNSPEC_SISD_SSHL
@@ -159,6 +162,9 @@
      UNSPEC_TLSLE24
      UNSPEC_TLSLE32
      UNSPEC_TLSLE48
+    UNSPEC_UABAL
+    UNSPEC_UABDL2
+    UNSPEC_UADALP
      UNSPEC_UCVTF
      UNSPEC_USHL_2S
      UNSPEC_VSTRUCTDUMMY
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md

index ae4ec9d..bf01044 100644 (file)
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1389,6 +1389,16 @@
  ;; -------------------------------------------------------------------
  ;; Int Iterators.
  ;; -------------------------------------------------------------------
+
+;; The unspec codes for the SABAL, UABAL AdvancedSIMD instructions.
+(define_int_iterator ABAL [UNSPEC_SABAL UNSPEC_UABAL])
+
+;; The unspec codes for the SABDL2, UABDL2 AdvancedSIMD instructions.
+(define_int_iterator ABDL2 [UNSPEC_SABDL2 UNSPEC_UABDL2])
+
+;; The unspec codes for the SADALP, UADALP AdvancedSIMD instructions.
+(define_int_iterator ADALP [UNSPEC_SADALP UNSPEC_UADALP])
+
  (define_int_iterator MAXMINV [UNSPEC_UMAXV UNSPEC_UMINV
                               UNSPEC_SMAXV UNSPEC_SMINV])
  
@@ -1596,6 +1606,9 @@
                       (UNSPEC_SHSUB "s") (UNSPEC_UHSUB "u")
                       (UNSPEC_SRHSUB "sr") (UNSPEC_URHSUB "ur")
                       (UNSPEC_ADDHN "") (UNSPEC_RADDHN "r")
+                     (UNSPEC_SABAL "s") (UNSPEC_UABAL "u")
+                     (UNSPEC_SABDL2 "s") (UNSPEC_UABDL2 "u")
+                     (UNSPEC_SADALP "s") (UNSPEC_UADALP "u")
                       (UNSPEC_SUBHN "") (UNSPEC_RSUBHN "r")
                       (UNSPEC_ADDHN2 "") (UNSPEC_RADDHN2 "r")
                       (UNSPEC_SUBHN2 "") (UNSPEC_RSUBHN2 "r")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 6f917cb..4d10e0a 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2018-05-21  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+       * gcc.c-torture/execute/ssad-run.c: New test.
+       * gcc.c-torture/execute/usad-run.c: Likewise.
+       * gcc.target/aarch64/ssadv16qi.c: Likewise.
+       * gcc.target/aarch64/usadv16qi.c: Likewise.
+
  2018-05-21  Tamar Christina  <tamar.christina@arm.com>
  
         * gcc.target/gcc.target/aarch64/sha3.h (veor3q_u8, veor3q_u32,
diff --git a/gcc/testsuite/gcc.c-torture/execute/ssad-run.c b/gcc/testsuite/gcc.c-torture/execute/ssad-run.c

new file mode 100644 (file)

index 0000000..f15f85f
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/ssad-run.c
@@ -0,0 +1,49 @@
+extern void abort ();
+extern int abs (int __x) __attribute__ ((__nothrow__, __leaf__)) __attribute__ ((__const__));
+/* Return the sum of abs (w[b] - x[b]) over 16 rows of 16 bytes; W rows are I bytes apart, X rows J.  */
+static int
+foo (signed char *w, int i, signed char *x, int j)
+{
+  int tot = 0;
+  for (int a = 0; a < 16; a++)
+    {
+      for (int b = 0; b < 16; b++)
+       tot += abs (w[b] - x[b]);
+      w += i; /* Step each input to its next row by its own stride.  */
+      x += j;
+    }
+  return tot;
+}
+/* Non-static wrapper: call foo with a W stride of 16 and an X stride of I, storing the sum in *RESULT.  */
+void
+bar (signed char *w, signed char *x, int i, int *result)
+{
+  *result = foo (w, 16, x, i);
+}
+/* Fill two 256-byte buffers with a pattern of period 8, run bar on them and abort unless the sum is 2368.  */
+int
+main (void)
+{
+  signed char m[256];
+  signed char n[256];
+  int sum, i;
+
+  for (i = 0; i < 256; ++i)
+    if (i % 2 == 0)
+      {
+       m[i] = (i % 8) * 2 + 1; /* Even index: m is positive (1, 5, 9, 13).  */
+       n[i] = -(i % 8); /* n is zero or negative (0, -2, -4, -6).  */
+      }
+    else
+      {
+       m[i] = -((i % 8) * 2 + 2); /* Odd index: m is negative (-4, -8, -12, -16).  */
+       n[i] = -((i % 8) >> 1); /* n is zero or negative (0, -1, -2, -3).  */
+      }
+
+  bar (m, n, 16, &sum); /* Stride 16 for both inputs, so all 256 bytes are covered.  */
+
+  if (sum != 2368) /* 74 per group of 8 elements, times 32 groups.  */
+    abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/usad-run.c b/gcc/testsuite/gcc.c-torture/execute/usad-run.c

new file mode 100644 (file)

index 0000000..904a634
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/usad-run.c
@@ -0,0 +1,49 @@
+extern void abort ();
+extern int abs (int __x) __attribute__ ((__nothrow__, __leaf__)) __attribute__ ((__const__));
+/* Return the sum of abs (w[b] - x[b]) over 16 rows of 16 bytes; W rows are I bytes apart, X rows J.  */
+static int
+foo (unsigned char *w, int i, unsigned char *x, int j)
+{
+  int tot = 0;
+  for (int a = 0; a < 16; a++)
+    {
+      for (int b = 0; b < 16; b++)
+       tot += abs (w[b] - x[b]);
+      w += i; /* Step each input to its next row by its own stride.  */
+      x += j;
+    }
+  return tot;
+}
+/* Non-static wrapper: call foo with a W stride of 16 and an X stride of I, storing the sum in *RESULT.  */
+void
+bar (unsigned char *w, unsigned char *x, int i, int *result)
+{
+  *result = foo (w, 16, x, i);
+}
+/* Fill two 256-byte buffers with a pattern of period 8, run bar on them and abort unless the sum is 32384.  */
+int
+main (void)
+{
+  unsigned char m[256];
+  unsigned char n[256];
+  int sum, i;
+
+  for (i = 0; i < 256; ++i)
+    if (i % 2 == 0)
+      {
+       m[i] = (i % 8) * 2 + 1; /* Even index: m is small (1, 5, 9, 13).  */
+       n[i] = -(i % 8); /* Negative values wrap on conversion to unsigned char (assuming 8-bit char: 0, 254, 252, 250).  */
+      }
+    else
+      {
+       m[i] = -((i % 8) * 2 + 2); /* Odd index: wraps likewise (252, 248, 244, 240 with 8-bit char).  */
+       n[i] = -((i % 8) >> 1); /* Wraps likewise (0, 255, 254, 253 with 8-bit char).  */
+      }
+
+  bar (m, n, 16, &sum); /* Stride 16 for both inputs, so all 256 bytes are covered.  */
+
+  if (sum != 32384) /* 1012 per group of 8 elements, times 32 groups.  */
+    abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/ssadv16qi.c b/gcc/testsuite/gcc.target/aarch64/ssadv16qi.c

new file mode 100644 (file)

index 0000000..bab7599
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ssadv16qi.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+#define N 1024
+
+signed char pix1[N], pix2[N];
+/* Return the sum of absolute differences between the N elements of pix1 and pix2.  */
+int foo (void)
+{
+  int i_sum = 0;
+  int i;
+
+  for (i = 0; i < N; i++)
+    i_sum += __builtin_abs (pix1[i] - pix2[i]);
+
+  return i_sum;
+}
+/* None of the following five instructions may appear in the generated assembly...  */
+/* { dg-final { scan-assembler-not {\tsshll\t} } } */
+/* { dg-final { scan-assembler-not {\tsshll2\t} } } */
+/* { dg-final { scan-assembler-not {\tssubl\t} } } */
+/* { dg-final { scan-assembler-not {\tssubl2\t} } } */
+/* { dg-final { scan-assembler-not {\tabs\t} } } */
+/* ...and each of the following three must appear at least once.  */
+/* { dg-final { scan-assembler {\tsabdl2\t} } } */
+/* { dg-final { scan-assembler {\tsabal\t} } } */
+/* { dg-final { scan-assembler {\tsadalp\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/usadv16qi.c b/gcc/testsuite/gcc.target/aarch64/usadv16qi.c

new file mode 100644 (file)

index 0000000..b7c08ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/usadv16qi.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+#define N 1024
+
+unsigned char pix1[N], pix2[N];
+/* Return the sum of absolute differences between the N elements of pix1 and pix2.  */
+int foo (void)
+{
+  int i_sum = 0;
+  int i;
+
+  for (i = 0; i < N; i++)
+    i_sum += __builtin_abs (pix1[i] - pix2[i]);
+
+  return i_sum;
+}
+/* None of the following five instructions may appear in the generated assembly...  */
+/* { dg-final { scan-assembler-not {\tushll\t} } } */
+/* { dg-final { scan-assembler-not {\tushll2\t} } } */
+/* { dg-final { scan-assembler-not {\tusubl\t} } } */
+/* { dg-final { scan-assembler-not {\tusubl2\t} } } */
+/* { dg-final { scan-assembler-not {\tabs\t} } } */
+/* ...and each of the following three must appear at least once.  */
+/* { dg-final { scan-assembler {\tuabdl2\t} } } */
+/* { dg-final { scan-assembler {\tuabal\t} } } */
+/* { dg-final { scan-assembler {\tuadalp\t} } } */
author	Kyrylo Tkachov <kyrylo.tkachov@arm.com>
	Mon, 21 May 2018 11:21:07 +0000 (11:21 +0000)
committer	Kyrylo Tkachov <ktkachov@gcc.gnu.org>
	Mon, 21 May 2018 11:21:07 +0000 (11:21 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/config/aarch64/aarch64-simd.md		patch \| blob \| history
gcc/config/aarch64/aarch64.md		patch \| blob \| history
gcc/config/aarch64/iterators.md		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.c-torture/execute/ssad-run.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.c-torture/execute/usad-run.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/ssadv16qi.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/usadv16qi.c	[new file with mode: 0644]	patch \| blob