[AArch64] Change representation of SABD in RTL

author Kyrylo Tkachov <kyrylo.tkachov@arm.com>

Thu, 7 Feb 2019 18:18:16 +0000 (18:18 +0000)

committer Kyrylo Tkachov <ktkachov@gcc.gnu.org>

Thu, 7 Feb 2019 18:18:16 +0000 (18:18 +0000)
author Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Thu, 7 Feb 2019 18:18:16 +0000 (18:18 +0000)
committer Kyrylo Tkachov <ktkachov@gcc.gnu.org>
Thu, 7 Feb 2019 18:18:16 +0000 (18:18 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 1969ff7..0984790 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2019-02-07  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+       * config/aarch64/iterators.md (max_opp): New code_attr.
+       (USMAX): New code iterator.
+       * config/aarch64/predicates.md (aarch64_smin): New predicate.
+       (aarch64_umin): Likewise.
+       * config/aarch64/aarch64-simd.md (abd<mode>_3): Rename to...
+       (*aarch64_<su>abd<mode>_3): ... Change RTL representation to
+       MINUS (MAX MIN).
+
  2019-02-07  H.J. Lu  <hongjiu.lu@intel.com>
  
         PR target/89229
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md

index cae9a8f..e3852c5 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -705,13 +705,22 @@
    [(set_attr "type" "neon_abs<q>")]
  )
  
-(define_insn "abd<mode>_3"
+;; It's tempting to represent SABD as ABS (MINUS op1 op2).
+;; This isn't accurate as ABS always treats its input as a signed value.
+;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
+;; Whereas SABD would return 192 (-64 signed) on the above example.
+;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
+(define_insn "*aarch64_<su>abd<mode>_3"
    [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
-       (abs:VDQ_BHSI (minus:VDQ_BHSI
-                      (match_operand:VDQ_BHSI 1 "register_operand" "w")
-                      (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
-  "TARGET_SIMD"
-  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+       (minus:VDQ_BHSI
+         (USMAX:VDQ_BHSI
+           (match_operand:VDQ_BHSI 1 "register_operand" "w")
+           (match_operand:VDQ_BHSI 2 "register_operand" "w"))
+         (match_operator 3 "aarch64_<max_opp>"
+           [(match_dup 1)
+            (match_dup 2)])))]
+  "TARGET_SIMD"
+  "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
    [(set_attr "type" "neon_abd<q>")]
  )
  
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md

index 85fa161..6caeeac 100644 (file)
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1052,6 +1052,9 @@
  
  (define_code_attr f16mac [(plus "a") (minus "s")])
  
+;; Map smax to smin and umax to umin.
+(define_code_attr max_opp [(smax "smin") (umax "umin")])
+
  ;; The number of subvectors in an SVE_STRUCT.
  (define_mode_attr vector_count [(VNx32QI "2") (VNx16HI "2")
                                 (VNx8SI  "2") (VNx4DI  "2")
@@ -1200,6 +1203,9 @@
  
  (define_code_iterator FMAXMIN [smax smin])
  
+;; Signed and unsigned max operations.
+(define_code_iterator USMAX [smax umax])
+
  ;; Code iterator for variants of vector max and min.
  (define_code_iterator ADDSUB [plus minus])
  
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md

index 855cf7b..b8e6d23 100644 (file)
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -319,6 +319,12 @@
    (ior (match_operand 0 "register_operand")
         (match_operand 0 "const_scalar_int_operand")))
  
+(define_predicate "aarch64_smin"
+  (match_code "smin"))
+
+(define_predicate "aarch64_umin"
+  (match_code "umin"))
+
  ;; True for integer comparisons and for FP comparisons other than LTGT or UNEQ.
  (define_special_predicate "aarch64_comparison_operator"
    (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered,
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 5004a5e..9951b17 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2019-02-07  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+       * gcc.target/aarch64/abd_1.c: New test.
+       * gcc.dg/sabd_1.c: Likewise.
+
  2019-02-07  Dominique d'Humieres  <dominiq@gcc.gnu.org>
  
         PR fortran/52789
diff --git a/gcc/testsuite/gcc.dg/sabd_1.c b/gcc/testsuite/gcc.dg/sabd_1.c

new file mode 100644 (file)

index 0000000..587e305
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/sabd_1.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fwrapv" } */
+/* Make sure vectorized absolute difference behaves the same as the scalar version.  */
+
+#define N 16
+signed char a[] = {-100, -100, -100, -100,-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100 };
+signed char b[] = { 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100 };
+
+signed char out[N];
+
+__attribute__ ((noinline,noipa))
+void
+foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      signed char diff = b[i] - a[i];
+      out[i] = diff > 0 ? diff : -diff;
+    }
+}
+
+signed char out2[N];
+
+__attribute__ ((noinline,noipa))
+void
+foo_scalar (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      asm volatile ("");
+      signed char diff = b[i] - a[i];
+      out2[i] = diff > 0 ? diff : -diff;
+    }
+}
+
+int
+main (void)
+{
+  foo ();
+  foo_scalar ();
+  for (int i = 0; i < N; i++)
+    if (out[i] != out2[i])
+      __builtin_abort ();
+
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/abd_1.c b/gcc/testsuite/gcc.target/aarch64/abd_1.c

new file mode 100644 (file)

index 0000000..a27cb5e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/abd_1.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+#pragma GCC target "+nosve"
+
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+#define N 1024
+
+#define FUNC(T)                                                                \
+void                                                                   \
+sabd_##T (signed T * restrict a, signed T * restrict b,                \
+               signed T * restrict out)                                \
+{                                                                      \
+  for (int i = 0; i < N; i++)                                          \
+    out[i] = MAX (a[i], b[i]) - MIN (a[i], b[i]);                      \
+}                                                                      \
+                                                                       \
+void                                                                   \
+uabd_##T (unsigned T * restrict a, unsigned T * restrict b,    \
+                 unsigned T * restrict out)                            \
+{                                                                      \
+  for (int i = 0; i < N; i++)                                          \
+    out[i] = MAX (a[i], b[i]) - MIN (a[i], b[i]);                      \
+}
+
+FUNC(char)
+FUNC(short)
+FUNC(int)
+
+/* { dg-final { scan-assembler-times "sabd\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "uabd\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "sabd\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */
+/* { dg-final { scan-assembler-times "uabd\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */
+/* { dg-final { scan-assembler-times "sabd\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */
+/* { dg-final { scan-assembler-times "uabd\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */
author	Kyrylo Tkachov <kyrylo.tkachov@arm.com>
	Thu, 7 Feb 2019 18:18:16 +0000 (18:18 +0000)
committer	Kyrylo Tkachov <ktkachov@gcc.gnu.org>
	Thu, 7 Feb 2019 18:18:16 +0000 (18:18 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/config/aarch64/aarch64-simd.md		patch \| blob \| history
gcc/config/aarch64/iterators.md		patch \| blob \| history
gcc/config/aarch64/predicates.md		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.dg/sabd_1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/abd_1.c	[new file with mode: 0644]	patch \| blob