aarch64: Fix RTL patterns for UABA/SABA
authorKyrylo Tkachov <kyrylo.tkachov@arm.com>
Thu, 7 Jan 2021 14:02:02 +0000 (14:02 +0000)
committerKyrylo Tkachov <kyrylo.tkachov@arm.com>
Fri, 8 Jan 2021 10:29:25 +0000 (10:29 +0000)
Some time ago we changed the RTL representation of the (SU)ABD
instructions to a (MINUS (MAX) (MIN)) rather than a (MINUS (ABS) (ABS))
as it more correctly models the semantics.
We should do the same for the accumulation forms of these instructions:
UABA/SABA.

This patch does that and allows the new pattern to generate the unsigned
UABA form as well.
The new form also allows it to more easily be re-used to implement the
relevant arm_neon.h intrinsics in the future.

The testcase takes an -fno-tree-reassoc to work around a side-effect of
PR98581.

gcc/
* config/aarch64/aarch64-simd.md (aba<mode>_3): Rename to...
(aarch64_<su>aba<mode>): ... This.  Handle uaba as well.
Change RTL pattern to match.

gcc/testsuite/
* gcc.target/aarch64/usaba_1.c: New test.

gcc/config/aarch64/aarch64-simd.md
gcc/testsuite/gcc.target/aarch64/usaba_1.c [new file with mode: 0644]

index 85770c8..d23398e 100644 (file)
   }
 )
 
-(define_insn "aba<mode>_3"
+(define_insn "aarch64_<su>aba<mode>"
   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
-       (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
-                        (match_operand:VDQ_BHSI 1 "register_operand" "w")
-                        (match_operand:VDQ_BHSI 2 "register_operand" "w")))
-                      (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
-  "TARGET_SIMD"
-  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+       (plus:VDQ_BHSI (minus:VDQ_BHSI
+                        (USMAX:VDQ_BHSI
+                          (match_operand:VDQ_BHSI 2 "register_operand" "w")
+                          (match_operand:VDQ_BHSI 3 "register_operand" "w"))
+                        (<max_opp>:VDQ_BHSI
+                          (match_dup 2)
+                          (match_dup 3)))
+                      (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
+  "TARGET_SIMD"
+  "<su>aba\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
   [(set_attr "type" "neon_arith_acc<q>")]
 )
 
diff --git a/gcc/testsuite/gcc.target/aarch64/usaba_1.c b/gcc/testsuite/gcc.target/aarch64/usaba_1.c
new file mode 100644 (file)
index 0000000..58b5beb
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-reassoc" } */
+
+#pragma GCC target "+nosve"
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+#define FUNC(T, N, S)  \
+void saba_##S (T * __restrict__ a, T * __restrict__ b, T * __restrict__ c)     \
+{      \
+  int i;       \
+  for (i = 0; i < N; i++)      \
+    c[i] += (MAX (a[i], b[i]) - MIN (a[i], b[i]));     \
+}
+
+FUNC (signed char, 16, qi)
+/* { dg-final { scan-assembler-times {saba\tv[0-9]+\.16b, v[0-9]+\.16b, v[0-9]+\.16b} 1 } } */
+FUNC (short, 8, hi)
+/* { dg-final { scan-assembler-times {saba\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h} 1 } } */
+FUNC (int, 4, si)
+/* { dg-final { scan-assembler-times {saba\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s} 1 } } */
+FUNC (unsigned char, 16, uqi)
+/* { dg-final { scan-assembler-times {uaba\tv[0-9]+\.16b, v[0-9]+\.16b, v[0-9]+\.16b} 1 } } */
+FUNC (unsigned short, 8, uhi)
+/* { dg-final { scan-assembler-times {uaba\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h} 1 } } */
+FUNC (unsigned int, 4, usi)
+/* { dg-final { scan-assembler-times {uaba\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s} 1 } } */
+