[AArch64][SVE2] Shift-Right Accumulate combine patterns
author Yuliang Wang <yuliang.wang@arm.com>
Fri, 27 Sep 2019 08:10:30 +0000 (08:10 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
Fri, 27 Sep 2019 08:10:30 +0000 (08:10 +0000)
This patch adds combining support for SVE2's shift-right accumulate
instructions.

2019-09-27  Yuliang Wang  <yuliang.wang@arm.com>

gcc/
* config/aarch64/aarch64-sve2.md (aarch64_sve2_sra<mode>):
New combine pattern.

gcc/testsuite/
* gcc.target/aarch64/sve2/shracc_1.c: New test.

From-SVN: r276174

gcc/ChangeLog
gcc/config/aarch64/aarch64-sve2.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/sve2/shracc_1.c [new file with mode: 0644]

index 3d12337..64cc0a9 100644 (file)
@@ -1,3 +1,8 @@
+2019-09-27  Yuliang Wang  <yuliang.wang@arm.com>
+
+       * config/aarch64/aarch64-sve2.md (aarch64_sve2_sra<mode>):
+       New combine pattern.
+
 2019-09-26  Max Filippov  <jcmvbkbc@gmail.com>
 
        * config/xtensa/xtensa.c (hwloop_optimize): Insert zero overhead
index ee9acdc..b018f5b 100644 (file)
   }
 )
 
+;; Unpredicated signed / unsigned shift-right accumulate.
+;;
+;; Matches the sum of operand 1 and an UNSPEC_PRED_X-wrapped right shift
+;; (SHIFTRT) of operand 2 by the immediate operand 3, and emits it as a
+;; single <sra_op>sra instruction when TARGET_SVE2 holds.  Operand 1, the
+;; value being accumulated into, is tied to the destination by its "0"
+;; constraint, so the instruction updates that register in place.
+;;
+;; Operand 4 is the first element of the UNSPEC_PRED_X (presumably the
+;; governing predicate of the original shift -- confirm against the
+;; predicated shift patterns).  It is matched without a mode or predicate
+;; and is not referenced by the output template.  If it is not already a
+;; constant, the rewrite step replaces it with CONSTM1_RTX (<VPRED>mode).
+(define_insn_and_rewrite "*aarch64_sve2_sra<mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w")
+       (plus:SVE_I
+         (unspec:SVE_I
+           [(match_operand 4)
+            (SHIFTRT:SVE_I
+              (match_operand:SVE_I 2 "register_operand" "w")
+              (match_operand:SVE_I 3 "aarch64_simd_rshift_imm" "Dr"))]
+           UNSPEC_PRED_X)
+        (match_operand:SVE_I 1 "register_operand" "0")))]
+  "TARGET_SVE2"
+  "<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
+  "&& !CONSTANT_P (operands[4])"
+  {
+    operands[4] = CONSTM1_RTX (<VPRED>mode);
+  }
+)
+
index 82607de..84670ec 100644 (file)
@@ -1,3 +1,7 @@
+2019-09-27  Yuliang Wang  <yuliang.wang@arm.com>
+
+       * gcc.target/aarch64/sve2/shracc_1.c: New test.
+
 2019-09-26  Eric Botcazou  <ebotcazou@adacore.com>
 
        * gcc.dg/cpp/ucs.c: Add test for new warning and adjust.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/shracc_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/shracc_1.c
new file mode 100644 (file)
index 0000000..5535c7d
--- /dev/null
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
+
+#include <stdint.h>
+/* SHRACC defines f_TYPE_SHIFT, which adds b[i] >> SHIFT to a[i] for i in [0, n).  */
+#define SHRACC(TYPE,SHIFT)                     \
+void __attribute__ ((noinline, noclone))       \
+f_##TYPE##_##SHIFT                             \
+  (TYPE *restrict a, TYPE *restrict b, int n)  \
+{                                              \
+  for (int i = 0; i < n; i++)                  \
+    a[i] += b[i] >> (SHIFT);                   \
+}
+/* Signed element types; the directives at the end of the file expect SSRA.  */
+SHRACC (int8_t, 5);
+SHRACC (int16_t, 14);
+SHRACC (int32_t, 19);
+SHRACC (int64_t, 27);
+/* Unsigned element types; the directives at the end of the file expect USRA.  */
+SHRACC (uint8_t, 2);
+SHRACC (uint16_t, 6);
+SHRACC (uint32_t, 24);
+SHRACC (uint64_t, 53);
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 8 "vect" } } */
+
+/* { dg-final { scan-assembler-not {\tasr\t} } } */
+/* { dg-final { scan-assembler-not {\tlsr\t} } } */
+/* { dg-final { scan-assembler-not {\tadd\t} } } */
+
+/* { dg-final { scan-assembler-times {\tssra\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tssra\tz[0-9]+\.h, z[0-9]+\.h, #14\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tssra\tz[0-9]+\.s, z[0-9]+\.s, #19\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tssra\tz[0-9]+\.d, z[0-9]+\.d, #27\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tusra\tz[0-9]+\.b, z[0-9]+\.b, #2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tusra\tz[0-9]+\.h, z[0-9]+\.h, #6\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tusra\tz[0-9]+\.s, z[0-9]+\.s, #24\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tusra\tz[0-9]+\.d, z[0-9]+\.d, #53\n} 1 } } */