AArch64: Fix left fold sum reduction RTL patterns [PR104049]

author Tamar Christina <tamar.christina@arm.com>
Thu, 7 Apr 2022 07:27:53 +0000 (08:27 +0100)
committer Tamar Christina <tamar.christina@arm.com>
Thu, 7 Apr 2022 07:27:53 +0000 (08:27 +0100)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 1873342..a00e1c6 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3385,20 +3385,6 @@
  
  ;; 'across lanes' add.
  
-(define_expand "reduc_plus_scal_<mode>"
-  [(match_operand:<VEL> 0 "register_operand")
-   (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
-              UNSPEC_ADDV)]
-  "TARGET_SIMD"
-  {
-    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
-    rtx scratch = gen_reg_rtx (<MODE>mode);
-    emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
-    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
-    DONE;
-  }
-)
-
  (define_insn "aarch64_faddp<mode>"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
@@ -3409,31 +3395,22 @@
    [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
  )
  
-(define_insn "aarch64_reduc_plus_internal<mode>"
- [(set (match_operand:VDQV 0 "register_operand" "=w")
-       (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
+(define_insn "reduc_plus_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+       (unspec:<VEL> [(match_operand:VDQV 1 "register_operand" "w")]
                     UNSPEC_ADDV))]
   "TARGET_SIMD"
   "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
    [(set_attr "type" "neon_reduc_add<q>")]
  )
  
-(define_insn "aarch64_<su>addlv<mode>"
- [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
-       (unspec:<VWIDE_S> [(match_operand:VDQV_L 1 "register_operand" "w")]
-                   USADDLV))]
- "TARGET_SIMD"
- "<su>addl<vp>\\t%<Vwstype>0<Vwsuf>, %1.<Vtype>"
-  [(set_attr "type" "neon_reduc_add<q>")]
-)
-
-(define_insn "aarch64_<su>addlp<mode>"
- [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
-       (unspec:<VDBLW> [(match_operand:VDQV_L 1 "register_operand" "w")]
-                   USADDLP))]
+(define_insn "reduc_plus_scal_v2si"
+ [(set (match_operand:SI 0 "register_operand" "=w")
+       (unspec:SI [(match_operand:V2SI 1 "register_operand" "w")]
+                   UNSPEC_ADDV))]
   "TARGET_SIMD"
- "<su>addlp\\t%0.<Vwhalf>, %1.<Vtype>"
-  [(set_attr "type" "neon_reduc_add<q>")]
+ "addp\\t%0.2s, %1.2s, %1.2s"
+  [(set_attr "type" "neon_reduc_add")]
  )
  
  ;; ADDV with result zero-extended to SI/DImode (for popcount).
@@ -3447,15 +3424,6 @@
    [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
  )
  
-(define_insn "aarch64_reduc_plus_internalv2si"
- [(set (match_operand:V2SI 0 "register_operand" "=w")
-       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
-                   UNSPEC_ADDV))]
- "TARGET_SIMD"
- "addp\\t%0.2s, %1.2s, %1.2s"
-  [(set_attr "type" "neon_reduc_add")]
-)
-
  (define_insn "reduc_plus_scal_<mode>"
   [(set (match_operand:<VEL> 0 "register_operand" "=w")
         (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
@@ -3467,7 +3435,7 @@
  
  (define_expand "reduc_plus_scal_v4sf"
   [(set (match_operand:SF 0 "register_operand")
-       (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
+       (unspec:SF [(match_operand:V4SF 1 "register_operand")]
                     UNSPEC_FADDV))]
   "TARGET_SIMD"
  {
@@ -3479,6 +3447,24 @@
    DONE;
  })
  
+(define_insn "aarch64_<su>addlv<mode>"
+ [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
+       (unspec:<VWIDE_S> [(match_operand:VDQV_L 1 "register_operand" "w")]
+                   USADDLV))]
+ "TARGET_SIMD"
+ "<su>addl<vp>\\t%<Vwstype>0<Vwsuf>, %1.<Vtype>"
+  [(set_attr "type" "neon_reduc_add<q>")]
+)
+
+(define_insn "aarch64_<su>addlp<mode>"
+ [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
+       (unspec:<VDBLW> [(match_operand:VDQV_L 1 "register_operand" "w")]
+                   USADDLP))]
+ "TARGET_SIMD"
+ "<su>addlp\\t%0.<Vwhalf>, %1.<Vtype>"
+  [(set_attr "type" "neon_reduc_add<q>")]
+)
+
  (define_insn "clrsb<mode>2"
    [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
          (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
diff --git a/gcc/testsuite/gcc.target/aarch64/vadd_reduc-1.c b/gcc/testsuite/gcc.target/aarch64/vadd_reduc-1.c
new file mode 100644
index 0000000..271a1c3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vadd_reduc-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+/*
+**bar:
+**     ...
+**     addv    s0, v0.4s
+**     fmov    w0, s0
+**     lsr     w1, w0, 16
+**     add     w0, w1, w0, uxth
+**     ret
+*/
+int bar (v4si x)
+{
+  unsigned int sum = vaddvq_s32 (x);
+  return (((uint16_t)(sum & 0xffff)) + ((uint32_t)sum >> 16));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/vadd_reduc-2.c b/gcc/testsuite/gcc.target/aarch64/vadd_reduc-2.c
new file mode 100644
index 0000000..0ad9695
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vadd_reduc-2.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -std=c99" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <stdint.h>
+
+#pragma GCC target "+nosve"
+
+/*
+**test:
+**     ...
+**     addv    s0, v0.4s
+**     fmov    w0, s0
+**     and     w1, w0, 65535
+**     add     w0, w1, w0, lsr 16
+**     lsr     w0, w0, 1
+**     ret
+*/
+int test (uint8_t *p, uint32_t t[1][1], int n) {
+
+  int sum = 0;
+  uint32_t a0;
+  for (int i = 0; i < 4; i++, p++)
+    t[i][0] = p[0];
+
+  for (int i = 0; i < 4; i++) {
+    {
+      int t0 = t[0][i] + t[0][i];
+      a0 = t0;
+    };
+    sum += a0;
+  }
+  return (((uint16_t)sum) + ((uint32_t)sum >> 16)) >> 1;
+}
author	Tamar Christina <tamar.christina@arm.com>
	Thu, 7 Apr 2022 07:27:53 +0000 (08:27 +0100)
committer	Tamar Christina <tamar.christina@arm.com>
	Thu, 7 Apr 2022 07:27:53 +0000 (08:27 +0100)
gcc/config/aarch64/aarch64-simd.md		patch | blob | history
gcc/testsuite/gcc.target/aarch64/vadd_reduc-1.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.target/aarch64/vadd_reduc-2.c	[new file with mode: 0644]	patch | blob