AArch64: correct dot-product RTL patterns for aarch64.

author Tamar Christina <tamar.christina@arm.com>

Mon, 26 Jul 2021 09:23:21 +0000 (10:23 +0100)

committer Tamar Christina <tamar.christina@arm.com>

Mon, 26 Jul 2021 09:23:21 +0000 (10:23 +0100)
author Tamar Christina <tamar.christina@arm.com>
Mon, 26 Jul 2021 09:23:21 +0000 (10:23 +0100)
committer Tamar Christina <tamar.christina@arm.com>
Mon, 26 Jul 2021 09:23:21 +0000 (10:23 +0100)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def

index 3bb45a8..402453a 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -375,8 +375,8 @@
    BUILTIN_VSDQ_I_DI (BINOP_UUS, urshl, 0, NONE)
  
    /* Implemented by <sur><dotprod>_prod<dot_mode>.  */
-  BUILTIN_VB (TERNOP, sdot, 0, NONE)
-  BUILTIN_VB (TERNOPU, udot, 0, NONE)
+  BUILTIN_VB (TERNOP, sdot_prod, 10, NONE)
+  BUILTIN_VB (TERNOPU, udot_prod, 10, NONE)
    BUILTIN_VB (TERNOP_SUSS, usdot_prod, 10, NONE)
    /* Implemented by aarch64_<sur><dotprod>_lane{q}<dot_mode>.  */
    BUILTIN_VB (QUADOP_LANE, sdot_lane, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md

index bf667b9..13c8698 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -587,19 +587,8 @@
    DONE;
  })
  
-;; These instructions map to the __builtins for the Dot Product operations.
-(define_insn "aarch64_<sur>dot<vsi2qi>"
-  [(set (match_operand:VS 0 "register_operand" "=w")
-       (plus:VS (match_operand:VS 1 "register_operand" "0")
-               (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
-                           (match_operand:<VSI2QI> 3 "register_operand" "w")]
-               DOTPROD)))]
-  "TARGET_DOTPROD"
-  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
-  [(set_attr "type" "neon_dot<q>")]
-)
-
-;; These expands map to the Dot Product optab the vectorizer checks for.
+;; These expands map to the Dot Product optab the vectorizer checks for
+;; and to the intrinsics patttern.
  ;; The auto-vectorizer expects a dot product builtin that also does an
  ;; accumulation into the provided register.
  ;; Given the following pattern
@@ -619,20 +608,17 @@
  ;; ...
  ;;
  ;; and so the vectorizer provides r, in which the result has to be accumulated.
-(define_expand "<sur>dot_prod<vsi2qi>"
-  [(set (match_operand:VS 0 "register_operand")
-       (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
-                           (match_operand:<VSI2QI> 2 "register_operand")]
-                DOTPROD)
-               (match_operand:VS 3 "register_operand")))]
+(define_insn "<sur>dot_prod<vsi2qi>"
+  [(set (match_operand:VS 0 "register_operand" "=w")
+       (plus:VS
+         (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
+                     (match_operand:<VSI2QI> 2 "register_operand" "w")]
+                     DOTPROD)
+         (match_operand:VS 3 "register_operand" "0")))]
    "TARGET_DOTPROD"
-{
-  emit_insn (
-    gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
-                                   operands[2]));
-  emit_insn (gen_rtx_SET (operands[0], operands[3]));
-  DONE;
-})
+  "<sur>dot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
+  [(set_attr "type" "neon_dot<q>")]
+)
  
  ;; These instructions map to the __builtins for the Armv8.6-a I8MM usdot
  ;; (vector) Dot Product operation and the vectorized optab.
@@ -652,11 +638,12 @@
  ;; indexed operations.
  (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
    [(set (match_operand:VS 0 "register_operand" "=w")
-       (plus:VS (match_operand:VS 1 "register_operand" "0")
-               (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
-                           (match_operand:V8QI 3 "register_operand" "<h_con>")
-                           (match_operand:SI 4 "immediate_operand" "i")]
-               DOTPROD)))]
+       (plus:VS
+         (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
+                     (match_operand:V8QI 3 "register_operand" "<h_con>")
+                     (match_operand:SI 4 "immediate_operand" "i")]
+                     DOTPROD)
+         (match_operand:VS 1 "register_operand" "0")))]
    "TARGET_DOTPROD"
    {
      operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
@@ -667,11 +654,12 @@
  
  (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
    [(set (match_operand:VS 0 "register_operand" "=w")
-       (plus:VS (match_operand:VS 1 "register_operand" "0")
-               (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
-                           (match_operand:V16QI 3 "register_operand" "<h_con>")
-                           (match_operand:SI 4 "immediate_operand" "i")]
-               DOTPROD)))]
+       (plus:VS
+         (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
+                     (match_operand:V16QI 3 "register_operand" "<h_con>")
+                     (match_operand:SI 4 "immediate_operand" "i")]
+                     DOTPROD)
+         (match_operand:VS 1 "register_operand" "0")))]
    "TARGET_DOTPROD"
    {
      operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
@@ -944,8 +932,7 @@
         rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
         rtx abd = gen_reg_rtx (V16QImode);
         emit_insn (gen_aarch64_<sur>abdv16qi (abd, operands[1], operands[2]));
-       emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
-                                         abd, ones));
+       emit_insn (gen_udot_prodv16qi (operands[0], abd, ones, operands[3]));
         DONE;
        }
      rtx reduc = gen_reg_rtx (V8HImode);
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h

index 0f43994..313b35f 100644 (file)
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -31472,28 +31472,28 @@ __extension__ extern __inline uint32x2_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  vdot_u32 (uint32x2_t __r, uint8x8_t __a, uint8x8_t __b)
  {
-  return __builtin_aarch64_udotv8qi_uuuu (__r, __a, __b);
+  return __builtin_aarch64_udot_prodv8qi_uuuu (__a, __b, __r);
  }
  
  __extension__ extern __inline uint32x4_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  vdotq_u32 (uint32x4_t __r, uint8x16_t __a, uint8x16_t __b)
  {
-  return __builtin_aarch64_udotv16qi_uuuu (__r, __a, __b);
+  return __builtin_aarch64_udot_prodv16qi_uuuu (__a, __b, __r);
  }
  
  __extension__ extern __inline int32x2_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  vdot_s32 (int32x2_t __r, int8x8_t __a, int8x8_t __b)
  {
-  return __builtin_aarch64_sdotv8qi (__r, __a, __b);
+  return __builtin_aarch64_sdot_prodv8qi (__a, __b, __r);
  }
  
  __extension__ extern __inline int32x4_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  vdotq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b)
  {
-  return __builtin_aarch64_sdotv16qi (__r, __a, __b);
+  return __builtin_aarch64_sdot_prodv16qi (__a, __b, __r);
  }
  
  __extension__ extern __inline uint32x2_t
author	Tamar Christina <tamar.christina@arm.com>
	Mon, 26 Jul 2021 09:23:21 +0000 (10:23 +0100)
committer	Tamar Christina <tamar.christina@arm.com>
	Mon, 26 Jul 2021 09:23:21 +0000 (10:23 +0100)
gcc/config/aarch64/aarch64-simd-builtins.def		patch \| blob \| history
gcc/config/aarch64/aarch64-simd.md		patch \| blob \| history
gcc/config/aarch64/arm_neon.h		patch \| blob \| history