This patch extends the fix for PR106253 to AArch32. As with AArch64,
we were using ACLE intrinsics to vectorise scalar built-ins, even
though the two sometimes have different ECF_* flags. (That in turn
is because the ACLE intrinsics should follow the instruction semantics
as closely as possible, whereas the scalar built-ins follow language
specs.)
The patch also removes the copysignf built-in, which only existed
for this purpose and wasn't a “real” arm_neon.h built-in.
Doing this also has the side-effect of enabling vectorisation of
rint and roundeven. Logically that should be a separate patch,
but making it one would have meant adding a new int iterator
for the original set of instructions and then removing it again
when adding the new functions.
I've restricted the bswap tests to little-endian because we end
up with excessive spilling on big-endian. E.g.:
sub sp, sp, #8
vstr d1, [sp]
vldr d16, [sp]
vrev16.8 d16, d16
vstr d16, [sp]
vldr d0, [sp]
add sp, sp, #8
@ sp needed
bx lr
Similarly, the copysign tests require little-endian because on
big-endian we unnecessarily load the constant from the constant pool:
vldr.32 s15, .L3
vdup.32 d0, d7[1]
vbsl d0, d2, d1
bx lr
.L3:
.word -2147483648
gcc/
PR target/106253
* config/arm/arm-builtins.cc (arm_builtin_vectorized_function):
Delete.
* config/arm/arm-protos.h (arm_builtin_vectorized_function): Delete.
* config/arm/arm.cc (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION):
Delete.
* config/arm/arm_neon_builtins.def (copysignf): Delete.
* config/arm/iterators.md (nvrint_pattern): New attribute.
* config/arm/neon.md (<NEON_VRINT:nvrint_pattern><VCVTF:mode>2):
New pattern.
(l<NEON_VCVT:nvrint_pattern><su_optab><VCVTF:mode><v_cmp_result>2):
Likewise.
(neon_copysignf<mode>): Rename to...
(copysign<mode>3): ...this.
gcc/testsuite/
PR target/106253
* gcc.target/arm/vect_unary_1.c: New test.
* gcc.target/arm/vect_binary_1.c: Likewise.
return NULL_RTX;
}
-tree
-arm_builtin_vectorized_function (unsigned int fn, tree type_out, tree type_in)
-{
- machine_mode in_mode, out_mode;
- int in_n, out_n;
- bool out_unsigned_p = TYPE_UNSIGNED (type_out);
-
- /* Can't provide any vectorized builtins when we can't use NEON. */
- if (!TARGET_NEON)
- return NULL_TREE;
-
- if (TREE_CODE (type_out) != VECTOR_TYPE
- || TREE_CODE (type_in) != VECTOR_TYPE)
- return NULL_TREE;
-
- out_mode = TYPE_MODE (TREE_TYPE (type_out));
- out_n = TYPE_VECTOR_SUBPARTS (type_out);
- in_mode = TYPE_MODE (TREE_TYPE (type_in));
- in_n = TYPE_VECTOR_SUBPARTS (type_in);
-
-/* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
- decl of the vectorized builtin for the appropriate vector mode.
- NULL_TREE is returned if no such builtin is available. */
-#undef ARM_CHECK_BUILTIN_MODE
-#define ARM_CHECK_BUILTIN_MODE(C) \
- (TARGET_VFP5 \
- && flag_unsafe_math_optimizations \
- && ARM_CHECK_BUILTIN_MODE_1 (C))
-
-#undef ARM_CHECK_BUILTIN_MODE_1
-#define ARM_CHECK_BUILTIN_MODE_1(C) \
- (out_mode == SFmode && out_n == C \
- && in_mode == SFmode && in_n == C)
-
-#undef ARM_FIND_VRINT_VARIANT
-#define ARM_FIND_VRINT_VARIANT(N) \
- (ARM_CHECK_BUILTIN_MODE (2) \
- ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
- : (ARM_CHECK_BUILTIN_MODE (4) \
- ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
- : NULL_TREE))
-
- switch (fn)
- {
- CASE_CFN_FLOOR:
- return ARM_FIND_VRINT_VARIANT (vrintm);
- CASE_CFN_CEIL:
- return ARM_FIND_VRINT_VARIANT (vrintp);
- CASE_CFN_TRUNC:
- return ARM_FIND_VRINT_VARIANT (vrintz);
- CASE_CFN_ROUND:
- return ARM_FIND_VRINT_VARIANT (vrinta);
-#undef ARM_CHECK_BUILTIN_MODE_1
-#define ARM_CHECK_BUILTIN_MODE_1(C) \
- (out_mode == SImode && out_n == C \
- && in_mode == SFmode && in_n == C)
-
-#define ARM_FIND_VCVT_VARIANT(N) \
- (ARM_CHECK_BUILTIN_MODE (2) \
- ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \
- : (ARM_CHECK_BUILTIN_MODE (4) \
- ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \
- : NULL_TREE))
-
-#define ARM_FIND_VCVTU_VARIANT(N) \
- (ARM_CHECK_BUILTIN_MODE (2) \
- ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \
- : (ARM_CHECK_BUILTIN_MODE (4) \
- ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \
- : NULL_TREE))
- CASE_CFN_LROUND:
- return (out_unsigned_p
- ? ARM_FIND_VCVTU_VARIANT (vcvta)
- : ARM_FIND_VCVT_VARIANT (vcvta));
- CASE_CFN_LCEIL:
- return (out_unsigned_p
- ? ARM_FIND_VCVTU_VARIANT (vcvtp)
- : ARM_FIND_VCVT_VARIANT (vcvtp));
- CASE_CFN_LFLOOR:
- return (out_unsigned_p
- ? ARM_FIND_VCVTU_VARIANT (vcvtm)
- : ARM_FIND_VCVT_VARIANT (vcvtm));
-#undef ARM_CHECK_BUILTIN_MODE
-#define ARM_CHECK_BUILTIN_MODE(C, N) \
- (out_mode == N##mode && out_n == C \
- && in_mode == N##mode && in_n == C)
- case CFN_BUILT_IN_BSWAP16:
- if (ARM_CHECK_BUILTIN_MODE (4, HI))
- return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
- else if (ARM_CHECK_BUILTIN_MODE (8, HI))
- return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
- else
- return NULL_TREE;
- case CFN_BUILT_IN_BSWAP32:
- if (ARM_CHECK_BUILTIN_MODE (2, SI))
- return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
- else if (ARM_CHECK_BUILTIN_MODE (4, SI))
- return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
- else
- return NULL_TREE;
- case CFN_BUILT_IN_BSWAP64:
- if (ARM_CHECK_BUILTIN_MODE (2, DI))
- return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
- else
- return NULL_TREE;
- CASE_CFN_COPYSIGN:
- if (ARM_CHECK_BUILTIN_MODE (2, SF))
- return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv2sf, false);
- else if (ARM_CHECK_BUILTIN_MODE (4, SF))
- return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv4sf, false);
- else
- return NULL_TREE;
-
- default:
- return NULL_TREE;
- }
- return NULL_TREE;
-}
-#undef ARM_FIND_VCVT_VARIANT
-#undef ARM_FIND_VCVTU_VARIANT
-#undef ARM_CHECK_BUILTIN_MODE
-#undef ARM_FIND_VRINT_VARIANT
-
void
arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
rtx (*) (rtx, rtx, rtx));
extern rtx mve_bool_vec_to_const (rtx const_vec);
extern rtx neon_make_constant (rtx, bool generate = true);
-extern tree arm_builtin_vectorized_function (unsigned int, tree, tree);
extern void neon_expand_vector_init (rtx, rtx);
extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT, const_tree);
extern void arm_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS
-#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
-#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
- arm_builtin_vectorized_function
-
#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
VAR10 (TERNOP, vbsl,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR2 (TERNOP, vbsl, v8hf, v4hf)
-VAR2 (UNOP, copysignf, v2sf, v4sf)
VAR2 (UNOP, vrintn, v2sf, v4sf)
VAR2 (UNOP, vrinta, v2sf, v4sf)
VAR2 (UNOP, vrintp, v2sf, v4sf)
(UNSPEC_VRINTA "unconditional") (UNSPEC_VRINTM "unconditional")
(UNSPEC_VRINTR "nocond") (UNSPEC_VRINTX "nocond")])
+(define_int_attr nvrint_pattern [(UNSPEC_NVRINTZ "btrunc")
+ (UNSPEC_NVRINTP "ceil")
+ (UNSPEC_NVRINTA "round")
+ (UNSPEC_NVRINTM "floor")
+ (UNSPEC_NVRINTX "rint")
+ (UNSPEC_NVRINTN "roundeven")])
+
(define_int_attr nvrint_variant [(UNSPEC_NVRINTZ "z") (UNSPEC_NVRINTP "p")
(UNSPEC_NVRINTA "a") (UNSPEC_NVRINTM "m")
(UNSPEC_NVRINTX "x") (UNSPEC_NVRINTN "n")])
[(set_attr "type" "neon_fp_mla_s<q>")]
)
+(define_expand "<NEON_VRINT:nvrint_pattern><VCVTF:mode>2"
+ [(set (match_operand:VCVTF 0 "s_register_operand")
+ (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand")]
+ NEON_VRINT))]
+ "TARGET_NEON && TARGET_VFP5 && flag_unsafe_math_optimizations"
+)
+
(define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
[(set (match_operand:VCVTF 0 "s_register_operand" "=w")
(unspec:VCVTF [(match_operand:VCVTF 1
[(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
)
+(define_expand "l<NEON_VCVT:nvrint_pattern><su_optab><VCVTF:mode><v_cmp_result>2"
+ [(set (match_operand:<V_cmp_result> 0 "register_operand")
+ (FIXUORS:<V_cmp_result>
+ (unspec:VCVTF [(match_operand:VCVTF 1 "register_operand")]
+ NEON_VCVT)))]
+ "TARGET_NEON && TARGET_VFP5 && flag_unsafe_math_optimizations"
+)
+
(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
(FIXUORS:<V_cmp_result> (unspec:VCVTF
"TARGET_I8MM"
)
-(define_expand "neon_copysignf<mode>"
+(define_expand "copysign<mode>3"
[(match_operand:VCVTF 0 "register_operand")
(match_operand:VCVTF 1 "register_operand")
(match_operand:VCVTF 2 "register_operand")]
--- /dev/null
+/* { dg-do compile { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_hard_ok } */
+/* { dg-require-effective-target arm_v8_neon_ok } */
+/* { dg-add-options arm_v8_neon } */
+/* { dg-additional-options "-O3 -mfloat-abi=hard" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <stdint.h>
+
+#define TEST2(OUT, NAME, IN) \
+OUT __attribute__((vector_size(sizeof(OUT) * 2))) \
+test2_##OUT##_##NAME##_##IN (float dummy, \
+ IN __attribute__((vector_size(sizeof(IN) * 2))) y, \
+ IN __attribute__((vector_size(sizeof(IN) * 2))) z) \
+{ \
+ OUT __attribute__((vector_size(sizeof(OUT) * 2))) x; \
+ x[0] = __builtin_##NAME (y[0], z[0]); \
+ x[1] = __builtin_##NAME (y[1], z[1]); \
+ return x; \
+}
+
+#define TEST4(OUT, NAME, IN) \
+OUT __attribute__((vector_size(sizeof(OUT) * 4))) \
+test4_##OUT##_##NAME##_##IN (float dummy, \
+ IN __attribute__((vector_size(sizeof(OUT) * 4))) y, \
+ IN __attribute__((vector_size(sizeof(OUT) * 4))) z) \
+{ \
+ OUT __attribute__((vector_size(sizeof(OUT) * 4))) x; \
+ x[0] = __builtin_##NAME (y[0], z[0]); \
+ x[1] = __builtin_##NAME (y[1], z[1]); \
+ x[2] = __builtin_##NAME (y[2], z[2]); \
+ x[3] = __builtin_##NAME (y[3], z[3]); \
+ return x; \
+}
+
+/*
+** test2_float_copysignf_float: { target arm_little_endian }
+** vmov.i32 d0, #(0x80000000|2147483648)(\s+.*)
+** vbsl d0, d2, d1
+** bx lr
+*/
+TEST2 (float, copysignf, float)
+
+/*
+** test4_float_copysignf_float: { target arm_little_endian }
+** vmov.i32 q0, #(0x80000000|2147483648)(\s+.*)
+** vbsl q0, q2, q1
+** bx lr
+*/
+TEST4 (float, copysignf, float)
--- /dev/null
+/* { dg-do compile { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_hard_ok } */
+/* { dg-require-effective-target arm_v8_neon_ok } */
+/* { dg-add-options arm_v8_neon } */
+/* { dg-additional-options "-Ofast -mfloat-abi=hard" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <stdint.h>
+
+#define TEST2(OUT, NAME, IN) \
+OUT __attribute__((vector_size(sizeof(OUT) * 2))) \
+test2_##OUT##_##NAME##_##IN (float dummy, \
+ IN __attribute__((vector_size(sizeof(IN) * 2))) y) \
+{ \
+ OUT __attribute__((vector_size(sizeof(OUT) * 2))) x; \
+ x[0] = __builtin_##NAME (y[0]); \
+ x[1] = __builtin_##NAME (y[1]); \
+ return x; \
+}
+
+#define TEST4(OUT, NAME, IN) \
+OUT __attribute__((vector_size(sizeof(OUT) * 4))) \
+test4_##OUT##_##NAME##_##IN (float dummy, \
+ IN __attribute__((vector_size(sizeof(OUT) * 4))) y) \
+{ \
+ OUT __attribute__((vector_size(sizeof(OUT) * 4))) x; \
+ x[0] = __builtin_##NAME (y[0]); \
+ x[1] = __builtin_##NAME (y[1]); \
+ x[2] = __builtin_##NAME (y[2]); \
+ x[3] = __builtin_##NAME (y[3]); \
+ return x; \
+}
+
+#define TEST8(OUT, NAME, IN) \
+OUT __attribute__((vector_size(sizeof(OUT) * 8))) \
+test8_##OUT##_##NAME##_##IN (float dummy, \
+ IN __attribute__((vector_size(sizeof(OUT) * 8))) y) \
+{ \
+ OUT __attribute__((vector_size(sizeof(OUT) * 8))) x; \
+ x[0] = __builtin_##NAME (y[0]); \
+ x[1] = __builtin_##NAME (y[1]); \
+ x[2] = __builtin_##NAME (y[2]); \
+ x[3] = __builtin_##NAME (y[3]); \
+ x[4] = __builtin_##NAME (y[4]); \
+ x[5] = __builtin_##NAME (y[5]); \
+ x[6] = __builtin_##NAME (y[6]); \
+ x[7] = __builtin_##NAME (y[7]); \
+ return x; \
+}
+
+/*
+** test2_float_truncf_float:
+** vrintz.f32 d0, d1
+** bx lr
+*/
+TEST2 (float, truncf, float)
+
+/*
+** test4_float_truncf_float:
+** vrintz.f32 q0, q1
+** bx lr
+*/
+TEST4 (float, truncf, float)
+
+/*
+** test2_float_roundf_float:
+** vrinta.f32 d0, d1
+** bx lr
+*/
+TEST2 (float, roundf, float)
+
+/*
+** test4_float_roundf_float:
+** vrinta.f32 q0, q1
+** bx lr
+*/
+TEST4 (float, roundf, float)
+
+/*
+** test2_float_floorf_float:
+** vrintm.f32 d0, d1
+** bx lr
+*/
+TEST2 (float, floorf, float)
+
+/*
+** test4_float_floorf_float:
+** vrintm.f32 q0, q1
+** bx lr
+*/
+TEST4 (float, floorf, float)
+
+/*
+** test2_float_ceilf_float:
+** vrintp.f32 d0, d1
+** bx lr
+*/
+TEST2 (float, ceilf, float)
+
+/*
+** test4_float_ceilf_float:
+** vrintp.f32 q0, q1
+** bx lr
+*/
+TEST4 (float, ceilf, float)
+
+/*
+** test2_float_rintf_float:
+** vrintx.f32 d0, d1
+** bx lr
+*/
+TEST2 (float, rintf, float)
+
+/*
+** test4_float_rintf_float:
+** vrintx.f32 q0, q1
+** bx lr
+*/
+TEST4 (float, rintf, float)
+
+/*
+** test2_float_roundevenf_float:
+** vrintn.f32 d0, d1
+** bx lr
+*/
+TEST2 (float, roundevenf, float)
+
+/*
+** test4_float_roundevenf_float:
+** vrintn.f32 q0, q1
+** bx lr
+*/
+TEST4 (float, roundevenf, float)
+
+/*
+** test2_int_roundf_float:
+** vcvta.s32.f32 d0, d1
+** bx lr
+*/
+TEST2 (int, roundf, float)
+
+/*
+** test4_int_roundf_float:
+** vcvta.s32.f32 q0, q1
+** bx lr
+*/
+TEST4 (int, roundf, float)
+
+/*
+** test2_int_floorf_float:
+** vcvtm.s32.f32 d0, d1
+** bx lr
+*/
+TEST2 (int, floorf, float)
+
+/*
+** test4_int_floorf_float:
+** vcvtm.s32.f32 q0, q1
+** bx lr
+*/
+TEST4 (int, floorf, float)
+
+/*
+** test2_int_ceilf_float:
+** vcvtp.s32.f32 d0, d1
+** bx lr
+*/
+TEST2 (int, ceilf, float)
+
+/*
+** test4_int_ceilf_float:
+** vcvtp.s32.f32 q0, q1
+** bx lr
+*/
+TEST4 (int, ceilf, float)
+
+/*
+** test2_int_clz_int:
+** vclz.i32 d0, d1
+** bx lr
+*/
+TEST2 (int, clz, int)
+
+/*
+** test4_int_clz_int:
+** vclz.i32 q0, q1
+** bx lr
+*/
+TEST4 (int, clz, int)
+
+/*
+** test4_int16_t_bswap16_int16_t: { target arm_little_endian }
+** vrev16.8 d0, d1
+** bx lr
+*/
+TEST4 (int16_t, bswap16, int16_t)
+
+/*
+** test8_int16_t_bswap16_int16_t: { target arm_little_endian }
+** vrev16.8 q0, q1
+** bx lr
+*/
+TEST8 (int16_t, bswap16, int16_t)
+
+/*
+** test2_int_bswap32_int: { target arm_little_endian }
+** vrev32.8 d0, d1
+** bx lr
+*/
+TEST2 (int, bswap32, int)
+
+/*
+** test4_int_bswap32_int: { target arm_little_endian }
+** vrev32.8 q0, q1
+** bx lr
+*/
+TEST4 (int, bswap32, int)
+
+/*
+** test2_int64_t_bswap64_int64_t: { target arm_little_endian }
+** vrev64.8 q0, q1
+** bx lr
+*/
+TEST2 (int64_t, bswap64, int64_t)