[AArch64] Add vector pattern for __builtin_ctz
author Jiong Wang <jiong.wang@arm.com>
Fri, 21 Nov 2014 16:56:21 +0000 (16:56 +0000)
committer Jiong Wang <jiwang@gcc.gnu.org>
Fri, 21 Nov 2014 16:56:21 +0000 (16:56 +0000)
  gcc/
    * config/aarch64/iterators.md (VS): New mode iterator.
    (vsi2qi): New mode attribute.
    (VSI2QI): Likewise.
    * config/aarch64/aarch64-simd-builtins.def: New entry for ctz.
    * config/aarch64/aarch64-simd.md (ctz<mode>2): New pattern for ctz.
    * config/aarch64/aarch64-builtins.c
    (aarch64_builtin_vectorized_function): Support BUILT_IN_CTZ.

  gcc/testsuite/
    * gcc.target/aarch64/vect_ctz_1.c: New testcase.

From-SVN: r217938

gcc/ChangeLog
gcc/config/aarch64/aarch64-builtins.c
gcc/config/aarch64/aarch64-simd-builtins.def
gcc/config/aarch64/aarch64-simd.md
gcc/config/aarch64/iterators.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/vect_ctz_1.c [new file with mode: 0644]

index 7afa95f..c4eb793 100644 (file)
@@ -1,3 +1,13 @@
+2014-11-21  Jiong Wang  <jiong.wang@arm.com>
+
+       * config/aarch64/iterators.md (VS): New mode iterator.
+       (vsi2qi): New mode attribute.
+       (VSI2QI): Likewise.
+       * config/aarch64/aarch64-simd-builtins.def: New entry for ctz.
+       * config/aarch64/aarch64-simd.md (ctz<mode>2): New pattern for ctz.
+       * config/aarch64/aarch64-builtins.c
+       (aarch64_builtin_vectorized_function): Support BUILT_IN_CTZ.
+
 2014-11-21  H.J. Lu  <hongjiu.lu@intel.com>
 
        PR bootstrap/63784
index e9c4c85..a9d9704 100644 (file)
@@ -1199,6 +1199,14 @@ aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
               return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si];
             return NULL_TREE;
           }
+       case BUILT_IN_CTZ:
+          {
+           if (AARCH64_CHECK_BUILTIN_MODE (2, S))
+             return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv2si];
+           else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
+             return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv4si];
+           return NULL_TREE;
+          }
 #undef AARCH64_CHECK_BUILTIN_MODE
 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \
   (out_mode == N##Imode && out_n == C \
index 545c7da..936b671 100644 (file)
@@ -45,6 +45,7 @@
   BUILTIN_VD_BHSI (BINOP, addp, 0)
   VAR1 (UNOP, addp, 0, di)
   BUILTIN_VDQ_BHSI (UNOP, clz, 2)
+  BUILTIN_VS (UNOP, ctz, 2)
 
   /* be_checked_get_lane does its own lane swapping, so not a lane index.  */
   BUILTIN_VALL (GETREG, be_checked_get_lane, 0)
index 926eb76..eed01cf 100644 (file)
   [(set_attr "type" "neon_rbit")]
 )
 
+(define_expand "ctz<mode>2"
+  [(set (match_operand:VS 0 "register_operand")
+        (ctz:VS (match_operand:VS 1 "register_operand")))]
+  "TARGET_SIMD"
+  {
+     emit_insn (gen_bswap<mode> (operands[0], operands[1]));
+     rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
+                                            <MODE>mode, 0);
+     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
+     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
+     DONE;
+  }
+)
+
 (define_insn "*aarch64_mul3_elt<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
     (mult:VMUL
index 7dd3917..8b9ff98 100644 (file)
 ;; All byte modes.
 (define_mode_iterator VB [V8QI V16QI])
 
+;; 2 and 4 lane SI modes.
+(define_mode_iterator VS [V2SI V4SI])
+
 (define_mode_iterator TX [TI TF])
 
 ;; Opaque structure modes.
                      (V2DI  "p") (V2DF  "p")
                      (V2SF "p") (V4SF  "v")])
 
+(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")])
+(define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")])
+
 ;; -------------------------------------------------------------------
 ;; Code Iterators
 ;; -------------------------------------------------------------------
index 17f7110..9d6643b 100644 (file)
@@ -1,3 +1,7 @@
+2014-11-21  Jiong Wang  <jiong.wang@arm.com>
+
+       * gcc.target/aarch64/vect_ctz_1.c: New testcase.
+
 2014-11-21  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
 
        * gcc.target/aarch64/simd/vsqrt_f64_1.c
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_ctz_1.c b/gcc/testsuite/gcc.target/aarch64/vect_ctz_1.c
new file mode 100644 (file)
index 0000000..40823b0
--- /dev/null
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fno-inline" } */
+
+extern void abort ();
+
+#define TEST(name, subname, count) \
+void \
+count_tz_##name (unsigned *__restrict a, int *__restrict b) \
+{ \
+  int i; \
+  for (i = 0; i < count; i++) \
+    b[i] = __builtin_##subname (a[i]); \
+}
+
+#define CHECK(name, count, input, output) \
+  count_tz_##name (input, output); \
+  for (i = 0; i < count; i++) \
+    { \
+      if (output[i] != r[i]) \
+       abort (); \
+    }
+
+TEST (v4si, ctz, 4)
+TEST (v2si, ctz, 2)
+/* { dg-final { scan-assembler "clz\tv\[0-9\]+\.4s" } } */
+/* { dg-final { scan-assembler "clz\tv\[0-9\]+\.2s" } } */
+
+int
+main ()
+{
+  unsigned int x4[4] = { 0x0, 0xFF80, 0x1FFFF, 0xFF000000 };
+  int r[4] = { 32, 7, 0, 24 };
+  int d[4], i;
+
+  CHECK (v4si, 4, x4, d);
+  CHECK (v2si, 2, x4, d);
+
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */