vect: inbranch SIMD clones
authorAndrew Stubbs <ams@codesourcery.com>
Thu, 28 Jul 2022 15:07:22 +0000 (16:07 +0100)
committerAndrew Stubbs <ams@codesourcery.com>
Wed, 22 Feb 2023 13:57:11 +0000 (13:57 +0000)
There has been support for generating "inbranch" SIMD clones for a long time,
but nothing actually uses them (as far as I can see).

This patch add supports for a sub-set of possible cases (those using
mask_mode == VOIDmode).  The other cases fail to vectorize, just as before,
so there should be no regressions.

The sub-set of support should cover all cases needed by amdgcn, at present.

gcc/ChangeLog:

* internal-fn.cc (expand_MASK_CALL): New.
* internal-fn.def (MASK_CALL): New.
* internal-fn.h (expand_MASK_CALL): New prototype.
* omp-simd-clone.cc (simd_clone_adjust_argument_types): Set vector_type
for mask arguments also.
* tree-if-conv.cc: Include cgraph.h.
(if_convertible_stmt_p): Do if conversions for calls to SIMD calls.
(predicate_statements): Convert functions to IFN_MASK_CALL.
* tree-vect-loop.cc (vect_get_datarefs_in_loop): Recognise
IFN_MASK_CALL as a SIMD function call.
* tree-vect-stmts.cc (vectorizable_simd_clone_call): Handle
IFN_MASK_CALL as an inbranch SIMD function call.
Generate the mask vector arguments.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/vect-simd-clone-16.c: New test.
* gcc.dg/vect/vect-simd-clone-16b.c: New test.
* gcc.dg/vect/vect-simd-clone-16c.c: New test.
* gcc.dg/vect/vect-simd-clone-16d.c: New test.
* gcc.dg/vect/vect-simd-clone-16e.c: New test.
* gcc.dg/vect/vect-simd-clone-16f.c: New test.
* gcc.dg/vect/vect-simd-clone-17.c: New test.
* gcc.dg/vect/vect-simd-clone-17b.c: New test.
* gcc.dg/vect/vect-simd-clone-17c.c: New test.
* gcc.dg/vect/vect-simd-clone-17d.c: New test.
* gcc.dg/vect/vect-simd-clone-17e.c: New test.
* gcc.dg/vect/vect-simd-clone-17f.c: New test.
* gcc.dg/vect/vect-simd-clone-18.c: New test.
* gcc.dg/vect/vect-simd-clone-18b.c: New test.
* gcc.dg/vect/vect-simd-clone-18c.c: New test.
* gcc.dg/vect/vect-simd-clone-18d.c: New test.
* gcc.dg/vect/vect-simd-clone-18e.c: New test.
* gcc.dg/vect/vect-simd-clone-18f.c: New test.

25 files changed:
gcc/internal-fn.cc
gcc/internal-fn.def
gcc/internal-fn.h
gcc/omp-simd-clone.cc
gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-simd-clone-16b.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-simd-clone-17b.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-simd-clone-18.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-simd-clone-18b.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c [new file with mode: 0644]
gcc/tree-if-conv.cc
gcc/tree-vect-loop.cc
gcc/tree-vect-stmts.cc

index a7936ff37ef6f3e15b3c0439b93d85cc1234751d..6e81dc05e0e0714256759b0594816df451415a2d 100644 (file)
@@ -4521,3 +4521,10 @@ void
 expand_ASSUME (internal_fn, gcall *)
 {
 }
+
+void
+expand_MASK_CALL (internal_fn, gcall *)
+{
+  /* This IFN should only exist between ifcvt and vect passes.  */
+  gcc_unreachable ();
+}
index 22b4a2d92967076c658965afcaeaf39b449a8caf..7fe742c2ae713e7152ab05cfdfba86e4e0aa3456 100644 (file)
@@ -461,6 +461,9 @@ DEF_INTERNAL_FN (SPACESHIP, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (ASSUME, ECF_CONST | ECF_LEAF | ECF_NOTHROW
                         | ECF_LOOPING_CONST_OR_PURE, NULL)
 
+/* For if-conversion of inbranch SIMD clones.  */
+DEF_INTERNAL_FN (MASK_CALL, ECF_NOVOPS, NULL)
+
 #undef DEF_INTERNAL_INT_FN
 #undef DEF_INTERNAL_FLT_FN
 #undef DEF_INTERNAL_FLT_FLOATN_FN
index 2fd82a19d158854b64ec872c63b2dc11c82010b2..08922ed4254898f5fffca3f33973e96ed9ce772f 100644 (file)
@@ -244,6 +244,7 @@ extern void expand_SHUFFLEVECTOR (internal_fn, gcall *);
 extern void expand_SPACESHIP (internal_fn, gcall *);
 extern void expand_TRAP (internal_fn, gcall *);
 extern void expand_ASSUME (internal_fn, gcall *);
+extern void expand_MASK_CALL (internal_fn, gcall *);
 
 extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
 
index 0949b8ba288dfc7e7692403bfc600983faddf5dd..03ff86e7d188cd8eb30d5d971c5c9bbc7d64543f 100644 (file)
@@ -942,6 +942,7 @@ simd_clone_adjust_argument_types (struct cgraph_node *node)
        }
       sc->args[i].orig_type = base_type;
       sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK;
+      sc->args[i].vector_type = adj.type;
     }
 
   if (node->definition)
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c
new file mode 100644 (file)
index 0000000..ce9a6da
--- /dev/null
@@ -0,0 +1,89 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+/* Test that simd inbranch clones work correctly.  */
+
+#ifndef TYPE
+#define TYPE int
+#endif
+
+/* A simple function that will be cloned.  */
+#pragma omp declare simd
+TYPE __attribute__((noinline))
+foo (TYPE a)
+{
+  return a + 1;
+}
+
+/* Check that "inbranch" clones are called correctly.  */
+
+void __attribute__((noipa))
+masked (TYPE * __restrict a, TYPE * __restrict b, int size)
+{
+  #pragma omp simd
+  for (int i = 0; i < size; i++)
+    b[i] = a[i]<1 ? foo(a[i]) : a[i];
+}
+
+/* Check that "inbranch" works when there might be unrolling.  */
+
+void __attribute__((noipa))
+masked_fixed (TYPE * __restrict a, TYPE * __restrict b)
+{
+  #pragma omp simd
+  for (int i = 0; i < 128; i++)
+    b[i] = a[i]<1 ? foo(a[i]) : a[i];
+}
+
+/* Validate the outputs.  */
+
+void
+check_masked (TYPE *b, int size)
+{
+  for (int i = 0; i < size; i++)
+    if (((TYPE)i < 1 && b[i] != (TYPE)(i + 1))
+       || ((TYPE)i >= 1 && b[i] != (TYPE)i))
+      {
+       __builtin_printf ("error at %d\n", i);
+       __builtin_exit (1);
+      }
+}
+
+int
+main ()
+{
+  TYPE a[1024];
+  TYPE b[1024];
+
+  for (int i = 0; i < 1024; i++)
+    a[i] = i;
+
+  masked_fixed (a, b);
+  check_masked (b, 128);
+
+  /* Test various sizes to cover machines with different vectorization
+     factors.  */
+  for (int size = 8; size <= 1024; size *= 2)
+    {
+      masked (a, b, size);
+      check_masked (b, size);
+    }
+
+  /* Test sizes that might exercise the partial vector code-path.  */
+  for (int size = 8; size <= 1024; size *= 2)
+    {
+      masked (a, b, size-4);
+      check_masked (b, size-4);
+    }
+
+  return 0;
+}
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+   Some targets use another call for the epilogue loops.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! aarch64*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target aarch64*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one.  */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16b.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16b.c
new file mode 100644 (file)
index 0000000..af543b6
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE float
+#include "vect-simd-clone-16.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+   Some targets use another call for the epilogue loops.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { avx_runtime || aarch64*-*-* } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime || aarch64*-*-* } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one.  */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c
new file mode 100644 (file)
index 0000000..677548a
--- /dev/null
@@ -0,0 +1,17 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE short
+#include "vect-simd-clone-16.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+   Some targets use another call for the epilogue loops.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
+
+/* x86_64 fails to use in-branch clones for TYPE=short.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one.  */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c
new file mode 100644 (file)
index 0000000..a9ae993
--- /dev/null
@@ -0,0 +1,17 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE char
+#include "vect-simd-clone-16.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+   Some targets use another call for the epilogue loops.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
+
+/* x86_64 fails to use in-branch clones for TYPE=char.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one.  */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c
new file mode 100644 (file)
index 0000000..c8b482b
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE double
+#include "vect-simd-clone-16.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+   Some targets use another call for the epilogue loops.
+   Some targets use pairs of vectors and do twice the calls.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { { ! avx_runtime } && { ! { i686*-*-* && { ! lp64 } } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime && { ! { i686*-*-* && { ! lp64 } } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one.  */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c
new file mode 100644 (file)
index 0000000..f42ac08
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE __INT64_TYPE__
+#include "vect-simd-clone-16.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+   Some targets use pairs of vectors and do twice the calls.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { i686*-*-* && { ! lp64 } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one.  */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c
new file mode 100644 (file)
index 0000000..756225e
--- /dev/null
@@ -0,0 +1,89 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+/* Test that simd inbranch clones work correctly.  */
+
+#ifndef TYPE
+#define TYPE int
+#endif
+
+/* A simple function that will be cloned.  */
+#pragma omp declare simd uniform(b)
+TYPE __attribute__((noinline))
+foo (TYPE a, TYPE b)
+{
+  return a + b;
+}
+
+/* Check that "inbranch" clones are called correctly.  */
+
+void __attribute__((noipa))
+masked (TYPE * __restrict a, TYPE * __restrict b, int size)
+{
+  #pragma omp simd
+  for (int i = 0; i < size; i++)
+    b[i] = a[i]<1 ? foo(a[i], 1) : a[i];
+}
+
+/* Check that "inbranch" works when there might be unrolling.  */
+
+void __attribute__((noipa))
+masked_fixed (TYPE * __restrict a, TYPE * __restrict b)
+{
+  #pragma omp simd
+  for (int i = 0; i < 128; i++)
+    b[i] = a[i]<1 ? foo(a[i], 1) : a[i];
+}
+
+/* Validate the outputs.  */
+
+void
+check_masked (TYPE *b, int size)
+{
+  for (int i = 0; i < size; i++)
+    if (((TYPE)i < 1 && b[i] != (TYPE)(i + 1))
+       || ((TYPE)i >= 1 && b[i] != (TYPE)i))
+      {
+       __builtin_printf ("error at %d\n", i);
+       __builtin_exit (1);
+      }
+}
+
+int
+main ()
+{
+  TYPE a[1024];
+  TYPE b[1024];
+
+  for (int i = 0; i < 1024; i++)
+    a[i] = i;
+
+  masked_fixed (a, b);
+  check_masked (b, 128);
+
+  /* Test various sizes to cover machines with different vectorization
+     factors.  */
+  for (int size = 8; size <= 1024; size *= 2)
+    {
+      masked (a, b, size);
+      check_masked (b, size);
+    }
+
+  /* Test sizes that might exercise the partial vector code-path.  */
+  for (int size = 8; size <= 1024; size *= 2)
+    {
+      masked (a, b, size-4);
+      check_masked (b, size-4);
+    }
+
+  return 0;
+}
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+   Some targets use another call for the epilogue loops.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! aarch64*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target aarch64*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one.  */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17b.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17b.c
new file mode 100644 (file)
index 0000000..8731c26
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE float
+#include "vect-simd-clone-17.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+   Some targets use another call for the epilogue loops.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { avx_runtime || aarch64*-*-* } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime || aarch64*-*-* } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one.  */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c
new file mode 100644 (file)
index 0000000..6683d1a
--- /dev/null
@@ -0,0 +1,17 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE short
+#include "vect-simd-clone-17.c"
+/* Ensure the the in-branch simd clones are used on targets that support them.
+   Some targets use another call for the epilogue loops.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
+
+/* x86_64 fails to use in-branch clones for TYPE=short.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one.  */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c
new file mode 100644 (file)
index 0000000..d38bde6
--- /dev/null
@@ -0,0 +1,17 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE char
+#include "vect-simd-clone-17.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+   Some targets use another call for the epilogue loops.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
+
+/* x86_64 fails to use in-branch clones for TYPE=char.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one.  */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c
new file mode 100644 (file)
index 0000000..f2a428c
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE double
+#include "vect-simd-clone-17.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+   Some targets use another call for the epilogue loops.
+   Some targets use pairs of vectors and do twice the calls.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { { ! avx_runtime } && { ! { i686*-*-* && { ! lp64 } } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime && { ! { i686*-*-* && { ! lp64 } } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one.  */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c
new file mode 100644 (file)
index 0000000..cd05dec
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE __INT64_TYPE__
+#include "vect-simd-clone-17.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+   Some targets use pairs of vectors and do twice the calls.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { i686*-*-* && { ! lp64 } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one.  */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18.c
new file mode 100644 (file)
index 0000000..febf9fd
--- /dev/null
@@ -0,0 +1,89 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+/* Test that simd inbranch clones work correctly.  */
+
+#ifndef TYPE
+#define TYPE int
+#endif
+
+/* A simple function that will be cloned.  */
+#pragma omp declare simd uniform(b)
+TYPE __attribute__((noinline))
+foo (TYPE b, TYPE a)
+{
+  return a + b;
+}
+
+/* Check that "inbranch" clones are called correctly.  */
+
+void __attribute__((noipa))
+masked (TYPE * __restrict a, TYPE * __restrict b, int size)
+{
+  #pragma omp simd
+  for (int i = 0; i < size; i++)
+    b[i] = a[i]<1 ? foo(1, a[i]) : a[i];
+}
+
+/* Check that "inbranch" works when there might be unrolling.  */
+
+void __attribute__((noipa))
+masked_fixed (TYPE * __restrict a, TYPE * __restrict b)
+{
+  #pragma omp simd
+  for (int i = 0; i < 128; i++)
+    b[i] = a[i]<1 ? foo(1, a[i]) : a[i];
+}
+
+/* Validate the outputs.  */
+
+void
+check_masked (TYPE *b, int size)
+{
+  for (int i = 0; i < size; i++)
+    if (((TYPE)i < 1 && b[i] != (TYPE)(i + 1))
+       || ((TYPE)i >= 1 && b[i] != (TYPE)i))
+      {
+       __builtin_printf ("error at %d\n", i);
+       __builtin_exit (1);
+      }
+}
+
+int
+main ()
+{
+  TYPE a[1024];
+  TYPE b[1024];
+
+  for (int i = 0; i < 1024; i++)
+    a[i] = i;
+
+  masked_fixed (a, b);
+  check_masked (b, 128);
+
+  /* Test various sizes to cover machines with different vectorization
+     factors.  */
+  for (int size = 8; size <= 1024; size *= 2)
+    {
+      masked (a, b, size);
+      check_masked (b, size);
+    }
+
+  /* Test sizes that might exercise the partial vector code-path.  */
+  for (int size = 8; size <= 1024; size *= 2)
+    {
+      masked (a, b, size-4);
+      check_masked (b, size-4);
+    }
+
+  return 0;
+}
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+   Some targets use another call for the epilogue loops.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! aarch64*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target aarch64*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one.  */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18b.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18b.c
new file mode 100644 (file)
index 0000000..120993e
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE float
+#include "vect-simd-clone-18.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+   Some targets use another call for the epilogue loops.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { avx_runtime || aarch64*-*-* } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime || aarch64*-*-* } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one.  */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c
new file mode 100644 (file)
index 0000000..0d1fc6d
--- /dev/null
@@ -0,0 +1,17 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE short
+#include "vect-simd-clone-18.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+   Some targets use another call for the epilogue loops.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
+
+/* x86_64 fails to use in-branch clones for TYPE=short.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one.  */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c
new file mode 100644 (file)
index 0000000..1e6c028
--- /dev/null
@@ -0,0 +1,17 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE char
+#include "vect-simd-clone-18.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+   Some targets use another call for the epilogue loops.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
+
+/* x86_64 fails to use in-branch clones for TYPE=char.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one.  */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c
new file mode 100644 (file)
index 0000000..9d20e52
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE double
+#include "vect-simd-clone-18.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+   Some targets use another call for the epilogue loops.
+   Some targets use pairs of vectors and do twice the calls.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { { ! avx_runtime } && { ! { i686*-*-* && { ! lp64 } } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime && { ! { i686*-*-* && { ! lp64 } } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one.  */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c
new file mode 100644 (file)
index 0000000..09ee7ff
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE __INT64_TYPE__
+#include "vect-simd-clone-18.c"
+
+/* Ensure the the in-branch simd clones are used on targets that support them.
+   Some targets use pairs of vectors and do twice the calls.  */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { i686*-*-* && { ! lp64 } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one.  */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
index b19eebd2dfb2c71173e1a237603fff0457431404..a7a8406374da420fc0425cc9fb5d633f5bce6262 100644 (file)
@@ -123,6 +123,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-dse.h"
 #include "tree-vectorizer.h"
 #include "tree-eh.h"
+#include "cgraph.h"
 
 /* For lang_hooks.types.type_for_mode.  */
 #include "langhooks.h"
@@ -1063,7 +1064,8 @@ if_convertible_gimple_assign_stmt_p (gimple *stmt,
    A statement is if-convertible if:
    - it is an if-convertible GIMPLE_ASSIGN,
    - it is a GIMPLE_LABEL or a GIMPLE_COND,
-   - it is builtins call.  */
+   - it is builtins call,
+   - it is a call to a function with a SIMD clone.  */
 
 static bool
 if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
@@ -1083,13 +1085,23 @@ if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
        tree fndecl = gimple_call_fndecl (stmt);
        if (fndecl)
          {
+           /* We can vectorize some builtins and functions with SIMD
+              "inbranch" clones.  */
            int flags = gimple_call_flags (stmt);
+           struct cgraph_node *node = cgraph_node::get (fndecl);
            if ((flags & ECF_CONST)
                && !(flags & ECF_LOOPING_CONST_OR_PURE)
-               /* We can only vectorize some builtins at the moment,
-                  so restrict if-conversion to those.  */
                && fndecl_built_in_p (fndecl))
              return true;
+           if (node && node->simd_clones != NULL)
+             /* Ensure that at least one clone can be "inbranch".  */
+             for (struct cgraph_node *n = node->simd_clones; n != NULL;
+                  n = n->simdclone->next_clone)
+               if (n->simdclone->inbranch)
+                 {
+                   need_to_predicate = true;
+                   return true;
+                 }
          }
        return false;
       }
@@ -2613,6 +2625,29 @@ predicate_statements (loop_p loop)
              gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
              update_stmt (stmt);
            }
+
+         /* Convert functions that have a SIMD clone to IFN_MASK_CALL.  This
+            will cause the vectorizer to match the "in branch" clone variants,
+            and serves to build the mask vector in a natural way.  */
+         gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
+         if (call && !gimple_call_internal_p (call))
+           {
+             tree orig_fn = gimple_call_fn (call);
+             int orig_nargs = gimple_call_num_args (call);
+             auto_vec<tree> args;
+             args.safe_push (orig_fn);
+             for (int i = 0; i < orig_nargs; i++)
+               args.safe_push (gimple_call_arg (call, i));
+             args.safe_push (cond);
+
+             /* Replace the call with a IFN_MASK_CALL that has the extra
+                condition parameter. */
+             gcall *new_call = gimple_build_call_internal_vec (IFN_MASK_CALL,
+                                                               args);
+             gimple_call_set_lhs (new_call, gimple_call_lhs (call));
+             gsi_replace (&gsi, new_call, true);
+           }
+
          lhs = gimple_get_lhs (gsi_stmt (gsi));
          if (lhs && TREE_CODE (lhs) == SSA_NAME)
            ssa_names.add (lhs);
index 8387f7690b2162d3d1c1519c71483537d5438da9..ab7af0ea3b8b5029433913f6ba8d9c8511e723c1 100644 (file)
@@ -2125,6 +2125,14 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,
            if (is_gimple_call (stmt) && loop->safelen)
              {
                tree fndecl = gimple_call_fndecl (stmt), op;
+               if (fndecl == NULL_TREE
+                   && gimple_call_internal_p (stmt, IFN_MASK_CALL))
+                 {
+                   fndecl = gimple_call_arg (stmt, 0);
+                   gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
+                   fndecl = TREE_OPERAND (fndecl, 0);
+                   gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
+                 }
                if (fndecl != NULL_TREE)
                  {
                    cgraph_node *node = cgraph_node::get (fndecl);
index df6239a1c61c7213ad3c1468723bc1adf70bc02c..9e5ffbe252e66e706a1dbe80705faa9c54ea61f9 100644 (file)
@@ -4004,6 +4004,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
   size_t i, nargs;
   tree lhs, rtype, ratype;
   vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
+  int arg_offset = 0;
 
   /* Is STMT a vectorizable call?   */
   gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
@@ -4011,6 +4012,15 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
     return false;
 
   fndecl = gimple_call_fndecl (stmt);
+  if (fndecl == NULL_TREE
+      && gimple_call_internal_p (stmt, IFN_MASK_CALL))
+    {
+      fndecl = gimple_call_arg (stmt, 0);
+      gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
+      fndecl = TREE_OPERAND (fndecl, 0);
+      gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
+      arg_offset = 1;
+    }
   if (fndecl == NULL_TREE)
     return false;
 
@@ -4041,7 +4051,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
     return false;
 
   /* Process function arguments.  */
-  nargs = gimple_call_num_args (stmt);
+  nargs = gimple_call_num_args (stmt) - arg_offset;
 
   /* Bail out if the function has zero arguments.  */
   if (nargs == 0)
@@ -4059,7 +4069,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
       thisarginfo.op = NULL_TREE;
       thisarginfo.simd_lane_linear = false;
 
-      op = gimple_call_arg (stmt, i);
+      op = gimple_call_arg (stmt, i + arg_offset);
       if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
                               &thisarginfo.vectype)
          || thisarginfo.dt == vect_uninitialized_def)
@@ -4074,16 +4084,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
          || thisarginfo.dt == vect_external_def)
        gcc_assert (thisarginfo.vectype == NULL_TREE);
       else
-       {
-         gcc_assert (thisarginfo.vectype != NULL_TREE);
-         if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
-           {
-             if (dump_enabled_p ())
-               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                "vector mask arguments are not supported\n");
-             return false;
-           }
-       }
+       gcc_assert (thisarginfo.vectype != NULL_TREE);
 
       /* For linear arguments, the analyze phase should have saved
         the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
@@ -4176,9 +4177,6 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
        if (target_badness < 0)
          continue;
        this_badness += target_badness * 512;
-       /* FORNOW: Have to add code to add the mask argument.  */
-       if (n->simdclone->inbranch)
-         continue;
        for (i = 0; i < nargs; i++)
          {
            switch (n->simdclone->args[i].arg_type)
@@ -4186,7 +4184,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
              case SIMD_CLONE_ARG_TYPE_VECTOR:
                if (!useless_type_conversion_p
                        (n->simdclone->args[i].orig_type,
-                        TREE_TYPE (gimple_call_arg (stmt, i))))
+                        TREE_TYPE (gimple_call_arg (stmt, i + arg_offset))))
                  i = -1;
                else if (arginfo[i].dt == vect_constant_def
                         || arginfo[i].dt == vect_external_def
@@ -4216,7 +4214,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
                i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_MASK:
-               gcc_unreachable ();
+               break;
              }
            if (i == (size_t) -1)
              break;
@@ -4242,18 +4240,55 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
     return false;
 
   for (i = 0; i < nargs; i++)
-    if ((arginfo[i].dt == vect_constant_def
-        || arginfo[i].dt == vect_external_def)
-       && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
-      {
-       tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
-       arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
-                                                         slp_node);
-       if (arginfo[i].vectype == NULL
-           || !constant_multiple_p (bestn->simdclone->simdlen,
-                                    simd_clone_subparts (arginfo[i].vectype)))
+    {
+      if ((arginfo[i].dt == vect_constant_def
+          || arginfo[i].dt == vect_external_def)
+         && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
+       {
+         tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i + arg_offset));
+         arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
+                                                           slp_node);
+         if (arginfo[i].vectype == NULL
+             || !constant_multiple_p (bestn->simdclone->simdlen,
+                                      simd_clone_subparts (arginfo[i].vectype)))
+           return false;
+       }
+
+      if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
+         && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "vector mask arguments are not supported.\n");
          return false;
-      }
+       }
+
+      if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
+         && bestn->simdclone->mask_mode == VOIDmode
+         && (simd_clone_subparts (bestn->simdclone->args[i].vector_type)
+             != simd_clone_subparts (arginfo[i].vectype)))
+       {
+         /* FORNOW we only have partial support for vector-type masks that
+            can't hold all of simdlen. */
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+                            vect_location,
+                            "in-branch vector clones are not yet"
+                            " supported for mismatched vector sizes.\n");
+         return false;
+       }
+      if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
+         && bestn->simdclone->mask_mode != VOIDmode)
+       {
+         /* FORNOW don't support integer-type masks.  */
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+                            vect_location,
+                            "in-branch vector clones are not yet"
+                            " supported for integer mask modes.\n");
+         return false;
+       }
+    }
 
   fndecl = bestn->decl;
   nunits = bestn->simdclone->simdlen;
@@ -4343,7 +4378,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
        {
          unsigned int k, l, m, o;
          tree atype;
-         op = gimple_call_arg (stmt, i);
+         op = gimple_call_arg (stmt, i + arg_offset);
          switch (bestn->simdclone->args[i].arg_type)
            {
            case SIMD_CLONE_ARG_TYPE_VECTOR:
@@ -4442,6 +4477,65 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
                    }
                }
              break;
+           case SIMD_CLONE_ARG_TYPE_MASK:
+             atype = bestn->simdclone->args[i].vector_type;
+             if (bestn->simdclone->mask_mode != VOIDmode)
+               {
+                 /* FORNOW: this is disabled above.  */
+                 gcc_unreachable ();
+               }
+             else
+               {
+                 tree elt_type = TREE_TYPE (atype);
+                 tree one = fold_convert (elt_type, integer_one_node);
+                 tree zero = fold_convert (elt_type, integer_zero_node);
+                 o = vector_unroll_factor (nunits,
+                                           simd_clone_subparts (atype));
+                 for (m = j * o; m < (j + 1) * o; m++)
+                   {
+                     if (simd_clone_subparts (atype)
+                         < simd_clone_subparts (arginfo[i].vectype))
+                       {
+                         /* The mask type has fewer elements than simdlen.  */
+
+                         /* FORNOW */
+                         gcc_unreachable ();
+                       }
+                     else if (simd_clone_subparts (atype)
+                              == simd_clone_subparts (arginfo[i].vectype))
+                       {
+                         /* The SIMD clone function has the same number of
+                            elements as the current function.  */
+                         if (m == 0)
+                           {
+                             vect_get_vec_defs_for_operand (vinfo, stmt_info,
+                                                            o * ncopies,
+                                                            op,
+                                                            &vec_oprnds[i]);
+                             vec_oprnds_i[i] = 0;
+                           }
+                         vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
+                         vec_oprnd0
+                           = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
+                                     build_vector_from_val (atype, one),
+                                     build_vector_from_val (atype, zero));
+                         gassign *new_stmt
+                           = gimple_build_assign (make_ssa_name (atype),
+                                                  vec_oprnd0);
+                         vect_finish_stmt_generation (vinfo, stmt_info,
+                                                      new_stmt, gsi);
+                         vargs.safe_push (gimple_assign_lhs (new_stmt));
+                       }
+                     else
+                       {
+                         /* The mask type has more elements than simdlen.  */
+
+                         /* FORNOW */
+                         gcc_unreachable ();
+                       }
+                   }
+               }
+             break;
            case SIMD_CLONE_ARG_TYPE_UNIFORM:
              vargs.safe_push (op);
              break;