2019-06-21  Jakub Jelinek  <jakub@redhat.com>
+ * omp-low.c (lower_rec_simd_input_clauses): Add rvar2 argument,
+ create another "omp simd inscan exclusive" array if
+ !ctx->scan_inclusive.
+ (lower_rec_input_clauses): Handle exclusive scan inscan reductions.
+ (lower_omp_scan): Likewise.
+ * tree-vectorizer.h (struct _stmt_vec_info): Use 3-bit instead of
+ 2-bit bitfield for simd_lane_access_p member.
+ * tree-vect-data-refs.c (vect_analyze_data_refs): Also handle
+ aux == (void *)-4 as simd lane access.
+ * tree-vect-stmts.c (check_scan_store): Handle exclusive scan. Update
+ comment with permutations to show the canonical permutation order.
+ (vectorizable_scan_store): Handle exclusive scan.
+ (vectorizable_store): Call vectorizable_scan_store even for
+ STMT_VINFO_SIMD_LANE_ACCESS_P > 3.
+
* tree-vect-data-refs.c (vect_find_stmt_data_reference): Handle
"omp simd array" arrays with one byte elements.
static bool
lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
omplow_simd_context *sctx, tree &ivar,
- tree &lvar, tree *rvar = NULL)
+ tree &lvar, tree *rvar = NULL,
+ tree *rvar2 = NULL)
{
if (known_eq (sctx->max_vf, 0U))
{
*rvar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar,
sctx->lastlane, NULL_TREE, NULL_TREE);
TREE_THIS_NOTRAP (*rvar) = 1;
+
+ if (!ctx->scan_inclusive)
+ {
+ /* And for exclusive scan yet another one, which will
+ hold the value during the scan phase. */
+ tree savar = create_tmp_var_raw (atype);
+ if (TREE_ADDRESSABLE (new_var))
+ TREE_ADDRESSABLE (savar) = 1;
+ DECL_ATTRIBUTES (savar)
+ = tree_cons (get_identifier ("omp simd array"), NULL,
+ tree_cons (get_identifier ("omp simd inscan "
+ "exclusive"), NULL,
+ DECL_ATTRIBUTES (savar)));
+ gimple_add_tmp_var (savar);
+ ctx->cb.decl_map->put (iavar, savar);
+ *rvar2 = build4 (ARRAY_REF, TREE_TYPE (new_var), savar,
+ sctx->idx, NULL_TREE, NULL_TREE);
+ TREE_THIS_NOTRAP (*rvar2) = 1;
+ }
}
ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar, sctx->idx,
NULL_TREE, NULL_TREE);
new_vard = TREE_OPERAND (new_var, 0);
gcc_assert (DECL_P (new_vard));
}
- tree rvar = NULL_TREE, *rvarp = NULL;
+ tree rvar = NULL_TREE, *rvarp = NULL, rvar2 = NULL_TREE;
if (is_simd
&& OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
&& OMP_CLAUSE_REDUCTION_INSCAN (c))
rvarp = &rvar;
if (is_simd
&& lower_rec_simd_input_clauses (new_var, ctx, &sctx,
- ivar, lvar, rvarp))
+ ivar, lvar, rvarp,
+ &rvar2))
{
if (new_vard == new_var)
{
(c, ivar2, build_outer_var_ref (var, ctx));
gimplify_and_add (x, &llist[0]);
+ if (rvar2)
+ {
+ x = lang_hooks.decls.omp_clause_default_ctor
+ (c, unshare_expr (rvar2),
+ build_outer_var_ref (var, ctx));
+ gimplify_and_add (x, &llist[0]);
+ }
+
/* For types that need construction, add another
private var which will be default constructed
and optionally initialized with
iteration. */
tree nv = create_tmp_var_raw (TREE_TYPE (ivar));
gimple_add_tmp_var (nv);
- ctx->cb.decl_map->put (TREE_OPERAND (ivar, 0),
+ ctx->cb.decl_map->put (TREE_OPERAND (rvar2
+ ? rvar2
+ : ivar, 0),
nv);
x = lang_hooks.decls.omp_clause_default_ctor
(c, nv, build_outer_var_ref (var, ctx));
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (&llist[1], tseq);
}
+
+ if (rvar2)
+ {
+ x = lang_hooks.decls.omp_clause_dtor (c, rvar2);
+ if (x)
+ {
+ tseq = NULL;
+ dtor = x;
+ gimplify_stmt (&dtor, &tseq);
+ gimple_seq_add_seq (&llist[1], tseq);
+ }
+ }
break;
}
if (x)
gimple_seq_add_seq (ilist, tseq);
}
OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
+ if (!ctx->scan_inclusive)
+ {
+ tree nv2
+ = create_tmp_var_raw (TREE_TYPE (new_var));
+ gimple_add_tmp_var (nv2);
+ ctx->cb.decl_map->put (nv, nv2);
+ x = lang_hooks.decls.omp_clause_default_ctor
+ (c, nv2, build_outer_var_ref (var, ctx));
+ gimplify_and_add (x, ilist);
+ x = lang_hooks.decls.omp_clause_dtor (c, nv2);
+ if (x)
+ {
+ tseq = NULL;
+ dtor = x;
+ gimplify_stmt (&dtor, &tseq);
+ gimple_seq_add_seq (dlist, tseq);
+ }
+ }
x = lang_hooks.decls.omp_clause_dtor (c, nv);
if (x)
{
gimple_seq_add_seq (dlist, tseq);
}
}
+ else if (!ctx->scan_inclusive
+ && TREE_ADDRESSABLE (TREE_TYPE (new_var)))
+ {
+ tree nv2 = create_tmp_var_raw (TREE_TYPE (new_var));
+ gimple_add_tmp_var (nv2);
+ ctx->cb.decl_map->put (new_vard, nv2);
+ x = lang_hooks.decls.omp_clause_dtor (c, nv2);
+ if (x)
+ {
+ tseq = NULL;
+ dtor = x;
+ gimplify_stmt (&dtor, &tseq);
+ gimple_seq_add_seq (dlist, tseq);
+ }
+ }
DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
goto do_dtor;
}
new_vard = TREE_OPERAND (new_var, 0);
gcc_assert (DECL_P (new_vard));
}
- tree rvar = NULL_TREE, *rvarp = NULL;
+ tree rvar = NULL_TREE, *rvarp = NULL, rvar2 = NULL_TREE;
if (is_simd
&& OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
&& OMP_CLAUSE_REDUCTION_INSCAN (c))
rvarp = &rvar;
if (is_simd
&& lower_rec_simd_input_clauses (new_var, ctx, &sctx,
- ivar, lvar, rvarp))
+ ivar, lvar, rvarp,
+ &rvar2))
{
if (new_vard != new_var)
{
gimple_seq before = NULL;
omp_context *octx = ctx->outer;
gcc_assert (octx);
+ if (!octx->scan_inclusive && !has_clauses)
+ {
+ gimple_stmt_iterator gsi2 = *gsi_p;
+ gsi_next (&gsi2);
+ gimple *stmt2 = gsi_stmt (gsi2);
+ /* For exclusive scan, swap the GIMPLE_OMP_SCAN without clauses with
+ the following GIMPLE_OMP_SCAN with clauses, so that the one with the
+ exclusive clause(s), which is handled as the input phase, comes first.  */
+ if (stmt2
+ && gimple_code (stmt2) == GIMPLE_OMP_SCAN
+ && gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt2)) != NULL)
+ {
+ gsi_remove (gsi_p, false);
+ gsi_insert_after (gsi_p, stmt, GSI_SAME_STMT);
+ ctx = maybe_lookup_ctx (stmt2);
+ gcc_assert (ctx);
+ lower_omp_scan (gsi_p, ctx);
+ return;
+ }
+ }
+
bool input_phase = has_clauses ^ octx->scan_inclusive;
if (gimple_code (octx->stmt) == GIMPLE_OMP_FOR
&& (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD)
- && !gimple_omp_for_combined_into_p (octx->stmt)
- && octx->scan_inclusive)
+ && !gimple_omp_for_combined_into_p (octx->stmt))
{
if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt),
OMP_CLAUSE__SIMDUID_))
{
tree uid = OMP_CLAUSE__SIMDUID__DECL (c);
lane = create_tmp_var (unsigned_type_node);
- tree t = build_int_cst (integer_type_node, 1 + !input_phase);
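+ /* The second argument of IFN_GOMP_SIMD_LANE distinguishes the phases:
+ 0 is the ordinary simd lane access, 1 the input phase, 2 the scan phase
+ of an inclusive scan and 3 the scan phase of an exclusive scan.  */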
+ tree t = build_int_cst (integer_type_node,
+ input_phase ? 1
+ : octx->scan_inclusive ? 2 : 3);
gimple *g
= gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t);
gimple_call_set_lhs (g, lane);
tree val = new_var;
tree var2 = NULL_TREE;
tree var3 = NULL_TREE;
+ tree var4 = NULL_TREE;
+ tree lane0 = NULL_TREE;
tree new_vard = new_var;
if (omp_is_reference (var))
{
DECL_ATTRIBUTES (v)))
{
val = unshare_expr (val);
+ lane0 = TREE_OPERAND (val, 1);
TREE_OPERAND (val, 1) = lane;
var2 = lookup_decl (v, octx);
+ if (!octx->scan_inclusive)
+ var4 = lookup_decl (var2, octx);
if (input_phase
&& OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
- var3 = maybe_lookup_decl (var2, octx);
+ var3 = maybe_lookup_decl (var4 ? var4 : var2, octx);
if (!input_phase)
{
var2 = build4 (ARRAY_REF, TREE_TYPE (val),
var2, lane, NULL_TREE, NULL_TREE);
TREE_THIS_NOTRAP (var2) = 1;
+ if (!octx->scan_inclusive)
+ {
+ var4 = build4 (ARRAY_REF, TREE_TYPE (val),
+ var4, lane, NULL_TREE,
+ NULL_TREE);
+ TREE_THIS_NOTRAP (var4) = 1;
+ }
}
else
var2 = val;
else
{
var2 = build_outer_var_ref (var, octx);
- if (input_phase && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
+ if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
{
var3 = maybe_lookup_decl (new_vard, octx);
- if (var3 == new_vard)
+ if (var3 == new_vard || var3 == NULL_TREE)
var3 = NULL_TREE;
+ else if (!octx->scan_inclusive && !input_phase)
+ {
+ var4 = maybe_lookup_decl (var3, octx);
+ if (var4 == var3 || var4 == NULL_TREE)
+ {
+ if (TREE_ADDRESSABLE (TREE_TYPE (new_var)))
+ {
+ var4 = var3;
+ var3 = NULL_TREE;
+ }
+ else
+ var4 = NULL_TREE;
+ }
+ }
}
+ if (!octx->scan_inclusive && !input_phase && var4 == NULL_TREE)
+ var4 = create_tmp_var (TREE_TYPE (val));
}
if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
{
}
else
{
+ tree x;
+ if (!octx->scan_inclusive)
+ {
+ tree v4 = unshare_expr (var4);
+ tree v2 = unshare_expr (var2);
+ x = lang_hooks.decls.omp_clause_assign_op (c, v4, v2);
+ gimplify_and_add (x, &before);
+ }
gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
- tree x = (DECL_HAS_VALUE_EXPR_P (new_vard)
- ? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
+ x = (DECL_HAS_VALUE_EXPR_P (new_vard)
+ ? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
tree vexpr = val;
if (x && omp_is_reference (var))
vexpr = build_fold_addr_expr_loc (clause_loc, val);
SET_DECL_VALUE_EXPR (new_vard, x);
SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
- x = lang_hooks.decls.omp_clause_assign_op (c, val, var2);
- gimplify_and_add (x, &before);
+ if (octx->scan_inclusive)
+ {
+ x = lang_hooks.decls.omp_clause_assign_op (c, val,
+ var2);
+ gimplify_and_add (x, &before);
+ }
+ else if (lane0 == NULL_TREE)
+ {
+ x = lang_hooks.decls.omp_clause_assign_op (c, val,
+ var4);
+ gimplify_and_add (x, &before);
+ }
}
}
else
tree x = build2 (code, TREE_TYPE (var2),
unshare_expr (var2), unshare_expr (val));
- gimplify_assign (unshare_expr (var2), x, &before);
- gimplify_assign (val, var2, &before);
+ if (octx->scan_inclusive)
+ {
+ gimplify_assign (unshare_expr (var2), x, &before);
+ gimplify_assign (val, var2, &before);
+ }
+ else
+ {
+ gimplify_assign (unshare_expr (var4),
+ unshare_expr (var2), &before);
+ gimplify_assign (var2, x, &before);
+ if (lane0 == NULL_TREE)
+ gimplify_assign (val, var4, &before);
+ }
}
}
+ if (!octx->scan_inclusive && !input_phase && lane0)
+ {
+ tree vexpr = unshare_expr (var4);
+ TREE_OPERAND (vexpr, 1) = lane0;
+ if (omp_is_reference (var))
+ vexpr = build_fold_addr_expr_loc (clause_loc, vexpr);
+ SET_DECL_VALUE_EXPR (new_vard, vexpr);
+ }
}
}
else if (has_clauses)
2019-06-21  Jakub Jelinek  <jakub@redhat.com>
+ * gcc.dg/vect/vect-simd-12.c: New test.
+ * gcc.dg/vect/vect-simd-13.c: New test.
+ * gcc.dg/vect/vect-simd-14.c: New test.
+ * gcc.dg/vect/vect-simd-15.c: New test.
+ * gcc.target/i386/sse2-vect-simd-12.c: New test.
+ * gcc.target/i386/sse2-vect-simd-13.c: New test.
+ * gcc.target/i386/sse2-vect-simd-14.c: New test.
+ * gcc.target/i386/sse2-vect-simd-15.c: New test.
+ * gcc.target/i386/avx2-vect-simd-12.c: New test.
+ * gcc.target/i386/avx2-vect-simd-13.c: New test.
+ * gcc.target/i386/avx2-vect-simd-14.c: New test.
+ * gcc.target/i386/avx2-vect-simd-15.c: New test.
+ * gcc.target/i386/avx512f-vect-simd-12.c: New test.
+ * gcc.target/i386/avx512f-vect-simd-13.c: New test.
+ * gcc.target/i386/avx512f-vect-simd-14.c: New test.
+ * gcc.target/i386/avx512bw-vect-simd-15.c: New test.
+ * g++.dg/vect/simd-6.cc: New test.
+ * g++.dg/vect/simd-7.cc: New test.
+ * g++.dg/vect/simd-8.cc: New test.
+ * g++.dg/vect/simd-9.cc: New test.
+ * c-c++-common/gomp/scan-2.c: Don't expect any diagnostics.
+
PR c++/90950
* g++.dg/gomp/lastprivate-1.C: New test.
for (i = 0; i < 64; i++)
{
d[i] = a;
- #pragma omp scan exclusive (a) /* { dg-message "sorry, unimplemented: '#pragma omp scan' not supported yet" } */
+ #pragma omp scan exclusive (a)
a += c[i];
}
}
--- /dev/null
+// { dg-require-effective-target size32plus }
+// { dg-additional-options "-fopenmp-simd" }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
+
+#include "../../gcc.dg/vect/tree-vect.h"
+
+template <typename T>
+struct S {
+ inline S ();
+ inline ~S ();
+ inline S (const S &);
+ inline S & operator= (const S &);
+ T s;
+};
+
+template <typename T>
+S<T>::S () : s (0)
+{
+}
+
+template <typename T>
+S<T>::~S ()
+{
+}
+
+template <typename T>
+S<T>::S (const S &x)
+{
+ s = x.s;
+}
+
+template <typename T>
+S<T> &
+S<T>::operator= (const S &x)
+{
+ s = x.s;
+ return *this;
+}
+
+template <typename T>
+static inline void
+ini (S<T> &x)
+{
+ x.s = 0;
+}
+
+S<int> r, a[1024], b[1024];
+
+#pragma omp declare reduction (+: S<int>: omp_out.s += omp_in.s)
+#pragma omp declare reduction (plus: S<int>: omp_out.s += omp_in.s) initializer (ini (omp_priv))
+
+template <typename T>
+__attribute__((noipa)) void
+foo (S<T> *a, S<T> *b)
+{
+ #pragma omp simd reduction (inscan, +:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r.s += a[i].s;
+ }
+}
+
+template <typename T>
+__attribute__((noipa)) S<T>
+bar (void)
+{
+ S<T> s;
+ #pragma omp simd reduction (inscan, plus:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s.s += 2 * a[i].s;
+ }
+ return S<T> (s);
+}
+
+__attribute__((noipa)) void
+baz (S<int> *a, S<int> *b)
+{
+ #pragma omp simd reduction (inscan, +:r) simdlen(1)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r.s += a[i].s;
+ }
+}
+
+__attribute__((noipa)) S<int>
+qux (void)
+{
+ S<int> s;
+ #pragma omp simd if (0) reduction (inscan, plus:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s.s += 2 * a[i].s;
+ }
+ return S<int> (s);
+}
+
+int
+main ()
+{
+ S<int> s;
+ check_vect ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i].s = i;
+ b[i].s = -1;
+ asm ("" : "+g" (i));
+ }
+ foo (a, b);
+ if (r.s != 1024 * 1023 / 2)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i].s != s.s)
+ abort ();
+ else
+ b[i].s = 25;
+ s.s += i;
+ }
+ if (bar<int> ().s != 1024 * 1023)
+ abort ();
+ s.s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i].s != s.s)
+ abort ();
+ s.s += 2 * i;
+ }
+ r.s = 0;
+ baz (a, b);
+ if (r.s != 1024 * 1023 / 2)
+ abort ();
+ s.s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i].s != s.s)
+ abort ();
+ else
+ b[i].s = 25;
+ s.s += i;
+ }
+ if (qux ().s != 1024 * 1023)
+ abort ();
+ s.s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i].s != s.s)
+ abort ();
+ s.s += 2 * i;
+ }
+ return 0;
+}
--- /dev/null
+// { dg-require-effective-target size32plus }
+// { dg-additional-options "-fopenmp-simd" }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } }
+
+#include "../../gcc.dg/vect/tree-vect.h"
+
+int r, a[1024], b[1024], q;
+
+template <typename T, typename U>
+__attribute__((noipa)) void
+foo (T a, T b, U r)
+{
+ #pragma omp simd reduction (inscan, +:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r += a[i];
+ }
+}
+
+template <typename T>
+__attribute__((noipa)) T
+bar (void)
+{
+ T &s = q;
+ q = 0;
+ #pragma omp simd reduction (inscan, +:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s += 2 * a[i];
+ }
+ return s;
+}
+
+template <typename T>
+__attribute__((noipa)) void
+baz (T *a, T *b, T &r)
+{
+ #pragma omp simd reduction (inscan, +:r) if (simd: 0)
+ for (T i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r += a[i];
+ }
+}
+
+template <typename T>
+__attribute__((noipa)) int
+qux (void)
+{
+ T s = q;
+ q = 0;
+ #pragma omp simd reduction (inscan, +:s) simdlen (1)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s += 2 * a[i];
+ }
+ return s;
+}
+
+int
+main ()
+{
+ int s = 0;
+ check_vect ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i] = i;
+ b[i] = -1;
+ asm ("" : "+g" (i));
+ }
+ foo<int *, int &> (a, b, r);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = 25;
+ s += i;
+ }
+ if (bar<int> () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -1;
+ s += 2 * i;
+ }
+ r = 0;
+ baz<int> (a, b, r);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -25;
+ s += i;
+ }
+ if (qux<int &> () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ s += 2 * i;
+ }
+ return 0;
+}
--- /dev/null
+// { dg-require-effective-target size32plus }
+// { dg-additional-options "-fopenmp-simd" }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } }
+
+#include "../../gcc.dg/vect/tree-vect.h"
+
+int r, a[1024], b[1024], q;
+
+#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
+
+__attribute__((noipa)) void
+foo (int *a, int *b, int &r)
+{
+ #pragma omp simd reduction (inscan, foo:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r += a[i];
+ }
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+ int &s = q;
+ q = 0;
+ #pragma omp simd reduction (inscan, foo:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s += 2 * a[i];
+ }
+ return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b, int &r)
+{
+ #pragma omp simd reduction (inscan, foo:r) if (simd: 0)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r += a[i];
+ }
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+ int &s = q;
+ q = 0;
+ #pragma omp simd reduction (inscan, foo:s) simdlen (1)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s += 2 * a[i];
+ }
+ return s;
+}
+
+int
+main ()
+{
+ int s = 0;
+ check_vect ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i] = i;
+ b[i] = -1;
+ asm ("" : "+g" (i));
+ }
+ foo (a, b, r);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = 25;
+ s += i;
+ }
+ if (bar () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -1;
+ s += 2 * i;
+ }
+ r = 0;
+ baz (a, b, r);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -25;
+ s += i;
+ }
+ if (qux () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ s += 2 * i;
+ }
+ return 0;
+}
--- /dev/null
+// { dg-require-effective-target size32plus }
+// { dg-additional-options "-fopenmp-simd" }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
+
+#include "../../gcc.dg/vect/tree-vect.h"
+
+struct S {
+ inline S ();
+ inline ~S ();
+ inline S (const S &);
+ inline S & operator= (const S &);
+ int s;
+};
+
+S::S () : s (0)
+{
+}
+
+S::~S ()
+{
+}
+
+S::S (const S &x)
+{
+ s = x.s;
+}
+
+S &
+S::operator= (const S &x)
+{
+ s = x.s;
+ return *this;
+}
+
+static inline void
+ini (S &x)
+{
+ x.s = 0;
+}
+
+S r, a[1024], b[1024];
+
+#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
+#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv))
+
+__attribute__((noipa)) void
+foo (S *a, S *b, S &r)
+{
+ #pragma omp simd reduction (inscan, +:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r.s += a[i].s;
+ }
+}
+
+__attribute__((noipa)) S
+bar (void)
+{
+ S s;
+ #pragma omp simd reduction (inscan, plus:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s.s += 2 * a[i].s;
+ }
+ return s;
+}
+
+__attribute__((noipa)) void
+baz (S *a, S *b, S &r)
+{
+ #pragma omp simd reduction (inscan, +:r) simdlen(1)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r.s += a[i].s;
+ }
+}
+
+__attribute__((noipa)) S
+qux (void)
+{
+ S s;
+ #pragma omp simd if (0) reduction (inscan, plus:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s.s += 2 * a[i].s;
+ }
+ return s;
+}
+
+int
+main ()
+{
+ S s;
+ check_vect ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i].s = i;
+ b[i].s = -1;
+ asm ("" : "+g" (i));
+ }
+ foo (a, b, r);
+ if (r.s != 1024 * 1023 / 2)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i].s != s.s)
+ abort ();
+ else
+ b[i].s = 25;
+ s.s += i;
+ }
+ if (bar ().s != 1024 * 1023)
+ abort ();
+ s.s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i].s != s.s)
+ abort ();
+ s.s += 2 * i;
+ }
+ r.s = 0;
+ baz (a, b, r);
+ if (r.s != 1024 * 1023 / 2)
+ abort ();
+ s.s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i].s != s.s)
+ abort ();
+ else
+ b[i].s = 25;
+ s.s += i;
+ }
+ if (qux ().s != 1024 * 1023)
+ abort ();
+ s.s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i].s != s.s)
+ abort ();
+ s.s += 2 * i;
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-require-effective-target size32plus } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
+
+#ifndef main
+#include "tree-vect.h"
+#endif
+
+int r, a[1024], b[1024];
+
+__attribute__((noipa)) void
+foo (int *a, int *b)
+{
+ #pragma omp simd reduction (inscan, +:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r += a[i];
+ }
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+ int s = 0;
+ #pragma omp simd reduction (inscan, +:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s += 2 * a[i];
+ }
+ return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b)
+{
+ #pragma omp simd reduction (inscan, +:r) if (simd: 0)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r += a[i];
+ }
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+ int s = 0;
+ #pragma omp simd reduction (inscan, +:s) simdlen (1)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s += 2 * a[i];
+ }
+ return s;
+}
+
+int
+main ()
+{
+ int s = 0;
+#ifndef main
+ check_vect ();
+#endif
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i] = i;
+ b[i] = -1;
+ asm ("" : "+g" (i));
+ }
+ foo (a, b);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = 25;
+ s += i;
+ }
+ if (bar () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -1;
+ s += 2 * i;
+ }
+ r = 0;
+ baz (a, b);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -25;
+ s += i;
+ }
+ if (qux () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ s += 2 * i;
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-require-effective-target size32plus } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
+
+#ifndef main
+#include "tree-vect.h"
+#endif
+
+int r, a[1024], b[1024];
+
+#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
+
+__attribute__((noipa)) void
+foo (int *a, int *b)
+{
+ #pragma omp simd reduction (inscan, foo:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r += a[i];
+ }
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+ int s = 0;
+ #pragma omp simd reduction (inscan, foo:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s += 2 * a[i];
+ }
+ return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b)
+{
+ #pragma omp simd reduction (inscan, foo:r) if (simd: 0)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r += a[i];
+ }
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+ int s = 0;
+ #pragma omp simd reduction (inscan, foo:s) simdlen (1)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s += 2 * a[i];
+ }
+ return s;
+}
+
+int
+main ()
+{
+ int s = 0;
+#ifndef main
+ check_vect ();
+#endif
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i] = i;
+ b[i] = -1;
+ asm ("" : "+g" (i));
+ }
+ foo (a, b);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = 25;
+ s += i;
+ }
+ if (bar () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -1;
+ s += 2 * i;
+ }
+ r = 0;
+ baz (a, b);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -25;
+ s += i;
+ }
+ if (qux () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ s += 2 * i;
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-require-effective-target size32plus } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
+
+#ifndef main
+#include "tree-vect.h"
+#endif
+
+float r = 1.0f, a[1024], b[1024];
+
+__attribute__((noipa)) void
+foo (float *a, float *b)
+{
+ #pragma omp simd reduction (inscan, *:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r *= a[i];
+ }
+}
+
+__attribute__((noipa)) float
+bar (void)
+{
+ float s = -__builtin_inff ();
+ #pragma omp simd reduction (inscan, max:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s = s > a[i] ? s : a[i];
+ }
+ return s;
+}
+
+int
+main ()
+{
+ float s = 1.0f;
+#ifndef main
+ check_vect ();
+#endif
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (i < 80)
+ a[i] = (i & 1) ? 0.25f : 0.5f;
+ else if (i < 200)
+ a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
+ else if (i < 280)
+ a[i] = (i & 1) ? 0.25f : 0.5f;
+ else if (i < 380)
+ a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
+ else
+ switch (i % 6)
+ {
+ case 0: a[i] = 0.25f; break;
+ case 1: a[i] = 2.0f; break;
+ case 2: a[i] = -1.0f; break;
+ case 3: a[i] = -4.0f; break;
+ case 4: a[i] = 0.5f; break;
+ case 5: a[i] = 1.0f; break;
+ default: a[i] = 0.0f; break;
+ }
+ b[i] = -19.0f;
+ asm ("" : "+g" (i));
+ }
+ foo (a, b);
+ if (r * 16384.0f != 0.125f)
+ abort ();
+ float m = -175.25f;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -231.75f;
+ s *= a[i];
+ a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f);
+ m += 0.75f;
+ }
+ if (bar () != 592.0f)
+ abort ();
+ s = -__builtin_inff ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ if (s < a[i])
+ s = a[i];
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-require-effective-target size32plus } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
+
+#ifndef main
+#include "tree-vect.h"
+#endif
+
+int r, a[1024], b[1024];
+unsigned short r2, b2[1024];
+unsigned char r3, b3[1024];
+
+__attribute__((noipa)) void
+foo (int *a, int *b, unsigned short *b2, unsigned char *b3)
+{
+ #pragma omp simd reduction (inscan, +:r, r2, r3)
+ for (int i = 0; i < 1024; i++)
+ {
+ {
+ b[i] = r;
+ b2[i] = r2;
+ b3[i] = r3;
+ }
+ #pragma omp scan exclusive(r, r2, r3)
+ { r += a[i]; r2 += a[i]; r3 += a[i]; }
+ }
+}
+
+__attribute__((noipa)) int
+bar (unsigned short *s2p, unsigned char *s3p)
+{
+ int s = 0;
+ unsigned short s2 = 0;
+ unsigned char s3 = 0;
+ #pragma omp simd reduction (inscan, +:s, s2, s3)
+ for (int i = 0; i < 1024; i++)
+ {
+ { b[i] = s; b2[i] = s2; b3[i] = s3; }
+ #pragma omp scan exclusive(s, s2, s3)
+ {
+ s += 2 * a[i];
+ s2 += 2 * a[i];
+ s3 += 2 * a[i];
+ }
+ }
+ *s2p = s2;
+ *s3p = s3;
+ return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b, unsigned short *b2, unsigned char *b3)
+{
+ #pragma omp simd reduction (inscan, +:r, r2, r3) if (simd: 0)
+ for (int i = 0; i < 1024; i++)
+ {
+ {
+ b[i] = r;
+ b2[i] = r2;
+ b3[i] = r3;
+ }
+ #pragma omp scan exclusive(r, r2, r3)
+ {
+ r += a[i];
+ r2 += a[i];
+ r3 += a[i];
+ }
+ }
+}
+
+__attribute__((noipa)) int
+qux (unsigned short *s2p, unsigned char *s3p)
+{
+ int s = 0;
+ unsigned short s2 = 0;
+ unsigned char s3 = 0;
+ #pragma omp simd reduction (inscan, +:s, s2, s3) simdlen (1)
+ for (int i = 0; i < 1024; i++)
+ {
+ { b[i] = s; b2[i] = s2; b3[i] = s3; }
+ #pragma omp scan exclusive(s, s2, s3)
+ { s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; }
+ }
+ *s2p = s2;
+ *s3p = s3;
+ return s;
+}
+
+int
+main ()
+{
+ int s = 0;
+ unsigned short s2;
+ unsigned char s3;
+#ifndef main
+ check_vect ();
+#endif
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i] = i;
+ b[i] = -1;
+ b2[i] = -1;
+ b3[i] = -1;
+ asm ("" : "+g" (i));
+ }
+ foo (a, b, b2, b3);
+ if (r != 1024 * 1023 / 2
+ || r2 != (unsigned short) r
+ || r3 != (unsigned char) r)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s
+ || b2[i] != (unsigned short) s
+ || b3[i] != (unsigned char) s)
+ abort ();
+ else
+ {
+ b[i] = 25;
+ b2[i] = 24;
+ b3[i] = 26;
+ }
+ s += i;
+ }
+ if (bar (&s2, &s3) != 1024 * 1023)
+ abort ();
+ if (s2 != (unsigned short) (1024 * 1023)
+ || s3 != (unsigned char) (1024 * 1023))
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s
+ || b2[i] != (unsigned short) s
+ || b3[i] != (unsigned char) s)
+ abort ();
+ else
+ {
+ b[i] = -1;
+ b2[i] = -1;
+ b3[i] = -1;
+ }
+ s += 2 * i;
+ }
+ r = 0;
+ r2 = 0;
+ r3 = 0;
+ baz (a, b, b2, b3);
+ if (r != 1024 * 1023 / 2
+ || r2 != (unsigned short) r
+ || r3 != (unsigned char) r)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s
+ || b2[i] != (unsigned short) s
+ || b3[i] != (unsigned char) s)
+ abort ();
+ else
+ {
+ b[i] = 25;
+ b2[i] = 24;
+ b3[i] = 26;
+ }
+ s += i;
+ }
+ s2 = 0;
+ s3 = 0;
+ if (qux (&s2, &s3) != 1024 * 1023)
+ abort ();
+ if (s2 != (unsigned short) (1024 * 1023)
+ || s3 != (unsigned char) (1024 * 1023))
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s
+ || b2[i] != (unsigned short) s
+ || b3[i] != (unsigned char) s)
+ abort ();
+ s += 2 * i;
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target avx2 } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "avx2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-12.c"
+
+static void
+avx2_test (void)
+{
+ do_main ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target avx2 } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "avx2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-13.c"
+
+static void
+avx2_test (void)
+{
+ do_main ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target avx2 } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "avx2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-14.c"
+
+static void
+avx2_test (void)
+{
+ do_main ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target avx2 } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "avx2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-15.c"
+
+static void
+avx2_test (void)
+{
+ do_main ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx512bw -mprefer-vector-width=512 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "avx512bw-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-15.c"
+
+static void
+avx512bw_test (void)
+{
+ do_main ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "avx512f-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-12.c"
+
+static void
+avx512f_test (void)
+{
+ do_main ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "avx512f-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-13.c"
+
+static void
+avx512f_test (void)
+{
+ do_main ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "avx512f-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-14.c"
+
+static void
+avx512f_test (void)
+{
+ do_main ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target sse2 } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "sse2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-12.c"
+
+static void
+sse2_test (void)
+{
+ do_main ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target sse2 } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "sse2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-13.c"
+
+static void
+sse2_test (void)
+{
+ do_main ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target sse2 } */
+
+#include "sse2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-14.c"
+
+static void
+sse2_test (void)
+{
+ do_main ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target sse2 } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+
+#include "sse2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/vect/vect-simd-15.c"
+
+static void
+sse2_test (void)
+{
+ do_main ();
+}
/* See if this was detected as SIMD lane access. */
if (dr->aux == (void *)-1
|| dr->aux == (void *)-2
- || dr->aux == (void *)-3)
+ || dr->aux == (void *)-3
+ || dr->aux == (void *)-4)
{
if (nested_in_vect_loop_p (loop, stmt_info))
return opt_result::failure_at (stmt_info->stmt,
kinds are there in order to allow optimizing the initializer store
and combiner sequence, e.g. if it is originally some C++ish user
defined reduction, but allow the vectorizer to pattern recognize it
- and turn into the appropriate vectorized scan. */
+ and turn it into the appropriate vectorized scan.
+
+ For exclusive scan, this is slightly different:
+ #pragma omp simd reduction(inscan,+:r)
+ for (...)
+ {
+ use (r);
+ #pragma omp scan exclusive (r)
+ r += something ();
+ }
+ shall have body with:
+ // Initialization for input phase, store the reduction initializer:
+ _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
+ _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
+ D.2042[_21] = 0;
+ // Actual input phase:
+ ...
+ r.0_5 = D.2042[_20];
+ _6 = _4 + r.0_5;
+ D.2042[_20] = _6;
+ // Initialization for scan phase:
+ _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
+ _26 = D.2043[_25];
+ D.2044[_25] = _26;
+ _27 = D.2042[_25];
+ _28 = _26 + _27;
+ D.2043[_25] = _28;
+ // Actual scan phase:
+ ...
+ r.1_8 = D.2044[_20];
+ ... */
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
{
if (TREE_CODE (rhs) != SSA_NAME)
goto fail;
- use_operand_p use_p;
- imm_use_iterator iter;
gimple *other_store_stmt = NULL;
- FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
+ tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
+ bool inscan_var_store
+ = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
+
+ if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
{
- gimple *use_stmt = USE_STMT (use_p);
- if (use_stmt == stmt || is_gimple_debug (use_stmt))
- continue;
- if (gimple_bb (use_stmt) != gimple_bb (stmt)
- || !gimple_store_p (use_stmt)
- || other_store_stmt)
- goto fail;
- other_store_stmt = use_stmt;
+ if (!inscan_var_store)
+ {
+ use_operand_p use_p;
+ imm_use_iterator iter;
+ FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
+ {
+ gimple *use_stmt = USE_STMT (use_p);
+ if (use_stmt == stmt || is_gimple_debug (use_stmt))
+ continue;
+ if (gimple_bb (use_stmt) != gimple_bb (stmt)
+ || !is_gimple_assign (use_stmt)
+ || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
+ || other_store_stmt
+ || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
+ goto fail;
+ other_store_stmt = use_stmt;
+ }
+ if (other_store_stmt == NULL)
+ goto fail;
+ rhs = gimple_assign_lhs (other_store_stmt);
+ if (!single_imm_use (rhs, &use_p, &other_store_stmt))
+ goto fail;
+ }
}
- if (other_store_stmt == NULL)
- goto fail;
- stmt_vec_info other_store_stmt_info
- = loop_vinfo->lookup_stmt (other_store_stmt);
- if (other_store_stmt_info == NULL
- || STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info) != 3)
+ else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
+ {
+ use_operand_p use_p;
+ imm_use_iterator iter;
+ FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
+ {
+ gimple *use_stmt = USE_STMT (use_p);
+ if (use_stmt == stmt || is_gimple_debug (use_stmt))
+ continue;
+ if (other_store_stmt)
+ goto fail;
+ other_store_stmt = use_stmt;
+ }
+ }
+ else
goto fail;
gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
tree rhs1 = gimple_assign_rhs1 (def_stmt);
tree rhs2 = gimple_assign_rhs2 (def_stmt);
- if (TREE_CODE (rhs1) != SSA_NAME
- || TREE_CODE (rhs2) != SSA_NAME)
+ if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
goto fail;
gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
if (load1_stmt_info == NULL
|| load2_stmt_info == NULL
- || STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info) != 3
- || STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info) != 3)
+ || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
+ != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
+ || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
+ != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
goto fail;
- if (scan_operand_equal_p (gimple_assign_lhs (stmt),
+ if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
+ {
+ dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
+ if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
+ || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
+ goto fail;
+ tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
+ tree lrhs;
+ if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
+ lrhs = rhs1;
+ else
+ lrhs = rhs2;
+ use_operand_p use_p;
+ imm_use_iterator iter;
+ FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
+ {
+ gimple *use_stmt = USE_STMT (use_p);
+ if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
+ continue;
+ if (other_store_stmt)
+ goto fail;
+ other_store_stmt = use_stmt;
+ }
+ }
+
+ if (other_store_stmt == NULL)
+ goto fail;
+ if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
+ || !gimple_store_p (other_store_stmt))
+ goto fail;
+
+ stmt_vec_info other_store_stmt_info
+ = loop_vinfo->lookup_stmt (other_store_stmt);
+ if (other_store_stmt_info == NULL
+ || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
+ != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
+ goto fail;
+
+ gimple *stmt1 = stmt;
+ gimple *stmt2 = other_store_stmt;
+ if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
+ std::swap (stmt1, stmt2);
+ if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
gimple_assign_rhs1 (load2_stmt)))
{
std::swap (rhs1, rhs2);
std::swap (load1_stmt, load2_stmt);
std::swap (load1_stmt_info, load2_stmt_info);
}
- if (!scan_operand_equal_p (gimple_assign_lhs (stmt),
- gimple_assign_rhs1 (load1_stmt))
- || !scan_operand_equal_p (gimple_assign_lhs (other_store_stmt),
+ if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
+ gimple_assign_rhs1 (load1_stmt)))
+ goto fail;
+
+ tree var3 = NULL_TREE;
+ if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
+ && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
gimple_assign_rhs1 (load2_stmt)))
goto fail;
+ else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
+ {
+ dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
+ if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
+ || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
+ goto fail;
+ var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
+ if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
+ || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
+ || lookup_attribute ("omp simd inscan exclusive",
+ DECL_ATTRIBUTES (var3)))
+ goto fail;
+ }
dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
std::swap (var1, var2);
+ if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
+ {
+ if (!lookup_attribute ("omp simd inscan exclusive",
+ DECL_ATTRIBUTES (var1)))
+ goto fail;
+ var1 = var3;
+ }
+
if (loop_vinfo->scan_map == NULL)
goto fail;
tree *init = loop_vinfo->scan_map->get (var1);
goto fail;
/* The IL is as expected, now check if we can actually vectorize it.
+ Inclusive scan:
_26 = D.2043[_25];
_27 = D.2042[_25];
_28 = _26 + _27;
from the D.2042[_21] = 0; store):
_30 = MEM <vector(8) int> [(int *)&D.2043];
_31 = MEM <vector(8) int> [(int *)&D.2042];
- _32 = VEC_PERM_EXPR <_31, _40, { 8, 0, 1, 2, 3, 4, 5, 6 }>;
+ _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
_33 = _31 + _32;
// _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
- _34 = VEC_PERM_EXPR <_33, _40, { 8, 9, 0, 1, 2, 3, 4, 5 }>;
+ _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
_35 = _33 + _34;
// _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
// _31[1]+.._31[4], ... _31[4]+.._31[7] };
- _36 = VEC_PERM_EXPR <_35, _40, { 8, 9, 10, 11, 0, 1, 2, 3 }>;
+ _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
_37 = _35 + _36;
// _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
// _31[0]+.._31[4], ... _31[0]+.._31[7] };
_38 = _30 + _37;
_39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
MEM <vector(8) int> [(int *)&D.2043] = _39;
- MEM <vector(8) int> [(int *)&D.2042] = _38; */
+ MEM <vector(8) int> [(int *)&D.2042] = _38;
+ Exclusive scan:
+ _26 = D.2043[_25];
+ D.2044[_25] = _26;
+ _27 = D.2042[_25];
+ _28 = _26 + _27;
+ D.2043[_25] = _28;
+ should be vectorized as (where _40 is the vectorized rhs
+ from the D.2042[_21] = 0; store):
+ _30 = MEM <vector(8) int> [(int *)&D.2043];
+ _31 = MEM <vector(8) int> [(int *)&D.2042];
+ _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
+ _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
+ _34 = _32 + _33;
+ // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
+ // _31[3]+_31[4], ... _31[5]+.._31[6] };
+ _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
+ _36 = _34 + _35;
+ // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
+ // _31[1]+.._31[4], ... _31[3]+.._31[6] };
+ _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
+ _38 = _36 + _37;
+ // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
+ // _31[0]+.._31[4], ... _31[0]+.._31[6] };
+ _39 = _30 + _38;
+ _50 = _31 + _39;
+ _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
+ MEM <vector(8) int> [(int *)&D.2044] = _39;
+ MEM <vector(8) int> [(int *)&D.2043] = _51; */
enum machine_mode vec_mode = TYPE_MODE (vectype);
optab optab = optab_for_tree_code (code, vectype, optab_default);
if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
tree rhs = gimple_assign_rhs1 (stmt);
gcc_assert (TREE_CODE (rhs) == SSA_NAME);
+ tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
+ bool inscan_var_store
+ = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
+
+ if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
+ {
+ use_operand_p use_p;
+ imm_use_iterator iter;
+ FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
+ {
+ gimple *use_stmt = USE_STMT (use_p);
+ if (use_stmt == stmt || is_gimple_debug (use_stmt))
+ continue;
+ rhs = gimple_assign_lhs (use_stmt);
+ break;
+ }
+ }
+
gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
enum tree_code code = gimple_assign_rhs_code (def_stmt);
if (code == POINTER_PLUS_EXPR)
{
std::swap (rhs1, rhs2);
std::swap (var1, var2);
+ std::swap (load1_dr_info, load2_dr_info);
}
tree *init = loop_vinfo->scan_map->get (var1);
gcc_assert (init);
- tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
- bool inscan_var_store
- = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
-
unsigned HOST_WIDE_INT nunits;
if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
gcc_unreachable ();
tree vec_oprnd1 = NULL_TREE;
tree vec_oprnd2 = NULL_TREE;
tree vec_oprnd3 = NULL_TREE;
- tree dataref_ptr = unshare_expr (DR_BASE_ADDRESS (dr_info->dr));
+ tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
tree dataref_offset = build_int_cst (ref_type, 0);
tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS);
+ tree ldataref_ptr = NULL_TREE;
tree orig = NULL_TREE;
+ if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
+ ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
for (int j = 0; j < ncopies; j++)
{
stmt_vec_info new_stmt_info;
if (j == 0)
{
vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info);
- vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
+ if (ldataref_ptr == NULL)
+ vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info);
orig = vec_oprnd3;
}
else
{
vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
- vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
+ if (ldataref_ptr == NULL)
+ vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
if (!inscan_var_store)
dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
}
+ if (ldataref_ptr)
+ {
+ vec_oprnd2 = make_ssa_name (vectype);
+ tree data_ref = fold_build2 (MEM_REF, vectype,
+ unshare_expr (ldataref_ptr),
+ dataref_offset);
+ vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
+ gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
+ new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
+ if (prev_stmt_info == NULL)
+ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
+ else
+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
+ prev_stmt_info = new_stmt_info;
+ }
+
tree v = vec_oprnd2;
for (int i = 0; i < units_log2; ++i)
{
new_temp = new_temp2;
}
+ /* For exclusive scan, perform the perms[i] permutation once
+ more. */
+ if (i == 0
+ && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
+ && v == vec_oprnd2)
+ {
+ v = new_temp;
+ --i;
+ continue;
+ }
+
tree new_temp2 = make_ssa_name (vectype);
g = gimple_build_assign (new_temp2, code, v, new_temp);
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
prev_stmt_info = new_stmt_info;
+ tree last_perm_arg = new_temp;
+ /* For exclusive scan, new_temp computed above is the exclusive scan
+ prefix sum. Turn it into inclusive prefix sum for the broadcast
+ of the last element into orig. */
+ if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
+ {
+ last_perm_arg = make_ssa_name (vectype);
+ g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
+ new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
+ prev_stmt_info = new_stmt_info;
+ }
+
orig = make_ssa_name (vectype);
- g = gimple_build_assign (orig, VEC_PERM_EXPR, new_temp, new_temp,
- perms[units_log2]);
+ g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
+ last_perm_arg, perms[units_log2]);
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
prev_stmt_info = new_stmt_info;
if (!inscan_var_store)
{
- tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
+ tree data_ref = fold_build2 (MEM_REF, vectype,
+ unshare_expr (dataref_ptr),
dataref_offset);
vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
g = gimple_build_assign (data_ref, new_temp);
if (j != 0)
dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
- tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
+ tree data_ref = fold_build2 (MEM_REF, vectype,
+ unshare_expr (dataref_ptr),
dataref_offset);
vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
gimple *g = gimple_build_assign (data_ref, orig);
}
return true;
}
- else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
+ else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies);
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
bool strided_p;
/* For both loads and stores. */
- unsigned simd_lane_access_p : 2;
+ unsigned simd_lane_access_p : 3;
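+ /* Note: widened from 2 bits because exclusive scan accesses use the
+    value 4 (paired with the new (void *)-4 marker handled in
+    vect_analyze_data_refs), which no longer fits in 2 bits.  */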
/* Classifies how the load or store is going to be implemented
for loop vectorization. */