+2009-07-20 Ira Rosen <irar@il.ibm.com>
+
+ * tree-vectorizer.h (vectorizable_condition): Add parameters.
+ * tree-vect-loop.c (vect_is_simple_reduction): Support COND_EXPR.
+ (get_initial_def_for_reduction): Likewise.
+ (vectorizable_reduction): Skip the check of first operand in case
+ of COND_EXPR. Add check that it is outer loop vectorization if
+ nested cycle was detected. Call vectorizable_condition() for
+ COND_EXPR. If reduction epilogue cannot be created do not fail for
+ nested cycles (if it is not double reduction). Assert that there
+ is only one type in the loop in case of COND_EXPR. Call
+ vectorizable_condition() to vectorize COND_EXPR.
+ * tree-vect-stmts.c (vectorizable_condition): Update comment.
+ Add parameters. Allow nested cycles if called from
+ vectorizable_reduction(). Use reduction vector variable if provided.
+ (vect_analyze_stmt): Call vectorizable_reduction() before
+ vectorizable_condition().
+ (vect_transform_stmt): Update call to vectorizable_condition().
+
2009-07-20 Christian Bruel <christian.bruel@st.com>
* config/sh/sh.opt (-mfmovd): Resurrect and document.
+2009-07-20 Ira Rosen <irar@il.ibm.com>
+
+ * gcc.dg/vect/vect-cond-1.c, gcc.dg/vect/vect-cond-2.c,
+ gcc.dg/vect/vect-cond-3.c, gcc.dg/vect/vect-cond-4.c,
+ gcc.dg/vect/vect-cond-5.c, gcc.dg/vect/vect-cond-6.c: New tests.
+
2009-07-20 Christian Bruel <christian.bruel@st.com>
* gcc.target/sh/mfmovd.c: New test.
--- /dev/null
+/* { dg-require-effective-target vect_condition } */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define M 32
+#define N 16
+
+int x_in[M];
+int x_out[M];
+int c[N] = {3,2,1,10,1,42,3,4,50,9,32,8,11,10,1,2};
+int a[N+1] = {0,16,32,48,64,128,256,512,0,16,32,48,64,128,256,512,1024};
+int check_result[M] = {1024,1024,1024,256,256,256,256,256,256,256,256,128,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48};
+
+__attribute__ ((noinline)) void
+foo ()  /* For each j in [0, M): start from a[0], scan i over [0, N) and
+   replace the running value with a[i+1] whenever x_in[j] > c[i] is
+   false; the value left after the scan is stored in x_out[j].  */
+{
+ int j, i, x;
+ int curr_a, next_a;
+
+ for (j = 0; j < M; j++)
+ {
+ x = x_in[j];
+ curr_a = a[0];
+
+ for (i = 0; i < N; i++)
+ {
+ next_a = a[i+1];
+ curr_a = x > c[i] ? curr_a : next_a;  /* curr_a is carried in the "then" arm.  */
+ }
+
+ x_out[j] = curr_a;
+ }
+}
+
+int main (void)  /* Driver: fill x_in with 0..M-1, run foo, and abort if any
+   x_out[j] differs from the precomputed check_result[j].  */
+{
+ int i,j;  /* i is not used in this function.  */
+
+ check_vect ();  /* Declared outside this file (presumably by tree-vect.h).  */
+
+ for (j = 0; j < M; j++)
+ x_in[j] = j;
+
+ foo ();
+
+ for (j = 0; j < M; j++)
+ if (x_out[j] != check_result[j])
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
+
--- /dev/null
+/* { dg-require-effective-target vect_condition } */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+int c[N] = {3,2,1,10,1,42,3,4,50,9,32,8,11,10,1,2};
+int a[N+1] = {0,16,32,48,64,128,256,512,0,16,32,48,64,128,256,512,1024};
+
+__attribute__ ((noinline)) void
+foo (int *x)  /* Scan i over [0, N) starting from a[0], replacing the running
+   value with a[i+1] whenever *x > c[i] is false, then store the result
+   back through X.  */
+{
+ int i;
+ int curr_a, flag, next_a;
+
+ curr_a = a[0];
+
+ for (i = 0; i < N; i++)
+ {
+ flag = *x > c[i];  /* *x is re-read on every iteration.  */
+ next_a = a[i+1];
+ curr_a = flag ? curr_a : next_a;
+ }
+
+ *x = curr_a;
+}
+
+int main (void)  /* Driver: run foo on x = 7 and abort unless it leaves 256
+   in x.  */
+{
+ int x = 7;
+
+ check_vect ();  /* Declared outside this file (presumably by tree-vect.h).  */
+
+ foo (&x);
+
+ if (x != 256)
+ abort ();
+
+ return 0;
+}
+
+/* The order of computation should not be changed for cond_expr, therefore,
+ it cannot be vectorized in reduction. */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
+
--- /dev/null
+/* { dg-require-effective-target vect_condition } */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define M 32
+#define N 16
+
+int x_in[M];
+int x_out_a[M], x_out_b[M];
+int c[N] = {3,2,1,10,1,42,3,4,50,9,32,8,11,10,1,2};
+int a[N+1] = {0,16,32,48,64,128,256,512,0,16,32,48,64,128,256,512,1024};
+int b[N+1] = {17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1};
+int check_result_a[M] = {1024,1024,1024,256,256,256,256,256,256,256,256,128,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48};
+int check_result_b[M] = {17,17,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
+
+__attribute__ ((noinline)) void
+foo ()  /* For each j in [0, M): run two conditional updates over i in
+   [0, N) that share the flag x_in[j] > c[i].  curr_a starts at a[0]
+   and takes a[i+1] when the flag is false; curr_b starts at b[0] and
+   takes b[i+1] when the flag is true.  The final values are stored in
+   x_out_a[j] and x_out_b[j].  */
+{
+ int j, i, x;
+ int curr_a, flag, next_a, curr_b, next_b;
+
+ for (j = 0; j < M; j++)
+ {
+ x = x_in[j];
+ curr_a = a[0];
+ curr_b = b[0];
+
+ for (i = 0; i < N; i++)
+ {
+ flag = x > c[i];
+ next_a = a[i+1];
+ next_b = b[i+1];
+ curr_a = flag ? curr_a : next_a;  /* Running value in the "then" arm.  */
+ curr_b = flag ? next_b : curr_b;  /* Running value in the "else" arm.  */
+ }
+
+ x_out_a[j] = curr_a;
+ x_out_b[j] = curr_b;
+ }
+}
+
+int main (void)  /* Driver: fill x_in with 0..M-1, run foo, and abort if
+   either output array differs from its precomputed check_result_a /
+   check_result_b table.  */
+{
+ int i,j;  /* i is not used in this function.  */
+
+ check_vect ();  /* Declared outside this file (presumably by tree-vect.h).  */
+
+ for (j = 0; j < M; j++)
+ x_in[j] = j;
+
+ foo ();
+
+ for (j = 0; j < M; j++)
+ if (x_out_a[j] != check_result_a[j]
+ || x_out_b[j] != check_result_b[j])
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
+
--- /dev/null
+/* { dg-require-effective-target vect_condition } */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define M 32
+#define N 16
+
+int x_in[M];
+int x_out_a[M], x_out_b[M];
+int c[N] = {3,2,1,10,1,42,3,4,50,9,32,8,11,10,1,2};
+int a[N+1] = {0,16,32,48,64,128,256,512,0,16,32,48,64,128,256,512,1024};
+int b[N+1] = {17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1};
+int check_result_a[M] = {1024,1024,1024,256,256,256,256,256,256,256,256,128,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48};
+int check_result_b[M] = {17,17,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
+
+/* For each j in [0, M): start curr_a at a[0] and curr_b at b[0], then for
+   every i in [0, N) keep the running value when x_in[j] > c[i] and
+   otherwise replace curr_a with Z and curr_b with 5.  The final values
+   are stored in x_out_a[j] and x_out_b[j].  */
+__attribute__ ((noinline)) void
+foo (int z)
+{
+ int j, i, x;
+ int curr_a, curr_b;
+
+ for (j = 0; j < M; j++)
+ {
+ x = x_in[j];
+ curr_a = a[0];
+ curr_b = b[0];
+
+ for (i = 0; i < N; i++)
+ {
+ curr_a = x > c[i] ? curr_a : z;
+ /* Carry curr_b itself in the "then" arm so that no indeterminate
+    value is ever read.  */
+ curr_b = x > c[i] ? curr_b : 5;
+ }
+
+ x_out_a[j] = curr_a;
+ x_out_b[j] = curr_b;
+ }
+}
+
+int main (void)  /* Driver: fill x_in with 0..M-1, run foo (125), and abort
+   unless every x_out_a[j] is 125 and every x_out_b[j] is 5.  */
+{
+ int i,j;  /* i is not used in this function.  */
+
+ check_vect ();  /* Declared outside this file (presumably by tree-vect.h).  */
+
+ for (j = 0; j < M; j++)
+ x_in[j] = j;
+
+ foo (125);
+
+ for (j = 0; j < M; j++)
+ if (x_out_a[j] != 125
+ || x_out_b[j] != 5)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
+
--- /dev/null
+/* { dg-require-effective-target vect_condition } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define K 32
+
+int cond_array[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+int a[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+int out[K];
+int check_result[K] = {2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+
+__attribute__ ((noinline)) void
+foo (int c)  /* For each k in [0, K): start res at 0, visit every (j, i)
+   pair with j outermost, and set res to a[i][j] whenever
+   c > cond_array[i+k][j]; the final res is stored in out[k].  */
+{
+ int res, i, j, k, next;
+
+ for (k = 0; k < K; k++)
+ {
+ res = 0;  /* Reset once per k, so res is carried across both inner loops.  */
+ for (j = 0; j < K; j++)
+ for (i = 0; i < K; i++)
+ {
+ next = a[i][j];
+ res = c > cond_array[i+k][j] ? next : res;
+ }
+
+ out[k] = res;
+ }
+}
+
+int main ()  /* Driver: set cond_array[i][j] = i+j and a[i][j] = i+2, run
+   foo (5), and abort if any out[k] differs from check_result[k].  */
+{
+ int i, j, k;
+
+ check_vect ();  /* Declared outside this file (presumably by tree-vect.h).  */
+
+ for (j = 0; j < K; j++)
+ {
+ for (i = 0; i < 2*K; i++)
+ cond_array[i][j] = i+j;
+
+ for (i = 0; i < K; i++)
+ a[i][j] = i+2;
+ }
+
+ foo(5);
+
+ for (k = 0; k < K; k++)
+ if (out[k] != check_result[k])
+ abort ();
+
+ return 0;
+}
+
+/* Double reduction with cond_expr is not supported: even though the order
+ of computation is the same, the vector results would have to be reduced to
+ a scalar result, which cannot be done for cond_expr. */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_condition } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define K 32
+
+int cond_array[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+int a[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+int out[K];
+
+__attribute__ ((noinline)) void
+foo (int c)  /* For each k and each j in [0, K): start res at 0, scan i over
+   [0, K) setting res to a[i][j] whenever c > cond_array[i+k][j], and
+   store the final res in out[j].  */
+{
+ int res, i, j, k, next;
+
+ for (k = 0; k < K; k++)
+ {
+ for (j = 0; j < K; j++)
+ {
+ res = 0;  /* Reset per (k, j), so res is carried only across the i loop.  */
+ for (i = 0; i < K; i++)
+ {
+ next = a[i][j];
+ res = c > cond_array[i+k][j] ? next : res;
+ }
+
+ out[j] = res;  /* Overwritten on every k; the value from the last k remains.  */
+ }
+ }
+}
+
+int main ()  /* Driver: set cond_array[i][j] = i+j and a[i][j] = i+2, run
+   foo (125), and abort unless every out[k] is 33.  */
+{
+ int i, j, k;
+
+ check_vect ();  /* Declared outside this file (presumably by tree-vect.h).  */
+
+ for (j = 0; j < K; j++)
+ {
+ for (i = 0; i < 2*K; i++)
+ cond_array[i][j] = i+j;
+
+ for (i = 0; i < K; i++)
+ a[i][j] = i+2;
+ }
+
+ foo(125);
+
+ for (k = 0; k < K; k++)
+ if (out[k] != 33)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
edge latch_e = loop_latch_edge (loop);
tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
- gimple def_stmt, def1, def2;
+ gimple def_stmt, def1 = NULL, def2 = NULL;
enum tree_code code;
- tree op1, op2;
+ tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
tree type;
int nloop_uses;
tree name;
return NULL;
}
- if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
+ if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
{
- if (vect_print_dump_info (REPORT_DETAILS))
- report_vect_op (def_stmt, "reduction: not binary operation: ");
- return NULL;
- }
+ if (code != COND_EXPR)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ report_vect_op (def_stmt, "reduction: not binary operation: ");
- op1 = gimple_assign_rhs1 (def_stmt);
- op2 = gimple_assign_rhs2 (def_stmt);
- if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
- return NULL;
+ return NULL;
+ }
+
+ op3 = TREE_OPERAND (TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0), 0);
+ op4 = TREE_OPERAND (TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0), 1);
+ op1 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 1);
+ op2 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 2);
+
+ if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
+
+ return NULL;
+ }
}
+ else
+ {
+ op1 = gimple_assign_rhs1 (def_stmt);
+ op2 = gimple_assign_rhs2 (def_stmt);
+
+ if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
+
+ return NULL;
+ }
+ }
type = TREE_TYPE (gimple_assign_lhs (def_stmt));
- if (TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1))
- || TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2)))
+ if ((TREE_CODE (op1) == SSA_NAME
+ && TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1)))
+ || (TREE_CODE (op2) == SSA_NAME
+ && TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2)))
+ || (op3 && TREE_CODE (op3) == SSA_NAME
+ && TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op3)))
+ || (op4 && TREE_CODE (op4) == SSA_NAME
+ && TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op4))))
{
if (vect_print_dump_info (REPORT_DETAILS))
{
print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
fprintf (vect_dump, ",");
print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
+ if (op3 && op4)
+ {
+ fprintf (vect_dump, ",");
+ print_generic_expr (vect_dump, TREE_TYPE (op3), TDF_SLIM);
+ fprintf (vect_dump, ",");
+ print_generic_expr (vect_dump, TREE_TYPE (op4), TDF_SLIM);
+ }
}
+
return NULL;
}
1) integer arithmetic and no trapv
2) floating point arithmetic, and special flags permit this optimization
3) nested cycle (i.e., outer loop vectorization). */
- def1 = SSA_NAME_DEF_STMT (op1);
- def2 = SSA_NAME_DEF_STMT (op2);
- if (!def1 || !def2 || gimple_nop_p (def1) || gimple_nop_p (def2))
+ if (TREE_CODE (op1) == SSA_NAME)
+ def1 = SSA_NAME_DEF_STMT (op1);
+
+ if (TREE_CODE (op2) == SSA_NAME)
+ def2 = SSA_NAME_DEF_STMT (op2);
+
+ if (code != COND_EXPR
+ && (!def1 || !def2 || gimple_nop_p (def1) || gimple_nop_p (def2)))
{
if (vect_print_dump_info (REPORT_DETAILS))
report_vect_op (def_stmt, "reduction: no defs for operands: ");
the other def is either defined in the loop ("vect_internal_def"),
or it's an induction (defined by a loop-header phi-node). */
- if (def2 == phi
- && flow_bb_inside_loop_p (loop, gimple_bb (def1))
- && (is_gimple_assign (def1)
- || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def
- || (gimple_code (def1) == GIMPLE_PHI
- && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
- == vect_internal_def
- && !is_loop_header_bb_p (gimple_bb (def1)))))
+ if (def2 && def2 == phi
+ && (code == COND_EXPR
+ || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
+ && (is_gimple_assign (def1)
+ || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
+ == vect_induction_def
+ || (gimple_code (def1) == GIMPLE_PHI
+ && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
+ == vect_internal_def
+ && !is_loop_header_bb_p (gimple_bb (def1)))))))
{
if (vect_print_dump_info (REPORT_DETAILS))
report_vect_op (def_stmt, "detected reduction: ");
return def_stmt;
}
- else if (def1 == phi
- && flow_bb_inside_loop_p (loop, gimple_bb (def2))
- && (is_gimple_assign (def2)
- || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
- == vect_induction_def
- || (gimple_code (def2) == GIMPLE_PHI
- && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
- == vect_internal_def
- && !is_loop_header_bb_p (gimple_bb (def2)))))
+ else if (def1 && def1 == phi
+ && (code == COND_EXPR
+ || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
+ && (is_gimple_assign (def2)
+ || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
+ == vect_induction_def
+ || (gimple_code (def2) == GIMPLE_PHI
+ && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
+ == vect_internal_def
+ && !is_loop_header_bb_p (gimple_bb (def2)))))))
{
if (check_reduction)
{
vector of partial results.
Option1 (adjust in epilog): Initialize the vector as follows:
- add/bit or/xor: [0,0,...,0,0]
- mult/bit and: [1,1,...,1,1]
- min/max: [init_val,init_val,..,init_val,init_val]
+ add/bit or/xor: [0,0,...,0,0]
+ mult/bit and: [1,1,...,1,1]
+ min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
and when necessary (e.g. add/mult case) let the caller know
that it needs to adjust the result by init_val.
Option2: Initialize the vector as follows:
- add/bit or/xor: [init_val,0,0,...,0]
- mult/bit and: [init_val,1,1,...,1]
- min/max: [init_val,init_val,...,init_val]
+ add/bit or/xor: [init_val,0,0,...,0]
+ mult/bit and: [init_val,1,1,...,1]
+ min/max/cond_expr: [init_val,init_val,...,init_val]
and no adjustments are needed.
For example, for the following code:
case MIN_EXPR:
case MAX_EXPR:
+ case COND_EXPR:
if (adjustment_def)
{
*adjustment_def = NULL_TREE;
stmt_vec_info prev_stmt_info, prev_phi_info;
gimple first_phi = NULL;
bool single_defuse_cycle = false;
- tree reduc_def;
+ tree reduc_def = NULL_TREE;
gimple new_stmt = NULL;
int j;
tree ops[3];
reduction variable. */
for (i = 0; i < op_type-1; i++)
{
+ /* The condition of COND_EXPR is checked in vectorizable_condition(). */
+ if (i == 0 && code == COND_EXPR)
+ continue;
+
is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, NULL, &def_stmt,
&def, &dt);
gcc_assert (is_simple_use);
&& dt != vect_external_def
&& dt != vect_constant_def
&& dt != vect_induction_def
- && dt != vect_nested_cycle)
+ && !(dt == vect_nested_cycle && nested_cycle))
return false;
if (dt == vect_nested_cycle)
if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
return false;
- /* 4. Supportable by target? */
+ vec_mode = TYPE_MODE (vectype);
- /* 4.1. check support for the operation in the loop */
- optab = optab_for_tree_code (code, vectype, optab_default);
- if (!optab)
+ if (code == COND_EXPR)
{
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "no optab.");
- return false;
+ if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "unsupported condition in reduction");
+
+ return false;
+ }
}
- vec_mode = TYPE_MODE (vectype);
- if (optab_handler (optab, vec_mode)->insn_code == CODE_FOR_nothing)
+ else
{
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "op not supported by target.");
- if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
- || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
- < vect_min_worthwhile_factor (code))
- return false;
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "proceeding using word mode.");
- }
+ /* 4. Supportable by target? */
- /* Worthwhile without SIMD support? */
- if (!VECTOR_MODE_P (TYPE_MODE (vectype))
- && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
- < vect_min_worthwhile_factor (code))
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "not worthwhile without SIMD support.");
- return false;
+ /* 4.1. check support for the operation in the loop */
+ optab = optab_for_tree_code (code, vectype, optab_default);
+ if (!optab)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "no optab.");
+
+ return false;
+ }
+
+ if (optab_handler (optab, vec_mode)->insn_code == CODE_FOR_nothing)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "op not supported by target.");
+
+ if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
+ || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+ < vect_min_worthwhile_factor (code))
+ return false;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "proceeding using word mode.");
+ }
+
+ /* Worthwhile without SIMD support? */
+ if (!VECTOR_MODE_P (TYPE_MODE (vectype))
+ && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+ < vect_min_worthwhile_factor (code))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "not worthwhile without SIMD support.");
+
+ return false;
+ }
}
/* 4.2. Check support for the epilog operation.
orig_code = code;
}
- if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
- return false;
-
- reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype,
- optab_default);
- if (!reduc_optab)
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "no optab for reduction.");
- epilog_reduc_code = ERROR_MARK;
- }
-
- if (reduc_optab
- && optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing)
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "reduc op not supported by target.");
- epilog_reduc_code = ERROR_MARK;
- }
-
if (nested_cycle)
{
def_bb = gimple_bb (reduc_def_stmt);
double_reduc = true;
}
+ epilog_reduc_code = ERROR_MARK;
+ if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
+ {
+ reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype,
+ optab_default);
+ if (!reduc_optab)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "no optab for reduction.");
+
+ epilog_reduc_code = ERROR_MARK;
+ }
+
+ if (reduc_optab
+ && optab_handler (reduc_optab, vec_mode)->insn_code
+ == CODE_FOR_nothing)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "reduc op not supported by target.");
+
+ epilog_reduc_code = ERROR_MARK;
+ }
+ }
+ else
+ {
+ if (!nested_cycle || double_reduc)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "no reduc code for scalar code.");
+
+ return false;
+ }
+ }
+
if (double_reduc && ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "transform reduction.");
+ /* FORNOW: Multiple types are not supported for condition. */
+ if (code == COND_EXPR)
+ gcc_assert (ncopies == 1);
+
/* Create the destination vector */
vec_dest = vect_create_destination_var (scalar_dest, vectype);
new_phi = create_phi_node (vec_dest, loop->header);
set_vinfo_for_stmt (new_phi, new_stmt_vec_info (new_phi, loop_vinfo,
NULL));
+ /* Get the vector def for the reduction variable from the phi
+ node. */
+ reduc_def = PHI_RESULT (new_phi);
}
+ if (code == COND_EXPR)
+ {
+ first_phi = new_phi;
+ vectorizable_condition (stmt, gsi, vec_stmt, reduc_def, reduc_index);
+ /* Multiple types are not supported for condition. */
+ break;
+ }
+
/* Handle uses. */
if (j == 0)
{
/* Get the vector def for the reduction variable from the phi
node. */
- reduc_def = PHI_RESULT (new_phi);
first_phi = new_phi;
}
else
STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
}
-
- /* Arguments are ready. create the new vector stmt. */
+ /* Arguments are ready. Create the new vector stmt. */
if (op_type == binary_op)
{
if (reduc_index == 0)
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
-
+
if (j == 0)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
else
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+
prev_stmt_info = vinfo_for_stmt (new_stmt);
prev_phi_info = vinfo_for_stmt (new_phi);
}
/* Finalize the reduction-phi (set its arguments) and create the
epilog reduction code. */
- if (!single_defuse_cycle)
+ if (!single_defuse_cycle || code == COND_EXPR)
new_temp = gimple_assign_lhs (*vec_stmt);
vect_create_epilog_for_reduction (new_temp, stmt, epilog_copies,
Check if STMT is conditional modify expression that can be vectorized.
If VEC_STMT is also passed, vectorize the STMT: create a vectorized
stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
- at BSI.
+ at GSI.
+
+ When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
+ to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
+ else clause if it is 2).
Return FALSE if not a vectorizable STMT, TRUE otherwise. */
-static bool
+bool
vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
- gimple *vec_stmt)
+ gimple *vec_stmt, tree reduc_def, int reduc_index)
{
tree scalar_dest = NULL_TREE;
tree vec_dest = NULL_TREE;
if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false;
- if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
+ if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
+ && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
+ && reduc_def))
return false;
/* FORNOW: SLP not supported. */
return false;
/* FORNOW: not yet supported. */
- if (STMT_VINFO_LIVE_P (stmt_info))
+ if (STMT_VINFO_LIVE_P (stmt_info))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "value used after loop.");
vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
vec_cond_rhs =
vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
- vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
- vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
+ if (reduc_index == 1)
+ vec_then_clause = reduc_def;
+ else
+ vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
+ if (reduc_index == 2)
+ vec_else_clause = reduc_def;
+ else
+ vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
/* Arguments are ready. Create the new vector stmt. */
vec_compare = build2 (TREE_CODE (cond_expr), vectype,
|| vectorizable_load (stmt, NULL, NULL, NULL, NULL)
|| vectorizable_call (stmt, NULL, NULL)
|| vectorizable_store (stmt, NULL, NULL, NULL)
- || vectorizable_condition (stmt, NULL, NULL)
- || vectorizable_reduction (stmt, NULL, NULL));
+ || vectorizable_reduction (stmt, NULL, NULL)
+ || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
else
{
if (bb_vinfo)
case condition_vec_info_type:
gcc_assert (!slp_node);
- done = vectorizable_condition (stmt, gsi, &vec_stmt);
+ done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
gcc_assert (done);
break;
bool *, slp_tree, slp_instance);
extern void vect_remove_stores (gimple);
extern bool vect_analyze_stmt (gimple, bool *, slp_tree);
-
+extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *,
+ tree, int);
+
/* In tree-vect-data-refs.c. */
extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int);
extern enum dr_alignment_support vect_supportable_dr_alignment