+2009-05-24 Ira Rosen <irar@il.ibm.com>
+
+ * doc/passes.texi (Tree-SSA passes): Document SLP pass.
+ * tree-pass.h (pass_slp_vectorize): New pass.
+ * params.h (SLP_MAX_INSNS_IN_BB): Define.
+ * timevar.def (TV_TREE_SLP_VECTORIZATION): Define.
+ * tree-vectorizer.c (timevar.h): Include.
+ (user_vect_verbosity_level): Declare.
+ (vect_location): Fix comment.
+ (vect_set_verbosity_level): Update user_vect_verbosity_level
+ instead of vect_verbosity_level.
+ (vect_set_dump_settings): Add an argument. Ignore user defined
+ verbosity if dump flags require higher level of verbosity. Print to
+ stderr only for loop vectorization.
+ (vectorize_loops): Update call to vect_set_dump_settings.
+ (execute_vect_slp): New function.
+ (gate_vect_slp): Likewise.
+ (struct gimple_opt_pass pass_slp_vectorize): New.
+ * tree-vectorizer.h (struct _bb_vec_info): Define along with macros to
+ access its members.
+ (vec_info_for_bb): New function.
+ (struct _stmt_vec_info): Add bb_vinfo and a macro for its access.
+ (VECTORIZATION_ENABLED): New macro.
+ (SLP_ENABLED, SLP_DISABLED): Likewise.
+ (vect_is_simple_use): Add bb_vec_info argument.
+ (new_stmt_vec_info, vect_analyze_data_ref_dependences,
+ vect_analyze_data_refs_alignment, vect_verify_datarefs_alignment,
+ vect_analyze_data_ref_accesses, vect_analyze_data_refs,
+ vect_schedule_slp, vect_analyze_slp): Likewise.
+ (vect_analyze_stmt): Add slp_tree argument.
+ (find_bb_location): Declare.
+ (vect_slp_analyze_bb, vect_slp_transform_bb): Likewise.
+ * tree-vect-loop.c (new_loop_vec_info): Adjust function calls.
+ (vect_analyze_loop_operations, vect_analyze_loop,
+ get_initial_def_for_induction, vect_create_epilog_for_reduction,
+ vect_finalize_reduction, vectorizable_reduction,
+ vectorizable_live_operation, vect_transform_loop): Likewise.
+ * tree-data-ref.c (dr_analyze_innermost): Update comment,
+ skip evolution analysis if analyzing a basic block.
+ (dr_analyze_indices): Likewise.
+ (initialize_data_dependence_relation): Skip the test whether the
+ object is invariant for basic blocks.
+ (compute_all_dependences): Skip dependence analysis for data
+ references in basic blocks.
+ (find_data_references_in_stmt): Don't fail in case of invariant
+ access in basic block.
+ (find_data_references_in_bb): New function.
+ (find_data_references_in_loop): Move code to
+ find_data_references_in_bb and add a call to it.
+ (compute_data_dependences_for_bb): New function.
+ * tree-data-ref.h (compute_data_dependences_for_bb): Declare.
+ * tree-vect-data-refs.c (vect_check_interleaving): Adjust to the case
+ that STEP is 0.
+ (vect_analyze_data_ref_dependence): Check for interleaving in case of
+ unknown dependence in basic block and fail in case of dependence in
+ basic block.
+ (vect_analyze_data_ref_dependences): Add bb_vinfo argument, get data
+ dependence instances from either loop or basic block vectorization
+ info.
+ (vect_compute_data_ref_alignment): Check if it is loop vectorization
+ before calling nested_in_vect_loop_p.
+ (vect_compute_data_refs_alignment): Add bb_vinfo argument, get data
+ dependence instances from either loop or basic block vectorization
+ info.
+ (vect_verify_datarefs_alignment): Likewise.
+ (vect_enhance_data_refs_alignment): Adjust function calls.
+ (vect_analyze_data_refs_alignment): Likewise.
+ (vect_analyze_group_access): Fix printing. Skip different checks if
+ DR_STEP is 0. Keep strided stores either in loop or basic block
+ vectorization data structure. Fix indentation.
+ (vect_analyze_data_ref_access): Fix comments, allow zero step in
+ basic blocks.
+ (vect_analyze_data_ref_accesses): Add bb_vinfo argument, get data
+ dependence instances from either loop or basic block vectorization
+ info.
+ (vect_analyze_data_refs): Update comment. Call
+ compute_data_dependences_for_bb to analyze basic blocks.
+ (vect_create_addr_base_for_vector_ref): Check for outer loop only in
+ case of loop vectorization. In case of basic block vectorization use
+ data-ref itself as a base.
+ (vect_create_data_ref_ptr): In case of basic block vectorization:
+ don't advance the pointer, add new statements before the current
+ statement. Adjust function calls.
+ (vect_supportable_dr_alignment): Support only aligned accesses in
+ basic block vectorization.
+ * common.opt (ftree-slp-vectorize): New flag.
+ * tree-vect-patterns.c (widened_name_p): Adjust function calls.
+ (vect_pattern_recog_1): Likewise.
+ * tree-vect-stmts.c (process_use): Likewise.
+ (vect_init_vector): Add new statements in the beginning of the basic
+ block in case of basic block SLP.
+ (vect_get_vec_def_for_operand): Adjust function calls.
+ (vect_finish_stmt_generation): Likewise.
+ (vectorizable_call): Add assert that it is loop vectorization, adjust
+ function calls.
+ (vectorizable_conversion, vectorizable_assignment): Likewise.
+ (vectorizable_operation): In case of basic block SLP, take
+ vectorization factor from statement's type and skip the relevance
+ check. Adjust function calls.
+ (vectorizable_type_demotion): Add assert that it is loop
+ vectorization, adjust function calls.
+ (vectorizable_type_promotion): Likewise.
+ (vectorizable_store): Check for outer loop only in case of loop
+ vectorization. Adjust function calls. For basic blocks, skip the
+ relevance check and don't advance pointers.
+ (vectorizable_load): Likewise.
+ (vectorizable_condition): Add assert that it is loop vectorization,
+ adjust function calls.
+ (vect_analyze_stmt): Add argument. In case of basic block SLP, check
+ that it is not reduction, get vector type, call only supported
+ functions, skip loop specific parts.
+ (vect_transform_stmt): Check for outer loop only in case of loop
+ vectorization.
+ (new_stmt_vec_info): Add new argument and initialize bb_vinfo.
+ (vect_is_simple_use): Fix comment, add new argument, fix conditions
+ for external definition.
+ * passes.c (pass_slp_vectorize): New pass.
+ * tree-vect-slp.c (find_bb_location): New function.
+ (vect_get_and_check_slp_defs): Add argument, adjust function calls,
+ check for patterns only in loops.
+ (vect_build_slp_tree): Add argument, adjust function calls, fail in
+ case of multiple types in basic block SLP.
+ (vect_mark_slp_stmts_relevant): New function.
+ (vect_supported_load_permutation_p): Fix comment.
+ (vect_analyze_slp_instance): Add argument. In case of basic block
+ SLP, take vectorization factor from statement's type, check that
+ unrolling factor is 1. Adjust function call. Save SLP instance in
+ either loop or basic block vectorization structure. Return FALSE,
+ if SLP failed.
+ (vect_analyze_slp): Add argument. Get strided stores groups from
+ either loop or basic block vectorization structure. Return FALSE
+ if basic block SLP failed.
+ (new_bb_vec_info): New function.
+ (destroy_bb_vec_info, vect_slp_analyze_node_operations,
+ vect_slp_analyze_operations, vect_slp_analyze_bb): Likewise.
+ (vect_schedule_slp): Add argument. Get SLP instances from either
+ loop or basic block vectorization structure. Set vectorization factor
+ to be 1 for basic block SLP.
+ (vect_slp_transform_bb): New function.
+ * params.def (PARAM_SLP_MAX_INSNS_IN_BB): Define.
+
2009-05-23 Mark Mitchell <mark@codesourcery.com>
* final.c (shorten_branches): Do not align labels for jump tables.
Common Report Var(flag_tree_vectorize) Optimization
Enable loop vectorization on trees
+ftree-slp-vectorize
+Common Report Var(flag_tree_slp_vectorize) Init(2) Optimization
+Enable basic block vectorization (SLP) on trees
+
fvect-cost-model
Common Report Var(flag_vect_cost_model) Optimization
Enable use of cost model in vectorization
the number of elements operated upon in parallel in each iteration, and the
@code{VF} copies of each scalar operation are fused to form a vector operation.
Additional loop transformations such as peeling and versioning may take place
-to align the number of iterations, and to align the memory accesses in the loop.
-The pass is implemented in @file{tree-vectorizer.c} (the main driver and general
-utilities), @file{tree-vect-analyze.c} and @file{tree-vect-transform.c}.
+to align the number of iterations, and to align the memory accesses in the
+loop.
+The pass is implemented in @file{tree-vectorizer.c} (the main driver),
+@file{tree-vect-loop.c} and @file{tree-vect-loop-manip.c} (loop specific parts
+and general loop utilities), @file{tree-vect-slp.c} (loop-aware SLP
+functionality), @file{tree-vect-stmts.c} and @file{tree-vect-data-refs.c}.
Analysis of data references is in @file{tree-data-ref.c}.
+SLP Vectorization. This pass performs vectorization of straight-line code. The
+pass is implemented in @file{tree-vectorizer.c} (the main driver),
+@file{tree-vect-slp.c}, @file{tree-vect-stmts.c} and
+@file{tree-vect-data-refs.c}.
+
Autoparallelization. This pass splits the loop iteration space to run
into several threads. The pass is implemented in @file{tree-parloops.c}.
"max basic blocks number in loop for loop invariant motion",
10000, 0, 0)
+/* Avoid SLP vectorization of large basic blocks. */
+DEFPARAM (PARAM_SLP_MAX_INSNS_IN_BB,
+ "slp-max-insns-in-bb",
+ "Maximum number of instructions in basic block to be considered for SLP vectorization",
+ 1000, 0, 0)
+
/*
Local variables:
mode:c
PARAM_VALUE (PARAM_SWITCH_CONVERSION_BRANCH_RATIO)
#define LOOP_INVARIANT_MAX_BBS_IN_LOOP \
PARAM_VALUE (PARAM_LOOP_INVARIANT_MAX_BBS_IN_LOOP)
+#define SLP_MAX_INSNS_IN_BB \
+ PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)
#endif /* ! GCC_PARAMS_H */
NEXT_PASS (pass_dce_loop);
}
NEXT_PASS (pass_complete_unroll);
+ NEXT_PASS (pass_slp_vectorize);
NEXT_PASS (pass_parallelize_loops);
NEXT_PASS (pass_loop_prefetch);
NEXT_PASS (pass_iv_optimize);
+2009-05-24 Ira Rosen <irar@il.ibm.com>
+
+ * gcc.dg/vect/bb-slp-1.c: New test.
+ * gcc.dg/vect/bb-slp-2.c, gcc.dg/vect/bb-slp-3.c,
+ gcc.dg/vect/bb-slp-4.c, gcc.dg/vect/bb-slp-5.c,
+ gcc.dg/vect/bb-slp-6.c, gcc.dg/vect/bb-slp-7.c,
+ gcc.dg/vect/bb-slp-8.c, gcc.dg/vect/bb-slp-9.c,
+ gcc.dg/vect/bb-slp-10.c, gcc.dg/vect/bb-slp-11.c,
+ gcc.dg/vect/no-tree-reassoc-bb-slp-12.c, gcc.dg/vect/bb-slp-13.c,
+ gcc.dg/vect/bb-slp-14.c, gcc.dg/vect/bb-slp-15.c,
+ gcc.dg/vect/bb-slp-16.c, gcc.dg/vect/bb-slp-17.c,
+ gcc.dg/vect/bb-slp-18.c, gcc.dg/vect/bb-slp-19.c,
+ gcc.dg/vect/bb-slp-20.c, gcc.dg/vect/bb-slp-21.c,
+ gcc.dg/vect/bb-slp-22.c: Likewise.
+ * gcc.dg/vect/vect.exp: Run basic block SLP tests.
+
2009-05-23 Mark Mitchell <mark@codesourcery.com>
Maxim Kuvyrkov <maxim@codesourcery.com>
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 32
+
+unsigned int out[N*8];
+unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+/* Copy N groups of eight consecutive unsigned ints from in[] to out[]
+   through post-incremented pointers, then compare the two arrays group
+   by group.  Calls abort () on any mismatch and returns 0 otherwise.
+   DUMMY only feeds the guard inside the copy loop; main passes 33, so
+   the guard never fires at run time.  The dg-final directives after
+   this test expect exactly one "basic block vectorized using SLP"
+   message in the slp dump.  */
+__attribute__ ((noinline)) int
+main1 (int dummy)
+{
+ int i;
+ unsigned int *pin = &in[0];
+ unsigned int *pout = &out[0];
+
+ for (i = 0; i < N; i++)
+ {
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ /* Avoid loop vectorization. */
+ if (dummy == 32)
+ abort ();
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (out[i*8] != in[i*8]
+ || out[i*8 + 1] != in[i*8 + 1]
+ || out[i*8 + 2] != in[i*8 + 2]
+ || out[i*8 + 3] != in[i*8 + 3]
+ || out[i*8 + 4] != in[i*8 + 4]
+ || out[i*8 + 5] != in[i*8 + 5]
+ || out[i*8 + 6] != in[i*8 + 6]
+ || out[i*8 + 7] != in[i*8 + 7])
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (33);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+/* Load in[0..3], add a per-element constant, multiply alternately by X
+   and Y, and store the four products starting at out[2] rather than
+   out[0].  Calls abort () if the stored values differ from the
+   recomputed expressions, returns 0 otherwise.  The dg-final directives
+   after this test expect no "basic block vectorized using SLP" message
+   and one "unsupported alignment in basic block." message.  */
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y)
+{
+ int i;
+ unsigned int *pin = &in[0];
+ unsigned int *pout = &out[2];
+ unsigned int a0, a1, a2, a3;
+
+ /* Misaligned store. */
+ a0 = *pin++ + 23;
+ a1 = *pin++ + 142;
+ a2 = *pin++ + 2;
+ a3 = *pin++ + 31;
+
+ *pout++ = a0 * x;
+ *pout++ = a1 * y;
+ *pout++ = a2 * x;
+ *pout++ = a3 * y;
+
+ /* Check results. */
+ if (out[2] != (in[0] + 23) * x
+ || out[3] != (in[1] + 142) * y
+ || out[4] != (in[2] + 2) * x
+ || out[5] != (in[3] + 31) * y)
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (2, 3);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
+/* { dg-final { scan-tree-dump-times "unsupported alignment in basic block." 1 "slp" } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+/* Same load/add/multiply/store group as the unsigned int variants, but
+   the intermediate values a0..a3 are declared short while the loads and
+   stores use unsigned int.  With the initial in[] contents the sums
+   23, 143, 4 and 34 fit in a short, so the check against the unsigned
+   expressions still holds.  Calls abort () on mismatch, returns 0
+   otherwise.  The dg-final directives after this test expect no
+   "basic block vectorized using SLP" message and one
+   "SLP with multiple types" message.  */
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y)
+{
+ int i;
+ unsigned int *pin = &in[0];
+ unsigned int *pout = &out[0];
+ short a0, a1, a2, a3;
+
+ a0 = *pin++ + 23;
+ a1 = *pin++ + 142;
+ a2 = *pin++ + 2;
+ a3 = *pin++ + 31;
+
+ *pout++ = a0 * x;
+ *pout++ = a1 * y;
+ *pout++ = a2 * x;
+ *pout++ = a3 * y;
+
+ /* Check results. */
+ if (out[0] != (in[0] + 23) * x
+ || out[1] != (in[1] + 142) * y
+ || out[2] != (in[2] + 2) * x
+ || out[3] != (in[3] + 31) * y)
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (2, 3);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
+/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+/* Straight-line group written with explicit array indices instead of
+   pointers: load in[0..3], add a constant to each, multiply alternately
+   by X and Y, and store to out[0..3].  Calls abort () if out[] differs
+   from the recomputed expressions, returns 0 otherwise.  The dg-final
+   directive after this test expects one
+   "basic block vectorized using SLP" message on vect_int_mult
+   targets.  */
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y)
+{
+ int i;
+ unsigned int a0, a1, a2, a3;
+
+ a0 = in[0] + 23;
+ a1 = in[1] + 142;
+ a2 = in[2] + 2;
+ a3 = in[3] + 31;
+
+ out[0] = a0 * x;
+ out[1] = a1 * y;
+ out[2] = a2 * x;
+ out[3] = a3 * y;
+
+ /* Check results. */
+ if (out[0] != (in[0] + 23) * x
+ || out[1] != (in[1] + 142) * y
+ || out[2] != (in[2] + 2) * x
+ || out[3] != (in[3] + 31) * y)
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (2, 3);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_int_mult } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+/* Group of four stores to out[0..3] whose loads are in[0], in[1], in[1]
+   and in[3]: in[1] is read twice and in[2] is never read, so the loads
+   are not four consecutive elements.  Calls abort () if out[] differs
+   from the recomputed expressions, returns 0 otherwise.  The dg-final
+   directive after this test expects no
+   "basic block vectorized using SLP" message.  */
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y)
+{
+ int i;
+ unsigned int a0, a1, a2, a3;
+
+ /* Not consecutive load with permutation - not supported. */
+ a0 = in[0] + 23;
+ a1 = in[1] + 142;
+ a2 = in[1] + 2;
+ a3 = in[3] + 31;
+
+ out[0] = a0 * x;
+ out[1] = a1 * y;
+ out[2] = a2 * x;
+ out[3] = a3 * y;
+
+ /* Check results. */
+ if (out[0] != (in[0] + 23) * x
+ || out[1] != (in[1] + 142) * y
+ || out[2] != (in[1] + 2) * x
+ || out[3] != (in[3] + 31) * y)
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (2, 3);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+/* First adjust X or Y with an if/else, then run the four-element
+   load/add/multiply/store group on in[0..3] and out[0..3] using the
+   adjusted values.  The check recomputes the products with the same
+   adjusted X and Y.  Calls abort () on mismatch, returns 0 otherwise.
+   The dg-final directive after this test expects one
+   "basic block vectorized using SLP" message on vect_int_mult
+   targets.  */
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y)
+{
+ int i;
+ unsigned int a0, a1, a2, a3;
+
+ if (x > y)
+ x = x + y;
+ else
+ y = x;
+
+ a0 = in[0] + 23;
+ a1 = in[1] + 142;
+ a2 = in[2] + 2;
+ a3 = in[3] + 31;
+
+ out[0] = a0 * x;
+ out[1] = a1 * y;
+ out[2] = a2 * x;
+ out[3] = a3 * y;
+
+ /* Check results. */
+ if (out[0] != (in[0] + 23) * x
+ || out[1] != (in[1] + 142) * y
+ || out[2] != (in[2] + 2) * x
+ || out[3] != (in[3] + 31) * y)
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (2, 3);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_int_mult } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 32
+
+unsigned int out[N*8];
+unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+unsigned int arr[N] = {0,1,2,3,4,5,6,7};
+/* For each of N iterations, add the scalar A to eight consecutive
+   elements of in[] and store the sums to out[]; A starts at 0 and is
+   updated at the end of every iteration.  The update condition
+   "arr[i] = i" is an assignment: it stores I into arr[i] and yields I,
+   so A becomes I when I is nonzero and 2 when I is 0.  The check loop
+   resets A to 0 and replays exactly the same update, so both loops see
+   the same sequence of A values.  Calls abort () on mismatch, returns 0
+   otherwise.  DUMMY is not referenced in this function.
+   NOTE(review): the assignment may have been meant as a comparison;
+   confirm before changing it, since the check loop depends on the
+   current behaviour.  The dg-final directive after this test expects
+   one "basic block vectorized using SLP" message.  */
+__attribute__ ((noinline)) int
+main1 (int dummy)
+{
+ int i;
+ unsigned int *pin = &in[0];
+ unsigned int *pout = &out[0];
+ unsigned int a = 0;
+
+ for (i = 0; i < N; i++)
+ {
+ *pout++ = *pin++ + a;
+ *pout++ = *pin++ + a;
+ *pout++ = *pin++ + a;
+ *pout++ = *pin++ + a;
+ *pout++ = *pin++ + a;
+ *pout++ = *pin++ + a;
+ *pout++ = *pin++ + a;
+ *pout++ = *pin++ + a;
+ if (arr[i] = i)
+ a = i;
+ else
+ a = 2;
+ }
+
+ a = 0;
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (out[i*8] != in[i*8] + a
+ || out[i*8 + 1] != in[i*8 + 1] + a
+ || out[i*8 + 2] != in[i*8 + 2] + a
+ || out[i*8 + 3] != in[i*8 + 3] + a
+ || out[i*8 + 4] != in[i*8 + 4] + a
+ || out[i*8 + 5] != in[i*8 + 5] + a
+ || out[i*8 + 6] != in[i*8 + 6] + a
+ || out[i*8 + 7] != in[i*8 + 7] + a)
+ abort ();
+
+ if (arr[i] = i)
+ a = i;
+ else
+ a = 2;
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (33);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int b[N];
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+/* After an if/else that adjusts X or Y, compute a0..a3 from in[0..3],
+   store only a0 and a1 to b[0..1], and store the four products to
+   out[0..3].  The b[] group therefore has two stores while the out[]
+   group has four.  Calls abort () if either array differs from the
+   recomputed expressions, returns 0 otherwise.  The dg-final directive
+   after this test expects one "basic block vectorized using SLP"
+   message on vect_int_mult targets.  */
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y)
+{
+ int i;
+ unsigned int a0, a1, a2, a3;
+
+ if (x > y)
+ x = x + y;
+ else
+ y = x;
+
+ a0 = in[0] + 23;
+ a1 = in[1] + 142;
+ a2 = in[2] + 2;
+ a3 = in[3] + 31;
+
+ b[0] = a0;
+ b[1] = a1;
+
+ out[0] = a0 * x;
+ out[1] = a1 * y;
+ out[2] = a2 * x;
+ out[3] = a3 * y;
+
+ /* Check results. */
+ if (out[0] != (in[0] + 23) * x
+ || out[1] != (in[1] + 142) * y
+ || out[2] != (in[2] + 2) * x
+ || out[3] != (in[3] + 31) * y
+ || b[0] != in[0] + 23
+ || b[1] != in[1] + 142)
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (2, 3);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_int_mult } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+/* Four-element load/add/multiply/store group on in[0..3] and out[0..3].
+   Unlike the sibling tests, the check compares out[] against products
+   of the local scalars a0..a3 instead of re-reading in[], so the scalar
+   values stay in use after the stores.  Calls abort () on mismatch,
+   returns 0 otherwise.  The dg-final directive after this test expects
+   one "basic block vectorized using SLP" message on vect_int_mult
+   targets.  */
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y)
+{
+ int i;
+ unsigned int a0, a1, a2, a3;
+
+ a0 = in[0] + 23;
+ a1 = in[1] + 142;
+ a2 = in[2] + 2;
+ a3 = in[3] + 31;
+
+ out[0] = a0 * x;
+ out[1] = a1 * y;
+ out[2] = a2 * x;
+ out[3] = a3 * y;
+
+ /* Check results. */
+ if (out[0] != a0 * x
+ || out[1] != a1 * y
+ || out[2] != a2 * x
+ || out[3] != a3 * y)
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (2, 3);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_int_mult } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned short out[N];
+unsigned short in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+/* Copy nine consecutive unsigned shorts from in[] to out[] through
+   post-incremented pointers and verify out[0..8] against in[0..8].
+   Calls abort () on mismatch, returns 0 otherwise.  The dg-final
+   directive after this test looks for one
+   "basic block vectorized using SLP" message but marks it xfail on all
+   targets, matching the "unsupported for now" comment below.  */
+__attribute__ ((noinline)) int
+main1 ()
+{
+ int i;
+ unsigned short *pin = &in[0];
+ unsigned short *pout = &out[0];
+
+ /* A group of 9 shorts - unsupported for now. */
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+
+ /* Check results. */
+ if (out[0] != in[0]
+ || out[1] != in[1]
+ || out[2] != in[2]
+ || out[3] != in[3]
+ || out[4] != in[4]
+ || out[5] != in[5]
+ || out[6] != in[6]
+ || out[7] != in[7]
+ || out[8] != in[8])
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { xfail *-*-* } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int out[N*8];
+unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+/* Copy in[] to out[] four unsigned ints per iteration over N*2
+   iterations, which covers all N*8 elements, then verify the arrays in
+   N groups of eight.  Calls abort () on mismatch, returns 0 otherwise.
+   DUMMY only feeds the guard inside the copy loop; main passes 33, so
+   the guard never fires at run time.  The dg-final directive after this
+   test expects one "basic block vectorized using SLP" message.  */
+__attribute__ ((noinline)) int
+main1 (int dummy)
+{
+ int i;
+ unsigned int *pin = &in[0];
+ unsigned int *pout = &out[0];
+
+ for (i = 0; i < N*2; i++)
+ {
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+
+ /* Avoid loop vectorization. */
+ if (dummy == 32)
+ abort ();
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (out[i*8] != in[i*8]
+ || out[i*8 + 1] != in[i*8 + 1]
+ || out[i*8 + 2] != in[i*8 + 2]
+ || out[i*8 + 3] != in[i*8 + 3]
+ || out[i*8 + 4] != in[i*8 + 4]
+ || out[i*8 + 5] != in[i*8 + 5]
+ || out[i*8 + 6] != in[i*8 + 6]
+ || out[i*8 + 7] != in[i*8 + 7])
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (33);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+int b[N];
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+/* After an if/else that adjusts X or Y, compute a0..a3 from in[0..3]
+   and feed them to two store groups: b[0..3] receives the negated
+   values (b is an int array while a0..a3 are unsigned int), and
+   out[0..3] receives the products with X and Y.  Calls abort () if
+   either array differs from the recomputed expressions, returns 0
+   otherwise.  On vect_int_mult targets the dg-final directives after
+   this test expect one "basic block vectorized using SLP" message and
+   one "vectorizing stmts using SLP" message.  */
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y)
+{
+ int i;
+ unsigned int a0, a1, a2, a3;
+
+ if (x > y)
+ x = x + y;
+ else
+ y = x;
+
+ /* Two SLP instances in the basic block, only one is supported for now,
+ the second one contains type conversion. */
+ a0 = in[0] + 23;
+ a1 = in[1] + 142;
+ a2 = in[2] + 2;
+ a3 = in[3] + 31;
+
+ b[0] = -a0;
+ b[1] = -a1;
+ b[2] = -a2;
+ b[3] = -a3;
+
+ out[0] = a0 * x;
+ out[1] = a1 * y;
+ out[2] = a2 * x;
+ out[3] = a3 * y;
+
+ /* Check results. */
+ if (out[0] != (in[0] + 23) * x
+ || out[1] != (in[1] + 142) * y
+ || out[2] != (in[2] + 2) * x
+ || out[3] != (in[3] + 31) * y
+ || b[0] != -(in[0] + 23)
+ || b[1] != -(in[1] + 142)
+ || b[2] != -(in[2] + 2)
+ || b[3] != -(in[3] + 31))
+
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (2, 3);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_int_mult } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp" { target vect_int_mult } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int b[N];
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+/* After an if/else that adjusts X or Y, compute a0..a3 from in[0..3]
+   and feed them to two four-element store groups: b[0..3] receives the
+   sums unchanged and out[0..3] receives the products with X and Y.
+   Calls abort () if either array differs from the recomputed
+   expressions, returns 0 otherwise.  The dg-final directives after this
+   test expect one "basic block vectorized using SLP" message, and one
+   or two "vectorizing stmts using SLP" messages depending on whether
+   the target is vect_int_mult.  */
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y)
+{
+ int i;
+ unsigned int a0, a1, a2, a3;
+
+ /* Two SLP instances in one basic block. */
+ if (x > y)
+ x = x + y;
+ else
+ y = x;
+
+ a0 = in[0] + 23;
+ a1 = in[1] + 142;
+ a2 = in[2] + 2;
+ a3 = in[3] + 31;
+
+ b[0] = a0;
+ b[1] = a1;
+ b[2] = a2;
+ b[3] = a3;
+
+ out[0] = a0 * x;
+ out[1] = a1 * y;
+ out[2] = a2 * x;
+ out[3] = a3 * y;
+
+ /* Check results. */
+ if (out[0] != (in[0] + 23) * x
+ || out[1] != (in[1] + 142) * y
+ || out[2] != (in[2] + 2) * x
+ || out[3] != (in[3] + 31) * y
+ || b[0] != (in[0] + 23)
+ || b[1] != (in[1] + 142)
+ || b[2] != (in[2] + 2)
+ || b[3] != (in[3] + 31))
+
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (2, 3);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp" { target { ! {vect_int_mult } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "slp" { target vect_int_mult } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int b[N];
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+/* Compute a0..a3 from in[0..3], then store them either unchanged to
+   b[0..3] (when X > Y) or multiplied by X and Y to out[0..3]
+   (otherwise), so the two store groups sit in different branches.  The
+   check only verifies the array that the taken branch wrote.  main
+   calls this with (2, 3), so the else branch runs.  Calls abort () on
+   mismatch, returns 0 otherwise.  The dg-final directives after this
+   test expect "basic block vectorized using SLP" once on targets
+   without vect_int_mult and twice on targets with it.  */
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y)
+{
+ int i;
+ unsigned int a0, a1, a2, a3;
+
+ a0 = in[0] + 23;
+ a1 = in[1] + 142;
+ a2 = in[2] + 2;
+ a3 = in[3] + 31;
+
+ if (x > y)
+ {
+ b[0] = a0;
+ b[1] = a1;
+ b[2] = a2;
+ b[3] = a3;
+ }
+ else
+ {
+ out[0] = a0 * x;
+ out[1] = a1 * y;
+ out[2] = a2 * x;
+ out[3] = a3 * y;
+ }
+
+ /* Check results. */
+ if ((x <= y
+ && (out[0] != (in[0] + 23) * x
+ || out[1] != (in[1] + 142) * y
+ || out[2] != (in[2] + 2) * x
+ || out[3] != (in[3] + 31) * y))
+ || (x > y
+ && (b[0] != (in[0] + 23)
+ || b[1] != (in[1] + 142)
+ || b[2] != (in[2] + 2)
+ || b[3] != (in[3] + 31))))
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (2, 3);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { ! {vect_int_mult } } } } } */
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 2 "slp" { target vect_int_mult } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+/* Copy four consecutive unsigned ints from in[] to out[] through
+   post-incremented pointers, with no loop, and verify out[0..3]
+   against in[0..3].  Calls abort () on mismatch, returns 0 otherwise.
+   The dg-final directive after this test expects one
+   "basic block vectorized using SLP" message.  */
+__attribute__ ((noinline)) int
+main1 ()
+{
+ int i;
+ unsigned int *pin = &in[0];
+ unsigned int *pout = &out[0];
+
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+
+ /* Check results. */
+ if (out[0] != in[0]
+ || out[1] != in[1]
+ || out[2] != in[2]
+ || out[3] != in[3])
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned short out[N];
+unsigned short in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+/* Copy four consecutive unsigned shorts from in[] to out[] through
+   post-incremented pointers and verify out[0..3] against in[0..3].
+   Calls abort () on mismatch, returns 0 otherwise.  The dg-final
+   directive after this test expects no
+   "basic block vectorized using SLP" message.
+   NOTE(review): presumably a group of four shorts is too small to fill
+   a vector on the tested targets - confirm.  */
+__attribute__ ((noinline)) int
+main1 ()
+{
+ int i;
+ unsigned short *pin = &in[0];
+ unsigned short *pout = &out[0];
+
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+
+ /* Check results. */
+ if (out[0] != in[0]
+ || out[1] != in[1]
+ || out[2] != in[2]
+ || out[3] != in[3])
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned short out[N];
+unsigned short in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+/* Copy eight consecutive unsigned shorts from in[] to out[] through
+   post-incremented pointers and verify out[0..7] against in[0..7].
+   Calls abort () on mismatch, returns 0 otherwise.  The dg-final
+   directive after this test expects one
+   "basic block vectorized using SLP" message.  */
+__attribute__ ((noinline)) int
+main1 ()
+{
+ int i;
+ unsigned short *pin = &in[0];
+ unsigned short *pout = &out[0];
+
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+ *pout++ = *pin++;
+
+ /* Check results. */
+ if (out[0] != in[0]
+ || out[1] != in[1]
+ || out[2] != in[2]
+ || out[3] != in[3]
+ || out[4] != in[4]
+ || out[5] != in[5]
+ || out[6] != in[6]
+ || out[7] != in[7])
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+/* Pointer-based form of the four-element group: read in[0..3] through
+   PIN, add a per-element constant, multiply alternately by X and Y, and
+   write the products through POUT starting at out[0].  Calls abort ()
+   if out[] differs from the recomputed expressions, returns 0
+   otherwise.  The dg-final directive after this test expects one
+   "basic block vectorized using SLP" message on vect_int_mult
+   targets.  */
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y)
+{
+ int i;
+ unsigned int *pin = &in[0];
+ unsigned int *pout = &out[0];
+ unsigned int a0, a1, a2, a3;
+
+ a0 = *pin++ + 23;
+ a1 = *pin++ + 142;
+ a2 = *pin++ + 2;
+ a3 = *pin++ + 31;
+
+ *pout++ = a0 * x;
+ *pout++ = a1 * y;
+ *pout++ = a2 * x;
+ *pout++ = a3 * y;
+
+ /* Check results. */
+ if (out[0] != (in[0] + 23) * x
+ || out[1] != (in[1] + 142) * y
+ || out[2] != (in[2] + 2) * x
+ || out[3] != (in[3] + 31) * y)
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (2, 3);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_int_mult } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+/* Four-element group in which the first three temporaries are formed
+   with an addition and the fourth with a multiplication, so the four
+   statements do not all apply the same operation.  The products with X
+   and Y are then stored to out[0..3].  Calls abort () if out[] differs
+   from the recomputed expressions, returns 0 otherwise.  The dg-final
+   directive after this test expects no
+   "basic block vectorized using SLP" message.  */
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y)
+{
+ int i;
+ unsigned int *pin = &in[0];
+ unsigned int *pout = &out[0];
+ unsigned int a0, a1, a2, a3;
+
+ /* Non isomorphic. */
+ a0 = *pin++ + 23;
+ a1 = *pin++ + 142;
+ a2 = *pin++ + 2;
+ a3 = *pin++ * 31;
+
+ *pout++ = a0 * x;
+ *pout++ = a1 * y;
+ *pout++ = a2 * x;
+ *pout++ = a3 * y;
+
+ /* Check results. */
+ if (out[0] != (in[0] + 23) * x
+ || out[1] != (in[1] + 142) * y
+ || out[2] != (in[2] + 2) * x
+ || out[3] != (in[3] * 31) * y)
+ abort();
+
+ return 0;
+}
+
+int main (void) /* Test driver: runs main1 once with x = 2 and y = 3. */
+{
+ check_vect (); /* Declared in tree-vect.h; presumably bails out when the target cannot run vector code -- confirm there. */
+
+ main1 (2, 3); /* Aborts internally when an out[] element is wrong. */
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout) /* Negative test:
+ the loads go through PIN and the stores through POUT, two pointer
+ parameters. Reads four elements via PIN, writes four via POUT, then
+ checks the globals out[0..3] against in[0..3] and aborts on a mismatch;
+ returns 0 otherwise. The check is only meaningful because the caller
+ in this file passes &in[0] and &out[0]. The dg-final directive of this
+ test expects no basic block vectorized using SLP. */
+{
+ int i; /* Not used by this test. */
+ unsigned int a0, a1, a2, a3;
+
+ /* pin and pout may alias. */
+ a0 = *pin++ + 23;
+ a1 = *pin++ + 142;
+ a2 = *pin++ + 2;
+ a3 = *pin++ + 31;
+
+ *pout++ = a0 * x; /* Four consecutive stores through POUT; the multiplier alternates x, y. */
+ *pout++ = a1 * y;
+ *pout++ = a2 * x;
+ *pout++ = a3 * y;
+
+ /* Check results. */
+ if (out[0] != (in[0] + 23) * x
+ || out[1] != (in[1] + 142) * y
+ || out[2] != (in[2] + 2) * x
+ || out[3] != (in[3] + 31) * y)
+ abort();
+
+ return 0;
+}
+
+int main (void) /* Test driver: runs main1 once on the global arrays with x = 2 and y = 3. */
+{
+ check_vect (); /* Declared in tree-vect.h; presumably bails out when the target cannot run vector code -- confirm there. */
+
+ main1 (2, 3, &in[0], &out[0]); /* main1 verifies the globals in/out itself and aborts on a mismatch. */
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y) /* Negative test: the four loads start at
+ in[1] rather than in[0]. Computes out[0..3] from in[1..4], x and y,
+ then aborts on a wrong element; returns 0 otherwise. The dg-final
+ directives of this test expect no basic block vectorized using SLP and
+ one "unsupported alignment in basic block." message in the slp dump. */
+{
+ int i; /* Not used by this test. */
+ unsigned int *pin = &in[1]; /* Load group begins one element past the start of in[]. */
+ unsigned int *pout = &out[0];
+ unsigned int a0, a1, a2, a3;
+
+ /* Misaligned load. */
+ a0 = *pin++ + 23;
+ a1 = *pin++ + 142;
+ a2 = *pin++ + 2;
+ a3 = *pin++ + 31;
+
+ *pout++ = a0 * x; /* Four consecutive stores to out[]; the multiplier alternates x, y. */
+ *pout++ = a1 * y;
+ *pout++ = a2 * x;
+ *pout++ = a3 * y;
+
+ /* Check results. */
+ if (out[0] != (in[1] + 23) * x
+ || out[1] != (in[2] + 142) * y
+ || out[2] != (in[3] + 2) * x
+ || out[3] != (in[4] + 31) * y)
+ abort();
+
+ return 0;
+}
+
+int main (void) /* Test driver: runs main1 once with x = 2 and y = 3. */
+{
+ check_vect (); /* Declared in tree-vect.h; presumably bails out when the target cannot run vector code -- confirm there. */
+
+ main1 (2, 3); /* Aborts internally when an out[] element is wrong. */
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
+/* { dg-final { scan-tree-dump-times "unsupported alignment in basic block." 1 "slp" } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int out[N];
+unsigned int in1[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+unsigned int in2[N] = {10,11,12,13,14,15,16,17,18,19,110,111,112,113,114,115};
+
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y) /* Straight-line group with two load
+ streams: out[k] = (in2[k] - in1[k] + constant) * (x or y) for k = 0..3,
+ followed by a self-check that aborts on a wrong element. Returns 0
+ otherwise. The dg-final directive of this test expects exactly one
+ basic block vectorized using SLP on vect_int_mult targets. */
+{
+ int i; /* Not used by this test. */
+ unsigned int *pin1 = &in1[0];
+ unsigned int *pin2 = &in2[0];
+ unsigned int *pout = &out[0];
+ unsigned int a0, a1, a2, a3;
+
+ a0 = *pin2++ - *pin1++ + 23; /* Each statement loads one element from in2[] and one from in1[]. */
+ a1 = *pin2++ - *pin1++ + 142;
+ a2 = *pin2++ - *pin1++ + 2;
+ a3 = *pin2++ - *pin1++ + 31;
+
+ *pout++ = a0 * x; /* Four consecutive stores to out[]; the multiplier alternates x, y. */
+ *pout++ = a1 * y;
+ *pout++ = a2 * x;
+ *pout++ = a3 * y;
+
+ /* Check results. */
+ if (out[0] != (in2[0] - in1[0] + 23) * x
+ || out[1] != (in2[1] - in1[1] + 142) * y
+ || out[2] != (in2[2] - in1[2] + 2) * x
+ || out[3] != (in2[3] - in1[3] + 31) * y)
+ abort();
+
+ return 0;
+}
+
+int main (void) /* Test driver: runs main1 once with x = 2 and y = 3. */
+{
+ check_vect (); /* Declared in tree-vect.h; presumably bails out when the target cannot run vector code -- confirm there. */
+
+ main1 (2, 3); /* Aborts internally when an out[] element is wrong. */
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_int_mult } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
"" $DEFAULT_VECTCFLAGS
lappend DEFAULT_VECTCFLAGS "-fdump-tree-vect-details"
+set VECT_SLP_CFLAGS $DEFAULT_VECTCFLAGS
+lappend VECT_SLP_CFLAGS "-fdump-tree-slp-details"
# Main loop.
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pr*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/slp-*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/bb-slp*.\[cS\]]] \
+ "" $VECT_SLP_CFLAGS
+
#### Tests with special options
global SAVED_DEFAULT_VECTCFLAGS
set SAVED_DEFAULT_VECTCFLAGS $DEFAULT_VECTCFLAGS
+set SAVED_VECT_SLP_CFLAGS $VECT_SLP_CFLAGS
# --param vect-max-version-for-alias-checks=0 tests
set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/O1-*.\[cS\]]] \
"" $O1_VECTCFLAGS
+# -fno-tree-reassoc
+set VECT_SLP_CFLAGS $SAVED_VECT_SLP_CFLAGS
+lappend VECT_SLP_CFLAGS "-fno-tree-reassoc"
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-tree-reassoc-bb-slp-*.\[cS\]]] \
+ "" $VECT_SLP_CFLAGS
# Clean up.
set dg-do-what-default ${save-dg-do-what-default}
DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops")
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization")
+DEFTIMEVAR (TV_TREE_SLP_VECTORIZATION, "tree slp vectorization")
DEFTIMEVAR (TV_GRAPHITE_TRANSFORMS , "GRAPHITE loop transforms")
DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear")
DEFTIMEVAR (TV_TREE_LOOP_DISTRIBUTION, "tree loop distribution")
return build_fold_addr_expr (TREE_OPERAND (addr, 0));
}
-/* Analyzes the behavior of the memory reference DR in the innermost loop that
- contains it. Returns true if analysis succeed or false otherwise. */
+/* Analyzes the behavior of the memory reference DR in the innermost loop or
+ basic block that contains it. Returns true if the analysis succeeds, false
+ otherwise. */
bool
dr_analyze_innermost (struct data_reference *dr)
int punsignedp, pvolatilep;
affine_iv base_iv, offset_iv;
tree init, dinit, step;
+ bool in_loop = (loop && loop->num);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "analyze_innermost: ");
}
base = build_fold_addr_expr (base);
- if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv, false))
+ if (in_loop)
{
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: evolution of base is not affine.\n");
- return false;
+ if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
+ false))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "failed: evolution of base is not affine.\n");
+ return false;
+ }
+ }
+ else
+ {
+ base_iv.base = base;
+ base_iv.step = ssize_int (0);
+ base_iv.no_overflow = true;
}
- if (!poffset)
+
+ if (!poffset || !in_loop)
{
offset_iv.base = ssize_int (0);
offset_iv.step = ssize_int (0);
struct loop *loop = loop_containing_stmt (stmt);
VEC (tree, heap) *access_fns = NULL;
tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
- tree base, off, access_fn;
- basic_block before_loop = block_before_loop (nest);
-
+ tree base, off, access_fn = NULL_TREE;
+ basic_block before_loop = NULL;
+
+ if (nest)
+ before_loop = block_before_loop (nest);
+
while (handled_component_p (aref))
{
if (TREE_CODE (aref) == ARRAY_REF)
{
op = TREE_OPERAND (aref, 1);
- access_fn = analyze_scalar_evolution (loop, op);
- access_fn = instantiate_scev (before_loop, loop, access_fn);
- VEC_safe_push (tree, heap, access_fns, access_fn);
+ if (nest)
+ {
+ access_fn = analyze_scalar_evolution (loop, op);
+ access_fn = instantiate_scev (before_loop, loop, access_fn);
+ VEC_safe_push (tree, heap, access_fns, access_fn);
+ }
TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
}
aref = TREE_OPERAND (aref, 0);
}
- if (INDIRECT_REF_P (aref))
+ if (nest && INDIRECT_REF_P (aref))
{
op = TREE_OPERAND (aref, 0);
access_fn = analyze_scalar_evolution (loop, op);
/* If the base of the object is not invariant in the loop nest, we cannot
analyze it. TODO -- in fact, it would suffice to record that there may
be arbitrary dependences in the loops where the base object varies. */
- if (!object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
- DR_BASE_OBJECT (a)))
+ if (loop_nest
+ && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
+ DR_BASE_OBJECT (a)))
{
DDR_ARE_DEPENDENT (res) = chrec_dont_know;
return res;
{
ddr = initialize_data_dependence_relation (a, b, loop_nest);
VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
- compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
+ if (loop_nest)
+ compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
}
if (compute_self_and_rr)
dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
gcc_assert (dr != NULL);
- /* FIXME -- data dependence analysis does not work correctly for objects with
- invariant addresses. Let us fail here until the problem is fixed. */
- if (dr_address_invariant_p (dr))
+ /* FIXME -- data dependence analysis does not work correctly for objects
+ with invariant addresses in loop nests. Let us fail here until the
+ problem is fixed. */
+ if (dr_address_invariant_p (dr) && nest)
{
free_data_ref (dr);
if (dump_file && (dump_flags & TDF_DETAILS))
/* Search the data references in LOOP, and record the information into
DATAREFS. Returns chrec_dont_know when failing to analyze a
+ difficult case, returns NULL_TREE otherwise. */
+
+static tree
+find_data_references_in_bb (struct loop *loop, basic_block bb,
+ VEC (data_reference_p, heap) **datarefs) /* Scans the statements of the single
+ basic block BB (not a whole loop) and lets find_data_references_in_stmt
+ record their data references in *DATAREFS. LOOP is only forwarded to
+ find_data_references_in_stmt; compute_data_dependences_for_bb passes
+ NULL for it. Returns chrec_dont_know as soon as one statement cannot
+ be handled, NULL_TREE when every statement was processed. */
+{
+ gimple_stmt_iterator bsi;
+
+ for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
+ {
+ gimple stmt = gsi_stmt (bsi);
+
+ if (!find_data_references_in_stmt (loop, stmt, datarefs))
+ {
+ struct data_reference *res; /* Placeholder appended on failure; presumably marks the list as incomplete for the caller -- confirm. */
+ res = XCNEW (struct data_reference);
+ VEC_safe_push (data_reference_p, heap, *datarefs, res);
+
+ return chrec_dont_know; /* Stop at the first statement that fails; later statements are not scanned. */
+ }
+ }
+
+ return NULL_TREE;
+}
+
+/* Search the data references in LOOP, and record the information into
+ DATAREFS. Returns chrec_dont_know when failing to analyze a
difficult case, returns NULL_TREE otherwise.
TODO: This function should be made smarter so that it can handle address
{
basic_block bb, *bbs;
unsigned int i;
- gimple_stmt_iterator bsi;
bbs = get_loop_body_in_dom_order (loop);
{
bb = bbs[i];
- for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
- {
- gimple stmt = gsi_stmt (bsi);
-
- if (!find_data_references_in_stmt (loop, stmt, datarefs))
- {
- struct data_reference *res;
- res = XCNEW (struct data_reference);
- VEC_safe_push (data_reference_p, heap, *datarefs, res);
-
- free (bbs);
- return chrec_dont_know;
- }
- }
+ if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
+ {
+ free (bbs);
+ return chrec_dont_know;
+ }
}
free (bbs);
return res;
}
+/* Returns true when the data dependences for the basic block BB have been
+ computed, false otherwise.
+ DATAREFS is initialized to all the array elements contained in this basic
+ block, DEPENDENCE_RELATIONS contains the relations between the data
+ references. Compute read-read and self relations if
+ COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.
+
+ No loop is involved here: find_data_references_in_bb is called with a
+ NULL loop and compute_all_dependences with a NULL loop nest. False is
+ returned only when find_data_references_in_bb yields chrec_dont_know;
+ in that case compute_all_dependences is not called, so nothing is added
+ to DEPENDENCE_RELATIONS by this function. */
+bool
+compute_data_dependences_for_bb (basic_block bb,
+ bool compute_self_and_read_read_dependences,
+ VEC (data_reference_p, heap) **datarefs,
+ VEC (ddr_p, heap) **dependence_relations)
+{
+ if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know) /* NULL: no enclosing loop is supplied. */
+ return false;
+
+ compute_all_dependences (*datarefs, dependence_relations, NULL, /* NULL: no loop nest for a basic block. */
+ compute_self_and_read_read_dependences);
+ return true;
+}
+
/* Entry point (for testing only). Analyze all the data references
and the dependence relations in LOOP.
extern bool compute_data_dependences_for_loop (struct loop *, bool,
VEC (data_reference_p, heap) **,
VEC (ddr_p, heap) **);
+extern bool compute_data_dependences_for_bb (basic_block, bool,
+ VEC (data_reference_p, heap) **,
+ VEC (ddr_p, heap) **);
extern tree find_data_references_in_loop (struct loop *,
VEC (data_reference_p, heap) **);
extern void print_direction_vector (FILE *, lambda_vector, int);
extern struct gimple_opt_pass pass_if_conversion;
extern struct gimple_opt_pass pass_loop_distribution;
extern struct gimple_opt_pass pass_vectorize;
+extern struct gimple_opt_pass pass_slp_vectorize;
extern struct gimple_opt_pass pass_complete_unroll;
extern struct gimple_opt_pass pass_complete_unrolli;
extern struct gimple_opt_pass pass_parallelize_loops;
and DRB is accessed before DRA. */
diff_mod_size = (init_a - init_b) % type_size_a;
- if ((init_a - init_b) > step)
+ if (step && (init_a - init_b) > step)
return false;
if (diff_mod_size == 0)
interleaving, and DRA is accessed before DRB. */
diff_mod_size = (init_b - init_a) % type_size_a;
- if ((init_b - init_a) > step)
+ if (step && (init_b - init_a) > step)
return false;
if (diff_mod_size == 0)
return true;
}
+
/* Function vect_analyze_data_ref_dependence.
Return TRUE if there (might) exist a dependence between a memory-reference
loop_vec_info loop_vinfo)
{
unsigned int i;
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ struct loop *loop = NULL;
+ int vectorization_factor = 0;
struct data_reference *dra = DDR_A (ddr);
struct data_reference *drb = DDR_B (ddr);
stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
return false;
}
- if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
+ if (loop_vinfo)
+ {
+ loop = LOOP_VINFO_LOOP (loop_vinfo);
+ vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ }
+
+ if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
return false;
if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
{
+ if (loop_vinfo)
+ {
+ if (vect_print_dump_info (REPORT_DR_DETAILS))
+ {
+ fprintf (vect_dump, "versioning for alias required: "
+ "can't determine dependence between ");
+ print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
+ fprintf (vect_dump, " and ");
+ print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
+ }
+
+ /* Add to list of ddrs that need to be tested at run-time. */
+ return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
+ }
+
+ /* When vectorizing a basic block, an unknown dependence can still mean
+ strided access. */
+ if (vect_check_interleaving (dra, drb))
+ return false;
+
if (vect_print_dump_info (REPORT_DR_DETAILS))
{
- fprintf (vect_dump,
- "versioning for alias required: can't determine dependence between ");
+ fprintf (vect_dump, "can't determine dependence between ");
print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
fprintf (vect_dump, " and ");
print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
}
- /* Add to list of ddrs that need to be tested at run-time. */
- return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
+
+ return true;
}
+ /* Versioning for alias is not yet supported for basic block SLP, and
+ dependence distance is inapplicable; hence, in case of known data
+ dependence, basic block vectorization is impossible for now. */
+ if (!loop_vinfo)
+ {
+ if (dra != drb && vect_check_interleaving (dra, drb))
+ return false;
+
+ if (vect_print_dump_info (REPORT_DR_DETAILS))
+ {
+ fprintf (vect_dump, "determined dependence between ");
+ print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
+ fprintf (vect_dump, " and ");
+ print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
+ }
+
+ return true;
+ }
+
+ /* Loop-based vectorization and known data dependence. */
if (DDR_NUM_DIST_VECTS (ddr) == 0)
{
if (vect_print_dump_info (REPORT_DR_DETAILS))
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
{
- fprintf (vect_dump,
- "not vectorized, possible dependence "
- "between data-refs ");
+ fprintf (vect_dump, "not vectorized, possible dependence "
+ "between data-refs ");
print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
fprintf (vect_dump, " and ");
print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
exist any data dependences between them. */
bool
-vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
+vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
+ bb_vec_info bb_vinfo)
{
unsigned int i;
- VEC (ddr_p, heap) * ddrs = LOOP_VINFO_DDRS (loop_vinfo);
+ VEC (ddr_p, heap) *ddrs = NULL;
struct data_dependence_relation *ddr;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_analyze_dependences ===");
+ if (loop_vinfo)
+ ddrs = LOOP_VINFO_DDRS (loop_vinfo);
+ else
+ ddrs = BB_VINFO_DDRS (bb_vinfo);
+
for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
return false;
gimple stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ struct loop *loop = NULL;
tree ref = DR_REF (dr);
tree vectype;
tree base, base_addr;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "vect_compute_data_ref_alignment:");
+ if (loop_vinfo)
+ loop = LOOP_VINFO_LOOP (loop_vinfo);
+
/* Initialize misalignment to unknown. */
SET_DR_MISALIGNMENT (dr, -1);
stays the same throughout the execution of the inner-loop, which is why
we have to check that the stride of the dataref in the inner-loop evenly
divides by the vector size. */
- if (nested_in_vect_loop_p (loop, stmt))
+ if (loop && nested_in_vect_loop_p (loop, stmt))
{
tree step = DR_STEP (dr);
HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
Return FALSE if a data reference is found that cannot be vectorized. */
static bool
-vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
+vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
+ bb_vec_info bb_vinfo)
{
- VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
+ VEC (data_reference_p, heap) *datarefs;
struct data_reference *dr;
unsigned int i;
+ if (loop_vinfo)
+ datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
+ else
+ datarefs = BB_VINFO_DATAREFS (bb_vinfo);
+
for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
if (!vect_compute_data_ref_alignment (dr))
return false;
Return TRUE if all data references in the loop can be
handled with respect to alignment. */
-static bool
-vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
+bool
+vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
- VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
+ VEC (data_reference_p, heap) *datarefs;
struct data_reference *dr;
enum dr_alignment_support supportable_dr_alignment;
unsigned int i;
+ if (loop_vinfo)
+ datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
+ else
+ datarefs = BB_VINFO_DATAREFS (bb_vinfo);
+
for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
{
gimple stmt = DR_STMT (dr);
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "Peeling for alignment will be applied.");
- stat = vect_verify_datarefs_alignment (loop_vinfo);
+ stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
gcc_assert (stat);
return stat;
}
/* Peeling and versioning can't be done together at this time. */
gcc_assert (! (do_peeling && do_versioning));
- stat = vect_verify_datarefs_alignment (loop_vinfo);
+ stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
gcc_assert (stat);
return stat;
}
/* This point is reached if neither peeling nor versioning is being done. */
gcc_assert (! (do_peeling || do_versioning));
- stat = vect_verify_datarefs_alignment (loop_vinfo);
+ stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
return stat;
}
Return FALSE if a data reference is found that cannot be vectorized. */
bool
-vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
+vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
+ bb_vec_info bb_vinfo)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
- if (!vect_compute_data_refs_alignment (loop_vinfo))
+ if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
{
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fprintf (vect_dump,
gimple stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
HOST_WIDE_INT stride;
bool slp_impossible = false;
DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
if (vect_print_dump_info (REPORT_DR_DETAILS))
{
- fprintf (vect_dump, "Detected single element interleaving %d ",
- DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
+ fprintf (vect_dump, "Detected single element interleaving ");
print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
fprintf (vect_dump, " step ");
print_generic_expr (vect_dump, step, TDF_SLIM);
the type to get COUNT_IN_BYTES. */
count_in_bytes = type_size * count;
- /* Check that the size of the interleaving (including gaps) is not greater
- than STEP. */
+ /* Check that the size of the interleaving (including gaps) is not
+ greater than STEP. */
if (dr_step && dr_step < count_in_bytes + gaps * type_size)
{
if (vect_print_dump_info (REPORT_DETAILS))
/* Check that the size of the interleaving is equal to STEP for stores,
i.e., that there are no gaps. */
- if (dr_step != count_in_bytes)
+ if (dr_step && dr_step != count_in_bytes)
{
if (DR_IS_READ (dr))
{
}
/* Check that STEP is a multiple of type size. */
- if ((dr_step % type_size) != 0)
+ if (dr_step && (dr_step % type_size) != 0)
{
if (vect_print_dump_info (REPORT_DETAILS))
{
if (slp_impossible)
return false;
}
+
+ if (stride == 0)
+ stride = count;
+
DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
/* SLP: create an SLP data structure for every interleaving group of
stores for further analysis in vect_analyse_slp. */
if (!DR_IS_READ (dr) && !slp_impossible)
- VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
+ {
+ if (loop_vinfo)
+ VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
+ stmt);
+ if (bb_vinfo)
+ VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
+ stmt);
+ }
}
return true;
gimple stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ struct loop *loop = NULL;
HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
- if (!step)
+ if (loop_vinfo)
+ loop = LOOP_VINFO_LOOP (loop_vinfo);
+
+ if (loop_vinfo && !step)
{
if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "bad data-ref access");
+ fprintf (vect_dump, "bad data-ref access in loop");
return false;
}
- /* Don't allow invariant accesses. */
- if (dr_step == 0)
+ /* Don't allow invariant accesses in loops. */
+ if (loop_vinfo && dr_step == 0)
return false;
- if (nested_in_vect_loop_p (loop, stmt))
+ if (loop && nested_in_vect_loop_p (loop, stmt))
{
/* Interleaved accesses are not yet supported within outer-loop
vectorization for references in the inner-loop. */
return true;
}
- if (nested_in_vect_loop_p (loop, stmt))
+ if (loop && nested_in_vect_loop_p (loop, stmt))
{
if (vect_print_dump_info (REPORT_ALIGNMENT))
fprintf (vect_dump, "strided access in outer loop.");
FORNOW: handle only arrays and pointer accesses. */
bool
-vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
+vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
unsigned int i;
- VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
+ VEC (data_reference_p, heap) *datarefs;
struct data_reference *dr;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
+ if (loop_vinfo)
+ datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
+ else
+ datarefs = BB_VINFO_DATAREFS (bb_vinfo);
+
for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
if (!vect_analyze_data_ref_access (dr))
{
/* Function vect_analyze_data_refs.
- Find all the data references in the loop.
+ Find all the data references in the loop or basic block.
The general structure of the analysis of data refs in the vectorizer is as
follows:
- 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
- find and analyze all data-refs in the loop and their dependences.
+ 1- vect_analyze_data_refs(loop/bb): call
+ compute_data_dependences_for_loop/bb to find and analyze all data-refs
+ in the loop/bb and their dependences.
2- vect_analyze_dependences(): apply dependence testing using ddrs.
3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
*/
bool
-vect_analyze_data_refs (loop_vec_info loop_vinfo)
+vect_analyze_data_refs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ struct loop *loop = NULL;
+ basic_block bb = NULL;
unsigned int i;
VEC (data_reference_p, heap) *datarefs;
struct data_reference *dr;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
-
- compute_data_dependences_for_loop (loop, true,
- &LOOP_VINFO_DATAREFS (loop_vinfo),
- &LOOP_VINFO_DDRS (loop_vinfo));
+
+ if (loop_vinfo)
+ {
+ loop = LOOP_VINFO_LOOP (loop_vinfo);
+ compute_data_dependences_for_loop (loop, true,
+ &LOOP_VINFO_DATAREFS (loop_vinfo),
+ &LOOP_VINFO_DDRS (loop_vinfo));
+ datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
+ }
+ else
+ {
+ bb = BB_VINFO_BB (bb_vinfo);
+ compute_data_dependences_for_bb (bb, true,
+ &BB_VINFO_DATAREFS (bb_vinfo),
+ &BB_VINFO_DDRS (bb_vinfo));
+ datarefs = BB_VINFO_DATAREFS (bb_vinfo);
+ }
/* Go through the data-refs, check that the analysis succeeded. Update pointer
from stmt_vec_info struct to DR and vectype. */
- datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
{
inner-most enclosing loop). We do that by building a reference to the
first location accessed by the inner-loop, and analyze it relative to
the outer-loop. */
- if (nested_in_vect_loop_p (loop, stmt))
+ if (loop && nested_in_vect_loop_p (loop, stmt))
{
tree outer_step, outer_base, outer_init;
HOST_WIDE_INT pbitsize, pbitpos;
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
- struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
tree base_name;
tree data_ref_base_var;
tree init = unshare_expr (DR_INIT (dr));
tree vect_ptr_type;
tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- gcc_assert (loop);
- if (loop != containing_loop)
+ if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
{
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
- gcc_assert (nested_in_vect_loop_p (loop, stmt));
+ gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
}
- /* Create data_ref_base */
- base_name = build_fold_indirect_ref (data_ref_base);
+ if (loop_vinfo)
+ base_name = build_fold_indirect_ref (data_ref_base);
+ else
+ {
+ base_offset = ssize_int (0);
+ init = ssize_int (0);
+ base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
+ }
+
data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
add_referenced_var (data_ref_base_var);
data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
}
/* base + base_offset */
- addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
- data_ref_base, base_offset);
-
+ if (loop_vinfo)
+ addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
+ data_ref_base, base_offset);
+ else
+ {
+ if (TREE_CODE (DR_REF (dr)) == INDIRECT_REF)
+ addr_base = unshare_expr (TREE_OPERAND (DR_REF (dr), 0));
+ else
+ addr_base = build1 (ADDR_EXPR,
+ build_pointer_type (TREE_TYPE (DR_REF (dr))),
+ unshare_expr (DR_REF (dr)));
+ }
+
vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
vec_stmt = fold_convert (vect_ptr_type, addr_base);
addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
get_name (base_name));
-
add_referenced_var (addr_expr);
vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
gimple_seq_add_seq (new_stmt_list, seq);
tree base_name;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
- struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
+ struct loop *loop = NULL;
+ bool nested_in_vect_loop = false;
+ struct loop *containing_loop = NULL;
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
tree vect_ptr_type;
tree vect_ptr;
tree new_temp;
gimple vec_stmt;
gimple_seq new_stmt_list = NULL;
- edge pe;
+ edge pe = NULL;
basic_block new_bb;
tree vect_ptr_init;
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
tree indx_before_incr, indx_after_incr;
gimple incr;
tree step;
-
+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+ gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+
+ if (loop_vinfo)
+ {
+ loop = LOOP_VINFO_LOOP (loop_vinfo);
+ nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
+ containing_loop = (gimple_bb (stmt))->loop_father;
+ pe = loop_preheader_edge (loop);
+ }
+ else
+ {
+ gcc_assert (bb_vinfo);
+ only_init = true;
+ *ptr_incr = NULL;
+ }
+
/* Check the step (evolution) of the load in LOOP, and record
whether it's invariant. */
if (nested_in_vect_loop)
new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
offset, loop);
- pe = loop_preheader_edge (loop);
if (new_stmt_list)
{
- new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
- gcc_assert (!new_bb);
+ if (pe)
+ {
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
+ gcc_assert (!new_bb);
+ }
+ else
+ gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
}
*initial_address = new_temp;
fold_convert (vect_ptr_type, new_temp));
vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
- new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
- gcc_assert (!new_bb);
-
+ if (pe)
+ {
+ new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
+ gcc_assert (!new_bb);
+ }
+ else
+ gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
/** (4) Handle the updating of the vector-pointer inside the loop.
This is needed when ONLY_INIT is false, and also when AT_LOOP
is the inner-loop nested in LOOP (during outer-loop vectorization).
**/
- if (only_init && at_loop == loop) /* No update in loop is required. */
+ /* No update in loop is required. */
+ if (only_init && (!loop_vinfo || at_loop == loop))
{
/* Copy the points-to information if it exists. */
if (DR_PTR_INFO (dr))
vect_ptr, loop, &incr_gsi, insert_after,
&indx_before_incr, &indx_after_incr);
incr = gsi_stmt (incr_gsi);
- set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
+ set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
/* Copy the points-to information if it exists. */
if (DR_PTR_INFO (dr))
containing_loop, &incr_gsi, insert_after, &indx_before_incr,
&indx_after_incr);
incr = gsi_stmt (incr_gsi);
- set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
+ set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
/* Copy the points-to information if it exists. */
if (DR_PTR_INFO (dr))
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
enum machine_mode mode = TYPE_MODE (vectype);
- struct loop *vect_loop = LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info));
- bool nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
bool invariant_in_outerloop = false;
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ struct loop *vect_loop = NULL;
+ bool nested_in_vect_loop = false;
if (aligned_access_p (dr))
return dr_aligned;
+ if (!loop_vinfo)
+ /* FORNOW: Misaligned accesses are supported only in loops. */
+ return dr_unaligned_unsupported;
+
+ vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
+ nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
+
if (nested_in_vect_loop)
{
tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
{
gimple phi = gsi_stmt (si);
gimple_set_uid (phi, 0);
- set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res));
+ set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));
}
for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
{
gimple stmt = gsi_stmt (si);
gimple_set_uid (stmt, 0);
- set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
+ set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
}
}
}
gcc_assert (stmt_info);
- if (!vect_analyze_stmt (stmt, &need_to_vectorize))
+ if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
return false;
if (STMT_VINFO_RELEVANT_P (stmt_info) && !PURE_SLP_STMT (stmt_info))
FORNOW: Handle only simple, array references, which
alignment can be forced, and aligned pointer-references. */
- ok = vect_analyze_data_refs (loop_vinfo);
+ ok = vect_analyze_data_refs (loop_vinfo, NULL);
if (!ok)
{
if (vect_print_dump_info (REPORT_DETAILS))
/* Analyze the alignment of the data-refs in the loop.
Fail if a data reference is found that cannot be vectorized. */
- ok = vect_analyze_data_refs_alignment (loop_vinfo);
+ ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
if (!ok)
{
if (vect_print_dump_info (REPORT_DETAILS))
/* Analyze data dependences between the data-refs in the loop.
FORNOW: fail at the first data dependence that we encounter. */
- ok = vect_analyze_data_ref_dependences (loop_vinfo);
+ ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL);
if (!ok)
{
if (vect_print_dump_info (REPORT_DETAILS))
/* Analyze the access patterns of the data-refs in the loop (consecutive,
complex, etc.). FORNOW: Only handle consecutive access pattern. */
- ok = vect_analyze_data_ref_accesses (loop_vinfo);
+ ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
if (!ok)
{
if (vect_print_dump_info (REPORT_DETAILS))
}
/* Check the SLP opportunities in the loop, analyze and build SLP trees. */
- ok = vect_analyze_slp (loop_vinfo);
+ ok = vect_analyze_slp (loop_vinfo, NULL);
if (ok)
{
/* Decide which possible SLP instances to SLP. */
add_referenced_var (vec_dest);
induction_phi = create_phi_node (vec_dest, iv_loop->header);
set_vinfo_for_stmt (induction_phi,
- new_stmt_vec_info (induction_phi, loop_vinfo));
+ new_stmt_vec_info (induction_phi, loop_vinfo, NULL));
induc_def = PHI_RESULT (induction_phi);
/* Create the iv update inside the loop */
vec_def = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, vec_def);
gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
- set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo));
+ set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo,
+ NULL));
/* Set the arguments of the phi node: */
add_phi_arg (induction_phi, vec_init, pe);
gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
set_vinfo_for_stmt (new_stmt,
- new_stmt_vec_info (new_stmt, loop_vinfo));
+ new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
}
for (j = 0; j < ncopies; j++)
{
phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
- set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo));
+ set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo, NULL));
if (j == 0)
new_phi = phi;
else
epilog_stmt = adjustment_def ? epilog_stmt : new_phi;
STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt;
set_vinfo_for_stmt (epilog_stmt,
- new_stmt_vec_info (epilog_stmt, loop_vinfo));
+ new_stmt_vec_info (epilog_stmt, loop_vinfo,
+ NULL));
if (adjustment_def)
STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
The last use is the reduction variable. */
for (i = 0; i < op_type-1; i++)
{
- is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &def_stmt,
+ is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, NULL, &def_stmt,
&def, &dt);
gcc_assert (is_simple_use);
if (dt != vect_internal_def
return false;
}
- is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &def_stmt, &def,
- &dt);
+ is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, NULL, &def_stmt,
+ &def, &dt);
gcc_assert (is_simple_use);
gcc_assert (dt == vect_reduction_def);
gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
{
/* Create the reduction-phi that defines the reduction-operand. */
new_phi = create_phi_node (vec_dest, loop->header);
- set_vinfo_for_stmt (new_phi, new_stmt_vec_info (new_phi, loop_vinfo));
+ set_vinfo_for_stmt (new_phi, new_stmt_vec_info (new_phi, loop_vinfo,
+ NULL));
}
/* Handle uses. */
op = TREE_OPERAND (gimple_op (stmt, 1), i);
else
op = gimple_op (stmt, i + 1);
- if (op && !vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
+ if (op
+ && !vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def, &dt))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "use not simple.");
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== scheduling SLP instances ===");
- vect_schedule_slp (loop_vinfo);
+ vect_schedule_slp (loop_vinfo, NULL);
}
/* Hybrid SLP stmts must be vectorized in addition to SLP. */
stmt_vinfo = vinfo_for_stmt (use_stmt);
loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
- if (!vect_is_simple_use (name, loop_vinfo, def_stmt, &def, &dt))
+ if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
return false;
if (dt != vect_internal_def
|| (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
return false;
- if (!vect_is_simple_use (oprnd0, loop_vinfo, &dummy_gimple, &dummy, &dt))
+ if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
+ &dt))
return false;
return true;
/* Mark the stmts that are involved in the pattern. */
gsi_insert_before (&si, pattern_stmt, GSI_SAME_STMT);
set_vinfo_for_stmt (pattern_stmt,
- new_stmt_vec_info (pattern_stmt, loop_vinfo));
+ new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt;
#include "optabs.h"
#include "tree-vectorizer.h"
+/* Extract the location of the basic block BB in the source code.
+ Walk the statements of BB from its start and return the location of the
+ first statement whose location is not UNKNOWN_LOC. Return UNKNOWN_LOC if
+ BB is NULL or if no statement in BB carries a known location. */
+
+LOC
+find_bb_location (basic_block bb)
+{
+ gimple stmt = NULL;
+ gimple_stmt_iterator si;
+
+ if (!bb)
+ return UNKNOWN_LOC;
+
+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ stmt = gsi_stmt (si);
+ if (gimple_location (stmt) != UNKNOWN_LOC)
+ return gimple_location (stmt);
+ }
+
+ return UNKNOWN_LOC;
+}
+
+
/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
static void
the SLP group (stored in FIRST_STMT_...). */
static bool
-vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
- gimple stmt, VEC (gimple, heap) **def_stmts0,
+vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
+ slp_tree slp_node, gimple stmt,
+ VEC (gimple, heap) **def_stmts0,
VEC (gimple, heap) **def_stmts1,
enum vect_def_type *first_stmt_dt0,
enum vect_def_type *first_stmt_dt1,
stmt_vec_info stmt_info =
vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
enum gimple_rhs_class rhs_class;
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ struct loop *loop = NULL;
+
+ if (loop_vinfo)
+ loop = LOOP_VINFO_LOOP (loop_vinfo);
rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
{
oprnd = gimple_op (stmt, i + 1);
- if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
+ if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
+ &dt[i])
|| (!def_stmt && dt[i] != vect_constant_def))
{
if (vect_print_dump_info (REPORT_SLP))
return false;
}
- /* Check if DEF_STMT is a part of a pattern and get the def stmt from
- the pattern. Check that all the stmts of the node are in the
+ /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
+ from the pattern. Check that all the stmts of the node are in the
pattern. */
- if (def_stmt && gimple_bb (def_stmt)
+ if (loop && def_stmt && gimple_bb (def_stmt)
&& flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
&& vinfo_for_stmt (def_stmt)
&& STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
TRUE. */
static bool
-vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
- unsigned int group_size,
- int *inside_cost, int *outside_cost,
- int ncopies_for_cost, unsigned int *max_nunits,
+vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
+ slp_tree *node, unsigned int group_size,
+ int *inside_cost, int *outside_cost,
+ int ncopies_for_cost, unsigned int *max_nunits,
VEC (int, heap) **load_permutation,
- VEC (slp_tree, heap) **loads)
+ VEC (slp_tree, heap) **loads,
+ unsigned int vectorization_factor)
{
VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
tree lhs;
bool stop_recursion = false, need_same_oprnds = false;
tree vectype, scalar_type, first_op1 = NULL_TREE;
- unsigned int vectorization_factor = 0, ncopies;
+ unsigned int ncopies;
optab optab;
int icode;
enum machine_mode optab_op2_mode;
}
return false;
}
-
- gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
- vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+
ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
- if (ncopies > 1 && vect_print_dump_info (REPORT_SLP))
- fprintf (vect_dump, "SLP with multiple types ");
+ if (ncopies != 1)
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "SLP with multiple types ");
+ /* FORNOW: multiple types are unsupported in BB SLP. */
+ if (bb_vinfo)
+ return false;
+ }
+
/* In case of multiple types we need to detect the smallest type. */
if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
*max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
if (REFERENCE_CLASS_P (lhs))
{
/* Store. */
- if (!vect_get_and_check_slp_defs (loop_vinfo, *node, stmt,
- &def_stmts0, &def_stmts1,
+ if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
+ stmt, &def_stmts0, &def_stmts1,
&first_stmt_dt0,
&first_stmt_dt1,
&first_stmt_def0_type,
}
/* Find the def-stmts. */
- if (!vect_get_and_check_slp_defs (loop_vinfo, *node, stmt,
+ if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
&def_stmts0, &def_stmts1,
&first_stmt_dt0, &first_stmt_dt1,
&first_stmt_def0_type,
SLP_TREE_RIGHT (left_node) = NULL;
SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
- if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
+ if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
inside_cost, outside_cost, ncopies_for_cost,
- max_nunits, load_permutation, loads))
+ max_nunits, load_permutation, loads,
+ vectorization_factor))
return false;
SLP_TREE_LEFT (*node) = left_node;
SLP_TREE_RIGHT (right_node) = NULL;
SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
- if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
+ if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
inside_cost, outside_cost, ncopies_for_cost,
- max_nunits, load_permutation, loads))
+ max_nunits, load_permutation, loads,
+ vectorization_factor))
return false;
SLP_TREE_RIGHT (*node) = right_node;
}
+/* Mark the statements of the tree rooted at NODE as relevant
+ (vect_used_in_scope). Does nothing if NODE is NULL. Every scalar stmt
+ of NODE is expected either to have no relevance recorded yet or to be
+ already marked vect_used_in_scope (checked with gcc_assert). The left
+ and right subtrees are processed recursively after NODE itself. */
+
+static void
+vect_mark_slp_stmts_relevant (slp_tree node)
+{
+ int i;
+ gimple stmt;
+ stmt_vec_info stmt_info;
+
+ if (!node)
+ return;
+
+ for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
+ {
+ stmt_info = vinfo_for_stmt (stmt);
+ gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
+ || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
+ STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
+ }
+
+ vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
+ vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
+}
+
+
/* Check if the permutation required by the SLP INSTANCE is supported.
Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
int i = 0, j, prev = -1, next, k;
bool supported;
- /* FORNOW: permutations are only supported for loop-aware SLP. */
+ /* FORNOW: permutations are only supported in SLP. */
if (!slp_instn)
return false;
Return FALSE if it's impossible to SLP any stmt in the loop. */
static bool
-vect_analyze_slp_instance (loop_vec_info loop_vinfo, gimple stmt)
+vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
+ gimple stmt)
{
slp_instance new_instance;
slp_tree node = XNEW (struct _slp_tree);
tree vectype, scalar_type;
gimple next;
unsigned int vectorization_factor = 0, ncopies;
- bool slp_impossible = false;
int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
unsigned int max_nunits = 0;
VEC (int, heap) *load_permutation;
}
nunits = TYPE_VECTOR_SUBPARTS (vectype);
- vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ if (loop_vinfo)
+ vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ else
+ /* No multitypes in BB SLP. */
+ vectorization_factor = nunits;
+
ncopies = vectorization_factor / nunits;
+ /* Calculate the unrolling factor. */
+ unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
+ if (unrolling_factor != 1 && !loop_vinfo)
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "Build SLP failed: unrolling required in BB SLP");
+
+ return false;
+ }
+
/* Create a node (a root of the SLP tree) for the packed strided stores. */
SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
next = stmt;
SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
- /* Calculate the unrolling factor. */
- unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
-
/* Calculate the number of vector stmts to create based on the unrolling
factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
GROUP_SIZE / NUNITS otherwise. */
loads = VEC_alloc (slp_tree, heap, group_size);
/* Build the tree for the SLP instance. */
- if (vect_build_slp_tree (loop_vinfo, &node, group_size, &inside_cost,
- &outside_cost, ncopies_for_cost, &max_nunits,
- &load_permutation, &loads))
+ if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
+ &inside_cost, &outside_cost, ncopies_for_cost,
+ &max_nunits, &load_permutation, &loads,
+ vectorization_factor))
{
/* Create a new SLP instance. */
new_instance = XNEW (struct _slp_instance);
if (max_nunits > nunits)
unrolling_factor = least_common_multiple (max_nunits, group_size)
/ group_size;
-
+
SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
else
VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
- VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
- new_instance);
+ if (loop_vinfo)
+ VEC_safe_push (slp_instance, heap,
+ LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
+ new_instance);
+ else
+ VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
+ new_instance);
+
if (vect_print_dump_info (REPORT_SLP))
vect_print_slp_tree (node);
VEC_free (int, heap, load_permutation);
VEC_free (slp_tree, heap, loads);
- if (slp_impossible)
- return false;
-
- /* SLP failed for this instance, but it is still possible to SLP other stmts
- in the loop. */
- return true;
+ return false;
}
trees of packed scalar stmts if SLP is possible. */
bool
-vect_analyze_slp (loop_vec_info loop_vinfo)
+vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
unsigned int i;
- VEC (gimple, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
+ VEC (gimple, heap) *strided_stores;
gimple store;
+ bool ok = false;
if (vect_print_dump_info (REPORT_SLP))
fprintf (vect_dump, "=== vect_analyze_slp ===");
+ if (loop_vinfo)
+ strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
+ else
+ strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
+
for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
- if (!vect_analyze_slp_instance (loop_vinfo, store))
- {
- /* SLP failed. No instance can be SLPed in the loop. */
- if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
- fprintf (vect_dump, "SLP failed.");
+ if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
+ ok = true;
- return false;
- }
+ if (bb_vinfo && !ok)
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "Failed to SLP the basic block.");
+
+ return false;
+ }
return true;
}
vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}
+
+/* Create and initialize a new bb_vec_info struct for BB, as well as
+ stmt_vec_info structs for all the stmts in it.
+ The struct is allocated zero-filled with xcalloc. For every stmt
+ visited through gsi_start_bb (BB) the uid is reset to 0 and a fresh
+ stmt_vec_info is attached with a NULL loop_vec_info and the new
+ bb_vec_info. The strided-stores and SLP-instances vectors are
+ allocated, and the result is recorded in BB->aux before being
+ returned. */
+
+static bb_vec_info
+new_bb_vec_info (basic_block bb)
+{
+ bb_vec_info res = NULL;
+ gimple_stmt_iterator gsi;
+
+ res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
+ BB_VINFO_BB (res) = bb;
+
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ gimple_set_uid (stmt, 0);
+ set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
+ }
+
+ BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
+ BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
+
+ bb->aux = res;
+ return res;
+}
+
+
+/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
+ stmts in the basic block. Does nothing if BB_VINFO is NULL. Stmts
+ without a stmt_vec_info are skipped. The strided-stores and
+ SLP-instances vectors are released, BB_VINFO itself is freed, and
+ BB->aux is cleared.
+ NOTE(review): only the vector containers are freed here; the SLP
+ instances they hold and whatever BB_VINFO_DDRS refers to are not
+ released by this function - confirm they are freed elsewhere. */
+
+static void
+destroy_bb_vec_info (bb_vec_info bb_vinfo)
+{
+ basic_block bb;
+ gimple_stmt_iterator si;
+
+ if (!bb_vinfo)
+ return;
+
+ bb = BB_VINFO_BB (bb_vinfo);
+
+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ gimple stmt = gsi_stmt (si);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+
+ if (stmt_info)
+ /* Free stmt_vec_info. */
+ free_stmt_vec_info (stmt);
+ }
+
+ VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
+ VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
+ free (bb_vinfo);
+ bb->aux = NULL;
+}
+
+
+/* Analyze statements contained in SLP tree node after recursively analyzing
+ the subtree. Return TRUE if the operations are supported.
+ A NULL NODE is trivially supported. The left and right children are
+ analyzed first; if either fails, FALSE is returned without looking at
+ the stmts of NODE. Each scalar stmt of NODE must have a stmt_vec_info
+ and be a pure SLP stmt (both asserted) and is then passed to
+ vect_analyze_stmt together with NODE. BB_VINFO is only forwarded to
+ the recursive calls. */
+
+static bool
+vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
+{
+ bool dummy; /* Output of vect_analyze_stmt that is not used here. */
+ int i;
+ gimple stmt;
+
+ if (!node)
+ return true;
+
+ if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
+ || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
+ return false;
+
+ for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
+ {
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ gcc_assert (stmt_info);
+ gcc_assert (PURE_SLP_STMT (stmt_info));
+
+ if (!vect_analyze_stmt (stmt, &dummy, node))
+ return false;
+ }
+
+ return true;
+}
+
+
+/* Analyze statements in SLP instances of the basic block. Return TRUE if the
+ operations are supported.
+ Every SLP instance recorded in BB_VINFO is checked with
+ vect_slp_analyze_node_operations. An instance that fails the check is
+ freed and removed from the instance vector; the remaining instances
+ are kept. The result is FALSE only when no instance is left after
+ this filtering. */
+
+static bool
+vect_slp_analyze_operations (bb_vec_info bb_vinfo)
+{
+ VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
+ slp_instance instance;
+ int i;
+
+ for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
+ {
+ if (!vect_slp_analyze_node_operations (bb_vinfo,
+ SLP_INSTANCE_TREE (instance)))
+ {
+ vect_free_slp_instance (instance);
+ VEC_ordered_remove (slp_instance, slp_instances, i); /* I is not advanced: the next instance now sits at index I. */
+ }
+ else
+ i++;
+ }
+
+ if (!VEC_length (slp_instance, slp_instances))
+ return false;
+
+ return true;
+}
+
+
+/* Check if the basic block BB can be vectorized using SLP.
+ BB is rejected up front if it contains more than
+ PARAM_SLP_MAX_INSNS_IN_BB statements. Otherwise a bb_vec_info is
+ created for BB and the following analyses are run in order, each of
+ them with a NULL loop_vec_info: data references, data-ref alignment,
+ data dependences, data-ref access patterns, alignment verification
+ and SLP instance detection. The stmts of the SLP instances found are
+ then marked as pure SLP and relevant, and their operations are
+ analyzed.
+ Return the bb_vec_info on success. If any step fails, the bb_vec_info
+ is destroyed and NULL is returned. */
+
+bb_vec_info
+vect_slp_analyze_bb (basic_block bb)
+{
+ bb_vec_info bb_vinfo;
+ VEC (ddr_p, heap) *ddrs;
+ VEC (slp_instance, heap) *slp_instances;
+ slp_instance instance;
+ int i, insns = 0;
+ gimple_stmt_iterator gsi;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
+
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ insns++;
+
+ if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
+ {
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+ fprintf (vect_dump, "not vectorized: too many instructions in basic "
+ "block.\n");
+
+ return NULL;
+ }
+
+ bb_vinfo = new_bb_vec_info (bb);
+ if (!bb_vinfo)
+ return NULL;
+
+ if (!vect_analyze_data_refs (NULL, bb_vinfo))
+ {
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+ fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
+ "block.\n");
+
+ destroy_bb_vec_info (bb_vinfo);
+ return NULL;
+ }
+
+ ddrs = BB_VINFO_DDRS (bb_vinfo);
+ if (!VEC_length (ddr_p, ddrs)) /* Give up when the DDR vector is empty. */
+ {
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+ fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
+ "block.\n");
+
+ destroy_bb_vec_info (bb_vinfo);
+ return NULL;
+ }
+
+ if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
+ {
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+ fprintf (vect_dump, "not vectorized: bad data alignment in basic "
+ "block.\n");
+
+ destroy_bb_vec_info (bb_vinfo);
+ return NULL;
+ }
+
+ if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo))
+ {
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+ fprintf (vect_dump, "not vectorized: unhandled data dependence in basic"
+ " block.\n");
+
+ destroy_bb_vec_info (bb_vinfo);
+ return NULL;
+ }
+
+ if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
+ {
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+ fprintf (vect_dump, "not vectorized: unhandled data access in basic "
+ "block.\n");
+
+ destroy_bb_vec_info (bb_vinfo);
+ return NULL;
+ }
+
+ if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
+ {
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+ fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
+ "block.\n");
+
+ destroy_bb_vec_info (bb_vinfo);
+ return NULL;
+ }
+
+ /* Check the SLP opportunities in the basic block, analyze and build SLP
+ trees. */
+ if (!vect_analyze_slp (NULL, bb_vinfo))
+ {
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+ fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
+ "in basic block.\n");
+
+ destroy_bb_vec_info (bb_vinfo);
+ return NULL;
+ }
+
+ slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
+
+ /* Mark all the statements that we want to vectorize as pure SLP and
+ relevant. */
+ for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
+ {
+ vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
+ vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
+ }
+
+ if (!vect_slp_analyze_operations (bb_vinfo))
+ {
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+ fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
+
+ destroy_bb_vec_info (bb_vinfo);
+ return NULL;
+ }
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "BB will be vectorized using SLP\n");
+
+ return bb_vinfo;
+}
+
+
/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
the number of created vector stmts depends on the unrolling factor). However,
the actual number of vector stmts for every SLP node depends on VF which is
/ SLP_INSTANCE_UNROLLING_FACTOR (instance);
}
+
/* For constant and loop invariant defs of SLP_NODE this function returns
(vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects);
}
+
/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
building a vector of type MASK_TYPE from it) and two input vectors placed in
DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
- unsigned int vectorization_factor)
+ unsigned int vectorization_factor)
{
gimple stmt;
bool strided_store, is_store;
si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
else
si = gsi_for_stmt (stmt);
-
+
is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
if (is_store)
{
bool
-vect_schedule_slp (loop_vec_info loop_vinfo)
+vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
- VEC (slp_instance, heap) *slp_instances =
- LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
+ VEC (slp_instance, heap) *slp_instances;
slp_instance instance;
- unsigned int i;
+ unsigned int i, vf;
bool is_store = false;
+ if (loop_vinfo)
+ {
+ slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
+ vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ }
+ else
+ {
+ slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
+ vf = 1;
+ }
+
for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
{
/* Schedule the tree of INSTANCE. */
is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
- instance, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
-
+ instance, vf);
if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
|| vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fprintf (vect_dump, "vectorizing stmts using SLP.");
return is_store;
}
+
+
+/* Vectorize the basic block BB using SLP.
+ BB must already have a bb_vec_info attached, as returned by
+ vec_info_for_bb (asserted). The stmts of BB are scanned in order;
+ when the first stmt with a non-zero STMT_SLP_TYPE is reached, all the
+ SLP instances of the block are scheduled at once and the scan stops.
+ Afterwards the virtual operand of the current function is marked for
+ renaming and the SSA form is updated. */
+
+void
+vect_slp_transform_bb (basic_block bb)
+{
+ bb_vec_info bb_vinfo = vec_info_for_bb (bb);
+ gimple_stmt_iterator si;
+
+ gcc_assert (bb_vinfo);
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "SLPing BB\n");
+
+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ gimple stmt = gsi_stmt (si);
+ stmt_vec_info stmt_info;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "------>SLPing statement: ");
+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+ }
+
+ stmt_info = vinfo_for_stmt (stmt);
+ gcc_assert (stmt_info);
+
+ /* Schedule all the SLP instances when the first SLP stmt is reached. */
+ if (STMT_SLP_TYPE (stmt_info))
+ {
+ vect_schedule_slp (NULL, bb_vinfo);
+ break;
+ }
+ }
+
+ mark_sym_for_renaming (gimple_vop (cfun));
+ /* The memory tags and pointers in vectorized statements need to
+ have their SSA forms updated. FIXME, why can't this be delayed
+ until all the loops have been transformed?
+ NOTE(review): the FIXME talks about loops although this function
+ transforms a single basic block - presumably carried over from the
+ loop transformation; confirm. */
+ update_ssa (TODO_update_ssa);
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "BB VECTORIZED\n");
+}
+
+
if (!exist_non_indexing_operands_for_use_p (use, stmt))
return true;
- if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
+ if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
{
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
{
case vect_unused_in_scope:
relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
- vect_used_in_outer_by_reduction : vect_unused_in_scope;
+ vect_used_in_outer_by_reduction : vect_unused_in_scope;
break;
case vect_used_in_outer_by_reduction:
else
{
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-
- if (nested_in_vect_loop_p (loop, stmt))
- loop = loop->inner;
- pe = loop_preheader_edge (loop);
- new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
- gcc_assert (!new_bb);
+
+ if (loop_vinfo)
+ {
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+
+ if (nested_in_vect_loop_p (loop, stmt))
+ loop = loop->inner;
+
+ pe = loop_preheader_edge (loop);
+ new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
+ gcc_assert (!new_bb);
+ }
+ else
+ {
+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
+ basic_block bb;
+ gimple_stmt_iterator gsi_bb_start;
+
+ gcc_assert (bb_vinfo);
+ bb = BB_VINFO_BB (bb_vinfo);
+ gsi_bb_start = gsi_start_bb (bb);
+ gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
+ }
}
if (vect_print_dump_info (REPORT_DETAILS))
return vec_oprnd;
}
+
/* Function vect_get_vec_def_for_operand.
OP is an operand in STMT. This function returns a (vector) def that will be
print_generic_expr (vect_dump, op, TDF_SLIM);
}
- is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
+ is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
+ &dt);
gcc_assert (is_simple_use);
if (vect_print_dump_info (REPORT_DETAILS))
{
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
- set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo));
+ set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
+ bb_vinfo));
if (vect_print_dump_info (REPORT_DETAILS))
{
enum { NARROW, NONE, WIDEN } modifier;
size_t i, nargs;
+ /* FORNOW: unsupported in basic block SLP. */
+ gcc_assert (loop_vinfo);
+
if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false;
}
rhs_type = TREE_TYPE (op);
- if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[i]))
+ if (!vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def, &dt[i]))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "use not simple.");
/* Is STMT a vectorizable conversion? */
+ /* FORNOW: unsupported in basic block SLP. */
+ gcc_assert (loop_vinfo);
+
if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false;
gcc_assert (ncopies >= 1);
/* Check the operands of the operation. */
- if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
+ if (!vect_is_simple_use (op0, loop_vinfo, NULL, &def_stmt, &def, &dt[0]))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "use not simple.");
int i;
VEC(tree,heap) *vec_oprnds = NULL;
tree vop;
+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
if (ncopies > 1)
return false; /* FORNOW */
- if (!STMT_VINFO_RELEVANT_P (stmt_info))
+ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false;
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
else
return false;
- if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[0]))
+ if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0]))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "use not simple.");
unsigned int k;
bool shift_p = false;
bool scalar_shift_arg = false;
+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+ int vf;
+
+ if (loop_vinfo)
+ vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ else
+ /* FORNOW: multiple types are not supported in basic block SLP. */
+ vf = nunits_in;
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
gcc_assert (ncopies >= 1);
- if (!STMT_VINFO_RELEVANT_P (stmt_info))
+ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false;
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
}
op0 = gimple_assign_rhs1 (stmt);
- if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
+ if (!vect_is_simple_use (op0, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0]))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "use not simple.");
if (op_type == binary_op)
{
op1 = gimple_assign_rhs2 (stmt);
- if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
+ if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
+ &dt[1]))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "use not simple.");
fprintf (vect_dump, "op not supported by target.");
/* Check only during analysis. */
if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
- || (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
- < vect_min_worthwhile_factor (code)
+ || (vf < vect_min_worthwhile_factor (code)
&& !vec_stmt))
return false;
if (vect_print_dump_info (REPORT_DETAILS))
/* Worthwhile without SIMD support? Check only during analysis. */
if (!VECTOR_MODE_P (TYPE_MODE (vectype))
- && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
- < vect_min_worthwhile_factor (code)
+ && vf < vect_min_worthwhile_factor (code)
&& !vec_stmt)
{
if (vect_print_dump_info (REPORT_DETAILS))
VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
tree last_oprnd, intermediate_type;
+ /* FORNOW: not supported by basic block SLP vectorization. */
+ gcc_assert (loop_vinfo);
+
if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false;
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
-
gcc_assert (ncopies >= 1);
if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
return false;
/* Check the operands of the operation. */
- if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
+ if (!vect_is_simple_use (op0, loop_vinfo, NULL, &def_stmt, &def, &dt[0]))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "use not simple.");
prev_stmt_info);
}
}
-
+
/* Function vectorizable_type_promotion
VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
+ /* FORNOW: not supported by basic block SLP vectorization. */
+ gcc_assert (loop_vinfo);
+
if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false;
return false;
/* Check the operands of the operation. */
- if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
+ if (!vect_is_simple_use (op0, loop_vinfo, NULL, &def_stmt, &def, &dt[0]))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "use not simple.");
if (op_type == binary_op)
{
op1 = gimple_assign_rhs2 (stmt);
- if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
+ if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1]))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "use not simple.");
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ struct loop *loop = NULL;
enum machine_mode vec_mode;
tree dummy;
enum dr_alignment_support alignment_support_scheme;
bool slp = (slp_node != NULL);
stmt_vec_info first_stmt_vinfo;
unsigned int vec_num;
+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+
+ if (loop_vinfo)
+ loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
gcc_assert (ncopies >= 1);
/* FORNOW. This restriction should be relaxed. */
- if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
+ if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "multiple types in nested loop.");
return false;
}
- if (!STMT_VINFO_RELEVANT_P (stmt_info))
+ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false;
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
gcc_assert (gimple_assign_single_p (stmt));
op = gimple_assign_rhs1 (stmt);
- if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
+ if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "use not simple.");
{
gcc_assert (gimple_assign_single_p (next_stmt));
op = gimple_assign_rhs1 (next_stmt);
- if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
+ if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
+ &def, &dt))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "use not simple.");
DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
/* FORNOW */
- gcc_assert (!nested_in_vect_loop_p (loop, stmt));
+ gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
/* We vectorize all the stmts of the interleaving group when we
reach the last stmt in the group. */
dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
&dummy, &ptr_incr, false,
&inv_p);
- gcc_assert (!inv_p);
+ gcc_assert (bb_vinfo || !inv_p);
}
else
{
for (i = 0; i < group_size; i++)
{
op = VEC_index (tree, oprnds, i);
- vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
+ vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
+ &dt);
vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
VEC_replace(tree, dr_chain, i, vec_oprnd);
VEC_replace(tree, oprnds, i, vec_oprnd);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
stmt_vec_info prev_stmt_info;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ struct loop *loop = NULL;
struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
- bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
+ bool nested_in_vect_loop = false;
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
tree new_temp;
bool slp = (slp_node != NULL);
bool slp_perm = false;
enum tree_code code;
+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+ int vf;
+
+ if (loop_vinfo)
+ {
+ loop = LOOP_VINFO_LOOP (loop_vinfo);
+ nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
+ vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ }
+ else
+ /* FORNOW: multiple types are not supported in basic block SLP. */
+ vf = nunits;
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
return false;
}
- if (slp && SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
- slp_perm = true;
-
- if (!STMT_VINFO_RELEVANT_P (stmt_info))
+ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false;
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
{
strided_load = false;
vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- }
+ if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
+ slp_perm = true;
+ }
else
vec_num = group_size;
This can only occur when vectorizing memory accesses in the inner-loop
nested within an outer-loop that is being vectorized. */
- if (nested_in_vect_loop_p (loop, stmt)
+ if (loop && nested_in_vect_loop_p (loop, stmt)
&& (TREE_INT_CST_LOW (DR_STEP (dr))
% GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
{
}
/* 4. Handle invariant-load. */
- if (inv_p)
+ if (inv_p && !bb_vinfo)
{
gcc_assert (!strided_load);
gcc_assert (nested_in_vect_loop_p (loop, stmt));
if (slp_perm)
{
- if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi,
- LOOP_VINFO_VECT_FACTOR (loop_vinfo),
+ if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
slp_node_instance, false))
{
VEC_free (tree, heap, dr_chain);
if (TREE_CODE (lhs) == SSA_NAME)
{
gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
- if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
+ if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
+ &dt))
return false;
}
else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
if (TREE_CODE (rhs) == SSA_NAME)
{
gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
- if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
+ if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
+ &dt))
return false;
}
else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
enum tree_code code;
+ /* FORNOW: unsupported in basic block SLP. */
+ gcc_assert (loop_vinfo);
+
gcc_assert (ncopies >= 1);
if (ncopies > 1)
return false; /* FORNOW */
if (TREE_CODE (then_clause) == SSA_NAME)
{
gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
- if (!vect_is_simple_use (then_clause, loop_vinfo,
+ if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
&then_def_stmt, &def, &dt))
return false;
}
if (TREE_CODE (else_clause) == SSA_NAME)
{
gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
- if (!vect_is_simple_use (else_clause, loop_vinfo,
+ if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
&else_def_stmt, &def, &dt))
return false;
}
/* Make sure the statement is vectorizable. */
bool
-vect_analyze_stmt (gimple stmt, bool *need_to_vectorize)
+vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+ enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
bool ok;
+ HOST_WIDE_INT dummy;
+ tree scalar_type, vectype;
if (vect_print_dump_info (REPORT_DETAILS))
{
print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
}
- /* Skip stmts that do not need to be vectorized. In loops this is expected
+ /* Skip stmts that do not need to be vectorized. In loops this is expected
to include:
- the COND_EXPR which is the loop exit condition
- any LABEL_EXPRs in the loop
- - computations that are used only for array indexing or loop control.
+ - computations that are used only for array indexing or loop control.
In basic blocks we only analyze statements that are a part of some SLP
instance, therefore, all the statements are relevant. */
- if (!STMT_VINFO_RELEVANT_P (stmt_info)
+ if (!STMT_VINFO_RELEVANT_P (stmt_info)
&& !STMT_VINFO_LIVE_P (stmt_info))
{
if (vect_print_dump_info (REPORT_DETAILS))
break;
case vect_reduction_def:
- gcc_assert (relevance == vect_used_in_outer
+ gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
|| relevance == vect_used_in_outer_by_reduction
- || relevance == vect_unused_in_scope);
+ || relevance == vect_unused_in_scope));
break;
case vect_induction_def:
gcc_unreachable ();
}
+ if (bb_vinfo)
+ {
+ gcc_assert (PURE_SLP_STMT (stmt_info));
+
+ scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "get vectype for scalar type: ");
+ print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
+ }
+
+ vectype = get_vectype_for_scalar_type (scalar_type);
+ if (!vectype)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "not SLPed: unsupported data-type ");
+ print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
+ }
+ return false;
+ }
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "vectype: ");
+ print_generic_expr (vect_dump, vectype, TDF_SLIM);
+ }
+
+ STMT_VINFO_VECTYPE (stmt_info) = vectype;
+ }
+
if (STMT_VINFO_RELEVANT_P (stmt_info))
{
gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
}
ok = true;
- if (STMT_VINFO_RELEVANT_P (stmt_info)
- || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
+ if (!bb_vinfo
+ && (STMT_VINFO_RELEVANT_P (stmt_info)
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
|| vectorizable_type_demotion (stmt, NULL, NULL, NULL)
|| vectorizable_conversion (stmt, NULL, NULL, NULL)
|| vectorizable_store (stmt, NULL, NULL, NULL)
|| vectorizable_condition (stmt, NULL, NULL)
|| vectorizable_reduction (stmt, NULL, NULL));
+ else
+ {
+ if (bb_vinfo)
+ ok = (vectorizable_operation (stmt, NULL, NULL, node)
+ || vectorizable_assignment (stmt, NULL, NULL, node)
+ || vectorizable_load (stmt, NULL, NULL, node, NULL)
+ || vectorizable_store (stmt, NULL, NULL, node));
+ }
if (!ok)
{
fprintf (vect_dump, "supported: ");
print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
}
-
+
return false;
}
+ if (bb_vinfo)
+ return true;
+
/* Stmts that are (also) "live" (i.e. - that are used out of the loop)
need extra handling, except for vectorizable reductions. */
if (STMT_VINFO_LIVE_P (stmt_info)
fprintf (vect_dump, "supported: ");
print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
}
-
+
return false;
}
if (!PURE_SLP_STMT (stmt_info))
{
- /* Groups of strided accesses whose size is not a power of 2 are not
- vectorizable yet using loop-vectorization. Therefore, if this stmt
- feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
- loop-based vectorized), the loop cannot be vectorized. */
+ /* Groups of strided accesses whose size is not a power of 2 are not
+ vectorizable yet using loop-vectorization. Therefore, if this stmt
+ feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
+ loop-based vectorized), the loop cannot be vectorized. */
if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
&& exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
DR_GROUP_FIRST_DR (stmt_info)))) == -1)
return false;
}
}
-
+
return true;
}
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
gimple orig_stmt_in_pattern;
bool done;
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
switch (STMT_VINFO_TYPE (stmt_info))
{
/* Handle inner-loop stmts whose DEF is used in the loop-nest that
is being vectorized, but outside the immediately enclosing loop. */
if (vec_stmt
- && nested_in_vect_loop_p (loop, stmt)
+ && STMT_VINFO_LOOP_VINFO (stmt_info)
+ && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
+ STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
&& STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
&& (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
- || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer_by_reduction))
+ || STMT_VINFO_RELEVANT (stmt_info) ==
+ vect_used_in_outer_by_reduction))
{
- struct loop *innerloop = loop->inner;
+ struct loop *innerloop = LOOP_VINFO_LOOP (
+ STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
imm_use_iterator imm_iter;
use_operand_p use_p;
tree scalar_dest;
gimple exit_phi;
if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
+ fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
/* Find the relevant loop-exit phi-node, and reord the vec_stmt there
(to be used when vectorizing outer-loop stmts that use the DEF of
Create and initialize a new stmt_vec_info struct for STMT. */
stmt_vec_info
-new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo)
+new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
+ bb_vec_info bb_vinfo)
{
stmt_vec_info res;
res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
STMT_VINFO_TYPE (res) = undef_vec_info_type;
STMT_VINFO_STMT (res) = stmt;
STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
+ STMT_VINFO_BB_VINFO (res) = bb_vinfo;
STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
STMT_VINFO_LIVE_P (res) = false;
STMT_VINFO_VECTYPE (res) = NULL;
/* Function vect_is_simple_use.
Input:
- LOOP - the loop that is being vectorized.
- OPERAND - operand of a stmt in LOOP.
+ LOOP_VINFO - the vect info of the loop that is being vectorized.
+ BB_VINFO - the vect info of the basic block that is being vectorized.
+ OPERAND - operand of a stmt in the loop or bb.
DEF - the defining stmt in case OPERAND is an SSA_NAME.
Returns whether a stmt with OPERAND can be vectorized.
- Supportable operands are constants, loop invariants, and operands that are
- defined by the current iteration of the loop. Unsupportable operands are
- those that are defined by a previous iteration of the loop (as is the case
- in reduction/induction computations). */
+ For loops, supportable operands are constants, loop invariants, and operands
+ that are defined by the current iteration of the loop. Unsupportable
+ operands are those that are defined by a previous iteration of the loop (as
+ is the case in reduction/induction computations).
+ For basic blocks, supportable operands are constants and bb invariants.
+ For now, operands defined outside the basic block are not supported. */
bool
-vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, gimple *def_stmt,
+vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
+ bb_vec_info bb_vinfo, gimple *def_stmt,
tree *def, enum vect_def_type *dt)
{
basic_block bb;
stmt_vec_info stmt_vinfo;
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ struct loop *loop = NULL;
+
+ if (loop_vinfo)
+ loop = LOOP_VINFO_LOOP (loop_vinfo);
*def_stmt = NULL;
*def = NULL_TREE;
*dt = vect_constant_def;
return true;
}
+
if (is_gimple_min_invariant (operand))
{
*def = operand;
fprintf (vect_dump, "non-associatable copy.");
operand = TREE_OPERAND (operand, 0);
}
+
if (TREE_CODE (operand) != SSA_NAME)
{
if (vect_print_dump_info (REPORT_DETAILS))
}
bb = gimple_bb (*def_stmt);
- if (!flow_bb_inside_loop_p (loop, bb))
+
+ if ((loop && !flow_bb_inside_loop_p (loop, bb))
+ || (!loop && bb != BB_VINFO_BB (bb_vinfo))
+ || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
*dt = vect_external_def;
else
{
#include "cfglayout.h"
#include "tree-vectorizer.h"
#include "tree-pass.h"
+#include "timevar.h"
/* vect_dump will be set to stderr or dump_file if exist. */
FILE *vect_dump;
/* vect_verbosity_level set to an invalid value
to mark that it's uninitialized. */
static enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
+static enum verbosity_levels user_vect_verbosity_level = MAX_VERBOSITY_LEVEL;
-/* Loop location. */
+/* Loop or bb location. */
LOC vect_location;
/* Bitmap of virtual variables to be renamed. */
vl = atoi (val);
if (vl < MAX_VERBOSITY_LEVEL)
- vect_verbosity_level = (enum verbosity_levels) vl;
+ user_vect_verbosity_level = (enum verbosity_levels) vl;
else
- vect_verbosity_level = (enum verbosity_levels) (MAX_VERBOSITY_LEVEL - 1);
+ user_vect_verbosity_level
+ = (enum verbosity_levels) (MAX_VERBOSITY_LEVEL - 1);
}
print to stderr, otherwise print to the dump file. */
static void
-vect_set_dump_settings (void)
+vect_set_dump_settings (bool slp)
{
vect_dump = dump_file;
/* Check if the verbosity level was defined by the user: */
- if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
+ if (user_vect_verbosity_level != MAX_VERBOSITY_LEVEL)
{
- /* If there is no dump file, print to stderr. */
- if (!dump_file)
- vect_dump = stderr;
- return;
+ vect_verbosity_level = user_vect_verbosity_level;
+ /* Ignore user defined verbosity if dump flags require higher level of
+ verbosity. */
+ if (dump_file)
+ {
+ if (((dump_flags & TDF_DETAILS)
+ && vect_verbosity_level >= REPORT_DETAILS)
+ || ((dump_flags & TDF_STATS)
+ && vect_verbosity_level >= REPORT_UNVECTORIZED_LOCATIONS))
+ return;
+ }
+ else
+ {
+ /* If there is no dump file, print to stderr in case of loop
+ vectorization. */
+ if (!slp)
+ vect_dump = stderr;
+
+ return;
+ }
}
/* User didn't specify verbosity level: */
return 0;
/* Fix the verbosity level if not defined explicitly by the user. */
- vect_set_dump_settings ();
+ vect_set_dump_settings (false);
/* Allocate the bitmap that records which virtual variables
need to be renamed. */
}
+/* Entry point to basic block SLP phase. Sets up the dump settings for SLP
+ and initializes the stmt_vec_info vector. Then, for every basic block
+ visited by FOR_EACH_BB, records the block's location in vect_location,
+ analyzes the block with vect_slp_analyze_bb and, only if the analysis
+ succeeds, transforms it with vect_slp_transform_bb. The stmt_vec_info
+ vector is freed before returning. Always returns 0. */
+
+static unsigned int
+execute_vect_slp (void)
+{
+ basic_block bb;
+
+ /* Fix the verbosity level if not defined explicitly by the user. Passing
+ true marks this as SLP: with a user-specified verbosity level and no
+ dump file, the dump is then not redirected to stderr. */
+ vect_set_dump_settings (true);
+
+ init_stmt_vec_info_vec ();
+
+ FOR_EACH_BB (bb)
+ {
+ vect_location = find_bb_location (bb);
+
+ if (vect_slp_analyze_bb (bb))
+ {
+ vect_slp_transform_bb (bb);
+
+ if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
+ fprintf (vect_dump, "basic block vectorized using SLP\n");
+ }
+ }
+
+ free_stmt_vec_info_vec ();
+ return 0;
+}
+
+/* Gate of the basic block SLP pass.  Return true if the pass should run.  */
+
+static bool
+gate_vect_slp (void)
+{
+  /* An SLP flag explicitly set by the user enables the pass regardless of
+     the state of the vectorizer flag.  */
+  if (flag_tree_slp_vectorize == 1)
+    return true;
+
+  /* Without an explicit request, SLP needs the vectorizer to be on ...  */
+  if (flag_tree_vectorize == 0)
+    return false;
+
+  /* ... and must not have been turned off through the SLP flag.  */
+  return flag_tree_slp_vectorize != 0;
+}
+
+/* Descriptor of the basic block SLP vectorization pass: a GIMPLE pass named
+ "slp", gated by gate_vect_slp and executed by execute_vect_slp, timed
+ under TV_TREE_SLP_VECTORIZATION. It requires SSA form and a CFG. */
+struct gimple_opt_pass pass_slp_vectorize =
+{
+ {
+ GIMPLE_PASS,
+ "slp", /* name */
+ gate_vect_slp, /* gate */
+ execute_vect_slp, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_TREE_SLP_VECTORIZATION, /* tv_id */
+ PROP_ssa | PROP_cfg, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_ggc_collect
+ | TODO_verify_ssa
+ | TODO_dump_func
+ | TODO_update_ssa
+ | TODO_verify_stmts /* todo_flags_finish */
+ }
+};
+
+
/* Increase alignment of global arrays to improve vectorization potential.
TODO:
- Consider also structs that have an array field.
&& (loop->inner == (gimple_bb (stmt))->loop_father));
}
+/* Vectorization info of a basic block that is being vectorized using SLP.
+ Members are accessed through the BB_VINFO_* macros. */
+typedef struct _bb_vec_info {
+ /* The basic block that is being vectorized. */
+ basic_block bb;
+ /* All interleaving chains of stores in the basic block, represented by the
+ first stmt in the chain. */
+ VEC(gimple, heap) *strided_stores;
+
+ /* All SLP instances in the basic block. This is a subset of the set of
+ STRIDED_STORES of the basic block. */
+ VEC(slp_instance, heap) *slp_instances;
+
+ /* All data references in the basic block. */
+ VEC (data_reference_p, heap) *datarefs;
+
+ /* All data dependences in the basic block. */
+ VEC (ddr_p, heap) *ddrs;
+} *bb_vec_info;
+
+/* Access macros for the members of a bb_vec_info B. */
+#define BB_VINFO_BB(B) (B)->bb
+#define BB_VINFO_STRIDED_STORES(B) (B)->strided_stores
+#define BB_VINFO_SLP_INSTANCES(B) (B)->slp_instances
+#define BB_VINFO_DATAREFS(B) (B)->datarefs
+#define BB_VINFO_DDRS(B) (B)->ddrs
+
+/* Return the contents of BB's aux field viewed as a bb_vec_info.  */
+
+static inline bb_vec_info
+vec_info_for_bb (basic_block bb)
+{
+  bb_vec_info info = (bb_vec_info) bb->aux;
+
+  return info;
+}
+
/*-----------------------------------------------------------------*/
/* Info on vectorized defs. */
/*-----------------------------------------------------------------*/
/* Whether the stmt is SLPed, loop-based vectorized, or both. */
enum slp_vect_type slp_type;
+
+ /* The bb_vec_info with respect to which STMT is vectorized. */
+ bb_vec_info bb_vinfo;
} *stmt_vec_info;
/* Access Functions. */
#define STMT_VINFO_TYPE(S) (S)->type
#define STMT_VINFO_STMT(S) (S)->stmt
#define STMT_VINFO_LOOP_VINFO(S) (S)->loop_vinfo
+#define STMT_VINFO_BB_VINFO(S) (S)->bb_vinfo
#define STMT_VINFO_RELEVANT(S) (S)->relevant
#define STMT_VINFO_LIVE_P(S) (S)->live
#define STMT_VINFO_VECTYPE(S) (S)->vectype
extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge);
extern void vect_loop_versioning (loop_vec_info, bool, tree *, gimple_seq *);
extern void vect_do_peeling_for_loop_bound (loop_vec_info, tree *,
- tree, gimple_seq);
+ tree, gimple_seq);
extern void vect_do_peeling_for_alignment (loop_vec_info);
extern LOC find_loop_location (struct loop *);
extern bool vect_can_advance_ivs_p (loop_vec_info);
/* In tree-vect-stmts.c. */
extern tree get_vectype_for_scalar_type (tree);
-extern bool vect_is_simple_use (tree, loop_vec_info, gimple *, tree *,
- enum vect_def_type *);
+extern bool vect_is_simple_use (tree, loop_vec_info, bb_vec_info, gimple *,
+ tree *, enum vect_def_type *);
extern bool supportable_widening_operation (enum tree_code, gimple, tree,
tree *, tree *, enum tree_code *,
enum tree_code *, int *,
extern bool supportable_narrowing_operation (enum tree_code, const_gimple,
tree, enum tree_code *, int *,
VEC (tree, heap) **);
-extern stmt_vec_info new_stmt_vec_info (gimple stmt, loop_vec_info);
+extern stmt_vec_info new_stmt_vec_info (gimple stmt, loop_vec_info,
+ bb_vec_info);
extern void free_stmt_vec_info (gimple stmt);
extern tree vectorizable_function (gimple, tree, tree);
extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *,
extern bool vect_transform_stmt (gimple, gimple_stmt_iterator *,
bool *, slp_tree, slp_instance);
extern void vect_remove_stores (gimple);
-extern bool vect_analyze_stmt (gimple, bool *);
+extern bool vect_analyze_stmt (gimple, bool *, slp_tree);
/* In tree-vect-data-refs.c. */
extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int);
(struct data_reference *);
extern tree vect_get_smallest_scalar_type (gimple, HOST_WIDE_INT *,
HOST_WIDE_INT *);
-extern bool vect_analyze_data_ref_dependences (loop_vec_info);
+extern bool vect_analyze_data_ref_dependences (loop_vec_info, bb_vec_info);
extern bool vect_enhance_data_refs_alignment (loop_vec_info);
-extern bool vect_analyze_data_refs_alignment (loop_vec_info);
-extern bool vect_analyze_data_ref_accesses (loop_vec_info);
+extern bool vect_analyze_data_refs_alignment (loop_vec_info, bb_vec_info);
+extern bool vect_verify_datarefs_alignment (loop_vec_info, bb_vec_info);
+extern bool vect_analyze_data_ref_accesses (loop_vec_info, bb_vec_info);
extern bool vect_prune_runtime_alias_test_list (loop_vec_info);
-extern bool vect_analyze_data_refs (loop_vec_info);
+extern bool vect_analyze_data_refs (loop_vec_info, bb_vec_info);
extern tree vect_create_data_ref_ptr (gimple, struct loop *, tree, tree *,
- gimple *, bool, bool *);
+ gimple *, bool, bool *);
extern tree bump_vector_ptr (tree, gimple, gimple_stmt_iterator *, gimple, tree);
extern tree vect_create_destination_var (tree, tree);
extern bool vect_strided_store_supported (tree);
extern bool vect_transform_slp_perm_load (gimple, VEC (tree, heap) *,
gimple_stmt_iterator *, int,
slp_instance, bool);
-extern bool vect_schedule_slp (loop_vec_info);
+extern bool vect_schedule_slp (loop_vec_info, bb_vec_info);
extern void vect_update_slp_costs_according_to_vf (loop_vec_info);
-extern bool vect_analyze_slp (loop_vec_info);
+extern bool vect_analyze_slp (loop_vec_info, bb_vec_info);
extern void vect_make_slp_decision (loop_vec_info);
extern void vect_detect_hybrid_slp (loop_vec_info);
extern void vect_get_slp_defs (slp_tree, VEC (tree,heap) **,
VEC (tree,heap) **);
+extern LOC find_bb_location (basic_block);
+extern bb_vec_info vect_slp_analyze_bb (basic_block);
+extern void vect_slp_transform_bb (basic_block);
/* In tree-vect-patterns.c. */
/* Pattern recognition functions.