/* If-conversion for vectorizer.
- Copyright (C) 2004-2016 Free Software Foundation, Inc.
+ Copyright (C) 2004-2018 Free Software Foundation, Inc.
Contributed by Devang Patel <dpatel@apple.com>
This file is part of GCC.
/* Mask should be integer mode of the same size as the load/store
mode. */
mode = TYPE_MODE (TREE_TYPE (lhs));
- if (int_mode_for_mode (mode) == BLKmode
- || VECTOR_MODE_P (mode))
+ if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
return false;
if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
|| TREE_CODE (ref) == REALPART_EXPR)
ref = TREE_OPERAND (ref, 0);
- DR_BASE_ADDRESS (dr) = ref;
- DR_OFFSET (dr) = NULL;
- DR_INIT (dr) = NULL;
- DR_STEP (dr) = NULL;
- DR_ALIGNED_TO (dr) = NULL;
+ memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
+ DR_BASE_ADDRESS (dr) = ref;
}
hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
}
return cond;
}
+/* Local valueization callback that follows all-use SSA edges. */
+
+static tree
+ifcvt_follow_ssa_use_edges (tree val)
+{
+ return val;
+}
+
/* Replace a scalar PHI node with a COND_EXPR using COND as condition.
This routine can handle PHI nodes with more than two arguments.
arg0, arg1);
new_stmt = gimple_build_assign (res, rhs);
gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
- update_stmt (new_stmt);
+ gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
+ if (fold_stmt (&new_gsi, ifcvt_follow_ssa_use_edges))
+ {
+ new_stmt = gsi_stmt (new_gsi);
+ update_stmt (new_stmt);
+ }
if (dump_file && (dump_flags & TDF_DETAILS))
{
gimple *stmt;
int index;
- if (is_true_predicate (cond) || is_false_predicate (cond))
+ if (is_true_predicate (cond))
continue;
swap = false;
vect_sizes.truncate (0);
vect_masks.truncate (0);
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- if (!gimple_assign_single_p (stmt = gsi_stmt (gsi)))
- continue;
- else if (gimple_plf (stmt, GF_PLF_2))
- {
- tree lhs = gimple_assign_lhs (stmt);
- tree rhs = gimple_assign_rhs1 (stmt);
- tree ref, addr, ptr, mask;
- gimple *new_stmt;
- gimple_seq stmts = NULL;
- int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (lhs)));
- ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
- mark_addressable (ref);
- addr = force_gimple_operand_gsi (&gsi, build_fold_addr_expr (ref),
- true, NULL_TREE, true,
- GSI_SAME_STMT);
- if (!vect_sizes.is_empty ()
- && (index = mask_exists (bitsize, vect_sizes)) != -1)
- /* Use created mask. */
- mask = vect_masks[index];
- else
- {
- if (COMPARISON_CLASS_P (cond))
- mask = gimple_build (&stmts, TREE_CODE (cond),
- boolean_type_node,
- TREE_OPERAND (cond, 0),
- TREE_OPERAND (cond, 1));
- else
- {
- gcc_assert (TREE_CODE (cond) == SSA_NAME);
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
+ {
+ if (!gimple_assign_single_p (stmt = gsi_stmt (gsi)))
+ ;
+ else if (is_false_predicate (cond)
+ && gimple_vdef (stmt))
+ {
+ unlink_stmt_vdef (stmt);
+ gsi_remove (&gsi, true);
+ release_defs (stmt);
+ continue;
+ }
+ else if (gimple_plf (stmt, GF_PLF_2))
+ {
+ tree lhs = gimple_assign_lhs (stmt);
+ tree rhs = gimple_assign_rhs1 (stmt);
+ tree ref, addr, ptr, mask;
+ gcall *new_stmt;
+ gimple_seq stmts = NULL;
+ machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
+ /* We checked before setting GF_PLF_2 that an equivalent
+ integer mode exists. */
+ int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
+ ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
+ mark_addressable (ref);
+ addr = force_gimple_operand_gsi (&gsi, build_fold_addr_expr (ref),
+ true, NULL_TREE, true,
+ GSI_SAME_STMT);
+ if (!vect_sizes.is_empty ()
+ && (index = mask_exists (bitsize, vect_sizes)) != -1)
+ /* Use created mask. */
+ mask = vect_masks[index];
+ else
+ {
+ if (COMPARISON_CLASS_P (cond))
+ mask = gimple_build (&stmts, TREE_CODE (cond),
+ boolean_type_node,
+ TREE_OPERAND (cond, 0),
+ TREE_OPERAND (cond, 1));
+ else
mask = cond;
- }
-
- if (swap)
- {
- tree true_val
- = constant_boolean_node (true, TREE_TYPE (mask));
- mask = gimple_build (&stmts, BIT_XOR_EXPR,
- TREE_TYPE (mask), mask, true_val);
- }
- gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
- mask = ifc_temp_var (TREE_TYPE (mask), mask, &gsi);
- /* Save mask and its size for further use. */
- vect_sizes.safe_push (bitsize);
- vect_masks.safe_push (mask);
- }
- ptr = build_int_cst (reference_alias_ptr_type (ref),
- get_object_alignment (ref));
- /* Copy points-to info if possible. */
- if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
- copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
- ref);
- if (TREE_CODE (lhs) == SSA_NAME)
- {
- new_stmt
- = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
- ptr, mask);
- gimple_call_set_lhs (new_stmt, lhs);
- gimple_set_vuse (new_stmt, gimple_vuse (stmt));
- }
- else
- {
- new_stmt
- = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
+ if (swap)
+ {
+ tree true_val
+ = constant_boolean_node (true, TREE_TYPE (mask));
+ mask = gimple_build (&stmts, BIT_XOR_EXPR,
+ TREE_TYPE (mask), mask, true_val);
+ }
+ gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
+
+ mask = ifc_temp_var (TREE_TYPE (mask), mask, &gsi);
+ /* Save mask and its size for further use. */
+ vect_sizes.safe_push (bitsize);
+ vect_masks.safe_push (mask);
+ }
+ ptr = build_int_cst (reference_alias_ptr_type (ref),
+ get_object_alignment (ref));
+ /* Copy points-to info if possible. */
+ if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
+ copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
+ ref);
+ if (TREE_CODE (lhs) == SSA_NAME)
+ {
+ new_stmt
+ = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
+ ptr, mask);
+ gimple_call_set_lhs (new_stmt, lhs);
+ gimple_set_vuse (new_stmt, gimple_vuse (stmt));
+ }
+ else
+ {
+ new_stmt
+ = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
mask, rhs);
- gimple_set_vuse (new_stmt, gimple_vuse (stmt));
- gimple_set_vdef (new_stmt, gimple_vdef (stmt));
- SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
- }
+ gimple_set_vuse (new_stmt, gimple_vuse (stmt));
+ gimple_set_vdef (new_stmt, gimple_vdef (stmt));
+ SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
+ }
+ gimple_call_set_nothrow (new_stmt, true);
- gsi_replace (&gsi, new_stmt, true);
- }
- else if (gimple_vdef (stmt))
- {
- tree lhs = gimple_assign_lhs (stmt);
- tree rhs = gimple_assign_rhs1 (stmt);
- tree type = TREE_TYPE (lhs);
-
- lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
- rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
- if (swap)
- std::swap (lhs, rhs);
- cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
- is_gimple_condexpr, NULL_TREE,
- true, GSI_SAME_STMT);
- rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
- gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
- update_stmt (stmt);
- }
+ gsi_replace (&gsi, new_stmt, true);
+ }
+ else if (gimple_vdef (stmt))
+ {
+ tree lhs = gimple_assign_lhs (stmt);
+ tree rhs = gimple_assign_rhs1 (stmt);
+ tree type = TREE_TYPE (lhs);
+
+ lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
+ rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
+ if (swap)
+ std::swap (lhs, rhs);
+ cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
+ is_gimple_condexpr, NULL_TREE,
+ true, GSI_SAME_STMT);
+ rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
+ gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
+ update_stmt (stmt);
+ }
+ gsi_next (&gsi);
+ }
}
}
if (exit_bb != loop->header)
{
/* Connect this node to loop header. */
- make_edge (loop->header, exit_bb, EDGE_FALLTHRU);
+ make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
}
will be if-converted, the new copy of the loop will not,
and the LOOP_VECTORIZED internal call will be guarding which
loop to execute. The vectorizer pass will fold this
- internal call into either true or false. */
+ internal call into either true or false.
-static bool
+ Note that this function intentionally invalidates profile. Both edges
+ out of LOOP_VECTORIZED must have 100% probability so the profile remains
+ consistent after the condition is folded in the vectorizer. */
+
+static struct loop *
version_loop_for_if_conversion (struct loop *loop)
{
basic_block cond_bb;
saved_preds[i] = ifc_bbs[i]->aux;
initialize_original_copy_tables ();
+ /* At this point we invalidate profile consistency until IFN_LOOP_VECTORIZED
+ is re-merged in the vectorizer. */
new_loop = loop_version (loop, cond, &cond_bb,
- REG_BR_PROB_BASE, REG_BR_PROB_BASE,
- REG_BR_PROB_BASE, true);
+ profile_probability::always (),
+ profile_probability::always (),
+ profile_probability::always (),
+ profile_probability::always (), true);
free_original_copy_tables ();
for (unsigned i = 0; i < save_length; i++)
ifc_bbs[i]->aux = saved_preds[i];
if (new_loop == NULL)
- return false;
+ return NULL;
new_loop->dont_vectorize = true;
new_loop->force_vectorize = false;
gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
update_ssa (TODO_update_ssa);
- return true;
+ return new_loop;
}
/* Return true when LOOP satisfies the follow conditions that will
- The loop has a single exit.
- The loop header has a single successor, which is the inner
loop header.
+ - Each of the inner and outer loop latches have a single
+ predecessor.
- The loop exit block has a single predecessor, which is the
inner loop's exit block. */
versionable_outer_loop_p (struct loop *loop)
{
if (!loop_outer (loop)
+ || loop->dont_vectorize
|| !loop->inner
|| loop->inner->next
|| !single_exit (loop)
|| !single_succ_p (loop->header)
- || single_succ (loop->header) != loop->inner->header)
+ || single_succ (loop->header) != loop->inner->header
+ || !single_pred_p (loop->latch)
+ || !single_pred_p (loop->inner->latch))
return false;
-
+
basic_block outer_exit = single_pred (loop->latch);
basic_block inner_exit = single_pred (loop->inner->latch);
{
unsigned int todo = 0;
bool aggressive_if_conv;
+ struct loop *rloop;
+ again:
+ rloop = NULL;
ifc_bbs = NULL;
any_pred_load_store = false;
any_complicated_phi = false;
|| loop->dont_vectorize))
goto cleanup;
- /* Either version this loop, or if the pattern is right for outer-loop
+ /* Since we have no cost model, always version loops unless the user
+ specified -ftree-loop-if-convert or unless versioning is required.
+ Either version this loop, or if the pattern is right for outer-loop
vectorization, version the outer loop. In the latter case we will
still if-convert the original inner loop. */
- if ((any_pred_load_store || any_complicated_phi)
- && !version_loop_for_if_conversion
- (versionable_outer_loop_p (loop_outer (loop))
- ? loop_outer (loop) : loop))
- goto cleanup;
+ if (any_pred_load_store
+ || any_complicated_phi
+ || flag_tree_loop_if_convert != 1)
+ {
+ struct loop *vloop
+ = (versionable_outer_loop_p (loop_outer (loop))
+ ? loop_outer (loop) : loop);
+ struct loop *nloop = version_loop_for_if_conversion (vloop);
+ if (nloop == NULL)
+ goto cleanup;
+ if (vloop != loop)
+ {
+ /* If versionable_outer_loop_p decided to version the
+ outer loop, version also the inner loop of the non-vectorized
+ loop copy. So we transform:
+ loop1
+ loop2
+ into:
+ if (LOOP_VECTORIZED (1, 3))
+ {
+ loop1
+ loop2
+ }
+ else
+ loop3 (copy of loop1)
+ if (LOOP_VECTORIZED (4, 5))
+ loop4 (copy of loop2)
+ else
+ loop5 (copy of loop4) */
+ gcc_assert (nloop->inner && nloop->inner->next == NULL);
+ rloop = nloop->inner;
+ }
+ }
/* Now all statements are if-convertible. Combine all the basic
blocks into one huge basic block doing the if-conversion
free (ifc_bbs);
ifc_bbs = NULL;
}
+ if (rloop != NULL)
+ {
+ loop = rloop;
+ goto again;
+ }
return todo;
}
{
return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
&& flag_tree_loop_if_convert != 0)
- || flag_tree_loop_if_convert == 1
- || flag_tree_loop_if_convert_stores == 1);
+ || flag_tree_loop_if_convert == 1);
}
unsigned int
FOR_EACH_LOOP (loop, 0)
if (flag_tree_loop_if_convert == 1
- || flag_tree_loop_if_convert_stores == 1
|| ((flag_tree_loop_vectorize || loop->force_vectorize)
&& !loop->dont_vectorize))
todo |= tree_if_conversion (loop);
+ if (todo)
+ {
+ free_numbers_of_iterations_estimates (fun);
+ scev_reset ();
+ }
+
if (flag_checking)
{
basic_block bb;