return ret;
}
+/* Return an expression that computes the popcount of src. */
+
+static tree
+build_popcount_expr (tree src)
+{
+ tree fn;
+ int prec = TYPE_PRECISION (TREE_TYPE (src));
+ int i_prec = TYPE_PRECISION (integer_type_node);
+ int li_prec = TYPE_PRECISION (long_integer_type_node);
+ int lli_prec = TYPE_PRECISION (long_long_integer_type_node);
+ if (prec <= i_prec)
+ fn = builtin_decl_implicit (BUILT_IN_POPCOUNT);
+ else if (prec == li_prec)
+ fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL);
+ else if (prec == lli_prec || prec == 2 * lli_prec)
+ fn = builtin_decl_implicit (BUILT_IN_POPCOUNTLL);
+ else
+ return NULL_TREE;
+
+ tree utype = unsigned_type_for (TREE_TYPE (src));
+ src = fold_convert (utype, src);
+ if (prec < i_prec)
+ src = fold_convert (unsigned_type_node, src);
+ tree call;
+ if (prec == 2 * lli_prec)
+ {
+ tree src1 = fold_convert (long_long_unsigned_type_node,
+ fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
+ unshare_expr (src),
+ build_int_cst (integer_type_node,
+ lli_prec)));
+ tree src2 = fold_convert (long_long_unsigned_type_node, src);
+ tree call1 = build_call_expr (fn, 1, src1);
+ tree call2 = build_call_expr (fn, 1, src2);
+ call = fold_build2 (PLUS_EXPR, integer_type_node, call1, call2);
+ }
+ else
+ call = build_call_expr (fn, 1, src);
+
+ return call;
+}
+
/* Utility function to check if OP is defined by a stmt
that is a val - 1. */
&& integer_minus_onep (gimple_assign_rhs2 (stmt)));
}
-/* See if LOOP is a popcout implementation, determine NITER for the loop
+/* See comment below for number_of_iterations_bitcount.
+ For popcount, we have:
- We match:
- <bb 2>
- goto <bb 4>
+ modify:
+ _1 = iv_1 + -1
+ iv_2 = iv_1 & _1
- <bb 3>
- _1 = b_11 + -1
- b_6 = _1 & b_11
-
- <bb 4>
- b_11 = PHI <b_5(D)(2), b_6(3)>
+ test:
+ if (iv != 0)
- exit block
- if (b_11 != 0)
- goto <bb 3>
- else
- goto <bb 5>
-
- OR we match copy-header version:
- if (b_5 != 0)
- goto <bb 3>
- else
- goto <bb 4>
-
- <bb 3>
- b_11 = PHI <b_5(2), b_6(3)>
- _1 = b_11 + -1
- b_6 = _1 & b_11
-
- exit block
- if (b_6 != 0)
- goto <bb 3>
- else
- goto <bb 4>
-
- If popcount pattern, update NITER accordingly.
- i.e., set NITER to __builtin_popcount (b)
- return true if we did, false otherwise.
+ modification count:
+ popcount (src)
*/
enum tree_code code,
class tree_niter_desc *niter)
{
- bool adjust = true;
- tree iter;
+ bool modify_before_test = true;
HOST_WIDE_INT max;
- adjust = true;
- tree fn = NULL_TREE;
-
- /* Check loop terminating branch is like
- if (b != 0). */
- gimple *stmt = last_stmt (exit->src);
- if (!stmt
- || gimple_code (stmt) != GIMPLE_COND
+
+ /* Check that condition for staying inside the loop is like
+ if (iv != 0). */
+ gimple *cond_stmt = last_stmt (exit->src);
+ if (!cond_stmt
+ || gimple_code (cond_stmt) != GIMPLE_COND
|| code != NE_EXPR
- || !integer_zerop (gimple_cond_rhs (stmt))
- || TREE_CODE (gimple_cond_lhs (stmt)) != SSA_NAME)
+ || !integer_zerop (gimple_cond_rhs (cond_stmt))
+ || TREE_CODE (gimple_cond_lhs (cond_stmt)) != SSA_NAME)
return false;
- gimple *and_stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (stmt));
+ tree iv_2 = gimple_cond_lhs (cond_stmt);
+ gimple *iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
- /* Depending on copy-header is performed, feeding PHI stmts might be in
- the loop header or loop latch, handle this. */
- if (gimple_code (and_stmt) == GIMPLE_PHI
- && gimple_bb (and_stmt) == loop->header
- && gimple_phi_num_args (and_stmt) == 2
- && (TREE_CODE (gimple_phi_arg_def (and_stmt,
+ /* If the test comes before the iv modification, then these will actually be
+ iv_1 and a phi node. */
+ if (gimple_code (iv_2_stmt) == GIMPLE_PHI
+ && gimple_bb (iv_2_stmt) == loop->header
+ && gimple_phi_num_args (iv_2_stmt) == 2
+ && (TREE_CODE (gimple_phi_arg_def (iv_2_stmt,
loop_latch_edge (loop)->dest_idx))
== SSA_NAME))
{
- /* SSA used in exit condition is defined by PHI stmt
- b_11 = PHI <b_5(D)(2), b_6(3)>
- from the PHI stmt, get the and_stmt
- b_6 = _1 & b_11. */
- tree t = gimple_phi_arg_def (and_stmt, loop_latch_edge (loop)->dest_idx);
- and_stmt = SSA_NAME_DEF_STMT (t);
- adjust = false;
+ /* iv_2 is actually one of the inputs to the phi. */
+ iv_2 = gimple_phi_arg_def (iv_2_stmt, loop_latch_edge (loop)->dest_idx);
+ iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
+ modify_before_test = false;
}
- /* Make sure it is indeed an and stmt (b_6 = _1 & b_11). */
- if (!is_gimple_assign (and_stmt)
- || gimple_assign_rhs_code (and_stmt) != BIT_AND_EXPR)
+ /* Make sure iv_2_stmt is an and stmt (iv_2 = _1 & iv_1). */
+ if (!is_gimple_assign (iv_2_stmt)
+ || gimple_assign_rhs_code (iv_2_stmt) != BIT_AND_EXPR)
return false;
- tree b_11 = gimple_assign_rhs1 (and_stmt);
- tree _1 = gimple_assign_rhs2 (and_stmt);
+ tree iv_1 = gimple_assign_rhs1 (iv_2_stmt);
+ tree _1 = gimple_assign_rhs2 (iv_2_stmt);
- /* Check that _1 is defined by _b11 + -1 (_1 = b_11 + -1).
- Also make sure that b_11 is the same in and_stmt and _1 defining stmt.
+ /* Check that _1 is defined by (_1 = iv_1 + -1).
+ Also make sure that _1 is the same in and_stmt and _1 defining stmt.
Also canonicalize if _1 and _b11 are revrsed. */
- if (ssa_defined_by_minus_one_stmt_p (b_11, _1))
- std::swap (b_11, _1);
- else if (ssa_defined_by_minus_one_stmt_p (_1, b_11))
+ if (ssa_defined_by_minus_one_stmt_p (iv_1, _1))
+ std::swap (iv_1, _1);
+ else if (ssa_defined_by_minus_one_stmt_p (_1, iv_1))
;
else
return false;
- /* Check the recurrence:
- ... = PHI <b_5(2), b_6(3)>. */
- gimple *phi = SSA_NAME_DEF_STMT (b_11);
+
+ /* Check the recurrence. */
+ gimple *phi = SSA_NAME_DEF_STMT (iv_1);
if (gimple_code (phi) != GIMPLE_PHI
|| (gimple_bb (phi) != loop_latch_edge (loop)->dest)
- || (gimple_assign_lhs (and_stmt)
- != gimple_phi_arg_def (phi, loop_latch_edge (loop)->dest_idx)))
+ || (iv_2 != gimple_phi_arg_def (phi, loop_latch_edge (loop)->dest_idx)))
return false;
- /* We found a match. Get the corresponding popcount builtin. */
+ /* We found a match. */
tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx);
- if (TYPE_PRECISION (TREE_TYPE (src)) <= TYPE_PRECISION (integer_type_node))
- fn = builtin_decl_implicit (BUILT_IN_POPCOUNT);
- else if (TYPE_PRECISION (TREE_TYPE (src))
- == TYPE_PRECISION (long_integer_type_node))
- fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL);
- else if (TYPE_PRECISION (TREE_TYPE (src))
- == TYPE_PRECISION (long_long_integer_type_node)
- || (TYPE_PRECISION (TREE_TYPE (src))
- == 2 * TYPE_PRECISION (long_long_integer_type_node)))
- fn = builtin_decl_implicit (BUILT_IN_POPCOUNTLL);
+ int src_precision = TYPE_PRECISION (TREE_TYPE (src));
- if (!fn)
+ /* Get the corresponding popcount builtin. */
+ tree expr = build_popcount_expr (src);
+
+ if (!expr)
return false;
- /* Update NITER params accordingly */
- tree utype = unsigned_type_for (TREE_TYPE (src));
- src = fold_convert (utype, src);
- if (TYPE_PRECISION (TREE_TYPE (src)) < TYPE_PRECISION (integer_type_node))
- src = fold_convert (unsigned_type_node, src);
- tree call;
- if (TYPE_PRECISION (TREE_TYPE (src))
- == 2 * TYPE_PRECISION (long_long_integer_type_node))
+ max = src_precision;
+
+ tree may_be_zero = boolean_false_node;
+
+ if (modify_before_test)
{
- int prec = TYPE_PRECISION (long_long_integer_type_node);
- tree src1 = fold_convert (long_long_unsigned_type_node,
- fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
- unshare_expr (src),
- build_int_cst (integer_type_node,
- prec)));
- tree src2 = fold_convert (long_long_unsigned_type_node, src);
- call = build_call_expr (fn, 1, src1);
- call = fold_build2 (PLUS_EXPR, TREE_TYPE (call), call,
- build_call_expr (fn, 1, src2));
- call = fold_convert (utype, call);
+ expr = fold_build2 (MINUS_EXPR, integer_type_node, expr,
+ integer_one_node);
+ max = max - 1;
+ may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
+ build_zero_cst (TREE_TYPE (src)));
}
- else
- call = fold_convert (utype, build_call_expr (fn, 1, src));
- if (adjust)
- iter = fold_build2 (MINUS_EXPR, utype, call, build_int_cst (utype, 1));
- else
- iter = call;
- if (TREE_CODE (call) == INTEGER_CST)
- max = tree_to_uhwi (call);
- else
- max = TYPE_PRECISION (TREE_TYPE (src));
- if (adjust)
- max = max - 1;
+ expr = fold_convert (unsigned_type_node, expr);
- niter->niter = iter;
niter->assumptions = boolean_true_node;
+ niter->may_be_zero = simplify_using_initial_conditions (loop, may_be_zero);
+ niter->niter = simplify_using_initial_conditions(loop, expr);
- if (adjust)
- {
- tree may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
- build_zero_cst (TREE_TYPE (src)));
- niter->may_be_zero
- = simplify_using_initial_conditions (loop, may_be_zero);
- }
+ if (TREE_CODE (niter->niter) == INTEGER_CST)
+ niter->max = tree_to_uhwi (niter->niter);
else
- niter->may_be_zero = boolean_false_node;
+ niter->max = max;
- niter->max = max;
niter->bound = NULL_TREE;
niter->cmp = ERROR_MARK;
return true;
}
+/* See if LOOP contains a bit counting idiom. The idiom consists of two parts:
+ 1. A modification to the induction variabler;.
+ 2. A test to determine whether or not to exit the loop.
+
+ These can come in either order - i.e.:
+
+ <bb 3>
+ iv_1 = PHI <src(2), iv_2(4)>
+ if (test (iv_1))
+ goto <bb 4>
+ else
+ goto <bb 5>
+
+ <bb 4>
+ iv_2 = modify (iv_1)
+ goto <bb 3>
+
+ OR
+
+ <bb 3>
+ iv_1 = PHI <src(2), iv_2(4)>
+ iv_2 = modify (iv_1)
+
+ <bb 4>
+ if (test (iv_2))
+ goto <bb 3>
+ else
+ goto <bb 5>
+
+ The second form can be generated by copying the loop header out of the loop.
+
+ In the first case, the number of latch executions will be equal to the
+ number of induction variable modifications required before the test fails.
+
+ In the second case (modify_before_test), if we assume that the number of
+ modifications required before the test fails is nonzero, then the number of
+ latch executions will be one less than this number.
+
+ If we recognise the pattern, then we update niter accordingly, and return
+ true. */
+
+static bool
+number_of_iterations_bitcount (loop_p loop, edge exit,
+ enum tree_code code,
+ class tree_niter_desc *niter)
+{
+ return number_of_iterations_popcount (loop, exit, code, niter);
+}
+
/* Substitute NEW_TREE for OLD in EXPR and fold the result.
If VALUEIZE is non-NULL then OLD and NEW_TREE are ignored and instead
all SSA names are replaced with the result of calling the VALUEIZE
tree iv0_niters = NULL_TREE;
if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
op0, &iv0, safe ? &iv0_niters : NULL, false))
- return number_of_iterations_popcount (loop, exit, code, niter);
+ return number_of_iterations_bitcount (loop, exit, code, niter);
tree iv1_niters = NULL_TREE;
if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
op1, &iv1, safe ? &iv1_niters : NULL, false))