Refactor number_of_iterations_popcount

author Andrew Carlotti <andrew.carlotti@arm.com>

Tue, 4 Oct 2022 17:21:57 +0000 (18:21 +0100)

committer Andrew Carlotti <andrew.carlotti@arm.com>

Mon, 21 Nov 2022 16:00:51 +0000 (16:00 +0000)
author Andrew Carlotti <andrew.carlotti@arm.com>
Tue, 4 Oct 2022 17:21:57 +0000 (18:21 +0100)
committer Andrew Carlotti <andrew.carlotti@arm.com>
Mon, 21 Nov 2022 16:00:51 +0000 (16:00 +0000)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount-max.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount-max.c

new file mode 100644 (file)

index 0000000..ca7204c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount-max.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-loop-optimize -fdump-tree-optimized" } */
+
+#define PREC (__CHAR_BIT__)
+
+int count1 (unsigned char b) {
+    int c = 0;
+
+    while (b) {
+       b &= b - 1;
+       c++;
+    }
+    if (c <= PREC)
+      return 0;
+    else
+      return 34567;
+}
+
+int count2 (unsigned char b) {
+    int c = 0;
+
+    while (b) {
+       b &= b - 1;
+       c++;
+    }
+    if (c <= PREC - 1)
+      return 0;
+    else
+      return 76543;
+}
+
+/* { dg-final { scan-tree-dump-times "34567" 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "76543" 1 "optimized" } } */
diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc

index 0af34e4..fece876 100644 (file)
--- a/gcc/tree-ssa-loop-niter.cc
+++ b/gcc/tree-ssa-loop-niter.cc
@@ -2026,6 +2026,48 @@ number_of_iterations_cond (class loop *loop,
    return ret;
  }
  
+/* Return an expression that computes the popcount of src.  */
+
+static tree
+build_popcount_expr (tree src)
+{
+  tree fn;
+  int prec = TYPE_PRECISION (TREE_TYPE (src));
+  int i_prec = TYPE_PRECISION (integer_type_node);
+  int li_prec = TYPE_PRECISION (long_integer_type_node);
+  int lli_prec = TYPE_PRECISION (long_long_integer_type_node);
+  if (prec <= i_prec)
+    fn = builtin_decl_implicit (BUILT_IN_POPCOUNT);
+  else if (prec == li_prec)
+    fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL);
+  else if (prec == lli_prec || prec == 2 * lli_prec)
+    fn = builtin_decl_implicit (BUILT_IN_POPCOUNTLL);
+  else
+    return NULL_TREE;
+
+  tree utype = unsigned_type_for (TREE_TYPE (src));
+  src = fold_convert (utype, src);
+  if (prec < i_prec)
+    src = fold_convert (unsigned_type_node, src);
+  tree call;
+  if (prec == 2 * lli_prec)
+    {
+      tree src1 = fold_convert (long_long_unsigned_type_node,
+                               fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
+                                            unshare_expr (src),
+                                            build_int_cst (integer_type_node,
+                                                           lli_prec)));
+      tree src2 = fold_convert (long_long_unsigned_type_node, src);
+      tree call1 = build_call_expr (fn, 1, src1);
+      tree call2 = build_call_expr (fn, 1, src2);
+      call = fold_build2 (PLUS_EXPR, integer_type_node, call1, call2);
+    }
+  else
+    call = build_call_expr (fn, 1, src);
+
+  return call;
+}
+
  /* Utility function to check if OP is defined by a stmt
     that is a val - 1.  */
  
@@ -2041,45 +2083,18 @@ ssa_defined_by_minus_one_stmt_p (tree op, tree val)
           && integer_minus_onep (gimple_assign_rhs2 (stmt)));
  }
  
-/* See if LOOP is a popcout implementation, determine NITER for the loop
+/* See comment below for number_of_iterations_bitcount.
+   For popcount, we have:
  
-   We match:
-   <bb 2>
-   goto <bb 4>
+   modify:
+   _1 = iv_1 + -1
+   iv_2 = iv_1 & _1
  
-   <bb 3>
-   _1 = b_11 + -1
-   b_6 = _1 & b_11
-
-   <bb 4>
-   b_11 = PHI <b_5(D)(2), b_6(3)>
+   test:
+   if (iv != 0)
  
-   exit block
-   if (b_11 != 0)
-       goto <bb 3>
-   else
-       goto <bb 5>
-
-   OR we match copy-header version:
-   if (b_5 != 0)
-       goto <bb 3>
-   else
-       goto <bb 4>
-
-   <bb 3>
-   b_11 = PHI <b_5(2), b_6(3)>
-   _1 = b_11 + -1
-   b_6 = _1 & b_11
-
-   exit block
-   if (b_6 != 0)
-       goto <bb 3>
-   else
-       goto <bb 4>
-
-   If popcount pattern, update NITER accordingly.
-   i.e., set NITER to  __builtin_popcount (b)
-   return true if we did, false otherwise.
+   modification count:
+   popcount (src)
  
   */
  
@@ -2088,138 +2103,150 @@ number_of_iterations_popcount (loop_p loop, edge exit,
                                enum tree_code code,
                                class tree_niter_desc *niter)
  {
-  bool adjust = true;
-  tree iter;
+  bool modify_before_test = true;
    HOST_WIDE_INT max;
-  adjust = true;
-  tree fn = NULL_TREE;
-
-  /* Check loop terminating branch is like
-     if (b != 0).  */
-  gimple *stmt = last_stmt (exit->src);
-  if (!stmt
-      || gimple_code (stmt) != GIMPLE_COND
+
+  /* Check that condition for staying inside the loop is like
+     if (iv != 0).  */
+  gimple *cond_stmt = last_stmt (exit->src);
+  if (!cond_stmt
+      || gimple_code (cond_stmt) != GIMPLE_COND
        || code != NE_EXPR
-      || !integer_zerop (gimple_cond_rhs (stmt))
-      || TREE_CODE (gimple_cond_lhs (stmt)) != SSA_NAME)
+      || !integer_zerop (gimple_cond_rhs (cond_stmt))
+      || TREE_CODE (gimple_cond_lhs (cond_stmt)) != SSA_NAME)
      return false;
  
-  gimple *and_stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (stmt));
+  tree iv_2 = gimple_cond_lhs (cond_stmt);
+  gimple *iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
  
-  /* Depending on copy-header is performed, feeding PHI stmts might be in
-     the loop header or loop latch, handle this.  */
-  if (gimple_code (and_stmt) == GIMPLE_PHI
-      && gimple_bb (and_stmt) == loop->header
-      && gimple_phi_num_args (and_stmt) == 2
-      && (TREE_CODE (gimple_phi_arg_def (and_stmt,
+  /* If the test comes before the iv modification, then these will actually be
+     iv_1 and a phi node.  */
+  if (gimple_code (iv_2_stmt) == GIMPLE_PHI
+      && gimple_bb (iv_2_stmt) == loop->header
+      && gimple_phi_num_args (iv_2_stmt) == 2
+      && (TREE_CODE (gimple_phi_arg_def (iv_2_stmt,
                                          loop_latch_edge (loop)->dest_idx))
           == SSA_NAME))
      {
-      /* SSA used in exit condition is defined by PHI stmt
-       b_11 = PHI <b_5(D)(2), b_6(3)>
-       from the PHI stmt, get the and_stmt
-       b_6 = _1 & b_11.  */
-      tree t = gimple_phi_arg_def (and_stmt, loop_latch_edge (loop)->dest_idx);
-      and_stmt = SSA_NAME_DEF_STMT (t);
-      adjust = false;
+      /* iv_2 is actually one of the inputs to the phi.  */
+      iv_2 = gimple_phi_arg_def (iv_2_stmt, loop_latch_edge (loop)->dest_idx);
+      iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
+      modify_before_test = false;
      }
  
-  /* Make sure it is indeed an and stmt (b_6 = _1 & b_11).  */
-  if (!is_gimple_assign (and_stmt)
-      || gimple_assign_rhs_code (and_stmt) != BIT_AND_EXPR)
+  /* Make sure iv_2_stmt is an and stmt (iv_2 = _1 & iv_1).  */
+  if (!is_gimple_assign (iv_2_stmt)
+      || gimple_assign_rhs_code (iv_2_stmt) != BIT_AND_EXPR)
      return false;
  
-  tree b_11 = gimple_assign_rhs1 (and_stmt);
-  tree _1 = gimple_assign_rhs2 (and_stmt);
+  tree iv_1 = gimple_assign_rhs1 (iv_2_stmt);
+  tree _1 = gimple_assign_rhs2 (iv_2_stmt);
  
-  /* Check that _1 is defined by _b11 + -1 (_1 = b_11 + -1).
-     Also make sure that b_11 is the same in and_stmt and _1 defining stmt.
+  /* Check that _1 is defined by (_1 = iv_1 + -1).
+     Also make sure that _1 is the same in and_stmt and _1 defining stmt.
       Also canonicalize if _1 and _b11 are revrsed.  */
-  if (ssa_defined_by_minus_one_stmt_p (b_11, _1))
-    std::swap (b_11, _1);
-  else if (ssa_defined_by_minus_one_stmt_p (_1, b_11))
+  if (ssa_defined_by_minus_one_stmt_p (iv_1, _1))
+    std::swap (iv_1, _1);
+  else if (ssa_defined_by_minus_one_stmt_p (_1, iv_1))
      ;
    else
      return false;
-  /* Check the recurrence:
-   ... = PHI <b_5(2), b_6(3)>.  */
-  gimple *phi = SSA_NAME_DEF_STMT (b_11);
+
+  /* Check the recurrence.  */
+  gimple *phi = SSA_NAME_DEF_STMT (iv_1);
    if (gimple_code (phi) != GIMPLE_PHI
        || (gimple_bb (phi) != loop_latch_edge (loop)->dest)
-      || (gimple_assign_lhs (and_stmt)
-         != gimple_phi_arg_def (phi, loop_latch_edge (loop)->dest_idx)))
+      || (iv_2 != gimple_phi_arg_def (phi, loop_latch_edge (loop)->dest_idx)))
      return false;
  
-  /* We found a match. Get the corresponding popcount builtin.  */
+  /* We found a match.  */
    tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx);
-  if (TYPE_PRECISION (TREE_TYPE (src)) <= TYPE_PRECISION (integer_type_node))
-    fn = builtin_decl_implicit (BUILT_IN_POPCOUNT);
-  else if (TYPE_PRECISION (TREE_TYPE (src))
-          == TYPE_PRECISION (long_integer_type_node))
-    fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL);
-  else if (TYPE_PRECISION (TREE_TYPE (src))
-          == TYPE_PRECISION (long_long_integer_type_node)
-          || (TYPE_PRECISION (TREE_TYPE (src))
-              == 2 * TYPE_PRECISION (long_long_integer_type_node)))
-    fn = builtin_decl_implicit (BUILT_IN_POPCOUNTLL);
+  int src_precision = TYPE_PRECISION (TREE_TYPE (src));
  
-  if (!fn)
+  /* Get the corresponding popcount builtin.  */
+  tree expr = build_popcount_expr (src);
+
+  if (!expr)
      return false;
  
-  /* Update NITER params accordingly  */
-  tree utype = unsigned_type_for (TREE_TYPE (src));
-  src = fold_convert (utype, src);
-  if (TYPE_PRECISION (TREE_TYPE (src)) < TYPE_PRECISION (integer_type_node))
-    src = fold_convert (unsigned_type_node, src);
-  tree call;
-  if (TYPE_PRECISION (TREE_TYPE (src))
-      == 2 * TYPE_PRECISION (long_long_integer_type_node))
+  max = src_precision;
+
+  tree may_be_zero = boolean_false_node;
+
+  if (modify_before_test)
      {
-      int prec = TYPE_PRECISION (long_long_integer_type_node);
-      tree src1 = fold_convert (long_long_unsigned_type_node,
-                               fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
-                                            unshare_expr (src),
-                                            build_int_cst (integer_type_node,
-                                                           prec)));
-      tree src2 = fold_convert (long_long_unsigned_type_node, src);
-      call = build_call_expr (fn, 1, src1);
-      call = fold_build2 (PLUS_EXPR, TREE_TYPE (call), call,
-                         build_call_expr (fn, 1, src2));
-      call = fold_convert (utype, call);
+      expr = fold_build2 (MINUS_EXPR, integer_type_node, expr,
+                         integer_one_node);
+      max = max - 1;
+      may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
+                                     build_zero_cst (TREE_TYPE (src)));
      }
-  else
-    call = fold_convert (utype, build_call_expr (fn, 1, src));
-  if (adjust)
-    iter = fold_build2 (MINUS_EXPR, utype, call, build_int_cst (utype, 1));
-  else
-    iter = call;
  
-  if (TREE_CODE (call) == INTEGER_CST)
-    max = tree_to_uhwi (call);
-  else
-    max = TYPE_PRECISION (TREE_TYPE (src));
-  if (adjust)
-    max = max - 1;
+  expr = fold_convert (unsigned_type_node, expr);
  
-  niter->niter = iter;
    niter->assumptions = boolean_true_node;
+  niter->may_be_zero = simplify_using_initial_conditions (loop, may_be_zero);
+  niter->niter = simplify_using_initial_conditions(loop, expr);
  
-  if (adjust)
-    {
-      tree may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
-                                     build_zero_cst (TREE_TYPE (src)));
-      niter->may_be_zero
-       = simplify_using_initial_conditions (loop, may_be_zero);
-    }
+  if (TREE_CODE (niter->niter) == INTEGER_CST)
+    niter->max = tree_to_uhwi (niter->niter);
    else
-    niter->may_be_zero = boolean_false_node;
+    niter->max = max;
  
-  niter->max = max;
    niter->bound = NULL_TREE;
    niter->cmp = ERROR_MARK;
    return true;
  }
  
+/* See if LOOP contains a bit counting idiom. The idiom consists of two parts:
+   1. A modification to the induction variabler;.
+   2. A test to determine whether or not to exit the loop.
+
+   These can come in either order - i.e.:
+
+   <bb 3>
+   iv_1 = PHI <src(2), iv_2(4)>
+   if (test (iv_1))
+     goto <bb 4>
+   else
+     goto <bb 5>
+
+   <bb 4>
+   iv_2 = modify (iv_1)
+   goto <bb 3>
+
+   OR
+
+   <bb 3>
+   iv_1 = PHI <src(2), iv_2(4)>
+   iv_2 = modify (iv_1)
+
+   <bb 4>
+   if (test (iv_2))
+     goto <bb 3>
+   else
+     goto <bb 5>
+
+   The second form can be generated by copying the loop header out of the loop.
+
+   In the first case, the number of latch executions will be equal to the
+   number of induction variable modifications required before the test fails.
+
+   In the second case (modify_before_test), if we assume that the number of
+   modifications required before the test fails is nonzero, then the number of
+   latch executions will be one less than this number.
+
+   If we recognise the pattern, then we update niter accordingly, and return
+   true.  */
+
+static bool
+number_of_iterations_bitcount (loop_p loop, edge exit,
+                              enum tree_code code,
+                              class tree_niter_desc *niter)
+{
+  return number_of_iterations_popcount (loop, exit, code, niter);
+}
+
  /* Substitute NEW_TREE for OLD in EXPR and fold the result.
     If VALUEIZE is non-NULL then OLD and NEW_TREE are ignored and instead
     all SSA names are replaced with the result of calling the VALUEIZE
@@ -2758,7 +2785,7 @@ number_of_iterations_exit_assumptions (class loop *loop, edge exit,
    tree iv0_niters = NULL_TREE;
    if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
                               op0, &iv0, safe ? &iv0_niters : NULL, false))
-    return number_of_iterations_popcount (loop, exit, code, niter);
+    return number_of_iterations_bitcount (loop, exit, code, niter);
    tree iv1_niters = NULL_TREE;
    if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
                               op1, &iv1, safe ? &iv1_niters : NULL, false))
author	Andrew Carlotti <andrew.carlotti@arm.com>
	Tue, 4 Oct 2022 17:21:57 +0000 (18:21 +0100)
committer	Andrew Carlotti <andrew.carlotti@arm.com>
	Mon, 21 Nov 2022 16:00:51 +0000 (16:00 +0000)
gcc/testsuite/gcc.dg/tree-ssa/popcount-max.c	[new file with mode: 0644]	patch \| blob
gcc/tree-ssa-loop-niter.cc		patch \| blob \| history