re PR tree-optimization/66718 (Non-invariant ADDR_EXPR not vectorized)
author Jakub Jelinek <jakub@redhat.com>
Thu, 9 Jul 2015 21:11:28 +0000 (23:11 +0200)
committer Jakub Jelinek <jakub@gcc.gnu.org>
Thu, 9 Jul 2015 21:11:28 +0000 (23:11 +0200)
PR tree-optimization/66718
* tree-vect-stmts.c (struct simd_call_arg_info): Add simd_lane_linear
field.
(vect_simd_lane_linear): New function.
(vectorizable_simd_clone_call): Support using linear arguments for
addresses of array elements indexed by GOMP_SIMD_LANE result.

From-SVN: r225637

gcc/ChangeLog
gcc/tree-vect-stmts.c

index f865711..eaf7f7d 100644 (file)
@@ -1,3 +1,12 @@
+2015-07-09  Jakub Jelinek  <jakub@redhat.com>
+
+       PR tree-optimization/66718
+       * tree-vect-stmts.c (struct simd_call_arg_info): Add simd_lane_linear
+       field.
+       (vect_simd_lane_linear): New function.
+       (vectorizable_simd_clone_call): Support using linear arguments for
+       addresses of array elements indexed by GOMP_SIMD_LANE result.
+
 2015-07-09  H.J. Lu  <hongjiu.lu@intel.com>
 
        PR target/66821
index 8504b4d..f06e57c 100644 (file)
@@ -2629,8 +2629,79 @@ struct simd_call_arg_info
   enum vect_def_type dt;
   HOST_WIDE_INT linear_step;
   unsigned int align;
+  bool simd_lane_linear;
 };
 
+/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
+   is linear within simd lane (but not within whole loop), note it in
+   *ARGINFO.
+
+   OP must be defined as BASE p+ OFF with an invariant BASE.  OFF is
+   then followed through its defining statements: constant additions
+   are folded into BASE, one constant multiplication supplies the step,
+   and non-narrowing integer conversions are looked through.  *ARGINFO
+   is written only if that chain ends at the IFN_GOMP_SIMD_LANE call
+   whose argument is based on LOOP's simduid; in every other case it is
+   left untouched.  */
+
+static void
+vect_simd_lane_linear (tree op, struct loop *loop,
+                      struct simd_call_arg_info *arginfo)
+{
+  gimple def_stmt = SSA_NAME_DEF_STMT (op);
+
+  if (!is_gimple_assign (def_stmt)
+      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
+      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
+    return;
+
+  tree base = gimple_assign_rhs1 (def_stmt);
+  /* Zero means neither a multiplication nor a conversion has been seen
+     yet while walking the offset's definition chain.  */
+  HOST_WIDE_INT linear_step = 0;
+  tree v = gimple_assign_rhs2 (def_stmt);
+  while (TREE_CODE (v) == SSA_NAME)
+    {
+      tree t;
+      def_stmt = SSA_NAME_DEF_STMT (v);
+      if (is_gimple_assign (def_stmt))
+       switch (gimple_assign_rhs_code (def_stmt))
+         {
+         case PLUS_EXPR:
+           /* A constant addend can be folded into BASE only as long as
+              no scaling or conversion has been applied on top of it.  */
+           t = gimple_assign_rhs2 (def_stmt);
+           if (linear_step || TREE_CODE (t) != INTEGER_CST)
+             return;
+           base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
+           v = gimple_assign_rhs1 (def_stmt);
+           continue;
+         case MULT_EXPR:
+           /* Accept a single non-zero constant factor that fits in a
+              HOST_WIDE_INT; it becomes the linear step.  */
+           t = gimple_assign_rhs2 (def_stmt);
+           if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
+             return;
+           linear_step = tree_to_shwi (t);
+           v = gimple_assign_rhs1 (def_stmt);
+           continue;
+         CASE_CONVERT:
+           /* Look through conversions from an integer type that is not
+              wider than the type of V.  */
+           t = gimple_assign_rhs1 (def_stmt);
+           if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
+               || (TYPE_PRECISION (TREE_TYPE (v))
+                   < TYPE_PRECISION (TREE_TYPE (t))))
+             return;
+           /* Once a conversion has been seen, make the step non-zero so
+              that any further PLUS_EXPR or MULT_EXPR is rejected.  */
+           if (!linear_step)
+             linear_step = 1;
+           v = t;
+           continue;
+         default:
+           return;
+         }
+      else if (is_gimple_call (def_stmt)
+              && gimple_call_internal_p (def_stmt)
+              && gimple_call_internal_fn (def_stmt) == IFN_GOMP_SIMD_LANE
+              && loop->simduid
+              && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
+              && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
+                  == loop->simduid))
+       {
+         if (!linear_step)
+           linear_step = 1;
+         arginfo->linear_step = linear_step;
+         arginfo->op = base;
+         arginfo->simd_lane_linear = true;
+         return;
+       }
+      else
+       /* V is defined by something that is neither an assignment nor
+          the matching IFN_GOMP_SIMD_LANE call.  V would never change,
+          so give up instead of iterating forever.  */
+       return;
+    }
+}
+
 /* Function vectorizable_simd_clone_call.
 
    Check if STMT performs a function call that can be vectorized
@@ -2713,6 +2784,7 @@ vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
       thisarginfo.linear_step = 0;
       thisarginfo.align = 0;
       thisarginfo.op = NULL_TREE;
+      thisarginfo.simd_lane_linear = false;
 
       op = gimple_call_arg (stmt, i);
       if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
@@ -2735,21 +2807,24 @@ vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
 
       /* For linear arguments, the analyze phase should have saved
         the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
-      if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
-         && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
+      if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
+         && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
        {
          gcc_assert (vec_stmt);
          thisarginfo.linear_step
-           = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
+           = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
          thisarginfo.op
-           = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
+           = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
+         thisarginfo.simd_lane_linear
+           = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
+              == boolean_true_node);
          /* If loop has been peeled for alignment, we need to adjust it.  */
          tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
          tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
-         if (n1 != n2)
+         if (n1 != n2 && !thisarginfo.simd_lane_linear)
            {
              tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
-             tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
+             tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
              tree opt = TREE_TYPE (thisarginfo.op);
              bias = fold_convert (TREE_TYPE (step), bias);
              bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
@@ -2775,6 +2850,17 @@ vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
                || thisarginfo.dt == vect_external_def)
               && POINTER_TYPE_P (TREE_TYPE (op)))
        thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
+      /* Addresses of array elements indexed by GOMP_SIMD_LANE are
+        linear too.  */
+      if (POINTER_TYPE_P (TREE_TYPE (op))
+         && !thisarginfo.linear_step
+         && !vec_stmt
+         && thisarginfo.dt != vect_constant_def
+         && thisarginfo.dt != vect_external_def
+         && loop_vinfo
+         && !slp_node
+         && TREE_CODE (op) == SSA_NAME)
+       vect_simd_lane_linear (op, loop, &thisarginfo);
 
       arginfo.quick_push (thisarginfo);
     }
@@ -2906,13 +2992,16 @@ vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
        if (bestn->simdclone->args[i].arg_type
            == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
          {
-           STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
+           STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
                                                                        + 1);
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
            tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
                       ? size_type_node : TREE_TYPE (arginfo[i].op);
            tree ls = build_int_cst (lst, arginfo[i].linear_step);
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
+           tree sll = arginfo[i].simd_lane_linear
+                      ? boolean_true_node : boolean_false_node;
+           STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
          }
       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
       if (dump_enabled_p ())
@@ -3050,6 +3139,11 @@ vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
                      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
                      gcc_assert (!new_bb);
                    }
+                 if (arginfo[i].simd_lane_linear)
+                   {
+                     vargs.safe_push (arginfo[i].op);
+                     break;
+                   }
                  tree phi_res = copy_ssa_name (op);
                  gphi *new_phi = create_phi_node (phi_res, loop->header);
                  set_vinfo_for_stmt (new_phi,