vect: Rename can_fully_mask_p to can_use_partial_vectors_p
author    Kewen Lin <linkw@gcc.gnu.org>
          Wed, 3 Jun 2020 09:15:14 +0000 (17:15 +0800)
committer Kewen Lin <linkw@linux.ibm.com>
          Thu, 11 Jun 2020 08:26:42 +0000 (03:26 -0500)
Power supports vector memory access instructions that take a length
(in bytes).  Like the existing full-masking support for SVE, this is
another approach to vectorizing a loop using partially-populated vectors.

As Richard Sandiford pointed out, we should make the existing flag
can_fully_mask_p more generic, so that it indicates whether we still
have any chance of using partial vectors for this loop.  This patch
therefore renames the flag to can_use_partial_vectors_p, and likewise
renames the macro LOOP_VINFO_CAN_FULLY_MASK_P to
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P.

Bootstrapped/regtested on aarch64-linux-gnu.
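
For context, here is a minimal scalar sketch (not GCC code; VF and the
function names are purely illustrative) of the two partial-vector
styles the renamed flag now covers:

  #include <stddef.h>

  #define VF 4  /* assumed vectorization factor, for illustration only */

  /* Mask-based style (SVE full masking): each vector iteration is
     governed by a per-lane predicate, so the final iteration simply
     runs with some lanes inactive.  */
  void
  copy_fully_masked (int *dst, const int *src, size_t n)
  {
    for (size_t i = 0; i < n; i += VF)
      for (size_t lane = 0; lane < VF; lane++)
        if (i + lane < n)                  /* this lane's mask bit */
          dst[i + lane] = src[i + lane];
  }

  /* Length-based style (Power's vector access with length): each
     vector iteration processes an explicit element count, so the final
     iteration uses a shorter length instead of a mask.  */
  void
  copy_with_length (int *dst, const int *src, size_t n)
  {
    for (size_t i = 0; i < n; i += VF)
      {
        size_t len = n - i < VF ? n - i : VF;  /* elements remaining */
        for (size_t lane = 0; lane < len; lane++)
          dst[i + lane] = src[i + lane];
      }
  }

Either way, a single iteration of the vector loop may handle fewer than
VF scalars, which is exactly the property the renamed flag tracks.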

gcc/ChangeLog:

* tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Rename
can_fully_mask_p to can_use_partial_vectors_p.
(vect_analyze_loop_2): Rename LOOP_VINFO_CAN_FULLY_MASK_P to
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P.  Rename saved_can_fully_mask_p
to saved_can_use_partial_vectors_p.
(vectorizable_reduction): Rename LOOP_VINFO_CAN_FULLY_MASK_P to
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P.
(vectorizable_live_operation): Likewise.
* tree-vect-stmts.c (check_load_store_masking): Likewise.
(vectorizable_operation): Likewise.
(vectorizable_store): Likewise.
(vectorizable_load): Likewise.
(vectorizable_condition): Likewise.
* tree-vectorizer.h (LOOP_VINFO_CAN_FULLY_MASK_P): Renamed to ...
(LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P): ... this.
(_loop_vec_info): Rename can_fully_mask_p to can_use_partial_vectors_p.

gcc/tree-vect-loop.c
gcc/tree-vect-stmts.c
gcc/tree-vectorizer.h

diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 53def19..cec903d 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -814,7 +814,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
     vec_outside_cost (0),
     vec_inside_cost (0),
     vectorizable (false),
-    can_fully_mask_p (true),
+    can_use_partial_vectors_p (true),
     fully_masked_p (false),
     peeling_for_gaps (false),
     peeling_for_niter (false),
@@ -2061,7 +2061,8 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
       vect_optimize_slp (loop_vinfo);
     }
 
-  bool saved_can_fully_mask_p = LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo);
+  bool saved_can_use_partial_vectors_p
+    = LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo);
 
   /* We don't expect to have to roll back to anything other than an empty
      set of rgroups.  */
@@ -2146,7 +2147,7 @@ start_over:
   /* Decide whether to use a fully-masked loop for this vectorization
      factor.  */
   LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
-    = (LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
+    = (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
        && vect_verify_full_masking (loop_vinfo));
   if (dump_enabled_p ())
     {
@@ -2383,7 +2384,8 @@ again:
   LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
   LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0;
   LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0;
-  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = saved_can_fully_mask_p;
+  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+    = saved_can_use_partial_vectors_p;
 
   goto start_over;
 }
@@ -6778,7 +6780,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
       STMT_VINFO_DEF_TYPE (vect_orig_stmt (tem)) = vect_internal_def;
       STMT_VINFO_DEF_TYPE (tem) = vect_internal_def;
     }
-  else if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
+  else if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
     {
       vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
       internal_fn cond_fn = get_conditional_internal_fn (code);
@@ -6793,7 +6795,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "can't use a fully-masked loop because no"
                             " conditional operation is available.\n");
-         LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+         LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
        }
       else if (reduction_type == FOLD_LEFT_REDUCTION
               && reduc_fn == IFN_LAST
@@ -6805,7 +6807,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "can't use a fully-masked loop because no"
                             " conditional operation is available.\n");
-         LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+         LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
        }
       else
        vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
@@ -7876,7 +7878,7 @@ vectorizable_live_operation (loop_vec_info loop_vinfo,
   if (!vec_stmt_p)
     {
       /* No transformation required.  */
-      if (LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
+      if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
        {
          if (!direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
                                               OPTIMIZE_FOR_SPEED))
@@ -7886,7 +7888,7 @@ vectorizable_live_operation (loop_vec_info loop_vinfo,
                                 "can't use a fully-masked loop because "
                                 "the target doesn't support extract last "
                                 "reduction.\n");
-             LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+             LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
            }
          else if (slp_node)
            {
@@ -7894,7 +7896,7 @@ vectorizable_live_operation (loop_vec_info loop_vinfo,
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "can't use a fully-masked loop because an "
                                 "SLP statement is live after the loop.\n");
-             LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+             LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
            }
          else if (ncopies > 1)
            {
@@ -7902,7 +7904,7 @@ vectorizable_live_operation (loop_vec_info loop_vinfo,
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "can't use a fully-masked loop because"
                                 " ncopies is greater than 1.\n");
-             LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+             LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
            }
          else
            {
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 72f0e80..71ee831 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1667,7 +1667,7 @@ static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
    its arguments.  If the load or store is conditional, SCALAR_MASK is the
    condition under which it occurs.
 
-   Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
+   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a fully-masked loop is not
    supported, otherwise record the required mask types.  */
 
 static void
@@ -1694,7 +1694,7 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
                             "can't use a fully-masked loop because the"
                             " target doesn't have an appropriate masked"
                             " load/store-lanes instruction.\n");
-         LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+         LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
          return;
        }
       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
@@ -1717,7 +1717,7 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
                             "can't use a fully-masked loop because the"
                             " target doesn't have an appropriate masked"
                             " gather load or scatter store instruction.\n");
-         LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+         LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
          return;
        }
       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
@@ -1734,7 +1734,7 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't use a fully-masked loop because an access"
                         " isn't contiguous.\n");
-      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
       return;
     }
 
@@ -1748,7 +1748,7 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
                         "can't use a fully-masked loop because the target"
                         " doesn't have the appropriate masked load or"
                         " store.\n");
-      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
       return;
     }
   /* We might load more scalars than we need for permuting SLP loads.
@@ -5866,7 +5866,7 @@ vectorizable_operation (vec_info *vinfo,
         should only change the active lanes of the reduction chain,
         keeping the inactive lanes as-is.  */
       if (loop_vinfo
-         && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
+         && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
          && reduc_idx >= 0)
        {
          if (cond_fn == IFN_LAST
@@ -5877,7 +5877,7 @@ vectorizable_operation (vec_info *vinfo,
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "can't use a fully-masked loop because no"
                                 " conditional operation is available.\n");
-             LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+             LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
            }
          else
            vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
@@ -7139,7 +7139,7 @@ vectorizable_store (vec_info *vinfo,
       STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
 
       if (loop_vinfo
-         && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
+         && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
        check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
                                  memory_access_type, &gs_info, mask);
 
@@ -8432,7 +8432,7 @@ vectorizable_load (vec_info *vinfo,
        STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
 
       if (loop_vinfo
-         && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
+         && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
        check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
                                  memory_access_type, &gs_info, mask);
 
@@ -9845,7 +9845,7 @@ vectorizable_condition (vec_info *vinfo,
        }
 
       if (loop_vinfo
-         && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
+         && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
          && reduction_type == EXTRACT_LAST_REDUCTION)
        vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
                               ncopies * vec_num, vectype, NULL);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 14f68f7..c459280 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -631,8 +631,11 @@ public:
   /* Is the loop vectorizable? */
   bool vectorizable;
 
-  /* Records whether we still have the option of using a fully-masked loop.  */
-  bool can_fully_mask_p;
+  /* Records whether we still have the option of vectorizing this loop
+     using partially-populated vectors; in other words, whether it is
+     still possible for one iteration of the vector loop to handle
+     fewer than VF scalars.  */
+  bool can_use_partial_vectors_p;
 
   /* True if have decided to use a fully-masked loop.  */
   bool fully_masked_p;
@@ -698,7 +701,7 @@ public:
 #define LOOP_VINFO_COST_MODEL_THRESHOLD(L) (L)->th
 #define LOOP_VINFO_VERSIONING_THRESHOLD(L) (L)->versioning_threshold
 #define LOOP_VINFO_VECTORIZABLE_P(L)       (L)->vectorizable
-#define LOOP_VINFO_CAN_FULLY_MASK_P(L)     (L)->can_fully_mask_p
+#define LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P(L) (L)->can_use_partial_vectors_p
 #define LOOP_VINFO_FULLY_MASKED_P(L)       (L)->fully_masked_p
 #define LOOP_VINFO_VECT_FACTOR(L)          (L)->vectorization_factor
 #define LOOP_VINFO_MAX_VECT_FACTOR(L)      (L)->max_vectorization_factor