From 042f4082979aa22e08c008ed4c5b4bab3915a9c2 Mon Sep 17 00:00:00 2001
From: Kewen Lin
Date: Wed, 3 Jun 2020 17:15:14 +0800
Subject: [PATCH] vect: Rename can_fully_mask_p to can_use_partial_vectors_p

Power supports vector memory access instructions that operate on a
given length in bytes.  Like the existing full masking for SVE, this
is another approach to vectorizing a loop using partially-populated
vectors.  As Richard Sandiford pointed out, we should extend the
existing flag can_fully_mask_p to be more generic, so that it
indicates whether we have any chance of using partial vectors for
this loop.  This patch therefore renames the flag to
can_use_partial_vectors_p and the macro LOOP_VINFO_CAN_FULLY_MASK_P
to LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P, to make both names more
meaningful.

Bootstrapped/regtested on aarch64-linux-gnu.

gcc/ChangeLog:

	* tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Rename
	can_fully_mask_p to can_use_partial_vectors_p.
	(vect_analyze_loop_2): Rename LOOP_VINFO_CAN_FULLY_MASK_P to
	LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P.  Rename
	saved_can_fully_mask_p to saved_can_use_partial_vectors_p.
	(vectorizable_reduction): Rename LOOP_VINFO_CAN_FULLY_MASK_P to
	LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P.
	(vectorizable_live_operation): Likewise.
	* tree-vect-stmts.c (check_load_store_masking): Likewise.
	(vectorizable_operation): Likewise.
	(vectorizable_store): Likewise.
	(vectorizable_load): Likewise.
	(vectorizable_condition): Likewise.
	* tree-vectorizer.h (LOOP_VINFO_CAN_FULLY_MASK_P): Renamed to ...
	(LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P): ... this.
	(_loop_vec_info): Rename can_fully_mask_p to
	can_use_partial_vectors_p.
---
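As a rough scalar sketch of what "using partially-populated vectors"
means, consider the hypothetical function below (illustrative C only,
not GCC code; the name saxpy and the fixed vectorization factor of 4
are assumptions made for the example).  Every vector iteration runs
the same body, but only the first "len" lanes take effect, so the
final iteration can handle fewer than VF scalars without needing a
scalar epilogue.  Full masking for SVE encodes "len" as a per-lane
predicate; the Power instructions encode it as a length in bytes.

void
saxpy (float *restrict x, float *restrict y, float a, long n)
{
  const long vf = 4;  /* assumed vectorization factor */
  for (long i = 0; i < n; i += vf)
    {
      /* Number of active lanes this iteration: VF everywhere except
         possibly the final iteration.  */
      long len = n - i < vf ? n - i : vf;
      /* This inner loop stands in for one vector operation whose
         lanes at or beyond LEN are suppressed, whether by a predicate
         mask (SVE) or by a byte length (Power).  */
      for (long lane = 0; lane < len; lane++)
        y[i + lane] += a * x[i + lane];
    }
}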
 gcc/tree-vect-loop.c  | 24 +++++++++++++-----------
 gcc/tree-vect-stmts.c | 20 ++++++++++----------
 gcc/tree-vectorizer.h |  9 ++++++---
 3 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 53def19..cec903d 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -814,7 +814,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
     vec_outside_cost (0),
     vec_inside_cost (0),
     vectorizable (false),
-    can_fully_mask_p (true),
+    can_use_partial_vectors_p (true),
     fully_masked_p (false),
     peeling_for_gaps (false),
     peeling_for_niter (false),
@@ -2061,7 +2061,8 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
       vect_optimize_slp (loop_vinfo);
     }
 
-  bool saved_can_fully_mask_p = LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo);
+  bool saved_can_use_partial_vectors_p
+    = LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo);
 
   /* We don't expect to have to roll back to anything other than an empty
      set of rgroups.  */
@@ -2146,7 +2147,7 @@ start_over:
   /* Decide whether to use a fully-masked loop for this vectorization
      factor.  */
   LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
-    = (LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
+    = (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
        && vect_verify_full_masking (loop_vinfo));
   if (dump_enabled_p ())
     {
@@ -2383,7 +2384,8 @@ again:
   LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
   LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0;
   LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0;
-  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = saved_can_fully_mask_p;
+  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+    = saved_can_use_partial_vectors_p;
 
   goto start_over;
 }
@@ -6778,7 +6780,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
 	  STMT_VINFO_DEF_TYPE (vect_orig_stmt (tem)) = vect_internal_def;
 	  STMT_VINFO_DEF_TYPE (tem) = vect_internal_def;
 	}
-      else if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
+      else if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
 	{
 	  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
 	  internal_fn cond_fn = get_conditional_internal_fn (code);
@@ -6793,7 +6795,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 				 "can't use a fully-masked loop because no"
 				 " conditional operation is available.\n");
-	      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
 	    }
 	  else if (reduction_type == FOLD_LEFT_REDUCTION
 		   && reduc_fn == IFN_LAST
@@ -6805,7 +6807,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 				 "can't use a fully-masked loop because no"
 				 " conditional operation is available.\n");
-	      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
 	    }
 	  else
 	    vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
@@ -7876,7 +7878,7 @@ vectorizable_live_operation (loop_vec_info loop_vinfo,
   if (!vec_stmt_p)
     {
       /* No transformation required.  */
-      if (LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
+      if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
 	{
 	  if (!direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
 					       OPTIMIZE_FOR_SPEED))
@@ -7886,7 +7888,7 @@ vectorizable_live_operation (loop_vec_info loop_vinfo,
 				 "can't use a fully-masked loop because "
 				 "the target doesn't support extract last "
 				 "reduction.\n");
-	      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
 	    }
 	  else if (slp_node)
 	    {
@@ -7894,7 +7896,7 @@ vectorizable_live_operation (loop_vec_info loop_vinfo,
 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 				 "can't use a fully-masked loop because an "
 				 "SLP statement is live after the loop.\n");
-	      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
 	    }
 	  else if (ncopies > 1)
 	    {
@@ -7902,7 +7904,7 @@ vectorizable_live_operation (loop_vec_info loop_vinfo,
 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 				 "can't use a fully-masked loop because"
 				 " ncopies is greater than 1.\n");
-	      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
 	    }
 	  else
 	    {
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 72f0e80..71ee831 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1667,7 +1667,7 @@ static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
    its arguments.  If the load or store is conditional, SCALAR_MASK is the
    condition under which it occurs.
 
-   Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
+   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a fully-masked loop is not
    supported, otherwise record the required mask types.  */
 
 static void
@@ -1694,7 +1694,7 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
			 "can't use a fully-masked loop because the"
			 " target doesn't have an appropriate masked"
			 " load/store-lanes instruction.\n");
-      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
       return;
     }
   unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
@@ -1717,7 +1717,7 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
			 "can't use a fully-masked loop because the"
			 " target doesn't have an appropriate masked"
			 " gather load or scatter store instruction.\n");
-      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
       return;
     }
   unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
@@ -1734,7 +1734,7 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't use a fully-masked loop because an access"
			 " isn't contiguous.\n");
-      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
       return;
     }
 
@@ -1748,7 +1748,7 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
			 "can't use a fully-masked loop because the target"
			 " doesn't have the appropriate masked load or"
			 " store.\n");
-      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
       return;
     }
   /* We might load more scalars than we need for permuting SLP loads.
@@ -5866,7 +5866,7 @@ vectorizable_operation (vec_info *vinfo,
      should only change the active lanes of the reduction chain,
      keeping the inactive lanes as-is.  */
   if (loop_vinfo
-      && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
+      && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
       && reduc_idx >= 0)
     {
       if (cond_fn == IFN_LAST
@@ -5877,7 +5877,7 @@ vectorizable_operation (vec_info *vinfo,
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't use a fully-masked loop because no"
			     " conditional operation is available.\n");
-	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	}
       else
	vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
@@ -7139,7 +7139,7 @@ vectorizable_store (vec_info *vinfo,
       STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
 
       if (loop_vinfo
-	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
+	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
	check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
				  memory_access_type, &gs_info, mask);
 
@@ -8432,7 +8432,7 @@ vectorizable_load (vec_info *vinfo,
       STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
 
       if (loop_vinfo
-	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
+	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
	check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
				  memory_access_type, &gs_info, mask);
 
@@ -9845,7 +9845,7 @@ vectorizable_condition (vec_info *vinfo,
	}
 
       if (loop_vinfo
-	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
+	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
	  && reduction_type == EXTRACT_LAST_REDUCTION)
	vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
			       ncopies * vec_num, vectype, NULL);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 14f68f7..c459280 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -631,8 +631,11 @@ public:
   /* Is the loop vectorizable? */
   bool vectorizable;
 
-  /* Records whether we still have the option of using a fully-masked loop. */
-  bool can_fully_mask_p;
+  /* Records whether we still have the option of vectorizing this loop
+     using partially-populated vectors; in other words, whether it is
+     still possible for one iteration of the vector loop to handle
+     fewer than VF scalars.  */
+  bool can_use_partial_vectors_p;
 
   /* True if have decided to use a fully-masked loop.  */
   bool fully_masked_p;
@@ -698,7 +701,7 @@ public:
 #define LOOP_VINFO_COST_MODEL_THRESHOLD(L) (L)->th
 #define LOOP_VINFO_VERSIONING_THRESHOLD(L) (L)->versioning_threshold
 #define LOOP_VINFO_VECTORIZABLE_P(L)       (L)->vectorizable
-#define LOOP_VINFO_CAN_FULLY_MASK_P(L)     (L)->can_fully_mask_p
+#define LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P(L) (L)->can_use_partial_vectors_p
 #define LOOP_VINFO_FULLY_MASKED_P(L)       (L)->fully_masked_p
 #define LOOP_VINFO_VECT_FACTOR(L)          (L)->vectorization_factor
 #define LOOP_VINFO_MAX_VECT_FACTOR(L)      (L)->max_vectorization_factor
-- 
2.7.4