From 09eb042a8a8ee16e8f23085a175be25c8ef68820 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 8 Nov 2019 08:32:19 +0000 Subject: [PATCH] Generalise gather and scatter optabs The gather and scatter optabs required the vector offset to be the integer equivalent of the vector mode being loaded or stored. This patch generalises them so that the two vectors can have different element sizes, although they still need to have the same number of elements. One consequence of this is that it's possible (if unlikely) for two IFN_GATHER_LOADs to have the same arguments but different return types. E.g. the same scalar base and vector of 32-bit offsets could be used to load 8-bit elements and to load 16-bit elements. From just looking at the arguments, we could wrongly deduce that they're equivalent. I know we saw this happen at one point with IFN_WHILE_ULT, and we dealt with it there by passing a zero of the return type as an extra argument. Doing the same here also makes the load and store functions have the same argument assignment. For now this patch should be a no-op, but later SVE patches take advantage of the new flexibility. 2019-11-08 Richard Sandiford gcc/ * optabs.def (gather_load_optab, mask_gather_load_optab) (scatter_store_optab, mask_scatter_store_optab): Turn into conversion optabs, with the offset mode given explicitly. * doc/md.texi: Update accordingly. * config/aarch64/aarch64-sve-builtins-base.cc (svld1_gather_impl::expand): Likewise. (svst1_scatter_impl::expand): Likewise. * internal-fn.c (gather_load_direct, scatter_store_direct): Likewise. (expand_scatter_store_optab_fn): Likewise. (direct_gather_load_optab_supported_p): Likewise. (direct_scatter_store_optab_supported_p): Likewise. (expand_gather_load_optab_fn): Likewise. Expect the mask argument to be argument 4. (internal_fn_mask_index): Return 4 for IFN_MASK_GATHER_LOAD. (internal_gather_scatter_fn_supported_p): Replace the offset sign argument with the offset vector type. 
Require the two vector types to have the same number of elements but allow their element sizes to be different. Treat the optabs as conversion optabs. * internal-fn.h (internal_gather_scatter_fn_supported_p): Update prototype accordingly. * optabs-query.c (supports_at_least_one_mode_p): Replace with... (supports_vec_convert_optab_p): ...this new function. (supports_vec_gather_load_p): Update accordingly. (supports_vec_scatter_store_p): Likewise. * tree-vectorizer.h (vect_gather_scatter_fn_p): Take a vec_info. Replace the offset sign and bits parameters with a scalar type tree. * tree-vect-data-refs.c (vect_gather_scatter_fn_p): Likewise. Pass back the offset vector type instead of the scalar element type. Allow the offset to be wider than the memory elements. Search for an offset type that the target supports, stopping once we've reached the maximum of the element size and pointer size. Update call to internal_gather_scatter_fn_supported_p. (vect_check_gather_scatter): Update calls accordingly. When testing a new scale before knowing the final offset type, check whether the scale is supported for any signed or unsigned offset type. Check whether the target supports the source and target types of a conversion before deciding whether to look through the conversion. Record the chosen offset_vectype. * tree-vect-patterns.c (vect_get_gather_scatter_offset_type): Delete. (vect_recog_gather_scatter_pattern): Get the scalar offset type directly from the gs_info's offset_vectype instead. Pass a zero of the result type to IFN_GATHER_LOAD and IFN_MASK_GATHER_LOAD. * tree-vect-stmts.c (check_load_store_masking): Update call to internal_gather_scatter_fn_supported_p, passing the offset vector type recorded in the gs_info. (vect_truncate_gather_scatter_offset): Update call to vect_check_gather_scatter, leaving it to search for a valid offset vector type. (vect_use_strided_gather_scatters_p): Convert the offset to the element type of the gs_info's offset_vectype. 
(vect_get_gather_scatter_ops): Get the offset vector type directly from the gs_info. (vect_get_strided_load_store_ops): Likewise. (vectorizable_load): Pass a zero of the result type to IFN_GATHER_LOAD and IFN_MASK_GATHER_LOAD. * config/aarch64/aarch64-sve.md (gather_load<mode>): Rename to... (gather_load<mode><v_int_equiv>): ...this. (mask_gather_load<mode>): Rename to... (mask_gather_load<mode><v_int_equiv>): ...this. (scatter_store<mode>): Rename to... (scatter_store<mode><v_int_equiv>): ...this. (mask_scatter_store<mode>): Rename to... (mask_scatter_store<mode><v_int_equiv>): ...this. From-SVN: r277949 --- gcc/ChangeLog | 66 ++++++++++++++++ gcc/config/aarch64/aarch64-sve-builtins-base.cc | 10 ++- gcc/config/aarch64/aarch64-sve.md | 20 ++--- gcc/doc/md.texi | 34 ++++---- gcc/internal-fn.c | 41 +++++----- gcc/internal-fn.h | 2 +- gcc/optabs-query.c | 16 ++-- gcc/optabs.def | 9 +-- gcc/tree-vect-data-refs.c | 101 ++++++++++++++---------- gcc/tree-vect-patterns.c | 33 ++------ gcc/tree-vect-stmts.c | 70 +++++++--------- gcc/tree-vectorizer.h | 4 +- 12 files changed, 230 insertions(+), 176 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0780b31..ab690af 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,69 @@ +2019-11-08 Richard Sandiford + + * optabs.def (gather_load_optab, mask_gather_load_optab) + (scatter_store_optab, mask_scatter_store_optab): Turn into + conversion optabs, with the offset mode given explicitly. + * doc/md.texi: Update accordingly. + * config/aarch64/aarch64-sve-builtins-base.cc + (svld1_gather_impl::expand): Likewise. + (svst1_scatter_impl::expand): Likewise. + * internal-fn.c (gather_load_direct, scatter_store_direct): Likewise. + (expand_scatter_store_optab_fn): Likewise. + (direct_gather_load_optab_supported_p): Likewise. + (direct_scatter_store_optab_supported_p): Likewise. + (expand_gather_load_optab_fn): Likewise. Expect the mask argument + to be argument 4. + (internal_fn_mask_index): Return 4 for IFN_MASK_GATHER_LOAD.
+ (internal_gather_scatter_fn_supported_p): Replace the offset sign + argument with the offset vector type. Require the two vector + types to have the same number of elements but allow their element + sizes to be different. Treat the optabs as conversion optabs. + * internal-fn.h (internal_gather_scatter_fn_supported_p): Update + prototype accordingly. + * optabs-query.c (supports_at_least_one_mode_p): Replace with... + (supports_vec_convert_optab_p): ...this new function. + (supports_vec_gather_load_p): Update accordingly. + (supports_vec_scatter_store_p): Likewise. + * tree-vectorizer.h (vect_gather_scatter_fn_p): Take a vec_info. + Replace the offset sign and bits parameters with a scalar type tree. + * tree-vect-data-refs.c (vect_gather_scatter_fn_p): Likewise. + Pass back the offset vector type instead of the scalar element type. + Allow the offset to be wider than the memory elements. Search for + an offset type that the target supports, stopping once we've + reached the maximum of the element size and pointer size. + Update call to internal_gather_scatter_fn_supported_p. + (vect_check_gather_scatter): Update calls accordingly. + When testing a new scale before knowing the final offset type, + check whether the scale is supported for any signed or unsigned + offset type. Check whether the target supports the source and + target types of a conversion before deciding whether to look + through the conversion. Record the chosen offset_vectype. + * tree-vect-patterns.c (vect_get_gather_scatter_offset_type): Delete. + (vect_recog_gather_scatter_pattern): Get the scalar offset type + directly from the gs_info's offset_vectype instead. Pass a zero + of the result type to IFN_GATHER_LOAD and IFN_MASK_GATHER_LOAD. + * tree-vect-stmts.c (check_load_store_masking): Update call to + internal_gather_scatter_fn_supported_p, passing the offset vector + type recorded in the gs_info. 
+ (vect_truncate_gather_scatter_offset): Update call to + vect_check_gather_scatter, leaving it to search for a valid + offset vector type. + (vect_use_strided_gather_scatters_p): Convert the offset to the + element type of the gs_info's offset_vectype. + (vect_get_gather_scatter_ops): Get the offset vector type directly + from the gs_info. + (vect_get_strided_load_store_ops): Likewise. + (vectorizable_load): Pass a zero of the result type to IFN_GATHER_LOAD + and IFN_MASK_GATHER_LOAD. + * config/aarch64/aarch64-sve.md (gather_load<mode>): Rename to... + (gather_load<mode><v_int_equiv>): ...this. + (mask_gather_load<mode>): Rename to... + (mask_gather_load<mode><v_int_equiv>): ...this. + (scatter_store<mode>): Rename to... + (scatter_store<mode><v_int_equiv>): ...this. + (mask_scatter_store<mode>): Rename to... + (mask_scatter_store<mode><v_int_equiv>): ...this. + 2019-11-08 Kewen Lin PR target/92132 diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index ce70f80..e12882f 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -1076,7 +1076,9 @@ public: /* Put the predicate last, as required by mask_gather_load_optab. */ e.rotate_inputs_left (0, 5); machine_mode mem_mode = e.memory_vector_mode (); - insn_code icode = direct_optab_handler (mask_gather_load_optab, mem_mode); + machine_mode int_mode = aarch64_sve_int_mode (mem_mode); + insn_code icode = convert_optab_handler (mask_gather_load_optab, + mem_mode, int_mode); return e.use_exact_insn (icode); } }; @@ -2043,8 +2045,10 @@ public: e.prepare_gather_address_operands (1); /* Put the predicate last, as required by mask_scatter_store_optab.
*/ e.rotate_inputs_left (0, 6); - insn_code icode = direct_optab_handler (mask_scatter_store_optab, - e.memory_vector_mode ()); + machine_mode mem_mode = e.memory_vector_mode (); + machine_mode int_mode = aarch64_sve_int_mode (mem_mode); + insn_code icode = convert_optab_handler (mask_scatter_store_optab, + mem_mode, int_mode); return e.use_exact_insn (icode); } }; diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 0cda882..51e876a 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -1336,7 +1336,7 @@ ;; ------------------------------------------------------------------------- ;; Unpredicated gather loads. -(define_expand "gather_load<mode>" +(define_expand "gather_load<mode><v_int_equiv>" [(set (match_operand:SVE_SD 0 "register_operand") (unspec:SVE_SD [(match_dup 5) @@ -1354,7 +1354,7 @@ ;; Predicated gather loads for 32-bit elements. Operand 3 is true for ;; unsigned extension and false for signed extension. -(define_insn "mask_gather_load<mode>" +(define_insn "mask_gather_load<mode><v_int_equiv>" [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w, w") (unspec:SVE_S [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") @@ -1376,7 +1376,7 @@ ;; Predicated gather loads for 64-bit elements. The value of operand 3 ;; doesn't matter in this case. -(define_insn "mask_gather_load<mode>" +(define_insn "mask_gather_load<mode><v_int_equiv>" [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w, w") (unspec:SVE_D [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") @@ -1395,7 +1395,7 @@ ) ;; Likewise, but with the offset being sign-extended from 32 bits. -(define_insn "*mask_gather_load<mode>_sxtw" +(define_insn "*mask_gather_load<mode><v_int_equiv>_sxtw" [(set (match_operand:SVE_D 0 "register_operand" "=w, w") (unspec:SVE_D [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") @@ -1417,7 +1417,7 @@ ) ;; Likewise, but with the offset being zero-extended from 32 bits.
-(define_insn "*mask_gather_load<mode>_uxtw" +(define_insn "*mask_gather_load<mode><v_int_equiv>_uxtw" [(set (match_operand:SVE_D 0 "register_operand" "=w, w") (unspec:SVE_D [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") @@ -2054,7 +2054,7 @@ ;; ------------------------------------------------------------------------- ;; Unpredicated scatter stores. -(define_expand "scatter_store<mode>" +(define_expand "scatter_store<mode><v_int_equiv>" [(set (mem:BLK (scratch)) (unspec:BLK [(match_dup 5) @@ -2072,7 +2072,7 @@ ;; Predicated scatter stores for 32-bit elements. Operand 2 is true for ;; unsigned extension and false for signed extension. -(define_insn "mask_scatter_store<mode>" +(define_insn "mask_scatter_store<mode><v_int_equiv>" [(set (mem:BLK (scratch)) (unspec:BLK [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") @@ -2094,7 +2094,7 @@ ;; Predicated scatter stores for 64-bit elements. The value of operand 2 ;; doesn't matter in this case. -(define_insn "mask_scatter_store<mode>" +(define_insn "mask_scatter_store<mode><v_int_equiv>" [(set (mem:BLK (scratch)) (unspec:BLK [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") @@ -2113,7 +2113,7 @@ ) ;; Likewise, but with the offset being sign-extended from 32 bits. -(define_insn_and_rewrite "*mask_scatter_store<mode>_sxtw" +(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_equiv>_sxtw" [(set (mem:BLK (scratch)) (unspec:BLK [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") @@ -2139,7 +2139,7 @@ ) ;; Likewise, but with the offset being zero-extended from 32 bits. -(define_insn "*mask_scatter_store<mode>_uxtw" +(define_insn "*mask_scatter_store<mode><v_int_equiv>_uxtw" [(set (mem:BLK (scratch)) (unspec:BLK [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 19d6893..87bbeb4 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -4959,12 +4959,12 @@ for (j = 0; j < GET_MODE_NUNITS (@var{n}); j++) This pattern is not allowed to @code{FAIL}.
-@cindex @code{gather_load@var{m}} instruction pattern -@item @samp{gather_load@var{m}} +@cindex @code{gather_load@var{m}@var{n}} instruction pattern +@item @samp{gather_load@var{m}@var{n}} Load several separate memory locations into a vector of mode @var{m}. -Operand 1 is a scalar base address and operand 2 is a vector of -offsets from that base. Operand 0 is a destination vector with the -same number of elements as the offset. For each element index @var{i}: +Operand 1 is a scalar base address and operand 2 is a vector of mode @var{n} +containing offsets from that base. Operand 0 is a destination vector with +the same number of elements as @var{n}. For each element index @var{i}: @itemize @bullet @item @@ -4981,20 +4981,20 @@ load the value at that address into element @var{i} of operand 0. The value of operand 3 does not matter if the offsets are already address width. -@cindex @code{mask_gather_load@var{m}} instruction pattern -@item @samp{mask_gather_load@var{m}} -Like @samp{gather_load@var{m}}, but takes an extra mask operand as +@cindex @code{mask_gather_load@var{m}@var{n}} instruction pattern +@item @samp{mask_gather_load@var{m}@var{n}} +Like @samp{gather_load@var{m}@var{n}}, but takes an extra mask operand as operand 5. Bit @var{i} of the mask is set if element @var{i} of the result should be loaded from memory and clear if element @var{i} of the result should be set to zero. -@cindex @code{scatter_store@var{m}} instruction pattern -@item @samp{scatter_store@var{m}} +@cindex @code{scatter_store@var{m}@var{n}} instruction pattern +@item @samp{scatter_store@var{m}@var{n}} Store a vector of mode @var{m} into several distinct memory locations. -Operand 0 is a scalar base address and operand 1 is a vector of offsets -from that base. Operand 4 is the vector of values that should be stored, -which has the same number of elements as the offset. 
For each element -index @var{i}: +Operand 0 is a scalar base address and operand 1 is a vector of mode +@var{n} containing offsets from that base. Operand 4 is the vector of +values that should be stored, which has the same number of elements as +@var{n}. For each element index @var{i}: @itemize @bullet @item @@ -5011,9 +5011,9 @@ store element @var{i} of operand 4 to that address. The value of operand 2 does not matter if the offsets are already address width. -@cindex @code{mask_scatter_store@var{m}} instruction pattern -@item @samp{mask_scatter_store@var{m}} -Like @samp{scatter_store@var{m}}, but takes an extra mask operand as +@cindex @code{mask_scatter_store@var{m}@var{n}} instruction pattern +@item @samp{mask_scatter_store@var{m}@var{n}} +Like @samp{scatter_store@var{m}@var{n}}, but takes an extra mask operand as operand 5. Bit @var{i} of the mask is set if element @var{i} of the result should be stored to memory. diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index 549d6f1..6a878bd 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -103,11 +103,11 @@ init_internal_fns () #define mask_load_direct { -1, 2, false } #define load_lanes_direct { -1, -1, false } #define mask_load_lanes_direct { -1, -1, false } -#define gather_load_direct { -1, -1, false } +#define gather_load_direct { 3, 1, false } #define mask_store_direct { 3, 2, false } #define store_lanes_direct { 0, 0, false } #define mask_store_lanes_direct { 0, 0, false } -#define scatter_store_direct { 3, 3, false } +#define scatter_store_direct { 3, 1, false } #define unary_direct { 0, 0, true } #define binary_direct { 0, 0, true } #define ternary_direct { 0, 0, true } @@ -2785,7 +2785,8 @@ expand_scatter_store_optab_fn (internal_fn, gcall *stmt, direct_optab optab) create_input_operand (&ops[i++], mask_rtx, TYPE_MODE (TREE_TYPE (mask))); } - insn_code icode = direct_optab_handler (optab, TYPE_MODE (TREE_TYPE (rhs))); + insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (rhs)), 
+ TYPE_MODE (TREE_TYPE (offset))); expand_insn (icode, i, ops); } @@ -2813,11 +2814,12 @@ expand_gather_load_optab_fn (internal_fn, gcall *stmt, direct_optab optab) create_integer_operand (&ops[i++], scale_int); if (optab == mask_gather_load_optab) { - tree mask = gimple_call_arg (stmt, 3); + tree mask = gimple_call_arg (stmt, 4); rtx mask_rtx = expand_normal (mask); create_input_operand (&ops[i++], mask_rtx, TYPE_MODE (TREE_TYPE (mask))); } - insn_code icode = direct_optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))); + insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)), + TYPE_MODE (TREE_TYPE (offset))); expand_insn (icode, i, ops); } @@ -3084,11 +3086,11 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, #define direct_mask_load_optab_supported_p direct_optab_supported_p #define direct_load_lanes_optab_supported_p multi_vector_optab_supported_p #define direct_mask_load_lanes_optab_supported_p multi_vector_optab_supported_p -#define direct_gather_load_optab_supported_p direct_optab_supported_p +#define direct_gather_load_optab_supported_p convert_optab_supported_p #define direct_mask_store_optab_supported_p direct_optab_supported_p #define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p #define direct_mask_store_lanes_optab_supported_p multi_vector_optab_supported_p -#define direct_scatter_store_optab_supported_p direct_optab_supported_p +#define direct_scatter_store_optab_supported_p convert_optab_supported_p #define direct_while_optab_supported_p convert_optab_supported_p #define direct_fold_extract_optab_supported_p direct_optab_supported_p #define direct_fold_left_optab_supported_p direct_optab_supported_p @@ -3513,8 +3515,6 @@ internal_fn_mask_index (internal_fn fn) return 2; case IFN_MASK_GATHER_LOAD: - return 3; - case IFN_MASK_SCATTER_STORE: return 4; @@ -3546,27 +3546,30 @@ internal_fn_stored_value_index (internal_fn fn) IFN. 
For loads, VECTOR_TYPE is the vector type of the load result, while for stores it is the vector type of the stored data argument. MEMORY_ELEMENT_TYPE is the type of the memory elements being loaded - or stored. OFFSET_SIGN is the sign of the offset argument, which is - only relevant when the offset is narrower than an address. SCALE is - the amount by which the offset should be multiplied *after* it has - been extended to address width. */ + or stored. OFFSET_VECTOR_TYPE is the vector type that holds the + offset from the shared base address of each loaded or stored element. + SCALE is the amount by which these offsets should be multiplied + *after* they have been extended to address width. */ bool internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type, tree memory_element_type, - signop offset_sign, int scale) + tree offset_vector_type, int scale) { if (!tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vector_type)), TYPE_SIZE (memory_element_type))) return false; + if (maybe_ne (TYPE_VECTOR_SUBPARTS (vector_type), + TYPE_VECTOR_SUBPARTS (offset_vector_type))) + return false; optab optab = direct_internal_fn_optab (ifn); - insn_code icode = direct_optab_handler (optab, TYPE_MODE (vector_type)); + insn_code icode = convert_optab_handler (optab, TYPE_MODE (vector_type), + TYPE_MODE (offset_vector_type)); int output_ops = internal_load_fn_p (ifn) ? 1 : 0; + bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (offset_vector_type)); return (icode != CODE_FOR_nothing - && insn_operand_matches (icode, 2 + output_ops, - GEN_INT (offset_sign == UNSIGNED)) - && insn_operand_matches (icode, 3 + output_ops, - GEN_INT (scale))); + && insn_operand_matches (icode, 2 + output_ops, GEN_INT (unsigned_p)) + && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale))); } /* Expand STMT as though it were a call to internal function FN. 
*/ diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h index 7164ee5..389241a 100644 --- a/gcc/internal-fn.h +++ b/gcc/internal-fn.h @@ -220,7 +220,7 @@ extern bool internal_gather_scatter_fn_p (internal_fn); extern int internal_fn_mask_index (internal_fn); extern int internal_fn_stored_value_index (internal_fn); extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree, - tree, signop, int); + tree, tree, int); extern void expand_internal_call (gcall *); extern void expand_internal_call (internal_fn, gcall *); diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c index 2a06696..6465b5c 100644 --- a/gcc/optabs-query.c +++ b/gcc/optabs-query.c @@ -698,14 +698,18 @@ lshift_cheap_p (bool speed_p) return cheap[speed_p]; } -/* Return true if optab OP supports at least one mode. */ +/* Return true if vector conversion optab OP supports at least one mode, + given that the second mode is always an integer vector. */ static bool -supports_at_least_one_mode_p (optab op) +supports_vec_convert_optab_p (optab op) { for (int i = 0; i < NUM_MACHINE_MODES; ++i) - if (direct_optab_handler (op, (machine_mode) i) != CODE_FOR_nothing) - return true; + if (VECTOR_MODE_P ((machine_mode) i)) + for (int j = MIN_MODE_VECTOR_INT; j < MAX_MODE_VECTOR_INT; ++j) + if (convert_optab_handler (op, (machine_mode) i, + (machine_mode) j) != CODE_FOR_nothing) + return true; return false; } @@ -722,7 +726,7 @@ supports_vec_gather_load_p () this_fn_optabs->supports_vec_gather_load_cached = true; this_fn_optabs->supports_vec_gather_load - = supports_at_least_one_mode_p (gather_load_optab); + = supports_vec_convert_optab_p (gather_load_optab); return this_fn_optabs->supports_vec_gather_load; } @@ -739,7 +743,7 @@ supports_vec_scatter_store_p () this_fn_optabs->supports_vec_scatter_store_cached = true; this_fn_optabs->supports_vec_scatter_store - = supports_at_least_one_mode_p (scatter_store_optab); + = supports_vec_convert_optab_p (scatter_store_optab); return 
this_fn_optabs->supports_vec_scatter_store; } diff --git a/gcc/optabs.def b/gcc/optabs.def index e937315..90e177a 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -91,6 +91,10 @@ OPTAB_CD(vec_cmpu_optab, "vec_cmpu$a$b") OPTAB_CD(vec_cmpeq_optab, "vec_cmpeq$a$b") OPTAB_CD(maskload_optab, "maskload$a$b") OPTAB_CD(maskstore_optab, "maskstore$a$b") +OPTAB_CD(gather_load_optab, "gather_load$a$b") +OPTAB_CD(mask_gather_load_optab, "mask_gather_load$a$b") +OPTAB_CD(scatter_store_optab, "scatter_store$a$b") +OPTAB_CD(mask_scatter_store_optab, "mask_scatter_store$a$b") OPTAB_CD(vec_extract_optab, "vec_extract$a$b") OPTAB_CD(vec_init_optab, "vec_init$a$b") @@ -425,11 +429,6 @@ OPTAB_D (atomic_xor_optab, "atomic_xor$I$a") OPTAB_D (get_thread_pointer_optab, "get_thread_pointer$I$a") OPTAB_D (set_thread_pointer_optab, "set_thread_pointer$I$a") -OPTAB_D (gather_load_optab, "gather_load$a") -OPTAB_D (mask_gather_load_optab, "mask_gather_load$a") -OPTAB_D (scatter_store_optab, "scatter_store$a") -OPTAB_D (mask_scatter_store_optab, "mask_scatter_store$a") - OPTAB_DC (vec_duplicate_optab, "vec_duplicate$a", VEC_DUPLICATE) OPTAB_DC (vec_series_optab, "vec_series$a", VEC_SERIES) OPTAB_D (vec_shl_insert_optab, "vec_shl_insert_$a") diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 9dd18d2..36639b6 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -3660,28 +3660,22 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) /* Check whether we can use an internal function for a gather load or scatter store. READ_P is true for loads and false for stores. MASKED_P is true if the load or store is conditional. MEMORY_TYPE is - the type of the memory elements being loaded or stored. OFFSET_BITS - is the number of bits in each scalar offset and OFFSET_SIGN is the - sign of the offset. SCALE is the amount by which the offset should + the type of the memory elements being loaded or stored. 
OFFSET_TYPE + is the type of the offset that is being applied to the invariant + base address. SCALE is the amount by which the offset should be multiplied *after* it has been converted to address width. - Return true if the function is supported, storing the function - id in *IFN_OUT and the type of a vector element in *ELEMENT_TYPE_OUT. */ + Return true if the function is supported, storing the function id in + *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT. */ bool -vect_gather_scatter_fn_p (bool read_p, bool masked_p, tree vectype, - tree memory_type, unsigned int offset_bits, - signop offset_sign, int scale, - internal_fn *ifn_out, tree *element_type_out) +vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p, + tree vectype, tree memory_type, tree offset_type, + int scale, internal_fn *ifn_out, + tree *offset_vectype_out) { unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type)); unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))); - if (offset_bits > element_bits) - /* Internal functions require the offset to be the same width as - the vector elements. We can extend narrower offsets, but it isn't - safe to truncate wider offsets. */ - return false; - if (element_bits != memory_bits) /* For now the vector elements must be the same width as the memory elements. */ @@ -3694,14 +3688,28 @@ vect_gather_scatter_fn_p (bool read_p, bool masked_p, tree vectype, else ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE; - /* Test whether the target supports this combination. */ - if (!internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type, - offset_sign, scale)) - return false; + for (;;) + { + tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type); + if (!offset_vectype) + return false; - *ifn_out = ifn; - *element_type_out = TREE_TYPE (vectype); - return true; + /* Test whether the target supports this combination. 
*/ + if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type, + offset_vectype, scale)) + { + *ifn_out = ifn; + *offset_vectype_out = offset_vectype; + return true; + } + + if (TYPE_PRECISION (offset_type) >= POINTER_SIZE + && TYPE_PRECISION (offset_type) >= element_bits) + return false; + + offset_type = build_nonstandard_integer_type + (TYPE_PRECISION (offset_type) * 2, TYPE_UNSIGNED (offset_type)); + } } /* STMT_INFO is a call to an internal gather load or scatter store function. @@ -3744,7 +3752,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, machine_mode pmode; int punsignedp, reversep, pvolatilep = 0; internal_fn ifn; - tree element_type; + tree offset_vectype; bool masked_p = false; /* See whether this is already a call to a gather/scatter internal function. @@ -3905,13 +3913,18 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, { int new_scale = tree_to_shwi (op1); /* Only treat this as a scaling operation if the target - supports it. */ + supports it for at least some offset type. */ if (use_ifn_p - && !vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, - vectype, memory_type, 1, - TYPE_SIGN (TREE_TYPE (op0)), + && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), + masked_p, vectype, memory_type, + signed_char_type_node, + new_scale, &ifn, + &offset_vectype) + && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), + masked_p, vectype, memory_type, + unsigned_char_type_node, new_scale, &ifn, - &element_type)) + &offset_vectype)) break; scale = new_scale; off = op0; @@ -3925,6 +3938,16 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, if (!POINTER_TYPE_P (TREE_TYPE (op0)) && !INTEGRAL_TYPE_P (TREE_TYPE (op0))) break; + + /* Don't include the conversion if the target is happy with + the current offset type. 
*/ + if (use_ifn_p + && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), + masked_p, vectype, memory_type, + TREE_TYPE (off), scale, &ifn, + &offset_vectype)) + break; + if (TYPE_PRECISION (TREE_TYPE (op0)) == TYPE_PRECISION (TREE_TYPE (off))) { @@ -3932,14 +3955,6 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, continue; } - /* The internal functions need the offset to be the same width - as the elements of VECTYPE. Don't include operations that - cast the offset from that width to a different width. */ - if (use_ifn_p - && (int_size_in_bytes (TREE_TYPE (vectype)) - == int_size_in_bytes (TREE_TYPE (off)))) - break; - if (TYPE_PRECISION (TREE_TYPE (op0)) < TYPE_PRECISION (TREE_TYPE (off))) { @@ -3966,10 +3981,9 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, if (use_ifn_p) { - if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype, - memory_type, TYPE_PRECISION (offtype), - TYPE_SIGN (offtype), scale, &ifn, - &element_type)) + if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p, + vectype, memory_type, offtype, scale, + &ifn, &offset_vectype)) return false; } else @@ -3989,7 +4003,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, return false; ifn = IFN_LAST; - element_type = TREE_TYPE (vectype); + /* The offset vector type will be read from DECL when needed. 
*/ + offset_vectype = NULL_TREE; } info->ifn = ifn; @@ -3997,9 +4012,9 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, info->base = base; info->offset = off; info->offset_dt = vect_unknown_def_type; - info->offset_vectype = NULL_TREE; + info->offset_vectype = offset_vectype; info->scale = scale; - info->element_type = element_type; + info->element_type = TREE_TYPE (vectype); info->memory_type = memory_type; return true; } diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index c0fdde6..8ebbcd7 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -4498,28 +4498,6 @@ vect_get_load_store_mask (stmt_vec_info stmt_info) gcc_unreachable (); } -/* Return the scalar offset type that an internal gather/scatter function - should use. GS_INFO describes the gather/scatter operation. */ - -static tree -vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info) -{ - tree offset_type = TREE_TYPE (gs_info->offset); - unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type)); - - /* Enforced by vect_check_gather_scatter. */ - unsigned int offset_bits = TYPE_PRECISION (offset_type); - gcc_assert (element_bits >= offset_bits); - - /* If the offset is narrower than the elements, extend it according - to its sign. */ - if (element_bits > offset_bits) - return build_nonstandard_integer_type (element_bits, - TYPE_UNSIGNED (offset_type)); - - return offset_type; -} - /* Return MASK if MASK is suitable for masking an operation on vectors of type VECTYPE, otherwise convert it into such a form and return the result. Associate any conversion statements with STMT_INFO's @@ -4604,7 +4582,7 @@ vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out) /* Get the invariant base and non-invariant offset, converting the latter to the same width as the vector elements. 
*/ tree base = gs_info.base; - tree offset_type = vect_get_gather_scatter_offset_type (&gs_info); + tree offset_type = TREE_TYPE (gs_info.offset_vectype); tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset, stmt_info); @@ -4613,12 +4591,13 @@ vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out) gcall *pattern_stmt; if (DR_IS_READ (dr)) { + tree zero = build_zero_cst (gs_info.element_type); if (mask != NULL) - pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base, - offset, scale, mask); + pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base, + offset, scale, zero, mask); else - pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base, - offset, scale); + pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base, + offset, scale, zero); tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL); gimple_call_set_lhs (pattern_stmt, load_lhs); } diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 3cda888..2bbc783 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -1910,10 +1910,9 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype, internal_fn ifn = (is_load ? IFN_MASK_GATHER_LOAD : IFN_MASK_SCATTER_STORE); - tree offset_type = TREE_TYPE (gs_info->offset); if (!internal_gather_scatter_fn_supported_p (ifn, vectype, gs_info->memory_type, - TYPE_SIGN (offset_type), + gs_info->offset_vectype, gs_info->scale)) { if (dump_enabled_p ()) @@ -2046,35 +2045,33 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor)) continue; - /* See whether we can calculate (COUNT - 1) * STEP / SCALE - in OFFSET_BITS bits. */ + /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */ widest_int range = wi::mul (count, factor, SIGNED, &overflow); if (overflow) continue; signop sign = range >= 0 ? 
UNSIGNED : SIGNED; - if (wi::min_precision (range, sign) > element_bits) - { - overflow = wi::OVF_UNKNOWN; - continue; - } + unsigned int min_offset_bits = wi::min_precision (range, sign); - /* See whether the target supports the operation. */ + /* Find the narrowest viable offset type. */ + unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits); + tree offset_type = build_nonstandard_integer_type (offset_bits, + sign == UNSIGNED); + + /* See whether the target supports the operation with an offset + no narrower than OFFSET_TYPE. */ tree memory_type = TREE_TYPE (DR_REF (dr)); - if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype, - memory_type, element_bits, sign, scale, - &gs_info->ifn, &gs_info->element_type)) + if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p, + vectype, memory_type, offset_type, scale, + &gs_info->ifn, &gs_info->offset_vectype)) continue; - tree offset_type = build_nonstandard_integer_type (element_bits, - sign == UNSIGNED); - gs_info->decl = NULL_TREE; /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET, but we don't need to store that here. */ gs_info->base = NULL_TREE; + gs_info->element_type = TREE_TYPE (vectype); gs_info->offset = fold_convert (offset_type, step); gs_info->offset_dt = vect_constant_def; - gs_info->offset_vectype = NULL_TREE; gs_info->scale = scale; gs_info->memory_type = memory_type; return true; @@ -2104,22 +2101,12 @@ vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info, return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo, masked_p, gs_info); - scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type); - unsigned int element_bits = GET_MODE_BITSIZE (element_mode); - tree offset_type = TREE_TYPE (gs_info->offset); - unsigned int offset_bits = TYPE_PRECISION (offset_type); + tree old_offset_type = TREE_TYPE (gs_info->offset); + tree new_offset_type = TREE_TYPE (gs_info->offset_vectype); - /* Enforced by vect_check_gather_scatter. 
*/ - gcc_assert (element_bits >= offset_bits); - - /* If the elements are wider than the offset, convert the offset to the - same width, without changing its sign. */ - if (element_bits > offset_bits) - { - bool unsigned_p = TYPE_UNSIGNED (offset_type); - offset_type = build_nonstandard_integer_type (element_bits, unsigned_p); - gs_info->offset = fold_convert (offset_type, gs_info->offset); - } + gcc_assert (TYPE_PRECISION (new_offset_type) + >= TYPE_PRECISION (old_offset_type)); + gs_info->offset = fold_convert (new_offset_type, gs_info->offset); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -2963,7 +2950,6 @@ vect_get_gather_scatter_ops (class loop *loop, stmt_vec_info stmt_info, gather_scatter_info *gs_info, tree *dataref_ptr, tree *vec_offset) { - vec_info *vinfo = stmt_info->vinfo; gimple_seq stmts = NULL; *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE); if (stmts != NULL) @@ -2973,10 +2959,8 @@ vect_get_gather_scatter_ops (class loop *loop, stmt_vec_info stmt_info, new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); gcc_assert (!new_bb); } - tree offset_type = TREE_TYPE (gs_info->offset); - tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type); *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info, - offset_vectype); + gs_info->offset_vectype); } /* Prepare to implement a grouped or strided load or store using @@ -3009,8 +2993,7 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info, /* The offset given in GS_INFO can have pointer type, so use the element type of the vector instead. */ tree offset_type = TREE_TYPE (gs_info->offset); - tree offset_vectype = get_vectype_for_scalar_type (loop_vinfo, offset_type); - offset_type = TREE_TYPE (offset_vectype); + offset_type = TREE_TYPE (gs_info->offset_vectype); /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. 
*/ tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr), @@ -3019,7 +3002,7 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info, step = force_gimple_operand (step, &stmts, true, NULL_TREE); /* Create {0, X, X*2, X*3, ...}. */ - *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype, + *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, gs_info->offset_vectype, build_zero_cst (offset_type), step); if (stmts) gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); @@ -9442,16 +9425,17 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, if (memory_access_type == VMAT_GATHER_SCATTER) { + tree zero = build_zero_cst (vectype); tree scale = size_int (gs_info.scale); gcall *call; if (loop_masks) call = gimple_build_call_internal - (IFN_MASK_GATHER_LOAD, 4, dataref_ptr, - vec_offset, scale, final_mask); + (IFN_MASK_GATHER_LOAD, 5, dataref_ptr, + vec_offset, scale, zero, final_mask); else call = gimple_build_call_internal - (IFN_GATHER_LOAD, 3, dataref_ptr, - vec_offset, scale); + (IFN_GATHER_LOAD, 4, dataref_ptr, + vec_offset, scale, zero); gimple_call_set_nothrow (call, true); new_stmt = call; data_ref = NULL_TREE; diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 6b4e92e..96eb1f5 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1678,8 +1678,8 @@ extern opt_result vect_verify_datarefs_alignment (loop_vec_info); extern bool vect_slp_analyze_and_verify_instance_alignment (slp_instance); extern opt_result vect_analyze_data_ref_accesses (vec_info *); extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info); -extern bool vect_gather_scatter_fn_p (bool, bool, tree, tree, unsigned int, - signop, int, internal_fn *, tree *); +extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree, + tree, int, internal_fn *, tree *); extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info, gather_scatter_info *); extern opt_result vect_find_stmt_data_reference 
(loop_p, gimple *, -- 2.7.4