+2019-11-08 Richard Sandiford <richard.sandiford@arm.com>
+
+ * optabs.def (gather_load_optab, mask_gather_load_optab)
+ (scatter_store_optab, mask_scatter_store_optab): Turn into
+ conversion optabs, with the offset mode given explicitly.
+ * doc/md.texi: Update accordingly.
+ * config/aarch64/aarch64-sve-builtins-base.cc
+ (svld1_gather_impl::expand): Likewise.
+ (svst1_scatter_impl::expand): Likewise.
+ * internal-fn.c (gather_load_direct, scatter_store_direct): Likewise.
+ (expand_scatter_store_optab_fn): Likewise.
+ (direct_gather_load_optab_supported_p): Likewise.
+ (direct_scatter_store_optab_supported_p): Likewise.
+ (expand_gather_load_optab_fn): Likewise. Expect the mask argument
+ to be argument 4.
+ (internal_fn_mask_index): Return 4 for IFN_MASK_GATHER_LOAD.
+ (internal_gather_scatter_fn_supported_p): Replace the offset sign
+ argument with the offset vector type. Require the two vector
+ types to have the same number of elements but allow their element
+ sizes to be different. Treat the optabs as conversion optabs.
+ * internal-fn.h (internal_gather_scatter_fn_supported_p): Update
+ prototype accordingly.
+ * optabs-query.c (supports_at_least_one_mode_p): Replace with...
+ (supports_vec_convert_optab_p): ...this new function.
+ (supports_vec_gather_load_p): Update accordingly.
+ (supports_vec_scatter_store_p): Likewise.
+ * tree-vectorizer.h (vect_gather_scatter_fn_p): Take a vec_info.
+ Replace the offset sign and bits parameters with a scalar type tree.
+ * tree-vect-data-refs.c (vect_gather_scatter_fn_p): Likewise.
+ Pass back the offset vector type instead of the scalar element type.
+ Allow the offset to be wider than the memory elements. Search for
+ an offset type that the target supports, stopping once we've
+ reached the maximum of the element size and pointer size.
+ Update call to internal_gather_scatter_fn_supported_p.
+ (vect_check_gather_scatter): Update calls accordingly.
+ When testing a new scale before knowing the final offset type,
+ check whether the scale is supported for any signed or unsigned
+ offset type. Check whether the target supports the source and
+ target types of a conversion before deciding whether to look
+ through the conversion. Record the chosen offset_vectype.
+ * tree-vect-patterns.c (vect_get_gather_scatter_offset_type): Delete.
+ (vect_recog_gather_scatter_pattern): Get the scalar offset type
+ directly from the gs_info's offset_vectype instead. Pass a zero
+ of the result type to IFN_GATHER_LOAD and IFN_MASK_GATHER_LOAD.
+ * tree-vect-stmts.c (check_load_store_masking): Update call to
+ internal_gather_scatter_fn_supported_p, passing the offset vector
+ type recorded in the gs_info.
+ (vect_truncate_gather_scatter_offset): Update call to
+ vect_check_gather_scatter, leaving it to search for a valid
+ offset vector type.
+ (vect_use_strided_gather_scatters_p): Convert the offset to the
+ element type of the gs_info's offset_vectype.
+ (vect_get_gather_scatter_ops): Get the offset vector type directly
+ from the gs_info.
+ (vect_get_strided_load_store_ops): Likewise.
+ (vectorizable_load): Pass a zero of the result type to IFN_GATHER_LOAD
+ and IFN_MASK_GATHER_LOAD.
+ * config/aarch64/aarch64-sve.md (gather_load<mode>): Rename to...
+ (gather_load<mode><v_int_equiv>): ...this.
+ (mask_gather_load<mode>): Rename to...
+ (mask_gather_load<mode><v_int_equiv>): ...this.
+ (scatter_store<mode>): Rename to...
+ (scatter_store<mode><v_int_equiv>): ...this.
+ (mask_scatter_store<mode>): Rename to...
+ (mask_scatter_store<mode><v_int_equiv>): ...this.
+
2019-11-08  Kewen Lin  <linkw@gcc.gnu.org>

	PR target/92132
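
To make the headline change concrete, here is a minimal sketch (not part of the patch) of how a lookup looks once gather_load_optab and friends are conversion (OPTAB_CD) optabs: the handler is keyed on both the data vector mode and the offset vector mode. The SVE mode names below are purely illustrative.

    /* Illustrative only: the lookup supplies both modes, and the matching
       target pattern name carries both mode suffixes.  */
    insn_code icode = convert_optab_handler (gather_load_optab,
                                             VNx4SFmode,   /* loaded vector */
                                             VNx4SImode);  /* offset vector */
    /* This resolves to a pattern named "gather_loadvnx4sfvnx4si" instead of
       the old single-mode "gather_loadvnx4sf".  */
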
/* Put the predicate last, as required by mask_gather_load_optab. */
e.rotate_inputs_left (0, 5);
machine_mode mem_mode = e.memory_vector_mode ();
- insn_code icode = direct_optab_handler (mask_gather_load_optab, mem_mode);
+ machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
+ insn_code icode = convert_optab_handler (mask_gather_load_optab,
+ mem_mode, int_mode);
return e.use_exact_insn (icode);
}
};
e.prepare_gather_address_operands (1);
/* Put the predicate last, as required by mask_scatter_store_optab. */
e.rotate_inputs_left (0, 6);
- insn_code icode = direct_optab_handler (mask_scatter_store_optab,
- e.memory_vector_mode ());
+ machine_mode mem_mode = e.memory_vector_mode ();
+ machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
+ insn_code icode = convert_optab_handler (mask_scatter_store_optab,
+ mem_mode, int_mode);
return e.use_exact_insn (icode);
}
};
;; -------------------------------------------------------------------------
;; Unpredicated gather loads.
-(define_expand "gather_load<mode>"
+(define_expand "gather_load<mode><v_int_equiv>"
[(set (match_operand:SVE_SD 0 "register_operand")
(unspec:SVE_SD
[(match_dup 5)
;; Predicated gather loads for 32-bit elements. Operand 3 is true for
;; unsigned extension and false for signed extension.
-(define_insn "mask_gather_load<mode>"
+(define_insn "mask_gather_load<mode><v_int_equiv>"
[(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w, w")
(unspec:SVE_S
[(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
;; Predicated gather loads for 64-bit elements. The value of operand 3
;; doesn't matter in this case.
-(define_insn "mask_gather_load<mode>"
+(define_insn "mask_gather_load<mode><v_int_equiv>"
[(set (match_operand:SVE_D 0 "register_operand" "=w, w, w, w")
(unspec:SVE_D
[(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
)
;; Likewise, but with the offset being sign-extended from 32 bits.
-(define_insn "*mask_gather_load<mode>_sxtw"
+(define_insn "*mask_gather_load<mode><v_int_equiv>_sxtw"
[(set (match_operand:SVE_D 0 "register_operand" "=w, w")
(unspec:SVE_D
[(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
)
;; Likewise, but with the offset being zero-extended from 32 bits.
-(define_insn "*mask_gather_load<mode>_uxtw"
+(define_insn "*mask_gather_load<mode><v_int_equiv>_uxtw"
[(set (match_operand:SVE_D 0 "register_operand" "=w, w")
(unspec:SVE_D
[(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
;; -------------------------------------------------------------------------
;; Unpredicated scatter stores.
-(define_expand "scatter_store<mode>"
+(define_expand "scatter_store<mode><v_int_equiv>"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_dup 5)
;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
;; unsigned extension and false for signed extension.
-(define_insn "mask_scatter_store<mode>"
+(define_insn "mask_scatter_store<mode><v_int_equiv>"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
;; Predicated scatter stores for 64-bit elements. The value of operand 2
;; doesn't matter in this case.
-(define_insn "mask_scatter_store<mode>"
+(define_insn "mask_scatter_store<mode><v_int_equiv>"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
)
;; Likewise, but with the offset being sign-extended from 32 bits.
-(define_insn_and_rewrite "*mask_scatter_store<mode>_sxtw"
+(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_equiv>_sxtw"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
)
;; Likewise, but with the offset being zero-extended from 32 bits.
-(define_insn "*mask_scatter_store<mode>_uxtw"
+(define_insn "*mask_scatter_store<mode><v_int_equiv>_uxtw"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
This pattern is not allowed to @code{FAIL}.
-@cindex @code{gather_load@var{m}} instruction pattern
-@item @samp{gather_load@var{m}}
+@cindex @code{gather_load@var{m}@var{n}} instruction pattern
+@item @samp{gather_load@var{m}@var{n}}
Load several separate memory locations into a vector of mode @var{m}.
-Operand 1 is a scalar base address and operand 2 is a vector of
-offsets from that base. Operand 0 is a destination vector with the
-same number of elements as the offset. For each element index @var{i}:
+Operand 1 is a scalar base address and operand 2 is a vector of mode @var{n}
+containing offsets from that base. Operand 0 is a destination vector with
+the same number of elements as @var{n}. For each element index @var{i}:
@itemize @bullet
@item
The value of operand 3 does not matter if the offsets are already
address width.
-@cindex @code{mask_gather_load@var{m}} instruction pattern
-@item @samp{mask_gather_load@var{m}}
-Like @samp{gather_load@var{m}}, but takes an extra mask operand as
+@cindex @code{mask_gather_load@var{m}@var{n}} instruction pattern
+@item @samp{mask_gather_load@var{m}@var{n}}
+Like @samp{gather_load@var{m}@var{n}}, but takes an extra mask operand as
operand 5. Bit @var{i} of the mask is set if element @var{i}
of the result should be loaded from memory and clear if element @var{i}
of the result should be set to zero.
-@cindex @code{scatter_store@var{m}} instruction pattern
-@item @samp{scatter_store@var{m}}
+@cindex @code{scatter_store@var{m}@var{n}} instruction pattern
+@item @samp{scatter_store@var{m}@var{n}}
Store a vector of mode @var{m} into several distinct memory locations.
-Operand 0 is a scalar base address and operand 1 is a vector of offsets
-from that base. Operand 4 is the vector of values that should be stored,
-which has the same number of elements as the offset. For each element
-index @var{i}:
+Operand 0 is a scalar base address and operand 1 is a vector of mode
+@var{n} containing offsets from that base. Operand 4 is the vector of
+values that should be stored, which has the same number of elements as
+@var{n}. For each element index @var{i}:
@itemize @bullet
@item
The value of operand 2 does not matter if the offsets are already
address width.
-@cindex @code{mask_scatter_store@var{m}} instruction pattern
-@item @samp{mask_scatter_store@var{m}}
-Like @samp{scatter_store@var{m}}, but takes an extra mask operand as
+@cindex @code{mask_scatter_store@var{m}@var{n}} instruction pattern
+@item @samp{mask_scatter_store@var{m}@var{n}}
+Like @samp{scatter_store@var{m}@var{n}}, but takes an extra mask operand as
operand 5. Bit @var{i} of the mask is set if element @var{i}
of the result should be stored to memory.
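
As a reading aid (not part of the patch), the documented behaviour of @samp{mask_gather_load@var{m}@var{n}} can be modelled in scalar C roughly as below. The fixed 32-bit element and offset widths and the function name are illustrative assumptions; the scatter case is the mirror image, with the data vector written to memory instead of loaded.

    #include <stdint.h>
    #include <stddef.h>

    /* Illustrative model only: operand 3 selects zero vs. sign extension of
       each offset, the scale is applied after extension to address width,
       and masked-off elements of the result are zeroed.  */
    static void
    model_mask_gather_load (int32_t *dest, const char *base,
                            const int32_t *offsets, int zero_extend_p,
                            int64_t scale, const uint8_t *mask, size_t nelts)
    {
      for (size_t i = 0; i < nelts; ++i)
        {
          if (!mask[i])
            {
              dest[i] = 0;
              continue;
            }
          int64_t off = zero_extend_p ? (int64_t) (uint32_t) offsets[i]
                                      : (int64_t) offsets[i];
          dest[i] = *(const int32_t *) (base + off * scale);
        }
    }
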
#define mask_load_direct { -1, 2, false }
#define load_lanes_direct { -1, -1, false }
#define mask_load_lanes_direct { -1, -1, false }
-#define gather_load_direct { -1, -1, false }
+#define gather_load_direct { 3, 1, false }
#define mask_store_direct { 3, 2, false }
#define store_lanes_direct { 0, 0, false }
#define mask_store_lanes_direct { 0, 0, false }
-#define scatter_store_direct { 3, 3, false }
+#define scatter_store_direct { 3, 1, false }
#define unary_direct { 0, 0, true }
#define binary_direct { 0, 0, true }
#define ternary_direct { 0, 0, true }
create_input_operand (&ops[i++], mask_rtx, TYPE_MODE (TREE_TYPE (mask)));
}
- insn_code icode = direct_optab_handler (optab, TYPE_MODE (TREE_TYPE (rhs)));
+ insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (rhs)),
+ TYPE_MODE (TREE_TYPE (offset)));
expand_insn (icode, i, ops);
}
create_integer_operand (&ops[i++], scale_int);
if (optab == mask_gather_load_optab)
{
- tree mask = gimple_call_arg (stmt, 3);
+ tree mask = gimple_call_arg (stmt, 4);
rtx mask_rtx = expand_normal (mask);
create_input_operand (&ops[i++], mask_rtx, TYPE_MODE (TREE_TYPE (mask)));
}
- insn_code icode = direct_optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)));
+ insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)),
+ TYPE_MODE (TREE_TYPE (offset)));
expand_insn (icode, i, ops);
}
#define direct_mask_load_optab_supported_p direct_optab_supported_p
#define direct_load_lanes_optab_supported_p multi_vector_optab_supported_p
#define direct_mask_load_lanes_optab_supported_p multi_vector_optab_supported_p
-#define direct_gather_load_optab_supported_p direct_optab_supported_p
+#define direct_gather_load_optab_supported_p convert_optab_supported_p
#define direct_mask_store_optab_supported_p direct_optab_supported_p
#define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p
#define direct_mask_store_lanes_optab_supported_p multi_vector_optab_supported_p
-#define direct_scatter_store_optab_supported_p direct_optab_supported_p
+#define direct_scatter_store_optab_supported_p convert_optab_supported_p
#define direct_while_optab_supported_p convert_optab_supported_p
#define direct_fold_extract_optab_supported_p direct_optab_supported_p
#define direct_fold_left_optab_supported_p direct_optab_supported_p
return 2;
case IFN_MASK_GATHER_LOAD:
- return 3;
-
case IFN_MASK_SCATTER_STORE:
return 4;
IFN. For loads, VECTOR_TYPE is the vector type of the load result,
while for stores it is the vector type of the stored data argument.
MEMORY_ELEMENT_TYPE is the type of the memory elements being loaded
- or stored. OFFSET_SIGN is the sign of the offset argument, which is
- only relevant when the offset is narrower than an address. SCALE is
- the amount by which the offset should be multiplied *after* it has
- been extended to address width. */
+ or stored. OFFSET_VECTOR_TYPE is the vector type that holds the
+ offset from the shared base address of each loaded or stored element.
+ SCALE is the amount by which these offsets should be multiplied
+ *after* they have been extended to address width. */
bool
internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type,
tree memory_element_type,
- signop offset_sign, int scale)
+ tree offset_vector_type, int scale)
{
if (!tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vector_type)),
TYPE_SIZE (memory_element_type)))
return false;
+ if (maybe_ne (TYPE_VECTOR_SUBPARTS (vector_type),
+ TYPE_VECTOR_SUBPARTS (offset_vector_type)))
+ return false;
optab optab = direct_internal_fn_optab (ifn);
- insn_code icode = direct_optab_handler (optab, TYPE_MODE (vector_type));
+ insn_code icode = convert_optab_handler (optab, TYPE_MODE (vector_type),
+ TYPE_MODE (offset_vector_type));
int output_ops = internal_load_fn_p (ifn) ? 1 : 0;
+ bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (offset_vector_type));
return (icode != CODE_FOR_nothing
- && insn_operand_matches (icode, 2 + output_ops,
- GEN_INT (offset_sign == UNSIGNED))
- && insn_operand_matches (icode, 3 + output_ops,
- GEN_INT (scale)));
+ && insn_operand_matches (icode, 2 + output_ops, GEN_INT (unsigned_p))
+ && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale)));
}
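
A hypothetical caller, to illustrate the reworked interface: the offset vector type, rather than a signop, now describes the offsets, and it must have the same number of elements as the data vector. The concrete types and scale below are assumptions for the sake of the example.

    /* Illustrative only: can the target gather four floats using four
       32-bit signed offsets scaled by 4?  */
    tree data_vectype = build_vector_type (float_type_node, 4);
    tree offset_vectype = build_vector_type (integer_type_node, 4);
    bool ok = internal_gather_scatter_fn_supported_p (IFN_MASK_GATHER_LOAD,
                                                      data_vectype,
                                                      float_type_node,
                                                      offset_vectype, 4);
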
/* Expand STMT as though it were a call to internal function FN. */
extern int internal_fn_mask_index (internal_fn);
extern int internal_fn_stored_value_index (internal_fn);
extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
- tree, signop, int);
+ tree, tree, int);
extern void expand_internal_call (gcall *);
extern void expand_internal_call (internal_fn, gcall *);
return cheap[speed_p];
}
-/* Return true if optab OP supports at least one mode. */
+/* Return true if vector conversion optab OP supports at least one mode,
+ given that the second mode is always an integer vector. */
static bool
-supports_at_least_one_mode_p (optab op)
+supports_vec_convert_optab_p (optab op)
{
for (int i = 0; i < NUM_MACHINE_MODES; ++i)
- if (direct_optab_handler (op, (machine_mode) i) != CODE_FOR_nothing)
- return true;
+ if (VECTOR_MODE_P ((machine_mode) i))
+ for (int j = MIN_MODE_VECTOR_INT; j <= MAX_MODE_VECTOR_INT; ++j)
+ if (convert_optab_handler (op, (machine_mode) i,
+ (machine_mode) j) != CODE_FOR_nothing)
+ return true;
return false;
}
this_fn_optabs->supports_vec_gather_load_cached = true;
this_fn_optabs->supports_vec_gather_load
- = supports_at_least_one_mode_p (gather_load_optab);
+ = supports_vec_convert_optab_p (gather_load_optab);
return this_fn_optabs->supports_vec_gather_load;
}
this_fn_optabs->supports_vec_scatter_store_cached = true;
this_fn_optabs->supports_vec_scatter_store
- = supports_at_least_one_mode_p (scatter_store_optab);
+ = supports_vec_convert_optab_p (scatter_store_optab);
return this_fn_optabs->supports_vec_scatter_store;
}
OPTAB_CD(vec_cmpeq_optab, "vec_cmpeq$a$b")
OPTAB_CD(maskload_optab, "maskload$a$b")
OPTAB_CD(maskstore_optab, "maskstore$a$b")
+OPTAB_CD(gather_load_optab, "gather_load$a$b")
+OPTAB_CD(mask_gather_load_optab, "mask_gather_load$a$b")
+OPTAB_CD(scatter_store_optab, "scatter_store$a$b")
+OPTAB_CD(mask_scatter_store_optab, "mask_scatter_store$a$b")
OPTAB_CD(vec_extract_optab, "vec_extract$a$b")
OPTAB_CD(vec_init_optab, "vec_init$a$b")
OPTAB_D (get_thread_pointer_optab, "get_thread_pointer$I$a")
OPTAB_D (set_thread_pointer_optab, "set_thread_pointer$I$a")
-OPTAB_D (gather_load_optab, "gather_load$a")
-OPTAB_D (mask_gather_load_optab, "mask_gather_load$a")
-OPTAB_D (scatter_store_optab, "scatter_store$a")
-OPTAB_D (mask_scatter_store_optab, "mask_scatter_store$a")
-
OPTAB_DC (vec_duplicate_optab, "vec_duplicate$a", VEC_DUPLICATE)
OPTAB_DC (vec_series_optab, "vec_series$a", VEC_SERIES)
OPTAB_D (vec_shl_insert_optab, "vec_shl_insert_$a")
/* Check whether we can use an internal function for a gather load
or scatter store. READ_P is true for loads and false for stores.
MASKED_P is true if the load or store is conditional. MEMORY_TYPE is
- the type of the memory elements being loaded or stored. OFFSET_BITS
- is the number of bits in each scalar offset and OFFSET_SIGN is the
- sign of the offset. SCALE is the amount by which the offset should
+ the type of the memory elements being loaded or stored. OFFSET_TYPE
+ is the type of the offset that is being applied to the invariant
+ base address. SCALE is the amount by which the offset should
be multiplied *after* it has been converted to address width.
- Return true if the function is supported, storing the function
- id in *IFN_OUT and the type of a vector element in *ELEMENT_TYPE_OUT. */
+ Return true if the function is supported, storing the function id in
+ *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT. */
bool
-vect_gather_scatter_fn_p (bool read_p, bool masked_p, tree vectype,
- tree memory_type, unsigned int offset_bits,
- signop offset_sign, int scale,
- internal_fn *ifn_out, tree *element_type_out)
+vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
+ tree vectype, tree memory_type, tree offset_type,
+ int scale, internal_fn *ifn_out,
+ tree *offset_vectype_out)
{
unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype)));
- if (offset_bits > element_bits)
- /* Internal functions require the offset to be the same width as
- the vector elements. We can extend narrower offsets, but it isn't
- safe to truncate wider offsets. */
- return false;
-
if (element_bits != memory_bits)
/* For now the vector elements must be the same width as the
memory elements. */
else
ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
- /* Test whether the target supports this combination. */
- if (!internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
- offset_sign, scale))
- return false;
+ for (;;)
+ {
+ tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
+ if (!offset_vectype)
+ return false;
- *ifn_out = ifn;
- *element_type_out = TREE_TYPE (vectype);
- return true;
+ /* Test whether the target supports this combination. */
+ if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
+ offset_vectype, scale))
+ {
+ *ifn_out = ifn;
+ *offset_vectype_out = offset_vectype;
+ return true;
+ }
+
+ if (TYPE_PRECISION (offset_type) >= POINTER_SIZE
+ && TYPE_PRECISION (offset_type) >= element_bits)
+ return false;
+
+ offset_type = build_nonstandard_integer_type
+ (TYPE_PRECISION (offset_type) * 2, TYPE_UNSIGNED (offset_type));
+ }
}
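
For orientation only, a hedged sketch of how the new search is driven from a caller's point of view; loop_vinfo, vectype and memory_type stand for values already computed by the vectorizer and are not defined here.

    /* Illustrative only: probe for an unmasked gather load whose offsets
       start out as 16-bit signed values.  vect_gather_scatter_fn_p widens
       the offset type until the target accepts it or the pointer/element
       width is reached.  */
    internal_fn ifn;
    tree offset_vectype;
    bool ok = vect_gather_scatter_fn_p (loop_vinfo, /*read_p=*/true,
                                        /*masked_p=*/false, vectype,
                                        memory_type, short_integer_type_node,
                                        /*scale=*/1, &ifn, &offset_vectype);
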
/* STMT_INFO is a call to an internal gather load or scatter store function.
machine_mode pmode;
int punsignedp, reversep, pvolatilep = 0;
internal_fn ifn;
- tree element_type;
+ tree offset_vectype;
bool masked_p = false;
/* See whether this is already a call to a gather/scatter internal function.
{
int new_scale = tree_to_shwi (op1);
/* Only treat this as a scaling operation if the target
- supports it. */
+ supports it for at least some offset type. */
if (use_ifn_p
- && !vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p,
- vectype, memory_type, 1,
- TYPE_SIGN (TREE_TYPE (op0)),
+ && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
+ masked_p, vectype, memory_type,
+ signed_char_type_node,
+ new_scale, &ifn,
+ &offset_vectype)
+ && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
+ masked_p, vectype, memory_type,
+ unsigned_char_type_node,
new_scale, &ifn,
- &element_type))
+ &offset_vectype))
break;
scale = new_scale;
off = op0;
if (!POINTER_TYPE_P (TREE_TYPE (op0))
&& !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
break;
+
+ /* Don't include the conversion if the target is happy with
+ the current offset type. */
+ if (use_ifn_p
+ && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
+ masked_p, vectype, memory_type,
+ TREE_TYPE (off), scale, &ifn,
+ &offset_vectype))
+ break;
+
if (TYPE_PRECISION (TREE_TYPE (op0))
== TYPE_PRECISION (TREE_TYPE (off)))
{
continue;
}
- /* The internal functions need the offset to be the same width
- as the elements of VECTYPE. Don't include operations that
- cast the offset from that width to a different width. */
- if (use_ifn_p
- && (int_size_in_bytes (TREE_TYPE (vectype))
- == int_size_in_bytes (TREE_TYPE (off))))
- break;
-
if (TYPE_PRECISION (TREE_TYPE (op0))
< TYPE_PRECISION (TREE_TYPE (off)))
{
if (use_ifn_p)
{
- if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
- memory_type, TYPE_PRECISION (offtype),
- TYPE_SIGN (offtype), scale, &ifn,
- &element_type))
+ if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
+ vectype, memory_type, offtype, scale,
+ &ifn, &offset_vectype))
return false;
}
else
return false;
ifn = IFN_LAST;
- element_type = TREE_TYPE (vectype);
+ /* The offset vector type will be read from DECL when needed. */
+ offset_vectype = NULL_TREE;
}
info->ifn = ifn;
info->base = base;
info->offset = off;
info->offset_dt = vect_unknown_def_type;
- info->offset_vectype = NULL_TREE;
+ info->offset_vectype = offset_vectype;
info->scale = scale;
- info->element_type = element_type;
+ info->element_type = TREE_TYPE (vectype);
info->memory_type = memory_type;
return true;
}
gcc_unreachable ();
}
-/* Return the scalar offset type that an internal gather/scatter function
- should use. GS_INFO describes the gather/scatter operation. */
-
-static tree
-vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
-{
- tree offset_type = TREE_TYPE (gs_info->offset);
- unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
-
- /* Enforced by vect_check_gather_scatter. */
- unsigned int offset_bits = TYPE_PRECISION (offset_type);
- gcc_assert (element_bits >= offset_bits);
-
- /* If the offset is narrower than the elements, extend it according
- to its sign. */
- if (element_bits > offset_bits)
- return build_nonstandard_integer_type (element_bits,
- TYPE_UNSIGNED (offset_type));
-
- return offset_type;
-}
-
/* Return MASK if MASK is suitable for masking an operation on vectors
of type VECTYPE, otherwise convert it into such a form and return
the result. Associate any conversion statements with STMT_INFO's
/* Get the invariant base and non-invariant offset, converting the
latter to the same width as the vector elements. */
tree base = gs_info.base;
- tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
+ tree offset_type = TREE_TYPE (gs_info.offset_vectype);
tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
stmt_info);
gcall *pattern_stmt;
if (DR_IS_READ (dr))
{
+ tree zero = build_zero_cst (gs_info.element_type);
if (mask != NULL)
- pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
- offset, scale, mask);
+ pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
+ offset, scale, zero, mask);
else
- pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
- offset, scale);
+ pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
+ offset, scale, zero);
tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
gimple_call_set_lhs (pattern_stmt, load_lhs);
}
internal_fn ifn = (is_load
? IFN_MASK_GATHER_LOAD
: IFN_MASK_SCATTER_STORE);
- tree offset_type = TREE_TYPE (gs_info->offset);
if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
gs_info->memory_type,
- TYPE_SIGN (offset_type),
+ gs_info->offset_vectype,
gs_info->scale))
{
if (dump_enabled_p ())
if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
continue;
- /* See whether we can calculate (COUNT - 1) * STEP / SCALE
- in OFFSET_BITS bits. */
+ /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
widest_int range = wi::mul (count, factor, SIGNED, &overflow);
if (overflow)
continue;
signop sign = range >= 0 ? UNSIGNED : SIGNED;
- if (wi::min_precision (range, sign) > element_bits)
- {
- overflow = wi::OVF_UNKNOWN;
- continue;
- }
+ unsigned int min_offset_bits = wi::min_precision (range, sign);
- /* See whether the target supports the operation. */
+ /* Find the narrowest viable offset type. */
+ unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
+ tree offset_type = build_nonstandard_integer_type (offset_bits,
+ sign == UNSIGNED);
+
+ /* See whether the target supports the operation with an offset
+ no narrower than OFFSET_TYPE. */
tree memory_type = TREE_TYPE (DR_REF (dr));
- if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
- memory_type, element_bits, sign, scale,
- &gs_info->ifn, &gs_info->element_type))
+ if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
+ vectype, memory_type, offset_type, scale,
+ &gs_info->ifn, &gs_info->offset_vectype))
continue;
- tree offset_type = build_nonstandard_integer_type (element_bits,
- sign == UNSIGNED);
-
gs_info->decl = NULL_TREE;
/* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
but we don't need to store that here. */
gs_info->base = NULL_TREE;
+ gs_info->element_type = TREE_TYPE (vectype);
gs_info->offset = fold_convert (offset_type, step);
gs_info->offset_dt = vect_constant_def;
- gs_info->offset_vectype = NULL_TREE;
gs_info->scale = scale;
gs_info->memory_type = memory_type;
return true;
return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
masked_p, gs_info);
- scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
- unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
- tree offset_type = TREE_TYPE (gs_info->offset);
- unsigned int offset_bits = TYPE_PRECISION (offset_type);
+ tree old_offset_type = TREE_TYPE (gs_info->offset);
+ tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
- /* Enforced by vect_check_gather_scatter. */
- gcc_assert (element_bits >= offset_bits);
-
- /* If the elements are wider than the offset, convert the offset to the
- same width, without changing its sign. */
- if (element_bits > offset_bits)
- {
- bool unsigned_p = TYPE_UNSIGNED (offset_type);
- offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
- gs_info->offset = fold_convert (offset_type, gs_info->offset);
- }
+ gcc_assert (TYPE_PRECISION (new_offset_type)
+ >= TYPE_PRECISION (old_offset_type));
+ gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
gather_scatter_info *gs_info,
tree *dataref_ptr, tree *vec_offset)
{
- vec_info *vinfo = stmt_info->vinfo;
gimple_seq stmts = NULL;
*dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
if (stmts != NULL)
new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
gcc_assert (!new_bb);
}
- tree offset_type = TREE_TYPE (gs_info->offset);
- tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
*vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
- offset_vectype);
+ gs_info->offset_vectype);
}
/* Prepare to implement a grouped or strided load or store using
/* The offset given in GS_INFO can have pointer type, so use the element
type of the vector instead. */
tree offset_type = TREE_TYPE (gs_info->offset);
- tree offset_vectype = get_vectype_for_scalar_type (loop_vinfo, offset_type);
- offset_type = TREE_TYPE (offset_vectype);
+ offset_type = TREE_TYPE (gs_info->offset_vectype);
/* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
step = force_gimple_operand (step, &stmts, true, NULL_TREE);
/* Create {0, X, X*2, X*3, ...}. */
- *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
+ *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, gs_info->offset_vectype,
build_zero_cst (offset_type), step);
if (stmts)
gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
if (memory_access_type == VMAT_GATHER_SCATTER)
{
+ tree zero = build_zero_cst (vectype);
tree scale = size_int (gs_info.scale);
gcall *call;
if (loop_masks)
call = gimple_build_call_internal
- (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
- vec_offset, scale, final_mask);
+ (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
+ vec_offset, scale, zero, final_mask);
else
call = gimple_build_call_internal
- (IFN_GATHER_LOAD, 3, dataref_ptr,
- vec_offset, scale);
+ (IFN_GATHER_LOAD, 4, dataref_ptr,
+ vec_offset, scale, zero);
gimple_call_set_nothrow (call, true);
new_stmt = call;
data_ref = NULL_TREE;
extern bool vect_slp_analyze_and_verify_instance_alignment (slp_instance);
extern opt_result vect_analyze_data_ref_accesses (vec_info *);
extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
-extern bool vect_gather_scatter_fn_p (bool, bool, tree, tree, unsigned int,
- signop, int, internal_fn *, tree *);
+extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
+ tree, int, internal_fn *, tree *);
extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info,
gather_scatter_info *);
extern opt_result vect_find_stmt_data_reference (loop_p, gimple *,