#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
+#include "vec-perm-indices.h"
/* For lang_hooks.types.type_for_mode. */
#include "langhooks.h"
/* Costs of the stores. */
if (memory_access_type == VMAT_ELEMENTWISE
|| memory_access_type == VMAT_GATHER_SCATTER)
- /* N scalar stores plus extracting the elements. */
- inside_cost += record_stmt_cost (body_cost_vec,
- ncopies * TYPE_VECTOR_SUBPARTS (vectype),
- scalar_store, stmt_info, 0, vect_body);
+ {
+ /* N scalar stores plus extracting the elements. */
+ unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
+ inside_cost += record_stmt_cost (body_cost_vec,
+ ncopies * assumed_nunits,
+ scalar_store, stmt_info, 0, vect_body);
+ }
else
vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
if (memory_access_type == VMAT_ELEMENTWISE
|| memory_access_type == VMAT_STRIDED_SLP)
- inside_cost += record_stmt_cost (body_cost_vec,
- ncopies * TYPE_VECTOR_SUBPARTS (vectype),
- vec_to_scalar, stmt_info, 0, vect_body);
+ {
+ /* The cost of extracting the elements for the scalar stores. */
+ unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
+ inside_cost += record_stmt_cost (body_cost_vec,
+ ncopies * assumed_nunits,
+ vec_to_scalar, stmt_info, 0, vect_body);
+ }
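Both cost hunks route the element count through vect_nunits_for_cost instead of reading TYPE_VECTOR_SUBPARTS directly, so the multiplication stays an ordinary integer even once the subparts become a poly_uint64. A minimal sketch of the idea behind such a helper, assuming it simply falls back on the smallest possible element count when the length is variable (the body below is hypothetical; only the helper name comes from the patch):

/* Illustrative sketch only: reduce a possibly runtime-variable
   element count to a constant the cost model can multiply by.
   sketch_nunits_for_cost is a hypothetical stand-in, not the
   actual GCC helper body.  */
static unsigned int
sketch_nunits_for_cost (tree vectype)
{
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (nunits.is_constant (&const_nunits))
    return const_nunits;
  /* For variable-length vectors, cost as if the minimum number of
     elements were used; any runtime length is at least this big.  */
  return constant_lower_bound (nunits);
}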
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
{
/* N scalar loads plus gathering them into a vector. */
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
inside_cost += record_stmt_cost (body_cost_vec,
- ncopies * TYPE_VECTOR_SUBPARTS (vectype),
+ ncopies * assumed_nunits,
scalar_load, stmt_info, 0, vect_body);
}
else
nunits = TYPE_VECTOR_SUBPARTS (vectype);
- auto_vec_perm_indices sel (nunits);
- for (i = 0; i < nunits; ++i)
+ /* The encoding has a single stepped pattern. */
+ vec_perm_builder sel (nunits, 1, 3);
+ for (i = 0; i < 3; ++i)
sel.quick_push (nunits - 1 - i);
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
+ vec_perm_indices indices (sel, 1, nunits);
+ if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
return NULL_TREE;
- return vect_gen_perm_mask_checked (vectype, sel);
+ return vect_gen_perm_mask_checked (vectype, indices);
}
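The three-argument vec_perm_builder constructor is (number of elements, npatterns, nelts_per_pattern): here a single pattern whose three leading elements nunits-1, nunits-2, nunits-3 fix a base and a step of -1, which the encoding extrapolates across the whole vector even when nunits is not a compile-time constant. A standalone plain-C++ illustration of that extrapolation rule (independent of the GCC classes):

#include <cstdio>

/* Illustration only: how a single stepped pattern <e0, e1, e2>
   expands.  Element 0 is e0; element i for i >= 1 follows the
   series e1 + (i - 1) * (e2 - e1).  For a reversal of n lanes,
   e0 = n - 1 and the step is -1, so element i is n - 1 - i.  */
static long
stepped_element (long e0, long e1, long e2, unsigned i)
{
  if (i == 0)
    return e0;
  return e1 + (long) (i - 1) * (e2 - e1);
}

int main ()
{
  const unsigned n = 8;  /* pretend TYPE_VECTOR_SUBPARTS == 8 */
  for (unsigned i = 0; i < n; ++i)
    printf ("%ld ", stepped_element (n - 1, n - 2, n - 3, i));
  printf ("\n");  /* prints: 7 6 5 4 3 2 1 0 */
  return 0;
}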
/* A subroutine of get_load_store_type, with a subset of the same
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
vec_info *vinfo = stmt_info->vinfo;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
{
*memory_access_type = VMAT_GATHER_SCATTER;
*memory_access_type = VMAT_CONTIGUOUS;
}
+ if ((*memory_access_type == VMAT_ELEMENTWISE
+ || *memory_access_type == VMAT_STRIDED_SLP)
+ && !nunits.is_constant ())
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Not using elementwise accesses due to variable "
+ "vectorization factor.\n");
+ return false;
+ }
+
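This guard is the idiom the rest of the series leans on: with SVE-style vectors, TYPE_VECTOR_SUBPARTS can be a runtime value such as 4 + 4*x, and any code that must visit elements one by one has to prove the count constant first. A hedged sketch of the two usual forms of the check (a fragment; identifiers are illustrative):

/* Illustrative fragment: the two common poly_uint64 guards.  */
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

/* Form 1: bare test, used to punt as in the hunk above.  */
if (!nunits.is_constant ())
  return false;

/* Form 2: test and extract in one step, so the value is available
   as an ordinary integer afterwards.  */
unsigned HOST_WIDE_INT const_nunits;
if (nunits.is_constant (&const_nunits))
  {
    for (unsigned HOST_WIDE_INT i = 0; i < const_nunits; ++i)
      ;  /* e.g. emit one scalar access per element */
  }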
/* FIXME: At the moment the cost model seems to underestimate the
cost of using elementwise accesses. This check preserves the
traditional behavior until that can be fixed. */
tree dummy;
tree dataref_ptr = NULL_TREE;
gimple *ptr_incr;
- int nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
int ncopies;
int i, j;
bool inv_p;
gimple_seq seq;
basic_block new_bb;
enum { NARROW, NONE, WIDEN } modifier;
- int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
+ poly_uint64 gather_off_nunits
+ = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
gcc_checking_assert (types_compatible_p (srctype, rettype)
&& types_compatible_p (srctype, masktype));
- if (nunits == gather_off_nunits)
+ if (known_eq (nunits, gather_off_nunits))
modifier = NONE;
- else if (nunits == gather_off_nunits / 2)
+ else if (known_eq (nunits * 2, gather_off_nunits))
{
modifier = WIDEN;
- auto_vec_perm_indices sel (gather_off_nunits);
- for (i = 0; i < gather_off_nunits; ++i)
- sel.quick_push (i | nunits);
+ /* Currently widening gathers and scatters are only supported for
+ fixed-length vectors. */
+ int count = gather_off_nunits.to_constant ();
+ vec_perm_builder sel (count, count, 1);
+ for (i = 0; i < count; ++i)
+ sel.quick_push (i | (count / 2));
- perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
+ vec_perm_indices indices (sel, 1, count);
+ perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
+                                         indices);
}
- else if (nunits == gather_off_nunits * 2)
+ else if (known_eq (nunits, gather_off_nunits * 2))
{
modifier = NARROW;
- auto_vec_perm_indices sel (nunits);
- sel.quick_grow (nunits);
- for (i = 0; i < nunits; ++i)
- sel[i] = i < gather_off_nunits
- ? i : i + nunits - gather_off_nunits;
+ /* Currently narrowing gathers and scatters are only supported for
+ fixed-length vectors. */
+ int count = nunits.to_constant ();
+ vec_perm_builder sel (count, count, 1);
+ sel.quick_grow (count);
+ for (i = 0; i < count; ++i)
+ sel[i] = i < count / 2 ? i : i + count / 2;
- perm_mask = vect_gen_perm_mask_checked (vectype, sel);
+ vec_perm_indices indices (sel, 2, count);
+ perm_mask = vect_gen_perm_mask_checked (vectype, indices);
ncopies *= 2;
- for (i = 0; i < nunits; ++i)
- sel[i] = i | gather_off_nunits;
- mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
+ for (i = 0; i < count; ++i)
+ sel[i] = i | (count / 2);
+ indices.new_vector (sel, 2, count);
+ mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
}
else
gcc_unreachable ();
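Expanding the narrowing-case selectors for a concrete width makes the intent visible. With count == 8, perm_mask's selector is {0, 1, 2, 3, 8, 9, 10, 11}, the low halves of both inputs of the two-operand permutation (indices of count and above pick lanes of the second input), while mask_perm_mask's is {4, 5, 6, 7, 4, 5, 6, 7}, the high half of the first input twice. A standalone plain-C++ check:

#include <cstdio>

/* Illustration only: expand the two narrowing-gather selectors for
   count == 8.  Indices in [0, count) pick lanes of the first input
   vector, indices in [count, 2 * count) pick lanes of the second
   (the vec_perm_indices above were built over 2 input vectors).  */
int main ()
{
  const int count = 8;

  printf ("perm_mask selector:      ");
  for (int i = 0; i < count; ++i)
    printf ("%d ", i < count / 2 ? i : i + count / 2);
  printf ("\n");  /* 0 1 2 3 8 9 10 11: low halves of both inputs */

  printf ("mask_perm_mask selector: ");
  for (int i = 0; i < count; ++i)
    printf ("%d ", i | (count / 2));
  printf ("\n");  /* 4 5 6 7 4 5 6 7: high half, twice */

  return 0;
}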
unsigned int num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
unsigned word_bytes = num_bytes / nunits;
- auto_vec_perm_indices elts (num_bytes);
- for (unsigned i = 0; i < nunits; ++i)
+ /* The encoding uses one stepped pattern for each byte in the word. */
+ vec_perm_builder elts (num_bytes, word_bytes, 3);
+ for (unsigned i = 0; i < 3; ++i)
for (unsigned j = 0; j < word_bytes; ++j)
elts.quick_push ((i + 1) * word_bytes - j - 1);
- if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, &elts))
+ vec_perm_indices indices (elts, 1, num_bytes);
+ if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
return false;
if (! vec_stmt)
return true;
}
- tree_vector_builder telts (char_vectype, num_bytes, 1);
- for (unsigned i = 0; i < num_bytes; ++i)
- telts.quick_push (build_int_cst (char_type_node, elts[i]));
- tree bswap_vconst = telts.build ();
+ tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
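Expanded for, say, 32-bit words in a 16-byte vector (word_bytes = 4, num_bytes = 16), the loop pushes only the first three groups 3,2,1,0, 7,6,5,4, 11,10,9,8; the (num_bytes, word_bytes, 3) encoding supplies the fourth group 15,14,13,12 by extrapolation. A standalone check in plain C++:

#include <cstdio>

/* Illustration only: print the full 16-element bswap selector that
   the 12 explicitly pushed elements encode for word_bytes == 4,
   num_bytes == 16.  Each group of word_bytes indices steps up by
   word_bytes, which is exactly what the stepped encoding extends.  */
int main ()
{
  const unsigned word_bytes = 4, num_bytes = 16;
  for (unsigned i = 0; i < num_bytes / word_bytes; ++i)
    for (unsigned j = 0; j < word_bytes; ++j)
      printf ("%u ", (i + 1) * word_bytes - j - 1);
  printf ("\n");  /* 3 2 1 0 7 6 5 4 11 10 9 8 15 14 13 12 */
  return 0;
}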
/* Transform. */
vec<tree> vec_oprnds = vNULL;
tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
tree vectype_out, vectype_in;
- int nunits_in;
- int nunits_out;
+ poly_uint64 nunits_in;
+ poly_uint64 nunits_out;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
vec_info *vinfo = stmt_info->vinfo;
/* FORNOW */
nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
- if (nunits_in == nunits_out / 2)
+ if (known_eq (nunits_in * 2, nunits_out))
modifier = NARROW;
- else if (nunits_out == nunits_in)
+ else if (known_eq (nunits_out, nunits_in))
modifier = NONE;
- else if (nunits_out == nunits_in / 2)
+ else if (known_eq (nunits_out * 2, nunits_in))
modifier = WIDEN;
else
return false;
if (gimple_call_internal_p (stmt)
&& gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
{
- tree_vector_builder v (vectype_out, 1, 3);
- for (int k = 0; k < 3; ++k)
- v.quick_push (build_int_cst (unsigned_type_node,
- j * nunits_out + k));
- tree cst = v.build ();
+ tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
tree new_var
= vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
gimple *init_stmt = gimple_build_assign (new_var, cst);
}
}
+/* Return the number of elements in vector type VECTYPE, which is associated
+ with a SIMD clone. At present these vectors always have a constant
+ length. */
+
+static unsigned HOST_WIDE_INT
+simd_clone_subparts (tree vectype)
+{
+ return TYPE_VECTOR_SUBPARTS (vectype);
+}
+
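simd_clone_subparts looks redundant right now, but it gives every SIMD-clone call site a constant-valued interface to hide behind: when TYPE_VECTOR_SUBPARTS itself later returns a poly_uint64, only this helper has to change. A plausible later form (illustrative, not part of this patch):

/* Hypothetical later form, once TYPE_VECTOR_SUBPARTS returns a
   poly_uint64: to_constant () asserts that the length really is
   compile-time constant, which holds for all current SIMD clones.  */
static unsigned HOST_WIDE_INT
simd_clone_subparts (tree vectype)
{
  return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
}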
/* Function vectorizable_simd_clone_call.
Check if STMT performs a function call that can be vectorized
arginfo.quick_push (thisarginfo);
}
+ unsigned HOST_WIDE_INT vf;
+ if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not considering SIMD clones; not yet supported"
+ " for variable-width vectors.\n");
+ return false;
+ }
+
unsigned int badness = 0;
struct cgraph_node *bestn = NULL;
if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
n = n->simdclone->next_clone)
{
unsigned int this_badness = 0;
- if (n->simdclone->simdlen
- > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+ if (n->simdclone->simdlen > vf
|| n->simdclone->nargs != nargs)
continue;
- if (n->simdclone->simdlen
- < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
- this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
+ if (n->simdclone->simdlen < vf)
+ this_badness += (exact_log2 (vf)
- exact_log2 (n->simdclone->simdlen)) * 1024;
if (n->simdclone->inbranch)
this_badness += 2048;
= get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
i)));
if (arginfo[i].vectype == NULL
- || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
+ || (simd_clone_subparts (arginfo[i].vectype)
> bestn->simdclone->simdlen))
return false;
}
fndecl = bestn->decl;
nunits = bestn->simdclone->simdlen;
- ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
+ ncopies = vf / nunits;
/* If the function isn't const, only allow it in simd loops where user
has asserted that at least nunits consecutive iterations can be
{
case SIMD_CLONE_ARG_TYPE_VECTOR:
atype = bestn->simdclone->args[i].vector_type;
- o = nunits / TYPE_VECTOR_SUBPARTS (atype);
+ o = nunits / simd_clone_subparts (atype);
for (m = j * o; m < (j + 1) * o; m++)
{
- if (TYPE_VECTOR_SUBPARTS (atype)
- < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
+ if (simd_clone_subparts (atype)
+ < simd_clone_subparts (arginfo[i].vectype))
{
unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
- k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
- / TYPE_VECTOR_SUBPARTS (atype));
+ k = (simd_clone_subparts (arginfo[i].vectype)
+ / simd_clone_subparts (atype));
gcc_assert ((k & (k - 1)) == 0);
if (m == 0)
vec_oprnd0
}
else
{
- k = (TYPE_VECTOR_SUBPARTS (atype)
- / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
+ k = (simd_clone_subparts (atype)
+ / simd_clone_subparts (arginfo[i].vectype));
gcc_assert ((k & (k - 1)) == 0);
vec<constructor_elt, va_gc> *ctor_elts;
if (k != 1)
new_stmt = gimple_build_call_vec (fndecl, vargs);
if (vec_dest)
{
- gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
+ gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
if (ratype)
new_temp = create_tmp_var (ratype);
- else if (TYPE_VECTOR_SUBPARTS (vectype)
- == TYPE_VECTOR_SUBPARTS (rtype))
+ else if (simd_clone_subparts (vectype)
+ == simd_clone_subparts (rtype))
new_temp = make_ssa_name (vec_dest, new_stmt);
else
new_temp = make_ssa_name (rtype, new_stmt);
if (vec_dest)
{
- if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
+ if (simd_clone_subparts (vectype) < nunits)
{
unsigned int k, l;
unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
- k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
+ k = nunits / simd_clone_subparts (vectype);
gcc_assert ((k & (k - 1)) == 0);
for (l = 0; l < k; l++)
{
}
continue;
}
- else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
+ else if (simd_clone_subparts (vectype) > nunits)
{
- unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
- / TYPE_VECTOR_SUBPARTS (rtype));
+ unsigned int k = (simd_clone_subparts (vectype)
+ / simd_clone_subparts (rtype));
gcc_assert ((k & (k - 1)) == 0);
if ((j & (k - 1)) == 0)
vec_alloc (ret_ctor_elts, k);
if (ratype)
{
- unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
+ unsigned int m, o = nunits / simd_clone_subparts (rtype);
for (m = 0; m < o; m++)
{
tree tem = build4 (ARRAY_REF, rtype, new_temp,
int ndts = 2;
gimple *new_stmt = NULL;
stmt_vec_info prev_stmt_info;
- int nunits_in;
- int nunits_out;
+ poly_uint64 nunits_in;
+ poly_uint64 nunits_out;
tree vectype_out, vectype_in;
int ncopies, i, j;
tree lhs_type, rhs_type;
nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
- if (nunits_in < nunits_out)
- modifier = NARROW;
- else if (nunits_out == nunits_in)
+ if (known_eq (nunits_out, nunits_in))
modifier = NONE;
+ else if (multiple_p (nunits_out, nunits_in))
+ modifier = NARROW;
else
- modifier = WIDEN;
+ {
+ gcc_checking_assert (multiple_p (nunits_in, nunits_out));
+ modifier = WIDEN;
+ }
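The rewritten modifier selection leans on two poly_int predicates: known_eq, which holds only when the two values agree for every runtime vector length, and multiple_p, which checks divisibility and in its three-argument form also yields the quotient. A short illustrative fragment:

/* Illustrative only: the poly_int comparisons used above.  With
   nunits_in = 2 + 2 * x and nunits_out = 4 + 4 * x (x a runtime
   quantity), the NARROW branch is taken with a factor of 2.  */
poly_uint64 factor;
if (known_eq (nunits_out, nunits_in))
  ;  /* equal for all runtime values of x */
else if (multiple_p (nunits_out, nunits_in, &factor))
  ;  /* nunits_out == factor * nunits_in, here factor == 2 */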
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
gather_scatter_info gs_info;
enum vect_def_type scatter_src_dt = vect_unknown_def_type;
gimple *new_stmt;
- int vf;
+ poly_uint64 vf;
vec_load_store_type vls_type;
tree ref_type;
gcc_assert (gimple_assign_single_p (stmt));
tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
- unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
if (loop_vinfo)
{
gimple_seq seq;
basic_block new_bb;
enum { NARROW, NONE, WIDEN } modifier;
- int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
+ poly_uint64 scatter_off_nunits
+ = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
- if (nunits == (unsigned int) scatter_off_nunits)
+ if (known_eq (nunits, scatter_off_nunits))
modifier = NONE;
- else if (nunits == (unsigned int) scatter_off_nunits / 2)
+ else if (known_eq (nunits * 2, scatter_off_nunits))
{
modifier = WIDEN;
- auto_vec_perm_indices sel (scatter_off_nunits);
- for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
- sel.quick_push (i | nunits);
+ /* Currently gathers and scatters are only supported for
+ fixed-length vectors. */
+ unsigned int count = scatter_off_nunits.to_constant ();
+ vec_perm_builder sel (count, count, 1);
+ for (i = 0; i < count; ++i)
+ sel.quick_push (i | (count / 2));
- perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
+ vec_perm_indices indices (sel, 1, count);
+ perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
+ indices);
gcc_assert (perm_mask != NULL_TREE);
}
- else if (nunits == (unsigned int) scatter_off_nunits * 2)
+ else if (known_eq (nunits, scatter_off_nunits * 2))
{
modifier = NARROW;
- auto_vec_perm_indices sel (nunits);
- for (i = 0; i < (unsigned int) nunits; ++i)
- sel.quick_push (i | scatter_off_nunits);
+ /* Currently gathers and scatters are only supported for
+ fixed-length vectors. */
+ unsigned int count = nunits.to_constant ();
+ vec_perm_builder sel (count, count, 1);
+ for (i = 0; i < count; ++i)
+ sel.quick_push (i | (count / 2));
- perm_mask = vect_gen_perm_mask_checked (vectype, sel);
+ vec_perm_indices indices (sel, 2, count);
+ perm_mask = vect_gen_perm_mask_checked (vectype, indices);
gcc_assert (perm_mask != NULL_TREE);
ncopies *= 2;
}
tree stride_base, stride_step, alias_off;
tree vec_oprnd;
unsigned int g;
+ /* Checked by get_load_store_type. */
+ unsigned int const_nunits = nunits.to_constant ();
gcc_assert (!nested_in_vect_loop_p (loop, stmt));
...
*/
- unsigned nstores = nunits;
+ unsigned nstores = const_nunits;
unsigned lnel = 1;
tree ltype = elem_type;
tree lvectype = vectype;
if (slp)
{
- if (group_size < nunits
- && nunits % group_size == 0)
+ if (group_size < const_nunits
+ && const_nunits % group_size == 0)
{
- nstores = nunits / group_size;
+ nstores = const_nunits / group_size;
lnel = group_size;
ltype = build_vector_type (elem_type, group_size);
lvectype = vectype;
unsigned lsize
= group_size * GET_MODE_BITSIZE (elmode);
elmode = int_mode_for_size (lsize, 0).require ();
+ unsigned int lnunits = const_nunits / group_size;
/* If we can't construct such a vector fall back to
element extracts from the original vector type and
element size stores. */
- if (mode_for_vector (elmode,
- nunits / group_size).exists (&vmode)
+ if (mode_for_vector (elmode, lnunits).exists (&vmode)
&& VECTOR_MODE_P (vmode)
&& (convert_optab_handler (vec_extract_optab,
vmode, elmode)
!= CODE_FOR_nothing))
{
- nstores = nunits / group_size;
+ nstores = lnunits;
lnel = group_size;
ltype = build_nonstandard_integer_type (lsize, 1);
lvectype = build_vector_type (ltype, nstores);
issue exists here for reasonable archs. */
}
}
- else if (group_size >= nunits
- && group_size % nunits == 0)
+ else if (group_size >= const_nunits
+ && group_size % const_nunits == 0)
{
nstores = 1;
- lnel = nunits;
+ lnel = const_nunits;
ltype = vectype;
lvectype = vectype;
}
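To make the arithmetic in the fallback path concrete: storing eight 32-bit elements in groups of two gives 64-bit chunks, so the code looks for a 4 x 64-bit vector mode with a usable vec_extract and then issues four double-word scalar stores per vector instead of eight word stores. A sketch of the quantities involved (values illustrative):

/* Illustrative numbers for the fallback above: eight 32-bit
   elements per vector, stored in groups of two.  */
unsigned int const_nunits = 8;                     /* e.g. V8SI */
unsigned int group_size = 2;
unsigned int elbits = 32;
unsigned int lsize = group_size * elbits;          /* 64: DImode chunks */
unsigned int lnunits = const_nunits / group_size;  /* 4 chunks */
/* => try a V4DI-style mode; if vec_extract to DImode exists, emit
   lnunits == 4 scalar stores per vector instead of const_nunits == 8.  */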
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
VECTOR_CST mask. No checks are made that the target platform supports the
- mask, so callers may wish to test can_vec_perm_p separately, or use
+ mask, so callers may wish to test can_vec_perm_const_p separately, or use
vect_gen_perm_mask_checked. */
tree
-vect_gen_perm_mask_any (tree vectype, vec_perm_indices sel)
+vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
{
- tree mask_elt_type, mask_type;
- unsigned int nunits = sel.length ();
- gcc_checking_assert (nunits == TYPE_VECTOR_SUBPARTS (vectype));
- mask_elt_type = lang_hooks.types.type_for_mode
-   (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
- mask_type = get_vectype_for_scalar_type (mask_elt_type);
- tree_vector_builder mask_elts (mask_type, nunits, 1);
- for (unsigned int i = 0; i < nunits; ++i)
-   mask_elts.quick_push (build_int_cst (mask_elt_type, sel[i]));
- return mask_elts.build ();
+ tree mask_type;
+ poly_uint64 nunits = sel.length ();
+ gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
+ mask_type = build_vector_type (ssizetype, nunits);
+ return vec_perm_indices_to_tree (mask_type, sel);
}
-/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
+/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
i.e. that the target supports the pattern _for arbitrary input vectors_. */
tree
-vect_gen_perm_mask_checked (tree vectype, vec_perm_indices sel)
+vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
{
- gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, &sel));
+ gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
return vect_gen_perm_mask_any (vectype, sel);
}
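Taken together, the call sites in this patch all follow one shape: fill a vec_perm_builder, wrap it in vec_perm_indices (recording how many input vectors the selector draws from), query the target, then materialize the mask. A condensed sketch of that shape (sel, dest, op0 and op1 are placeholders, not taken from this patch):

/* Sketch of the calling convention, not a specific GCC site.
   Assume SEL is a vec_perm_builder already filled in and that it
   selects from two input vectors of NUNITS elements each; DEST,
   OP0 and OP1 are placeholder SSA names.  */
vec_perm_indices indices (sel, 2, nunits);
if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
  return false;  /* the target cannot do this constant permutation */
tree mask = vect_gen_perm_mask_checked (vectype, indices);
gimple *perm_stmt
  = gimple_build_assign (dest, VEC_PERM_EXPR, op0, op1, mask);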
tree dataref_offset = NULL_TREE;
gimple *ptr_incr = NULL;
int ncopies;
- int i, j, group_size, group_gap_adj;
+ int i, j;
+ unsigned int group_size;
+ poly_uint64 group_gap_adj;
tree msq = NULL_TREE, lsq;
tree offset = NULL_TREE;
tree byte_offset = NULL_TREE;
bool slp_perm = false;
enum tree_code code;
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
- int vf;
+ poly_uint64 vf;
tree aggr_type;
gather_scatter_info gs_info;
vec_info *vinfo = stmt_info->vinfo;
return false;
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- int nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
if (loop_vinfo)
{
on the unrolled body effectively re-orders stmts. */
if (ncopies > 1
&& STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
- && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
- > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
+ && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
+ STMT_VINFO_MIN_NEG_DIST (stmt_info)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
on the unrolled body effectively re-orders stmts. */
if (!PURE_SLP_STMT (stmt_info)
&& STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
- && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
- > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
+ && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
+ STMT_VINFO_MIN_NEG_DIST (stmt_info)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
gimple_seq seq;
basic_block new_bb;
enum { NARROW, NONE, WIDEN } modifier;
- int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
+ poly_uint64 gather_off_nunits
+ = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
- if (nunits == gather_off_nunits)
+ if (known_eq (nunits, gather_off_nunits))
modifier = NONE;
- else if (nunits == gather_off_nunits / 2)
+ else if (known_eq (nunits * 2, gather_off_nunits))
{
modifier = WIDEN;
- auto_vec_perm_indices sel (gather_off_nunits);
- for (i = 0; i < gather_off_nunits; ++i)
- sel.quick_push (i | nunits);
+ /* Currently widening gathers are only supported for
+ fixed-length vectors. */
+ int count = gather_off_nunits.to_constant ();
+ vec_perm_builder sel (count, count, 1);
+ for (i = 0; i < count; ++i)
+ sel.quick_push (i | (count / 2));
- perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
+ vec_perm_indices indices (sel, 1, count);
+ perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
+ indices);
}
- else if (nunits == gather_off_nunits * 2)
+ else if (known_eq (nunits, gather_off_nunits * 2))
{
modifier = NARROW;
- auto_vec_perm_indices sel (nunits);
- for (i = 0; i < nunits; ++i)
- sel.quick_push (i < gather_off_nunits
- ? i : i + nunits - gather_off_nunits);
+ /* Currently narrowing gathers are only supported for
+ fixed-length vectors. */
+ int count = nunits.to_constant ();
+ vec_perm_builder sel (count, count, 1);
+ for (i = 0; i < count; ++i)
+ sel.quick_push (i < count / 2 ? i : i + count / 2);
- perm_mask = vect_gen_perm_mask_checked (vectype, sel);
+ vec_perm_indices indices (sel, 2, count);
+ perm_mask = vect_gen_perm_mask_checked (vectype, indices);
ncopies *= 2;
}
else
vec<constructor_elt, va_gc> *v = NULL;
gimple_seq stmts = NULL;
tree stride_base, stride_step, alias_off;
+ /* Checked by get_load_store_type. */
+ unsigned int const_nunits = nunits.to_constant ();
gcc_assert (!nested_in_vect_loop);
prev_stmt_info = NULL;
running_off = offvar;
alias_off = build_int_cst (ref_type, 0);
- int nloads = nunits;
+ int nloads = const_nunits;
int lnel = 1;
tree ltype = TREE_TYPE (vectype);
tree lvectype = vectype;
auto_vec<tree> dr_chain;
if (memory_access_type == VMAT_STRIDED_SLP)
{
- if (group_size < nunits)
+ if (group_size < const_nunits)
{
/* First check if vec_init optab supports construction from
vector elts directly. */
TYPE_MODE (vectype), vmode)
!= CODE_FOR_nothing))
{
- nloads = nunits / group_size;
+ nloads = const_nunits / group_size;
lnel = group_size;
ltype = build_vector_type (TREE_TYPE (vectype), group_size);
}
unsigned lsize
= group_size * TYPE_PRECISION (TREE_TYPE (vectype));
elmode = int_mode_for_size (lsize, 0).require ();
+ unsigned int lnunits = const_nunits / group_size;
/* If we can't construct such a vector fall back to
element loads of the original vector type. */
- if (mode_for_vector (elmode,
- nunits / group_size).exists (&vmode)
+ if (mode_for_vector (elmode, lnunits).exists (&vmode)
&& VECTOR_MODE_P (vmode)
&& (convert_optab_handler (vec_init_optab, vmode, elmode)
!= CODE_FOR_nothing))
{
- nloads = nunits / group_size;
+ nloads = lnunits;
lnel = group_size;
ltype = build_nonstandard_integer_type (lsize, 1);
lvectype = build_vector_type (ltype, nloads);
else
{
nloads = 1;
- lnel = nunits;
+ lnel = const_nunits;
ltype = vectype;
}
ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
fits in. */
if (slp_perm)
{
- ncopies = (group_size * vf + nunits - 1) / nunits;
+ /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
+ variable VF. */
+ unsigned int const_vf = vf.to_constant ();
+ ncopies = CEIL (group_size * const_vf, const_nunits);
dr_chain.create (ncopies);
}
else
ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
}
- int group_el = 0;
+ unsigned int group_el = 0;
unsigned HOST_WIDE_INT
elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
for (j = 0; j < ncopies; j++)
fits in. */
if (slp_perm)
{
- vec_num = (group_size * vf + nunits - 1) / nunits;
+ /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
+ variable VF. */
+ unsigned int const_vf = vf.to_constant ();
+ unsigned int const_nunits = nunits.to_constant ();
+ vec_num = CEIL (group_size * const_vf, const_nunits);
group_gap_adj = vf * group_size - nunits * vec_num;
}
else
aggr_type = vectype;
prev_stmt_info = NULL;
- int group_elt = 0;
+ poly_uint64 group_elt = 0;
for (j = 0; j < ncopies; j++)
{
/* 1. Create the vector or array pointer update chain. */
we need to skip the gaps after we manage to fully load
all elements. group_gap_adj is GROUP_SIZE here. */
group_elt += nunits;
- if (group_gap_adj != 0 && ! slp_perm
- && group_elt == group_size - group_gap_adj)
+ if (maybe_ne (group_gap_adj, 0U)
+ && !slp_perm
+ && known_eq (group_elt, group_size - group_gap_adj))
{
- wide_int bump_val = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
- * group_gap_adj);
+ poly_wide_int bump_val
+ = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
+ * group_gap_adj);
tree bump = wide_int_to_tree (sizetype, bump_val);
dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
stmt, bump);
}
/* Bump the vector pointer to account for a gap or for excess
elements loaded for a permuted SLP load. */
- if (group_gap_adj != 0 && slp_perm)
+ if (maybe_ne (group_gap_adj, 0U) && slp_perm)
{
- wide_int bump_val = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
- * group_gap_adj);
+ poly_wide_int bump_val
+ = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
+ * group_gap_adj);
tree bump = wide_int_to_tree (sizetype, bump_val);
dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
stmt, bump);
by the target. */
static tree
-get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
+get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
{
tree orig_scalar_type = scalar_type;
scalar_mode inner_mode;
machine_mode simd_mode;
- int nunits;
+ poly_uint64 nunits;
tree vectype;
if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
/* If no size was supplied use the mode the target prefers. Otherwise
lookup a vector mode of the specified size. */
- if (size == 0)
+ if (known_eq (size, 0U))
simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
- else if (!mode_for_vector (inner_mode, size / nbytes).exists (&simd_mode))
+ else if (!multiple_p (size, nbytes, &nunits)
+ || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
return NULL_TREE;
- nunits = GET_MODE_SIZE (simd_mode) / nbytes;
/* NOTE: nunits == 1 is allowed to support single element vector types. */
- if (nunits < 1)
+ if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
return NULL_TREE;
vectype = build_vector_type (scalar_type, nunits);
return vectype;
}
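The two multiple_p calls replace what used to be plain integer division, and they fail cleanly when the requested size is not a compile-time multiple of the element size. A worked fragment, assuming an SVE-like target where NUM_POLY_INT_COEFFS == 2 so that two-coefficient poly_uint64 values can be written directly:

/* Illustrative only: exact division via multiple_p.  Suppose the
   requested vector size is 16 + 16 * x bytes and nbytes == 4.  */
poly_uint64 size (16, 16);   /* 16 + 16 * x bytes */
unsigned int nbytes = 4;
poly_uint64 nunits;
if (multiple_p (size, nbytes, &nunits))
  ;  /* nunits == 4 + 4 * x; a non-multiple size returns false */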
-unsigned int current_vector_size;
+poly_uint64 current_vector_size;
/* Function get_vectype_for_scalar_type.
vectype = get_vectype_for_scalar_type_and_size (scalar_type,
current_vector_size);
if (vectype
- && current_vector_size == 0)
+ && known_eq (current_vector_size, 0U))
current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
return vectype;
}