case VEC_COND_EXPR:
case VEC_EXTRACT_EVEN_EXPR:
case VEC_EXTRACT_ODD_EXPR:
- case VEC_INTERLEAVE_HIGH_EXPR:
- case VEC_INTERLEAVE_LOW_EXPR:
case VEC_LSHIFT_EXPR:
case VEC_PACK_FIX_TRUNC_EXPR:
case VEC_PACK_SAT_EXPR:
return ok;
}
+static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
+
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
a two vector permutation into a single vector permutation by using
an interleave operation to merge the vectors. */
/* For 32-byte modes allow even d->op0 == d->op1.
The lack of cross-lane shuffling in some instructions
might prevent a single insn shuffle. */
+ dfinal = *d;
+ dfinal.testing_p = true;
+ /* If expand_vec_perm_interleave3 can expand this into
+ a 3 insn sequence, give up and let it be expanded as
+ 3 insn sequence. While that is one insn longer,
+ it doesn't need a memory operand and in the common
+ case that both interleave low and high permutations
+ with the same operands are adjacent needs 4 insns
+ for both after CSE. */
+ if (expand_vec_perm_interleave3 (&dfinal))
+ return false;
}
else
return false;
stopping once we have promoted to V4SImode and then use pshufd. */
do
{
- optab otab = vec_interleave_low_optab;
+ rtx dest;
+ rtx (*gen) (rtx, rtx, rtx)
+ = vmode == V16QImode ? gen_vec_interleave_lowv16qi
+ : gen_vec_interleave_lowv8hi;
if (elt >= nelt2)
{
- otab = vec_interleave_high_optab;
+ gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
+ : gen_vec_interleave_highv8hi;
elt -= nelt2;
}
nelt2 /= 2;
- op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
+ dest = gen_reg_rtx (vmode);
+ emit_insn (gen (dest, op0, op0));
vmode = get_mode_wider_vector (vmode);
- op0 = gen_lowpart (vmode, op0);
+ op0 = gen_lowpart (vmode, dest);
}
while (vmode != V4SImode);
@tindex VEC_PACK_FIX_TRUNC_EXPR
@tindex VEC_EXTRACT_EVEN_EXPR
@tindex VEC_EXTRACT_ODD_EXPR
-@tindex VEC_INTERLEAVE_HIGH_EXPR
-@tindex VEC_INTERLEAVE_LOW_EXPR
@table @code
@item VEC_LSHIFT_EXPR
vectors, respectively. Their operands and result are vectors that contain the
same number of elements of the same type.
-@item VEC_INTERLEAVE_HIGH_EXPR
-@itemx VEC_INTERLEAVE_LOW_EXPR
-These nodes represent merging and interleaving of the high/low elements of the
-two input vectors, respectively. The operands and the result are vectors that
-contain the same number of elements (@code{N}) of the same type.
-In the case of @code{VEC_INTERLEAVE_HIGH_EXPR}, the high @code{N/2} elements of
-the first input vector are interleaved with the high @code{N/2} elements of the
-second input vector. In the case of @code{VEC_INTERLEAVE_LOW_EXPR}, the low
-@code{N/2} elements of the first input vector are interleaved with the low
-@code{N/2} elements of the second input vector.
-
@end table
1 in their original order. The result is stored in operand 0.
The output and input vectors should have the same modes.
-@cindex @code{vec_interleave_high@var{m}} instruction pattern
-@item @samp{vec_interleave_high@var{m}}
-Merge high elements of the two input vectors into the output vector. The output
-and input vectors should have the same modes (@code{N} elements). The high
-@code{N/2} elements of the first input vector are interleaved with the high
-@code{N/2} elements of the second input vector.
-
-@cindex @code{vec_interleave_low@var{m}} instruction pattern
-@item @samp{vec_interleave_low@var{m}}
-Merge low elements of the two input vectors into the output vector. The output
-and input vectors should have the same modes (@code{N} elements). The low
-@code{N/2} elements of the first input vector are interleaved with the low
-@code{N/2} elements of the second input vector.
-
@cindex @code{vec_init@var{m}} instruction pattern
@item @samp{vec_init@var{m}}
Initialize the vector to given values. Operand 0 is the vector to initialize
case VEC_EXTRACT_EVEN_EXPR:
case VEC_EXTRACT_ODD_EXPR:
- case VEC_INTERLEAVE_HIGH_EXPR:
- case VEC_INTERLEAVE_LOW_EXPR:
goto binop;
case VEC_LSHIFT_EXPR:
case VEC_EXTRACT_EVEN_EXPR:
case VEC_EXTRACT_ODD_EXPR:
- case VEC_INTERLEAVE_HIGH_EXPR:
- case VEC_INTERLEAVE_LOW_EXPR:
if ((TREE_CODE (arg0) == VECTOR_CST
|| TREE_CODE (arg0) == CONSTRUCTOR)
&& (TREE_CODE (arg1) == VECTOR_CST
case VEC_EXTRACT_ODD_EXPR:
sel[i] = i * 2 + 1;
break;
- case VEC_INTERLEAVE_HIGH_EXPR:
- sel[i] = (i + (BYTES_BIG_ENDIAN ? 0 : nelts)) / 2
- + ((i & 1) ? nelts : 0);
- break;
- case VEC_INTERLEAVE_LOW_EXPR:
- sel[i] = (i + (BYTES_BIG_ENDIAN ? nelts : 0)) / 2
- + ((i & 1) ? nelts : 0);
- break;
default:
gcc_unreachable ();
}
/* Generate code to initialize optabs from machine description.
Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
- 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010
+ 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010, 2011
Free Software Foundation, Inc.
This file is part of GCC.
"set_optab_handler (vec_extract_optab, $A, CODE_FOR_$(vec_extract$a$))",
"set_optab_handler (vec_extract_even_optab, $A, CODE_FOR_$(vec_extract_even$a$))",
"set_optab_handler (vec_extract_odd_optab, $A, CODE_FOR_$(vec_extract_odd$a$))",
- "set_optab_handler (vec_interleave_high_optab, $A, CODE_FOR_$(vec_interleave_high$a$))",
- "set_optab_handler (vec_interleave_low_optab, $A, CODE_FOR_$(vec_interleave_low$a$))",
"set_optab_handler (vec_init_optab, $A, CODE_FOR_$(vec_init$a$))",
"set_optab_handler (vec_shl_optab, $A, CODE_FOR_$(vec_shl_$a$))",
"set_optab_handler (vec_shr_optab, $A, CODE_FOR_$(vec_shr_$a$))",
case VEC_PACK_FIX_TRUNC_EXPR:
case VEC_EXTRACT_EVEN_EXPR:
case VEC_EXTRACT_ODD_EXPR:
- case VEC_INTERLEAVE_HIGH_EXPR:
- case VEC_INTERLEAVE_LOW_EXPR:
case VEC_WIDEN_LSHIFT_HI_EXPR:
case VEC_WIDEN_LSHIFT_LO_EXPR:
for (p = tree_code_name [(int) code]; *p; p++)
case VEC_EXTRACT_ODD_EXPR:
return vec_extract_odd_optab;
- case VEC_INTERLEAVE_HIGH_EXPR:
- return vec_interleave_high_optab;
-
- case VEC_INTERLEAVE_LOW_EXPR:
- return vec_interleave_low_optab;
-
default:
return NULL;
}
enum tree_code tcode = ERROR_MARK;
rtx sel;
- if (binoptab == vec_interleave_high_optab)
- tcode = VEC_INTERLEAVE_HIGH_EXPR;
- else if (binoptab == vec_interleave_low_optab)
- tcode = VEC_INTERLEAVE_LOW_EXPR;
- else if (binoptab == vec_extract_even_optab)
+ if (binoptab == vec_extract_even_optab)
tcode = VEC_EXTRACT_EVEN_EXPR;
else if (binoptab == vec_extract_odd_optab)
tcode = VEC_EXTRACT_ODD_EXPR;
init_optab (vec_extract_optab, UNKNOWN);
init_optab (vec_extract_even_optab, UNKNOWN);
init_optab (vec_extract_odd_optab, UNKNOWN);
- init_optab (vec_interleave_high_optab, UNKNOWN);
- init_optab (vec_interleave_low_optab, UNKNOWN);
init_optab (vec_set_optab, UNKNOWN);
init_optab (vec_init_optab, UNKNOWN);
init_optab (vec_shl_optab, UNKNOWN);
return true;
}
-/* Return true if we can implement VEC_INTERLEAVE_{HIGH,LOW}_EXPR or
- VEC_EXTRACT_{EVEN,ODD}_EXPR with VEC_PERM_EXPR for this target.
+/* Return true if we can implement with VEC_PERM_EXPR for this target.
If PSEL is non-null, return the selector for the permutation. */
bool
data[i] = i * 2 + alt;
break;
- case VEC_INTERLEAVE_HIGH_EXPR:
- case VEC_INTERLEAVE_LOW_EXPR:
- if ((BYTES_BIG_ENDIAN != 0) ^ (code == VEC_INTERLEAVE_HIGH_EXPR))
- alt = nelt / 2;
- for (i = 0; i < nelt / 2; ++i)
- {
- data[i * 2] = i + alt;
- data[i * 2 + 1] = i + nelt + alt;
- }
- break;
-
default:
gcc_unreachable ();
}
/* Extract even/odd fields of vector operands. */
OTI_vec_extract_even,
OTI_vec_extract_odd,
- /* Interleave fields of vector operands. */
- OTI_vec_interleave_high,
- OTI_vec_interleave_low,
/* Initialize vector operand. */
OTI_vec_init,
/* Whole vector shift. The shift amount is in bits. */
#define vec_extract_optab (&optab_table[OTI_vec_extract])
#define vec_extract_even_optab (&optab_table[OTI_vec_extract_even])
#define vec_extract_odd_optab (&optab_table[OTI_vec_extract_odd])
-#define vec_interleave_high_optab (&optab_table[OTI_vec_interleave_high])
-#define vec_interleave_low_optab (&optab_table[OTI_vec_interleave_low])
#define vec_init_optab (&optab_table[OTI_vec_init])
#define vec_shl_optab (&optab_table[OTI_vec_shl])
#define vec_shr_optab (&optab_table[OTI_vec_shr])
case VEC_PACK_FIX_TRUNC_EXPR:
case VEC_EXTRACT_EVEN_EXPR:
case VEC_EXTRACT_ODD_EXPR:
- case VEC_INTERLEAVE_HIGH_EXPR:
- case VEC_INTERLEAVE_LOW_EXPR:
/* FIXME. */
return false;
case VEC_PACK_FIX_TRUNC_EXPR:
case VEC_EXTRACT_EVEN_EXPR:
case VEC_EXTRACT_ODD_EXPR:
- case VEC_INTERLEAVE_HIGH_EXPR:
- case VEC_INTERLEAVE_LOW_EXPR:
case VEC_WIDEN_LSHIFT_HI_EXPR:
case VEC_WIDEN_LSHIFT_LO_EXPR:
pp_string (buffer, " > ");
break;
- case VEC_INTERLEAVE_HIGH_EXPR:
- pp_string (buffer, " VEC_INTERLEAVE_HIGH_EXPR < ");
- dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
- pp_string (buffer, ", ");
- dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
- pp_string (buffer, " > ");
- break;
-
- case VEC_INTERLEAVE_LOW_EXPR:
- pp_string (buffer, " VEC_INTERLEAVE_LOW_EXPR < ");
- dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
- pp_string (buffer, ", ");
- dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
- pp_string (buffer, " > ");
- break;
-
default:
NIY;
}
bool
vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
{
- optab ih_optab, il_optab;
enum machine_mode mode;
mode = TYPE_MODE (vectype);
}
/* Check that the operation is supported. */
- ih_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
- vectype, optab_default);
- il_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
- vectype, optab_default);
- if (il_optab && ih_optab
- && optab_handler (ih_optab, mode) != CODE_FOR_nothing
- && optab_handler (il_optab, mode) != CODE_FOR_nothing)
- return true;
-
- if (can_vec_perm_for_code_p (VEC_INTERLEAVE_HIGH_EXPR, mode, NULL)
- && can_vec_perm_for_code_p (VEC_INTERLEAVE_LOW_EXPR, mode, NULL))
- return true;
+ if (VECTOR_MODE_P (mode))
+ {
+ unsigned int i, nelt = GET_MODE_NUNITS (mode);
+ unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
+ for (i = 0; i < nelt / 2; i++)
+ {
+ sel[i * 2] = i;
+ sel[i * 2 + 1] = i + nelt;
+ }
+ if (can_vec_perm_p (mode, false, sel))
+ {
+ for (i = 0; i < nelt; i++)
+ sel[i] += nelt / 2;
+ if (can_vec_perm_p (mode, false, sel))
+ return true;
+ }
+ }
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "interleave op not supported by target.");
tree perm_dest, vect1, vect2, high, low;
gimple perm_stmt;
tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
- int i;
- unsigned int j;
- enum tree_code high_code, low_code;
+ tree perm_mask_low, perm_mask_high;
+ unsigned int i, n;
+ unsigned int j, nelt = GET_MODE_NUNITS (TYPE_MODE (vectype));
+ unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
gcc_assert (vect_strided_store_supported (vectype, length));
*result_chain = VEC_copy (tree, heap, dr_chain);
- for (i = 0; i < exact_log2 (length); i++)
+ for (i = 0, n = nelt / 2; i < n; i++)
+ {
+ sel[i * 2] = i;
+ sel[i * 2 + 1] = i + nelt;
+ }
+ perm_mask_high = vect_gen_perm_mask (vectype, sel);
+ for (i = 0; i < nelt; i++)
+ sel[i] += nelt / 2;
+ perm_mask_low = vect_gen_perm_mask (vectype, sel);
+
+ for (i = 0, n = exact_log2 (length); i < n; i++)
{
for (j = 0; j < length/2; j++)
{
vect2 = VEC_index (tree, dr_chain, j+length/2);
/* Create interleaving stmt:
- in the case of big endian:
- high = interleave_high (vect1, vect2)
- and in the case of little endian:
- high = interleave_low (vect1, vect2). */
+ high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */
perm_dest = create_tmp_var (vectype, "vect_inter_high");
DECL_GIMPLE_REG_P (perm_dest) = 1;
add_referenced_var (perm_dest);
- if (BYTES_BIG_ENDIAN)
- {
- high_code = VEC_INTERLEAVE_HIGH_EXPR;
- low_code = VEC_INTERLEAVE_LOW_EXPR;
- }
- else
- {
- low_code = VEC_INTERLEAVE_HIGH_EXPR;
- high_code = VEC_INTERLEAVE_LOW_EXPR;
- }
- perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
- vect1, vect2);
- high = make_ssa_name (perm_dest, perm_stmt);
- gimple_assign_set_lhs (perm_stmt, high);
+ high = make_ssa_name (perm_dest, NULL);
+ perm_stmt
+ = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, high,
+ vect1, vect2, perm_mask_high);
vect_finish_stmt_generation (stmt, perm_stmt, gsi);
VEC_replace (tree, *result_chain, 2*j, high);
/* Create interleaving stmt:
- in the case of big endian:
- low = interleave_low (vect1, vect2)
- and in the case of little endian:
- low = interleave_high (vect1, vect2). */
+ low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
+ nelt*3/2+1, ...}> */
perm_dest = create_tmp_var (vectype, "vect_inter_low");
DECL_GIMPLE_REG_P (perm_dest) = 1;
add_referenced_var (perm_dest);
- perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
- vect1, vect2);
- low = make_ssa_name (perm_dest, perm_stmt);
- gimple_assign_set_lhs (perm_stmt, low);
+ low = make_ssa_name (perm_dest, NULL);
+ perm_stmt
+ = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, low,
+ vect1, vect2, perm_mask_low);
vect_finish_stmt_generation (stmt, perm_stmt, gsi);
VEC_replace (tree, *result_chain, 2*j+1, low);
}
/* These are only created by the vectorizer, after having queried
the target support. It's more than just looking at the optab,
and there's no need to do it again. */
- if (code == VEC_INTERLEAVE_HIGH_EXPR
- || code == VEC_INTERLEAVE_LOW_EXPR
- || code == VEC_EXTRACT_EVEN_EXPR
+ if (code == VEC_EXTRACT_EVEN_EXPR
|| code == VEC_EXTRACT_ODD_EXPR)
return;
Then permutation statements are generated:
- VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
- VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
+ VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
+ VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
...
And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
the VECTOR_CST mask that implements the permutation of the
vector elements. If that is impossible to do, returns NULL. */
-static tree
-gen_perm_mask (tree vectype, unsigned char *sel)
+tree
+vect_gen_perm_mask (tree vectype, unsigned char *sel)
{
tree mask_elt_type, mask_type, mask_vec;
int i, nunits;
for (i = 0; i < nunits; ++i)
sel[i] = nunits - 1 - i;
- return gen_perm_mask (vectype, sel);
+ return vect_gen_perm_mask (vectype, sel);
}
/* Given a vector variable X and Y, that was generated for the scalar
for (i = 0; i < gather_off_nunits; ++i)
sel[i] = i | nunits;
- perm_mask = gen_perm_mask (gather_off_vectype, sel);
+ perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
gcc_assert (perm_mask != NULL_TREE);
}
else if (nunits == gather_off_nunits * 2)
sel[i] = i < gather_off_nunits
? i : i + nunits - gather_off_nunits;
- perm_mask = gen_perm_mask (vectype, sel);
+ perm_mask = vect_gen_perm_mask (vectype, sel);
gcc_assert (perm_mask != NULL_TREE);
ncopies *= 2;
}
extern bool vect_supportable_shift (enum tree_code, tree);
extern void vect_get_vec_defs (tree, tree, gimple, VEC (tree, heap) **,
VEC (tree, heap) **, slp_tree, int);
+extern tree vect_gen_perm_mask (tree, unsigned char *);
/* In tree-vect-data-refs.c. */
extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int);
DEFTREECODE (VEC_EXTRACT_EVEN_EXPR, "vec_extract_even_expr", tcc_binary, 2)
DEFTREECODE (VEC_EXTRACT_ODD_EXPR, "vec_extract_odd_expr", tcc_binary, 2)
-/* Merge input vectors interleaving their fields. */
-DEFTREECODE (VEC_INTERLEAVE_HIGH_EXPR, "vec_interleave_high_expr", tcc_binary, 2)
-DEFTREECODE (VEC_INTERLEAVE_LOW_EXPR, "vec_interleave_low_expr", tcc_binary, 2)
-
/* Widening vector shift left in bits.
Operand 0 is a vector to be shifted with N elements of size S.
Operand 1 is an integer shift amount in bits.