+2007-04-22 Uros Bizjak <ubizjak@gmail.com>
+
+ PR tree-optimization/24659
+ * optabs.h (enum optab_index) [OTI_vec_unpacks_hi,
+ OTI_vec_unpacks_lo]: Update comment to mention floating point operands.
+ (vec_pack_trunc_optab): Rename from vec_pack_mod_optab.
+ * genopinit.c (optabs): Rename vec_pack_mod_optab
+ to vec_pack_trunc_optab.
+ * tree-vect-transform.c (vectorizable_type_demotion): Do not fail
+ early for scalar floating point operands for NOP_EXPR.
+ (vectorizable_type_promotion): Ditto.
+ * optabs.c (optab_for_tree_code) [VEC_PACK_TRUNC_EXPR]: Return
+ vec_pack_trunc_optab.
+ (expand_binop): Rename vec_pack_mod_optab to vec_pack_trunc_optab.
+
+ * tree.def (VEC_PACK_TRUNC_EXPR): Rename from VEC_PACK_MOD_EXPR.
+ * tree-pretty-print.c (dump_generic_node) [VEC_PACK_TRUNC_EXPR]:
+ Rename from VEC_PACK_MOD_EXPR.
+ (op_prio) [VEC_PACK_TRUNC_EXPR]: Ditto.
+ * expr.c (expand_expr_real_1): Ditto.
+ * tree-inline.c (estimate_num_insns_1): Ditto.
+ * tree-vect-generic.c (expand_vector_operations_1): Ditto.
+
+ * config/i386/sse.md (vec_unpacks_hi_v4sf): New expander.
+ (vec_unpacks_lo_v4sf): Ditto.
+ (vec_pack_trunc_v2df): Ditto.
+ (vec_pack_trunc_v8hi): Rename from vec_pack_mod_v8hi.
+ (vec_pack_trunc_v4si): Rename from vec_pack_mod_v4si.
+ (vec_pack_trunc_v2di): Rename from vec_pack_mod_v2di.
+
+ * config/rs6000/altivec.md (vec_pack_trunc_v8hi): Rename from
+ vec_pack_mod_v8hi.
+ (vec_pack_trunc_v4si): Rename from vec_pack_mod_v4si.
+
+ * doc/c-tree.texi (Expression trees) [VEC_PACK_TRUNC_EXPR]:
+ Rename from VEC_PACK_MOD_EXPR. This expression also represents
+ packing of floating point operands.
+ [VEC_UNPACK_HI_EXPR, VEC_UNPACK_LO_EXPR]: These expressions also
+ represent unpacking of floating point operands.
+ * doc/md.texi (Standard Names) [vec_pack_trunc]: Update documentation.
+ [vec_unpacks_hi]: Ditto.
+ [vec_unpacks_lo]: Ditto.
+
2007-04-22 Jan Hubicka <jh@suse.cz>
* final.c (rest_of_handle_final): Call
(set_attr "mode" "V2DF")
(set_attr "amdfam10_decode" "direct")])
+(define_expand "vec_unpacks_hi_v4sf"
+ [(set (match_dup 2)
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_dup 2)
+ (match_operand:V4SF 1 "nonimmediate_operand" ""))
+ (parallel [(const_int 6)
+ (const_int 7)
+ (const_int 2)
+ (const_int 3)])))
+ (set (match_operand:V2DF 0 "register_operand" "")
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_dup 2)
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2"
+{
+ operands[2] = gen_reg_rtx (V4SFmode);
+})
+
+(define_expand "vec_unpacks_lo_v4sf"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2")
+
+(define_expand "vec_pack_trunc_v2df"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx r1, r2;
+
+ r1 = gen_reg_rtx (V4SFmode);
+ r2 = gen_reg_rtx (V4SFmode);
+
+ emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
+ emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
+ emit_insn (gen_sse_movlhps (operands[0], r1, r2));
+ DONE;
+})
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point element swizzling
;; h3 = aeimquy2bfjnrvz3
;; l3 = cgkosw04dhlptx15
;; result = bdfhjlnprtvxz135
-(define_expand "vec_pack_mod_v8hi"
+(define_expand "vec_pack_trunc_v8hi"
[(match_operand:V16QI 0 "register_operand" "")
(match_operand:V8HI 1 "register_operand" "")
(match_operand:V8HI 2 "register_operand" "")]
;; h2 = aeimbfjn
;; l2 = cgkodhlp
;; result = bdfhjlnp
-(define_expand "vec_pack_mod_v4si"
+(define_expand "vec_pack_trunc_v4si"
[(match_operand:V8HI 0 "register_operand" "")
(match_operand:V4SI 1 "register_operand" "")
(match_operand:V4SI 2 "register_operand" "")]
;; h1 = aebf
;; l1 = cgdh
;; result = bdfh
-(define_expand "vec_pack_mod_v2di"
+(define_expand "vec_pack_trunc_v2di"
[(match_operand:V4SI 0 "register_operand" "")
(match_operand:V2DI 1 "register_operand" "")
(match_operand:V2DI 2 "register_operand" "")]
DONE;
}")
-(define_expand "vec_pack_mod_v8hi"
+(define_expand "vec_pack_trunc_v8hi"
[(set (match_operand:V16QI 0 "register_operand" "=v")
(unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
(match_operand:V8HI 2 "register_operand" "v")]
DONE;
}")
-(define_expand "vec_pack_mod_v4si"
+(define_expand "vec_pack_trunc_v4si"
[(set (match_operand:V8HI 0 "register_operand" "=v")
(unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
(match_operand:V4SI 2 "register_operand" "v")]
@tindex VEC_WIDEN_MULT_LO_EXPR
@tindex VEC_UNPACK_HI_EXPR
@tindex VEC_UNPACK_LO_EXPR
-@tindex VEC_PACK_MOD_EXPR
+@tindex VEC_PACK_TRUNC_EXPR
@tindex VEC_PACK_SAT_EXPR
@tindex VEC_EXTRACT_EVEN_EXPR
@tindex VEC_EXTRACT_ODD_EXPR
@item VEC_UNPACK_HI_EXPR
@item VEC_UNPACK_LO_EXPR
-These nodes represent unpacking of the high and low parts of the input vector,
+These nodes represent unpacking of the high and low parts of the input vector,
respectively. The single operand is a vector that contains @code{N} elements
-of the same integral type. The result is a vector that contains half as many
-elements, of an integral type whose size is twice as wide. In the case of
-@code{VEC_UNPACK_HI_EXPR} the high @code{N/2} elements of the vector are
-extracted and widened (promoted). In the case of @code{VEC_UNPACK_LO_EXPR} the
-low @code{N/2} elements of the vector are extracted and widened (promoted).
+of the same integral or floating point type. The result is a vector
+that contains half as many elements, of an integral or floating point type
+whose size is twice as wide. In the case of @code{VEC_UNPACK_HI_EXPR} the
+high @code{N/2} elements of the vector are extracted and widened (promoted).
+In the case of @code{VEC_UNPACK_LO_EXPR} the low @code{N/2} elements of the
+vector are extracted and widened (promoted).
+
+@item VEC_PACK_TRUNC_EXPR
+This node represents packing of truncated elements of the two input vectors
+into the output vector. Input operands are vectors that contain the same
+number of elements of the same integral or floating point type. The result
+is a vector that contains twice as many elements of an integral or floating
+point type whose size is half as wide. The elements of the two vectors are
+demoted and merged (concatenated) to form the output vector.
-@item VEC_PACK_MOD_EXPR
@item VEC_PACK_SAT_EXPR
-These nodes represent packing of elements of the two input vectors into the
-output vector, using modulo or saturating arithmetic, respectively.
-Their operands are vectors that contain the same number of elements
-of the same integral type. The result is a vector that contains twice as many
-elements, of an integral type whose size is half as wide. In both cases
-the elements of the two vectors are demoted and merged (concatenated) to form
-the output vector.
+This node represents packing of elements of the two input vectors into the
+output vector using saturation. Input operands are vectors that contain
+the same number of elements of the same integral type. The result is a
+vector that contains twice as many elements of an integral type whose size
+is half as wide. The elements of the two vectors are demoted and merged
+(concatenated) to form the output vector.
@item VEC_EXTRACT_EVEN_EXPR
@item VEC_EXTRACT_ODD_EXPR
Operand 0 is where the resulting shifted vector is stored.
The output and input vectors should have the same modes.
-@cindex @code{vec_pack_mod_@var{m}} instruction pattern
+@cindex @code{vec_pack_trunc_@var{m}} instruction pattern
+@item @samp{vec_pack_trunc_@var{m}}
+Narrow (demote) and merge the elements of two vectors. Operands 1 and 2
+are vectors of the same mode having N integral or floating point elements
+of size S. Operand 0 is the resulting vector in which 2*N elements of
+size S/2 are concatenated after narrowing them down using truncation.
+
@cindex @code{vec_pack_ssat_@var{m}} instruction pattern
@cindex @code{vec_pack_usat_@var{m}} instruction pattern
-@item @samp{vec_pack_mod_@var{m}}, @samp{vec_pack_ssat_@var{m}}, @samp{vec_pack_usat_@var{m}}
-Narrow (demote) and merge the elements of two vectors.
-Operands 1 and 2 are vectors of the same mode.
+@item @samp{vec_pack_ssat_@var{m}}, @samp{vec_pack_usat_@var{m}}
+Narrow (demote) and merge the elements of two vectors. Operands 1 and 2
+are vectors of the same mode having N integral elements of size S.
Operand 0 is the resulting vector in which the elements of the two input
-vectors are concatenated after narrowing them down using modulo arithmetic or
-signed/unsigned saturating arithmetic.
+vectors are concatenated after narrowing them down using signed/unsigned
+saturating arithmetic.
@cindex @code{vec_unpacks_hi_@var{m}} instruction pattern
@cindex @code{vec_unpacks_lo_@var{m}} instruction pattern
+@item @samp{vec_unpacks_hi_@var{m}}, @samp{vec_unpacks_lo_@var{m}}
+Extract and widen (promote) the high/low part of a vector of signed
+integral or floating point elements. The input vector (operand 1) has N
+elements of size S. Widen (promote) the high/low elements of the vector
+using signed or floating point extension and place the resulting N/2
+values of size 2*S in the output vector (operand 0).
+
@cindex @code{vec_unpacku_hi_@var{m}} instruction pattern
@cindex @code{vec_unpacku_lo_@var{m}} instruction pattern
-@item @samp{vec_unpacks_hi_@var{m}}, @samp{vec_unpacks_lo_@var{m}}, @samp{vec_unpacku_hi_@var{m}}, @samp{vec_unpacku_lo_@var{m}}
-Extract and widen (promote) the high/low part of a vector of signed/unsigned
-elements. The input vector (operand 1) has N signed/unsigned elements of size S.
-Using sign/zero extension widen (promote) the high/low elements of the vector,
-and place the resulting N/2 values of size 2*S in the output vector (operand 0).
+@item @samp{vec_unpacku_hi_@var{m}}, @samp{vec_unpacku_lo_@var{m}}
+Extract and widen (promote) the high/low part of a vector of unsigned
+integral elements. The input vector (operand 1) has N elements of size S.
+Widen (promote) the high/low elements of the vector using zero extension and
+place the resulting N/2 values of size 2*S in the output vector (operand 0).
@cindex @code{vec_widen_umult_hi_@var{m}} instruction pattern
@cindex @code{vec_widen_umult_lo__@var{m}} instruction pattern
@cindex @code{vec_widen_smult_hi_@var{m}} instruction pattern
@cindex @code{vec_widen_smult_lo_@var{m}} instruction pattern
@item @samp{vec_widen_umult_hi_@var{m}}, @samp{vec_widen_umult_lo_@var{m}}, @samp{vec_widen_smult_hi_@var{m}}, @samp{vec_widen_smult_lo_@var{m}}
-Signed/Unsigned widening multiplication.
-The two inputs (operands 1 and 2) are vectors with N
-signed/unsigned elements of size S. Multiply the high/low elements of the two
-vectors, and put the N/2 products of size 2*S in the output vector (operand 0).
+Signed/Unsigned widening multiplication. The two inputs (operands 1 and 2)
+are vectors with N signed/unsigned elements of size S. Multiply the high/low
+elements of the two vectors, and put the N/2 products of size 2*S in the
+output vector (operand 0).
@cindex @code{mulhisi3} instruction pattern
@item @samp{mulhisi3}
return target;
}
- case VEC_PACK_MOD_EXPR:
+ case VEC_PACK_TRUNC_EXPR:
case VEC_PACK_SAT_EXPR:
{
mode = TYPE_MODE (TREE_TYPE (TREE_OPERAND (exp, 0)));
"vec_unpacks_lo_optab->handlers[$A].insn_code = CODE_FOR_$(vec_unpacks_lo_$a$)",
"vec_unpacku_hi_optab->handlers[$A].insn_code = CODE_FOR_$(vec_unpacku_hi_$a$)",
"vec_unpacku_lo_optab->handlers[$A].insn_code = CODE_FOR_$(vec_unpacku_lo_$a$)",
- "vec_pack_mod_optab->handlers[$A].insn_code = CODE_FOR_$(vec_pack_mod_$a$)",
- "vec_pack_ssat_optab->handlers[$A].insn_code = CODE_FOR_$(vec_pack_ssat_$a$)", "vec_pack_usat_optab->handlers[$A].insn_code = CODE_FOR_$(vec_pack_usat_$a$)"
+ "vec_pack_trunc_optab->handlers[$A].insn_code = CODE_FOR_$(vec_pack_trunc_$a$)",
+ "vec_pack_ssat_optab->handlers[$A].insn_code = CODE_FOR_$(vec_pack_ssat_$a$)",
+ "vec_pack_usat_optab->handlers[$A].insn_code = CODE_FOR_$(vec_pack_usat_$a$)"
};
static void gen_insn (rtx);
vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
case VEC_UNPACK_HI_EXPR:
- return TYPE_UNSIGNED (type) ?
+ return TYPE_UNSIGNED (type) ?
vec_unpacku_hi_optab : vec_unpacks_hi_optab;
case VEC_UNPACK_LO_EXPR:
return TYPE_UNSIGNED (type) ?
vec_unpacku_lo_optab : vec_unpacks_lo_optab;
- case VEC_PACK_MOD_EXPR:
- return vec_pack_mod_optab;
-
+ case VEC_PACK_TRUNC_EXPR:
+ return vec_pack_trunc_optab;
+
case VEC_PACK_SAT_EXPR:
return TYPE_UNSIGNED (type) ? vec_pack_usat_optab : vec_pack_ssat_optab;
-
+
default:
break;
}
&& mode1 != VOIDmode)
xop1 = copy_to_mode_reg (mode1, xop1);
- if (binoptab == vec_pack_mod_optab
+ if (binoptab == vec_pack_trunc_optab
|| binoptab == vec_pack_usat_optab
|| binoptab == vec_pack_ssat_optab)
{
vec_unpacks_lo_optab = init_optab (UNKNOWN);
vec_unpacku_hi_optab = init_optab (UNKNOWN);
vec_unpacku_lo_optab = init_optab (UNKNOWN);
- vec_pack_mod_optab = init_optab (UNKNOWN);
+ vec_pack_trunc_optab = init_optab (UNKNOWN);
vec_pack_usat_optab = init_optab (UNKNOWN);
vec_pack_ssat_optab = init_optab (UNKNOWN);
OTI_vec_widen_umult_lo,
OTI_vec_widen_smult_hi,
OTI_vec_widen_smult_lo,
- /* Extract and widen the high/low part of a vector of signed/unsigned
- elements. */
+ /* Extract and widen the high/low part of a vector of signed or
+ floating point elements. */
OTI_vec_unpacks_hi,
OTI_vec_unpacks_lo,
+ /* Extract and widen the high/low part of a vector of unsigned
+ elements. */
OTI_vec_unpacku_hi,
OTI_vec_unpacku_lo,
/* Narrow (demote) and merge the elements of two vectors. */
- OTI_vec_pack_mod,
+ OTI_vec_pack_trunc,
OTI_vec_pack_usat,
OTI_vec_pack_ssat,
#define reduc_umin_optab (optab_table[OTI_reduc_umin])
#define reduc_splus_optab (optab_table[OTI_reduc_splus])
#define reduc_uplus_optab (optab_table[OTI_reduc_uplus])
-
+
#define ssum_widen_optab (optab_table[OTI_ssum_widen])
#define usum_widen_optab (optab_table[OTI_usum_widen])
#define sdot_prod_optab (optab_table[OTI_sdot_prod])
#define vec_widen_smult_hi_optab (optab_table[OTI_vec_widen_smult_hi])
#define vec_widen_smult_lo_optab (optab_table[OTI_vec_widen_smult_lo])
#define vec_unpacks_hi_optab (optab_table[OTI_vec_unpacks_hi])
-#define vec_unpacku_hi_optab (optab_table[OTI_vec_unpacku_hi])
#define vec_unpacks_lo_optab (optab_table[OTI_vec_unpacks_lo])
+#define vec_unpacku_hi_optab (optab_table[OTI_vec_unpacku_hi])
#define vec_unpacku_lo_optab (optab_table[OTI_vec_unpacku_lo])
-#define vec_pack_mod_optab (optab_table[OTI_vec_pack_mod])
+#define vec_pack_trunc_optab (optab_table[OTI_vec_pack_trunc])
#define vec_pack_ssat_optab (optab_table[OTI_vec_pack_ssat])
#define vec_pack_usat_optab (optab_table[OTI_vec_pack_usat])
-
+
#define powi_optab (optab_table[OTI_powi])
/* Conversion optabs have their own table and indexes. */
+2007-04-22 Uros Bizjak <ubizjak@gmail.com>
+
+ PR tree-optimization/24659
+ * gcc.dg/vect/vect-float-extend-1.c: New test.
+ * gcc.dg/vect/vect-float-truncate-1.c: New test.
+
2007-04-22 Richard Guenther <rguenther@suse.de>
PR tree-optimization/29789
case VEC_WIDEN_MULT_LO_EXPR:
case VEC_UNPACK_HI_EXPR:
case VEC_UNPACK_LO_EXPR:
- case VEC_PACK_MOD_EXPR:
+ case VEC_PACK_TRUNC_EXPR:
case VEC_PACK_SAT_EXPR:
case WIDEN_MULT_EXPR:
pp_string (buffer, " > ");
break;
- case VEC_PACK_MOD_EXPR:
- pp_string (buffer, " VEC_PACK_MOD_EXPR < ");
+ case VEC_PACK_TRUNC_EXPR:
+ pp_string (buffer, " VEC_PACK_TRUNC_EXPR < ");
dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
pp_string (buffer, ", ");
dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
case VEC_RSHIFT_EXPR:
case VEC_UNPACK_HI_EXPR:
case VEC_UNPACK_LO_EXPR:
- case VEC_PACK_MOD_EXPR:
+ case VEC_PACK_TRUNC_EXPR:
case VEC_PACK_SAT_EXPR:
return 16;
|| code == VEC_WIDEN_MULT_LO_EXPR
|| code == VEC_UNPACK_HI_EXPR
|| code == VEC_UNPACK_LO_EXPR
- || code == VEC_PACK_MOD_EXPR
+ || code == VEC_PACK_TRUNC_EXPR
|| code == VEC_PACK_SAT_EXPR)
type = TREE_TYPE (TREE_OPERAND (rhs, 0));
offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
base_offset, offset);
- base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
+ base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
append_to_statement_list_force (new_stmt, new_stmt_list);
}
access_fn = analyze_scalar_evolution (loop, PHI_RESULT (iv_phi));
gcc_assert (access_fn);
- ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_expr, &step_expr);
+ ok = vect_is_simple_iv_evolution (loop->num, access_fn,
+ &init_expr, &step_expr);
gcc_assert (ok);
/* Create the vector that holds the initial_value of the induction. */
vectorized stmt to be created (by the caller to this function) is a "copy"
created in case the vectorized result cannot fit in one vector, and several
copies of the vector-stmt are required. In this case the vector-def is
- retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
+ retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
of the stmt that defines VEC_OPRND.
DT is the type of the vector def VEC_OPRND.
loop:
vec_def = phi <null, null> # REDUCTION_PHI
- VECT_DEF = vector_stmt # vectorized form of STMT
+ VECT_DEF = vector_stmt # vectorized form of STMT
s_loop = scalar_stmt # (scalar) STMT
loop_exit:
s_out0 = phi <s_loop> # (scalar) EXIT_PHI
exit_bsi = bsi_start (exit_bb);
/* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
- (i.e. when reduc_code is not available) and in the final adjustment code
- (if needed). Also get the original scalar reduction variable as
+ (i.e. when reduc_code is not available) and in the final adjustment
+ code (if needed). Also get the original scalar reduction variable as
defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
represents a reduction pattern), the tree-code and scalar-def are
taken from the original stmt that the pattern-stmt (STMT) replaces.
bitpos);
BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
- epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
+ epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
sequence that had been detected and replaced by the pattern-stmt (STMT).
- In some cases of reduction patterns, the type of the reduction variable X is
+ In some cases of reduction patterns, the type of the reduction variable X is
different than the type of the other arguments of STMT.
In such cases, the vectype that is used when transforming STMT into a vector
- stmt is different than the vectype that is used to determine the
+ stmt is different than the vectype that is used to determine the
vectorization factor, because it consists of a different number of elements
than the actual number of elements that are being operated upon in parallel.
- For example, consider an accumulation of shorts into an int accumulator.
+ For example, consider an accumulation of shorts into an int accumulator.
On some targets it's possible to vectorize this pattern operating on 8
shorts at a time (hence, the vectype for purposes of determining the
vectorization factor should be V8HI); on the other hand, the vectype that
- is used to create the vector form is actually V4SI (the type of the result).
+ is used to create the vector form is actually V4SI (the type of the result).
- Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
- indicates what is the actual level of parallelism (V8HI in the example), so
- that the right vectorization factor would be derived. This vectype
- corresponds to the type of arguments to the reduction stmt, and should *NOT*
+ Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
+ indicates what is the actual level of parallelism (V8HI in the example), so
+ that the right vectorization factor would be derived. This vectype
+ corresponds to the type of arguments to the reduction stmt, and should *NOT*
be used to create the vectorized stmt. The right vectype for the vectorized
- stmt is obtained from the type of the result X:
+ stmt is obtained from the type of the result X:
get_vectype_for_scalar_type (TREE_TYPE (X))
- This means that, contrary to "regular" reductions (or "regular" stmts in
+ This means that, contrary to "regular" reductions (or "regular" stmts in
general), the following equation:
STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
does *NOT* necessarily hold for reduction patterns. */
op = TREE_OPERAND (operation, 1);
loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
}
-
+
/* Get the vector def for the reduction variable from the phi node */
reduc_def = PHI_RESULT (new_phi);
}
loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
if (op_type == ternary_op)
loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
-
+
/* Get the vector def for the reduction variable from the vectorized
reduction operation generated in the previous iteration (j-1) */
reduc_def = GIMPLE_STMT_OPERAND (new_stmt ,0);
}
-
+
/* Arguments are ready. create the new vector stmt. */
-
if (op_type == binary_op)
expr = build2 (code, vectype, loop_vec_def0, reduc_def);
else
expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
- reduc_def);
+ reduc_def);
new_stmt = build_gimple_modify_stmt (vec_dest, expr);
new_temp = make_ssa_name (vec_dest, new_stmt);
GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
vect_finish_stmt_generation (stmt, new_stmt, bsi);
-
+
if (j == 0)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
else
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
prev_stmt_info = vinfo_for_stmt (new_stmt);
}
-
+
/* Finalize the reduction-phi (set it's arguments) and create the
epilog reduction code. */
- vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
+ vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
return true;
}
fprintf (vect_dump, "use not simple.");
return false;
}
-
+
if (op_type == binary_op)
{
op1 = TREE_OPERAND (operation, 1);
stmts that use the defs of the current stmt. The example below illustrates
the vectorization process when VF=16 and nunits=4 (i.e - we need to create
4 vectorized stmts):
-
+
before vectorization:
RELATED_STMT VEC_STMT
S1: x = memref - -
S2: z = x + 1 - -
-
+
step 1: vectorize stmt S1 (done in vectorizable_load. See more details
there):
RELATED_STMT VEC_STMT
VS1_3: vx3 = memref3 - -
S1: x = load - VS1_0
S2: z = x + 1 - -
-
+
step2: vectorize stmt S2 (done here):
To vectorize stmt S2 we first need to find the relevant vector
def for the first operand 'x'. This is, as usual, obtained from
VS2_2: vz2 = vx2 + v1 VS2_3 -
VS2_3: vz3 = vx3 + v1 - -
S2: z = x + 1 - VS2_0 */
-
+
prev_stmt_info = NULL;
for (j = 0; j < ncopies; j++)
{
}
/* Arguments are ready. create the new vector stmt. */
-
+
if (op_type == binary_op)
new_stmt = build_gimple_modify_stmt (vec_dest,
build2 (code, vectype, vec_oprnd0, vec_oprnd1));
new_temp = make_ssa_name (vec_dest, new_stmt);
GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
vect_finish_stmt_generation (stmt, new_stmt, bsi);
-
+
if (j == 0)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
else
/* Function vectorizable_type_demotion
-
+
Check if STMT performs a binary or unary operation that involves
type demotion, and if it can be vectorized.
If VEC_STMT is also passed, vectorize the STMT: create a vectorized
stmt to replace it, put it in VEC_STMT, and insert it at BSI.
Return FALSE if not a vectorizable STMT, TRUE otherwise. */
-
+
bool
vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
tree *vec_stmt)
tree scalar_type;
optab optab;
enum machine_mode vec_mode;
-
+
if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false;
fprintf (vect_dump, "value used after loop.");
return false;
}
-
+
/* Is STMT a vectorizable type-demotion operation? */
if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
return false;
-
+
if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
return false;
-
+
operation = GIMPLE_STMT_OPERAND (stmt, 1);
code = TREE_CODE (operation);
if (code != NOP_EXPR && code != CONVERT_EXPR)
return false;
-
+
op0 = TREE_OPERAND (operation, 0);
vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
-
+
scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
scalar_type = TREE_TYPE (scalar_dest);
vectype_out = get_vectype_for_scalar_type (scalar_type);
nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
if (nunits_in != nunits_out / 2) /* FORNOW */
return false;
-
+
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
gcc_assert (ncopies >= 1);
- if (! INTEGRAL_TYPE_P (scalar_type)
- || !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
+ if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
+ && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
+ || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
+ && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
+ && (code == NOP_EXPR || code == CONVERT_EXPR))))
return false;
-
+
/* Check the operands of the operation. */
if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
{
fprintf (vect_dump, "use not simple.");
return false;
}
-
+
/* Supportable by target? */
- code = VEC_PACK_MOD_EXPR;
- optab = optab_for_tree_code (VEC_PACK_MOD_EXPR, vectype_in);
+ code = VEC_PACK_TRUNC_EXPR;
+ optab = optab_for_tree_code (code, vectype_in);
if (!optab)
return false;
-
+
vec_mode = TYPE_MODE (vectype_in);
if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
return false;
-
+
STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
-
+
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
return true;
}
-
+
/** Transform. **/
-
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
- ncopies);
-
+ ncopies);
+
/* Handle def. */
vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1);
vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
}
-
+
/* Arguments are ready. Create the new vector stmt. */
expr = build2 (code, vectype_out, vec_oprnd0, vec_oprnd1);
new_stmt = build_gimple_modify_stmt (vec_dest, expr);
new_temp = make_ssa_name (vec_dest, new_stmt);
GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
vect_finish_stmt_generation (stmt, new_stmt, bsi);
-
+
if (j == 0)
STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
else
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
-
+
prev_stmt_info = vinfo_for_stmt (new_stmt);
}
-
+
*vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
return true;
}
if (nunits_out != nunits_in / 2) /* FORNOW */
return false;
- if (! INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
- || !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
+ if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
+ && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
+ || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
+ && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
+ && (code == CONVERT_EXPR || code == NOP_EXPR))))
return false;
/* Check the operands of the operation. */
/* For interleaved stores we created vectorized defs for all the
defs stored in OPRNDS in the previous iteration (previous copy).
DR_CHAIN is then used as an input to vect_permute_store_chain(),
- and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
+ and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
next copy.
If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
OPRNDS are of size 1. */
done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
gcc_assert (done);
break;
-
+
case type_promotion_vec_info_type:
done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
gcc_assert (done);
tree wide_vectype = get_vectype_for_scalar_type (type);
enum tree_code c1, c2;
- /* The result of a vectorized widening operation usually requires two vectors
+ /* The result of a vectorized widening operation usually requires two vectors
(because the widened results do not fit int one vector). The generated
vector results would normally be expected to be generated in the same
order as in the original scalar computation. i.e. if 8 results are
DEFTREECODE (VEC_UNPACK_LO_EXPR, "vec_unpack_lo_expr", tcc_unary, 1)
/* Pack (demote/narrow and merge) the elements of the two input vectors
- into the output vector, using modulo/saturating arithmetic.
+ into the output vector using truncation/saturation.
The elements of the input vectors are twice the size of the elements of the
output vector. This is used to support type demotion. */
-DEFTREECODE (VEC_PACK_MOD_EXPR, "vec_pack_mod_expr", tcc_binary, 2)
+DEFTREECODE (VEC_PACK_TRUNC_EXPR, "vec_pack_trunc_expr", tcc_binary, 2)
DEFTREECODE (VEC_PACK_SAT_EXPR, "vec_pack_sat_expr", tcc_binary, 2)
-
+
/* Extract even/odd fields from vectors. */
DEFTREECODE (VEC_EXTRACT_EVEN_EXPR, "vec_extracteven_expr", tcc_binary, 2)
DEFTREECODE (VEC_EXTRACT_ODD_EXPR, "vec_extractodd_expr", tcc_binary, 2)