/* Lower vector operations to scalar operations.
   Copyright (C) 2004-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "langhooks.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimplify.h"
#include "tree-cfg.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"


static void expand_vector_operations_1 (gimple_stmt_iterator *);

/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
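/* For instance, replicating the value 0xAB over a 32-bit TYPE whose
   INNER_TYPE is 8 bits wide produces the constant 0xABABABAB.  */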
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  int width = tree_to_uhwi (TYPE_SIZE (inner_type));
  int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
    / HOST_BITS_PER_WIDE_INT;
  unsigned HOST_WIDE_INT low, mask;
  HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
  int i;

  gcc_assert (n && n <= WIDE_INT_MAX_ELTS);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  for (i = 0; i < n; i++)
    a[i] = low;

  gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
  return wide_int_to_tree
    (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
}

static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;

/* Return a suitable vector type made of NUNITS units each of mode
   "word_mode" (the global variable).  */
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  vector_last_nunits = nunits;
  vector_last_type = type_hash_canon (nunits,
                                      build_vector_type (vector_inner_type,
                                                         nunits));
  return vector_last_type;
}

typedef tree (*elem_op_func) (gimple_stmt_iterator *,
                              tree, tree, tree, tree, tree, enum tree_code,
                              tree);

static inline tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
                  tree t, tree bitsize, tree bitpos)
{
  if (TREE_CODE (t) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (t);
      if (is_gimple_assign (def_stmt)
          && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
              || (bitpos
                  && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)))
        t = gimple_assign_rhs1 (def_stmt);
    }
  if (bitpos)
    {
      if (TREE_CODE (type) == BOOLEAN_TYPE)
        {
          tree itype
            = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0);
          tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t,
                                        bitsize, bitpos);
          return gimplify_build2 (gsi, NE_EXPR, type, field,
                                  build_zero_cst (itype));
        }
      else
        return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
    }
  else
    return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
}

static tree
do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
         tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
         enum tree_code code, tree type ATTRIBUTE_UNUSED)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  return gimplify_build1 (gsi, code, inner_type, a);
}

static tree
do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
          tree bitpos, tree bitsize, enum tree_code code,
          tree type ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  return gimplify_build2 (gsi, code, inner_type, a, b);
}

/* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0

   INNER_TYPE is the type of A and B elements

   The returned expression is of signed integer type with the
   size equal to the size of INNER_TYPE.  */
static tree
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
            tree bitpos, tree bitsize, enum tree_code code, tree type)
{
  tree stype = TREE_TYPE (type);
  tree cst_false = build_zero_cst (stype);
  tree cst_true = build_all_ones_cst (stype);
  tree cmp;

  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);

  cmp = build2 (code, boolean_type_node, a, b);
  return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
}

/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
           ((int) a ^ (int) b) & 0x80808080

   a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
           (a ^ ~b) & 0x80808080

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.  */
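/* As a worked example for one byte lane of a 32-bit word, take
   a = 0x01 and b = 0x7f in the same lane: masking clears each lane's
   top bit, so the masked sum (0x01 + 0x7f) & 0xff = 0x80 cannot carry
   into the neighbouring lane; XORing with (a ^ b) & 0x80 then patches
   the lane's true top bit back in.  */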
static tree
do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
               tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
               enum tree_code code, tree type ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, a_low, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  if (code == PLUS_EXPR)
    a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
  else
    {
      a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
      signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
    }

  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}

static tree
do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
           tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
           tree bitsize ATTRIBUTE_UNUSED,
           enum tree_code code ATTRIBUTE_UNUSED,
           tree type ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}

/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  */
static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
                         tree type, tree inner_type,
                         tree a, tree b, enum tree_code code)
{
  vec<constructor_elt, va_gc> *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int delta = tree_to_uhwi (part_width)
              / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
    warning_at (loc, OPT_Wvector_operation_performance,
                "vector operation will be expanded piecewise");
  else
    warning_at (loc, OPT_Wvector_operation_performance,
                "vector operation will be expanded in parallel");

  vec_alloc (v, (nunits + delta - 1) / delta);
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code, type);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  return build_constructor (type, v);
}

/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
                        tree a, tree b,
                        enum tree_code code)
{
  tree result, compute_type;
  int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
    return expand_vector_piecewise (gsi, f,
                                    type, TREE_TYPE (type),
                                    a, b, code);
  else if (n_words > 1)
    {
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
                                        word_type, TREE_TYPE (word_type),
                                        a, b, code);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
                                         GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      scalar_int_mode mode
        = int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type);
      warning_at (loc, OPT_Wvector_operation_performance,
                  "vector operation will be expanded with a "
                  "single scalar operation");
    }

  return result;
}

/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.  */
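/* For example, with 4-byte words a V4QI addition qualifies (four
   one-byte elements fit in a word), whereas a V2SI addition falls
   back to the piecewise expansion.  */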
static tree
expand_vector_addition (gimple_stmt_iterator *gsi,
                        elem_op_func f, elem_op_func f_parallel,
                        tree type, tree a, tree b, enum tree_code code)
{
  int parts_per_word = UNITS_PER_WORD
                       / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));

  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
    return expand_vector_parallel (gsi, f_parallel,
                                   type, a, b, code);
  else
    return expand_vector_piecewise (gsi, f,
                                    type, TREE_TYPE (type),
                                    a, b, code);
}

/* Try to expand vector comparison expression OP0 CODE OP1 by
   querying the optab whether the following expression:
        VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.  */
static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
                          tree op1, enum tree_code code)
{
  tree t;
  if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
      && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
    t = expand_vector_piecewise (gsi, do_compare, type,
                                 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
  else
    t = NULL_TREE;

  return t;
}

/* Helper function of expand_vector_divmod.  Gimplify a RSHIFT_EXPR in type
   of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
   the result if successful, otherwise return NULL_TREE.  */
static tree
add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
{
  optab op;
  unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
  bool scalar_shift = true;

  for (i = 1; i < nunits; i++)
    {
      if (shiftcnts[i] != shiftcnts[0])
        scalar_shift = false;
    }

  if (scalar_shift && shiftcnts[0] == 0)
    return op0;

  if (scalar_shift)
    {
      op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
      if (op != unknown_optab
          && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
        return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
                                build_int_cst (NULL_TREE, shiftcnts[0]));
    }

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op != unknown_optab
      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
    {
      tree_vector_builder vec (type, nunits, 1);
      for (i = 0; i < nunits; i++)
        vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
      return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
    }

  return NULL_TREE;
}

/* Try to expand integer vector division by constant using
   widening multiply, shifts and additions.  */
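/* Illustrative sketch (the actual constants come from choose_multiplier):
   an unsigned 8-bit division by 3 can be lowered to
     q = (x h* 0xAB) >> 1
   i.e. a highpart multiply by 171 followed by a right shift, which is
   exact for every 8-bit x because 171/512 is close enough to 1/3.  */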
static tree
expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
                      tree op1, enum tree_code code)
{
  bool use_pow2 = true;
  bool has_vector_shift = true;
  int mode = -1, this_mode;
  int pre_shift = -1, post_shift;
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
  int *shifts = XALLOCAVEC (int, nunits * 4);
  int *pre_shifts = shifts + nunits;
  int *post_shifts = pre_shifts + nunits;
  int *shift_temps = post_shifts + nunits;
  unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
  int prec = TYPE_PRECISION (TREE_TYPE (type));
  int dummy_int;
  unsigned int i;
  signop sign_p = TYPE_SIGN (TREE_TYPE (type));
  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
  tree cur_op, mulcst, tem;
  optab op;

  if (prec > HOST_BITS_PER_WIDE_INT)
    return NULL_TREE;

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    has_vector_shift = false;

  /* Analysis phase.  Determine whether all op1 elements are powers
     of two and whether it is possible to expand the operation using
     shifts (or, for the remainder, using masking).  Additionally compute
     the multiplicative constants and pre and post shifts if the division
     is to be expanded using widening or high part multiplication plus
     shifts.  */
  for (i = 0; i < nunits; i++)
    {
      tree cst = VECTOR_CST_ELT (op1, i);
      unsigned HOST_WIDE_INT ml;

      if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
        return NULL_TREE;
      pre_shifts[i] = 0;
      post_shifts[i] = 0;
      mulc[i] = 0;
      if (use_pow2
          && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
        use_pow2 = false;
      if (use_pow2)
        {
          shifts[i] = tree_log2 (cst);
          if (shifts[i] != shifts[0]
              && code == TRUNC_DIV_EXPR
              && !has_vector_shift)
            use_pow2 = false;
        }
      if (mode == -2)
        continue;
      if (sign_p == UNSIGNED)
        {
          unsigned HOST_WIDE_INT mh;
          unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;

          if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
            /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
            return NULL_TREE;

          if (d <= 1)
            {
              mode = -2;
              continue;
            }

          /* Find a suitable multiplier and right shift count
             instead of multiplying with D.  */
          mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

          /* If the suggested multiplier is more than SIZE bits, we can
             do better for even divisors, using an initial right shift.  */
          if ((mh != 0 && (d & 1) == 0)
              || (!has_vector_shift && pre_shift != -1))
            {
              if (has_vector_shift)
                pre_shift = ctz_or_zero (d);
              else if (pre_shift == -1)
                {
                  unsigned int j;
                  for (j = 0; j < nunits; j++)
                    {
                      tree cst2 = VECTOR_CST_ELT (op1, j);
                      unsigned HOST_WIDE_INT d2;
                      int this_pre_shift;

                      if (!tree_fits_uhwi_p (cst2))
                        return NULL_TREE;
                      d2 = tree_to_uhwi (cst2) & mask;
                      if (d2 == 0)
                        return NULL_TREE;
                      this_pre_shift = floor_log2 (d2 & -d2);
                      if (pre_shift == -1 || this_pre_shift < pre_shift)
                        pre_shift = this_pre_shift;
                    }
                  if (i != 0 && pre_shift != 0)
                    {
                      /* Restart.  */
                      i = -1U;
                      mode = -1;
                      continue;
                    }
                }
              if (pre_shift != 0)
                {
                  if ((d >> pre_shift) <= 1)
                    {
                      mode = -2;
                      continue;
                    }
                  mh = choose_multiplier (d >> pre_shift, prec,
                                          prec - pre_shift,
                                          &ml, &post_shift, &dummy_int);
                  gcc_assert (!mh);
                  pre_shifts[i] = pre_shift;
                }
            }
          if (!mh)
            this_mode = 0;
          else
            this_mode = 1;
        }
      else
        {
          HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
          unsigned HOST_WIDE_INT abs_d;

          if (d == -1)
            return NULL_TREE;

          /* Since d might be INT_MIN, we have to cast to
             unsigned HOST_WIDE_INT before negating to avoid
             undefined signed overflow.  */
          abs_d = (d >= 0
                   ? (unsigned HOST_WIDE_INT) d
                   : - (unsigned HOST_WIDE_INT) d);

          /* n rem d = n rem -d */
          if (code == TRUNC_MOD_EXPR && d < 0)
            d = abs_d;
          else if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
            {
              /* This case is not handled correctly below.  */
              mode = -2;
              continue;
            }
          if (abs_d <= 1)
            {
              mode = -2;
              continue;
            }

          choose_multiplier (abs_d, prec, prec - 1, &ml,
                             &post_shift, &dummy_int);
          if (ml >= HOST_WIDE_INT_1U << (prec - 1))
            {
              this_mode = 4 + (d < 0);
              ml |= HOST_WIDE_INT_M1U << (prec - 1);
            }
          else
            this_mode = 2 + (d < 0);
        }
      mulc[i] = ml;
      post_shifts[i] = post_shift;
      if ((i && !has_vector_shift && post_shifts[0] != post_shift)
          || post_shift >= prec
          || pre_shifts[i] >= prec)
        this_mode = -2;

      if (i == 0)
        mode = this_mode;
      else if (mode != this_mode)
        mode = -2;
    }

  if (use_pow2)
    {
      tree addend = NULL_TREE;
      if (sign_p == SIGNED)
        {
          tree uns_type;

          /* Both division and remainder sequences need
             op0 < 0 ? mask : 0 computed.  It can be either computed as
             (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
             if none of the shifts is 0, or as the conditional.  */
          for (i = 0; i < nunits; i++)
            if (shifts[i] == 0)
              break;
          uns_type
            = build_vector_type (build_nonstandard_integer_type (prec, 1),
                                 nunits);
          if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
            {
              for (i = 0; i < nunits; i++)
                shift_temps[i] = prec - 1;
              cur_op = add_rshift (gsi, type, op0, shift_temps);
              if (cur_op != NULL_TREE)
                {
                  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
                                            uns_type, cur_op);
                  for (i = 0; i < nunits; i++)
                    shift_temps[i] = prec - shifts[i];
                  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
                  if (cur_op != NULL_TREE)
                    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
                                              type, cur_op);
                }
            }
          if (addend == NULL_TREE
              && expand_vec_cond_expr_p (type, type, LT_EXPR))
            {
              tree zero, cst, cond, mask_type;
              gimple *stmt;

              mask_type = build_same_sized_truth_vector_type (type);
              zero = build_zero_cst (type);
              cond = build2 (LT_EXPR, mask_type, op0, zero);
              tree_vector_builder vec (type, nunits, 1);
              for (i = 0; i < nunits; i++)
                vec.quick_push (build_int_cst (TREE_TYPE (type),
                                               (HOST_WIDE_INT_1U
                                                << shifts[i]) - 1));
              cst = vec.build ();
              addend = make_ssa_name (type);
              stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
                                          cst, zero);
              gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
            }
        }
      if (code == TRUNC_DIV_EXPR)
        {
          if (sign_p == UNSIGNED)
            {
              /* q = op0 >> shift;  */
              cur_op = add_rshift (gsi, type, op0, shifts);
              if (cur_op != NULL_TREE)
                return cur_op;
            }
          else if (addend != NULL_TREE)
            {
              /* t1 = op0 + addend;
                 q = t1 >> shift;  */
              op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
              if (op != unknown_optab
                  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
                {
                  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
                  cur_op = add_rshift (gsi, type, cur_op, shifts);
                  if (cur_op != NULL_TREE)
                    return cur_op;
                }
            }
        }
      else
        {
          tree mask;
          tree_vector_builder vec (type, nunits, 1);
          for (i = 0; i < nunits; i++)
            vec.quick_push (build_int_cst (TREE_TYPE (type),
                                           (HOST_WIDE_INT_1U
                                            << shifts[i]) - 1));
          mask = vec.build ();
          op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
          if (op != unknown_optab
              && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
            {
              if (sign_p == UNSIGNED)
                /* r = op0 & mask;  */
                return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
              else if (addend != NULL_TREE)
                {
                  /* t1 = op0 + addend;
                     t2 = t1 & mask;
                     r = t2 - addend;  */
                  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
                  if (op != unknown_optab
                      && optab_handler (op, TYPE_MODE (type))
                         != CODE_FOR_nothing)
                    {
                      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
                                                addend);
                      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
                                                cur_op, mask);
                      op = optab_for_tree_code (MINUS_EXPR, type,
                                                optab_default);
                      if (op != unknown_optab
                          && optab_handler (op, TYPE_MODE (type))
                             != CODE_FOR_nothing)
                        return gimplify_build2 (gsi, MINUS_EXPR, type,
                                                cur_op, addend);
                    }
                }
            }
        }
      return NULL_TREE;
    }

  if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return NULL_TREE;

  if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
    return NULL_TREE;

  cur_op = op0;

  switch (mode)
    {
    case 0:
      gcc_assert (sign_p == UNSIGNED);
      /* t1 = oprnd0 >> pre_shift;
         t2 = t1 h* ml;
         q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
      if (cur_op == NULL_TREE)
        return NULL_TREE;
      break;
    case 1:
      gcc_assert (sign_p == UNSIGNED);
      for (i = 0; i < nunits; i++)
        {
          shift_temps[i] = 1;
          post_shifts[i]--;
        }
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      gcc_assert (sign_p == SIGNED);
      for (i = 0; i < nunits; i++)
        shift_temps[i] = prec - 1;
      break;
    default:
      return NULL_TREE;
    }

  tree_vector_builder vec (type, nunits, 1);
  for (i = 0; i < nunits; i++)
    vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
  mulcst = vec.build ();

  cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);

  switch (mode)
    {
    case 0:
      /* t1 = oprnd0 >> pre_shift;
         t2 = t1 h* ml;
         q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      break;
    case 1:
      /* t1 = oprnd0 h* ml;
         t2 = oprnd0 - t1;
         t3 = t2 >> 1;
         t4 = t1 + t3;
         q = t4 >> (post_shift - 1);  */
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
          || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
        return NULL_TREE;
      tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
      tem = add_rshift (gsi, type, tem, shift_temps);
      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
      if (op == unknown_optab
          || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
        return NULL_TREE;
      tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
      cur_op = add_rshift (gsi, type, tem, post_shifts);
      if (cur_op == NULL_TREE)
        return NULL_TREE;
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      /* t1 = oprnd0 h* ml;
         t2 = t1; [ iff (mode & 2) != 0 ]
         t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
         t3 = t2 >> post_shift;
         t4 = oprnd0 >> (prec - 1);
         q = t3 - t4; [ iff (mode & 1) == 0 ]
         q = t4 - t3; [ iff (mode & 1) != 0 ]  */
      if ((mode & 2) == 0)
        {
          op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
          if (op == unknown_optab
              || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
            return NULL_TREE;
          cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
        }
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      if (cur_op == NULL_TREE)
        return NULL_TREE;
      tem = add_rshift (gsi, type, op0, shift_temps);
      if (tem == NULL_TREE)
        return NULL_TREE;
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
          || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
        return NULL_TREE;
      if ((mode & 1) == 0)
        cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
      else
        cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
      break;
    default:
      gcc_unreachable ();
    }

  if (code == TRUNC_DIV_EXPR)
    return cur_op;

  /* We divided.  Now finish by:
     t1 = q * oprnd1;
     r = oprnd0 - t1;  */
  op = optab_for_tree_code (MULT_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
  op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
}

/* Expand a vector condition to scalars, by using many conditions
   on the vector's elements.  */
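/* For example, a V4SI statement
     x_1 = VEC_COND_EXPR <a_2 < b_3, c_4, d_5>;
   that the target cannot handle directly is rebuilt as a CONSTRUCTOR
   of four scalar COND_EXPRs, one per element.  */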
static void
expand_vector_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree a = gimple_assign_rhs1 (stmt);
  tree a1 = a;
  tree a2 = NULL_TREE;
  bool a_is_comparison = false;
  tree b = gimple_assign_rhs2 (stmt);
  tree c = gimple_assign_rhs3 (stmt);
  vec<constructor_elt, va_gc> *v;
  tree constr;
  tree inner_type = TREE_TYPE (type);
  tree cond_type = TREE_TYPE (TREE_TYPE (a));
  tree comp_inner_type = cond_type;
  tree width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  tree comp_width = width;
  tree comp_index = index;
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (!is_gimple_val (a))
    {
      gcc_assert (COMPARISON_CLASS_P (a));
      a_is_comparison = true;
      a1 = TREE_OPERAND (a, 0);
      a2 = TREE_OPERAND (a, 1);
      comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
      comp_width = TYPE_SIZE (comp_inner_type);
    }

  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
    return;

  /* Handle vector boolean types with bitmasks.  If there is a comparison
     and we can expand the comparison into the vector boolean bitmask,
     or otherwise if it is compatible with type, we can transform
      vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
     into
      tmp_6 = x_2 < y_3;
      tmp_7 = tmp_6 & vbfld_4;
      tmp_8 = ~tmp_6;
      tmp_9 = tmp_8 & vbfld_5;
      vbfld_1 = tmp_7 | tmp_9;
     Similarly for vbfld_10 instead of x_2 < y_3.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && SCALAR_INT_MODE_P (TYPE_MODE (type))
      && (GET_MODE_BITSIZE (TYPE_MODE (type))
          < (TYPE_VECTOR_SUBPARTS (type)
             * GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (type)))))
      && (a_is_comparison
          ? useless_type_conversion_p (type, TREE_TYPE (a))
          : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
    {
      if (a_is_comparison)
        a = gimplify_build2 (gsi, TREE_CODE (a), type, a1, a2);
      a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
      a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
      a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
      a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
      gimple_assign_set_rhs_from_tree (gsi, a);
      update_stmt (gsi_stmt (*gsi));
      return;
    }

  /* TODO: try and find a smaller vector type.  */

  warning_at (loc, OPT_Wvector_operation_performance,
              "vector condition will be expanded piecewise");

  vec_alloc (v, nunits);
  for (i = 0; i < nunits; i++)
    {
      tree aa, result;
      tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
      tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
      if (a_is_comparison)
        {
          tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
                                       comp_width, comp_index);
          tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
                                       comp_width, comp_index);
          aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
        }
      else
        aa = tree_vec_extract (gsi, cond_type, a, width, index);
      result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
      index = int_const_binop (PLUS_EXPR, index, width);
      if (width == comp_width)
        comp_index = index;
      else
        comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
    }

  constr = build_constructor (type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
                         gassign *assign, enum tree_code code)
{
  machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
        if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
          return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
                                         gimple_assign_rhs1 (assign),
                                         gimple_assign_rhs2 (assign), code);
        break;

      case NEGATE_EXPR:
        if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
          return expand_vector_addition (gsi, do_unop, do_negate, type,
                                         gimple_assign_rhs1 (assign),
                                         NULL_TREE, code);
        break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
        return expand_vector_parallel (gsi, do_binop, type,
                                       gimple_assign_rhs1 (assign),
                                       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
        return expand_vector_parallel (gsi, do_unop, type,
                                       gimple_assign_rhs1 (assign),
                                       NULL_TREE, code);
      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
      case UNEQ_EXPR:
      case UNGT_EXPR:
      case UNLT_EXPR:
      case UNGE_EXPR:
      case UNLE_EXPR:
      case LTGT_EXPR:
      case ORDERED_EXPR:
      case UNORDERED_EXPR:
        {
          tree rhs1 = gimple_assign_rhs1 (assign);
          tree rhs2 = gimple_assign_rhs2 (assign);

          return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
        }

      case TRUNC_DIV_EXPR:
      case TRUNC_MOD_EXPR:
        {
          tree rhs1 = gimple_assign_rhs1 (assign);
          tree rhs2 = gimple_assign_rhs2 (assign);
          tree ret;

          if (!optimize
              || !VECTOR_INTEGER_TYPE_P (type)
              || TREE_CODE (rhs2) != VECTOR_CST
              || !VECTOR_MODE_P (TYPE_MODE (type)))
            break;

          ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
          if (ret != NULL_TREE)
            return ret;
          break;
        }

      default:
        break;
      }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
                                    gimple_assign_rhs1 (assign),
                                    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
                                    gimple_assign_rhs1 (assign),
                                    gimple_assign_rhs2 (assign), code);
}

/* Optimize
     a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
   style stmts into:
     _9 = { b_7, b_7, b_7, b_7 };
     a_5 = _9 + { 0, 3, 6, 9 };
   because vector splat operation is usually more efficient
   than piecewise initialization of the vector.  */
static void
optimize_vector_constructor (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  tree type = TREE_TYPE (rhs);
  unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
  bool all_same = true;
  constructor_elt *elt;
  gimple *g;
  tree base = NULL_TREE;
  optab op;

  if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
    return;
  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return;
  FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
    if (TREE_CODE (elt->value) != SSA_NAME
        || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
      return;
    else
      {
        tree this_base = elt->value;
        if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
          all_same = false;
        for (j = 0; j < nelts + 1; j++)
          {
            g = SSA_NAME_DEF_STMT (this_base);
            if (is_gimple_assign (g)
                && gimple_assign_rhs_code (g) == PLUS_EXPR
                && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
                && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
                && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
              this_base = gimple_assign_rhs1 (g);
            else
              break;
          }
        if (i == 0)
          base = this_base;
        else if (this_base != base)
          return;
      }
  if (all_same)
    return;
  tree_vector_builder cst (type, nelts, 1);
  for (i = 0; i < nelts; i++)
    {
      tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
      tree elt = build_zero_cst (TREE_TYPE (base));
      while (this_base != base)
        {
          g = SSA_NAME_DEF_STMT (this_base);
          elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
                             elt, gimple_assign_rhs2 (g));
          if (elt == NULL_TREE
              || TREE_CODE (elt) != INTEGER_CST
              || TREE_OVERFLOW (elt))
            return;
          this_base = gimple_assign_rhs1 (g);
        }
      cst.quick_push (elt);
    }
  for (i = 0; i < nelts; i++)
    CONSTRUCTOR_ELT (rhs, i)->value = base;
  g = gimple_build_assign (make_ssa_name (type), rhs);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
                           cst.build ());
  gsi_replace (gsi, g, false);
}

/* Return a type for the widest vector mode whose components are of type
   TYPE, or NULL_TREE if none is found.  */
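/* For instance, on a target whose widest supported integer vector is
   V8SImode, an SImode TYPE would yield that eight-element vector type,
   provided OP is implemented for V8SImode.  */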
static tree
type_for_widest_vector_mode (tree type, optab op)
{
  machine_mode inner_mode = TYPE_MODE (type);
  machine_mode best_mode = VOIDmode, mode;
  int best_nunits = 0;

  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else
    mode = MIN_MODE_VECTOR_INT;

  FOR_EACH_MODE_FROM (mode, mode)
    if (GET_MODE_INNER (mode) == inner_mode
        && GET_MODE_NUNITS (mode) > best_nunits
        && optab_handler (op, mode) != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    return build_vector_type_for_mode (type, best_mode);
}

/* Build a reference to the element of the vector VECT.  Function
   returns either the element itself, a BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  In case when PTMPVEC is NULL a new temporary variable
   will be created.  */
static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple *asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = TYPE_VECTOR_SUBPARTS (vect_type);

  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
         we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!tree_fits_uhwi_p (idx) || index >= elements)
        {
          index &= elements - 1;
          idx = build_int_cst (TREE_TYPE (idx), index);
        }

      /* When lowering a vector statement sequence do some easy
         simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
        {
          gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
          if (is_gimple_assign (def_stmt)
              && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
                  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
            vect = gimple_assign_rhs1 (def_stmt);
        }

      if (TREE_CODE (vect) == VECTOR_CST)
        return VECTOR_CST_ELT (vect, index);
      else if (TREE_CODE (vect) == CONSTRUCTOR
               && (CONSTRUCTOR_NELTS (vect) == 0
                   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
                      != VECTOR_TYPE))
        {
          if (index < CONSTRUCTOR_NELTS (vect))
            return CONSTRUCTOR_ELT (vect, index)->value;
          return build_zero_cst (vect_elt_type);
        }

      tree size = TYPE_SIZE (vect_elt_type);
      tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
                              size);
      return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
    }

  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
                 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
                 idx, NULL_TREE, NULL_TREE);
}

/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
   same number of arguments.  */
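/* For example, VEC_PERM_EXPR <{a,b,c,d}, {e,f,g,h}, {0,5,2,7}>
   yields {a, f, c, h}: selector values below 4 pick from the first
   vector, the others from the second.  */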
static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
  vec<constructor_elt, va_gc> *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  if (TREE_CODE (mask) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
      if (is_gimple_assign (def_stmt)
          && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
        mask = gimple_assign_rhs1 (def_stmt);
    }

  vec_perm_builder sel_int;

  if (TREE_CODE (mask) == VECTOR_CST
      && tree_to_vec_perm_builder (&sel_int, mask))
    {
      vec_perm_indices indices (sel_int, 2, elements);
      if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
        {
          gimple_assign_set_rhs3 (stmt, mask);
          update_stmt (stmt);
          return;
        }
      /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
         vector as VEC1 and a right element shift MASK.  */
      if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
          != CODE_FOR_nothing
          && TREE_CODE (vec1) == VECTOR_CST
          && initializer_zerop (vec1)
          && indices[0]
          && indices[0] < elements)
        {
          for (i = 1; i < elements; ++i)
            {
              unsigned int expected = i + indices[0];
              /* Indices into the second vector are all equivalent.  */
              if (MIN (elements, (unsigned) indices[i])
                  != MIN (elements, expected))
                break;
            }
          if (i == elements)
            {
              gimple_assign_set_rhs3 (stmt, mask);
              update_stmt (stmt);
              return;
            }
        }
    }
  else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
              "vector shuffling operation will be expanded piecewise");

  vec_alloc (v, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
        {
          unsigned HOST_WIDE_INT index;

          index = TREE_INT_CST_LOW (i_val);
          if (!tree_fits_uhwi_p (i_val) || index >= elements)
            i_val = build_int_cst (mask_elt_type, index & (elements - 1));

          if (two_operand_p && (index & elements) != 0)
            t = vector_element (gsi, vec1, i_val, &vec1tmp);
          else
            t = vector_element (gsi, vec0, i_val, &vec0tmp);

          t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
                                        true, GSI_SAME_STMT);
        }
      else
        {
          tree cond = NULL_TREE, v0_val;

          if (two_operand_p)
            {
              cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
                                  build_int_cst (mask_elt_type, elements));
              cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
                                               true, GSI_SAME_STMT);
            }

          i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
                               build_int_cst (mask_elt_type, elements - 1));
          i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
                                            true, GSI_SAME_STMT);

          v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
          v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
                                             true, GSI_SAME_STMT);

          if (two_operand_p)
            {
              tree v1_val;

              v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
              v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
                                                 true, GSI_SAME_STMT);

              cond = fold_build2 (EQ_EXPR, boolean_type_node,
                                  cond, build_zero_cst (mask_elt_type));
              cond = fold_build3 (COND_EXPR, vect_elt_type,
                                  cond, v0_val, v1_val);
              t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
                                            true, GSI_SAME_STMT);
            }
          else
            t = v0_val;
        }

      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

/* If OP is a uniform vector return the element it is a splat from.  */

static tree
ssa_uniform_vector_p (tree op)
{
  if (TREE_CODE (op) == VECTOR_CST
      || TREE_CODE (op) == VEC_DUPLICATE_EXPR
      || TREE_CODE (op) == CONSTRUCTOR)
    return uniform_vector_p (op);
  if (TREE_CODE (op) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (gimple_assign_single_p (def_stmt))
        return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
    }
  return NULL_TREE;
}

/* Return the type in which the operation CODE with optab OP can be
   computed.  */

static tree
get_compute_type (enum tree_code code, optab op, tree type)
{
  /* For very wide vectors, try using a smaller vector mode.  */
  tree compute_type = type;
  if (op
      && (!VECTOR_MODE_P (TYPE_MODE (type))
          || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
    {
      tree vector_compute_type
        = type_for_widest_vector_mode (TREE_TYPE (type), op);
      if (vector_compute_type != NULL_TREE
          && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
              < TYPE_VECTOR_SUBPARTS (compute_type))
          && TYPE_VECTOR_SUBPARTS (vector_compute_type) > 1
          && (optab_handler (op, TYPE_MODE (vector_compute_type))
              != CODE_FOR_nothing))
        compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      machine_mode compute_mode = TYPE_MODE (compute_type);
      if (VECTOR_MODE_P (compute_mode))
        {
          if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
            return compute_type;
          if (code == MULT_HIGHPART_EXPR
              && can_mult_highpart_p (compute_mode,
                                      TYPE_UNSIGNED (compute_type)))
            return compute_type;
        }
      /* There is no operation in hardware, so fall back to scalars.  */
      compute_type = TREE_TYPE (type);
    }

  return compute_type;
}

/* Helper function of expand_vector_operations_1.  Return number of
   vector elements for vector types or 1 for other types.  */

static inline int
count_type_subparts (tree type)
{
  return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
}

static tree
do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
         tree bitpos, tree bitsize, enum tree_code code,
         tree type ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
  return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
}

/* Expand a vector COND_EXPR to scalars, piecewise.  */
static void
expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
  machine_mode compute_mode = TYPE_MODE (compute_type);
  gcc_assert (compute_mode != BLKmode);
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree rhs3 = gimple_assign_rhs3 (stmt);
  tree new_rhs;

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
                                      COND_EXPR);
  else
    new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
                                       rhs2, rhs3, COND_EXPR);
  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
                               new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}

/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
  enum tree_code code;
  optab op = unknown_optab;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  /* Only consider code == GIMPLE_ASSIGN.  */
  gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
  if (!stmt)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (code == VEC_COND_EXPR)
    {
      expand_vector_condition (gsi);
      return;
    }

  if (code == COND_EXPR
      && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
      && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
    {
      expand_vector_scalar_condition (gsi);
      return;
    }

  if (code == CONSTRUCTOR
      && TREE_CODE (lhs) == SSA_NAME
      && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
      && !gimple_clobber_p (stmt)
      && optimize)
    {
      optimize_vector_constructor (gsi);
      return;
    }

  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (!VECTOR_TYPE_P (type)
      || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
    return;

  /* If the vector operation is operating on all same vector elements
     implement it with a scalar operation and a splat if the target
     supports the scalar operation.  */
  tree srhs1, srhs2 = NULL_TREE;
  if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
      && (rhs2 == NULL_TREE
          || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
              && (srhs2 = rhs2))
          || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
      /* As we query direct optabs restrict to non-convert operations.  */
      && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
    {
      op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
      if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
          && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
        {
          tree slhs = make_ssa_name (TREE_TYPE (srhs1));
          gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
          gsi_insert_before (gsi, repl, GSI_SAME_STMT);
          gimple_assign_set_rhs_from_tree (gsi,
                                           build_vector_from_val (type, slhs));
          update_stmt (stmt);
          return;
        }
    }

  /* A scalar operation pretending to be a vector one.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && !VECTOR_MODE_P (TYPE_MODE (type))
      && TYPE_MODE (type) != BLKmode)
    return;

  if (CONVERT_EXPR_CODE_P (code)
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  /* The signedness is determined from the input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    {
      type = TREE_TYPE (rhs1);
      /* We do not know how to scalarize those.  */
      return;
    }

  /* For widening/narrowing vector operations, the relevant type is of the
     arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     calculated in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_WIDEN_MULT_EVEN_EXPR
      || code == VEC_WIDEN_MULT_ODD_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    {
      type = TREE_TYPE (rhs1);
      /* We do not know how to scalarize those.  */
      return;
    }

  /* Choose between vector shift/rotate by vector and vector shift/rotate by
     scalar.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
         could be a scalar variable or a constant.  Transform
         vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
        {
          tree first;

          if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
            {
              gimple_assign_set_rhs2 (stmt, first);
              update_stmt (stmt);
              rhs2 = first;
            }
        }

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
        op = opv;
      else
        {
          op = optab_for_tree_code (code, type, optab_scalar);

          compute_type = get_compute_type (code, op, type);
          if (compute_type == type)
            return;
          /* The rtl expander will expand vector/scalar as vector/vector
             if necessary.  Pick one with wider vector type.  */
          tree compute_vtype = get_compute_type (code, opv, type);
          if (count_type_subparts (compute_vtype)
              > count_type_subparts (compute_type))
            {
              compute_type = compute_vtype;
              op = opv;
            }
        }

      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
        {
          if (compute_type == NULL_TREE)
            compute_type = get_compute_type (code, op, type);
          if (compute_type == type)
            return;
          /* Before splitting vector rotates into scalar rotates,
             see if we can't use vector shifts and BIT_IOR_EXPR
             instead.  For vector by vector rotates we'd also
             need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
             for now, fold doesn't seem to create such rotates anyway.  */
          if (compute_type == TREE_TYPE (type)
              && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
            {
              optab oplv = vashl_optab, opl = ashl_optab;
              optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
              tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
              tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
              tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
              tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
              tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
              /* The rtl expander will expand vector/scalar as vector/vector
                 if necessary.  Pick one with wider vector type.  */
              if (count_type_subparts (compute_lvtype)
                  > count_type_subparts (compute_ltype))
                {
                  compute_ltype = compute_lvtype;
                  opl = oplv;
                }
              if (count_type_subparts (compute_rvtype)
                  > count_type_subparts (compute_rtype))
                {
                  compute_rtype = compute_rvtype;
                  opr = oprv;
                }
              /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
                 BIT_IOR_EXPR.  */
              compute_type = compute_ltype;
              if (count_type_subparts (compute_type)
                  > count_type_subparts (compute_rtype))
                compute_type = compute_rtype;
              if (count_type_subparts (compute_type)
                  > count_type_subparts (compute_otype))
                compute_type = compute_otype;
              /* Verify all 3 operations can be performed in that type.  */
              if (compute_type != TREE_TYPE (type))
                {
                  if (optab_handler (opl, TYPE_MODE (compute_type))
                      == CODE_FOR_nothing
                      || optab_handler (opr, TYPE_MODE (compute_type))
                         == CODE_FOR_nothing
                      || optab_handler (opo, TYPE_MODE (compute_type))
                         == CODE_FOR_nothing)
                    compute_type = TREE_TYPE (type);
                }
            }
        }
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == unknown_optab
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  if (compute_type == NULL_TREE)
    compute_type = get_compute_type (code, op, type);
  if (compute_type == type)
    return;

  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
                               new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}

/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.  */

static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          expand_vector_operations_1 (&gsi);
          /* ???  If we do not cleanup EH then we will ICE in
             verification.  But in reality we have created wrong-code
             as we did not properly transition EH info and edges to
             the piecewise computations.  */
          if (maybe_clean_eh_stmt (gsi_stmt (gsi))
              && gimple_purge_dead_eh_edges (bb))
            cfg_changed = true;
        }
    }

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

namespace {

const pass_data pass_data_lower_vector =
{
  GIMPLE_PASS, /* type */
  "veclower", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_lower_vector : public gimple_opt_pass
{
public:
  pass_lower_vector (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_lvec);
    }

  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector (gcc::context *ctxt)
{
  return new pass_lower_vector (ctxt);
}

namespace {

const pass_data pass_data_lower_vector_ssa =
{
  GIMPLE_PASS, /* type */
  "veclower2", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_update_ssa
    | TODO_cleanup_cfg ), /* todo_flags_finish */
};

class pass_lower_vector_ssa : public gimple_opt_pass
{
public:
  pass_lower_vector_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector_ssa

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector_ssa (gcc::context *ctxt)
{
  return new pass_lower_vector_ssa (ctxt);
}

#include "gt-tree-vect-generic.h"