1 /* Lower complex number and vector operations to scalar operations.
2 Copyright (C) 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 #include "coretypes.h"
28 #include "insn-codes.h"
29 #include "diagnostic.h"
32 #include "langhooks.h"
33 #include "tree-flow.h"
34 #include "tree-gimple.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
41 /* Extract the real or imaginary part of a complex variable or constant.
42 Make sure that it's a proper gimple_val and gimplify it if not.
43 Emit any new code before BSI. */
46 extract_component (block_stmt_iterator *bsi, tree t, bool imagpart_p)
50 inner_type = TREE_TYPE (TREE_TYPE (t));
51 switch (TREE_CODE (t))
54 ret = (imagpart_p ? TREE_IMAGPART (t) : TREE_REALPART (t));
58 ret = TREE_OPERAND (t, imagpart_p);
63 ret = build1 ((imagpart_p ? IMAGPART_EXPR : REALPART_EXPR),
71 return gimplify_val (bsi, inner_type, ret);
74 /* Update an assignment to a complex variable in place. */
77 update_complex_assignment (block_stmt_iterator *bsi, tree r, tree i)
79 tree stmt = bsi_stmt (*bsi);
82 if (TREE_CODE (stmt) == RETURN_EXPR)
83 stmt = TREE_OPERAND (stmt, 0);
85 type = TREE_TYPE (TREE_OPERAND (stmt, 1));
86 TREE_OPERAND (stmt, 1) = build (COMPLEX_EXPR, type, r, i);
87 mark_stmt_modified (stmt);
90 /* Expand complex addition to scalars:
91 a + b = (ar + br) + i(ai + bi)
92 a - b = (ar - br) + i(ai - bi)
96 expand_complex_addition (block_stmt_iterator *bsi, tree inner_type,
97 tree ar, tree ai, tree br, tree bi,
102 rr = gimplify_build2 (bsi, code, inner_type, ar, br);
103 ri = gimplify_build2 (bsi, code, inner_type, ai, bi);
105 update_complex_assignment (bsi, rr, ri);
108 /* Expand a complex multiplication or division to a libcall to the c99
109 compliant routines. */
112 expand_complex_libcall (block_stmt_iterator *bsi, tree ar, tree ai,
113 tree br, tree bi, enum tree_code code)
115 enum machine_mode mode;
116 enum built_in_function bcode;
117 tree args, fn, stmt, type;
119 args = tree_cons (NULL, bi, NULL);
120 args = tree_cons (NULL, br, args);
121 args = tree_cons (NULL, ai, args);
122 args = tree_cons (NULL, ar, args);
124 stmt = bsi_stmt (*bsi);
125 type = TREE_TYPE (TREE_OPERAND (stmt, 1));
127 mode = TYPE_MODE (type);
128 gcc_assert (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT);
129 if (code == MULT_EXPR)
130 bcode = BUILT_IN_COMPLEX_MUL_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
131 else if (code == RDIV_EXPR)
132 bcode = BUILT_IN_COMPLEX_DIV_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
135 fn = built_in_decls[bcode];
137 TREE_OPERAND (stmt, 1)
138 = build3 (CALL_EXPR, type, build_fold_addr_expr (fn), args, NULL);
142 /* Expand complex multiplication to scalars:
143 a * b = (ar*br - ai*bi) + i(ar*bi + br*ai)
147 expand_complex_multiplication (block_stmt_iterator *bsi, tree inner_type,
148 tree ar, tree ai, tree br, tree bi)
150 tree t1, t2, t3, t4, rr, ri;
152 if (flag_complex_method == 2 && SCALAR_FLOAT_TYPE_P (inner_type))
154 expand_complex_libcall (bsi, ar, ai, br, bi, MULT_EXPR);
158 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
159 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
160 t3 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);
162 /* Avoid expanding redundant multiplication for the common
163 case of squaring a complex number. */
164 if (ar == br && ai == bi)
167 t4 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);
169 rr = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
170 ri = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t3, t4);
172 update_complex_assignment (bsi, rr, ri);
175 /* Expand complex division to scalars, straightforward algorithm.
176 a / b = ((ar*br + ai*bi)/t) + i((ai*br - ar*bi)/t)
181 expand_complex_div_straight (block_stmt_iterator *bsi, tree inner_type,
182 tree ar, tree ai, tree br, tree bi,
185 tree rr, ri, div, t1, t2, t3;
187 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, br, br);
188 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, bi, bi);
189 div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);
191 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
192 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
193 t3 = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);
194 rr = gimplify_build2 (bsi, code, inner_type, t3, div);
196 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);
197 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);
198 t3 = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
199 ri = gimplify_build2 (bsi, code, inner_type, t3, div);
201 update_complex_assignment (bsi, rr, ri);
204 /* Expand complex division to scalars, modified algorithm to minimize
205 overflow with wide input ranges. */
208 expand_complex_div_wide (block_stmt_iterator *bsi, tree inner_type,
209 tree ar, tree ai, tree br, tree bi,
212 tree rr, ri, ratio, div, t1, t2, tr, ti, cond;
213 basic_block bb_cond, bb_true, bb_false, bb_join;
215 /* Examine |br| < |bi|, and branch. */
216 t1 = gimplify_build1 (bsi, ABS_EXPR, inner_type, br);
217 t2 = gimplify_build1 (bsi, ABS_EXPR, inner_type, bi);
218 cond = fold (build (LT_EXPR, boolean_type_node, t1, t2));
221 bb_cond = bb_true = bb_false = bb_join = NULL;
222 rr = ri = tr = ti = NULL;
223 if (!TREE_CONSTANT (cond))
227 cond = build (COND_EXPR, void_type_node, cond, NULL, NULL);
228 bsi_insert_before (bsi, cond, BSI_SAME_STMT);
230 /* Split the original block, and create the TRUE and FALSE blocks. */
231 e = split_block (bsi->bb, cond);
234 bb_true = create_empty_bb (bb_cond);
235 bb_false = create_empty_bb (bb_true);
237 t1 = build (GOTO_EXPR, void_type_node, tree_block_label (bb_true));
238 t2 = build (GOTO_EXPR, void_type_node, tree_block_label (bb_false));
239 COND_EXPR_THEN (cond) = t1;
240 COND_EXPR_ELSE (cond) = t2;
242 /* Wire the blocks together. */
243 e->flags = EDGE_TRUE_VALUE;
244 redirect_edge_succ (e, bb_true);
245 make_edge (bb_cond, bb_false, EDGE_FALSE_VALUE);
246 make_edge (bb_true, bb_join, EDGE_FALLTHRU);
247 make_edge (bb_false, bb_join, EDGE_FALLTHRU);
249 /* Update dominance info. Note that bb_join's data was
250 updated by split_block. */
251 if (dom_info_available_p (CDI_DOMINATORS))
253 set_immediate_dominator (CDI_DOMINATORS, bb_true, bb_cond);
254 set_immediate_dominator (CDI_DOMINATORS, bb_false, bb_cond);
257 rr = make_rename_temp (inner_type, NULL);
258 ri = make_rename_temp (inner_type, NULL);
261 /* In the TRUE branch, we compute
263 div = (br * ratio) + bi;
264 tr = (ar * ratio) + ai;
265 ti = (ai * ratio) - ar;
268 if (bb_true || integer_nonzerop (cond))
272 *bsi = bsi_last (bb_true);
273 bsi_insert_after (bsi, build_empty_stmt (), BSI_NEW_STMT);
276 ratio = gimplify_build2 (bsi, code, inner_type, br, bi);
278 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, br, ratio);
279 div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, bi);
281 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, ratio);
282 tr = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, ai);
284 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, ratio);
285 ti = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, ar);
287 tr = gimplify_build2 (bsi, code, inner_type, tr, div);
288 ti = gimplify_build2 (bsi, code, inner_type, ti, div);
292 t1 = build (MODIFY_EXPR, inner_type, rr, tr);
293 bsi_insert_before (bsi, t1, BSI_SAME_STMT);
294 t1 = build (MODIFY_EXPR, inner_type, ri, ti);
295 bsi_insert_before (bsi, t1, BSI_SAME_STMT);
300 /* In the FALSE branch, we compute
302 div = (bi * ratio) + br;
303 tr = (ai * ratio) + ar;
304 ti = ai - (ar * ratio);
307 if (bb_false || integer_zerop (cond))
311 *bsi = bsi_last (bb_false);
312 bsi_insert_after (bsi, build_empty_stmt (), BSI_NEW_STMT);
315 ratio = gimplify_build2 (bsi, code, inner_type, bi, br);
317 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, bi, ratio);
318 div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, br);
320 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, ratio);
321 tr = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, ar);
323 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, ratio);
324 ti = gimplify_build2 (bsi, MINUS_EXPR, inner_type, ai, t1);
326 tr = gimplify_build2 (bsi, code, inner_type, tr, div);
327 ti = gimplify_build2 (bsi, code, inner_type, ti, div);
331 t1 = build (MODIFY_EXPR, inner_type, rr, tr);
332 bsi_insert_before (bsi, t1, BSI_SAME_STMT);
333 t1 = build (MODIFY_EXPR, inner_type, ri, ti);
334 bsi_insert_before (bsi, t1, BSI_SAME_STMT);
340 *bsi = bsi_start (bb_join);
344 update_complex_assignment (bsi, rr, ri);
347 /* Expand complex division to scalars. */
350 expand_complex_division (block_stmt_iterator *bsi, tree inner_type,
351 tree ar, tree ai, tree br, tree bi,
354 switch (flag_complex_method)
357 /* straightforward implementation of complex divide acceptable. */
358 expand_complex_div_straight (bsi, inner_type, ar, ai, br, bi, code);
362 if (SCALAR_FLOAT_TYPE_P (inner_type))
364 expand_complex_libcall (bsi, ar, ai, br, bi, code);
370 /* wide ranges of inputs must work for complex divide. */
371 expand_complex_div_wide (bsi, inner_type, ar, ai, br, bi, code);
379 /* Expand complex negation to scalars:
384 expand_complex_negation (block_stmt_iterator *bsi, tree inner_type,
389 rr = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ar);
390 ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);
392 update_complex_assignment (bsi, rr, ri);
395 /* Expand complex conjugate to scalars:
400 expand_complex_conjugate (block_stmt_iterator *bsi, tree inner_type,
405 ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);
407 update_complex_assignment (bsi, ar, ri);
410 /* Expand complex comparison (EQ or NE only). */
413 expand_complex_comparison (block_stmt_iterator *bsi, tree ar, tree ai,
414 tree br, tree bi, enum tree_code code)
416 tree cr, ci, cc, stmt, expr, type;
418 cr = gimplify_build2 (bsi, code, boolean_type_node, ar, br);
419 ci = gimplify_build2 (bsi, code, boolean_type_node, ai, bi);
420 cc = gimplify_build2 (bsi,
421 (code == EQ_EXPR ? TRUTH_AND_EXPR : TRUTH_OR_EXPR),
422 boolean_type_node, cr, ci);
424 stmt = expr = bsi_stmt (*bsi);
426 switch (TREE_CODE (stmt))
429 expr = TREE_OPERAND (stmt, 0);
432 type = TREE_TYPE (TREE_OPERAND (expr, 1));
433 TREE_OPERAND (expr, 1) = fold_convert (type, cc);
436 TREE_OPERAND (stmt, 0) = cc;
442 mark_stmt_modified (stmt);
445 /* Process one statement. If we identify a complex operation, expand it. */
448 expand_complex_operations_1 (block_stmt_iterator *bsi)
450 tree stmt = bsi_stmt (*bsi);
451 tree rhs, type, inner_type;
452 tree ac, ar, ai, bc, br, bi;
455 switch (TREE_CODE (stmt))
458 stmt = TREE_OPERAND (stmt, 0);
461 if (TREE_CODE (stmt) != MODIFY_EXPR)
466 rhs = TREE_OPERAND (stmt, 1);
470 rhs = TREE_OPERAND (stmt, 0);
477 type = TREE_TYPE (rhs);
478 code = TREE_CODE (rhs);
480 /* Initial filter for operations we handle. */
493 if (TREE_CODE (type) != COMPLEX_TYPE)
495 inner_type = TREE_TYPE (type);
500 inner_type = TREE_TYPE (TREE_OPERAND (rhs, 1));
501 if (TREE_CODE (inner_type) != COMPLEX_TYPE)
509 /* Extract the components of the two complex values. Make sure to
510 handle the common case of the same value used twice specially. */
511 ac = TREE_OPERAND (rhs, 0);
512 ar = extract_component (bsi, ac, 0);
513 ai = extract_component (bsi, ac, 1);
515 if (TREE_CODE_CLASS (code) == tcc_unary)
519 bc = TREE_OPERAND (rhs, 1);
524 br = extract_component (bsi, bc, 0);
525 bi = extract_component (bsi, bc, 1);
533 expand_complex_addition (bsi, inner_type, ar, ai, br, bi, code);
537 expand_complex_multiplication (bsi, inner_type, ar, ai, br, bi);
545 expand_complex_division (bsi, inner_type, ar, ai, br, bi, code);
549 expand_complex_negation (bsi, inner_type, ar, ai);
553 expand_complex_conjugate (bsi, inner_type, ar, ai);
558 expand_complex_comparison (bsi, ar, ai, br, bi, code);
564 update_stmt_if_modified (stmt);
567 /* Build a constant of type TYPE, made of VALUE's bits replicated
568 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
570 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
572 int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
573 int n = HOST_BITS_PER_WIDE_INT / width;
574 unsigned HOST_WIDE_INT low, high, mask;
579 if (width == HOST_BITS_PER_WIDE_INT)
583 mask = ((HOST_WIDE_INT)1 << width) - 1;
584 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
587 if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
588 low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
589 else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
591 else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
596 ret = build_int_cst_wide (type, low, high);
600 static GTY(()) tree vector_inner_type;
601 static GTY(()) tree vector_last_type;
602 static GTY(()) int vector_last_nunits;
604 /* Return a suitable vector type made of NUNITS units each of mode
605 "word_mode" (the global variable). */
607 build_word_mode_vector_type (int nunits)
609 if (!vector_inner_type)
610 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
611 else if (vector_last_nunits == nunits)
613 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
614 return vector_last_type;
617 /* We build a new type, but we canonicalize it nevertheless,
618 because it still saves some memory. */
619 vector_last_nunits = nunits;
620 vector_last_type = type_hash_canon (nunits,
621 build_vector_type (vector_inner_type,
623 return vector_last_type;
626 typedef tree (*elem_op_func) (block_stmt_iterator *,
627 tree, tree, tree, tree, tree, enum tree_code);
630 tree_vec_extract (block_stmt_iterator *bsi, tree type,
631 tree t, tree bitsize, tree bitpos)
634 return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
636 return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t);
640 do_unop (block_stmt_iterator *bsi, tree inner_type, tree a,
641 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
644 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
645 return gimplify_build1 (bsi, code, inner_type, a);
649 do_binop (block_stmt_iterator *bsi, tree inner_type, tree a, tree b,
650 tree bitpos, tree bitsize, enum tree_code code)
652 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
653 b = tree_vec_extract (bsi, inner_type, b, bitsize, bitpos);
654 return gimplify_build2 (bsi, code, inner_type, a, b);
657 /* Expand vector addition to scalars. This does bit twiddling
658 in order to increase parallelism:
660 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
663 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
664 (a ^ ~b) & 0x80808080
666 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
668 This optimization should be done only if 4 vector items or more
671 do_plus_minus (block_stmt_iterator *bsi, tree word_type, tree a, tree b,
672 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
675 tree inner_type = TREE_TYPE (TREE_TYPE (a));
676 unsigned HOST_WIDE_INT max;
677 tree low_bits, high_bits, a_low, b_low, result_low, signs;
679 max = GET_MODE_MASK (TYPE_MODE (inner_type));
680 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
681 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
683 a = tree_vec_extract (bsi, word_type, a, bitsize, bitpos);
684 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
686 signs = gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, a, b);
687 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
688 if (code == PLUS_EXPR)
689 a_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, a, low_bits);
692 a_low = gimplify_build2 (bsi, BIT_IOR_EXPR, word_type, a, high_bits);
693 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, signs);
696 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
697 result_low = gimplify_build2 (bsi, code, word_type, a_low, b_low);
698 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
702 do_negate (block_stmt_iterator *bsi, tree word_type, tree b,
703 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
704 tree bitsize ATTRIBUTE_UNUSED,
705 enum tree_code code ATTRIBUTE_UNUSED)
707 tree inner_type = TREE_TYPE (TREE_TYPE (b));
709 tree low_bits, high_bits, b_low, result_low, signs;
711 max = GET_MODE_MASK (TYPE_MODE (inner_type));
712 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
713 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
715 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
717 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
718 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, b);
719 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
720 result_low = gimplify_build2 (bsi, MINUS_EXPR, word_type, high_bits, b_low);
721 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
724 /* Expand a vector operation to scalars, by using many operations
725 whose type is the vector type's inner type. */
727 expand_vector_piecewise (block_stmt_iterator *bsi, elem_op_func f,
728 tree type, tree inner_type,
729 tree a, tree b, enum tree_code code)
731 tree head, *chain = &head;
732 tree part_width = TYPE_SIZE (inner_type);
733 tree index = bitsize_int (0);
734 int nunits = TYPE_VECTOR_SUBPARTS (type);
735 int delta = tree_low_cst (part_width, 1)
736 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
739 for (i = 0; i < nunits;
740 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0))
742 tree result = f (bsi, inner_type, a, b, index, part_width, code);
743 *chain = tree_cons (NULL_TREE, result, NULL_TREE);
744 chain = &TREE_CHAIN (*chain);
747 return build1 (CONSTRUCTOR, type, head);
750 /* Expand a vector operation to scalars with the freedom to use
751 a scalar integer type, or to use a different size for the items
752 in the vector type. */
754 expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type,
758 tree result, compute_type;
759 enum machine_mode mode;
760 int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
762 /* We have three strategies. If the type is already correct, just do
763 the operation an element at a time. Else, if the vector is wider than
764 one word, do it a word at a time; finally, if the vector is smaller
765 than one word, do it as a scalar. */
766 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
767 return expand_vector_piecewise (bsi, f,
768 type, TREE_TYPE (type),
770 else if (n_words > 1)
772 tree word_type = build_word_mode_vector_type (n_words);
773 result = expand_vector_piecewise (bsi, f,
774 word_type, TREE_TYPE (word_type),
776 result = gimplify_val (bsi, word_type, result);
780 /* Use a single scalar operation with a mode no wider than word_mode. */
781 mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
782 compute_type = lang_hooks.types.type_for_mode (mode, 1);
783 result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
786 return build1 (VIEW_CONVERT_EXPR, type, result);
789 /* Expand a vector operation to scalars; for integer types we can use
790 special bit twiddling tricks to do the sums a word at a time, using
791 function F_PARALLEL instead of F. These tricks are done only if
792 they can process at least four items, that is, only if the vector
793 holds at least four items and if a word can hold four items. */
795 expand_vector_addition (block_stmt_iterator *bsi,
796 elem_op_func f, elem_op_func f_parallel,
797 tree type, tree a, tree b, enum tree_code code)
799 int parts_per_word = UNITS_PER_WORD
800 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
802 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
803 && parts_per_word >= 4
804 && TYPE_VECTOR_SUBPARTS (type) >= 4)
805 return expand_vector_parallel (bsi, f_parallel,
808 return expand_vector_piecewise (bsi, f,
809 type, TREE_TYPE (type),
813 /* Return a type for the widest vector mode whose components are of mode
814 INNER_MODE and for which OP has a handler, or NULL_TREE if none is found. */
816 type_for_widest_vector_mode (enum machine_mode inner_mode, optab op)
818 enum machine_mode best_mode = VOIDmode, mode;
821 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
822 mode = MIN_MODE_VECTOR_FLOAT;
824 mode = MIN_MODE_VECTOR_INT;
826 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
827 if (GET_MODE_INNER (mode) == inner_mode
828 && GET_MODE_NUNITS (mode) > best_nunits
829 && op->handlers[mode].insn_code != CODE_FOR_nothing)
830 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
832 if (best_mode == VOIDmode)
835 return lang_hooks.types.type_for_mode (best_mode, 1);
838 /* Process one statement. If we identify a vector operation, expand it. */
841 expand_vector_operations_1 (block_stmt_iterator *bsi)
843 tree stmt = bsi_stmt (*bsi);
844 tree *p_rhs, rhs, type, compute_type;
846 enum machine_mode compute_mode;
849 switch (TREE_CODE (stmt))
852 stmt = TREE_OPERAND (stmt, 0);
853 if (!stmt || TREE_CODE (stmt) != MODIFY_EXPR)
859 p_rhs = &TREE_OPERAND (stmt, 1);
867 type = TREE_TYPE (rhs);
868 if (TREE_CODE (type) != VECTOR_TYPE)
871 code = TREE_CODE (rhs);
872 if (TREE_CODE_CLASS (code) != tcc_unary
873 && TREE_CODE_CLASS (code) != tcc_binary)
876 if (code == NOP_EXPR || code == VIEW_CONVERT_EXPR)
879 gcc_assert (code != CONVERT_EXPR);
880 op = optab_for_tree_code (code, type);
882 /* Optabs will try converting a negation into a subtraction, so
883 look for it as well. TODO: negation of floating-point vectors
884 might be turned into an exclusive OR toggling the sign bit. */
886 && code == NEGATE_EXPR
887 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
888 op = optab_for_tree_code (MINUS_EXPR, type);
890 /* For very wide vectors, try using a smaller vector mode. */
892 if (TYPE_MODE (type) == BLKmode && op)
894 tree vector_compute_type
895 = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op);
896 if (vector_compute_type != NULL_TREE)
897 compute_type = vector_compute_type;
900 compute_mode = TYPE_MODE (compute_type);
902 /* If we are breaking a BLKmode vector into smaller pieces,
903 type_for_widest_vector_mode has already looked into the optab,
904 so skip these checks. */
905 if (compute_type == type)
907 if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
908 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT)
910 && op->handlers[compute_mode].insn_code != CODE_FOR_nothing)
914 /* There is no operation in hardware, so fall back to scalars. */
915 compute_type = TREE_TYPE (type);
916 compute_mode = TYPE_MODE (compute_type);
920 /* If the compute mode is not a vector mode (hence we are decomposing
921 a BLKmode vector to smaller, hardware-supported vectors), we may
922 want to expand the operations in parallel. */
923 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
924 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT)
929 if (TYPE_TRAP_SIGNED (type))
932 *p_rhs = expand_vector_addition (bsi, do_binop, do_plus_minus, type,
933 TREE_OPERAND (rhs, 0),
934 TREE_OPERAND (rhs, 1), code);
935 mark_stmt_modified (bsi_stmt (*bsi));
939 if (TYPE_TRAP_SIGNED (type))
942 *p_rhs = expand_vector_addition (bsi, do_unop, do_negate, type,
943 TREE_OPERAND (rhs, 0),
945 mark_stmt_modified (bsi_stmt (*bsi));
951 *p_rhs = expand_vector_parallel (bsi, do_binop, type,
952 TREE_OPERAND (rhs, 0),
953 TREE_OPERAND (rhs, 1), code);
954 mark_stmt_modified (bsi_stmt (*bsi));
958 *p_rhs = expand_vector_parallel (bsi, do_unop, type,
959 TREE_OPERAND (rhs, 0),
961 mark_stmt_modified (bsi_stmt (*bsi));
968 if (TREE_CODE_CLASS (code) == tcc_unary)
969 *p_rhs = expand_vector_piecewise (bsi, do_unop, type, compute_type,
970 TREE_OPERAND (rhs, 0),
973 *p_rhs = expand_vector_piecewise (bsi, do_binop, type, compute_type,
974 TREE_OPERAND (rhs, 0),
975 TREE_OPERAND (rhs, 1), code);
977 mark_stmt_modified (bsi_stmt (*bsi));
981 expand_vector_operations (void)
983 block_stmt_iterator bsi;
988 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
989 expand_vector_operations_1 (&bsi);
994 tree_lower_operations (void)
996 int old_last_basic_block = last_basic_block;
997 block_stmt_iterator bsi;
1002 if (bb->index >= old_last_basic_block)
1004 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1006 expand_complex_operations_1 (&bsi);
1007 expand_vector_operations_1 (&bsi);
1013 struct tree_opt_pass pass_lower_vector_ssa =
1015 "vector", /* name */
1017 expand_vector_operations, /* execute */
1020 0, /* static_pass_number */
1022 PROP_cfg, /* properties_required */
1023 0, /* properties_provided */
1024 0, /* properties_destroyed */
1025 0, /* todo_flags_start */
1026 TODO_dump_func | TODO_rename_vars /* todo_flags_finish */
1027 | TODO_ggc_collect | TODO_verify_ssa
1028 | TODO_verify_stmts | TODO_verify_flow,
1032 struct tree_opt_pass pass_pre_expand =
1034 "oplower", /* name */
1036 tree_lower_operations, /* execute */
1039 0, /* static_pass_number */
1041 PROP_cfg, /* properties_required */
1042 0, /* properties_provided */
1043 0, /* properties_destroyed */
1044 0, /* todo_flags_start */
1045 TODO_dump_func | TODO_ggc_collect
1046 | TODO_verify_stmts, /* todo_flags_finish */
1050 #include "gt-tree-complex.h"