1 /* Transformations based on profile information for values.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 #include "coretypes.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
29 #include "value-prof.h"
32 #include "insn-config.h"
37 /* In this file value profile based optimizations will be placed (none are
38 here just now, but they are hopefully coming soon).
40 Every such optimization should add its requirements for profiled values to
41 insn_values_to_profile function. This function is called from branch_prob
42 in profile.c and the requested values are instrumented by it in the first
43 compilation with -fprofile-arcs. The optimization may then read the
44 gathered data in the second compilation with -fbranch-probabilities.
45 The measured data is appended as REG_VALUE_PROFILE note to the instrumented
46 insn. The argument to the note consists of an EXPR_LIST where its
47 members have the following meaning (from the first to the last):
49 -- type of information gathered (HIST_TYPE*)
50 -- the expression that is profiled
51 -- list of counters starting from the first one. */
53 static void insn_divmod_values_to_profile (rtx, unsigned *,
54 struct histogram_value **);
55 static void insn_values_to_profile (rtx, unsigned *, struct histogram_value **);
56 static rtx gen_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx,
58 static rtx gen_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx);
59 static rtx gen_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx,
61 static bool divmod_fixed_value_transform (rtx insn);
62 static bool mod_pow2_value_transform (rtx);
63 static bool mod_subtract_transform (rtx);
65 /* Release the list of VALUES of length N_VALUES for that we want to measure
68 free_profiled_values (unsigned n_values ATTRIBUTE_UNUSED,
69 struct histogram_value *values)
74 /* Find values inside INSN for that we want to measure histograms for
75 division/modulo optimization. */
77 insn_divmod_values_to_profile (rtx insn, unsigned *n_values,
78 struct histogram_value **values)
80 rtx set, set_src, op1, op2;
81 enum machine_mode mode;
86 set = single_set (insn);
90 mode = GET_MODE (SET_DEST (set));
91 if (!INTEGRAL_MODE_P (mode))
94 set_src = SET_SRC (set);
95 switch (GET_CODE (set_src))
101 op1 = XEXP (set_src, 0);
102 op2 = XEXP (set_src, 1);
103 if (side_effects_p (op2))
106 /* Check for a special case where the divisor is power of 2. */
107 if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2))
109 *values = xrealloc (*values,
111 * sizeof (struct histogram_value));
112 (*values)[*n_values].value = op2;
113 (*values)[*n_values].seq = NULL_RTX;
114 (*values)[*n_values].mode = mode;
115 (*values)[*n_values].insn = insn;
116 (*values)[*n_values].type = HIST_TYPE_POW2;
117 (*values)[*n_values].hdata.pow2.may_be_other = 1;
121 /* Check whether the divisor is not in fact a constant. */
122 if (!CONSTANT_P (op2))
124 *values = xrealloc (*values,
126 * sizeof (struct histogram_value));
127 (*values)[*n_values].value = op2;
128 (*values)[*n_values].mode = mode;
129 (*values)[*n_values].seq = NULL_RTX;
130 (*values)[*n_values].insn = insn;
131 (*values)[*n_values].type = HIST_TYPE_SINGLE_VALUE;
135 /* For mod, check whether it is not often a noop (or replaceable by
136 a few subtractions). */
137 if (GET_CODE (set_src) == UMOD && !side_effects_p (op1))
141 *values = xrealloc (*values,
143 * sizeof (struct histogram_value));
145 tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2));
146 (*values)[*n_values].value = force_operand (tmp, NULL_RTX);
147 (*values)[*n_values].seq = get_insns ();
149 (*values)[*n_values].mode = mode;
150 (*values)[*n_values].insn = insn;
151 (*values)[*n_values].type = HIST_TYPE_INTERVAL;
152 (*values)[*n_values].hdata.intvl.int_start = 0;
153 (*values)[*n_values].hdata.intvl.steps = 2;
154 (*values)[*n_values].hdata.intvl.may_be_less = 1;
155 (*values)[*n_values].hdata.intvl.may_be_more = 1;
165 /* Find values inside INSN for that we want to measure histograms and adds
166 them to list VALUES (increasing the record of its length in N_VALUES). */
168 insn_values_to_profile (rtx insn,
170 struct histogram_value **values)
172 if (flag_value_profile_transformations)
173 insn_divmod_values_to_profile (insn, n_values, values);
176 /* Find list of values for that we want to measure histograms. */
178 find_values_to_profile (unsigned *n_values, struct histogram_value **values)
185 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
186 insn_values_to_profile (insn, n_values, values);
188 for (i = 0; i < *n_values; i++)
190 switch ((*values)[i].type)
192 case HIST_TYPE_INTERVAL:
195 "Interval counter for insn %d, range %d -- %d.\n",
196 INSN_UID ((*values)[i].insn),
197 (*values)[i].hdata.intvl.int_start,
198 ((*values)[i].hdata.intvl.int_start
199 + (*values)[i].hdata.intvl.steps - 1));
200 (*values)[i].n_counters = (*values)[i].hdata.intvl.steps +
201 ((*values)[i].hdata.intvl.may_be_less ? 1 : 0) +
202 ((*values)[i].hdata.intvl.may_be_more ? 1 : 0);
208 "Pow2 counter for insn %d.\n",
209 INSN_UID ((*values)[i].insn));
210 (*values)[i].n_counters = GET_MODE_BITSIZE ((*values)[i].mode) +
211 ((*values)[i].hdata.pow2.may_be_other ? 1 : 0);
214 case HIST_TYPE_SINGLE_VALUE:
217 "Single value counter for insn %d.\n",
218 INSN_UID ((*values)[i].insn));
219 (*values)[i].n_counters = 3;
222 case HIST_TYPE_CONST_DELTA:
225 "Constant delta counter for insn %d.\n",
226 INSN_UID ((*values)[i].insn));
227 (*values)[i].n_counters = 4;
236 /* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses
237 them to identify and exploit properties of values that are hard to analyze
240 We do following transformations:
246 where b is almost always a constant N is transformed to
259 where b is almost always a power of 2 and the division is unsigned
260 TODO -- handle signed case as well
262 if ((b & (b - 1)) == 0)
267 Note that when b = 0, no error will occur and x = a; this is correct,
268 as result of such operation is undefined.
274 where a is almost always less then b and the division is unsigned
275 TODO -- handle signed case as well
285 where a is almost always less then 2 * b and the division is unsigned
286 TODO -- handle signed case as well
294 It would be possible to continue analogically for K * b for other small
295 K's, but it is probably not useful.
299 There are other useful cases that could be handled by a similar mechanism,
302 for (i = 0; i < n; i++)
305 transform to (for constant N):
308 for (i = 0; i < N; i++)
311 for (i = 0; i < n; i++)
313 making unroller happy. Since this may grow the code significantly,
314 we would have to be very careful here. */
317 value_profile_transformations (void)
322 for (insn = get_insns (); insn; insn = next)
324 next = NEXT_INSN (insn);
329 /* Scan for insn carrying a histogram. */
330 if (!find_reg_note (insn, REG_VALUE_PROFILE, 0))
333 /* Ignore cold areas -- we are growing a code. */
334 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
339 fprintf (dump_file, "Trying transformations on insn %d\n",
341 print_rtl_single (dump_file, insn);
344 /* Transformations: */
345 if (flag_value_profile_transformations
346 && (mod_subtract_transform (insn)
347 || divmod_fixed_value_transform (insn)
348 || mod_pow2_value_transform (insn)))
354 commit_edge_insertions ();
355 allocate_reg_info (max_reg_num (), FALSE, FALSE);
361 /* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
362 and OP2 whose value is expected to be VALUE and result TARGET). */
364 gen_divmod_fixed_value (enum machine_mode mode, enum rtx_code operation,
365 rtx target, rtx op1, rtx op2, gcov_type value)
368 rtx neq_label = gen_label_rtx ();
369 rtx end_label = gen_label_rtx ();
376 tmp = gen_reg_rtx (mode);
377 emit_move_insn (tmp, copy_rtx (op2));
382 do_compare_rtx_and_jump (tmp, GEN_INT (value), NE, 0, mode, NULL_RTX,
383 NULL_RTX, neq_label);
384 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), GEN_INT (value));
385 tmp1 = force_operand (tmp1, target);
387 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
389 emit_jump_insn (gen_jump (end_label));
392 emit_label (neq_label);
393 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp));
394 tmp1 = force_operand (tmp1, target);
396 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
398 emit_label (end_label);
400 sequence = get_insns ();
402 rebuild_jump_labels (sequence);
406 /* Do transform 1) on INSN if applicable. */
408 divmod_fixed_value_transform (rtx insn)
410 rtx set, set_src, set_dest, op1, op2, value, histogram;
412 enum machine_mode mode;
413 gcov_type val, count, all;
416 set = single_set (insn);
420 set_src = SET_SRC (set);
421 set_dest = SET_DEST (set);
422 code = GET_CODE (set_src);
423 mode = GET_MODE (set_dest);
425 if (code != DIV && code != MOD && code != UDIV && code != UMOD)
427 op1 = XEXP (set_src, false);
428 op2 = XEXP (set_src, 1);
430 for (histogram = REG_NOTES (insn);
432 histogram = XEXP (histogram, 1))
433 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
434 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE))
440 histogram = XEXP (XEXP (histogram, 0), 1);
441 value = XEXP (histogram, 0);
442 histogram = XEXP (histogram, 1);
443 val = INTVAL (XEXP (histogram, 0));
444 histogram = XEXP (histogram, 1);
445 count = INTVAL (XEXP (histogram, 0));
446 histogram = XEXP (histogram, 1);
447 all = INTVAL (XEXP (histogram, 0));
449 /* We require that count is at least half of all; this means
450 that for the transformation to fire the value must be constant
451 at least 50% of time (and 75% gives the guarantee of usage). */
452 if (!rtx_equal_p (op2, value) || 2 * count < all)
456 fprintf (dump_file, "Div/mod by constant transformation on insn %d\n",
459 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
462 insert_insn_on_edge (
463 gen_divmod_fixed_value (mode, code, set_dest, op1, op2, val), e);
468 /* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
469 and OP2 and result TARGET). */
471 gen_mod_pow2 (enum machine_mode mode, enum rtx_code operation, rtx target,
474 rtx tmp, tmp1, tmp2, tmp3;
475 rtx neq_label = gen_label_rtx ();
476 rtx end_label = gen_label_rtx ();
483 tmp = gen_reg_rtx (mode);
484 emit_move_insn (tmp, copy_rtx (op2));
489 tmp1 = expand_simple_binop (mode, PLUS, tmp, constm1_rtx, NULL_RTX,
491 tmp2 = expand_simple_binop (mode, AND, tmp, tmp1, NULL_RTX,
493 do_compare_rtx_and_jump (tmp2, const0_rtx, NE, 0, mode, NULL_RTX,
494 NULL_RTX, neq_label);
495 tmp3 = expand_simple_binop (mode, AND, op1, tmp1, target,
498 emit_move_insn (copy_rtx (target), tmp3);
499 emit_jump_insn (gen_jump (end_label));
502 emit_label (neq_label);
503 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp));
504 tmp1 = force_operand (tmp1, target);
506 emit_move_insn (target, tmp1);
508 emit_label (end_label);
510 sequence = get_insns ();
512 rebuild_jump_labels (sequence);
516 /* Do transform 2) on INSN if applicable. */
518 mod_pow2_value_transform (rtx insn)
520 rtx set, set_src, set_dest, op1, op2, value, histogram;
522 enum machine_mode mode;
523 gcov_type wrong_values, count;
527 set = single_set (insn);
531 set_src = SET_SRC (set);
532 set_dest = SET_DEST (set);
533 code = GET_CODE (set_src);
534 mode = GET_MODE (set_dest);
538 op1 = XEXP (set_src, 0);
539 op2 = XEXP (set_src, 1);
541 for (histogram = REG_NOTES (insn);
543 histogram = XEXP (histogram, 1))
544 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
545 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_POW2))
551 histogram = XEXP (XEXP (histogram, 0), 1);
552 value = XEXP (histogram, 0);
553 histogram = XEXP (histogram, 1);
554 wrong_values =INTVAL (XEXP (histogram, 0));
555 histogram = XEXP (histogram, 1);
558 for (i = 0; i < GET_MODE_BITSIZE (mode); i++)
560 count += INTVAL (XEXP (histogram, 0));
561 histogram = XEXP (histogram, 1);
564 if (!rtx_equal_p (op2, value))
567 /* We require that we hit a power of two at least half of all evaluations. */
568 if (count < wrong_values)
572 fprintf (dump_file, "Mod power of 2 transformation on insn %d\n",
575 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
578 insert_insn_on_edge (
579 gen_mod_pow2 (mode, code, set_dest, op1, op2), e);
584 /* Generate code for transformations 3 and 4 (with MODE and OPERATION,
585 operands OP1 and OP2, result TARGET and at most SUB subtractions). */
587 gen_mod_subtract (enum machine_mode mode, enum rtx_code operation,
588 rtx target, rtx op1, rtx op2, int sub)
591 rtx end_label = gen_label_rtx ();
599 tmp = gen_reg_rtx (mode);
600 emit_move_insn (tmp, copy_rtx (op2));
605 emit_move_insn (target, copy_rtx (op1));
606 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
607 NULL_RTX, end_label);
610 for (i = 0; i < sub; i++)
612 tmp1 = expand_simple_binop (mode, MINUS, target, tmp, target,
615 emit_move_insn (target, tmp1);
616 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
617 NULL_RTX, end_label);
620 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (target), copy_rtx (tmp));
621 tmp1 = force_operand (tmp1, target);
623 emit_move_insn (target, tmp1);
625 emit_label (end_label);
627 sequence = get_insns ();
629 rebuild_jump_labels (sequence);
633 /* Do transforms 3) and 4) on INSN if applicable. */
635 mod_subtract_transform (rtx insn)
637 rtx set, set_src, set_dest, op1, op2, value, histogram;
639 enum machine_mode mode;
640 gcov_type wrong_values, counts[2], count, all;
644 set = single_set (insn);
648 set_src = SET_SRC (set);
649 set_dest = SET_DEST (set);
650 code = GET_CODE (set_src);
651 mode = GET_MODE (set_dest);
655 op1 = XEXP (set_src, 0);
656 op2 = XEXP (set_src, 1);
658 for (histogram = REG_NOTES (insn);
660 histogram = XEXP (histogram, 1))
661 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
662 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL))
668 histogram = XEXP (XEXP (histogram, 0), 1);
669 value = XEXP (histogram, 0);
670 histogram = XEXP (histogram, 1);
673 for (i = 0; i < 2; i++)
675 counts[i] = INTVAL (XEXP (histogram, 0));
677 histogram = XEXP (histogram, 1);
679 wrong_values = INTVAL (XEXP (histogram, 0));
680 histogram = XEXP (histogram, 1);
681 wrong_values += INTVAL (XEXP (histogram, 0));
684 /* We require that we use just subtractions in at least 50% of all
687 for (i = 0; i < 2; i++)
690 if (count * 2 >= all)
698 fprintf (dump_file, "Mod subtract transformation on insn %d\n",
701 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
704 insert_insn_on_edge (
705 gen_mod_subtract (mode, code, set_dest, op1, op2, i), e);