From b6a753667ad2f372afc8e8d8bacbcc8fe47556cf Mon Sep 17 00:00:00 2001 From: hagog Date: Sun, 3 Apr 2005 09:27:07 +0000 Subject: [PATCH] 2005-03-31 Mostafa Hagog * cfg.c (clear_bb_flags): Don't clear BB_DISABLE_SCHEDULE. * modulo-sched.c (undo_replace_buff_elem): New structure. (kernel_number_of_cycles, ps_unschedule_node, undo_generate_reg_moves,free_undo_replace_buff, undo_permute_partial_schedule, loop_single_full_bb_p, SIMPLE_SMS_LOOP_P, loop_canon_p, canon_loop, build_loops_structure, get_sched_window): New. (generate_reg_moves): Return undo_replace_buff_elem and other fixes. (generate_prolog_epilog): Remove old loop versioning. (sms_schedule): Use loop information and loop_version. (sms_schedule_by_order): Split part of it to get_sched_window. * passes.c (rest_of_handle_sms): call cfg_layout_initialize cfg_layout_finalize and free_dominance_info before/after SMS. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@97484 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 17 + gcc/cfg.c | 2 +- gcc/modulo-sched.c | 987 ++++++++++++++++++++++++++++++++++++----------------- gcc/passes.c | 10 +- 4 files changed, 693 insertions(+), 323 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 16ac20a..1a87962 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,22 @@ 2005-04-03 Mostafa Hagog + * cfg.c (clear_bb_flags): Don't clear BB_DISABLE_SCHEDULE. + * modulo-sched.c (undo_replace_buff_elem): New structure. + (kernel_number_of_cycles, ps_unschedule_node, + undo_generate_reg_moves,free_undo_replace_buff, + undo_permute_partial_schedule, loop_single_full_bb_p, + SIMPLE_SMS_LOOP_P, loop_canon_p, canon_loop, + build_loops_structure, get_sched_window): New. + (generate_reg_moves): Return undo_replace_buff_elem and other + fixes. + (generate_prolog_epilog): Remove old loop versioning. + (sms_schedule): Use loop information and loop_version. + (sms_schedule_by_order): Split part of it to get_sched_window. + * passes.c (rest_of_handle_sms): call cfg_layout_initialize + cfg_layout_finalize and free_dominance_info before/after SMS. + +2005-04-03 Mostafa Hagog + * cfghooks.c (lv_flush_pending_stmts, cfg_hook_duplicate_loop_to_header_edge, extract_cond_bb_edges, lv_adjust_loop_header_phi, lv_add_condition_to_bb): New. diff --git a/gcc/cfg.c b/gcc/cfg.c index c0e38f2..b8dccb0 100644 --- a/gcc/cfg.c +++ b/gcc/cfg.c @@ -461,7 +461,7 @@ clear_bb_flags (void) basic_block bb; FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb) - bb->flags = BB_PARTITION (bb); + bb->flags = BB_PARTITION (bb) | (bb->flags & BB_DISABLE_SCHEDULE); } /* Check the consistency of profile information. We can't do that diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c index 950313e..a2443c1 100644 --- a/gcc/modulo-sched.c +++ b/gcc/modulo-sched.c @@ -145,16 +145,31 @@ struct partial_schedule ddg_ptr g; /* The DDG of the insns in the partial schedule. */ }; +/* We use this to record all the register replacements we do in + the kernel so we can undo SMS if it is not profitable. */ +struct undo_replace_buff_elem +{ + rtx insn; + rtx orig_reg; + rtx new_reg; + struct undo_replace_buff_elem *next; +}; -static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history); -static void free_partial_schedule (partial_schedule_ptr); -static void reset_partial_schedule (partial_schedule_ptr, int new_ii); + + +partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history); +void free_partial_schedule (partial_schedule_ptr); +void reset_partial_schedule (partial_schedule_ptr, int new_ii); void print_partial_schedule (partial_schedule_ptr, FILE *); +static int kernel_number_of_cycles (rtx first_insn, rtx last_insn); static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr, ddg_node_ptr node, int cycle, sbitmap must_precede, sbitmap must_follow); static void rotate_partial_schedule (partial_schedule_ptr, int); +void set_row_column_for_ps (partial_schedule_ptr); +static bool ps_unschedule_node (partial_schedule_ptr, ddg_node_ptr ); + /* This page defines constants and structures for the modulo scheduling driver. */ @@ -174,12 +189,11 @@ static void set_node_sched_params (ddg_ptr); static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *, FILE*); static void permute_partial_schedule (partial_schedule_ptr ps, rtx last); -static void generate_prolog_epilog (partial_schedule_ptr, rtx, rtx, int); +static void generate_prolog_epilog (partial_schedule_ptr ,struct loop * loop, rtx); static void duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage, int to_stage, int is_prolog); - #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap) #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time) #define SCHED_FIRST_REG_MOVE(x) \ @@ -458,12 +472,13 @@ calculate_maxii (ddg_ptr g) nreg_moves = ----------------------------------- + 1 - { dependence. ii { 1 if not. */ -static void +static struct undo_replace_buff_elem * generate_reg_moves (partial_schedule_ptr ps) { ddg_ptr g = ps->g; int ii = ps->ii; int i; + struct undo_replace_buff_elem *reg_move_replaces = NULL; for (i = 0; i < g->num_nodes; i++) { @@ -536,11 +551,77 @@ generate_reg_moves (partial_schedule_ptr ps) SCHED_FIRST_REG_MOVE (u) = reg_move; EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, - replace_rtx (g->nodes[i_use].insn, old_reg, new_reg)); + { + struct undo_replace_buff_elem *rep; + + rep = (struct undo_replace_buff_elem *) + xcalloc (1, sizeof (struct undo_replace_buff_elem)); + rep->insn = g->nodes[i_use].insn; + rep->orig_reg = old_reg; + rep->new_reg = new_reg; + + if (! reg_move_replaces) + reg_move_replaces = rep; + else + { + rep->next = reg_move_replaces; + reg_move_replaces = rep; + } + + replace_rtx (g->nodes[i_use].insn, old_reg, new_reg); + }); prev_reg = new_reg; } } + return reg_move_replaces; +} + +/* We call this when we want to undo the SMS schedule for a given loop. + One of the things that we do is to delete the register moves generated + for the sake of SMS; this function deletes the register move instructions + recorded in the undo buffer. */ +static void +undo_generate_reg_moves (partial_schedule_ptr ps, + struct undo_replace_buff_elem *reg_move_replaces) +{ + int i,j; + + for (i = 0; i < ps->g->num_nodes; i++) + { + ddg_node_ptr u = &ps->g->nodes[i]; + rtx prev; + rtx crr = SCHED_FIRST_REG_MOVE (u); + + for (j = 0; j < SCHED_NREG_MOVES (u); j++) + { + prev = PREV_INSN (crr); + delete_insn (crr); + crr = prev; + } + } + + while (reg_move_replaces) + { + struct undo_replace_buff_elem *rep = reg_move_replaces; + + reg_move_replaces = reg_move_replaces->next; + replace_rtx (rep->insn, rep->new_reg, rep->orig_reg); + } +} + +/* Free memory allocated for the undo buffer. */ +static void +free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces) +{ + + while (reg_move_replaces) + { + struct undo_replace_buff_elem *rep = reg_move_replaces; + + reg_move_replaces = reg_move_replaces->next; + free (rep); + } } /* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values @@ -601,6 +682,25 @@ permute_partial_schedule (partial_schedule_ptr ps, rtx last) PREV_INSN (last)); } +/* As part of undoing SMS we return to the original ordering of the + instructions inside the loop kernel. Given the partial schedule PS, this + function returns the ordering of the instruction according to their CUID + in the DDG (PS->G), which is the original order of the instruction before + performing SMS. */ +static void +undo_permute_partial_schedule (partial_schedule_ptr ps, rtx last) +{ + int i; + + for (i = 0 ; i < ps->g->num_nodes; i++) + if (last == ps->g->nodes[i].insn + || last == ps->g->nodes[i].first_note) + break; + else if (PREV_INSN (last) != ps->g->nodes[i].insn) + reorder_insns_nobb (ps->g->nodes[i].first_note, ps->g->nodes[i].insn, + PREV_INSN (last)); +} + /* Used to generate the prologue & epilogue. Duplicate the subset of nodes whose stages are between FROM_STAGE and TO_STAGE (inclusive of both), together with a prefix/suffix of their reg_moves. */ @@ -657,7 +757,6 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage, for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move)) emit_insn (copy_rtx (PATTERN (reg_move))); - if (SCHED_STAGE (u_node) >= from_stage && SCHED_STAGE (u_node) <= to_stage) duplicate_insn_chain (u_node->first_note, u_node->insn); @@ -667,39 +766,27 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage, /* Generate the instructions (including reg_moves) for prolog & epilog. */ static void -generate_prolog_epilog (partial_schedule_ptr ps, rtx orig_loop_beg, - rtx orig_loop_end, int unknown_count) +generate_prolog_epilog (partial_schedule_ptr ps, struct loop * loop, rtx count_reg) { int i; int last_stage = PS_STAGE_COUNT (ps) - 1; edge e; - rtx c_reg = NULL_RTX; - rtx cmp = NULL_RTX; - rtx precond_jump = NULL_RTX; - rtx precond_exit_label = NULL_RTX; - rtx precond_exit_label_insn = NULL_RTX; - rtx last_epilog_insn = NULL_RTX; - rtx loop_exit_label = NULL_RTX; - rtx loop_exit_label_insn = NULL_RTX; - rtx orig_loop_bct = NULL_RTX; - - /* Loop header edge. */ - e = EDGE_PRED (ps->g->bb, 0); - if (e->src == ps->g->bb) - e = EDGE_PRED (ps->g->bb, 1); /* Generate the prolog, inserting its insns on the loop-entry edge. */ start_sequence (); - /* This is the place where we want to insert the precondition. */ - if (unknown_count) - precond_jump = emit_note (NOTE_INSN_DELETED); + if (count_reg) + /* Generate a subtract instruction at the beginning of the prolog to + adjust the loop count by STAGE_COUNT. */ + emit_insn (gen_sub2_insn (count_reg, GEN_INT (last_stage))); for (i = 0; i < last_stage; i++) duplicate_insns_of_cycles (ps, 0, i, 1); - /* No need to call insert_insn_on_edge; we prepared the sequence. */ - e->insns.r = get_insns (); + /* Put the prolog , on the one and only entry edge. */ + e = loop_preheader_edge (loop); + loop_split_edge_with(e , get_insns()); + end_sequence (); /* Generate the epilog, inserting its insns on the loop-exit edge. */ @@ -708,107 +795,179 @@ generate_prolog_epilog (partial_schedule_ptr ps, rtx orig_loop_beg, for (i = 0; i < last_stage; i++) duplicate_insns_of_cycles (ps, i + 1, last_stage, 0); - last_epilog_insn = emit_note (NOTE_INSN_DELETED); + /* Put the epilogue on the one and only one exit edge. */ + gcc_assert (loop->single_exit); + e = loop->single_exit; + loop_split_edge_with(e , get_insns()); + end_sequence (); +} + +/* Return the line note insn preceding INSN, for debugging. Taken from + emit-rtl.c. */ +static rtx +find_line_note (rtx insn) +{ + for (; insn; insn = PREV_INSN (insn)) + if (NOTE_P (insn) + && NOTE_LINE_NUMBER (insn) >= 0) + break; + + return insn; +} - /* Emit the label where to put the original loop code. */ - if (unknown_count) +/* Return true if all the BBs of the loop are empty except the + loop header. */ +static bool +loop_single_full_bb_p (struct loop *loop) +{ + unsigned i; + basic_block *bbs = get_loop_body (loop); + + for (i = 0; i < loop->num_nodes ; i++) { - rtx label, cond; + rtx head, tail; + bool empty_bb = true; + + if (bbs[i] == loop->header) + continue; + + /* Make sure that basic blocks other than the header + have only notes labels or jumps. */ + get_block_head_tail (bbs[i]->index, &head, &tail); + for (; head != NEXT_INSN (tail); head = NEXT_INSN (head)) + { + if (NOTE_P (head) || LABEL_P (head) + || (INSN_P (head) && JUMP_P (head))) + continue; + empty_bb = false; + break; + } + + if (! empty_bb) + { + free (bbs); + return false; + } + } + free (bbs); + return true; +} - precond_exit_label = gen_label_rtx (); - precond_exit_label_insn = emit_label (precond_exit_label); +/* A simple loop from SMS point of view; it is a loop that is composed of + either a single basic block or two BBs - a header and a latch. */ +#define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \ + && (EDGE_COUNT (loop->latch->preds) == 1) \ + && (EDGE_COUNT (loop->latch->succs) == 1)) - /* Put the original loop code. */ - reorder_insns_nobb (orig_loop_beg, orig_loop_end, precond_exit_label_insn); +/* Return true if the loop is in its canonical form and false if not. + i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit. */ +static bool +loop_canon_p (struct loop *loop, FILE *dump_file) +{ - /* Change the label of the BCT to be the PRECOND_EXIT_LABEL. */ - orig_loop_bct = get_last_insn (); - c_reg = doloop_register_get (orig_loop_bct, &cmp); - label = XEXP (SET_SRC (cmp), 1); - cond = XEXP (SET_SRC (cmp), 0); + if (loop->inner || ! loop->outer) + return false; - if (! c_reg || GET_CODE (cond) != NE) - abort (); + if (!loop->single_exit) + { + if (dump_file) + { + rtx line_note = find_line_note (BB_END (loop->header)); - XEXP (label, 0) = precond_exit_label; - JUMP_LABEL (orig_loop_bct) = precond_exit_label_insn; - LABEL_NUSES (precond_exit_label_insn)++; + fprintf (dump_file, "SMS loop many exits "); + if (line_note) + { + expanded_location xloc; + NOTE_EXPANDED_LOCATION (xloc, line_note); + fprintf (stats_file, " %s %d (file, line)\n", + xloc.file, xloc.line); + } + } + return false; + } - /* Generate the loop exit label. */ - loop_exit_label = gen_label_rtx (); - loop_exit_label_insn = emit_label (loop_exit_label); + if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop)) + { + if (dump_file) + { + rtx line_note = find_line_note (BB_END (loop->header)); + + fprintf (dump_file, "SMS loop many BBs. "); + if (line_note) + { + expanded_location xloc; + NOTE_EXPANDED_LOCATION (xloc, line_note); + fprintf (stats_file, " %s %d (file, line)\n", + xloc.file, xloc.line); + } + } + return false; } - e = EDGE_SUCC (ps->g->bb, 0); - if (e->dest == ps->g->bb) - e = EDGE_SUCC (ps->g->bb, 1); + return true; +} - e->insns.r = get_insns (); - end_sequence (); +/* If there are more than one entry for the loop, + make it one by splitting the first entry edge and + redirecting the others to the new BB. */ +static void +canon_loop (struct loop *loop) +{ + edge e; + edge_iterator i; - commit_edge_insertions (); + /* Avoid annoying special cases of edges going to exit + block. */ + FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds) + if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1)) + loop_split_edge_with (e, NULL_RTX); - if (unknown_count) + if (loop->latch == loop->header + || EDGE_COUNT (loop->latch->succs) > 1) { - rtx precond_insns, epilog_jump, insert_after_insn; - basic_block loop_exit_bb = BLOCK_FOR_INSN (loop_exit_label_insn); - basic_block epilog_bb = BLOCK_FOR_INSN (last_epilog_insn); - basic_block precond_bb = BLOCK_FOR_INSN (precond_jump); - basic_block orig_loop_bb = BLOCK_FOR_INSN (precond_exit_label_insn); - edge epilog_exit_edge = single_succ_edge (epilog_bb); - - /* Do loop preconditioning to take care of cases were the loop count is - less than the stage count. Update the CFG properly. */ - insert_after_insn = precond_jump; - start_sequence (); - c_reg = doloop_register_get (ps->g->closing_branch->insn, &cmp); - emit_cmp_and_jump_insns (c_reg, GEN_INT (PS_STAGE_COUNT (ps)), LT, NULL, - GET_MODE (c_reg), 1, precond_exit_label); - precond_insns = get_insns (); - precond_jump = get_last_insn (); - end_sequence (); - reorder_insns (precond_insns, precond_jump, insert_after_insn); - - /* Generate a subtract instruction at the beginning of the prolog to - adjust the loop count by STAGE_COUNT. */ - emit_insn_after (gen_sub2_insn (c_reg, GEN_INT (PS_STAGE_COUNT (ps) - 1)), - precond_jump); - update_bb_for_insn (precond_bb); - delete_insn (insert_after_insn); - - /* Update label info for the precondition jump. */ - JUMP_LABEL (precond_jump) = precond_exit_label_insn; - LABEL_NUSES (precond_exit_label_insn)++; - - /* Update the CFG. */ - split_block (precond_bb, precond_jump); - make_edge (precond_bb, orig_loop_bb, 0); - - /* Add a jump at end of the epilog to the LOOP_EXIT_LABEL to jump over the - original loop copy and update the CFG. */ - epilog_jump = emit_jump_insn_after (gen_jump (loop_exit_label), - last_epilog_insn); - delete_insn (last_epilog_insn); - JUMP_LABEL (epilog_jump) = loop_exit_label_insn; - LABEL_NUSES (loop_exit_label_insn)++; - - redirect_edge_succ (epilog_exit_edge, loop_exit_bb); - epilog_exit_edge->flags &= ~EDGE_FALLTHRU; - emit_barrier_after (BB_END (epilog_bb)); + FOR_EACH_EDGE (e, i, loop->header->preds) + if (e->src == loop->latch) + break; + loop_split_edge_with (e, NULL_RTX); } } -/* Return the line note insn preceding INSN, for debugging. Taken from - emit-rtl.c. */ -static rtx -find_line_note (rtx insn) +/* Build the loop information without loop + canonization, the loop canonization will + be perfromed if the loop is SMSable. */ +static struct loops * +build_loops_structure (FILE *dumpfile) { - for (; insn; insn = PREV_INSN (insn)) - if (NOTE_P (insn) - && NOTE_LINE_NUMBER (insn) >= 0) - break; + struct loops *loops = xcalloc (1, sizeof (struct loops)); - return insn; + /* Find the loops. */ + + if (flow_loops_find (loops) <= 1) + { + /* No loops. */ + flow_loops_free (loops); + free (loops); + + return NULL; + } + + /* Not going to update these. */ + free (loops->cfg.rc_order); + loops->cfg.rc_order = NULL; + free (loops->cfg.dfs_order); + loops->cfg.dfs_order = NULL; + + create_preheaders (loops, CP_SIMPLE_PREHEADERS); + mark_single_exit_loops (loops); + /* Dump loops. */ + flow_loops_dump (loops, dumpfile, NULL, 1); + +#ifdef ENABLE_CHECKING + verify_dominators (CDI_DOMINATORS); + verify_loop_structure (loops); +#endif + + return loops; } /* Main entry point, perform SMS scheduling on the loops of the function @@ -819,13 +978,22 @@ sms_schedule (FILE *dump_file) static int passes = 0; rtx insn; ddg_ptr *g_arr, g; - basic_block bb, pre_header = NULL; int * node_order; int maxii; - int i; + unsigned i,num_loops; partial_schedule_ptr ps; - int max_bb_index = last_basic_block; struct df *df; + struct loops *loops; + basic_block bb = NULL; + /* vars to the versioning only if needed*/ + struct loop * nloop; + basic_block condition_bb = NULL; + edge latch_edge; + gcov_type trip_count = 0; + + if (! (loops = build_loops_structure (dump_file))) + return; /* There is no loops to schedule. */ + stats_file = dump_file; @@ -849,58 +1017,47 @@ sms_schedule (FILE *dump_file) df = df_init (); df_analyze (df, 0, DF_ALL); - /* Allocate memory to hold the DDG array. */ - g_arr = xcalloc (max_bb_index, sizeof (ddg_ptr)); + /* Allocate memory to hold the DDG array one entry for each loop. + We use loop->num as index into this array. */ + g_arr = xcalloc (loops->num, sizeof (ddg_ptr)); + /* Build DDGs for all the relevant loops and hold them in G_ARR - indexed by the loop BB index. */ - FOR_EACH_BB (bb) + indexed by the loop index. */ + for (i = 0; i < loops->num; i++) { rtx head, tail; rtx count_reg, comp; - edge e, pre_header_edge; - - if (bb->index < 0) - continue; + struct loop *loop = loops->parray[i]; - /* Check if bb has two successors, one being itself. */ - if (EDGE_COUNT (bb->succs) != 2) - continue; - - if (EDGE_SUCC (bb, 0)->dest != bb && EDGE_SUCC (bb, 1)->dest != bb) - continue; - - if ((EDGE_SUCC (bb, 0)->flags & EDGE_COMPLEX) - || (EDGE_SUCC (bb, 1)->flags & EDGE_COMPLEX)) - continue; + /* For debugging. */ + if ((passes++ > MAX_SMS_LOOP_NUMBER) && (MAX_SMS_LOOP_NUMBER != -1)) + { + if (dump_file) + fprintf (dump_file, "SMS reached MAX_PASSES... \n"); - /* Check if bb has two predecessors, one being itself. */ - if (EDGE_COUNT (bb->preds) != 2) - continue; + break; + } - if (EDGE_PRED (bb, 0)->src != bb && EDGE_PRED (bb, 1)->src != bb) - continue; + if (! loop_canon_p (loop, dump_file)) + continue; - if ((EDGE_PRED (bb, 0)->flags & EDGE_COMPLEX) - || (EDGE_PRED (bb, 1)->flags & EDGE_COMPLEX)) + if (! loop_single_full_bb_p (loop)) continue; - /* For debugging. */ - if ((passes++ > MAX_SMS_LOOP_NUMBER) && (MAX_SMS_LOOP_NUMBER != -1)) - { - if (dump_file) - fprintf (dump_file, "SMS reached MAX_PASSES... \n"); - break; - } + bb = loop->header; get_block_head_tail (bb->index, &head, &tail); - pre_header_edge = EDGE_PRED (bb, 0); - if (EDGE_PRED (bb, 0)->src != bb) - pre_header_edge = EDGE_PRED (bb, 1); + latch_edge = loop_latch_edge (loop); + gcc_assert (loop->single_exit); + if (loop->single_exit->count) + trip_count = latch_edge->count / loop->single_exit->count; /* Perfrom SMS only on loops that their average count is above threshold. */ - if (bb->count < pre_header_edge->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD) - { + + if ( latch_edge->count + && (latch_edge->count < loop->single_exit->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD)) + { if (stats_file) { rtx line_note = find_line_note (tail); @@ -919,10 +1076,10 @@ sms_schedule (FILE *dump_file) fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC, (HOST_WIDEST_INT) bb->count); fprintf (stats_file, "\n"); - fprintf (stats_file, "SMS preheader-count "); - fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC, - (HOST_WIDEST_INT) pre_header_edge->count); - fprintf (stats_file, "\n"); + fprintf (stats_file, "SMS trip-count "); + fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC, + (HOST_WIDEST_INT) trip_count); + fprintf (stats_file, "\n"); fprintf (stats_file, "SMS profile-sum-max "); fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC, (HOST_WIDEST_INT) profile_info->sum_max); @@ -936,12 +1093,6 @@ sms_schedule (FILE *dump_file) if ( !(count_reg = doloop_register_get (tail, &comp))) continue; - e = EDGE_PRED (bb, 0); - if (e->src == bb) - pre_header = EDGE_PRED (bb, 1)->src; - else - pre_header = e->src; - /* Don't handle BBs with calls or barriers, or !single_set insns. */ for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn)) if (CALL_P (insn) @@ -973,21 +1124,23 @@ sms_schedule (FILE *dump_file) continue; } - g_arr[bb->index] = g; + g_arr[i] = g; } /* Release Data Flow analysis data structures. */ df_finish (df); + /* We don't want to perform SMS on new loops - created by versioning. */ + num_loops = loops->num; /* Go over the built DDGs and perfrom SMS for each one of them. */ - for (i = 0; i < max_bb_index; i++) + for (i = 0; i < num_loops; i++) { rtx head, tail; rtx count_reg, count_init, comp; - edge pre_header_edge; int mii, rec_mii; - int stage_count = 0; + unsigned stage_count = 0; HOST_WIDEST_INT loop_count = 0; + struct loop *loop = loops->parray[i]; if (! (g = g_arr[i])) continue; @@ -995,11 +1148,12 @@ sms_schedule (FILE *dump_file) if (dump_file) print_ddg (dump_file, g); - get_block_head_tail (g->bb->index, &head, &tail); + get_block_head_tail (loop->header->index, &head, &tail); - pre_header_edge = EDGE_PRED (g->bb, 0); - if (EDGE_PRED (g->bb, 0)->src != g->bb) - pre_header_edge = EDGE_PRED (g->bb, 1); + latch_edge = loop_latch_edge (loop); + gcc_assert (loop->single_exit); + if (loop->single_exit->count) + trip_count = latch_edge->count / loop->single_exit->count; if (stats_file) { @@ -1019,10 +1173,6 @@ sms_schedule (FILE *dump_file) fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC, (HOST_WIDEST_INT) bb->count); fprintf (stats_file, "\n"); - fprintf (stats_file, "SMS preheader-count "); - fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC, - (HOST_WIDEST_INT) pre_header_edge->count); - fprintf (stats_file, "\n"); fprintf (stats_file, "SMS profile-sum-max "); fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC, (HOST_WIDEST_INT) profile_info->sum_max); @@ -1034,17 +1184,25 @@ sms_schedule (FILE *dump_file) fprintf (stats_file, "SMS num-stores %d\n", g->num_stores); } - /* Make sure this is a doloop. */ - if ( !(count_reg = doloop_register_get (tail, &comp))) - abort (); - /* This should be NULL_RTX if the count is unknown at compile time. */ - count_init = const_iteration_count (count_reg, pre_header, &loop_count); + /* In case of th loop have doloop register it gets special + handling. */ + count_init = NULL_RTX; + if ((count_reg = doloop_register_get (tail, &comp))) + { + basic_block pre_header; + + pre_header = loop_preheader_edge (loop)->src; + count_init = const_iteration_count (count_reg, pre_header, + &loop_count); + } + gcc_assert (count_reg); if (stats_file && count_init) { fprintf (stats_file, "SMS const-doloop "); - fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC, loop_count); + fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC, + loop_count); fprintf (stats_file, "\n"); } @@ -1068,46 +1226,40 @@ sms_schedule (FILE *dump_file) if (ps) stage_count = PS_STAGE_COUNT (ps); - if (stage_count == 0 || (count_init && (stage_count > loop_count))) + /* Stage count of 1 means that there is no interleaving between + iterations, let the scheduling passes do the job. */ + if (stage_count < 1 + || (count_init && (loop_count <= stage_count)) + || (flag_branch_probabilities && (trip_count <= stage_count))) { if (dump_file) - fprintf (dump_file, "SMS failed... \n"); - if (stats_file) - fprintf (stats_file, "SMS sched-failed %d\n", stage_count); + { + fprintf (dump_file, "SMS failed... \n"); + fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count); + fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count); + fprintf (dump_file, ", trip-count="); + fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count); + fprintf (dump_file, ")\n"); + } + continue; } else { - rtx orig_loop_beg = NULL_RTX; - rtx orig_loop_end = NULL_RTX; + int orig_cycles = kernel_number_of_cycles (BB_HEAD (g->bb), BB_END (g->bb)); + int new_cycles; + struct undo_replace_buff_elem *reg_move_replaces; if (stats_file) { fprintf (stats_file, "SMS succeeded %d %d (with ii, sc)\n", ps->ii, stage_count); - print_partial_schedule (ps, dump_file); - fprintf (dump_file, + print_partial_schedule (ps, stats_file); + fprintf (stats_file, "SMS Branch (%d) will later be scheduled at cycle %d.\n", g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1); } - /* Save the original loop if we want to do loop preconditioning in - case the BCT count is not known. */ - if (! count_init) - { - int i; - - start_sequence (); - /* Copy the original loop code before modifying it - - so we can use it later. */ - for (i = 0; i < ps->g->num_nodes; i++) - duplicate_insn_chain (ps->g->nodes[i].first_note, - ps->g->nodes[i].insn); - - orig_loop_beg = get_insns (); - orig_loop_end = get_last_insn (); - end_sequence (); - } /* Set the stage boundaries. If the DDG is built with closing_branch_deps, the closing_branch was scheduled and should appear in the last (ii-1) row. Otherwise, we are free to schedule the branch, and we let nodes @@ -1117,28 +1269,66 @@ sms_schedule (FILE *dump_file) rotate_partial_schedule (ps, PS_MIN_CYCLE (ps)); set_columns_for_ps (ps); + /* Generate the kernel just to be able to measure its cycles. */ permute_partial_schedule (ps, g->closing_branch->first_note); + reg_move_replaces = generate_reg_moves (ps); - /* Mark this loop as software pipelined so the later - scheduling passes doesn't touch it. */ - if (! flag_resched_modulo_sched) - g->bb->flags |= BB_DISABLE_SCHEDULE; - /* The life-info is not valid any more. */ - g->bb->flags |= BB_DIRTY; + /* Get the number of cycles the new kernel expect to execute in. */ + new_cycles = kernel_number_of_cycles (BB_HEAD (g->bb), BB_END (g->bb)); - generate_reg_moves (ps); - if (dump_file) - print_node_sched_params (dump_file, g->num_nodes); + /* Get back to the original loop so we can do loop versioning. */ + undo_permute_partial_schedule (ps, g->closing_branch->first_note); + if (reg_move_replaces) + undo_generate_reg_moves (ps, reg_move_replaces); + + if ( new_cycles >= orig_cycles) + { + /* SMS is not profitable so undo the permutation and reg move generation + and return the kernel to its original state. */ + if (dump_file) + fprintf (dump_file, "Undoing SMS becuase it is not profitable.\n"); + + } + else + { + canon_loop (loop); + + /* case the BCT count is not known , Do loop-versioning */ + if (count_reg && ! count_init) + { + rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg, + GEN_INT(stage_count)); - /* Set new iteration count of loop kernel. */ - if (count_init) - SET_SRC (single_set (count_init)) = GEN_INT (loop_count - - stage_count + 1); + nloop = loop_version (loops, loop, comp_rtx, &condition_bb); + } - /* Generate prolog and epilog. */ - generate_prolog_epilog (ps, orig_loop_beg, orig_loop_end, - count_init ? 0 : 1); + /* Set new iteration count of loop kernel. */ + if (count_reg && count_init) + SET_SRC (single_set (count_init)) = GEN_INT (loop_count + - stage_count + 1); + + /* Now apply the scheduled kernel to the RTL of the loop. */ + permute_partial_schedule (ps, g->closing_branch->first_note); + + /* Mark this loop as software pipelined so the later + scheduling passes doesn't touch it. */ + if (! flag_resched_modulo_sched) + g->bb->flags |= BB_DISABLE_SCHEDULE; + /* The life-info is not valid any more. */ + g->bb->flags |= BB_DIRTY; + + reg_move_replaces = generate_reg_moves (ps); + if (dump_file) + print_node_sched_params (dump_file, g->num_nodes); + /* Generate prolog and epilog. */ + if (count_reg && !count_init) + generate_prolog_epilog (ps, loop, count_reg); + else + generate_prolog_epilog (ps, loop, NULL_RTX); + } + free_undo_replace_buff (reg_move_replaces); } + free_partial_schedule (ps); free (node_sched_params); free (node_order); @@ -1147,6 +1337,7 @@ sms_schedule (FILE *dump_file) /* Release scheduler data, needed until now because of DFA. */ sched_finish (); + loop_optimizer_finalize (loops, dump_file); } /* The SMS scheduling algorithm itself @@ -1225,6 +1416,140 @@ sms_schedule (FILE *dump_file) set to 0 to save compile time. */ #define DFA_HISTORY SMS_DFA_HISTORY +/* Given the partial schedule PS, this function calculates and returns the + cycles in wich we can schedule the node with the given index I. + NOTE: Here we do the backtracking in SMS, in some special cases. We have + noticed that there are several cases in which we fail to SMS the loop + because the sched window of a node is empty due to tight data-deps. In + such cases we want to unschedule some of the predecssors/successors + until we get non-empty scheduling window. It returns -1 if the + scheduling window is empty and zero otherwise. */ + +static int +get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i, + sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p) +{ + int start, step, end; + ddg_edge_ptr e; + int u = nodes_order [i]; + ddg_node_ptr u_node = &ps->g->nodes[u]; + sbitmap psp = sbitmap_alloc (ps->g->num_nodes); + sbitmap pss = sbitmap_alloc (ps->g->num_nodes); + sbitmap u_node_preds = NODE_PREDECESSORS (u_node); + sbitmap u_node_succs = NODE_SUCCESSORS (u_node); + int psp_not_empty; + int pss_not_empty; + + /* 1. compute sched window for u (start, end, step). */ + sbitmap_zero (psp); + sbitmap_zero (pss); + psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes); + pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes); + + if (psp_not_empty && !pss_not_empty) + { + int early_start = INT_MIN; + + end = INT_MAX; + for (e = u_node->in; e != 0; e = e->next_in) + { + ddg_node_ptr v_node = e->src; + if (TEST_BIT (sched_nodes, v_node->cuid)) + { + int node_st = SCHED_TIME (v_node) + + e->latency - (e->distance * ii); + + early_start = MAX (early_start, node_st); + + if (e->data_type == MEM_DEP) + end = MIN (end, SCHED_TIME (v_node) + ii - 1); + } + } + start = early_start; + end = MIN (end, early_start + ii); + step = 1; + } + + else if (!psp_not_empty && pss_not_empty) + { + int late_start = INT_MAX; + + end = INT_MIN; + for (e = u_node->out; e != 0; e = e->next_out) + { + ddg_node_ptr v_node = e->dest; + if (TEST_BIT (sched_nodes, v_node->cuid)) + { + late_start = MIN (late_start, + SCHED_TIME (v_node) - e->latency + + (e->distance * ii)); + if (e->data_type == MEM_DEP) + end = MAX (end, SCHED_TIME (v_node) - ii + 1); + } + } + start = late_start; + end = MAX (end, late_start - ii); + step = -1; + } + + else if (psp_not_empty && pss_not_empty) + { + int early_start = INT_MIN; + int late_start = INT_MAX; + + start = INT_MIN; + end = INT_MAX; + for (e = u_node->in; e != 0; e = e->next_in) + { + ddg_node_ptr v_node = e->src; + + if (TEST_BIT (sched_nodes, v_node->cuid)) + { + early_start = MAX (early_start, + SCHED_TIME (v_node) + e->latency + - (e->distance * ii)); + if (e->data_type == MEM_DEP) + end = MIN (end, SCHED_TIME (v_node) + ii - 1); + } + } + for (e = u_node->out; e != 0; e = e->next_out) + { + ddg_node_ptr v_node = e->dest; + + if (TEST_BIT (sched_nodes, v_node->cuid)) + { + late_start = MIN (late_start, + SCHED_TIME (v_node) - e->latency + + (e->distance * ii)); + if (e->data_type == MEM_DEP) + start = MAX (start, SCHED_TIME (v_node) - ii + 1); + } + } + start = MAX (start, early_start); + end = MIN (end, MIN (early_start + ii, late_start + 1)); + step = 1; + } + else /* psp is empty && pss is empty. */ + { + start = SCHED_ASAP (u_node); + end = start + ii; + step = 1; + } + + *start_p = start; + *step_p = step; + *end_p = end; + sbitmap_free (psp); + sbitmap_free (pss); + + if ((start >= end && step == 1) || (start <= end && step == -1)) + return -1; + else + return 0; +} + +/* This function implements the scheduling algorithm for SMS according to the + above algorithm. */ static partial_schedule_ptr sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order, FILE *dump_file) { @@ -1237,126 +1562,70 @@ sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order, FILE *du sbitmap sched_nodes = sbitmap_alloc (num_nodes); sbitmap must_precede = sbitmap_alloc (num_nodes); sbitmap must_follow = sbitmap_alloc (num_nodes); + sbitmap tobe_scheduled = sbitmap_alloc (num_nodes); partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY); - while (try_again_with_larger_ii && ii < maxii) + sbitmap_ones (tobe_scheduled); + sbitmap_zero (sched_nodes); + + while ((! sbitmap_equal (tobe_scheduled, sched_nodes) + || try_again_with_larger_ii ) && ii < maxii) { + int j; + bool unscheduled_nodes = false; + if (dump_file) fprintf(dump_file, "Starting with ii=%d\n", ii); - try_again_with_larger_ii = false; - sbitmap_zero (sched_nodes); + if (try_again_with_larger_ii) + { + try_again_with_larger_ii = false; + sbitmap_zero (sched_nodes); + } for (i = 0; i < num_nodes; i++) { int u = nodes_order[i]; - ddg_node_ptr u_node = &g->nodes[u]; - sbitmap u_node_preds = NODE_PREDECESSORS (u_node); - sbitmap u_node_succs = NODE_SUCCESSORS (u_node); - int psp_not_empty; - int pss_not_empty; + ddg_node_ptr u_node = &ps->g->nodes[u]; rtx insn = u_node->insn; if (!INSN_P (insn)) - continue; - - if (JUMP_P (insn)) /* Closing branch handled later. */ - continue; - - /* 1. compute sched window for u (start, end, step). */ - psp_not_empty = sbitmap_any_common_bits (u_node_preds, sched_nodes); - pss_not_empty = sbitmap_any_common_bits (u_node_succs, sched_nodes); - - if (psp_not_empty && !pss_not_empty) { - int early_start = 0; - - end = INT_MAX; - for (e = u_node->in; e != 0; e = e->next_in) - { - ddg_node_ptr v_node = e->src; - if (TEST_BIT (sched_nodes, v_node->cuid)) - { - int node_st = SCHED_TIME (v_node) - + e->latency - (e->distance * ii); - - early_start = MAX (early_start, node_st); - - if (e->data_type == MEM_DEP) - end = MIN (end, SCHED_TIME (v_node) + ii - 1); - } - } - start = early_start; - end = MIN (end, early_start + ii); - step = 1; + RESET_BIT (tobe_scheduled, u); + continue; } - else if (!psp_not_empty && pss_not_empty) + if (JUMP_P (insn)) /* Closing branch handled later. */ { - int late_start = INT_MAX; - - end = INT_MIN; - for (e = u_node->out; e != 0; e = e->next_out) - { - ddg_node_ptr v_node = e->dest; - if (TEST_BIT (sched_nodes, v_node->cuid)) - { - late_start = MIN (late_start, - SCHED_TIME (v_node) - e->latency - + (e->distance * ii)); - if (e->data_type == MEM_DEP) - end = MAX (end, SCHED_TIME (v_node) - ii + 1); - } - } - start = late_start; - end = MAX (end, late_start - ii); - step = -1; + RESET_BIT (tobe_scheduled, u); + continue; } - else if (psp_not_empty && pss_not_empty) - { - int early_start = 0; - int late_start = INT_MAX; + if (TEST_BIT (sched_nodes, u)) + continue; - start = INT_MIN; - end = INT_MAX; - for (e = u_node->in; e != 0; e = e->next_in) - { - ddg_node_ptr v_node = e->src; - - if (TEST_BIT (sched_nodes, v_node->cuid)) - { - early_start = MAX (early_start, - SCHED_TIME (v_node) + e->latency - - (e->distance * ii)); - if (e->data_type == MEM_DEP) - end = MIN (end, SCHED_TIME (v_node) + ii - 1); - } - } - for (e = u_node->out; e != 0; e = e->next_out) + /* Try to get non-empty scheduling window. */ + j = i; + while (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start, &step, &end) < 0 + && j > 0) + { + unscheduled_nodes = true; + if (TEST_BIT (NODE_PREDECESSORS (u_node), nodes_order[j - 1]) + || TEST_BIT (NODE_SUCCESSORS (u_node), nodes_order[j - 1])) { - ddg_node_ptr v_node = e->dest; - - if (TEST_BIT (sched_nodes, v_node->cuid)) - { - late_start = MIN (late_start, - SCHED_TIME (v_node) - e->latency - + (e->distance * ii)); - if (e->data_type == MEM_DEP) - start = MAX (start, SCHED_TIME (v_node) - ii + 1); - } + ps_unschedule_node (ps, &ps->g->nodes[nodes_order[j - 1]]); + RESET_BIT (sched_nodes, nodes_order [j - 1]); } - start = MAX (start, early_start); - end = MIN (end, MIN (early_start + ii, late_start + 1)); - step = 1; + j--; } - else /* psp is empty && pss is empty. */ + if (j < 0) { - start = SCHED_ASAP (u_node); - end = start + ii; - step = 1; + /* ??? Try backtracking instead of immediately ii++? */ + ii++; + try_again_with_larger_ii = true; + reset_partial_schedule (ps, ii); + break; } - /* 2. Try scheduling u in window. */ if (dump_file) fprintf(dump_file, "Trying to schedule node %d in (%d .. %d) step %d\n", @@ -1406,6 +1675,9 @@ sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order, FILE *du reset_partial_schedule (ps, ii); break; } + if (unscheduled_nodes) + break; + /* ??? If (success), check register pressure estimates. */ } /* Continue with next node. */ } /* While try_again_with_larger_ii. */ @@ -1792,7 +2064,7 @@ order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc, modulo scheduling. */ /* Create a partial schedule and allocate a memory to hold II rows. */ -static partial_schedule_ptr +partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr g, int history) { partial_schedule_ptr ps = (partial_schedule_ptr) @@ -1828,7 +2100,7 @@ free_ps_insns (partial_schedule_ptr ps) } /* Free all the memory allocated to the partial schedule. */ -static void +void free_partial_schedule (partial_schedule_ptr ps) { if (!ps) @@ -1840,7 +2112,7 @@ free_partial_schedule (partial_schedule_ptr ps) /* Clear the rows array with its PS_INSNs, and create a new one with NEW_II rows. */ -static void +void reset_partial_schedule (partial_schedule_ptr ps, int new_ii) { if (!ps) @@ -1895,7 +2167,7 @@ create_ps_insn (ddg_node_ptr node, int rest_count, int cycle) /* Removes the given PS_INSN from the partial schedule. Returns false if the node is not found in the partial schedule, else returns true. */ -static int +static bool remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i) { int row; @@ -2083,6 +2355,58 @@ advance_one_cycle (void) (*targetm.sched.dfa_post_cycle_insn) ()); } +/* Given the kernel of a loop (from FIRST_INSN to LAST_INSN), finds + the number of cycles according to DFA that the kernel fits in, + we use this to check if we done well with SMS after we add + register moves. In some cases register moves overhead makes + it even worse than the original loop. We want SMS to be performed + when it gives less cycles after register moves are added. */ +static int +kernel_number_of_cycles (rtx first_insn, rtx last_insn) +{ + int cycles = 0; + rtx insn; + int can_issue_more = issue_rate; + + state_reset (curr_state); + + for (insn = first_insn; + insn != NULL_RTX && insn != last_insn; + insn = NEXT_INSN (insn)) + { + if (! INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE) + continue; + + /* Check if there is room for the current insn. */ + if (!can_issue_more || state_dead_lock_p (curr_state)) + { + cycles ++; + advance_one_cycle (); + can_issue_more = issue_rate; + } + + /* Update the DFA state and return with failure if the DFA found + recource conflicts. */ + if (state_transition (curr_state, insn) >= 0) + { + cycles ++; + advance_one_cycle (); + can_issue_more = issue_rate; + } + + if (targetm.sched.variable_issue) + can_issue_more = + (*targetm.sched.variable_issue) (sched_dump, sched_verbose, + insn, can_issue_more); + /* A naked CLOBBER or USE generates no instruction, so don't + let them consume issue slots. */ + else if (GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER) + can_issue_more--; + } + return cycles; +} + /* Checks if PS has resource conflicts according to DFA, starting from FROM cycle to TO cycle; returns true if there are conflicts and false if there are no conflicts. Assumes DFA is being used. */ @@ -2140,7 +2464,7 @@ ps_has_conflicts (partial_schedule_ptr ps, int from, int to) is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come before/after (respectively) the node pointed to by PS_I when scheduled in the same cycle. */ -static ps_insn_ptr +ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n, int c, sbitmap must_precede, sbitmap must_follow) @@ -2185,7 +2509,7 @@ ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n, /* Rotate the rows of PS such that insns scheduled at time START_CYCLE will appear in row 0. Updates max/min_cycles. */ -static void +void rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle) { int i, row, backward_rotates; @@ -2211,4 +2535,25 @@ rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle) ps->min_cycle -= start_cycle; } +/* Remove the node N from the partial schedule PS; becuase we restart the DFA + each time we want to check for resuorce conflicts; this is equivalent to + unscheduling the node N. */ +static bool +ps_unschedule_node (partial_schedule_ptr ps, ddg_node_ptr n) +{ + ps_insn_ptr ps_i; + int row = SMODULO (SCHED_TIME (n), ps->ii); + + if (row < 0 || row > ps->ii) + return false; + + for (ps_i = ps->rows[row]; + ps_i && ps_i->node != n; + ps_i = ps_i->next_in_row); + if (!ps_i) + return false; + + return remove_node_from_ps (ps, ps_i); +} #endif /* INSN_SCHEDULING*/ + diff --git a/gcc/passes.c b/gcc/passes.c index f98fbf8..40e87a4 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -577,6 +577,7 @@ rest_of_handle_partition_blocks (void) static void rest_of_handle_sms (void) { + basic_block bb; sbitmap blocks; timevar_push (TV_SMS); @@ -584,10 +585,11 @@ rest_of_handle_sms (void) /* We want to be able to create new pseudos. */ no_new_pseudos = 0; + /* Collect loop information to be used in SMS. */ + cfg_layout_initialize (CLEANUP_UPDATE_LIFE); sms_schedule (dump_file); close_dump_file (DFI_sms, print_rtl, get_insns ()); - /* Update the life information, because we add pseudos. */ max_regno = max_reg_num (); allocate_reg_info (max_regno, FALSE, FALSE); @@ -601,6 +603,12 @@ rest_of_handle_sms (void) no_new_pseudos = 1; + /* Finalize layout changes. */ + FOR_EACH_BB (bb) + if (bb->next_bb != EXIT_BLOCK_PTR) + bb->rbi->next = bb->next_bb; + cfg_layout_finalize (); + free_dominance_info (CDI_DOMINATORS); ggc_collect (); timevar_pop (TV_SMS); } -- 2.7.4