From 6e27bc2b885207d51500b2c42f949ca5073dbe72 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 9 Sep 2021 10:52:12 +0200 Subject: [PATCH] Avoid full DOM walk in LIM fill_always_executed_in This avoids a full DOM walk via get_loop_body_in_dom_order in the loop body walk of fill_always_executed_in which is often terminating the walk of a loop body early by integrating the DOM walk of get_loop_body_in_dom_order with the actual processing done by fill_always_executed_in. This trades the fully populated loop body array with a worklist allocation of the same size and thus should be a strict improvement over the recursive approach of get_loop_body_in_dom_order. 2021-09-09 Richard Biener * tree-ssa-loop-im.c (fill_always_executed_in_1): Integrate DOM walk from get_loop_body_in_dom_order using a worklist approach. --- gcc/tree-ssa-loop-im.c | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index 01f3fc2..5d68454 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -3030,19 +3030,19 @@ do_store_motion (void) static void fill_always_executed_in_1 (class loop *loop, sbitmap contains_call) { - basic_block bb = NULL, *bbs, last = NULL; - unsigned i; + basic_block bb = NULL, last = NULL; edge e; class loop *inn_loop = loop; if (ALWAYS_EXECUTED_IN (loop->header) == NULL) { - bbs = get_loop_body_in_dom_order (loop); - - for (i = 0; i < loop->num_nodes; i++) + auto_vec worklist; + worklist.reserve_exact (loop->num_nodes); + worklist.quick_push (loop->header); + do { edge_iterator ei; - bb = bbs[i]; + bb = worklist.pop (); if (!flow_bb_inside_loop_p (inn_loop, bb)) { @@ -3083,7 +3083,32 @@ fill_always_executed_in_1 (class loop *loop, sbitmap contains_call) since it might not be finite. */ inn_loop = bb->loop_father; } + + /* Walk the body of LOOP sorted by dominance relation. Additionally, + if a basic block S dominates the latch, then only blocks dominated + by S are after it. + This is get_loop_body_in_dom_order using a worklist algorithm and + stopping once we are no longer interested in visiting further + blocks. */ + unsigned old_len = worklist.length (); + unsigned postpone = 0; + for (basic_block son = first_dom_son (CDI_DOMINATORS, bb); + son; + son = next_dom_son (CDI_DOMINATORS, son)) + { + if (!flow_bb_inside_loop_p (loop, son)) + continue; + if (dominated_by_p (CDI_DOMINATORS, loop->latch, son)) + postpone = worklist.length (); + worklist.quick_push (son); + } + if (postpone) + /* Postponing the block that dominates the latch means + processing it last and thus putting it earliest in the + worklist. */ + std::swap (worklist[old_len], worklist[postpone]); } + while (!worklist.is_empty ()); while (1) { @@ -3095,8 +3120,6 @@ fill_always_executed_in_1 (class loop *loop, sbitmap contains_call) break; last = get_immediate_dominator (CDI_DOMINATORS, last); } - - free (bbs); } for (loop = loop->inner; loop; loop = loop->next) -- 2.7.4