From: titzer Date: Mon, 19 Jan 2015 15:19:43 +0000 (-0800) Subject: [turbofan] Improve loop analysis to handle more than 32 loops. X-Git-Tag: upstream/4.7.83~4900 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=36003d07eb650d8c2dddae95ad136aac17c83c0c;p=platform%2Fupstream%2Fv8.git [turbofan] Improve loop analysis to handle more than 32 loops. R=mstarzinger@chromium.org BUG= Review URL: https://codereview.chromium.org/855653002 Cr-Commit-Position: refs/heads/master@{#26140} --- diff --git a/src/compiler/loop-analysis.cc b/src/compiler/loop-analysis.cc index e839130..8a9698b 100644 --- a/src/compiler/loop-analysis.cc +++ b/src/compiler/loop-analysis.cc @@ -14,50 +14,18 @@ namespace v8 { namespace internal { namespace compiler { -typedef uint32_t LoopMarks; - +#define OFFSET(x) ((x)&0x1f) +#define BIT(x) (1u << OFFSET(x)) +#define INDEX(x) ((x) >> 5) // TODO(titzer): don't assume entry edges have a particular index. -// TODO(titzer): use a BitMatrix to generalize this algorithm. -static const size_t kMaxLoops = 31; static const int kAssumedLoopEntryIndex = 0; // assume loops are entered here. -static const LoopMarks kVisited = 1; // loop #0 is reserved. // Temporary information for each node during marking. struct NodeInfo { Node* node; NodeInfo* next; // link in chaining loop members - LoopMarks forward; // accumulated marks in the forward direction - LoopMarks backward; // accumulated marks in the backward direction - LoopMarks loop_mark; // loop mark for header nodes; encodes loop_num - - bool MarkBackward(LoopMarks bw) { - LoopMarks prev = backward; - LoopMarks next = backward | bw; - backward = next; - return prev != next; - } - - bool MarkForward(LoopMarks fw) { - LoopMarks prev = forward; - LoopMarks next = forward | fw; - forward = next; - return prev != next; - } - - bool IsInLoop(size_t loop_num) { - DCHECK(loop_num > 0 && loop_num <= 31); - return forward & backward & (1 << loop_num); - } - - bool IsLoopHeader() { return loop_mark != 0; } - bool IsInAnyLoop() { return (forward & backward) > kVisited; } - - bool IsInHeaderForLoop(size_t loop_num) { - DCHECK(loop_num > 0); - return loop_mark == (kVisited | (1 << loop_num)); - } }; @@ -70,9 +38,6 @@ struct LoopInfo { }; -static const NodeInfo kEmptyNodeInfo = {nullptr, nullptr, 0, 0, 0}; - - // Encapsulation of the loop finding algorithm. // ----------------------------------------------------------------------------- // Conceptually, the contents of a loop are those nodes that are "between" the @@ -90,13 +55,18 @@ static const NodeInfo kEmptyNodeInfo = {nullptr, nullptr, 0, 0, 0}; class LoopFinderImpl { public: LoopFinderImpl(Graph* graph, LoopTree* loop_tree, Zone* zone) - : end_(graph->end()), + : zone_(zone), + end_(graph->end()), queue_(zone), queued_(graph, 2), - info_(graph->NodeCount(), kEmptyNodeInfo, zone), + info_(graph->NodeCount(), {nullptr, nullptr}, zone), loops_(zone), + loop_num_(graph->NodeCount(), -1, zone), loop_tree_(loop_tree), - loops_found_(0) {} + loops_found_(0), + width_(0), + backward_(nullptr), + forward_(nullptr) {} void Run() { PropagateBackward(); @@ -108,12 +78,15 @@ class LoopFinderImpl { // Print out the results. for (NodeInfo& ni : info_) { if (ni.node == nullptr) continue; - for (size_t i = 1; i <= loops_.size(); i++) { - if (ni.IsInLoop(i)) { + for (int i = 1; i <= loops_found_; i++) { + int index = ni.node->id() * width_ + INDEX(i); + bool marked_forward = forward_[index] & BIT(i); + bool marked_backward = backward_[index] & BIT(i); + if (marked_forward && marked_backward) { PrintF("X"); - } else if (ni.forward & (1 << i)) { + } else if (marked_forward) { PrintF("/"); - } else if (ni.backward & (1 << i)) { + } else if (marked_backward) { PrintF("\\"); } else { PrintF(" "); @@ -134,113 +107,189 @@ class LoopFinderImpl { } private: + Zone* zone_; Node* end_; NodeDeque queue_; NodeMarker queued_; ZoneVector info_; ZoneVector loops_; + ZoneVector loop_num_; LoopTree* loop_tree_; - size_t loops_found_; + int loops_found_; + int width_; + uint32_t* backward_; + uint32_t* forward_; + + int num_nodes() { + return static_cast(loop_tree_->node_to_loop_num_.size()); + } + + // Tb = Tb | (Fb - loop_filter) + bool PropagateBackwardMarks(Node* from, Node* to, int loop_filter) { + if (from == to) return false; + uint32_t* fp = &backward_[from->id() * width_]; + uint32_t* tp = &backward_[to->id() * width_]; + bool change = false; + for (int i = 0; i < width_; i++) { + uint32_t mask = i == INDEX(loop_filter) ? ~BIT(loop_filter) : 0xFFFFFFFF; + uint32_t prev = tp[i]; + uint32_t next = prev | (fp[i] & mask); + tp[i] = next; + if (!change && (prev != next)) change = true; + } + return change; + } + + // Tb = Tb | B + bool SetBackwardMark(Node* to, int loop_num) { + uint32_t* tp = &backward_[to->id() * width_ + INDEX(loop_num)]; + uint32_t prev = tp[0]; + uint32_t next = prev | BIT(loop_num); + tp[0] = next; + return next != prev; + } + + // Tf = Tf | B + bool SetForwardMark(Node* to, int loop_num) { + uint32_t* tp = &forward_[to->id() * width_ + INDEX(loop_num)]; + uint32_t prev = tp[0]; + uint32_t next = prev | BIT(loop_num); + tp[0] = next; + return next != prev; + } + + // Tf = Tf | (Ff & Tb) + bool PropagateForwardMarks(Node* from, Node* to) { + if (from == to) return false; + bool change = false; + int findex = from->id() * width_; + int tindex = to->id() * width_; + for (int i = 0; i < width_; i++) { + uint32_t marks = backward_[tindex + i] & forward_[findex + i]; + uint32_t prev = forward_[tindex + i]; + uint32_t next = prev | marks; + forward_[tindex + i] = next; + if (!change && (prev != next)) change = true; + } + return change; + } + + bool IsInLoop(Node* node, int loop_num) { + int offset = node->id() * width_ + INDEX(loop_num); + return backward_[offset] & forward_[offset] & BIT(loop_num); + } // Propagate marks backward from loop headers. void PropagateBackward() { - PropagateBackward(end_, kVisited); + ResizeBackwardMarks(); + SetBackwardMark(end_, 0); + Queue(end_); while (!queue_.empty()) { Node* node = queue_.front(); + info(node); queue_.pop_front(); queued_.Set(node, false); + int loop_num = -1; // Setup loop headers first. - NodeInfo& ni = info(node); if (node->opcode() == IrOpcode::kLoop) { // found the loop node first. - CreateLoopInfo(node, ni); + loop_num = CreateLoopInfo(node); } else if (node->opcode() == IrOpcode::kPhi || node->opcode() == IrOpcode::kEffectPhi) { // found a phi first. Node* merge = node->InputAt(node->InputCount() - 1); if (merge->opcode() == IrOpcode::kLoop) { - NodeInfo& mi = info(merge); - CreateLoopInfo(merge, mi); - ni.MarkBackward(mi.loop_mark); + loop_num = CreateLoopInfo(merge); } } - // Propagate reachability marks backwards from this node. - if (ni.IsLoopHeader()) { - // Handle edges from loop header nodes specially. - for (int i = 0; i < node->InputCount(); i++) { - if (i == kAssumedLoopEntryIndex) { - // Don't propagate the loop mark backwards on the entry edge. - PropagateBackward(node->InputAt(0), - kVisited | (ni.backward & ~ni.loop_mark)); - } else { - // Only propagate the loop mark on backedges. - PropagateBackward(node->InputAt(i), ni.loop_mark); - } - } - } else { - // Propagate all loop marks backwards for a normal node. - for (Node* const input : node->inputs()) { - PropagateBackward(input, ni.backward); + // Propagate marks backwards from this node. + for (int i = 0; i < node->InputCount(); i++) { + Node* input = node->InputAt(i); + if (loop_num > 0 && i != kAssumedLoopEntryIndex) { + // Only propagate the loop mark on backedges. + if (SetBackwardMark(input, loop_num)) Queue(input); + } else { + // Entry or normal edge. Propagate all marks except loop_num. + if (PropagateBackwardMarks(node, input, loop_num)) Queue(input); } } } } - // Make a new loop header for the given node. - void CreateLoopInfo(Node* node, NodeInfo& ni) { - if (ni.IsLoopHeader()) return; // loop already set up. + // Make a new loop if necessary for the given node. + int CreateLoopInfo(Node* node) { + int loop_num = LoopNum(node); + if (loop_num > 0) return loop_num; + + loop_num = ++loops_found_; + if (INDEX(loop_num) >= width_) ResizeBackwardMarks(); - loops_found_++; - size_t loop_num = loops_.size() + 1; - CHECK(loops_found_ <= kMaxLoops); // TODO(titzer): don't crash. // Create a new loop. loops_.push_back({node, nullptr, nullptr, nullptr}); loop_tree_->NewLoop(); - LoopMarks loop_mark = kVisited | (1 << loop_num); - ni.loop_mark = loop_mark; + SetBackwardMark(node, loop_num); + loop_tree_->node_to_loop_num_[node->id()] = loop_num; // Setup loop mark for phis attached to loop header. for (Node* use : node->uses()) { if (use->opcode() == IrOpcode::kPhi || use->opcode() == IrOpcode::kEffectPhi) { - info(use).loop_mark = loop_mark; + SetBackwardMark(use, loop_num); + loop_tree_->node_to_loop_num_[use->id()] = loop_num; + } + } + + return loop_num; + } + + void ResizeBackwardMarks() { + int new_width = width_ + 1; + int max = num_nodes(); + uint32_t* new_backward = zone_->NewArray(new_width * max); + memset(new_backward, 0, new_width * max * sizeof(uint32_t)); + if (width_ > 0) { // copy old matrix data. + for (int i = 0; i < max; i++) { + uint32_t* np = &new_backward[i * new_width]; + uint32_t* op = &backward_[i * width_]; + for (int j = 0; j < width_; j++) np[j] = op[j]; } } + width_ = new_width; + backward_ = new_backward; + } + + void ResizeForwardMarks() { + int max = num_nodes(); + forward_ = zone_->NewArray(width_ * max); + memset(forward_, 0, width_ * max * sizeof(uint32_t)); } // Propagate marks forward from loops. void PropagateForward() { + ResizeForwardMarks(); for (LoopInfo& li : loops_) { - queued_.Set(li.header, true); - queue_.push_back(li.header); - NodeInfo& ni = info(li.header); - ni.forward = ni.loop_mark; + SetForwardMark(li.header, LoopNum(li.header)); + Queue(li.header); } // Propagate forward on paths that were backward reachable from backedges. while (!queue_.empty()) { Node* node = queue_.front(); queue_.pop_front(); queued_.Set(node, false); - NodeInfo& ni = info(node); for (Edge edge : node->use_edges()) { Node* use = edge.from(); - NodeInfo& ui = info(use); - if (IsBackedge(use, ui, edge)) continue; // skip backedges. - LoopMarks both = ni.forward & ui.backward; - if (ui.MarkForward(both) && !queued_.Get(use)) { - queued_.Set(use, true); - queue_.push_back(use); + if (!IsBackedge(use, edge)) { + if (PropagateForwardMarks(node, use)) Queue(use); } } } } - bool IsBackedge(Node* use, NodeInfo& ui, Edge& edge) { - // TODO(titzer): checking for backedges here is ugly. - if (!ui.IsLoopHeader()) return false; + bool IsBackedge(Node* use, Edge& edge) { + if (LoopNum(use) <= 0) return false; if (edge.index() == kAssumedLoopEntryIndex) return false; if (use->opcode() == IrOpcode::kPhi || use->opcode() == IrOpcode::kEffectPhi) { @@ -249,44 +298,57 @@ class LoopFinderImpl { return true; } + int LoopNum(Node* node) { return loop_tree_->node_to_loop_num_[node->id()]; } + NodeInfo& info(Node* node) { NodeInfo& i = info_[node->id()]; if (i.node == nullptr) i.node = node; return i; } - void PropagateBackward(Node* node, LoopMarks marks) { - if (info(node).MarkBackward(marks) && !queued_.Get(node)) { + void Queue(Node* node) { + if (!queued_.Get(node)) { queue_.push_back(node); queued_.Set(node, true); } } void FinishLoopTree() { + DCHECK(loops_found_ == static_cast(loops_.size())); + DCHECK(loops_found_ == static_cast(loop_tree_->all_loops_.size())); + // Degenerate cases. - if (loops_.size() == 0) return; - if (loops_.size() == 1) return FinishSingleLoop(); + if (loops_found_ == 0) return; + if (loops_found_ == 1) return FinishSingleLoop(); - for (size_t i = 1; i <= loops_.size(); i++) ConnectLoopTree(i); + for (int i = 1; i <= loops_found_; i++) ConnectLoopTree(i); size_t count = 0; // Place the node into the innermost nested loop of which it is a member. for (NodeInfo& ni : info_) { - if (ni.node == nullptr || !ni.IsInAnyLoop()) continue; + if (ni.node == nullptr) continue; LoopInfo* innermost = nullptr; - size_t index = 0; - for (size_t i = 1; i <= loops_.size(); i++) { - if (ni.IsInLoop(i)) { - LoopInfo* loop = &loops_[i - 1]; - if (innermost == nullptr || - loop->loop->depth_ > innermost->loop->depth_) { - innermost = loop; - index = i; + int innermost_index = 0; + int pos = ni.node->id() * width_; + // Search the marks word by word. + for (int i = 0; i < width_; i++) { + uint32_t marks = backward_[pos + i] & forward_[pos + i]; + for (int j = 0; j < 32; j++) { + if (marks & (1u << j)) { + int loop_num = i * 32 + j; + if (loop_num == 0) continue; + LoopInfo* loop = &loops_[loop_num - 1]; + if (innermost == nullptr || + loop->loop->depth_ > innermost->loop->depth_) { + innermost = loop; + innermost_index = loop_num; + } } } } - if (ni.IsInHeaderForLoop(index)) { + if (innermost == nullptr) continue; + if (LoopNum(ni.node) == innermost_index) { ni.next = innermost->header_list; innermost->header_list = ∋ } else { @@ -305,18 +367,14 @@ class LoopFinderImpl { // Handle the simpler case of a single loop (no checks for nesting necessary). void FinishSingleLoop() { - DCHECK(loops_.size() == 1); - DCHECK(loop_tree_->all_loops_.size() == 1); - // Place nodes into the loop header and body. LoopInfo* li = &loops_[0]; li->loop = &loop_tree_->all_loops_[0]; loop_tree_->SetParent(nullptr, li->loop); size_t count = 0; for (NodeInfo& ni : info_) { - if (ni.node == nullptr || !ni.IsInAnyLoop()) continue; - DCHECK(ni.IsInLoop(1)); - if (ni.IsInHeaderForLoop(1)) { + if (ni.node == nullptr || !IsInLoop(ni.node, 1)) continue; + if (LoopNum(ni.node) == 1) { ni.next = li->header_list; li->header_list = ∋ } else { @@ -334,25 +392,21 @@ class LoopFinderImpl { // Recursively serialize the list of header nodes and body nodes // so that nested loops occupy nested intervals. void SerializeLoop(LoopTree::Loop* loop) { - size_t loop_num = loop_tree_->LoopNum(loop); + int loop_num = loop_tree_->LoopNum(loop); LoopInfo& li = loops_[loop_num - 1]; // Serialize the header. loop->header_start_ = static_cast(loop_tree_->loop_nodes_.size()); for (NodeInfo* ni = li.header_list; ni != nullptr; ni = ni->next) { loop_tree_->loop_nodes_.push_back(ni->node); - // TODO(titzer): lift loop count restriction. - loop_tree_->node_to_loop_num_[ni->node->id()] = - static_cast(loop_num); + loop_tree_->node_to_loop_num_[ni->node->id()] = loop_num; } // Serialize the body. loop->body_start_ = static_cast(loop_tree_->loop_nodes_.size()); for (NodeInfo* ni = li.body_list; ni != nullptr; ni = ni->next) { loop_tree_->loop_nodes_.push_back(ni->node); - // TODO(titzer): lift loop count restriction. - loop_tree_->node_to_loop_num_[ni->node->id()] = - static_cast(loop_num); + loop_tree_->node_to_loop_num_[ni->node->id()] = loop_num; } // Serialize nested loops. @@ -362,15 +416,15 @@ class LoopFinderImpl { } // Connect the LoopTree loops to their parents recursively. - LoopTree::Loop* ConnectLoopTree(size_t loop_num) { + LoopTree::Loop* ConnectLoopTree(int loop_num) { LoopInfo& li = loops_[loop_num - 1]; if (li.loop != nullptr) return li.loop; NodeInfo& ni = info(li.header); LoopTree::Loop* parent = nullptr; - for (size_t i = 1; i <= loops_.size(); i++) { + for (int i = 1; i <= loops_found_; i++) { if (i == loop_num) continue; - if (ni.IsInLoop(i)) { + if (IsInLoop(ni.node, i)) { // recursively create potential parent loops first. LoopTree::Loop* upper = ConnectLoopTree(i); if (parent == nullptr || upper->depth_ > parent->depth_) { diff --git a/src/compiler/loop-analysis.h b/src/compiler/loop-analysis.h index 8c8d19a..030c93d 100644 --- a/src/compiler/loop-analysis.h +++ b/src/compiler/loop-analysis.h @@ -25,7 +25,7 @@ class LoopTree : public ZoneObject { : zone_(zone), outer_loops_(zone), all_loops_(zone), - node_to_loop_num_(static_cast(num_nodes), 0, zone), + node_to_loop_num_(static_cast(num_nodes), -1, zone), loop_nodes_(zone) {} // Represents a loop in the tree of loops, including the header nodes, @@ -61,7 +61,7 @@ class LoopTree : public ZoneObject { Loop* ContainingLoop(Node* node) { if (node->id() >= static_cast(node_to_loop_num_.size())) return nullptr; - uint8_t num = node_to_loop_num_[node->id()]; + int num = node_to_loop_num_[node->id()]; return num > 0 ? &all_loops_[num - 1] : nullptr; } @@ -116,8 +116,7 @@ class LoopTree : public ZoneObject { Zone* zone_; ZoneVector outer_loops_; ZoneVector all_loops_; - // TODO(titzer): lift loop count restriction. - ZoneVector node_to_loop_num_; + ZoneVector node_to_loop_num_; ZoneVector loop_nodes_; }; diff --git a/test/cctest/compiler/test-loop-analysis.cc b/test/cctest/compiler/test-loop-analysis.cc index 8ce1e80..3a86144 100644 --- a/test/cctest/compiler/test-loop-analysis.cc +++ b/test/cctest/compiler/test-loop-analysis.cc @@ -146,6 +146,7 @@ class LoopFinderTester : HandleAndZoneScope { } CHECK_EQ(body_count, static_cast(loop->BodySize())); + // TODO(turbofan): O(n^2) set equivalence in this test. for (int i = 0; i < body_count; i++) { // Each body node should be contained in the loop. CHECK(tree->Contains(loop, body[i])); @@ -154,7 +155,6 @@ class LoopFinderTester : HandleAndZoneScope { } void CheckRangeContains(NodeRange range, Node* node) { - // O(n) ftw. CHECK_NE(range.end(), std::find(range.begin(), range.end(), node)); } @@ -178,6 +178,8 @@ class LoopFinderTester : HandleAndZoneScope { } } } + + Zone* zone() { return main_zone(); } }; @@ -839,7 +841,7 @@ void RunEdgeMatrix3(int c1a, int c1b, int c1c, // line break // Runs all combinations with a fixed {i}. -void RunEdgeMatrix3_i(int i) { +static void RunEdgeMatrix3_i(int i) { for (int a = 0; a < 1; a++) { for (int b = 0; b < 1; b++) { for (int c = 0; c < 4; c++) { @@ -877,3 +879,99 @@ TEST(LaEdgeMatrix3_4) { RunEdgeMatrix3_i(4); } TEST(LaEdgeMatrix3_5) { RunEdgeMatrix3_i(5); } + + +static void RunManyChainedLoops_i(int count) { + LoopFinderTester t; + Node** nodes = t.zone()->NewArray(count * 4); + Node* k11 = t.jsgraph.Int32Constant(11); + Node* k12 = t.jsgraph.Int32Constant(12); + Node* last = t.start; + + // Build loops. + for (int i = 0; i < count; i++) { + Node* loop = t.graph.NewNode(t.common.Loop(2), last, t.start); + Node* phi = t.graph.NewNode(t.common.Phi(kMachInt32, 2), k11, k12, loop); + Node* branch = t.graph.NewNode(t.common.Branch(), phi, loop); + Node* if_true = t.graph.NewNode(t.common.IfTrue(), branch); + Node* exit = t.graph.NewNode(t.common.IfFalse(), branch); + loop->ReplaceInput(1, if_true); + + nodes[i * 4 + 0] = loop; + nodes[i * 4 + 1] = phi; + nodes[i * 4 + 2] = branch; + nodes[i * 4 + 3] = if_true; + + last = exit; + } + + Node* ret = t.graph.NewNode(t.common.Return(), t.p0, t.start, last); + t.graph.SetEnd(ret); + + // Verify loops. + for (int i = 0; i < count; i++) { + t.CheckLoop(nodes + i * 4, 2, nodes + i * 4 + 2, 2); + } +} + + +static void RunManyNestedLoops_i(int count) { + LoopFinderTester t; + Node** nodes = t.zone()->NewArray(count * 5); + Node* k11 = t.jsgraph.Int32Constant(11); + Node* k12 = t.jsgraph.Int32Constant(12); + Node* outer = nullptr; + Node* entry = t.start; + + // Build loops. + for (int i = 0; i < count; i++) { + Node* loop = t.graph.NewNode(t.common.Loop(2), entry, t.start); + Node* phi = t.graph.NewNode(t.common.Phi(kMachInt32, 2), k11, k12, loop); + Node* branch = t.graph.NewNode(t.common.Branch(), phi, loop); + Node* if_true = t.graph.NewNode(t.common.IfTrue(), branch); + Node* exit = t.graph.NewNode(t.common.IfFalse(), branch); + + nodes[i * 5 + 0] = exit; // outside + nodes[i * 5 + 1] = loop; // header + nodes[i * 5 + 2] = phi; // header + nodes[i * 5 + 3] = branch; // body + nodes[i * 5 + 4] = if_true; // body + + if (outer != nullptr) { + // inner loop. + outer->ReplaceInput(1, exit); + } else { + // outer loop. + Node* ret = t.graph.NewNode(t.common.Return(), t.p0, t.start, exit); + t.graph.SetEnd(ret); + } + outer = loop; + entry = if_true; + } + outer->ReplaceInput(1, entry); // innermost loop. + + // Verify loops. + for (int i = 0; i < count; i++) { + int k = i * 5; + t.CheckLoop(nodes + k + 1, 2, nodes + k + 3, count * 5 - k - 3); + } +} + + +TEST(LaManyChained_30) { RunManyChainedLoops_i(30); } +TEST(LaManyChained_31) { RunManyChainedLoops_i(31); } +TEST(LaManyChained_32) { RunManyChainedLoops_i(32); } +TEST(LaManyChained_33) { RunManyChainedLoops_i(33); } +TEST(LaManyChained_34) { RunManyChainedLoops_i(34); } +TEST(LaManyChained_62) { RunManyChainedLoops_i(62); } +TEST(LaManyChained_63) { RunManyChainedLoops_i(63); } +TEST(LaManyChained_64) { RunManyChainedLoops_i(64); } + +TEST(LaManyNested_30) { RunManyNestedLoops_i(30); } +TEST(LaManyNested_31) { RunManyNestedLoops_i(31); } +TEST(LaManyNested_32) { RunManyNestedLoops_i(32); } +TEST(LaManyNested_33) { RunManyNestedLoops_i(33); } +TEST(LaManyNested_34) { RunManyNestedLoops_i(34); } +TEST(LaManyNested_62) { RunManyNestedLoops_i(62); } +TEST(LaManyNested_63) { RunManyNestedLoops_i(63); } +TEST(LaManyNested_64) { RunManyNestedLoops_i(64); }