From: Benedikt Meurer
Date: Tue, 3 Feb 2015 14:50:40 +0000 (+0100)
Subject: [turbofan] Split pure nodes in the scheduler if beneficial.
X-Git-Tag: upstream/4.7.83~4636
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d8cfbc633d6f2fcc7016dfb8db8c3f7636c3151e;p=platform%2Fupstream%2Fv8.git

If a (pure) node has two or more uses, but there exists a path from the
common dominator of these uses to the end which does not contain a use,
then we split the node so that no unnecessary computation takes place.
Note, however, that this only applies if the node cannot be hoisted out
of a loop.

BUG=v8:3864
LOG=n
R=jarin@chromium.org

Review URL: https://codereview.chromium.org/899433005

Cr-Commit-Position: refs/heads/master@{#26404}
---
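A concrete illustration of the scenario described above may help before reading the diff. The sketch below is not part of the patch; it is merely in the spirit of the mjsunit tests added at the end, and the function name f is invented. Assume TurboFan has value-numbered the two occurrences of the pure expression (i * i) | 0 into a single node: its uses then sit in the two branch blocks, the common dominator of those uses is the function entry, and the path taken for i == 0 reaches the end without any use. That is exactly the case in which the scheduler now prefers to clone the node into the branches rather than compute it unconditionally at entry:

    // Illustrative JavaScript, not part of the patch. With node splitting,
    // the shared pure node for (i * i) | 0 ends up in the two branches, so
    // f(0) performs no multiplication at all.
    function f(i) {
      var j = 0;
      if (i < 0) {
        j = (i * i) | 0;  // use #1 of the value-numbered pure node
      }
      if (i > 0) {
        j = (i * i) | 0;  // use #2 of the value-numbered pure node
      }
      return j;           // reached without any use when i == 0
    }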
diff --git a/src/compiler.cc b/src/compiler.cc
index f792f226b..de0d528d3 100644
--- a/src/compiler.cc
+++ b/src/compiler.cc
@@ -186,6 +186,7 @@ void CompilationInfo::Initialize(Isolate* isolate,
   if (isolate_->debug()->is_active()) MarkAsDebug();
   if (FLAG_context_specialization) MarkAsContextSpecializing();
   if (FLAG_turbo_inlining) MarkAsInliningEnabled();
+  if (FLAG_turbo_splitting) MarkAsSplittingEnabled();
   if (FLAG_turbo_types) MarkAsTypingEnabled();

   if (!shared_info_.is_null()) {
diff --git a/src/compiler.h b/src/compiler.h
index 611a41ba8..e3b6afa6f 100644
--- a/src/compiler.h
+++ b/src/compiler.h
@@ -90,7 +90,8 @@ class CompilationInfo {
     kTypingEnabled = 1 << 18,
     kDisableFutureOptimization = 1 << 19,
     kModule = 1 << 20,
-    kToplevel = 1 << 21
+    kToplevel = 1 << 21,
+    kSplittingEnabled = 1 << 22
   };

   CompilationInfo(Handle<JSFunction> closure, Zone* zone);
@@ -222,6 +223,10 @@ class CompilationInfo {

   bool is_toplevel() const { return GetFlag(kToplevel); }

+  void MarkAsSplittingEnabled() { SetFlag(kSplittingEnabled); }
+
+  bool is_splitting_enabled() const { return GetFlag(kSplittingEnabled); }
+
   bool IsCodePreAgingActive() const {
     return FLAG_optimize_for_size && FLAG_age_code && !will_serialize() &&
            !is_debug();
diff --git a/src/compiler/common-operator.cc b/src/compiler/common-operator.cc
index e67759eba..d2a19dd57 100644
--- a/src/compiler/common-operator.cc
+++ b/src/compiler/common-operator.cc
@@ -489,11 +489,12 @@ const Operator* CommonOperatorBuilder::Call(const CallDescriptor* descriptor) {

 const Operator* CommonOperatorBuilder::Projection(size_t index) {
-  return new (zone()) Operator1<size_t>(       // --
-      IrOpcode::kProjection, Operator::kPure,  // opcode
-      "Projection",                            // name
-      1, 0, 0, 1, 0, 0,                        // counts
-      index);                                  // parameter
+  return new (zone()) Operator1<size_t>(         // --
+      IrOpcode::kProjection,                     // opcode
+      Operator::kFoldable | Operator::kNoThrow,  // flags
+      "Projection",                              // name
+      1, 0, 0, 1, 0, 0,                          // counts
+      index);                                    // parameter
 }
diff --git a/src/compiler/pipeline.cc b/src/compiler/pipeline.cc
index a6d46657b..4d5982aa4 100644
--- a/src/compiler/pipeline.cc
+++ b/src/compiler/pipeline.cc
@@ -548,7 +548,10 @@ struct ComputeSchedulePhase {
   static const char* phase_name() { return "scheduling"; }

   void Run(PipelineData* data, Zone* temp_zone) {
-    Schedule* schedule = Scheduler::ComputeSchedule(temp_zone, data->graph());
+    Schedule* schedule = Scheduler::ComputeSchedule(
+        temp_zone, data->graph(), data->info()->is_splitting_enabled()
+                                      ? Scheduler::kSplitNodes
+                                      : Scheduler::kNoFlags);
     TraceSchedule(schedule);
     if (FLAG_turbo_verify) ScheduleVerifier::Run(schedule);
     data->set_schedule(schedule);
diff --git a/src/compiler/scheduler.cc b/src/compiler/scheduler.cc
index 6281371ab..70e65478a 100644
--- a/src/compiler/scheduler.cc
+++ b/src/compiler/scheduler.cc
@@ -27,20 +27,21 @@ static inline void Trace(const char* msg, ...) {
 }


-Scheduler::Scheduler(Zone* zone, Graph* graph, Schedule* schedule)
+Scheduler::Scheduler(Zone* zone, Graph* graph, Schedule* schedule, Flags flags)
     : zone_(zone),
       graph_(graph),
       schedule_(schedule),
+      flags_(flags),
       scheduled_nodes_(zone),
       schedule_root_nodes_(zone),
       schedule_queue_(zone),
       node_data_(graph_->NodeCount(), DefaultSchedulerData(), zone) {}


-Schedule* Scheduler::ComputeSchedule(Zone* zone, Graph* graph) {
+Schedule* Scheduler::ComputeSchedule(Zone* zone, Graph* graph, Flags flags) {
   Schedule* schedule = new (graph->zone())
       Schedule(graph->zone(), static_cast<size_t>(graph->NodeCount()));
-  Scheduler scheduler(zone, graph, schedule);
+  Scheduler scheduler(zone, graph, schedule, flags);

   scheduler.BuildCFG();
   scheduler.ComputeSpecialRPONumbering();
@@ -1226,7 +1227,10 @@ void Scheduler::ScheduleEarly() {
 class ScheduleLateNodeVisitor {
  public:
   ScheduleLateNodeVisitor(Zone* zone, Scheduler* scheduler)
-      : scheduler_(scheduler), schedule_(scheduler_->schedule_) {}
+      : scheduler_(scheduler),
+        schedule_(scheduler_->schedule_),
+        marked_(scheduler->zone_),
+        marking_queue_(scheduler->zone_) {}

   // Run the schedule late algorithm on a set of fixed root nodes.
   void Run(NodeVector* roots) {
@@ -1248,10 +1252,11 @@ class ScheduleLateNodeVisitor {
       if (scheduler_->GetData(node)->unscheduled_count_ != 0) continue;
       queue->push(node);
-      while (!queue->empty()) {
-        VisitNode(queue->front());
+      do {
+        Node* const node = queue->front();
         queue->pop();
-      }
+        VisitNode(node);
+      } while (!queue->empty());
     }
   }
@@ -1282,13 +1287,19 @@ class ScheduleLateNodeVisitor {
     // into enclosing loop pre-headers until they would precede their schedule
     // early position.
     BasicBlock* hoist_block = GetPreHeader(block);
-    while (hoist_block != NULL &&
-           hoist_block->dominator_depth() >= min_block->dominator_depth()) {
-      Trace("  hoisting #%d:%s to block B%d\n", node->id(),
-            node->op()->mnemonic(), hoist_block->id().ToInt());
-      DCHECK_LT(hoist_block->loop_depth(), block->loop_depth());
-      block = hoist_block;
-      hoist_block = GetPreHeader(hoist_block);
+    if (hoist_block &&
+        hoist_block->dominator_depth() >= min_block->dominator_depth()) {
+      do {
+        Trace("  hoisting #%d:%s to block B%d\n", node->id(),
+              node->op()->mnemonic(), hoist_block->id().ToInt());
+        DCHECK_LT(hoist_block->loop_depth(), block->loop_depth());
+        block = hoist_block;
+        hoist_block = GetPreHeader(hoist_block);
+      } while (hoist_block &&
+               hoist_block->dominator_depth() >= min_block->dominator_depth());
+    } else if (scheduler_->flags_ & Scheduler::kSplitNodes) {
+      // Split the {node} if beneficial and return the new {block} for it.
+      block = SplitNode(block, node);
     }

     // Schedule the node or a floating control structure.
@@ -1299,6 +1310,101 @@ class ScheduleLateNodeVisitor {
     }
   }

+  // Mark {block} and push its non-marked predecessors onto the marking queue.
+  void MarkBlock(BasicBlock* block) {
+    DCHECK_LT(block->id().ToSize(), marked_.size());
+    marked_[block->id().ToSize()] = true;
+    for (BasicBlock* pred_block : block->predecessors()) {
+      DCHECK_LT(pred_block->id().ToSize(), marked_.size());
+      if (marked_[pred_block->id().ToSize()]) continue;
+      marking_queue_.push_back(pred_block);
+    }
+  }
+
+  BasicBlock* SplitNode(BasicBlock* block, Node* node) {
+    // For now, we limit splitting to pure nodes.
+    if (!node->op()->HasProperty(Operator::kPure)) return block;
+
+    // The {block} is the common dominator of all uses of {node}, so we cannot
+    // split anything unless the {block} has at least two successors.
+    DCHECK_EQ(block, GetCommonDominatorOfUses(node));
+    if (block->SuccessorCount() < 2) return block;
+
+    // Clear marking bits.
+    DCHECK(marking_queue_.empty());
+    std::fill(marked_.begin(), marked_.end(), false);
+    marked_.resize(schedule_->BasicBlockCount() + 1, false);
+
+    // Check if the {node} has uses in {block}.
+    for (Edge edge : node->use_edges()) {
+      BasicBlock* use_block = GetBlockForUse(edge);
+      if (use_block == nullptr || marked_[use_block->id().ToSize()]) continue;
+      if (use_block == block) {
+        Trace("  not splitting #%d:%s, it is used in B%d\n", node->id(),
+              node->op()->mnemonic(), block->id().ToInt());
+        marking_queue_.clear();
+        return block;
+      }
+      MarkBlock(use_block);
+    }
+
+    // Compute transitive marking closure; a block is marked if all its
+    // successors are marked.
+    do {
+      BasicBlock* top_block = marking_queue_.front();
+      marking_queue_.pop_front();
+      if (marked_[top_block->id().ToSize()]) continue;
+      bool marked = true;
+      for (BasicBlock* successor : top_block->successors()) {
+        if (!marked_[successor->id().ToSize()]) {
+          marked = false;
+          break;
+        }
+      }
+      if (marked) MarkBlock(top_block);
+    } while (!marking_queue_.empty());
+
+    // If the (common dominator) {block} is marked, we know that all paths from
+    // {block} to the end contain at least one use of {node}, and hence there's
+    // no point in splitting the {node} in this case.
+    if (marked_[block->id().ToSize()]) {
+      Trace("  not splitting #%d:%s, its common dominator B%d is perfect\n",
+            node->id(), node->op()->mnemonic(), block->id().ToInt());
+      return block;
+    }
+
+    // Split {node} for uses according to the previously computed marking
+    // closure. Every marking partition has a unique dominator, which gets a
+    // copy of the {node} with the exception of the first partition, which gets
+    // the {node} itself.
+    ZoneMap<BasicBlock*, Node*> dominators(scheduler_->zone_);
+    for (Edge edge : node->use_edges()) {
+      BasicBlock* use_block = GetBlockForUse(edge);
+      if (use_block == nullptr) continue;
+      while (marked_[use_block->dominator()->id().ToSize()]) {
+        use_block = use_block->dominator();
+      }
+      auto& use_node = dominators[use_block];
+      if (use_node == nullptr) {
+        if (dominators.size() == 1u) {
+          // Place the {node} at {use_block}.
+          block = use_block;
+          use_node = node;
+          Trace("  pushing #%d:%s down to B%d\n", node->id(),
+                node->op()->mnemonic(), block->id().ToInt());
+        } else {
+          // Place a copy of {node} at {use_block}.
+          use_node = CloneNode(node);
+          Trace("  cloning #%d:%s for B%d\n", use_node->id(),
+                use_node->op()->mnemonic(), use_block->id().ToInt());
+          scheduler_->schedule_queue_.push(use_node);
+        }
+      }
+      edge.UpdateTo(use_node);
+    }
+    return block;
+  }
+
   BasicBlock* GetPreHeader(BasicBlock* block) {
     if (block->IsLoopHeader()) {
       return block->dominator();
@@ -1310,7 +1416,7 @@ class ScheduleLateNodeVisitor {
   }

   BasicBlock* GetCommonDominatorOfUses(Node* node) {
-    BasicBlock* block = NULL;
+    BasicBlock* block = nullptr;
     for (Edge edge : node->use_edges()) {
       BasicBlock* use_block = GetBlockForUse(edge);
       block = block == NULL ? use_block : use_block == NULL
@@ -1361,8 +1467,25 @@ class ScheduleLateNodeVisitor {
     scheduler_->UpdatePlacement(node, Scheduler::kScheduled);
   }

+  Node* CloneNode(Node* node) {
+    int const input_count = node->InputCount();
+    Node** const inputs = scheduler_->zone_->NewArray<Node*>(input_count);
+    for (int index = 0; index < input_count; ++index) {
+      Node* const input = node->InputAt(index);
+      scheduler_->IncrementUnscheduledUseCount(input, index, node);
+      inputs[index] = input;
+    }
+    Node* copy = scheduler_->graph_->NewNode(node->op(), input_count, inputs);
+    scheduler_->node_data_.resize(copy->id() + 1,
+                                  scheduler_->DefaultSchedulerData());
+    scheduler_->node_data_[copy->id()] = scheduler_->node_data_[node->id()];
+    return copy;
+  }
+
   Scheduler* scheduler_;
   Schedule* schedule_;
+  BoolVector marked_;
+  ZoneDeque<BasicBlock*> marking_queue_;
 };
diff --git a/src/compiler/scheduler.h b/src/compiler/scheduler.h
index b4cea2332..882c761f2 100644
--- a/src/compiler/scheduler.h
+++ b/src/compiler/scheduler.h
@@ -7,6 +7,7 @@

 #include "src/v8.h"

+#include "src/base/flags.h"
 #include "src/compiler/node.h"
 #include "src/compiler/opcodes.h"
 #include "src/compiler/schedule.h"
@@ -28,9 +29,13 @@ class SpecialRPONumberer;
 // ordering the basic blocks in the special RPO order.
 class Scheduler {
  public:
+  // Flags that control the mode of operation.
+  enum Flag { kNoFlags = 0u, kSplitNodes = 1u << 1 };
+  typedef base::Flags<Flag> Flags;
+
   // The complete scheduling algorithm. Creates a new schedule and places all
   // nodes from the graph into it.
-  static Schedule* ComputeSchedule(Zone* zone, Graph* graph);
+  static Schedule* ComputeSchedule(Zone* zone, Graph* graph, Flags flags);

   // Compute the RPO of blocks in an existing schedule.
   static BasicBlockVector* ComputeSpecialRPO(Zone* zone, Schedule* schedule);
@@ -60,6 +65,7 @@ class Scheduler {
   Zone* zone_;
   Graph* graph_;
   Schedule* schedule_;
+  Flags flags_;
   NodeVectorVector scheduled_nodes_;  // Per-block list of nodes in reverse.
   NodeVector schedule_root_nodes_;    // Fixed root nodes seed the worklist.
   ZoneQueue<Node*> schedule_queue_;   // Worklist of schedulable nodes.
@@ -68,7 +74,7 @@ class Scheduler {
   SpecialRPONumberer* special_rpo_;   // Special RPO numbering of blocks.
   ControlEquivalence* equivalence_;   // Control dependence equivalence.
-  Scheduler(Zone* zone, Graph* graph, Schedule* schedule);
+  Scheduler(Zone* zone, Graph* graph, Schedule* schedule, Flags flags);

   inline SchedulerData DefaultSchedulerData();
   inline SchedulerData* GetData(Node* node);
@@ -110,6 +116,9 @@ class Scheduler {
   void MovePlannedNodes(BasicBlock* from, BasicBlock* to);
 };

+
+DEFINE_OPERATORS_FOR_FLAGS(Scheduler::Flags)
+
 }  // namespace compiler
 }  // namespace internal
 }  // namespace v8
diff --git a/src/flag-definitions.h b/src/flag-definitions.h
index e4a9df6dd..9cdfa3a54 100644
--- a/src/flag-definitions.h
+++ b/src/flag-definitions.h
@@ -397,6 +397,7 @@ DEFINE_BOOL(trace_turbo_jt, false, "trace TurboFan's jump threading")
 DEFINE_BOOL(turbo_asm, true, "enable TurboFan for asm.js code")
 DEFINE_BOOL(turbo_verify, DEBUG_BOOL, "verify TurboFan graphs at each phase")
 DEFINE_BOOL(turbo_stats, false, "print TurboFan statistics")
+DEFINE_BOOL(turbo_splitting, true, "split nodes during scheduling in TurboFan")
 DEFINE_BOOL(turbo_types, true, "use typed lowering in TurboFan")
 DEFINE_BOOL(turbo_source_positions, false,
             "track source code positions when building TurboFan IR")
diff --git a/test/mjsunit/asm/redundancy1.js b/test/mjsunit/asm/redundancy1.js
new file mode 100644
index 000000000..c7e0e3745
--- /dev/null
+++ b/test/mjsunit/asm/redundancy1.js
@@ -0,0 +1,26 @@
+// Copyright 2015 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Flags: --turbo-splitting
+
+function module(stdlib, foreign, heap) {
+  "use asm";
+  function foo(i) {
+    var j = 0;
+    i = i|0;
+    if (i < 0) {
+      j = i+1|0;
+    }
+    if (i > 0) {
+      j = i+1|0;
+    }
+    return j;
+  }
+  return { foo: foo };
+}
+
+var foo = module(this, {}, new ArrayBuffer(64*1024)).foo;
+assertEquals(0, foo(0));
+assertEquals(0, foo(-1));
+assertEquals(12, foo(11));
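The redundancy1.js test above is the diamond case from the commit message: once the two i+1|0 adds are value-numbered into one pure node, its uses sit in the two if-blocks while the foo(0) path reaches the return without a use. The hand-written sketch below is illustrative only, not compiler output, and foo_unsplit is an invented name; it shows what schedule-late placement would effectively produce without Scheduler::kSplitNodes:

    // Without kSplitNodes the unified pure add lands at the common dominator
    // of its uses (the function entry), so it executes even for foo(0):
    function foo_unsplit(i) {
      i = i|0;
      var j = 0;
      var t = i+1|0;    // hoisted to the dominator: computed unconditionally
      if (i < 0) j = t;
      if (i > 0) j = t;
      return j;
    }
    // With --turbo-splitting the add is instead cloned into both branches,
    // matching the source and doing no work on the i == 0 path.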
diff --git a/test/mjsunit/asm/redundancy2.js b/test/mjsunit/asm/redundancy2.js
new file mode 100644
index 000000000..95a55b533
--- /dev/null
+++ b/test/mjsunit/asm/redundancy2.js
@@ -0,0 +1,29 @@
+// Copyright 2015 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Flags: --turbo-splitting
+
+function module(stdlib, foreign, heap) {
+  "use asm";
+  function foo(i) {
+    var j = 0;
+    i = i|0;
+    switch (i) {
+      case 0:
+        j = i+1|0;
+        break;
+      case 1:
+        j = i+1|0;
+        break;
+      default:
+        j = i;
+        break;
+    }
+    return j;
+  }
+  return { foo: foo };
+}
+
+var foo = module(this, {}, new ArrayBuffer(64*1024)).foo;
+print(foo(1));
diff --git a/test/unittests/compiler/scheduler-unittest.cc b/test/unittests/compiler/scheduler-unittest.cc
index aa93c09e6..2b46b06cf 100644
--- a/test/unittests/compiler/scheduler-unittest.cc
+++ b/test/unittests/compiler/scheduler-unittest.cc
@@ -32,7 +32,8 @@ class SchedulerTest : public TestWithZone {
       os << AsDOT(*graph);
     }

-    Schedule* schedule = Scheduler::ComputeSchedule(graph->zone(), graph);
+    Schedule* schedule = Scheduler::ComputeSchedule(graph->zone(), graph,
+                                                    Scheduler::kSplitNodes);

     if (FLAG_trace_turbo_scheduler) {
       OFStream os(stdout);
@@ -151,7 +152,7 @@ const Operator kIntAdd(IrOpcode::kInt32Add, Operator::kPure, "Int32Add", 2, 0,
 TEST_F(SchedulerTest, BuildScheduleEmpty) {
   graph()->SetStart(graph()->NewNode(common()->Start(0)));
   graph()->SetEnd(graph()->NewNode(common()->End(), graph()->start()));
-  USE(Scheduler::ComputeSchedule(zone(), graph()));
+  USE(Scheduler::ComputeSchedule(zone(), graph(), Scheduler::kNoFlags));
 }


@@ -164,7 +165,7 @@ TEST_F(SchedulerTest, BuildScheduleOneParameter) {

   graph()->SetEnd(graph()->NewNode(common()->End(), ret));

-  USE(Scheduler::ComputeSchedule(zone(), graph()));
+  USE(Scheduler::ComputeSchedule(zone(), graph(), Scheduler::kNoFlags));
 }


@@ -1734,7 +1735,7 @@ TARGET_TEST_F(SchedulerTest, NestedFloatingDiamondWithChain) {

   graph()->SetEnd(end);

-  ComputeAndVerifySchedule(35, graph());
+  ComputeAndVerifySchedule(36, graph());
 }
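To see how SplitNode decides whether splitting pays off, here is a toy JavaScript model of its transitive marking closure. The block names, the CFG encoding, and the shouldSplit driver are invented for illustration; the real implementation in scheduler.cc above works on BasicBlock ids with a BoolVector and a ZoneDeque. A block becomes marked once all of its successors are marked, and an unmarked common dominator means some path from it to the end avoids every use:

    // Toy model of the marking closure from SplitNode (illustrative only).
    function shouldSplit(cfg, useBlocks, commonDominator) {
      var marked = {};
      var queue = [];
      // Mark a block and enqueue its not-yet-marked predecessors.
      function markBlock(name) {
        marked[name] = true;
        cfg[name].preds.forEach(function(p) {
          if (!marked[p]) queue.push(p);
        });
      }
      useBlocks.forEach(function(b) { markBlock(b); });
      // Transitive closure: mark a block once all its successors are marked.
      while (queue.length > 0) {
        var top = queue.shift();
        if (marked[top]) continue;
        if (cfg[top].succs.every(function(s) { return marked[s]; })) {
          markBlock(top);
        }
      }
      // An unmarked common dominator means some path from it to the end
      // avoids every use, so cloning the node into the use partitions wins.
      return !marked[commonDominator];
    }

    // A single diamond (simplified from redundancy1.js): the pure node is
    // used only in B1, and B0 is the common dominator of all uses.
    var cfg = {
      B0: { preds: [],           succs: ["B1", "B2"] },  // entry
      B1: { preds: ["B0"],       succs: ["B3"] },        // contains the use
      B2: { preds: ["B0"],       succs: ["B3"] },        // use-free path
      B3: { preds: ["B1", "B2"], succs: [] }             // merge / end
    };
    print(shouldSplit(cfg, ["B1"], "B0"));  // true: B0 stays unmarked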