[turbofan] Split pure nodes in the scheduler if beneficial.
authorBenedikt Meurer <bmeurer@chromium.org>
Tue, 3 Feb 2015 14:50:40 +0000 (15:50 +0100)
committerBenedikt Meurer <bmeurer@chromium.org>
Tue, 3 Feb 2015 14:51:08 +0000 (14:51 +0000)
If a (pure) node has two or more uses, but there exists a path from the
common dominator of these uses to end, which does not contain a use,
then we split the node such that no unnecessary computation takes place.
Note however, that this only applies if the node cannot be hoisted out
of a loop.

BUG=v8:3864
LOG=n
R=jarin@chromium.org

Review URL: https://codereview.chromium.org/899433005

Cr-Commit-Position: refs/heads/master@{#26404}

src/compiler.cc
src/compiler.h
src/compiler/common-operator.cc
src/compiler/pipeline.cc
src/compiler/scheduler.cc
src/compiler/scheduler.h
src/flag-definitions.h
test/mjsunit/asm/redundancy1.js [new file with mode: 0644]
test/mjsunit/asm/redundancy2.js [new file with mode: 0644]
test/unittests/compiler/scheduler-unittest.cc

index f792f22..de0d528 100644 (file)
@@ -186,6 +186,7 @@ void CompilationInfo::Initialize(Isolate* isolate,
   if (isolate_->debug()->is_active()) MarkAsDebug();
   if (FLAG_context_specialization) MarkAsContextSpecializing();
   if (FLAG_turbo_inlining) MarkAsInliningEnabled();
+  if (FLAG_turbo_splitting) MarkAsSplittingEnabled();
   if (FLAG_turbo_types) MarkAsTypingEnabled();
 
   if (!shared_info_.is_null()) {
index 611a41b..e3b6afa 100644 (file)
@@ -90,7 +90,8 @@ class CompilationInfo {
     kTypingEnabled = 1 << 18,
     kDisableFutureOptimization = 1 << 19,
     kModule = 1 << 20,
-    kToplevel = 1 << 21
+    kToplevel = 1 << 21,
+    kSplittingEnabled = 1 << 22
   };
 
   CompilationInfo(Handle<JSFunction> closure, Zone* zone);
@@ -222,6 +223,10 @@ class CompilationInfo {
 
   bool is_toplevel() const { return GetFlag(kToplevel); }
 
+  void MarkAsSplittingEnabled() { SetFlag(kSplittingEnabled); }
+
+  bool is_splitting_enabled() const { return GetFlag(kSplittingEnabled); }
+
   bool IsCodePreAgingActive() const {
     return FLAG_optimize_for_size && FLAG_age_code && !will_serialize() &&
            !is_debug();
index e67759e..d2a19dd 100644 (file)
@@ -489,11 +489,12 @@ const Operator* CommonOperatorBuilder::Call(const CallDescriptor* descriptor) {
 
 
 const Operator* CommonOperatorBuilder::Projection(size_t index) {
-  return new (zone()) Operator1<size_t>(       // --
-      IrOpcode::kProjection, Operator::kPure,  // opcode
-      "Projection",                            // name
-      1, 0, 0, 1, 0, 0,                        // counts
-      index);                                  // parameter
+  return new (zone()) Operator1<size_t>(         // --
+      IrOpcode::kProjection,                     // opcode
+      Operator::kFoldable | Operator::kNoThrow,  // flags
+      "Projection",                              // name
+      1, 0, 0, 1, 0, 0,                          // counts
+      index);                                    // parameter
 }
 
 
index a6d4665..4d5982a 100644 (file)
@@ -548,7 +548,10 @@ struct ComputeSchedulePhase {
   static const char* phase_name() { return "scheduling"; }
 
   void Run(PipelineData* data, Zone* temp_zone) {
-    Schedule* schedule = Scheduler::ComputeSchedule(temp_zone, data->graph());
+    Schedule* schedule = Scheduler::ComputeSchedule(
+        temp_zone, data->graph(), data->info()->is_splitting_enabled()
+                                      ? Scheduler::kSplitNodes
+                                      : Scheduler::kNoFlags);
     TraceSchedule(schedule);
     if (FLAG_turbo_verify) ScheduleVerifier::Run(schedule);
     data->set_schedule(schedule);
index 6281371..70e6547 100644 (file)
@@ -27,20 +27,21 @@ static inline void Trace(const char* msg, ...) {
 }
 
 
-Scheduler::Scheduler(Zone* zone, Graph* graph, Schedule* schedule)
+Scheduler::Scheduler(Zone* zone, Graph* graph, Schedule* schedule, Flags flags)
     : zone_(zone),
       graph_(graph),
       schedule_(schedule),
+      flags_(flags),
       scheduled_nodes_(zone),
       schedule_root_nodes_(zone),
       schedule_queue_(zone),
       node_data_(graph_->NodeCount(), DefaultSchedulerData(), zone) {}
 
 
-Schedule* Scheduler::ComputeSchedule(Zone* zone, Graph* graph) {
+Schedule* Scheduler::ComputeSchedule(Zone* zone, Graph* graph, Flags flags) {
   Schedule* schedule = new (graph->zone())
       Schedule(graph->zone(), static_cast<size_t>(graph->NodeCount()));
-  Scheduler scheduler(zone, graph, schedule);
+  Scheduler scheduler(zone, graph, schedule, flags);
 
   scheduler.BuildCFG();
   scheduler.ComputeSpecialRPONumbering();
@@ -1226,7 +1227,10 @@ void Scheduler::ScheduleEarly() {
 class ScheduleLateNodeVisitor {
  public:
   ScheduleLateNodeVisitor(Zone* zone, Scheduler* scheduler)
-      : scheduler_(scheduler), schedule_(scheduler_->schedule_) {}
+      : scheduler_(scheduler),
+        schedule_(scheduler_->schedule_),
+        marked_(scheduler->zone_),
+        marking_queue_(scheduler->zone_) {}
 
   // Run the schedule late algorithm on a set of fixed root nodes.
   void Run(NodeVector* roots) {
@@ -1248,10 +1252,11 @@ class ScheduleLateNodeVisitor {
       if (scheduler_->GetData(node)->unscheduled_count_ != 0) continue;
 
       queue->push(node);
-      while (!queue->empty()) {
-        VisitNode(queue->front());
+      do {
+        Node* const node = queue->front();
         queue->pop();
-      }
+        VisitNode(node);
+      } while (!queue->empty());
     }
   }
 
@@ -1282,13 +1287,19 @@ class ScheduleLateNodeVisitor {
     // into enclosing loop pre-headers until they would preceed their schedule
     // early position.
     BasicBlock* hoist_block = GetPreHeader(block);
-    while (hoist_block != NULL &&
-           hoist_block->dominator_depth() >= min_block->dominator_depth()) {
-      Trace("  hoisting #%d:%s to block B%d\n", node->id(),
-            node->op()->mnemonic(), hoist_block->id().ToInt());
-      DCHECK_LT(hoist_block->loop_depth(), block->loop_depth());
-      block = hoist_block;
-      hoist_block = GetPreHeader(hoist_block);
+    if (hoist_block &&
+        hoist_block->dominator_depth() >= min_block->dominator_depth()) {
+      do {
+        Trace("  hoisting #%d:%s to block B%d\n", node->id(),
+              node->op()->mnemonic(), hoist_block->id().ToInt());
+        DCHECK_LT(hoist_block->loop_depth(), block->loop_depth());
+        block = hoist_block;
+        hoist_block = GetPreHeader(hoist_block);
+      } while (hoist_block &&
+               hoist_block->dominator_depth() >= min_block->dominator_depth());
+    } else if (scheduler_->flags_ & Scheduler::kSplitNodes) {
+      // Split the {node} if beneficial and return the new {block} for it.
+      block = SplitNode(block, node);
     }
 
     // Schedule the node or a floating control structure.
@@ -1299,6 +1310,101 @@ class ScheduleLateNodeVisitor {
     }
   }
 
+  // Mark {block} and push its non-marked predecessor on the marking queue.
+  void MarkBlock(BasicBlock* block) {
+    DCHECK_LT(block->id().ToSize(), marked_.size());
+    marked_[block->id().ToSize()] = true;
+    for (BasicBlock* pred_block : block->predecessors()) {
+      DCHECK_LT(pred_block->id().ToSize(), marked_.size());
+      if (marked_[pred_block->id().ToSize()]) continue;
+      marking_queue_.push_back(pred_block);
+    }
+  }
+
+  BasicBlock* SplitNode(BasicBlock* block, Node* node) {
+    // For now, we limit splitting to pure nodes.
+    if (!node->op()->HasProperty(Operator::kPure)) return block;
+
+    // The {block} is common dominator of all uses of {node}, so we cannot
+    // split anything unless the {block} has at least two successors.
+    DCHECK_EQ(block, GetCommonDominatorOfUses(node));
+    if (block->SuccessorCount() < 2) return block;
+
+    // Clear marking bits.
+    DCHECK(marking_queue_.empty());
+    std::fill(marked_.begin(), marked_.end(), false);
+    marked_.resize(schedule_->BasicBlockCount() + 1, false);
+
+    // Check if the {node} has uses in {block}.
+    for (Edge edge : node->use_edges()) {
+      BasicBlock* use_block = GetBlockForUse(edge);
+      if (use_block == nullptr || marked_[use_block->id().ToSize()]) continue;
+      if (use_block == block) {
+        Trace("  not splitting #%d:%s, it is used in B%d\n", node->id(),
+              node->op()->mnemonic(), block->id().ToInt());
+        marking_queue_.clear();
+        return block;
+      }
+      MarkBlock(use_block);
+    }
+
+    // Compute transitive marking closure; a block is marked if all its
+    // successors are marked.
+    do {
+      BasicBlock* top_block = marking_queue_.front();
+      marking_queue_.pop_front();
+      if (marked_[top_block->id().ToSize()]) continue;
+      bool marked = true;
+      for (BasicBlock* successor : top_block->successors()) {
+        if (!marked_[successor->id().ToSize()]) {
+          marked = false;
+          break;
+        }
+      }
+      if (marked) MarkBlock(top_block);
+    } while (!marking_queue_.empty());
+
+    // If the (common dominator) {block} is marked, we know that all paths from
+    // {block} to the end contain at least one use of {node}, and hence there's
+    // no point in splitting the {node} in this case.
+    if (marked_[block->id().ToSize()]) {
+      Trace("  not splitting #%d:%s, its common dominator B%d is perfect\n",
+            node->id(), node->op()->mnemonic(), block->id().ToInt());
+      return block;
+    }
+
+    // Split {node} for uses according to the previously computed marking
+    // closure. Every marking partition has a unique dominator, which get's a
+    // copy of the {node} with the exception of the first partition, which get's
+    // the {node} itself.
+    ZoneMap<BasicBlock*, Node*> dominators(scheduler_->zone_);
+    for (Edge edge : node->use_edges()) {
+      BasicBlock* use_block = GetBlockForUse(edge);
+      if (use_block == nullptr) continue;
+      while (marked_[use_block->dominator()->id().ToSize()]) {
+        use_block = use_block->dominator();
+      }
+      auto& use_node = dominators[use_block];
+      if (use_node == nullptr) {
+        if (dominators.size() == 1u) {
+          // Place the {node} at {use_block}.
+          block = use_block;
+          use_node = node;
+          Trace("  pushing #%d:%s down to B%d\n", node->id(),
+                node->op()->mnemonic(), block->id().ToInt());
+        } else {
+          // Place a copy of {node} at {use_block}.
+          use_node = CloneNode(node);
+          Trace("  cloning #%d:%s for B%d\n", use_node->id(),
+                use_node->op()->mnemonic(), use_block->id().ToInt());
+          scheduler_->schedule_queue_.push(use_node);
+        }
+      }
+      edge.UpdateTo(use_node);
+    }
+    return block;
+  }
+
   BasicBlock* GetPreHeader(BasicBlock* block) {
     if (block->IsLoopHeader()) {
       return block->dominator();
@@ -1310,7 +1416,7 @@ class ScheduleLateNodeVisitor {
   }
 
   BasicBlock* GetCommonDominatorOfUses(Node* node) {
-    BasicBlock* block = NULL;
+    BasicBlock* block = nullptr;
     for (Edge edge : node->use_edges()) {
       BasicBlock* use_block = GetBlockForUse(edge);
       block = block == NULL ? use_block : use_block == NULL
@@ -1361,8 +1467,25 @@ class ScheduleLateNodeVisitor {
     scheduler_->UpdatePlacement(node, Scheduler::kScheduled);
   }
 
+  Node* CloneNode(Node* node) {
+    int const input_count = node->InputCount();
+    Node** const inputs = scheduler_->zone_->NewArray<Node*>(input_count);
+    for (int index = 0; index < input_count; ++index) {
+      Node* const input = node->InputAt(index);
+      scheduler_->IncrementUnscheduledUseCount(input, index, node);
+      inputs[index] = input;
+    }
+    Node* copy = scheduler_->graph_->NewNode(node->op(), input_count, inputs);
+    scheduler_->node_data_.resize(copy->id() + 1,
+                                  scheduler_->DefaultSchedulerData());
+    scheduler_->node_data_[copy->id()] = scheduler_->node_data_[node->id()];
+    return copy;
+  }
+
   Scheduler* scheduler_;
   Schedule* schedule_;
+  BoolVector marked_;
+  ZoneDeque<BasicBlock*> marking_queue_;
 };
 
 
index b4cea23..882c761 100644 (file)
@@ -7,6 +7,7 @@
 
 #include "src/v8.h"
 
+#include "src/base/flags.h"
 #include "src/compiler/node.h"
 #include "src/compiler/opcodes.h"
 #include "src/compiler/schedule.h"
@@ -28,9 +29,13 @@ class SpecialRPONumberer;
 // ordering the basic blocks in the special RPO order.
 class Scheduler {
  public:
+  // Flags that control the mode of operation.
+  enum Flag { kNoFlags = 0u, kSplitNodes = 1u << 1 };
+  typedef base::Flags<Flag> Flags;
+
   // The complete scheduling algorithm. Creates a new schedule and places all
   // nodes from the graph into it.
-  static Schedule* ComputeSchedule(Zone* zone, Graph* graph);
+  static Schedule* ComputeSchedule(Zone* zone, Graph* graph, Flags flags);
 
   // Compute the RPO of blocks in an existing schedule.
   static BasicBlockVector* ComputeSpecialRPO(Zone* zone, Schedule* schedule);
@@ -60,6 +65,7 @@ class Scheduler {
   Zone* zone_;
   Graph* graph_;
   Schedule* schedule_;
+  Flags flags_;
   NodeVectorVector scheduled_nodes_;     // Per-block list of nodes in reverse.
   NodeVector schedule_root_nodes_;       // Fixed root nodes seed the worklist.
   ZoneQueue<Node*> schedule_queue_;      // Worklist of schedulable nodes.
@@ -68,7 +74,7 @@ class Scheduler {
   SpecialRPONumberer* special_rpo_;      // Special RPO numbering of blocks.
   ControlEquivalence* equivalence_;      // Control dependence equivalence.
 
-  Scheduler(Zone* zone, Graph* graph, Schedule* schedule);
+  Scheduler(Zone* zone, Graph* graph, Schedule* schedule, Flags flags);
 
   inline SchedulerData DefaultSchedulerData();
   inline SchedulerData* GetData(Node* node);
@@ -110,6 +116,9 @@ class Scheduler {
   void MovePlannedNodes(BasicBlock* from, BasicBlock* to);
 };
 
+
+DEFINE_OPERATORS_FOR_FLAGS(Scheduler::Flags)
+
 }  // namespace compiler
 }  // namespace internal
 }  // namespace v8
index e4a9df6..9cdfa3a 100644 (file)
@@ -397,6 +397,7 @@ DEFINE_BOOL(trace_turbo_jt, false, "trace TurboFan's jump threading")
 DEFINE_BOOL(turbo_asm, true, "enable TurboFan for asm.js code")
 DEFINE_BOOL(turbo_verify, DEBUG_BOOL, "verify TurboFan graphs at each phase")
 DEFINE_BOOL(turbo_stats, false, "print TurboFan statistics")
+DEFINE_BOOL(turbo_splitting, true, "split nodes during scheduling in TurboFan")
 DEFINE_BOOL(turbo_types, true, "use typed lowering in TurboFan")
 DEFINE_BOOL(turbo_source_positions, false,
             "track source code positions when building TurboFan IR")
diff --git a/test/mjsunit/asm/redundancy1.js b/test/mjsunit/asm/redundancy1.js
new file mode 100644 (file)
index 0000000..c7e0e37
--- /dev/null
@@ -0,0 +1,26 @@
+// Copyright 2015 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Flags: --turbo-splitting
+
+function module(stdlib, foreign, heap) {
+    "use asm";
+    function foo(i) {
+      var j = 0;
+      i = i|0;
+      if (i < 0) {
+        j = i+1|0;
+      }
+      if (i > 0) {
+        j = i+1|0;
+      }
+      return j;
+    }
+    return { foo: foo };
+}
+
+var foo = module(this, {}, new ArrayBuffer(64*1024)).foo;
+assertEquals(0, foo(0));
+assertEquals(0, foo(-1));
+assertEquals(12, foo(11));
diff --git a/test/mjsunit/asm/redundancy2.js b/test/mjsunit/asm/redundancy2.js
new file mode 100644 (file)
index 0000000..95a55b5
--- /dev/null
@@ -0,0 +1,29 @@
+// Copyright 2015 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Flags: --turbo-splitting
+
+function module(stdlib, foreign, heap) {
+    "use asm";
+    function foo(i) {
+      var j = 0;
+      i = i|0;
+      switch (i) {
+        case 0:
+          j = i+1|0;
+          break;
+        case 1:
+          j = i+1|0;
+          break;
+        default:
+          j = i;
+          break;
+      }
+      return j;
+    }
+    return { foo: foo };
+}
+
+var foo = module(this, {}, new ArrayBuffer(64*1024)).foo;
+print(foo(1));
index aa93c09..2b46b06 100644 (file)
@@ -32,7 +32,8 @@ class SchedulerTest : public TestWithZone {
       os << AsDOT(*graph);
     }
 
-    Schedule* schedule = Scheduler::ComputeSchedule(graph->zone(), graph);
+    Schedule* schedule = Scheduler::ComputeSchedule(graph->zone(), graph,
+                                                    Scheduler::kSplitNodes);
 
     if (FLAG_trace_turbo_scheduler) {
       OFStream os(stdout);
@@ -151,7 +152,7 @@ const Operator kIntAdd(IrOpcode::kInt32Add, Operator::kPure, "Int32Add", 2, 0,
 TEST_F(SchedulerTest, BuildScheduleEmpty) {
   graph()->SetStart(graph()->NewNode(common()->Start(0)));
   graph()->SetEnd(graph()->NewNode(common()->End(), graph()->start()));
-  USE(Scheduler::ComputeSchedule(zone(), graph()));
+  USE(Scheduler::ComputeSchedule(zone(), graph(), Scheduler::kNoFlags));
 }
 
 
@@ -164,7 +165,7 @@ TEST_F(SchedulerTest, BuildScheduleOneParameter) {
 
   graph()->SetEnd(graph()->NewNode(common()->End(), ret));
 
-  USE(Scheduler::ComputeSchedule(zone(), graph()));
+  USE(Scheduler::ComputeSchedule(zone(), graph(), Scheduler::kNoFlags));
 }
 
 
@@ -1734,7 +1735,7 @@ TARGET_TEST_F(SchedulerTest, NestedFloatingDiamondWithChain) {
 
   graph()->SetEnd(end);
 
-  ComputeAndVerifySchedule(35, graph());
+  ComputeAndVerifySchedule(36, graph());
 }