From 159b14172f75ddb89e5daf2cceb9078f1f294916 Mon Sep 17 00:00:00 2001 From: titzer Date: Mon, 12 Jan 2015 03:39:48 -0800 Subject: [PATCH] [turbofan] Implement OSR for outer loops. R=bmeurer@chromium.org BUG= Review URL: https://codereview.chromium.org/809333002 Cr-Commit-Position: refs/heads/master@{#26020} --- BUILD.gn | 2 + src/bailout-reason.h | 2 + src/compiler/arm/code-generator-arm.cc | 18 ++ src/compiler/arm64/code-generator-arm64.cc | 18 ++ src/compiler/ast-graph-builder.cc | 23 +- src/compiler/ast-graph-builder.h | 6 + src/compiler/code-generator.cc | 18 +- src/compiler/code-generator.h | 1 + src/compiler/common-operator.cc | 52 ++-- src/compiler/common-operator.h | 4 + src/compiler/control-builders.cc | 5 +- src/compiler/control-builders.h | 2 +- src/compiler/frame.h | 5 + src/compiler/graph-builder.cc | 32 ++- src/compiler/graph-builder.h | 6 +- src/compiler/ia32/code-generator-ia32.cc | 21 ++ src/compiler/instruction-selector-impl.h | 9 + src/compiler/instruction-selector.cc | 12 + src/compiler/instruction-selector.h | 1 + src/compiler/linkage-impl.h | 23 ++ src/compiler/linkage.h | 14 +- src/compiler/mips/code-generator-mips.cc | 18 ++ src/compiler/mips64/code-generator-mips64.cc | 17 ++ src/compiler/opcodes.h | 3 + src/compiler/osr.cc | 69 +++++ src/compiler/osr.h | 128 +++++++++ src/compiler/pipeline.cc | 31 ++- src/compiler/register-allocator.cc | 4 +- src/compiler/scheduler.cc | 3 +- src/compiler/typer.cc | 10 + src/compiler/verifier.cc | 15 + src/compiler/x64/code-generator-x64.cc | 18 ++ src/flag-definitions.h | 3 +- src/objects.cc | 1 - src/runtime/runtime-compiler.cc | 11 +- test/cctest/cctest.gyp | 1 + test/cctest/compiler/test-osr.cc | 274 +++++++++++++++++++ test/mjsunit/compiler/osr-alignment.js | 2 +- test/mjsunit/compiler/osr-manual1.js | 47 ++++ test/mjsunit/compiler/osr-manual2.js | 47 ++++ test/mjsunit/compiler/osr-multiple.js | 44 +++ test/mjsunit/compiler/osr-multiple2.js | 51 ++++ test/mjsunit/compiler/osr-multiple3.js | 57 ++++ test/mjsunit/compiler/osr-sar.js | 2 +- test/mjsunit/compiler/osr-warm.js | 2 +- tools/gyp/v8.gyp | 2 + 46 files changed, 1080 insertions(+), 54 deletions(-) create mode 100644 src/compiler/osr.cc create mode 100644 src/compiler/osr.h create mode 100644 test/cctest/compiler/test-osr.cc create mode 100644 test/mjsunit/compiler/osr-manual1.js create mode 100644 test/mjsunit/compiler/osr-manual2.js create mode 100644 test/mjsunit/compiler/osr-multiple.js create mode 100644 test/mjsunit/compiler/osr-multiple2.js create mode 100644 test/mjsunit/compiler/osr-multiple3.js diff --git a/BUILD.gn b/BUILD.gn index 58e6c38ee..526512965 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -573,6 +573,8 @@ source_set("v8_base") { "src/compiler/operator-properties.h", "src/compiler/operator.cc", "src/compiler/operator.h", + "src/compiler/osr.cc", + "src/compiler/osr.h", "src/compiler/pipeline.cc", "src/compiler/pipeline.h", "src/compiler/pipeline-statistics.cc", diff --git a/src/bailout-reason.h b/src/bailout-reason.h index 7287d629d..e1dd6d7b5 100644 --- a/src/bailout-reason.h +++ b/src/bailout-reason.h @@ -322,6 +322,8 @@ namespace internal { V(kWrongFunctionContext, "Wrong context passed to function") \ V(kWrongAddressOrValuePassedToRecordWrite, \ "Wrong address or value passed to RecordWrite") \ + V(kShouldNotDirectlyEnterOsrFunction, \ + "Should not directly enter OSR-compiled function") \ V(kYield, "Yield") diff --git a/src/compiler/arm/code-generator-arm.cc b/src/compiler/arm/code-generator-arm.cc index cfa4de9b3..9bbdaeef5 100644 --- a/src/compiler/arm/code-generator-arm.cc +++ b/src/compiler/arm/code-generator-arm.cc @@ -9,6 +9,7 @@ #include "src/compiler/gap-resolver.h" #include "src/compiler/node-matchers.h" #include "src/compiler/node-properties-inl.h" +#include "src/compiler/osr.h" #include "src/scopes.h" namespace v8 { @@ -883,6 +884,23 @@ void CodeGenerator::AssemblePrologue() { StandardFrameConstants::kFixedFrameSizeFromFp); } int stack_slots = frame()->GetSpillSlotCount(); + + if (info()->is_osr()) { + // TurboFan OSR-compiled functions cannot be entered directly. + __ Abort(kShouldNotDirectlyEnterOsrFunction); + + // Unoptimized code jumps directly to this entrypoint while the unoptimized + // frame is still on the stack. Optimized code uses OSR values directly from + // the unoptimized frame. Thus, all that needs to be done is to allocate the + // remaining stack slots. + if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --"); + osr_pc_offset_ = __ pc_offset(); + int unoptimized_slots = + static_cast(OsrHelper(info()).UnoptimizedFrameSlots()); + DCHECK(stack_slots >= unoptimized_slots); + stack_slots -= unoptimized_slots; + } + if (stack_slots > 0) { __ sub(sp, sp, Operand(stack_slots * kPointerSize)); } diff --git a/src/compiler/arm64/code-generator-arm64.cc b/src/compiler/arm64/code-generator-arm64.cc index e02523605..23cc22724 100644 --- a/src/compiler/arm64/code-generator-arm64.cc +++ b/src/compiler/arm64/code-generator-arm64.cc @@ -9,6 +9,7 @@ #include "src/compiler/gap-resolver.h" #include "src/compiler/node-matchers.h" #include "src/compiler/node-properties-inl.h" +#include "src/compiler/osr.h" #include "src/scopes.h" namespace v8 { @@ -976,6 +977,23 @@ void CodeGenerator::AssemblePrologue() { StandardFrameConstants::kFixedFrameSizeFromFp); } int stack_slots = frame()->GetSpillSlotCount(); + + if (info()->is_osr()) { + // TurboFan OSR-compiled functions cannot be entered directly. + __ Abort(kShouldNotDirectlyEnterOsrFunction); + + // Unoptimized code jumps directly to this entrypoint while the unoptimized + // frame is still on the stack. Optimized code uses OSR values directly from + // the unoptimized frame. Thus, all that needs to be done is to allocate the + // remaining stack slots. + if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --"); + osr_pc_offset_ = __ pc_offset(); + int unoptimized_slots = + static_cast(OsrHelper(info()).UnoptimizedFrameSlots()); + DCHECK(stack_slots >= unoptimized_slots); + stack_slots -= unoptimized_slots; + } + if (stack_slots > 0) { Register sp = __ StackPointer(); if (!sp.Is(csp)) { diff --git a/src/compiler/ast-graph-builder.cc b/src/compiler/ast-graph-builder.cc index cde5e7182..5c91b9515 100644 --- a/src/compiler/ast-graph-builder.cc +++ b/src/compiler/ast-graph-builder.cc @@ -62,10 +62,17 @@ bool AstGraphBuilder::CreateGraph() { int parameter_count = info()->num_parameters(); graph()->SetStart(graph()->NewNode(common()->Start(parameter_count))); + Node* start = graph()->start(); // Initialize the top-level environment. - Environment env(this, scope, graph()->start()); + Environment env(this, scope, start); set_environment(&env); + if (info()->is_osr()) { + // Use OSR normal entry as the start of the top-level environment. + // It will be replaced with {Dead} after typing and optimizations. + NewNode(common()->OsrNormalEntry()); + } + // Initialize the incoming context. Node* outer_context = GetFunctionContext(); set_current_context(outer_context); @@ -589,7 +596,7 @@ void AstGraphBuilder::VisitSwitchStatement(SwitchStatement* stmt) { void AstGraphBuilder::VisitDoWhileStatement(DoWhileStatement* stmt) { LoopBuilder while_loop(this); - while_loop.BeginLoop(GetVariablesAssignedInLoop(stmt)); + while_loop.BeginLoop(GetVariablesAssignedInLoop(stmt), IsOsrLoopEntry(stmt)); VisitIterationBody(stmt, &while_loop, 0); while_loop.EndBody(); VisitForTest(stmt->cond()); @@ -601,7 +608,7 @@ void AstGraphBuilder::VisitDoWhileStatement(DoWhileStatement* stmt) { void AstGraphBuilder::VisitWhileStatement(WhileStatement* stmt) { LoopBuilder while_loop(this); - while_loop.BeginLoop(GetVariablesAssignedInLoop(stmt)); + while_loop.BeginLoop(GetVariablesAssignedInLoop(stmt), IsOsrLoopEntry(stmt)); VisitForTest(stmt->cond()); Node* condition = environment()->Pop(); while_loop.BreakUnless(condition); @@ -614,7 +621,7 @@ void AstGraphBuilder::VisitWhileStatement(WhileStatement* stmt) { void AstGraphBuilder::VisitForStatement(ForStatement* stmt) { LoopBuilder for_loop(this); VisitIfNotNull(stmt->init()); - for_loop.BeginLoop(GetVariablesAssignedInLoop(stmt)); + for_loop.BeginLoop(GetVariablesAssignedInLoop(stmt), IsOsrLoopEntry(stmt)); if (stmt->cond() != NULL) { VisitForTest(stmt->cond()); Node* condition = environment()->Pop(); @@ -690,7 +697,8 @@ void AstGraphBuilder::VisitForInStatement(ForInStatement* stmt) { environment()->Push(jsgraph()->ZeroConstant()); // PrepareForBailoutForId(stmt->BodyId(), NO_REGISTERS); LoopBuilder for_loop(this); - for_loop.BeginLoop(GetVariablesAssignedInLoop(stmt)); + for_loop.BeginLoop(GetVariablesAssignedInLoop(stmt), + IsOsrLoopEntry(stmt)); // Check loop termination condition. Node* index = environment()->Peek(0); Node* exit_cond = @@ -2209,6 +2217,11 @@ Node* AstGraphBuilder::BuildStackCheck() { } +bool AstGraphBuilder::IsOsrLoopEntry(IterationStatement* stmt) { + return info()->osr_ast_id() == stmt->OsrEntryId(); +} + + void AstGraphBuilder::PrepareFrameState(Node* node, BailoutId ast_id, OutputFrameStateCombine combine) { if (OperatorProperties::HasFrameStateInput(node->op())) { diff --git a/src/compiler/ast-graph-builder.h b/src/compiler/ast-graph-builder.h index 0337c813b..92c871234 100644 --- a/src/compiler/ast-graph-builder.h +++ b/src/compiler/ast-graph-builder.h @@ -114,7 +114,10 @@ class AstGraphBuilder : public StructuredGraphBuilder, public AstVisitor { // Builder for stack-check guards. Node* BuildStackCheck(); + bool IsOsrLoopEntry(IterationStatement* stmt); + #define DECLARE_VISIT(type) void Visit##type(type* node) OVERRIDE; + // Visiting functions for AST nodes make this an AstVisitor. AST_NODE_LIST(DECLARE_VISIT) #undef DECLARE_VISIT @@ -140,6 +143,9 @@ class AstGraphBuilder : public StructuredGraphBuilder, public AstVisitor { SetOncePointer function_closure_; SetOncePointer function_context_; + // The node representing the OSR entry into the loop, if any. + SetOncePointer osr_loop_entry_; + // Result of loop assignment analysis performed before graph creation. LoopAssignmentAnalysis* loop_assignment_analysis_; diff --git a/src/compiler/code-generator.cc b/src/compiler/code-generator.cc index cfe4f0660..df485eba9 100644 --- a/src/compiler/code-generator.cc +++ b/src/compiler/code-generator.cc @@ -28,7 +28,8 @@ CodeGenerator::CodeGenerator(Frame* frame, Linkage* linkage, deoptimization_literals_(code->zone()), translations_(code->zone()), last_lazy_deopt_pc_(0), - ools_(nullptr) { + ools_(nullptr), + osr_pc_offset_(-1) { for (int i = 0; i < code->InstructionBlockCount(); ++i) { new (&labels_[i]) Label; } @@ -236,7 +237,7 @@ void CodeGenerator::AssembleGap(GapInstruction* instr) { void CodeGenerator::PopulateDeoptimizationData(Handle code_object) { CompilationInfo* info = this->info(); int deopt_count = static_cast(deoptimization_states_.size()); - if (deopt_count == 0) return; + if (deopt_count == 0 && !info->is_osr()) return; Handle data = DeoptimizationInputData::New(isolate(), deopt_count, TENURED); @@ -266,10 +267,15 @@ void CodeGenerator::PopulateDeoptimizationData(Handle code_object) { data->SetLiteralArray(*literals); } - // No OSR in Turbofan yet... - BailoutId osr_ast_id = BailoutId::None(); - data->SetOsrAstId(Smi::FromInt(osr_ast_id.ToInt())); - data->SetOsrPcOffset(Smi::FromInt(-1)); + if (info->is_osr()) { + DCHECK(osr_pc_offset_ >= 0); + data->SetOsrAstId(Smi::FromInt(info_->osr_ast_id().ToInt())); + data->SetOsrPcOffset(Smi::FromInt(osr_pc_offset_)); + } else { + BailoutId osr_ast_id = BailoutId::None(); + data->SetOsrAstId(Smi::FromInt(osr_ast_id.ToInt())); + data->SetOsrPcOffset(Smi::FromInt(-1)); + } // Populate deoptimization entries. for (int i = 0; i < deopt_count; i++) { diff --git a/src/compiler/code-generator.h b/src/compiler/code-generator.h index 747bad2e2..d59105e67 100644 --- a/src/compiler/code-generator.h +++ b/src/compiler/code-generator.h @@ -146,6 +146,7 @@ class CodeGenerator FINAL : public GapResolver::Assembler { TranslationBuffer translations_; int last_lazy_deopt_pc_; OutOfLineCode* ools_; + int osr_pc_offset_; }; } // namespace compiler diff --git a/src/compiler/common-operator.cc b/src/compiler/common-operator.cc index a6cca456d..54059ac29 100644 --- a/src/compiler/common-operator.cc +++ b/src/compiler/common-operator.cc @@ -103,13 +103,15 @@ std::ostream& operator<<(std::ostream& os, FrameStateCallInfo const& info) { } -#define CACHED_OP_LIST(V) \ - V(Dead, Operator::kFoldable, 0, 0, 0, 1) \ - V(End, Operator::kFoldable, 0, 0, 1, 0) \ - V(IfTrue, Operator::kFoldable, 0, 0, 1, 1) \ - V(IfFalse, Operator::kFoldable, 0, 0, 1, 1) \ - V(Throw, Operator::kFoldable, 1, 1, 1, 1) \ - V(Return, Operator::kNoProperties, 1, 1, 1, 1) +#define CACHED_OP_LIST(V) \ + V(Dead, Operator::kFoldable, 0, 0, 0, 0, 0, 1) \ + V(End, Operator::kFoldable, 0, 0, 1, 0, 0, 0) \ + V(IfTrue, Operator::kFoldable, 0, 0, 1, 0, 0, 1) \ + V(IfFalse, Operator::kFoldable, 0, 0, 1, 0, 0, 1) \ + V(Throw, Operator::kFoldable, 1, 1, 1, 0, 0, 1) \ + V(Return, Operator::kNoProperties, 1, 1, 1, 0, 0, 1) \ + V(OsrNormalEntry, Operator::kFoldable, 0, 1, 1, 0, 1, 1) \ + V(OsrLoopEntry, Operator::kFoldable, 0, 1, 1, 0, 1, 1) #define CACHED_LOOP_LIST(V) \ @@ -139,14 +141,16 @@ std::ostream& operator<<(std::ostream& os, FrameStateCallInfo const& info) { struct CommonOperatorGlobalCache FINAL { -#define CACHED(Name, properties, value_input_count, effect_input_count, \ - control_input_count, control_output_count) \ - struct Name##Operator FINAL : public Operator { \ - Name##Operator() \ - : Operator(IrOpcode::k##Name, properties, #Name, value_input_count, \ - effect_input_count, control_input_count, 0, 0, \ - control_output_count) {} \ - }; \ +#define CACHED(Name, properties, value_input_count, effect_input_count, \ + control_input_count, value_output_count, effect_output_count, \ + control_output_count) \ + struct Name##Operator FINAL : public Operator { \ + Name##Operator() \ + : Operator(IrOpcode::k##Name, properties, #Name, value_input_count, \ + effect_input_count, control_input_count, \ + value_output_count, effect_output_count, \ + control_output_count) {} \ + }; \ Name##Operator k##Name##Operator; CACHED_OP_LIST(CACHED) #undef CACHED @@ -214,10 +218,11 @@ CommonOperatorBuilder::CommonOperatorBuilder(Zone* zone) : cache_(kCache.Get()), zone_(zone) {} -#define CACHED(Name, properties, value_input_count, effect_input_count, \ - control_input_count, control_output_count) \ - const Operator* CommonOperatorBuilder::Name() { \ - return &cache_.k##Name##Operator; \ +#define CACHED(Name, properties, value_input_count, effect_input_count, \ + control_input_count, value_output_count, effect_output_count, \ + control_output_count) \ + const Operator* CommonOperatorBuilder::Name() { \ + return &cache_.k##Name##Operator; \ } CACHED_OP_LIST(CACHED) #undef CACHED @@ -310,6 +315,15 @@ const Operator* CommonOperatorBuilder::Parameter(int index) { } +const Operator* CommonOperatorBuilder::OsrValue(int index) { + return new (zone()) Operator1( // -- + IrOpcode::kOsrValue, Operator::kPure, // opcode + "OsrValue", // name + 0, 0, 1, 1, 0, 0, // counts + index); // parameter +} + + const Operator* CommonOperatorBuilder::Int32Constant(int32_t value) { return new (zone()) Operator1( // -- IrOpcode::kInt32Constant, Operator::kPure, // opcode diff --git a/src/compiler/common-operator.h b/src/compiler/common-operator.h index af6066b13..2505aff8c 100644 --- a/src/compiler/common-operator.h +++ b/src/compiler/common-operator.h @@ -173,6 +173,10 @@ class CommonOperatorBuilder FINAL : public ZoneObject { const Operator* Merge(int control_input_count); const Operator* Parameter(int index); + const Operator* OsrNormalEntry(); + const Operator* OsrLoopEntry(); + const Operator* OsrValue(int index); + const Operator* Int32Constant(int32_t); const Operator* Int64Constant(int64_t); const Operator* Float32Constant(volatile float); diff --git a/src/compiler/control-builders.cc b/src/compiler/control-builders.cc index 8725244eb..2978d8b64 100644 --- a/src/compiler/control-builders.cc +++ b/src/compiler/control-builders.cc @@ -32,9 +32,8 @@ void IfBuilder::End() { } -void LoopBuilder::BeginLoop(BitVector* assigned) { - builder_->NewLoop(); - loop_environment_ = environment()->CopyForLoop(assigned); +void LoopBuilder::BeginLoop(BitVector* assigned, bool is_osr) { + loop_environment_ = environment()->CopyForLoop(assigned, is_osr); continue_environment_ = environment()->CopyAsUnreachable(); break_environment_ = environment()->CopyAsUnreachable(); } diff --git a/src/compiler/control-builders.h b/src/compiler/control-builders.h index 11adfdb0f..e8e3305db 100644 --- a/src/compiler/control-builders.h +++ b/src/compiler/control-builders.h @@ -69,7 +69,7 @@ class LoopBuilder FINAL : public ControlBuilder { break_environment_(NULL) {} // Primitive control commands. - void BeginLoop(BitVector* assigned); + void BeginLoop(BitVector* assigned, bool is_osr = false); void EndBody(); void EndLoop(); diff --git a/src/compiler/frame.h b/src/compiler/frame.h index f99d7bd1e..416d6eea5 100644 --- a/src/compiler/frame.h +++ b/src/compiler/frame.h @@ -63,6 +63,11 @@ class Frame : public ZoneObject { return spill_slot_count_++; } + void ReserveSpillSlots(size_t slot_count) { + DCHECK_EQ(0, spill_slot_count_); // can only reserve before allocation. + spill_slot_count_ = static_cast(slot_count); + } + private: int register_save_area_size_; int spill_slot_count_; diff --git a/src/compiler/graph-builder.cc b/src/compiler/graph-builder.cc index 6321aaa4e..9fa7a0275 100644 --- a/src/compiler/graph-builder.cc +++ b/src/compiler/graph-builder.cc @@ -168,10 +168,12 @@ void StructuredGraphBuilder::Environment::Merge(Environment* other) { } -void StructuredGraphBuilder::Environment::PrepareForLoop(BitVector* assigned) { - Node* control = GetControlDependency(); +void StructuredGraphBuilder::Environment::PrepareForLoop(BitVector* assigned, + bool is_osr) { int size = static_cast(values()->size()); - if (assigned == NULL) { + + Node* control = builder_->NewLoop(); + if (assigned == nullptr) { // Assume that everything is updated in the loop. for (int i = 0; i < size; ++i) { Node* phi = builder_->NewPhi(1, values()->at(i), control); @@ -187,6 +189,30 @@ void StructuredGraphBuilder::Environment::PrepareForLoop(BitVector* assigned) { } Node* effect = builder_->NewEffectPhi(1, GetEffectDependency(), control); UpdateEffectDependency(effect); + + if (is_osr) { + // Merge OSR values as inputs to the phis of the loop. + Graph* graph = builder_->graph(); + Node* osr_loop_entry = builder_->graph()->NewNode( + builder_->common()->OsrLoopEntry(), graph->start(), graph->start()); + + builder_->MergeControl(control, osr_loop_entry); + builder_->MergeEffect(effect, osr_loop_entry, control); + + for (int i = 0; i < size; ++i) { + Node* val = values()->at(i); + // TODO(titzer): use IrOpcode::IsConstant() or similar. + if (val->opcode() == IrOpcode::kNumberConstant || + val->opcode() == IrOpcode::kInt32Constant || + val->opcode() == IrOpcode::kInt64Constant || + val->opcode() == IrOpcode::kFloat64Constant || + val->opcode() == IrOpcode::kHeapConstant) + continue; + Node* osr_value = + graph->NewNode(builder_->common()->OsrValue(i), osr_loop_entry); + values()->at(i) = builder_->MergeValue(val, osr_value, control); + } + } } diff --git a/src/compiler/graph-builder.h b/src/compiler/graph-builder.h index 241e6cf7e..fd0f0559d 100644 --- a/src/compiler/graph-builder.h +++ b/src/compiler/graph-builder.h @@ -219,8 +219,8 @@ class StructuredGraphBuilder::Environment : public ZoneObject { } // Copies this environment at a loop header control-flow point. - Environment* CopyForLoop(BitVector* assigned) { - PrepareForLoop(assigned); + Environment* CopyForLoop(BitVector* assigned, bool is_osr = false) { + PrepareForLoop(assigned, is_osr); return builder()->CopyEnvironment(this); } @@ -234,7 +234,7 @@ class StructuredGraphBuilder::Environment : public ZoneObject { NodeVector* values() { return &values_; } // Prepare environment to be used as loop header. - void PrepareForLoop(BitVector* assigned); + void PrepareForLoop(BitVector* assigned, bool is_osr = false); private: StructuredGraphBuilder* builder_; diff --git a/src/compiler/ia32/code-generator-ia32.cc b/src/compiler/ia32/code-generator-ia32.cc index 55f7426a4..fabfd0a1c 100644 --- a/src/compiler/ia32/code-generator-ia32.cc +++ b/src/compiler/ia32/code-generator-ia32.cc @@ -8,6 +8,7 @@ #include "src/compiler/gap-resolver.h" #include "src/compiler/node-matchers.h" #include "src/compiler/node-properties-inl.h" +#include "src/compiler/osr.h" #include "src/ia32/assembler-ia32.h" #include "src/ia32/macro-assembler-ia32.h" #include "src/scopes.h" @@ -1023,6 +1024,8 @@ void CodeGenerator::AssemblePrologue() { frame->SetRegisterSaveAreaSize(register_save_area_size); } } else if (descriptor->IsJSFunctionCall()) { + // TODO(turbofan): this prologue is redundant with OSR, but needed for + // code aging. CompilationInfo* info = this->info(); __ Prologue(info->IsCodePreAgingActive()); frame->SetRegisterSaveAreaSize( @@ -1032,7 +1035,25 @@ void CodeGenerator::AssemblePrologue() { frame->SetRegisterSaveAreaSize( StandardFrameConstants::kFixedFrameSizeFromFp); } + + if (info()->is_osr()) { + // TurboFan OSR-compiled functions cannot be entered directly. + __ Abort(kShouldNotDirectlyEnterOsrFunction); + + // Unoptimized code jumps directly to this entrypoint while the unoptimized + // frame is still on the stack. Optimized code uses OSR values directly from + // the unoptimized frame. Thus, all that needs to be done is to allocate the + // remaining stack slots. + if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --"); + osr_pc_offset_ = __ pc_offset(); + int unoptimized_slots = + static_cast(OsrHelper(info()).UnoptimizedFrameSlots()); + DCHECK(stack_slots >= unoptimized_slots); + stack_slots -= unoptimized_slots; + } + if (stack_slots > 0) { + // Allocate the stack slots used by this frame. __ sub(esp, Immediate(stack_slots * kPointerSize)); } } diff --git a/src/compiler/instruction-selector-impl.h b/src/compiler/instruction-selector-impl.h index bdcd952b5..adf8492ef 100644 --- a/src/compiler/instruction-selector-impl.h +++ b/src/compiler/instruction-selector-impl.h @@ -188,13 +188,22 @@ class OperandGenerator { UnallocatedOperand* ToUnallocatedOperand(LinkageLocation location, MachineType type) { if (location.location_ == LinkageLocation::ANY_REGISTER) { + // any machine register. return new (zone()) UnallocatedOperand(UnallocatedOperand::MUST_HAVE_REGISTER); } if (location.location_ < 0) { + // a location on the caller frame. return new (zone()) UnallocatedOperand(UnallocatedOperand::FIXED_SLOT, location.location_); } + if (location.location_ > LinkageLocation::ANY_REGISTER) { + // a spill location on this (callee) frame. + return new (zone()) UnallocatedOperand( + UnallocatedOperand::FIXED_SLOT, + location.location_ - LinkageLocation::ANY_REGISTER - 1); + } + // a fixed register. if (RepresentationOf(type) == kRepFloat64) { return new (zone()) UnallocatedOperand( UnallocatedOperand::FIXED_DOUBLE_REGISTER, location.location_); diff --git a/src/compiler/instruction-selector.cc b/src/compiler/instruction-selector.cc index f987248be..4b07194c8 100644 --- a/src/compiler/instruction-selector.cc +++ b/src/compiler/instruction-selector.cc @@ -543,6 +543,8 @@ MachineType InstructionSelector::GetMachineType(Node* node) { return kMachAnyTagged; case IrOpcode::kParameter: return linkage()->GetParameterType(OpParameter(node)); + case IrOpcode::kOsrValue: + return kMachAnyTagged; case IrOpcode::kPhi: return OpParameter(node); case IrOpcode::kProjection: @@ -681,6 +683,8 @@ void InstructionSelector::VisitNode(Node* node) { MarkAsRepresentation(type, node); return VisitParameter(node); } + case IrOpcode::kOsrValue: + return MarkAsReference(node), VisitOsrValue(node); case IrOpcode::kPhi: { MachineType type = OpParameter(node); MarkAsRepresentation(type, node); @@ -965,6 +969,14 @@ void InstructionSelector::VisitParameter(Node* node) { } +void InstructionSelector::VisitOsrValue(Node* node) { + OperandGenerator g(this); + int index = OpParameter(node); + Emit(kArchNop, g.DefineAsLocation(node, linkage()->GetOsrValueLocation(index), + kMachAnyTagged)); +} + + void InstructionSelector::VisitPhi(Node* node) { const int input_count = node->op()->ValueInputCount(); PhiInstruction* phi = new (instruction_zone()) diff --git a/src/compiler/instruction-selector.h b/src/compiler/instruction-selector.h index 12ee0a089..d77fc02fd 100644 --- a/src/compiler/instruction-selector.h +++ b/src/compiler/instruction-selector.h @@ -189,6 +189,7 @@ class InstructionSelector FINAL { void VisitFinish(Node* node); void VisitParameter(Node* node); + void VisitOsrValue(Node* node); void VisitPhi(Node* node); void VisitProjection(Node* node); void VisitConstant(Node* node); diff --git a/src/compiler/linkage-impl.h b/src/compiler/linkage-impl.h index c13bd74f4..6c668596d 100644 --- a/src/compiler/linkage-impl.h +++ b/src/compiler/linkage-impl.h @@ -6,6 +6,7 @@ #define V8_COMPILER_LINKAGE_IMPL_H_ #include "src/code-stubs.h" +#include "src/compiler/osr.h" namespace v8 { namespace internal { @@ -226,6 +227,28 @@ class LinkageHelper { return LinkageLocation(i); } }; + + +LinkageLocation Linkage::GetOsrValueLocation(int index) const { + CHECK(incoming_->IsJSFunctionCall()); + int parameter_count = static_cast(incoming_->JSParameterCount() - 1); + int first_stack_slot = OsrHelper::FirstStackSlotIndex(parameter_count); + + if (index >= first_stack_slot) { + // Local variable stored in this (callee) stack. + int spill_index = + LinkageLocation::ANY_REGISTER + 1 + index - first_stack_slot; + // TODO(titzer): bailout instead of crashing here. + CHECK(spill_index <= LinkageLocation::MAX_STACK_SLOT); + return LinkageLocation(spill_index); + } else { + // Parameter. Use the assigned location from the incoming call descriptor. + int parameter_index = 1 + index; // skip index 0, which is the target. + return incoming_->GetInputLocation(parameter_index); + } +} + + } // namespace compiler } // namespace internal } // namespace v8 diff --git a/src/compiler/linkage.h b/src/compiler/linkage.h index 0ad0761a0..c0f143c0c 100644 --- a/src/compiler/linkage.h +++ b/src/compiler/linkage.h @@ -18,19 +18,26 @@ class CallInterfaceDescriptor; namespace compiler { +class OsrHelper; + // Describes the location for a parameter or a return value to a call. class LinkageLocation { public: explicit LinkageLocation(int location) : location_(location) {} - static const int16_t ANY_REGISTER = 32767; + static const int16_t ANY_REGISTER = 1023; + static const int16_t MAX_STACK_SLOT = 32767; static LinkageLocation AnyRegister() { return LinkageLocation(ANY_REGISTER); } private: friend class CallDescriptor; friend class OperandGenerator; - int16_t location_; // >= 0 implies register, otherwise stack slot. + // location < 0 -> a stack slot on the caller frame + // 0 <= location < 1023 -> a specific machine register + // 1023 <= location < 1024 -> any machine register + // 1024 <= location -> a stack slot in the callee frame + int16_t location_; }; typedef Signature LocationSignature; @@ -227,6 +234,9 @@ class Linkage : public ZoneObject { static bool NeedsFrameState(Runtime::FunctionId function); + // Get the location where an incoming OSR value is stored. + LinkageLocation GetOsrValueLocation(int index) const; + private: Zone* const zone_; CallDescriptor* const incoming_; diff --git a/src/compiler/mips/code-generator-mips.cc b/src/compiler/mips/code-generator-mips.cc index dd92837bb..792abef24 100644 --- a/src/compiler/mips/code-generator-mips.cc +++ b/src/compiler/mips/code-generator-mips.cc @@ -7,6 +7,7 @@ #include "src/compiler/gap-resolver.h" #include "src/compiler/node-matchers.h" #include "src/compiler/node-properties-inl.h" +#include "src/compiler/osr.h" #include "src/mips/macro-assembler-mips.h" #include "src/scopes.h" @@ -952,6 +953,23 @@ void CodeGenerator::AssemblePrologue() { StandardFrameConstants::kFixedFrameSizeFromFp); } int stack_slots = frame()->GetSpillSlotCount(); + + if (info()->is_osr()) { + // TurboFan OSR-compiled functions cannot be entered directly. + __ Abort(kShouldNotDirectlyEnterOsrFunction); + + // Unoptimized code jumps directly to this entrypoint while the unoptimized + // frame is still on the stack. Optimized code uses OSR values directly from + // the unoptimized frame. Thus, all that needs to be done is to allocate the + // remaining stack slots. + if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --"); + osr_pc_offset_ = __ pc_offset(); + int unoptimized_slots = + static_cast(OsrHelper(info()).UnoptimizedFrameSlots()); + DCHECK(stack_slots >= unoptimized_slots); + stack_slots -= unoptimized_slots; + } + if (stack_slots > 0) { __ Subu(sp, sp, Operand(stack_slots * kPointerSize)); } diff --git a/src/compiler/mips64/code-generator-mips64.cc b/src/compiler/mips64/code-generator-mips64.cc index dee7705f0..111103968 100644 --- a/src/compiler/mips64/code-generator-mips64.cc +++ b/src/compiler/mips64/code-generator-mips64.cc @@ -1211,6 +1211,23 @@ void CodeGenerator::AssemblePrologue() { StandardFrameConstants::kFixedFrameSizeFromFp); } int stack_slots = frame()->GetSpillSlotCount(); + + if (info()->is_osr()) { + // TurboFan OSR-compiled functions cannot be entered directly. + __ Abort(kShouldNotDirectlyEnterOsrFunction); + + // Unoptimized code jumps directly to this entrypoint while the unoptimized + // frame is still on the stack. Optimized code uses OSR values directly from + // the unoptimized frame. Thus, all that needs to be done is to allocate the + // remaining stack slots. + if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --"); + osr_pc_offset_ = __ pc_offset(); + int unoptimized_slots = + static_cast(OsrHelper(info()).UnoptimizedFrameSlots()); + DCHECK(stack_slots >= unoptimized_slots); + stack_slots -= unoptimized_slots; + } + if (stack_slots > 0) { __ Dsubu(sp, sp, Operand(stack_slots * kPointerSize)); } diff --git a/src/compiler/opcodes.h b/src/compiler/opcodes.h index 31f829856..bda44d385 100644 --- a/src/compiler/opcodes.h +++ b/src/compiler/opcodes.h @@ -15,6 +15,8 @@ V(Merge) \ V(Return) \ V(Terminate) \ + V(OsrNormalEntry) \ + V(OsrLoopEntry) \ V(Throw) #define CONTROL_OP_LIST(V) \ @@ -42,6 +44,7 @@ V(StateValues) \ V(Call) \ V(Parameter) \ + V(OsrValue) \ V(Projection) #define COMMON_OP_LIST(V) \ diff --git a/src/compiler/osr.cc b/src/compiler/osr.cc new file mode 100644 index 000000000..3360a1a32 --- /dev/null +++ b/src/compiler/osr.cc @@ -0,0 +1,69 @@ +// Copyright 2014 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "src/compiler.h" +#include "src/compiler/common-operator.h" +#include "src/compiler/control-reducer.h" +#include "src/compiler/frame.h" +#include "src/compiler/graph.h" +#include "src/compiler/js-graph.h" +#include "src/compiler/node.h" +#include "src/compiler/node-marker.h" +#include "src/compiler/osr.h" +#include "src/scopes.h" + +namespace v8 { +namespace internal { +namespace compiler { + +OsrHelper::OsrHelper(CompilationInfo* info) + : parameter_count_(info->scope()->num_parameters()), + stack_slot_count_(info->scope()->num_stack_slots()) {} + + +void OsrHelper::Deconstruct(JSGraph* jsgraph, CommonOperatorBuilder* common, + Zone* tmp_zone) { + NodeDeque queue(tmp_zone); + Graph* graph = jsgraph->graph(); + NodeMarker marker(graph, 2); + queue.push_back(graph->end()); + marker.Set(graph->end(), true); + + while (!queue.empty()) { + Node* node = queue.front(); + queue.pop_front(); + + // Rewrite OSR-related nodes. + switch (node->opcode()) { + case IrOpcode::kOsrNormalEntry: + node->ReplaceUses(graph->NewNode(common->Dead())); + break; + case IrOpcode::kOsrLoopEntry: + node->ReplaceUses(graph->start()); + break; + default: + break; + } + for (Node* const input : node->inputs()) { + if (!marker.Get(input)) { + marker.Set(input, true); + queue.push_back(input); + } + } + } + + ControlReducer::ReduceGraph(tmp_zone, jsgraph, common); +} + + +void OsrHelper::SetupFrame(Frame* frame) { + // The optimized frame will subsume the unoptimized frame. Do so by reserving + // the first spill slots. + frame->ReserveSpillSlots(UnoptimizedFrameSlots()); +} + + +} // namespace compiler +} // namespace internal +} // namespace v8 diff --git a/src/compiler/osr.h b/src/compiler/osr.h new file mode 100644 index 000000000..fb2267018 --- /dev/null +++ b/src/compiler/osr.h @@ -0,0 +1,128 @@ +// Copyright 2014 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_COMPILER_OSR_H_ +#define V8_COMPILER_OSR_H_ + +#include "src/zone.h" + +// TurboFan structures OSR graphs in a way that separates almost all phases of +// compilation from OSR implementation details. This is accomplished with +// special +// control nodes that are added at graph building time. In particular, the graph +// is built in such a way that typing still computes the best types and +// optimizations and lowering work unchanged. All that remains is to deconstruct +// the OSR artifacts before scheduling. The helper class below performs the +// necessary graph rewriting. + +// Graphs built for OSR from the AstGraphBuilder are structured as follows: +// Start +// +-------------------^^-----+ +// | | +// OsrNormalEntry OsrLoopEntry <-------------+ +// | | | +// control flow before loop | A OsrValue +// | | | | +// | +------------------------+ | +-------+ +// | | +-------------+ | | +--------+ +// | | | | | | | | +// ( Loop )<-----------|------------------ ( phi ) | +// | | | +// loop body | backedge(s) | +// | | | | +// | +--------------+ B <-----+ +// | +// end + +// The control structure expresses the relationship that the loop has a separate +// entrypoint which corresponds to entering the loop directly from start. +// Similarly, the values that come in from unoptimized code are represented with +// {OsrValue} nodes that merge into any phis associated with the OSR loop. +// The nodes {A} and {B} represent values in the "normal" graph that correspond +// to the values of those phis before the loop and on any backedges, +// respectively. + +// To deconstruct OSR, we simply replace the uses of the {OsrNormalEntry} +// control +// node with {Dead} and {OsrLoopEntry} with start and run the {ControlReducer}. +// Control reduction propagates the dead control forward, essentially "killing" +// all the code before the OSR loop. The entrypoint to the loop corresponding +// to the "normal" entry path will also be removed, as well as the inputs to +// the loop phis, resulting in the reduced graph: + +// Start +// Dead |^-------------------------+ +// | | | +// | | | +// | | | +// disconnected, dead | A=dead OsrValue +// | | +// +------------------+ +------+ +// | +-------------+ | +--------+ +// | | | | | | +// ( Loop )<-----------|------------------ ( phi ) | +// | | | +// loop body | backedge(s) | +// | | | | +// | +--------------+ B <-----+ +// | +// end + +// Other than the presences of the OsrValue nodes, this is a normal, schedulable +// graph. OsrValue nodes are handled specially in the instruction selector to +// simply load from the unoptimized frame. + +// For nested OSR loops, loop peeling must first be applied as many times as +// necessary in order to bring the OSR loop up to the top level (i.e. to be +// an outer loop). + +namespace v8 { +namespace internal { + +class CompilationInfo; + +namespace compiler { + +class JSGraph; +class CommonOperatorBuilder; +class Frame; +class Linkage; + +// Encapsulates logic relating to OSR compilations as well has handles some +// details of the frame layout. +class OsrHelper { + public: + explicit OsrHelper(CompilationInfo* info); + // Only for testing. + OsrHelper(size_t parameter_count, size_t stack_slot_count) + : parameter_count_(parameter_count), + stack_slot_count_(stack_slot_count) {} + + // Deconstructs the artificial {OsrNormalEntry} and rewrites the graph so + // that only the path corresponding to {OsrLoopEntry} remains. + void Deconstruct(JSGraph* jsgraph, CommonOperatorBuilder* common, + Zone* tmp_zone); + + // Prepares the frame w.r.t. OSR. + void SetupFrame(Frame* frame); + + // Returns the number of unoptimized frame slots for this OSR. + size_t UnoptimizedFrameSlots() { return stack_slot_count_; } + + // Returns the environment index of the first stack slot. + static int FirstStackSlotIndex(int parameter_count) { + // n.b. unlike Crankshaft, TurboFan environments do not contain the context. + return 1 + parameter_count; // receiver + params + } + + private: + size_t parameter_count_; + size_t stack_slot_count_; +}; + +} // namespace compiler +} // namespace internal +} // namespace v8 + +#endif // V8_COMPILER_OSR_H_ diff --git a/src/compiler/pipeline.cc b/src/compiler/pipeline.cc index 9aca0a6da..0ca90098b 100644 --- a/src/compiler/pipeline.cc +++ b/src/compiler/pipeline.cc @@ -28,6 +28,7 @@ #include "src/compiler/load-elimination.h" #include "src/compiler/machine-operator-reducer.h" #include "src/compiler/move-optimizer.h" +#include "src/compiler/osr.h" #include "src/compiler/pipeline-statistics.h" #include "src/compiler/register-allocator.h" #include "src/compiler/register-allocator-verifier.h" @@ -411,6 +412,18 @@ struct TyperPhase { }; +struct OsrDeconstructionPhase { + static const char* phase_name() { return "OSR deconstruction"; } + + void Run(PipelineData* data, Zone* temp_zone) { + SourcePositionTable::Scope pos(data->source_positions(), + SourcePosition::Unknown()); + OsrHelper osr_helper(data->info()); + osr_helper.Deconstruct(data->jsgraph(), data->common(), temp_zone); + } +}; + + struct TypedLoweringPhase { static const char* phase_name() { return "typed lowering"; } @@ -756,8 +769,8 @@ Handle Pipeline::GenerateCode() { info()->function()->dont_optimize_reason() == kSuperReference || // TODO(turbofan): Make class literals work and remove this bailout. info()->function()->dont_optimize_reason() == kClassLiteral || - // TODO(turbofan): Make OSR work and remove this bailout. - info()->is_osr()) { + // TODO(turbofan): Make OSR work with inner loops and remove this bailout. + (info()->is_osr() && !FLAG_turbo_osr)) { return Handle::null(); } @@ -829,6 +842,11 @@ Handle Pipeline::GenerateCode() { Run(); RunPrintAndVerify("Lowered typed"); + if (info()->is_osr()) { + Run(); + RunPrintAndVerify("OSR deconstruction"); + } + // Lower simplified operators and insert changes. Run(); RunPrintAndVerify("Lowered simplified"); @@ -840,6 +858,11 @@ Handle Pipeline::GenerateCode() { Run(); RunPrintAndVerify("Late Control reduced"); + } else { + if (info()->is_osr()) { + Run(); + RunPrintAndVerify("OSR deconstruction"); + } } // Lower any remaining generic JSOperators. @@ -1021,6 +1044,10 @@ void Pipeline::AllocateRegisters(const RegisterConfiguration* config, ZonePool::Scope zone_scope(data->zone_pool()); data->InitializeRegisterAllocator(zone_scope.zone(), config, debug_name.get()); + if (info()->is_osr()) { + OsrHelper osr_helper(info()); + osr_helper.SetupFrame(data->frame()); + } Run(); Run(); diff --git a/src/compiler/register-allocator.cc b/src/compiler/register-allocator.cc index 8e11e7c18..7def50e47 100644 --- a/src/compiler/register-allocator.cc +++ b/src/compiler/register-allocator.cc @@ -1062,7 +1062,7 @@ void RegisterAllocator::MeetRegisterConstraintsForLastInstructionInBlock( AllocateFixed(output, -1, false); // This value is produced on the stack, we never need to spill it. if (output->IsStackSlot()) { - DCHECK(output->index() < 0); + DCHECK(output->index() < frame_->GetSpillSlotCount()); range->SetSpillOperand(output); range->SetSpillStartIndex(end); assigned = true; @@ -1128,7 +1128,7 @@ void RegisterAllocator::MeetConstraintsBetween(Instruction* first, // This value is produced on the stack, we never need to spill it. if (first_output->IsStackSlot()) { - DCHECK(first_output->index() < 0); + DCHECK(first_output->index() < frame_->GetSpillSlotCount()); range->SetSpillOperand(first_output); range->SetSpillStartIndex(gap_index - 1); assigned = true; diff --git a/src/compiler/scheduler.cc b/src/compiler/scheduler.cc index c2d043f01..6ec2295d6 100644 --- a/src/compiler/scheduler.cc +++ b/src/compiler/scheduler.cc @@ -75,7 +75,8 @@ Scheduler::Placement Scheduler::GetPlacement(Node* node) { if (data->placement_ == kUnknown) { // Compute placement, once, on demand. switch (node->opcode()) { case IrOpcode::kParameter: - // Parameters are always fixed to the start node. + case IrOpcode::kOsrValue: + // Parameters and OSR values are always fixed to the start block. data->placement_ = kFixed; break; case IrOpcode::kPhi: diff --git a/src/compiler/typer.cc b/src/compiler/typer.cc index 1fcfa17de..88a366656 100644 --- a/src/compiler/typer.cc +++ b/src/compiler/typer.cc @@ -598,6 +598,16 @@ Bounds Typer::Visitor::TypeParameter(Node* node) { } +Bounds Typer::Visitor::TypeOsrValue(Node* node) { + // OSR values explicitly have type {None} before OSR form is deconstructed. + if (node->InputAt(0)->opcode() == IrOpcode::kOsrLoopEntry) { + return Bounds(Type::None(), Type::None()); + } + // TODO(turbofan): preserve the type of OSR values after deconstruction. + return Bounds::Unbounded(zone()); +} + + Bounds Typer::Visitor::TypeInt32Constant(Node* node) { Factory* f = isolate()->factory(); Handle number = f->NewNumber(OpParameter(node)); diff --git a/src/compiler/verifier.cc b/src/compiler/verifier.cc index 5ba8c87aa..1916b99d2 100644 --- a/src/compiler/verifier.cc +++ b/src/compiler/verifier.cc @@ -247,6 +247,14 @@ void Verifier::Visitor::Pre(Node* node) { CHECK_EQ(1, control_count); CHECK_EQ(input_count, 1 + effect_count); break; + case IrOpcode::kOsrNormalEntry: + case IrOpcode::kOsrLoopEntry: + // Osr entries have + CHECK_EQ(1, effect_count); + CHECK_EQ(1, control_count); + // Type is empty. + CheckNotTyped(node); + break; // Common operators // ---------------- @@ -297,6 +305,13 @@ void Verifier::Visitor::Pre(Node* node) { // Type is considered internal. CheckUpperIs(node, Type::Internal()); break; + case IrOpcode::kOsrValue: + // OSR values have a value and a control input. + CHECK_EQ(1, control_count); + CHECK_EQ(1, input_count); + // Type is merged from other values in the graph and could be any. + CheckUpperIs(node, Type::Any()); + break; case IrOpcode::kProjection: { // Projection has an input that produces enough values. int index = static_cast(OpParameter(node->op())); diff --git a/src/compiler/x64/code-generator-x64.cc b/src/compiler/x64/code-generator-x64.cc index 0480f9dc9..ac4e4010b 100644 --- a/src/compiler/x64/code-generator-x64.cc +++ b/src/compiler/x64/code-generator-x64.cc @@ -8,6 +8,7 @@ #include "src/compiler/gap-resolver.h" #include "src/compiler/node-matchers.h" #include "src/compiler/node-properties-inl.h" +#include "src/compiler/osr.h" #include "src/scopes.h" #include "src/x64/assembler-x64.h" #include "src/x64/macro-assembler-x64.h" @@ -1201,6 +1202,23 @@ void CodeGenerator::AssemblePrologue() { frame()->SetRegisterSaveAreaSize( StandardFrameConstants::kFixedFrameSizeFromFp); } + + if (info()->is_osr()) { + // TurboFan OSR-compiled functions cannot be entered directly. + __ Abort(kShouldNotDirectlyEnterOsrFunction); + + // Unoptimized code jumps directly to this entrypoint while the unoptimized + // frame is still on the stack. Optimized code uses OSR values directly from + // the unoptimized frame. Thus, all that needs to be done is to allocate the + // remaining stack slots. + if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --"); + osr_pc_offset_ = __ pc_offset(); + int unoptimized_slots = + static_cast(OsrHelper(info()).UnoptimizedFrameSlots()); + DCHECK(stack_slots >= unoptimized_slots); + stack_slots -= unoptimized_slots; + } + if (stack_slots > 0) { __ subq(rsp, Immediate(stack_slots * kPointerSize)); } diff --git a/src/flag-definitions.h b/src/flag-definitions.h index dbc1b9c56..241729ede 100644 --- a/src/flag-definitions.h +++ b/src/flag-definitions.h @@ -405,7 +405,8 @@ DEFINE_BOOL(turbo_delay_ssa_decon, false, "delay ssa deconstruction in TurboFan register allocator") // TODO(dcarney): this is just for debugging, remove eventually. DEFINE_BOOL(turbo_move_optimization, true, "optimize gap moves in TurboFan") -DEFINE_BOOL(turbo_jt, true, "enable jump threading") +DEFINE_BOOL(turbo_jt, true, "enable jump threading in TurboFan") +DEFINE_BOOL(turbo_osr, false, "enable OSR in TurboFan") DEFINE_INT(typed_array_max_size_in_heap, 64, "threshold for in-heap typed array") diff --git a/src/objects.cc b/src/objects.cc index 944b450ff..826d97671 100644 --- a/src/objects.cc +++ b/src/objects.cc @@ -8227,7 +8227,6 @@ Object* AccessorPair::GetComponent(AccessorComponent component) { Handle DeoptimizationInputData::New( Isolate* isolate, int deopt_entry_count, PretenureFlag pretenure) { - DCHECK(deopt_entry_count > 0); return Handle::cast( isolate->factory()->NewFixedArray(LengthFor(deopt_entry_count), pretenure)); diff --git a/src/runtime/runtime-compiler.cc b/src/runtime/runtime-compiler.cc index 6526dcff8..f132d986f 100644 --- a/src/runtime/runtime-compiler.cc +++ b/src/runtime/runtime-compiler.cc @@ -301,8 +301,15 @@ RUNTIME_FUNCTION(Runtime_CompileForOnStackReplacement) { // match. Fix heuristics for reenabling optimizations! function->shared()->increment_deopt_count(); - // TODO(titzer): Do not install code into the function. - function->ReplaceCode(*result); + if (result->is_turbofanned()) { + // TurboFanned OSR code cannot be installed into the function. + // But the function is obviously hot, so optimize it next time. + function->ReplaceCode( + isolate->builtins()->builtin(Builtins::kCompileOptimized)); + } else { + // Crankshafted OSR code can be installed into the function. + function->ReplaceCode(*result); + } return *result; } } diff --git a/test/cctest/cctest.gyp b/test/cctest/cctest.gyp index 8b3baa850..7d3ace84f 100644 --- a/test/cctest/cctest.gyp +++ b/test/cctest/cctest.gyp @@ -74,6 +74,7 @@ 'compiler/test-node-cache.cc', 'compiler/test-node.cc', 'compiler/test-operator.cc', + 'compiler/test-osr.cc', 'compiler/test-pipeline.cc', 'compiler/test-representation-change.cc', 'compiler/test-run-deopt.cc', diff --git a/test/cctest/compiler/test-osr.cc b/test/cctest/compiler/test-osr.cc new file mode 100644 index 000000000..92b611648 --- /dev/null +++ b/test/cctest/compiler/test-osr.cc @@ -0,0 +1,274 @@ +// Copyright 2015 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "src/codegen.h" +#include "src/compiler/common-operator.h" +#include "src/compiler/diamond.h" +#include "src/compiler/graph.h" +#include "src/compiler/js-graph.h" +#include "src/compiler/js-operator.h" +#include "src/compiler/operator.h" +#include "src/compiler/osr.h" +#include "test/cctest/cctest.h" + +using namespace v8::internal; +using namespace v8::internal::compiler; + +// TODO(titzer): move this method to a common testing place. + +static int CheckInputs(Node* node, Node* i0 = NULL, Node* i1 = NULL, + Node* i2 = NULL, Node* i3 = NULL) { + int count = 4; + if (i3 == NULL) count = 3; + if (i2 == NULL) count = 2; + if (i1 == NULL) count = 1; + if (i0 == NULL) count = 0; + CHECK_EQ(count, node->InputCount()); + if (i0 != NULL) CHECK_EQ(i0, node->InputAt(0)); + if (i1 != NULL) CHECK_EQ(i1, node->InputAt(1)); + if (i2 != NULL) CHECK_EQ(i2, node->InputAt(2)); + if (i3 != NULL) CHECK_EQ(i3, node->InputAt(3)); + return count; +} + + +static Operator kIntLt(IrOpcode::kInt32LessThan, Operator::kPure, + "Int32LessThan", 2, 0, 0, 1, 0, 0); + + +static const int kMaxOsrValues = 10; + +class OsrDeconstructorTester : public HandleAndZoneScope { + public: + explicit OsrDeconstructorTester(int num_values) + : isolate(main_isolate()), + common(main_zone()), + graph(main_zone()), + jsgraph(&graph, &common, NULL, NULL), + start(graph.NewNode(common.Start(1))), + p0(graph.NewNode(common.Parameter(0), start)), + end(graph.NewNode(common.End(), start)), + osr_normal_entry(graph.NewNode(common.OsrNormalEntry(), start)), + osr_loop_entry(graph.NewNode(common.OsrLoopEntry(), start)), + self(graph.NewNode(common.Int32Constant(0xaabbccdd))) { + CHECK(num_values <= kMaxOsrValues); + graph.SetStart(start); + for (int i = 0; i < num_values; i++) { + osr_values[i] = graph.NewNode(common.OsrValue(i), osr_loop_entry); + } + } + + Isolate* isolate; + CommonOperatorBuilder common; + Graph graph; + JSGraph jsgraph; + Node* start; + Node* p0; + Node* end; + Node* osr_normal_entry; + Node* osr_loop_entry; + Node* self; + Node* osr_values[kMaxOsrValues]; + + Node* NewOsrPhi(Node* loop, Node* incoming, int osr_value, Node* back1 = NULL, + Node* back2 = NULL, Node* back3 = NULL) { + int count = 5; + if (back3 == NULL) count = 4; + if (back2 == NULL) count = 3; + if (back1 == NULL) count = 2; + CHECK_EQ(loop->InputCount(), count); + CHECK_EQ(osr_loop_entry, loop->InputAt(1)); + + Node* inputs[6]; + inputs[0] = incoming; + inputs[1] = osr_values[osr_value]; + if (count > 2) inputs[2] = back1; + if (count > 3) inputs[3] = back2; + if (count > 4) inputs[4] = back3; + inputs[count] = loop; + return graph.NewNode(common.Phi(kMachAnyTagged, count), count + 1, inputs); + } + + Node* NewOsrLoop(int num_backedges, Node* entry = NULL) { + CHECK_LT(num_backedges, 4); + CHECK_GE(num_backedges, 0); + int count = 2 + num_backedges; + if (entry == NULL) entry = osr_normal_entry; + Node* inputs[5] = {entry, osr_loop_entry, self, self, self}; + + Node* loop = graph.NewNode(common.Loop(count), count, inputs); + for (int i = 0; i < num_backedges; i++) { + loop->ReplaceInput(2 + i, loop); + } + + return loop; + } +}; + + +TEST(Deconstruct_osr0) { + OsrDeconstructorTester T(0); + + Node* loop = T.NewOsrLoop(1); + + T.graph.SetEnd(loop); + + OsrHelper helper(0, 0); + helper.Deconstruct(&T.jsgraph, &T.common, T.main_zone()); + + CheckInputs(loop, T.start, loop); +} + + +TEST(Deconstruct_osr1) { + OsrDeconstructorTester T(1); + + Node* loop = T.NewOsrLoop(1); + Node* osr_phi = + T.NewOsrPhi(loop, T.jsgraph.OneConstant(), 0, T.jsgraph.ZeroConstant()); + + Node* ret = T.graph.NewNode(T.common.Return(), osr_phi, T.start, loop); + T.graph.SetEnd(ret); + + OsrHelper helper(0, 0); + helper.Deconstruct(&T.jsgraph, &T.common, T.main_zone()); + + CheckInputs(loop, T.start, loop); + CheckInputs(osr_phi, T.osr_values[0], T.jsgraph.ZeroConstant(), loop); + CheckInputs(ret, osr_phi, T.start, loop); +} + + +TEST(Deconstruct_osr_remove_prologue) { + OsrDeconstructorTester T(1); + Diamond d(&T.graph, &T.common, T.p0); + d.Chain(T.osr_normal_entry); + + Node* loop = T.NewOsrLoop(1, d.merge); + Node* osr_phi = + T.NewOsrPhi(loop, T.jsgraph.OneConstant(), 0, T.jsgraph.ZeroConstant()); + + Node* ret = T.graph.NewNode(T.common.Return(), osr_phi, T.start, loop); + T.graph.SetEnd(ret); + + OsrHelper helper(0, 0); + helper.Deconstruct(&T.jsgraph, &T.common, T.main_zone()); + + CheckInputs(loop, T.start, loop); + CheckInputs(osr_phi, T.osr_values[0], T.jsgraph.ZeroConstant(), loop); + CheckInputs(ret, osr_phi, T.start, loop); + + // The control before the loop should have been removed. + CHECK(d.branch->IsDead()); + CHECK(d.if_true->IsDead()); + CHECK(d.if_false->IsDead()); + CHECK(d.merge->IsDead()); +} + + +TEST(Deconstruct_osr_with_body1) { + OsrDeconstructorTester T(1); + + Node* loop = T.NewOsrLoop(1); + + Node* branch = T.graph.NewNode(T.common.Branch(), T.p0, loop); + Node* if_true = T.graph.NewNode(T.common.IfTrue(), branch); + Node* if_false = T.graph.NewNode(T.common.IfFalse(), branch); + loop->ReplaceInput(2, if_true); + + Node* osr_phi = + T.NewOsrPhi(loop, T.jsgraph.OneConstant(), 0, T.jsgraph.ZeroConstant()); + + Node* ret = T.graph.NewNode(T.common.Return(), osr_phi, T.start, if_false); + T.graph.SetEnd(ret); + + OsrHelper helper(0, 0); + helper.Deconstruct(&T.jsgraph, &T.common, T.main_zone()); + + CheckInputs(loop, T.start, if_true); + CheckInputs(branch, T.p0, loop); + CheckInputs(if_true, branch); + CheckInputs(if_false, branch); + CheckInputs(osr_phi, T.osr_values[0], T.jsgraph.ZeroConstant(), loop); + CheckInputs(ret, osr_phi, T.start, if_false); +} + + +TEST(Deconstruct_osr_with_body2) { + OsrDeconstructorTester T(1); + + Node* loop = T.NewOsrLoop(1); + + // Two chained branches in the the body of the loop. + Node* branch1 = T.graph.NewNode(T.common.Branch(), T.p0, loop); + Node* if_true1 = T.graph.NewNode(T.common.IfTrue(), branch1); + Node* if_false1 = T.graph.NewNode(T.common.IfFalse(), branch1); + + Node* branch2 = T.graph.NewNode(T.common.Branch(), T.p0, if_true1); + Node* if_true2 = T.graph.NewNode(T.common.IfTrue(), branch2); + Node* if_false2 = T.graph.NewNode(T.common.IfFalse(), branch2); + loop->ReplaceInput(2, if_true2); + + Node* osr_phi = + T.NewOsrPhi(loop, T.jsgraph.OneConstant(), 0, T.jsgraph.ZeroConstant()); + + Node* merge = T.graph.NewNode(T.common.Merge(2), if_false1, if_false2); + Node* ret = T.graph.NewNode(T.common.Return(), osr_phi, T.start, merge); + T.graph.SetEnd(ret); + + OsrHelper helper(0, 0); + helper.Deconstruct(&T.jsgraph, &T.common, T.main_zone()); + + CheckInputs(loop, T.start, if_true2); + CheckInputs(branch1, T.p0, loop); + CheckInputs(branch2, T.p0, if_true1); + CheckInputs(if_true1, branch1); + CheckInputs(if_false1, branch1); + CheckInputs(if_true2, branch2); + CheckInputs(if_false2, branch2); + + CheckInputs(osr_phi, T.osr_values[0], T.jsgraph.ZeroConstant(), loop); + CheckInputs(ret, osr_phi, T.start, merge); + CheckInputs(merge, if_false1, if_false2); +} + + +TEST(Deconstruct_osr_with_body3) { + OsrDeconstructorTester T(1); + + Node* loop = T.NewOsrLoop(2); + + // Two branches that create two different backedges. + Node* branch1 = T.graph.NewNode(T.common.Branch(), T.p0, loop); + Node* if_true1 = T.graph.NewNode(T.common.IfTrue(), branch1); + Node* if_false1 = T.graph.NewNode(T.common.IfFalse(), branch1); + + Node* branch2 = T.graph.NewNode(T.common.Branch(), T.p0, if_true1); + Node* if_true2 = T.graph.NewNode(T.common.IfTrue(), branch2); + Node* if_false2 = T.graph.NewNode(T.common.IfFalse(), branch2); + loop->ReplaceInput(2, if_false1); + loop->ReplaceInput(3, if_true2); + + Node* osr_phi = + T.NewOsrPhi(loop, T.jsgraph.OneConstant(), 0, T.jsgraph.ZeroConstant(), + T.jsgraph.ZeroConstant()); + + Node* ret = T.graph.NewNode(T.common.Return(), osr_phi, T.start, if_false2); + T.graph.SetEnd(ret); + + OsrHelper helper(0, 0); + helper.Deconstruct(&T.jsgraph, &T.common, T.main_zone()); + + CheckInputs(loop, T.start, if_false1, if_true2); + CheckInputs(branch1, T.p0, loop); + CheckInputs(branch2, T.p0, if_true1); + CheckInputs(if_true1, branch1); + CheckInputs(if_false1, branch1); + CheckInputs(if_true2, branch2); + CheckInputs(if_false2, branch2); + + CheckInputs(osr_phi, T.osr_values[0], T.jsgraph.ZeroConstant(), + T.jsgraph.ZeroConstant(), loop); + CheckInputs(ret, osr_phi, T.start, if_false2); +} diff --git a/test/mjsunit/compiler/osr-alignment.js b/test/mjsunit/compiler/osr-alignment.js index 30d72d061..b1fd702aa 100644 --- a/test/mjsunit/compiler/osr-alignment.js +++ b/test/mjsunit/compiler/osr-alignment.js @@ -25,7 +25,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// Flags: --use-osr +// Flags: --use-osr --turbo-osr function f1() { var sum = 0; diff --git a/test/mjsunit/compiler/osr-manual1.js b/test/mjsunit/compiler/osr-manual1.js new file mode 100644 index 000000000..786a70dfe --- /dev/null +++ b/test/mjsunit/compiler/osr-manual1.js @@ -0,0 +1,47 @@ +// Copyright 2014 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Flags: --allow-natives-syntax --use-osr --turbo-osr + +var counter = 188; + +function gen() { // defeat compiler cache. + var num = counter++; + var src = + "function f" + num + "(Z,a,b,c) {" + + " var x = 0;" + + " var y = 0;" + + " var z = 0;" + + " while (a > 0) { Z(0); x += 19; a--; }" + + " while (b > 0) { Z(1); y += 23; b--; }" + + " while (c > 0) { Z(2); z += 29; c--; }" + + " return x + y + z;" + + "} f" + num; + + return eval(src); +} + +function compiler(a) { // manual control of OSR compiles. + var x = 0; + function count(l) { + if (l == a && (x++) > 0) { + %OptimizeFunctionOnNextCall(count.caller, "osr"); + } + } + return count; +} + +function check(x,a,b,c) { + function none(l) { } + + for (var i = 0; i < 3; i++) { + var f = gen(); + assertEquals(x, f(compiler(i), a, b, c)); + assertEquals(x, f(none, a, b, c)); + } +} + +check(213, 3,3,3); +check(365, 4,5,6); +check(6948, 99,98,97); diff --git a/test/mjsunit/compiler/osr-manual2.js b/test/mjsunit/compiler/osr-manual2.js new file mode 100644 index 000000000..463ba8935 --- /dev/null +++ b/test/mjsunit/compiler/osr-manual2.js @@ -0,0 +1,47 @@ +// Copyright 2014 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Flags: --allow-natives-syntax --use-osr --turbo-osr + +var counter = 188; + +function gen() { // defeat compiler cache. + var num = counter++; + var src = + "function f" + num + "(Z,a,b,c) {" + + " var x = 0;" + + " var y = 0;" + + " var z = 0;" + + " while (a > 0) { Z(0); x += 19; a--; var j=2; while(j--); }" + + " while (b > 0) { Z(1); y += 23; b--; var j=2; while(j--); }" + + " while (c > 0) { Z(2); z += 29; c--; var j=2; while(j--); }" + + " return x + y + z;" + + "} f" + num; + + return eval(src); +} + +function compiler(a) { // manual control of OSR compiles. + var x = 0; + function count(l) { + if (l == a && (x++) > 0) { + %OptimizeFunctionOnNextCall(count.caller, "osr"); + } + } + return count; +} + +function check(x,a,b,c) { + function none(l) { } + + for (var i = 0; i < 3; i++) { + var f = gen(); + assertEquals(x, f(compiler(i), a, b, c)); + assertEquals(x, f(none, a, b, c)); + } +} + +check(213, 3,3,3); +check(365, 4,5,6); +check(6948, 99,98,97); diff --git a/test/mjsunit/compiler/osr-multiple.js b/test/mjsunit/compiler/osr-multiple.js new file mode 100644 index 000000000..c318645d3 --- /dev/null +++ b/test/mjsunit/compiler/osr-multiple.js @@ -0,0 +1,44 @@ +// Copyright 2014 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Flags: --use-osr --turbo-osr + +function f1(a,b,c) { + var x = 0; + var y = 0; + var z = 0; + while (a > 0) { x += 19; a--; } + while (b > 0) { y += 23; b--; } + while (c > 0) { z += 29; c--; } + return x + y + z; +} + +function f2(a,b,c) { + var x = 0; + var y = 0; + var z = 0; + while (a > 0) { x += 19; a--; } + while (b > 0) { y += 23; b--; } + while (c > 0) { z += 29; c--; } + return x + y + z; +} + + +function f3(a,b,c) { + var x = 0; + var y = 0; + var z = 0; + while (a > 0) { x += 19; a--; } + while (b > 0) { y += 23; b--; } + while (c > 0) { z += 29; c--; } + return x + y + z; +} + +function check(f,a,b,c) { + assertEquals(a * 19 + b * 23 + c * 29, f(a,b,c)); +} + +check(f1, 50000, 5, 6); +check(f2, 4, 50000, 6); +check(f3, 11, 12, 50000); diff --git a/test/mjsunit/compiler/osr-multiple2.js b/test/mjsunit/compiler/osr-multiple2.js new file mode 100644 index 000000000..9a81bfb65 --- /dev/null +++ b/test/mjsunit/compiler/osr-multiple2.js @@ -0,0 +1,51 @@ +// Copyright 2014 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Flags: --use-osr +// TODO(titzer): enable --turbo-osr when nested OSR works. + +function f1(a,b,c) { + var x = 0; + var y = 0; + var z = 0; + for (var i = 0; i < 2; i++) { + while (a > 0) { x += 19; a--; } + while (b > 0) { y += 23; b--; } + while (c > 0) { z += 29; c--; } + } + return x + y + z; +} + +function f2(a,b,c) { + var x = 0; + var y = 0; + var z = 0; + for (var i = 0; i < 2; i++) { + while (a > 0) { x += 19; a--; } + while (b > 0) { y += 23; b--; } + while (c > 0) { z += 29; c--; } + } + return x + y + z; +} + + +function f3(a,b,c) { + var x = 0; + var y = 0; + var z = 0; + for (var i = 0; i < 2; i++) { + while (a > 0) { x += 19; a--; } + while (b > 0) { y += 23; b--; } + while (c > 0) { z += 29; c--; } + } + return x + y + z; +} + +function check(f,a,b,c) { + assertEquals(a * 19 + b * 23 + c * 29, f(a,b,c)); +} + +check(f1, 50000, 5, 6); +check(f2, 4, 50000, 6); +check(f3, 11, 12, 50000); diff --git a/test/mjsunit/compiler/osr-multiple3.js b/test/mjsunit/compiler/osr-multiple3.js new file mode 100644 index 000000000..0fb1ac73a --- /dev/null +++ b/test/mjsunit/compiler/osr-multiple3.js @@ -0,0 +1,57 @@ +// Copyright 2014 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Flags: --use-osr +// TODO(titzer): enable --turbo-osr when nested OSR works. + +function f1(a,b,c) { + var x = 0; + var y = 0; + var z = 0; + for (var i = 0; i < 2; i++) { + for (var j = 0; j < 2; j++) { + while (a > 0) { x += 19; a--; } + while (b > 0) { y += 23; b--; } + while (c > 0) { z += 29; c--; } + } + } + return x + y + z; +} + +function f2(a,b,c) { + var x = 0; + var y = 0; + var z = 0; + for (var i = 0; i < 2; i++) { + for (var j = 0; j < 2; j++) { + while (a > 0) { x += 19; a--; } + while (b > 0) { y += 23; b--; } + while (c > 0) { z += 29; c--; } + } + } + return x + y + z; +} + + +function f3(a,b,c) { + var x = 0; + var y = 0; + var z = 0; + for (var i = 0; i < 2; i++) { + for (var j = 0; j < 2; j++) { + while (a > 0) { x += 19; a--; } + while (b > 0) { y += 23; b--; } + while (c > 0) { z += 29; c--; } + } + } + return x + y + z; +} + +function check(f,a,b,c) { + assertEquals(a * 19 + b * 23 + c * 29, f(a,b,c)); +} + +check(f1, 50000, 5, 6); +check(f2, 4, 50000, 6); +check(f3, 11, 12, 50000); diff --git a/test/mjsunit/compiler/osr-sar.js b/test/mjsunit/compiler/osr-sar.js index fd68b98a4..cc04adca8 100644 --- a/test/mjsunit/compiler/osr-sar.js +++ b/test/mjsunit/compiler/osr-sar.js @@ -25,7 +25,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// Flags: --allow-natives-syntax +// Flags: --allow-natives-syntax --use-osr --turbo-osr function test() { // Loop to force OSR. diff --git a/test/mjsunit/compiler/osr-warm.js b/test/mjsunit/compiler/osr-warm.js index 73e1fd5cd..7c30c07f2 100644 --- a/test/mjsunit/compiler/osr-warm.js +++ b/test/mjsunit/compiler/osr-warm.js @@ -25,7 +25,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// Flags: --use-osr +// Flags: --use-osr --turbo-osr function f1(x) { while (x > 0) { diff --git a/tools/gyp/v8.gyp b/tools/gyp/v8.gyp index 14b2acb8a..83f49f028 100644 --- a/tools/gyp/v8.gyp +++ b/tools/gyp/v8.gyp @@ -499,6 +499,8 @@ '../../src/compiler/operator-properties.h', '../../src/compiler/operator.cc', '../../src/compiler/operator.h', + '../../src/compiler/osr.cc', + '../../src/compiler/osr.h', '../../src/compiler/pipeline.cc', '../../src/compiler/pipeline.h', '../../src/compiler/pipeline-statistics.cc', -- 2.34.1