From bb0a2ea7664ad129070cd648df807bd80ed4181d Mon Sep 17 00:00:00 2001 From: "fschneider@chromium.org" Date: Thu, 14 Jun 2012 14:06:22 +0000 Subject: [PATCH] Share optimized code for closures. Each SharedFunctionInfo gets an optimized code map to store one optimized code object per context. When allocating a new closure we consult this map and check if there is optimized code that can be shared. This patch is based on an original patch by Anton Muhin (http://codereview.chromium.org/6793013/). BUG=v8:2087, v8:2094 TEST=test/mjsunit/compiler/optimized-closures.js Review URL: https://chromiumcodereview.appspot.com/10103035 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@11817 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/arm/code-stubs-arm.cc | 95 +++++++++++++++++++++++++++-- src/arm/deoptimizer-arm.cc | 39 +++++++++--- src/arm/lithium-codegen-arm.cc | 7 ++- src/compiler.cc | 39 ++++++++++++ src/deoptimizer.h | 3 + src/factory.cc | 44 ++++++++++--- src/flag-definitions.h | 5 ++ src/frames.cc | 20 ++++-- src/frames.h | 2 + src/heap.cc | 1 + src/hydrogen-instructions.h | 12 +--- src/hydrogen.cc | 18 +++++- src/hydrogen.h | 2 + src/ia32/code-stubs-ia32.cc | 85 ++++++++++++++++++++++++-- src/ia32/deoptimizer-ia32.cc | 41 ++++++++++--- src/ia32/lithium-codegen-ia32.cc | 6 +- src/mark-compact.cc | 10 ++- src/objects-debug.cc | 1 + src/objects-inl.h | 13 ++++ src/objects.cc | 73 +++++++++++++++++++++- src/objects.h | 26 +++++++- src/runtime-profiler.cc | 5 +- src/runtime.cc | 3 + src/v8-counters.h | 7 +-- src/x64/code-stubs-x64.cc | 93 ++++++++++++++++++++++++++-- src/x64/deoptimizer-x64.cc | 41 ++++++++++--- src/x64/lithium-codegen-x64.cc | 7 ++- test/mjsunit/compiler/optimized-closures.js | 57 +++++++++++++++++ 28 files changed, 665 insertions(+), 90 deletions(-) create mode 100644 test/mjsunit/compiler/optimized-closures.js diff --git a/src/arm/code-stubs-arm.cc b/src/arm/code-stubs-arm.cc index 761123f..7c0db4c 100644 --- a/src/arm/code-stubs-arm.cc +++ b/src/arm/code-stubs-arm.cc @@ -85,6 +85,8 @@ void ToNumberStub::Generate(MacroAssembler* masm) { void FastNewClosureStub::Generate(MacroAssembler* masm) { // Create a new closure from the given function info in new // space. Set the context to the current context in cp. + Counters* counters = masm->isolate()->counters(); + Label gc; // Pop the function info from the stack. @@ -98,6 +100,8 @@ void FastNewClosureStub::Generate(MacroAssembler* masm) { &gc, TAG_OBJECT); + __ IncrementCounter(counters->fast_new_closure_total(), 1, r6, r7); + int map_index = (language_mode_ == CLASSIC_MODE) ? Context::FUNCTION_MAP_INDEX : Context::STRICT_MODE_FUNCTION_MAP_INDEX; @@ -106,24 +110,34 @@ void FastNewClosureStub::Generate(MacroAssembler* masm) { // as the map of the allocated object. __ ldr(r2, MemOperand(cp, Context::SlotOffset(Context::GLOBAL_INDEX))); __ ldr(r2, FieldMemOperand(r2, GlobalObject::kGlobalContextOffset)); - __ ldr(r2, MemOperand(r2, Context::SlotOffset(map_index))); - __ str(r2, FieldMemOperand(r0, HeapObject::kMapOffset)); + __ ldr(r5, MemOperand(r2, Context::SlotOffset(map_index))); + __ str(r5, FieldMemOperand(r0, HeapObject::kMapOffset)); // Initialize the rest of the function. We don't have to update the // write barrier because the allocated object is in new space. 
__ LoadRoot(r1, Heap::kEmptyFixedArrayRootIndex); - __ LoadRoot(r2, Heap::kTheHoleValueRootIndex); - __ LoadRoot(r4, Heap::kUndefinedValueRootIndex); + __ LoadRoot(r5, Heap::kTheHoleValueRootIndex); __ str(r1, FieldMemOperand(r0, JSObject::kPropertiesOffset)); __ str(r1, FieldMemOperand(r0, JSObject::kElementsOffset)); - __ str(r2, FieldMemOperand(r0, JSFunction::kPrototypeOrInitialMapOffset)); + __ str(r5, FieldMemOperand(r0, JSFunction::kPrototypeOrInitialMapOffset)); __ str(r3, FieldMemOperand(r0, JSFunction::kSharedFunctionInfoOffset)); __ str(cp, FieldMemOperand(r0, JSFunction::kContextOffset)); __ str(r1, FieldMemOperand(r0, JSFunction::kLiteralsOffset)); - __ str(r4, FieldMemOperand(r0, JSFunction::kNextFunctionLinkOffset)); // Initialize the code pointer in the function to be the one // found in the shared function info object. + // But first check if there is an optimized version for our context. + Label check_optimized; + Label install_unoptimized; + if (FLAG_cache_optimized_code) { + __ ldr(r1, + FieldMemOperand(r3, SharedFunctionInfo::kOptimizedCodeMapOffset)); + __ tst(r1, r1); + __ b(ne, &check_optimized); + } + __ bind(&install_unoptimized); + __ LoadRoot(r4, Heap::kUndefinedValueRootIndex); + __ str(r4, FieldMemOperand(r0, JSFunction::kNextFunctionLinkOffset)); __ ldr(r3, FieldMemOperand(r3, SharedFunctionInfo::kCodeOffset)); __ add(r3, r3, Operand(Code::kHeaderSize - kHeapObjectTag)); __ str(r3, FieldMemOperand(r0, JSFunction::kCodeEntryOffset)); @@ -131,6 +145,73 @@ void FastNewClosureStub::Generate(MacroAssembler* masm) { // Return result. The argument function info has been popped already. __ Ret(); + __ bind(&check_optimized); + + __ IncrementCounter(counters->fast_new_closure_try_optimized(), 1, r6, r7); + + // r2 holds global context, r1 points to fixed array of 3-element entries + // (global context, optimized code, literals). + // The optimized code map must never be empty, so check the first elements. + Label install_optimized; + // Speculatively move code object into r4. + __ ldr(r4, FieldMemOperand(r1, FixedArray::kHeaderSize + kPointerSize)); + __ ldr(r5, FieldMemOperand(r1, FixedArray::kHeaderSize)); + __ cmp(r2, r5); + __ b(eq, &install_optimized); + __ b(&install_unoptimized); + + // Iterate through the rest of map backwards. r4 holds an index as a Smi. + Label loop; + __ ldr(r4, FieldMemOperand(r1, FixedArray::kLengthOffset)); + __ bind(&loop); + // Do not double check first entry. + + __ cmp(r4, Operand(Smi::FromInt(SharedFunctionInfo::kEntryLength))); + __ b(eq, &install_unoptimized); + __ sub(r4, r4, Operand( + Smi::FromInt(SharedFunctionInfo::kEntryLength))); // Skip an entry. + __ add(r5, r1, Operand(FixedArray::kHeaderSize - kHeapObjectTag)); + __ add(r5, r5, Operand(r4, LSL, kPointerSizeLog2 - kSmiTagSize)); + __ ldr(r5, MemOperand(r5)); + __ cmp(r2, r5); + __ b(ne, &loop); + // Hit: fetch the optimized code. + __ add(r5, r1, Operand(FixedArray::kHeaderSize - kHeapObjectTag)); + __ add(r5, r5, Operand(r4, LSL, kPointerSizeLog2 - kSmiTagSize)); + __ add(r5, r5, Operand(kPointerSize)); + __ ldr(r4, MemOperand(r5)); + + __ bind(&install_optimized); + __ IncrementCounter(counters->fast_new_closure_install_optimized(), + 1, r6, r7); + + // TODO(fschneider): Idea: store proper code pointers in the map and either + // unmangle them on marking or do nothing as the whole map is discarded on + // major GC anyway. 
+ __ add(r4, r4, Operand(Code::kHeaderSize - kHeapObjectTag)); + __ str(r4, FieldMemOperand(r0, JSFunction::kCodeEntryOffset)); + + // Now link a function into a list of optimized functions. + __ ldr(r4, ContextOperand(r2, Context::OPTIMIZED_FUNCTIONS_LIST)); + + __ str(r4, FieldMemOperand(r0, JSFunction::kNextFunctionLinkOffset)); + // No need for write barrier as JSFunction (r0) is in the new space. + + __ str(r0, ContextOperand(r2, Context::OPTIMIZED_FUNCTIONS_LIST)); + // Store JSFunction (r0) into r4 before issuing write barrier as + // it clobbers all the registers passed. + __ mov(r4, r0); + __ RecordWriteContextSlot( + r2, + Context::SlotOffset(Context::OPTIMIZED_FUNCTIONS_LIST), + r4, + r1, + kLRHasNotBeenSaved, + kDontSaveFPRegs); + + // Return result. The argument function info has been popped already. + __ Ret(); + // Create a new closure through the slower runtime call. __ bind(&gc); __ LoadRoot(r4, Heap::kFalseValueRootIndex); @@ -7131,6 +7212,8 @@ static const AheadOfTimeWriteBarrierStubList kAheadOfTime[] = { { REG(r2), REG(r6), REG(r9), EMIT_REMEMBERED_SET }, // StoreArrayLiteralElementStub::Generate { REG(r5), REG(r0), REG(r6), EMIT_REMEMBERED_SET }, + // FastNewClosureStub::Generate + { REG(r2), REG(r4), REG(r1), EMIT_REMEMBERED_SET }, // Null termination. { REG(no_reg), REG(no_reg), REG(no_reg), EMIT_REMEMBERED_SET} }; diff --git a/src/arm/deoptimizer-arm.cc b/src/arm/deoptimizer-arm.cc index 699e6aa..c75fdd4 100644 --- a/src/arm/deoptimizer-arm.cc +++ b/src/arm/deoptimizer-arm.cc @@ -50,6 +50,10 @@ void Deoptimizer::DeoptimizeFunction(JSFunction* function) { if (!function->IsOptimized()) return; + // The optimized code is going to be patched, so we cannot use it + // any more. Play safe and reset the whole cache. + function->shared()->ClearOptimizedCodeMap(); + // Get the optimized code. Code* code = function->code(); Address code_start_address = code->instruction_start(); @@ -97,8 +101,19 @@ void Deoptimizer::DeoptimizeFunction(JSFunction* function) { // ignore all slots that might have been recorded on it. isolate->heap()->mark_compact_collector()->InvalidateCode(code); - // Set the code for the function to non-optimized version. - function->ReplaceCode(function->shared()->code()); + // Iterate over all the functions which share the same code object + // and make them use the unoptimized version. + Context* context = function->context()->global_context(); + Object* element = context->get(Context::OPTIMIZED_FUNCTIONS_LIST); + SharedFunctionInfo* shared = function->shared(); + while (!element->IsUndefined()) { + JSFunction* func = JSFunction::cast(element); + // Grab element before code replacement as ReplaceCode alters the list. + element = func->next_function_link(); + if (func->code() == code) { + func->ReplaceCode(shared->code()); + } + } if (FLAG_trace_deopt) { PrintF("[forced deoptimization: "); @@ -239,9 +254,9 @@ void Deoptimizer::DoComputeOsrOutputFrame() { unsigned node_id = iterator.Next(); USE(node_id); ASSERT(node_id == ast_id); - JSFunction* function = JSFunction::cast(ComputeLiteral(iterator.Next())); - USE(function); - ASSERT(function == function_); + int closure_id = iterator.Next(); + USE(closure_id); + ASSERT_EQ(Translation::kSelfLiteralId, closure_id); unsigned height = iterator.Next(); unsigned height_in_bytes = height * kPointerSize; USE(height_in_bytes); @@ -352,8 +367,8 @@ void Deoptimizer::DoComputeOsrOutputFrame() { if (FLAG_trace_osr) { PrintF("[on-stack replacement translation %s: 0x%08" V8PRIxPTR " ", ok ?
"finished" : "aborted", - reinterpret_cast<intptr_t>(function)); - function->PrintName(); + reinterpret_cast<intptr_t>(function_)); + function_->PrintName(); PrintF(" => pc=0x%0x]\n", output_[0]->GetPc()); } } @@ -583,7 +598,15 @@ void Deoptimizer::DoComputeJSFrame(TranslationIterator* iterator, int frame_index) { // Read the ast node id, function, and frame height for this output frame. int node_id = iterator->Next(); - JSFunction* function = JSFunction::cast(ComputeLiteral(iterator->Next())); + JSFunction* function; + if (frame_index != 0) { + function = JSFunction::cast(ComputeLiteral(iterator->Next())); + } else { + int closure_id = iterator->Next(); + USE(closure_id); + ASSERT_EQ(Translation::kSelfLiteralId, closure_id); + function = function_; + } unsigned height = iterator->Next(); unsigned height_in_bytes = height * kPointerSize; if (FLAG_trace_deopt) { diff --git a/src/arm/lithium-codegen-arm.cc b/src/arm/lithium-codegen-arm.cc index 256d180..6a6a062 100644 --- a/src/arm/lithium-codegen-arm.cc +++ b/src/arm/lithium-codegen-arm.cc @@ -478,7 +478,10 @@ void LCodeGen::WriteTranslation(LEnvironment* environment, int height = translation_size - environment->parameter_count(); WriteTranslation(environment->outer(), translation); - int closure_id = DefineDeoptimizationLiteral(environment->closure()); + int closure_id = *info()->closure() != *environment->closure() + ? DefineDeoptimizationLiteral(environment->closure()) + : Translation::kSelfLiteralId; + switch (environment->frame_type()) { case JS_FUNCTION: translation->BeginJSFrame(environment->ast_id(), closure_id, height); @@ -3089,7 +3092,7 @@ void LCodeGen::DoDrop(LDrop* instr) { void LCodeGen::DoThisFunction(LThisFunction* instr) { Register result = ToRegister(instr->result()); - __ LoadHeapObject(result, instr->hydrogen()->closure()); + __ ldr(result, MemOperand(fp, JavaScriptFrameConstants::kFunctionOffset)); } diff --git a/src/compiler.cc b/src/compiler.cc index d44718b..0dab226 100644 --- a/src/compiler.cc +++ b/src/compiler.cc @@ -616,6 +616,25 @@ bool Compiler::CompileLazy(CompilationInfo* info) { int compiled_size = shared->end_position() - shared->start_position(); isolate->counters()->total_compile_size()->Increment(compiled_size); + if (FLAG_cache_optimized_code && info->IsOptimizing()) { + Handle<JSFunction> function = info->closure(); + ASSERT(!function.is_null()); + Handle<Context> global_context(function->context()->global_context()); + int index = function->shared()->SearchOptimizedCodeMap(*global_context); + if (index > 0) { + if (FLAG_trace_opt) { + PrintF(" [Found optimized code for"); + function->PrintName(); + PrintF("\n"); + } + Code* code = Code::cast( + FixedArray::cast(shared->optimized_code_map())->get(index)); + ASSERT(code != NULL); + function->ReplaceCode(code); + return true; + } + } + // Generate the AST for the lazily compiled function. if (ParserApi::Parse(info, kNoParsingFlags)) { // Measure how long it takes to do the lazy compilation; only take the @@ -647,6 +666,26 @@ bool Compiler::CompileLazy(CompilationInfo* info) { if (info->IsOptimizing()) { ASSERT(shared->scope_info() != ScopeInfo::Empty()); function->ReplaceCode(*code); + if (FLAG_cache_optimized_code && + code->kind() == Code::OPTIMIZED_FUNCTION) { + Handle<SharedFunctionInfo> shared(function->shared()); + Handle<Context> global_context(function->context()->global_context()); + + // Create literals array that will be shared for this global context.
+ int number_of_literals = shared->num_literals(); + Handle<FixedArray> literals = + isolate->factory()->NewFixedArray(number_of_literals); + if (number_of_literals > 0) { + // Store the object, regexp and array functions in the literals + // array prefix. These functions will be used when creating + // object, regexp and array literals in this function. + literals->set(JSFunction::kLiteralGlobalContextIndex, + function->context()->global_context()); + } + + SharedFunctionInfo::AddToOptimizedCodeMap( + shared, global_context, code, literals); + } } else { // Update the shared function info with the compiled code and the // scope info. Please note, that the order of the shared function diff --git a/src/deoptimizer.h b/src/deoptimizer.h index 9e8a549..120f9de 100644 --- a/src/deoptimizer.h +++ b/src/deoptimizer.h @@ -608,6 +608,9 @@ class Translation BASE_EMBEDDED { static const char* StringFor(Opcode opcode); #endif + // A literal id which refers to the JSFunction itself. + static const int kSelfLiteralId = -239; + private: TranslationBuffer* buffer_; int index_; diff --git a/src/factory.cc b/src/factory.cc index 28b318a..bdf93b9 100644 --- a/src/factory.cc +++ b/src/factory.cc @@ -554,18 +554,44 @@ Handle<JSFunction> Factory::NewFunctionFromSharedFunctionInfo( } result->set_context(*context); + + int index = FLAG_cache_optimized_code + ? function_info->SearchOptimizedCodeMap(context->global_context()) + : -1; if (!function_info->bound()) { - int number_of_literals = function_info->num_literals(); - Handle<FixedArray> literals = NewFixedArray(number_of_literals, pretenure); - if (number_of_literals > 0) { - // Store the object, regexp and array functions in the literals - // array prefix. These functions will be used when creating - // object, regexp and array literals in this function. - literals->set(JSFunction::kLiteralGlobalContextIndex, - context->global_context()); + if (index > 0) { + FixedArray* code_map = + FixedArray::cast(function_info->optimized_code_map()); + FixedArray* cached_literals = FixedArray::cast(code_map->get(index + 1)); + ASSERT(cached_literals != NULL); + ASSERT(function_info->num_literals() == 0 || + (code_map->get(index - 1) == + cached_literals->get(JSFunction::kLiteralGlobalContextIndex))); + result->set_literals(cached_literals); + } else { + int number_of_literals = function_info->num_literals(); + Handle<FixedArray> literals = + NewFixedArray(number_of_literals, pretenure); + if (number_of_literals > 0) { + // Store the object, regexp and array functions in the literals + // array prefix. These functions will be used when creating + // object, regexp and array literals in this function. + literals->set(JSFunction::kLiteralGlobalContextIndex, + context->global_context()); + } + result->set_literals(*literals); } - result->set_literals(*literals); } + + if (index > 0) { + // Caching of optimized code enabled and optimized code found.
+ Code* code = Code::cast( + FixedArray::cast(function_info->optimized_code_map())->get(index)); + ASSERT(code != NULL); + result->ReplaceCode(code); + return result; + } + if (V8::UseCrankshaft() && FLAG_always_opt && result->is_compiled() && diff --git a/src/flag-definitions.h b/src/flag-definitions.h index 2b4c53c..e9d8d5d 100644 --- a/src/flag-definitions.h +++ b/src/flag-definitions.h @@ -206,6 +206,11 @@ DEFINE_bool(array_index_dehoisting, false, DEFINE_bool(trace_osr, false, "trace on-stack replacement") DEFINE_int(stress_runs, 0, "number of stress runs") DEFINE_bool(optimize_closures, true, "optimize closures") +DEFINE_bool(lookup_sample_by_shared, true, + "when picking a function to optimize, watch for shared function " + "info, not JSFunction itself") +DEFINE_bool(cache_optimized_code, true, + "cache optimized code for closures") DEFINE_bool(inline_construct, true, "inline constructor calls") DEFINE_bool(inline_arguments, true, "inline functions with arguments object") DEFINE_int(loop_weight, 1, "loop weight for representation inference") diff --git a/src/frames.cc b/src/frames.cc index b7e0286..c801123 100644 --- a/src/frames.cc +++ b/src/frames.cc @@ -832,12 +832,23 @@ void FrameSummary::Print() { } +JSFunction* OptimizedFrame::LiteralAt(FixedArray* literal_array, + int literal_id) { + if (literal_id == Translation::kSelfLiteralId) { + return JSFunction::cast(function()); + } + + return JSFunction::cast(literal_array->get(literal_id)); +} + + void OptimizedFrame::Summarize(List<FrameSummary>* frames) { ASSERT(frames->length() == 0); ASSERT(is_optimized()); int deopt_index = Safepoint::kNoDeoptimizationIndex; DeoptimizationInputData* data = GetDeoptimizationData(&deopt_index); + FixedArray* literal_array = data->LiteralArray(); // BUG(3243555): Since we don't have a lazy-deopt registered at // throw-statements, we can't use the translation at the call-site of @@ -865,10 +876,8 @@ void OptimizedFrame::Summarize(List<FrameSummary>* frames) { if (opcode == Translation::JS_FRAME) { i--; int ast_id = it.Next(); - int function_id = it.Next(); + JSFunction* function = LiteralAt(literal_array, it.Next()); it.Next(); // Skip height. - JSFunction* function = - JSFunction::cast(data->LiteralArray()->get(function_id)); // The translation commands are ordered and the receiver is always // at the first position. Since we are always at a call when we need @@ -975,6 +984,7 @@ void OptimizedFrame::GetFunctions(List<JSFunction*>* functions) { int deopt_index = Safepoint::kNoDeoptimizationIndex; DeoptimizationInputData* data = GetDeoptimizationData(&deopt_index); + FixedArray* literal_array = data->LiteralArray(); TranslationIterator it(data->TranslationByteArray(), data->TranslationIndex(deopt_index)->value()); @@ -990,10 +1000,8 @@ void OptimizedFrame::GetFunctions(List<JSFunction*>* functions) { if (opcode == Translation::JS_FRAME) { jsframe_count--; it.Next(); // Skip ast id. - int function_id = it.Next(); + JSFunction* function = LiteralAt(literal_array, it.Next()); it.Next(); // Skip height. - JSFunction* function = - JSFunction::cast(data->LiteralArray()->get(function_id)); functions->Add(function); } else { // Skip over operands to advance to the next opcode.
diff --git a/src/frames.h b/src/frames.h index 2d45932..30f7e1f 100644 --- a/src/frames.h +++ b/src/frames.h @@ -577,6 +577,8 @@ class OptimizedFrame : public JavaScriptFrame { inline explicit OptimizedFrame(StackFrameIterator* iterator); private: + JSFunction* LiteralAt(FixedArray* literal_array, int literal_id); + friend class StackFrameIterator; }; diff --git a/src/heap.cc b/src/heap.cc index 172405b..6ba76db 100644 --- a/src/heap.cc +++ b/src/heap.cc @@ -3010,6 +3010,7 @@ MaybeObject* Heap::AllocateSharedFunctionInfo(Object* name) { share->set_name(name); Code* illegal = isolate_->builtins()->builtin(Builtins::kIllegal); share->set_code(illegal); + share->ClearOptimizedCodeMap(); share->set_scope_info(ScopeInfo::Empty()); Code* construct_stub = isolate_->builtins()->builtin(Builtins::kJSConstructStubGeneric); diff --git a/src/hydrogen-instructions.h b/src/hydrogen-instructions.h index 780d57d..0920024 100644 --- a/src/hydrogen-instructions.h +++ b/src/hydrogen-instructions.h @@ -1469,7 +1469,7 @@ class HPushArgument: public HUnaryOperation { class HThisFunction: public HTemplateInstruction<0> { public: - explicit HThisFunction(Handle<JSFunction> closure) : closure_(closure) { + HThisFunction() { set_representation(Representation::Tagged()); SetFlag(kUseGVN); } @@ -1478,18 +1478,10 @@ class HThisFunction: public HTemplateInstruction<0> { return Representation::None(); } - Handle<JSFunction> closure() const { return closure_; } DECLARE_CONCRETE_INSTRUCTION(ThisFunction) protected: - virtual bool DataEquals(HValue* other) { - HThisFunction* b = HThisFunction::cast(other); - return *closure() == *b->closure(); - } - - private: - Handle<JSFunction> closure_; + virtual bool DataEquals(HValue* other) { return true; } }; diff --git a/src/hydrogen.cc b/src/hydrogen.cc index 61488af..5b6378c 100644 --- a/src/hydrogen.cc +++ b/src/hydrogen.cc @@ -7963,13 +7963,25 @@ void HGraphBuilder::HandleLiteralCompareNil(CompareOperation* expr, } +HInstruction* HGraphBuilder::BuildThisFunction() { + // If we share optimized code between different closures, the + // this-function is not a constant, except inside an inlined body. + if (function_state()->outer() != NULL) { + return new(zone()) HConstant( + function_state()->compilation_info()->closure(), + Representation::Tagged()); + } else { + return new(zone()) HThisFunction; + } +} + + void HGraphBuilder::VisitThisFunction(ThisFunction* expr) { ASSERT(!HasStackOverflow()); ASSERT(current_block() != NULL); ASSERT(current_block()->HasPredecessor()); - HThisFunction* self = new(zone()) HThisFunction( - function_state()->compilation_info()->closure()); - return ast_context()->ReturnInstruction(self, expr->id()); + HInstruction* instr = BuildThisFunction(); + return ast_context()->ReturnInstruction(instr, expr->id()); } diff --git a/src/hydrogen.h b/src/hydrogen.h index 6fa3d1b..d5a9248 100644 --- a/src/hydrogen.h +++ b/src/hydrogen.h @@ -1156,6 +1156,8 @@ class HGraphBuilder: public AstVisitor { HValue* BuildContextChainWalk(Variable* var); + HInstruction* BuildThisFunction(); + void AddCheckConstantFunction(Call* expr, HValue* receiver, Handle<Map> receiver_map, diff --git a/src/ia32/code-stubs-ia32.cc b/src/ia32/code-stubs-ia32.cc index df04b28..afa3e1c 100644 --- a/src/ia32/code-stubs-ia32.cc +++ b/src/ia32/code-stubs-ia32.cc @@ -66,9 +66,13 @@ void ToNumberStub::Generate(MacroAssembler* masm) { void FastNewClosureStub::Generate(MacroAssembler* masm) { // Create a new closure from the given function info in new // space. Set the context to the current context in esi.
+ Counters* counters = masm->isolate()->counters(); + Label gc; __ AllocateInNewSpace(JSFunction::kSize, eax, ebx, ecx, &gc, TAG_OBJECT); + __ IncrementCounter(counters->fast_new_closure_total(), 1); + // Get the function info from the stack. __ mov(edx, Operand(esp, 1 * kPointerSize)); @@ -80,8 +84,8 @@ void FastNewClosureStub::Generate(MacroAssembler* masm) { // as the map of the allocated object. __ mov(ecx, Operand(esi, Context::SlotOffset(Context::GLOBAL_INDEX))); __ mov(ecx, FieldOperand(ecx, GlobalObject::kGlobalContextOffset)); - __ mov(ecx, Operand(ecx, Context::SlotOffset(map_index))); - __ mov(FieldOperand(eax, JSObject::kMapOffset), ecx); + __ mov(ebx, Operand(ecx, Context::SlotOffset(map_index))); + __ mov(FieldOperand(eax, JSObject::kMapOffset), ebx); // Initialize the rest of the function. We don't have to update the // write barrier because the allocated object is in new space. @@ -94,11 +98,20 @@ void FastNewClosureStub::Generate(MacroAssembler* masm) { __ mov(FieldOperand(eax, JSFunction::kSharedFunctionInfoOffset), edx); __ mov(FieldOperand(eax, JSFunction::kContextOffset), esi); __ mov(FieldOperand(eax, JSFunction::kLiteralsOffset), ebx); - __ mov(FieldOperand(eax, JSFunction::kNextFunctionLinkOffset), - Immediate(factory->undefined_value())); // Initialize the code pointer in the function to be the one // found in the shared function info object. + // But first check if there is an optimized version for our context. + Label check_optimized; + Label install_unoptimized; + if (FLAG_cache_optimized_code) { + __ mov(ebx, FieldOperand(edx, SharedFunctionInfo::kOptimizedCodeMapOffset)); + __ test(ebx, ebx); + __ j(not_zero, &check_optimized, Label::kNear); + } + __ bind(&install_unoptimized); + __ mov(FieldOperand(eax, JSFunction::kNextFunctionLinkOffset), + Immediate(factory->undefined_value())); __ mov(edx, FieldOperand(edx, SharedFunctionInfo::kCodeOffset)); __ lea(edx, FieldOperand(edx, Code::kHeaderSize)); __ mov(FieldOperand(eax, JSFunction::kCodeEntryOffset), edx); @@ -106,6 +119,68 @@ void FastNewClosureStub::Generate(MacroAssembler* masm) { // Return and remove the on-stack parameter. __ ret(1 * kPointerSize); + __ bind(&check_optimized); + + __ IncrementCounter(counters->fast_new_closure_try_optimized(), 1); + + // ecx holds global context, ebx points to fixed array of 3-element entries + // (global context, optimized code, literals). + // Map must never be empty, so check the first elements. + Label install_optimized; + // Speculatively move code object into edx. + __ mov(edx, FieldOperand(ebx, FixedArray::kHeaderSize + kPointerSize)); + __ cmp(ecx, FieldOperand(ebx, FixedArray::kHeaderSize)); + __ j(equal, &install_optimized); + + // Iterate through the rest of map backwards. edx holds an index as a Smi. + Label loop; + Label restore; + __ mov(edx, FieldOperand(ebx, FixedArray::kLengthOffset)); + __ bind(&loop); + // Do not double check first entry. + __ cmp(edx, Immediate(Smi::FromInt(SharedFunctionInfo::kEntryLength))); + __ j(equal, &restore); + __ sub(edx, Immediate(Smi::FromInt( + SharedFunctionInfo::kEntryLength))); // Skip an entry. + __ cmp(ecx, CodeGenerator::FixedArrayElementOperand(ebx, edx, 0)); + __ j(not_equal, &loop, Label::kNear); + // Hit: fetch the optimized code. 
+ __ mov(edx, CodeGenerator::FixedArrayElementOperand(ebx, edx, 1)); + + __ bind(&install_optimized); + __ IncrementCounter(counters->fast_new_closure_install_optimized(), 1); + + // TODO(fschneider): Idea: store proper code pointers in the optimized code + // map and either unmangle them on marking or do nothing as the whole map is + // discarded on major GC anyway. + __ lea(edx, FieldOperand(edx, Code::kHeaderSize)); + __ mov(FieldOperand(eax, JSFunction::kCodeEntryOffset), edx); + + // Now link a function into a list of optimized functions. + __ mov(edx, ContextOperand(ecx, Context::OPTIMIZED_FUNCTIONS_LIST)); + + __ mov(FieldOperand(eax, JSFunction::kNextFunctionLinkOffset), edx); + // No need for write barrier as JSFunction (eax) is in the new space. + + __ mov(ContextOperand(ecx, Context::OPTIMIZED_FUNCTIONS_LIST), eax); + // Store JSFunction (eax) into edx before issuing write barrier as + // it clobbers all the registers passed. + __ mov(edx, eax); + __ RecordWriteContextSlot( + ecx, + Context::SlotOffset(Context::OPTIMIZED_FUNCTIONS_LIST), + edx, + ebx, + kDontSaveFPRegs); + + // Return and remove the on-stack parameter. + __ ret(1 * kPointerSize); + + __ bind(&restore); + // Restore SharedFunctionInfo into edx. + __ mov(edx, Operand(esp, 1 * kPointerSize)); + __ jmp(&install_unoptimized); + // Create a new closure through the slower runtime call. __ bind(&gc); __ pop(ecx); // Temporarily remove return address. @@ -7073,6 +7148,8 @@ static const AheadOfTimeWriteBarrierStubList kAheadOfTime[] = { { REG(edx), REG(eax), REG(edi), EMIT_REMEMBERED_SET}, // StoreArrayLiteralElementStub::Generate { REG(ebx), REG(eax), REG(ecx), EMIT_REMEMBERED_SET}, + // FastNewClosureStub + { REG(ecx), REG(edx), REG(ebx), EMIT_REMEMBERED_SET}, // Null termination. { REG(no_reg), REG(no_reg), REG(no_reg), EMIT_REMEMBERED_SET} }; diff --git a/src/ia32/deoptimizer-ia32.cc b/src/ia32/deoptimizer-ia32.cc index 326207f..32421ae 100644 --- a/src/ia32/deoptimizer-ia32.cc +++ b/src/ia32/deoptimizer-ia32.cc @@ -117,6 +117,10 @@ void Deoptimizer::EnsureRelocSpaceForLazyDeoptimization(Handle code) { void Deoptimizer::DeoptimizeFunction(JSFunction* function) { if (!function->IsOptimized()) return; + // The optimized code is going to be patched, so we cannot use it + // any more. Play safe and reset the whole cache. + function->shared()->ClearOptimizedCodeMap(); + Isolate* isolate = function->GetIsolate(); HandleScope scope(isolate); AssertNoAllocation no_allocation; @@ -194,8 +198,19 @@ void Deoptimizer::DeoptimizeFunction(JSFunction* function) { // ignore all slots that might have been recorded on it. isolate->heap()->mark_compact_collector()->InvalidateCode(code); - // Set the code for the function to non-optimized version. - function->ReplaceCode(function->shared()->code()); + // Iterate over all the functions which share the same code object + // and make them use unoptimized version. + Context* context = function->context()->global_context(); + Object* element = context->get(Context::OPTIMIZED_FUNCTIONS_LIST); + SharedFunctionInfo* shared = function->shared(); + while (!element->IsUndefined()) { + JSFunction* func = JSFunction::cast(element); + // Grab element before code replacement as ReplaceCode alters the list. 
+ element = func->next_function_link(); + if (func->code() == code) { + func->ReplaceCode(shared->code()); + } + } if (FLAG_trace_deopt) { PrintF("[forced deoptimization: "); @@ -330,9 +345,9 @@ void Deoptimizer::DoComputeOsrOutputFrame() { unsigned node_id = iterator.Next(); USE(node_id); ASSERT(node_id == ast_id); - JSFunction* function = JSFunction::cast(ComputeLiteral(iterator.Next())); - USE(function); - ASSERT(function == function_); + int closure_id = iterator.Next(); + USE(closure_id); + ASSERT_EQ(Translation::kSelfLiteralId, closure_id); unsigned height = iterator.Next(); unsigned height_in_bytes = height * kPointerSize; USE(height_in_bytes); @@ -456,15 +471,15 @@ void Deoptimizer::DoComputeOsrOutputFrame() { output_[0]->SetPc(pc); } Code* continuation = - function->GetIsolate()->builtins()->builtin(Builtins::kNotifyOSR); + function_->GetIsolate()->builtins()->builtin(Builtins::kNotifyOSR); output_[0]->SetContinuation( reinterpret_cast(continuation->entry())); if (FLAG_trace_osr) { PrintF("[on-stack replacement translation %s: 0x%08" V8PRIxPTR " ", ok ? "finished" : "aborted", - reinterpret_cast(function)); - function->PrintName(); + reinterpret_cast(function_)); + function_->PrintName(); PrintF(" => pc=0x%0x]\n", output_[0]->GetPc()); } } @@ -682,7 +697,15 @@ void Deoptimizer::DoComputeConstructStubFrame(TranslationIterator* iterator, void Deoptimizer::DoComputeJSFrame(TranslationIterator* iterator, int frame_index) { int node_id = iterator->Next(); - JSFunction* function = JSFunction::cast(ComputeLiteral(iterator->Next())); + JSFunction* function; + if (frame_index != 0) { + function = JSFunction::cast(ComputeLiteral(iterator->Next())); + } else { + int closure_id = iterator->Next(); + USE(closure_id); + ASSERT_EQ(Translation::kSelfLiteralId, closure_id); + function = function_; + } unsigned height = iterator->Next(); unsigned height_in_bytes = height * kPointerSize; if (FLAG_trace_deopt) { diff --git a/src/ia32/lithium-codegen-ia32.cc b/src/ia32/lithium-codegen-ia32.cc index 7fd64ca..1ea2188 100644 --- a/src/ia32/lithium-codegen-ia32.cc +++ b/src/ia32/lithium-codegen-ia32.cc @@ -420,7 +420,9 @@ void LCodeGen::WriteTranslation(LEnvironment* environment, int height = translation_size - environment->parameter_count(); WriteTranslation(environment->outer(), translation); - int closure_id = DefineDeoptimizationLiteral(environment->closure()); + int closure_id = *info()->closure() != *environment->closure() + ? 
DefineDeoptimizationLiteral(environment->closure()) + : Translation::kSelfLiteralId; switch (environment->frame_type()) { case JS_FUNCTION: translation->BeginJSFrame(environment->ast_id(), closure_id, height); @@ -2830,7 +2832,7 @@ void LCodeGen::DoDrop(LDrop* instr) { void LCodeGen::DoThisFunction(LThisFunction* instr) { Register result = ToRegister(instr->result()); - __ LoadHeapObject(result, instr->hydrogen()->closure()); + __ mov(result, Operand(ebp, JavaScriptFrameConstants::kFunctionOffset)); } diff --git a/src/mark-compact.cc b/src/mark-compact.cc index 878c974..2aac8b0 100644 --- a/src/mark-compact.cc +++ b/src/mark-compact.cc @@ -1296,9 +1296,7 @@ class StaticMarkingVisitor : public StaticVisitorBase { static void VisitSharedFunctionInfoGeneric(Map* map, HeapObject* object) { - SharedFunctionInfo* shared = reinterpret_cast(object); - - if (shared->IsInobjectSlackTrackingInProgress()) shared->DetachInitialMap(); + SharedFunctionInfo::cast(object)->BeforeVisitingPointers(); FixedBodyVisitorGetHeap(); SharedFunctionInfo* shared = reinterpret_cast(object); - if (shared->IsInobjectSlackTrackingInProgress()) shared->DetachInitialMap(); + shared->BeforeVisitingPointers(); if (!known_flush_code_candidate) { known_flush_code_candidate = IsFlushable(heap, shared); @@ -1539,8 +1537,8 @@ class StaticMarkingVisitor : public StaticVisitorBase { } VisitPointers(heap, - SLOT_ADDR(object, SharedFunctionInfo::kScopeInfoOffset), - SLOT_ADDR(object, SharedFunctionInfo::kSize)); + SLOT_ADDR(object, SharedFunctionInfo::kOptimizedCodeMapOffset), + SLOT_ADDR(object, SharedFunctionInfo::kSize)); } #undef SLOT_ADDR diff --git a/src/objects-debug.cc b/src/objects-debug.cc index 5aac503..fc593d5 100644 --- a/src/objects-debug.cc +++ b/src/objects-debug.cc @@ -502,6 +502,7 @@ void SharedFunctionInfo::SharedFunctionInfoVerify() { CHECK(IsSharedFunctionInfo()); VerifyObjectField(kNameOffset); VerifyObjectField(kCodeOffset); + VerifyObjectField(kOptimizedCodeMapOffset); VerifyObjectField(kScopeInfoOffset); VerifyObjectField(kInstanceClassNameOffset); VerifyObjectField(kFunctionDataOffset); diff --git a/src/objects-inl.h b/src/objects-inl.h index da1a35b..7efe15e 100644 --- a/src/objects-inl.h +++ b/src/objects-inl.h @@ -3663,6 +3663,8 @@ ACCESSORS(BreakPointInfo, break_point_objects, Object, kBreakPointObjectsIndex) #endif ACCESSORS(SharedFunctionInfo, name, Object, kNameOffset) +ACCESSORS(SharedFunctionInfo, optimized_code_map, Object, + kOptimizedCodeMapOffset) ACCESSORS(SharedFunctionInfo, construct_stub, Code, kConstructStubOffset) ACCESSORS(SharedFunctionInfo, initial_map, Object, kInitialMapOffset) ACCESSORS(SharedFunctionInfo, instance_class_name, Object, @@ -3874,6 +3876,17 @@ BOOL_ACCESSORS(SharedFunctionInfo, compiler_hints, dont_optimize, kDontOptimize) BOOL_ACCESSORS(SharedFunctionInfo, compiler_hints, dont_inline, kDontInline) +void SharedFunctionInfo::BeforeVisitingPointers() { + if (IsInobjectSlackTrackingInProgress()) DetachInitialMap(); + + // Flush optimized code map on major GC. + // Note: we may experiment with rebuilding it or retaining entries + // which should survive as we iterate through optimized functions + // anyway. 
+ set_optimized_code_map(Smi::FromInt(0)); +} + + ACCESSORS(CodeCache, default_cache, FixedArray, kDefaultCacheOffset) ACCESSORS(CodeCache, normal_type_cache, Object, kNormalTypeCacheOffset) diff --git a/src/objects.cc b/src/objects.cc index 329a0c5..f44f5ef 100644 --- a/src/objects.cc +++ b/src/objects.cc @@ -7471,6 +7471,54 @@ bool SharedFunctionInfo::CompileLazy(Handle<SharedFunctionInfo> shared, } +void SharedFunctionInfo::ClearOptimizedCodeMap() { + set_optimized_code_map(Smi::FromInt(0)); +} + + +void SharedFunctionInfo::AddToOptimizedCodeMap( + Handle<SharedFunctionInfo> shared, + Handle<Context> global_context, + Handle<Code> code, + Handle<FixedArray> literals) { + ASSERT(code->kind() == Code::OPTIMIZED_FUNCTION); + ASSERT(global_context->IsGlobalContext()); + STATIC_ASSERT(kEntryLength == 3); + Object* value = shared->optimized_code_map(); + Handle<FixedArray> new_code_map; + if (value->IsSmi()) { + // No optimized code map. + ASSERT_EQ(0, Smi::cast(value)->value()); + // Create 3 entries per context {context, code, literals}. + new_code_map = FACTORY->NewFixedArray(kEntryLength); + new_code_map->set(0, *global_context); + new_code_map->set(1, *code); + new_code_map->set(2, *literals); + } else { + // Copy old map and append one new entry. + Handle<FixedArray> old_code_map(FixedArray::cast(value)); + ASSERT_EQ(-1, shared->SearchOptimizedCodeMap(*global_context)); + int old_length = old_code_map->length(); + int new_length = old_length + kEntryLength; + new_code_map = FACTORY->NewFixedArray(new_length); + old_code_map->CopyTo(0, *new_code_map, 0, old_length); + new_code_map->set(old_length, *global_context); + new_code_map->set(old_length + 1, *code); + new_code_map->set(old_length + 2, *literals); + } +#ifdef DEBUG + for (int i = 0; i < new_code_map->length(); i += kEntryLength) { + ASSERT(new_code_map->get(i)->IsGlobalContext()); + ASSERT(new_code_map->get(i + 1)->IsCode()); + ASSERT(Code::cast(new_code_map->get(i + 1))->kind() == + Code::OPTIMIZED_FUNCTION); + ASSERT(new_code_map->get(i + 2)->IsFixedArray()); + } +#endif + shared->set_optimized_code_map(*new_code_map); +} + + bool JSFunction::CompileLazy(Handle<JSFunction> function, ClearExceptionFlag flag) { bool result = true; @@ -8040,6 +8088,22 @@ void SharedFunctionInfo::CompleteInobjectSlackTracking() { } +int SharedFunctionInfo::SearchOptimizedCodeMap(Context* global_context) { + ASSERT(global_context->IsGlobalContext()); + Object* value = optimized_code_map(); + if (!value->IsSmi()) { + FixedArray* optimized_code_map = FixedArray::cast(value); + int length = optimized_code_map->length(); + for (int i = 0; i < length; i += 3) { + if (optimized_code_map->get(i) == global_context) { + return i + 1; + } + } + } + return -1; +} + + void SharedFunctionInfo::SharedFunctionInfoIterateBody(ObjectVisitor* v) { v->VisitSharedFunctionInfo(this); SharedFunctionInfo::BodyDescriptor::IterateBody(this, v); @@ -8331,11 +8395,14 @@ void DeoptimizationInputData::DeoptimizationInputDataPrint(FILE* out) { case Translation::JS_FRAME: { int ast_id = iterator.Next(); int function_id = iterator.Next(); - JSFunction* function = - JSFunction::cast(LiteralArray()->get(function_id)); unsigned height = iterator.Next(); PrintF(out, "{ast_id=%d, function=", ast_id); - function->PrintName(out); + if (function_id != Translation::kSelfLiteralId) { + Object* function = LiteralArray()->get(function_id); + JSFunction::cast(function)->PrintName(out); + } else { + PrintF(out, "<self>"); + } PrintF(out, ", height=%u}", height); break; } diff --git a/src/objects.h b/src/objects.h index 15ecdd1..68c98ab 100644 --- a/src/objects.h +++ b/src/objects.h @@ -5229,6 +5229,25
@@ class SharedFunctionInfo: public HeapObject { // [code]: Function code. DECL_ACCESSORS(code, Code) + // [optimized_code_map]: Map from global context to optimized code + // and a shared literals array or Smi 0 if none. + DECL_ACCESSORS(optimized_code_map, Object) + + // Returns index i of the entry with the specified context. At position + // i - 1 is the context, position i the code, and i + 1 the literals array. + // Returns -1 when no matching entry is found. + int SearchOptimizedCodeMap(Context* global_context); + + // Clear optimized code map. + void ClearOptimizedCodeMap(); + + // Add a new entry to the optimized code map. + static void AddToOptimizedCodeMap(Handle<SharedFunctionInfo> shared, + Handle<Context> global_context, + Handle<Code> code, + Handle<FixedArray> literals); + static const int kEntryLength = 3; + // [scope_info]: Scope info. DECL_ACCESSORS(scope_info, ScopeInfo) @@ -5336,6 +5355,10 @@ class SharedFunctionInfo: public HeapObject { // IsInobjectSlackTrackingInProgress is false after this call. void CompleteInobjectSlackTracking(); + // Invoked before pointers in SharedFunctionInfo are being marked. + // Also clears the optimized code map. + inline void BeforeVisitingPointers(); + // Clears the initial_map before the GC marking phase to ensure the reference // is weak. IsInobjectSlackTrackingInProgress is false after this call. void DetachInitialMap(); @@ -5613,7 +5636,8 @@ class SharedFunctionInfo: public HeapObject { // Pointer fields. static const int kNameOffset = HeapObject::kHeaderSize; static const int kCodeOffset = kNameOffset + kPointerSize; - static const int kScopeInfoOffset = kCodeOffset + kPointerSize; + static const int kOptimizedCodeMapOffset = kCodeOffset + kPointerSize; + static const int kScopeInfoOffset = kOptimizedCodeMapOffset + kPointerSize; static const int kConstructStubOffset = kScopeInfoOffset + kPointerSize; static const int kInstanceClassNameOffset = kConstructStubOffset + kPointerSize; diff --git a/src/runtime-profiler.cc b/src/runtime-profiler.cc index 003b882..cdbc77a 100644 --- a/src/runtime-profiler.cc +++ b/src/runtime-profiler.cc @@ -218,7 +218,10 @@ int RuntimeProfiler::LookupSample(JSFunction* function) { for (int i = 0; i < kSamplerWindowSize; i++) { Object* sample = sampler_window_[i]; if (sample != NULL) { - if (function == sample) { + bool fits = FLAG_lookup_sample_by_shared + ? (function->shared() == JSFunction::cast(sample)->shared()) + : (function == JSFunction::cast(sample)); + if (fits) { weight += sampler_window_weight_[i]; } } diff --git a/src/runtime.cc b/src/runtime.cc index 9e38949..58f0a94 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -8366,6 +8366,9 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_NotifyDeoptimized) { } else { Deoptimizer::DeoptimizeFunction(*function); } + // Flush optimized code cache for this function.
+ function->shared()->ClearOptimizedCodeMap(); + return isolate->heap()->undefined_value(); } diff --git a/src/v8-counters.h b/src/v8-counters.h index 6db9c77..f36b0ed 100644 --- a/src/v8-counters.h +++ b/src/v8-counters.h @@ -210,6 +210,9 @@ namespace internal { SC(compute_entry_frame, V8.ComputeEntryFrame) \ SC(generic_binary_stub_calls, V8.GenericBinaryStubCalls) \ SC(generic_binary_stub_calls_regs, V8.GenericBinaryStubCallsRegs) \ + SC(fast_new_closure_total, V8.FastNewClosureTotal) \ + SC(fast_new_closure_try_optimized, V8.FastNewClosureTryOptimized) \ + SC(fast_new_closure_install_optimized, V8.FastNewClosureInstallOptimized) \ SC(string_add_runtime, V8.StringAddRuntime) \ SC(string_add_native, V8.StringAddNative) \ SC(string_add_runtime_ext_to_ascii, V8.StringAddRuntimeExtToAscii) \ @@ -240,10 +243,6 @@ namespace internal { SC(transcendental_cache_miss, V8.TranscendentalCacheMiss) \ SC(stack_interrupts, V8.StackInterrupts) \ SC(runtime_profiler_ticks, V8.RuntimeProfilerTicks) \ - SC(other_ticks, V8.OtherTicks) \ - SC(js_opt_ticks, V8.JsOptTicks) \ - SC(js_non_opt_ticks, V8.JsNonoptTicks) \ - SC(js_other_ticks, V8.JsOtherTicks) \ SC(smi_checks_removed, V8.SmiChecksRemoved) \ SC(map_checks_removed, V8.MapChecksRemoved) \ SC(quote_json_char_count, V8.QuoteJsonCharacterCount) \ diff --git a/src/x64/code-stubs-x64.cc b/src/x64/code-stubs-x64.cc index 61d6c87..ecdb392 100644 --- a/src/x64/code-stubs-x64.cc +++ b/src/x64/code-stubs-x64.cc @@ -62,9 +62,13 @@ void ToNumberStub::Generate(MacroAssembler* masm) { void FastNewClosureStub::Generate(MacroAssembler* masm) { // Create a new closure from the given function info in new // space. Set the context to the current context in rsi. + Counters* counters = masm->isolate()->counters(); + Label gc; __ AllocateInNewSpace(JSFunction::kSize, rax, rbx, rcx, &gc, TAG_OBJECT); + __ IncrementCounter(counters->fast_new_closure_total(), 1); + // Get the function info from the stack. __ movq(rdx, Operand(rsp, 1 * kPointerSize)); @@ -76,32 +80,109 @@ void FastNewClosureStub::Generate(MacroAssembler* masm) { // as the map of the allocated object. __ movq(rcx, Operand(rsi, Context::SlotOffset(Context::GLOBAL_INDEX))); __ movq(rcx, FieldOperand(rcx, GlobalObject::kGlobalContextOffset)); - __ movq(rcx, Operand(rcx, Context::SlotOffset(map_index))); - __ movq(FieldOperand(rax, JSObject::kMapOffset), rcx); + __ movq(rbx, Operand(rcx, Context::SlotOffset(map_index))); + __ movq(FieldOperand(rax, JSObject::kMapOffset), rbx); // Initialize the rest of the function. We don't have to update the // write barrier because the allocated object is in new space. __ LoadRoot(rbx, Heap::kEmptyFixedArrayRootIndex); - __ LoadRoot(rcx, Heap::kTheHoleValueRootIndex); + __ LoadRoot(r8, Heap::kTheHoleValueRootIndex); __ LoadRoot(rdi, Heap::kUndefinedValueRootIndex); __ movq(FieldOperand(rax, JSObject::kPropertiesOffset), rbx); __ movq(FieldOperand(rax, JSObject::kElementsOffset), rbx); - __ movq(FieldOperand(rax, JSFunction::kPrototypeOrInitialMapOffset), rcx); + __ movq(FieldOperand(rax, JSFunction::kPrototypeOrInitialMapOffset), r8); __ movq(FieldOperand(rax, JSFunction::kSharedFunctionInfoOffset), rdx); __ movq(FieldOperand(rax, JSFunction::kContextOffset), rsi); __ movq(FieldOperand(rax, JSFunction::kLiteralsOffset), rbx); - __ movq(FieldOperand(rax, JSFunction::kNextFunctionLinkOffset), rdi); // Initialize the code pointer in the function to be the one // found in the shared function info object. + // But first check if there is an optimized version for our context. 
+ Label check_optimized; + Label install_unoptimized; + if (FLAG_cache_optimized_code) { + __ movq(rbx, + FieldOperand(rdx, SharedFunctionInfo::kOptimizedCodeMapOffset)); + __ testq(rbx, rbx); + __ j(not_zero, &check_optimized, Label::kNear); + } + __ bind(&install_unoptimized); + __ movq(FieldOperand(rax, JSFunction::kNextFunctionLinkOffset), + rdi); // Initialize with undefined. __ movq(rdx, FieldOperand(rdx, SharedFunctionInfo::kCodeOffset)); __ lea(rdx, FieldOperand(rdx, Code::kHeaderSize)); __ movq(FieldOperand(rax, JSFunction::kCodeEntryOffset), rdx); + // Return and remove the on-stack parameter. + __ ret(1 * kPointerSize); + + __ bind(&check_optimized); + + __ IncrementCounter(counters->fast_new_closure_try_optimized(), 1); + + // rcx holds global context, ebx points to fixed array of 3-element entries + // (global context, optimized code, literals). + // The optimized code map must never be empty, so check the first elements. + Label install_optimized; + // Speculatively move code object into edx. + __ movq(rdx, FieldOperand(rbx, FixedArray::kHeaderSize + kPointerSize)); + __ cmpq(rcx, FieldOperand(rbx, FixedArray::kHeaderSize)); + __ j(equal, &install_optimized); + + // Iterate through the rest of map backwards. rdx holds an index. + Label loop; + Label restore; + __ movq(rdx, FieldOperand(rbx, FixedArray::kLengthOffset)); + __ SmiToInteger32(rdx, rdx); + __ bind(&loop); + // Do not double check first entry. + __ cmpq(rdx, Immediate(SharedFunctionInfo::kEntryLength)); + __ j(equal, &restore); + __ subq(rdx, Immediate(SharedFunctionInfo::kEntryLength)); // Skip an entry. + __ cmpq(rcx, FieldOperand(rbx, + rdx, + times_pointer_size, + FixedArray::kHeaderSize)); + __ j(not_equal, &loop, Label::kNear); + // Hit: fetch the optimized code. + __ movq(rdx, FieldOperand(rbx, + rdx, + times_pointer_size, + FixedArray::kHeaderSize + 1 * kPointerSize)); + + __ bind(&install_optimized); + __ IncrementCounter(counters->fast_new_closure_install_optimized(), 1); + + // TODO(fschneider): Idea: store proper code pointers in the map and either + // unmangle them on marking or do nothing as the whole map is discarded on + // major GC anyway. + __ lea(rdx, FieldOperand(rdx, Code::kHeaderSize)); + __ movq(FieldOperand(rax, JSFunction::kCodeEntryOffset), rdx); + + // Now link a function into a list of optimized functions. + __ movq(rdx, ContextOperand(rcx, Context::OPTIMIZED_FUNCTIONS_LIST)); + + __ movq(FieldOperand(rax, JSFunction::kNextFunctionLinkOffset), rdx); + // No need for write barrier as JSFunction (rax) is in the new space. + + __ movq(ContextOperand(rcx, Context::OPTIMIZED_FUNCTIONS_LIST), rax); + // Store JSFunction (rax) into rdx before issuing write barrier as + // it clobbers all the registers passed. + __ movq(rdx, rax); + __ RecordWriteContextSlot( + rcx, + Context::SlotOffset(Context::OPTIMIZED_FUNCTIONS_LIST), + rdx, + rbx, + kDontSaveFPRegs); // Return and remove the on-stack parameter. __ ret(1 * kPointerSize); + __ bind(&restore); + __ movq(rdx, Operand(rsp, 1 * kPointerSize)); + __ jmp(&install_unoptimized); + // Create a new closure through the slower runtime call. __ bind(&gc); __ pop(rcx); // Temporarily remove return address. @@ -6014,6 +6095,8 @@ struct AheadOfTimeWriteBarrierStubList kAheadOfTime[] = { { REG(r11), REG(rax), REG(r15), EMIT_REMEMBERED_SET}, // StoreArrayLiteralElementStub::Generate { REG(rbx), REG(rax), REG(rcx), EMIT_REMEMBERED_SET}, + // FastNewClosureStub::Generate + { REG(rcx), REG(rdx), REG(rbx), EMIT_REMEMBERED_SET}, // Null termination. 
{ REG(no_reg), REG(no_reg), REG(no_reg), EMIT_REMEMBERED_SET} }; diff --git a/src/x64/deoptimizer-x64.cc b/src/x64/deoptimizer-x64.cc index f3046b9..2813bef 100644 --- a/src/x64/deoptimizer-x64.cc +++ b/src/x64/deoptimizer-x64.cc @@ -52,6 +52,10 @@ void Deoptimizer::DeoptimizeFunction(JSFunction* function) { if (!function->IsOptimized()) return; + // The optimized code is going to be patched, so we cannot use it + // any more. Play safe and reset the whole cache. + function->shared()->ClearOptimizedCodeMap(); + // Get the optimized code. Code* code = function->code(); @@ -100,8 +104,19 @@ void Deoptimizer::DeoptimizeFunction(JSFunction* function) { // ignore all slots that might have been recorded on it. isolate->heap()->mark_compact_collector()->InvalidateCode(code); - // Set the code for the function to non-optimized version. - function->ReplaceCode(function->shared()->code()); + // Iterate over all the functions which share the same code object + // and make them use unoptimized version. + Context* context = function->context()->global_context(); + Object* element = context->get(Context::OPTIMIZED_FUNCTIONS_LIST); + SharedFunctionInfo* shared = function->shared(); + while (!element->IsUndefined()) { + JSFunction* func = JSFunction::cast(element); + // Grab element before code replacement as ReplaceCode alters the list. + element = func->next_function_link(); + if (func->code() == code) { + func->ReplaceCode(shared->code()); + } + } if (FLAG_trace_deopt) { PrintF("[forced deoptimization: "); @@ -234,9 +249,9 @@ void Deoptimizer::DoComputeOsrOutputFrame() { unsigned node_id = iterator.Next(); USE(node_id); ASSERT(node_id == ast_id); - JSFunction* function = JSFunction::cast(ComputeLiteral(iterator.Next())); - USE(function); - ASSERT(function == function_); + int closure_id = iterator.Next(); + USE(closure_id); + ASSERT_EQ(Translation::kSelfLiteralId, closure_id); unsigned height = iterator.Next(); unsigned height_in_bytes = height * kPointerSize; USE(height_in_bytes); @@ -341,15 +356,15 @@ void Deoptimizer::DoComputeOsrOutputFrame() { output_[0]->SetPc(pc); } Code* continuation = - function->GetIsolate()->builtins()->builtin(Builtins::kNotifyOSR); + function_->GetIsolate()->builtins()->builtin(Builtins::kNotifyOSR); output_[0]->SetContinuation( reinterpret_cast(continuation->entry())); if (FLAG_trace_osr) { PrintF("[on-stack replacement translation %s: 0x%08" V8PRIxPTR " ", ok ? 
"finished" : "aborted", - reinterpret_cast(function)); - function->PrintName(); + reinterpret_cast(function_)); + function_->PrintName(); PrintF(" => pc=0x%0" V8PRIxPTR "]\n", output_[0]->GetPc()); } } @@ -579,7 +594,15 @@ void Deoptimizer::DoComputeConstructStubFrame(TranslationIterator* iterator, void Deoptimizer::DoComputeJSFrame(TranslationIterator* iterator, int frame_index) { int node_id = iterator->Next(); - JSFunction* function = JSFunction::cast(ComputeLiteral(iterator->Next())); + JSFunction* function; + if (frame_index != 0) { + function = JSFunction::cast(ComputeLiteral(iterator->Next())); + } else { + int closure_id = iterator->Next(); + USE(closure_id); + ASSERT_EQ(Translation::kSelfLiteralId, closure_id); + function = function_; + } unsigned height = iterator->Next(); unsigned height_in_bytes = height * kPointerSize; if (FLAG_trace_deopt) { diff --git a/src/x64/lithium-codegen-x64.cc b/src/x64/lithium-codegen-x64.cc index bc8f848..ccc81bb 100644 --- a/src/x64/lithium-codegen-x64.cc +++ b/src/x64/lithium-codegen-x64.cc @@ -367,7 +367,10 @@ void LCodeGen::WriteTranslation(LEnvironment* environment, int height = translation_size - environment->parameter_count(); WriteTranslation(environment->outer(), translation); - int closure_id = DefineDeoptimizationLiteral(environment->closure()); + int closure_id = *info()->closure() != *environment->closure() + ? DefineDeoptimizationLiteral(environment->closure()) + : Translation::kSelfLiteralId; + switch (environment->frame_type()) { case JS_FUNCTION: translation->BeginJSFrame(environment->ast_id(), closure_id, height); @@ -2731,7 +2734,7 @@ void LCodeGen::DoDrop(LDrop* instr) { void LCodeGen::DoThisFunction(LThisFunction* instr) { Register result = ToRegister(instr->result()); - __ LoadHeapObject(result, instr->hydrogen()->closure()); + __ movq(result, Operand(rbp, JavaScriptFrameConstants::kFunctionOffset)); } diff --git a/test/mjsunit/compiler/optimized-closures.js b/test/mjsunit/compiler/optimized-closures.js new file mode 100644 index 0000000..eaf75f8 --- /dev/null +++ b/test/mjsunit/compiler/optimized-closures.js @@ -0,0 +1,57 @@ +// Copyright 2012 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Flags: --allow-natives-syntax + +// Test optimized closures. + +var a = new Array(100); + +function f() { + var x=0; + for (var i=0; i<100; i++) { + var g = function goo(y) { + function h() { + if (goo.arguments[0] == 23) return -42; + return 42; + } + return x + y + h(y); + } + g(0); + %OptimizeFunctionOnNextCall(g); + a[i] = g(i); + } +} + +f(); +assertEquals(42, a[0]); +assertEquals(49, a[7]); +assertEquals(-19, a[23]); + + + + -- 2.7.4
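
For readers new to this code, the cache introduced by the patch is easiest to read as a flat array of {global context, optimized code, literals} triples hanging off each SharedFunctionInfo, searched linearly by context. The stand-alone C++ sketch below (not part of the patch, and not V8 API) models that layout and the lookup/append behaviour of SearchOptimizedCodeMap and AddToOptimizedCodeMap; the names ContextId, CodeRef, LiteralsRef, Entry, Search and Add are hypothetical stand-ins for V8's Context*, Code* and FixedArray*.

// Illustrative stand-alone model of the optimized code map; all names
// here are hypothetical stand-ins for V8 types, not V8 API.
#include <cstdio>
#include <vector>

typedef int ContextId;    // models a global context
typedef int CodeRef;      // models a pointer to optimized Code
typedef int LiteralsRef;  // models the shared literals FixedArray

struct Entry {
  ContextId context;
  CodeRef code;
  LiteralsRef literals;
};

// The FixedArray of kEntryLength == 3 slots per context, as a vector of triples.
typedef std::vector<Entry> OptimizedCodeMap;

// Like SharedFunctionInfo::SearchOptimizedCodeMap: find the entry for a
// given global context, or return -1 if nothing is cached for it yet.
int Search(const OptimizedCodeMap& map, ContextId context) {
  for (size_t i = 0; i < map.size(); ++i) {
    if (map[i].context == context) return static_cast<int>(i);
  }
  return -1;
}

// Like SharedFunctionInfo::AddToOptimizedCodeMap: append a new triple once
// a context has compiled its own optimized copy of the function.
void Add(OptimizedCodeMap* map, ContextId context, CodeRef code,
         LiteralsRef literals) {
  Entry entry = { context, code, literals };
  map->push_back(entry);
}

int main() {
  OptimizedCodeMap map;
  Add(&map, 1, 100, 200);
  std::printf("context 1 -> entry %d\n", Search(map, 1));  // hit: 0
  std::printf("context 2 -> entry %d\n", Search(map, 2));  // miss: -1
  return 0;
}

On a hit, FastNewClosureStub installs the cached code and literals directly into the freshly allocated JSFunction and links it into the context's OPTIMIZED_FUNCTIONS_LIST; on deoptimization that list is walked so every closure sharing the code falls back to the unoptimized version, and the whole map is dropped on major GC.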