From: ricow@chromium.org Date: Thu, 24 Feb 2011 12:01:24 +0000 (+0000) Subject: X64: Implement DoComputeOsrOutputFrame and fix Generate_OnStackReplacement. X-Git-Tag: upstream/4.7.83~20096 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=77fffe2207c55991895878043b487e07cf87f7af;p=platform%2Fupstream%2Fv8.git X64: Implement DoComputeOsrOutputFrame and fix Generate_OnStackReplacement. The existing version does not have a correct implementation of Generate_OnStackReplacement since we do not correctly find the loop depth. This is mainly because EmitStackCheck in full-codegen-x64.cc did not write it and partly due to us reading at the wrong offset (which had no effect when it was not written in the first place, we simply got a random number from the next instruction). The DoComputeOsrOoutputFrame is very similar to the ia32 version. Review URL: http://codereview.chromium.org/6581028 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@6931 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- diff --git a/src/flag-definitions.h b/src/flag-definitions.h index 96f63c5..cf13def 100644 --- a/src/flag-definitions.h +++ b/src/flag-definitions.h @@ -135,11 +135,8 @@ DEFINE_bool(deoptimize_uncommon_cases, true, "deoptimize uncommon cases") DEFINE_bool(polymorphic_inlining, true, "polymorphic inlining") DEFINE_bool(aggressive_loop_invariant_motion, true, "aggressive motion of instructions out of loops") -#ifdef V8_TARGET_ARCH_X64 -DEFINE_bool(use_osr, false, "use on-stack replacement") -#else DEFINE_bool(use_osr, true, "use on-stack replacement") -#endif + DEFINE_bool(trace_osr, false, "trace on-stack replacement") DEFINE_int(stress_runs, 0, "number of stress runs") DEFINE_bool(optimize_closures, true, "optimize closures") diff --git a/src/x64/deoptimizer-x64.cc b/src/x64/deoptimizer-x64.cc index 595dedc..75b0376 100644 --- a/src/x64/deoptimizer-x64.cc +++ b/src/x64/deoptimizer-x64.cc @@ -224,7 +224,7 @@ void Deoptimizer::PatchStackCheckCodeAt(Address pc_after, // ok: // ASSERT(*(call_target_address - 3) == 0x73 && // jae - *(call_target_address - 2) == 0x05 && // offset + *(call_target_address - 2) == 0x07 && // offset *(call_target_address - 1) == 0xe8); // call *(call_target_address - 3) = 0x90; // nop *(call_target_address - 2) = 0x90; // nop @@ -245,14 +245,154 @@ void Deoptimizer::RevertStackCheckCodeAt(Address pc_after, *(call_target_address - 2) == 0x90 && // nop *(call_target_address - 1) == 0xe8); // call *(call_target_address - 3) = 0x73; // jae - *(call_target_address - 2) = 0x05; // offset + *(call_target_address - 2) = 0x07; // offset Assembler::set_target_address_at(call_target_address, check_code->entry()); } +static int LookupBailoutId(DeoptimizationInputData* data, unsigned ast_id) { + ByteArray* translations = data->TranslationByteArray(); + int length = data->DeoptCount(); + for (int i = 0; i < length; i++) { + if (static_cast(data->AstId(i)->value()) == ast_id) { + TranslationIterator it(translations, data->TranslationIndex(i)->value()); + int value = it.Next(); + ASSERT(Translation::BEGIN == static_cast(value)); + // Read the number of frames. + value = it.Next(); + if (value == 1) return i; + } + } + UNREACHABLE(); + return -1; +} + + void Deoptimizer::DoComputeOsrOutputFrame() { - UNIMPLEMENTED(); + DeoptimizationInputData* data = DeoptimizationInputData::cast( + optimized_code_->deoptimization_data()); + unsigned ast_id = data->OsrAstId()->value(); + // TODO(kasperl): This should not be the bailout_id_. It should be + // the ast id. Confusing. + ASSERT(bailout_id_ == ast_id); + + int bailout_id = LookupBailoutId(data, ast_id); + unsigned translation_index = data->TranslationIndex(bailout_id)->value(); + ByteArray* translations = data->TranslationByteArray(); + + TranslationIterator iterator(translations, translation_index); + Translation::Opcode opcode = + static_cast(iterator.Next()); + ASSERT(Translation::BEGIN == opcode); + USE(opcode); + int count = iterator.Next(); + ASSERT(count == 1); + USE(count); + + opcode = static_cast(iterator.Next()); + USE(opcode); + ASSERT(Translation::FRAME == opcode); + unsigned node_id = iterator.Next(); + USE(node_id); + ASSERT(node_id == ast_id); + JSFunction* function = JSFunction::cast(ComputeLiteral(iterator.Next())); + USE(function); + ASSERT(function == function_); + unsigned height = iterator.Next(); + unsigned height_in_bytes = height * kPointerSize; + USE(height_in_bytes); + + unsigned fixed_size = ComputeFixedSize(function_); + unsigned input_frame_size = input_->GetFrameSize(); + ASSERT(fixed_size + height_in_bytes == input_frame_size); + + unsigned stack_slot_size = optimized_code_->stack_slots() * kPointerSize; + unsigned outgoing_height = data->ArgumentsStackHeight(bailout_id)->value(); + unsigned outgoing_size = outgoing_height * kPointerSize; + unsigned output_frame_size = fixed_size + stack_slot_size + outgoing_size; + ASSERT(outgoing_size == 0); // OSR does not happen in the middle of a call. + + if (FLAG_trace_osr) { + PrintF("[on-stack replacement: begin 0x%08" V8PRIxPTR " ", + reinterpret_cast(function_)); + function_->PrintName(); + PrintF(" => node=%u, frame=%d->%d]\n", + ast_id, + input_frame_size, + output_frame_size); + } + + // There's only one output frame in the OSR case. + output_count_ = 1; + output_ = new FrameDescription*[1]; + output_[0] = new(output_frame_size) FrameDescription( + output_frame_size, function_); + + // Clear the incoming parameters in the optimized frame to avoid + // confusing the garbage collector. + unsigned output_offset = output_frame_size - kPointerSize; + int parameter_count = function_->shared()->formal_parameter_count() + 1; + for (int i = 0; i < parameter_count; ++i) { + output_[0]->SetFrameSlot(output_offset, 0); + output_offset -= kPointerSize; + } + + // Translate the incoming parameters. This may overwrite some of the + // incoming argument slots we've just cleared. + int input_offset = input_frame_size - kPointerSize; + bool ok = true; + int limit = input_offset - (parameter_count * kPointerSize); + while (ok && input_offset > limit) { + ok = DoOsrTranslateCommand(&iterator, &input_offset); + } + + // There are no translation commands for the caller's pc and fp, the + // context, and the function. Set them up explicitly. + for (int i = 0; ok && i < 4; i++) { + intptr_t input_value = input_->GetFrameSlot(input_offset); + if (FLAG_trace_osr) { + PrintF(" [esp + %d] <- 0x%08" V8PRIxPTR " ; [esp + %d] (fixed part)\n", + output_offset, + input_value, + input_offset); + } + output_[0]->SetFrameSlot(output_offset, input_->GetFrameSlot(input_offset)); + input_offset -= kPointerSize; + output_offset -= kPointerSize; + } + + // Translate the rest of the frame. + while (ok && input_offset >= 0) { + ok = DoOsrTranslateCommand(&iterator, &input_offset); + } + + // If translation of any command failed, continue using the input frame. + if (!ok) { + delete output_[0]; + output_[0] = input_; + output_[0]->SetPc(reinterpret_cast(from_)); + } else { + // Setup the frame pointer and the context pointer. + output_[0]->SetRegister(rbp.code(), input_->GetRegister(rbp.code())); + output_[0]->SetRegister(rsi.code(), input_->GetRegister(rsi.code())); + + unsigned pc_offset = data->OsrPcOffset()->value(); + intptr_t pc = reinterpret_cast( + optimized_code_->entry() + pc_offset); + output_[0]->SetPc(pc); + } + Code* continuation = Builtins::builtin(Builtins::NotifyOSR); + output_[0]->SetContinuation( + reinterpret_cast(continuation->entry())); + + if (FLAG_trace_osr) { + PrintF("[on-stack replacement translation %s: 0x%08" V8PRIxPTR " ", + ok ? "finished" : "aborted", + reinterpret_cast(function)); + function->PrintName(); + PrintF(" => pc=0x%0" V8PRIxPTR "]\n", output_[0]->GetPc()); + } } diff --git a/src/x64/full-codegen-x64.cc b/src/x64/full-codegen-x64.cc index d037021..f1182ae 100644 --- a/src/x64/full-codegen-x64.cc +++ b/src/x64/full-codegen-x64.cc @@ -269,6 +269,13 @@ void FullCodeGenerator::EmitStackCheck(IterationStatement* stmt) { // the deoptimization input data found in the optimized code. RecordStackCheck(stmt->OsrEntryId()); + // Loop stack checks can be patched to perform on-stack replacement. In + // order to decide whether or not to perform OSR we embed the loop depth + // in a test instruction after the call so we can extract it from the OSR + // builtin. + ASSERT(loop_depth() > 0); + __ testl(rax, Immediate(Min(loop_depth(), Code::kMaxLoopNestingMarker))); + __ bind(&ok); PrepareForBailoutForId(stmt->EntryId(), NO_REGISTERS); // Record a mapping of the OSR id to this PC. This is used if the OSR