From b49dd13c90717057eed54b251636793a0707c138 Mon Sep 17 00:00:00 2001 From: "danno@chromium.org" Date: Wed, 21 Mar 2012 08:41:16 +0000 Subject: [PATCH] MIPS: Branch delay slot and other optimizations. List of changes: -added a minor optimization to the Simulator that quickly skips nops in the delay slot -slightly re-worked CEntryStub to save a few instructions CEntryStub now expects the following values: -s0: number of arguments including receiver -s1: size of arguments excluding receiver -s2: pointer to builtin function Two new MacroAssembler functions were added to make usage more convenient: -PrepareCEntryArgs(int num_args) to set up s0 and s1 -PrepareCEntryFunction(const ExternalReference&) to set up s2 -removed branch delay slot nops from the most frequently used code areas -reorganized some code to execute fewer instructions -utilized the delay slot of most Ret instructions This does not cover all Rets, only the most obvious cases. Also added a special version of DropAndRet that utilizes the delay slot. -added some comments to code areas where explanation of the register/delay slot usage may be needed -added an optimization to Jump so it doesn't always pre-load the target register BUG= TEST= Review URL: https://chromiumcodereview.appspot.com/9699071 Patch from Daniel Kalmar . 
git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@11099 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/mips/builtins-mips.cc | 6 +- src/mips/code-stubs-mips.cc | 194 +++++++++++++++++++-------------------- src/mips/debug-mips.cc | 4 +- src/mips/ic-mips.cc | 8 +- src/mips/lithium-codegen-mips.cc | 10 +- src/mips/macro-assembler-mips.cc | 77 ++++++++++------ src/mips/macro-assembler-mips.h | 39 ++++++-- src/mips/simulator-mips.h | 8 ++ src/mips/stub-cache-mips.cc | 11 ++- 9 files changed, 203 insertions(+), 154 deletions(-) diff --git a/src/mips/builtins-mips.cc b/src/mips/builtins-mips.cc index 09a9924..60fc72f 100644 --- a/src/mips/builtins-mips.cc +++ b/src/mips/builtins-mips.cc @@ -67,9 +67,11 @@ void Builtins::Generate_Adaptor(MacroAssembler* masm, ASSERT(extra_args == NO_EXTRA_ARGUMENTS); } - // JumpToExternalReference expects a0 to contain the number of arguments + // JumpToExternalReference expects s0 to contain the number of arguments // including the receiver and the extra arguments. - __ Addu(a0, a0, Operand(num_extra_args + 1)); + __ Addu(s0, a0, num_extra_args + 1); + __ sll(s1, s0, kPointerSizeLog2); + __ Subu(s1, s1, kPointerSize); __ JumpToExternalReference(ExternalReference(id, masm->isolate())); } diff --git a/src/mips/code-stubs-mips.cc b/src/mips/code-stubs-mips.cc index 67a880a..03755b2 100644 --- a/src/mips/code-stubs-mips.cc +++ b/src/mips/code-stubs-mips.cc @@ -70,13 +70,13 @@ void ToNumberStub::Generate(MacroAssembler* masm) { // The ToNumber stub takes one argument in a0. Label check_heap_number, call_builtin; __ JumpIfNotSmi(a0, &check_heap_number); + __ Ret(USE_DELAY_SLOT); __ mov(v0, a0); - __ Ret(); __ bind(&check_heap_number); EmitCheckForHeapNumber(masm, a0, a1, t0, &call_builtin); + __ Ret(USE_DELAY_SLOT); __ mov(v0, a0); - __ Ret(); __ bind(&call_builtin); __ push(a0); @@ -128,9 +128,9 @@ void FastNewClosureStub::Generate(MacroAssembler* masm) { // found in the shared function info object. 
__ lw(a3, FieldMemOperand(a3, SharedFunctionInfo::kCodeOffset)); __ Addu(a3, a3, Operand(Code::kHeaderSize - kHeapObjectTag)); - __ sw(a3, FieldMemOperand(v0, JSFunction::kCodeEntryOffset)); // Return result. The argument function info has been popped already. + __ sw(a3, FieldMemOperand(v0, JSFunction::kCodeEntryOffset)); __ Ret(); // Create a new closure through the slower runtime call. @@ -179,8 +179,7 @@ void FastNewContextStub::Generate(MacroAssembler* masm) { // Remove the on-stack argument and return. __ mov(cp, v0); - __ Pop(); - __ Ret(); + __ DropAndRet(1); // Need to collect. Call into runtime system. __ bind(&gc); @@ -242,8 +241,7 @@ void FastNewBlockContextStub::Generate(MacroAssembler* masm) { // Remove the on-stack argument and return. __ mov(cp, v0); - __ Addu(sp, sp, Operand(2 * kPointerSize)); - __ Ret(); + __ DropAndRet(2); // Need to collect. Call into runtime system. __ bind(&gc); @@ -368,8 +366,7 @@ void FastCloneShallowArrayStub::Generate(MacroAssembler* masm) { GenerateFastCloneShallowArrayCommon(masm, length_, mode, &slow_case); // Return and remove the on-stack parameters. - __ Addu(sp, sp, Operand(3 * kPointerSize)); - __ Ret(); + __ DropAndRet(3); __ bind(&slow_case); __ TailCallRuntime(Runtime::kCreateArrayLiteralShallow, 3, 1); @@ -405,16 +402,14 @@ void FastCloneShallowObjectStub::Generate(MacroAssembler* masm) { // Allocate the JS object and copy header together with all in-object // properties from the boilerplate. - __ AllocateInNewSpace(size, a0, a1, a2, &slow_case, TAG_OBJECT); + __ AllocateInNewSpace(size, v0, a1, a2, &slow_case, TAG_OBJECT); for (int i = 0; i < size; i += kPointerSize) { __ lw(a1, FieldMemOperand(a3, i)); - __ sw(a1, FieldMemOperand(a0, i)); + __ sw(a1, FieldMemOperand(v0, i)); } // Return and remove the on-stack parameters. 
- __ Drop(4); - __ Ret(USE_DELAY_SLOT); - __ mov(v0, a0); + __ DropAndRet(4); __ bind(&slow_case); __ TailCallRuntime(Runtime::kCreateObjectLiteralShallow, 4, 1); @@ -492,8 +487,8 @@ void ConvertToDoubleStub::Generate(MacroAssembler* masm) { __ Or(at, exponent, Operand(exponent_word_for_1)); __ Movn(exponent, at, source_); // Write exp when source not 0. // 1, 0 and -1 all have 0 for the second word. + __ Ret(USE_DELAY_SLOT); __ mov(mantissa, zero_reg); - __ Ret(); __ bind(&not_special); // Count leading zeros. @@ -514,9 +509,9 @@ void ConvertToDoubleStub::Generate(MacroAssembler* masm) { __ sll(mantissa, source_, HeapNumber::kMantissaBitsInTopWord); // And the top (top 20 bits). __ srl(source_, source_, 32 - HeapNumber::kMantissaBitsInTopWord); - __ or_(exponent, exponent, source_); - __ Ret(); + __ Ret(USE_DELAY_SLOT); + __ or_(exponent, exponent, source_); } @@ -1025,9 +1020,9 @@ void FloatingPointHelper::CallCCodeForDoubleOperation( __ sw(v0, FieldMemOperand(heap_number_result, HeapNumber::kMantissaOffset)); } // Place heap_number_result in v0 and return to the pushed return address. - __ mov(v0, heap_number_result); __ pop(ra); - __ Ret(); + __ Ret(USE_DELAY_SLOT); + __ mov(v0, heap_number_result); } @@ -1163,6 +1158,7 @@ static void EmitIdenticalObjectComparison(MacroAssembler* masm, } __ bind(&return_equal); + if (cc == less) { __ li(v0, Operand(GREATER)); // Things aren't less than themselves. } else if (cc == greater) { @@ -1234,8 +1230,8 @@ static void EmitSmiNonsmiComparison(MacroAssembler* masm, if (strict) { // If lhs was not a number and rhs was a Smi then strict equality cannot // succeed. Return non-equal (lhs is already not zero). + __ Ret(USE_DELAY_SLOT, ne, t4, Operand(HEAP_NUMBER_TYPE)); __ mov(v0, lhs); - __ Ret(ne, t4, Operand(HEAP_NUMBER_TYPE)); } else { // Smi compared non-strictly with a non-Smi non-heap-number. Call // the runtime. 
@@ -1273,8 +1269,8 @@ static void EmitSmiNonsmiComparison(MacroAssembler* masm, if (strict) { // If lhs was not a number and rhs was a Smi then strict equality cannot // succeed. Return non-equal. + __ Ret(USE_DELAY_SLOT, ne, t4, Operand(HEAP_NUMBER_TYPE)); __ li(v0, Operand(1)); - __ Ret(ne, t4, Operand(HEAP_NUMBER_TYPE)); } else { // Smi compared non-strictly with a non-Smi non-heap-number. Call // the runtime. @@ -1354,12 +1350,13 @@ void EmitNanCheck(MacroAssembler* masm, Condition cc) { __ bind(&one_is_nan); // NaN comparisons always fail. // Load whatever we need in v0 to make the comparison fail. + if (cc == lt || cc == le) { __ li(v0, Operand(GREATER)); } else { __ li(v0, Operand(LESS)); } - __ Ret(); // Return. + __ Ret(); __ bind(&neither_is_nan); } @@ -1408,6 +1405,7 @@ static void EmitTwoNonNanDoubleComparison(MacroAssembler* masm, Condition cc) { __ Branch(&return_result_not_equal, ne, t4, Operand(zero_reg)); __ bind(&return_result_equal); + __ li(v0, Operand(EQUAL)); __ Ret(); } @@ -1439,6 +1437,7 @@ static void EmitTwoNonNanDoubleComparison(MacroAssembler* masm, Condition cc) { __ BranchF(&less_than, NULL, lt, f12, f14); // Not equal, not less, not NaN, must be greater. + __ li(v0, Operand(GREATER)); __ Ret(); @@ -1469,8 +1468,8 @@ static void EmitStrictTwoHeapObjectCompare(MacroAssembler* masm, // Return non-zero. Label return_not_equal; __ bind(&return_not_equal); + __ Ret(USE_DELAY_SLOT); __ li(v0, Operand(1)); - __ Ret(); __ bind(&first_non_object); // Check for oddballs: true, false, null, undefined. @@ -1549,8 +1548,8 @@ static void EmitCheckForSymbolsOrObjects(MacroAssembler* masm, // Both are symbols. We already checked they weren't the same pointer // so they are not equal. + __ Ret(USE_DELAY_SLOT); __ li(v0, Operand(1)); // Non-zero indicates not equal. 
- __ Ret(); __ bind(&object_test); __ Branch(not_both_strings, lt, a2, Operand(FIRST_SPEC_OBJECT_TYPE)); @@ -1565,8 +1564,8 @@ static void EmitCheckForSymbolsOrObjects(MacroAssembler* masm, __ lbu(a3, FieldMemOperand(a3, Map::kBitFieldOffset)); __ and_(a0, a2, a3); __ And(a0, a0, Operand(1 << Map::kIsUndetectable)); - __ Xor(v0, a0, Operand(1 << Map::kIsUndetectable)); - __ Ret(); + __ Ret(USE_DELAY_SLOT); + __ xori(v0, a0, 1 << Map::kIsUndetectable); } @@ -1673,8 +1672,7 @@ void NumberToStringStub::Generate(MacroAssembler* masm) { // Generate code to lookup number in the number string cache. GenerateLookupNumberStringCache(masm, a1, v0, a2, a3, t0, false, &runtime); - __ Addu(sp, sp, Operand(1 * kPointerSize)); - __ Ret(); + __ DropAndRet(1); __ bind(&runtime); // Handle number to string in the runtime system if not found in the cache. @@ -1696,8 +1694,8 @@ void CompareStub::Generate(MacroAssembler* masm) { __ JumpIfNotSmi(a2, &not_two_smis); __ sra(a1, a1, 1); __ sra(a0, a0, 1); - __ Subu(v0, a1, a0); - __ Ret(); + __ Ret(USE_DELAY_SLOT); + __ subu(v0, a1, a0); __ bind(&not_two_smis); } else if (FLAG_debug_code) { __ Or(a2, a1, a0); @@ -1916,8 +1914,8 @@ void ToBooleanStub::Generate(MacroAssembler* masm) { __ lbu(at, FieldMemOperand(map, Map::kInstanceTypeOffset)); Label skip; __ Branch(&skip, ge, at, Operand(FIRST_NONSTRING_TYPE)); + __ Ret(USE_DELAY_SLOT); // the string length is OK as the return value __ lw(tos_, FieldMemOperand(tos_, String::kLengthOffset)); - __ Ret(); // the string length is OK as the return value __ bind(&skip); } @@ -2092,8 +2090,8 @@ void UnaryOpStub::GenerateSmiCodeSub(MacroAssembler* masm, __ Branch(slow, eq, t0, Operand(zero_reg)); // Return '0 - value'. - __ Subu(v0, zero_reg, a0); - __ Ret(); + __ Ret(USE_DELAY_SLOT); + __ subu(v0, zero_reg, a0); } @@ -2423,8 +2421,8 @@ void BinaryOpStub::GenerateSmiSmiOperation(MacroAssembler* masm) { // Negating it results in 'lt'. 
__ Branch(&skip, lt, scratch2, Operand(zero_reg)); ASSERT(Smi::FromInt(0) == 0); - __ mov(v0, zero_reg); - __ Ret(); // Return smi 0 if the non-zero one was positive. + __ Ret(USE_DELAY_SLOT); + __ mov(v0, zero_reg); // Return smi 0 if the non-zero one was positive. __ bind(&skip); // We fall through here if we multiplied a negative number with 0, because // that would mean we should produce -0. @@ -2479,23 +2477,23 @@ void BinaryOpStub::GenerateSmiSmiOperation(MacroAssembler* masm) { } break; case Token::BIT_OR: - __ Or(v0, left, Operand(right)); - __ Ret(); + __ Ret(USE_DELAY_SLOT); + __ or_(v0, left, right); break; case Token::BIT_AND: - __ And(v0, left, Operand(right)); - __ Ret(); + __ Ret(USE_DELAY_SLOT); + __ and_(v0, left, right); break; case Token::BIT_XOR: - __ Xor(v0, left, Operand(right)); - __ Ret(); + __ Ret(USE_DELAY_SLOT); + __ xor_(v0, left, right); break; case Token::SAR: // Remove tags from right operand. __ GetLeastBitsFromSmi(scratch1, right, 5); __ srav(scratch1, left, scratch1); // Smi tag result. - __ And(v0, scratch1, Operand(~kSmiTagMask)); + __ And(v0, scratch1, ~kSmiTagMask); __ Ret(); break; case Token::SHR: @@ -2607,8 +2605,8 @@ void BinaryOpStub::GenerateFPOperation(MacroAssembler* masm, // kValueOffset. On MIPS this workaround is built into sdc1 so // there's no point in generating even more instructions. __ sdc1(f10, FieldMemOperand(result, HeapNumber::kValueOffset)); + __ Ret(USE_DELAY_SLOT); __ mov(v0, result); - __ Ret(); } else { // Call the C function to handle the double operation. FloatingPointHelper::CallCCodeForDoubleOperation(masm, @@ -3482,8 +3480,8 @@ void TranscendentalCacheStub::Generate(MacroAssembler* masm) { __ sw(a3, MemOperand(cache_entry, 1 * kPointerSize)); __ sw(t2, MemOperand(cache_entry, 2 * kPointerSize)); + __ Ret(USE_DELAY_SLOT); __ mov(v0, cache_entry); - __ Ret(); __ bind(&invalid_cache); // The cache is invalid. 
Call runtime which will recreate the @@ -3662,7 +3660,7 @@ void MathPowStub::Generate(MacroAssembler* masm) { ne, double_exponent, double_scratch); - + // double_scratch can be overwritten in the delay slot. // Calculates square root of base. Check for the special case of // Math.pow(-Infinity, 0.5) == Infinity (ECMA spec, 15.8.2.13). __ Move(double_scratch, -V8_INFINITY); @@ -3682,7 +3680,7 @@ void MathPowStub::Generate(MacroAssembler* masm) { ne, double_exponent, double_scratch); - + // double_scratch can be overwritten in the delay slot. // Calculates square root of base. Check for the special case of // Math.pow(-Infinity, -0.5) == 0 (ECMA spec, 15.8.2.13). __ Move(double_scratch, -V8_INFINITY); @@ -3866,9 +3864,10 @@ void CEntryStub::GenerateCore(MacroAssembler* masm, __ sw(a1, MemOperand(a0)); } - // Prepare arguments for C routine: a0 = argc, a1 = argv + // Prepare arguments for C routine. + // a0 = argc __ mov(a0, s0); - __ mov(a1, s1); + // a1 = argv (set in the delay slot after find_ra below). // We are calling compiled C/C++ code. a0 and a1 hold our two arguments. We // also need to reserve the 4 argument slots on the stack. @@ -3888,30 +3887,28 @@ void CEntryStub::GenerateCore(MacroAssembler* masm, // coverage code can interfere with the proper calculation of ra. Label find_ra; masm->bal(&find_ra); // bal exposes branch delay slot. - masm->nop(); // Branch delay slot nop. + masm->mov(a1, s1); masm->bind(&find_ra); // Adjust the value in ra to point to the correct return location, 2nd // instruction past the real call into C code (the jalr(t9)), and push it. // This is the return address of the exit frame. - const int kNumInstructionsToJump = 6; + const int kNumInstructionsToJump = 5; masm->Addu(ra, ra, kNumInstructionsToJump * kPointerSize); masm->sw(ra, MemOperand(sp)); // This spot was reserved in EnterExitFrame. - masm->Subu(sp, sp, kCArgsSlotsSize); + // Stack space reservation moved to the branch delay slot below. // Stack is still aligned. 
// Call the C routine. masm->mov(t9, s2); // Function pointer to t9 to conform to ABI for PIC. masm->jalr(t9); - masm->nop(); // Branch delay slot nop. + // Set up sp in the delay slot. + masm->addiu(sp, sp, -kCArgsSlotsSize); // Make sure the stored 'ra' points to this position. ASSERT_EQ(kNumInstructionsToJump, masm->InstructionsGeneratedSince(&find_ra)); } - // Restore stack (remove arg slots). - __ Addu(sp, sp, kCArgsSlotsSize); - if (always_allocate) { // It's okay to clobber a2 and a3 here. v0 & v1 contain result. __ li(a2, Operand(scope_depth)); @@ -3925,14 +3922,16 @@ void CEntryStub::GenerateCore(MacroAssembler* masm, STATIC_ASSERT(((kFailureTag + 1) & kFailureTagMask) == 0); __ addiu(a2, v0, 1); __ andi(t0, a2, kFailureTagMask); - __ Branch(&failure_returned, eq, t0, Operand(zero_reg)); + __ Branch(USE_DELAY_SLOT, &failure_returned, eq, t0, Operand(zero_reg)); + // Restore stack (remove arg slots) in branch delay slot. + __ addiu(sp, sp, kCArgsSlotsSize); + // Exit C frame and return. // v0:v1: result // sp: stack pointer // fp: frame pointer - __ LeaveExitFrame(save_doubles_, s0); - __ Ret(); + __ LeaveExitFrame(save_doubles_, s0, true); // Check if we should retry or throw exception. Label retry; @@ -3943,8 +3942,10 @@ void CEntryStub::GenerateCore(MacroAssembler* masm, // Special handling of out of memory exceptions. Failure* out_of_memory = Failure::OutOfMemoryException(); - __ Branch(throw_out_of_memory_exception, eq, + __ Branch(USE_DELAY_SLOT, throw_out_of_memory_exception, eq, v0, Operand(reinterpret_cast(out_of_memory))); + // If we throw the OOM exception, the value of a3 doesn't matter. + // Any instruction can be in the delay slot that's not a jump. // Retrieve the pending exception and clear the variable. 
__ li(a3, Operand(isolate->factory()->the_hole_value())); @@ -3968,8 +3969,9 @@ void CEntryStub::GenerateCore(MacroAssembler* masm, void CEntryStub::Generate(MacroAssembler* masm) { // Called from JavaScript; parameters are on stack as if calling JS function - // a0: number of arguments including receiver - // a1: pointer to builtin function + // s0: number of arguments including receiver + // s1: size of arguments excluding receiver + // s2: pointer to builtin function // fp: frame pointer (restored after C call) // sp: stack pointer (restored as callee's sp after C call) // cp: current context (C callee-saved) @@ -3979,19 +3981,18 @@ void CEntryStub::Generate(MacroAssembler* masm) { // this by performing a garbage collection and retrying the // builtin once. + // NOTE: s0-s2 hold the arguments of this function instead of a0-a2. + // The reason for this is that these arguments would need to be saved anyway + // so it's faster to set them up directly. + // See MacroAssembler::PrepareCEntryArgs and PrepareCEntryFunction. + // Compute the argv pointer in a callee-saved register. - __ sll(s1, a0, kPointerSizeLog2); __ Addu(s1, sp, s1); - __ Subu(s1, s1, Operand(kPointerSize)); // Enter the exit frame that transitions from JavaScript to C++. FrameScope scope(masm, StackFrame::MANUAL); __ EnterExitFrame(save_doubles_); - // Set up argc and the builtin function in callee-saved registers. - __ mov(s0, a0); - __ mov(s2, a1); - // s0: number of arguments (C callee-saved) // s1: pointer to first argument (C callee-saved) // s2: pointer to builtin function (C callee-saved) @@ -4693,8 +4694,7 @@ void ArgumentsAccessStub::GenerateNewNonStrictFast(MacroAssembler* masm) { __ Branch(&arguments_loop, lt, t5, Operand(a2)); // Return and remove the on-stack parameters. - __ Addu(sp, sp, Operand(3 * kPointerSize)); - __ Ret(); + __ DropAndRet(3); // Do the runtime call to allocate the arguments object. 
// a2 = argument count (tagged) @@ -4799,8 +4799,7 @@ void ArgumentsAccessStub::GenerateNewStrict(MacroAssembler* masm) { // Return and remove the on-stack parameters. __ bind(&done); - __ Addu(sp, sp, Operand(3 * kPointerSize)); - __ Ret(); + __ DropAndRet(3); // Do the runtime call to allocate the arguments object. __ bind(&runtime); @@ -5149,8 +5148,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ bind(&failure); // For failure and exception return null. __ li(v0, Operand(isolate->factory()->null_value())); - __ Addu(sp, sp, Operand(4 * kPointerSize)); - __ Ret(); + __ DropAndRet(4); // Process the result from the native regexp code. __ bind(&success); @@ -5211,14 +5209,13 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ sll(a3, a3, kSmiTagSize); // Convert to Smi. __ sw(a3, MemOperand(a0, 0)); __ Branch(&next_capture, USE_DELAY_SLOT); - __ addiu(a0, a0, kPointerSize); // In branch delay slot. + __ addiu(a0, a0, kPointerSize); // In branch delay slot. __ bind(&done); // Return last match info. __ lw(v0, MemOperand(sp, kLastMatchInfoOffset)); - __ Addu(sp, sp, Operand(4 * kPointerSize)); - __ Ret(); + __ DropAndRet(4); // External string. Short external strings have already been ruled out. // a0: scratch @@ -5330,8 +5327,7 @@ void RegExpConstructResultStub::Generate(MacroAssembler* masm) { __ addiu(a3, a3, kPointerSize); // In branch delay slot. __ bind(&done); - __ Addu(sp, sp, Operand(3 * kPointerSize)); - __ Ret(); + __ DropAndRet(3); __ bind(&slowcase); __ TailCallRuntime(Runtime::kRegExpConstructResult, 3, 1); @@ -6136,7 +6132,7 @@ void SubStringStub::Generate(MacroAssembler* masm) { STATIC_ASSERT(kIsIndirectStringMask != 0); __ And(t0, a1, Operand(kIsIndirectStringMask)); __ Branch(USE_DELAY_SLOT, &seq_or_external_string, eq, t0, Operand(zero_reg)); - + // t0 is used as a scratch register and can be overwritten in either case. 
__ And(t0, a1, Operand(kSlicedNotConsMask)); __ Branch(&sliced_string, ne, t0, Operand(zero_reg)); // Cons string. Check whether it is flat, then fetch first part. @@ -6409,8 +6405,7 @@ void StringCompareStub::Generate(MacroAssembler* masm) { STATIC_ASSERT(kSmiTag == 0); __ li(v0, Operand(Smi::FromInt(EQUAL))); __ IncrementCounter(counters->string_compare_native(), 1, a1, a2); - __ Addu(sp, sp, Operand(2 * kPointerSize)); - __ Ret(); + __ DropAndRet(2); __ bind(&not_same); @@ -6815,16 +6810,16 @@ void ICCompareStub::GenerateHeapNumbers(MacroAssembler* masm) { __ BranchF(&fpu_lt, NULL, lt, f0, f2); // Otherwise it's greater, so just fall thru, and return. - __ Ret(USE_DELAY_SLOT); - __ li(v0, Operand(GREATER)); // In delay slot. + __ li(v0, Operand(GREATER)); + __ Ret(); __ bind(&fpu_eq); - __ Ret(USE_DELAY_SLOT); - __ li(v0, Operand(EQUAL)); // In delay slot. + __ li(v0, Operand(EQUAL)); + __ Ret(); __ bind(&fpu_lt); - __ Ret(USE_DELAY_SLOT); - __ li(v0, Operand(LESS)); // In delay slot. + __ li(v0, Operand(LESS)); + __ Ret(); } __ bind(&unordered); @@ -6924,9 +6919,9 @@ void ICCompareStub::GenerateStrings(MacroAssembler* masm) { Label left_ne_right; STATIC_ASSERT(EQUAL == 0); STATIC_ASSERT(kSmiTag == 0); - __ Branch(&left_ne_right, ne, left, Operand(right), USE_DELAY_SLOT); + __ Branch(&left_ne_right, ne, left, Operand(right)); + __ Ret(USE_DELAY_SLOT); __ mov(v0, zero_reg); // In the delay slot. - __ Ret(); __ bind(&left_ne_right); // Handle not identical strings. @@ -6939,12 +6934,12 @@ void ICCompareStub::GenerateStrings(MacroAssembler* masm) { __ And(tmp3, tmp1, Operand(tmp2)); __ And(tmp5, tmp3, Operand(kIsSymbolMask)); Label is_symbol; - __ Branch(&is_symbol, eq, tmp5, Operand(zero_reg), USE_DELAY_SLOT); - __ mov(v0, a0); // In the delay slot. + __ Branch(&is_symbol, eq, tmp5, Operand(zero_reg)); // Make sure a0 is non-zero. At this point input operands are // guaranteed to be non-zero. 
ASSERT(right.is(a0)); - __ Ret(); + __ Ret(USE_DELAY_SLOT); + __ mov(v0, a0); // In the delay slot. __ bind(&is_symbol); } @@ -6988,8 +6983,8 @@ void ICCompareStub::GenerateObjects(MacroAssembler* masm) { __ Branch(&miss, ne, a2, Operand(JS_OBJECT_TYPE)); ASSERT(GetCondition() == eq); - __ Subu(v0, a0, Operand(a1)); - __ Ret(); + __ Ret(USE_DELAY_SLOT); + __ subu(v0, a0, a1); __ bind(&miss); GenerateMiss(masm); @@ -7022,8 +7017,9 @@ void ICCompareStub::GenerateMiss(MacroAssembler* masm) { __ push(ra); __ Push(a1, a0); __ li(t0, Operand(Smi::FromInt(op_))); - __ push(t0); - __ CallExternalReference(miss, 3); + __ addiu(sp, sp, -kPointerSize); + __ CallExternalReference(miss, 3, USE_DELAY_SLOT); + __ sw(t0, MemOperand(sp)); // In the delay slot. // Compute the entry point of the rewritten stub. __ Addu(a2, v0, Operand(Code::kHeaderSize - kHeapObjectTag)); // Restore registers. @@ -7333,17 +7329,17 @@ void StringDictionaryLookupStub::Generate(MacroAssembler* masm) { // treated as a lookup success. For positive lookup probing failure // should be treated as lookup failure. if (mode_ == POSITIVE_LOOKUP) { + __ Ret(USE_DELAY_SLOT); __ mov(result, zero_reg); - __ Ret(); } __ bind(&in_dictionary); + __ Ret(USE_DELAY_SLOT); __ li(result, 1); - __ Ret(); __ bind(&not_in_dictionary); + __ Ret(USE_DELAY_SLOT); __ mov(result, zero_reg); - __ Ret(); } diff --git a/src/mips/debug-mips.cc b/src/mips/debug-mips.cc index 26b343c..83f5f50 100644 --- a/src/mips/debug-mips.cc +++ b/src/mips/debug-mips.cc @@ -152,8 +152,8 @@ static void Generate_DebugBreakCallHelper(MacroAssembler* masm, #ifdef DEBUG __ RecordComment("// Calling from debug break to runtime - come in - over"); #endif - __ mov(a0, zero_reg); // No arguments. - __ li(a1, Operand(ExternalReference::debug_break(masm->isolate()))); + __ PrepareCEntryArgs(0); // No arguments. 
+ __ PrepareCEntryFunction(ExternalReference::debug_break(masm->isolate())); CEntryStub ceb(1); __ CallStub(&ceb); diff --git a/src/mips/ic-mips.cc b/src/mips/ic-mips.cc index 01d446f..7760877 100644 --- a/src/mips/ic-mips.cc +++ b/src/mips/ic-mips.cc @@ -512,8 +512,8 @@ void CallICBase::GenerateMiss(MacroAssembler* masm, __ Push(a3, a2); // Call the entry. - __ li(a0, Operand(2)); - __ li(a1, Operand(ExternalReference(IC_Utility(id), isolate))); + __ PrepareCEntryArgs(2); + __ PrepareCEntryFunction(ExternalReference(IC_Utility(id), isolate)); CEntryStub stub(1); __ CallStub(&stub); @@ -844,8 +844,8 @@ void KeyedLoadIC::GenerateNonStrictArguments(MacroAssembler* masm) { Label slow, notin; MemOperand mapped_location = GenerateMappedArgumentsLookup(masm, a1, a0, a2, a3, t0, &notin, &slow); + __ Ret(USE_DELAY_SLOT); __ lw(v0, mapped_location); - __ Ret(); __ bind(&notin); // The unmapped lookup expects that the parameter map is in a2. MemOperand unmapped_location = @@ -853,8 +853,8 @@ void KeyedLoadIC::GenerateNonStrictArguments(MacroAssembler* masm) { __ lw(a2, unmapped_location); __ LoadRoot(a3, Heap::kTheHoleValueRootIndex); __ Branch(&slow, eq, a2, Operand(a3)); + __ Ret(USE_DELAY_SLOT); __ mov(v0, a2); - __ Ret(); __ bind(&slow); GenerateMiss(masm, false); } diff --git a/src/mips/lithium-codegen-mips.cc b/src/mips/lithium-codegen-mips.cc index de9c74b..909422f 100644 --- a/src/mips/lithium-codegen-mips.cc +++ b/src/mips/lithium-codegen-mips.cc @@ -634,13 +634,9 @@ void LCodeGen::DeoptimizeIf(Condition cc, __ bind(&skip); } - if (cc == al) { - __ Jump(entry, RelocInfo::RUNTIME_ENTRY); - } else { - // TODO(plind): The Arm port is a little different here, due to their - // DeOpt jump table, which is not used for Mips yet. - __ Jump(entry, RelocInfo::RUNTIME_ENTRY, cc, src1, src2); - } + // TODO(plind): The Arm port is a little different here, due to their + // DeOpt jump table, which is not used for Mips yet. 
+ __ Jump(entry, RelocInfo::RUNTIME_ENTRY, cc, src1, src2); } diff --git a/src/mips/macro-assembler-mips.cc b/src/mips/macro-assembler-mips.cc index 2072b39..4578014 100644 --- a/src/mips/macro-assembler-mips.cc +++ b/src/mips/macro-assembler-mips.cc @@ -2438,8 +2438,15 @@ void MacroAssembler::Jump(intptr_t target, Register rs, const Operand& rt, BranchDelaySlot bd) { + Label skip; + if (cond != cc_always) { + Branch(USE_DELAY_SLOT, &skip, NegateCondition(cond), rs, rt); + } + // The first instruction of 'li' may be placed in the delay slot. + // This is not an issue, t9 is expected to be clobbered anyway. li(t9, Operand(target, rmode)); - Jump(t9, cond, rs, rt, bd); + Jump(t9, al, zero_reg, Operand(zero_reg), bd); + bind(&skip); } @@ -2569,7 +2576,7 @@ void MacroAssembler::Call(Handle code, rmode = RelocInfo::CODE_TARGET_WITH_ID; } Call(reinterpret_cast
(code.location()), rmode, cond, rs, rt, bd); - ASSERT_EQ(CallSize(code, rmode, ast_id, cond, rs, rt), + ASSERT_EQ(CallSize(code, rmode, ast_id, cond, rs, rt, bd), SizeOfCodeGeneratedSince(&start)); } @@ -2639,14 +2646,16 @@ void MacroAssembler::Jalr(Label* L, BranchDelaySlot bdslot) { nop(); } +void MacroAssembler::DropAndRet(int drop) { + Ret(USE_DELAY_SLOT); + addiu(sp, sp, drop * kPointerSize); +} void MacroAssembler::DropAndRet(int drop, Condition cond, Register r1, const Operand& r2) { - // This is a workaround to make sure only one branch instruction is - // generated. It relies on Drop and Ret not creating branches if - // cond == cc_always. + // Both Drop and Ret need to be conditional. Label skip; if (cond != cc_always) { Branch(&skip, NegateCondition(cond), r1, r2); @@ -2713,8 +2722,8 @@ void MacroAssembler::Push(Handle handle) { #ifdef ENABLE_DEBUGGER_SUPPORT void MacroAssembler::DebugBreak() { - mov(a0, zero_reg); - li(a1, Operand(ExternalReference(Runtime::kDebugBreak, isolate()))); + PrepareCEntryArgs(0); + PrepareCEntryFunction(ExternalReference(Runtime::kDebugBreak, isolate())); CEntryStub ces(1); ASSERT(AllowThisStubCall(&ces)); Call(ces.GetCode(), RelocInfo::DEBUG_BREAK); @@ -3876,10 +3885,13 @@ void MacroAssembler::GetObjectType(Register object, // ----------------------------------------------------------------------------- // Runtime calls. -void MacroAssembler::CallStub(CodeStub* stub, Condition cond, - Register r1, const Operand& r2) { +void MacroAssembler::CallStub(CodeStub* stub, + Condition cond, + Register r1, + const Operand& r2, + BranchDelaySlot bd) { ASSERT(AllowThisStubCall(stub)); // Stub calls are not allowed in some stubs. 
- Call(stub->GetCode(), RelocInfo::CODE_TARGET, kNoASTId, cond, r1, r2); + Call(stub->GetCode(), RelocInfo::CODE_TARGET, kNoASTId, cond, r1, r2, bd); } @@ -3962,8 +3974,7 @@ void MacroAssembler::CallApiFunctionAndReturn(ExternalReference function, lw(t1, MemOperand(at)); Branch(&promote_scheduled_exception, ne, t0, Operand(t1)); li(s0, Operand(stack_space)); - LeaveExitFrame(false, s0); - Ret(); + LeaveExitFrame(false, s0, true); bind(&promote_scheduled_exception); TailCallExternalReference( @@ -4161,8 +4172,8 @@ void MacroAssembler::CallRuntime(const Runtime::Function* f, // arguments passed in because it is constant. At some point we // should remove this need and make the runtime routine entry code // smarter. - li(a0, num_arguments); - li(a1, Operand(ExternalReference(f, isolate()))); + PrepareCEntryArgs(num_arguments); + PrepareCEntryFunction(ExternalReference(f, isolate())); CEntryStub stub(1); CallStub(&stub); } @@ -4170,8 +4181,8 @@ void MacroAssembler::CallRuntime(const Runtime::Function* f, void MacroAssembler::CallRuntimeSaveDoubles(Runtime::FunctionId id) { const Runtime::Function* function = Runtime::FunctionForId(id); - li(a0, Operand(function->nargs)); - li(a1, Operand(ExternalReference(function, isolate()))); + PrepareCEntryArgs(function->nargs); + PrepareCEntryFunction(ExternalReference(function, isolate())); CEntryStub stub(1, kSaveFPRegs); CallStub(&stub); } @@ -4183,12 +4194,13 @@ void MacroAssembler::CallRuntime(Runtime::FunctionId fid, int num_arguments) { void MacroAssembler::CallExternalReference(const ExternalReference& ext, - int num_arguments) { - li(a0, Operand(num_arguments)); - li(a1, Operand(ext)); + int num_arguments, + BranchDelaySlot bd) { + PrepareCEntryArgs(num_arguments); + PrepareCEntryFunction(ext); CEntryStub stub(1); - CallStub(&stub); + CallStub(&stub, al, zero_reg, Operand(zero_reg), bd); } @@ -4199,7 +4211,7 @@ void MacroAssembler::TailCallExternalReference(const ExternalReference& ext, // arguments passed in because it 
is constant. At some point we // should remove this need and make the runtime routine entry code // smarter. - li(a0, Operand(num_arguments)); + PrepareCEntryArgs(num_arguments); JumpToExternalReference(ext); } @@ -4213,10 +4225,16 @@ void MacroAssembler::TailCallRuntime(Runtime::FunctionId fid, } -void MacroAssembler::JumpToExternalReference(const ExternalReference& builtin) { - li(a1, Operand(builtin)); +void MacroAssembler::JumpToExternalReference(const ExternalReference& builtin, + BranchDelaySlot bd) { + PrepareCEntryFunction(builtin); CEntryStub stub(1); - Jump(stub.GetCode(), RelocInfo::CODE_TARGET); + Jump(stub.GetCode(), + RelocInfo::CODE_TARGET, + al, + zero_reg, + Operand(zero_reg), + bd); } @@ -4563,7 +4581,8 @@ void MacroAssembler::EnterExitFrame(bool save_doubles, void MacroAssembler::LeaveExitFrame(bool save_doubles, - Register argument_count) { + Register argument_count, + bool do_return) { // Optionally restore all double registers. if (save_doubles) { // Remember: we only need to restore every 2nd double FPU value. @@ -4589,11 +4608,17 @@ void MacroAssembler::LeaveExitFrame(bool save_doubles, mov(sp, fp); // Respect ABI stack constraint. lw(fp, MemOperand(sp, ExitFrameConstants::kCallerFPOffset)); lw(ra, MemOperand(sp, ExitFrameConstants::kCallerPCOffset)); - addiu(sp, sp, 8); + if (argument_count.is_valid()) { sll(t8, argument_count, kPointerSizeLog2); addu(sp, sp, t8); } + + if (do_return) { + Ret(USE_DELAY_SLOT); + // If returning, the instruction in the delay slot will be the addiu below. 
+ } + addiu(sp, sp, 8); } diff --git a/src/mips/macro-assembler-mips.h b/src/mips/macro-assembler-mips.h index 63393ae..71f99e6 100644 --- a/src/mips/macro-assembler-mips.h +++ b/src/mips/macro-assembler-mips.h @@ -193,10 +193,14 @@ class MacroAssembler: public Assembler { Register reg = no_reg, const Operand& op = Operand(no_reg)); - void DropAndRet(int drop = 0, - Condition cond = cc_always, - Register reg = no_reg, - const Operand& op = Operand(no_reg)); + // Trivial case of DropAndRet that utilizes the delay slot and only emits + // 2 instructions. + void DropAndRet(int drop); + + void DropAndRet(int drop, + Condition cond, + Register reg, + const Operand& op); // Swap two registers. If the scratch register is omitted then a slightly // less efficient form using xor instead of mov is emitted. @@ -773,7 +777,9 @@ class MacroAssembler: public Assembler { int stack_space = 0); // Leave the current exit frame. - void LeaveExitFrame(bool save_doubles, Register arg_count); + void LeaveExitFrame(bool save_doubles, + Register arg_count, + bool do_return = false); // Get the actual activation frame alignment for target environment. static int ActivationFrameAlignment(); @@ -1084,9 +1090,22 @@ class MacroAssembler: public Assembler { // ------------------------------------------------------------------------- // Runtime calls. + // See comments at the beginning of CEntryStub::Generate. + inline void PrepareCEntryArgs(int num_args) { + li(s0, num_args); + li(s1, (num_args - 1) * kPointerSize); + } + + inline void PrepareCEntryFunction(const ExternalReference& ref) { + li(s2, Operand(ref)); + } + // Call a code stub. - void CallStub(CodeStub* stub, Condition cond = cc_always, - Register r1 = zero_reg, const Operand& r2 = Operand(zero_reg)); + void CallStub(CodeStub* stub, + Condition cond = cc_always, + Register r1 = zero_reg, + const Operand& r2 = Operand(zero_reg), + BranchDelaySlot bd = PROTECT); // Tail call a code stub (jump). 
void TailCallStub(CodeStub* stub); @@ -1102,7 +1121,8 @@ class MacroAssembler: public Assembler { // Convenience function: call an external reference. void CallExternalReference(const ExternalReference& ext, - int num_arguments); + int num_arguments, + BranchDelaySlot bd = PROTECT); // Tail call of a runtime routine (jump). // Like JumpToExternalReference, but also takes care of passing the number @@ -1168,7 +1188,8 @@ class MacroAssembler: public Assembler { void CallApiFunctionAndReturn(ExternalReference function, int stack_space); // Jump to the builtin routine. - void JumpToExternalReference(const ExternalReference& builtin); + void JumpToExternalReference(const ExternalReference& builtin, + BranchDelaySlot bd = PROTECT); // Invoke specified builtin JavaScript function. Adds an entry to // the unresolved list if the name does not resolve. diff --git a/src/mips/simulator-mips.h b/src/mips/simulator-mips.h index ba625f4..1e72939 100644 --- a/src/mips/simulator-mips.h +++ b/src/mips/simulator-mips.h @@ -309,6 +309,14 @@ class Simulator { void InstructionDecode(Instruction* instr); // Execute one instruction placed in a branch delay slot. void BranchDelayInstructionDecode(Instruction* instr) { + if (instr->InstructionBits() == nopInstr) { + // Short-cut generic nop instructions. They are always valid and they + // never change the simulator state. 
+ set_register(pc, reinterpret_cast(instr) + + Instruction::kInstrSize); + return; + } + if (instr->IsForbiddenInBranchDelay()) { V8_Fatal(__FILE__, __LINE__, "Eror:Unexpected %i opcode in a branch delay slot.", diff --git a/src/mips/stub-cache-mips.cc b/src/mips/stub-cache-mips.cc index 6332be4..294bc0a 100644 --- a/src/mips/stub-cache-mips.cc +++ b/src/mips/stub-cache-mips.cc @@ -577,8 +577,8 @@ static void CompileCallLoadPropertyWithInterceptor( ExternalReference ref = ExternalReference(IC_Utility(IC::kLoadPropertyWithInterceptorOnly), masm->isolate()); - __ li(a0, Operand(5)); - __ li(a1, Operand(ref)); + __ PrepareCEntryArgs(5); + __ PrepareCEntryFunction(ref); CEntryStub stub(1); __ CallStub(&stub); @@ -4107,7 +4107,8 @@ void KeyedLoadStubCompiler::GenerateLoadFastElement(MacroAssembler* masm) { // have been verified by the caller to not be a smi. // Check that the key is a smi. - __ JumpIfNotSmi(a0, &miss_force_generic); + __ JumpIfNotSmi(a0, &miss_force_generic, at, USE_DELAY_SLOT); + // The delay slot can be safely used here, a1 is an object pointer. // Get the elements array. __ lw(a2, FieldMemOperand(a1, JSObject::kElementsOffset)); @@ -4115,7 +4116,7 @@ void KeyedLoadStubCompiler::GenerateLoadFastElement(MacroAssembler* masm) { // Check that the key is within bounds. __ lw(a3, FieldMemOperand(a2, FixedArray::kLengthOffset)); - __ Branch(&miss_force_generic, hs, a0, Operand(a3)); + __ Branch(USE_DELAY_SLOT, &miss_force_generic, hs, a0, Operand(a3)); // Load the result and make sure it's not the hole. __ Addu(a3, a2, Operand(FixedArray::kHeaderSize - kHeapObjectTag)); @@ -4125,8 +4126,8 @@ void KeyedLoadStubCompiler::GenerateLoadFastElement(MacroAssembler* masm) { __ lw(t0, MemOperand(t0)); __ LoadRoot(t1, Heap::kTheHoleValueRootIndex); __ Branch(&miss_force_generic, eq, t0, Operand(t1)); + __ Ret(USE_DELAY_SLOT); __ mov(v0, t0); - __ Ret(); __ bind(&miss_force_generic); Handle stub = -- 2.7.4