From b43ec5112f62a9e8dd86312aa57f5691eec7a0e3 Mon Sep 17 00:00:00 2001 From: "jkummerow@chromium.org" Date: Thu, 29 Nov 2012 09:22:08 +0000 Subject: [PATCH] MIPS: Faster implementation of Math.exp() Port r13054 (636985d7) BUG= TEST= Review URL: https://codereview.chromium.org/11415192 Patch from Akos Palfi . git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13089 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/mips/codegen-mips.cc | 147 ++++++++++++++++++++++++++++++++++++++- src/mips/codegen-mips.h | 16 +++++ src/mips/lithium-codegen-mips.cc | 14 ++++ src/mips/lithium-mips.cc | 14 ++++ src/mips/lithium-mips.h | 25 +++++++ src/mips/simulator-mips.cc | 102 +++++++++++++++++++-------- src/mips/simulator-mips.h | 5 ++ 7 files changed, 294 insertions(+), 29 deletions(-) diff --git a/src/mips/codegen-mips.cc b/src/mips/codegen-mips.cc index d237d4a..0119c11 100644 --- a/src/mips/codegen-mips.cc +++ b/src/mips/codegen-mips.cc @@ -31,11 +31,11 @@ #include "codegen.h" #include "macro-assembler.h" +#include "simulator-mips.h" namespace v8 { namespace internal { -#define __ ACCESS_MASM(masm) UnaryMathFunction CreateTranscendentalFunction(TranscendentalCache::Type type) { switch (type) { @@ -49,6 +49,74 @@ UnaryMathFunction CreateTranscendentalFunction(TranscendentalCache::Type type) { } +#define __ masm. 
+ + +#if defined(USE_SIMULATOR) +byte* fast_exp_mips_machine_code = NULL; +double fast_exp_simulator(double x) { + return Simulator::current(Isolate::Current())->CallFP( + fast_exp_mips_machine_code, x, 0); +} +#endif + + +UnaryMathFunction CreateExpFunction() { + if (!CpuFeatures::IsSupported(FPU)) return &exp; + if (!FLAG_fast_math) return &exp; + size_t actual_size; + byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, &actual_size, true)); + if (buffer == NULL) return &exp; + ExternalReference::InitializeMathExpData(); + + MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size)); + + { + CpuFeatures::Scope use_fpu(FPU); + DoubleRegister input = f12; + DoubleRegister result = f0; + DoubleRegister double_scratch1 = f4; + DoubleRegister double_scratch2 = f6; + Register temp1 = t0; + Register temp2 = t1; + Register temp3 = t2; + + if (!IsMipsSoftFloatABI) { + // Input value is in f12 anyway, nothing to do. + } else { + __ Move(input, a0, a1); + } + __ Push(temp3, temp2, temp1); + MathExpGenerator::EmitMathExp( + &masm, input, result, double_scratch1, double_scratch2, + temp1, temp2, temp3); + __ Pop(temp3, temp2, temp1); + if (!IsMipsSoftFloatABI) { + // Result is already in f0, nothing to do. 
+ } else { + __ Move(a0, a1, result); + } + __ Ret(); + } + + CodeDesc desc; + masm.GetCode(&desc); + + CPU::FlushICache(buffer, actual_size); + OS::ProtectCode(buffer, actual_size); + +#if !defined(USE_SIMULATOR) + return FUNCTION_CAST<UnaryMathFunction>(buffer); +#else + fast_exp_mips_machine_code = buffer; + return &fast_exp_simulator; +#endif +} + + +#undef __ + + UnaryMathFunction CreateSqrtFunction() { return &sqrt; } @@ -72,6 +140,8 @@ void StubRuntimeCallHelper::AfterCall(MacroAssembler* masm) const { // ------------------------------------------------------------------------- // Code generators +#define __ ACCESS_MASM(masm) + void ElementsTransitionGenerator::GenerateMapChangeElementsTransition( MacroAssembler* masm) { // ----------- S t a t e ------------- @@ -446,6 +516,81 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, __ bind(&done); } + +static MemOperand ExpConstant(int index, Register base) { + return MemOperand(base, index * kDoubleSize); +} + + +void MathExpGenerator::EmitMathExp(MacroAssembler* masm, + DoubleRegister input, + DoubleRegister result, + DoubleRegister double_scratch1, + DoubleRegister double_scratch2, + Register temp1, + Register temp2, + Register temp3) { + ASSERT(!input.is(result)); + ASSERT(!input.is(double_scratch1)); + ASSERT(!input.is(double_scratch2)); + ASSERT(!result.is(double_scratch1)); + ASSERT(!result.is(double_scratch2)); + ASSERT(!double_scratch1.is(double_scratch2)); + ASSERT(!temp1.is(temp2)); + ASSERT(!temp1.is(temp3)); + ASSERT(!temp2.is(temp3)); + ASSERT(ExternalReference::math_exp_constants(0).address() != NULL); + + Label done; + + __ li(temp3, Operand(ExternalReference::math_exp_constants(0))); + + __ ldc1(double_scratch1, ExpConstant(0, temp3)); + __ Move(result, kDoubleRegZero); + __ BranchF(&done, NULL, ge, double_scratch1, input); + __ ldc1(double_scratch2, ExpConstant(1, temp3)); + __ ldc1(result, ExpConstant(2, temp3)); + __ BranchF(&done, NULL, ge, input, double_scratch2); + __ ldc1(double_scratch1, 
ExpConstant(3, temp3)); + __ ldc1(result, ExpConstant(4, temp3)); + __ mul_d(double_scratch1, double_scratch1, input); + __ add_d(double_scratch1, double_scratch1, result); + __ Move(temp2, temp1, double_scratch1); + __ sub_d(double_scratch1, double_scratch1, result); + __ ldc1(result, ExpConstant(6, temp3)); + __ ldc1(double_scratch2, ExpConstant(5, temp3)); + __ mul_d(double_scratch1, double_scratch1, double_scratch2); + __ sub_d(double_scratch1, double_scratch1, input); + __ sub_d(result, result, double_scratch1); + __ mul_d(input, double_scratch1, double_scratch1); + __ mul_d(result, result, input); + __ srl(temp1, temp2, 11); + __ ldc1(double_scratch2, ExpConstant(7, temp3)); + __ mul_d(result, result, double_scratch2); + __ sub_d(result, result, double_scratch1); + __ ldc1(double_scratch2, ExpConstant(8, temp3)); + __ add_d(result, result, double_scratch2); + __ li(at, 0x7ff); + __ And(temp2, temp2, at); + __ Addu(temp1, temp1, Operand(0x3ff)); + __ sll(temp1, temp1, 20); + + // Must not call ExpConstant() after overwriting temp3! 
+ __ li(temp3, Operand(ExternalReference::math_exp_log_table())); + __ sll(at, temp2, 3); + __ addu(at, at, temp3); + __ lw(at, MemOperand(at)); + __ Addu(temp3, temp3, Operand(kPointerSize)); + __ sll(temp2, temp2, 3); + __ addu(temp2, temp2, temp3); + __ lw(temp2, MemOperand(temp2)); + __ Or(temp1, temp1, temp2); + __ Move(input, at, temp1); + __ mul_d(result, result, input); + __ bind(&done); +} + + // nop(CODE_AGE_MARKER_NOP) static const uint32_t kCodeAgePatchFirstInstruction = 0x00010180; diff --git a/src/mips/codegen-mips.h b/src/mips/codegen-mips.h index e704c4f..0ed2414 100644 --- a/src/mips/codegen-mips.h +++ b/src/mips/codegen-mips.h @@ -90,6 +90,22 @@ class StringCharLoadGenerator : public AllStatic { DISALLOW_COPY_AND_ASSIGN(StringCharLoadGenerator); }; + +class MathExpGenerator : public AllStatic { + public: + static void EmitMathExp(MacroAssembler* masm, + DoubleRegister input, + DoubleRegister result, + DoubleRegister double_scratch1, + DoubleRegister double_scratch2, + Register temp1, + Register temp2, + Register temp3); + + private: + DISALLOW_COPY_AND_ASSIGN(MathExpGenerator); +}; + } } // namespace v8::internal #endif // V8_MIPS_CODEGEN_MIPS_H_ diff --git a/src/mips/lithium-codegen-mips.cc b/src/mips/lithium-codegen-mips.cc index 1e370df..22352e1 100644 --- a/src/mips/lithium-codegen-mips.cc +++ b/src/mips/lithium-codegen-mips.cc @@ -3500,6 +3500,20 @@ void LCodeGen::DoDeferredRandom(LRandom* instr) { } +void LCodeGen::DoMathExp(LMathExp* instr) { + DoubleRegister input = ToDoubleRegister(instr->value()); + DoubleRegister result = ToDoubleRegister(instr->result()); + DoubleRegister double_scratch1 = ToDoubleRegister(instr->double_temp()); + DoubleRegister double_scratch2 = double_scratch0(); + Register temp1 = ToRegister(instr->temp1()); + Register temp2 = ToRegister(instr->temp2()); + + MathExpGenerator::EmitMathExp( + masm(), input, result, double_scratch1, double_scratch2, + temp1, temp2, scratch0()); +} + + void 
LCodeGen::DoMathLog(LUnaryMathOperation* instr) { ASSERT(ToDoubleRegister(instr->result()).is(f4)); TranscendentalCacheStub stub(TranscendentalCache::LOG, diff --git a/src/mips/lithium-mips.cc b/src/mips/lithium-mips.cc index ebd54f5..56dd33d 100644 --- a/src/mips/lithium-mips.cc +++ b/src/mips/lithium-mips.cc @@ -297,6 +297,11 @@ void LUnaryMathOperation::PrintDataTo(StringStream* stream) { } +void LMathExp::PrintDataTo(StringStream* stream) { + value()->PrintTo(stream); +} + + void LLoadContextSlot::PrintDataTo(StringStream* stream) { context()->PrintTo(stream); stream->Add("[%d]", slot_index()); @@ -1040,6 +1045,15 @@ LInstruction* LChunkBuilder::DoUnaryMathOperation(HUnaryMathOperation* instr) { LOperand* input = UseFixedDouble(instr->value(), f4); LUnaryMathOperation* result = new(zone()) LUnaryMathOperation(input, NULL); return MarkAsCall(DefineFixedDouble(result, f4), instr); + } else if (op == kMathExp) { + ASSERT(instr->representation().IsDouble()); + ASSERT(instr->value()->representation().IsDouble()); + LOperand* input = UseTempRegister(instr->value()); + LOperand* temp1 = TempRegister(); + LOperand* temp2 = TempRegister(); + LOperand* double_temp = FixedTemp(f6); // Chosen by fair dice roll. + LMathExp* result = new(zone()) LMathExp(input, double_temp, temp1, temp2); + return DefineAsRegister(result); } else if (op == kMathPowHalf) { // Input cannot be the same as the result. // See lithium-codegen-mips.cc::DoMathPowHalf. 
diff --git a/src/mips/lithium-mips.h b/src/mips/lithium-mips.h index 1baf07d..17ef24c 100644 --- a/src/mips/lithium-mips.h +++ b/src/mips/lithium-mips.h @@ -131,6 +131,7 @@ class LCodeGen; V(LoadNamedFieldPolymorphic) \ V(LoadNamedGeneric) \ V(MapEnumLength) \ + V(MathExp) \ V(MathMinMax) \ V(ModI) \ V(MulI) \ @@ -641,6 +642,30 @@ class LUnaryMathOperation: public LTemplateInstruction<1, 1, 1> { }; +class LMathExp: public LTemplateInstruction<1, 1, 3> { + public: + LMathExp(LOperand* value, + LOperand* double_temp, + LOperand* temp1, + LOperand* temp2) { + inputs_[0] = value; + temps_[0] = temp1; + temps_[1] = temp2; + temps_[2] = double_temp; + ExternalReference::InitializeMathExpData(); + } + + LOperand* value() { return inputs_[0]; } + LOperand* temp1() { return temps_[0]; } + LOperand* temp2() { return temps_[1]; } + LOperand* double_temp() { return temps_[2]; } + + DECLARE_CONCRETE_INSTRUCTION(MathExp, "math-exp") + + virtual void PrintDataTo(StringStream* stream); +}; + + class LCmpObjectEqAndBranch: public LControlInstruction<2, 0> { public: LCmpObjectEqAndBranch(LOperand* left, LOperand* right) { diff --git a/src/mips/simulator-mips.cc b/src/mips/simulator-mips.cc index cb56473..ea359ea 100644 --- a/src/mips/simulator-mips.cc +++ b/src/mips/simulator-mips.cc @@ -1016,6 +1016,13 @@ void Simulator::set_register(int reg, int32_t value) { } +void Simulator::set_dw_register(int reg, const int* dbl) { + ASSERT((reg >= 0) && (reg < kNumSimuRegisters)); + registers_[reg] = dbl[0]; + registers_[reg + 1] = dbl[1]; +} + + void Simulator::set_fpu_register(int fpureg, int32_t value) { ASSERT((fpureg >= 0) && (fpureg < kNumFPURegisters)); FPUregisters_[fpureg] = value; @@ -1045,6 +1052,19 @@ int32_t Simulator::get_register(int reg) const { } +double Simulator::get_double_from_register_pair(int reg) { + ASSERT((reg >= 0) && (reg < kNumSimuRegisters) && ((reg % 2) == 0)); + + double dm_val = 0.0; + // Read the bits from the unsigned integer register_[] array + // into the 
double precision floating point value and return it. + char buffer[2 * sizeof(registers_[0])]; + memcpy(buffer, &registers_[reg], 2 * sizeof(registers_[0])); + memcpy(&dm_val, buffer, 2 * sizeof(registers_[0])); + return(dm_val); +} + + int32_t Simulator::get_fpu_register(int fpureg) const { ASSERT((fpureg >= 0) && (fpureg < kNumFPURegisters)); return FPUregisters_[fpureg]; @@ -2718,34 +2738,7 @@ void Simulator::Execute() { } -int32_t Simulator::Call(byte* entry, int argument_count, ...) { - va_list parameters; - va_start(parameters, argument_count); - // Set up arguments. - - // First four arguments passed in registers. - ASSERT(argument_count >= 4); - set_register(a0, va_arg(parameters, int32_t)); - set_register(a1, va_arg(parameters, int32_t)); - set_register(a2, va_arg(parameters, int32_t)); - set_register(a3, va_arg(parameters, int32_t)); - - // Remaining arguments passed on stack. - int original_stack = get_register(sp); - // Compute position of stack on entry to generated code. - int entry_stack = (original_stack - (argument_count - 4) * sizeof(int32_t) - - kCArgsSlotsSize); - if (OS::ActivationFrameAlignment() != 0) { - entry_stack &= -OS::ActivationFrameAlignment(); - } - // Store remaining arguments on stack, from low to high memory. - intptr_t* stack_argument = reinterpret_cast<intptr_t*>(entry_stack); - for (int i = 4; i < argument_count; i++) { - stack_argument[i - 4 + kCArgSlotCount] = va_arg(parameters, int32_t); - } - va_end(parameters); - set_register(sp, entry_stack); - +void Simulator::CallInternal(byte* entry) { // Prepare to execute the code at entry. set_register(pc, reinterpret_cast<int32_t>(entry)); // Put down marker for end of simulation. The simulator will stop simulation @@ -2809,6 +2802,38 @@ int32_t Simulator::Call(byte* entry, int argument_count, ...) { set_register(gp, gp_val); set_register(sp, sp_val); set_register(fp, fp_val); +} + + +int32_t Simulator::Call(byte* entry, int argument_count, ...) 
{ + va_list parameters; + va_start(parameters, argument_count); + // Set up arguments. + + // First four arguments passed in registers. + ASSERT(argument_count >= 4); + set_register(a0, va_arg(parameters, int32_t)); + set_register(a1, va_arg(parameters, int32_t)); + set_register(a2, va_arg(parameters, int32_t)); + set_register(a3, va_arg(parameters, int32_t)); + + // Remaining arguments passed on stack. + int original_stack = get_register(sp); + // Compute position of stack on entry to generated code. + int entry_stack = (original_stack - (argument_count - 4) * sizeof(int32_t) + - kCArgsSlotsSize); + if (OS::ActivationFrameAlignment() != 0) { + entry_stack &= -OS::ActivationFrameAlignment(); + } + // Store remaining arguments on stack, from low to high memory. + intptr_t* stack_argument = reinterpret_cast<intptr_t*>(entry_stack); + for (int i = 4; i < argument_count; i++) { + stack_argument[i - 4 + kCArgSlotCount] = va_arg(parameters, int32_t); + } + va_end(parameters); + set_register(sp, entry_stack); + + CallInternal(entry); // Pop stack passed arguments. CHECK_EQ(entry_stack, get_register(sp)); @@ -2819,6 +2844,27 @@ int32_t Simulator::Call(byte* entry, int argument_count, ...) 
{ } +double Simulator::CallFP(byte* entry, double d0, double d1) { + if (!IsMipsSoftFloatABI) { + set_fpu_register_double(f12, d0); + set_fpu_register_double(f14, d1); + } else { + int buffer[2]; + ASSERT(sizeof(buffer[0]) * 2 == sizeof(d0)); + memcpy(buffer, &d0, sizeof(d0)); + set_dw_register(a0, buffer); + memcpy(buffer, &d1, sizeof(d1)); + set_dw_register(a2, buffer); + } + CallInternal(entry); + if (!IsMipsSoftFloatABI) { + return get_fpu_register_double(f0); + } else { + return get_double_from_register_pair(v0); + } +} + + uintptr_t Simulator::PushAddress(uintptr_t address) { int new_sp = get_register(sp) - sizeof(uintptr_t); uintptr_t* stack_slot = reinterpret_cast<uintptr_t*>(new_sp); diff --git a/src/mips/simulator-mips.h b/src/mips/simulator-mips.h index 776badc..67f5953 100644 --- a/src/mips/simulator-mips.h +++ b/src/mips/simulator-mips.h @@ -184,7 +184,9 @@ class Simulator { // architecture specification and is off by a 8 from the currently executing // instruction. void set_register(int reg, int32_t value); + void set_dw_register(int dreg, const int* dbl); int32_t get_register(int reg) const; + double get_double_from_register_pair(int reg); // Same for FPURegisters. void set_fpu_register(int fpureg, int32_t value); void set_fpu_register_float(int fpureg, float value); @@ -214,6 +216,8 @@ class Simulator { // generated RegExp code with 7 parameters. This is a convenience function, // which sets up the simulator state and grabs the result on return. int32_t Call(byte* entry, int argument_count, ...); + // Alternative: call a 2-argument double function. + double CallFP(byte* entry, double d0, double d1); // Push an address onto the JS stack. uintptr_t PushAddress(uintptr_t address); @@ -353,6 +357,7 @@ class Simulator { void GetFpArgs(double* x, int32_t* y); void SetFpResult(const double& result); + void CallInternal(byte* entry); // Architecture state. // Registers. -- 2.7.4