Faster implementation of Math.exp()

author jkummerow@chromium.org <jkummerow@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Mon, 26 Nov 2012 13:12:35 +0000 (13:12 +0000)

committer jkummerow@chromium.org <jkummerow@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Mon, 26 Nov 2012 13:12:35 +0000 (13:12 +0000)
author jkummerow@chromium.org <jkummerow@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Mon, 26 Nov 2012 13:12:35 +0000 (13:12 +0000)
committer jkummerow@chromium.org <jkummerow@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Mon, 26 Nov 2012 13:12:35 +0000 (13:12 +0000)
diff --git a/src/arm/codegen-arm.cc b/src/arm/codegen-arm.cc

index 52a6295..689de9f 100644 (file)
--- a/src/arm/codegen-arm.cc
+++ b/src/arm/codegen-arm.cc
@@ -31,11 +31,11 @@
  
  #include "codegen.h"
  #include "macro-assembler.h"
+#include "simulator-arm.h"
  
  namespace v8 {
  namespace internal {
  
-#define __ ACCESS_MASM(masm)
  
  UnaryMathFunction CreateTranscendentalFunction(TranscendentalCache::Type type) {
    switch (type) {
@@ -49,6 +49,74 @@ UnaryMathFunction CreateTranscendentalFunction(TranscendentalCache::Type type) {
  }
  
  
+#define __ masm.
+
+
+#if defined(USE_SIMULATOR)
+// Start address of the generated ARM exp() code. It stays NULL until
+// CreateExpFunction() has emitted the code and stored the buffer here.
+byte* fast_exp_arm_machine_code = NULL;
+
+
+// Host-callable wrapper: runs the generated ARM code on the current
+// isolate's simulator and returns the double it produces.
+double fast_exp_simulator(double x) {
+  Simulator* simulator = Simulator::current(Isolate::Current());
+  return simulator->CallFP(fast_exp_arm_machine_code, x, 0);
+}
+#endif
+
+
+UnaryMathFunction CreateExpFunction() {
+  if (!CpuFeatures::IsSupported(VFP2)) return &exp;
+  if (!FLAG_fast_math) return &exp;
+  size_t actual_size;
+  byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, &actual_size, true));
+  if (buffer == NULL) return &exp;
+  ExternalReference::InitializeMathExpData();
+
+  MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size));
+
+  {
+    CpuFeatures::Scope use_vfp(VFP2);
+    DoubleRegister input = d0;
+    DoubleRegister result = d1;
+    DoubleRegister double_scratch1 = d2;
+    DoubleRegister double_scratch2 = d3;
+    Register temp1 = r4;
+    Register temp2 = r5;
+    Register temp3 = r6;
+
+    if (masm.use_eabi_hardfloat()) {
+      // Input value is in d0 anyway, nothing to do.
+    } else {
+      __ vmov(input, r0, r1);
+    }
+    __ Push(temp3, temp2, temp1);
+    MathExpGenerator::EmitMathExp(
+        &masm, input, result, double_scratch1, double_scratch2,
+        temp1, temp2, temp3);
+    __ Pop(temp3, temp2, temp1);
+    if (masm.use_eabi_hardfloat()) {
+      __ vmov(d0, result);
+    } else {
+      __ vmov(r0, r1, result);
+    }
+    __ Ret();
+  }
+
+  CodeDesc desc;
+  masm.GetCode(&desc);
+
+  CPU::FlushICache(buffer, actual_size);
+  OS::ProtectCode(buffer, actual_size);
+
+#if !defined(USE_SIMULATOR)
+  return FUNCTION_CAST<UnaryMathFunction>(buffer);
+#else
+  fast_exp_arm_machine_code = buffer;
+  return &fast_exp_simulator;
+#endif
+}
+
+
+#undef __
+
+
  UnaryMathFunction CreateSqrtFunction() {
    return &sqrt;
  }
@@ -73,6 +141,8 @@ void StubRuntimeCallHelper::AfterCall(MacroAssembler* masm) const {
  // -------------------------------------------------------------------------
  // Code generators
  
+#define __ ACCESS_MASM(masm)
+
  void ElementsTransitionGenerator::GenerateMapChangeElementsTransition(
      MacroAssembler* masm) {
    // ----------- S t a t e -------------
@@ -450,6 +520,78 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm,
    __ bind(&done);
  }
  
+
+// Memory operand addressing the |index|-th double relative to |base|.
+static MemOperand ExpConstant(int index, Register base) {
+  const int byte_offset = index * kDoubleSize;
+  return MemOperand(base, byte_offset);
+}
+
+
+void MathExpGenerator::EmitMathExp(MacroAssembler* masm,  // Emits inline code leaving exp(input) in |result|.
+                                   DoubleRegister input,  // Clobbered: reused as a scratch register below.
+                                   DoubleRegister result,
+                                   DoubleRegister double_scratch1,
+                                   DoubleRegister double_scratch2,
+                                   Register temp1,
+                                   Register temp2,
+                                   Register temp3) {
+  ASSERT(!input.is(result));  // All double registers must be pairwise distinct.
+  ASSERT(!input.is(double_scratch1));
+  ASSERT(!input.is(double_scratch2));
+  ASSERT(!result.is(double_scratch1));
+  ASSERT(!result.is(double_scratch2));
+  ASSERT(!double_scratch1.is(double_scratch2));
+  ASSERT(!temp1.is(temp2));  // All core temp registers must be pairwise distinct.
+  ASSERT(!temp1.is(temp3));
+  ASSERT(!temp2.is(temp3));
+  ASSERT(ExternalReference::math_exp_constants(0).address() != NULL);  // Tables must be initialized first.
+
+  Label done;
+
+  __ mov(temp3, Operand(ExternalReference::math_exp_constants(0)));  // temp3 = base of the constants array.
+
+  __ vldr(double_scratch1, ExpConstant(0, temp3));
+  __ vmov(result, kDoubleRegZero);
+  __ VFPCompareAndSetFlags(double_scratch1, input);
+  __ b(ge, &done);  // constants[0] >= input: finish with result = 0.
+  __ vldr(double_scratch2, ExpConstant(1, temp3));
+  __ VFPCompareAndSetFlags(input, double_scratch2);
+  __ vldr(result, ExpConstant(2, temp3));
+  __ b(ge, &done);  // input >= constants[1]: finish with result = constants[2].
+  __ vldr(double_scratch1, ExpConstant(3, temp3));
+  __ vldr(result, ExpConstant(4, temp3));
+  __ vmul(double_scratch1, double_scratch1, input);
+  __ vadd(double_scratch1, double_scratch1, result);  // double_scratch1 = input * constants[3] + constants[4].
+  __ vmov(temp2, temp1, double_scratch1);  // Copy raw bits to core registers; temp1 is overwritten before use (presumably temp2 = low word -- confirm).
+  __ vsub(double_scratch1, double_scratch1, result);  // Subtract constants[4] again.
+  __ vldr(result, ExpConstant(6, temp3));
+  __ vldr(double_scratch2, ExpConstant(5, temp3));
+  __ vmul(double_scratch1, double_scratch1, double_scratch2);
+  __ vsub(double_scratch1, double_scratch1, input);
+  __ vsub(result, result, double_scratch1);  // result = constants[6] - double_scratch1.
+  __ vmul(input, double_scratch1, double_scratch1);  // input now holds double_scratch1 squared.
+  __ vmul(result, result, input);
+  __ mov(temp1, Operand(temp2, LSR, 11));  // temp1 = bits of temp2 above the 11-bit table index.
+  __ vldr(double_scratch2, ExpConstant(7, temp3));
+  __ vmul(result, result, double_scratch2);
+  __ vsub(result, result, double_scratch1);
+  __ vldr(double_scratch2, ExpConstant(8, temp3));
+  __ vadd(result, result, double_scratch2);
+  __ movw(ip, 0x7ff);  // NOTE(review): movw needs ARMv6T2/ARMv7, but CreateExpFunction only checks VFP2 -- confirm.
+  __ and_(temp2, temp2, Operand(ip));  // temp2 = low 11 bits = log table index.
+  __ add(temp1, temp1, Operand(0x3ff));
+  __ mov(temp1, Operand(temp1, LSL, 20));  // temp1 = (temp1 + 0x3ff) << 20; presumably the biased exponent in high-word position.
+
+  // Must not call ExpConstant() after overwriting temp3!
+  __ mov(temp3, Operand(ExternalReference::math_exp_log_table()));
+  __ ldr(ip, MemOperand(temp3, temp2, LSL, 3));  // ip = first 32-bit word of the 8-byte table entry.
+  __ add(temp3, temp3, Operand(kPointerSize));
+  __ ldr(temp2, MemOperand(temp3, temp2, LSL, 3));  // temp2 = second 32-bit word of the same entry.
+  __ orr(temp1, temp1, temp2);  // Merge the shifted value with the entry's second word.
+  __ vmov(input, ip, temp1);  // Assemble the scale factor as a double in |input|.
+  __ vmul(result, result, input);
+  __ bind(&done);
+}
+
  #undef __
  
  // add(r0, pc, Operand(-8))
diff --git a/src/arm/codegen-arm.h b/src/arm/codegen-arm.h

index c77844d..7ca2c8d 100644 (file)
--- a/src/arm/codegen-arm.h
+++ b/src/arm/codegen-arm.h
@@ -91,6 +91,22 @@ class StringCharLoadGenerator : public AllStatic {
    DISALLOW_COPY_AND_ASSIGN(StringCharLoadGenerator);
  };
  
+
+class MathExpGenerator : public AllStatic {
+ public:
+  static void EmitMathExp(MacroAssembler* masm,
+                          DoubleRegister input,
+                          DoubleRegister result,
+                          DoubleRegister double_scratch1,
+                          DoubleRegister double_scratch2,
+                          Register temp1,
+                          Register temp2,
+                          Register temp3);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MathExpGenerator);
+};
+
  } }  // namespace v8::internal
  
  #endif  // V8_ARM_CODEGEN_ARM_H_
diff --git a/src/arm/lithium-arm.cc b/src/arm/lithium-arm.cc

index 63eb33e..32dda27 100644 (file)
--- a/src/arm/lithium-arm.cc
+++ b/src/arm/lithium-arm.cc
@@ -297,6 +297,11 @@ void LUnaryMathOperation::PrintDataTo(StringStream* stream) {
  }
  
  
+// Prints the instruction's only input operand.
+void LMathExp::PrintDataTo(StringStream* stream) {
+  LOperand* operand = value();
+  operand->PrintTo(stream);
+}
+
+
  void LLoadContextSlot::PrintDataTo(StringStream* stream) {
    context()->PrintTo(stream);
    stream->Add("[%d]", slot_index());
@@ -1041,6 +1046,15 @@ LInstruction* LChunkBuilder::DoUnaryMathOperation(HUnaryMathOperation* instr) {
      LOperand* input = UseFixedDouble(instr->value(), d2);
      LUnaryMathOperation* result = new(zone()) LUnaryMathOperation(input, NULL);
      return MarkAsCall(DefineFixedDouble(result, d2), instr);
+  } else if (op == kMathExp) {
+    ASSERT(instr->representation().IsDouble());
+    ASSERT(instr->value()->representation().IsDouble());
+    LOperand* input = UseTempRegister(instr->value());
+    LOperand* temp1 = TempRegister();
+    LOperand* temp2 = TempRegister();
+    LOperand* double_temp = FixedTemp(d3);  // Chosen by fair dice roll.
+    LMathExp* result = new(zone()) LMathExp(input, double_temp, temp1, temp2);
+    return DefineAsRegister(result);
    } else if (op == kMathPowHalf) {
      LOperand* input = UseFixedDouble(instr->value(), d2);
      LOperand* temp = FixedTemp(d3);
diff --git a/src/arm/lithium-arm.h b/src/arm/lithium-arm.h

index a5cf85f..b45a3e0 100644 (file)
--- a/src/arm/lithium-arm.h
+++ b/src/arm/lithium-arm.h
@@ -131,6 +131,7 @@ class LCodeGen;
    V(LoadNamedFieldPolymorphic)                  \
    V(LoadNamedGeneric)                           \
    V(MapEnumLength)                              \
+  V(MathExp)                                    \
    V(MathFloorOfDiv)                             \
    V(MathMinMax)                                 \
    V(ModI)                                       \
@@ -681,6 +682,30 @@ class LUnaryMathOperation: public LTemplateInstruction<1, 1, 1> {
  };
  
  
+class LMathExp: public LTemplateInstruction<1, 1, 3> {
+ public:
+  LMathExp(LOperand* value,
+           LOperand* double_temp,
+           LOperand* temp1,
+           LOperand* temp2) {
+    inputs_[0] = value;
+    temps_[0] = temp1;
+    temps_[1] = temp2;
+    temps_[2] = double_temp;
+    ExternalReference::InitializeMathExpData();
+  }
+
+  LOperand* value() { return inputs_[0]; }
+  LOperand* temp1() { return temps_[0]; }
+  LOperand* temp2() { return temps_[1]; }
+  LOperand* double_temp() { return temps_[2]; }
+
+  DECLARE_CONCRETE_INSTRUCTION(MathExp, "math-exp")
+
+  virtual void PrintDataTo(StringStream* stream);
+};
+
+
  class LCmpObjectEqAndBranch: public LControlInstruction<2, 0> {
   public:
    LCmpObjectEqAndBranch(LOperand* left, LOperand* right) {
diff --git a/src/arm/lithium-codegen-arm.cc b/src/arm/lithium-codegen-arm.cc

index 1f6c5ad..6ff4cab 100644 (file)
--- a/src/arm/lithium-codegen-arm.cc
+++ b/src/arm/lithium-codegen-arm.cc
@@ -3807,6 +3807,20 @@ void LCodeGen::DoDeferredRandom(LRandom* instr) {
  }
  
  
+// Generates the inline exp() sequence for |instr|, using the registers
+// assigned to the instruction plus the code generator's fixed scratches.
+void LCodeGen::DoMathExp(LMathExp* instr) {
+  DoubleRegister value_reg = ToDoubleRegister(instr->value());
+  DoubleRegister result_reg = ToDoubleRegister(instr->result());
+  DoubleRegister fp_temp = ToDoubleRegister(instr->double_temp());
+  DoubleRegister fp_scratch = double_scratch0();
+  Register core_temp1 = ToRegister(instr->temp1());
+  Register core_temp2 = ToRegister(instr->temp2());
+  Register core_scratch = scratch0();
+
+  MathExpGenerator::EmitMathExp(masm(),
+                                value_reg,
+                                result_reg,
+                                fp_temp,
+                                fp_scratch,
+                                core_temp1,
+                                core_temp2,
+                                core_scratch);
+}
+
+
  void LCodeGen::DoMathLog(LUnaryMathOperation* instr) {
    ASSERT(ToDoubleRegister(instr->result()).is(d2));
    TranscendentalCacheStub stub(TranscendentalCache::LOG,
diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc

index efbfff2..d11e340 100644 (file)
--- a/src/arm/simulator-arm.cc
+++ b/src/arm/simulator-arm.cc
@@ -3301,33 +3301,7 @@ void Simulator::Execute() {
  }
  
  
-int32_t Simulator::Call(byte* entry, int argument_count, ...) {
-  va_list parameters;
-  va_start(parameters, argument_count);
-  // Set up arguments
-
-  // First four arguments passed in registers.
-  ASSERT(argument_count >= 4);
-  set_register(r0, va_arg(parameters, int32_t));
-  set_register(r1, va_arg(parameters, int32_t));
-  set_register(r2, va_arg(parameters, int32_t));
-  set_register(r3, va_arg(parameters, int32_t));
-
-  // Remaining arguments passed on stack.
-  int original_stack = get_register(sp);
-  // Compute position of stack on entry to generated code.
-  int entry_stack = (original_stack - (argument_count - 4) * sizeof(int32_t));
-  if (OS::ActivationFrameAlignment() != 0) {
-    entry_stack &= -OS::ActivationFrameAlignment();
-  }
-  // Store remaining arguments on stack, from low to high memory.
-  intptr_t* stack_argument = reinterpret_cast<intptr_t*>(entry_stack);
-  for (int i = 4; i < argument_count; i++) {
-    stack_argument[i - 4] = va_arg(parameters, int32_t);
-  }
-  va_end(parameters);
-  set_register(sp, entry_stack);
-
+void Simulator::CallInternal(byte* entry) {
    // Prepare to execute the code at entry
    set_register(pc, reinterpret_cast<int32_t>(entry));
    // Put down marker for end of simulation. The simulator will stop simulation
@@ -3381,6 +3355,37 @@ int32_t Simulator::Call(byte* entry, int argument_count, ...) {
    set_register(r9, r9_val);
    set_register(r10, r10_val);
    set_register(r11, r11_val);
+}
+
+
+int32_t Simulator::Call(byte* entry, int argument_count, ...) {
+  va_list parameters;
+  va_start(parameters, argument_count);
+  // Set up arguments
+
+  // First four arguments passed in registers.
+  ASSERT(argument_count >= 4);
+  set_register(r0, va_arg(parameters, int32_t));
+  set_register(r1, va_arg(parameters, int32_t));
+  set_register(r2, va_arg(parameters, int32_t));
+  set_register(r3, va_arg(parameters, int32_t));
+
+  // Remaining arguments passed on stack.
+  int original_stack = get_register(sp);
+  // Compute position of stack on entry to generated code.
+  int entry_stack = (original_stack - (argument_count - 4) * sizeof(int32_t));
+  if (OS::ActivationFrameAlignment() != 0) {
+    entry_stack &= -OS::ActivationFrameAlignment();
+  }
+  // Store remaining arguments on stack, from low to high memory.
+  intptr_t* stack_argument = reinterpret_cast<intptr_t*>(entry_stack);
+  for (int i = 4; i < argument_count; i++) {
+    stack_argument[i - 4] = va_arg(parameters, int32_t);
+  }
+  va_end(parameters);
+  set_register(sp, entry_stack);
+
+  CallInternal(entry);
  
    // Pop stack passed arguments.
    CHECK_EQ(entry_stack, get_register(sp));
@@ -3391,6 +3396,27 @@ int32_t Simulator::Call(byte* entry, int argument_count, ...) {
  }
  
  
+double Simulator::CallFP(byte* entry, double d0, double d1) {
+  if (use_eabi_hardfloat()) {
+    set_d_register_from_double(0, d0);
+    set_d_register_from_double(1, d1);
+  } else {
+    int buffer[2];
+    ASSERT(sizeof(buffer[0]) * 2 == sizeof(d0));
+    memcpy(buffer, &d0, sizeof(d0));
+    set_dw_register(0, buffer);
+    memcpy(buffer, &d1, sizeof(d1));
+    set_dw_register(2, buffer);
+  }
+  CallInternal(entry);
+  if (use_eabi_hardfloat()) {
+    return get_double_from_d_register(0);
+  } else {
+    return get_double_from_register_pair(0);
+  }
+}
+
+
  uintptr_t Simulator::PushAddress(uintptr_t address) {
    int new_sp = get_register(sp) - sizeof(uintptr_t);
    uintptr_t* stack_slot = reinterpret_cast<uintptr_t*>(new_sp);
diff --git a/src/arm/simulator-arm.h b/src/arm/simulator-arm.h

index abc91bb..ec47fa1 100644 (file)
--- a/src/arm/simulator-arm.h
+++ b/src/arm/simulator-arm.h
@@ -205,6 +205,8 @@ class Simulator {
    // generated RegExp code with 7 parameters. This is a convenience function,
    // which sets up the simulator state and grabs the result on return.
    int32_t Call(byte* entry, int argument_count, ...);
+  // Alternative: call a 2-argument double function.
+  double CallFP(byte* entry, double d0, double d1);
  
    // Push an address onto the JS stack.
    uintptr_t PushAddress(uintptr_t address);
@@ -356,6 +358,8 @@ class Simulator {
    template<class InputType, int register_size>
        void SetVFPRegister(int reg_index, const InputType& value);
  
+  void CallInternal(byte* entry);
+
    // Architecture state.
    // Saturating instructions require a Q flag to indicate saturation.
    // There is currently no way to read the CPSR directly, and thus read the Q
diff --git a/src/assembler.cc b/src/assembler.cc

index 9cec04a..344e203 100644 (file)
--- a/src/assembler.cc
+++ b/src/assembler.cc
@@ -103,6 +103,11 @@ static DoubleConstant double_constants;
  
  const char* const RelocInfo::kFillerCommentString = "DEOPTIMIZATION PADDING";
  
+static bool math_exp_data_initialized = false;
+static Mutex* math_exp_data_mutex = NULL;
+static double* math_exp_constants_array = NULL;
+static double* math_exp_log_table_array = NULL;
+
  // -----------------------------------------------------------------------------
  // Implementation of AssemblerBase
  
@@ -836,6 +841,70 @@ void ExternalReference::SetUp() {
    double_constants.canonical_non_hole_nan = OS::nan_value();
    double_constants.the_hole_nan = BitCast<double>(kHoleNanInt64);
    double_constants.negative_infinity = -V8_INFINITY;
+
+  math_exp_data_mutex = OS::CreateMutex();
+}
+
+
+void ExternalReference::InitializeMathExpData() {
+  // Early return?
+  if (math_exp_data_initialized) return;
+
+  math_exp_data_mutex->Lock();
+  if (!math_exp_data_initialized) {
+    // If this is changed, generated code must be adapted too.
+    const int kTableSizeBits = 11;
+    const int kTableSize = 1 << kTableSizeBits;
+    const double kTableSizeDouble = static_cast<double>(kTableSize);
+
+    math_exp_constants_array = new double[9];
+    // Input values smaller than this always return 0.
+    math_exp_constants_array[0] = -708.39641853226408;
+    // Input values larger than this always return +Infinity.
+    math_exp_constants_array[1] = 709.78271289338397;
+    math_exp_constants_array[2] = V8_INFINITY;
+    // The rest is black magic. Do not attempt to understand it. It is
+    // loosely based on the "expd" function published at:
+    // http://herumi.blogspot.com/2011/08/fast-double-precision-exponential.html
+    const double constant3 = (1 << kTableSizeBits) / log(2.0);
+    math_exp_constants_array[3] = constant3;
+    math_exp_constants_array[4] =
+        static_cast<double>(static_cast<int64_t>(3) << 51);
+    math_exp_constants_array[5] = 1 / constant3;
+    math_exp_constants_array[6] = 3.0000000027955394;
+    math_exp_constants_array[7] = 0.16666666685227835;
+    math_exp_constants_array[8] = 1;
+
+    math_exp_log_table_array = new double[kTableSize];
+    for (int i = 0; i < kTableSize; i++) {
+      double value = pow(2, i / kTableSizeDouble);
+
+      uint64_t bits = BitCast<uint64_t, double>(value);
+      bits &= (static_cast<uint64_t>(1) << 52) - 1;
+      double mantissa = BitCast<double, uint64_t>(bits);
+
+      // <just testing>
+      uint64_t doublebits;
+      memcpy(&doublebits, &value, sizeof doublebits);
+      doublebits &= (static_cast<uint64_t>(1) << 52) - 1;
+      double mantissa2;
+      memcpy(&mantissa2, &doublebits, sizeof mantissa2);
+      CHECK_EQ(mantissa, mantissa2);
+      // </just testing>
+
+      math_exp_log_table_array[i] = mantissa;
+    }
+
+    math_exp_data_initialized = true;
+  }
+  math_exp_data_mutex->Unlock();
+}
+
+
+// Releases the Math.exp() tables and the mutex guarding their creation.
+// Pointers are cleared and the initialized flag is reset so that a later
+// SetUp() + InitializeMathExpData() rebuilds the tables instead of
+// returning early and handing out pointers to freed memory.
+void ExternalReference::TearDownMathExpData() {
+  delete[] math_exp_constants_array;
+  math_exp_constants_array = NULL;
+  delete[] math_exp_log_table_array;
+  math_exp_log_table_array = NULL;
+  delete math_exp_data_mutex;
+  math_exp_data_mutex = NULL;
+  math_exp_data_initialized = false;
  }
  
  
@@ -1273,6 +1342,19 @@ ExternalReference ExternalReference::math_log_double_function(
  }
  
  
+// Reference to the |constant_index|-th entry of the exp constants array.
+// InitializeMathExpData() must have run beforehand.
+ExternalReference ExternalReference::math_exp_constants(int constant_index) {
+  ASSERT(math_exp_data_initialized);
+  double* entry = math_exp_constants_array + constant_index;
+  return ExternalReference(reinterpret_cast<void*>(entry));
+}
+
+
+// Reference to the start of the exp lookup table.
+// InitializeMathExpData() must have run beforehand.
+ExternalReference ExternalReference::math_exp_log_table() {
+  ASSERT(math_exp_data_initialized);
+  void* table_start = reinterpret_cast<void*>(math_exp_log_table_array);
+  return ExternalReference(table_start);
+}
+
+
  ExternalReference ExternalReference::page_flags(Page* page) {
    return ExternalReference(reinterpret_cast<Address>(page) +
                             MemoryChunk::kFlagsOffset);
diff --git a/src/assembler.h b/src/assembler.h

index 01f0790..76e0569 100644 (file)
--- a/src/assembler.h
+++ b/src/assembler.h
@@ -604,6 +604,8 @@ class ExternalReference BASE_EMBEDDED {
    };
  
    static void SetUp();
+  static void InitializeMathExpData();
+  static void TearDownMathExpData();
  
    typedef void* ExternalReferenceRedirector(void* original, Type type);
  
@@ -725,6 +727,9 @@ class ExternalReference BASE_EMBEDDED {
    static ExternalReference math_tan_double_function(Isolate* isolate);
    static ExternalReference math_log_double_function(Isolate* isolate);
  
+  static ExternalReference math_exp_constants(int constant_index);
+  static ExternalReference math_exp_log_table();
+
    static ExternalReference page_flags(Page* page);
  
    Address address() const {return reinterpret_cast<Address>(address_);}
diff --git a/src/codegen.h b/src/codegen.h

index 08a777f..3d14502 100644 (file)
--- a/src/codegen.h
+++ b/src/codegen.h
@@ -90,6 +90,7 @@ namespace internal {
  typedef double (*UnaryMathFunction)(double x);
  
  UnaryMathFunction CreateTranscendentalFunction(TranscendentalCache::Type type);
+UnaryMathFunction CreateExpFunction();
  UnaryMathFunction CreateSqrtFunction();
  
  
diff --git a/src/flag-definitions.h b/src/flag-definitions.h

index 4a08eff..b25fe5f 100644 (file)
--- a/src/flag-definitions.h
+++ b/src/flag-definitions.h
@@ -181,6 +181,7 @@ DEFINE_int(max_inlined_nodes, 196,
  DEFINE_int(max_inlined_nodes_cumulative, 196,
             "maximum cumulative number of AST nodes considered for inlining")
  DEFINE_bool(loop_invariant_code_motion, true, "loop invariant code motion")
+DEFINE_bool(fast_math, true, "faster (but maybe less accurate) math functions")
  DEFINE_bool(collect_megamorphic_maps_from_stub_cache,
              true,
              "crankshaft harvests type feedback from stub cache")
diff --git a/src/hydrogen-instructions.h b/src/hydrogen-instructions.h

index e529078..47a9da3 100644 (file)
--- a/src/hydrogen-instructions.h
+++ b/src/hydrogen-instructions.h
@@ -2084,6 +2084,9 @@ class HUnaryMathOperation: public HTemplateInstruction<2> {
          set_representation(Representation::Double());
          SetGVNFlag(kChangesNewSpacePromotion);
          break;
+      case kMathExp:
+        set_representation(Representation::Double());
+        break;
        default:
          UNREACHABLE();
      }
@@ -2110,6 +2113,7 @@ class HUnaryMathOperation: public HTemplateInstruction<2> {
          case kMathSqrt:
          case kMathPowHalf:
          case kMathLog:
+        case kMathExp:
          case kMathSin:
          case kMathCos:
          case kMathTan:
diff --git a/src/hydrogen.cc b/src/hydrogen.cc

index ccc11ff..1590ab3 100644 (file)
--- a/src/hydrogen.cc
+++ b/src/hydrogen.cc
@@ -7253,6 +7253,9 @@ bool HGraphBuilder::TryInlineBuiltinFunctionCall(Call* expr, bool drop_extra) {
    if (!expr->target()->shared()->HasBuiltinFunctionId()) return false;
    BuiltinFunctionId id = expr->target()->shared()->builtin_function_id();
    switch (id) {
+    case kMathExp:
+      if (!FLAG_fast_math) break;
+      // Fall through if FLAG_fast_math.
      case kMathRound:
      case kMathAbs:
      case kMathSqrt:
@@ -7313,6 +7316,9 @@ bool HGraphBuilder::TryInlineBuiltinMethodCall(Call* expr,
          return true;
        }
        break;
+    case kMathExp:
+      if (!FLAG_fast_math) break;
+      // Fall through if FLAG_fast_math.
      case kMathRound:
      case kMathFloor:
      case kMathAbs:
diff --git a/src/ia32/assembler-ia32.cc b/src/ia32/assembler-ia32.cc

index 1f90e1a..8cccaa5 100644 (file)
--- a/src/ia32/assembler-ia32.cc
+++ b/src/ia32/assembler-ia32.cc
@@ -1966,6 +1966,16 @@ void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  }
  
  
+void Assembler::addsd(XMMRegister dst, const Operand& src) {  // Emits ADDSD with the source given as an Operand.
+  ASSERT(CpuFeatures::IsEnabled(SSE2));  // SSE2 must be enabled by the caller.
+  EnsureSpace ensure_space(this);
+  EMIT(0xF2);  // Instruction bytes: F2 0F 58, followed by the operand encoding.
+  EMIT(0x0F);
+  EMIT(0x58);
+  emit_sse_operand(dst, src);
+}
+
+
  void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
    ASSERT(CpuFeatures::IsEnabled(SSE2));
    EnsureSpace ensure_space(this);
@@ -1976,6 +1986,16 @@ void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
  }
  
  
+void Assembler::mulsd(XMMRegister dst, const Operand& src) {  // Emits MULSD with the source given as an Operand.
+  ASSERT(CpuFeatures::IsEnabled(SSE2));  // SSE2 must be enabled by the caller.
+  EnsureSpace ensure_space(this);
+  EMIT(0xF2);  // Instruction bytes: F2 0F 59, followed by the operand encoding.
+  EMIT(0x0F);
+  EMIT(0x59);
+  emit_sse_operand(dst, src);
+}
+
+
  void Assembler::subsd(XMMRegister dst, XMMRegister src) {
    ASSERT(CpuFeatures::IsEnabled(SSE2));
    EnsureSpace ensure_space(this);
@@ -2372,7 +2392,7 @@ void Assembler::psrlq(XMMRegister dst, XMMRegister src) {
  }
  
  
-void Assembler::pshufd(XMMRegister dst, XMMRegister src, int8_t shuffle) {
+void Assembler::pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
    ASSERT(CpuFeatures::IsEnabled(SSE2));
    EnsureSpace ensure_space(this);
    EMIT(0x66);
diff --git a/src/ia32/assembler-ia32.h b/src/ia32/assembler-ia32.h

index b8d42b4..b1f421e 100644 (file)
--- a/src/ia32/assembler-ia32.h
+++ b/src/ia32/assembler-ia32.h
@@ -990,8 +990,10 @@ class Assembler : public AssemblerBase {
    void cvtsd2ss(XMMRegister dst, XMMRegister src);
  
    void addsd(XMMRegister dst, XMMRegister src);
+  void addsd(XMMRegister dst, const Operand& src);
    void subsd(XMMRegister dst, XMMRegister src);
    void mulsd(XMMRegister dst, XMMRegister src);
+  void mulsd(XMMRegister dst, const Operand& src);
    void divsd(XMMRegister dst, XMMRegister src);
    void xorpd(XMMRegister dst, XMMRegister src);
    void xorps(XMMRegister dst, XMMRegister src);
@@ -1048,7 +1050,7 @@ class Assembler : public AssemblerBase {
    void psllq(XMMRegister dst, XMMRegister src);
    void psrlq(XMMRegister reg, int8_t shift);
    void psrlq(XMMRegister dst, XMMRegister src);
-  void pshufd(XMMRegister dst, XMMRegister src, int8_t shuffle);
+  void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle);
    void pextrd(Register dst, XMMRegister src, int8_t offset) {
      pextrd(Operand(dst), src, offset);
    }
diff --git a/src/ia32/code-stubs-ia32.cc b/src/ia32/code-stubs-ia32.cc

index fa3e1a8..da8e2ae 100644 (file)
--- a/src/ia32/code-stubs-ia32.cc
+++ b/src/ia32/code-stubs-ia32.cc
@@ -3115,10 +3115,10 @@ void MathPowStub::Generate(MacroAssembler* masm) {
      // F2XM1 calculates 2^st(0) - 1 for -1 < st(0) < 1
      __ f2xm1();    // 2^(X-rnd(X)) - 1, rnd(X)
      __ fld1();     // 1, 2^(X-rnd(X)) - 1, rnd(X)
-    __ faddp(1);   // 1, 2^(X-rnd(X)), rnd(X)
+    __ faddp(1);   // 2^(X-rnd(X)), rnd(X)
      // FSCALE calculates st(0) * 2^st(1)
      __ fscale();   // 2^X, rnd(X)
-    __ fstp(1);
+    __ fstp(1);    // 2^X
      // Bail out to runtime in case of exceptions in the status word.
      __ fnstsw_ax();
      __ test_b(eax, 0x5F);  // We check for all but precision exception.
diff --git a/src/ia32/codegen-ia32.cc b/src/ia32/codegen-ia32.cc

index 5898c31..a024339 100644 (file)
--- a/src/ia32/codegen-ia32.cc
+++ b/src/ia32/codegen-ia32.cc
@@ -102,6 +102,43 @@ UnaryMathFunction CreateTranscendentalFunction(TranscendentalCache::Type type) {
  }
  
  
+UnaryMathFunction CreateExpFunction() {
+  if (!CpuFeatures::IsSupported(SSE2)) return &exp;
+  if (!FLAG_fast_math) return &exp;
+  size_t actual_size;
+  byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, &actual_size, true));
+  if (buffer == NULL) return &exp;
+  ExternalReference::InitializeMathExpData();
+
+  MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size));
+  // esp[1 * kPointerSize]: raw double input
+  // esp[0 * kPointerSize]: return address
+  {
+    CpuFeatures::Scope use_sse2(SSE2);
+    XMMRegister input = xmm1;
+    XMMRegister result = xmm2;
+    __ movdbl(input, Operand(esp, 1 * kPointerSize));
+    __ push(eax);
+    __ push(ebx);
+
+    MathExpGenerator::EmitMathExp(&masm, input, result, xmm0, eax, ebx);
+
+    __ pop(ebx);
+    __ pop(eax);
+    __ movdbl(Operand(esp, 1 * kPointerSize), result);
+    __ fld_d(Operand(esp, 1 * kPointerSize));
+    __ Ret();
+  }
+
+  CodeDesc desc;
+  masm.GetCode(&desc);
+
+  CPU::FlushICache(buffer, actual_size);
+  OS::ProtectCode(buffer, actual_size);
+  return FUNCTION_CAST<UnaryMathFunction>(buffer);
+}
+
+
  UnaryMathFunction CreateSqrtFunction() {
    size_t actual_size;
    // Allocate buffer in executable space.
@@ -755,6 +792,63 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm,
    __ bind(&done);
  }
  
+
+// Operand addressing the |index|-th entry of the exp constants array.
+static Operand ExpConstant(int index) {
+  ExternalReference constant = ExternalReference::math_exp_constants(index);
+  return Operand::StaticVariable(constant);
+}
+
+
+void MathExpGenerator::EmitMathExp(MacroAssembler* masm,  // Emits inline code leaving exp(input) in |result|.
+                                   XMMRegister input,  // Clobbered: reused as a scratch register below.
+                                   XMMRegister result,
+                                   XMMRegister double_scratch,
+                                   Register temp1,
+                                   Register temp2) {
+  ASSERT(!input.is(double_scratch));  // The three XMM registers must be pairwise distinct.
+  ASSERT(!input.is(result));
+  ASSERT(!result.is(double_scratch));
+  ASSERT(!temp1.is(temp2));
+  ASSERT(ExternalReference::math_exp_constants(0).address() != NULL);  // Tables must be initialized first.
+
+  Label done;
+
+  __ movdbl(double_scratch, ExpConstant(0));
+  __ xorpd(result, result);  // result = 0.
+  __ ucomisd(double_scratch, input);
+  __ j(above_equal, &done);  // constants[0] >= input: finish with result = 0.
+  __ ucomisd(input, ExpConstant(1));
+  __ movdbl(result, ExpConstant(2));
+  __ j(above_equal, &done);  // input >= constants[1]: finish with result = constants[2].
+  __ movdbl(double_scratch, ExpConstant(3));
+  __ movdbl(result, ExpConstant(4));
+  __ mulsd(double_scratch, input);
+  __ addsd(double_scratch, result);  // double_scratch = input * constants[3] + constants[4].
+  __ movd(temp2, double_scratch);  // temp2 = low 32 bits of that sum.
+  __ subsd(double_scratch, result);  // Subtract constants[4] again.
+  __ movdbl(result, ExpConstant(6));
+  __ mulsd(double_scratch, ExpConstant(5));
+  __ subsd(double_scratch, input);
+  __ subsd(result, double_scratch);  // result = constants[6] - double_scratch.
+  __ movsd(input, double_scratch);
+  __ mulsd(input, double_scratch);  // input now holds double_scratch squared.
+  __ mulsd(result, input);
+  __ mov(temp1, temp2);
+  __ mulsd(result, ExpConstant(7));
+  __ subsd(result, double_scratch);
+  __ add(temp1, Immediate(0x1ff800));  // 0x1ff800 == 0x3ff << 11, added before the shift right by 11 below.
+  __ addsd(result, ExpConstant(8));
+  __ and_(temp2, Immediate(0x7ff));  // temp2 = low 11 bits = log table index.
+  __ shr(temp1, 11);
+  __ shl(temp1, 20);  // Presumably the biased exponent in high-word position -- confirm.
+  __ movd(input, temp1);
+  __ pshufd(input, input, static_cast<uint8_t>(0xe1));  // Order: 11 10 00 01
+  __ movdbl(double_scratch, Operand::StaticArray(
+      temp2, times_8, ExternalReference::math_exp_log_table()));  // Load the 8-byte table entry at index temp2.
+  __ por(input, double_scratch);  // Merge the shifted value with the table entry's bits.
+  __ mulsd(result, input);
+  __ bind(&done);
+}
+
  #undef __
  
  static const int kNoCodeAgeSequenceLength = 5;
diff --git a/src/ia32/codegen-ia32.h b/src/ia32/codegen-ia32.h

index a783e9a..6de4725 100644 (file)
--- a/src/ia32/codegen-ia32.h
+++ b/src/ia32/codegen-ia32.h
@@ -92,6 +92,20 @@ class StringCharLoadGenerator : public AllStatic {
    DISALLOW_COPY_AND_ASSIGN(StringCharLoadGenerator);
  };
  
+
+class MathExpGenerator : public AllStatic {
+ public:
+  static void EmitMathExp(MacroAssembler* masm,
+                          XMMRegister input,
+                          XMMRegister result,
+                          XMMRegister double_scratch,
+                          Register temp1,
+                          Register temp2);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MathExpGenerator);
+};
+
  } }  // namespace v8::internal
  
  #endif  // V8_IA32_CODEGEN_IA32_H_
diff --git a/src/ia32/disasm-ia32.cc b/src/ia32/disasm-ia32.cc

index 75b46bd..1ac3b2e 100644 (file)
--- a/src/ia32/disasm-ia32.cc
+++ b/src/ia32/disasm-ia32.cc
@@ -869,6 +869,7 @@ static const char* F0Mnem(byte f0byte) {
      case 0xAF: return "imul";
      case 0xA5: return "shld";
      case 0xAD: return "shrd";
+    case 0xAC: return "shrd";  // 3-operand version.
      case 0xAB: return "bts";
      default: return NULL;
    }
diff --git a/src/ia32/lithium-codegen-ia32.cc b/src/ia32/lithium-codegen-ia32.cc

index 3e19c39..388d496 100644 (file)
--- a/src/ia32/lithium-codegen-ia32.cc
+++ b/src/ia32/lithium-codegen-ia32.cc
@@ -3609,6 +3609,16 @@ void LCodeGen::DoMathLog(LUnaryMathOperation* instr) {
  }
  
  
+// Generates the inline exp() sequence for |instr|, with xmm0 serving as
+// the double scratch register.
+void LCodeGen::DoMathExp(LMathExp* instr) {
+  XMMRegister value_reg = ToDoubleRegister(instr->value());
+  XMMRegister result_reg = ToDoubleRegister(instr->result());
+  Register core_temp1 = ToRegister(instr->temp1());
+  Register core_temp2 = ToRegister(instr->temp2());
+  XMMRegister fp_scratch = xmm0;
+
+  MathExpGenerator::EmitMathExp(masm(),
+                                value_reg,
+                                result_reg,
+                                fp_scratch,
+                                core_temp1,
+                                core_temp2);
+}
+
+
  void LCodeGen::DoMathTan(LUnaryMathOperation* instr) {
    ASSERT(ToDoubleRegister(instr->result()).is(xmm1));
    TranscendentalCacheStub stub(TranscendentalCache::TAN,
diff --git a/src/ia32/lithium-ia32.cc b/src/ia32/lithium-ia32.cc

index 0175ca7..65a300e 100644 (file)
--- a/src/ia32/lithium-ia32.cc
+++ b/src/ia32/lithium-ia32.cc
@@ -299,6 +299,11 @@ void LUnaryMathOperation::PrintDataTo(StringStream* stream) {
  }
  
  
+void LMathExp::PrintDataTo(StringStream* stream) {  // Prints only the value operand to the stream.
+  value()->PrintTo(stream);
+}
+
+
  void LMathPowHalf::PrintDataTo(StringStream* stream) {
    stream->Add("/pow_half ");
    value()->PrintTo(stream);
@@ -1087,6 +1092,14 @@ LInstruction* LChunkBuilder::DoUnaryMathOperation(HUnaryMathOperation* instr) {
      LUnaryMathOperation* result = new(zone()) LUnaryMathOperation(context,
                                                                    input);
      return DefineSameAsFirst(result);
+  } else if (op == kMathExp) {
+    ASSERT(instr->representation().IsDouble());
+    ASSERT(instr->value()->representation().IsDouble());
+    LOperand* value = UseTempRegister(instr->value());
+    LOperand* temp1 = TempRegister();
+    LOperand* temp2 = TempRegister();
+    LMathExp* result = new(zone()) LMathExp(value, temp1, temp2);
+    return DefineAsRegister(result);
    } else if (op == kMathSin || op == kMathCos || op == kMathTan) {
      LOperand* context = UseFixed(instr->context(), esi);
      LOperand* input = UseFixedDouble(instr->value(), xmm1);
diff --git a/src/ia32/lithium-ia32.h b/src/ia32/lithium-ia32.h

index c27c841..2067c62 100644 (file)
--- a/src/ia32/lithium-ia32.h
+++ b/src/ia32/lithium-ia32.h
@@ -125,6 +125,7 @@ class LCodeGen;
    V(LoadNamedFieldPolymorphic)                  \
    V(LoadNamedGeneric)                           \
    V(MapEnumLength)                              \
+  V(MathExp)                                    \
    V(MathFloorOfDiv)                             \
    V(MathMinMax)                                 \
    V(MathPowHalf)                                \
@@ -639,6 +640,27 @@ class LUnaryMathOperation: public LTemplateInstruction<1, 2, 0> {
  };
  
  
+class LMathExp: public LTemplateInstruction<1, 1, 2> {  // Lithium instruction for Math.exp: one input and two temps are stored below.
+ public:
+  LMathExp(LOperand* value,
+           LOperand* temp1,
+           LOperand* temp2) {
+    inputs_[0] = value;
+    temps_[0] = temp1;
+    temps_[1] = temp2;
+    ExternalReference::InitializeMathExpData();  // NOTE(review): presumably ensures the exp tables exist before code is emitted - confirm.
+  }
+
+  LOperand* value() { return inputs_[0]; }  // The operand whose exponential is computed.
+  LOperand* temp1() { return temps_[0]; }
+  LOperand* temp2() { return temps_[1]; }
+
+  DECLARE_CONCRETE_INSTRUCTION(MathExp, "math-exp")
+
+  virtual void PrintDataTo(StringStream* stream);
+};
+
+
  class LMathPowHalf: public LTemplateInstruction<1, 2, 1> {
   public:
    LMathPowHalf(LOperand* context, LOperand* value, LOperand* temp) {
diff --git a/src/platform-posix.cc b/src/platform-posix.cc

index 3bc8373..0016d59 100644 (file)
--- a/src/platform-posix.cc
+++ b/src/platform-posix.cc
@@ -142,11 +142,19 @@ UNARY_MATH_FUNCTION(sin, CreateTranscendentalFunction(TranscendentalCache::SIN))
  UNARY_MATH_FUNCTION(cos, CreateTranscendentalFunction(TranscendentalCache::COS))
  UNARY_MATH_FUNCTION(tan, CreateTranscendentalFunction(TranscendentalCache::TAN))
  UNARY_MATH_FUNCTION(log, CreateTranscendentalFunction(TranscendentalCache::LOG))
+UNARY_MATH_FUNCTION(exp, CreateExpFunction())
  UNARY_MATH_FUNCTION(sqrt, CreateSqrtFunction())
  
  #undef MATH_FUNCTION
  
  
+void lazily_initialize_fast_exp() {  // Initializes the fast exp function on first call; later calls do nothing.
+  if (fast_exp_function == NULL) {  // NOTE(review): presumably declared by UNARY_MATH_FUNCTION(exp, ...) - confirm.
+    init_fast_exp_function();
+  }
+}
+
+
  double OS::nan_value() {
    // NAN from math.h is defined in C99 and not in POSIX.
    return NAN;
@@ -332,6 +340,7 @@ void POSIXPostSetUp() {
    init_fast_cos_function();
    init_fast_tan_function();
    init_fast_log_function();
+  // fast_exp is initialized lazily.
    init_fast_sqrt_function();
  }
  
diff --git a/src/platform-win32.cc b/src/platform-win32.cc

index 27433b2..261a946 100644 (file)
--- a/src/platform-win32.cc
+++ b/src/platform-win32.cc
@@ -199,11 +199,19 @@ UNARY_MATH_FUNCTION(sin, CreateTranscendentalFunction(TranscendentalCache::SIN))
  UNARY_MATH_FUNCTION(cos, CreateTranscendentalFunction(TranscendentalCache::COS))
  UNARY_MATH_FUNCTION(tan, CreateTranscendentalFunction(TranscendentalCache::TAN))
  UNARY_MATH_FUNCTION(log, CreateTranscendentalFunction(TranscendentalCache::LOG))
+UNARY_MATH_FUNCTION(exp, CreateExpFunction())
  UNARY_MATH_FUNCTION(sqrt, CreateSqrtFunction())
  
  #undef MATH_FUNCTION
  
  
+void lazily_initialize_fast_exp() {  // Initializes the fast exp function on first call; later calls do nothing.
+  if (fast_exp_function == NULL) {  // NOTE(review): presumably declared by UNARY_MATH_FUNCTION(exp, ...) - confirm.
+    init_fast_exp_function();
+  }
+}
+
+
  void MathSetup() {
  #ifdef _WIN64
    init_modulo_function();
@@ -212,6 +220,7 @@ void MathSetup() {
    init_fast_cos_function();
    init_fast_tan_function();
    init_fast_log_function();
+  // fast_exp is initialized lazily.
    init_fast_sqrt_function();
  }
  
diff --git a/src/platform.h b/src/platform.h

index d4c91b4..67f6792 100644 (file)
--- a/src/platform.h
+++ b/src/platform.h
@@ -119,12 +119,16 @@ class Mutex;
  double ceiling(double x);
  double modulo(double x, double y);
  
-// Custom implementation of sin, cos, tan and log.
+// Custom implementation of math functions.
  double fast_sin(double input);
  double fast_cos(double input);
  double fast_tan(double input);
  double fast_log(double input);
+double fast_exp(double input);
  double fast_sqrt(double input);
+// The custom exp implementation needs 16KB of lookup data; initialize it
+// on demand.
+void lazily_initialize_fast_exp();
  
  // Forward declarations.
  class Socket;
diff --git a/src/runtime.cc b/src/runtime.cc

index a15e1f5..5106be8 100644 (file)
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -7153,7 +7153,8 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_Math_exp) {
    isolate->counters()->math_exp()->Increment();
  
    CONVERT_DOUBLE_ARG_CHECKED(x, 0);
-  return isolate->transcendental_cache()->Get(TranscendentalCache::EXP, x);
+  lazily_initialize_fast_exp();
+  return isolate->heap()->NumberFromDouble(fast_exp(x));
  }
  
  
diff --git a/src/v8.cc b/src/v8.cc

index 7d01582..b5aad90 100644 (file)
--- a/src/v8.cc
+++ b/src/v8.cc
@@ -115,6 +115,7 @@ void V8::TearDown() {
  
    ElementsAccessor::TearDown();
    LOperand::TearDownCaches();
+  ExternalReference::TearDownMathExpData();
    RegisteredExtension::UnregisterAll();
  
    is_running_ = false;
diff --git a/src/x64/assembler-x64.cc b/src/x64/assembler-x64.cc

index d3e2a4f..370cb02 100644 (file)
--- a/src/x64/assembler-x64.cc
+++ b/src/x64/assembler-x64.cc
@@ -2807,6 +2807,16 @@ void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  }
  
  
+void Assembler::addsd(XMMRegister dst, const Operand& src) {  // addsd with a memory source operand.
+  EnsureSpace ensure_space(this);
+  emit(0xF2);  // Byte sequence emitted: F2 [optional REX] 0F 58 <operand>.
+  emit_optional_rex_32(dst, src);
+  emit(0x0F);
+  emit(0x58);
+  emit_sse_operand(dst, src);
+}
+
+
  void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
    EnsureSpace ensure_space(this);
    emit(0xF2);
@@ -2817,6 +2827,16 @@ void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
  }
  
  
+void Assembler::mulsd(XMMRegister dst, const Operand& src) {  // mulsd with a memory source operand.
+  EnsureSpace ensure_space(this);
+  emit(0xF2);  // Byte sequence emitted: F2 [optional REX] 0F 59 <operand>.
+  emit_optional_rex_32(dst, src);
+  emit(0x0F);
+  emit(0x59);
+  emit_sse_operand(dst, src);
+}
+
+
  void Assembler::subsd(XMMRegister dst, XMMRegister src) {
    EnsureSpace ensure_space(this);
    emit(0xF2);
diff --git a/src/x64/assembler-x64.h b/src/x64/assembler-x64.h

index e070fb5..24c8df3 100644 (file)
--- a/src/x64/assembler-x64.h
+++ b/src/x64/assembler-x64.h
@@ -1363,8 +1363,10 @@ class Assembler : public AssemblerBase {
    void cvtsd2siq(Register dst, XMMRegister src);
  
    void addsd(XMMRegister dst, XMMRegister src);
+  void addsd(XMMRegister dst, const Operand& src);
    void subsd(XMMRegister dst, XMMRegister src);
    void mulsd(XMMRegister dst, XMMRegister src);
+  void mulsd(XMMRegister dst, const Operand& src);
    void divsd(XMMRegister dst, XMMRegister src);
  
    void andpd(XMMRegister dst, XMMRegister src);
diff --git a/src/x64/code-stubs-x64.cc b/src/x64/code-stubs-x64.cc

index c529eb4..9705718 100644 (file)
--- a/src/x64/code-stubs-x64.cc
+++ b/src/x64/code-stubs-x64.cc
@@ -2221,7 +2221,7 @@ void MathPowStub::Generate(MacroAssembler* masm) {
      // F2XM1 calculates 2^st(0) - 1 for -1 < st(0) < 1
      __ f2xm1();    // 2^(X-rnd(X)) - 1, rnd(X)
      __ fld1();     // 1, 2^(X-rnd(X)) - 1, rnd(X)
-    __ faddp(1);   // 1, 2^(X-rnd(X)), rnd(X)
+    __ faddp(1);   // 2^(X-rnd(X)), rnd(X)
      // FSCALE calculates st(0) * 2^st(1)
      __ fscale();   // 2^X, rnd(X)
      __ fstp(1);
diff --git a/src/x64/codegen-x64.cc b/src/x64/codegen-x64.cc

index aa777ba..e2471e6 100644 (file)
--- a/src/x64/codegen-x64.cc
+++ b/src/x64/codegen-x64.cc
@@ -99,6 +99,36 @@ UnaryMathFunction CreateTranscendentalFunction(TranscendentalCache::Type type) {
  }
  
  
+UnaryMathFunction CreateExpFunction() {  // Builds a machine-code exp routine, or returns &exp when that is not possible.
+  if (!FLAG_fast_math) return &exp;  // Fast math disabled: use the existing exp.
+  size_t actual_size;
+  byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, &actual_size, true));  // NOTE(review): 'true' presumably requests executable memory - confirm.
+  if (buffer == NULL) return &exp;  // Allocation failed: use the existing exp.
+  ExternalReference::InitializeMathExpData();
+
+  MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size));
+  // xmm0: raw double input.
+  XMMRegister input = xmm0;
+  XMMRegister result = xmm1;
+  __ push(rax);  // rax and rbx are handed to EmitMathExp as temps, so save them first.
+  __ push(rbx);
+
+  MathExpGenerator::EmitMathExp(&masm, input, result, xmm2, rax, rbx);  // xmm2 is the double scratch.
+
+  __ pop(rbx);  // Restore in reverse order of the pushes.
+  __ pop(rax);
+  __ movsd(xmm0, result);  // Move the computed value from xmm1 into xmm0 before returning.
+  __ Ret();
+
+  CodeDesc desc;
+  masm.GetCode(&desc);
+
+  CPU::FlushICache(buffer, actual_size);
+  OS::ProtectCode(buffer, actual_size);
+  return FUNCTION_CAST<UnaryMathFunction>(buffer);  // The buffer start is the function entry point.
+}
+
+
  UnaryMathFunction CreateSqrtFunction() {
    size_t actual_size;
    // Allocate buffer in executable space.
@@ -575,6 +605,58 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm,
    __ bind(&done);
  }
  
+
+void MathExpGenerator::EmitMathExp(MacroAssembler* masm,  // Emits straight-line code that leaves exp(input) in result.
+                                   XMMRegister input,  // Overwritten by the emitted code (see the writes to input below).
+                                   XMMRegister result,
+                                   XMMRegister double_scratch,
+                                   Register temp1,
+                                   Register temp2) {  // kScratchRegister is also clobbered.
+  ASSERT(!input.is(result));
+  ASSERT(!input.is(double_scratch));
+  ASSERT(!result.is(double_scratch));
+  ASSERT(!temp1.is(temp2));
+  ASSERT(ExternalReference::math_exp_constants(0).address() != NULL);  // The constants table must already be initialized.
+
+  Label done;
+
+  __ movq(kScratchRegister, ExternalReference::math_exp_constants(0));  // Base address of the constants table.
+  __ movsd(double_scratch, Operand(kScratchRegister, 0 * kDoubleSize));  // constants[0]: NOTE(review) presumably the underflow threshold - confirm.
+  __ xorpd(result, result);  // result = 0.0
+  __ ucomisd(double_scratch, input);
+  __ j(above_equal, &done);  // constants[0] >= input: finish with result == 0.0.
+  __ ucomisd(input, Operand(kScratchRegister, 1 * kDoubleSize));
+  __ movsd(result, Operand(kScratchRegister, 2 * kDoubleSize));  // Loaded between compare and branch; the branch below still uses the ucomisd flags.
+  __ j(above_equal, &done);  // input >= constants[1]: finish with result == constants[2].
+  __ movsd(double_scratch, Operand(kScratchRegister, 3 * kDoubleSize));
+  __ movsd(result, Operand(kScratchRegister, 4 * kDoubleSize));
+  __ mulsd(double_scratch, input);
+  __ addsd(double_scratch, result);  // double_scratch = constants[3] * input + constants[4]
+  __ movq(temp2, double_scratch);  // Raw 64-bit pattern of that sum.
+  __ subsd(double_scratch, result);  // Subtract constants[4] again.
+  __ movsd(result, Operand(kScratchRegister, 6 * kDoubleSize));
+  __ lea(temp1, Operand(temp2, 0x1ff800));  // temp1 = temp2 + 0x1ff800
+  __ and_(temp2, Immediate(0x7ff));  // Low 11 bits become the table index.
+  __ shr(temp1, Immediate(11));
+  __ mulsd(double_scratch, Operand(kScratchRegister, 5 * kDoubleSize));
+  __ movq(kScratchRegister, ExternalReference::math_exp_log_table());  // Scratch register now points at the lookup table.
+  __ shl(temp1, Immediate(52));  // Shift into bit 52 and up (the exponent field of a double).
+  __ or_(temp1, Operand(kScratchRegister, temp2, times_8, 0));  // OR in the 8-byte table entry at index temp2.
+  __ movq(kScratchRegister, ExternalReference::math_exp_constants(0));  // Point back at the constants table.
+  __ subsd(double_scratch, input);
+  __ movsd(input, double_scratch);
+  __ subsd(result, double_scratch);  // result = constants[6] - double_scratch
+  __ mulsd(input, double_scratch);  // input = double_scratch squared
+  __ mulsd(result, input);
+  __ movq(input, temp1);  // input now holds the bit pattern assembled in temp1.
+  __ mulsd(result, Operand(kScratchRegister, 7 * kDoubleSize));
+  __ subsd(result, double_scratch);
+  __ addsd(result, Operand(kScratchRegister, 8 * kDoubleSize));
+  __ mulsd(result, input);  // Final scaling by the assembled value.
+
+  __ bind(&done);
+}
+
  #undef __
  
  
diff --git a/src/x64/codegen-x64.h b/src/x64/codegen-x64.h

index 5d8bbff..73438d7 100644 (file)
--- a/src/x64/codegen-x64.h
+++ b/src/x64/codegen-x64.h
@@ -86,6 +86,20 @@ class StringCharLoadGenerator : public AllStatic {
    DISALLOW_COPY_AND_ASSIGN(StringCharLoadGenerator);
  };
  
+
+class MathExpGenerator : public AllStatic {  // Holder for the Math.exp code emitter; only a static member is exposed.
+ public:
+  static void EmitMathExp(MacroAssembler* masm,  // Emits code that computes exp(input) into result.
+                          XMMRegister input,  // Clobbered by the emitted code.
+                          XMMRegister result,
+                          XMMRegister double_scratch,
+                          Register temp1,
+                          Register temp2);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MathExpGenerator);
+};
+
  } }  // namespace v8::internal
  
  #endif  // V8_X64_CODEGEN_X64_H_
diff --git a/src/x64/lithium-codegen-x64.cc b/src/x64/lithium-codegen-x64.cc

index 13c5fa5..c430465 100644 (file)
--- a/src/x64/lithium-codegen-x64.cc
+++ b/src/x64/lithium-codegen-x64.cc
@@ -3477,6 +3477,16 @@ void LCodeGen::DoDeferredRandom(LRandom* instr) {
  }
  
  
+void LCodeGen::DoMathExp(LMathExp* instr) {  // Delegates to MathExpGenerator::EmitMathExp with the instruction's operands.
+  XMMRegister input = ToDoubleRegister(instr->value());
+  XMMRegister result = ToDoubleRegister(instr->result());
+  Register temp1 = ToRegister(instr->temp1());
+  Register temp2 = ToRegister(instr->temp2());
+
+  MathExpGenerator::EmitMathExp(masm(), input, result, xmm0, temp1, temp2);  // xmm0 is passed as the double scratch register.
+}
+
+
  void LCodeGen::DoMathLog(LUnaryMathOperation* instr) {
    ASSERT(ToDoubleRegister(instr->result()).is(xmm1));
    TranscendentalCacheStub stub(TranscendentalCache::LOG,
diff --git a/src/x64/lithium-x64.cc b/src/x64/lithium-x64.cc

index a4771ed..e102803 100644 (file)
--- a/src/x64/lithium-x64.cc
+++ b/src/x64/lithium-x64.cc
@@ -299,6 +299,11 @@ void LUnaryMathOperation::PrintDataTo(StringStream* stream) {
  }
  
  
+void LMathExp::PrintDataTo(StringStream* stream) {  // Prints only the value operand to the stream.
+  value()->PrintTo(stream);
+}
+
+
  void LLoadContextSlot::PrintDataTo(StringStream* stream) {
    context()->PrintTo(stream);
    stream->Add("[%d]", slot_index());
@@ -1046,6 +1051,14 @@ LInstruction* LChunkBuilder::DoUnaryMathOperation(HUnaryMathOperation* instr) {
      LOperand* input = UseFixedDouble(instr->value(), xmm1);
      LUnaryMathOperation* result = new(zone()) LUnaryMathOperation(input);
      return MarkAsCall(DefineFixedDouble(result, xmm1), instr);
+  } else if (op == kMathExp) {
+    ASSERT(instr->representation().IsDouble());
+    ASSERT(instr->value()->representation().IsDouble());
+    LOperand* value = UseTempRegister(instr->value());
+    LOperand* temp1 = TempRegister();
+    LOperand* temp2 = TempRegister();
+    LMathExp* result = new(zone()) LMathExp(value, temp1, temp2);
+    return DefineAsRegister(result);
    } else {
      LOperand* input = UseRegisterAtStart(instr->value());
      LUnaryMathOperation* result = new(zone()) LUnaryMathOperation(input);
diff --git a/src/x64/lithium-x64.h b/src/x64/lithium-x64.h

index 4977cac..b5d435b 100644 (file)
--- a/src/x64/lithium-x64.h
+++ b/src/x64/lithium-x64.h
@@ -131,6 +131,7 @@ class LCodeGen;
    V(LoadNamedField)                             \
    V(LoadNamedFieldPolymorphic)                  \
    V(LoadNamedGeneric)                           \
+  V(MathExp)                                    \
    V(MathFloorOfDiv)                             \
    V(MathMinMax)                                 \
    V(ModI)                                       \
@@ -643,6 +644,25 @@ class LUnaryMathOperation: public LTemplateInstruction<1, 1, 0> {
  };
  
  
+class LMathExp: public LTemplateInstruction<1, 1, 2> {  // Lithium instruction for Math.exp: one input and two temps are stored below.
+ public:
+  LMathExp(LOperand* value, LOperand* temp1, LOperand* temp2) {
+    inputs_[0] = value;
+    temps_[0] = temp1;
+    temps_[1] = temp2;
+    ExternalReference::InitializeMathExpData();  // NOTE(review): presumably ensures the exp tables exist before code is emitted - confirm.
+  }
+
+  LOperand* value() { return inputs_[0]; }  // The operand whose exponential is computed.
+  LOperand* temp1() { return temps_[0]; }
+  LOperand* temp2() { return temps_[1]; }
+
+  DECLARE_CONCRETE_INSTRUCTION(MathExp, "math-exp")
+
+  virtual void PrintDataTo(StringStream* stream);
+};
+
+
  class LCmpObjectEqAndBranch: public LControlInstruction<2, 0> {
   public:
    LCmpObjectEqAndBranch(LOperand* left, LOperand* right) {
diff --git a/test/mjsunit/math-exp-precision.js b/test/mjsunit/math-exp-precision.js

new file mode 100644 (file)

index 0000000..ace7edc
--- /dev/null
+++ b/test/mjsunit/math-exp-precision.js
@@ -0,0 +1,64 @@
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Tests that the --fast-math implementation of Math.exp() has
+// reasonable precision.
+
+function exp(x) {  // Thin wrapper so every Math.exp call in this test goes through one function.
+  return Math.exp(x);
+}
+
+var first_call_result = exp(Math.PI);
+var second_call_result = exp(Math.PI);
+
+function assertAlmostEquals(expected, actual, x) {
+  if (expected == 0 && actual == 0) return;  // OK
+  if (expected == Number.POSITIVE_INFINITY &&
+      actual == Number.POSITIVE_INFINITY) {
+    return;  // OK
+  }
+  relative_diff = Math.abs(expected/actual - 1);
+  assertTrue(relative_diff < 1e-12, "relative difference of " + relative_diff +
+                                    " for input " + x);
+}
+
+var increment = Math.PI / 35;  // Roughly 0.1, but we want to try many
+                               // different mantissae.
+for (var x = -708; x < 710; x += increment) {  // Sweep the input range in steps of 'increment'.
+  var ex = exp(x);
+  var reference = Math.pow(Math.E, x);  // Reference value computed through Math.pow instead of Math.exp.
+  assertAlmostEquals(reference, ex, x);
+  if (ex > 0 && isFinite(ex)) {  // The round trip only makes sense for positive, finite results.
+    var back = Math.log(ex);
+    assertAlmostEquals(x, back, x + " (backwards)");  // log(exp(x)) should reproduce x.
+  }
+}
+
+// Make sure optimizing the function does not alter the result.
+var last_call_result = exp(Math.PI);
+assertEquals(first_call_result, second_call_result);
+assertEquals(first_call_result, last_call_result);
author	jkummerow@chromium.org <jkummerow@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Mon, 26 Nov 2012 13:12:35 +0000 (13:12 +0000)
committer	jkummerow@chromium.org <jkummerow@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Mon, 26 Nov 2012 13:12:35 +0000 (13:12 +0000)
src/arm/codegen-arm.cc		patch \| blob \| history
src/arm/codegen-arm.h		patch \| blob \| history
src/arm/lithium-arm.cc		patch \| blob \| history
src/arm/lithium-arm.h		patch \| blob \| history
src/arm/lithium-codegen-arm.cc		patch \| blob \| history
src/arm/simulator-arm.cc		patch \| blob \| history
src/arm/simulator-arm.h		patch \| blob \| history
src/assembler.cc		patch \| blob \| history
src/assembler.h		patch \| blob \| history
src/codegen.h		patch \| blob \| history
src/flag-definitions.h		patch \| blob \| history
src/hydrogen-instructions.h		patch \| blob \| history
src/hydrogen.cc		patch \| blob \| history
src/ia32/assembler-ia32.cc		patch \| blob \| history
src/ia32/assembler-ia32.h		patch \| blob \| history
src/ia32/code-stubs-ia32.cc		patch \| blob \| history
src/ia32/codegen-ia32.cc		patch \| blob \| history
src/ia32/codegen-ia32.h		patch \| blob \| history
src/ia32/disasm-ia32.cc		patch \| blob \| history
src/ia32/lithium-codegen-ia32.cc		patch \| blob \| history
src/ia32/lithium-ia32.cc		patch \| blob \| history
src/ia32/lithium-ia32.h		patch \| blob \| history
src/platform-posix.cc		patch \| blob \| history
src/platform-win32.cc		patch \| blob \| history
src/platform.h		patch \| blob \| history
src/runtime.cc		patch \| blob \| history
src/v8.cc		patch \| blob \| history
src/x64/assembler-x64.cc		patch \| blob \| history
src/x64/assembler-x64.h		patch \| blob \| history
src/x64/code-stubs-x64.cc		patch \| blob \| history
src/x64/codegen-x64.cc		patch \| blob \| history
src/x64/codegen-x64.h		patch \| blob \| history
src/x64/lithium-codegen-x64.cc		patch \| blob \| history
src/x64/lithium-x64.cc		patch \| blob \| history
src/x64/lithium-x64.h		patch \| blob \| history
test/mjsunit/math-exp-precision.js	[new file with mode: 0644]	patch \| blob