ARM: Use the vsqrt instruction when available

author sgjesse@chromium.org <sgjesse@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Tue, 29 Jun 2010 09:40:36 +0000 (09:40 +0000)

committer sgjesse@chromium.org <sgjesse@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Tue, 29 Jun 2010 09:40:36 +0000 (09:40 +0000)
author sgjesse@chromium.org <sgjesse@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 29 Jun 2010 09:40:36 +0000 (09:40 +0000)
committer sgjesse@chromium.org <sgjesse@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 29 Jun 2010 09:40:36 +0000 (09:40 +0000)
diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc

index f8d98db..c8170b3 100644 (file)
--- a/src/arm/assembler-arm.cc
+++ b/src/arm/assembler-arm.cc
@@ -2112,6 +2112,18 @@ void Assembler::vmrs(Register dst, Condition cond) {
  }
  
  
+
+void Assembler::vsqrt(const DwVfpRegister dst,
+                      const DwVfpRegister src,
+                      const Condition cond) {
+  // cond(31-28) | 11101 (27-23)| D=?(22) | 11 (21-20) | 0001 (19-16) |
+  // Vd(15-12) | 101(11-9) | sz(8)=1 | 11 (7-6) | M(5)=? | 0(4) | Vm(3-0)
+  ASSERT(CpuFeatures::IsEnabled(VFP3));
+  emit(cond | 0xE*B24 | B23 | 0x3*B20 | B16 |
+       dst.code()*B12 | 0x5*B9 | B8 | 3*B6 | src.code());
+}
+
+
  // Pseudo instructions.
  void Assembler::nop(int type) {
    // This is mov rx, rx.
diff --git a/src/arm/assembler-arm.h b/src/arm/assembler-arm.h

index 54b584a..8a4173d 100644 (file)
--- a/src/arm/assembler-arm.h
+++ b/src/arm/assembler-arm.h
@@ -988,6 +988,9 @@ class Assembler : public Malloced {
              const Condition cond = al);
    void vmrs(const Register dst,
              const Condition cond = al);
+  void vsqrt(const DwVfpRegister dst,
+             const DwVfpRegister src,
+             const Condition cond = al);
  
    // Pseudo instructions
    void nop(int type = 0);
diff --git a/src/arm/codegen-arm.cc b/src/arm/codegen-arm.cc

index c481ded..f923c09 100644 (file)
--- a/src/arm/codegen-arm.cc
+++ b/src/arm/codegen-arm.cc
@@ -4279,22 +4279,147 @@ void CodeGenerator::GenerateIsNonNegativeSmi(ZoneList<Expression*>* args) {
  }
  
  
-// Generates the Math.pow method - currently just calls runtime.
+// Generates the Math.pow method.
  void CodeGenerator::GenerateMathPow(ZoneList<Expression*>* args) {
    ASSERT(args->length() == 2);
    Load(args->at(0));
    Load(args->at(1));
-  frame_->CallRuntime(Runtime::kMath_pow, 2);
-  frame_->EmitPush(r0);
+
+  if (!CpuFeatures::IsSupported(VFP3)) {
+    frame_->CallRuntime(Runtime::kMath_pow, 2);
+    frame_->EmitPush(r0);
+  } else {
+    CpuFeatures::Scope scope(VFP3);
+    JumpTarget runtime, done;
+    Label not_minus_half, allocate_return;
+
+    Register scratch1 = VirtualFrame::scratch0();
+    Register scratch2 = VirtualFrame::scratch1();
+
+    // Get base and exponent to registers.
+    Register exponent = frame_->PopToRegister();
+    Register base = frame_->PopToRegister(exponent);
+
+    // Set the frame for the runtime jump target. The code below jumps to the
+    // jump target label so the frame needs to be established before that.
+    ASSERT(runtime.entry_frame() == NULL);
+    runtime.set_entry_frame(frame_);
+
+    __ BranchOnSmi(exponent, runtime.entry_label());
+
+    // Special handling of raising to the power of -0.5 and 0.5. First check
+    // that the exponent is a heap number and that its mantissa word is zero
+    // (which it is for both values).
+    Register heap_number_map = r6;
+    __ LoadRoot(heap_number_map, Heap::kHeapNumberMapRootIndex);
+    __ ldr(scratch1, FieldMemOperand(exponent, HeapObject::kMapOffset));
+    __ ldr(scratch2, FieldMemOperand(exponent, HeapNumber::kMantissaOffset));
+    __ cmp(scratch1, heap_number_map);
+    runtime.Branch(ne);
+    __ tst(scratch2, scratch2);
+    runtime.Branch(ne);
+
+    // Load the high word (sign, exponent, upper mantissa) of the exponent.
+    __ ldr(scratch1, FieldMemOperand(exponent, HeapNumber::kExponentOffset));
+
+    // Compare exponent with -0.5.
+    __ cmp(scratch1, Operand(0xbfe00000));
+    __ b(ne, &not_minus_half);
+
+    // Get the double value from the base into vfp register d0.
+    __ ObjectToDoubleVFPRegister(base, d0,
+                                 scratch1, scratch2, heap_number_map, s0,
+                                 runtime.entry_label(),
+                                 AVOID_NANS_AND_INFINITIES);
+
+    // Load 1.0 into d2.
+    __ mov(scratch2, Operand(0x3ff00000));
+    __ mov(scratch1, Operand(0));
+    __ vmov(d2, scratch1, scratch2);
+
+    // Calculate the reciprocal of the square root. 1/sqrt(x) = sqrt(1/x).
+    __ vdiv(d0, d2, d0);
+    __ vsqrt(d0, d0);
+
+    __ b(&allocate_return);
+
+    __ bind(&not_minus_half);
+    // Compare exponent with 0.5.
+    __ cmp(scratch1, Operand(0x3fe00000));
+    runtime.Branch(ne);
+
+    // Get the double value from the base into vfp register d0.
+    __ ObjectToDoubleVFPRegister(base, d0,
+                                 scratch1, scratch2, heap_number_map, s0,
+                                 runtime.entry_label(),
+                                 AVOID_NANS_AND_INFINITIES);
+    __ vsqrt(d0, d0);
+
+    __ bind(&allocate_return);
+    __ AllocateHeapNumberWithValue(
+        base, d0, scratch1, scratch2, heap_number_map, runtime.entry_label());
+    done.Jump();
+
+    runtime.Bind();
+
+    // Push back the arguments again for the runtime call.
+    frame_->EmitPush(base);
+    frame_->EmitPush(exponent);
+    frame_->CallRuntime(Runtime::kMath_pow, 2);
+    __ Move(base, r0);
+
+    done.Bind();
+    frame_->EmitPush(base);
+  }
  }
  
  
-// Generates the Math.sqrt method - currently just calls runtime.
+// Generates the Math.sqrt method.
  void CodeGenerator::GenerateMathSqrt(ZoneList<Expression*>* args) {
    ASSERT(args->length() == 1);
    Load(args->at(0));
-  frame_->CallRuntime(Runtime::kMath_sqrt, 1);
-  frame_->EmitPush(r0);
+
+  if (!CpuFeatures::IsSupported(VFP3)) {
+    frame_->CallRuntime(Runtime::kMath_sqrt, 1);
+    frame_->EmitPush(r0);
+  } else {
+    CpuFeatures::Scope scope(VFP3);
+    JumpTarget runtime, done;
+
+    Register scratch1 = VirtualFrame::scratch0();
+    Register scratch2 = VirtualFrame::scratch1();
+
+    // Get the value from the frame.
+    Register tos = frame_->PopToRegister();
+
+    // Set the frame for the runtime jump target. The code below jumps to the
+    // jump target label so the frame needs to be established before that.
+    ASSERT(runtime.entry_frame() == NULL);
+    runtime.set_entry_frame(frame_);
+
+    Register heap_number_map = r6;
+    __ LoadRoot(heap_number_map, Heap::kHeapNumberMapRootIndex);
+
+    // Get the double value from the heap number into vfp register d0.
+    __ ObjectToDoubleVFPRegister(tos, d0,
+                                 scratch1, scratch2, heap_number_map, s0,
+                                 runtime.entry_label());
+
+    // Calculate the square root of d0 and place result in a heap number object.
+    __ vsqrt(d0, d0);
+    __ AllocateHeapNumberWithValue(
+        tos, d0, scratch1, scratch2, heap_number_map, runtime.entry_label());
+    done.Jump();
+
+    runtime.Bind();
+    // Push back the argument again for the runtime call.
+    frame_->EmitPush(tos);
+    frame_->CallRuntime(Runtime::kMath_sqrt, 1);
+    __ Move(tos, r0);
+
+    done.Bind();
+    frame_->EmitPush(tos);
+  }
  }
  
  
diff --git a/src/arm/disasm-arm.cc b/src/arm/disasm-arm.cc

index 4005369..fb17d45 100644 (file)
--- a/src/arm/disasm-arm.cc
+++ b/src/arm/disasm-arm.cc
@@ -1038,7 +1038,8 @@ void Decoder::DecodeUnconditional(Instr* instr) {
  // Dd = vmul(Dn, Dm)
  // Dd = vdiv(Dn, Dm)
  // vcmp(Dd, Dm)
-// VMRS
+// vmrs
+// Dd = vsqrt(Dm)
  void Decoder::DecodeTypeVFP(Instr* instr) {
    ASSERT((instr->TypeField() == 7) && (instr->Bit(24) == 0x0) );
    ASSERT(instr->Bits(11, 9) == 0x5);
@@ -1056,6 +1057,8 @@ void Decoder::DecodeTypeVFP(Instr* instr) {
        } else if (((instr->Opc2Field() == 0x4) || (instr->Opc2Field() == 0x5)) &&
                   (instr->Opc3Field() & 0x1)) {
          DecodeVCMP(instr);
+      } else if (((instr->Opc2Field() == 0x1)) && (instr->Opc3Field() == 0x3)) {
+        Format(instr, "vsqrt.f64'cond 'Dd, 'Dm");
        } else {
          Unknown(instr);  // Not used by V8.
        }
diff --git a/src/arm/macro-assembler-arm.cc b/src/arm/macro-assembler-arm.cc

index 8568266..630e0b8 100644 (file)
--- a/src/arm/macro-assembler-arm.cc
+++ b/src/arm/macro-assembler-arm.cc
@@ -1369,6 +1369,56 @@ void MacroAssembler::IntegerToDoubleConversionWithVFP3(Register inReg,
  }
  
  
+void MacroAssembler::ObjectToDoubleVFPRegister(Register object,
+                                               DwVfpRegister result,
+                                               Register scratch1,
+                                               Register scratch2,
+                                               Register heap_number_map,
+                                               SwVfpRegister scratch3,
+                                               Label* not_number,
+                                               ObjectToDoubleFlags flags) {
+  Label done;
+  if ((flags & OBJECT_NOT_SMI) == 0) {
+    Label not_smi;
+    BranchOnNotSmi(object, &not_smi);
+    // Remove smi tag and convert to double.
+    mov(scratch1, Operand(object, ASR, kSmiTagSize));
+    vmov(scratch3, scratch1);
+    vcvt_f64_s32(result, scratch3);
+    b(&done);
+    bind(&not_smi);
+  }
+  // Check for heap number and load double value from it.
+  ldr(scratch1, FieldMemOperand(object, HeapObject::kMapOffset));
+  sub(scratch2, object, Operand(kHeapObjectTag));
+  cmp(scratch1, heap_number_map);
+  b(ne, not_number);
+  if ((flags & AVOID_NANS_AND_INFINITIES) != 0) {
+    // If exponent is all ones the number is either a NaN or +/-Infinity.
+    ldr(scratch1, FieldMemOperand(object, HeapNumber::kExponentOffset));
+    Sbfx(scratch1,
+         scratch1,
+         HeapNumber::kExponentShift,
+         HeapNumber::kExponentBits);
+    // All-one value sign extend to -1.
+    cmp(scratch1, Operand(-1));
+    b(eq, not_number);
+  }
+  vldr(result, scratch2, HeapNumber::kValueOffset);
+  bind(&done);
+}
+
+
+void MacroAssembler::SmiToDoubleVFPRegister(Register smi,
+                                            DwVfpRegister value,
+                                            Register scratch1,
+                                            SwVfpRegister scratch2) {
+  mov(scratch1, Operand(smi, ASR, kSmiTagSize));
+  vmov(scratch2, scratch1);
+  vcvt_f64_s32(value, scratch2);
+}
+
+
  void MacroAssembler::GetLeastBitsFromSmi(Register dst,
                                           Register src,
                                           int num_least_bits) {
@@ -1686,6 +1736,18 @@ void MacroAssembler::AllocateHeapNumber(Register result,
  }
  
  
+void MacroAssembler::AllocateHeapNumberWithValue(Register result,
+                                                 DwVfpRegister value,
+                                                 Register scratch1,
+                                                 Register scratch2,
+                                                 Register heap_number_map,
+                                                 Label* gc_required) {
+  AllocateHeapNumber(result, scratch1, scratch2, heap_number_map, gc_required);
+  sub(scratch1, result, Operand(kHeapObjectTag));
+  vstr(value, scratch1, HeapNumber::kValueOffset);
+}
+
+
  void MacroAssembler::CountLeadingZeros(Register zeros,   // Answer.
                                         Register source,  // Input.
                                         Register scratch) {
diff --git a/src/arm/macro-assembler-arm.h b/src/arm/macro-assembler-arm.h

index 12fb648..c3f45a6 100644 (file)
--- a/src/arm/macro-assembler-arm.h
+++ b/src/arm/macro-assembler-arm.h
@@ -67,6 +67,17 @@ enum AllocationFlags {
  };
  
  
+// Flags used for the ObjectToDoubleVFPRegister function.
+enum ObjectToDoubleFlags {
+  // No special flags.
+  NO_OBJECT_TO_DOUBLE_FLAGS = 0,
+  // Object is known to be a non smi.
+  OBJECT_NOT_SMI = 1 << 0,
+  // Don't load NaNs or infinities, branch to the non number case instead.
+  AVOID_NANS_AND_INFINITIES = 1 << 1
+};
+
+
  // MacroAssembler implements a collection of frequently used macros.
  class MacroAssembler: public Assembler {
   public:
@@ -381,6 +392,13 @@ class MacroAssembler: public Assembler {
                            Register scratch2,
                            Register heap_number_map,
                            Label* gc_required);
+  void AllocateHeapNumberWithValue(Register result,
+                                   DwVfpRegister value,
+                                   Register scratch1,
+                                   Register scratch2,
+                                   Register heap_number_map,
+                                   Label* gc_required);
+
  
    // ---------------------------------------------------------------------------
    // Support functions.
@@ -469,6 +487,27 @@ class MacroAssembler: public Assembler {
                                           Register outHighReg,
                                           Register outLowReg);
  
+  // Load the value of a number object into a VFP double register. If the object
+  // is not a number a jump to the label not_number is performed and the VFP
+  // double register is unchanged.
+  void ObjectToDoubleVFPRegister(
+      Register object,
+      DwVfpRegister value,
+      Register scratch1,
+      Register scratch2,
+      Register heap_number_map,
+      SwVfpRegister scratch3,
+      Label* not_number,
+      ObjectToDoubleFlags flags = NO_OBJECT_TO_DOUBLE_FLAGS);
+
+  // Load the value of a smi object into a VFP double register. The register
+  // scratch1 can be the same register as smi in which case smi will hold the
+  // untagged value afterwards.
+  void SmiToDoubleVFPRegister(Register smi,
+                              DwVfpRegister value,
+                              Register scratch1,
+                              SwVfpRegister scratch2);
+
    // Count leading zeros in a 32 bit word.  On ARM5 and later it uses the clz
    // instruction.  On pre-ARM5 hardware this routine gives the wrong answer
    // for 0 (31 instead of 32).  Source and scratch can be the same in which case
diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc

index 77776c2..6240cd4 100644 (file)
--- a/src/arm/simulator-arm.cc
+++ b/src/arm/simulator-arm.cc
@@ -26,6 +26,7 @@
  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  
  #include <stdlib.h>
+#include <math.h>
  #include <cstdarg>
  #include "v8.h"
  
@@ -2262,7 +2263,8 @@ static int GlueRegCode(bool last_bit, int vm, int m) {
  // Dd = vmul(Dn, Dm)
  // Dd = vdiv(Dn, Dm)
  // vcmp(Dd, Dm)
-// VMRS
+// vmrs
+// Dd = vsqrt(Dm)
  void Simulator::DecodeTypeVFP(Instr* instr) {
    ASSERT((instr->TypeField() == 7) && (instr->Bit(24) == 0x0) );
    ASSERT(instr->Bits(11, 9) == 0x5);
@@ -2284,6 +2286,11 @@ void Simulator::DecodeTypeVFP(Instr* instr) {
        } else if (((instr->Opc2Field() == 0x4) || (instr->Opc2Field() == 0x5)) &&
                   (instr->Opc3Field() & 0x1)) {
          DecodeVCMP(instr);
+      } else if (((instr->Opc2Field() == 0x1)) && (instr->Opc3Field() == 0x3)) {
+        // vsqrt
+        double dm_value = get_double_from_d_register(vm);
+        double dd_value = sqrt(dm_value);
+        set_d_register_from_double(vd, dd_value);
        } else {
          UNREACHABLE();  // Not used by V8.
        }
diff --git a/test/cctest/test-disasm-arm.cc b/test/cctest/test-disasm-arm.cc

index 5903fe6..bfdd5ce 100644 (file)
--- a/test/cctest/test-disasm-arm.cc
+++ b/test/cctest/test-disasm-arm.cc
@@ -401,3 +401,16 @@ TEST(Type3) {
    VERIFY_RUN();
  }
  
+
+
+TEST(Vfp) {
+  SETUP();
+
+  if (CpuFeatures::IsSupported(VFP3)) {
+    CpuFeatures::Scope scope(VFP3);
+    COMPARE(vsqrt(d0, d0),
+            "eeb10bc0       vsqrt.f64 d0, d0");
+  }
+
+  VERIFY_RUN();
+}
diff --git a/test/mjsunit/math-sqrt.js b/test/mjsunit/math-sqrt.js

index ae29b74..fb00d5b 100644 (file)
--- a/test/mjsunit/math-sqrt.js
+++ b/test/mjsunit/math-sqrt.js
@@ -27,18 +27,23 @@
  
  // Tests the special cases specified by ES 15.8.2.17
  
+function test(expected_sqrt, value) {
+  assertEquals(expected_sqrt, Math.sqrt(value));
+  if (isFinite(value)) { 
+    assertEquals(expected_sqrt, Math.pow(value, 0.5));
+  }
+}
+
  // Simple sanity check
-assertEquals(2, Math.sqrt(4));
-assertEquals(0.1, Math.sqrt(0.01));
+test(2, 4);
+test(0.1, 0.01);
  
  // Spec tests
-assertEquals(NaN, Math.sqrt(NaN));
-assertEquals(NaN, Math.sqrt(-1));
-assertEquals(+0, Math.sqrt(+0));
-assertEquals(-0, Math.sqrt(-0));
-assertEquals(Infinity, Math.sqrt(Infinity));
+test(NaN, NaN);
+test(NaN, -1);
+test(+0, +0);
+test(-0, -0);
+test(Infinity, Infinity);
  // -Infinity is smaller than 0 so it should return NaN
-assertEquals(NaN, Math.sqrt(-Infinity));
-
-
+test(NaN, -Infinity);
author	sgjesse@chromium.org <sgjesse@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Tue, 29 Jun 2010 09:40:36 +0000 (09:40 +0000)
committer	sgjesse@chromium.org <sgjesse@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Tue, 29 Jun 2010 09:40:36 +0000 (09:40 +0000)
src/arm/assembler-arm.cc		patch \| blob \| history
src/arm/assembler-arm.h		patch \| blob \| history
src/arm/codegen-arm.cc		patch \| blob \| history
src/arm/disasm-arm.cc		patch \| blob \| history
src/arm/macro-assembler-arm.cc		patch \| blob \| history
src/arm/macro-assembler-arm.h		patch \| blob \| history
src/arm/simulator-arm.cc		patch \| blob \| history
test/cctest/test-disasm-arm.cc		patch \| blob \| history
test/mjsunit/math-sqrt.js		patch \| blob \| history