From 04da3331d283875d361c679cf2a369c1a5de44f7 Mon Sep 17 00:00:00 2001
From: "bmeurer@chromium.org"
 <bmeurer@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Date: Mon, 26 Aug 2013 12:24:03 +0000
Subject: [PATCH] Arm support for DoubleToIStub (truncating).

Add support for a truncating DoubleToIStub and reorganize the macro-assembler
dToI operations so that the fast path is done inline and the slow path calls
the stub.

BUG=
R=bmeurer@chromium.org

Review URL: https://codereview.chromium.org/23129003

Patch from Ross McIlroy <rmcilroy@google.com>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@16322 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
---
 src/arm/code-stubs-arm.cc          | 114 +++++++++++++++++++++--
 src/arm/code-stubs-arm.h           |  15 +--
 src/arm/lithium-arm.cc             |  15 +--
 src/arm/lithium-arm.h              |  23 ++---
 src/arm/lithium-codegen-arm.cc     |  31 ++-----
 src/arm/macro-assembler-arm.cc     | 180 ++++++++++++++++++------------------
 src/arm/macro-assembler-arm.h      |  56 +++++++-----
 src/code-stubs.h                   |  14 ++-
 test/cctest/cctest.gyp             |   2 +
 test/cctest/test-code-stubs-arm.cc | 181 +++++++++++++++++++++++++++++++++++++
 test/cctest/test-code-stubs.cc     |  21 ++++-
 test/cctest/test-code-stubs.h      |   5 +
 12 files changed, 468 insertions(+), 189 deletions(-)
 create mode 100644 test/cctest/test-code-stubs-arm.cc

diff --git a/src/arm/code-stubs-arm.cc b/src/arm/code-stubs-arm.cc
index 98a835f..3dbdb2f 100644
--- a/src/arm/code-stubs-arm.cc
+++ b/src/arm/code-stubs-arm.cc
@@ -634,6 +634,111 @@ void ConvertToDoubleStub::Generate(MacroAssembler* masm) {
 }
 
 
+void DoubleToIStub::Generate(MacroAssembler* masm) {
+  Label out_of_range, only_low, negate, done;
+  Register input_reg = source();
+  Register result_reg = destination();
+
+  int double_offset = offset();
+  // Account for saved regs if input is sp.
+  if (input_reg.is(sp)) double_offset += 2 * kPointerSize;
+
+  // Immediate values for this stub fit in instructions, so it's safe to use ip.
+  Register scratch = ip;
+  Register scratch_low =
+      GetRegisterThatIsNotOneOf(input_reg, result_reg, scratch);
+  Register scratch_high =
+      GetRegisterThatIsNotOneOf(input_reg, result_reg, scratch, scratch_low);
+  LowDwVfpRegister double_scratch = kScratchDoubleReg;
+
+  __ Push(scratch_high, scratch_low);
+
+  if (!skip_fastpath()) {
+    // Load double input.
+    __ vldr(double_scratch, MemOperand(input_reg, double_offset));
+    __ vmov(scratch_low, scratch_high, double_scratch);
+
+    // Do fast-path convert from double to int.
+    __ vcvt_s32_f64(double_scratch.low(), double_scratch);
+    __ vmov(result_reg, double_scratch.low());
+
+    // If result is not saturated (0x7fffffff or 0x80000000), we are done.
+    __ sub(scratch, result_reg, Operand(1));
+    __ cmp(scratch, Operand(0x7ffffffe));
+    __ b(lt, &done);
+  } else {
+    // We've already done MacroAssembler::TryInlineTruncateDoubleToI, so we
+    // know exponent > 31, so we can skip the vcvt_s32_f64 which will saturate.
+    if (double_offset == 0) {
+      __ ldm(ia, input_reg, scratch_low.bit() | scratch_high.bit());
+    } else {
+      __ ldr(scratch_low, MemOperand(input_reg, double_offset));
+      __ ldr(scratch_high, MemOperand(input_reg, double_offset + kIntSize));
+    }
+  }
+
+  __ Ubfx(scratch, scratch_high,
+         HeapNumber::kExponentShift, HeapNumber::kExponentBits);
+  // Load scratch with exponent - 1. This is faster than loading
+  // with exponent because Bias + 1 = 1024 which is an *ARM* immediate value.
+  STATIC_ASSERT(HeapNumber::kExponentBias + 1 == 1024);
+  __ sub(scratch, scratch, Operand(HeapNumber::kExponentBias + 1));
+  // If exponent is greater than or equal to 84, the 32 least significant
+  // bits of the integer are 0s (1 implicit bit plus 52 mantissa bits,
+  // shifted left by at least 32 bits), so the result is 0.
+  // Compare exponent with 84 (compare exponent - 1 with 83).
+  __ cmp(scratch, Operand(83));
+  __ b(ge, &out_of_range);
+
+  // If we reach this code, 31 <= exponent <= 83.
+  // So, we don't have to handle cases where 0 <= exponent <= 20 for
+  // which we would need to shift right the high part of the mantissa.
+  // Scratch contains exponent - 1.
+  // Load scratch with 52 - exponent (load with 51 - (exponent - 1)).
+  __ rsb(scratch, scratch, Operand(51), SetCC);
+  __ b(ls, &only_low);
+  // 21 <= exponent <= 51, shift scratch_low and scratch_high
+  // to generate the result.
+  __ mov(scratch_low, Operand(scratch_low, LSR, scratch));
+  // Scratch contains: 52 - exponent.
+  // We need: exponent - 20.
+  // So we use: 32 - scratch = 32 - 52 + exponent = exponent - 20.
+  __ rsb(scratch, scratch, Operand(32));
+  __ Ubfx(result_reg, scratch_high,
+          0, HeapNumber::kMantissaBitsInTopWord);
+  // Set the implicit 1 before the mantissa part in scratch_high.
+  __ orr(result_reg, result_reg,
+         Operand(1 << HeapNumber::kMantissaBitsInTopWord));
+  __ orr(result_reg, scratch_low, Operand(result_reg, LSL, scratch));
+  __ b(&negate);
+
+  __ bind(&out_of_range);
+  __ mov(result_reg, Operand::Zero());
+  __ b(&done);
+
+  __ bind(&only_low);
+  // 52 <= exponent <= 83, shift only scratch_low.
+  // On entry, scratch contains: 52 - exponent.
+  __ rsb(scratch, scratch, Operand::Zero());
+  __ mov(result_reg, Operand(scratch_low, LSL, scratch));
+
+  __ bind(&negate);
+  // If the input was positive, scratch_high ASR 31 equals 0 and
+  // scratch_high LSR 31 equals 0.
+  // New result = (result eor 0) + 0 = result.
+  // If the input was negative, we have to negate the result.
+  // scratch_high ASR 31 equals 0xffffffff and scratch_high LSR 31 equals 1.
+  // New result = (result eor 0xffffffff) + 1 = 0 - result.
+  __ eor(result_reg, result_reg, Operand(scratch_high, ASR, 31));
+  __ add(result_reg, result_reg, Operand(scratch_high, LSR, 31));
+
+  __ bind(&done);
+
+  __ Pop(scratch_high, scratch_low);
+  __ Ret();
+}
+
+
 bool WriteInt32ToHeapNumberStub::IsPregenerated() {
   // These variants are compiled ahead of time.  See next method.
   if (the_int_.is(r1) && the_heap_number_.is(r0) && scratch_.is(r2)) {
@@ -1591,7 +1696,6 @@ void BinaryOpStub_GenerateFPOperation(MacroAssembler* masm,
   Register right = r0;
   Register scratch1 = r6;
   Register scratch2 = r7;
-  Register scratch3 = r4;
 
   ASSERT(smi_operands || (not_numbers != NULL));
   if (smi_operands) {
@@ -1689,12 +1793,8 @@ void BinaryOpStub_GenerateFPOperation(MacroAssembler* masm,
         __ SmiUntag(r2, right);
       } else {
         // Convert operands to 32-bit integers. Right in r2 and left in r3.
-        __ ConvertNumberToInt32(
-          left, r3, heap_number_map,
-          scratch1, scratch2, scratch3, d0, d1, not_numbers);
-        __ ConvertNumberToInt32(
-          right, r2, heap_number_map,
-          scratch1, scratch2, scratch3, d0, d1, not_numbers);
+        __ TruncateNumberToI(left, r3, heap_number_map, scratch1, not_numbers);
+        __ TruncateNumberToI(right, r2, heap_number_map, scratch1, not_numbers);
       }
 
       Label result_not_a_smi;
diff --git a/src/arm/code-stubs-arm.h b/src/arm/code-stubs-arm.h
index 6eab8d1..bee3e1e 100644
--- a/src/arm/code-stubs-arm.h
+++ b/src/arm/code-stubs-arm.h
@@ -376,7 +376,7 @@ class RecordWriteStub: public PlatformCodeStub {
           address_(address),
           scratch0_(scratch0) {
       ASSERT(!AreAliased(scratch0, object, address, no_reg));
-      scratch1_ = GetRegThatIsNotOneOf(object_, address_, scratch0_);
+      scratch1_ = GetRegisterThatIsNotOneOf(object_, address_, scratch0_);
     }
 
     void Save(MacroAssembler* masm) {
@@ -419,19 +419,6 @@ class RecordWriteStub: public PlatformCodeStub {
     Register scratch0_;
     Register scratch1_;
 
-    Register GetRegThatIsNotOneOf(Register r1,
-                                  Register r2,
-                                  Register r3) {
-      for (int i = 0; i < Register::NumAllocatableRegisters(); i++) {
-        Register candidate = Register::FromAllocationIndex(i);
-        if (candidate.is(r1)) continue;
-        if (candidate.is(r2)) continue;
-        if (candidate.is(r3)) continue;
-        return candidate;
-      }
-      UNREACHABLE();
-      return no_reg;
-    }
     friend class RecordWriteStub;
   };
 
diff --git a/src/arm/lithium-arm.cc b/src/arm/lithium-arm.cc
index 12a6290..2ee00dc 100644
--- a/src/arm/lithium-arm.cc
+++ b/src/arm/lithium-arm.cc
@@ -1915,13 +1915,10 @@ LInstruction* LChunkBuilder::DoChange(HChange* instr) {
       } else {
         value = UseRegister(instr->value());
         LOperand* temp1 = TempRegister();
-        LOperand* temp2 = instr->CanTruncateToInt32() ? TempRegister()
-                                                      : NULL;
-        LOperand* temp3 = FixedTemp(d11);
+        LOperand* temp2 = FixedTemp(d11);
         res = DefineSameAsFirst(new(zone()) LTaggedToI(value,
                                                        temp1,
-                                                       temp2,
-                                                       temp3));
+                                                       temp2));
         res = AssignEnvironment(res);
       }
       return res;
@@ -1941,14 +1938,12 @@ LInstruction* LChunkBuilder::DoChange(HChange* instr) {
       return AssignPointerMap(result);
     } else if (to.IsSmi()) {
       LOperand* value = UseRegister(instr->value());
-      return AssignEnvironment(DefineAsRegister(new(zone()) LDoubleToSmi(value,
-          TempRegister(), TempRegister())));
+      return AssignEnvironment(
+          DefineAsRegister(new(zone()) LDoubleToSmi(value)));
     } else {
       ASSERT(to.IsInteger32());
       LOperand* value = UseRegister(instr->value());
-      LOperand* temp1 = TempRegister();
-      LOperand* temp2 = instr->CanTruncateToInt32() ? TempRegister() : NULL;
-      LDoubleToI* res = new(zone()) LDoubleToI(value, temp1, temp2);
+      LDoubleToI* res = new(zone()) LDoubleToI(value);
       return AssignEnvironment(DefineAsRegister(res));
     }
   } else if (from.IsInteger32()) {
diff --git a/src/arm/lithium-arm.h b/src/arm/lithium-arm.h
index 9a6c395..316df7d 100644
--- a/src/arm/lithium-arm.h
+++ b/src/arm/lithium-arm.h
@@ -2045,17 +2045,13 @@ class LNumberTagD V8_FINAL : public LTemplateInstruction<1, 1, 2> {
 };
 
 
-class LDoubleToSmi V8_FINAL : public LTemplateInstruction<1, 1, 2> {
+class LDoubleToSmi V8_FINAL : public LTemplateInstruction<1, 1, 0> {
  public:
-  LDoubleToSmi(LOperand* value, LOperand* temp, LOperand* temp2) {
+  explicit LDoubleToSmi(LOperand* value) {
     inputs_[0] = value;
-    temps_[0] = temp;
-    temps_[1] = temp2;
   }
 
   LOperand* value() { return inputs_[0]; }
-  LOperand* temp() { return temps_[0]; }
-  LOperand* temp2() { return temps_[1]; }
 
   DECLARE_CONCRETE_INSTRUCTION(DoubleToSmi, "double-to-smi")
   DECLARE_HYDROGEN_ACCESSOR(UnaryOperation)
@@ -2065,17 +2061,13 @@ class LDoubleToSmi V8_FINAL : public LTemplateInstruction<1, 1, 2> {
 
 
 // Sometimes truncating conversion from a tagged value to an int32.
-class LDoubleToI V8_FINAL : public LTemplateInstruction<1, 1, 2> {
+class LDoubleToI V8_FINAL : public LTemplateInstruction<1, 1, 0> {
  public:
-  LDoubleToI(LOperand* value, LOperand* temp, LOperand* temp2) {
+  explicit LDoubleToI(LOperand* value) {
     inputs_[0] = value;
-    temps_[0] = temp;
-    temps_[1] = temp2;
   }
 
   LOperand* value() { return inputs_[0]; }
-  LOperand* temp() { return temps_[0]; }
-  LOperand* temp2() { return temps_[1]; }
 
   DECLARE_CONCRETE_INSTRUCTION(DoubleToI, "double-to-i")
   DECLARE_HYDROGEN_ACCESSOR(UnaryOperation)
@@ -2085,22 +2077,19 @@ class LDoubleToI V8_FINAL : public LTemplateInstruction<1, 1, 2> {
 
 
 // Truncating conversion from a tagged value to an int32.
-class LTaggedToI V8_FINAL : public LTemplateInstruction<1, 1, 3> {
+class LTaggedToI V8_FINAL : public LTemplateInstruction<1, 1, 2> {
  public:
   LTaggedToI(LOperand* value,
              LOperand* temp,
-             LOperand* temp2,
-             LOperand* temp3) {
+             LOperand* temp2) {
     inputs_[0] = value;
     temps_[0] = temp;
     temps_[1] = temp2;
-    temps_[2] = temp3;
   }
 
   LOperand* value() { return inputs_[0]; }
   LOperand* temp() { return temps_[0]; }
   LOperand* temp2() { return temps_[1]; }
-  LOperand* temp3() { return temps_[2]; }
 
   DECLARE_CONCRETE_INSTRUCTION(TaggedToI, "tagged-to-i")
   DECLARE_HYDROGEN_ACCESSOR(UnaryOperation)
diff --git a/src/arm/lithium-codegen-arm.cc b/src/arm/lithium-codegen-arm.cc
index 6dae6db..f3f8b50 100644
--- a/src/arm/lithium-codegen-arm.cc
+++ b/src/arm/lithium-codegen-arm.cc
@@ -4908,7 +4908,7 @@ void LCodeGen::DoDeferredTaggedToI(LTaggedToI* instr) {
   Register scratch1 = scratch0();
   Register scratch2 = ToRegister(instr->temp());
   LowDwVfpRegister double_scratch = double_scratch0();
-  DwVfpRegister double_scratch2 = ToDoubleRegister(instr->temp3());
+  DwVfpRegister double_scratch2 = ToDoubleRegister(instr->temp2());
 
   ASSERT(!scratch1.is(input_reg) && !scratch1.is(scratch2));
   ASSERT(!scratch2.is(input_reg) && !scratch2.is(scratch1));
@@ -4919,18 +4919,14 @@ void LCodeGen::DoDeferredTaggedToI(LTaggedToI* instr) {
   // The carry flag is set when we reach this deferred code as we just executed
   // SmiUntag(heap_object, SetCC)
   STATIC_ASSERT(kHeapObjectTag == 1);
-  __ adc(input_reg, input_reg, Operand(input_reg));
+  __ adc(scratch2, input_reg, Operand(input_reg));
 
   // Heap number map check.
-  __ ldr(scratch1, FieldMemOperand(input_reg, HeapObject::kMapOffset));
+  __ ldr(scratch1, FieldMemOperand(scratch2, HeapObject::kMapOffset));
   __ LoadRoot(ip, Heap::kHeapNumberMapRootIndex);
   __ cmp(scratch1, Operand(ip));
 
   if (instr->truncating()) {
-    Register scratch3 = ToRegister(instr->temp2());
-    ASSERT(!scratch3.is(input_reg) &&
-           !scratch3.is(scratch1) &&
-           !scratch3.is(scratch2));
     // Performs a truncating conversion of a floating point number as used by
     // the JS bitwise operations.
     Label heap_number;
@@ -4938,23 +4934,18 @@ void LCodeGen::DoDeferredTaggedToI(LTaggedToI* instr) {
     // Check for undefined. Undefined is converted to zero for truncating
     // conversions.
     __ LoadRoot(ip, Heap::kUndefinedValueRootIndex);
-    __ cmp(input_reg, Operand(ip));
+    __ cmp(scratch2, Operand(ip));
     DeoptimizeIf(ne, instr->environment());
     __ mov(input_reg, Operand::Zero());
     __ b(&done);
 
     __ bind(&heap_number);
-    __ sub(scratch1, input_reg, Operand(kHeapObjectTag));
-    __ vldr(double_scratch2, scratch1, HeapNumber::kValueOffset);
-
-    __ ECMAToInt32(input_reg, double_scratch2,
-                   scratch1, scratch2, scratch3, double_scratch);
-
+    __ TruncateHeapNumberToI(input_reg, scratch2);
   } else {
     // Deoptimize if we don't have a heap number.
     DeoptimizeIf(ne, instr->environment());
 
-    __ sub(ip, input_reg, Operand(kHeapObjectTag));
+    __ sub(ip, scratch2, Operand(kHeapObjectTag));
     __ vldr(double_scratch2, ip, HeapNumber::kValueOffset);
     __ TryDoubleToInt32Exact(input_reg, double_scratch2, double_scratch);
     DeoptimizeIf(ne, instr->environment());
@@ -5026,14 +5017,11 @@ void LCodeGen::DoNumberUntagD(LNumberUntagD* instr) {
 void LCodeGen::DoDoubleToI(LDoubleToI* instr) {
   Register result_reg = ToRegister(instr->result());
   Register scratch1 = scratch0();
-  Register scratch2 = ToRegister(instr->temp());
   DwVfpRegister double_input = ToDoubleRegister(instr->value());
   LowDwVfpRegister double_scratch = double_scratch0();
 
   if (instr->truncating()) {
-    Register scratch3 = ToRegister(instr->temp2());
-    __ ECMAToInt32(result_reg, double_input,
-                   scratch1, scratch2, scratch3, double_scratch);
+    __ TruncateDoubleToI(result_reg, double_input);
   } else {
     __ TryDoubleToInt32Exact(result_reg, double_input, double_scratch);
     // Deoptimize if the input wasn't a int32 (inside a double).
@@ -5054,14 +5042,11 @@ void LCodeGen::DoDoubleToI(LDoubleToI* instr) {
 void LCodeGen::DoDoubleToSmi(LDoubleToSmi* instr) {
   Register result_reg = ToRegister(instr->result());
   Register scratch1 = scratch0();
-  Register scratch2 = ToRegister(instr->temp());
   DwVfpRegister double_input = ToDoubleRegister(instr->value());
   LowDwVfpRegister double_scratch = double_scratch0();
 
   if (instr->truncating()) {
-    Register scratch3 = ToRegister(instr->temp2());
-    __ ECMAToInt32(result_reg, double_input,
-                   scratch1, scratch2, scratch3, double_scratch);
+    __ TruncateDoubleToI(result_reg, double_input);
   } else {
     __ TryDoubleToInt32Exact(result_reg, double_input, double_scratch);
     // Deoptimize if the input wasn't a int32 (inside a double).
diff --git a/src/arm/macro-assembler-arm.cc b/src/arm/macro-assembler-arm.cc
index 0d1736c..16ba89c 100644
--- a/src/arm/macro-assembler-arm.cc
+++ b/src/arm/macro-assembler-arm.cc
@@ -829,26 +829,6 @@ void MacroAssembler::VmovLow(DwVfpRegister dst, Register src) {
 }
 
 
-void MacroAssembler::ConvertNumberToInt32(Register object,
-                                          Register dst,
-                                          Register heap_number_map,
-                                          Register scratch1,
-                                          Register scratch2,
-                                          Register scratch3,
-                                          DwVfpRegister double_scratch1,
-                                          LowDwVfpRegister double_scratch2,
-                                          Label* not_number) {
-  Label done;
-  UntagAndJumpIfSmi(dst, object, &done);
-  JumpIfNotHeapNumber(object, heap_number_map, scratch1, not_number);
-  vldr(double_scratch1, FieldMemOperand(object, HeapNumber::kValueOffset));
-  ECMAToInt32(dst, double_scratch1,
-              scratch1, scratch2, scratch3, double_scratch2);
-
-  bind(&done);
-}
-
-
 void MacroAssembler::LoadNumber(Register object,
                                 LowDwVfpRegister dst,
                                 Register heap_number_map,
@@ -2538,84 +2518,76 @@ void MacroAssembler::TryInt32Floor(Register result,
   bind(&exception);
 }
 
-
-void MacroAssembler::ECMAToInt32(Register result,
-                                 DwVfpRegister double_input,
-                                 Register scratch,
-                                 Register scratch_high,
-                                 Register scratch_low,
-                                 LowDwVfpRegister double_scratch) {
-  ASSERT(!scratch_high.is(result));
-  ASSERT(!scratch_low.is(result));
-  ASSERT(!scratch_low.is(scratch_high));
-  ASSERT(!scratch.is(result) &&
-         !scratch.is(scratch_high) &&
-         !scratch.is(scratch_low));
-  ASSERT(!double_input.is(double_scratch));
-
-  Label out_of_range, only_low, negate, done;
-
+void MacroAssembler::TryInlineTruncateDoubleToI(Register result,
+                                                DwVfpRegister double_input,
+                                                Label* done) {
+  LowDwVfpRegister double_scratch = kScratchDoubleReg;
   vcvt_s32_f64(double_scratch.low(), double_input);
   vmov(result, double_scratch.low());
 
   // If result is not saturated (0x7fffffff or 0x80000000), we are done.
-  sub(scratch, result, Operand(1));
-  cmp(scratch, Operand(0x7ffffffe));
-  b(lt, &done);
+  sub(ip, result, Operand(1));
+  cmp(ip, Operand(0x7ffffffe));
+  b(lt, done);
+}
 
-  vmov(scratch_low, scratch_high, double_input);
-  Ubfx(scratch, scratch_high,
-       HeapNumber::kExponentShift, HeapNumber::kExponentBits);
-  // Load scratch with exponent - 1. This is faster than loading
-  // with exponent because Bias + 1 = 1024 which is an *ARM* immediate value.
-  sub(scratch, scratch, Operand(HeapNumber::kExponentBias + 1));
-  // If exponent is greater than or equal to 84, the 32 less significant
-  // bits are 0s (2^84 = 1, 52 significant bits, 32 uncoded bits),
-  // the result is 0.
-  // Compare exponent with 84 (compare exponent - 1 with 83).
-  cmp(scratch, Operand(83));
-  b(ge, &out_of_range);
-
-  // If we reach this code, 31 <= exponent <= 83.
-  // So, we don't have to handle cases where 0 <= exponent <= 20 for
-  // which we would need to shift right the high part of the mantissa.
-  // Scratch contains exponent - 1.
-  // Load scratch with 52 - exponent (load with 51 - (exponent - 1)).
-  rsb(scratch, scratch, Operand(51), SetCC);
-  b(ls, &only_low);
-  // 21 <= exponent <= 51, shift scratch_low and scratch_high
-  // to generate the result.
-  mov(scratch_low, Operand(scratch_low, LSR, scratch));
-  // Scratch contains: 52 - exponent.
-  // We needs: exponent - 20.
-  // So we use: 32 - scratch = 32 - 52 + exponent = exponent - 20.
-  rsb(scratch, scratch, Operand(32));
-  Ubfx(result, scratch_high,
-       0, HeapNumber::kMantissaBitsInTopWord);
-  // Set the implicit 1 before the mantissa part in scratch_high.
-  orr(result, result, Operand(1 << HeapNumber::kMantissaBitsInTopWord));
-  orr(result, scratch_low, Operand(result, LSL, scratch));
-  b(&negate);
-
-  bind(&out_of_range);
-  mov(result, Operand::Zero());
-  b(&done);
 
-  bind(&only_low);
-  // 52 <= exponent <= 83, shift only scratch_low.
-  // On entry, scratch contains: 52 - exponent.
-  rsb(scratch, scratch, Operand::Zero());
-  mov(result, Operand(scratch_low, LSL, scratch));
-
-  bind(&negate);
-  // If input was positive, scratch_high ASR 31 equals 0 and
-  // scratch_high LSR 31 equals zero.
-  // New result = (result eor 0) + 0 = result.
-  // If the input was negative, we have to negate the result.
-  // Input_high ASR 31 equals 0xffffffff and scratch_high LSR 31 equals 1.
-  // New result = (result eor 0xffffffff) + 1 = 0 - result.
-  eor(result, result, Operand(scratch_high, ASR, 31));
-  add(result, result, Operand(scratch_high, LSR, 31));
+void MacroAssembler::TruncateDoubleToI(Register result,
+                                       DwVfpRegister double_input) {
+  Label done;
+
+  TryInlineTruncateDoubleToI(result, double_input, &done);
+
+  // If we fell through then inline version didn't succeed - call stub instead.
+  push(lr);
+  sub(sp, sp, Operand(kDoubleSize));  // Put input on stack.
+  vstr(double_input, MemOperand(sp, 0));
+
+  DoubleToIStub stub(sp, result, 0, true, true);
+  CallStub(&stub);
+
+  add(sp, sp, Operand(kDoubleSize));
+  pop(lr);
+
+  bind(&done);
+}
+
+
+void MacroAssembler::TruncateHeapNumberToI(Register result,
+                                           Register object) {
+  Label done;
+  LowDwVfpRegister double_scratch = kScratchDoubleReg;
+  ASSERT(!result.is(object));
+
+  vldr(double_scratch,
+       MemOperand(object, HeapNumber::kValueOffset - kHeapObjectTag));
+  TryInlineTruncateDoubleToI(result, double_scratch, &done);
+
+  // If we fell through then inline version didn't succeed - call stub instead.
+  push(lr);
+  DoubleToIStub stub(object,
+                     result,
+                     HeapNumber::kValueOffset - kHeapObjectTag,
+                     true,
+                     true);
+  CallStub(&stub);
+  pop(lr);
+
+  bind(&done);
+}
+
+
+void MacroAssembler::TruncateNumberToI(Register object,
+                                       Register result,
+                                       Register heap_number_map,
+                                       Register scratch1,
+                                       Label* not_number) {
+  Label done;
+  ASSERT(!result.is(object));
+
+  UntagAndJumpIfSmi(result, object, &done);
+  JumpIfNotHeapNumber(object, heap_number_map, scratch1, not_number);
+  TruncateHeapNumberToI(result, object);
 
   bind(&done);
 }
@@ -3835,6 +3807,30 @@ void MacroAssembler::TestJSArrayForAllocationMemento(
 }
 
 
+Register GetRegisterThatIsNotOneOf(Register reg1,
+                                   Register reg2,
+                                   Register reg3,
+                                   Register reg4,
+                                   Register reg5,
+                                   Register reg6) {
+  RegList regs = 0;
+  if (reg1.is_valid()) regs |= reg1.bit();
+  if (reg2.is_valid()) regs |= reg2.bit();
+  if (reg3.is_valid()) regs |= reg3.bit();
+  if (reg4.is_valid()) regs |= reg4.bit();
+  if (reg5.is_valid()) regs |= reg5.bit();
+  if (reg6.is_valid()) regs |= reg6.bit();
+
+  for (int i = 0; i < Register::NumAllocatableRegisters(); i++) {
+    Register candidate = Register::FromAllocationIndex(i);
+    if (regs & candidate.bit()) continue;
+    return candidate;
+  }
+  UNREACHABLE();
+  return no_reg;
+}
+
+
 #ifdef DEBUG
 bool AreAliased(Register reg1,
                 Register reg2,
diff --git a/src/arm/macro-assembler-arm.h b/src/arm/macro-assembler-arm.h
index 8b9fa2b..f3716c2 100644
--- a/src/arm/macro-assembler-arm.h
+++ b/src/arm/macro-assembler-arm.h
@@ -62,6 +62,14 @@ enum SmiCheck { INLINE_SMI_CHECK, OMIT_SMI_CHECK };
 enum LinkRegisterStatus { kLRHasNotBeenSaved, kLRHasBeenSaved };
 
 
+Register GetRegisterThatIsNotOneOf(Register reg1,
+                                   Register reg2 = no_reg,
+                                   Register reg3 = no_reg,
+                                   Register reg4 = no_reg,
+                                   Register reg5 = no_reg,
+                                   Register reg6 = no_reg);
+
+
 #ifdef DEBUG
 bool AreAliased(Register reg1,
                 Register reg2,
@@ -491,19 +499,6 @@ class MacroAssembler: public Assembler {
   void VmovLow(Register dst, DwVfpRegister src);
   void VmovLow(DwVfpRegister dst, Register src);
 
-  // Converts the smi or heap number in object to an int32 using the rules
-  // for ToInt32 as described in ECMAScript 9.5.: the value is truncated
-  // and brought into the range -2^31 .. +2^31 - 1.
-  void ConvertNumberToInt32(Register object,
-                            Register dst,
-                            Register heap_number_map,
-                            Register scratch1,
-                            Register scratch2,
-                            Register scratch3,
-                            DwVfpRegister double_scratch1,
-                            LowDwVfpRegister double_scratch2,
-                            Label* not_int32);
-
   // Loads the number from object into dst register.
   // If |object| is neither smi nor heap number, |not_number| is jumped to
   // with |object| still intact.
@@ -989,15 +984,34 @@ class MacroAssembler: public Assembler {
                      Label* exact);
 
   // Performs a truncating conversion of a floating point number as used by
+  // the JS bitwise operations. See ECMA-262 9.5: ToInt32. Goes to 'done' if it
+  // succeeds, otherwise falls through if result is saturated. On return
+  // 'result' either holds answer, or is clobbered on fall through.
+  //
+  // Only public for the test code in test-code-stubs-arm.cc.
+  void TryInlineTruncateDoubleToI(Register result,
+                                  DwVfpRegister input,
+                                  Label* done);
+
+  // Performs a truncating conversion of a floating point number as used by
   // the JS bitwise operations. See ECMA-262 9.5: ToInt32.
-  // Double_scratch must be between d0 and d15.
-  // Exits with 'result' holding the answer and all other registers clobbered.
-  void ECMAToInt32(Register result,
-                   DwVfpRegister double_input,
-                   Register scratch,
-                   Register scratch_high,
-                   Register scratch_low,
-                   LowDwVfpRegister double_scratch);
+  // Exits with 'result' holding the answer.
+  void TruncateDoubleToI(Register result, DwVfpRegister double_input);
+
+  // Performs a truncating conversion of a heap number as used by
+  // the JS bitwise operations. See ECMA-262 9.5: ToInt32. 'result' and 'object'
+  // must be different registers.  Exits with 'result' holding the answer.
+  void TruncateHeapNumberToI(Register result, Register object);
+
+  // Converts the smi or heap number in object to an int32 using the rules
+  // for ToInt32 as described in ECMAScript 9.5.: the value is truncated
+  // and brought into the range -2^31 .. +2^31 - 1. 'result' and 'object'
+  // must be different registers.
+  void TruncateNumberToI(Register object,
+                         Register result,
+                         Register heap_number_map,
+                         Register scratch1,
+                         Label* not_int32);
 
   // Check whether d16-d31 are available on the CPU. The result is given by the
   // Z condition flag: Z==0 if d16-d31 available, Z==1 otherwise.
diff --git a/src/code-stubs.h b/src/code-stubs.h
index c58acd6..4be914d 100644
--- a/src/code-stubs.h
+++ b/src/code-stubs.h
@@ -1704,11 +1704,13 @@ class DoubleToIStub : public PlatformCodeStub {
   DoubleToIStub(Register source,
                 Register destination,
                 int offset,
-                bool is_truncating) : bit_field_(0) {
+                bool is_truncating,
+                bool skip_fastpath = false) : bit_field_(0) {
     bit_field_ = SourceRegisterBits::encode(source.code_) |
       DestinationRegisterBits::encode(destination.code_) |
       OffsetBits::encode(offset) |
-      IsTruncatingBits::encode(is_truncating);
+      IsTruncatingBits::encode(is_truncating) |
+      SkipFastPathBits::encode(skip_fastpath);
   }
 
   Register source() {
@@ -1725,12 +1727,18 @@ class DoubleToIStub : public PlatformCodeStub {
     return IsTruncatingBits::decode(bit_field_);
   }
 
+  bool skip_fastpath() {
+    return SkipFastPathBits::decode(bit_field_);
+  }
+
   int offset() {
     return OffsetBits::decode(bit_field_);
   }
 
   void Generate(MacroAssembler* masm);
 
+  virtual bool SometimesSetsUpAFrame() { return false; }
+
  private:
   static const int kBitsPerRegisterNumber = 6;
   STATIC_ASSERT((1L << kBitsPerRegisterNumber) >= Register::kNumRegisters);
@@ -1743,6 +1751,8 @@ class DoubleToIStub : public PlatformCodeStub {
       public BitField<bool, 2 * kBitsPerRegisterNumber, 1> {};  // NOLINT
   class OffsetBits:
       public BitField<int, 2 * kBitsPerRegisterNumber + 1, 3> {};  // NOLINT
+  class SkipFastPathBits:
+      public BitField<int, 2 * kBitsPerRegisterNumber + 4, 1> {};  // NOLINT
 
   Major MajorKey() { return DoubleToI; }
   int MinorKey() { return bit_field_; }
diff --git a/test/cctest/cctest.gyp b/test/cctest/cctest.gyp
index 712823e..b7e0771 100644
--- a/test/cctest/cctest.gyp
+++ b/test/cctest/cctest.gyp
@@ -132,6 +132,8 @@
         ['v8_target_arch=="arm"', {
           'sources': [
             'test-assembler-arm.cc',
+            'test-code-stubs.cc',
+            'test-code-stubs-arm.cc',
             'test-disasm-arm.cc'
           ],
         }],
diff --git a/test/cctest/test-code-stubs-arm.cc b/test/cctest/test-code-stubs-arm.cc
new file mode 100644
index 0000000..cc51e83
--- /dev/null
+++ b/test/cctest/test-code-stubs-arm.cc
@@ -0,0 +1,181 @@
+// Copyright 2013 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <stdlib.h>
+
+#include "v8.h"
+
+#include "cctest.h"
+#include "code-stubs.h"
+#include "test-code-stubs.h"
+#include "factory.h"
+#include "macro-assembler.h"
+#include "platform.h"
+#include "simulator.h"
+
+using namespace v8::internal;
+
+#define __ masm.
+
+ConvertDToIFunc MakeConvertDToIFuncTrampoline(Isolate* isolate,
+                                              Register source_reg,
+                                              Register destination_reg,
+                                              bool inline_fastpath) {
+  // Allocate an executable page of memory.
+  size_t actual_size;
+  byte* buffer = static_cast<byte*>(OS::Allocate(Assembler::kMinimalBufferSize,
+                                                 &actual_size,
+                                                 true));
+  CHECK(buffer);
+  HandleScope handles(isolate);
+  MacroAssembler masm(isolate, buffer, static_cast<int>(actual_size));
+  masm.set_allow_stub_calls(false);
+  DoubleToIStub stub(source_reg, destination_reg, 0, true, inline_fastpath);
+
+  byte* start = stub.GetCode(isolate)->instruction_start();
+  Label done;
+
+  // Save callee save registers.
+  __ Push(r7, r6, r5, r4);
+  __ Push(lr);
+
+  // Push the double argument.
+  __ vmov(d0, r0, r1);
+  __ sub(sp, sp, Operand(kDoubleSize));
+  __ vstr(d0, sp, 0);
+  if (!source_reg.is(sp)) {
+    __ mov(source_reg, sp);
+  }
+
+  // Save registers to make sure they don't get clobbered.
+  int source_reg_offset = kDoubleSize;
+  int reg_num = 0;
+  for (;reg_num < Register::NumAllocatableRegisters(); ++reg_num) {
+    Register reg = Register::from_code(reg_num);
+    if (!reg.is(destination_reg)) {
+      __ push(reg);
+      source_reg_offset += kPointerSize;
+    }
+  }
+
+  // Re-push the double argument.
+  __ sub(sp, sp, Operand(kDoubleSize));
+  __ vstr(d0, sp, 0);
+
+  // Call through to the actual stub
+  if (inline_fastpath) {
+    __ vldr(d0, MemOperand(source_reg));
+    __ TryInlineTruncateDoubleToI(destination_reg, d0, &done);
+    if (destination_reg.is(source_reg) && !source_reg.is(sp)) {
+      // Restore clobbered source_reg.
+      __ add(source_reg, sp, Operand(source_reg_offset));
+    }
+  }
+  __ Call(start, RelocInfo::EXTERNAL_REFERENCE);
+  __ bind(&done);
+
+  __ add(sp, sp, Operand(kDoubleSize));
+
+  // Make sure no registers have been unexpectedly clobbered
+  for (--reg_num; reg_num >= 0; --reg_num) {
+    Register reg = Register::from_code(reg_num);
+    if (!reg.is(destination_reg)) {
+      __ ldr(ip, MemOperand(sp, 0));
+      __ cmp(reg, ip);
+      __ Assert(eq, kRegisterWasClobbered);
+      __ add(sp, sp, Operand(kPointerSize));
+    }
+  }
+
+  __ add(sp, sp, Operand(kDoubleSize));
+
+  if (!destination_reg.is(r0))
+    __ mov(r0, destination_reg);
+
+  // Restore callee save registers.
+  __ Pop(lr);
+  __ Pop(r7, r6, r5, r4);
+
+  __ Ret(0);
+
+  CodeDesc desc;
+  masm.GetCode(&desc);
+  return (reinterpret_cast<ConvertDToIFunc>(
+      reinterpret_cast<intptr_t>(buffer)));
+}
+
+#undef __
+
+
+static Isolate* GetIsolateFrom(LocalContext* context) {
+  return reinterpret_cast<Isolate*>((*context)->GetIsolate());
+}
+
+
+int32_t RunGeneratedCodeCallWrapper(ConvertDToIFunc func,
+                                    double from) {
+#ifdef USE_SIMULATOR
+  return reinterpret_cast<int32_t>(CALL_GENERATED_CODE(func, from, 0, 0, 0, 0));
+#else
+  return (*func)(from);
+#endif
+}
+
+
+TEST(ConvertDToI) {
+  CcTest::InitializeVM();
+  LocalContext context;
+  Isolate* isolate = GetIsolateFrom(&context);
+  HandleScope scope(isolate);
+
+#if DEBUG
+  // Verify that the tests actually work with the C version. In the release
+  // code, the compiler optimizes it away because it's all constant, but does it
+  // wrong, triggering an assert on gcc.
+  RunAllTruncationTests(&ConvertDToICVersion);
+#endif
+
+  Register source_registers[] = {sp, r0, r1, r2, r3, r4, r5, r6, r7};
+  Register dest_registers[] = {r0, r1, r2, r3, r4, r5, r6, r7};
+
+  for (size_t s = 0; s < sizeof(source_registers) / sizeof(Register); s++) {
+    for (size_t d = 0; d < sizeof(dest_registers) / sizeof(Register); d++) {
+      RunAllTruncationTests(
+          RunGeneratedCodeCallWrapper,
+          MakeConvertDToIFuncTrampoline(isolate,
+                                        source_registers[s],
+                                        dest_registers[d],
+                                        false));
+      RunAllTruncationTests(
+          RunGeneratedCodeCallWrapper,
+          MakeConvertDToIFuncTrampoline(isolate,
+                                        source_registers[s],
+                                        dest_registers[d],
+                                        true));
+    }
+  }
+}
diff --git a/test/cctest/test-code-stubs.cc b/test/cctest/test-code-stubs.cc
index 4050696..d698804 100644
--- a/test/cctest/test-code-stubs.cc
+++ b/test/cctest/test-code-stubs.cc
@@ -71,22 +71,37 @@ int STDCALL ConvertDToICVersion(double d) {
 }
 
 
-void RunOneTruncationTestWithTest(ConvertDToIFunc func,
+void RunOneTruncationTestWithTest(ConvertDToICallWrapper callWrapper,
+                                  ConvertDToIFunc func,
                                   double from,
                                   double raw) {
   uint64_t to = static_cast<int64_t>(raw);
-  int result = (*func)(from);
+  int result = (*callWrapper)(func, from);
   CHECK_EQ(static_cast<int>(to), result);
 }
 
 
+int32_t DefaultCallWrapper(ConvertDToIFunc func,
+                           double from) {
+  return (*func)(from);
+}
+
+
 // #define NaN and Infinity so that it's possible to cut-and-paste these tests
 // directly to a .js file and run them.
 #define NaN (OS::nan_value())
 #define Infinity (std::numeric_limits<double>::infinity())
-#define RunOneTruncationTest(p1, p2) RunOneTruncationTestWithTest(func, p1, p2)
+#define RunOneTruncationTest(p1, p2) \
+    RunOneTruncationTestWithTest(callWrapper, func, p1, p2)
+
 
 void RunAllTruncationTests(ConvertDToIFunc func) {
+  RunAllTruncationTests(DefaultCallWrapper, func);
+}
+
+
+void RunAllTruncationTests(ConvertDToICallWrapper callWrapper,
+                           ConvertDToIFunc func) {
   RunOneTruncationTest(0, 0);
   RunOneTruncationTest(0.5, 0);
   RunOneTruncationTest(-0.5, 0);
diff --git a/test/cctest/test-code-stubs.h b/test/cctest/test-code-stubs.h
index eab8e63..910e0d1 100644
--- a/test/cctest/test-code-stubs.h
+++ b/test/cctest/test-code-stubs.h
@@ -41,8 +41,13 @@
 typedef int32_t STDCALL ConvertDToIFuncType(double input);
 typedef ConvertDToIFuncType* ConvertDToIFunc;
 
+typedef int32_t ConvertDToICallWrapperType(ConvertDToIFunc func, double from);
+typedef ConvertDToICallWrapperType* ConvertDToICallWrapper;
+
 int STDCALL ConvertDToICVersion(double d);
 
 void RunAllTruncationTests(ConvertDToIFunc func);
+void RunAllTruncationTests(ConvertDToICallWrapper callWrapper,
+                           ConvertDToIFunc func);
 
 #endif
-- 
2.7.4