From 984135cb32d73308857bc86a45e68fec453e0e31 Mon Sep 17 00:00:00 2001 From: "karlklose@chromium.org" Date: Tue, 15 Mar 2011 11:19:13 +0000 Subject: [PATCH] ARM: Improved double to integer truncation. Patch from ARM Ltd. BUG=none TEST=Added to cctest/test-assembler-arm.cc and cctest/test-disasm-arm.cc Review URL: http://codereview.chromium.org/6625084 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@7174 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/arm/assembler-arm.cc | 8 +++ src/arm/assembler-arm.h | 3 + src/arm/code-stubs-arm.cc | 63 ++---------------- src/arm/constants-arm.h | 4 +- src/arm/disasm-arm.cc | 3 + src/arm/lithium-arm.cc | 18 ++++-- src/arm/lithium-arm.h | 16 +++-- src/arm/lithium-codegen-arm.cc | 133 +++++++++++++++++++------------------- src/arm/macro-assembler-arm.cc | 115 ++++++++++++++++++++++++++++++++ src/arm/macro-assembler-arm.h | 31 +++++++-- src/arm/simulator-arm.cc | 7 ++ test/cctest/test-assembler-arm.cc | 14 ++++ test/cctest/test-disasm-arm.cc | 5 ++ 13 files changed, 281 insertions(+), 139 deletions(-) diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc index 48b91a6..54b2446 100644 --- a/src/arm/assembler-arm.cc +++ b/src/arm/assembler-arm.cc @@ -2384,6 +2384,14 @@ void Assembler::vcvt_f32_f64(const SwVfpRegister dst, } +void Assembler::vneg(const DwVfpRegister dst, + const DwVfpRegister src, + const Condition cond) { + emit(cond | 0xE*B24 | 0xB*B20 | B16 | dst.code()*B12 | + 0x5*B9 | B8 | B6 | src.code()); +} + + void Assembler::vabs(const DwVfpRegister dst, const DwVfpRegister src, const Condition cond) { diff --git a/src/arm/assembler-arm.h b/src/arm/assembler-arm.h index 4594ecb..cf94e83 100644 --- a/src/arm/assembler-arm.h +++ b/src/arm/assembler-arm.h @@ -992,6 +992,9 @@ class Assembler : public Malloced { VFPConversionMode mode = kDefaultRoundToZero, const Condition cond = al); + void vneg(const DwVfpRegister dst, + const DwVfpRegister src, + const Condition cond = al); void vabs(const DwVfpRegister 
dst, const DwVfpRegister src, const Condition cond = al); diff --git a/src/arm/code-stubs-arm.cc b/src/arm/code-stubs-arm.cc index 3e9a9e9..f80998a 100644 --- a/src/arm/code-stubs-arm.cc +++ b/src/arm/code-stubs-arm.cc @@ -635,64 +635,13 @@ void FloatingPointHelper::ConvertNumberToInt32(MacroAssembler* masm, __ jmp(&done); __ bind(¬_in_int32_range); - __ ldr(scratch2, FieldMemOperand(object, HeapNumber::kExponentOffset)); - __ ldr(scratch1, FieldMemOperand(object, HeapNumber::kMantissaOffset)); + __ ldr(scratch1, FieldMemOperand(object, HeapNumber::kExponentOffset)); + __ ldr(scratch2, FieldMemOperand(object, HeapNumber::kMantissaOffset)); - // Register scratch1 contains mantissa word, scratch2 contains - // sign, exponent and mantissa. Extract biased exponent into dst. - __ Ubfx(dst, - scratch2, - HeapNumber::kExponentShift, - HeapNumber::kExponentBits); - - // Express exponent as delta to 31. - __ sub(dst, dst, Operand(HeapNumber::kExponentBias + 31)); - - Label normal_exponent; - // If the delta is larger than kMantissaBits plus one, all bits - // would be shifted away, which means that we can return 0. - __ cmp(dst, Operand(HeapNumber::kMantissaBits + 1)); - __ b(&normal_exponent, lt); - __ mov(dst, Operand(0)); - __ jmp(&done); - - __ bind(&normal_exponent); - const int kShiftBase = HeapNumber::kNonMantissaBitsInTopWord - 1; - // Calculate shift. - __ add(scratch3, dst, Operand(kShiftBase)); - - // Put implicit 1 before the mantissa part in scratch2. - __ orr(scratch2, - scratch2, - Operand(1 << HeapNumber::kMantissaBitsInTopWord)); - - // Save sign. - Register sign = dst; - __ and_(sign, scratch2, Operand(HeapNumber::kSignMask)); - - // Shift mantisssa bits the correct position in high word. - __ mov(scratch2, Operand(scratch2, LSL, scratch3)); - - // Replace the shifted bits with bits from the lower mantissa word. - Label pos_shift, shift_done; - __ rsb(scratch3, scratch3, Operand(32), SetCC); - __ b(&pos_shift, ge); - - // Negate scratch3. 
- __ rsb(scratch3, scratch3, Operand(0)); - __ mov(scratch1, Operand(scratch1, LSL, scratch3)); - __ jmp(&shift_done); - - __ bind(&pos_shift); - __ mov(scratch1, Operand(scratch1, LSR, scratch3)); - - __ bind(&shift_done); - __ orr(scratch2, scratch2, Operand(scratch1)); - - // Restore sign if necessary. - __ cmp(sign, Operand(0)); - __ rsb(dst, scratch2, Operand(0), LeaveCC, ne); - __ mov(dst, scratch2, LeaveCC, eq); + __ EmitOutOfInt32RangeTruncate(dst, + scratch1, + scratch2, + scratch3); __ jmp(&done); __ bind(&is_smi); diff --git a/src/arm/constants-arm.h b/src/arm/constants-arm.h index e6033a8..3adb916 100644 --- a/src/arm/constants-arm.h +++ b/src/arm/constants-arm.h @@ -388,9 +388,11 @@ enum VFPConversionMode { // This mask does not include the "inexact" or "input denormal" cumulative // exceptions flags, because we usually don't want to check for it. static const uint32_t kVFPExceptionMask = 0xf; +static const uint32_t kVFPInvalidOpExceptionBit = 1 << 0; +static const uint32_t kVFPOverflowExceptionBit = 1 << 2; +static const uint32_t kVFPUnderflowExceptionBit = 1 << 3; static const uint32_t kVFPInexactExceptionBit = 1 << 4; static const uint32_t kVFPFlushToZeroMask = 1 << 24; -static const uint32_t kVFPInvalidExceptionBit = 1; static const uint32_t kVFPNConditionFlagBit = 1 << 31; static const uint32_t kVFPZConditionFlagBit = 1 << 30; diff --git a/src/arm/disasm-arm.cc b/src/arm/disasm-arm.cc index 08f605b..f154839 100644 --- a/src/arm/disasm-arm.cc +++ b/src/arm/disasm-arm.cc @@ -1067,6 +1067,9 @@ void Decoder::DecodeTypeVFP(Instruction* instr) { } else if ((instr->Opc2Value() == 0x0) && (instr->Opc3Value() == 0x3)) { // vabs Format(instr, "vabs'cond 'Dd, 'Dm"); + } else if ((instr->Opc2Value() == 0x1) && (instr->Opc3Value() == 0x1)) { + // vneg + Format(instr, "vneg'cond 'Dd, 'Dm"); } else if ((instr->Opc2Value() == 0x7) && (instr->Opc3Value() == 0x3)) { DecodeVCVTBetweenDoubleAndSingle(instr); } else if ((instr->Opc2Value() == 0x8) && 
(instr->Opc3Value() & 0x1)) { diff --git a/src/arm/lithium-arm.cc b/src/arm/lithium-arm.cc index a9b33a0..72bff78 100644 --- a/src/arm/lithium-arm.cc +++ b/src/arm/lithium-arm.cc @@ -1610,12 +1610,15 @@ LInstruction* LChunkBuilder::DoChange(HChange* instr) { LOperand* value = UseRegister(instr->value()); bool needs_check = !instr->value()->type().IsSmi(); LInstruction* res = NULL; - if (needs_check) { - res = DefineSameAsFirst(new LTaggedToI(value, FixedTemp(d1))); - } else { + if (!needs_check) { res = DefineSameAsFirst(new LSmiUntag(value, needs_check)); - } - if (needs_check) { + } else { + LOperand* temp1 = TempRegister(); + LOperand* temp2 = instr->CanTruncateToInt32() ? TempRegister() + : NULL; + LOperand* temp3 = instr->CanTruncateToInt32() ? FixedTemp(d3) + : NULL; + res = DefineSameAsFirst(new LTaggedToI(value, temp1, temp2, temp3)); res = AssignEnvironment(res); } return res; @@ -1635,7 +1638,10 @@ LInstruction* LChunkBuilder::DoChange(HChange* instr) { } else { ASSERT(to.IsInteger32()); LOperand* value = UseRegister(instr->value()); - LDoubleToI* res = new LDoubleToI(value, TempRegister()); + LDoubleToI* res = + new LDoubleToI(value, + TempRegister(), + instr->CanTruncateToInt32() ? TempRegister() : NULL); return AssignEnvironment(DefineAsRegister(res)); } } else if (from.IsInteger32()) { diff --git a/src/arm/lithium-arm.h b/src/arm/lithium-arm.h index 54faafd..cb811cc 100644 --- a/src/arm/lithium-arm.h +++ b/src/arm/lithium-arm.h @@ -1458,11 +1458,12 @@ class LNumberTagD: public LTemplateInstruction<1, 1, 2> { // Sometimes truncating conversion from a tagged value to an int32. 
-class LDoubleToI: public LTemplateInstruction<1, 1, 1> { +class LDoubleToI: public LTemplateInstruction<1, 1, 2> { public: - explicit LDoubleToI(LOperand* value, LOperand* temp1) { + LDoubleToI(LOperand* value, LOperand* temp1, LOperand* temp2) { inputs_[0] = value; temps_[0] = temp1; + temps_[1] = temp2; } DECLARE_CONCRETE_INSTRUCTION(DoubleToI, "double-to-i") @@ -1473,11 +1474,16 @@ class LDoubleToI: public LTemplateInstruction<1, 1, 1> { // Truncating conversion from a tagged value to an int32. -class LTaggedToI: public LTemplateInstruction<1, 1, 1> { +class LTaggedToI: public LTemplateInstruction<1, 1, 3> { public: - LTaggedToI(LOperand* value, LOperand* temp) { + LTaggedToI(LOperand* value, + LOperand* temp1, + LOperand* temp2, + LOperand* temp3) { inputs_[0] = value; - temps_[0] = temp; + temps_[0] = temp1; + temps_[1] = temp2; + temps_[2] = temp3; } DECLARE_CONCRETE_INSTRUCTION(TaggedToI, "tagged-to-i") diff --git a/src/arm/lithium-codegen-arm.cc b/src/arm/lithium-codegen-arm.cc index 9cb55fa..aa252f1 100644 --- a/src/arm/lithium-codegen-arm.cc +++ b/src/arm/lithium-codegen-arm.cc @@ -3380,19 +3380,30 @@ class DeferredTaggedToI: public LDeferredCode { void LCodeGen::DoDeferredTaggedToI(LTaggedToI* instr) { - Label done; Register input_reg = ToRegister(instr->InputAt(0)); - Register scratch = scratch0(); - DoubleRegister dbl_scratch = d0; - SwVfpRegister flt_scratch = s0; - DoubleRegister dbl_tmp = ToDoubleRegister(instr->TempAt(0)); + Register scratch1 = scratch0(); + Register scratch2 = ToRegister(instr->TempAt(0)); + DwVfpRegister double_scratch = double_scratch0(); + SwVfpRegister single_scratch = double_scratch.low(); + + ASSERT(!scratch1.is(input_reg) && !scratch1.is(scratch2)); + ASSERT(!scratch2.is(input_reg) && !scratch2.is(scratch1)); + + Label done; // Heap number map check. 
- __ ldr(scratch, FieldMemOperand(input_reg, HeapObject::kMapOffset)); + __ ldr(scratch1, FieldMemOperand(input_reg, HeapObject::kMapOffset)); __ LoadRoot(ip, Heap::kHeapNumberMapRootIndex); - __ cmp(scratch, Operand(ip)); + __ cmp(scratch1, Operand(ip)); if (instr->truncating()) { + Register scratch3 = ToRegister(instr->TempAt(1)); + DwVfpRegister double_scratch2 = ToDoubleRegister(instr->TempAt(2)); + ASSERT(!scratch3.is(input_reg) && + !scratch3.is(scratch1) && + !scratch3.is(scratch2)); + // Performs a truncating conversion of a floating point number as used by + // the JS bitwise operations. Label heap_number; __ b(eq, &heap_number); // Check for undefined. Undefined is converted to zero for truncating @@ -3404,36 +3415,38 @@ void LCodeGen::DoDeferredTaggedToI(LTaggedToI* instr) { __ b(&done); __ bind(&heap_number); - __ sub(ip, input_reg, Operand(kHeapObjectTag)); - __ vldr(dbl_tmp, ip, HeapNumber::kValueOffset); - __ vcmp(dbl_tmp, 0.0); // Sets overflow bit in FPSCR flags if NaN. - __ vcvt_s32_f64(flt_scratch, dbl_tmp); - __ vmov(input_reg, flt_scratch); // 32-bit result of conversion. - __ vmrs(pc); // Move vector status bits to normal status bits. - // Overflow bit is set if dbl_tmp is Nan. - __ cmn(input_reg, Operand(1), vc); // 0x7fffffff + 1 -> overflow. - __ cmp(input_reg, Operand(1), vc); // 0x80000000 - 1 -> overflow. - DeoptimizeIf(vs, instr->environment()); // Saturation may have occured. + __ sub(scratch1, input_reg, Operand(kHeapObjectTag)); + __ vldr(double_scratch2, scratch1, HeapNumber::kValueOffset); + + __ EmitECMATruncate(input_reg, + double_scratch2, + single_scratch, + scratch1, + scratch2, + scratch3); } else { + CpuFeatures::Scope scope(VFP3); // Deoptimize if we don't have a heap number. DeoptimizeIf(ne, instr->environment()); __ sub(ip, input_reg, Operand(kHeapObjectTag)); - __ vldr(dbl_tmp, ip, HeapNumber::kValueOffset); - __ vcvt_s32_f64(flt_scratch, dbl_tmp); - __ vmov(input_reg, flt_scratch); // 32-bit result of conversion. 
- // Non-truncating conversion means that we cannot lose bits, so we convert - // back to check; note that using non-overlapping s and d regs would be - // slightly faster. - __ vcvt_f64_s32(dbl_scratch, flt_scratch); - __ VFPCompareAndSetFlags(dbl_scratch, dbl_tmp); - DeoptimizeIf(ne, instr->environment()); // Not equal or unordered. + __ vldr(double_scratch, ip, HeapNumber::kValueOffset); + __ EmitVFPTruncate(kRoundToZero, + single_scratch, + double_scratch, + scratch1, + scratch2, + kCheckForInexactConversion); + DeoptimizeIf(ne, instr->environment()); + // Load the result. + __ vmov(input_reg, single_scratch); + if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) { - __ tst(input_reg, Operand(input_reg)); + __ cmp(input_reg, Operand(0)); __ b(ne, &done); - __ vmov(lr, ip, dbl_tmp); - __ tst(ip, Operand(1 << 31)); // Test sign bit. + __ vmov(scratch1, double_scratch.high()); + __ tst(scratch1, Operand(HeapNumber::kSignMask)); DeoptimizeIf(ne, instr->environment()); } } @@ -3475,48 +3488,38 @@ void LCodeGen::DoNumberUntagD(LNumberUntagD* instr) { void LCodeGen::DoDoubleToI(LDoubleToI* instr) { - LOperand* input = instr->InputAt(0); - ASSERT(input->IsDoubleRegister()); - LOperand* result = instr->result(); - ASSERT(result->IsRegister()); - - DoubleRegister double_input = ToDoubleRegister(input); - Register result_reg = ToRegister(result); - SwVfpRegister single_scratch = double_scratch0().low(); + Register result_reg = ToRegister(instr->result()); Register scratch1 = scratch0(); Register scratch2 = ToRegister(instr->TempAt(0)); + DwVfpRegister double_input = ToDoubleRegister(instr->InputAt(0)); + DwVfpRegister double_scratch = double_scratch0(); + SwVfpRegister single_scratch = double_scratch0().low(); - __ EmitVFPTruncate(kRoundToZero, - single_scratch, - double_input, - scratch1, - scratch2); - - // Deoptimize if we had a vfp invalid exception. - DeoptimizeIf(ne, instr->environment()); - - // Retrieve the result. 
- __ vmov(result_reg, single_scratch); + Label done; - if (!instr->truncating()) { - // Convert result back to double and compare with input - // to check if the conversion was exact. - __ vmov(single_scratch, result_reg); - __ vcvt_f64_s32(double_scratch0(), single_scratch); - __ VFPCompareAndSetFlags(double_scratch0(), double_input); + if (instr->truncating()) { + Register scratch3 = ToRegister(instr->TempAt(1)); + __ EmitECMATruncate(result_reg, + double_input, + single_scratch, + scratch1, + scratch2, + scratch3); + } else { + VFPRoundingMode rounding_mode = kRoundToMinusInf; + __ EmitVFPTruncate(rounding_mode, + single_scratch, + double_input, + scratch1, + scratch2, + kCheckForInexactConversion); + // Deoptimize if we had a vfp invalid exception, + // including inexact operation. DeoptimizeIf(ne, instr->environment()); - if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) { - Label done; - __ cmp(result_reg, Operand(0)); - __ b(ne, &done); - // Check for -0. - __ vmov(scratch1, double_input.high()); - __ tst(scratch1, Operand(HeapNumber::kSignMask)); - DeoptimizeIf(ne, instr->environment()); - - __ bind(&done); - } + // Retrieve the result. + __ vmov(result_reg, single_scratch); } + __ bind(&done); } diff --git a/src/arm/macro-assembler-arm.cc b/src/arm/macro-assembler-arm.cc index 916110a..63aadf6 100644 --- a/src/arm/macro-assembler-arm.cc +++ b/src/arm/macro-assembler-arm.cc @@ -2032,6 +2032,121 @@ void MacroAssembler::EmitVFPTruncate(VFPRoundingMode rounding_mode, } +void MacroAssembler::EmitOutOfInt32RangeTruncate(Register result, + Register input_high, + Register input_low, + Register scratch) { + Label done, normal_exponent, restore_sign; + + // Extract the biased exponent in result. + Ubfx(result, + input_high, + HeapNumber::kExponentShift, + HeapNumber::kExponentBits); + + // Check for Infinity and NaNs, which should return 0. 
+ cmp(result, Operand(HeapNumber::kExponentMask)); + mov(result, Operand(0), LeaveCC, eq); + b(eq, &done); + + // Express exponent as delta to (number of mantissa bits + 31). + sub(result, + result, + Operand(HeapNumber::kExponentBias + HeapNumber::kMantissaBits + 31), + SetCC); + + // If the delta is strictly positive, all bits would be shifted away, + // which means that we can return 0. + b(le, &normal_exponent); + mov(result, Operand(0)); + b(&done); + + bind(&normal_exponent); + const int kShiftBase = HeapNumber::kNonMantissaBitsInTopWord - 1; + // Calculate shift. + add(scratch, result, Operand(kShiftBase + HeapNumber::kMantissaBits), SetCC); + + // Save the sign. + Register sign = result; + result = no_reg; + and_(sign, input_high, Operand(HeapNumber::kSignMask)); + + // Set the implicit 1 before the mantissa part in input_high. + orr(input_high, + input_high, + Operand(1 << HeapNumber::kMantissaBitsInTopWord)); + // Shift the mantissa bits to the correct position. + // We don't need to clear non-mantissa bits as they will be shifted away. + // If they weren't, it would mean that the answer is in the 32bit range. + mov(input_high, Operand(input_high, LSL, scratch)); + + // Replace the shifted bits with bits from the lower mantissa word. + Label pos_shift, shift_done; + rsb(scratch, scratch, Operand(32), SetCC); + b(&pos_shift, ge); + + // Negate scratch. + rsb(scratch, scratch, Operand(0)); + mov(input_low, Operand(input_low, LSL, scratch)); + b(&shift_done); + + bind(&pos_shift); + mov(input_low, Operand(input_low, LSR, scratch)); + + bind(&shift_done); + orr(input_high, input_high, Operand(input_low)); + // Restore sign if necessary. 
+ cmp(sign, Operand(0)); + result = sign; + sign = no_reg; + rsb(result, input_high, Operand(0), LeaveCC, ne); + mov(result, input_high, LeaveCC, eq); + bind(&done); +} + + +void MacroAssembler::EmitECMATruncate(Register result, + DwVfpRegister double_input, + SwVfpRegister single_scratch, + Register scratch, + Register input_high, + Register input_low) { + CpuFeatures::Scope scope(VFP3); + ASSERT(!input_high.is(result)); + ASSERT(!input_low.is(result)); + ASSERT(!input_low.is(input_high)); + ASSERT(!scratch.is(result) && + !scratch.is(input_high) && + !scratch.is(input_low)); + ASSERT(!single_scratch.is(double_input.low()) && + !single_scratch.is(double_input.high())); + + Label done; + + // Clear cumulative exception flags. + ClearFPSCRBits(kVFPExceptionMask, scratch); + // Try a conversion to a signed integer. + vcvt_s32_f64(single_scratch, double_input); + vmov(result, single_scratch); + // Retrieve the FPSCR. + vmrs(scratch); + // Check for overflow and NaNs. + tst(scratch, Operand(kVFPOverflowExceptionBit | + kVFPUnderflowExceptionBit | + kVFPInvalidOpExceptionBit)); + // If we had no exceptions we are done. + b(eq, &done); + + // Load the double value and perform a manual truncation. + vmov(input_low, input_high, double_input); + EmitOutOfInt32RangeTruncate(result, + input_high, + input_low, + scratch); + bind(&done); +} + + void MacroAssembler::GetLeastBitsFromSmi(Register dst, Register src, int num_least_bits) { diff --git a/src/arm/macro-assembler-arm.h b/src/arm/macro-assembler-arm.h index d79edd7..f72bbed 100644 --- a/src/arm/macro-assembler-arm.h +++ b/src/arm/macro-assembler-arm.h @@ -649,11 +649,11 @@ class MacroAssembler: public Assembler { DwVfpRegister double_scratch, Label *not_int32); -// Truncates a double using a specific rounding mode. -// Clears the z flag (ne condition) if an overflow occurs. -// If exact_conversion is true, the z flag is also cleared if the conversion -// was inexact, ie. 
if the double value could not be converted exactly -// to a 32bit integer. + // Truncates a double using a specific rounding mode. + // Clears the z flag (ne condition) if an overflow occurs. + // If exact_conversion is true, the z flag is also cleared if the conversion + // was inexact, ie. if the double value could not be converted exactly + // to a 32bit integer. void EmitVFPTruncate(VFPRoundingMode rounding_mode, SwVfpRegister result, DwVfpRegister double_input, @@ -662,6 +662,27 @@ CheckForInexactConversion check = kDontCheckForInexactConversion); + // Helper for EmitECMATruncate. + // This will truncate a floating-point value outside of the signed 32bit + // integer range to a 32bit signed integer. + // Expects the double value loaded in input_high and input_low. + // Exits with the answer in 'result'. + // Note that this code does not work for values in the 32bit range! + void EmitOutOfInt32RangeTruncate(Register result, + Register input_high, + Register input_low, + Register scratch); + + // Performs a truncating conversion of a floating point number as used by + // the JS bitwise operations. See ECMA-262 9.5: ToInt32. + // Exits with 'result' holding the answer and all other registers clobbered. + void EmitECMATruncate(Register result, + DwVfpRegister double_input, + SwVfpRegister single_scratch, + Register scratch, + Register scratch2, + Register scratch3); + // Count leading zeros in a 32 bit word. On ARM5 and later it uses the clz // instruction. On pre-ARM5 hardware this routine gives the wrong answer // for 0 (31 instead of 32). 
Source and scratch can be the same in which case diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc index f475a18..cedce6d 100644 --- a/src/arm/simulator-arm.cc +++ b/src/arm/simulator-arm.cc @@ -2467,6 +2467,8 @@ void Simulator::DecodeType7(Instruction* instr) { // vmov :Rt = Sn // vcvt: Dd = Sm // vcvt: Sd = Dm +// Dd = vabs(Dm) +// Dd = vneg(Dm) // Dd = vadd(Dn, Dm) // Dd = vsub(Dn, Dm) // Dd = vmul(Dn, Dm) @@ -2502,6 +2504,11 @@ void Simulator::DecodeTypeVFP(Instruction* instr) { double dm_value = get_double_from_d_register(vm); double dd_value = fabs(dm_value); set_d_register_from_double(vd, dd_value); + } else if ((instr->Opc2Value() == 0x1) && (instr->Opc3Value() == 0x1)) { + // vneg + double dm_value = get_double_from_d_register(vm); + double dd_value = -dm_value; + set_d_register_from_double(vd, dd_value); } else if ((instr->Opc2Value() == 0x7) && (instr->Opc3Value() == 0x3)) { DecodeVCVTBetweenDoubleAndSingle(instr); } else if ((instr->Opc2Value() == 0x8) && (instr->Opc3Value() & 0x1)) { diff --git a/test/cctest/test-assembler-arm.cc b/test/cctest/test-assembler-arm.cc index 43cf580..29f5d10 100644 --- a/test/cctest/test-assembler-arm.cc +++ b/test/cctest/test-assembler-arm.cc @@ -232,6 +232,8 @@ TEST(4) { double g; double h; int i; + double m; + double n; float x; float y; } T; @@ -297,6 +299,14 @@ TEST(4) { __ vabs(d0, d2); __ vstr(d0, r4, OFFSET_OF(T, h)); + // Test vneg. 
+ __ vldr(d1, r4, OFFSET_OF(T, m)); + __ vneg(d0, d1); + __ vstr(d0, r4, OFFSET_OF(T, m)); + __ vldr(d1, r4, OFFSET_OF(T, n)); + __ vneg(d0, d1); + __ vstr(d0, r4, OFFSET_OF(T, n)); + __ ldm(ia_w, sp, r4.bit() | fp.bit() | pc.bit()); CodeDesc desc; @@ -319,12 +329,16 @@ TEST(4) { t.g = -2718.2818; t.h = 31415926.5; t.i = 0; + t.m = -2718.2818; + t.n = 123.456; t.x = 4.5; t.y = 9.0; Object* dummy = CALL_GENERATED_CODE(f, &t, 0, 0, 0, 0); USE(dummy); CHECK_EQ(4.5, t.y); CHECK_EQ(9.0, t.x); + CHECK_EQ(-123.456, t.n); + CHECK_EQ(2718.2818, t.m); CHECK_EQ(2, t.i); CHECK_EQ(2718.2818, t.g); CHECK_EQ(31415926.5, t.h); diff --git a/test/cctest/test-disasm-arm.cc b/test/cctest/test-disasm-arm.cc index dea0db9..c702645 100644 --- a/test/cctest/test-disasm-arm.cc +++ b/test/cctest/test-disasm-arm.cc @@ -440,6 +440,11 @@ TEST(Vfp) { COMPARE(vabs(d3, d4, mi), "4eb03bc4 vabsmi d3, d4"); + COMPARE(vneg(d0, d1), + "eeb10b41 vneg d0, d1"); + COMPARE(vneg(d3, d4, mi), + "4eb13b44 vnegmi d3, d4"); + COMPARE(vadd(d0, d1, d2), "ee310b02 vadd.f64 d0, d1, d2"); COMPARE(vadd(d3, d4, d5, mi), -- 2.7.4