From 0d94d7c78529f944afa5b8a682ab3a2b1706de8b Mon Sep 17 00:00:00 2001
From: "erik.corry@gmail.com"
 <erik.corry@gmail.com@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Date: Thu, 12 Nov 2009 13:04:02 +0000
Subject: [PATCH] Add vfp support on ARM.  Patch from John Jozwiak. Review URL:
 http://codereview.chromium.org/348019

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@3292 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
---
 AUTHORS                  |   1 +
 src/arm/assembler-arm.cc | 263 ++++++++++++++++++++++++++++++++
 src/arm/assembler-arm.h  | 135 +++++++++++++++++
 src/arm/codegen-arm.cc   | 225 +++++++++++++++++++--------
 src/arm/constants-arm.cc |  22 +++
 src/arm/constants-arm.h  |  22 +++
 src/arm/cpu-arm.cc       |   3 +-
 src/arm/disasm-arm.cc    | 201 ++++++++++++++++++++++++-
 src/arm/simulator-arm.cc | 317 ++++++++++++++++++++++++++++++++++++++-
 src/arm/simulator-arm.h  |  43 +++++-
 src/flag-definitions.h   |   2 +
 src/platform-linux.cc    |  40 +++++
 src/platform.h           |   3 +
 13 files changed, 1204 insertions(+), 73 deletions(-)

diff --git a/AUTHORS b/AUTHORS
index de8cabb0b..4fd7aa5b7 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -19,3 +19,4 @@ Rafal Krypa <rafal@krypa.net>
 Rene Rebe <rene@exactcode.de>
 Ryan Dahl <coldredlemur@gmail.com>
 Patrick Gansterer <paroga@paroga.com>
+John Jozwiak <jjozwiak@codeaurora.org>
diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc
index 39bb3ee4b..8eb375ff7 100644
--- a/src/arm/assembler-arm.cc
+++ b/src/arm/assembler-arm.cc
@@ -42,6 +42,34 @@
 namespace v8 {
 namespace internal {
 
+// Safe default is no features.
+uint64_t CpuFeatures::supported_ = 0;
+uint64_t CpuFeatures::enabled_ = 0;
+
+void CpuFeatures::Probe() {
+  // Perform runtime detection of VFP.
+  static const char* descriptive_file_linux = "/proc/cpuinfo";
+
+  #if !defined(__arm__) || (defined(__VFP_FP__) && !defined(__SOFTFP__))
+    // The  supported & enabled flags for VFP are set to true for the following
+    // conditions, even without runtime detection of VFP:
+    // (1) For the simulator=arm build, always use VFP since
+    // the arm simulator has VFP support.
+    // (2) If V8 is being compiled with GCC with the vfp option turned on,
+    // always use VFP since the build system assumes that V8 will run on
+    // a platform that has VFP hardware.
+    supported_ |= static_cast<uint64_t>(1) << VFP3;
+    enabled_ |= static_cast<uint64_t>(1) << VFP3;
+  #endif
+
+  if (OS::fgrep_vfp(descriptive_file_linux, "vfp")) {
+    // This implementation also sets the VFP flags if
+    // runtime detection of VFP returns true.
+    supported_ |= static_cast<uint64_t>(1) << VFP3;
+    enabled_ |= static_cast<uint64_t>(1) << VFP3;
+  }
+}
+
 // -----------------------------------------------------------------------------
 // Implementation of Register and CRegister
 
@@ -84,6 +112,57 @@ CRegister cr13 = { 13 };
 CRegister cr14 = { 14 };
 CRegister cr15 = { 15 };
 
+// Support for the VFP registers s0 to s31 (d0 to d15).
+// Note that "sN:sM" is the same as "dN/2".
+Register s0  = {  0 };
+Register s1  = {  1 };
+Register s2  = {  2 };
+Register s3  = {  3 };
+Register s4  = {  4 };
+Register s5  = {  5 };
+Register s6  = {  6 };
+Register s7  = {  7 };
+Register s8  = {  8 };
+Register s9  = {  9 };
+Register s10 = { 10 };
+Register s11 = { 11 };
+Register s12 = { 12 };
+Register s13 = { 13 };
+Register s14 = { 14 };
+Register s15 = { 15 };
+Register s16 = { 16 };
+Register s17 = { 17 };
+Register s18 = { 18 };
+Register s19 = { 19 };
+Register s20 = { 20 };
+Register s21 = { 21 };
+Register s22 = { 22 };
+Register s23 = { 23 };
+Register s24 = { 24 };
+Register s25 = { 25 };
+Register s26 = { 26 };
+Register s27 = { 27 };
+Register s28 = { 28 };
+Register s29 = { 29 };
+Register s30 = { 30 };
+Register s31 = { 31 };
+
+Register d0  = {  0 };
+Register d1  = {  1 };
+Register d2  = {  2 };
+Register d3  = {  3 };
+Register d4  = {  4 };
+Register d5  = {  5 };
+Register d6  = {  6 };
+Register d7  = {  7 };
+Register d8  = {  8 };
+Register d9  = {  9 };
+Register d10 = { 10 };
+Register d11 = { 11 };
+Register d12 = { 12 };
+Register d13 = { 13 };
+Register d14 = { 14 };
+Register d15 = { 15 };
 
 // -----------------------------------------------------------------------------
 // Implementation of RelocInfo
@@ -203,10 +282,14 @@ enum {
 
   B4  = 1 << 4,
   B5  = 1 << 5,
+  B6  = 1 << 6,
   B7  = 1 << 7,
   B8  = 1 << 8,
+  B9  = 1 << 9,
   B12 = 1 << 12,
   B16 = 1 << 16,
+  B18 = 1 << 18,
+  B19 = 1 << 19,
   B20 = 1 << 20,
   B21 = 1 << 21,
   B22 = 1 << 22,
@@ -1281,6 +1364,186 @@ void Assembler::stc2(Coprocessor coproc,
   stc(coproc, crd, rn, option, l, static_cast<Condition>(nv));
 }
 
+// Support for VFP.
+void Assembler::fmdrr(const Register dst,
+                      const Register src1,
+                      const Register src2,
+                      const SBit s,
+                      const Condition cond) {
+  // Dm = <Rt,Rt2>.
+  // Instruction details available in ARM DDI 0406A, A8-646.
+  // cond(31-28) | 1100(27-24)| 010(23-21) | op=0(20) | Rt2(19-16) |
+  // Rt(15-12) | 1011(11-8) | 00(7-6) | M(5) | 1(4) | Vm
+
+  ASSERT(!src1.is(pc) && !src2.is(pc));
+  emit(cond | 0xC*B24 | B22 | src2.code()*B16 |
+       src1.code()*B12 | 0xB*B8 | B4 | dst.code());
+}
+
+
+void Assembler::fmrrd(const Register dst1,
+                      const Register dst2,
+                      const Register src,
+                      const SBit s,
+                      const Condition cond) {
+  // <Rt,Rt2> = Dm.
+  // Instruction details available in ARM DDI 0406A, A8-646.
+  // cond(31-28) | 1100(27-24)| 010(23-21) | op=1(20) | Rt2(19-16) |
+  // Rt(15-12) | 1011(11-8) | 00(7-6) | M(5) | 1(4) | Vm
+
+  ASSERT(!dst1.is(pc) && !dst2.is(pc));
+  emit(cond | 0xC*B24 | B22 | B20 | dst2.code()*B16 |
+       dst1.code()*B12 | 0xB*B8 | B4 | src.code());
+}
+
+
+void Assembler::fmsr(const Register dst,
+                     const Register src,
+                     const SBit s,
+                     const Condition cond) {
+  // Sn = Rt.
+  // Instruction details available in ARM DDI 0406A, A8-642.
+  // cond(31-28) | 1110(27-24)| 000(23-21) | op=0(20) | Vn(19-16) |
+  // Rt(15-12) | 1010(11-8) | N(7)=0 | 00(6-5) | 1(4) | 0000(3-0)
+
+  ASSERT(!src.is(pc));
+  emit(cond | 0xE*B24 | (dst.code() >> 1)*B16 |
+       src.code()*B12 | 0xA*B8 | (0x1 & dst.code())*B7 | B4);
+}
+
+
+void Assembler::fmrs(const Register dst,
+                     const Register src,
+                     const SBit s,
+                     const Condition cond) {
+  // Rt = Sn.
+  // Instruction details available in ARM DDI 0406A, A8-642.
+  // cond(31-28) | 1110(27-24)| 000(23-21) | op=1(20) | Vn(19-16) |
+  // Rt(15-12) | 1010(11-8) | N(7)=0 | 00(6-5) | 1(4) | 0000(3-0)
+
+  ASSERT(!dst.is(pc));
+  emit(cond | 0xE*B24 | B20 | (src.code() >> 1)*B16 |
+       dst.code()*B12 | 0xA*B8 | (0x1 & src.code())*B7 | B4);
+}
+
+
+void Assembler::fsitod(const Register dst,
+                       const Register src,
+                       const SBit s,
+                       const Condition cond) {
+  // Dd = Sm (integer in Sm converted to IEEE 64-bit doubles in Dd).
+  // Instruction details available in ARM DDI 0406A, A8-576.
+  // cond(31-28) | 11101(27-23)| D=?(22) | 11(21-20) | 1(19) |opc2=000(18-16) |
+  // Vd(15-12) | 101(11-9) | sz(8)=1 | op(7)=1 | 1(6) | M=?(5) | 0(4) | Vm(3-0)
+
+  emit(cond | 0xE*B24 | B23 | 0x3*B20 | B19 |
+       dst.code()*B12 | 0x5*B9 | B8 | B7 | B6 |
+       (0x1 & src.code())*B5 | (src.code() >> 1));
+}
+
+
+void Assembler::ftosid(const Register dst,
+                       const Register src,
+                       const SBit s,
+                       const Condition cond) {
+  // Sd = Dm (IEEE 64-bit doubles in Dm converted to 32 bit integer in Sd).
+  // Instruction details available in ARM DDI 0406A, A8-576.
+  // cond(31-28) | 11101(27-23)| D=?(22) | 11(21-20) | 1(19) | opc2=101(18-16)|
+  // Vd(15-12) | 101(11-9) | sz(8)=1 | op(7)=? | 1(6) | M=?(5) | 0(4) | Vm(3-0)
+
+  emit(cond | 0xE*B24 | B23 |(0x1 & dst.code())*B22 |
+       0x3*B20 | B19 | 0x5*B16 | (dst.code() >> 1)*B12 |
+       0x5*B9 | B8 | B7 | B6 | src.code());
+}
+
+
+void Assembler::faddd(const Register dst,
+                      const  Register src1,
+                      const  Register src2,
+                      const  SBit s,
+                      const  Condition cond) {
+  // Dd = faddd(Dn, Dm) double precision floating point addition.
+  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vm.
+  // Instruction details available in ARM DDI 0406A, A8-536.
+  // cond(31-28) | 11100(27-23)| D=?(22) | 11(21-20) | Vn(19-16) |
+  // Vd(15-12) | 101(11-9) | sz(8)=1 | N(7)=0 | 0(6) | M=?(5) | 0(4) | Vm(3-0)
+
+  emit(cond | 0xE*B24 | 0x3*B20 | src1.code()*B16 |
+       dst.code()*B12 | 0x5*B9 | B8 | src2.code());
+}
+
+
+void Assembler::fsubd(const Register dst,
+                      const  Register src1,
+                      const  Register src2,
+                      const  SBit s,
+                      const  Condition cond) {
+  // Dd = fsubd(Dn, Dm) double precision floating point subtraction.
+  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vm.
+  // Instruction details available in ARM DDI 0406A, A8-784.
+  // cond(31-28) | 11100(27-23)| D=?(22) | 11(21-20) | Vn(19-16) |
+  // Vd(15-12) | 101(11-9) | sz(8)=1 | N(7)=0 | 1(6) | M=?(5) | 0(4) | Vm(3-0)
+
+  emit(cond | 0xE*B24 | 0x3*B20 | src1.code()*B16 |
+       dst.code()*B12 | 0x5*B9 | B8 | B6 | src2.code());
+}
+
+
+void Assembler::fmuld(const Register dst,
+                      const  Register src1,
+                      const  Register src2,
+                      const  SBit s,
+                      const  Condition cond) {
+  // Dd = fmuld(Dn, Dm) double precision floating point multiplication.
+  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vm.
+  // Instruction details available in ARM DDI 0406A, A8-784.
+  // cond(31-28) | 11100(27-23)| D=?(22) | 10(21-20) | Vn(19-16) |
+  // Vd(15-12) | 101(11-9) | sz(8)=1 | N(7)=0 | 0(6) | M=?(5) | 0(4) | Vm(3-0)
+
+  emit(cond | 0xE*B24 | 0x2*B20 | src1.code()*B16 |
+       dst.code()*B12 | 0x5*B9 | B8 | src2.code());
+}
+
+
+void Assembler::fdivd(const Register dst,
+                      const  Register src1,
+                      const  Register src2,
+                      const  SBit s,
+                      const  Condition cond) {
+  // Dd = fdivd(Dn, Dm) double precision floating point division.
+  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vm.
+  // Instruction details available in ARM DDI 0406A, A8-584.
+  // cond(31-28) | 11101(27-23)| D=?(22) | 00(21-20) | Vn(19-16) |
+  // Vd(15-12) | 101(11-9) | sz(8)=1 | N(7)=? | 0(6) | M=?(5) | 0(4) | Vm(3-0)
+
+  emit(cond | 0xE*B24 | B23 | src1.code()*B16 |
+       dst.code()*B12 | 0x5*B9 | B8 | src2.code());
+}
+
+
+void Assembler::fcmp(const Register src1,
+                     const Register src2,
+                     const SBit s,
+                     const Condition cond) {
+  // vcmp(Dd, Dm) double precision floating point comparison.
+  // Instruction details available in ARM DDI 0406A, A8-570.
+  // cond(31-28) | 11101 (27-23)| D=?(22) | 11 (21-20) | 0100 (19-16) |
+  // Vd(15-12) | 101(11-9) | sz(8)=1 | E(7)=? | 1(6) | M(5)=? | 0(4) | Vm(3-0)
+
+  emit(cond | 0xE*B24 |B23 | 0x3*B20 | B18 |
+       src1.code()*B12 | 0x5*B9 | B8 | B6 | src2.code());
+}
+
+
+void Assembler::vmrs(Register dst, Condition cond) {
+  // Instruction details available in ARM DDI 0406A, A8-652.
+  // cond(31-28) | 1110 (27-24) | 1111(23-20)| 0001 (19-16) |
+  // Rt(15-12) | 1010 (11-8) | 0(7) | 00 (6-5) | 1(4) | 0000(3-0)
+
+  emit(cond | 0xE*B24 | 0xF*B20 |  B16 |
+       dst.code()*B12 | 0xA*B8 | B4);
+}
+
 
 // Pseudo instructions
 void Assembler::lea(Register dst,
diff --git a/src/arm/assembler-arm.h b/src/arm/assembler-arm.h
index 0c791b349..886f20696 100644
--- a/src/arm/assembler-arm.h
+++ b/src/arm/assembler-arm.h
@@ -102,6 +102,57 @@ extern Register sp;
 extern Register lr;
 extern Register pc;
 
+// Support for VFP registers s0 to s32 (d0 to d16).
+// Note that "sN:sM" is the same as "dN/2".
+extern Register s0;
+extern Register s1;
+extern Register s2;
+extern Register s3;
+extern Register s4;
+extern Register s5;
+extern Register s6;
+extern Register s7;
+extern Register s8;
+extern Register s9;
+extern Register s10;
+extern Register s11;
+extern Register s12;
+extern Register s13;
+extern Register s14;
+extern Register s15;
+extern Register s16;
+extern Register s17;
+extern Register s18;
+extern Register s19;
+extern Register s20;
+extern Register s21;
+extern Register s22;
+extern Register s23;
+extern Register s24;
+extern Register s25;
+extern Register s26;
+extern Register s27;
+extern Register s28;
+extern Register s29;
+extern Register s30;
+extern Register s31;
+
+extern Register d0;
+extern Register d1;
+extern Register d2;
+extern Register d3;
+extern Register d4;
+extern Register d5;
+extern Register d6;
+extern Register d7;
+extern Register d8;
+extern Register d9;
+extern Register d10;
+extern Register d11;
+extern Register d12;
+extern Register d13;
+extern Register d14;
+extern Register d15;
 
 // Coprocessor register
 struct CRegister {
@@ -372,6 +423,30 @@ class MemOperand BASE_EMBEDDED {
   friend class Assembler;
 };
 
+// CpuFeatures keeps track of which features are supported by the target CPU.
+// Supported features must be enabled by a Scope before use.
+class CpuFeatures : public AllStatic {
+ public:
+  enum Feature { VFP3 = 1 };
+  // Detect features of the target CPU. Set safe defaults if the serializer
+  // is enabled (snapshots must be portable).
+  static void Probe();
+  // Check whether a feature is supported by the target CPU.
+  static bool IsSupported(Feature f) {
+    if (f == VFP3 && !FLAG_enable_vfp3) return false;
+
+    return (supported_ & (static_cast<uint64_t>(1) << f)) != 0;
+  }
+  // Check whether a feature is currently enabled.
+  static bool IsEnabled(Feature f) {
+    return (enabled_ & (static_cast<uint64_t>(1) << f)) != 0;
+  }
+
+ private:
+  static uint64_t supported_;
+  static uint64_t enabled_;
+};
+
 
 typedef int32_t Instr;
 
@@ -655,6 +730,66 @@ class Assembler : public Malloced {
   void stc2(Coprocessor coproc, CRegister crd, Register base, int option,
             LFlag l = Short);  // v5 and above
 
+  // Support for VFP.
+  // All these APIs support S0 to S31 and D0 to D15.
+  // Currently these APIs do not support extended D registers, i.e, D16 to D31.
+  // However, some simple modifications can allow
+  // these APIs to support D16 to D31.
+
+  void fmdrr(const Register dst,
+             const Register src1,
+             const Register src2,
+             const SBit s = LeaveCC,
+             const Condition cond = al);
+  void fmrrd(const Register dst1,
+             const Register dst2,
+             const Register src,
+             const SBit s = LeaveCC,
+             const Condition cond = al);
+  void fmsr(const Register dst,
+            const Register src,
+            const SBit s = LeaveCC,
+            const Condition cond = al);
+  void fmrs(const Register dst,
+            const Register src,
+            const SBit s = LeaveCC,
+            const Condition cond = al);
+  void fsitod(const Register dst,
+              const Register src,
+              const SBit s = LeaveCC,
+              const Condition cond = al);
+  void ftosid(const Register dst,
+              const Register src,
+              const SBit s = LeaveCC,
+              const Condition cond = al);
+
+  void faddd(const Register dst,
+             const Register src1,
+             const Register src2,
+             const SBit s = LeaveCC,
+             const Condition cond = al);
+  void fsubd(const Register dst,
+             const Register src1,
+             const Register src2,
+             const SBit s = LeaveCC,
+             const Condition cond = al);
+  void fmuld(const Register dst,
+             const Register src1,
+             const Register src2,
+             const SBit s = LeaveCC,
+             const Condition cond = al);
+  void fdivd(const Register dst,
+             const Register src1,
+             const Register src2,
+             const SBit s = LeaveCC,
+             const Condition cond = al);
+  void fcmp(const Register src1,
+            const Register src2,
+            const SBit s = LeaveCC,
+            const Condition cond = al);
+  void vmrs(const Register dst,
+            const Condition cond = al);
+
   // Pseudo instructions
   void nop()  { mov(r0, Operand(r0)); }
 
diff --git a/src/arm/codegen-arm.cc b/src/arm/codegen-arm.cc
index 2165f4c51..92717f76d 100644
--- a/src/arm/codegen-arm.cc
+++ b/src/arm/codegen-arm.cc
@@ -4599,6 +4599,21 @@ static void EmitIdenticalObjectComparison(MacroAssembler* masm,
 }
 
 
+static void IntegerToDoubleConversionWithVFP3(MacroAssembler* masm,
+                                              Register inReg,
+                                              Register outHighReg,
+                                              Register outLowReg) {
+  // ARMv7 VFP3 instructions to implement integer to double conversion.
+  // This VFP3 implementation is known to work
+  // on ARMv7-VFP3 Snapdragon processor.
+
+  __ mov(r7, Operand(inReg, ASR, kSmiTagSize));
+  __ fmsr(s15, r7);
+  __ fsitod(d7, s15);
+  __ fmrrd(outLowReg, outHighReg, d7);
+}
+
+
 // See comment at call site.
 static void EmitSmiNonsmiComparison(MacroAssembler* masm,
                                     Label* rhs_not_nan,
@@ -4622,9 +4637,16 @@ static void EmitSmiNonsmiComparison(MacroAssembler* masm,
 
   // Rhs is a smi, lhs is a number.
   __ push(lr);
-  __ mov(r7, Operand(r1));
-  ConvertToDoubleStub stub1(r3, r2, r7, r6);
-  __ Call(stub1.GetCode(), RelocInfo::CODE_TARGET);
+
+  if (CpuFeatures::IsSupported(CpuFeatures::VFP3)) {
+    IntegerToDoubleConversionWithVFP3(masm, r1, r3, r2);
+  } else {
+    __ mov(r7, Operand(r1));
+    ConvertToDoubleStub stub1(r3, r2, r7, r6);
+    __ Call(stub1.GetCode(), RelocInfo::CODE_TARGET);
+  }
+
+
   // r3 and r2 are rhs as double.
   __ ldr(r1, FieldMemOperand(r0, HeapNumber::kValueOffset + kPointerSize));
   __ ldr(r0, FieldMemOperand(r0, HeapNumber::kValueOffset));
@@ -4652,9 +4674,15 @@ static void EmitSmiNonsmiComparison(MacroAssembler* masm,
   __ push(lr);
   __ ldr(r2, FieldMemOperand(r1, HeapNumber::kValueOffset));
   __ ldr(r3, FieldMemOperand(r1, HeapNumber::kValueOffset + kPointerSize));
-  __ mov(r7, Operand(r0));
-  ConvertToDoubleStub stub2(r1, r0, r7, r6);
-  __ Call(stub2.GetCode(), RelocInfo::CODE_TARGET);
+
+  if (CpuFeatures::IsSupported(CpuFeatures::VFP3)) {
+    IntegerToDoubleConversionWithVFP3(masm, r0, r1, r0);
+  } else {
+    __ mov(r7, Operand(r0));
+    ConvertToDoubleStub stub2(r1, r0, r7, r6);
+    __ Call(stub2.GetCode(), RelocInfo::CODE_TARGET);
+  }
+
   __ pop(lr);
   // Fall through to both_loaded_as_doubles.
 }
@@ -4857,9 +4885,25 @@ void CompareStub::Generate(MacroAssembler* masm) {
   // fall through if neither is a NaN.  Also binds rhs_not_nan.
   EmitNanCheck(masm, &rhs_not_nan, cc_);
 
-  // Compares two doubles in r0, r1, r2, r3 that are not NaNs.  Returns the
-  // answer.  Never falls through.
-  EmitTwoNonNanDoubleComparison(masm, cc_);
+  if (CpuFeatures::IsSupported(CpuFeatures::VFP3)) {
+    // ARMv7 VFP3 instructions to implement double precision comparison.
+    // This VFP3 implementation is known to work on
+    // ARMv7-VFP3 Snapdragon processor.
+
+    __ fmdrr(d6, r0, r1);
+    __ fmdrr(d7, r2, r3);
+
+    __ fcmp(d6, d7);
+    __ vmrs(pc);
+    __ mov(r0, Operand(0), LeaveCC, eq);
+    __ mov(r0, Operand(1), LeaveCC, lt);
+    __ mvn(r0, Operand(0), LeaveCC, gt);
+    __ mov(pc, Operand(lr));
+  } else {
+    // Compares two doubles in r0, r1, r2, r3 that are not NaNs.  Returns the
+    // answer.  Never falls through.
+    EmitTwoNonNanDoubleComparison(masm, cc_);
+  }
 
   __ bind(&not_smis);
   // At this point we know we are dealing with two different objects,
@@ -4959,16 +5003,23 @@ static void HandleBinaryOpSlowCases(MacroAssembler* masm,
   // Since both are Smis there is no heap number to overwrite, so allocate.
   // The new heap number is in r5.  r6 and r7 are scratch.
   AllocateHeapNumber(masm, &slow, r5, r6, r7);
-  // Write Smi from r0 to r3 and r2 in double format.  r6 is scratch.
-  __ mov(r7, Operand(r0));
-  ConvertToDoubleStub stub1(r3, r2, r7, r6);
-  __ push(lr);
-  __ Call(stub1.GetCode(), RelocInfo::CODE_TARGET);
-  // Write Smi from r1 to r1 and r0 in double format.  r6 is scratch.
-  __ mov(r7, Operand(r1));
-  ConvertToDoubleStub stub2(r1, r0, r7, r6);
-  __ Call(stub2.GetCode(), RelocInfo::CODE_TARGET);
-  __ pop(lr);
+
+  if (CpuFeatures::IsSupported(CpuFeatures::VFP3)) {
+    IntegerToDoubleConversionWithVFP3(masm, r0, r3, r2);
+    IntegerToDoubleConversionWithVFP3(masm, r1, r1, r0);
+  } else {
+    // Write Smi from r0 to r3 and r2 in double format.  r6 is scratch.
+    __ mov(r7, Operand(r0));
+    ConvertToDoubleStub stub1(r3, r2, r7, r6);
+    __ push(lr);
+    __ Call(stub1.GetCode(), RelocInfo::CODE_TARGET);
+    // Write Smi from r1 to r1 and r0 in double format.  r6 is scratch.
+    __ mov(r7, Operand(r1));
+    ConvertToDoubleStub stub2(r1, r0, r7, r6);
+    __ Call(stub2.GetCode(), RelocInfo::CODE_TARGET);
+    __ pop(lr);
+  }
+
   __ jmp(&do_the_call);  // Tail call.  No return.
 
   // We jump to here if something goes wrong (one param is not a number of any
@@ -5004,12 +5055,19 @@ static void HandleBinaryOpSlowCases(MacroAssembler* masm,
     // We can't overwrite a Smi so get address of new heap number into r5.
     AllocateHeapNumber(masm, &slow, r5, r6, r7);
   }
-  // Write Smi from r0 to r3 and r2 in double format.
-  __ mov(r7, Operand(r0));
-  ConvertToDoubleStub stub3(r3, r2, r7, r6);
-  __ push(lr);
-  __ Call(stub3.GetCode(), RelocInfo::CODE_TARGET);
-  __ pop(lr);
+
+
+  if (CpuFeatures::IsSupported(CpuFeatures::VFP3)) {
+    IntegerToDoubleConversionWithVFP3(masm, r0, r3, r2);
+  } else {
+    // Write Smi from r0 to r3 and r2 in double format.
+    __ mov(r7, Operand(r0));
+    ConvertToDoubleStub stub3(r3, r2, r7, r6);
+    __ push(lr);
+    __ Call(stub3.GetCode(), RelocInfo::CODE_TARGET);
+    __ pop(lr);
+  }
+
   __ bind(&finished_loading_r0);
 
   // Move r1 to a double in r0-r1.
@@ -5029,12 +5087,18 @@ static void HandleBinaryOpSlowCases(MacroAssembler* masm,
     // We can't overwrite a Smi so get address of new heap number into r5.
     AllocateHeapNumber(masm, &slow, r5, r6, r7);
   }
-  // Write Smi from r1 to r1 and r0 in double format.
-  __ mov(r7, Operand(r1));
-  ConvertToDoubleStub stub4(r1, r0, r7, r6);
-  __ push(lr);
-  __ Call(stub4.GetCode(), RelocInfo::CODE_TARGET);
-  __ pop(lr);
+
+  if (CpuFeatures::IsSupported(CpuFeatures::VFP3)) {
+    IntegerToDoubleConversionWithVFP3(masm, r1, r1, r0);
+  } else {
+    // Write Smi from r1 to r1 and r0 in double format.
+    __ mov(r7, Operand(r1));
+    ConvertToDoubleStub stub4(r1, r0, r7, r6);
+    __ push(lr);
+    __ Call(stub4.GetCode(), RelocInfo::CODE_TARGET);
+    __ pop(lr);
+  }
+
   __ bind(&finished_loading_r1);
 
   __ bind(&do_the_call);
@@ -5043,6 +5107,33 @@ static void HandleBinaryOpSlowCases(MacroAssembler* masm,
   // r2: Right value (least significant part of mantissa).
   // r3: Right value (sign, exponent, top of mantissa).
   // r5: Address of heap number for result.
+
+  if (CpuFeatures::IsSupported(CpuFeatures::VFP3) &&
+      ((Token::MUL == operation) ||
+       (Token::DIV == operation) ||
+       (Token::ADD == operation) ||
+       (Token::SUB == operation))) {
+     // ARMv7 VFP3 instructions to implement
+     // double precision, add, subtract, multiply, divide.
+     // This VFP3 implementation is known to work on
+     // ARMv7-VFP3 Snapdragon processor
+
+     __ fmdrr(d6, r0, r1);
+     __ fmdrr(d7, r2, r3);
+
+     if      (Token::MUL == operation) __ fmuld(d5, d6, d7);
+     else if (Token::DIV == operation) __ fdivd(d5, d6, d7);
+     else if (Token::ADD == operation) __ faddd(d5, d6, d7);
+     else if (Token::SUB == operation) __ fsubd(d5, d6, d7);
+
+     __ fmrrd(r0, r1, d5);
+
+     __ str(r0, FieldMemOperand(r5, HeapNumber::kValueOffset));
+     __ str(r1, FieldMemOperand(r5, HeapNumber::kValueOffset + 4));
+     __ mov(r0, Operand(r5));
+     __ mov(pc, lr);
+     return;
+  }
   __ push(lr);   // For later.
   __ push(r5);   // Address of heap number that is answer.
   __ AlignStack(0);
@@ -5111,38 +5202,50 @@ static void GetInt32(MacroAssembler* masm,
   __ sub(scratch2, scratch2, Operand(zero_exponent), SetCC);
   // Dest already has a Smi zero.
   __ b(lt, &done);
-  // We have a shifted exponent between 0 and 30 in scratch2.
-  __ mov(dest, Operand(scratch2, LSR, HeapNumber::kExponentShift));
-  // We now have the exponent in dest.  Subtract from 30 to get
-  // how much to shift down.
-  __ rsb(dest, dest, Operand(30));
-
+  if (!CpuFeatures::IsSupported(CpuFeatures::VFP3)) {
+    // We have a shifted exponent between 0 and 30 in scratch2.
+    __ mov(dest, Operand(scratch2, LSR, HeapNumber::kExponentShift));
+    // We now have the exponent in dest.  Subtract from 30 to get
+    // how much to shift down.
+    __ rsb(dest, dest, Operand(30));
+  }
   __ bind(&right_exponent);
-  // Get the top bits of the mantissa.
-  __ and_(scratch2, scratch, Operand(HeapNumber::kMantissaMask));
-  // Put back the implicit 1.
-  __ orr(scratch2, scratch2, Operand(1 << HeapNumber::kExponentShift));
-  // Shift up the mantissa bits to take up the space the exponent used to take.
-  // We just orred in the implicit bit so that took care of one and we want to
-  // leave the sign bit 0 so we subtract 2 bits from the shift distance.
-  const int shift_distance = HeapNumber::kNonMantissaBitsInTopWord - 2;
-  __ mov(scratch2, Operand(scratch2, LSL, shift_distance));
-  // Put sign in zero flag.
-  __ tst(scratch, Operand(HeapNumber::kSignMask));
-  // Get the second half of the double.  For some exponents we don't actually
-  // need this because the bits get shifted out again, but it's probably slower
-  // to test than just to do it.
-  __ ldr(scratch, FieldMemOperand(source, HeapNumber::kMantissaOffset));
-  // Shift down 22 bits to get the last 10 bits.
-  __ orr(scratch, scratch2, Operand(scratch, LSR, 32 - shift_distance));
-  // Move down according to the exponent.
-  __ mov(dest, Operand(scratch, LSR, dest));
-  // Fix sign if sign bit was set.
-  __ rsb(dest, dest, Operand(0), LeaveCC, ne);
+  if (CpuFeatures::IsSupported(CpuFeatures::VFP3)) {
+    // ARMv7 VFP3 instructions implementing double precision to integer
+    // conversion using round to zero.
+    // This VFP3 implementation is known to work on
+    // ARMv7-VFP3 Snapdragon processor.
+    __ ldr(scratch2, FieldMemOperand(source, HeapNumber::kMantissaOffset));
+    __ fmdrr(d7, scratch2, scratch);
+    __ ftosid(s15, d7);
+    __ fmrs(dest, s15);
+  } else {
+    // Get the top bits of the mantissa.
+    __ and_(scratch2, scratch, Operand(HeapNumber::kMantissaMask));
+    // Put back the implicit 1.
+    __ orr(scratch2, scratch2, Operand(1 << HeapNumber::kExponentShift));
+    // Shift up the mantissa bits to take up the space the exponent used to
+    // take. We just orred in the implicit bit so that took care of one and
+    // we want to leave the sign bit 0 so we subtract 2 bits from the shift
+    //  distance.
+    const int shift_distance = HeapNumber::kNonMantissaBitsInTopWord - 2;
+    __ mov(scratch2, Operand(scratch2, LSL, shift_distance));
+    // Put sign in zero flag.
+    __ tst(scratch, Operand(HeapNumber::kSignMask));
+    // Get the second half of the double. For some exponents we don't
+    // actually need this because the bits get shifted out again, but
+    // it's probably slower to test than just to do it.
+    __ ldr(scratch, FieldMemOperand(source, HeapNumber::kMantissaOffset));
+    // Shift down 22 bits to get the last 10 bits.
+    __ orr(scratch, scratch2, Operand(scratch, LSR, 32 - shift_distance));
+    // Move down according to the exponent.
+    __ mov(dest, Operand(scratch, LSR, dest));
+    // Fix sign if sign bit was set.
+    __ rsb(dest, dest, Operand(0), LeaveCC, ne);
+  }
   __ bind(&done);
 }
 
-
 // For bitwise ops where the inputs are not both Smis we here try to determine
 // whether both inputs are either Smis or at least heap numbers that can be
 // represented by a 32 bit signed value.  We truncate towards zero as required
@@ -5159,7 +5262,7 @@ void GenericBinaryOpStub::HandleNonSmiBitwiseOp(MacroAssembler* masm) {
   __ b(eq, &r1_is_smi);  // It's a Smi so don't check it's a heap number.
   __ CompareObjectType(r1, r4, r4, HEAP_NUMBER_TYPE);
   __ b(ne, &slow);
-  GetInt32(masm, r1, r3, r4, r5, &slow);
+  GetInt32(masm, r1, r3, r5, r4, &slow);
   __ jmp(&done_checking_r1);
   __ bind(&r1_is_smi);
   __ mov(r3, Operand(r1, ASR, 1));
@@ -5169,7 +5272,7 @@ void GenericBinaryOpStub::HandleNonSmiBitwiseOp(MacroAssembler* masm) {
   __ b(eq, &r0_is_smi);  // It's a Smi so don't check it's a heap number.
   __ CompareObjectType(r0, r4, r4, HEAP_NUMBER_TYPE);
   __ b(ne, &slow);
-  GetInt32(masm, r0, r2, r4, r5, &slow);
+  GetInt32(masm, r0, r2, r5, r4, &slow);
   __ jmp(&done_checking_r0);
   __ bind(&r0_is_smi);
   __ mov(r2, Operand(r0, ASR, 1));
diff --git a/src/arm/constants-arm.cc b/src/arm/constants-arm.cc
index 964bfe14f..4802af960 100644
--- a/src/arm/constants-arm.cc
+++ b/src/arm/constants-arm.cc
@@ -66,6 +66,28 @@ const char* Registers::Name(int reg) {
   return result;
 }
 
+// Support for VFP registers s0 to s31 (d0 to d15).
+// Note that "sN:sM" is the same as "dN/2"
+// These register names are defined in a way to match the native disassembler
+// formatting. See for example the command "objdump -d <binary file>".
+const char* VFPRegisters::names_[kNumVFPRegisters] = {
+    "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+    "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
+    "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
+    "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
+    "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+    "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
+};
+
+const char* VFPRegisters::Name(int reg) {
+  const char* result;
+  if ((0 <= reg) && (reg < kNumVFPRegisters)) {
+    result = names_[reg];
+  } else {
+    result = "no_vfp_reg";
+  }
+  return result;
+}
 
 int Registers::Number(const char* name) {
   // Look through the canonical names.
diff --git a/src/arm/constants-arm.h b/src/arm/constants-arm.h
index 6bd0d0080..d173a8aa2 100644
--- a/src/arm/constants-arm.h
+++ b/src/arm/constants-arm.h
@@ -75,6 +75,9 @@ namespace arm {
 // Number of registers in normal ARM mode.
 static const int kNumRegisters = 16;
 
+// VFP support.
+static const int kNumVFPRegisters = 48;
+
 // PC is register 15.
 static const int kPCRegister = 15;
 static const int kNoRegister = -1;
@@ -231,6 +234,16 @@ class Instr {
   inline int RnField() const { return Bits(19, 16); }
   inline int RdField() const { return Bits(15, 12); }
 
+  // Support for VFP.
+  // Vn(19-16) | Vd(15-12) |  Vm(3-0)
+  inline int VnField() const { return Bits(19, 16); }
+  inline int VmField() const { return Bits(3, 0); }
+  inline int VdField() const { return Bits(15, 12); }
+  inline int NField() const { return Bit(7); }
+  inline int MField() const { return Bit(5); }
+  inline int DField() const { return Bit(22); }
+  inline int RtField() const { return Bits(15, 12); }
+
   // Fields used in Data processing instructions
   inline Opcode OpcodeField() const {
     return static_cast<Opcode>(Bits(24, 21));
@@ -315,6 +328,15 @@ class Registers {
   static const RegisterAlias aliases_[];
 };
 
+// Helper functions for converting between VFP register numbers and names.
+class VFPRegisters {
+ public:
+  // Return the name of the register.
+  static const char* Name(int reg);
+
+ private:
+  static const char* names_[kNumVFPRegisters];
+};
 
 
 } }  // namespace assembler::arm
diff --git a/src/arm/cpu-arm.cc b/src/arm/cpu-arm.cc
index cafefce45..a5a358b3a 100644
--- a/src/arm/cpu-arm.cc
+++ b/src/arm/cpu-arm.cc
@@ -33,12 +33,13 @@
 #include "v8.h"
 
 #include "cpu.h"
+#include "macro-assembler.h"
 
 namespace v8 {
 namespace internal {
 
 void CPU::Setup() {
-  // Nothing to do.
+  CpuFeatures::Probe();
 }
 
 
diff --git a/src/arm/disasm-arm.cc b/src/arm/disasm-arm.cc
index 64314837d..d7021cf4a 100644
--- a/src/arm/disasm-arm.cc
+++ b/src/arm/disasm-arm.cc
@@ -97,6 +97,10 @@ class Decoder {
 
   // Printing of common values.
   void PrintRegister(int reg);
+  void PrintSRegister(int reg);
+  void PrintDRegister(int reg);
+  int FormatVFPRegister(Instr* instr, const char* format);
+  int FormatVFPinstruction(Instr* instr, const char* format);
   void PrintCondition(Instr* instr);
   void PrintShiftRm(Instr* instr);
   void PrintShiftImm(Instr* instr);
@@ -121,6 +125,10 @@ class Decoder {
   void DecodeType6(Instr* instr);
   void DecodeType7(Instr* instr);
   void DecodeUnconditional(Instr* instr);
+  // For VFP support.
+  void DecodeTypeVFP(Instr* instr);
+  void DecodeType6CoprocessorIns(Instr* instr);
+
 
   const disasm::NameConverter& converter_;
   v8::internal::Vector<char> out_buffer_;
@@ -171,6 +179,16 @@ void Decoder::PrintRegister(int reg) {
   Print(converter_.NameOfCPURegister(reg));
 }
 
+// Print the VFP S register name according to the active name converter.
+void Decoder::PrintSRegister(int reg) {
+  Print(assembler::arm::VFPRegisters::Name(reg));
+}
+
+// Print the  VFP D register name according to the active name converter.
+void Decoder::PrintDRegister(int reg) {
+  Print(assembler::arm::VFPRegisters::Name(reg + 32));
+}
+
 
 // These shift names are defined in a way to match the native disassembler
 // formatting. See for example the command "objdump -d <binary file>".
@@ -290,6 +308,10 @@ int Decoder::FormatRegister(Instr* instr, const char* format) {
     int reg = instr->RmField();
     PrintRegister(reg);
     return 2;
+  } else if (format[1] == 't') {  // 'rt: Rt register
+    int reg = instr->RtField();
+    PrintRegister(reg);
+    return 2;
   } else if (format[1] == 'l') {
     // 'rlist: register list for load and store multiple instructions
     ASSERT(STRING_STARTS_WITH(format, "rlist"));
@@ -314,6 +336,39 @@ int Decoder::FormatRegister(Instr* instr, const char* format) {
   return -1;
 }
 
+// Handle all VFP register based formatting in this function to reduce the
+// complexity of FormatOption.
+int Decoder::FormatVFPRegister(Instr* instr, const char* format) {
+  ASSERT((format[0] == 'S') || (format[0] == 'D'));
+
+  if (format[1] == 'n') {
+    int reg = instr->VnField();
+    if (format[0] == 'S') PrintSRegister(((reg << 1) | instr->NField()));
+    if (format[0] == 'D') PrintDRegister(reg);
+    return 2;
+  } else if (format[1] == 'm') {
+    int reg = instr->VmField();
+    if (format[0] == 'S') PrintSRegister(((reg << 1) | instr->MField()));
+    if (format[0] == 'D') PrintDRegister(reg);
+    return 2;
+  } else if (format[1] == 'd') {
+    int reg = instr->VdField();
+    if (format[0] == 'S') PrintSRegister(((reg << 1) | instr->DField()));
+    if (format[0] == 'D') PrintDRegister(reg);
+    return 2;
+  }
+
+  UNREACHABLE();
+  return -1;
+}
+
+int Decoder::FormatVFPinstruction(Instr* instr, const char* format) {
+    Print(format);
+    return 0;
+}
+
+
+
 
 // FormatOption takes a formatting string and interprets it based on
 // the current instructions. The format string points to the first
@@ -459,6 +514,13 @@ int Decoder::FormatOption(Instr* instr, const char* format) {
       }
       return 1;
     }
+    case 'v': {
+      return FormatVFPinstruction(instr, format);
+    }
+    case 'S':
+    case 'D': {
+      return FormatVFPRegister(instr, format);
+    }
     case 'w': {  // 'w: W field of load and store instructions
       if (instr->HasW()) {
         Print("!");
@@ -761,8 +823,7 @@ void Decoder::DecodeType5(Instr* instr) {
 
 
 void Decoder::DecodeType6(Instr* instr) {
-  // Coprocessor instructions currently not supported.
-  Unknown(instr);
+  DecodeType6CoprocessorIns(instr);
 }
 
 
@@ -770,12 +831,10 @@ void Decoder::DecodeType7(Instr* instr) {
   if (instr->Bit(24) == 1) {
     Format(instr, "swi'cond 'swi");
   } else {
-    // Coprocessor instructions currently not supported.
-    Unknown(instr);
+    DecodeTypeVFP(instr);
   }
 }
 
-
 void Decoder::DecodeUnconditional(Instr* instr) {
   if (instr->Bits(7, 4) == 0xB && instr->Bits(27, 25) == 0 && instr->HasL()) {
     Format(instr, "'memop'h'pu 'rd, ");
@@ -837,6 +896,138 @@ void Decoder::DecodeUnconditional(Instr* instr) {
 }
 
 
+// void Decoder::DecodeTypeVFP(Instr* instr)
+// Implements the following
+// VFP instructions
+// fmsr :Sn = Rt
+// fmrs :Rt = Sn
+// fsitod: Dd = Sm
+// ftosid: Sd = Dm
+// Dd = faddd(Dn, Dm)
+// Dd = fsubd(Dn, Dm)
+// Dd = fmuld(Dn, Dm)
+// Dd = fdivd(Dn, Dm)
+// vcmp(Dd, Dm)
+// VMRS
+void Decoder::DecodeTypeVFP(Instr* instr) {
+  ASSERT((instr->TypeField() == 7) && (instr->Bit(24) == 0x0) );
+
+  if (instr->Bit(23) == 1) {
+    if ((instr->Bits(21, 19) == 0x7) &&
+        (instr->Bits(18, 16) == 0x5) &&
+        (instr->Bits(11, 9) == 0x5) &&
+        (instr->Bit(8) == 1) &&
+        (instr->Bit(6) == 1) &&
+        (instr->Bit(4) == 0)) {
+      Format(instr, "vcvt.s32.f64'cond 'Sd, 'Dm");
+    } else if ((instr->Bits(21, 19) == 0x7) &&
+               (instr->Bits(18, 16) == 0x0) &&
+               (instr->Bits(11, 9) == 0x5) &&
+               (instr->Bit(8) == 1) &&
+               (instr->Bit(7) == 1) &&
+               (instr->Bit(6) == 1) &&
+               (instr->Bit(4) == 0)) {
+      Format(instr, "vcvt.f64.s32'cond 'Dd, 'Sm");
+    } else if ((instr->Bit(21) == 0x0) &&
+               (instr->Bit(20) == 0x0) &&
+               (instr->Bits(11, 9) == 0x5) &&
+               (instr->Bit(8) == 1) &&
+               (instr->Bit(6) == 0) &&
+               (instr->Bit(4) == 0)) {
+        Format(instr, "vdiv.f64'cond 'Dd, 'Dn, 'Dm");
+    } else if ((instr->Bits(21, 20) == 0x3) &&
+               (instr->Bits(19, 16) == 0x4) &&
+               (instr->Bits(11, 9) == 0x5) &&
+               (instr->Bit(8) == 0x1) &&
+               (instr->Bit(6) == 0x1) &&
+               (instr->Bit(4) == 0x0)) {
+      Format(instr, "vcmp.f64'cond 'Dd, 'Dm");
+    } else if ((instr->Bits(23, 20) == 0xF) &&
+               (instr->Bits(19, 16) == 0x1) &&
+               (instr->Bits(11, 8) == 0xA) &&
+               (instr->Bits(7, 5) == 0x0) &&
+               (instr->Bit(4) == 0x1)    &&
+               (instr->Bits(3, 0) == 0x0)) {
+        if (instr->Bits(15, 12) == 0xF)
+          Format(instr, "vmrs'cond APSR, FPSCR");
+        else
+          Unknown(instr);  // not used by V8
+    } else {
+      Unknown(instr);  // not used by V8
+    }
+  } else if (instr->Bit(21) == 1) {
+    if ((instr->Bit(20) == 0x1) &&
+        (instr->Bits(11, 9) == 0x5) &&
+        (instr->Bit(8) == 0x1) &&
+        (instr->Bit(6) == 0) &&
+        (instr->Bit(4) == 0)) {
+      Format(instr, "vadd.f64'cond 'Dd, 'Dn, 'Dm");
+    } else if ((instr->Bit(20) == 0x1) &&
+               (instr->Bits(11, 9) == 0x5) &&
+               (instr->Bit(8) == 0x1) &&
+               (instr->Bit(6) == 1) &&
+               (instr->Bit(4) == 0)) {
+      Format(instr, "vsub.f64'cond 'Dd, 'Dn, 'Dm");
+    } else if ((instr->Bit(20) == 0x0) &&
+               (instr->Bits(11, 9) == 0x5) &&
+               (instr->Bit(8) == 0x1) &&
+               (instr->Bit(6) == 0) &&
+               (instr->Bit(4) == 0)) {
+      Format(instr, "vmul.f64'cond 'Dd, 'Dn, 'Dm");
+    } else {
+      Unknown(instr);  // not used by V8
+    }
+  } else {
+    if ((instr->Bit(20) == 0x0) &&
+        (instr->Bits(11, 8) == 0xA) &&
+        (instr->Bits(6, 5) == 0x0) &&
+        (instr->Bit(4) == 1) &&
+        (instr->Bits(3, 0) == 0x0)) {
+      Format(instr, "vmov'cond 'Sn, 'rt");
+    } else if ((instr->Bit(20) == 0x1) &&
+               (instr->Bits(11, 8) == 0xA) &&
+               (instr->Bits(6, 5) == 0x0) &&
+               (instr->Bit(4) == 1) &&
+               (instr->Bits(3, 0) == 0x0)) {
+      Format(instr, "vmov'cond 'rt, 'Sn");
+    } else {
+      Unknown(instr);  // not used by V8
+    }
+  }
+}
+
+
+
+// Decode Type 6 coprocessor instructions
+// Dm = fmdrr(Rt, Rt2)
+// <Rt, Rt2> = fmrrd(Dm)
+void Decoder::DecodeType6CoprocessorIns(Instr* instr) {
+  ASSERT((instr->TypeField() == 6));
+
+  if (instr->Bit(23) == 1) {
+     Unknown(instr);  // not used by V8
+  } else if (instr->Bit(22) == 1) {
+    if ((instr->Bits(27, 24) == 0xC) &&
+        (instr->Bit(22) == 1) &&
+        (instr->Bits(11, 8) == 0xB) &&
+        (instr->Bits(7, 6) == 0x0) &&
+        (instr->Bit(4) == 1)) {
+      if (instr->Bit(20) == 0) {
+        Format(instr, "vmov'cond 'Dm, 'rt, 'rn");
+      } else if (instr->Bit(20) == 1) {
+        Format(instr, "vmov'cond 'rt, 'rn, 'Dm");
+      }
+    } else {
+      Unknown(instr);  // not used by V8
+    }
+  } else if (instr->Bit(21) == 1) {
+    Unknown(instr);  // not used by V8
+  } else {
+    Unknown(instr);  // not used by V8
+  }
+}
+
+
 // Disassemble the instruction at *instr_ptr into the output buffer.
 int Decoder::InstructionDecode(byte* instr_ptr) {
   Instr* instr = Instr::At(instr_ptr);
diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc
index 843dab819..ce972a7ec 100644
--- a/src/arm/simulator-arm.cc
+++ b/src/arm/simulator-arm.cc
@@ -342,6 +342,11 @@ void Debugger::Debug() {
         PrintF("Z flag: %d; ", sim_->z_flag_);
         PrintF("C flag: %d; ", sim_->c_flag_);
         PrintF("V flag: %d\n", sim_->v_flag_);
+        PrintF("INVALID OP flag: %d; ", sim_->inv_op_vfp_flag_);
+        PrintF("DIV BY ZERO flag: %d; ", sim_->div_zero_vfp_flag_);
+        PrintF("OVERFLOW flag: %d; ", sim_->overflow_vfp_flag_);
+        PrintF("UNDERFLOW flag: %d; ", sim_->underflow_vfp_flag_);
+        PrintF("INEXACT flag: %d; ", sim_->inexact_vfp_flag_);
       } else if (strcmp(cmd, "unstop") == 0) {
         intptr_t stop_pc = sim_->get_pc() - Instr::kInstrSize;
         Instr* stop_instr = reinterpret_cast<Instr*>(stop_pc);
@@ -429,6 +434,24 @@ Simulator::Simulator() {
   c_flag_ = false;
   v_flag_ = false;
 
+  // Initializing VFP registers.
+  // All registers are initialized to zero to start with.
+  // even though s_registers_ & d_registers_ share the same
+  // physical registers in the target
+  for (int i = 0; i < num_s_registers; i++) {
+    vfp_register[i] = 0;
+  }
+  n_flag_FPSCR_ = false;
+  z_flag_FPSCR_ = false;
+  c_flag_FPSCR_ = false;
+  v_flag_FPSCR_ = false;
+
+  inv_op_vfp_flag_ = false;
+  div_zero_vfp_flag_ = false;
+  overflow_vfp_flag_ = false;
+  underflow_vfp_flag_ = false;
+  inexact_vfp_flag_ = false;
+
   // The sp is initialized to point to the bottom (high address) of the
   // allocated stack area. To be safe in potential stack underflows we leave
   // some buffer below.
@@ -544,6 +567,91 @@ int32_t Simulator::get_pc() const {
   return registers_[pc];
 }
 
+// Getting from and setting into VFP registers.
+void Simulator::set_s_register(int sreg, unsigned int value) {
+  ASSERT((sreg >= 0) && (sreg < num_s_registers));
+  vfp_register[sreg] = value;
+}
+
+unsigned int Simulator::get_s_register(int sreg) const {
+  ASSERT((sreg >= 0) && (sreg < num_s_registers));
+  return vfp_register[sreg];
+}
+
+void Simulator::set_s_register_from_float(int sreg, const float flt) {
+  ASSERT((sreg >= 0) && (sreg < num_s_registers));
+  // Read the bits from the single precision floating point value
+  // into the unsigned integer element of vfp_register[] given by index=sreg.
+  char buffer[sizeof(vfp_register[0])];
+  memcpy(buffer, &flt, sizeof(vfp_register[0]));
+  memcpy(&vfp_register[sreg], buffer, sizeof(vfp_register[0]));
+}
+
+void Simulator::set_s_register_from_sinteger(int sreg, const int sint) {
+  ASSERT((sreg >= 0) && (sreg < num_s_registers));
+  // Read the bits from the integer value
+  // into the unsigned integer element of vfp_register[] given by index=sreg.
+  char buffer[sizeof(vfp_register[0])];
+  memcpy(buffer, &sint, sizeof(vfp_register[0]));
+  memcpy(&vfp_register[sreg], buffer, sizeof(vfp_register[0]));
+}
+
+void Simulator::set_d_register_from_double(int dreg, const double& dbl) {
+  ASSERT((dreg >= 0) && (dreg < num_d_registers));
+  // Read the bits from the double precision floating point value
+  // into the two consecutive unsigned integer elements of vfp_register[]
+  // given by index 2*sreg and 2*sreg+1.
+  char buffer[2 * sizeof(vfp_register[0])];
+  memcpy(buffer, &dbl, 2 * sizeof(vfp_register[0]));
+#ifndef BIG_ENDIAN_FLOATING_POINT
+  memcpy(&vfp_register[dreg * 2], buffer, 2 * sizeof(vfp_register[0]));
+#else
+  memcpy(&vfp_register[dreg * 2], &buffer[4], sizeof(vfp_register[0]));
+  memcpy(&vfp_register[dreg * 2 + 1], &buffer[0], sizeof(vfp_register[0]));
+#endif
+}
+
+float Simulator::get_float_from_s_register(int sreg) {
+  ASSERT((sreg >= 0) && (sreg < num_s_registers));
+
+  float sm_val = 0.0;
+  // Read the bits from the unsigned integer vfp_register[] array
+  // into the single precision floating point value and return it.
+  char buffer[sizeof(vfp_register[0])];
+  memcpy(buffer, &vfp_register[sreg], sizeof(vfp_register[0]));
+  memcpy(&sm_val, buffer, sizeof(vfp_register[0]));
+  return(sm_val);
+}
+
+int Simulator::get_sinteger_from_s_register(int sreg) {
+  ASSERT((sreg >= 0) && (sreg < num_s_registers));
+
+  int sm_val = 0;
+  // Read the bits from the unsigned integer vfp_register[] array
+  // into the single precision floating point value and return it.
+  char buffer[sizeof(vfp_register[0])];
+  memcpy(buffer, &vfp_register[sreg], sizeof(vfp_register[0]));
+  memcpy(&sm_val, buffer, sizeof(vfp_register[0]));
+  return(sm_val);
+}
+
+double Simulator::get_double_from_d_register(int dreg) {
+  ASSERT((dreg >= 0) && (dreg < num_d_registers));
+
+  double dm_val = 0.0;
+  // Read the bits from the unsigned integer vfp_register[] array
+  // into the double precision floating point value and return it.
+  char buffer[2 * sizeof(vfp_register[0])];
+#ifndef BIG_ENDIAN_FLOATING_POINT
+  memcpy(buffer, &vfp_register[2 * dreg], 2 * sizeof(vfp_register[0]));
+#else
+  memcpy(&buffer[0], &vfp_register[2 * dreg + 1], sizeof(vfp_register[0]));
+  memcpy(&buffer[4], &vfp_register[2 * dreg], sizeof(vfp_register[0]));
+#endif
+  memcpy(&dm_val, buffer, 2 * sizeof(vfp_register[0]));
+  return(dm_val);
+}
+
 
 // For use in calls that take two double values, constructed from r0, r1, r2
 // and r3.
@@ -771,6 +879,37 @@ bool Simulator::OverflowFrom(int32_t alu_out,
   return overflow;
 }
 
+// Support for VFP comparisons.
+void Simulator::Compute_FPSCR_Flags(double val1, double val2) {
+  // All Non-Nan cases
+  if (val1 == val2) {
+    n_flag_FPSCR_ = false;
+    z_flag_FPSCR_ = true;
+    c_flag_FPSCR_ = true;
+    v_flag_FPSCR_ = false;
+  } else if (val1 < val2) {
+    n_flag_FPSCR_ = true;
+    z_flag_FPSCR_ = false;
+    c_flag_FPSCR_ = false;
+    v_flag_FPSCR_ = false;
+  } else {
+    // Case when (val1 > val2).
+    n_flag_FPSCR_ = false;
+    z_flag_FPSCR_ = false;
+    c_flag_FPSCR_ = true;
+    v_flag_FPSCR_ = false;
+  }
+}
+
+
+void Simulator::Copy_FPSCR_to_APSR() {
+  n_flag_ = n_flag_FPSCR_;
+  z_flag_ = z_flag_FPSCR_;
+  c_flag_ = c_flag_FPSCR_;
+  v_flag_ = v_flag_FPSCR_;
+}
+
+
 
 // Addressing Mode 1 - Data-processing operands:
 // Get the value based on the shifter_operand with register.
@@ -1664,16 +1803,15 @@ void Simulator::DecodeType5(Instr* instr) {
 
 
 void Simulator::DecodeType6(Instr* instr) {
-  UNIMPLEMENTED();
+  DecodeType6CoprocessorIns(instr);
 }
 
 
 void Simulator::DecodeType7(Instr* instr) {
   if (instr->Bit(24) == 1) {
-    // Format(instr, "swi 'swi");
     SoftwareInterrupt(instr);
   } else {
-    UNIMPLEMENTED();
+    DecodeTypeVFP(instr);
   }
 }
 
@@ -1745,6 +1883,178 @@ void Simulator::DecodeUnconditional(Instr* instr) {
 }
 
 
+// void Simulator::DecodeTypeVFP(Instr* instr)
+// The Following ARMv7 VFPv instructions are currently supported.
+// fmsr :Sn = Rt
+// fmrs :Rt = Sn
+// fsitod: Dd = Sm
+// ftosid: Sd = Dm
+// Dd = faddd(Dn, Dm)
+// Dd = fsubd(Dn, Dm)
+// Dd = fmuld(Dn, Dm)
+// Dd = fdivd(Dn, Dm)
+// vcmp(Dd, Dm)
+// VMRS
+void Simulator::DecodeTypeVFP(Instr* instr) {
+  ASSERT((instr->TypeField() == 7) && (instr->Bit(24) == 0x0) );
+
+  int rt = instr->RtField();
+  int vm = instr->VmField();
+  int vn = instr->VnField();
+  int vd = instr->VdField();
+
+  if (instr->Bit(23) == 1) {
+    if ((instr->Bits(21, 19) == 0x7) &&
+        (instr->Bits(18, 16) == 0x5) &&
+        (instr->Bits(11, 9) == 0x5) &&
+        (instr->Bit(8) == 1) &&
+        (instr->Bit(6) == 1) &&
+        (instr->Bit(4) == 0)) {
+      double dm_val = get_double_from_d_register(vm);
+      int32_t int_value = static_cast<int32_t>(dm_val);
+      set_s_register_from_sinteger(((vd<<1) | instr->DField()), int_value);
+    } else if ((instr->Bits(21, 19) == 0x7) &&
+               (instr->Bits(18, 16) == 0x0) &&
+               (instr->Bits(11, 9) == 0x5) &&
+               (instr->Bit(8) == 1) &&
+               (instr->Bit(7) == 1) &&
+               (instr->Bit(6) == 1) &&
+               (instr->Bit(4) == 0)) {
+      int32_t int_value = get_sinteger_from_s_register(((vm<<1) |
+                                                       instr->MField()));
+      double dbl_value = static_cast<double>(int_value);
+      set_d_register_from_double(vd, dbl_value);
+    } else if ((instr->Bit(21) == 0x0) &&
+               (instr->Bit(20) == 0x0) &&
+               (instr->Bits(11, 9) == 0x5) &&
+               (instr->Bit(8) == 1) &&
+               (instr->Bit(6) == 0) &&
+               (instr->Bit(4) == 0)) {
+      double dn_value = get_double_from_d_register(vn);
+      double dm_value = get_double_from_d_register(vm);
+      double dd_value = dn_value / dm_value;
+      set_d_register_from_double(vd, dd_value);
+    } else if ((instr->Bits(21, 20) == 0x3) &&
+               (instr->Bits(19, 16) == 0x4) &&
+               (instr->Bits(11, 9) == 0x5) &&
+               (instr->Bit(8) == 0x1) &&
+               (instr->Bit(6) == 0x1) &&
+               (instr->Bit(4) == 0x0)) {
+      double dd_value = get_double_from_d_register(vd);
+      double dm_value = get_double_from_d_register(vm);
+      Compute_FPSCR_Flags(dd_value, dm_value);
+    } else if ((instr->Bits(23, 20) == 0xF) &&
+               (instr->Bits(19, 16) == 0x1) &&
+               (instr->Bits(11, 8) == 0xA) &&
+               (instr->Bits(7, 5) == 0x0) &&
+               (instr->Bit(4) == 0x1)    &&
+               (instr->Bits(3, 0) == 0x0)) {
+      if (instr->Bits(15, 12) == 0xF)
+        Copy_FPSCR_to_APSR();
+      else
+        UNIMPLEMENTED();  // not used by V8 now
+    } else {
+      UNIMPLEMENTED();  // not used by V8 now
+    }
+  } else if (instr->Bit(21) == 1) {
+    if ((instr->Bit(20) == 0x1) &&
+        (instr->Bits(11, 9) == 0x5) &&
+        (instr->Bit(8) == 0x1) &&
+        (instr->Bit(6) == 0) &&
+        (instr->Bit(4) == 0)) {
+      double dn_value = get_double_from_d_register(vn);
+      double dm_value = get_double_from_d_register(vm);
+      double dd_value = dn_value + dm_value;
+      set_d_register_from_double(vd, dd_value);
+    } else if ((instr->Bit(20) == 0x1) &&
+               (instr->Bits(11, 9) == 0x5) &&
+               (instr->Bit(8) == 0x1) &&
+               (instr->Bit(6) == 1) &&
+               (instr->Bit(4) == 0)) {
+      double dn_value = get_double_from_d_register(vn);
+      double dm_value = get_double_from_d_register(vm);
+      double dd_value = dn_value - dm_value;
+      set_d_register_from_double(vd, dd_value);
+    } else if ((instr->Bit(20) == 0x0) &&
+               (instr->Bits(11, 9) == 0x5) &&
+               (instr->Bit(8) == 0x1) &&
+               (instr->Bit(6) == 0) &&
+               (instr->Bit(4) == 0)) {
+      double dn_value = get_double_from_d_register(vn);
+      double dm_value = get_double_from_d_register(vm);
+      double dd_value = dn_value * dm_value;
+      set_d_register_from_double(vd, dd_value);
+    } else {
+      UNIMPLEMENTED();  // not used by V8 now
+    }
+  } else {
+    if ((instr->Bit(20) == 0x0) &&
+        (instr->Bits(11, 8) == 0xA) &&
+        (instr->Bits(6, 5) == 0x0) &&
+        (instr->Bit(4) == 1) &&
+        (instr->Bits(3, 0) == 0x0)) {
+      int32_t rs_val = get_register(rt);
+      set_s_register_from_sinteger(((vn<<1) | instr->NField()), rs_val);
+    } else if ((instr->Bit(20) == 0x1) &&
+               (instr->Bits(11, 8) == 0xA) &&
+               (instr->Bits(6, 5) == 0x0) &&
+               (instr->Bit(4) == 1) &&
+               (instr->Bits(3, 0) == 0x0)) {
+      int32_t int_value = get_sinteger_from_s_register(((vn<<1) |
+                                                       instr->NField()));
+      set_register(rt, int_value);
+    } else {
+      UNIMPLEMENTED();  // not used by V8 now
+    }
+  }
+}
+
+
+
+// void Simulator::DecodeType6CoprocessorIns(Instr* instr)
+// Decode Type 6 coprocessor instructions
+// Dm = fmdrr(Rt, Rt2)
+// <Rt, Rt2> = fmrrd(Dm)
+void Simulator::DecodeType6CoprocessorIns(Instr* instr) {
+  ASSERT((instr->TypeField() == 6));
+
+  int rt = instr->RtField();
+  int rn = instr->RnField();
+  int vm = instr->VmField();
+
+  if (instr->Bit(23) == 1) {
+    UNIMPLEMENTED();
+  } else if (instr->Bit(22) == 1) {
+    if ((instr->Bits(27, 24) == 0xC) &&
+        (instr->Bit(22) == 1) &&
+        (instr->Bits(11, 8) == 0xB) &&
+        (instr->Bits(7, 6) == 0x0) &&
+        (instr->Bit(4) == 1)) {
+      if (instr->Bit(20) == 0) {
+        int32_t rs_val = get_register(rt);
+        int32_t rn_val = get_register(rn);
+
+        set_s_register_from_sinteger(2*vm, rs_val);
+        set_s_register_from_sinteger((2*vm+1), rn_val);
+
+      } else if (instr->Bit(20) == 1) {
+        int32_t rt_int_value = get_sinteger_from_s_register(2*vm);
+        int32_t rn_int_value = get_sinteger_from_s_register(2*vm+1);
+
+        set_register(rt, rt_int_value);
+        set_register(rn, rn_int_value);
+      }
+    } else {
+      UNIMPLEMENTED();
+    }
+  } else if (instr->Bit(21) == 1) {
+    UNIMPLEMENTED();
+  } else {
+    UNIMPLEMENTED();
+  }
+}
+
+
 // Executes the current instruction.
 void Simulator::InstructionDecode(Instr* instr) {
   pc_modified_ = false;
@@ -1802,7 +2112,6 @@ void Simulator::InstructionDecode(Instr* instr) {
 }
 
 
-//
 void Simulator::Execute() {
   // Get the PC to simulate. Cannot use the accessor here as we need the
   // raw PC value and not the one used as input to arithmetic instructions.
diff --git a/src/arm/simulator-arm.h b/src/arm/simulator-arm.h
index 3c8fa71b5..ed81946a9 100644
--- a/src/arm/simulator-arm.h
+++ b/src/arm/simulator-arm.h
@@ -97,7 +97,6 @@ namespace arm {
 class Simulator {
  public:
   friend class Debugger;
-
   enum Register {
     no_reg = -1,
     r0 = 0, r1, r2, r3, r4, r5, r6, r7,
@@ -105,7 +104,15 @@ class Simulator {
     num_registers,
     sp = 13,
     lr = 14,
-    pc = 15
+    pc = 15,
+    s0 = 0, s1, s2, s3, s4, s5, s6, s7,
+    s8, s9, s10, s11, s12, s13, s14, s15,
+    s16, s17, s18, s19, s20, s21, s22, s23,
+    s24, s25, s26, s27, s28, s29, s30, s31,
+    num_s_registers = 32,
+    d0 = 0, d1, d2, d3, d4, d5, d6, d7,
+    d8, d9, d10, d11, d12, d13, d14, d15,
+    num_d_registers = 16
   };
 
   Simulator();
@@ -121,6 +128,16 @@ class Simulator {
   void set_register(int reg, int32_t value);
   int32_t get_register(int reg) const;
 
+  // Support for VFP.
+  void set_s_register(int reg, unsigned int value);
+  unsigned int get_s_register(int reg) const;
+  void set_d_register_from_double(int dreg, const double& dbl);
+  double get_double_from_d_register(int dreg);
+  void set_s_register_from_float(int sreg, const float dbl);
+  float get_float_from_s_register(int sreg);
+  void set_s_register_from_sinteger(int reg, const int value);
+  int get_sinteger_from_s_register(int reg);
+
   // Special case of set_register and get_register to access the raw PC value.
   void set_pc(int32_t value);
   int32_t get_pc() const;
@@ -175,6 +192,10 @@ class Simulator {
                     int32_t right,
                     bool addition);
 
+  // Support for VFP.
+  void Compute_FPSCR_Flags(double val1, double val2);
+  void Copy_FPSCR_to_APSR();
+
   // Helper functions to decode common "addressing" modes
   int32_t GetShiftRm(Instr* instr, bool* carry_out);
   int32_t GetImm(Instr* instr, bool* carry_out);
@@ -206,6 +227,10 @@ class Simulator {
   void DecodeType7(Instr* instr);
   void DecodeUnconditional(Instr* instr);
 
+  // Support for VFP.
+  void DecodeTypeVFP(Instr* instr);
+  void DecodeType6CoprocessorIns(Instr* instr);
+
   // Executes one instruction.
   void InstructionDecode(Instr* instr);
 
@@ -226,6 +251,20 @@ class Simulator {
   bool c_flag_;
   bool v_flag_;
 
+  // VFP architecture state.
+  unsigned int vfp_register[32/*num_s_registers*/];
+  bool n_flag_FPSCR_;
+  bool z_flag_FPSCR_;
+  bool c_flag_FPSCR_;
+  bool v_flag_FPSCR_;
+
+  // VFP FP exception flags architecture state.
+  bool inv_op_vfp_flag_;
+  bool div_zero_vfp_flag_;
+  bool overflow_vfp_flag_;
+  bool underflow_vfp_flag_;
+  bool inexact_vfp_flag_;
+
   // Simulator support.
   char* stack_;
   bool pc_modified_;
diff --git a/src/flag-definitions.h b/src/flag-definitions.h
index d8b0633f9..23f23d2dc 100644
--- a/src/flag-definitions.h
+++ b/src/flag-definitions.h
@@ -114,6 +114,8 @@ DEFINE_bool(enable_rdtsc, true,
             "enable use of RDTSC instruction if available")
 DEFINE_bool(enable_sahf, true,
             "enable use of SAHF instruction if available (X64 only)")
+DEFINE_bool(enable_vfp3, true,
+            "enable use of VFP3 instructions if available")
 
 // bootstrapper.cc
 DEFINE_string(expose_natives_as, NULL, "expose natives in global object")
diff --git a/src/platform-linux.cc b/src/platform-linux.cc
index fe4c31f51..9564e8d66 100644
--- a/src/platform-linux.cc
+++ b/src/platform-linux.cc
@@ -89,6 +89,46 @@ double OS::nan_value() {
 }
 
 
+bool OS::fgrep_vfp(const char* file_name, const char* string) {
+  // Simple detection of VFP at runtime for Linux.
+  // It is based on /proc/cpuinfo, which reveals hardware configuration
+  // to user-space applications.  According to ARM (mid 2009), no similar
+  // facility is universally available on the ARM architectures,
+  // so it's up to individual OSes to provide such.
+  //
+  // This is written as a straight shot one pass parser
+  // and not using STL string and ifstream because,
+  // on Linux, it's reading from a (non-mmap-able)
+  // character special device.
+
+  FILE* f = NULL;
+
+  if (NULL == (f = fopen(file_name, "r")))
+    return false;
+
+  const char* what = string;
+  int k;
+  while (EOF != (k = fgetc(f))) {
+    if (k == *what) {
+      ++what;
+      while ((*what != '\0') && (*what == fgetc(f))) {
+        ++what;
+      }
+      if (*what == '\0') {
+        fclose(f);
+        return true;
+      } else {
+        what = string;
+      }
+    }
+  }
+  fclose(f);
+
+  // Did not find string in the file file_name.
+  return false;
+}
+
+
 int OS::ActivationFrameAlignment() {
 #ifdef V8_TARGET_ARCH_ARM
   // On EABI ARM targets this is required for fp correctness in the
diff --git a/src/platform.h b/src/platform.h
index fefe4b856..2f4953bbf 100644
--- a/src/platform.h
+++ b/src/platform.h
@@ -250,6 +250,9 @@ class OS {
   // Returns the double constant NAN
   static double nan_value();
 
+  // Support runtime detection of VFP3 on linux platforms.
+  static bool fgrep_vfp(const char * file_name, const char * string);
+
   // Returns the activation frame alignment constraint or zero if
   // the platform doesn't care. Guaranteed to be a power of two.
   static int ActivationFrameAlignment();
-- 
2.34.1