From 0d94d7c78529f944afa5b8a682ab3a2b1706de8b Mon Sep 17 00:00:00 2001 From: "erik.corry@gmail.com" Date: Thu, 12 Nov 2009 13:04:02 +0000 Subject: [PATCH] Add vfp support on ARM. Patch from John Jozwiak. Review URL: http://codereview.chromium.org/348019 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@3292 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- AUTHORS | 1 + src/arm/assembler-arm.cc | 263 +++++++++++++++++++++++++++++++++++++++ src/arm/assembler-arm.h | 135 ++++++++++++++++++++ src/arm/codegen-arm.cc | 225 ++++++++++++++++++++++++--------- src/arm/constants-arm.cc | 22 ++++ src/arm/constants-arm.h | 22 ++++ src/arm/cpu-arm.cc | 3 +- src/arm/disasm-arm.cc | 201 +++++++++++++++++++++++++++++- src/arm/simulator-arm.cc | 317 ++++++++++++++++++++++++++++++++++++++++++++++- src/arm/simulator-arm.h | 43 ++++++- src/flag-definitions.h | 2 + src/platform-linux.cc | 40 ++++++ src/platform.h | 3 + 13 files changed, 1204 insertions(+), 73 deletions(-) diff --git a/AUTHORS b/AUTHORS index de8cabb..4fd7aa5 100644 --- a/AUTHORS +++ b/AUTHORS @@ -19,3 +19,4 @@ Rafal Krypa Rene Rebe Ryan Dahl Patrick Gansterer +John Jozwiak diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc index 39bb3ee..8eb375f 100644 --- a/src/arm/assembler-arm.cc +++ b/src/arm/assembler-arm.cc @@ -42,6 +42,34 @@ namespace v8 { namespace internal { +// Safe default is no features. +uint64_t CpuFeatures::supported_ = 0; +uint64_t CpuFeatures::enabled_ = 0; + +void CpuFeatures::Probe() { + // Perform runtime detection of VFP. + static const char* descriptive_file_linux = "/proc/cpuinfo"; + + #if !defined(__arm__) || (defined(__VFP_FP__) && !defined(__SOFTFP__)) + // The supported & enabled flags for VFP are set to true for the following + // conditions, even without runtime detection of VFP: + // (1) For the simulator=arm build, always use VFP since + // the arm simulator has VFP support. 
+ // (2) If V8 is being compiled with GCC with the vfp option turned on, + // always use VFP since the build system assumes that V8 will run on + // a platform that has VFP hardware. + supported_ |= static_cast<uint64_t>(1) << VFP3; + enabled_ |= static_cast<uint64_t>(1) << VFP3; + #endif + + if (OS::fgrep_vfp(descriptive_file_linux, "vfp")) { + // This implementation also sets the VFP flags if + // runtime detection of VFP returns true. + supported_ |= static_cast<uint64_t>(1) << VFP3; + enabled_ |= static_cast<uint64_t>(1) << VFP3; + } +} + // ----------------------------------------------------------------------------- // Implementation of Register and CRegister @@ -84,6 +112,57 @@ CRegister cr13 = { 13 }; CRegister cr14 = { 14 }; CRegister cr15 = { 15 }; +// Support for the VFP registers s0 to s31 (d0 to d15). +// Note that "sN:sM" is the same as "dN/2". +Register s0 = { 0 }; +Register s1 = { 1 }; +Register s2 = { 2 }; +Register s3 = { 3 }; +Register s4 = { 4 }; +Register s5 = { 5 }; +Register s6 = { 6 }; +Register s7 = { 7 }; +Register s8 = { 8 }; +Register s9 = { 9 }; +Register s10 = { 10 }; +Register s11 = { 11 }; +Register s12 = { 12 }; +Register s13 = { 13 }; +Register s14 = { 14 }; +Register s15 = { 15 }; +Register s16 = { 16 }; +Register s17 = { 17 }; +Register s18 = { 18 }; +Register s19 = { 19 }; +Register s20 = { 20 }; +Register s21 = { 21 }; +Register s22 = { 22 }; +Register s23 = { 23 }; +Register s24 = { 24 }; +Register s25 = { 25 }; +Register s26 = { 26 }; +Register s27 = { 27 }; +Register s28 = { 28 }; +Register s29 = { 29 }; +Register s30 = { 30 }; +Register s31 = { 31 }; + +Register d0 = { 0 }; +Register d1 = { 1 }; +Register d2 = { 2 }; +Register d3 = { 3 }; +Register d4 = { 4 }; +Register d5 = { 5 }; +Register d6 = { 6 }; +Register d7 = { 7 }; +Register d8 = { 8 }; +Register d9 = { 9 }; +Register d10 = { 10 }; +Register d11 = { 11 }; +Register d12 = { 12 }; +Register d13 = { 13 }; +Register d14 = { 14 }; +Register d15 = { 15 }; // 
----------------------------------------------------------------------------- // Implementation of RelocInfo @@ -203,10 +282,14 @@ enum { B4 = 1 << 4, B5 = 1 << 5, + B6 = 1 << 6, B7 = 1 << 7, B8 = 1 << 8, + B9 = 1 << 9, B12 = 1 << 12, B16 = 1 << 16, + B18 = 1 << 18, + B19 = 1 << 19, B20 = 1 << 20, B21 = 1 << 21, B22 = 1 << 22, @@ -1281,6 +1364,186 @@ void Assembler::stc2(Coprocessor coproc, stc(coproc, crd, rn, option, l, static_cast<Condition>(nv)); } +// Support for VFP. +void Assembler::fmdrr(const Register dst, + const Register src1, + const Register src2, + const SBit s, + const Condition cond) { + // Dm = <Rt,Rt2>. + // Instruction details available in ARM DDI 0406A, A8-646. + // cond(31-28) | 1100(27-24)| 010(23-21) | op=0(20) | Rt2(19-16) | + // Rt(15-12) | 1011(11-8) | 00(7-6) | M(5) | 1(4) | Vm + + ASSERT(!src1.is(pc) && !src2.is(pc)); + emit(cond | 0xC*B24 | B22 | src2.code()*B16 | + src1.code()*B12 | 0xB*B8 | B4 | dst.code()); +} + + +void Assembler::fmrrd(const Register dst1, + const Register dst2, + const Register src, + const SBit s, + const Condition cond) { + // <Rt,Rt2> = Dm. + // Instruction details available in ARM DDI 0406A, A8-646. + // cond(31-28) | 1100(27-24)| 010(23-21) | op=1(20) | Rt2(19-16) | + // Rt(15-12) | 1011(11-8) | 00(7-6) | M(5) | 1(4) | Vm + + ASSERT(!dst1.is(pc) && !dst2.is(pc)); + emit(cond | 0xC*B24 | B22 | B20 | dst2.code()*B16 | + dst1.code()*B12 | 0xB*B8 | B4 | src.code()); +} + + +void Assembler::fmsr(const Register dst, + const Register src, + const SBit s, + const Condition cond) { + // Sn = Rt. + // Instruction details available in ARM DDI 0406A, A8-642. + // cond(31-28) | 1110(27-24)| 000(23-21) | op=0(20) | Vn(19-16) | + // Rt(15-12) | 1010(11-8) | N(7)=0 | 00(6-5) | 1(4) | 0000(3-0) + + ASSERT(!src.is(pc)); + emit(cond | 0xE*B24 | (dst.code() >> 1)*B16 | + src.code()*B12 | 0xA*B8 | (0x1 & dst.code())*B7 | B4); +} + + +void Assembler::fmrs(const Register dst, + const Register src, + const SBit s, + const Condition cond) { + // Rt = Sn. 
+ // Instruction details available in ARM DDI 0406A, A8-642. + // cond(31-28) | 1110(27-24)| 000(23-21) | op=1(20) | Vn(19-16) | + // Rt(15-12) | 1010(11-8) | N(7)=0 | 00(6-5) | 1(4) | 0000(3-0) + + ASSERT(!dst.is(pc)); + emit(cond | 0xE*B24 | B20 | (src.code() >> 1)*B16 | + dst.code()*B12 | 0xA*B8 | (0x1 & src.code())*B7 | B4); +} + + +void Assembler::fsitod(const Register dst, + const Register src, + const SBit s, + const Condition cond) { + // Dd = Sm (integer in Sm converted to IEEE 64-bit doubles in Dd). + // Instruction details available in ARM DDI 0406A, A8-576. + // cond(31-28) | 11101(27-23)| D=?(22) | 11(21-20) | 1(19) |opc2=000(18-16) | + // Vd(15-12) | 101(11-9) | sz(8)=1 | op(7)=1 | 1(6) | M=?(5) | 0(4) | Vm(3-0) + + emit(cond | 0xE*B24 | B23 | 0x3*B20 | B19 | + dst.code()*B12 | 0x5*B9 | B8 | B7 | B6 | + (0x1 & src.code())*B5 | (src.code() >> 1)); +} + + +void Assembler::ftosid(const Register dst, + const Register src, + const SBit s, + const Condition cond) { + // Sd = Dm (IEEE 64-bit doubles in Dm converted to 32 bit integer in Sd). + // Instruction details available in ARM DDI 0406A, A8-576. + // cond(31-28) | 11101(27-23)| D=?(22) | 11(21-20) | 1(19) | opc2=101(18-16)| + // Vd(15-12) | 101(11-9) | sz(8)=1 | op(7)=? | 1(6) | M=?(5) | 0(4) | Vm(3-0) + + emit(cond | 0xE*B24 | B23 |(0x1 & dst.code())*B22 | + 0x3*B20 | B19 | 0x5*B16 | (dst.code() >> 1)*B12 | + 0x5*B9 | B8 | B7 | B6 | src.code()); +} + + +void Assembler::faddd(const Register dst, + const Register src1, + const Register src2, + const SBit s, + const Condition cond) { + // Dd = faddd(Dn, Dm) double precision floating point addition. + // Dd = D:Vd; Dm=M:Vm; Dn=N:Vm. + // Instruction details available in ARM DDI 0406A, A8-536. 
+ // cond(31-28) | 11100(27-23)| D=?(22) | 11(21-20) | Vn(19-16) | + // Vd(15-12) | 101(11-9) | sz(8)=1 | N(7)=0 | 0(6) | M=?(5) | 0(4) | Vm(3-0) + + emit(cond | 0xE*B24 | 0x3*B20 | src1.code()*B16 | + dst.code()*B12 | 0x5*B9 | B8 | src2.code()); +} + + +void Assembler::fsubd(const Register dst, + const Register src1, + const Register src2, + const SBit s, + const Condition cond) { + // Dd = fsubd(Dn, Dm) double precision floating point subtraction. + // Dd = D:Vd; Dm=M:Vm; Dn=N:Vm. + // Instruction details available in ARM DDI 0406A, A8-784. + // cond(31-28) | 11100(27-23)| D=?(22) | 11(21-20) | Vn(19-16) | + // Vd(15-12) | 101(11-9) | sz(8)=1 | N(7)=0 | 1(6) | M=?(5) | 0(4) | Vm(3-0) + + emit(cond | 0xE*B24 | 0x3*B20 | src1.code()*B16 | + dst.code()*B12 | 0x5*B9 | B8 | B6 | src2.code()); +} + + +void Assembler::fmuld(const Register dst, + const Register src1, + const Register src2, + const SBit s, + const Condition cond) { + // Dd = fmuld(Dn, Dm) double precision floating point multiplication. + // Dd = D:Vd; Dm=M:Vm; Dn=N:Vm. + // Instruction details available in ARM DDI 0406A, A8-784. + // cond(31-28) | 11100(27-23)| D=?(22) | 10(21-20) | Vn(19-16) | + // Vd(15-12) | 101(11-9) | sz(8)=1 | N(7)=0 | 0(6) | M=?(5) | 0(4) | Vm(3-0) + + emit(cond | 0xE*B24 | 0x2*B20 | src1.code()*B16 | + dst.code()*B12 | 0x5*B9 | B8 | src2.code()); +} + + +void Assembler::fdivd(const Register dst, + const Register src1, + const Register src2, + const SBit s, + const Condition cond) { + // Dd = fdivd(Dn, Dm) double precision floating point division. + // Dd = D:Vd; Dm=M:Vm; Dn=N:Vm. + // Instruction details available in ARM DDI 0406A, A8-584. + // cond(31-28) | 11101(27-23)| D=?(22) | 00(21-20) | Vn(19-16) | + // Vd(15-12) | 101(11-9) | sz(8)=1 | N(7)=? 
| 0(6) | M=?(5) | 0(4) | Vm(3-0) + + emit(cond | 0xE*B24 | B23 | src1.code()*B16 | + dst.code()*B12 | 0x5*B9 | B8 | src2.code()); +} + + +void Assembler::fcmp(const Register src1, + const Register src2, + const SBit s, + const Condition cond) { + // vcmp(Dd, Dm) double precision floating point comparison. + // Instruction details available in ARM DDI 0406A, A8-570. + // cond(31-28) | 11101 (27-23)| D=?(22) | 11 (21-20) | 0100 (19-16) | + // Vd(15-12) | 101(11-9) | sz(8)=1 | E(7)=? | 1(6) | M(5)=? | 0(4) | Vm(3-0) + + emit(cond | 0xE*B24 |B23 | 0x3*B20 | B18 | + src1.code()*B12 | 0x5*B9 | B8 | B6 | src2.code()); +} + + +void Assembler::vmrs(Register dst, Condition cond) { + // Instruction details available in ARM DDI 0406A, A8-652. + // cond(31-28) | 1110 (27-24) | 1111(23-20)| 0001 (19-16) | + // Rt(15-12) | 1010 (11-8) | 0(7) | 00 (6-5) | 1(4) | 0000(3-0) + + emit(cond | 0xE*B24 | 0xF*B20 | B16 | + dst.code()*B12 | 0xA*B8 | B4); +} + // Pseudo instructions void Assembler::lea(Register dst, diff --git a/src/arm/assembler-arm.h b/src/arm/assembler-arm.h index 0c791b3..886f206 100644 --- a/src/arm/assembler-arm.h +++ b/src/arm/assembler-arm.h @@ -102,6 +102,57 @@ extern Register sp; extern Register lr; extern Register pc; +// Support for VFP registers s0 to s31 (d0 to d15). +// Note that "sN:sM" is the same as "dN/2". 
+extern Register s0; +extern Register s1; +extern Register s2; +extern Register s3; +extern Register s4; +extern Register s5; +extern Register s6; +extern Register s7; +extern Register s8; +extern Register s9; +extern Register s10; +extern Register s11; +extern Register s12; +extern Register s13; +extern Register s14; +extern Register s15; +extern Register s16; +extern Register s17; +extern Register s18; +extern Register s19; +extern Register s20; +extern Register s21; +extern Register s22; +extern Register s23; +extern Register s24; +extern Register s25; +extern Register s26; +extern Register s27; +extern Register s28; +extern Register s29; +extern Register s30; +extern Register s31; + +extern Register d0; +extern Register d1; +extern Register d2; +extern Register d3; +extern Register d4; +extern Register d5; +extern Register d6; +extern Register d7; +extern Register d8; +extern Register d9; +extern Register d10; +extern Register d11; +extern Register d12; +extern Register d13; +extern Register d14; +extern Register d15; // Coprocessor register struct CRegister { @@ -372,6 +423,30 @@ class MemOperand BASE_EMBEDDED { friend class Assembler; }; +// CpuFeatures keeps track of which features are supported by the target CPU. +// Supported features must be enabled by a Scope before use. +class CpuFeatures : public AllStatic { + public: + enum Feature { VFP3 = 1 }; + // Detect features of the target CPU. Set safe defaults if the serializer + // is enabled (snapshots must be portable). + static void Probe(); + // Check whether a feature is supported by the target CPU. + static bool IsSupported(Feature f) { + if (f == VFP3 && !FLAG_enable_vfp3) return false; + + return (supported_ & (static_cast<uint64_t>(1) << f)) != 0; + } + // Check whether a feature is currently enabled. 
+ static bool IsEnabled(Feature f) { + return (enabled_ & (static_cast<uint64_t>(1) << f)) != 0; + } + + private: + static uint64_t supported_; + static uint64_t enabled_; +}; + typedef int32_t Instr; @@ -655,6 +730,66 @@ class Assembler : public Malloced { void stc2(Coprocessor coproc, CRegister crd, Register base, int option, LFlag l = Short); // v5 and above + // Support for VFP. + // All these APIs support S0 to S31 and D0 to D15. + // Currently these APIs do not support extended D registers, i.e, D16 to D31. + // However, some simple modifications can allow + // these APIs to support D16 to D31. + + void fmdrr(const Register dst, + const Register src1, + const Register src2, + const SBit s = LeaveCC, + const Condition cond = al); + void fmrrd(const Register dst1, + const Register dst2, + const Register src, + const SBit s = LeaveCC, + const Condition cond = al); + void fmsr(const Register dst, + const Register src, + const SBit s = LeaveCC, + const Condition cond = al); + void fmrs(const Register dst, + const Register src, + const SBit s = LeaveCC, + const Condition cond = al); + void fsitod(const Register dst, + const Register src, + const SBit s = LeaveCC, + const Condition cond = al); + void ftosid(const Register dst, + const Register src, + const SBit s = LeaveCC, + const Condition cond = al); + + void faddd(const Register dst, + const Register src1, + const Register src2, + const SBit s = LeaveCC, + const Condition cond = al); + void fsubd(const Register dst, + const Register src1, + const Register src2, + const SBit s = LeaveCC, + const Condition cond = al); + void fmuld(const Register dst, + const Register src1, + const Register src2, + const SBit s = LeaveCC, + const Condition cond = al); + void fdivd(const Register dst, + const Register src1, + const Register src2, + const SBit s = LeaveCC, + const Condition cond = al); + void fcmp(const Register src1, + const Register src2, + const SBit s = LeaveCC, + const Condition cond = al); + void vmrs(const Register dst, 
+ const Condition cond = al); + // Pseudo instructions void nop() { mov(r0, Operand(r0)); } diff --git a/src/arm/codegen-arm.cc b/src/arm/codegen-arm.cc index 2165f4c..92717f7 100644 --- a/src/arm/codegen-arm.cc +++ b/src/arm/codegen-arm.cc @@ -4599,6 +4599,21 @@ static void EmitIdenticalObjectComparison(MacroAssembler* masm, } +static void IntegerToDoubleConversionWithVFP3(MacroAssembler* masm, + Register inReg, + Register outHighReg, + Register outLowReg) { + // ARMv7 VFP3 instructions to implement integer to double conversion. + // This VFP3 implementation is known to work + // on ARMv7-VFP3 Snapdragon processor. + + __ mov(r7, Operand(inReg, ASR, kSmiTagSize)); + __ fmsr(s15, r7); + __ fsitod(d7, s15); + __ fmrrd(outLowReg, outHighReg, d7); +} + + // See comment at call site. static void EmitSmiNonsmiComparison(MacroAssembler* masm, Label* rhs_not_nan, @@ -4622,9 +4637,16 @@ static void EmitSmiNonsmiComparison(MacroAssembler* masm, // Rhs is a smi, lhs is a number. __ push(lr); - __ mov(r7, Operand(r1)); - ConvertToDoubleStub stub1(r3, r2, r7, r6); - __ Call(stub1.GetCode(), RelocInfo::CODE_TARGET); + + if (CpuFeatures::IsSupported(CpuFeatures::VFP3)) { + IntegerToDoubleConversionWithVFP3(masm, r1, r3, r2); + } else { + __ mov(r7, Operand(r1)); + ConvertToDoubleStub stub1(r3, r2, r7, r6); + __ Call(stub1.GetCode(), RelocInfo::CODE_TARGET); + } + + // r3 and r2 are rhs as double. 
+ __ ldr(r1, FieldMemOperand(r0, HeapNumber::kValueOffset + kPointerSize)); __ ldr(r0, FieldMemOperand(r0, HeapNumber::kValueOffset)); @@ -4652,9 +4674,15 @@ static void EmitSmiNonsmiComparison(MacroAssembler* masm, __ push(lr); __ ldr(r2, FieldMemOperand(r1, HeapNumber::kValueOffset)); __ ldr(r3, FieldMemOperand(r1, HeapNumber::kValueOffset + kPointerSize)); - __ mov(r7, Operand(r0)); - ConvertToDoubleStub stub2(r1, r0, r7, r6); - __ Call(stub2.GetCode(), RelocInfo::CODE_TARGET); + + if (CpuFeatures::IsSupported(CpuFeatures::VFP3)) { + IntegerToDoubleConversionWithVFP3(masm, r0, r1, r0); + } else { + __ mov(r7, Operand(r0)); + ConvertToDoubleStub stub2(r1, r0, r7, r6); + __ Call(stub2.GetCode(), RelocInfo::CODE_TARGET); + } + __ pop(lr); // Fall through to both_loaded_as_doubles. } @@ -4857,9 +4885,25 @@ void CompareStub::Generate(MacroAssembler* masm) { // fall through if neither is a NaN. Also binds rhs_not_nan. EmitNanCheck(masm, &rhs_not_nan, cc_); - // Compares two doubles in r0, r1, r2, r3 that are not NaNs. Returns the - // answer. Never falls through. - EmitTwoNonNanDoubleComparison(masm, cc_); + if (CpuFeatures::IsSupported(CpuFeatures::VFP3)) { + // ARMv7 VFP3 instructions to implement double precision comparison. + // This VFP3 implementation is known to work on + // ARMv7-VFP3 Snapdragon processor. + + __ fmdrr(d6, r0, r1); + __ fmdrr(d7, r2, r3); + + __ fcmp(d6, d7); + __ vmrs(pc); + __ mov(r0, Operand(0), LeaveCC, eq); + __ mov(r0, Operand(1), LeaveCC, lt); + __ mvn(r0, Operand(0), LeaveCC, gt); + __ mov(pc, Operand(lr)); + } else { + // Compares two doubles in r0, r1, r2, r3 that are not NaNs. Returns the + // answer. Never falls through. + EmitTwoNonNanDoubleComparison(masm, cc_); + } __ bind(&not_smis); // At this point we know we are dealing with two different objects, @@ -4959,16 +5003,23 @@ static void HandleBinaryOpSlowCases(MacroAssembler* masm, // Since both are Smis there is no heap number to overwrite, so allocate. 
// The new heap number is in r5. r6 and r7 are scratch. AllocateHeapNumber(masm, &slow, r5, r6, r7); - // Write Smi from r0 to r3 and r2 in double format. r6 is scratch. - __ mov(r7, Operand(r0)); - ConvertToDoubleStub stub1(r3, r2, r7, r6); - __ push(lr); - __ Call(stub1.GetCode(), RelocInfo::CODE_TARGET); - // Write Smi from r1 to r1 and r0 in double format. r6 is scratch. - __ mov(r7, Operand(r1)); - ConvertToDoubleStub stub2(r1, r0, r7, r6); - __ Call(stub2.GetCode(), RelocInfo::CODE_TARGET); - __ pop(lr); + + if (CpuFeatures::IsSupported(CpuFeatures::VFP3)) { + IntegerToDoubleConversionWithVFP3(masm, r0, r3, r2); + IntegerToDoubleConversionWithVFP3(masm, r1, r1, r0); + } else { + // Write Smi from r0 to r3 and r2 in double format. r6 is scratch. + __ mov(r7, Operand(r0)); + ConvertToDoubleStub stub1(r3, r2, r7, r6); + __ push(lr); + __ Call(stub1.GetCode(), RelocInfo::CODE_TARGET); + // Write Smi from r1 to r1 and r0 in double format. r6 is scratch. + __ mov(r7, Operand(r1)); + ConvertToDoubleStub stub2(r1, r0, r7, r6); + __ Call(stub2.GetCode(), RelocInfo::CODE_TARGET); + __ pop(lr); + } + __ jmp(&do_the_call); // Tail call. No return. // We jump to here if something goes wrong (one param is not a number of any @@ -5004,12 +5055,19 @@ static void HandleBinaryOpSlowCases(MacroAssembler* masm, // We can't overwrite a Smi so get address of new heap number into r5. AllocateHeapNumber(masm, &slow, r5, r6, r7); } - // Write Smi from r0 to r3 and r2 in double format. - __ mov(r7, Operand(r0)); - ConvertToDoubleStub stub3(r3, r2, r7, r6); - __ push(lr); - __ Call(stub3.GetCode(), RelocInfo::CODE_TARGET); - __ pop(lr); + + + if (CpuFeatures::IsSupported(CpuFeatures::VFP3)) { + IntegerToDoubleConversionWithVFP3(masm, r0, r3, r2); + } else { + // Write Smi from r0 to r3 and r2 in double format. 
+ __ mov(r7, Operand(r0)); + ConvertToDoubleStub stub3(r3, r2, r7, r6); + __ push(lr); + __ Call(stub3.GetCode(), RelocInfo::CODE_TARGET); + __ pop(lr); + } + __ bind(&finished_loading_r0); // Move r1 to a double in r0-r1. @@ -5029,12 +5087,18 @@ static void HandleBinaryOpSlowCases(MacroAssembler* masm, // We can't overwrite a Smi so get address of new heap number into r5. AllocateHeapNumber(masm, &slow, r5, r6, r7); } - // Write Smi from r1 to r1 and r0 in double format. - __ mov(r7, Operand(r1)); - ConvertToDoubleStub stub4(r1, r0, r7, r6); - __ push(lr); - __ Call(stub4.GetCode(), RelocInfo::CODE_TARGET); - __ pop(lr); + + if (CpuFeatures::IsSupported(CpuFeatures::VFP3)) { + IntegerToDoubleConversionWithVFP3(masm, r1, r1, r0); + } else { + // Write Smi from r1 to r1 and r0 in double format. + __ mov(r7, Operand(r1)); + ConvertToDoubleStub stub4(r1, r0, r7, r6); + __ push(lr); + __ Call(stub4.GetCode(), RelocInfo::CODE_TARGET); + __ pop(lr); + } + __ bind(&finished_loading_r1); __ bind(&do_the_call); @@ -5043,6 +5107,33 @@ static void HandleBinaryOpSlowCases(MacroAssembler* masm, // r2: Right value (least significant part of mantissa). // r3: Right value (sign, exponent, top of mantissa). // r5: Address of heap number for result. + + if (CpuFeatures::IsSupported(CpuFeatures::VFP3) && + ((Token::MUL == operation) || + (Token::DIV == operation) || + (Token::ADD == operation) || + (Token::SUB == operation))) { + // ARMv7 VFP3 instructions to implement + // double precision, add, subtract, multiply, divide. 
+ // This VFP3 implementation is known to work on + // ARMv7-VFP3 Snapdragon processor + + __ fmdrr(d6, r0, r1); + __ fmdrr(d7, r2, r3); + + if (Token::MUL == operation) __ fmuld(d5, d6, d7); + else if (Token::DIV == operation) __ fdivd(d5, d6, d7); + else if (Token::ADD == operation) __ faddd(d5, d6, d7); + else if (Token::SUB == operation) __ fsubd(d5, d6, d7); + + __ fmrrd(r0, r1, d5); + + __ str(r0, FieldMemOperand(r5, HeapNumber::kValueOffset)); + __ str(r1, FieldMemOperand(r5, HeapNumber::kValueOffset + 4)); + __ mov(r0, Operand(r5)); + __ mov(pc, lr); + return; + } __ push(lr); // For later. __ push(r5); // Address of heap number that is answer. __ AlignStack(0); @@ -5111,38 +5202,50 @@ static void GetInt32(MacroAssembler* masm, __ sub(scratch2, scratch2, Operand(zero_exponent), SetCC); // Dest already has a Smi zero. __ b(lt, &done); - // We have a shifted exponent between 0 and 30 in scratch2. - __ mov(dest, Operand(scratch2, LSR, HeapNumber::kExponentShift)); - // We now have the exponent in dest. Subtract from 30 to get - // how much to shift down. - __ rsb(dest, dest, Operand(30)); - + if (!CpuFeatures::IsSupported(CpuFeatures::VFP3)) { + // We have a shifted exponent between 0 and 30 in scratch2. + __ mov(dest, Operand(scratch2, LSR, HeapNumber::kExponentShift)); + // We now have the exponent in dest. Subtract from 30 to get + // how much to shift down. + __ rsb(dest, dest, Operand(30)); + } __ bind(&right_exponent); - // Get the top bits of the mantissa. - __ and_(scratch2, scratch, Operand(HeapNumber::kMantissaMask)); - // Put back the implicit 1. - __ orr(scratch2, scratch2, Operand(1 << HeapNumber::kExponentShift)); - // Shift up the mantissa bits to take up the space the exponent used to take. - // We just orred in the implicit bit so that took care of one and we want to - // leave the sign bit 0 so we subtract 2 bits from the shift distance. 
- const int shift_distance = HeapNumber::kNonMantissaBitsInTopWord - 2; - __ mov(scratch2, Operand(scratch2, LSL, shift_distance)); - // Put sign in zero flag. - __ tst(scratch, Operand(HeapNumber::kSignMask)); - // Get the second half of the double. For some exponents we don't actually - // need this because the bits get shifted out again, but it's probably slower - // to test than just to do it. - __ ldr(scratch, FieldMemOperand(source, HeapNumber::kMantissaOffset)); - // Shift down 22 bits to get the last 10 bits. - __ orr(scratch, scratch2, Operand(scratch, LSR, 32 - shift_distance)); - // Move down according to the exponent. - __ mov(dest, Operand(scratch, LSR, dest)); - // Fix sign if sign bit was set. - __ rsb(dest, dest, Operand(0), LeaveCC, ne); + if (CpuFeatures::IsSupported(CpuFeatures::VFP3)) { + // ARMv7 VFP3 instructions implementing double precision to integer + // conversion using round to zero. + // This VFP3 implementation is known to work on + // ARMv7-VFP3 Snapdragon processor. + __ ldr(scratch2, FieldMemOperand(source, HeapNumber::kMantissaOffset)); + __ fmdrr(d7, scratch2, scratch); + __ ftosid(s15, d7); + __ fmrs(dest, s15); + } else { + // Get the top bits of the mantissa. + __ and_(scratch2, scratch, Operand(HeapNumber::kMantissaMask)); + // Put back the implicit 1. + __ orr(scratch2, scratch2, Operand(1 << HeapNumber::kExponentShift)); + // Shift up the mantissa bits to take up the space the exponent used to + // take. We just orred in the implicit bit so that took care of one and + // we want to leave the sign bit 0 so we subtract 2 bits from the shift + // distance. + const int shift_distance = HeapNumber::kNonMantissaBitsInTopWord - 2; + __ mov(scratch2, Operand(scratch2, LSL, shift_distance)); + // Put sign in zero flag. + __ tst(scratch, Operand(HeapNumber::kSignMask)); + // Get the second half of the double. 
For some exponents we don't + // actually need this because the bits get shifted out again, but + // it's probably slower to test than just to do it. + __ ldr(scratch, FieldMemOperand(source, HeapNumber::kMantissaOffset)); + // Shift down 22 bits to get the last 10 bits. + __ orr(scratch, scratch2, Operand(scratch, LSR, 32 - shift_distance)); + // Move down according to the exponent. + __ mov(dest, Operand(scratch, LSR, dest)); + // Fix sign if sign bit was set. + __ rsb(dest, dest, Operand(0), LeaveCC, ne); + } __ bind(&done); } - // For bitwise ops where the inputs are not both Smis we here try to determine // whether both inputs are either Smis or at least heap numbers that can be // represented by a 32 bit signed value. We truncate towards zero as required @@ -5159,7 +5262,7 @@ void GenericBinaryOpStub::HandleNonSmiBitwiseOp(MacroAssembler* masm) { __ b(eq, &r1_is_smi); // It's a Smi so don't check it's a heap number. __ CompareObjectType(r1, r4, r4, HEAP_NUMBER_TYPE); __ b(ne, &slow); - GetInt32(masm, r1, r3, r4, r5, &slow); + GetInt32(masm, r1, r3, r5, r4, &slow); __ jmp(&done_checking_r1); __ bind(&r1_is_smi); __ mov(r3, Operand(r1, ASR, 1)); @@ -5169,7 +5272,7 @@ void GenericBinaryOpStub::HandleNonSmiBitwiseOp(MacroAssembler* masm) { __ b(eq, &r0_is_smi); // It's a Smi so don't check it's a heap number. __ CompareObjectType(r0, r4, r4, HEAP_NUMBER_TYPE); __ b(ne, &slow); - GetInt32(masm, r0, r2, r4, r5, &slow); + GetInt32(masm, r0, r2, r5, r4, &slow); __ jmp(&done_checking_r0); __ bind(&r0_is_smi); __ mov(r2, Operand(r0, ASR, 1)); diff --git a/src/arm/constants-arm.cc b/src/arm/constants-arm.cc index 964bfe1..4802af9 100644 --- a/src/arm/constants-arm.cc +++ b/src/arm/constants-arm.cc @@ -66,6 +66,28 @@ const char* Registers::Name(int reg) { return result; } +// Support for VFP registers s0 to s31 (d0 to d15). +// Note that "sN:sM" is the same as "dN/2" +// These register names are defined in a way to match the native disassembler +// formatting. 
See for example the command "objdump -d ". +const char* VFPRegisters::names_[kNumVFPRegisters] = { + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", + "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", + "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", + "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", +}; + +const char* VFPRegisters::Name(int reg) { + const char* result; + if ((0 <= reg) && (reg < kNumVFPRegisters)) { + result = names_[reg]; + } else { + result = "no_vfp_reg"; + } + return result; +} int Registers::Number(const char* name) { // Look through the canonical names. diff --git a/src/arm/constants-arm.h b/src/arm/constants-arm.h index 6bd0d00..d173a8a 100644 --- a/src/arm/constants-arm.h +++ b/src/arm/constants-arm.h @@ -75,6 +75,9 @@ namespace arm { // Number of registers in normal ARM mode. static const int kNumRegisters = 16; +// VFP support. +static const int kNumVFPRegisters = 48; + // PC is register 15. static const int kPCRegister = 15; static const int kNoRegister = -1; @@ -231,6 +234,16 @@ class Instr { inline int RnField() const { return Bits(19, 16); } inline int RdField() const { return Bits(15, 12); } + // Support for VFP. + // Vn(19-16) | Vd(15-12) | Vm(3-0) + inline int VnField() const { return Bits(19, 16); } + inline int VmField() const { return Bits(3, 0); } + inline int VdField() const { return Bits(15, 12); } + inline int NField() const { return Bit(7); } + inline int MField() const { return Bit(5); } + inline int DField() const { return Bit(22); } + inline int RtField() const { return Bits(15, 12); } + // Fields used in Data processing instructions inline Opcode OpcodeField() const { return static_cast(Bits(24, 21)); @@ -315,6 +328,15 @@ class Registers { static const RegisterAlias aliases_[]; }; +// Helper functions for converting between VFP register numbers and names. 
+class VFPRegisters { + public: + // Return the name of the register. + static const char* Name(int reg); + + private: + static const char* names_[kNumVFPRegisters]; +}; } } // namespace assembler::arm diff --git a/src/arm/cpu-arm.cc b/src/arm/cpu-arm.cc index cafefce..a5a358b 100644 --- a/src/arm/cpu-arm.cc +++ b/src/arm/cpu-arm.cc @@ -33,12 +33,13 @@ #include "v8.h" #include "cpu.h" +#include "macro-assembler.h" namespace v8 { namespace internal { void CPU::Setup() { - // Nothing to do. + CpuFeatures::Probe(); } diff --git a/src/arm/disasm-arm.cc b/src/arm/disasm-arm.cc index 6431483..d7021cf 100644 --- a/src/arm/disasm-arm.cc +++ b/src/arm/disasm-arm.cc @@ -97,6 +97,10 @@ class Decoder { // Printing of common values. void PrintRegister(int reg); + void PrintSRegister(int reg); + void PrintDRegister(int reg); + int FormatVFPRegister(Instr* instr, const char* format); + int FormatVFPinstruction(Instr* instr, const char* format); void PrintCondition(Instr* instr); void PrintShiftRm(Instr* instr); void PrintShiftImm(Instr* instr); @@ -121,6 +125,10 @@ class Decoder { void DecodeType6(Instr* instr); void DecodeType7(Instr* instr); void DecodeUnconditional(Instr* instr); + // For VFP support. + void DecodeTypeVFP(Instr* instr); + void DecodeType6CoprocessorIns(Instr* instr); + const disasm::NameConverter& converter_; v8::internal::Vector out_buffer_; @@ -171,6 +179,16 @@ void Decoder::PrintRegister(int reg) { Print(converter_.NameOfCPURegister(reg)); } +// Print the VFP S register name according to the active name converter. +void Decoder::PrintSRegister(int reg) { + Print(assembler::arm::VFPRegisters::Name(reg)); +} + +// Print the VFP D register name according to the active name converter. +void Decoder::PrintDRegister(int reg) { + Print(assembler::arm::VFPRegisters::Name(reg + 32)); +} + // These shift names are defined in a way to match the native disassembler // formatting. See for example the command "objdump -d ". 
@@ -290,6 +308,10 @@ int Decoder::FormatRegister(Instr* instr, const char* format) { int reg = instr->RmField(); PrintRegister(reg); return 2; + } else if (format[1] == 't') { // 'rt: Rt register + int reg = instr->RtField(); + PrintRegister(reg); + return 2; } else if (format[1] == 'l') { // 'rlist: register list for load and store multiple instructions ASSERT(STRING_STARTS_WITH(format, "rlist")); @@ -314,6 +336,39 @@ int Decoder::FormatRegister(Instr* instr, const char* format) { return -1; } +// Handle all VFP register based formatting in this function to reduce the +// complexity of FormatOption. +int Decoder::FormatVFPRegister(Instr* instr, const char* format) { + ASSERT((format[0] == 'S') || (format[0] == 'D')); + + if (format[1] == 'n') { + int reg = instr->VnField(); + if (format[0] == 'S') PrintSRegister(((reg << 1) | instr->NField())); + if (format[0] == 'D') PrintDRegister(reg); + return 2; + } else if (format[1] == 'm') { + int reg = instr->VmField(); + if (format[0] == 'S') PrintSRegister(((reg << 1) | instr->MField())); + if (format[0] == 'D') PrintDRegister(reg); + return 2; + } else if (format[1] == 'd') { + int reg = instr->VdField(); + if (format[0] == 'S') PrintSRegister(((reg << 1) | instr->DField())); + if (format[0] == 'D') PrintDRegister(reg); + return 2; + } + + UNREACHABLE(); + return -1; +} + +int Decoder::FormatVFPinstruction(Instr* instr, const char* format) { + Print(format); + return 0; +} + + + // FormatOption takes a formatting string and interprets it based on // the current instructions. 
The format string points to the first @@ -459,6 +514,13 @@ int Decoder::FormatOption(Instr* instr, const char* format) { } return 1; } + case 'v': { + return FormatVFPinstruction(instr, format); + } + case 'S': + case 'D': { + return FormatVFPRegister(instr, format); + } case 'w': { // 'w: W field of load and store instructions if (instr->HasW()) { Print("!"); @@ -761,8 +823,7 @@ void Decoder::DecodeType5(Instr* instr) { void Decoder::DecodeType6(Instr* instr) { - // Coprocessor instructions currently not supported. - Unknown(instr); + DecodeType6CoprocessorIns(instr); } @@ -770,12 +831,10 @@ void Decoder::DecodeType7(Instr* instr) { if (instr->Bit(24) == 1) { Format(instr, "swi'cond 'swi"); } else { - // Coprocessor instructions currently not supported. - Unknown(instr); + DecodeTypeVFP(instr); } } - void Decoder::DecodeUnconditional(Instr* instr) { if (instr->Bits(7, 4) == 0xB && instr->Bits(27, 25) == 0 && instr->HasL()) { Format(instr, "'memop'h'pu 'rd, "); @@ -837,6 +896,138 @@ void Decoder::DecodeUnconditional(Instr* instr) { } +// void Decoder::DecodeTypeVFP(Instr* instr) +// Implements the following +// VFP instructions +// fmsr :Sn = Rt +// fmrs :Rt = Sn +// fsitod: Dd = Sm +// ftosid: Sd = Dm +// Dd = faddd(Dn, Dm) +// Dd = fsubd(Dn, Dm) +// Dd = fmuld(Dn, Dm) +// Dd = fdivd(Dn, Dm) +// vcmp(Dd, Dm) +// VMRS +void Decoder::DecodeTypeVFP(Instr* instr) { + ASSERT((instr->TypeField() == 7) && (instr->Bit(24) == 0x0) ); + + if (instr->Bit(23) == 1) { + if ((instr->Bits(21, 19) == 0x7) && + (instr->Bits(18, 16) == 0x5) && + (instr->Bits(11, 9) == 0x5) && + (instr->Bit(8) == 1) && + (instr->Bit(6) == 1) && + (instr->Bit(4) == 0)) { + Format(instr, "vcvt.s32.f64'cond 'Sd, 'Dm"); + } else if ((instr->Bits(21, 19) == 0x7) && + (instr->Bits(18, 16) == 0x0) && + (instr->Bits(11, 9) == 0x5) && + (instr->Bit(8) == 1) && + (instr->Bit(7) == 1) && + (instr->Bit(6) == 1) && + (instr->Bit(4) == 0)) { + Format(instr, "vcvt.f64.s32'cond 'Dd, 'Sm"); + } else if 
((instr->Bit(21) == 0x0) && + (instr->Bit(20) == 0x0) && + (instr->Bits(11, 9) == 0x5) && + (instr->Bit(8) == 1) && + (instr->Bit(6) == 0) && + (instr->Bit(4) == 0)) { + Format(instr, "vdiv.f64'cond 'Dd, 'Dn, 'Dm"); + } else if ((instr->Bits(21, 20) == 0x3) && + (instr->Bits(19, 16) == 0x4) && + (instr->Bits(11, 9) == 0x5) && + (instr->Bit(8) == 0x1) && + (instr->Bit(6) == 0x1) && + (instr->Bit(4) == 0x0)) { + Format(instr, "vcmp.f64'cond 'Dd, 'Dm"); + } else if ((instr->Bits(23, 20) == 0xF) && + (instr->Bits(19, 16) == 0x1) && + (instr->Bits(11, 8) == 0xA) && + (instr->Bits(7, 5) == 0x0) && + (instr->Bit(4) == 0x1) && + (instr->Bits(3, 0) == 0x0)) { + if (instr->Bits(15, 12) == 0xF) + Format(instr, "vmrs'cond APSR, FPSCR"); + else + Unknown(instr); // not used by V8 + } else { + Unknown(instr); // not used by V8 + } + } else if (instr->Bit(21) == 1) { + if ((instr->Bit(20) == 0x1) && + (instr->Bits(11, 9) == 0x5) && + (instr->Bit(8) == 0x1) && + (instr->Bit(6) == 0) && + (instr->Bit(4) == 0)) { + Format(instr, "vadd.f64'cond 'Dd, 'Dn, 'Dm"); + } else if ((instr->Bit(20) == 0x1) && + (instr->Bits(11, 9) == 0x5) && + (instr->Bit(8) == 0x1) && + (instr->Bit(6) == 1) && + (instr->Bit(4) == 0)) { + Format(instr, "vsub.f64'cond 'Dd, 'Dn, 'Dm"); + } else if ((instr->Bit(20) == 0x0) && + (instr->Bits(11, 9) == 0x5) && + (instr->Bit(8) == 0x1) && + (instr->Bit(6) == 0) && + (instr->Bit(4) == 0)) { + Format(instr, "vmul.f64'cond 'Dd, 'Dn, 'Dm"); + } else { + Unknown(instr); // not used by V8 + } + } else { + if ((instr->Bit(20) == 0x0) && + (instr->Bits(11, 8) == 0xA) && + (instr->Bits(6, 5) == 0x0) && + (instr->Bit(4) == 1) && + (instr->Bits(3, 0) == 0x0)) { + Format(instr, "vmov'cond 'Sn, 'rt"); + } else if ((instr->Bit(20) == 0x1) && + (instr->Bits(11, 8) == 0xA) && + (instr->Bits(6, 5) == 0x0) && + (instr->Bit(4) == 1) && + (instr->Bits(3, 0) == 0x0)) { + Format(instr, "vmov'cond 'rt, 'Sn"); + } else { + Unknown(instr); // not used by V8 + } + } +} + + + +// Decode Type 
6 coprocessor instructions +// Dm = fmdrr(Rt, Rt2) +// = fmrrd(Dm) +void Decoder::DecodeType6CoprocessorIns(Instr* instr) { + ASSERT((instr->TypeField() == 6)); + + if (instr->Bit(23) == 1) { + Unknown(instr); // not used by V8 + } else if (instr->Bit(22) == 1) { + if ((instr->Bits(27, 24) == 0xC) && + (instr->Bit(22) == 1) && + (instr->Bits(11, 8) == 0xB) && + (instr->Bits(7, 6) == 0x0) && + (instr->Bit(4) == 1)) { + if (instr->Bit(20) == 0) { + Format(instr, "vmov'cond 'Dm, 'rt, 'rn"); + } else if (instr->Bit(20) == 1) { + Format(instr, "vmov'cond 'rt, 'rn, 'Dm"); + } + } else { + Unknown(instr); // not used by V8 + } + } else if (instr->Bit(21) == 1) { + Unknown(instr); // not used by V8 + } else { + Unknown(instr); // not used by V8 + } +} + + // Disassemble the instruction at *instr_ptr into the output buffer. int Decoder::InstructionDecode(byte* instr_ptr) { Instr* instr = Instr::At(instr_ptr); diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc index 843dab8..ce972a7 100644 --- a/src/arm/simulator-arm.cc +++ b/src/arm/simulator-arm.cc @@ -342,6 +342,11 @@ void Debugger::Debug() { PrintF("Z flag: %d; ", sim_->z_flag_); PrintF("C flag: %d; ", sim_->c_flag_); PrintF("V flag: %d\n", sim_->v_flag_); + PrintF("INVALID OP flag: %d; ", sim_->inv_op_vfp_flag_); + PrintF("DIV BY ZERO flag: %d; ", sim_->div_zero_vfp_flag_); + PrintF("OVERFLOW flag: %d; ", sim_->overflow_vfp_flag_); + PrintF("UNDERFLOW flag: %d; ", sim_->underflow_vfp_flag_); + PrintF("INEXACT flag: %d; ", sim_->inexact_vfp_flag_); } else if (strcmp(cmd, "unstop") == 0) { intptr_t stop_pc = sim_->get_pc() - Instr::kInstrSize; Instr* stop_instr = reinterpret_cast(stop_pc); @@ -429,6 +434,24 @@ Simulator::Simulator() { c_flag_ = false; v_flag_ = false; + // Initializing VFP registers. + // All registers are initialized to zero to start with. 
+ // even though s_registers_ & d_registers_ share the same + // physical registers in the target + for (int i = 0; i < num_s_registers; i++) { + vfp_register[i] = 0; + } + n_flag_FPSCR_ = false; + z_flag_FPSCR_ = false; + c_flag_FPSCR_ = false; + v_flag_FPSCR_ = false; + + inv_op_vfp_flag_ = false; + div_zero_vfp_flag_ = false; + overflow_vfp_flag_ = false; + underflow_vfp_flag_ = false; + inexact_vfp_flag_ = false; + // The sp is initialized to point to the bottom (high address) of the // allocated stack area. To be safe in potential stack underflows we leave // some buffer below. @@ -544,6 +567,91 @@ int32_t Simulator::get_pc() const { return registers_[pc]; } +// Getting from and setting into VFP registers. +void Simulator::set_s_register(int sreg, unsigned int value) { + ASSERT((sreg >= 0) && (sreg < num_s_registers)); + vfp_register[sreg] = value; +} + +unsigned int Simulator::get_s_register(int sreg) const { + ASSERT((sreg >= 0) && (sreg < num_s_registers)); + return vfp_register[sreg]; +} + +void Simulator::set_s_register_from_float(int sreg, const float flt) { + ASSERT((sreg >= 0) && (sreg < num_s_registers)); + // Read the bits from the single precision floating point value + // into the unsigned integer element of vfp_register[] given by index=sreg. + char buffer[sizeof(vfp_register[0])]; + memcpy(buffer, &flt, sizeof(vfp_register[0])); + memcpy(&vfp_register[sreg], buffer, sizeof(vfp_register[0])); +} + +void Simulator::set_s_register_from_sinteger(int sreg, const int sint) { + ASSERT((sreg >= 0) && (sreg < num_s_registers)); + // Read the bits from the integer value + // into the unsigned integer element of vfp_register[] given by index=sreg. 
+ char buffer[sizeof(vfp_register[0])]; + memcpy(buffer, &sint, sizeof(vfp_register[0])); + memcpy(&vfp_register[sreg], buffer, sizeof(vfp_register[0])); +} + +void Simulator::set_d_register_from_double(int dreg, const double& dbl) { + ASSERT((dreg >= 0) && (dreg < num_d_registers)); + // Read the bits from the double precision floating point value + // into the two consecutive unsigned integer elements of vfp_register[] + // given by index 2*sreg and 2*sreg+1. + char buffer[2 * sizeof(vfp_register[0])]; + memcpy(buffer, &dbl, 2 * sizeof(vfp_register[0])); +#ifndef BIG_ENDIAN_FLOATING_POINT + memcpy(&vfp_register[dreg * 2], buffer, 2 * sizeof(vfp_register[0])); +#else + memcpy(&vfp_register[dreg * 2], &buffer[4], sizeof(vfp_register[0])); + memcpy(&vfp_register[dreg * 2 + 1], &buffer[0], sizeof(vfp_register[0])); +#endif +} + +float Simulator::get_float_from_s_register(int sreg) { + ASSERT((sreg >= 0) && (sreg < num_s_registers)); + + float sm_val = 0.0; + // Read the bits from the unsigned integer vfp_register[] array + // into the single precision floating point value and return it. + char buffer[sizeof(vfp_register[0])]; + memcpy(buffer, &vfp_register[sreg], sizeof(vfp_register[0])); + memcpy(&sm_val, buffer, sizeof(vfp_register[0])); + return(sm_val); +} + +int Simulator::get_sinteger_from_s_register(int sreg) { + ASSERT((sreg >= 0) && (sreg < num_s_registers)); + + int sm_val = 0; + // Read the bits from the unsigned integer vfp_register[] array + // into the single precision floating point value and return it. + char buffer[sizeof(vfp_register[0])]; + memcpy(buffer, &vfp_register[sreg], sizeof(vfp_register[0])); + memcpy(&sm_val, buffer, sizeof(vfp_register[0])); + return(sm_val); +} + +double Simulator::get_double_from_d_register(int dreg) { + ASSERT((dreg >= 0) && (dreg < num_d_registers)); + + double dm_val = 0.0; + // Read the bits from the unsigned integer vfp_register[] array + // into the double precision floating point value and return it. 
+ char buffer[2 * sizeof(vfp_register[0])]; +#ifndef BIG_ENDIAN_FLOATING_POINT + memcpy(buffer, &vfp_register[2 * dreg], 2 * sizeof(vfp_register[0])); +#else + memcpy(&buffer[0], &vfp_register[2 * dreg + 1], sizeof(vfp_register[0])); + memcpy(&buffer[4], &vfp_register[2 * dreg], sizeof(vfp_register[0])); +#endif + memcpy(&dm_val, buffer, 2 * sizeof(vfp_register[0])); + return(dm_val); +} + // For use in calls that take two double values, constructed from r0, r1, r2 // and r3. @@ -771,6 +879,37 @@ bool Simulator::OverflowFrom(int32_t alu_out, return overflow; } +// Support for VFP comparisons. +void Simulator::Compute_FPSCR_Flags(double val1, double val2) { + // All Non-Nan cases + if (val1 == val2) { + n_flag_FPSCR_ = false; + z_flag_FPSCR_ = true; + c_flag_FPSCR_ = true; + v_flag_FPSCR_ = false; + } else if (val1 < val2) { + n_flag_FPSCR_ = true; + z_flag_FPSCR_ = false; + c_flag_FPSCR_ = false; + v_flag_FPSCR_ = false; + } else { + // Case when (val1 > val2). + n_flag_FPSCR_ = false; + z_flag_FPSCR_ = false; + c_flag_FPSCR_ = true; + v_flag_FPSCR_ = false; + } +} + + +void Simulator::Copy_FPSCR_to_APSR() { + n_flag_ = n_flag_FPSCR_; + z_flag_ = z_flag_FPSCR_; + c_flag_ = c_flag_FPSCR_; + v_flag_ = v_flag_FPSCR_; +} + + // Addressing Mode 1 - Data-processing operands: // Get the value based on the shifter_operand with register. @@ -1664,16 +1803,15 @@ void Simulator::DecodeType5(Instr* instr) { void Simulator::DecodeType6(Instr* instr) { - UNIMPLEMENTED(); + DecodeType6CoprocessorIns(instr); } void Simulator::DecodeType7(Instr* instr) { if (instr->Bit(24) == 1) { - // Format(instr, "swi 'swi"); SoftwareInterrupt(instr); } else { - UNIMPLEMENTED(); + DecodeTypeVFP(instr); } } @@ -1745,6 +1883,178 @@ void Simulator::DecodeUnconditional(Instr* instr) { } +// void Simulator::DecodeTypeVFP(Instr* instr) +// The Following ARMv7 VFPv instructions are currently supported. 
+// fmsr :Sn = Rt +// fmrs :Rt = Sn +// fsitod: Dd = Sm +// ftosid: Sd = Dm +// Dd = faddd(Dn, Dm) +// Dd = fsubd(Dn, Dm) +// Dd = fmuld(Dn, Dm) +// Dd = fdivd(Dn, Dm) +// vcmp(Dd, Dm) +// VMRS +void Simulator::DecodeTypeVFP(Instr* instr) { + ASSERT((instr->TypeField() == 7) && (instr->Bit(24) == 0x0) ); + + int rt = instr->RtField(); + int vm = instr->VmField(); + int vn = instr->VnField(); + int vd = instr->VdField(); + + if (instr->Bit(23) == 1) { + if ((instr->Bits(21, 19) == 0x7) && + (instr->Bits(18, 16) == 0x5) && + (instr->Bits(11, 9) == 0x5) && + (instr->Bit(8) == 1) && + (instr->Bit(6) == 1) && + (instr->Bit(4) == 0)) { + double dm_val = get_double_from_d_register(vm); + int32_t int_value = static_cast(dm_val); + set_s_register_from_sinteger(((vd<<1) | instr->DField()), int_value); + } else if ((instr->Bits(21, 19) == 0x7) && + (instr->Bits(18, 16) == 0x0) && + (instr->Bits(11, 9) == 0x5) && + (instr->Bit(8) == 1) && + (instr->Bit(7) == 1) && + (instr->Bit(6) == 1) && + (instr->Bit(4) == 0)) { + int32_t int_value = get_sinteger_from_s_register(((vm<<1) | + instr->MField())); + double dbl_value = static_cast(int_value); + set_d_register_from_double(vd, dbl_value); + } else if ((instr->Bit(21) == 0x0) && + (instr->Bit(20) == 0x0) && + (instr->Bits(11, 9) == 0x5) && + (instr->Bit(8) == 1) && + (instr->Bit(6) == 0) && + (instr->Bit(4) == 0)) { + double dn_value = get_double_from_d_register(vn); + double dm_value = get_double_from_d_register(vm); + double dd_value = dn_value / dm_value; + set_d_register_from_double(vd, dd_value); + } else if ((instr->Bits(21, 20) == 0x3) && + (instr->Bits(19, 16) == 0x4) && + (instr->Bits(11, 9) == 0x5) && + (instr->Bit(8) == 0x1) && + (instr->Bit(6) == 0x1) && + (instr->Bit(4) == 0x0)) { + double dd_value = get_double_from_d_register(vd); + double dm_value = get_double_from_d_register(vm); + Compute_FPSCR_Flags(dd_value, dm_value); + } else if ((instr->Bits(23, 20) == 0xF) && + (instr->Bits(19, 16) == 0x1) && + 
(instr->Bits(11, 8) == 0xA) && + (instr->Bits(7, 5) == 0x0) && + (instr->Bit(4) == 0x1) && + (instr->Bits(3, 0) == 0x0)) { + if (instr->Bits(15, 12) == 0xF) + Copy_FPSCR_to_APSR(); + else + UNIMPLEMENTED(); // not used by V8 now + } else { + UNIMPLEMENTED(); // not used by V8 now + } + } else if (instr->Bit(21) == 1) { + if ((instr->Bit(20) == 0x1) && + (instr->Bits(11, 9) == 0x5) && + (instr->Bit(8) == 0x1) && + (instr->Bit(6) == 0) && + (instr->Bit(4) == 0)) { + double dn_value = get_double_from_d_register(vn); + double dm_value = get_double_from_d_register(vm); + double dd_value = dn_value + dm_value; + set_d_register_from_double(vd, dd_value); + } else if ((instr->Bit(20) == 0x1) && + (instr->Bits(11, 9) == 0x5) && + (instr->Bit(8) == 0x1) && + (instr->Bit(6) == 1) && + (instr->Bit(4) == 0)) { + double dn_value = get_double_from_d_register(vn); + double dm_value = get_double_from_d_register(vm); + double dd_value = dn_value - dm_value; + set_d_register_from_double(vd, dd_value); + } else if ((instr->Bit(20) == 0x0) && + (instr->Bits(11, 9) == 0x5) && + (instr->Bit(8) == 0x1) && + (instr->Bit(6) == 0) && + (instr->Bit(4) == 0)) { + double dn_value = get_double_from_d_register(vn); + double dm_value = get_double_from_d_register(vm); + double dd_value = dn_value * dm_value; + set_d_register_from_double(vd, dd_value); + } else { + UNIMPLEMENTED(); // not used by V8 now + } + } else { + if ((instr->Bit(20) == 0x0) && + (instr->Bits(11, 8) == 0xA) && + (instr->Bits(6, 5) == 0x0) && + (instr->Bit(4) == 1) && + (instr->Bits(3, 0) == 0x0)) { + int32_t rs_val = get_register(rt); + set_s_register_from_sinteger(((vn<<1) | instr->NField()), rs_val); + } else if ((instr->Bit(20) == 0x1) && + (instr->Bits(11, 8) == 0xA) && + (instr->Bits(6, 5) == 0x0) && + (instr->Bit(4) == 1) && + (instr->Bits(3, 0) == 0x0)) { + int32_t int_value = get_sinteger_from_s_register(((vn<<1) | + instr->NField())); + set_register(rt, int_value); + } else { + UNIMPLEMENTED(); // not used by V8 now + 
} + } +} + + + +// void Simulator::DecodeType6CoprocessorIns(Instr* instr) +// Decode Type 6 coprocessor instructions +// Dm = fmdrr(Rt, Rt2) +// = fmrrd(Dm) +void Simulator::DecodeType6CoprocessorIns(Instr* instr) { + ASSERT((instr->TypeField() == 6)); + + int rt = instr->RtField(); + int rn = instr->RnField(); + int vm = instr->VmField(); + + if (instr->Bit(23) == 1) { + UNIMPLEMENTED(); + } else if (instr->Bit(22) == 1) { + if ((instr->Bits(27, 24) == 0xC) && + (instr->Bit(22) == 1) && + (instr->Bits(11, 8) == 0xB) && + (instr->Bits(7, 6) == 0x0) && + (instr->Bit(4) == 1)) { + if (instr->Bit(20) == 0) { + int32_t rs_val = get_register(rt); + int32_t rn_val = get_register(rn); + + set_s_register_from_sinteger(2*vm, rs_val); + set_s_register_from_sinteger((2*vm+1), rn_val); + + } else if (instr->Bit(20) == 1) { + int32_t rt_int_value = get_sinteger_from_s_register(2*vm); + int32_t rn_int_value = get_sinteger_from_s_register(2*vm+1); + + set_register(rt, rt_int_value); + set_register(rn, rn_int_value); + } + } else { + UNIMPLEMENTED(); + } + } else if (instr->Bit(21) == 1) { + UNIMPLEMENTED(); + } else { + UNIMPLEMENTED(); + } +} + + // Executes the current instruction. void Simulator::InstructionDecode(Instr* instr) { pc_modified_ = false; @@ -1802,7 +2112,6 @@ void Simulator::InstructionDecode(Instr* instr) { } -// void Simulator::Execute() { // Get the PC to simulate. Cannot use the accessor here as we need the // raw PC value and not the one used as input to arithmetic instructions. 
diff --git a/src/arm/simulator-arm.h b/src/arm/simulator-arm.h index 3c8fa71..ed81946 100644 --- a/src/arm/simulator-arm.h +++ b/src/arm/simulator-arm.h @@ -97,7 +97,6 @@ namespace arm { class Simulator { public: friend class Debugger; - enum Register { no_reg = -1, r0 = 0, r1, r2, r3, r4, r5, r6, r7, @@ -105,7 +104,15 @@ class Simulator { num_registers, sp = 13, lr = 14, - pc = 15 + pc = 15, + s0 = 0, s1, s2, s3, s4, s5, s6, s7, + s8, s9, s10, s11, s12, s13, s14, s15, + s16, s17, s18, s19, s20, s21, s22, s23, + s24, s25, s26, s27, s28, s29, s30, s31, + num_s_registers = 32, + d0 = 0, d1, d2, d3, d4, d5, d6, d7, + d8, d9, d10, d11, d12, d13, d14, d15, + num_d_registers = 16 }; Simulator(); @@ -121,6 +128,16 @@ class Simulator { void set_register(int reg, int32_t value); int32_t get_register(int reg) const; + // Support for VFP. + void set_s_register(int reg, unsigned int value); + unsigned int get_s_register(int reg) const; + void set_d_register_from_double(int dreg, const double& dbl); + double get_double_from_d_register(int dreg); + void set_s_register_from_float(int sreg, const float dbl); + float get_float_from_s_register(int sreg); + void set_s_register_from_sinteger(int reg, const int value); + int get_sinteger_from_s_register(int reg); + // Special case of set_register and get_register to access the raw PC value. void set_pc(int32_t value); int32_t get_pc() const; @@ -175,6 +192,10 @@ class Simulator { int32_t right, bool addition); + // Support for VFP. + void Compute_FPSCR_Flags(double val1, double val2); + void Copy_FPSCR_to_APSR(); + // Helper functions to decode common "addressing" modes int32_t GetShiftRm(Instr* instr, bool* carry_out); int32_t GetImm(Instr* instr, bool* carry_out); @@ -206,6 +227,10 @@ class Simulator { void DecodeType7(Instr* instr); void DecodeUnconditional(Instr* instr); + // Support for VFP. + void DecodeTypeVFP(Instr* instr); + void DecodeType6CoprocessorIns(Instr* instr); + // Executes one instruction. 
void InstructionDecode(Instr* instr); @@ -226,6 +251,20 @@ class Simulator { bool c_flag_; bool v_flag_; + // VFP architecture state. + unsigned int vfp_register[32/*num_s_registers*/]; + bool n_flag_FPSCR_; + bool z_flag_FPSCR_; + bool c_flag_FPSCR_; + bool v_flag_FPSCR_; + + // VFP FP exception flags architecture state. + bool inv_op_vfp_flag_; + bool div_zero_vfp_flag_; + bool overflow_vfp_flag_; + bool underflow_vfp_flag_; + bool inexact_vfp_flag_; + // Simulator support. char* stack_; bool pc_modified_; diff --git a/src/flag-definitions.h b/src/flag-definitions.h index d8b0633..23f23d2 100644 --- a/src/flag-definitions.h +++ b/src/flag-definitions.h @@ -114,6 +114,8 @@ DEFINE_bool(enable_rdtsc, true, "enable use of RDTSC instruction if available") DEFINE_bool(enable_sahf, true, "enable use of SAHF instruction if available (X64 only)") +DEFINE_bool(enable_vfp3, true, + "enable use of VFP3 instructions if available") // bootstrapper.cc DEFINE_string(expose_natives_as, NULL, "expose natives in global object") diff --git a/src/platform-linux.cc b/src/platform-linux.cc index fe4c31f..9564e8d 100644 --- a/src/platform-linux.cc +++ b/src/platform-linux.cc @@ -89,6 +89,46 @@ double OS::nan_value() { } +bool OS::fgrep_vfp(const char* file_name, const char* string) { + // Simple detection of VFP at runtime for Linux. + // It is based on /proc/cpuinfo, which reveals hardware configuration + // to user-space applications. According to ARM (mid 2009), no similar + // facility is universally available on the ARM architectures, + // so it's up to individual OSes to provide such. + // + // This is written as a straight shot one pass parser + // and not using STL string and ifstream because, + // on Linux, it's reading from a (non-mmap-able) + // character special device. 
+ + FILE* f = NULL; + + if (NULL == (f = fopen(file_name, "r"))) + return false; + + const char* what = string; + int k; + while (EOF != (k = fgetc(f))) { + if (k == *what) { + ++what; + while ((*what != '\0') && (*what == fgetc(f))) { + ++what; + } + if (*what == '\0') { + fclose(f); + return true; + } else { + what = string; + } + } + } + fclose(f); + + // Did not find string in the file file_name. + return false; +} + + int OS::ActivationFrameAlignment() { #ifdef V8_TARGET_ARCH_ARM // On EABI ARM targets this is required for fp correctness in the diff --git a/src/platform.h b/src/platform.h index fefe4b8..2f4953b 100644 --- a/src/platform.h +++ b/src/platform.h @@ -250,6 +250,9 @@ class OS { // Returns the double constant NAN static double nan_value(); + // Support runtime detection of VFP3 on linux platforms. + static bool fgrep_vfp(const char * file_name, const char * string); + // Returns the activation frame alignment constraint or zero if // the platform doesn't care. Guaranteed to be a power of two. static int ActivationFrameAlignment(); -- 2.7.4