From 6b20299bef87337f6ef803ed72309862585096b0 Mon Sep 17 00:00:00 2001 From: "erik.corry@gmail.com" Date: Tue, 17 Aug 2010 08:43:45 +0000 Subject: [PATCH] Add support for vstr for single precision VFP register. This is a commit of http://codereview.chromium.org/3064045 for Rodolph Perfetta git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5281 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/arm/assembler-arm.cc | 20 ++++++++++++++++++++ src/arm/assembler-arm.h | 5 +++++ src/arm/ic-arm.cc | 29 ++++++++++++++++------------- test/cctest/test-assembler-arm.cc | 17 ++++++++++++++++- 4 files changed, 57 insertions(+), 14 deletions(-) diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc index 136c82e..6df6411 100644 --- a/src/arm/assembler-arm.cc +++ b/src/arm/assembler-arm.cc @@ -1820,6 +1820,7 @@ void Assembler::vldr(const DwVfpRegister dst, ASSERT(CpuFeatures::IsEnabled(VFP3)); ASSERT(offset % 4 == 0); ASSERT((offset / 4) < 256); + ASSERT(offset >= 0); emit(cond | 0xD9*B20 | base.code()*B16 | dst.code()*B12 | 0xB*B8 | ((offset / 4) & 255)); } @@ -1836,6 +1837,7 @@ void Assembler::vldr(const SwVfpRegister dst, ASSERT(CpuFeatures::IsEnabled(VFP3)); ASSERT(offset % 4 == 0); ASSERT((offset / 4) < 256); + ASSERT(offset >= 0); emit(cond | 0xD9*B20 | base.code()*B16 | dst.code()*B12 | 0xA*B8 | ((offset / 4) & 255)); } @@ -1852,11 +1854,29 @@ void Assembler::vstr(const DwVfpRegister src, ASSERT(CpuFeatures::IsEnabled(VFP3)); ASSERT(offset % 4 == 0); ASSERT((offset / 4) < 256); + ASSERT(offset >= 0); emit(cond | 0xD8*B20 | base.code()*B16 | src.code()*B12 | 0xB*B8 | ((offset / 4) & 255)); } +void Assembler::vstr(const SwVfpRegister src, + const Register base, + int offset, + const Condition cond) { + // MEM(Rbase + offset) = SSrc. + // Instruction details available in ARM DDI 0406A, A8-786. 
+ // cond(31-28) | 1101(27-24)| 1000(23-20) | Rbase(19-16) | + // Vsrc(15-12) | 1010(11-8) | (offset/4) + ASSERT(CpuFeatures::IsEnabled(VFP3)); + ASSERT(offset % 4 == 0); + ASSERT((offset / 4) < 256); + ASSERT(offset >= 0); + emit(cond | 0xD8*B20 | base.code()*B16 | src.code()*B12 | + 0xA*B8 | ((offset / 4) & 255)); +} + + static void DoubleAsTwoUInt32(double d, uint32_t* lo, uint32_t* hi) { uint64_t i; memcpy(&i, &d, 8); diff --git a/src/arm/assembler-arm.h b/src/arm/assembler-arm.h index 218eb97..cc6ec05 100644 --- a/src/arm/assembler-arm.h +++ b/src/arm/assembler-arm.h @@ -966,6 +966,11 @@ class Assembler : public Malloced { int offset, // Offset must be a multiple of 4. const Condition cond = al); + void vstr(const SwVfpRegister src, + const Register base, + int offset, // Offset must be a multiple of 4. + const Condition cond = al); + void vmov(const DwVfpRegister dst, double imm, const Condition cond = al); diff --git a/src/arm/ic-arm.cc b/src/arm/ic-arm.cc index ca7cc13..abc0922 100644 --- a/src/arm/ic-arm.cc +++ b/src/arm/ic-arm.cc @@ -1791,19 +1791,22 @@ void KeyedStoreIC::GenerateGeneric(MacroAssembler* masm) { } -// Convert int passed in register ival to IEE 754 single precision -// floating point value and store it into register fval. +// Convert the int passed in register ival to an IEEE 754 single precision +// floating point value and store it at memory location (dst + 4 * wordoffset). // If VFP3 is available use it for conversion. 
-static void ConvertIntToFloat(MacroAssembler* masm, - Register ival, - Register fval, - Register scratch1, - Register scratch2) { +static void StoreIntAsFloat(MacroAssembler* masm, + Register dst, + Register wordoffset, + Register ival, + Register fval, + Register scratch1, + Register scratch2) { if (CpuFeatures::IsSupported(VFP3)) { CpuFeatures::Scope scope(VFP3); __ vmov(s0, ival); + __ add(scratch1, dst, Operand(wordoffset, LSL, 2)); __ vcvt_f32_s32(s0, s0); - __ vmov(fval, s0); + __ vstr(s0, scratch1, 0); } else { Label not_special, done; // Move sign bit from source to destination. This works because the sign @@ -1853,6 +1856,7 @@ static void ConvertIntToFloat(MacroAssembler* masm, Operand(ival, LSR, kBitsPerInt - kBinary32MantissaBits)); __ bind(&done); + __ str(fval, MemOperand(dst, wordoffset, LSL, 2)); } } @@ -1947,9 +1951,8 @@ void KeyedStoreIC::GenerateExternalArray(MacroAssembler* masm, __ str(r5, MemOperand(r3, r4, LSL, 2)); break; case kExternalFloatArray: - // Need to perform int-to-float conversion. - ConvertIntToFloat(masm, r5, r6, r7, r9); - __ str(r6, MemOperand(r3, r4, LSL, 2)); + // Perform int-to-float conversion and store to memory. + StoreIntAsFloat(masm, r3, r4, r5, r6, r7, r9); break; default: UNREACHABLE(); @@ -1983,9 +1986,9 @@ void KeyedStoreIC::GenerateExternalArray(MacroAssembler* masm, // include -kHeapObjectTag into it. __ sub(r5, r0, Operand(kHeapObjectTag)); __ vldr(d0, r5, HeapNumber::kValueOffset); + __ add(r5, r3, Operand(r4, LSL, 2)); __ vcvt_f32_f64(s0, d0); - __ vmov(r5, s0); - __ str(r5, MemOperand(r3, r4, LSL, 2)); + __ vstr(s0, r5, 0); } else { // Need to perform float-to-int conversion. // Test for NaN or infinity (both give zero). 
diff --git a/test/cctest/test-assembler-arm.cc b/test/cctest/test-assembler-arm.cc index 9033f4b..5952b63 100644 --- a/test/cctest/test-assembler-arm.cc +++ b/test/cctest/test-assembler-arm.cc @@ -226,11 +226,13 @@ TEST(4) { double a; double b; double c; + float d; + float e; } T; T t; // Create a function that accepts &t, and loads, manipulates, and stores - // the doubles t.a, t.b, and t.c. + // the doubles t.a, t.b, and t.c, and floats t.d, t.e. Assembler assm(NULL, 0); Label L, C; @@ -252,6 +254,15 @@ TEST(4) { __ vmov(d4, r2, r3); __ vstr(d4, r4, OFFSET_OF(T, b)); + // Load t.d and t.e, switch values, and store back to the struct. + __ vldr(s0, r4, OFFSET_OF(T, d)); + __ vldr(s1, r4, OFFSET_OF(T, e)); + __ vmov(s2, s0); + __ vmov(s0, s1); + __ vmov(s1, s2); + __ vstr(s0, r4, OFFSET_OF(T, d)); + __ vstr(s1, r4, OFFSET_OF(T, e)); + __ ldm(ia_w, sp, r4.bit() | fp.bit() | pc.bit()); CodeDesc desc; @@ -267,8 +278,12 @@ TEST(4) { t.a = 1.5; t.b = 2.75; t.c = 17.17; + t.d = 4.5; + t.e = 9.0; Object* dummy = CALL_GENERATED_CODE(f, &t, 0, 0, 0, 0); USE(dummy); + CHECK_EQ(4.5, t.e); + CHECK_EQ(9.0, t.d); CHECK_EQ(4.25, t.c); CHECK_EQ(4.25, t.b); CHECK_EQ(1.5, t.a); -- 2.7.4