From 5c6b09e6c6a0c33e6b65e104a60a03897fb584d3 Mon Sep 17 00:00:00 2001 From: "m.m.capewell@googlemail.com" Date: Mon, 15 Apr 2013 15:55:47 +0000 Subject: [PATCH] ARM: Enable VFP default NaN mode BUG=none TEST=Default NaN tests added to test-assembler-arm. Review URL: https://codereview.chromium.org/14109010 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@14268 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/arm/code-stubs-arm.cc | 6 +- src/arm/constants-arm.h | 1 + src/arm/lithium-arm.h | 8 ++- src/arm/lithium-codegen-arm.cc | 31 ++++------ src/arm/macro-assembler-arm.cc | 95 ++++++++++++------------------- src/arm/macro-assembler-arm.h | 13 +++++ src/arm/simulator-arm.cc | 23 +++++++- src/arm/simulator-arm.h | 2 + src/arm/stub-cache-arm.cc | 4 +- test/cctest/test-assembler-arm.cc | 80 ++++++++++++++++++++++++++ 10 files changed, 179 insertions(+), 84 deletions(-) diff --git a/src/arm/code-stubs-arm.cc b/src/arm/code-stubs-arm.cc index a17c7dde4..3e637416d 100644 --- a/src/arm/code-stubs-arm.cc +++ b/src/arm/code-stubs-arm.cc @@ -3497,6 +3497,8 @@ void CEntryStub::GenerateCore(MacroAssembler* masm, masm->Jump(r5); } + __ VFPEnsureFPSCRState(r2); + if (always_allocate) { // It's okay to clobber r2 and r3 here. Don't mess with r0 and r1 // though (contain the result). @@ -3658,6 +3660,7 @@ void JSEntryStub::GenerateBody(MacroAssembler* masm, bool is_construct) { __ vstm(db_w, sp, kFirstCalleeSavedDoubleReg, kLastCalleeSavedDoubleReg); // Set up the reserved register for 0.0. __ vmov(kDoubleRegZero, 0.0); + __ VFPEnsureFPSCRState(r4); // Get address of argv, see stm above. // r0: code entry @@ -6839,6 +6842,7 @@ void DirectCEntryStub::GenerateCall(MacroAssembler* masm, __ Jump(target); // Call the C++ function. 
ASSERT_EQ(Assembler::kInstrSize + Assembler::kPcLoadDelta, masm->SizeOfCodeGeneratedSince(&start)); + __ VFPEnsureFPSCRState(r2); } @@ -7439,7 +7443,7 @@ void StoreArrayLiteralElementStub::Generate(MacroAssembler* masm) { __ ldr(r5, FieldMemOperand(r1, JSObject::kElementsOffset)); __ StoreNumberToDoubleElements(r0, r3, // Overwrites all regs after this. - r5, r6, r7, r9, r2, + r5, r9, r6, r7, r2, &slow_elements); __ Ret(); } diff --git a/src/arm/constants-arm.h b/src/arm/constants-arm.h index 004165ac3..c3a5bebd8 100644 --- a/src/arm/constants-arm.h +++ b/src/arm/constants-arm.h @@ -403,6 +403,7 @@ const uint32_t kVFPOverflowExceptionBit = 1 << 2; const uint32_t kVFPUnderflowExceptionBit = 1 << 3; const uint32_t kVFPInexactExceptionBit = 1 << 4; const uint32_t kVFPFlushToZeroMask = 1 << 24; +const uint32_t kVFPDefaultNaNModeControlBit = 1 << 25; const uint32_t kVFPNConditionFlagBit = 1 << 31; const uint32_t kVFPZConditionFlagBit = 1 << 30; diff --git a/src/arm/lithium-arm.h b/src/arm/lithium-arm.h index e0e5815f5..6486cad2b 100644 --- a/src/arm/lithium-arm.h +++ b/src/arm/lithium-arm.h @@ -2176,7 +2176,13 @@ class LStoreKeyed: public LTemplateInstruction<0, 3, 0> { DECLARE_HYDROGEN_ACCESSOR(StoreKeyed) virtual void PrintDataTo(StringStream* stream); - bool NeedsCanonicalization() { return hydrogen()->NeedsCanonicalization(); } + bool NeedsCanonicalization() { + if (hydrogen()->value()->IsAdd() || hydrogen()->value()->IsSub() || + hydrogen()->value()->IsMul() || hydrogen()->value()->IsDiv()) { + return false; + } + return hydrogen()->NeedsCanonicalization(); + } uint32_t additional_index() const { return hydrogen()->index_offset(); } }; diff --git a/src/arm/lithium-codegen-arm.cc b/src/arm/lithium-codegen-arm.cc index 82134b45c..ec63d8460 100644 --- a/src/arm/lithium-codegen-arm.cc +++ b/src/arm/lithium-codegen-arm.cc @@ -4431,18 +4431,14 @@ void LCodeGen::DoStoreKeyedFixedDoubleArray(LStoreKeyed* instr) { } if (instr->NeedsCanonicalization()) { - // Check for NaN. 
All NaNs must be canonicalized. - __ VFPCompareAndSetFlags(value, value); - Label after_canonicalization; - - // Only load canonical NaN if the comparison above set the overflow. - __ b(vc, &after_canonicalization); - __ Vmov(value, - FixedDoubleArray::canonical_not_the_hole_nan_as_double()); - - __ bind(&after_canonicalization); + // Force a canonical NaN. + if (masm()->emit_debug_code()) { + __ vmrs(ip); + __ tst(ip, Operand(kVFPDefaultNaNModeControlBit)); + __ Assert(ne, "Default NaN mode not set"); + } + __ VFPCanonicalizeNaN(value); } - __ vstr(value, scratch, instr->additional_index() << element_size_shift); } @@ -4864,16 +4860,13 @@ void LCodeGen::DoNumberTagD(LNumberTagD* instr) { DwVfpRegister input_reg = ToDoubleRegister(instr->value()); __ VFPCompareAndSetFlags(input_reg, input_reg); __ b(vc, &no_special_nan_handling); - __ vmov(reg, scratch0(), input_reg); - __ cmp(scratch0(), Operand(kHoleNanUpper32)); - Label canonicalize; - __ b(ne, &canonicalize); + __ vmov(scratch, input_reg.high()); + __ cmp(scratch, Operand(kHoleNanUpper32)); + // If not the hole NaN, force the NaN to be canonical. + __ VFPCanonicalizeNaN(input_reg, ne); + __ b(ne, &no_special_nan_handling); __ Move(reg, factory()->the_hole_value()); __ b(&done); - __ bind(&canonicalize); - __ Vmov(input_reg, - FixedDoubleArray::canonical_not_the_hole_nan_as_double(), - no_reg); } __ bind(&no_special_nan_handling); diff --git a/src/arm/macro-assembler-arm.cc b/src/arm/macro-assembler-arm.cc index a3379d59f..7df0c0a1f 100644 --- a/src/arm/macro-assembler-arm.cc +++ b/src/arm/macro-assembler-arm.cc @@ -773,6 +773,23 @@ void MacroAssembler::Strd(Register src1, Register src2, } +void MacroAssembler::VFPEnsureFPSCRState(Register scratch) { + // If needed, restore wanted bits of FPSCR. 
+ Label fpscr_done; + vmrs(scratch); + tst(scratch, Operand(kVFPDefaultNaNModeControlBit)); + b(ne, &fpscr_done); + orr(scratch, scratch, Operand(kVFPDefaultNaNModeControlBit)); + vmsr(scratch); + bind(&fpscr_done); +} + +void MacroAssembler::VFPCanonicalizeNaN(const DwVfpRegister value, + const Condition cond) { + vsub(value, value, kDoubleRegZero, cond); +} + + void MacroAssembler::VFPCompareAndSetFlags(const DwVfpRegister src1, const DwVfpRegister src2, const Condition cond) { @@ -1983,7 +2000,7 @@ void MacroAssembler::StoreNumberToDoubleElements(Register value_reg, Register scratch4, Label* fail, int elements_offset) { - Label smi_value, maybe_nan, have_double_value, is_nan, done; + Label smi_value, store; Register mantissa_reg = scratch2; Register exponent_reg = scratch3; @@ -1997,68 +2014,28 @@ void MacroAssembler::StoreNumberToDoubleElements(Register value_reg, fail, DONT_DO_SMI_CHECK); - // Check for nan: all NaN values have a value greater (signed) than 0x7ff00000 - // in the exponent. - mov(scratch1, Operand(kNaNOrInfinityLowerBoundUpper32)); - ldr(exponent_reg, FieldMemOperand(value_reg, HeapNumber::kExponentOffset)); - cmp(exponent_reg, scratch1); - b(ge, &maybe_nan); - - ldr(mantissa_reg, FieldMemOperand(value_reg, HeapNumber::kMantissaOffset)); - - bind(&have_double_value); - add(scratch1, elements_reg, - Operand(key_reg, LSL, kDoubleSizeLog2 - kSmiTagSize)); - str(mantissa_reg, FieldMemOperand( - scratch1, FixedDoubleArray::kHeaderSize - elements_offset)); - uint32_t offset = FixedDoubleArray::kHeaderSize - elements_offset + - sizeof(kHoleNanLower32); - str(exponent_reg, FieldMemOperand(scratch1, offset)); - jmp(&done); - - bind(&maybe_nan); - // Could be NaN or Infinity. If fraction is not zero, it's NaN, otherwise - // it's an Infinity, and the non-NaN code path applies. 
- b(gt, &is_nan); - ldr(mantissa_reg, FieldMemOperand(value_reg, HeapNumber::kMantissaOffset)); - cmp(mantissa_reg, Operand::Zero()); - b(eq, &have_double_value); - bind(&is_nan); - // Load canonical NaN for storing into the double array. - uint64_t nan_int64 = BitCast<uint64_t>( - FixedDoubleArray::canonical_not_the_hole_nan_as_double()); - mov(mantissa_reg, Operand(static_cast<uint32_t>(nan_int64))); - mov(exponent_reg, Operand(static_cast<uint32_t>(nan_int64 >> 32))); - jmp(&have_double_value); + vldr(d0, FieldMemOperand(value_reg, HeapNumber::kValueOffset)); + // Force a canonical NaN. + if (emit_debug_code()) { + vmrs(ip); + tst(ip, Operand(kVFPDefaultNaNModeControlBit)); + Assert(ne, "Default NaN mode not set"); + } + VFPCanonicalizeNaN(d0); + b(&store); bind(&smi_value); + Register untagged_value = scratch1; + SmiUntag(untagged_value, value_reg); + FloatingPointHelper::ConvertIntToDouble( + this, untagged_value, FloatingPointHelper::kVFPRegisters, d0, + mantissa_reg, exponent_reg, scratch4, s2); + + bind(&store); add(scratch1, elements_reg, - Operand(FixedDoubleArray::kHeaderSize - kHeapObjectTag - - elements_offset)); - add(scratch1, scratch1, Operand(key_reg, LSL, kDoubleSizeLog2 - kSmiTagSize)); - // scratch1 is now effective address of the double element - - FloatingPointHelper::Destination destination; - destination = FloatingPointHelper::kVFPRegisters; - - Register untagged_value = elements_reg; - SmiUntag(untagged_value, value_reg); - FloatingPointHelper::ConvertIntToDouble(this, - untagged_value, - destination, - d0, - mantissa_reg, - exponent_reg, - scratch4, - s2); - if (destination == FloatingPointHelper::kVFPRegisters) { - vstr(d0, scratch1, 0); - } else { - str(mantissa_reg, MemOperand(scratch1, 0)); - str(exponent_reg, MemOperand(scratch1, Register::kSizeInBytes)); - } - bind(&done); + vstr(d0, FieldMemOperand(scratch1, + FixedDoubleArray::kHeaderSize - elements_offset)); } diff --git a/src/arm/macro-assembler-arm.h b/src/arm/macro-assembler-arm.h index 75ff64fc0..fc6d7978e
100644 --- a/src/arm/macro-assembler-arm.h +++ b/src/arm/macro-assembler-arm.h @@ -460,6 +460,19 @@ class MacroAssembler: public Assembler { const MemOperand& dst, Condition cond = al); + // Ensure that FPSCR contains values needed by JavaScript. + // We need the NaNModeControlBit to be sure that operations like + // vadd and vsub generate the Canonical NaN (if a NaN must be generated). + // In VFP3 it will be always the Canonical NaN. + // In VFP2 it will be either the Canonical NaN or the negative version + // of the Canonical NaN. It doesn't matter if we have two values. The aim + // is to be sure to never generate the hole NaN. + void VFPEnsureFPSCRState(Register scratch); + + // If the value is a NaN, canonicalize the value else, do nothing. + void VFPCanonicalizeNaN(const DwVfpRegister value, + const Condition cond = al); + // Compare double values and move the result to the normal condition flags. void VFPCompareAndSetFlags(const DwVfpRegister src1, const DwVfpRegister src2, diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc index e3ef926ca..750cc23cc 100644 --- a/src/arm/simulator-arm.cc +++ b/src/arm/simulator-arm.cc @@ -773,6 +773,7 @@ Simulator::Simulator(Isolate* isolate) : isolate_(isolate) { c_flag_FPSCR_ = false; v_flag_FPSCR_ = false; FPSCR_rounding_mode_ = RZ; + FPSCR_default_NaN_mode_ = true; inv_op_vfp_flag_ = false; div_zero_vfp_flag_ = false; @@ -1868,6 +1869,11 @@ void Simulator::SoftwareInterrupt(Instruction* instr) { } +double Simulator::canonicalizeNaN(double value) { + return (FPSCR_default_NaN_mode_ && isnan(value)) ? + FixedDoubleArray::canonical_not_the_hole_nan_as_double() : value; +} + // Stop helper functions. 
bool Simulator::isStopInstruction(Instruction* instr) { return (instr->Bits(27, 24) == 0xF) && (instr->SvcValue() >= kStopCode); @@ -2728,11 +2734,13 @@ void Simulator::DecodeTypeVFP(Instruction* instr) { // vabs double dm_value = get_double_from_d_register(vm); double dd_value = fabs(dm_value); + dd_value = canonicalizeNaN(dd_value); set_d_register_from_double(vd, dd_value); } else if ((instr->Opc2Value() == 0x1) && (instr->Opc3Value() == 0x1)) { // vneg double dm_value = get_double_from_d_register(vm); double dd_value = -dm_value; + dd_value = canonicalizeNaN(dd_value); set_d_register_from_double(vd, dd_value); } else if ((instr->Opc2Value() == 0x7) && (instr->Opc3Value() == 0x3)) { DecodeVCVTBetweenDoubleAndSingle(instr); @@ -2748,6 +2756,7 @@ void Simulator::DecodeTypeVFP(Instruction* instr) { // vsqrt double dm_value = get_double_from_d_register(vm); double dd_value = sqrt(dm_value); + dd_value = canonicalizeNaN(dd_value); set_d_register_from_double(vd, dd_value); } else if (instr->Opc3Value() == 0x0) { // vmov immediate. 
@@ -2769,12 +2778,14 @@ void Simulator::DecodeTypeVFP(Instruction* instr) { double dn_value = get_double_from_d_register(vn); double dm_value = get_double_from_d_register(vm); double dd_value = dn_value - dm_value; + dd_value = canonicalizeNaN(dd_value); set_d_register_from_double(vd, dd_value); } else { // vadd double dn_value = get_double_from_d_register(vn); double dm_value = get_double_from_d_register(vm); double dd_value = dn_value + dm_value; + dd_value = canonicalizeNaN(dd_value); set_d_register_from_double(vd, dd_value); } } else if ((instr->Opc1Value() == 0x2) && !(instr->Opc3Value() & 0x1)) { @@ -2786,6 +2797,7 @@ void Simulator::DecodeTypeVFP(Instruction* instr) { double dn_value = get_double_from_d_register(vn); double dm_value = get_double_from_d_register(vm); double dd_value = dn_value * dm_value; + dd_value = canonicalizeNaN(dd_value); set_d_register_from_double(vd, dd_value); } else if ((instr->Opc1Value() == 0x0)) { // vmla, vmls @@ -2803,9 +2815,13 @@ void Simulator::DecodeTypeVFP(Instruction* instr) { // result with too high precision. set_d_register_from_double(vd, dn_val * dm_val); if (is_vmls) { - set_d_register_from_double(vd, dd_val - get_double_from_d_register(vd)); + set_d_register_from_double( + vd, + canonicalizeNaN(dd_val - get_double_from_d_register(vd))); } else { - set_d_register_from_double(vd, dd_val + get_double_from_d_register(vd)); + set_d_register_from_double( + vd, + canonicalizeNaN(dd_val + get_double_from_d_register(vd))); } } else if ((instr->Opc1Value() == 0x4) && !(instr->Opc3Value() & 0x1)) { // vdiv @@ -2817,6 +2833,7 @@ void Simulator::DecodeTypeVFP(Instruction* instr) { double dm_value = get_double_from_d_register(vm); double dd_value = dn_value / dm_value; div_zero_vfp_flag_ = (dm_value == 0); + dd_value = canonicalizeNaN(dd_value); set_d_register_from_double(vd, dd_value); } else { UNIMPLEMENTED(); // Not used by V8. 
@@ -2850,6 +2867,7 @@ void Simulator::DecodeTypeVFP(Instruction* instr) { (z_flag_FPSCR_ << 30) | (c_flag_FPSCR_ << 29) | (v_flag_FPSCR_ << 28) | + (FPSCR_default_NaN_mode_ << 25) | (inexact_vfp_flag_ << 4) | (underflow_vfp_flag_ << 3) | (overflow_vfp_flag_ << 2) | @@ -2872,6 +2890,7 @@ void Simulator::DecodeTypeVFP(Instruction* instr) { z_flag_FPSCR_ = (rt_value >> 30) & 1; c_flag_FPSCR_ = (rt_value >> 29) & 1; v_flag_FPSCR_ = (rt_value >> 28) & 1; + FPSCR_default_NaN_mode_ = (rt_value >> 25) & 1; inexact_vfp_flag_ = (rt_value >> 4) & 1; underflow_vfp_flag_ = (rt_value >> 3) & 1; overflow_vfp_flag_ = (rt_value >> 2) & 1; diff --git a/src/arm/simulator-arm.h b/src/arm/simulator-arm.h index 1776887b6..45ae999b5 100644 --- a/src/arm/simulator-arm.h +++ b/src/arm/simulator-arm.h @@ -274,6 +274,7 @@ class Simulator { // Support for VFP. void Compute_FPSCR_Flags(double val1, double val2); void Copy_FPSCR_to_APSR(); + inline double canonicalizeNaN(double value); // Helper functions to decode common "addressing" modes int32_t GetShiftRm(Instruction* instr, bool* carry_out); @@ -379,6 +380,7 @@ class Simulator { // VFP rounding mode. See ARM DDI 0406B Page A2-29. VFPRoundingMode FPSCR_rounding_mode_; + bool FPSCR_default_NaN_mode_; // VFP FP exception flags architecture state. bool inv_op_vfp_flag_; diff --git a/src/arm/stub-cache-arm.cc b/src/arm/stub-cache-arm.cc index 24a83bb3d..f22acb470 100644 --- a/src/arm/stub-cache-arm.cc +++ b/src/arm/stub-cache-arm.cc @@ -1621,7 +1621,7 @@ Handle CallStubCompiler::CompileArrayPushCall( __ ldr(r4, MemOperand(sp, (argc - 1) * kPointerSize)); __ StoreNumberToDoubleElements( - r4, r0, elements, r3, r5, r2, r9, + r4, r0, elements, r5, r2, r3, r9, &call_builtin, argc * kDoubleSize); // Save new length. @@ -3586,9 +3586,9 @@ void KeyedStoreStubCompiler::GenerateStoreFastDoubleElement( // All registers after this are overwritten. 
elements_reg, scratch1, - scratch2, scratch3, scratch4, + scratch2, &transition_elements_kind); __ Ret(); diff --git a/test/cctest/test-assembler-arm.cc b/test/cctest/test-assembler-arm.cc index 23f5ad06d..58ce5ec8f 100644 --- a/test/cctest/test-assembler-arm.cc +++ b/test/cctest/test-assembler-arm.cc @@ -1137,4 +1137,84 @@ TEST(13) { } } + +TEST(14) { + // Test the VFP Canonicalized Nan mode. + CcTest::InitializeVM(); + Isolate* isolate = Isolate::Current(); + HandleScope scope(isolate); + + typedef struct { + double left; + double right; + double add_result; + double sub_result; + double mul_result; + double div_result; + } T; + T t; + + // Create a function that makes the four basic operations. + Assembler assm(isolate, NULL, 0); + + // Ensure FPSCR state (as JSEntryStub does). + Label fpscr_done; + __ vmrs(r1); + __ tst(r1, Operand(kVFPDefaultNaNModeControlBit)); + __ b(ne, &fpscr_done); + __ orr(r1, r1, Operand(kVFPDefaultNaNModeControlBit)); + __ vmsr(r1); + __ bind(&fpscr_done); + + __ vldr(d0, r0, OFFSET_OF(T, left)); + __ vldr(d1, r0, OFFSET_OF(T, right)); + __ vadd(d2, d0, d1); + __ vstr(d2, r0, OFFSET_OF(T, add_result)); + __ vsub(d2, d0, d1); + __ vstr(d2, r0, OFFSET_OF(T, sub_result)); + __ vmul(d2, d0, d1); + __ vstr(d2, r0, OFFSET_OF(T, mul_result)); + __ vdiv(d2, d0, d1); + __ vstr(d2, r0, OFFSET_OF(T, div_result)); + + __ mov(pc, Operand(lr)); + + CodeDesc desc; + assm.GetCode(&desc); + Object* code = isolate->heap()->CreateCode( + desc, + Code::ComputeFlags(Code::STUB), + Handle<Code>())->ToObjectChecked(); + CHECK(code->IsCode()); +#ifdef DEBUG + Code::cast(code)->Print(); +#endif + F3 f = FUNCTION_CAST<F3>(Code::cast(code)->entry()); + t.left = BitCast<double>(kHoleNanInt64); + t.right = 1; + t.add_result = 0; + t.sub_result = 0; + t.mul_result = 0; + t.div_result = 0; + Object* dummy = CALL_GENERATED_CODE(f, &t, 0, 0, 0, 0); + USE(dummy); + const uint32_t kArmNanUpper32 = 0x7ff80000; + const uint32_t kArmNanLower32 = 0x00000000; +#ifdef DEBUG + const uint64_t
kArmNanInt64 = + (static_cast<uint64_t>(kArmNanUpper32) << 32) | kArmNanLower32; + ASSERT(kArmNanInt64 != kHoleNanInt64); +#endif + // With VFP2 the sign of the canonicalized Nan is undefined. So + // we remove the sign bit for the upper tests. + CHECK_EQ(kArmNanUpper32, (BitCast<int64_t>(t.add_result) >> 32) & 0x7fffffff); + CHECK_EQ(kArmNanLower32, BitCast<int64_t>(t.add_result) & 0xffffffffu); + CHECK_EQ(kArmNanUpper32, (BitCast<int64_t>(t.sub_result) >> 32) & 0x7fffffff); + CHECK_EQ(kArmNanLower32, BitCast<int64_t>(t.sub_result) & 0xffffffffu); + CHECK_EQ(kArmNanUpper32, (BitCast<int64_t>(t.mul_result) >> 32) & 0x7fffffff); + CHECK_EQ(kArmNanLower32, BitCast<int64_t>(t.mul_result) & 0xffffffffu); + CHECK_EQ(kArmNanUpper32, (BitCast<int64_t>(t.div_result) >> 32) & 0x7fffffff); + CHECK_EQ(kArmNanLower32, BitCast<int64_t>(t.div_result) & 0xffffffffu); +} + #undef __ -- 2.34.1