From 9f859fef264ddff81be857a1d35591729e0b5e2b Mon Sep 17 00:00:00 2001 From: "lrn@chromium.org" Date: Mon, 10 May 2010 11:38:58 +0000 Subject: [PATCH] Make X64 double-to-int32 conversion use the 64-bit version of cvttsd2si. Use type info to allow ia32 to use the 32-bit versions in some cases. Remove sse3-specific code from X64 GenericBinaryOpStub. Review URL: http://codereview.chromium.org/1992011 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4632 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/ia32/codegen-ia32.cc | 5 ++ src/x64/assembler-x64.cc | 11 ++++ src/x64/assembler-x64.h | 1 + src/x64/codegen-x64.cc | 150 ++++++----------------------------------------- src/x64/codegen-x64.h | 17 ++---- 5 files changed, 42 insertions(+), 142 deletions(-) diff --git a/src/ia32/codegen-ia32.cc b/src/ia32/codegen-ia32.cc index 63286a7..c2701c8 100644 --- a/src/ia32/codegen-ia32.cc +++ b/src/ia32/codegen-ia32.cc @@ -10296,6 +10296,11 @@ void IntegerConvert(MacroAssembler* masm, Label done, right_exponent, normal_exponent; Register scratch = ebx; Register scratch2 = edi; + if (type_info.IsInteger32() && CpuFeatures::IsEnabled(SSE2)) { + CpuFeatures::Scope scope(SSE2); + __ cvttsd2si(ecx, FieldOperand(source, HeapNumber::kValueOffset)); + return; + } if (!type_info.IsInteger32() || !use_sse3) { // Get exponent word. 
__ mov(scratch, FieldOperand(source, HeapNumber::kExponentOffset)); diff --git a/src/x64/assembler-x64.cc b/src/x64/assembler-x64.cc index 1c00ebc..fcfa8d0 100644 --- a/src/x64/assembler-x64.cc +++ b/src/x64/assembler-x64.cc @@ -2510,6 +2510,17 @@ void Assembler::cvttsd2si(Register dst, const Operand& src) { } +void Assembler::cvttsd2siq(Register dst, XMMRegister src) { + EnsureSpace ensure_space(this); + last_pc_ = pc_; + emit(0xF2); + emit_rex_64(dst, src); + emit(0x0F); + emit(0x2C); + emit_sse_operand(dst, src); +} + + void Assembler::cvtlsi2sd(XMMRegister dst, const Operand& src) { EnsureSpace ensure_space(this); last_pc_ = pc_; diff --git a/src/x64/assembler-x64.h b/src/x64/assembler-x64.h index d077865..55d0828 100644 --- a/src/x64/assembler-x64.h +++ b/src/x64/assembler-x64.h @@ -1092,6 +1092,7 @@ class Assembler : public Malloced { void cvttss2si(Register dst, const Operand& src); void cvttsd2si(Register dst, const Operand& src); + void cvttsd2siq(Register dst, XMMRegister src); void cvtlsi2sd(XMMRegister dst, const Operand& src); void cvtlsi2sd(XMMRegister dst, Register src); diff --git a/src/x64/codegen-x64.cc b/src/x64/codegen-x64.cc index 740be83..8556b58 100644 --- a/src/x64/codegen-x64.cc +++ b/src/x64/codegen-x64.cc @@ -277,7 +277,6 @@ class FloatingPointHelper : public AllStatic { // Takes the operands in rdx and rax and loads them as integers in rax // and rcx. static void LoadAsIntegers(MacroAssembler* masm, - bool use_sse3, Label* operand_conversion_failure); }; @@ -8015,138 +8014,29 @@ void TranscendentalCacheStub::GenerateOperation(MacroAssembler* masm, } -// Get the integer part of a heap number. Surprisingly, all this bit twiddling -// is faster than using the built-in instructions on floating point registers. +// Get the integer part of a heap number. // Trashes rdi and rbx. Dest is rcx. Source cannot be rcx or one of the // trashed registers. 
void IntegerConvert(MacroAssembler* masm, Register source, - bool use_sse3, Label* conversion_failure) { ASSERT(!source.is(rcx) && !source.is(rdi) && !source.is(rbx)); - Label done, right_exponent, normal_exponent; Register scratch = rbx; Register scratch2 = rdi; // Get exponent word. - __ movl(scratch, FieldOperand(source, HeapNumber::kExponentOffset)); + __ movq(scratch2, FieldOperand(source, HeapNumber::kValueOffset)); // Get exponent alone in scratch2. - __ movl(scratch2, scratch); - __ and_(scratch2, Immediate(HeapNumber::kExponentMask)); - if (use_sse3) { - CpuFeatures::Scope scope(SSE3); - // Check whether the exponent is too big for a 64 bit signed integer. - static const uint32_t kTooBigExponent = - (HeapNumber::kExponentBias + 63) << HeapNumber::kExponentShift; - __ cmpl(scratch2, Immediate(kTooBigExponent)); - __ j(greater_equal, conversion_failure); - // Load x87 register with heap number. - __ fld_d(FieldOperand(source, HeapNumber::kValueOffset)); - // Reserve space for 64 bit answer. - __ subq(rsp, Immediate(sizeof(uint64_t))); // Nolint. - // Do conversion, which cannot fail because we checked the exponent. - __ fisttp_d(Operand(rsp, 0)); - __ movl(rcx, Operand(rsp, 0)); // Load low word of answer into rcx. - __ addq(rsp, Immediate(sizeof(uint64_t))); // Nolint. - } else { - // Load rcx with zero. We use this either for the final shift or - // for the answer. - __ xor_(rcx, rcx); - // Check whether the exponent matches a 32 bit signed int that cannot be - // represented by a Smi. A non-smi 32 bit integer is 1.xxx * 2^30 so the - // exponent is 30 (biased). This is the exponent that we are fastest at and - // also the highest exponent we can handle here. - const uint32_t non_smi_exponent = - (HeapNumber::kExponentBias + 30) << HeapNumber::kExponentShift; - __ cmpl(scratch2, Immediate(non_smi_exponent)); - // If we have a match of the int32-but-not-Smi exponent then skip some - // logic. 
- __ j(equal, &right_exponent); - // If the exponent is higher than that then go to slow case. This catches - // numbers that don't fit in a signed int32, infinities and NaNs. - __ j(less, &normal_exponent); - - { - // Handle a big exponent. The only reason we have this code is that the - // >>> operator has a tendency to generate numbers with an exponent of 31. - const uint32_t big_non_smi_exponent = - (HeapNumber::kExponentBias + 31) << HeapNumber::kExponentShift; - __ cmpl(scratch2, Immediate(big_non_smi_exponent)); - __ j(not_equal, conversion_failure); - // We have the big exponent, typically from >>>. This means the number is - // in the range 2^31 to 2^32 - 1. Get the top bits of the mantissa. - __ movl(scratch2, scratch); - __ and_(scratch2, Immediate(HeapNumber::kMantissaMask)); - // Put back the implicit 1. - __ or_(scratch2, Immediate(1 << HeapNumber::kExponentShift)); - // Shift up the mantissa bits to take up the space the exponent used to - // take. We just orred in the implicit bit so that took care of one and - // we want to use the full unsigned range so we subtract 1 bit from the - // shift distance. - const int big_shift_distance = HeapNumber::kNonMantissaBitsInTopWord - 1; - __ shl(scratch2, Immediate(big_shift_distance)); - // Get the second half of the double. - __ movl(rcx, FieldOperand(source, HeapNumber::kMantissaOffset)); - // Shift down 21 bits to get the most significant 11 bits or the low - // mantissa word. - __ shr(rcx, Immediate(32 - big_shift_distance)); - __ or_(rcx, scratch2); - // We have the answer in rcx, but we may need to negate it. - __ testl(scratch, scratch); - __ j(positive, &done); - __ neg(rcx); - __ jmp(&done); - } - - __ bind(&normal_exponent); - // Exponent word in scratch, exponent part of exponent word in scratch2. - // Zero in rcx. - // We know the exponent is smaller than 30 (biased). If it is less than - // 0 (biased) then the number is smaller in magnitude than 1.0 * 2^0, ie - // it rounds to zero. 
- const uint32_t zero_exponent = - (HeapNumber::kExponentBias + 0) << HeapNumber::kExponentShift; - __ subl(scratch2, Immediate(zero_exponent)); - // rcx already has a Smi zero. - __ j(less, &done); - - // We have a shifted exponent between 0 and 30 in scratch2. - __ shr(scratch2, Immediate(HeapNumber::kExponentShift)); - __ movl(rcx, Immediate(30)); - __ subl(rcx, scratch2); - - __ bind(&right_exponent); - // Here rcx is the shift, scratch is the exponent word. - // Get the top bits of the mantissa. - __ and_(scratch, Immediate(HeapNumber::kMantissaMask)); - // Put back the implicit 1. - __ or_(scratch, Immediate(1 << HeapNumber::kExponentShift)); - // Shift up the mantissa bits to take up the space the exponent used to - // take. We have kExponentShift + 1 significant bits int he low end of the - // word. Shift them to the top bits. - const int shift_distance = HeapNumber::kNonMantissaBitsInTopWord - 2; - __ shl(scratch, Immediate(shift_distance)); - // Get the second half of the double. For some exponents we don't - // actually need this because the bits get shifted out again, but - // it's probably slower to test than just to do it. - __ movl(scratch2, FieldOperand(source, HeapNumber::kMantissaOffset)); - // Shift down 22 bits to get the most significant 10 bits or the low - // mantissa word. - __ shr(scratch2, Immediate(32 - shift_distance)); - __ or_(scratch2, scratch); - // Move down according to the exponent. - __ shr_cl(scratch2); - // Now the unsigned answer is in scratch2. We need to move it to rcx and - // we may need to fix the sign. 
- Label negative; - __ xor_(rcx, rcx); - __ cmpl(rcx, FieldOperand(source, HeapNumber::kExponentOffset)); - __ j(greater, &negative); - __ movl(rcx, scratch2); - __ jmp(&done); - __ bind(&negative); - __ subl(rcx, scratch2); - __ bind(&done); - } + __ movq(xmm0, scratch2); + __ shr(scratch2, Immediate(HeapNumber::kMantissaBits)); + __ andl(scratch2, Immediate((1 << HeapNumber::KExponentBits) - 1)); + // Check whether the exponent is too big for a 63 bit unsigned integer. + // (Notice: Doesn't handle MIN_SMI). + __ cmpl(scratch2, Immediate(63 + HeapNumber::kExponentBias)); + __ j(greater_equal, conversion_failure); + // Handle exponent range -inf..62. + __ cvttsd2siq(rcx, xmm0); + // TODO(lrn): Do bit-fiddling for exponents in range 63..84 and return + // zero for everything else (also including negative exponents). } @@ -8196,7 +8086,7 @@ void GenericUnaryOpStub::Generate(MacroAssembler* masm) { __ j(not_equal, &slow); // Convert the heap number in rax to an untagged integer in rcx. - IntegerConvert(masm, rax, CpuFeatures::IsSupported(SSE3), &slow); + IntegerConvert(masm, rax, &slow); // Do the bitwise operation and check if the result fits in a smi. Label try_float; @@ -9777,7 +9667,6 @@ void FloatingPointHelper::LoadFloatOperandsFromSmis(MacroAssembler* masm, // Input: rdx, rax are the left and right objects of a bit op. // Output: rax, rcx are left and right integers for a bit op. void FloatingPointHelper::LoadAsIntegers(MacroAssembler* masm, - bool use_sse3, Label* conversion_failure) { // Check float operands. Label arg1_is_object, check_undefined_arg1; @@ -9800,10 +9689,10 @@ void FloatingPointHelper::LoadAsIntegers(MacroAssembler* masm, __ CompareRoot(rbx, Heap::kHeapNumberMapRootIndex); __ j(not_equal, &check_undefined_arg1); // Get the untagged integer version of the edx heap number in rcx. 
- IntegerConvert(masm, rdx, use_sse3, conversion_failure); + IntegerConvert(masm, rdx, conversion_failure); __ movl(rdx, rcx); - // Here edx has the untagged integer, eax has a Smi or a heap number. + // Here rdx has the untagged integer, rax has a Smi or a heap number. __ bind(&load_arg2); // Test if arg2 is a Smi. __ JumpIfNotSmi(rax, &arg2_is_object); @@ -9823,7 +9712,7 @@ void FloatingPointHelper::LoadAsIntegers(MacroAssembler* masm, __ CompareRoot(rbx, Heap::kHeapNumberMapRootIndex); __ j(not_equal, &check_undefined_arg2); // Get the untagged integer version of the eax heap number in ecx. - IntegerConvert(masm, rax, use_sse3, conversion_failure); + IntegerConvert(masm, rax, conversion_failure); __ bind(&done); __ movl(rax, rdx); } @@ -9898,7 +9787,6 @@ const char* GenericBinaryOpStub::GetName() { (flags_ & NO_SMI_CODE_IN_STUB) ? "_NoSmiInStub" : "", args_in_registers_ ? "RegArgs" : "StackArgs", args_reversed_ ? "_R" : "", - use_sse3_ ? "SSE3" : "SSE2", static_operands_type_.ToString(), BinaryOpIC::GetName(runtime_operands_type_)); return name_; @@ -10331,7 +10219,7 @@ void GenericBinaryOpStub::Generate(MacroAssembler* masm) { case Token::SHL: case Token::SHR: { Label skip_allocation, non_smi_result; - FloatingPointHelper::LoadAsIntegers(masm, use_sse3_, &call_runtime); + FloatingPointHelper::LoadAsIntegers(masm, &call_runtime); switch (op_) { case Token::BIT_OR: __ orl(rax, rcx); break; case Token::BIT_AND: __ andl(rax, rcx); break; @@ -10342,7 +10230,7 @@ void GenericBinaryOpStub::Generate(MacroAssembler* masm) { default: UNREACHABLE(); } if (op_ == Token::SHR) { - // Check if result is non-negative. This can only happen for a shift + // Check if result is negative. This can only happen for a shift // by zero, which also doesn't update the sign flag. 
__ testl(rax, rax); __ j(negative, &non_smi_result); diff --git a/src/x64/codegen-x64.h b/src/x64/codegen-x64.h index a89f23e..416e966 100644 --- a/src/x64/codegen-x64.h +++ b/src/x64/codegen-x64.h @@ -713,7 +713,6 @@ class GenericBinaryOpStub: public CodeStub { static_operands_type_(operands_type), runtime_operands_type_(BinaryOpIC::DEFAULT), name_(NULL) { - use_sse3_ = CpuFeatures::IsSupported(SSE3); ASSERT(OpBits::is_valid(Token::NUM_TOKENS)); } @@ -723,7 +722,6 @@ class GenericBinaryOpStub: public CodeStub { flags_(FlagBits::decode(key)), args_in_registers_(ArgsInRegistersBits::decode(key)), args_reversed_(ArgsReversedBits::decode(key)), - use_sse3_(SSE3Bits::decode(key)), static_operands_type_(TypeInfo::ExpandedRepresentation( StaticTypeInfoBits::decode(key))), runtime_operands_type_(type_info), @@ -748,7 +746,6 @@ class GenericBinaryOpStub: public CodeStub { GenericBinaryFlags flags_; bool args_in_registers_; // Arguments passed in registers not on the stack. bool args_reversed_; // Left and right argument are swapped. - bool use_sse3_; // Number type information of operands, determined by code generator. TypeInfo static_operands_type_; @@ -774,15 +771,14 @@ class GenericBinaryOpStub: public CodeStub { } #endif - // Minor key encoding in 18 bits TTNNNFRASOOOOOOOMM. + // Minor key encoding in 17 bits TTNNNFRAOOOOOOOMM. 
class ModeBits: public BitField<OverwriteMode, 0, 2> {}; class OpBits: public BitField<Token::Value, 2, 7> {}; - class SSE3Bits: public BitField<bool, 9, 1> {}; - class ArgsInRegistersBits: public BitField<bool, 10, 1> {}; - class ArgsReversedBits: public BitField<bool, 11, 1> {}; - class FlagBits: public BitField<GenericBinaryFlags, 12, 1> {}; - class StaticTypeInfoBits: public BitField<int, 13, 3> {}; - class RuntimeTypeInfoBits: public BitField<BinaryOpIC::TypeInfo, 16, 2> {}; + class ArgsInRegistersBits: public BitField<bool, 9, 1> {}; + class ArgsReversedBits: public BitField<bool, 10, 1> {}; + class FlagBits: public BitField<GenericBinaryFlags, 11, 1> {}; + class StaticTypeInfoBits: public BitField<int, 12, 3> {}; + class RuntimeTypeInfoBits: public BitField<BinaryOpIC::TypeInfo, 15, 2> {}; Major MajorKey() { return GenericBinaryOp; } int MinorKey() { @@ -790,7 +786,6 @@ class GenericBinaryOpStub: public CodeStub { return OpBits::encode(op_) | ModeBits::encode(mode_) | FlagBits::encode(flags_) - | SSE3Bits::encode(use_sse3_) | ArgsInRegistersBits::encode(args_in_registers_) | ArgsReversedBits::encode(args_reversed_) | StaticTypeInfoBits::encode( -- 2.7.4