Make X64 double-to-int32 conversion use the 64-bit version of cvttsd2si.

author lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Mon, 10 May 2010 11:38:58 +0000 (11:38 +0000)

committer lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Mon, 10 May 2010 11:38:58 +0000 (11:38 +0000)
author lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Mon, 10 May 2010 11:38:58 +0000 (11:38 +0000)
committer lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Mon, 10 May 2010 11:38:58 +0000 (11:38 +0000)
diff --git a/src/ia32/codegen-ia32.cc b/src/ia32/codegen-ia32.cc

index 63286a7..c2701c8 100644 (file)
--- a/src/ia32/codegen-ia32.cc
+++ b/src/ia32/codegen-ia32.cc
@@ -10296,6 +10296,11 @@ void IntegerConvert(MacroAssembler* masm,
    Label done, right_exponent, normal_exponent;
    Register scratch = ebx;
    Register scratch2 = edi;
+  if (type_info.IsInteger32() && CpuFeatures::IsEnabled(SSE2)) {
+    CpuFeatures::Scope scope(SSE2);
+    __ cvttsd2si(ecx, FieldOperand(source, HeapNumber::kValueOffset));
+    return;
+  }
    if (!type_info.IsInteger32() || !use_sse3) {
      // Get exponent word.
      __ mov(scratch, FieldOperand(source, HeapNumber::kExponentOffset));
diff --git a/src/x64/assembler-x64.cc b/src/x64/assembler-x64.cc

index 1c00ebc..fcfa8d0 100644 (file)
--- a/src/x64/assembler-x64.cc
+++ b/src/x64/assembler-x64.cc
@@ -2510,6 +2510,17 @@ void Assembler::cvttsd2si(Register dst, const Operand& src) {
  }
  
  
+void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
+  EnsureSpace ensure_space(this);
+  last_pc_ = pc_;
+  emit(0xF2);  // Mandatory prefix of the scalar-double cvttsd2si encoding.
+  emit_rex_64(dst, src);  // 64-bit REX form: this is the quadword variant.
+  emit(0x0F);  // Two-byte opcode escape.
+  emit(0x2C);  // Opcode byte of the truncating double-to-integer convert.
+  emit_sse_operand(dst, src);  // Operand encoding for dst (GPR), src (XMM).
+}
+
+
  void Assembler::cvtlsi2sd(XMMRegister dst, const Operand& src) {
    EnsureSpace ensure_space(this);
    last_pc_ = pc_;
diff --git a/src/x64/assembler-x64.h b/src/x64/assembler-x64.h

index d077865..55d0828 100644 (file)
--- a/src/x64/assembler-x64.h
+++ b/src/x64/assembler-x64.h
@@ -1092,6 +1092,7 @@ class Assembler : public Malloced {
  
    void cvttss2si(Register dst, const Operand& src);
    void cvttsd2si(Register dst, const Operand& src);
+  void cvttsd2siq(Register dst, XMMRegister src);
  
    void cvtlsi2sd(XMMRegister dst, const Operand& src);
    void cvtlsi2sd(XMMRegister dst, Register src);
diff --git a/src/x64/codegen-x64.cc b/src/x64/codegen-x64.cc

index 740be83..8556b58 100644 (file)
--- a/src/x64/codegen-x64.cc
+++ b/src/x64/codegen-x64.cc
@@ -277,7 +277,6 @@ class FloatingPointHelper : public AllStatic {
    // Takes the operands in rdx and rax and loads them as integers in rax
    // and rcx.
    static void LoadAsIntegers(MacroAssembler* masm,
-                             bool use_sse3,
                               Label* operand_conversion_failure);
  };
  
@@ -8015,138 +8014,29 @@ void TranscendentalCacheStub::GenerateOperation(MacroAssembler* masm,
  }
  
  
-// Get the integer part of a heap number.  Surprisingly, all this bit twiddling
-// is faster than using the built-in instructions on floating point registers.
+// Get the integer part of a heap number.  Uses xmm0 as a temporary.
  // Trashes rdi and rbx.  Dest is rcx.  Source cannot be rcx or one of the
  // trashed registers.
  void IntegerConvert(MacroAssembler* masm,
                      Register source,
-                    bool use_sse3,
                      Label* conversion_failure) {
    ASSERT(!source.is(rcx) && !source.is(rdi) && !source.is(rbx));
-  Label done, right_exponent, normal_exponent;
    Register scratch = rbx;
    Register scratch2 = rdi;
    // Get exponent word.
-  __ movl(scratch, FieldOperand(source, HeapNumber::kExponentOffset));
+  __ movq(scratch2, FieldOperand(source, HeapNumber::kValueOffset));
    // Get exponent alone in scratch2.
-  __ movl(scratch2, scratch);
-  __ and_(scratch2, Immediate(HeapNumber::kExponentMask));
-  if (use_sse3) {
-    CpuFeatures::Scope scope(SSE3);
-    // Check whether the exponent is too big for a 64 bit signed integer.
-    static const uint32_t kTooBigExponent =
-        (HeapNumber::kExponentBias + 63) << HeapNumber::kExponentShift;
-    __ cmpl(scratch2, Immediate(kTooBigExponent));
-    __ j(greater_equal, conversion_failure);
-    // Load x87 register with heap number.
-    __ fld_d(FieldOperand(source, HeapNumber::kValueOffset));
-    // Reserve space for 64 bit answer.
-    __ subq(rsp, Immediate(sizeof(uint64_t)));  // Nolint.
-    // Do conversion, which cannot fail because we checked the exponent.
-    __ fisttp_d(Operand(rsp, 0));
-    __ movl(rcx, Operand(rsp, 0));  // Load low word of answer into rcx.
-    __ addq(rsp, Immediate(sizeof(uint64_t)));  // Nolint.
-  } else {
-    // Load rcx with zero.  We use this either for the final shift or
-    // for the answer.
-    __ xor_(rcx, rcx);
-    // Check whether the exponent matches a 32 bit signed int that cannot be
-    // represented by a Smi.  A non-smi 32 bit integer is 1.xxx * 2^30 so the
-    // exponent is 30 (biased).  This is the exponent that we are fastest at and
-    // also the highest exponent we can handle here.
-    const uint32_t non_smi_exponent =
-        (HeapNumber::kExponentBias + 30) << HeapNumber::kExponentShift;
-    __ cmpl(scratch2, Immediate(non_smi_exponent));
-    // If we have a match of the int32-but-not-Smi exponent then skip some
-    // logic.
-    __ j(equal, &right_exponent);
-    // If the exponent is higher than that then go to slow case.  This catches
-    // numbers that don't fit in a signed int32, infinities and NaNs.
-    __ j(less, &normal_exponent);
-
-    {
-      // Handle a big exponent.  The only reason we have this code is that the
-      // >>> operator has a tendency to generate numbers with an exponent of 31.
-      const uint32_t big_non_smi_exponent =
-          (HeapNumber::kExponentBias + 31) << HeapNumber::kExponentShift;
-      __ cmpl(scratch2, Immediate(big_non_smi_exponent));
-      __ j(not_equal, conversion_failure);
-      // We have the big exponent, typically from >>>.  This means the number is
-      // in the range 2^31 to 2^32 - 1.  Get the top bits of the mantissa.
-      __ movl(scratch2, scratch);
-      __ and_(scratch2, Immediate(HeapNumber::kMantissaMask));
-      // Put back the implicit 1.
-      __ or_(scratch2, Immediate(1 << HeapNumber::kExponentShift));
-      // Shift up the mantissa bits to take up the space the exponent used to
-      // take. We just orred in the implicit bit so that took care of one and
-      // we want to use the full unsigned range so we subtract 1 bit from the
-      // shift distance.
-      const int big_shift_distance = HeapNumber::kNonMantissaBitsInTopWord - 1;
-      __ shl(scratch2, Immediate(big_shift_distance));
-      // Get the second half of the double.
-      __ movl(rcx, FieldOperand(source, HeapNumber::kMantissaOffset));
-      // Shift down 21 bits to get the most significant 11 bits or the low
-      // mantissa word.
-      __ shr(rcx, Immediate(32 - big_shift_distance));
-      __ or_(rcx, scratch2);
-      // We have the answer in rcx, but we may need to negate it.
-      __ testl(scratch, scratch);
-      __ j(positive, &done);
-      __ neg(rcx);
-      __ jmp(&done);
-    }
-
-    __ bind(&normal_exponent);
-    // Exponent word in scratch, exponent part of exponent word in scratch2.
-    // Zero in rcx.
-    // We know the exponent is smaller than 30 (biased).  If it is less than
-    // 0 (biased) then the number is smaller in magnitude than 1.0 * 2^0, ie
-    // it rounds to zero.
-    const uint32_t zero_exponent =
-        (HeapNumber::kExponentBias + 0) << HeapNumber::kExponentShift;
-    __ subl(scratch2, Immediate(zero_exponent));
-    // rcx already has a Smi zero.
-    __ j(less, &done);
-
-    // We have a shifted exponent between 0 and 30 in scratch2.
-    __ shr(scratch2, Immediate(HeapNumber::kExponentShift));
-    __ movl(rcx, Immediate(30));
-    __ subl(rcx, scratch2);
-
-    __ bind(&right_exponent);
-    // Here rcx is the shift, scratch is the exponent word.
-    // Get the top bits of the mantissa.
-    __ and_(scratch, Immediate(HeapNumber::kMantissaMask));
-    // Put back the implicit 1.
-    __ or_(scratch, Immediate(1 << HeapNumber::kExponentShift));
-    // Shift up the mantissa bits to take up the space the exponent used to
-    // take. We have kExponentShift + 1 significant bits int he low end of the
-    // word.  Shift them to the top bits.
-    const int shift_distance = HeapNumber::kNonMantissaBitsInTopWord - 2;
-    __ shl(scratch, Immediate(shift_distance));
-    // Get the second half of the double. For some exponents we don't
-    // actually need this because the bits get shifted out again, but
-    // it's probably slower to test than just to do it.
-    __ movl(scratch2, FieldOperand(source, HeapNumber::kMantissaOffset));
-    // Shift down 22 bits to get the most significant 10 bits or the low
-    // mantissa word.
-    __ shr(scratch2, Immediate(32 - shift_distance));
-    __ or_(scratch2, scratch);
-    // Move down according to the exponent.
-    __ shr_cl(scratch2);
-    // Now the unsigned answer is in scratch2.  We need to move it to rcx and
-    // we may need to fix the sign.
-    Label negative;
-    __ xor_(rcx, rcx);
-    __ cmpl(rcx, FieldOperand(source, HeapNumber::kExponentOffset));
-    __ j(greater, &negative);
-    __ movl(rcx, scratch2);
-    __ jmp(&done);
-    __ bind(&negative);
-    __ subl(rcx, scratch2);
-    __ bind(&done);
-  }
+  __ movq(xmm0, scratch2);
+  __ shr(scratch2, Immediate(HeapNumber::kMantissaBits));
+  __ andl(scratch2, Immediate((1 << HeapNumber::KExponentBits) - 1));
+  // Check whether the exponent is too big for a 64 bit signed integer.
+  // (Notice: Also rejects -2^63, the one int64 value with exponent 63.)
+  __ cmpl(scratch2, Immediate(63 + HeapNumber::kExponentBias));
+  __ j(greater_equal, conversion_failure);
+  // Handle exponent range -inf..62.
+  __ cvttsd2siq(rcx, xmm0);
+  // TODO(lrn): Do bit-fiddling for exponents in range 63..84 and return
+  // zero for everything else (also including negative exponents).
  }
  
  
@@ -8196,7 +8086,7 @@ void GenericUnaryOpStub::Generate(MacroAssembler* masm) {
      __ j(not_equal, &slow);
  
      // Convert the heap number in rax to an untagged integer in rcx.
-    IntegerConvert(masm, rax, CpuFeatures::IsSupported(SSE3), &slow);
+    IntegerConvert(masm, rax, &slow);
  
      // Do the bitwise operation and check if the result fits in a smi.
      Label try_float;
@@ -9777,7 +9667,6 @@ void FloatingPointHelper::LoadFloatOperandsFromSmis(MacroAssembler* masm,
  // Input: rdx, rax are the left and right objects of a bit op.
  // Output: rax, rcx are left and right integers for a bit op.
  void FloatingPointHelper::LoadAsIntegers(MacroAssembler* masm,
-                                         bool use_sse3,
                                           Label* conversion_failure) {
    // Check float operands.
    Label arg1_is_object, check_undefined_arg1;
@@ -9800,10 +9689,10 @@ void FloatingPointHelper::LoadAsIntegers(MacroAssembler* masm,
    __ CompareRoot(rbx, Heap::kHeapNumberMapRootIndex);
    __ j(not_equal, &check_undefined_arg1);
    // Get the untagged integer version of the edx heap number in rcx.
-  IntegerConvert(masm, rdx, use_sse3, conversion_failure);
+  IntegerConvert(masm, rdx, conversion_failure);
    __ movl(rdx, rcx);
  
-  // Here edx has the untagged integer, eax has a Smi or a heap number.
+  // Here rdx has the untagged integer, rax has a Smi or a heap number.
    __ bind(&load_arg2);
    // Test if arg2 is a Smi.
    __ JumpIfNotSmi(rax, &arg2_is_object);
@@ -9823,7 +9712,7 @@ void FloatingPointHelper::LoadAsIntegers(MacroAssembler* masm,
    __ CompareRoot(rbx, Heap::kHeapNumberMapRootIndex);
    __ j(not_equal, &check_undefined_arg2);
    // Get the untagged integer version of the eax heap number in ecx.
-  IntegerConvert(masm, rax, use_sse3, conversion_failure);
+  IntegerConvert(masm, rax, conversion_failure);
    __ bind(&done);
    __ movl(rax, rdx);
  }
@@ -9898,7 +9787,6 @@ const char* GenericBinaryOpStub::GetName() {
                 (flags_ & NO_SMI_CODE_IN_STUB) ? "_NoSmiInStub" : "",
                 args_in_registers_ ? "RegArgs" : "StackArgs",
                 args_reversed_ ? "_R" : "",
-               use_sse3_ ? "SSE3" : "SSE2",
                 static_operands_type_.ToString(),
                 BinaryOpIC::GetName(runtime_operands_type_));
    return name_;
@@ -10331,7 +10219,7 @@ void GenericBinaryOpStub::Generate(MacroAssembler* masm) {
        case Token::SHL:
        case Token::SHR: {
          Label skip_allocation, non_smi_result;
-        FloatingPointHelper::LoadAsIntegers(masm, use_sse3_, &call_runtime);
+        FloatingPointHelper::LoadAsIntegers(masm, &call_runtime);
          switch (op_) {
            case Token::BIT_OR:  __ orl(rax, rcx); break;
            case Token::BIT_AND: __ andl(rax, rcx); break;
@@ -10342,7 +10230,7 @@ void GenericBinaryOpStub::Generate(MacroAssembler* masm) {
            default: UNREACHABLE();
          }
          if (op_ == Token::SHR) {
-          // Check if result is non-negative. This can only happen for a shift
+          // Check if result is negative. This can only happen for a shift
            // by zero, which also doesn't update the sign flag.
            __ testl(rax, rax);
            __ j(negative, &non_smi_result);
diff --git a/src/x64/codegen-x64.h b/src/x64/codegen-x64.h

index a89f23e..416e966 100644 (file)
--- a/src/x64/codegen-x64.h
+++ b/src/x64/codegen-x64.h
@@ -713,7 +713,6 @@ class GenericBinaryOpStub: public CodeStub {
          static_operands_type_(operands_type),
          runtime_operands_type_(BinaryOpIC::DEFAULT),
          name_(NULL) {
-    use_sse3_ = CpuFeatures::IsSupported(SSE3);
      ASSERT(OpBits::is_valid(Token::NUM_TOKENS));
    }
  
@@ -723,7 +722,6 @@ class GenericBinaryOpStub: public CodeStub {
          flags_(FlagBits::decode(key)),
          args_in_registers_(ArgsInRegistersBits::decode(key)),
          args_reversed_(ArgsReversedBits::decode(key)),
-        use_sse3_(SSE3Bits::decode(key)),
          static_operands_type_(TypeInfo::ExpandedRepresentation(
              StaticTypeInfoBits::decode(key))),
          runtime_operands_type_(type_info),
@@ -748,7 +746,6 @@ class GenericBinaryOpStub: public CodeStub {
    GenericBinaryFlags flags_;
    bool args_in_registers_;  // Arguments passed in registers not on the stack.
    bool args_reversed_;  // Left and right argument are swapped.
-  bool use_sse3_;
  
    // Number type information of operands, determined by code generator.
    TypeInfo static_operands_type_;
@@ -774,15 +771,14 @@ class GenericBinaryOpStub: public CodeStub {
    }
  #endif
  
-  // Minor key encoding in 18 bits TTNNNFRASOOOOOOOMM.
+  // Minor key encoding in 17 bits TTNNNFRAOOOOOOOMM.
    class ModeBits: public BitField<OverwriteMode, 0, 2> {};
    class OpBits: public BitField<Token::Value, 2, 7> {};
-  class SSE3Bits: public BitField<bool, 9, 1> {};
-  class ArgsInRegistersBits: public BitField<bool, 10, 1> {};
-  class ArgsReversedBits: public BitField<bool, 11, 1> {};
-  class FlagBits: public BitField<GenericBinaryFlags, 12, 1> {};
-  class StaticTypeInfoBits: public BitField<int, 13, 3> {};
-  class RuntimeTypeInfoBits: public BitField<BinaryOpIC::TypeInfo, 16, 2> {};
+  class ArgsInRegistersBits: public BitField<bool, 9, 1> {};
+  class ArgsReversedBits: public BitField<bool, 10, 1> {};
+  class FlagBits: public BitField<GenericBinaryFlags, 11, 1> {};
+  class StaticTypeInfoBits: public BitField<int, 12, 3> {};
+  class RuntimeTypeInfoBits: public BitField<BinaryOpIC::TypeInfo, 15, 2> {};
  
    Major MajorKey() { return GenericBinaryOp; }
    int MinorKey() {
@@ -790,7 +786,6 @@ class GenericBinaryOpStub: public CodeStub {
      return OpBits::encode(op_)
             | ModeBits::encode(mode_)
             | FlagBits::encode(flags_)
-           | SSE3Bits::encode(use_sse3_)
             | ArgsInRegistersBits::encode(args_in_registers_)
             | ArgsReversedBits::encode(args_reversed_)
             | StaticTypeInfoBits::encode(
author	lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Mon, 10 May 2010 11:38:58 +0000 (11:38 +0000)
committer	lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Mon, 10 May 2010 11:38:58 +0000 (11:38 +0000)
src/ia32/codegen-ia32.cc		patch \| blob \| history
src/x64/assembler-x64.cc		patch \| blob \| history
src/x64/assembler-x64.h		patch \| blob \| history
src/x64/codegen-x64.cc		patch \| blob \| history
src/x64/codegen-x64.h		patch \| blob \| history