From b7022fd2be9ec83e7dc9b9871f078fb4843a4527 Mon Sep 17 00:00:00 2001 From: "mvstanton@chromium.org" Date: Tue, 9 Apr 2013 08:42:57 +0000 Subject: [PATCH] Improvements for x87 stack handling BUG= Review URL: https://codereview.chromium.org/13426006 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@14179 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/hydrogen-instructions.h | 28 +- src/ia32/lithium-codegen-ia32.cc | 680 +++++++++++++++++++++++++------ src/ia32/lithium-codegen-ia32.h | 24 +- src/ia32/lithium-gap-resolver-ia32.cc | 72 +++- src/ia32/lithium-ia32.cc | 119 ++++-- src/ia32/lithium-ia32.h | 65 +++ src/ia32/macro-assembler-ia32.cc | 22 + src/ia32/macro-assembler-ia32.h | 2 + src/lithium-allocator.cc | 4 +- src/objects.h | 2 + test/mjsunit/external-array-no-sse2.js | 716 +++++++++++++++++++++++++++++++++ test/mjsunit/pixel-array-rounding.js | 8 +- 12 files changed, 1569 insertions(+), 173 deletions(-) create mode 100644 test/mjsunit/external-array-no-sse2.js diff --git a/src/hydrogen-instructions.h b/src/hydrogen-instructions.h index 8dd36c3..1dc9304 100644 --- a/src/hydrogen-instructions.h +++ b/src/hydrogen-instructions.h @@ -3190,13 +3190,19 @@ class HConstant: public HTemplateInstruction<0> { bool InOldSpace() const { return !HEAP->InNewSpace(*handle_); } + bool IsSpecialDouble() const { + return has_double_value_ && + (BitCast(double_value_) == BitCast(-0.0) || + FixedDoubleArray::is_the_hole_nan(double_value_) || + isnan(double_value_)); + } + bool ImmortalImmovable() const { if (has_int32_value_) { return false; } if (has_double_value_) { - if (BitCast(double_value_) == BitCast(-0.0) || - isnan(double_value_)) { + if (IsSpecialDouble()) { return true; } return false; @@ -3227,7 +3233,9 @@ class HConstant: public HTemplateInstruction<0> { return has_int32_value_; } - virtual bool EmitAtUses() { return !representation().IsDouble(); } + virtual bool EmitAtUses() { + return !representation().IsDouble() || IsSpecialDouble(); + } virtual void PrintDataTo(StringStream* stream); virtual HType CalculateInferredType(); bool IsInteger() { return handle()->IsSmi(); } @@ -3246,6 +3254,16 @@ class HConstant: public HTemplateInstruction<0> { ASSERT(HasDoubleValue()); return double_value_; } + bool IsTheHole() const { + if (HasDoubleValue() && FixedDoubleArray::is_the_hole_nan(double_value_)) { + return true; + } + Heap* heap = isolate()->heap(); + if (!handle_.is_null() && *handle_ == heap->the_hole_value()) { + return true; + } + return false; + } bool HasNumberValue() const { return has_double_value_; } int32_t NumberValueAsInteger32() const { ASSERT(HasNumberValue()); @@ -5677,6 +5695,10 @@ class HStoreKeyed bool IsDehoisted() { return is_dehoisted_; } void SetDehoisted(bool is_dehoisted) { is_dehoisted_ = is_dehoisted; } + bool IsConstantHoleStore() { + return value()->IsConstant() && HConstant::cast(value())->IsTheHole(); + } + virtual void SetSideEffectDominator(GVNFlag side_effect, HValue* dominator) { ASSERT(side_effect == kChangesNewSpacePromotion); new_space_dominator_ = dominator; diff --git a/src/ia32/lithium-codegen-ia32.cc b/src/ia32/lithium-codegen-ia32.cc index c0c1079..de37ce3 100644 --- a/src/ia32/lithium-codegen-ia32.cc +++ b/src/ia32/lithium-codegen-ia32.cc @@ -366,7 +366,20 @@ bool LCodeGen::GenerateBody() { Comment(";;; @%d: %s.", current_instruction_, instr->Mnemonic()); } } + + if (!CpuFeatures::IsSupported(SSE2)) { + FlushX87StackIfNecessary(instr); + } + instr->CompileToNative(this); + + if (!CpuFeatures::IsSupported(SSE2)) { + 
ASSERT(!instr->HasDoubleRegisterResult() || x87_stack_depth_ == 1); + + if (FLAG_debug_code && FLAG_enable_slow_asserts) { + __ VerifyX87StackDepth(x87_stack_depth_); + } + } } } EnsureSpaceForLazyDeopt(); @@ -521,6 +534,52 @@ bool LCodeGen::IsX87TopOfStack(LOperand* op) const { } +void LCodeGen::ReadX87Operand(Operand dst) { + ASSERT(x87_stack_depth_ == 1); + __ fst_d(dst); +} + + +void LCodeGen::PushX87DoubleOperand(Operand src) { + ASSERT(x87_stack_depth_ == 0); + x87_stack_depth_++; + __ fld_d(src); +} + + +void LCodeGen::PushX87FloatOperand(Operand src) { + ASSERT(x87_stack_depth_ == 0); + x87_stack_depth_++; + __ fld_s(src); +} + + +void LCodeGen::PopX87() { + ASSERT(x87_stack_depth_ == 1); + x87_stack_depth_--; + __ fstp(0); +} + + +void LCodeGen::CurrentInstructionReturnsX87Result() { + ASSERT(x87_stack_depth_ <= 1); + if (x87_stack_depth_ == 0) { + x87_stack_depth_ = 1; + } +} + + +void LCodeGen::FlushX87StackIfNecessary(LInstruction* instr) { + if (x87_stack_depth_ > 0) { + if ((instr->ClobbersDoubleRegisters() || + instr->HasDoubleRegisterResult()) && + !instr->HasDoubleRegisterInput()) { + PopX87(); + } + } +} + + Register LCodeGen::ToRegister(LOperand* op) const { ASSERT(op->IsRegister()); return ToRegister(op->index()); @@ -846,6 +905,8 @@ void LCodeGen::RegisterEnvironmentForDeoptimization( void LCodeGen::DeoptimizeIf(Condition cc, LEnvironment* environment) { RegisterEnvironmentForDeoptimization(environment, Safepoint::kNoLazyDeopt); ASSERT(environment->HasBeenRegistered()); + // It's an error to deoptimize with the x87 fp stack in use. + ASSERT(x87_stack_depth_ == 0); int id = environment->deoptimization_index(); ASSERT(info()->IsOptimizing() || info()->IsStub()); Deoptimizer::BailoutType bailout_type = info()->IsStub() @@ -1689,40 +1750,46 @@ void LCodeGen::DoConstantI(LConstantI* instr) { void LCodeGen::DoConstantD(LConstantD* instr) { - ASSERT(instr->result()->IsDoubleRegister()); - XMMRegister res = ToDoubleRegister(instr->result()); double v = instr->value(); - // Use xor to produce +0.0 in a fast and compact way, but avoid to - // do so if the constant is -0.0. 
- if (BitCast(v) == 0) { - __ xorps(res, res); + uint64_t int_val = BitCast(v); + int32_t lower = static_cast(int_val); + int32_t upper = static_cast(int_val >> (kBitsPerInt)); + + if (!CpuFeatures::IsSafeForSnapshot(SSE2)) { + __ push(Immediate(lower)); + __ push(Immediate(upper)); + PushX87DoubleOperand(Operand(esp, 0)); + __ add(Operand(esp), Immediate(kDoubleSize)); + CurrentInstructionReturnsX87Result(); } else { - Register temp = ToRegister(instr->temp()); - uint64_t int_val = BitCast(v); - int32_t lower = static_cast(int_val); - int32_t upper = static_cast(int_val >> (kBitsPerInt)); - if (CpuFeatures::IsSupported(SSE4_1)) { - CpuFeatureScope scope1(masm(), SSE2); - CpuFeatureScope scope2(masm(), SSE4_1); - if (lower != 0) { - __ Set(temp, Immediate(lower)); - __ movd(res, Operand(temp)); - __ Set(temp, Immediate(upper)); - __ pinsrd(res, Operand(temp), 1); + CpuFeatureScope scope1(masm(), SSE2); + ASSERT(instr->result()->IsDoubleRegister()); + XMMRegister res = ToDoubleRegister(instr->result()); + if (int_val == 0) { + __ xorps(res, res); + } else { + Register temp = ToRegister(instr->temp()); + if (CpuFeatures::IsSupported(SSE4_1)) { + CpuFeatureScope scope2(masm(), SSE4_1); + if (lower != 0) { + __ Set(temp, Immediate(lower)); + __ movd(res, Operand(temp)); + __ Set(temp, Immediate(upper)); + __ pinsrd(res, Operand(temp), 1); + } else { + __ xorps(res, res); + __ Set(temp, Immediate(upper)); + __ pinsrd(res, Operand(temp), 1); + } } else { - __ xorps(res, res); __ Set(temp, Immediate(upper)); - __ pinsrd(res, Operand(temp), 1); - } - } else { - CpuFeatureScope scope(masm(), SSE2); - __ Set(temp, Immediate(upper)); - __ movd(res, Operand(temp)); - __ psllq(res, 32); - if (lower != 0) { - __ Set(temp, Immediate(lower)); - __ movd(xmm0, Operand(temp)); - __ por(res, xmm0); + __ movd(res, Operand(temp)); + __ psllq(res, 32); + if (lower != 0) { + __ Set(temp, Immediate(lower)); + __ movd(xmm0, Operand(temp)); + __ por(res, xmm0); + } } } } @@ -3158,16 +3225,16 @@ void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { __ movss(result, operand); __ cvtss2sd(result, result); } else { - __ fld_s(operand); - HandleX87FPReturnValue(instr); + PushX87FloatOperand(operand); + CurrentInstructionReturnsX87Result(); } } else if (elements_kind == EXTERNAL_DOUBLE_ELEMENTS) { if (CpuFeatures::IsSupported(SSE2)) { CpuFeatureScope scope(masm(), SSE2); __ movdbl(ToDoubleRegister(instr->result()), operand); } else { - __ fld_d(operand); - HandleX87FPReturnValue(instr); + PushX87DoubleOperand(operand); + CurrentInstructionReturnsX87Result(); } } else { Register result(ToRegister(instr->result())); @@ -3212,29 +3279,6 @@ void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { } -void LCodeGen::HandleX87FPReturnValue(LInstruction* instr) { - if (IsX87TopOfStack(instr->result())) { - // Return value is already on stack. If the value has no uses, then - // pop it off the FP stack. Otherwise, make sure that there are enough - // copies of the value on the stack to feed all of the usages, e.g. - // when the following instruction uses the return value in multiple - // inputs. 
- int count = instr->hydrogen_value()->UseCount(); - if (count == 0) { - __ fstp(0); - } else { - count--; - ASSERT(count <= 7); - while (count-- > 0) { - __ fld(0); - } - } - } else { - __ fstp_d(ToOperand(instr->result())); - } -} - - void LCodeGen::DoLoadKeyedFixedDoubleArray(LLoadKeyed* instr) { if (instr->hydrogen()->RequiresHoleCheck()) { int offset = FixedDoubleArray::kHeaderSize - kHeapObjectTag + @@ -3261,8 +3305,8 @@ void LCodeGen::DoLoadKeyedFixedDoubleArray(LLoadKeyed* instr) { XMMRegister result = ToDoubleRegister(instr->result()); __ movdbl(result, double_load_operand); } else { - __ fld_d(double_load_operand); - HandleX87FPReturnValue(instr); + PushX87DoubleOperand(double_load_operand); + CurrentInstructionReturnsX87Result(); } } @@ -4311,12 +4355,21 @@ void LCodeGen::DoStoreKeyedExternalArray(LStoreKeyed* instr) { 0, instr->additional_index())); if (elements_kind == EXTERNAL_FLOAT_ELEMENTS) { - CpuFeatureScope scope(masm(), SSE2); - __ cvtsd2ss(xmm0, ToDoubleRegister(instr->value())); - __ movss(operand, xmm0); + if (CpuFeatures::IsSafeForSnapshot(SSE2)) { + CpuFeatureScope scope(masm(), SSE2); + __ cvtsd2ss(xmm0, ToDoubleRegister(instr->value())); + __ movss(operand, xmm0); + } else { + __ fld(0); + __ fstp_s(operand); + } } else if (elements_kind == EXTERNAL_DOUBLE_ELEMENTS) { - CpuFeatureScope scope(masm(), SSE2); - __ movdbl(operand, ToDoubleRegister(instr->value())); + if (CpuFeatures::IsSafeForSnapshot(SSE2)) { + CpuFeatureScope scope(masm(), SSE2); + __ movdbl(operand, ToDoubleRegister(instr->value())); + } else { + __ fst_d(operand); + } } else { Register value = ToRegister(instr->value()); switch (elements_kind) { @@ -4351,21 +4404,8 @@ void LCodeGen::DoStoreKeyedExternalArray(LStoreKeyed* instr) { void LCodeGen::DoStoreKeyedFixedDoubleArray(LStoreKeyed* instr) { - CpuFeatureScope scope(masm(), SSE2); - XMMRegister value = ToDoubleRegister(instr->value()); - - if (instr->NeedsCanonicalization()) { - Label have_value; - - __ ucomisd(value, value); - __ j(parity_odd, &have_value); // NaN. - - ExternalReference canonical_nan_reference = - ExternalReference::address_of_canonical_non_hole_nan(); - __ movdbl(value, Operand::StaticVariable(canonical_nan_reference)); - __ bind(&have_value); - } - + ExternalReference canonical_nan_reference = + ExternalReference::address_of_canonical_non_hole_nan(); Operand double_store_operand = BuildFastArrayOperand( instr->elements(), instr->key(), @@ -4373,7 +4413,68 @@ void LCodeGen::DoStoreKeyedFixedDoubleArray(LStoreKeyed* instr) { FAST_DOUBLE_ELEMENTS, FixedDoubleArray::kHeaderSize - kHeapObjectTag, instr->additional_index()); - __ movdbl(double_store_operand, value); + + if (CpuFeatures::IsSafeForSnapshot(SSE2)) { + CpuFeatureScope scope(masm(), SSE2); + XMMRegister value = ToDoubleRegister(instr->value()); + + if (instr->NeedsCanonicalization()) { + Label have_value; + + __ ucomisd(value, value); + __ j(parity_odd, &have_value); // NaN. + + __ movdbl(value, Operand::StaticVariable(canonical_nan_reference)); + __ bind(&have_value); + } + + __ movdbl(double_store_operand, value); + } else { + // Can't use SSE2 in the serializer + if (instr->hydrogen()->IsConstantHoleStore()) { + // This means we should store the (double) hole. No floating point + // registers required. 
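// Illustrative sketch (not part of this patch): the two Immediates stored
// below are just the two 32-bit halves of the IEEE-754 bit pattern of the
// hole NaN, written low word first. memcpy plays the role of V8's BitCast
// here, and the plain pointer stands in for the array element operand; the
// helper name is hypothetical.
#include <cstdint>
#include <cstring>
static void StoreDoubleBitsAsTwoWords(void* slot, double value) {
  uint64_t bits;
  memcpy(&bits, &value, sizeof(bits));            // bit-preserving view of the double
  uint32_t* words = static_cast<uint32_t*>(slot);
  words[0] = static_cast<uint32_t>(bits);         // lower half, lower address
  words[1] = static_cast<uint32_t>(bits >> 32);   // sign/exponent/high mantissa
}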
+ double nan_double = FixedDoubleArray::hole_nan_as_double(); + uint64_t int_val = BitCast(nan_double); + int32_t lower = static_cast(int_val); + int32_t upper = static_cast(int_val >> (kBitsPerInt)); + + __ mov(double_store_operand, Immediate(lower)); + Operand double_store_operand2 = BuildFastArrayOperand( + instr->elements(), + instr->key(), + instr->hydrogen()->key()->representation(), + FAST_DOUBLE_ELEMENTS, + FixedDoubleArray::kHeaderSize - kHeapObjectTag + kPointerSize, + instr->additional_index()); + __ mov(double_store_operand2, Immediate(upper)); + } else { + Label no_special_nan_handling; + ASSERT(x87_stack_depth_ > 0); + + if (instr->NeedsCanonicalization()) { + __ fld(0); + __ fld(0); + __ FCmp(); + + __ j(parity_odd, &no_special_nan_handling); + __ sub(esp, Immediate(kDoubleSize)); + __ fst_d(MemOperand(esp, 0)); + __ cmp(MemOperand(esp, sizeof(kHoleNanLower32)), + Immediate(kHoleNanUpper32)); + __ add(esp, Immediate(kDoubleSize)); + Label canonicalize; + __ j(not_equal, &canonicalize); + __ jmp(&no_special_nan_handling); + __ bind(&canonicalize); + __ fstp(0); + __ fld_d(Operand::StaticVariable(canonical_nan_reference)); + } + + __ bind(&no_special_nan_handling); + __ fst_d(double_store_operand); + } + } } @@ -4805,9 +4906,6 @@ void LCodeGen::DoNumberTagD(LNumberTagD* instr) { XMMRegister input_reg = ToDoubleRegister(instr->value()); __ ucomisd(input_reg, input_reg); } else { - if (!IsX87TopOfStack(instr->value())) { - __ fld_d(ToOperand(instr->value())); - } __ fld(0); __ fld(0); __ FCmp(); @@ -4829,6 +4927,9 @@ void LCodeGen::DoNumberTagD(LNumberTagD* instr) { __ j(not_equal, &canonicalize); __ add(esp, Immediate(kDoubleSize)); __ mov(reg, factory()->the_hole_value()); + if (!use_sse2) { + __ fstp(0); + } __ jmp(&done); __ bind(&canonicalize); __ add(esp, Immediate(kDoubleSize)); @@ -4858,10 +4959,7 @@ void LCodeGen::DoNumberTagD(LNumberTagD* instr) { XMMRegister input_reg = ToDoubleRegister(instr->value()); __ movdbl(FieldOperand(reg, HeapNumber::kValueOffset), input_reg); } else { - if (!IsX87TopOfStack(instr->value())) { - __ fld_d(ToOperand(instr->value())); - } - __ fstp_d(FieldOperand(reg, HeapNumber::kValueOffset)); + __ fst_d(FieldOperand(reg, HeapNumber::kValueOffset)); } __ bind(&done); } @@ -4909,6 +5007,79 @@ void LCodeGen::DoSmiUntag(LSmiUntag* instr) { } +void LCodeGen::EmitNumberUntagDNoSSE2(Register input_reg, + Register temp_reg, + bool deoptimize_on_undefined, + bool deoptimize_on_minus_zero, + LEnvironment* env, + NumberUntagDMode mode) { + Label load_smi, done; + + if (mode == NUMBER_CANDIDATE_IS_ANY_TAGGED) { + // Smi check. + __ JumpIfSmi(input_reg, &load_smi, Label::kNear); + + // Heap number map check. + __ cmp(FieldOperand(input_reg, HeapObject::kMapOffset), + factory()->heap_number_map()); + if (deoptimize_on_undefined) { + DeoptimizeIf(not_equal, env); + } else { + Label heap_number; + __ j(equal, &heap_number, Label::kNear); + + __ cmp(input_reg, factory()->undefined_value()); + DeoptimizeIf(not_equal, env); + + // Convert undefined to NaN. + ExternalReference nan = + ExternalReference::address_of_canonical_non_hole_nan(); + __ fld_d(Operand::StaticVariable(nan)); + __ jmp(&done, Label::kNear); + __ bind(&heap_number); + } + // Heap number to x87 conversion. 
+ __ fld_d(FieldOperand(input_reg, HeapNumber::kValueOffset)); + if (deoptimize_on_minus_zero) { + __ fldz(); + __ FCmp(); + __ fld_d(FieldOperand(input_reg, HeapNumber::kValueOffset)); + __ j(not_zero, &done, Label::kNear); + + // Use general purpose registers to check if we have -0.0 + __ mov(temp_reg, FieldOperand(input_reg, HeapNumber::kExponentOffset)); + __ test(temp_reg, Immediate(HeapNumber::kSignMask)); + __ j(zero, &done, Label::kNear); + + // Pop FPU stack before deoptimizing. + __ fstp(0); + DeoptimizeIf(not_zero, env); + } + __ jmp(&done, Label::kNear); + } else if (mode == NUMBER_CANDIDATE_IS_SMI_OR_HOLE) { + __ test(input_reg, Immediate(kSmiTagMask)); + DeoptimizeIf(not_equal, env); + } else if (mode == NUMBER_CANDIDATE_IS_SMI_CONVERT_HOLE) { + __ test(input_reg, Immediate(kSmiTagMask)); + __ j(zero, &load_smi); + ExternalReference hole_nan_reference = + ExternalReference::address_of_the_hole_nan(); + __ fld_d(Operand::StaticVariable(hole_nan_reference)); + __ jmp(&done, Label::kNear); + } else { + ASSERT(mode == NUMBER_CANDIDATE_IS_SMI); + } + + __ bind(&load_smi); + __ SmiUntag(input_reg); // Untag smi before converting to float. + __ push(input_reg); + __ fild_s(Operand(esp, 0)); + __ pop(input_reg); + __ SmiTag(input_reg); // Retag smi. + __ bind(&done); +} + + void LCodeGen::EmitNumberUntagD(Register input_reg, Register temp_reg, XMMRegister result_reg, @@ -5021,7 +5192,7 @@ void LCodeGen::DoDeferredTaggedToI(LTaggedToI* instr) { __ fisttp_d(Operand(esp, 0)); __ mov(input_reg, Operand(esp, 0)); // Low word of answer is the result. __ add(Operand(esp), Immediate(kDoubleSize)); - } else { + } else if (CpuFeatures::IsSupported(SSE2)) { CpuFeatureScope scope(masm(), SSE2); XMMRegister xmm_temp = ToDoubleRegister(instr->temp()); __ movdbl(xmm0, FieldOperand(input_reg, HeapNumber::kValueOffset)); @@ -5035,6 +5206,8 @@ void LCodeGen::DoDeferredTaggedToI(LTaggedToI* instr) { __ ucomisd(xmm_temp, xmm0); DeoptimizeIf(not_equal, instr->environment()); DeoptimizeIf(parity_even, instr->environment()); // NaN. + } else { + UNREACHABLE(); } } else if (CpuFeatures::IsSupported(SSE2)) { CpuFeatureScope scope(masm(), SSE2); @@ -5079,18 +5252,169 @@ void LCodeGen::DoTaggedToI(LTaggedToI* instr) { LOperand* input = instr->value(); ASSERT(input->IsRegister()); - ASSERT(input->Equals(instr->result())); - Register input_reg = ToRegister(input); + ASSERT(input_reg.is(ToRegister(instr->result()))); DeferredTaggedToI* deferred = new(zone()) DeferredTaggedToI(this, instr); - // Smi check. __ JumpIfNotSmi(input_reg, deferred->entry()); + __ SmiUntag(input_reg); + __ bind(deferred->exit()); +} - // Smi to int32 conversion - __ SmiUntag(input_reg); // Untag smi. +void LCodeGen::DoDeferredTaggedToINoSSE2(LTaggedToINoSSE2* instr) { + Label done, heap_number; + Register result_reg = ToRegister(instr->result()); + Register input_reg = ToRegister(instr->value()); + + // Heap number map check. + __ cmp(FieldOperand(input_reg, HeapObject::kMapOffset), + factory()->heap_number_map()); + __ j(equal, &heap_number, Label::kNear); + // Check for undefined. Undefined is converted to zero for truncating + // conversions. + __ cmp(input_reg, factory()->undefined_value()); + __ RecordComment("Deferred TaggedToI: cannot truncate"); + DeoptimizeIf(not_equal, instr->environment()); + __ xor_(result_reg, result_reg); + __ jmp(&done, Label::kFar); + __ bind(&heap_number); + + // Surprisingly, all of this crazy bit manipulation is considerably + // faster than using the built-in x86 CPU conversion functions (about 6x). 
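// Illustrative sketch (not part of this patch) of the conversion the sequence
// below performs with integer instructions: decode the IEEE-754 fields, shift
// the significand into place and apply the sign. It assumes a finite value
// whose truncated magnitude is below 2^31; the generated code deoptimizes (or
// returns 0 when truncating) in the cases this sketch ignores. The helper
// name is hypothetical and memcpy stands in for V8's BitCast.
#include <cstdint>
#include <cstring>
static int32_t TruncateDoubleToInt32(double v) {
  uint64_t bits;
  memcpy(&bits, &v, sizeof(bits));
  int exponent = static_cast<int>((bits >> 52) & 0x7FF) - 1023;
  if (exponent < 0) return 0;                            // |v| < 1 truncates to 0
  uint64_t significand = (bits & ((1ULL << 52) - 1)) | (1ULL << 52);
  uint64_t magnitude = significand >> (52 - exponent);   // drop the fractional bits
  int32_t result = static_cast<int32_t>(magnitude);
  return (bits >> 63) ? -result : result;                // apply the sign bit
}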
+ Label right_exponent, adjust_bias, zero_result; + Register scratch = ToRegister(instr->scratch()); + Register scratch2 = ToRegister(instr->scratch2()); + // Get exponent word. + __ mov(scratch, FieldOperand(input_reg, HeapNumber::kExponentOffset)); + // Get exponent alone in scratch2. + __ mov(scratch2, scratch); + __ and_(scratch2, HeapNumber::kExponentMask); + __ shr(scratch2, HeapNumber::kExponentShift); + if (instr->truncating()) { + __ j(zero, &zero_result); + } else { + __ j(not_zero, &adjust_bias); + __ test(scratch, Immediate(HeapNumber::kMantissaMask)); + DeoptimizeIf(not_zero, instr->environment()); + __ cmp(FieldOperand(input_reg, HeapNumber::kMantissaOffset), Immediate(0)); + DeoptimizeIf(not_equal, instr->environment()); + __ bind(&adjust_bias); + } + __ sub(scratch2, Immediate(HeapNumber::kExponentBias)); + if (!instr->truncating()) { + DeoptimizeIf(negative, instr->environment()); + } else { + __ j(negative, &zero_result); + } + + // Get the second half of the double. For some exponents we don't + // actually need this because the bits get shifted out again, but + // it's probably slower to test than just to do it. + Register scratch3 = ToRegister(instr->scratch3()); + __ mov(scratch3, FieldOperand(input_reg, HeapNumber::kMantissaOffset)); + __ xor_(result_reg, result_reg); + + const uint32_t non_int32_exponent = 31; + __ cmp(scratch2, Immediate(non_int32_exponent)); + // If we have a match of the int32 exponent then skip some logic. + __ j(equal, &right_exponent, Label::kNear); + // If the number doesn't find in an int32, deopt. + DeoptimizeIf(greater, instr->environment()); + + // Exponent word in scratch, exponent in scratch2. We know that 0 <= exponent + // < 31. + __ mov(result_reg, Immediate(31)); + __ sub(result_reg, scratch2); + + __ bind(&right_exponent); + + // Save off exponent for negative check later. + __ mov(scratch2, scratch); + + // Here result_reg is the shift, scratch is the exponent word. + // Get the top bits of the mantissa. + __ and_(scratch, HeapNumber::kMantissaMask); + // Put back the implicit 1. + __ or_(scratch, 1 << HeapNumber::kExponentShift); + // Shift up the mantissa bits to take up the space the exponent used to + // take. We have kExponentShift + 1 significant bits int he low end of the + // word. Shift them to the top bits. + const int shift_distance = HeapNumber::kNonMantissaBitsInTopWord - 1; + __ shl(scratch, shift_distance); + if (!instr->truncating()) { + // If not truncating, a non-zero value in the bottom 22 bits means a + // non-integral value --> trigger a deopt. + __ test(scratch3, Immediate((1 << (32 - shift_distance)) - 1)); + DeoptimizeIf(not_equal, instr->environment()); + } + // Shift down 22 bits to get the most significant 10 bits or the low + // mantissa word. + __ shr(scratch3, 32 - shift_distance); + __ or_(scratch3, scratch); + if (!instr->truncating()) { + // If truncating, a non-zero value in the bits that will be shifted away + // when adjusting the exponent means rounding --> deopt. + __ mov(scratch, 0x1); + ASSERT(result_reg.is(ecx)); + __ shl_cl(scratch); + __ dec(scratch); + __ test(scratch3, scratch); + DeoptimizeIf(not_equal, instr->environment()); + } + // Move down according to the exponent. + ASSERT(result_reg.is(ecx)); + __ shr_cl(scratch3); + // Now the unsigned 32-bit answer is in scratch3. We need to move it to + // result_reg and we may need to fix the sign. 
+ Label negative_result; + __ xor_(result_reg, result_reg); + __ cmp(scratch2, result_reg); + __ j(less, &negative_result, Label::kNear); + __ cmp(scratch3, result_reg); + __ mov(result_reg, scratch3); + // If the result is > MAX_INT, result doesn't fit in signed 32-bit --> deopt. + DeoptimizeIf(less, instr->environment()); + __ jmp(&done, Label::kNear); + __ bind(&zero_result); + __ xor_(result_reg, result_reg); + __ jmp(&done, Label::kNear); + __ bind(&negative_result); + __ sub(result_reg, scratch3); + if (!instr->truncating()) { + // -0.0 triggers a deopt. + DeoptimizeIf(zero, instr->environment()); + } + // If the negative subtraction overflows into a positive number, there was an + // overflow --> deopt. + DeoptimizeIf(positive, instr->environment()); + __ bind(&done); +} + + +void LCodeGen::DoTaggedToINoSSE2(LTaggedToINoSSE2* instr) { + class DeferredTaggedToINoSSE2: public LDeferredCode { + public: + DeferredTaggedToINoSSE2(LCodeGen* codegen, LTaggedToINoSSE2* instr) + : LDeferredCode(codegen), instr_(instr) { } + virtual void Generate() { codegen()->DoDeferredTaggedToINoSSE2(instr_); } + virtual LInstruction* instr() { return instr_; } + private: + LTaggedToINoSSE2* instr_; + }; + + LOperand* input = instr->value(); + ASSERT(input->IsRegister()); + Register input_reg = ToRegister(input); + ASSERT(input_reg.is(ToRegister(instr->result()))); + + DeferredTaggedToINoSSE2* deferred = + new(zone()) DeferredTaggedToINoSSE2(this, instr); + + // Smi check. + __ JumpIfNotSmi(input_reg, deferred->entry()); + __ SmiUntag(input_reg); // Untag smi. __ bind(deferred->exit()); } @@ -5103,32 +5427,31 @@ void LCodeGen::DoNumberUntagD(LNumberUntagD* instr) { LOperand* result = instr->result(); ASSERT(result->IsDoubleRegister()); - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope scope(masm(), SSE2); - Register input_reg = ToRegister(input); - XMMRegister result_reg = ToDoubleRegister(result); - - bool deoptimize_on_minus_zero = - instr->hydrogen()->deoptimize_on_minus_zero(); - Register temp_reg = deoptimize_on_minus_zero ? ToRegister(temp) : no_reg; - - NumberUntagDMode mode = NUMBER_CANDIDATE_IS_ANY_TAGGED; - HValue* value = instr->hydrogen()->value(); - if (value->type().IsSmi()) { - if (value->IsLoadKeyed()) { - HLoadKeyed* load = HLoadKeyed::cast(value); - if (load->UsesMustHandleHole()) { - if (load->hole_mode() == ALLOW_RETURN_HOLE) { - mode = NUMBER_CANDIDATE_IS_SMI_CONVERT_HOLE; - } else { - mode = NUMBER_CANDIDATE_IS_SMI_OR_HOLE; - } + Register input_reg = ToRegister(input); + bool deoptimize_on_minus_zero = + instr->hydrogen()->deoptimize_on_minus_zero(); + Register temp_reg = deoptimize_on_minus_zero ? 
ToRegister(temp) : no_reg; + + NumberUntagDMode mode = NUMBER_CANDIDATE_IS_ANY_TAGGED; + HValue* value = instr->hydrogen()->value(); + if (value->type().IsSmi()) { + if (value->IsLoadKeyed()) { + HLoadKeyed* load = HLoadKeyed::cast(value); + if (load->UsesMustHandleHole()) { + if (load->hole_mode() == ALLOW_RETURN_HOLE) { + mode = NUMBER_CANDIDATE_IS_SMI_CONVERT_HOLE; } else { - mode = NUMBER_CANDIDATE_IS_SMI; + mode = NUMBER_CANDIDATE_IS_SMI_OR_HOLE; } + } else { + mode = NUMBER_CANDIDATE_IS_SMI; } } + } + if (CpuFeatures::IsSupported(SSE2)) { + CpuFeatureScope scope(masm(), SSE2); + XMMRegister result_reg = ToDoubleRegister(result); EmitNumberUntagD(input_reg, temp_reg, result_reg, @@ -5137,7 +5460,13 @@ void LCodeGen::DoNumberUntagD(LNumberUntagD* instr) { instr->environment(), mode); } else { - UNIMPLEMENTED(); + EmitNumberUntagDNoSSE2(input_reg, + temp_reg, + instr->hydrogen()->deoptimize_on_undefined(), + deoptimize_on_minus_zero, + instr->environment(), + mode); + CurrentInstructionReturnsX87Result(); } } @@ -5409,7 +5738,128 @@ void LCodeGen::DoClampTToUint8(LClampTToUint8* instr) { __ bind(&is_smi); __ SmiUntag(input_reg); __ ClampUint8(input_reg); + __ bind(&done); +} + + +void LCodeGen::DoClampTToUint8NoSSE2(LClampTToUint8NoSSE2* instr) { + Register input_reg = ToRegister(instr->unclamped()); + Register result_reg = ToRegister(instr->result()); + Register scratch = ToRegister(instr->scratch()); + Register scratch2 = ToRegister(instr->scratch2()); + Register scratch3 = ToRegister(instr->scratch3()); + Label is_smi, done, heap_number, valid_exponent, + largest_value, zero_result, maybe_nan_or_infinity; + + __ JumpIfSmi(input_reg, &is_smi); + + // Check for heap number + __ cmp(FieldOperand(input_reg, HeapObject::kMapOffset), + factory()->heap_number_map()); + __ j(equal, &heap_number, Label::kFar); + + // Check for undefined. Undefined is converted to zero for clamping + // conversions. + __ cmp(input_reg, factory()->undefined_value()); + DeoptimizeIf(not_equal, instr->environment()); + __ jmp(&zero_result); + + // Heap number + __ bind(&heap_number); + + // Surprisingly, all of the hand-crafted bit-manipulations below are much + // faster than the x86 FPU built-in instruction, especially since "banker's + // rounding" would be additionally very expensive + + // Get exponent word. + __ mov(scratch, FieldOperand(input_reg, HeapNumber::kExponentOffset)); + __ mov(scratch3, FieldOperand(input_reg, HeapNumber::kMantissaOffset)); + + // Test for negative values --> clamp to zero + __ test(scratch, scratch); + __ j(negative, &zero_result); + + // Get exponent alone in scratch2. + __ mov(scratch2, scratch); + __ and_(scratch2, HeapNumber::kExponentMask); + __ shr(scratch2, HeapNumber::kExponentShift); + __ j(zero, &zero_result); + __ sub(scratch2, Immediate(HeapNumber::kExponentBias - 1)); + __ j(negative, &zero_result); + + const uint32_t non_int8_exponent = 7; + __ cmp(scratch2, Immediate(non_int8_exponent + 1)); + // If the exponent is too big, check for special values. + __ j(greater, &maybe_nan_or_infinity, Label::kNear); + + __ bind(&valid_exponent); + // Exponent word in scratch, exponent in scratch2. We know that 0 <= exponent + // < 7. The shift bias is the number of bits to shift the mantissa such that + // with an exponent of 7 such the that top-most one is in bit 30, allowing + // detection the rounding overflow of a 255.5 to 256 (bit 31 goes from 0 to + // 1). 
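// Illustrative sketch (not part of this patch) of what the hand-rolled
// sequence below is intended to compute: clamp a double to [0, 255] with ties
// rounded to even. NaN and negative values clamp to 0, Infinity and large
// values to 255. std::nearbyint rounds to nearest-even in the default
// rounding mode; the helper name is hypothetical.
#include <cmath>
#include <cstdint>
static uint8_t ClampDoubleToUint8(double v) {
  if (!(v > 0.0)) return 0;          // NaN, -0.0 and negative values -> 0
  if (v >= 255.0) return 255;        // +Infinity and anything >= 255 -> 255
  return static_cast<uint8_t>(std::nearbyint(v));
}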
+ int shift_bias = (30 - HeapNumber::kExponentShift) - 7 - 1; + __ lea(result_reg, MemOperand(scratch2, shift_bias)); + // Here result_reg (ecx) is the shift, scratch is the exponent word. Get the + // top bits of the mantissa. + __ and_(scratch, HeapNumber::kMantissaMask); + // Put back the implicit 1 of the mantissa + __ or_(scratch, 1 << HeapNumber::kExponentShift); + // Shift up to round + __ shl_cl(scratch); + // Use "banker's rounding" to spec: If fractional part of number is 0.5, then + // use the bit in the "ones" place and add it to the "halves" place, which has + // the effect of rounding to even. + __ mov(scratch2, scratch); + const uint32_t one_half_bit_shift = 30 - sizeof(uint8_t) * 8; + const uint32_t one_bit_shift = one_half_bit_shift + 1; + __ and_(scratch2, Immediate((1 << one_bit_shift) - 1)); + __ cmp(scratch2, Immediate(1 << one_half_bit_shift)); + Label no_round; + __ j(less, &no_round); + Label round_up; + __ mov(scratch2, Immediate(1 << one_half_bit_shift)); + __ j(greater, &round_up); + __ test(scratch3, scratch3); + __ j(not_zero, &round_up); + __ mov(scratch2, scratch); + __ and_(scratch2, Immediate(1 << one_bit_shift)); + __ shr(scratch2, 1); + __ bind(&round_up); + __ add(scratch, scratch2); + __ j(overflow, &largest_value); + __ bind(&no_round); + __ shr(scratch, 23); + __ mov(result_reg, scratch); + __ jmp(&done, Label::kNear); + + __ bind(&maybe_nan_or_infinity); + // Check for NaN/Infinity, all other values map to 255 + __ cmp(scratch2, Immediate(HeapNumber::kInfinityOrNanExponent + 1)); + __ j(not_equal, &largest_value, Label::kNear); + + // Check for NaN, which differs from Infinity in that at least one mantissa + // bit is set. + __ and_(scratch, HeapNumber::kMantissaMask); + __ or_(scratch, FieldOperand(input_reg, HeapNumber::kMantissaOffset)); + __ j(not_zero, &zero_result); // M!=0 --> NaN + // Infinity -> Fall through to map to 255. + __ bind(&largest_value); + __ mov(result_reg, Immediate(255)); + __ jmp(&done, Label::kNear); + + __ bind(&zero_result); + __ xor_(result_reg, result_reg); + __ jmp(&done); + + // smi + __ bind(&is_smi); + if (!input_reg.is(result_reg)) { + __ mov(result_reg, input_reg); + } + __ SmiUntag(result_reg); + __ ClampUint8(result_reg); __ bind(&done); } diff --git a/src/ia32/lithium-codegen-ia32.h b/src/ia32/lithium-codegen-ia32.h index 3a38e32..e8bdbf4 100644 --- a/src/ia32/lithium-codegen-ia32.h +++ b/src/ia32/lithium-codegen-ia32.h @@ -68,6 +68,7 @@ class LCodeGen BASE_EMBEDDED { osr_pc_offset_(-1), last_lazy_deopt_pc_(0), frame_is_built_(false), + x87_stack_depth_(0), safepoints_(info->zone()), resolver_(this), expected_safepoint_kind_(Safepoint::kSimple) { @@ -102,10 +103,17 @@ class LCodeGen BASE_EMBEDDED { return Immediate(ToInteger32(LConstantOperand::cast(op))); } - Handle ToHandle(LConstantOperand* op) const; + // Support for non-sse2 (x87) floating point stack handling. + // These functions maintain the depth of the stack (either 0 or 1) + void PushX87DoubleOperand(Operand src); + void PushX87FloatOperand(Operand src); + void ReadX87Operand(Operand dst); + bool X87StackNonEmpty() const { return x87_stack_depth_ > 0; } + void PopX87(); + void CurrentInstructionReturnsX87Result(); + void FlushX87StackIfNecessary(LInstruction* instr); - // A utility for instructions that return floating point values on X87. - void HandleX87FPReturnValue(LInstruction* instr); + Handle ToHandle(LConstantOperand* op) const; // The operand denoting the second word (the one with a higher address) of // a double stack slot. 
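The x87 helpers declared above maintain a single invariant: the FPU register
stack is either empty or holds exactly one value in st(0). A stripped-down
sketch of that bookkeeping, using placeholder names rather than the real
LCodeGen members (not part of this patch):

class X87StackModel {
 public:
  void Push() { depth_++; }                      // paired with an emitted fld/fild
  void Pop() { depth_--; }                       // paired with an emitted fstp st(0)
  bool NonEmpty() const { return depth_ > 0; }
  // Before an instruction that produces or clobbers a double result without
  // consuming the value currently on the stack, drop the stale value so the
  // depth never exceeds one.
  void FlushIfNecessary(bool produces_or_clobbers_double, bool consumes_double) {
    if (depth_ > 0 && produces_or_clobbers_double && !consumes_double) Pop();
  }
 private:
  int depth_ = 0;
};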
@@ -129,6 +137,7 @@ class LCodeGen BASE_EMBEDDED { IntegerSignedness signedness); void DoDeferredTaggedToI(LTaggedToI* instr); + void DoDeferredTaggedToINoSSE2(LTaggedToINoSSE2* instr); void DoDeferredMathAbsTaggedHeapNumber(LUnaryMathOperation* instr); void DoDeferredStackCheck(LStackCheck* instr); void DoDeferredRandom(LRandom* instr); @@ -315,6 +324,14 @@ class LCodeGen BASE_EMBEDDED { LEnvironment* env, NumberUntagDMode mode = NUMBER_CANDIDATE_IS_ANY_TAGGED); + void EmitNumberUntagDNoSSE2( + Register input, + Register temp, + bool deoptimize_on_undefined, + bool deoptimize_on_minus_zero, + LEnvironment* env, + NumberUntagDMode mode = NUMBER_CANDIDATE_IS_ANY_TAGGED); + // Emits optimized code for typeof x == "y". Modifies input register. // Returns the condition on which a final split to // true and false label should be made, to optimize fallthrough. @@ -404,6 +421,7 @@ class LCodeGen BASE_EMBEDDED { int osr_pc_offset_; int last_lazy_deopt_pc_; bool frame_is_built_; + int x87_stack_depth_; // Builder that keeps track of safepoints in the code. The table // itself is emitted at the end of the generated code. diff --git a/src/ia32/lithium-gap-resolver-ia32.cc b/src/ia32/lithium-gap-resolver-ia32.cc index b062ba5..6c7e375 100644 --- a/src/ia32/lithium-gap-resolver-ia32.cc +++ b/src/ia32/lithium-gap-resolver-ia32.cc @@ -324,29 +324,61 @@ void LGapResolver::EmitMove(int index) { } } else if (source->IsDoubleRegister()) { - CpuFeatureScope scope(cgen_->masm(), SSE2); - XMMRegister src = cgen_->ToDoubleRegister(source); - if (destination->IsDoubleRegister()) { - XMMRegister dst = cgen_->ToDoubleRegister(destination); - __ movaps(dst, src); + if (CpuFeatures::IsSupported(SSE2)) { + CpuFeatureScope scope(cgen_->masm(), SSE2); + XMMRegister src = cgen_->ToDoubleRegister(source); + if (destination->IsDoubleRegister()) { + XMMRegister dst = cgen_->ToDoubleRegister(destination); + __ movaps(dst, src); + } else { + ASSERT(destination->IsDoubleStackSlot()); + Operand dst = cgen_->ToOperand(destination); + __ movdbl(dst, src); + } } else { + // load from the register onto the stack, store in destination, which must + // be a double stack slot in the non-SSE2 case. + ASSERT(source->index() == 0); // source is on top of the stack ASSERT(destination->IsDoubleStackSlot()); Operand dst = cgen_->ToOperand(destination); - __ movdbl(dst, src); + cgen_->ReadX87Operand(dst); } } else if (source->IsDoubleStackSlot()) { - CpuFeatureScope scope(cgen_->masm(), SSE2); - ASSERT(destination->IsDoubleRegister() || - destination->IsDoubleStackSlot()); - Operand src = cgen_->ToOperand(source); - if (destination->IsDoubleRegister()) { - XMMRegister dst = cgen_->ToDoubleRegister(destination); - __ movdbl(dst, src); + if (CpuFeatures::IsSupported(SSE2)) { + CpuFeatureScope scope(cgen_->masm(), SSE2); + ASSERT(destination->IsDoubleRegister() || + destination->IsDoubleStackSlot()); + Operand src = cgen_->ToOperand(source); + if (destination->IsDoubleRegister()) { + XMMRegister dst = cgen_->ToDoubleRegister(destination); + __ movdbl(dst, src); + } else { + // We rely on having xmm0 available as a fixed scratch register. + Operand dst = cgen_->ToOperand(destination); + __ movdbl(xmm0, src); + __ movdbl(dst, xmm0); + } } else { - // We rely on having xmm0 available as a fixed scratch register. - Operand dst = cgen_->ToOperand(destination); - __ movdbl(xmm0, src); - __ movdbl(dst, xmm0); + // load from the stack slot on top of the floating point stack, and then + // store in destination. 
If destination is a double register, then it + // represents the top of the stack and nothing needs to be done. + if (destination->IsDoubleStackSlot()) { + Register tmp = EnsureTempRegister(); + Operand src0 = cgen_->ToOperand(source); + Operand src1 = cgen_->HighOperand(source); + Operand dst0 = cgen_->ToOperand(destination); + Operand dst1 = cgen_->HighOperand(destination); + __ mov(tmp, src0); // Then use tmp to copy source to destination. + __ mov(dst0, tmp); + __ mov(tmp, src1); + __ mov(dst1, tmp); + } else { + Operand src = cgen_->ToOperand(source); + if (cgen_->X87StackNonEmpty()) { + cgen_->PopX87(); + } + cgen_->PushX87DoubleOperand(src); + } } } else { UNREACHABLE(); @@ -419,21 +451,19 @@ void LGapResolver::EmitSwap(int index) { __ movaps(xmm0, src); __ movaps(src, dst); __ movaps(dst, xmm0); - } else if (source->IsDoubleRegister() || destination->IsDoubleRegister()) { CpuFeatureScope scope(cgen_->masm(), SSE2); // XMM register-memory swap. We rely on having xmm0 // available as a fixed scratch register. ASSERT(source->IsDoubleStackSlot() || destination->IsDoubleStackSlot()); XMMRegister reg = cgen_->ToDoubleRegister(source->IsDoubleRegister() - ? source - : destination); + ? source + : destination); Operand other = cgen_->ToOperand(source->IsDoubleRegister() ? destination : source); __ movdbl(xmm0, other); __ movdbl(other, reg); __ movdbl(reg, Operand(xmm0)); - } else if (source->IsDoubleStackSlot() && destination->IsDoubleStackSlot()) { CpuFeatureScope scope(cgen_->masm(), SSE2); // Double-width memory-to-memory. Spill on demand to use a general diff --git a/src/ia32/lithium-ia32.cc b/src/ia32/lithium-ia32.cc index a4b1b86..3ce5738 100644 --- a/src/ia32/lithium-ia32.cc +++ b/src/ia32/lithium-ia32.cc @@ -91,6 +91,22 @@ void LInstruction::VerifyCall() { #endif +bool LInstruction::HasDoubleRegisterResult() { + return HasResult() && result()->IsDoubleRegister(); +} + + +bool LInstruction::HasDoubleRegisterInput() { + for (int i = 0; i < InputCount(); i++) { + LOperand* op = InputAt(i); + if (op->IsDoubleRegister()) { + return true; + } + } + return false; +} + + void LInstruction::PrintTo(StringStream* stream) { stream->Add("%s ", this->Mnemonic()); @@ -542,6 +558,11 @@ LOperand* LChunkBuilder::UseFixedDouble(HValue* value, XMMRegister reg) { } +LOperand* LChunkBuilder::UseX87TopOfStack(HValue* value) { + return Use(value, ToUnallocated(x87tos)); +} + + LOperand* LChunkBuilder::UseRegister(HValue* value) { return Use(value, new(zone()) LUnallocated(LUnallocated::MUST_HAVE_REGISTER)); } @@ -1861,20 +1882,33 @@ LInstruction* LChunkBuilder::DoChange(HChange* instr) { ? TempRegister() : NULL; LNumberUntagD* res = new(zone()) LNumberUntagD(value, temp); - return AssignEnvironment(DefineAsRegister(res)); + if (CpuFeatures::IsSafeForSnapshot(SSE2)) { + return AssignEnvironment(DefineAsRegister(res)); + } else { + return AssignEnvironment(DefineX87TOS(res)); + } } else { ASSERT(to.IsInteger32()); - LOperand* value = UseRegister(instr->value()); if (instr->value()->type().IsSmi()) { + LOperand* value = UseRegister(instr->value()); return DefineSameAsFirst(new(zone()) LSmiUntag(value, false)); } else { bool truncating = instr->CanTruncateToInt32(); - LOperand* xmm_temp = - (truncating && CpuFeatures::IsSupported(SSE3)) - ? 
NULL - : FixedTemp(xmm1); - LTaggedToI* res = new(zone()) LTaggedToI(value, xmm_temp); - return AssignEnvironment(DefineSameAsFirst(res)); + if (CpuFeatures::IsSafeForSnapshot(SSE2)) { + LOperand* value = UseRegister(instr->value()); + LOperand* xmm_temp = + (truncating && CpuFeatures::IsSupported(SSE3)) + ? NULL + : FixedTemp(xmm1); + LTaggedToI* res = new(zone()) LTaggedToI(value, xmm_temp); + return AssignEnvironment(DefineSameAsFirst(res)); + } else { + LOperand* value = UseFixed(instr->value(), ecx); + LTaggedToINoSSE2* res = + new(zone()) LTaggedToINoSSE2(value, TempRegister(), + TempRegister(), TempRegister()); + return AssignEnvironment(DefineFixed(res, ecx)); + } } } } else if (from.IsDouble()) { @@ -1992,12 +2026,20 @@ LInstruction* LChunkBuilder::DoClampToUint8(HClampToUint8* instr) { return DefineFixed(new(zone()) LClampIToUint8(reg), eax); } else { ASSERT(input_rep.IsTagged()); - LOperand* reg = UseFixed(value, eax); - // Register allocator doesn't (yet) support allocation of double - // temps. Reserve xmm1 explicitly. - LOperand* temp = FixedTemp(xmm1); - LClampTToUint8* result = new(zone()) LClampTToUint8(reg, temp); - return AssignEnvironment(DefineFixed(result, eax)); + if (CpuFeatures::IsSupported(SSE2)) { + LOperand* reg = UseFixed(value, eax); + // Register allocator doesn't (yet) support allocation of double + // temps. Reserve xmm1 explicitly. + LOperand* temp = FixedTemp(xmm1); + LClampTToUint8* result = new(zone()) LClampTToUint8(reg, temp); + return AssignEnvironment(DefineFixed(result, eax)); + } else { + LOperand* value = UseRegister(instr->value()); + LClampTToUint8NoSSE2* res = + new(zone()) LClampTToUint8NoSSE2(value, TempRegister(), + TempRegister(), TempRegister()); + return AssignEnvironment(DefineFixed(res, ecx)); + } } } @@ -2018,10 +2060,13 @@ LInstruction* LChunkBuilder::DoConstant(HConstant* instr) { return DefineAsRegister(new(zone()) LConstantI); } else if (r.IsDouble()) { double value = instr->DoubleValue(); - LOperand* temp = (BitCast(value) != 0) - ? TempRegister() - : NULL; - return DefineAsRegister(new(zone()) LConstantD(temp)); + bool value_is_zero = BitCast(value) == 0; + if (CpuFeatures::IsSafeForSnapshot(SSE2)) { + LOperand* temp = value_is_zero ? NULL : TempRegister(); + return DefineAsRegister(new(zone()) LConstantD(temp)); + } else { + return DefineX87TOS(new(zone()) LConstantD(NULL)); + } } else if (r.IsTagged()) { return DefineAsRegister(new(zone()) LConstantT); } else { @@ -2190,6 +2235,27 @@ LInstruction* LChunkBuilder::DoLoadKeyedGeneric(HLoadKeyedGeneric* instr) { } +LOperand* LChunkBuilder::GetStoreKeyedValueOperand(HStoreKeyed* instr) { + ElementsKind elements_kind = instr->elements_kind(); + + // Determine if we need a byte register in this case for the value. 
+ bool val_is_fixed_register = + elements_kind == EXTERNAL_BYTE_ELEMENTS || + elements_kind == EXTERNAL_UNSIGNED_BYTE_ELEMENTS || + elements_kind == EXTERNAL_PIXEL_ELEMENTS; + if (val_is_fixed_register) { + return UseFixed(instr->value(), eax); + } + + if (!CpuFeatures::IsSafeForSnapshot(SSE2) && + IsDoubleOrFloatElementsKind(elements_kind)) { + return UseRegisterAtStart(instr->value()); + } + + return UseRegister(instr->value()); +} + + LInstruction* LChunkBuilder::DoStoreKeyed(HStoreKeyed* instr) { if (!instr->is_external()) { ASSERT(instr->elements()->representation().IsTagged()); @@ -2198,7 +2264,12 @@ LInstruction* LChunkBuilder::DoStoreKeyed(HStoreKeyed* instr) { if (instr->value()->representation().IsDouble()) { LOperand* object = UseRegisterAtStart(instr->elements()); - LOperand* val = UseTempRegister(instr->value()); + LOperand* val = NULL; + if (CpuFeatures::IsSafeForSnapshot(SSE2)) { + val = UseRegisterAtStart(instr->value()); + } else if (!instr->IsConstantHoleStore()) { + val = UseX87TopOfStack(instr->value()); + } LOperand* key = UseRegisterOrConstantAtStart(instr->key()); return new(zone()) LStoreKeyed(object, key, val); @@ -2228,15 +2299,7 @@ LInstruction* LChunkBuilder::DoStoreKeyed(HStoreKeyed* instr) { ASSERT(instr->elements()->representation().IsExternal()); LOperand* external_pointer = UseRegister(instr->elements()); - // Determine if we need a byte register in this case for the value. - bool val_is_fixed_register = - elements_kind == EXTERNAL_BYTE_ELEMENTS || - elements_kind == EXTERNAL_UNSIGNED_BYTE_ELEMENTS || - elements_kind == EXTERNAL_PIXEL_ELEMENTS; - - LOperand* val = val_is_fixed_register - ? UseFixed(instr->value(), eax) - : UseRegister(instr->value()); + LOperand* val = GetStoreKeyedValueOperand(instr); bool clobbers_key = ExternalArrayOpRequiresTemp( instr->key()->representation(), elements_kind); LOperand* key = clobbers_key diff --git a/src/ia32/lithium-ia32.h b/src/ia32/lithium-ia32.h index 4c188c3..80b8b37 100644 --- a/src/ia32/lithium-ia32.h +++ b/src/ia32/lithium-ia32.h @@ -74,6 +74,7 @@ class LCodeGen; V(ClampDToUint8) \ V(ClampIToUint8) \ V(ClampTToUint8) \ + V(ClampTToUint8NoSSE2) \ V(ClassOfTestAndBranch) \ V(CmpIDAndBranch) \ V(CmpObjectEqAndBranch) \ @@ -167,6 +168,7 @@ class LCodeGen; V(StringLength) \ V(SubI) \ V(TaggedToI) \ + V(TaggedToINoSSE2) \ V(ThisFunction) \ V(Throw) \ V(ToFastProperties) \ @@ -265,6 +267,9 @@ class LInstruction: public ZoneObject { virtual bool HasResult() const = 0; virtual LOperand* result() = 0; + bool HasDoubleRegisterResult(); + bool HasDoubleRegisterInput(); + LOperand* FirstInput() { return InputAt(0); } LOperand* Output() { return HasResult() ? result() : NULL; } @@ -1088,6 +1093,10 @@ class LConstantD: public LTemplateInstruction<1, 0, 1> { temps_[0] = temp; } + virtual bool ClobbersDoubleRegisters() const { + return false; + } + LOperand* temp() { return temps_[0]; } DECLARE_CONCRETE_INSTRUCTION(ConstantD, "constant-d") @@ -2018,6 +2027,31 @@ class LTaggedToI: public LTemplateInstruction<1, 1, 1> { }; +// Truncating conversion from a tagged value to an int32. 
+class LTaggedToINoSSE2: public LTemplateInstruction<1, 1, 3> { + public: + LTaggedToINoSSE2(LOperand* value, + LOperand* temp1, + LOperand* temp2, + LOperand* temp3) { + inputs_[0] = value; + temps_[0] = temp1; + temps_[1] = temp2; + temps_[2] = temp3; + } + + LOperand* value() { return inputs_[0]; } + LOperand* scratch() { return temps_[0]; } + LOperand* scratch2() { return temps_[1]; } + LOperand* scratch3() { return temps_[2]; } + + DECLARE_CONCRETE_INSTRUCTION(TaggedToINoSSE2, "tagged-to-i-nosse2") + DECLARE_HYDROGEN_ACCESSOR(UnaryOperation) + + bool truncating() { return hydrogen()->CanTruncateToInt32(); } +}; + + class LSmiTag: public LTemplateInstruction<1, 1, 0> { public: explicit LSmiTag(LOperand* value) { @@ -2040,6 +2074,10 @@ class LNumberUntagD: public LTemplateInstruction<1, 1, 1> { LOperand* value() { return inputs_[0]; } LOperand* temp() { return temps_[0]; } + virtual bool ClobbersDoubleRegisters() const { + return false; + } + DECLARE_CONCRETE_INSTRUCTION(NumberUntagD, "double-untag") DECLARE_HYDROGEN_ACCESSOR(Change); }; @@ -2380,6 +2418,30 @@ class LClampTToUint8: public LTemplateInstruction<1, 1, 1> { }; +// Truncating conversion from a tagged value to an int32. +class LClampTToUint8NoSSE2: public LTemplateInstruction<1, 1, 3> { + public: + LClampTToUint8NoSSE2(LOperand* unclamped, + LOperand* temp1, + LOperand* temp2, + LOperand* temp3) { + inputs_[0] = unclamped; + temps_[0] = temp1; + temps_[1] = temp2; + temps_[2] = temp3; + } + + LOperand* unclamped() { return inputs_[0]; } + LOperand* scratch() { return temps_[0]; } + LOperand* scratch2() { return temps_[1]; } + LOperand* scratch3() { return temps_[2]; } + + DECLARE_CONCRETE_INSTRUCTION(ClampTToUint8NoSSE2, + "clamp-t-to-uint8-nosse2") + DECLARE_HYDROGEN_ACCESSOR(UnaryOperation) +}; + + class LCheckNonSmi: public LTemplateInstruction<0, 1, 0> { public: explicit LCheckNonSmi(LOperand* value) { @@ -2742,6 +2804,7 @@ class LChunkBuilder BASE_EMBEDDED { MUST_USE_RESULT LOperand* UseFixed(HValue* value, Register fixed_register); MUST_USE_RESULT LOperand* UseFixedDouble(HValue* value, XMMRegister fixed_register); + MUST_USE_RESULT LOperand* UseX87TopOfStack(HValue* value); // A value that is guaranteed to be allocated to a register. // Operand created by UseRegister is guaranteed to be live until the end of @@ -2827,6 +2890,8 @@ class LChunkBuilder BASE_EMBEDDED { LInstruction* DoArithmeticT(Token::Value op, HArithmeticBinaryOperation* instr); + LOperand* GetStoreKeyedValueOperand(HStoreKeyed* instr); + LPlatformChunk* chunk_; CompilationInfo* info_; HGraph* const graph_; diff --git a/src/ia32/macro-assembler-ia32.cc b/src/ia32/macro-assembler-ia32.cc index da4e339..183ff52 100644 --- a/src/ia32/macro-assembler-ia32.cc +++ b/src/ia32/macro-assembler-ia32.cc @@ -2518,6 +2518,28 @@ void MacroAssembler::Ret(int bytes_dropped, Register scratch) { } +void MacroAssembler::VerifyX87StackDepth(uint depth) { + // Make sure the floating point stack is either empty or has depth items. + ASSERT(depth <= 7); + + // The top-of-stack (tos) is 7 if there is one item pushed. 
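// Illustrative sketch (not part of this patch): the FPU status word keeps the
// top-of-stack index in bits 11..13, and values are pushed downwards from
// ST7, so a depth of 1 corresponds to TOP == 7 and an empty stack to TOP == 0.
// The fnstsw/and/shr/cmp sequence below is equivalent to this check (helper
// name hypothetical, status word passed in explicitly):
#include <cstdint>
static bool X87DepthMatches(uint16_t status_word, int depth) {
  const uint16_t kTopMask = 0x3800;              // bits 11..13 of the status word
  int top = (status_word & kTopMask) >> 11;
  int expected_top = (8 - depth) % 8;            // depth 0 -> 0, 1 -> 7, 2 -> 6, ...
  return top == expected_top;
}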
+ int tos = (8 - depth) % 8; + const int kTopMask = 0x3800; + push(eax); + fwait(); + fnstsw_ax(); + and_(eax, kTopMask); + shr(eax, 11); + cmp(eax, Immediate(tos)); + Label all_ok; + j(equal, &all_ok); + Check(equal, "Unexpected FPU stack depth after instruction"); + bind(&all_ok); + fnclex(); + pop(eax); +} + + void MacroAssembler::Drop(int stack_elements) { if (stack_elements > 0) { add(esp, Immediate(stack_elements * kPointerSize)); diff --git a/src/ia32/macro-assembler-ia32.h b/src/ia32/macro-assembler-ia32.h index 5b91ae4..19d4d93 100644 --- a/src/ia32/macro-assembler-ia32.h +++ b/src/ia32/macro-assembler-ia32.h @@ -807,6 +807,8 @@ class MacroAssembler: public Assembler { return code_object_; } + // Insert code to verify that the x87 stack has the specified depth (0-7) + void VerifyX87StackDepth(uint depth); // --------------------------------------------------------------------------- // StatsCounter support diff --git a/src/lithium-allocator.cc b/src/lithium-allocator.cc index 7049a58..fa2aa24 100644 --- a/src/lithium-allocator.cc +++ b/src/lithium-allocator.cc @@ -1788,7 +1788,7 @@ STATIC_ASSERT(DoubleRegister::kMaxNumAllocatableRegisters >= bool LAllocator::TryAllocateFreeReg(LiveRange* current) { LifetimePosition free_until_pos[DoubleRegister::kMaxNumAllocatableRegisters]; - for (int i = 0; i < DoubleRegister::kMaxNumAllocatableRegisters; i++) { + for (int i = 0; i < num_registers_; i++) { free_until_pos[i] = LifetimePosition::MaxPosition(); } @@ -1880,7 +1880,7 @@ void LAllocator::AllocateBlockedReg(LiveRange* current) { LifetimePosition use_pos[DoubleRegister::kMaxNumAllocatableRegisters]; LifetimePosition block_pos[DoubleRegister::kMaxNumAllocatableRegisters]; - for (int i = 0; i < DoubleRegister::NumAllocatableRegisters(); i++) { + for (int i = 0; i < num_registers_; i++) { use_pos[i] = block_pos[i] = LifetimePosition::MaxPosition(); } diff --git a/src/objects.h b/src/objects.h index a19e510..c97a6c9 100644 --- a/src/objects.h +++ b/src/objects.h @@ -1495,6 +1495,8 @@ class HeapNumber: public HeapObject { static const int kExponentBits = 11; static const int kExponentBias = 1023; static const int kExponentShift = 20; + static const int kInfinityOrNanExponent = + (kExponentMask >> kExponentShift) - kExponentBias; static const int kMantissaBitsInTopWord = 20; static const int kNonMantissaBitsInTopWord = 12; diff --git a/test/mjsunit/external-array-no-sse2.js b/test/mjsunit/external-array-no-sse2.js new file mode 100644 index 0000000..0b843d8 --- /dev/null +++ b/test/mjsunit/external-array-no-sse2.js @@ -0,0 +1,716 @@ +// Copyright 2012 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Flags: --allow-natives-syntax --expose-gc --noenable-sse2 + +// Helper +function assertInstance(o, f) { + assertSame(o.constructor, f); + assertInstanceof(o, f); +} + +// This is a regression test for overlapping key and value registers. +function f(a) { + a[0] = 0; + a[1] = 0; +} + +var a = new Int32Array(2); +for (var i = 0; i < 5; i++) { + f(a); +} +%OptimizeFunctionOnNextCall(f); +f(a); + +assertEquals(0, a[0]); +assertEquals(0, a[1]); + +// No-parameter constructor should fail right now. +function abfunc1() { + return new ArrayBuffer(); +} +assertThrows(abfunc1); + +// Test derivation from an ArrayBuffer +var ab = new ArrayBuffer(12); +assertInstance(ab, ArrayBuffer); +var derived_uint8 = new Uint8Array(ab); +assertInstance(derived_uint8, Uint8Array); +assertSame(ab, derived_uint8.buffer); +assertEquals(12, derived_uint8.length); +assertEquals(12, derived_uint8.byteLength); +assertEquals(0, derived_uint8.byteOffset); +assertEquals(1, derived_uint8.BYTES_PER_ELEMENT); +var derived_uint8_2 = new Uint8Array(ab,7); +assertInstance(derived_uint8_2, Uint8Array); +assertSame(ab, derived_uint8_2.buffer); +assertEquals(5, derived_uint8_2.length); +assertEquals(5, derived_uint8_2.byteLength); +assertEquals(7, derived_uint8_2.byteOffset); +assertEquals(1, derived_uint8_2.BYTES_PER_ELEMENT); +var derived_int16 = new Int16Array(ab); +assertInstance(derived_int16, Int16Array); +assertSame(ab, derived_int16.buffer); +assertEquals(6, derived_int16.length); +assertEquals(12, derived_int16.byteLength); +assertEquals(0, derived_int16.byteOffset); +assertEquals(2, derived_int16.BYTES_PER_ELEMENT); +var derived_int16_2 = new Int16Array(ab,6); +assertInstance(derived_int16_2, Int16Array); +assertSame(ab, derived_int16_2.buffer); +assertEquals(3, derived_int16_2.length); +assertEquals(6, derived_int16_2.byteLength); +assertEquals(6, derived_int16_2.byteOffset); +assertEquals(2, derived_int16_2.BYTES_PER_ELEMENT); +var derived_uint32 = new Uint32Array(ab); +assertInstance(derived_uint32, Uint32Array); +assertSame(ab, derived_uint32.buffer); +assertEquals(3, derived_uint32.length); +assertEquals(12, derived_uint32.byteLength); +assertEquals(0, derived_uint32.byteOffset); +assertEquals(4, derived_uint32.BYTES_PER_ELEMENT); +var derived_uint32_2 = new Uint32Array(ab,4); +assertInstance(derived_uint32_2, Uint32Array); +assertSame(ab, derived_uint32_2.buffer); +assertEquals(2, derived_uint32_2.length); +assertEquals(8, derived_uint32_2.byteLength); +assertEquals(4, derived_uint32_2.byteOffset); +assertEquals(4, derived_uint32_2.BYTES_PER_ELEMENT); +var derived_uint32_3 = new Uint32Array(ab,4,1); +assertInstance(derived_uint32_3, Uint32Array); +assertSame(ab, derived_uint32_3.buffer); +assertEquals(1, 
derived_uint32_3.length); +assertEquals(4, derived_uint32_3.byteLength); +assertEquals(4, derived_uint32_3.byteOffset); +assertEquals(4, derived_uint32_3.BYTES_PER_ELEMENT); +var derived_float64 = new Float64Array(ab,0,1); +assertInstance(derived_float64, Float64Array); +assertSame(ab, derived_float64.buffer); +assertEquals(1, derived_float64.length); +assertEquals(8, derived_float64.byteLength); +assertEquals(0, derived_float64.byteOffset); +assertEquals(8, derived_float64.BYTES_PER_ELEMENT); + +// If a given byteOffset and length references an area beyond the end of the +// ArrayBuffer an exception is raised. +function abfunc3() { + new Uint32Array(ab,4,3); +} +assertThrows(abfunc3); +function abfunc4() { + new Uint32Array(ab,16); +} +assertThrows(abfunc4); + +// The given byteOffset must be a multiple of the element size of the specific +// type, otherwise an exception is raised. +function abfunc5() { + new Uint32Array(ab,5); +} +assertThrows(abfunc5); + +// If length is not explicitly specified, the length of the ArrayBuffer minus +// the byteOffset must be a multiple of the element size of the specific type, +// or an exception is raised. +var ab2 = new ArrayBuffer(13); +function abfunc6() { + new Uint32Array(ab2,4); +} +assertThrows(abfunc6); + +// Test that an array constructed without an array buffer creates one properly. +a = new Uint8Array(31); +assertEquals(a.byteLength, a.buffer.byteLength); +assertEquals(a.length, a.buffer.byteLength); +assertEquals(a.length * a.BYTES_PER_ELEMENT, a.buffer.byteLength); +a = new Int16Array(5); +assertEquals(a.byteLength, a.buffer.byteLength); +assertEquals(a.length * a.BYTES_PER_ELEMENT, a.buffer.byteLength); +a = new Float64Array(7); +assertEquals(a.byteLength, a.buffer.byteLength); +assertEquals(a.length * a.BYTES_PER_ELEMENT, a.buffer.byteLength); + +// Test that an implicitly created buffer is a valid buffer. +a = new Float64Array(7); +assertSame(a.buffer, (new Uint16Array(a.buffer)).buffer); +assertSame(a.buffer, (new Float32Array(a.buffer,4)).buffer); +assertSame(a.buffer, (new Int8Array(a.buffer,3,51)).buffer); +assertInstance(a.buffer, ArrayBuffer); + +// Test the correct behavior of the |BYTES_PER_ELEMENT| property (which is +// "constant", but not read-only). +a = new Int32Array(2); +assertEquals(4, a.BYTES_PER_ELEMENT); +a.BYTES_PER_ELEMENT = 42; +assertEquals(42, a.BYTES_PER_ELEMENT); +a = new Uint8Array(2); +assertEquals(1, a.BYTES_PER_ELEMENT); +a = new Int16Array(2); +assertEquals(2, a.BYTES_PER_ELEMENT); + +// Test Float64Arrays. +function get(a, index) { + return a[index]; +} +function set(a, index, value) { + a[index] = value; +} +function temp() { +var array = new Float64Array(2); +for (var i = 0; i < 5; i++) { + set(array, 0, 2.5); + assertEquals(2.5, array[0]); +} +%OptimizeFunctionOnNextCall(set); +set(array, 0, 2.5); +assertEquals(2.5, array[0]); +set(array, 1, 3.5); +assertEquals(3.5, array[1]); +for (var i = 0; i < 5; i++) { + assertEquals(2.5, get(array, 0)); + assertEquals(3.5, array[1]); +} +%OptimizeFunctionOnNextCall(get); +assertEquals(2.5, get(array, 0)); +assertEquals(3.5, get(array, 1)); +} + +// Test non-number parameters. 
+var array_with_length_from_non_number = new Int32Array("2"); +assertEquals(2, array_with_length_from_non_number.length); +array_with_length_from_non_number = new Int32Array(undefined); +assertEquals(0, array_with_length_from_non_number.length); +var foo = { valueOf: function() { return 3; } }; +array_with_length_from_non_number = new Int32Array(foo); +assertEquals(3, array_with_length_from_non_number.length); +foo = { toString: function() { return "4"; } }; +array_with_length_from_non_number = new Int32Array(foo); +assertEquals(4, array_with_length_from_non_number.length); + + +// Test loads and stores. +types = [Array, Int8Array, Uint8Array, Int16Array, Uint16Array, Int32Array, + Uint32Array, Uint8ClampedArray, Float32Array, Float64Array]; + +test_result_nan = [NaN, 0, 0, 0, 0, 0, 0, 0, NaN, NaN]; +test_result_low_int = [-1, -1, 255, -1, 65535, -1, 0xFFFFFFFF, 0, -1, -1]; +test_result_low_double = [-1.25, -1, 255, -1, 65535, -1, 0xFFFFFFFF, 0, -1.25, -1.25]; +test_result_middle = [253.75, -3, 253, 253, 253, 253, 253, 254, 253.75, 253.75]; +test_result_high_int = [256, 0, 0, 256, 256, 256, 256, 255, 256, 256]; +test_result_high_double = [256.25, 0, 0, 256, 256, 256, 256, 255, 256.25, 256.25]; + +const kElementCount = 40; + +function test_load(array, sum) { + for (var i = 0; i < kElementCount; i++) { + sum += array[i]; + } + return sum; +} + +function test_load_const_key(array, sum) { + sum += array[0]; + sum += array[1]; + sum += array[2]; + return sum; +} + +function test_store(array, sum) { + for (var i = 0; i < kElementCount; i++) { + sum += array[i] = i+1; + } + return sum; +} + +function test_store_const_key(array, sum) { + sum += array[0] = 1; + sum += array[1] = 2; + sum += array[2] = 3; + return sum; +} + +function zero() { + return 0.0; +} + +function test_store_middle_tagged(array, sum) { + array[0] = 253.75; + return array[0]; +} + +function test_store_high_tagged(array, sum) { + array[0] = 256.25; + return array[0]; +} + +function test_store_middle_double(array, sum) { + array[0] = 253.75 + zero(); // + forces double type feedback + return array[0]; +} + +function test_store_high_double(array, sum) { + array[0] = 256.25 + zero(); // + forces double type feedback + return array[0]; +} + +function test_store_high_double(array, sum) { + array[0] = 256.25; + return array[0]; +} + +function test_store_low_int(array, sum) { + array[0] = -1; + return array[0]; +} + +function test_store_low_tagged(array, sum) { + array[0] = -1.25; + return array[0]; +} + +function test_store_low_double(array, sum) { + array[0] = -1.25 + zero(); // + forces double type feedback + return array[0]; +} + +function test_store_high_int(array, sum) { + array[0] = 256; + return array[0]; +} + +function test_store_nan(array, sum) { + array[0] = NaN; + return array[0]; +} + +const kRuns = 10; + +function run_test(test_func, array, expected_result) { + for (var i = 0; i < 5; i++) test_func(array, 0); + %OptimizeFunctionOnNextCall(test_func); + var sum = 0; + for (var i = 0; i < kRuns; i++) { + sum = test_func(array, sum); + } + assertEquals(expected_result, sum); + %DeoptimizeFunction(test_func); + gc(); // Makes V8 forget about type information for test_func. 
+}
+
+function run_bounds_test(test_func, array, expected_result) {
+  assertEquals(undefined, a[kElementCount]);
+  a[kElementCount] = 456;
+  assertEquals(undefined, a[kElementCount]);
+  assertEquals(undefined, a[kElementCount+1]);
+  a[kElementCount+1] = 456;
+  assertEquals(undefined, a[kElementCount+1]);
+}
+
+for (var t = 0; t < types.length; t++) {
+  var type = types[t];
+  var a = new type(kElementCount);
+
+  for (var i = 0; i < kElementCount; i++) {
+    a[i] = i;
+  }
+
+  // Run test functions defined above.
+  run_test(test_load, a, 780 * kRuns);
+  run_test(test_load_const_key, a, 3 * kRuns);
+  run_test(test_store, a, 820 * kRuns);
+  run_test(test_store_const_key, a, 6 * kRuns);
+  run_test(test_store_low_int, a, test_result_low_int[t]);
+  run_test(test_store_low_double, a, test_result_low_double[t]);
+  run_test(test_store_low_tagged, a, test_result_low_double[t]);
+  run_test(test_store_high_int, a, test_result_high_int[t]);
+  run_test(test_store_nan, a, test_result_nan[t]);
+  run_test(test_store_middle_double, a, test_result_middle[t]);
+  run_test(test_store_middle_tagged, a, test_result_middle[t]);
+  run_test(test_store_high_double, a, test_result_high_double[t]);
+  run_test(test_store_high_tagged, a, test_result_high_double[t]);
+
+  // Test the correct behavior of the |length| property (which is read-only).
+  if (t != 0) {
+    assertEquals(kElementCount, a.length);
+    a.length = 2;
+    assertEquals(kElementCount, a.length);
+    assertTrue(delete a.length);
+    a.length = 2;
+    assertEquals(2, a.length);
+
+    // Make sure bounds checks are handled correctly for external arrays.
+    run_bounds_test(a);
+    run_bounds_test(a);
+    run_bounds_test(a);
+    %OptimizeFunctionOnNextCall(run_bounds_test);
+    run_bounds_test(a);
+    %DeoptimizeFunction(run_bounds_test);
+    gc(); // Makes V8 forget about type information for run_bounds_test.
+
+  }
+
+  function array_load_set_smi_check(a) {
+    return a[0] = a[0] = 1;
+  }
+
+  array_load_set_smi_check(a);
+  array_load_set_smi_check(0);
+
+  function array_load_set_smi_check2(a) {
+    return a[0] = a[0] = 1;
+  }
+
+  array_load_set_smi_check2(a);
+  %OptimizeFunctionOnNextCall(array_load_set_smi_check2);
+  array_load_set_smi_check2(a);
+  array_load_set_smi_check2(0);
+  %DeoptimizeFunction(array_load_set_smi_check2);
+  gc(); // Makes V8 forget about type information for array_load_set_smi_check2.
+}
+
+// Check handling of undefined in 32- and 64-bit external float arrays.
+
+function store_float32_undefined(ext_array) {
+  ext_array[0] = undefined;
+}
+
+var float32_array = new Float32Array(1);
+// Make sure the runtime does it right.
+store_float32_undefined(float32_array);
+assertTrue(isNaN(float32_array[0]));
+// Make sure the ICs do it right.
+store_float32_undefined(float32_array);
+assertTrue(isNaN(float32_array[0]));
+// Make sure that Crankshaft does it right.
+%OptimizeFunctionOnNextCall(store_float32_undefined);
+store_float32_undefined(float32_array);
+assertTrue(isNaN(float32_array[0]));
+
+function store_float64_undefined(ext_array) {
+  ext_array[0] = undefined;
+}
+
+var float64_array = new Float64Array(1);
+// Make sure the runtime does it right.
+store_float64_undefined(float64_array);
+assertTrue(isNaN(float64_array[0]));
+// Make sure the ICs do it right.
+store_float64_undefined(float64_array);
+assertTrue(isNaN(float64_array[0]));
+// Make sure that Crankshaft does it right.
+%OptimizeFunctionOnNextCall(store_float64_undefined);
+store_float64_undefined(float64_array);
+assertTrue(isNaN(float64_array[0]));
+
+
+// Check handling of 0-sized buffers and arrays.
+ab = new ArrayBuffer(0); +assertInstance(ab, ArrayBuffer); +assertEquals(0, ab.byteLength); +a = new Int8Array(ab); +assertInstance(a, Int8Array); +assertEquals(0, a.byteLength); +assertEquals(0, a.length); +a[0] = 1; +assertEquals(undefined, a[0]); +ab = new ArrayBuffer(16); +assertInstance(ab, ArrayBuffer); +a = new Float32Array(ab,4,0); +assertInstance(a, Float32Array); +assertEquals(0, a.byteLength); +assertEquals(0, a.length); +a[0] = 1; +assertEquals(undefined, a[0]); +a = new Uint16Array(0); +assertInstance(a, Uint16Array); +assertEquals(0, a.byteLength); +assertEquals(0, a.length); +a[0] = 1; +assertEquals(undefined, a[0]); + + +// Check construction from arrays. +a = new Uint32Array([]); +assertInstance(a, Uint32Array); +assertEquals(0, a.length); +assertEquals(0, a.byteLength); +assertEquals(0, a.buffer.byteLength); +assertEquals(4, a.BYTES_PER_ELEMENT); +assertInstance(a.buffer, ArrayBuffer); +a = new Uint16Array([1,2,3]); +assertInstance(a, Uint16Array); +assertEquals(3, a.length); +assertEquals(6, a.byteLength); +assertEquals(6, a.buffer.byteLength); +assertEquals(2, a.BYTES_PER_ELEMENT); +assertEquals(1, a[0]); +assertEquals(3, a[2]); +assertInstance(a.buffer, ArrayBuffer); +a = new Uint32Array(a); +assertInstance(a, Uint32Array); +assertEquals(3, a.length); +assertEquals(12, a.byteLength); +assertEquals(12, a.buffer.byteLength); +assertEquals(4, a.BYTES_PER_ELEMENT); +assertEquals(1, a[0]); +assertEquals(3, a[2]); +assertInstance(a.buffer, ArrayBuffer); + +// Check subarrays. +a = new Uint16Array([1,2,3,4,5,6]); +aa = a.subarray(3); +assertInstance(aa, Uint16Array); +assertEquals(3, aa.length); +assertEquals(6, aa.byteLength); +assertEquals(2, aa.BYTES_PER_ELEMENT); +assertSame(a.buffer, aa.buffer); +aa = a.subarray(3,5); +assertInstance(aa, Uint16Array); +assertEquals(2, aa.length); +assertEquals(4, aa.byteLength); +assertEquals(2, aa.BYTES_PER_ELEMENT); +assertSame(a.buffer, aa.buffer); +aa = a.subarray(4,8); +assertInstance(aa, Uint16Array); +assertEquals(2, aa.length); +assertEquals(4, aa.byteLength); +assertEquals(2, aa.BYTES_PER_ELEMENT); +assertSame(a.buffer, aa.buffer); +aa = a.subarray(9); +assertInstance(aa, Uint16Array); +assertEquals(0, aa.length); +assertEquals(0, aa.byteLength); +assertEquals(2, aa.BYTES_PER_ELEMENT); +assertSame(a.buffer, aa.buffer); +aa = a.subarray(-4); +assertInstance(aa, Uint16Array); +assertEquals(4, aa.length); +assertEquals(8, aa.byteLength); +assertEquals(2, aa.BYTES_PER_ELEMENT); +assertSame(a.buffer, aa.buffer); +aa = a.subarray(-3,-1); +assertInstance(aa, Uint16Array); +assertEquals(2, aa.length); +assertEquals(4, aa.byteLength); +assertEquals(2, aa.BYTES_PER_ELEMENT); +assertSame(a.buffer, aa.buffer); +aa = a.subarray(3,2); +assertInstance(aa, Uint16Array); +assertEquals(0, aa.length); +assertEquals(0, aa.byteLength); +assertEquals(2, aa.BYTES_PER_ELEMENT); +assertSame(a.buffer, aa.buffer); +aa = a.subarray(-3,-4); +assertInstance(aa, Uint16Array); +assertEquals(0, aa.length); +assertEquals(0, aa.byteLength); +assertEquals(2, aa.BYTES_PER_ELEMENT); +assertSame(a.buffer, aa.buffer); +aa = a.subarray(0,-8); +assertInstance(aa, Uint16Array); +assertEquals(0, aa.length); +assertEquals(0, aa.byteLength); +assertEquals(2, aa.BYTES_PER_ELEMENT); +assertSame(a.buffer, aa.buffer); + +assertThrows(function(){ a.subarray.call({}, 0) }); +assertThrows(function(){ a.subarray.call([], 0) }); +assertThrows(function(){ a.subarray.call(a) }); + + +// Call constructors directly as functions, and through .call and .apply + +b = ArrayBuffer(100) +a = 
Int8Array(b, 5, 77) +assertInstance(b, ArrayBuffer) +assertInstance(a, Int8Array) +assertSame(b, a.buffer) +assertEquals(5, a.byteOffset) +assertEquals(77, a.byteLength) +b = ArrayBuffer.call(null, 10) +a = Uint16Array.call(null, b, 2, 4) +assertInstance(b, ArrayBuffer) +assertInstance(a, Uint16Array) +assertSame(b, a.buffer) +assertEquals(2, a.byteOffset) +assertEquals(8, a.byteLength) +b = ArrayBuffer.apply(null, [1000]) +a = Float32Array.apply(null, [b, 128, 1]) +assertInstance(b, ArrayBuffer) +assertInstance(a, Float32Array) +assertSame(b, a.buffer) +assertEquals(128, a.byteOffset) +assertEquals(4, a.byteLength) + + +// Test array.set in different combinations. + +function assertArrayPrefix(expected, array) { + for (var i = 0; i < expected.length; ++i) { + assertEquals(expected[i], array[i]); + } +} + +var a11 = new Int16Array([1, 2, 3, 4, 0, -1]) +var a12 = new Uint16Array(15) +a12.set(a11, 3) +assertArrayPrefix([0, 0, 0, 1, 2, 3, 4, 0, 0xffff, 0, 0], a12) +assertThrows(function(){ a11.set(a12) }) + +var a21 = [1, undefined, 10, NaN, 0, -1, {valueOf: function() {return 3}}] +var a22 = new Int32Array(12) +a22.set(a21, 2) +assertArrayPrefix([0, 0, 1, 0, 10, 0, 0, -1, 3, 0], a22) + +var a31 = new Float32Array([2, 4, 6, 8, 11, NaN, 1/0, -3]) +var a32 = a31.subarray(2, 6) +a31.set(a32, 4) +assertArrayPrefix([2, 4, 6, 8, 6, 8, 11, NaN], a31) +assertArrayPrefix([6, 8, 6, 8], a32) + +var a4 = new Uint8ClampedArray([3,2,5,6]) +a4.set(a4) +assertArrayPrefix([3, 2, 5, 6], a4) + +// Cases with overlapping backing store but different element sizes. +var b = new ArrayBuffer(4) +var a5 = new Int16Array(b) +var a50 = new Int8Array(b) +var a51 = new Int8Array(b, 0, 2) +var a52 = new Int8Array(b, 1, 2) +var a53 = new Int8Array(b, 2, 2) + +a5.set([0x5050, 0x0a0a]) +assertArrayPrefix([0x50, 0x50, 0x0a, 0x0a], a50) +assertArrayPrefix([0x50, 0x50], a51) +assertArrayPrefix([0x50, 0x0a], a52) +assertArrayPrefix([0x0a, 0x0a], a53) + +a50.set([0x50, 0x50, 0x0a, 0x0a]) +a51.set(a5) +assertArrayPrefix([0x50, 0x0a, 0x0a, 0x0a], a50) + +a50.set([0x50, 0x50, 0x0a, 0x0a]) +a52.set(a5) +assertArrayPrefix([0x50, 0x50, 0x0a, 0x0a], a50) + +a50.set([0x50, 0x50, 0x0a, 0x0a]) +a53.set(a5) +assertArrayPrefix([0x50, 0x50, 0x50, 0x0a], a50) + +a50.set([0x50, 0x51, 0x0a, 0x0b]) +a5.set(a51) +assertArrayPrefix([0x0050, 0x0051], a5) + +a50.set([0x50, 0x51, 0x0a, 0x0b]) +a5.set(a52) +assertArrayPrefix([0x0051, 0x000a], a5) + +a50.set([0x50, 0x51, 0x0a, 0x0b]) +a5.set(a53) +assertArrayPrefix([0x000a, 0x000b], a5) + +// Mixed types of same size. 
+var a61 = new Float32Array([1.2, 12.3]) +var a62 = new Int32Array(2) +a62.set(a61) +assertArrayPrefix([1, 12], a62) +a61.set(a62) +assertArrayPrefix([1, 12], a61) + +// Invalid source +assertThrows(function() { a.set(0) }) +assertThrows(function() { a.set({}) }) + + +// Test arraybuffer.slice + +var a0 = new Int8Array([1, 2, 3, 4, 5, 6]) +var b0 = a0.buffer + +var b1 = b0.slice(0) +assertEquals(b0.byteLength, b1.byteLength) +assertArrayPrefix([1, 2, 3, 4, 5, 6], Int8Array(b1)) + +var b2 = b0.slice(3) +assertEquals(b0.byteLength - 3, b2.byteLength) +assertArrayPrefix([4, 5, 6], Int8Array(b2)) + +var b3 = b0.slice(2, 4) +assertEquals(2, b3.byteLength) +assertArrayPrefix([3, 4], Int8Array(b3)) + +function goo(a, i) { + return a[i]; +} + +function boo(a, i, v) { + return a[i] = v; +} + +function do_tagged_index_external_array_test(constructor) { + var t_array = new constructor([1, 2, 3, 4, 5, 6]); + assertEquals(1, goo(t_array, 0)); + assertEquals(1, goo(t_array, 0)); + boo(t_array, 0, 13); + assertEquals(13, goo(t_array, 0)); + %OptimizeFunctionOnNextCall(goo); + %OptimizeFunctionOnNextCall(boo); + boo(t_array, 0, 15); + assertEquals(15, goo(t_array, 0)); + %ClearFunctionTypeFeedback(goo); + %ClearFunctionTypeFeedback(boo); +} + +do_tagged_index_external_array_test(Int8Array); +do_tagged_index_external_array_test(Uint8Array); +do_tagged_index_external_array_test(Int16Array); +do_tagged_index_external_array_test(Uint16Array); +do_tagged_index_external_array_test(Int32Array); +do_tagged_index_external_array_test(Uint32Array); +do_tagged_index_external_array_test(Float32Array); +do_tagged_index_external_array_test(Float64Array); + +var built_in_array = new Array(1, 2, 3, 4, 5, 6); +assertEquals(1, goo(built_in_array, 0)); +assertEquals(1, goo(built_in_array, 0)); +%OptimizeFunctionOnNextCall(goo); +%OptimizeFunctionOnNextCall(boo); +boo(built_in_array, 0, 11); +assertEquals(11, goo(built_in_array, 0)); +%ClearFunctionTypeFeedback(goo); +%ClearFunctionTypeFeedback(boo); + +built_in_array = new Array(1.5, 2, 3, 4, 5, 6); +assertEquals(1.5, goo(built_in_array, 0)); +assertEquals(1.5, goo(built_in_array, 0)); +%OptimizeFunctionOnNextCall(goo); +%OptimizeFunctionOnNextCall(boo); +boo(built_in_array, 0, 2.5); +assertEquals(2.5, goo(built_in_array, 0)); +%ClearFunctionTypeFeedback(goo); +%ClearFunctionTypeFeedback(boo); diff --git a/test/mjsunit/pixel-array-rounding.js b/test/mjsunit/pixel-array-rounding.js index 0c307e6..b7db51c 100755 --- a/test/mjsunit/pixel-array-rounding.js +++ b/test/mjsunit/pixel-array-rounding.js @@ -27,12 +27,15 @@ // Flags: --allow-natives-syntax -var pixels = new Uint8ClampedArray(8); +var pixels = new Uint8ClampedArray(11); function f() { for (var i = 0; i < 8; i++) { pixels[i] = (i * 1.1); } + pixels[8] = 255.5; + pixels[9] = NaN; + pixels[10] = -0.5; return pixels[1] + pixels[6]; } @@ -42,3 +45,6 @@ assertEquals(6, pixels[5]); %OptimizeFunctionOnNextCall(f); f(); assertEquals(6, pixels[5]); +assertEquals(255, pixels[8]); +assertEquals(0, pixels[9]); +assertEquals(0, pixels[10]); -- 2.7.4