From: yangguo@chromium.org
Date: Fri, 9 May 2014 13:01:50 +0000 (+0000)
Subject: Require SSE2 support for the ia32 port.
X-Git-Tag: upstream/4.7.83~9203
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3fa6100ed3296175423fcfceb33feb2ef3f98b31;p=platform%2Fupstream%2Fv8.git

Require SSE2 support for the ia32 port.

R=svenpanne@chromium.org

Review URL: https://codereview.chromium.org/275433004

git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@21223 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
---
diff --git a/src/arm/code-stubs-arm.cc b/src/arm/code-stubs-arm.cc
index 2486dd7..910714c 100644
--- a/src/arm/code-stubs-arm.cc
+++ b/src/arm/code-stubs-arm.cc
@@ -4455,11 +4455,6 @@ void StoreBufferOverflowStub::GenerateFixedRegStubsAheadOfTime(
 }
 
 
-bool CodeStub::CanUseFPRegisters() {
-  return true;  // VFP2 is a base requirement for V8
-}
-
-
 // Takes the input in 3 registers: address_ value_ and object_.  A pointer to
 // the value has just been written into the object, now this stub makes sure
 // we keep the GC informed.  The word in the object where the value has been
@@ -4715,7 +4710,7 @@ void StoreArrayLiteralElementStub::Generate(MacroAssembler* masm) {
 
 
 void StubFailureTrampolineStub::Generate(MacroAssembler* masm) {
-  CEntryStub ces(isolate(), 1, fp_registers_ ? kSaveFPRegs : kDontSaveFPRegs);
+  CEntryStub ces(isolate(), 1, kSaveFPRegs);
   __ Call(ces.GetCode(), RelocInfo::CODE_TARGET);
   int parameter_count_offset =
       StubFailureTrampolineFrame::kCallerStackParameterCountFrameOffset;
diff --git a/src/arm/deoptimizer-arm.cc b/src/arm/deoptimizer-arm.cc
index aa98c8b..8c2b3a9 100644
--- a/src/arm/deoptimizer-arm.cc
+++ b/src/arm/deoptimizer-arm.cc
@@ -128,11 +128,6 @@ bool Deoptimizer::HasAlignmentPadding(JSFunction* function) {
 }
 
 
-Code* Deoptimizer::NotifyStubFailureBuiltin() {
-  return isolate_->builtins()->builtin(Builtins::kNotifyStubFailureSaveDoubles);
-}
-
-
 #define __ masm()->
 
 // This code tries to be close to ia32 code so that any changes can be
diff --git a/src/arm64/code-stubs-arm64.cc b/src/arm64/code-stubs-arm64.cc
index 520ed3f..dc9d63c 100644
--- a/src/arm64/code-stubs-arm64.cc
+++ b/src/arm64/code-stubs-arm64.cc
@@ -4403,12 +4403,6 @@ void BinaryOpICWithAllocationSiteStub::Generate(MacroAssembler* masm) {
 }
 
 
-bool CodeStub::CanUseFPRegisters() {
-  // FP registers always available on ARM64.
-  return true;
-}
-
-
 void RecordWriteStub::GenerateIncremental(MacroAssembler* masm, Mode mode) {
   // We need some extra registers for this stub, they have been allocated
   // but we need to save them before using them.
@@ -4657,7 +4651,7 @@ void StoreArrayLiteralElementStub::Generate(MacroAssembler* masm) {
 
 
 void StubFailureTrampolineStub::Generate(MacroAssembler* masm) {
-  CEntryStub ces(isolate(), 1, fp_registers_ ?
kSaveFPRegs : kDontSaveFPRegs); + CEntryStub ces(isolate(), 1, kSaveFPRegs); __ Call(ces.GetCode(), RelocInfo::CODE_TARGET); int parameter_count_offset = StubFailureTrampolineFrame::kCallerStackParameterCountFrameOffset; diff --git a/src/arm64/deoptimizer-arm64.cc b/src/arm64/deoptimizer-arm64.cc index a19e2fc..535e429 100644 --- a/src/arm64/deoptimizer-arm64.cc +++ b/src/arm64/deoptimizer-arm64.cc @@ -110,11 +110,6 @@ void Deoptimizer::CopyDoubleRegisters(FrameDescription* output_frame) { } -Code* Deoptimizer::NotifyStubFailureBuiltin() { - return isolate_->builtins()->builtin(Builtins::kNotifyStubFailureSaveDoubles); -} - - #define __ masm-> static void CopyRegisterDumpToFrame(MacroAssembler* masm, diff --git a/src/code-stubs.h b/src/code-stubs.h index b5c07e5..a6c2294 100644 --- a/src/code-stubs.h +++ b/src/code-stubs.h @@ -191,8 +191,6 @@ class CodeStub BASE_EMBEDDED { Isolate* isolate() const { return isolate_; } protected: - static bool CanUseFPRegisters(); - // Generates the assembler code for the stub. virtual Handle GenerateCode() = 0; @@ -1168,9 +1166,7 @@ class BinaryOpICStub : public HydrogenCodeStub { return state_.GetExtraICState(); } - virtual void VerifyPlatformFeatures() V8_FINAL V8_OVERRIDE { - ASSERT(CpuFeatures::VerifyCrossCompiling(SSE2)); - } + virtual void VerifyPlatformFeatures() V8_FINAL V8_OVERRIDE { } virtual Handle GenerateCode() V8_OVERRIDE; @@ -1225,9 +1221,7 @@ class BinaryOpICWithAllocationSiteStub V8_FINAL : public PlatformCodeStub { return state_.GetExtraICState(); } - virtual void VerifyPlatformFeatures() V8_OVERRIDE { - ASSERT(CpuFeatures::VerifyCrossCompiling(SSE2)); - } + virtual void VerifyPlatformFeatures() V8_OVERRIDE { } virtual void Generate(MacroAssembler* masm) V8_OVERRIDE; @@ -1308,9 +1302,7 @@ class StringAddStub V8_FINAL : public HydrogenCodeStub { return PretenureFlagBits::decode(bit_field_); } - virtual void VerifyPlatformFeatures() V8_OVERRIDE { - ASSERT(CpuFeatures::VerifyCrossCompiling(SSE2)); - } + virtual void VerifyPlatformFeatures() V8_OVERRIDE { } virtual Handle GenerateCode() V8_OVERRIDE; @@ -1522,9 +1514,7 @@ class CEntryStub : public PlatformCodeStub { static void GenerateAheadOfTime(Isolate* isolate); protected: - virtual void VerifyPlatformFeatures() V8_OVERRIDE { - ASSERT(CpuFeatures::VerifyCrossCompiling(SSE2)); - }; + virtual void VerifyPlatformFeatures() V8_OVERRIDE { } private: // Number of pointers/values returned. @@ -1920,9 +1910,7 @@ class DoubleToIStub : public PlatformCodeStub { OffsetBits::encode(offset) | IsTruncatingBits::encode(is_truncating) | SkipFastPathBits::encode(skip_fastpath) | - SSEBits::encode( - CpuFeatures::IsSafeForSnapshot(isolate, SSE2) ? - CpuFeatures::IsSafeForSnapshot(isolate, SSE3) ? 2 : 1 : 0); + SSE3Bits::encode(CpuFeatures::IsSafeForSnapshot(isolate, SSE3) ? 
1 : 0); } Register source() { @@ -1950,9 +1938,7 @@ class DoubleToIStub : public PlatformCodeStub { virtual bool SometimesSetsUpAFrame() { return false; } protected: - virtual void VerifyPlatformFeatures() V8_OVERRIDE { - ASSERT(CpuFeatures::VerifyCrossCompiling(SSE2)); - } + virtual void VerifyPlatformFeatures() V8_OVERRIDE { } private: static const int kBitsPerRegisterNumber = 6; @@ -1968,8 +1954,8 @@ class DoubleToIStub : public PlatformCodeStub { public BitField {}; // NOLINT class SkipFastPathBits: public BitField {}; // NOLINT - class SSEBits: - public BitField {}; // NOLINT + class SSE3Bits: + public BitField {}; // NOLINT Major MajorKey() { return DoubleToI; } int MinorKey() { return bit_field_; } @@ -2310,15 +2296,13 @@ class KeyedStoreElementStub : public PlatformCodeStub { : PlatformCodeStub(isolate), is_js_array_(is_js_array), elements_kind_(elements_kind), - store_mode_(store_mode), - fp_registers_(CanUseFPRegisters()) { } + store_mode_(store_mode) { } Major MajorKey() { return KeyedStoreElement; } int MinorKey() { return ElementsKindBits::encode(elements_kind_) | IsJSArrayBits::encode(is_js_array_) | - StoreModeBits::encode(store_mode_) | - FPRegisters::encode(fp_registers_); + StoreModeBits::encode(store_mode_); } void Generate(MacroAssembler* masm); @@ -2327,12 +2311,10 @@ class KeyedStoreElementStub : public PlatformCodeStub { class ElementsKindBits: public BitField {}; class StoreModeBits: public BitField {}; class IsJSArrayBits: public BitField {}; - class FPRegisters: public BitField {}; bool is_js_array_; ElementsKind elements_kind_; KeyedAccessStoreMode store_mode_; - bool fp_registers_; DISALLOW_COPY_AND_ASSIGN(KeyedStoreElementStub); }; @@ -2500,18 +2482,14 @@ class ArrayShiftStub V8_FINAL : public HydrogenCodeStub { class StoreArrayLiteralElementStub : public PlatformCodeStub { public: explicit StoreArrayLiteralElementStub(Isolate* isolate) - : PlatformCodeStub(isolate), fp_registers_(CanUseFPRegisters()) { } + : PlatformCodeStub(isolate) { } private: - class FPRegisters: public BitField {}; - Major MajorKey() { return StoreArrayLiteralElement; } - int MinorKey() { return FPRegisters::encode(fp_registers_); } + int MinorKey() { return 0; } void Generate(MacroAssembler* masm); - bool fp_registers_; - DISALLOW_COPY_AND_ASSIGN(StoreArrayLiteralElementStub); }; @@ -2520,24 +2498,20 @@ class StubFailureTrampolineStub : public PlatformCodeStub { public: StubFailureTrampolineStub(Isolate* isolate, StubFunctionMode function_mode) : PlatformCodeStub(isolate), - fp_registers_(CanUseFPRegisters()), function_mode_(function_mode) {} static void GenerateAheadOfTime(Isolate* isolate); private: - class FPRegisters: public BitField {}; - class FunctionModeField: public BitField {}; + class FunctionModeField: public BitField {}; Major MajorKey() { return StubFailureTrampoline; } int MinorKey() { - return FPRegisters::encode(fp_registers_) | - FunctionModeField::encode(function_mode_); + return FunctionModeField::encode(function_mode_); } void Generate(MacroAssembler* masm); - bool fp_registers_; StubFunctionMode function_mode_; DISALLOW_COPY_AND_ASSIGN(StubFailureTrampolineStub); diff --git a/src/deoptimizer.cc b/src/deoptimizer.cc index e8cf599..6e9ead7 100644 --- a/src/deoptimizer.cc +++ b/src/deoptimizer.cc @@ -1764,7 +1764,8 @@ void Deoptimizer::DoComputeCompiledStubFrame(TranslationIterator* iterator, output_frame->SetRegister(constant_pool_reg.code(), constant_pool_value); } output_frame->SetState(Smi::FromInt(FullCodeGenerator::NO_REGISTERS)); - Code* notify_failure = 
NotifyStubFailureBuiltin(); + Code* notify_failure = + isolate_->builtins()->builtin(Builtins::kNotifyStubFailureSaveDoubles); output_frame->SetContinuation( reinterpret_cast(notify_failure->entry())); } diff --git a/src/deoptimizer.h b/src/deoptimizer.h index 373f888..3262f9c 100644 --- a/src/deoptimizer.h +++ b/src/deoptimizer.h @@ -387,10 +387,6 @@ class Deoptimizer : public Malloced { // at the dynamic alignment state slot inside the frame. bool HasAlignmentPadding(JSFunction* function); - // Select the version of NotifyStubFailure builtin that either saves or - // doesn't save the double registers depending on CPU features. - Code* NotifyStubFailureBuiltin(); - Isolate* isolate_; JSFunction* function_; Code* compiled_code_; diff --git a/src/flag-definitions.h b/src/flag-definitions.h index f96d75a..189d5b6 100644 --- a/src/flag-definitions.h +++ b/src/flag-definitions.h @@ -350,8 +350,6 @@ DEFINE_implication(trace_opt_verbose, trace_opt) DEFINE_bool(debug_code, false, "generate extra code (assertions) for debugging") DEFINE_bool(code_comments, false, "emit comments in code disassembly") -DEFINE_bool(enable_sse2, true, - "enable use of SSE2 instructions if available") DEFINE_bool(enable_sse3, true, "enable use of SSE3 instructions if available") DEFINE_bool(enable_sse4_1, true, diff --git a/src/ia32/assembler-ia32.cc b/src/ia32/assembler-ia32.cc index 7a88e70..58ccb1c 100644 --- a/src/ia32/assembler-ia32.cc +++ b/src/ia32/assembler-ia32.cc @@ -62,30 +62,18 @@ ExternalReference ExternalReference::cpu_features() { } -int IntelDoubleRegister::NumAllocatableRegisters() { - if (CpuFeatures::IsSupported(SSE2)) { - return XMMRegister::kNumAllocatableRegisters; - } else { - return X87Register::kNumAllocatableRegisters; - } +int DoubleRegister::NumAllocatableRegisters() { + return XMMRegister::kNumAllocatableRegisters; } -int IntelDoubleRegister::NumRegisters() { - if (CpuFeatures::IsSupported(SSE2)) { - return XMMRegister::kNumRegisters; - } else { - return X87Register::kNumRegisters; - } +int DoubleRegister::NumRegisters() { + return XMMRegister::kNumRegisters; } -const char* IntelDoubleRegister::AllocationIndexToString(int index) { - if (CpuFeatures::IsSupported(SSE2)) { - return XMMRegister::AllocationIndexToString(index); - } else { - return X87Register::AllocationIndexToString(index); - } +const char* DoubleRegister::AllocationIndexToString(int index) { + return XMMRegister::AllocationIndexToString(index); } @@ -108,9 +96,9 @@ void CpuFeatures::Probe(bool serializer_enabled) { if (cpu.has_sse3()) { probed_features |= static_cast(1) << SSE3; } - if (cpu.has_sse2()) { - probed_features |= static_cast(1) << SSE2; - } + + CHECK(cpu.has_sse2()); // SSE2 support is mandatory. + if (cpu.has_cmov()) { probed_features |= static_cast(1) << CMOV; } @@ -349,15 +337,6 @@ bool Assembler::IsNop(Address addr) { void Assembler::Nop(int bytes) { EnsureSpace ensure_space(this); - if (!CpuFeatures::IsSupported(SSE2)) { - // Older CPUs that do not support SSE2 may not support multibyte NOP - // instructions. 
- for (; bytes > 0; bytes--) { - EMIT(0x90); - } - return; - } - // Multi byte nops from http://support.amd.com/us/Processor_TechDocs/40546.pdf while (bytes > 0) { switch (bytes) { @@ -1951,7 +1930,6 @@ void Assembler::setcc(Condition cc, Register reg) { void Assembler::cvttss2si(Register dst, const Operand& src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0xF3); EMIT(0x0F); @@ -1961,7 +1939,6 @@ void Assembler::cvttss2si(Register dst, const Operand& src) { void Assembler::cvttsd2si(Register dst, const Operand& src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0xF2); EMIT(0x0F); @@ -1971,7 +1948,6 @@ void Assembler::cvttsd2si(Register dst, const Operand& src) { void Assembler::cvtsd2si(Register dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0xF2); EMIT(0x0F); @@ -1981,7 +1957,6 @@ void Assembler::cvtsd2si(Register dst, XMMRegister src) { void Assembler::cvtsi2sd(XMMRegister dst, const Operand& src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0xF2); EMIT(0x0F); @@ -1991,7 +1966,6 @@ void Assembler::cvtsi2sd(XMMRegister dst, const Operand& src) { void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0xF3); EMIT(0x0F); @@ -2001,7 +1975,6 @@ void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) { void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0xF2); EMIT(0x0F); @@ -2011,7 +1984,6 @@ void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { void Assembler::addsd(XMMRegister dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0xF2); EMIT(0x0F); @@ -2021,7 +1993,6 @@ void Assembler::addsd(XMMRegister dst, XMMRegister src) { void Assembler::addsd(XMMRegister dst, const Operand& src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0xF2); EMIT(0x0F); @@ -2031,7 +2002,6 @@ void Assembler::addsd(XMMRegister dst, const Operand& src) { void Assembler::mulsd(XMMRegister dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0xF2); EMIT(0x0F); @@ -2041,7 +2011,6 @@ void Assembler::mulsd(XMMRegister dst, XMMRegister src) { void Assembler::mulsd(XMMRegister dst, const Operand& src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0xF2); EMIT(0x0F); @@ -2051,7 +2020,6 @@ void Assembler::mulsd(XMMRegister dst, const Operand& src) { void Assembler::subsd(XMMRegister dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0xF2); EMIT(0x0F); @@ -2061,7 +2029,6 @@ void Assembler::subsd(XMMRegister dst, XMMRegister src) { void Assembler::divsd(XMMRegister dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0xF2); EMIT(0x0F); @@ -2071,7 +2038,6 @@ void Assembler::divsd(XMMRegister dst, XMMRegister src) { void Assembler::xorpd(XMMRegister dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x66); EMIT(0x0F); @@ -2081,7 +2047,6 @@ void Assembler::xorpd(XMMRegister dst, XMMRegister src) { void Assembler::andps(XMMRegister dst, const Operand& src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x0F); EMIT(0x54); @@ -2090,7 +2055,6 @@ void Assembler::andps(XMMRegister dst, const Operand& src) { void Assembler::orps(XMMRegister dst, const Operand& src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x0F); EMIT(0x56); @@ 
-2099,7 +2063,6 @@ void Assembler::orps(XMMRegister dst, const Operand& src) { void Assembler::xorps(XMMRegister dst, const Operand& src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x0F); EMIT(0x57); @@ -2108,7 +2071,6 @@ void Assembler::xorps(XMMRegister dst, const Operand& src) { void Assembler::addps(XMMRegister dst, const Operand& src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x0F); EMIT(0x58); @@ -2117,7 +2079,6 @@ void Assembler::addps(XMMRegister dst, const Operand& src) { void Assembler::subps(XMMRegister dst, const Operand& src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x0F); EMIT(0x5C); @@ -2126,7 +2087,6 @@ void Assembler::subps(XMMRegister dst, const Operand& src) { void Assembler::mulps(XMMRegister dst, const Operand& src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x0F); EMIT(0x59); @@ -2135,7 +2095,6 @@ void Assembler::mulps(XMMRegister dst, const Operand& src) { void Assembler::divps(XMMRegister dst, const Operand& src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x0F); EMIT(0x5E); @@ -2144,7 +2103,6 @@ void Assembler::divps(XMMRegister dst, const Operand& src) { void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0xF2); EMIT(0x0F); @@ -2154,7 +2112,6 @@ void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { void Assembler::andpd(XMMRegister dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x66); EMIT(0x0F); @@ -2164,7 +2121,6 @@ void Assembler::andpd(XMMRegister dst, XMMRegister src) { void Assembler::orpd(XMMRegister dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x66); EMIT(0x0F); @@ -2174,7 +2130,6 @@ void Assembler::orpd(XMMRegister dst, XMMRegister src) { void Assembler::ucomisd(XMMRegister dst, const Operand& src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x66); EMIT(0x0F); @@ -2197,7 +2152,6 @@ void Assembler::roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode) { void Assembler::movmskpd(Register dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x66); EMIT(0x0F); @@ -2207,7 +2161,6 @@ void Assembler::movmskpd(Register dst, XMMRegister src) { void Assembler::movmskps(Register dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x0F); EMIT(0x50); @@ -2216,7 +2169,6 @@ void Assembler::movmskps(Register dst, XMMRegister src) { void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x66); EMIT(0x0F); @@ -2226,7 +2178,6 @@ void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) { void Assembler::cmpltsd(XMMRegister dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0xF2); EMIT(0x0F); @@ -2237,7 +2188,6 @@ void Assembler::cmpltsd(XMMRegister dst, XMMRegister src) { void Assembler::movaps(XMMRegister dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x0F); EMIT(0x28); @@ -2246,7 +2196,6 @@ void Assembler::movaps(XMMRegister dst, XMMRegister src) { void Assembler::shufps(XMMRegister dst, XMMRegister src, byte imm8) { - ASSERT(IsEnabled(SSE2)); ASSERT(is_uint8(imm8)); EnsureSpace ensure_space(this); EMIT(0x0F); @@ -2257,7 +2206,6 @@ void Assembler::shufps(XMMRegister dst, XMMRegister src, byte imm8) { void Assembler::movdqa(const Operand& dst, XMMRegister 
src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x66); EMIT(0x0F); @@ -2267,7 +2215,6 @@ void Assembler::movdqa(const Operand& dst, XMMRegister src) { void Assembler::movdqa(XMMRegister dst, const Operand& src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x66); EMIT(0x0F); @@ -2277,7 +2224,6 @@ void Assembler::movdqa(XMMRegister dst, const Operand& src) { void Assembler::movdqu(const Operand& dst, XMMRegister src ) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0xF3); EMIT(0x0F); @@ -2287,7 +2233,6 @@ void Assembler::movdqu(const Operand& dst, XMMRegister src ) { void Assembler::movdqu(XMMRegister dst, const Operand& src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0xF3); EMIT(0x0F); @@ -2308,7 +2253,6 @@ void Assembler::movntdqa(XMMRegister dst, const Operand& src) { void Assembler::movntdq(const Operand& dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x66); EMIT(0x0F); @@ -2329,7 +2273,6 @@ void Assembler::prefetch(const Operand& src, int level) { void Assembler::movsd(const Operand& dst, XMMRegister src ) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0xF2); // double EMIT(0x0F); @@ -2339,7 +2282,6 @@ void Assembler::movsd(const Operand& dst, XMMRegister src ) { void Assembler::movsd(XMMRegister dst, const Operand& src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0xF2); // double EMIT(0x0F); @@ -2349,7 +2291,6 @@ void Assembler::movsd(XMMRegister dst, const Operand& src) { void Assembler::movss(const Operand& dst, XMMRegister src ) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0xF3); // float EMIT(0x0F); @@ -2359,7 +2300,6 @@ void Assembler::movss(const Operand& dst, XMMRegister src ) { void Assembler::movss(XMMRegister dst, const Operand& src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0xF3); // float EMIT(0x0F); @@ -2369,7 +2309,6 @@ void Assembler::movss(XMMRegister dst, const Operand& src) { void Assembler::movd(XMMRegister dst, const Operand& src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x66); EMIT(0x0F); @@ -2379,7 +2318,6 @@ void Assembler::movd(XMMRegister dst, const Operand& src) { void Assembler::movd(const Operand& dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x66); EMIT(0x0F); @@ -2402,7 +2340,6 @@ void Assembler::extractps(Register dst, XMMRegister src, byte imm8) { void Assembler::pand(XMMRegister dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x66); EMIT(0x0F); @@ -2412,7 +2349,6 @@ void Assembler::pand(XMMRegister dst, XMMRegister src) { void Assembler::pxor(XMMRegister dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x66); EMIT(0x0F); @@ -2422,7 +2358,6 @@ void Assembler::pxor(XMMRegister dst, XMMRegister src) { void Assembler::por(XMMRegister dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x66); EMIT(0x0F); @@ -2443,7 +2378,6 @@ void Assembler::ptest(XMMRegister dst, XMMRegister src) { void Assembler::psllq(XMMRegister reg, int8_t shift) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x66); EMIT(0x0F); @@ -2454,7 +2388,6 @@ void Assembler::psllq(XMMRegister reg, int8_t shift) { void Assembler::psllq(XMMRegister dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x66); EMIT(0x0F); @@ -2464,7 +2397,6 @@ void 
Assembler::psllq(XMMRegister dst, XMMRegister src) { void Assembler::psrlq(XMMRegister reg, int8_t shift) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x66); EMIT(0x0F); @@ -2475,7 +2407,6 @@ void Assembler::psrlq(XMMRegister reg, int8_t shift) { void Assembler::psrlq(XMMRegister dst, XMMRegister src) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x66); EMIT(0x0F); @@ -2485,7 +2416,6 @@ void Assembler::psrlq(XMMRegister dst, XMMRegister src) { void Assembler::pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) { - ASSERT(IsEnabled(SSE2)); EnsureSpace ensure_space(this); EMIT(0x66); EMIT(0x0F); diff --git a/src/ia32/assembler-ia32.h b/src/ia32/assembler-ia32.h index 0d35fa4..74709fa 100644 --- a/src/ia32/assembler-ia32.h +++ b/src/ia32/assembler-ia32.h @@ -141,25 +141,25 @@ inline Register Register::FromAllocationIndex(int index) { } -struct IntelDoubleRegister { +struct DoubleRegister { static const int kMaxNumRegisters = 8; static const int kMaxNumAllocatableRegisters = 7; static int NumAllocatableRegisters(); static int NumRegisters(); static const char* AllocationIndexToString(int index); - static int ToAllocationIndex(IntelDoubleRegister reg) { + static int ToAllocationIndex(DoubleRegister reg) { ASSERT(reg.code() != 0); return reg.code() - 1; } - static IntelDoubleRegister FromAllocationIndex(int index) { + static DoubleRegister FromAllocationIndex(int index) { ASSERT(index >= 0 && index < NumAllocatableRegisters()); return from_code(index + 1); } - static IntelDoubleRegister from_code(int code) { - IntelDoubleRegister result = { code }; + static DoubleRegister from_code(int code) { + DoubleRegister result = { code }; return result; } @@ -175,23 +175,23 @@ struct IntelDoubleRegister { }; -const IntelDoubleRegister double_register_0 = { 0 }; -const IntelDoubleRegister double_register_1 = { 1 }; -const IntelDoubleRegister double_register_2 = { 2 }; -const IntelDoubleRegister double_register_3 = { 3 }; -const IntelDoubleRegister double_register_4 = { 4 }; -const IntelDoubleRegister double_register_5 = { 5 }; -const IntelDoubleRegister double_register_6 = { 6 }; -const IntelDoubleRegister double_register_7 = { 7 }; -const IntelDoubleRegister no_double_reg = { -1 }; +const DoubleRegister double_register_0 = { 0 }; +const DoubleRegister double_register_1 = { 1 }; +const DoubleRegister double_register_2 = { 2 }; +const DoubleRegister double_register_3 = { 3 }; +const DoubleRegister double_register_4 = { 4 }; +const DoubleRegister double_register_5 = { 5 }; +const DoubleRegister double_register_6 = { 6 }; +const DoubleRegister double_register_7 = { 7 }; +const DoubleRegister no_double_reg = { -1 }; -struct XMMRegister : IntelDoubleRegister { +struct XMMRegister : DoubleRegister { static const int kNumAllocatableRegisters = 7; static const int kNumRegisters = 8; static XMMRegister from_code(int code) { - STATIC_ASSERT(sizeof(XMMRegister) == sizeof(IntelDoubleRegister)); + STATIC_ASSERT(sizeof(XMMRegister) == sizeof(DoubleRegister)); XMMRegister result; result.code_ = code; return result; @@ -231,45 +231,6 @@ struct XMMRegister : IntelDoubleRegister { #define no_xmm_reg (static_cast(no_double_reg)) -struct X87Register : IntelDoubleRegister { - static const int kNumAllocatableRegisters = 5; - static const int kNumRegisters = 5; - - bool is(X87Register reg) const { - return code_ == reg.code_; - } - - static const char* AllocationIndexToString(int index) { - ASSERT(index >= 0 && index < kNumAllocatableRegisters); - const char* const names[] = { - 
"stX_0", "stX_1", "stX_2", "stX_3", "stX_4" - }; - return names[index]; - } - - static X87Register FromAllocationIndex(int index) { - STATIC_ASSERT(sizeof(X87Register) == sizeof(IntelDoubleRegister)); - ASSERT(index >= 0 && index < NumAllocatableRegisters()); - X87Register result; - result.code_ = index; - return result; - } - - static int ToAllocationIndex(X87Register reg) { - return reg.code_; - } -}; - -#define stX_0 static_cast(double_register_0) -#define stX_1 static_cast(double_register_1) -#define stX_2 static_cast(double_register_2) -#define stX_3 static_cast(double_register_3) -#define stX_4 static_cast(double_register_4) - - -typedef IntelDoubleRegister DoubleRegister; - - enum Condition { // any value < 0 is considered no_condition no_condition = -1, @@ -520,11 +481,11 @@ class Displacement BASE_EMBEDDED { // CpuFeatures keeps track of which features are supported by the target CPU. // Supported features must be enabled by a CpuFeatureScope before use. // Example: -// if (assembler->IsSupported(SSE2)) { -// CpuFeatureScope fscope(assembler, SSE2); -// // Generate SSE2 floating point code. +// if (assembler->IsSupported(CMOV)) { +// CpuFeatureScope fscope(assembler, CMOV); +// // Generate code containing cmov. // } else { -// // Generate standard x87 floating point code. +// // Generate alternative code. // } class CpuFeatures : public AllStatic { public: @@ -536,7 +497,6 @@ class CpuFeatures : public AllStatic { static bool IsSupported(CpuFeature f) { ASSERT(initialized_); if (Check(f, cross_compile_)) return true; - if (f == SSE2 && !FLAG_enable_sse2) return false; if (f == SSE3 && !FLAG_enable_sse3) return false; if (f == SSE4_1 && !FLAG_enable_sse4_1) return false; if (f == CMOV && !FLAG_enable_cmov) return false; @@ -560,7 +520,7 @@ class CpuFeatures : public AllStatic { (cross_compile_ & mask) == mask; } - static bool SupportsCrankshaft() { return IsSupported(SSE2); } + static bool SupportsCrankshaft() { return true; } private: static bool Check(CpuFeature f, uint64_t set) { diff --git a/src/ia32/builtins-ia32.cc b/src/ia32/builtins-ia32.cc index b3af2b2..969aae1 100644 --- a/src/ia32/builtins-ia32.cc +++ b/src/ia32/builtins-ia32.cc @@ -677,12 +677,7 @@ void Builtins::Generate_NotifyStubFailure(MacroAssembler* masm) { void Builtins::Generate_NotifyStubFailureSaveDoubles(MacroAssembler* masm) { - if (Serializer::enabled(masm->isolate())) { - PlatformFeatureScope sse2(masm->isolate(), SSE2); - Generate_NotifyStubFailureHelper(masm, kSaveFPRegs); - } else { - Generate_NotifyStubFailureHelper(masm, kSaveFPRegs); - } + Generate_NotifyStubFailureHelper(masm, kSaveFPRegs); } diff --git a/src/ia32/code-stubs-ia32.cc b/src/ia32/code-stubs-ia32.cc index afc0109..2fde5ac 100644 --- a/src/ia32/code-stubs-ia32.cc +++ b/src/ia32/code-stubs-ia32.cc @@ -466,7 +466,6 @@ void StoreBufferOverflowStub::Generate(MacroAssembler* masm) { // restore them. 
__ pushad(); if (save_doubles_ == kSaveFPRegs) { - CpuFeatureScope scope(masm, SSE2); __ sub(esp, Immediate(kDoubleSize * XMMRegister::kNumRegisters)); for (int i = 0; i < XMMRegister::kNumRegisters; i++) { XMMRegister reg = XMMRegister::from_code(i); @@ -483,7 +482,6 @@ void StoreBufferOverflowStub::Generate(MacroAssembler* masm) { ExternalReference::store_buffer_overflow_function(isolate()), argument_count); if (save_doubles_ == kSaveFPRegs) { - CpuFeatureScope scope(masm, SSE2); for (int i = 0; i < XMMRegister::kNumRegisters; i++) { XMMRegister reg = XMMRegister::from_code(i); __ movsd(reg, Operand(esp, i * kDoubleSize)); @@ -736,7 +734,6 @@ void FloatingPointHelper::CheckFloatOperands(MacroAssembler* masm, void MathPowStub::Generate(MacroAssembler* masm) { - CpuFeatureScope use_sse2(masm, SSE2); Factory* factory = isolate()->factory(); const Register exponent = eax; const Register base = edx; @@ -2051,15 +2048,14 @@ void ICCompareStub::GenerateGeneric(MacroAssembler* masm) { Label non_number_comparison; Label unordered; __ bind(&generic_heap_number_comparison); - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope use_sse2(masm, SSE2); - CpuFeatureScope use_cmov(masm, CMOV); - FloatingPointHelper::LoadSSE2Operands(masm, &non_number_comparison); - __ ucomisd(xmm0, xmm1); + FloatingPointHelper::LoadSSE2Operands(masm, &non_number_comparison); + __ ucomisd(xmm0, xmm1); + // Don't base result on EFLAGS when a NaN is involved. + __ j(parity_even, &unordered, Label::kNear); - // Don't base result on EFLAGS when a NaN is involved. - __ j(parity_even, &unordered, Label::kNear); + if (CpuFeatures::IsSupported(CMOV)) { + CpuFeatureScope use_cmov(masm, CMOV); // Return a result of -1, 0, or 1, based on EFLAGS. __ mov(eax, 0); // equal __ mov(ecx, Immediate(Smi::FromInt(1))); @@ -2068,15 +2064,6 @@ void ICCompareStub::GenerateGeneric(MacroAssembler* masm) { __ cmov(below, eax, ecx); __ ret(0); } else { - FloatingPointHelper::CheckFloatOperands( - masm, &non_number_comparison, ebx); - FloatingPointHelper::LoadFloatOperand(masm, eax); - FloatingPointHelper::LoadFloatOperand(masm, edx); - __ FCmp(); - - // Don't base result on EFLAGS when a NaN is involved. - __ j(parity_even, &unordered, Label::kNear); - Label below_label, above_label; // Return a result of -1, 0, or 1, based on EFLAGS. __ j(below, &below_label, Label::kNear); @@ -2614,28 +2601,20 @@ void CodeStub::GenerateStubsAheadOfTime(Isolate* isolate) { // It is important that the store buffer overflow stubs are generated first. ArrayConstructorStubBase::GenerateStubsAheadOfTime(isolate); CreateAllocationSiteStub::GenerateAheadOfTime(isolate); - if (Serializer::enabled(isolate)) { - PlatformFeatureScope sse2(isolate, SSE2); - BinaryOpICStub::GenerateAheadOfTime(isolate); - BinaryOpICWithAllocationSiteStub::GenerateAheadOfTime(isolate); - } else { - BinaryOpICStub::GenerateAheadOfTime(isolate); - BinaryOpICWithAllocationSiteStub::GenerateAheadOfTime(isolate); - } + BinaryOpICStub::GenerateAheadOfTime(isolate); + BinaryOpICWithAllocationSiteStub::GenerateAheadOfTime(isolate); } void CodeStub::GenerateFPStubs(Isolate* isolate) { - if (CpuFeatures::IsSupported(SSE2)) { - CEntryStub save_doubles(isolate, 1, kSaveFPRegs); - // Stubs might already be in the snapshot, detect that and don't regenerate, - // which would lead to code stub initialization state being messed up. 
- Code* save_doubles_code; - if (!save_doubles.FindCodeInCache(&save_doubles_code)) { - save_doubles_code = *(save_doubles.GetCode()); - } - isolate->set_fp_stubs_generated(true); + CEntryStub save_doubles(isolate, 1, kSaveFPRegs); + // Stubs might already be in the snapshot, detect that and don't regenerate, + // which would lead to code stub initialization state being messed up. + Code* save_doubles_code; + if (!save_doubles.FindCodeInCache(&save_doubles_code)) { + save_doubles_code = *(save_doubles.GetCode()); } + isolate->set_fp_stubs_generated(true); } @@ -3785,8 +3764,7 @@ void ICCompareStub::GenerateNumbers(MacroAssembler* masm) { // Inlining the double comparison and falling back to the general compare // stub if NaN is involved or SSE2 or CMOV is unsupported. - if (CpuFeatures::IsSupported(SSE2) && CpuFeatures::IsSupported(CMOV)) { - CpuFeatureScope scope1(masm, SSE2); + if (CpuFeatures::IsSupported(CMOV)) { CpuFeatureScope scope2(masm, CMOV); // Load left and right operand. @@ -4332,15 +4310,8 @@ void StoreBufferOverflowStub::GenerateFixedRegStubsAheadOfTime( Isolate* isolate) { StoreBufferOverflowStub stub(isolate, kDontSaveFPRegs); stub.GetCode(); - if (CpuFeatures::IsSafeForSnapshot(isolate, SSE2)) { - StoreBufferOverflowStub stub2(isolate, kSaveFPRegs); - stub2.GetCode(); - } -} - - -bool CodeStub::CanUseFPRegisters() { - return CpuFeatures::IsSupported(SSE2); + StoreBufferOverflowStub stub2(isolate, kSaveFPRegs); + stub2.GetCode(); } @@ -4616,15 +4587,14 @@ void StoreArrayLiteralElementStub::Generate(MacroAssembler* masm) { ecx, edi, xmm0, - &slow_elements_from_double, - false); + &slow_elements_from_double); __ pop(edx); __ ret(0); } void StubFailureTrampolineStub::Generate(MacroAssembler* masm) { - CEntryStub ces(isolate(), 1, fp_registers_ ? kSaveFPRegs : kDontSaveFPRegs); + CEntryStub ces(isolate(), 1, kSaveFPRegs); __ call(ces.GetCode(), RelocInfo::CODE_TARGET); int parameter_count_offset = StubFailureTrampolineFrame::kCallerStackParameterCountFrameOffset; diff --git a/src/ia32/code-stubs-ia32.h b/src/ia32/code-stubs-ia32.h index 1d55ec3..588248f 100644 --- a/src/ia32/code-stubs-ia32.h +++ b/src/ia32/code-stubs-ia32.h @@ -20,10 +20,7 @@ void ArrayNativeCode(MacroAssembler* masm, class StoreBufferOverflowStub: public PlatformCodeStub { public: StoreBufferOverflowStub(Isolate* isolate, SaveFPRegsMode save_fp) - : PlatformCodeStub(isolate), save_doubles_(save_fp) { - ASSERT(CpuFeatures::IsSafeForSnapshot(isolate, SSE2) || - save_fp == kDontSaveFPRegs); - } + : PlatformCodeStub(isolate), save_doubles_(save_fp) { } void Generate(MacroAssembler* masm); @@ -197,8 +194,6 @@ class RecordWriteStub: public PlatformCodeStub { regs_(object, // An input reg. address, // An input reg. value) { // One scratch reg. - ASSERT(CpuFeatures::IsSafeForSnapshot(isolate, SSE2) || - fp_mode == kDontSaveFPRegs); } enum Mode { @@ -340,7 +335,6 @@ class RecordWriteStub: public PlatformCodeStub { if (!scratch0_.is(eax) && !scratch1_.is(eax)) masm->push(eax); if (!scratch0_.is(edx) && !scratch1_.is(edx)) masm->push(edx); if (mode == kSaveFPRegs) { - CpuFeatureScope scope(masm, SSE2); masm->sub(esp, Immediate(kDoubleSize * (XMMRegister::kNumRegisters - 1))); // Save all XMM registers except XMM0. @@ -354,7 +348,6 @@ class RecordWriteStub: public PlatformCodeStub { inline void RestoreCallerSaveRegisters(MacroAssembler*masm, SaveFPRegsMode mode) { if (mode == kSaveFPRegs) { - CpuFeatureScope scope(masm, SSE2); // Restore all XMM registers except XMM0. 
for (int i = XMMRegister::kNumRegisters - 1; i > 0; i--) { XMMRegister reg = XMMRegister::from_code(i); diff --git a/src/ia32/codegen-ia32.cc b/src/ia32/codegen-ia32.cc index 19b66ae..04d0064 100644 --- a/src/ia32/codegen-ia32.cc +++ b/src/ia32/codegen-ia32.cc @@ -35,7 +35,6 @@ void StubRuntimeCallHelper::AfterCall(MacroAssembler* masm) const { UnaryMathFunction CreateExpFunction() { - if (!CpuFeatures::IsSupported(SSE2)) return &std::exp; if (!FLAG_fast_math) return &std::exp; size_t actual_size; byte* buffer = static_cast(OS::Allocate(1 * KB, &actual_size, true)); @@ -46,7 +45,6 @@ UnaryMathFunction CreateExpFunction() { // esp[1 * kPointerSize]: raw double input // esp[0 * kPointerSize]: return address { - CpuFeatureScope use_sse2(&masm, SSE2); XMMRegister input = xmm1; XMMRegister result = xmm2; __ movsd(input, Operand(esp, 1 * kPointerSize)); @@ -78,15 +76,12 @@ UnaryMathFunction CreateSqrtFunction() { byte* buffer = static_cast(OS::Allocate(1 * KB, &actual_size, true)); - // If SSE2 is not available, we can use libc's implementation to ensure - // consistency since code by fullcodegen's calls into runtime in that case. - if (buffer == NULL || !CpuFeatures::IsSupported(SSE2)) return &std::sqrt; + if (buffer == NULL) return &std::sqrt; MacroAssembler masm(NULL, buffer, static_cast(actual_size)); // esp[1 * kPointerSize]: raw double input // esp[0 * kPointerSize]: return address // Move double input into registers. { - CpuFeatureScope use_sse2(&masm, SSE2); __ movsd(xmm0, Operand(esp, 1 * kPointerSize)); __ sqrtsd(xmm0, xmm0); __ movsd(Operand(esp, 1 * kPointerSize), xmm0); @@ -243,325 +238,264 @@ OS::MemMoveFunction CreateMemMoveFunction() { __ cmp(dst, src); __ j(equal, &pop_and_return); - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope sse2_scope(&masm, SSE2); - __ prefetch(Operand(src, 0), 1); + __ prefetch(Operand(src, 0), 1); + __ cmp(count, kSmallCopySize); + __ j(below_equal, &small_size); + __ cmp(count, kMediumCopySize); + __ j(below_equal, &medium_size); + __ cmp(dst, src); + __ j(above, &backward); + + { + // |dst| is a lower address than |src|. Copy front-to-back. + Label unaligned_source, move_last_15, skip_last_move; + __ mov(eax, src); + __ sub(eax, dst); + __ cmp(eax, kMinMoveDistance); + __ j(below, &forward_much_overlap); + // Copy first 16 bytes. + __ movdqu(xmm0, Operand(src, 0)); + __ movdqu(Operand(dst, 0), xmm0); + // Determine distance to alignment: 16 - (dst & 0xF). + __ mov(edx, dst); + __ and_(edx, 0xF); + __ neg(edx); + __ add(edx, Immediate(16)); + __ add(dst, edx); + __ add(src, edx); + __ sub(count, edx); + // dst is now aligned. Main copy loop. + __ mov(loop_count, count); + __ shr(loop_count, 6); + // Check if src is also aligned. + __ test(src, Immediate(0xF)); + __ j(not_zero, &unaligned_source); + // Copy loop for aligned source and destination. + MemMoveEmitMainLoop(&masm, &move_last_15, FORWARD, MOVE_ALIGNED); + // At most 15 bytes to copy. Copy 16 bytes at end of string. + __ bind(&move_last_15); + __ and_(count, 0xF); + __ j(zero, &skip_last_move, Label::kNear); + __ movdqu(xmm0, Operand(src, count, times_1, -0x10)); + __ movdqu(Operand(dst, count, times_1, -0x10), xmm0); + __ bind(&skip_last_move); + MemMoveEmitPopAndReturn(&masm); + + // Copy loop for unaligned source and aligned destination. + __ bind(&unaligned_source); + MemMoveEmitMainLoop(&masm, &move_last_15, FORWARD, MOVE_UNALIGNED); + __ jmp(&move_last_15); + + // Less than kMinMoveDistance offset between dst and src. 
+ Label loop_until_aligned, last_15_much_overlap; + __ bind(&loop_until_aligned); + __ mov_b(eax, Operand(src, 0)); + __ inc(src); + __ mov_b(Operand(dst, 0), eax); + __ inc(dst); + __ dec(count); + __ bind(&forward_much_overlap); // Entry point into this block. + __ test(dst, Immediate(0xF)); + __ j(not_zero, &loop_until_aligned); + // dst is now aligned, src can't be. Main copy loop. + __ mov(loop_count, count); + __ shr(loop_count, 6); + MemMoveEmitMainLoop(&masm, &last_15_much_overlap, + FORWARD, MOVE_UNALIGNED); + __ bind(&last_15_much_overlap); + __ and_(count, 0xF); + __ j(zero, &pop_and_return); __ cmp(count, kSmallCopySize); __ j(below_equal, &small_size); - __ cmp(count, kMediumCopySize); - __ j(below_equal, &medium_size); - __ cmp(dst, src); - __ j(above, &backward); - - { - // |dst| is a lower address than |src|. Copy front-to-back. - Label unaligned_source, move_last_15, skip_last_move; - __ mov(eax, src); - __ sub(eax, dst); - __ cmp(eax, kMinMoveDistance); - __ j(below, &forward_much_overlap); - // Copy first 16 bytes. - __ movdqu(xmm0, Operand(src, 0)); - __ movdqu(Operand(dst, 0), xmm0); - // Determine distance to alignment: 16 - (dst & 0xF). - __ mov(edx, dst); - __ and_(edx, 0xF); - __ neg(edx); - __ add(edx, Immediate(16)); - __ add(dst, edx); - __ add(src, edx); - __ sub(count, edx); - // dst is now aligned. Main copy loop. - __ mov(loop_count, count); - __ shr(loop_count, 6); - // Check if src is also aligned. - __ test(src, Immediate(0xF)); - __ j(not_zero, &unaligned_source); - // Copy loop for aligned source and destination. - MemMoveEmitMainLoop(&masm, &move_last_15, FORWARD, MOVE_ALIGNED); - // At most 15 bytes to copy. Copy 16 bytes at end of string. - __ bind(&move_last_15); - __ and_(count, 0xF); - __ j(zero, &skip_last_move, Label::kNear); - __ movdqu(xmm0, Operand(src, count, times_1, -0x10)); - __ movdqu(Operand(dst, count, times_1, -0x10), xmm0); - __ bind(&skip_last_move); - MemMoveEmitPopAndReturn(&masm); - - // Copy loop for unaligned source and aligned destination. - __ bind(&unaligned_source); - MemMoveEmitMainLoop(&masm, &move_last_15, FORWARD, MOVE_UNALIGNED); - __ jmp(&move_last_15); - - // Less than kMinMoveDistance offset between dst and src. - Label loop_until_aligned, last_15_much_overlap; - __ bind(&loop_until_aligned); - __ mov_b(eax, Operand(src, 0)); - __ inc(src); - __ mov_b(Operand(dst, 0), eax); - __ inc(dst); - __ dec(count); - __ bind(&forward_much_overlap); // Entry point into this block. - __ test(dst, Immediate(0xF)); - __ j(not_zero, &loop_until_aligned); - // dst is now aligned, src can't be. Main copy loop. - __ mov(loop_count, count); - __ shr(loop_count, 6); - MemMoveEmitMainLoop(&masm, &last_15_much_overlap, - FORWARD, MOVE_UNALIGNED); - __ bind(&last_15_much_overlap); - __ and_(count, 0xF); - __ j(zero, &pop_and_return); - __ cmp(count, kSmallCopySize); - __ j(below_equal, &small_size); - __ jmp(&medium_size); - } + __ jmp(&medium_size); + } - { - // |dst| is a higher address than |src|. Copy backwards. - Label unaligned_source, move_first_15, skip_last_move; - __ bind(&backward); - // |dst| and |src| always point to the end of what's left to copy. - __ add(dst, count); - __ add(src, count); - __ mov(eax, dst); - __ sub(eax, src); - __ cmp(eax, kMinMoveDistance); - __ j(below, &backward_much_overlap); - // Copy last 16 bytes. 
- __ movdqu(xmm0, Operand(src, -0x10)); - __ movdqu(Operand(dst, -0x10), xmm0); - // Find distance to alignment: dst & 0xF - __ mov(edx, dst); - __ and_(edx, 0xF); - __ sub(dst, edx); - __ sub(src, edx); - __ sub(count, edx); - // dst is now aligned. Main copy loop. - __ mov(loop_count, count); - __ shr(loop_count, 6); - // Check if src is also aligned. - __ test(src, Immediate(0xF)); - __ j(not_zero, &unaligned_source); - // Copy loop for aligned source and destination. - MemMoveEmitMainLoop(&masm, &move_first_15, BACKWARD, MOVE_ALIGNED); - // At most 15 bytes to copy. Copy 16 bytes at beginning of string. - __ bind(&move_first_15); - __ and_(count, 0xF); - __ j(zero, &skip_last_move, Label::kNear); - __ sub(src, count); - __ sub(dst, count); - __ movdqu(xmm0, Operand(src, 0)); - __ movdqu(Operand(dst, 0), xmm0); - __ bind(&skip_last_move); - MemMoveEmitPopAndReturn(&masm); - - // Copy loop for unaligned source and aligned destination. - __ bind(&unaligned_source); - MemMoveEmitMainLoop(&masm, &move_first_15, BACKWARD, MOVE_UNALIGNED); - __ jmp(&move_first_15); - - // Less than kMinMoveDistance offset between dst and src. - Label loop_until_aligned, first_15_much_overlap; - __ bind(&loop_until_aligned); - __ dec(src); - __ dec(dst); - __ mov_b(eax, Operand(src, 0)); - __ mov_b(Operand(dst, 0), eax); - __ dec(count); - __ bind(&backward_much_overlap); // Entry point into this block. - __ test(dst, Immediate(0xF)); - __ j(not_zero, &loop_until_aligned); - // dst is now aligned, src can't be. Main copy loop. - __ mov(loop_count, count); - __ shr(loop_count, 6); - MemMoveEmitMainLoop(&masm, &first_15_much_overlap, - BACKWARD, MOVE_UNALIGNED); - __ bind(&first_15_much_overlap); - __ and_(count, 0xF); - __ j(zero, &pop_and_return); - // Small/medium handlers expect dst/src to point to the beginning. - __ sub(dst, count); - __ sub(src, count); - __ cmp(count, kSmallCopySize); - __ j(below_equal, &small_size); - __ jmp(&medium_size); - } - { - // Special handlers for 9 <= copy_size < 64. No assumptions about - // alignment or move distance, so all reads must be unaligned and - // must happen before any writes. 
- Label medium_handlers, f9_16, f17_32, f33_48, f49_63; - - __ bind(&f9_16); - __ movsd(xmm0, Operand(src, 0)); - __ movsd(xmm1, Operand(src, count, times_1, -8)); - __ movsd(Operand(dst, 0), xmm0); - __ movsd(Operand(dst, count, times_1, -8), xmm1); - MemMoveEmitPopAndReturn(&masm); - - __ bind(&f17_32); - __ movdqu(xmm0, Operand(src, 0)); - __ movdqu(xmm1, Operand(src, count, times_1, -0x10)); - __ movdqu(Operand(dst, 0x00), xmm0); - __ movdqu(Operand(dst, count, times_1, -0x10), xmm1); - MemMoveEmitPopAndReturn(&masm); - - __ bind(&f33_48); - __ movdqu(xmm0, Operand(src, 0x00)); - __ movdqu(xmm1, Operand(src, 0x10)); - __ movdqu(xmm2, Operand(src, count, times_1, -0x10)); - __ movdqu(Operand(dst, 0x00), xmm0); - __ movdqu(Operand(dst, 0x10), xmm1); - __ movdqu(Operand(dst, count, times_1, -0x10), xmm2); - MemMoveEmitPopAndReturn(&masm); - - __ bind(&f49_63); - __ movdqu(xmm0, Operand(src, 0x00)); - __ movdqu(xmm1, Operand(src, 0x10)); - __ movdqu(xmm2, Operand(src, 0x20)); - __ movdqu(xmm3, Operand(src, count, times_1, -0x10)); - __ movdqu(Operand(dst, 0x00), xmm0); - __ movdqu(Operand(dst, 0x10), xmm1); - __ movdqu(Operand(dst, 0x20), xmm2); - __ movdqu(Operand(dst, count, times_1, -0x10), xmm3); - MemMoveEmitPopAndReturn(&masm); - - __ bind(&medium_handlers); - __ dd(conv.address(&f9_16)); - __ dd(conv.address(&f17_32)); - __ dd(conv.address(&f33_48)); - __ dd(conv.address(&f49_63)); - - __ bind(&medium_size); // Entry point into this block. - __ mov(eax, count); - __ dec(eax); - __ shr(eax, 4); - if (FLAG_debug_code) { - Label ok; - __ cmp(eax, 3); - __ j(below_equal, &ok); - __ int3(); - __ bind(&ok); - } - __ mov(eax, Operand(eax, times_4, conv.address(&medium_handlers))); - __ jmp(eax); - } - { - // Specialized copiers for copy_size <= 8 bytes. - Label small_handlers, f0, f1, f2, f3, f4, f5_8; - __ bind(&f0); - MemMoveEmitPopAndReturn(&masm); - - __ bind(&f1); - __ mov_b(eax, Operand(src, 0)); - __ mov_b(Operand(dst, 0), eax); - MemMoveEmitPopAndReturn(&masm); - - __ bind(&f2); - __ mov_w(eax, Operand(src, 0)); - __ mov_w(Operand(dst, 0), eax); - MemMoveEmitPopAndReturn(&masm); - - __ bind(&f3); - __ mov_w(eax, Operand(src, 0)); - __ mov_b(edx, Operand(src, 2)); - __ mov_w(Operand(dst, 0), eax); - __ mov_b(Operand(dst, 2), edx); - MemMoveEmitPopAndReturn(&masm); - - __ bind(&f4); - __ mov(eax, Operand(src, 0)); - __ mov(Operand(dst, 0), eax); - MemMoveEmitPopAndReturn(&masm); - - __ bind(&f5_8); - __ mov(eax, Operand(src, 0)); - __ mov(edx, Operand(src, count, times_1, -4)); - __ mov(Operand(dst, 0), eax); - __ mov(Operand(dst, count, times_1, -4), edx); - MemMoveEmitPopAndReturn(&masm); - - __ bind(&small_handlers); - __ dd(conv.address(&f0)); - __ dd(conv.address(&f1)); - __ dd(conv.address(&f2)); - __ dd(conv.address(&f3)); - __ dd(conv.address(&f4)); - __ dd(conv.address(&f5_8)); - __ dd(conv.address(&f5_8)); - __ dd(conv.address(&f5_8)); - __ dd(conv.address(&f5_8)); - - __ bind(&small_size); // Entry point into this block. - if (FLAG_debug_code) { - Label ok; - __ cmp(count, 8); - __ j(below_equal, &ok); - __ int3(); - __ bind(&ok); - } - __ mov(eax, Operand(count, times_4, conv.address(&small_handlers))); - __ jmp(eax); - } - } else { - // No SSE2. - Label forward; - __ cmp(count, 0); - __ j(equal, &pop_and_return); - __ cmp(dst, src); - __ j(above, &backward); - __ jmp(&forward); - { - // Simple forward copier. 
- Label forward_loop_1byte, forward_loop_4byte; - __ bind(&forward_loop_4byte); - __ mov(eax, Operand(src, 0)); - __ sub(count, Immediate(4)); - __ add(src, Immediate(4)); - __ mov(Operand(dst, 0), eax); - __ add(dst, Immediate(4)); - __ bind(&forward); // Entry point. - __ cmp(count, 3); - __ j(above, &forward_loop_4byte); - __ bind(&forward_loop_1byte); - __ cmp(count, 0); - __ j(below_equal, &pop_and_return); - __ mov_b(eax, Operand(src, 0)); - __ dec(count); - __ inc(src); - __ mov_b(Operand(dst, 0), eax); - __ inc(dst); - __ jmp(&forward_loop_1byte); + { + // |dst| is a higher address than |src|. Copy backwards. + Label unaligned_source, move_first_15, skip_last_move; + __ bind(&backward); + // |dst| and |src| always point to the end of what's left to copy. + __ add(dst, count); + __ add(src, count); + __ mov(eax, dst); + __ sub(eax, src); + __ cmp(eax, kMinMoveDistance); + __ j(below, &backward_much_overlap); + // Copy last 16 bytes. + __ movdqu(xmm0, Operand(src, -0x10)); + __ movdqu(Operand(dst, -0x10), xmm0); + // Find distance to alignment: dst & 0xF + __ mov(edx, dst); + __ and_(edx, 0xF); + __ sub(dst, edx); + __ sub(src, edx); + __ sub(count, edx); + // dst is now aligned. Main copy loop. + __ mov(loop_count, count); + __ shr(loop_count, 6); + // Check if src is also aligned. + __ test(src, Immediate(0xF)); + __ j(not_zero, &unaligned_source); + // Copy loop for aligned source and destination. + MemMoveEmitMainLoop(&masm, &move_first_15, BACKWARD, MOVE_ALIGNED); + // At most 15 bytes to copy. Copy 16 bytes at beginning of string. + __ bind(&move_first_15); + __ and_(count, 0xF); + __ j(zero, &skip_last_move, Label::kNear); + __ sub(src, count); + __ sub(dst, count); + __ movdqu(xmm0, Operand(src, 0)); + __ movdqu(Operand(dst, 0), xmm0); + __ bind(&skip_last_move); + MemMoveEmitPopAndReturn(&masm); + + // Copy loop for unaligned source and aligned destination. + __ bind(&unaligned_source); + MemMoveEmitMainLoop(&masm, &move_first_15, BACKWARD, MOVE_UNALIGNED); + __ jmp(&move_first_15); + + // Less than kMinMoveDistance offset between dst and src. + Label loop_until_aligned, first_15_much_overlap; + __ bind(&loop_until_aligned); + __ dec(src); + __ dec(dst); + __ mov_b(eax, Operand(src, 0)); + __ mov_b(Operand(dst, 0), eax); + __ dec(count); + __ bind(&backward_much_overlap); // Entry point into this block. + __ test(dst, Immediate(0xF)); + __ j(not_zero, &loop_until_aligned); + // dst is now aligned, src can't be. Main copy loop. + __ mov(loop_count, count); + __ shr(loop_count, 6); + MemMoveEmitMainLoop(&masm, &first_15_much_overlap, + BACKWARD, MOVE_UNALIGNED); + __ bind(&first_15_much_overlap); + __ and_(count, 0xF); + __ j(zero, &pop_and_return); + // Small/medium handlers expect dst/src to point to the beginning. + __ sub(dst, count); + __ sub(src, count); + __ cmp(count, kSmallCopySize); + __ j(below_equal, &small_size); + __ jmp(&medium_size); + } + { + // Special handlers for 9 <= copy_size < 64. No assumptions about + // alignment or move distance, so all reads must be unaligned and + // must happen before any writes. 
+ Label medium_handlers, f9_16, f17_32, f33_48, f49_63; + + __ bind(&f9_16); + __ movsd(xmm0, Operand(src, 0)); + __ movsd(xmm1, Operand(src, count, times_1, -8)); + __ movsd(Operand(dst, 0), xmm0); + __ movsd(Operand(dst, count, times_1, -8), xmm1); + MemMoveEmitPopAndReturn(&masm); + + __ bind(&f17_32); + __ movdqu(xmm0, Operand(src, 0)); + __ movdqu(xmm1, Operand(src, count, times_1, -0x10)); + __ movdqu(Operand(dst, 0x00), xmm0); + __ movdqu(Operand(dst, count, times_1, -0x10), xmm1); + MemMoveEmitPopAndReturn(&masm); + + __ bind(&f33_48); + __ movdqu(xmm0, Operand(src, 0x00)); + __ movdqu(xmm1, Operand(src, 0x10)); + __ movdqu(xmm2, Operand(src, count, times_1, -0x10)); + __ movdqu(Operand(dst, 0x00), xmm0); + __ movdqu(Operand(dst, 0x10), xmm1); + __ movdqu(Operand(dst, count, times_1, -0x10), xmm2); + MemMoveEmitPopAndReturn(&masm); + + __ bind(&f49_63); + __ movdqu(xmm0, Operand(src, 0x00)); + __ movdqu(xmm1, Operand(src, 0x10)); + __ movdqu(xmm2, Operand(src, 0x20)); + __ movdqu(xmm3, Operand(src, count, times_1, -0x10)); + __ movdqu(Operand(dst, 0x00), xmm0); + __ movdqu(Operand(dst, 0x10), xmm1); + __ movdqu(Operand(dst, 0x20), xmm2); + __ movdqu(Operand(dst, count, times_1, -0x10), xmm3); + MemMoveEmitPopAndReturn(&masm); + + __ bind(&medium_handlers); + __ dd(conv.address(&f9_16)); + __ dd(conv.address(&f17_32)); + __ dd(conv.address(&f33_48)); + __ dd(conv.address(&f49_63)); + + __ bind(&medium_size); // Entry point into this block. + __ mov(eax, count); + __ dec(eax); + __ shr(eax, 4); + if (FLAG_debug_code) { + Label ok; + __ cmp(eax, 3); + __ j(below_equal, &ok); + __ int3(); + __ bind(&ok); } - { - // Simple backward copier. - Label backward_loop_1byte, backward_loop_4byte, entry_shortcut; - __ bind(&backward); - __ add(src, count); - __ add(dst, count); - __ cmp(count, 3); - __ j(below_equal, &entry_shortcut); - - __ bind(&backward_loop_4byte); - __ sub(src, Immediate(4)); - __ sub(count, Immediate(4)); - __ mov(eax, Operand(src, 0)); - __ sub(dst, Immediate(4)); - __ mov(Operand(dst, 0), eax); - __ cmp(count, 3); - __ j(above, &backward_loop_4byte); - __ bind(&backward_loop_1byte); - __ cmp(count, 0); - __ j(below_equal, &pop_and_return); - __ bind(&entry_shortcut); - __ dec(src); - __ dec(count); - __ mov_b(eax, Operand(src, 0)); - __ dec(dst); - __ mov_b(Operand(dst, 0), eax); - __ jmp(&backward_loop_1byte); + __ mov(eax, Operand(eax, times_4, conv.address(&medium_handlers))); + __ jmp(eax); + } + { + // Specialized copiers for copy_size <= 8 bytes. 
+ Label small_handlers, f0, f1, f2, f3, f4, f5_8; + __ bind(&f0); + MemMoveEmitPopAndReturn(&masm); + + __ bind(&f1); + __ mov_b(eax, Operand(src, 0)); + __ mov_b(Operand(dst, 0), eax); + MemMoveEmitPopAndReturn(&masm); + + __ bind(&f2); + __ mov_w(eax, Operand(src, 0)); + __ mov_w(Operand(dst, 0), eax); + MemMoveEmitPopAndReturn(&masm); + + __ bind(&f3); + __ mov_w(eax, Operand(src, 0)); + __ mov_b(edx, Operand(src, 2)); + __ mov_w(Operand(dst, 0), eax); + __ mov_b(Operand(dst, 2), edx); + MemMoveEmitPopAndReturn(&masm); + + __ bind(&f4); + __ mov(eax, Operand(src, 0)); + __ mov(Operand(dst, 0), eax); + MemMoveEmitPopAndReturn(&masm); + + __ bind(&f5_8); + __ mov(eax, Operand(src, 0)); + __ mov(edx, Operand(src, count, times_1, -4)); + __ mov(Operand(dst, 0), eax); + __ mov(Operand(dst, count, times_1, -4), edx); + MemMoveEmitPopAndReturn(&masm); + + __ bind(&small_handlers); + __ dd(conv.address(&f0)); + __ dd(conv.address(&f1)); + __ dd(conv.address(&f2)); + __ dd(conv.address(&f3)); + __ dd(conv.address(&f4)); + __ dd(conv.address(&f5_8)); + __ dd(conv.address(&f5_8)); + __ dd(conv.address(&f5_8)); + __ dd(conv.address(&f5_8)); + + __ bind(&small_size); // Entry point into this block. + if (FLAG_debug_code) { + Label ok; + __ cmp(count, 8); + __ j(below_equal, &ok); + __ int3(); + __ bind(&ok); } + __ mov(eax, Operand(count, times_4, conv.address(&small_handlers))); + __ jmp(eax); } __ bind(&pop_and_return); @@ -671,11 +605,8 @@ void ElementsTransitionGenerator::GenerateSmiToDouble( ExternalReference canonical_the_hole_nan_reference = ExternalReference::address_of_the_hole_nan(); XMMRegister the_hole_nan = xmm1; - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope use_sse2(masm, SSE2); - __ movsd(the_hole_nan, - Operand::StaticVariable(canonical_the_hole_nan_reference)); - } + __ movsd(the_hole_nan, + Operand::StaticVariable(canonical_the_hole_nan_reference)); __ jmp(&entry); // Call into runtime if GC is required. @@ -696,17 +627,9 @@ void ElementsTransitionGenerator::GenerateSmiToDouble( // Normal smi, convert it to double and store. __ SmiUntag(ebx); - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope fscope(masm, SSE2); - __ Cvtsi2sd(xmm0, ebx); - __ movsd(FieldOperand(eax, edi, times_4, FixedDoubleArray::kHeaderSize), - xmm0); - } else { - __ push(ebx); - __ fild_s(Operand(esp, 0)); - __ pop(ebx); - __ fstp_d(FieldOperand(eax, edi, times_4, FixedDoubleArray::kHeaderSize)); - } + __ Cvtsi2sd(xmm0, ebx); + __ movsd(FieldOperand(eax, edi, times_4, FixedDoubleArray::kHeaderSize), + xmm0); __ jmp(&entry); // Found hole, store hole_nan_as_double instead. @@ -717,14 +640,8 @@ void ElementsTransitionGenerator::GenerateSmiToDouble( __ Assert(equal, kObjectFoundInSmiOnlyArray); } - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope use_sse2(masm, SSE2); - __ movsd(FieldOperand(eax, edi, times_4, FixedDoubleArray::kHeaderSize), - the_hole_nan); - } else { - __ fld_d(Operand::StaticVariable(canonical_the_hole_nan_reference)); - __ fstp_d(FieldOperand(eax, edi, times_4, FixedDoubleArray::kHeaderSize)); - } + __ movsd(FieldOperand(eax, edi, times_4, FixedDoubleArray::kHeaderSize), + the_hole_nan); __ bind(&entry); __ sub(edi, Immediate(Smi::FromInt(1))); @@ -826,17 +743,9 @@ void ElementsTransitionGenerator::GenerateDoubleToObject( // Non-hole double, copy value into a heap number. 
__ AllocateHeapNumber(edx, esi, no_reg, &gc_required); // edx: new heap number - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope fscope(masm, SSE2); - __ movsd(xmm0, - FieldOperand(edi, ebx, times_4, FixedDoubleArray::kHeaderSize)); - __ movsd(FieldOperand(edx, HeapNumber::kValueOffset), xmm0); - } else { - __ mov(esi, FieldOperand(edi, ebx, times_4, FixedDoubleArray::kHeaderSize)); - __ mov(FieldOperand(edx, HeapNumber::kValueOffset), esi); - __ mov(esi, FieldOperand(edi, ebx, times_4, offset)); - __ mov(FieldOperand(edx, HeapNumber::kValueOffset + kPointerSize), esi); - } + __ movsd(xmm0, + FieldOperand(edi, ebx, times_4, FixedDoubleArray::kHeaderSize)); + __ movsd(FieldOperand(edx, HeapNumber::kValueOffset), xmm0); __ mov(FieldOperand(eax, ebx, times_2, FixedArray::kHeaderSize), edx); __ mov(esi, ebx); __ RecordWriteArray(eax, diff --git a/src/ia32/deoptimizer-ia32.cc b/src/ia32/deoptimizer-ia32.cc index 6db0450..c061db1 100644 --- a/src/ia32/deoptimizer-ia32.cc +++ b/src/ia32/deoptimizer-ia32.cc @@ -209,7 +209,6 @@ void Deoptimizer::SetPlatformCompiledStubRegisters( void Deoptimizer::CopyDoubleRegisters(FrameDescription* output_frame) { - if (!CpuFeatures::IsSupported(SSE2)) return; for (int i = 0; i < XMMRegister::kNumAllocatableRegisters; ++i) { double double_value = input_->GetDoubleRegister(i); output_frame->SetDoubleRegister(i, double_value); @@ -231,13 +230,6 @@ bool Deoptimizer::HasAlignmentPadding(JSFunction* function) { } -Code* Deoptimizer::NotifyStubFailureBuiltin() { - Builtins::Name name = CpuFeatures::IsSupported(SSE2) ? - Builtins::kNotifyStubFailureSaveDoubles : Builtins::kNotifyStubFailure; - return isolate_->builtins()->builtin(name); -} - - #define __ masm()-> void Deoptimizer::EntryGenerator::Generate() { @@ -249,13 +241,10 @@ void Deoptimizer::EntryGenerator::Generate() { const int kDoubleRegsSize = kDoubleSize * XMMRegister::kNumAllocatableRegisters; __ sub(esp, Immediate(kDoubleRegsSize)); - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope scope(masm(), SSE2); - for (int i = 0; i < XMMRegister::kNumAllocatableRegisters; ++i) { - XMMRegister xmm_reg = XMMRegister::FromAllocationIndex(i); - int offset = i * kDoubleSize; - __ movsd(Operand(esp, offset), xmm_reg); - } + for (int i = 0; i < XMMRegister::kNumAllocatableRegisters; ++i) { + XMMRegister xmm_reg = XMMRegister::FromAllocationIndex(i); + int offset = i * kDoubleSize; + __ movsd(Operand(esp, offset), xmm_reg); } __ pushad(); @@ -300,15 +289,12 @@ void Deoptimizer::EntryGenerator::Generate() { } int double_regs_offset = FrameDescription::double_registers_offset(); - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope scope(masm(), SSE2); - // Fill in the double input registers. - for (int i = 0; i < XMMRegister::kNumAllocatableRegisters; ++i) { - int dst_offset = i * kDoubleSize + double_regs_offset; - int src_offset = i * kDoubleSize; - __ movsd(xmm0, Operand(esp, src_offset)); - __ movsd(Operand(ebx, dst_offset), xmm0); - } + // Fill in the double input registers. + for (int i = 0; i < XMMRegister::kNumAllocatableRegisters; ++i) { + int dst_offset = i * kDoubleSize + double_regs_offset; + int src_offset = i * kDoubleSize; + __ movsd(xmm0, Operand(esp, src_offset)); + __ movsd(Operand(ebx, dst_offset), xmm0); } // Clear FPU all exceptions. @@ -387,13 +373,10 @@ void Deoptimizer::EntryGenerator::Generate() { __ j(below, &outer_push_loop); // In case of a failed STUB, we have to restore the XMM registers. 
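// After a stub failure the continuation still needs its double-register inputs, so the
// loop below reloads every allocatable XMM register from the double-register area of the
// FrameDescription (ebx). With SSE2 now a baseline requirement this runs unconditionally.
// Shape of the copy, roughly (illustrative sketch only; the fixed array size stands in
// for XMMRegister::kNumAllocatableRegisters):
struct FrameDescriptionSketch { double double_registers[8]; };

static void RestoreDoubleRegistersSketch(const FrameDescriptionSketch* frame,
                                         double xmm[8]) {
  for (int i = 0; i < 8; ++i) {
    xmm[i] = frame->double_registers[i];   // movsd xmm_i, [ebx + double_regs_offset + i*8]
  }
}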
- if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope scope(masm(), SSE2); - for (int i = 0; i < XMMRegister::kNumAllocatableRegisters; ++i) { - XMMRegister xmm_reg = XMMRegister::FromAllocationIndex(i); - int src_offset = i * kDoubleSize + double_regs_offset; - __ movsd(xmm_reg, Operand(ebx, src_offset)); - } + for (int i = 0; i < XMMRegister::kNumAllocatableRegisters; ++i) { + XMMRegister xmm_reg = XMMRegister::FromAllocationIndex(i); + int src_offset = i * kDoubleSize + double_regs_offset; + __ movsd(xmm_reg, Operand(ebx, src_offset)); } // Push state, pc, and continuation from the last output frame. diff --git a/src/ia32/full-codegen-ia32.cc b/src/ia32/full-codegen-ia32.cc index 63c3ee6..f43b81d 100644 --- a/src/ia32/full-codegen-ia32.cc +++ b/src/ia32/full-codegen-ia32.cc @@ -3446,12 +3446,8 @@ void FullCodeGenerator::EmitMathPow(CallRuntime* expr) { VisitForStackValue(args->at(0)); VisitForStackValue(args->at(1)); - if (CpuFeatures::IsSupported(SSE2)) { - MathPowStub stub(isolate(), MathPowStub::ON_STACK); - __ CallStub(&stub); - } else { - __ CallRuntime(Runtime::kHiddenMathPowSlow, 2); - } + MathPowStub stub(isolate(), MathPowStub::ON_STACK); + __ CallStub(&stub); context()->Plug(eax); } diff --git a/src/ia32/ic-ia32.cc b/src/ia32/ic-ia32.cc index 52aa0ea..9108951 100644 --- a/src/ia32/ic-ia32.cc +++ b/src/ia32/ic-ia32.cc @@ -775,7 +775,7 @@ static void KeyedStoreGenerateGenericHelper( __ bind(&fast_double_without_map_check); __ StoreNumberToDoubleElements(eax, ebx, ecx, edi, xmm0, - &transition_double_elements, false); + &transition_double_elements); if (increment_length == kIncrementLength) { // Add 1 to receiver->length. __ add(FieldOperand(edx, JSArray::kLengthOffset), diff --git a/src/ia32/lithium-codegen-ia32.cc b/src/ia32/lithium-codegen-ia32.cc index 38032da..d474062 100644 --- a/src/ia32/lithium-codegen-ia32.cc +++ b/src/ia32/lithium-codegen-ia32.cc @@ -17,14 +17,6 @@ namespace v8 { namespace internal { - -static SaveFPRegsMode GetSaveFPRegsMode(Isolate* isolate) { - // We don't need to save floating point regs when generating the snapshot - return CpuFeatures::IsSafeForSnapshot(isolate, SSE2) ? kSaveFPRegs - : kDontSaveFPRegs; -} - - // When invoking builtins, we need to record the safepoint in the middle of // the invoke instruction sequence generated by the macro assembler. class SafepointGenerator V8_FINAL : public CallWrapper { @@ -103,7 +95,6 @@ void LCodeGen::SaveCallerDoubles() { ASSERT(info()->saves_caller_doubles()); ASSERT(NeedsEagerFrame()); Comment(";;; Save clobbered callee double registers"); - CpuFeatureScope scope(masm(), SSE2); int count = 0; BitVector* doubles = chunk()->allocated_double_registers(); BitVector::Iterator save_iterator(doubles); @@ -120,7 +111,6 @@ void LCodeGen::RestoreCallerDoubles() { ASSERT(info()->saves_caller_doubles()); ASSERT(NeedsEagerFrame()); Comment(";;; Restore clobbered callee double registers"); - CpuFeatureScope scope(masm(), SSE2); BitVector* doubles = chunk()->allocated_double_registers(); BitVector::Iterator save_iterator(doubles); int count = 0; @@ -253,9 +243,7 @@ bool LCodeGen::GeneratePrologue() { } } - if (info()->saves_caller_doubles() && CpuFeatures::IsSupported(SSE2)) { - SaveCallerDoubles(); - } + if (info()->saves_caller_doubles()) SaveCallerDoubles(); } // Possibly allocate a local context. 
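// The SaveCallerDoubles()/RestoreCallerDoubles() helpers earlier in this file walk the
// chunk's allocated-double-register bit vector and spill or reload one kDoubleSize stack
// slot per live XMM register; the prologue above now calls them whenever the code saves
// caller doubles, without the old SSE2 capability check. The walk looks roughly like this
// (illustrative sketch only; std::vector<bool> stands in for the BitVector):
#include <vector>

static void SaveCallerDoublesSketch(const std::vector<bool>& allocated_doubles,
                                    const double* xmm_regs, double* spill_slots) {
  int count = 0;
  for (size_t i = 0; i < allocated_doubles.size(); ++i) {
    if (!allocated_doubles[i]) continue;
    spill_slots[count++] = xmm_regs[i];    // movsd [esp + count * kDoubleSize], xmm_i
  }
}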
@@ -367,27 +355,10 @@ void LCodeGen::GenerateBodyInstructionPre(LInstruction* instr) { if (!instr->IsLazyBailout() && !instr->IsGap()) { safepoints_.BumpLastLazySafepointIndex(); } - if (!CpuFeatures::IsSupported(SSE2)) FlushX87StackIfNecessary(instr); } -void LCodeGen::GenerateBodyInstructionPost(LInstruction* instr) { - if (!CpuFeatures::IsSupported(SSE2)) { - if (instr->IsGoto()) { - x87_stack_.LeavingBlock(current_block_, LGoto::cast(instr)); - } else if (FLAG_debug_code && FLAG_enable_slow_asserts && - !instr->IsGap() && !instr->IsReturn()) { - if (instr->ClobbersDoubleRegisters(isolate())) { - if (instr->HasDoubleRegisterResult()) { - ASSERT_EQ(1, x87_stack_.depth()); - } else { - ASSERT_EQ(0, x87_stack_.depth()); - } - } - __ VerifyX87StackDepth(x87_stack_.depth()); - } - } -} +void LCodeGen::GenerateBodyInstructionPost(LInstruction* instr) { } bool LCodeGen::GenerateJumpTable() { @@ -433,9 +404,7 @@ bool LCodeGen::GenerateJumpTable() { __ ret(0); // Call the continuation without clobbering registers. } } else { - if (info()->saves_caller_doubles() && CpuFeatures::IsSupported(SSE2)) { - RestoreCallerDoubles(); - } + if (info()->saves_caller_doubles()) RestoreCallerDoubles(); __ call(entry, RelocInfo::RUNTIME_ENTRY); } } @@ -448,8 +417,6 @@ bool LCodeGen::GenerateDeferredCode() { if (deferred_.length() > 0) { for (int i = 0; !is_aborted() && i < deferred_.length(); i++) { LDeferredCode* code = deferred_[i]; - X87Stack copy(code->x87_stack()); - x87_stack_ = copy; HValue* value = instructions_->at(code->instruction_index())->hydrogen_value(); @@ -515,232 +482,17 @@ Register LCodeGen::ToRegister(int index) const { } -X87Register LCodeGen::ToX87Register(int index) const { - return X87Register::FromAllocationIndex(index); -} - - XMMRegister LCodeGen::ToDoubleRegister(int index) const { return XMMRegister::FromAllocationIndex(index); } -void LCodeGen::X87LoadForUsage(X87Register reg) { - ASSERT(x87_stack_.Contains(reg)); - x87_stack_.Fxch(reg); - x87_stack_.pop(); -} - - -void LCodeGen::X87LoadForUsage(X87Register reg1, X87Register reg2) { - ASSERT(x87_stack_.Contains(reg1)); - ASSERT(x87_stack_.Contains(reg2)); - x87_stack_.Fxch(reg1, 1); - x87_stack_.Fxch(reg2); - x87_stack_.pop(); - x87_stack_.pop(); -} - - -void LCodeGen::X87Stack::Fxch(X87Register reg, int other_slot) { - ASSERT(is_mutable_); - ASSERT(Contains(reg) && stack_depth_ > other_slot); - int i = ArrayIndex(reg); - int st = st2idx(i); - if (st != other_slot) { - int other_i = st2idx(other_slot); - X87Register other = stack_[other_i]; - stack_[other_i] = reg; - stack_[i] = other; - if (st == 0) { - __ fxch(other_slot); - } else if (other_slot == 0) { - __ fxch(st); - } else { - __ fxch(st); - __ fxch(other_slot); - __ fxch(st); - } - } -} - - -int LCodeGen::X87Stack::st2idx(int pos) { - return stack_depth_ - pos - 1; -} - - -int LCodeGen::X87Stack::ArrayIndex(X87Register reg) { - for (int i = 0; i < stack_depth_; i++) { - if (stack_[i].is(reg)) return i; - } - UNREACHABLE(); - return -1; -} - - -bool LCodeGen::X87Stack::Contains(X87Register reg) { - for (int i = 0; i < stack_depth_; i++) { - if (stack_[i].is(reg)) return true; - } - return false; -} - - -void LCodeGen::X87Stack::Free(X87Register reg) { - ASSERT(is_mutable_); - ASSERT(Contains(reg)); - int i = ArrayIndex(reg); - int st = st2idx(i); - if (st > 0) { - // keep track of how fstp(i) changes the order of elements - int tos_i = st2idx(0); - stack_[i] = stack_[tos_i]; - } - pop(); - __ fstp(st); -} - - -void LCodeGen::X87Mov(X87Register dst, Operand src, 
X87OperandType opts) { - if (x87_stack_.Contains(dst)) { - x87_stack_.Fxch(dst); - __ fstp(0); - } else { - x87_stack_.push(dst); - } - X87Fld(src, opts); -} - - -void LCodeGen::X87Fld(Operand src, X87OperandType opts) { - ASSERT(!src.is_reg_only()); - switch (opts) { - case kX87DoubleOperand: - __ fld_d(src); - break; - case kX87FloatOperand: - __ fld_s(src); - break; - case kX87IntOperand: - __ fild_s(src); - break; - default: - UNREACHABLE(); - } -} - - -void LCodeGen::X87Mov(Operand dst, X87Register src, X87OperandType opts) { - ASSERT(!dst.is_reg_only()); - x87_stack_.Fxch(src); - switch (opts) { - case kX87DoubleOperand: - __ fst_d(dst); - break; - case kX87IntOperand: - __ fist_s(dst); - break; - default: - UNREACHABLE(); - } -} - - -void LCodeGen::X87Stack::PrepareToWrite(X87Register reg) { - ASSERT(is_mutable_); - if (Contains(reg)) { - Free(reg); - } - // Mark this register as the next register to write to - stack_[stack_depth_] = reg; -} - - -void LCodeGen::X87Stack::CommitWrite(X87Register reg) { - ASSERT(is_mutable_); - // Assert the reg is prepared to write, but not on the virtual stack yet - ASSERT(!Contains(reg) && stack_[stack_depth_].is(reg) && - stack_depth_ < X87Register::kNumAllocatableRegisters); - stack_depth_++; -} - - -void LCodeGen::X87PrepareBinaryOp( - X87Register left, X87Register right, X87Register result) { - // You need to use DefineSameAsFirst for x87 instructions - ASSERT(result.is(left)); - x87_stack_.Fxch(right, 1); - x87_stack_.Fxch(left); -} - - -void LCodeGen::X87Stack::FlushIfNecessary(LInstruction* instr, LCodeGen* cgen) { - if (stack_depth_ > 0 && instr->ClobbersDoubleRegisters(isolate())) { - bool double_inputs = instr->HasDoubleRegisterInput(); - - // Flush stack from tos down, since FreeX87() will mess with tos - for (int i = stack_depth_-1; i >= 0; i--) { - X87Register reg = stack_[i]; - // Skip registers which contain the inputs for the next instruction - // when flushing the stack - if (double_inputs && instr->IsDoubleInput(reg, cgen)) { - continue; - } - Free(reg); - if (i < stack_depth_-1) i++; - } - } - if (instr->IsReturn()) { - while (stack_depth_ > 0) { - __ fstp(0); - stack_depth_--; - } - if (FLAG_debug_code && FLAG_enable_slow_asserts) __ VerifyX87StackDepth(0); - } -} - - -void LCodeGen::X87Stack::LeavingBlock(int current_block_id, LGoto* goto_instr) { - ASSERT(stack_depth_ <= 1); - // If ever used for new stubs producing two pairs of doubles joined into two - // phis this assert hits. That situation is not handled, since the two stacks - // might have st0 and st1 swapped. - if (current_block_id + 1 != goto_instr->block_id()) { - // If we have a value on the x87 stack on leaving a block, it must be a - // phi input. If the next block we compile is not the join block, we have - // to discard the stack state. - stack_depth_ = 0; - } -} - - -void LCodeGen::EmitFlushX87ForDeopt() { - // The deoptimizer does not support X87 Registers. But as long as we - // deopt from a stub its not a problem, since we will re-materialize the - // original stub inputs, which can't be double registers. 
- ASSERT(info()->IsStub()); - if (FLAG_debug_code && FLAG_enable_slow_asserts) { - __ pushfd(); - __ VerifyX87StackDepth(x87_stack_.depth()); - __ popfd(); - } - for (int i = 0; i < x87_stack_.depth(); i++) __ fstp(0); -} - - Register LCodeGen::ToRegister(LOperand* op) const { ASSERT(op->IsRegister()); return ToRegister(op->index()); } -X87Register LCodeGen::ToX87Register(LOperand* op) const { - ASSERT(op->IsDoubleRegister()); - return ToX87Register(op->index()); -} - - XMMRegister LCodeGen::ToDoubleRegister(LOperand* op) const { ASSERT(op->IsDoubleRegister()); return ToDoubleRegister(op->index()); @@ -1092,17 +844,6 @@ void LCodeGen::DeoptimizeIf(Condition cc, __ popfd(); } - // Before Instructions which can deopt, we normally flush the x87 stack. But - // we can have inputs or outputs of the current instruction on the stack, - // thus we need to flush them here from the physical stack to leave it in a - // consistent state. - if (x87_stack_.depth() > 0) { - Label done; - if (cc != no_condition) __ j(NegateCondition(cc), &done, Label::kNear); - EmitFlushX87ForDeopt(); - __ bind(&done); - } - if (info()->ShouldTrapOnDeopt()) { Label done; if (cc != no_condition) __ j(NegateCondition(cc), &done, Label::kNear); @@ -1963,41 +1704,32 @@ void LCodeGen::DoConstantD(LConstantD* instr) { int32_t upper = static_cast(int_val >> (kBitsPerInt)); ASSERT(instr->result()->IsDoubleRegister()); - if (!CpuFeatures::IsSafeForSnapshot(isolate(), SSE2)) { - __ push(Immediate(upper)); - __ push(Immediate(lower)); - X87Register reg = ToX87Register(instr->result()); - X87Mov(reg, Operand(esp, 0)); - __ add(Operand(esp), Immediate(kDoubleSize)); + XMMRegister res = ToDoubleRegister(instr->result()); + if (int_val == 0) { + __ xorps(res, res); } else { - CpuFeatureScope scope1(masm(), SSE2); - XMMRegister res = ToDoubleRegister(instr->result()); - if (int_val == 0) { - __ xorps(res, res); - } else { - Register temp = ToRegister(instr->temp()); - if (CpuFeatures::IsSupported(SSE4_1)) { - CpuFeatureScope scope2(masm(), SSE4_1); - if (lower != 0) { - __ Move(temp, Immediate(lower)); - __ movd(res, Operand(temp)); - __ Move(temp, Immediate(upper)); - __ pinsrd(res, Operand(temp), 1); - } else { - __ xorps(res, res); - __ Move(temp, Immediate(upper)); - __ pinsrd(res, Operand(temp), 1); - } + Register temp = ToRegister(instr->temp()); + if (CpuFeatures::IsSupported(SSE4_1)) { + CpuFeatureScope scope2(masm(), SSE4_1); + if (lower != 0) { + __ Move(temp, Immediate(lower)); + __ movd(res, Operand(temp)); + __ Move(temp, Immediate(upper)); + __ pinsrd(res, Operand(temp), 1); } else { + __ xorps(res, res); __ Move(temp, Immediate(upper)); - __ movd(res, Operand(temp)); - __ psllq(res, 32); - if (lower != 0) { - XMMRegister xmm_scratch = double_scratch0(); - __ Move(temp, Immediate(lower)); - __ movd(xmm_scratch, Operand(temp)); - __ orps(res, xmm_scratch); - } + __ pinsrd(res, Operand(temp), 1); + } + } else { + __ Move(temp, Immediate(upper)); + __ movd(res, Operand(temp)); + __ psllq(res, 32); + if (lower != 0) { + XMMRegister xmm_scratch = double_scratch0(); + __ Move(temp, Immediate(lower)); + __ movd(xmm_scratch, Operand(temp)); + __ orps(res, xmm_scratch); } } } @@ -2180,7 +1912,6 @@ void LCodeGen::DoAddI(LAddI* instr) { void LCodeGen::DoMathMinMax(LMathMinMax* instr) { - CpuFeatureScope scope(masm(), SSE2); LOperand* left = instr->left(); LOperand* right = instr->right(); ASSERT(left->Equals(instr->result())); @@ -2243,88 +1974,45 @@ void LCodeGen::DoMathMinMax(LMathMinMax* instr) { void 
LCodeGen::DoArithmeticD(LArithmeticD* instr) { - if (CpuFeatures::IsSafeForSnapshot(isolate(), SSE2)) { - CpuFeatureScope scope(masm(), SSE2); - XMMRegister left = ToDoubleRegister(instr->left()); - XMMRegister right = ToDoubleRegister(instr->right()); - XMMRegister result = ToDoubleRegister(instr->result()); - switch (instr->op()) { - case Token::ADD: - __ addsd(left, right); - break; - case Token::SUB: - __ subsd(left, right); - break; - case Token::MUL: - __ mulsd(left, right); - break; - case Token::DIV: - __ divsd(left, right); - // Don't delete this mov. It may improve performance on some CPUs, - // when there is a mulsd depending on the result - __ movaps(left, left); - break; - case Token::MOD: { - // Pass two doubles as arguments on the stack. - __ PrepareCallCFunction(4, eax); - __ movsd(Operand(esp, 0 * kDoubleSize), left); - __ movsd(Operand(esp, 1 * kDoubleSize), right); - __ CallCFunction( - ExternalReference::mod_two_doubles_operation(isolate()), - 4); - - // Return value is in st(0) on ia32. - // Store it into the result register. - __ sub(Operand(esp), Immediate(kDoubleSize)); - __ fstp_d(Operand(esp, 0)); - __ movsd(result, Operand(esp, 0)); - __ add(Operand(esp), Immediate(kDoubleSize)); - break; - } - default: - UNREACHABLE(); - break; - } - } else { - X87Register left = ToX87Register(instr->left()); - X87Register right = ToX87Register(instr->right()); - X87Register result = ToX87Register(instr->result()); - if (instr->op() != Token::MOD) { - X87PrepareBinaryOp(left, right, result); - } - switch (instr->op()) { - case Token::ADD: - __ fadd_i(1); - break; - case Token::SUB: - __ fsub_i(1); - break; - case Token::MUL: - __ fmul_i(1); - break; - case Token::DIV: - __ fdiv_i(1); - break; - case Token::MOD: { - // Pass two doubles as arguments on the stack. - __ PrepareCallCFunction(4, eax); - X87Mov(Operand(esp, 1 * kDoubleSize), right); - X87Mov(Operand(esp, 0), left); - X87Free(right); - ASSERT(left.is(result)); - X87PrepareToWrite(result); - __ CallCFunction( - ExternalReference::mod_two_doubles_operation(isolate()), - 4); - - // Return value is in st(0) on ia32. - X87CommitWrite(result); - break; - } - default: - UNREACHABLE(); - break; + XMMRegister left = ToDoubleRegister(instr->left()); + XMMRegister right = ToDoubleRegister(instr->right()); + XMMRegister result = ToDoubleRegister(instr->result()); + switch (instr->op()) { + case Token::ADD: + __ addsd(left, right); + break; + case Token::SUB: + __ subsd(left, right); + break; + case Token::MUL: + __ mulsd(left, right); + break; + case Token::DIV: + __ divsd(left, right); + // Don't delete this mov. It may improve performance on some CPUs, + // when there is a mulsd depending on the result + __ movaps(left, left); + break; + case Token::MOD: { + // Pass two doubles as arguments on the stack. + __ PrepareCallCFunction(4, eax); + __ movsd(Operand(esp, 0 * kDoubleSize), left); + __ movsd(Operand(esp, 1 * kDoubleSize), right); + __ CallCFunction( + ExternalReference::mod_two_doubles_operation(isolate()), + 4); + + // Return value is in st(0) on ia32. + // Store it into the result register. 
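// There is no direct x87-to-SSE move, so the double that the C helper returns in st(0)
// (the ia32 calling convention for floating-point results) is spilled to a temporary
// stack slot with fstp_d and reloaded into the result XMM register with movsd below.
// Semantically the whole MOD branch reduces to this (illustrative sketch only;
// mod_two_doubles_operation is assumed to behave like fmod):
#include <cmath>

static double ArithmeticModSketch(double left, double right) {
  return std::fmod(left, right);   // computed by the C helper, returned in st(0)
}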
+ __ sub(Operand(esp), Immediate(kDoubleSize)); + __ fstp_d(Operand(esp, 0)); + __ movsd(result, Operand(esp, 0)); + __ add(Operand(esp), Immediate(kDoubleSize)); + break; } + default: + UNREACHABLE(); + break; } } @@ -2379,7 +2067,6 @@ void LCodeGen::DoBranch(LBranch* instr) { EmitBranch(instr, not_zero); } else if (r.IsDouble()) { ASSERT(!info()->IsStub()); - CpuFeatureScope scope(masm(), SSE2); XMMRegister reg = ToDoubleRegister(instr->value()); XMMRegister xmm_scratch = double_scratch0(); __ xorps(xmm_scratch, xmm_scratch); @@ -2402,7 +2089,6 @@ void LCodeGen::DoBranch(LBranch* instr) { EmitBranch(instr, no_condition); } else if (type.IsHeapNumber()) { ASSERT(!info()->IsStub()); - CpuFeatureScope scope(masm(), SSE2); XMMRegister xmm_scratch = double_scratch0(); __ xorps(xmm_scratch, xmm_scratch); __ ucomisd(xmm_scratch, FieldOperand(reg, HeapNumber::kValueOffset)); @@ -2488,16 +2174,9 @@ void LCodeGen::DoBranch(LBranch* instr) { __ cmp(FieldOperand(reg, HeapObject::kMapOffset), factory()->heap_number_map()); __ j(not_equal, ¬_heap_number, Label::kNear); - if (CpuFeatures::IsSafeForSnapshot(isolate(), SSE2)) { - CpuFeatureScope scope(masm(), SSE2); - XMMRegister xmm_scratch = double_scratch0(); - __ xorps(xmm_scratch, xmm_scratch); - __ ucomisd(xmm_scratch, FieldOperand(reg, HeapNumber::kValueOffset)); - } else { - __ fldz(); - __ fld_d(FieldOperand(reg, HeapNumber::kValueOffset)); - __ FCmp(); - } + XMMRegister xmm_scratch = double_scratch0(); + __ xorps(xmm_scratch, xmm_scratch); + __ ucomisd(xmm_scratch, FieldOperand(reg, HeapNumber::kValueOffset)); __ j(zero, instr->FalseLabel(chunk_)); __ jmp(instr->TrueLabel(chunk_)); __ bind(¬_heap_number); @@ -2520,10 +2199,6 @@ void LCodeGen::EmitGoto(int block) { } -void LCodeGen::DoClobberDoubles(LClobberDoubles* instr) { -} - - void LCodeGen::DoGoto(LGoto* instr) { EmitGoto(instr->block_id()); } @@ -2575,13 +2250,7 @@ void LCodeGen::DoCompareNumericAndBranch(LCompareNumericAndBranch* instr) { EmitGoto(next_block); } else { if (instr->is_double()) { - if (CpuFeatures::IsSafeForSnapshot(isolate(), SSE2)) { - CpuFeatureScope scope(masm(), SSE2); - __ ucomisd(ToDoubleRegister(left), ToDoubleRegister(right)); - } else { - X87LoadForUsage(ToX87Register(right), ToX87Register(left)); - __ FCmp(); - } + __ ucomisd(ToDoubleRegister(left), ToDoubleRegister(right)); // Don't base result on EFLAGS when a NaN is involved. Instead // jump to the false block. 
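// ucomisd reports a NaN operand as an "unordered" comparison by setting the parity flag,
// so the parity_even jump below sends any NaN straight to the false block before the real
// condition is tested. In C++ terms (illustrative sketch only; the '<' stands in for
// whatever condition the LIR instruction carries):
#include <cmath>

static bool CompareNumericSketch(double left, double right) {
  if (std::isnan(left) || std::isnan(right)) return false;  // parity_even -> FalseLabel
  return left < right;
}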
__ j(parity_even, instr->FalseLabel(chunk_)); @@ -2625,35 +2294,12 @@ void LCodeGen::DoCmpHoleAndBranch(LCmpHoleAndBranch* instr) { return; } - bool use_sse2 = CpuFeatures::IsSupported(SSE2); - if (use_sse2) { - CpuFeatureScope scope(masm(), SSE2); - XMMRegister input_reg = ToDoubleRegister(instr->object()); - __ ucomisd(input_reg, input_reg); - EmitFalseBranch(instr, parity_odd); - } else { - // Put the value to the top of stack - X87Register src = ToX87Register(instr->object()); - X87LoadForUsage(src); - __ fld(0); - __ fld(0); - __ FCmp(); - Label ok; - __ j(parity_even, &ok, Label::kNear); - __ fstp(0); - EmitFalseBranch(instr, no_condition); - __ bind(&ok); - } - + XMMRegister input_reg = ToDoubleRegister(instr->object()); + __ ucomisd(input_reg, input_reg); + EmitFalseBranch(instr, parity_odd); __ sub(esp, Immediate(kDoubleSize)); - if (use_sse2) { - CpuFeatureScope scope(masm(), SSE2); - XMMRegister input_reg = ToDoubleRegister(instr->object()); - __ movsd(MemOperand(esp, 0), input_reg); - } else { - __ fstp_d(MemOperand(esp, 0)); - } + __ movsd(MemOperand(esp, 0), input_reg); __ add(esp, Immediate(kDoubleSize)); int offset = sizeof(kHoleNanUpper32); @@ -2668,7 +2314,6 @@ void LCodeGen::DoCompareMinusZeroAndBranch(LCompareMinusZeroAndBranch* instr) { Register scratch = ToRegister(instr->temp()); if (rep.IsDouble()) { - CpuFeatureScope use_sse2(masm(), SSE2); XMMRegister value = ToDoubleRegister(instr->value()); XMMRegister xmm_scratch = double_scratch0(); __ xorps(xmm_scratch, xmm_scratch); @@ -2971,9 +2616,8 @@ void LCodeGen::DoInstanceOfKnownGlobal(LInstanceOfKnownGlobal* instr) { class DeferredInstanceOfKnownGlobal V8_FINAL : public LDeferredCode { public: DeferredInstanceOfKnownGlobal(LCodeGen* codegen, - LInstanceOfKnownGlobal* instr, - const X87Stack& x87_stack) - : LDeferredCode(codegen, x87_stack), instr_(instr) { } + LInstanceOfKnownGlobal* instr) + : LDeferredCode(codegen), instr_(instr) { } virtual void Generate() V8_OVERRIDE { codegen()->DoDeferredInstanceOfKnownGlobal(instr_, &map_check_); } @@ -2985,7 +2629,7 @@ void LCodeGen::DoInstanceOfKnownGlobal(LInstanceOfKnownGlobal* instr) { }; DeferredInstanceOfKnownGlobal* deferred; - deferred = new(zone()) DeferredInstanceOfKnownGlobal(this, instr, x87_stack_); + deferred = new(zone()) DeferredInstanceOfKnownGlobal(this, instr); Label done, false_result; Register object = ToRegister(instr->value()); @@ -3134,9 +2778,7 @@ void LCodeGen::DoReturn(LReturn* instr) { __ mov(esi, Operand(ebp, StandardFrameConstants::kContextOffset)); __ CallRuntime(Runtime::kTraceExit, 1); } - if (info()->saves_caller_doubles() && CpuFeatures::IsSupported(SSE2)) { - RestoreCallerDoubles(); - } + if (info()->saves_caller_doubles()) RestoreCallerDoubles(); if (dynamic_frame_alignment_) { // Fetch the state of the dynamic frame alignment. 
__ mov(edx, Operand(ebp, @@ -3251,7 +2893,7 @@ void LCodeGen::DoStoreContextSlot(LStoreContextSlot* instr) { offset, value, temp, - GetSaveFPRegsMode(isolate()), + kSaveFPRegs, EMIT_REMEMBERED_SET, check_needed); } @@ -3276,13 +2918,8 @@ void LCodeGen::DoLoadNamedField(LLoadNamedField* instr) { Register object = ToRegister(instr->object()); if (instr->hydrogen()->representation().IsDouble()) { - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope scope(masm(), SSE2); - XMMRegister result = ToDoubleRegister(instr->result()); - __ movsd(result, FieldOperand(object, offset)); - } else { - X87Mov(ToX87Register(instr->result()), FieldOperand(object, offset)); - } + XMMRegister result = ToDoubleRegister(instr->result()); + __ movsd(result, FieldOperand(object, offset)); return; } @@ -3409,22 +3046,12 @@ void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { instr->additional_index())); if (elements_kind == EXTERNAL_FLOAT32_ELEMENTS || elements_kind == FLOAT32_ELEMENTS) { - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope scope(masm(), SSE2); - XMMRegister result(ToDoubleRegister(instr->result())); - __ movss(result, operand); - __ cvtss2sd(result, result); - } else { - X87Mov(ToX87Register(instr->result()), operand, kX87FloatOperand); - } + XMMRegister result(ToDoubleRegister(instr->result())); + __ movss(result, operand); + __ cvtss2sd(result, result); } else if (elements_kind == EXTERNAL_FLOAT64_ELEMENTS || elements_kind == FLOAT64_ELEMENTS) { - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope scope(masm(), SSE2); - __ movsd(ToDoubleRegister(instr->result()), operand); - } else { - X87Mov(ToX87Register(instr->result()), operand); - } + __ movsd(ToDoubleRegister(instr->result()), operand); } else { Register result(ToRegister(instr->result())); switch (elements_kind) { @@ -3498,13 +3125,8 @@ void LCodeGen::DoLoadKeyedFixedDoubleArray(LLoadKeyed* instr) { FAST_DOUBLE_ELEMENTS, FixedDoubleArray::kHeaderSize - kHeapObjectTag, instr->additional_index()); - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope scope(masm(), SSE2); - XMMRegister result = ToDoubleRegister(instr->result()); - __ movsd(result, double_load_operand); - } else { - X87Mov(ToX87Register(instr->result()), double_load_operand); - } + XMMRegister result = ToDoubleRegister(instr->result()); + __ movsd(result, double_load_operand); } @@ -3926,9 +3548,8 @@ void LCodeGen::DoMathAbs(LMathAbs* instr) { class DeferredMathAbsTaggedHeapNumber V8_FINAL : public LDeferredCode { public: DeferredMathAbsTaggedHeapNumber(LCodeGen* codegen, - LMathAbs* instr, - const X87Stack& x87_stack) - : LDeferredCode(codegen, x87_stack), instr_(instr) { } + LMathAbs* instr) + : LDeferredCode(codegen), instr_(instr) { } virtual void Generate() V8_OVERRIDE { codegen()->DoDeferredMathAbsTaggedHeapNumber(instr_); } @@ -3940,7 +3561,6 @@ void LCodeGen::DoMathAbs(LMathAbs* instr) { ASSERT(instr->value()->Equals(instr->result())); Representation r = instr->hydrogen()->value()->representation(); - CpuFeatureScope scope(masm(), SSE2); if (r.IsDouble()) { XMMRegister scratch = double_scratch0(); XMMRegister input_reg = ToDoubleRegister(instr->value()); @@ -3951,7 +3571,7 @@ void LCodeGen::DoMathAbs(LMathAbs* instr) { EmitIntegerMathAbs(instr); } else { // Tagged case. DeferredMathAbsTaggedHeapNumber* deferred = - new(zone()) DeferredMathAbsTaggedHeapNumber(this, instr, x87_stack_); + new(zone()) DeferredMathAbsTaggedHeapNumber(this, instr); Register input_reg = ToRegister(instr->value()); // Smi check. 
__ JumpIfNotSmi(input_reg, deferred->entry()); @@ -3962,7 +3582,6 @@ void LCodeGen::DoMathAbs(LMathAbs* instr) { void LCodeGen::DoMathFloor(LMathFloor* instr) { - CpuFeatureScope scope(masm(), SSE2); XMMRegister xmm_scratch = double_scratch0(); Register output_reg = ToRegister(instr->result()); XMMRegister input_reg = ToDoubleRegister(instr->value()); @@ -4028,7 +3647,6 @@ void LCodeGen::DoMathFloor(LMathFloor* instr) { void LCodeGen::DoMathRound(LMathRound* instr) { - CpuFeatureScope scope(masm(), SSE2); Register output_reg = ToRegister(instr->result()); XMMRegister input_reg = ToDoubleRegister(instr->value()); XMMRegister xmm_scratch = double_scratch0(); @@ -4091,7 +3709,6 @@ void LCodeGen::DoMathRound(LMathRound* instr) { void LCodeGen::DoMathSqrt(LMathSqrt* instr) { - CpuFeatureScope scope(masm(), SSE2); XMMRegister input_reg = ToDoubleRegister(instr->value()); ASSERT(ToDoubleRegister(instr->result()).is(input_reg)); __ sqrtsd(input_reg, input_reg); @@ -4099,7 +3716,6 @@ void LCodeGen::DoMathSqrt(LMathSqrt* instr) { void LCodeGen::DoMathPowHalf(LMathPowHalf* instr) { - CpuFeatureScope scope(masm(), SSE2); XMMRegister xmm_scratch = double_scratch0(); XMMRegister input_reg = ToDoubleRegister(instr->value()); Register scratch = ToRegister(instr->temp()); @@ -4167,7 +3783,6 @@ void LCodeGen::DoPower(LPower* instr) { void LCodeGen::DoMathLog(LMathLog* instr) { - CpuFeatureScope scope(masm(), SSE2); ASSERT(instr->value()->Equals(instr->result())); XMMRegister input_reg = ToDoubleRegister(instr->value()); XMMRegister xmm_scratch = double_scratch0(); @@ -4199,7 +3814,6 @@ void LCodeGen::DoMathLog(LMathLog* instr) { void LCodeGen::DoMathClz32(LMathClz32* instr) { - CpuFeatureScope scope(masm(), SSE2); Register input = ToRegister(instr->value()); Register result = ToRegister(instr->result()); Label not_zero_input; @@ -4214,7 +3828,6 @@ void LCodeGen::DoMathClz32(LMathClz32* instr) { void LCodeGen::DoMathExp(LMathExp* instr) { - CpuFeatureScope scope(masm(), SSE2); XMMRegister input = ToDoubleRegister(instr->value()); XMMRegister result = ToDoubleRegister(instr->result()); XMMRegister temp0 = double_scratch0(); @@ -4393,14 +4006,8 @@ void LCodeGen::DoStoreNamedField(LStoreNamedField* instr) { ASSERT(access.IsInobject()); ASSERT(!instr->hydrogen()->has_transition()); ASSERT(!instr->hydrogen()->NeedsWriteBarrier()); - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope scope(masm(), SSE2); - XMMRegister value = ToDoubleRegister(instr->value()); - __ movsd(FieldOperand(object, offset), value); - } else { - X87Register value = ToX87Register(instr->value()); - X87Mov(FieldOperand(object, offset), value); - } + XMMRegister value = ToDoubleRegister(instr->value()); + __ movsd(FieldOperand(object, offset), value); return; } @@ -4419,7 +4026,7 @@ void LCodeGen::DoStoreNamedField(LStoreNamedField* instr) { HeapObject::kMapOffset, temp_map, temp, - GetSaveFPRegsMode(isolate()), + kSaveFPRegs, OMIT_REMEMBERED_SET, OMIT_SMI_CHECK); } @@ -4460,7 +4067,7 @@ void LCodeGen::DoStoreNamedField(LStoreNamedField* instr) { offset, value, temp, - GetSaveFPRegsMode(isolate()), + kSaveFPRegs, EMIT_REMEMBERED_SET, check_needed); } @@ -4520,23 +4127,12 @@ void LCodeGen::DoStoreKeyedExternalArray(LStoreKeyed* instr) { instr->additional_index())); if (elements_kind == EXTERNAL_FLOAT32_ELEMENTS || elements_kind == FLOAT32_ELEMENTS) { - if (CpuFeatures::IsSafeForSnapshot(isolate(), SSE2)) { - CpuFeatureScope scope(masm(), SSE2); - XMMRegister xmm_scratch = double_scratch0(); - __ cvtsd2ss(xmm_scratch, 
ToDoubleRegister(instr->value())); - __ movss(operand, xmm_scratch); - } else { - __ fld(0); - __ fstp_s(operand); - } + XMMRegister xmm_scratch = double_scratch0(); + __ cvtsd2ss(xmm_scratch, ToDoubleRegister(instr->value())); + __ movss(operand, xmm_scratch); } else if (elements_kind == EXTERNAL_FLOAT64_ELEMENTS || elements_kind == FLOAT64_ELEMENTS) { - if (CpuFeatures::IsSafeForSnapshot(isolate(), SSE2)) { - CpuFeatureScope scope(masm(), SSE2); - __ movsd(operand, ToDoubleRegister(instr->value())); - } else { - X87Mov(operand, ToX87Register(instr->value())); - } + __ movsd(operand, ToDoubleRegister(instr->value())); } else { Register value = ToRegister(instr->value()); switch (elements_kind) { @@ -4590,68 +4186,19 @@ void LCodeGen::DoStoreKeyedFixedDoubleArray(LStoreKeyed* instr) { FixedDoubleArray::kHeaderSize - kHeapObjectTag, instr->additional_index()); - if (CpuFeatures::IsSafeForSnapshot(isolate(), SSE2)) { - CpuFeatureScope scope(masm(), SSE2); - XMMRegister value = ToDoubleRegister(instr->value()); + XMMRegister value = ToDoubleRegister(instr->value()); - if (instr->NeedsCanonicalization()) { - Label have_value; + if (instr->NeedsCanonicalization()) { + Label have_value; - __ ucomisd(value, value); - __ j(parity_odd, &have_value, Label::kNear); // NaN. + __ ucomisd(value, value); + __ j(parity_odd, &have_value, Label::kNear); // NaN. - __ movsd(value, Operand::StaticVariable(canonical_nan_reference)); - __ bind(&have_value); - } - - __ movsd(double_store_operand, value); - } else { - // Can't use SSE2 in the serializer - if (instr->hydrogen()->IsConstantHoleStore()) { - // This means we should store the (double) hole. No floating point - // registers required. - double nan_double = FixedDoubleArray::hole_nan_as_double(); - uint64_t int_val = BitCast(nan_double); - int32_t lower = static_cast(int_val); - int32_t upper = static_cast(int_val >> (kBitsPerInt)); - - __ mov(double_store_operand, Immediate(lower)); - Operand double_store_operand2 = BuildFastArrayOperand( - instr->elements(), - instr->key(), - instr->hydrogen()->key()->representation(), - FAST_DOUBLE_ELEMENTS, - FixedDoubleArray::kHeaderSize - kHeapObjectTag + kPointerSize, - instr->additional_index()); - __ mov(double_store_operand2, Immediate(upper)); - } else { - Label no_special_nan_handling; - X87Register value = ToX87Register(instr->value()); - X87Fxch(value); - - if (instr->NeedsCanonicalization()) { - __ fld(0); - __ fld(0); - __ FCmp(); - - __ j(parity_odd, &no_special_nan_handling, Label::kNear); - __ sub(esp, Immediate(kDoubleSize)); - __ fst_d(MemOperand(esp, 0)); - __ cmp(MemOperand(esp, sizeof(kHoleNanLower32)), - Immediate(kHoleNanUpper32)); - __ add(esp, Immediate(kDoubleSize)); - Label canonicalize; - __ j(not_equal, &canonicalize, Label::kNear); - __ jmp(&no_special_nan_handling, Label::kNear); - __ bind(&canonicalize); - __ fstp(0); - __ fld_d(Operand::StaticVariable(canonical_nan_reference)); - } - - __ bind(&no_special_nan_handling); - __ fst_d(double_store_operand); - } + __ movsd(value, Operand::StaticVariable(canonical_nan_reference)); + __ bind(&have_value); } + + __ movsd(double_store_operand, value); } @@ -4692,7 +4239,7 @@ void LCodeGen::DoStoreKeyedFixedArray(LStoreKeyed* instr) { __ RecordWrite(elements, key, value, - GetSaveFPRegsMode(isolate()), + kSaveFPRegs, EMIT_REMEMBERED_SET, check_needed); } @@ -4786,9 +4333,8 @@ void LCodeGen::DoStringCharCodeAt(LStringCharCodeAt* instr) { class DeferredStringCharCodeAt V8_FINAL : public LDeferredCode { public: DeferredStringCharCodeAt(LCodeGen* 
codegen, - LStringCharCodeAt* instr, - const X87Stack& x87_stack) - : LDeferredCode(codegen, x87_stack), instr_(instr) { } + LStringCharCodeAt* instr) + : LDeferredCode(codegen), instr_(instr) { } virtual void Generate() V8_OVERRIDE { codegen()->DoDeferredStringCharCodeAt(instr_); } @@ -4798,7 +4344,7 @@ void LCodeGen::DoStringCharCodeAt(LStringCharCodeAt* instr) { }; DeferredStringCharCodeAt* deferred = - new(zone()) DeferredStringCharCodeAt(this, instr, x87_stack_); + new(zone()) DeferredStringCharCodeAt(this, instr); StringCharLoadGenerator::Generate(masm(), factory(), @@ -4845,9 +4391,8 @@ void LCodeGen::DoStringCharFromCode(LStringCharFromCode* instr) { class DeferredStringCharFromCode V8_FINAL : public LDeferredCode { public: DeferredStringCharFromCode(LCodeGen* codegen, - LStringCharFromCode* instr, - const X87Stack& x87_stack) - : LDeferredCode(codegen, x87_stack), instr_(instr) { } + LStringCharFromCode* instr) + : LDeferredCode(codegen), instr_(instr) { } virtual void Generate() V8_OVERRIDE { codegen()->DoDeferredStringCharFromCode(instr_); } @@ -4857,7 +4402,7 @@ void LCodeGen::DoStringCharFromCode(LStringCharFromCode* instr) { }; DeferredStringCharFromCode* deferred = - new(zone()) DeferredStringCharFromCode(this, instr, x87_stack_); + new(zone()) DeferredStringCharFromCode(this, instr); ASSERT(instr->hydrogen()->value()->representation().IsInteger32()); Register char_code = ToRegister(instr->char_code()); @@ -4909,36 +4454,17 @@ void LCodeGen::DoInteger32ToDouble(LInteger32ToDouble* instr) { LOperand* output = instr->result(); ASSERT(input->IsRegister() || input->IsStackSlot()); ASSERT(output->IsDoubleRegister()); - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope scope(masm(), SSE2); - __ Cvtsi2sd(ToDoubleRegister(output), ToOperand(input)); - } else if (input->IsRegister()) { - Register input_reg = ToRegister(input); - __ push(input_reg); - X87Mov(ToX87Register(output), Operand(esp, 0), kX87IntOperand); - __ pop(input_reg); - } else { - X87Mov(ToX87Register(output), ToOperand(input), kX87IntOperand); - } + __ Cvtsi2sd(ToDoubleRegister(output), ToOperand(input)); } void LCodeGen::DoUint32ToDouble(LUint32ToDouble* instr) { LOperand* input = instr->value(); LOperand* output = instr->result(); - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope scope(masm(), SSE2); - LOperand* temp = instr->temp(); - - __ LoadUint32(ToDoubleRegister(output), - ToRegister(input), - ToDoubleRegister(temp)); - } else { - X87Register res = ToX87Register(output); - X87PrepareToWrite(res); - __ LoadUint32NoSSE2(ToRegister(input)); - X87CommitWrite(res); - } + LOperand* temp = instr->temp(); + __ LoadUint32(ToDoubleRegister(output), + ToRegister(input), + ToDoubleRegister(temp)); } @@ -4946,9 +4472,8 @@ void LCodeGen::DoNumberTagI(LNumberTagI* instr) { class DeferredNumberTagI V8_FINAL : public LDeferredCode { public: DeferredNumberTagI(LCodeGen* codegen, - LNumberTagI* instr, - const X87Stack& x87_stack) - : LDeferredCode(codegen, x87_stack), instr_(instr) { } + LNumberTagI* instr) + : LDeferredCode(codegen), instr_(instr) { } virtual void Generate() V8_OVERRIDE { codegen()->DoDeferredNumberTagIU(instr_, instr_->value(), instr_->temp(), NULL, SIGNED_INT32); @@ -4963,7 +4488,7 @@ void LCodeGen::DoNumberTagI(LNumberTagI* instr) { Register reg = ToRegister(input); DeferredNumberTagI* deferred = - new(zone()) DeferredNumberTagI(this, instr, x87_stack_); + new(zone()) DeferredNumberTagI(this, instr); __ SmiTag(reg); __ j(overflow, deferred->entry()); __ bind(deferred->exit()); @@ -4973,10 
+4498,8 @@ void LCodeGen::DoNumberTagI(LNumberTagI* instr) { void LCodeGen::DoNumberTagU(LNumberTagU* instr) { class DeferredNumberTagU V8_FINAL : public LDeferredCode { public: - DeferredNumberTagU(LCodeGen* codegen, - LNumberTagU* instr, - const X87Stack& x87_stack) - : LDeferredCode(codegen, x87_stack), instr_(instr) { } + DeferredNumberTagU(LCodeGen* codegen, LNumberTagU* instr) + : LDeferredCode(codegen), instr_(instr) { } virtual void Generate() V8_OVERRIDE { codegen()->DoDeferredNumberTagIU(instr_, instr_->value(), instr_->temp1(), instr_->temp2(), UNSIGNED_INT32); @@ -4991,7 +4514,7 @@ void LCodeGen::DoNumberTagU(LNumberTagU* instr) { Register reg = ToRegister(input); DeferredNumberTagU* deferred = - new(zone()) DeferredNumberTagU(this, instr, x87_stack_); + new(zone()) DeferredNumberTagU(this, instr); __ cmp(reg, Immediate(Smi::kMaxValue)); __ j(above, deferred->entry()); __ SmiTag(reg); @@ -5015,27 +4538,9 @@ void LCodeGen::DoDeferredNumberTagIU(LInstruction* instr, // the value in there. If that fails, call the runtime system. __ SmiUntag(reg); __ xor_(reg, 0x80000000); - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope feature_scope(masm(), SSE2); - __ Cvtsi2sd(xmm_scratch, Operand(reg)); - } else { - __ push(reg); - __ fild_s(Operand(esp, 0)); - __ pop(reg); - } + __ Cvtsi2sd(xmm_scratch, Operand(reg)); } else { - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope feature_scope(masm(), SSE2); - __ LoadUint32(xmm_scratch, reg, ToDoubleRegister(temp2)); - } else { - // There's no fild variant for unsigned values, so zero-extend to a 64-bit - // int manually. - __ push(Immediate(0)); - __ push(reg); - __ fild_d(Operand(esp, 0)); - __ pop(reg); - __ pop(reg); - } + __ LoadUint32(xmm_scratch, reg, ToDoubleRegister(temp2)); } if (FLAG_inline_new) { @@ -5069,22 +4574,15 @@ void LCodeGen::DoDeferredNumberTagIU(LInstruction* instr, // Done. Put the value in xmm_scratch into the value of the allocated heap // number. 
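// DoDeferredNumberTagIU boxes an int32/uint32 that does not fit in a Smi: the value is
// converted to double in xmm_scratch (Cvtsi2sd or LoadUint32 above) and written into the
// freshly allocated HeapNumber with a single 8-byte store, where the old no-SSE2 path had
// to go through fild/fstp_d. Roughly (illustrative sketch only; HeapNumberSketch stands
// in for a real heap allocation):
#include <cstdint>

struct HeapNumberSketch { double value; };

static HeapNumberSketch* NumberTagSketch(uint32_t bits, bool is_unsigned) {
  double d = is_unsigned ? static_cast<double>(bits)
                         : static_cast<double>(static_cast<int32_t>(bits));
  HeapNumberSketch* number = new HeapNumberSketch;  // AllocateHeapNumber / runtime fallback
  number->value = d;                                // movsd [reg + kValueOffset], xmm_scratch
  return number;
}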
__ bind(&done); - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope feature_scope(masm(), SSE2); - __ movsd(FieldOperand(reg, HeapNumber::kValueOffset), xmm_scratch); - } else { - __ fstp_d(FieldOperand(reg, HeapNumber::kValueOffset)); - } + __ movsd(FieldOperand(reg, HeapNumber::kValueOffset), xmm_scratch); } void LCodeGen::DoNumberTagD(LNumberTagD* instr) { class DeferredNumberTagD V8_FINAL : public LDeferredCode { public: - DeferredNumberTagD(LCodeGen* codegen, - LNumberTagD* instr, - const X87Stack& x87_stack) - : LDeferredCode(codegen, x87_stack), instr_(instr) { } + DeferredNumberTagD(LCodeGen* codegen, LNumberTagD* instr) + : LDeferredCode(codegen), instr_(instr) { } virtual void Generate() V8_OVERRIDE { codegen()->DoDeferredNumberTagD(instr_); } @@ -5095,15 +4593,8 @@ void LCodeGen::DoNumberTagD(LNumberTagD* instr) { Register reg = ToRegister(instr->result()); - bool use_sse2 = CpuFeatures::IsSupported(SSE2); - if (!use_sse2) { - // Put the value to the top of stack - X87Register src = ToX87Register(instr->value()); - X87LoadForUsage(src); - } - DeferredNumberTagD* deferred = - new(zone()) DeferredNumberTagD(this, instr, x87_stack_); + new(zone()) DeferredNumberTagD(this, instr); if (FLAG_inline_new) { Register tmp = ToRegister(instr->temp()); __ AllocateHeapNumber(reg, tmp, no_reg, deferred->entry()); @@ -5111,13 +4602,8 @@ void LCodeGen::DoNumberTagD(LNumberTagD* instr) { __ jmp(deferred->entry()); } __ bind(deferred->exit()); - if (use_sse2) { - CpuFeatureScope scope(masm(), SSE2); - XMMRegister input_reg = ToDoubleRegister(instr->value()); - __ movsd(FieldOperand(reg, HeapNumber::kValueOffset), input_reg); - } else { - __ fstp_d(FieldOperand(reg, HeapNumber::kValueOffset)); - } + XMMRegister input_reg = ToDoubleRegister(instr->value()); + __ movsd(FieldOperand(reg, HeapNumber::kValueOffset), input_reg); } @@ -5172,76 +4658,6 @@ void LCodeGen::DoSmiUntag(LSmiUntag* instr) { } -void LCodeGen::EmitNumberUntagDNoSSE2(Register input_reg, - Register temp_reg, - X87Register res_reg, - bool can_convert_undefined_to_nan, - bool deoptimize_on_minus_zero, - LEnvironment* env, - NumberUntagDMode mode) { - Label load_smi, done; - - X87PrepareToWrite(res_reg); - if (mode == NUMBER_CANDIDATE_IS_ANY_TAGGED) { - // Smi check. - __ JumpIfSmi(input_reg, &load_smi, Label::kNear); - - // Heap number map check. - __ cmp(FieldOperand(input_reg, HeapObject::kMapOffset), - factory()->heap_number_map()); - if (!can_convert_undefined_to_nan) { - DeoptimizeIf(not_equal, env); - } else { - Label heap_number, convert; - __ j(equal, &heap_number, Label::kNear); - - // Convert undefined (or hole) to NaN. - __ cmp(input_reg, factory()->undefined_value()); - DeoptimizeIf(not_equal, env); - - __ bind(&convert); - ExternalReference nan = - ExternalReference::address_of_canonical_non_hole_nan(); - __ fld_d(Operand::StaticVariable(nan)); - __ jmp(&done, Label::kNear); - - __ bind(&heap_number); - } - // Heap number to x87 conversion. - __ fld_d(FieldOperand(input_reg, HeapNumber::kValueOffset)); - if (deoptimize_on_minus_zero) { - __ fldz(); - __ FCmp(); - __ fld_d(FieldOperand(input_reg, HeapNumber::kValueOffset)); - __ j(not_zero, &done, Label::kNear); - - // Use general purpose registers to check if we have -0.0 - __ mov(temp_reg, FieldOperand(input_reg, HeapNumber::kExponentOffset)); - __ test(temp_reg, Immediate(HeapNumber::kSignMask)); - __ j(zero, &done, Label::kNear); - - // Pop FPU stack before deoptimizing. 
- __ fstp(0); - DeoptimizeIf(not_zero, env); - } - __ jmp(&done, Label::kNear); - } else { - ASSERT(mode == NUMBER_CANDIDATE_IS_SMI); - } - - __ bind(&load_smi); - // Clobbering a temp is faster than re-tagging the - // input register since we avoid dependencies. - __ mov(temp_reg, input_reg); - __ SmiUntag(temp_reg); // Untag smi before converting to float. - __ push(temp_reg); - __ fild_s(Operand(esp, 0)); - __ add(esp, Immediate(kPointerSize)); - __ bind(&done); - X87CommitWrite(res_reg); -} - - void LCodeGen::EmitNumberUntagD(Register input_reg, Register temp_reg, XMMRegister result_reg, @@ -5357,10 +4773,8 @@ void LCodeGen::DoDeferredTaggedToI(LTaggedToI* instr, Label* done) { void LCodeGen::DoTaggedToI(LTaggedToI* instr) { class DeferredTaggedToI V8_FINAL : public LDeferredCode { public: - DeferredTaggedToI(LCodeGen* codegen, - LTaggedToI* instr, - const X87Stack& x87_stack) - : LDeferredCode(codegen, x87_stack), instr_(instr) { } + DeferredTaggedToI(LCodeGen* codegen, LTaggedToI* instr) + : LDeferredCode(codegen), instr_(instr) { } virtual void Generate() V8_OVERRIDE { codegen()->DoDeferredTaggedToI(instr_, done()); } @@ -5378,7 +4792,7 @@ void LCodeGen::DoTaggedToI(LTaggedToI* instr) { __ SmiUntag(input_reg); } else { DeferredTaggedToI* deferred = - new(zone()) DeferredTaggedToI(this, instr, x87_stack_); + new(zone()) DeferredTaggedToI(this, instr); // Optimistically untag the input. // If the input is a HeapObject, SmiUntag will set the carry flag. STATIC_ASSERT(kSmiTagSize == 1 && kSmiTag == 0); @@ -5408,25 +4822,14 @@ void LCodeGen::DoNumberUntagD(LNumberUntagD* instr) { NumberUntagDMode mode = value->representation().IsSmi() ? NUMBER_CANDIDATE_IS_SMI : NUMBER_CANDIDATE_IS_ANY_TAGGED; - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope scope(masm(), SSE2); - XMMRegister result_reg = ToDoubleRegister(result); - EmitNumberUntagD(input_reg, - temp_reg, - result_reg, - instr->hydrogen()->can_convert_undefined_to_nan(), - deoptimize_on_minus_zero, - instr->environment(), - mode); - } else { - EmitNumberUntagDNoSSE2(input_reg, - temp_reg, - ToX87Register(instr->result()), - instr->hydrogen()->can_convert_undefined_to_nan(), - deoptimize_on_minus_zero, - instr->environment(), - mode); - } + XMMRegister result_reg = ToDoubleRegister(result); + EmitNumberUntagD(input_reg, + temp_reg, + result_reg, + instr->hydrogen()->can_convert_undefined_to_nan(), + deoptimize_on_minus_zero, + instr->environment(), + mode); } @@ -5438,29 +4841,14 @@ void LCodeGen::DoDoubleToI(LDoubleToI* instr) { Register result_reg = ToRegister(result); if (instr->truncating()) { - if (CpuFeatures::IsSafeForSnapshot(isolate(), SSE2)) { - CpuFeatureScope scope(masm(), SSE2); - XMMRegister input_reg = ToDoubleRegister(input); - __ TruncateDoubleToI(result_reg, input_reg); - } else { - X87Register input_reg = ToX87Register(input); - X87Fxch(input_reg); - __ TruncateX87TOSToI(result_reg); - } + XMMRegister input_reg = ToDoubleRegister(input); + __ TruncateDoubleToI(result_reg, input_reg); } else { Label bailout, done; - if (CpuFeatures::IsSafeForSnapshot(isolate(), SSE2)) { - CpuFeatureScope scope(masm(), SSE2); - XMMRegister input_reg = ToDoubleRegister(input); - XMMRegister xmm_scratch = double_scratch0(); - __ DoubleToI(result_reg, input_reg, xmm_scratch, - instr->hydrogen()->GetMinusZeroMode(), &bailout, Label::kNear); - } else { - X87Register input_reg = ToX87Register(input); - X87Fxch(input_reg); - __ X87TOSToI(result_reg, instr->hydrogen()->GetMinusZeroMode(), - &bailout, Label::kNear); - } + XMMRegister 
input_reg = ToDoubleRegister(input); + XMMRegister xmm_scratch = double_scratch0(); + __ DoubleToI(result_reg, input_reg, xmm_scratch, + instr->hydrogen()->GetMinusZeroMode(), &bailout, Label::kNear); __ jmp(&done, Label::kNear); __ bind(&bailout); DeoptimizeIf(no_condition, instr->environment()); @@ -5477,18 +4865,10 @@ void LCodeGen::DoDoubleToSmi(LDoubleToSmi* instr) { Register result_reg = ToRegister(result); Label bailout, done; - if (CpuFeatures::IsSafeForSnapshot(isolate(), SSE2)) { - CpuFeatureScope scope(masm(), SSE2); - XMMRegister input_reg = ToDoubleRegister(input); - XMMRegister xmm_scratch = double_scratch0(); - __ DoubleToI(result_reg, input_reg, xmm_scratch, - instr->hydrogen()->GetMinusZeroMode(), &bailout, Label::kNear); - } else { - X87Register input_reg = ToX87Register(input); - X87Fxch(input_reg); - __ X87TOSToI(result_reg, instr->hydrogen()->GetMinusZeroMode(), - &bailout, Label::kNear); - } + XMMRegister input_reg = ToDoubleRegister(input); + XMMRegister xmm_scratch = double_scratch0(); + __ DoubleToI(result_reg, input_reg, xmm_scratch, + instr->hydrogen()->GetMinusZeroMode(), &bailout, Label::kNear); __ jmp(&done, Label::kNear); __ bind(&bailout); DeoptimizeIf(no_condition, instr->environment()); @@ -5592,11 +4972,8 @@ void LCodeGen::DoDeferredInstanceMigration(LCheckMaps* instr, Register object) { void LCodeGen::DoCheckMaps(LCheckMaps* instr) { class DeferredCheckMaps V8_FINAL : public LDeferredCode { public: - DeferredCheckMaps(LCodeGen* codegen, - LCheckMaps* instr, - Register object, - const X87Stack& x87_stack) - : LDeferredCode(codegen, x87_stack), instr_(instr), object_(object) { + DeferredCheckMaps(LCodeGen* codegen, LCheckMaps* instr, Register object) + : LDeferredCode(codegen), instr_(instr), object_(object) { SetExit(check_maps()); } virtual void Generate() V8_OVERRIDE { @@ -5624,7 +5001,7 @@ void LCodeGen::DoCheckMaps(LCheckMaps* instr) { DeferredCheckMaps* deferred = NULL; if (instr->hydrogen()->HasMigrationTarget()) { - deferred = new(zone()) DeferredCheckMaps(this, instr, reg, x87_stack_); + deferred = new(zone()) DeferredCheckMaps(this, instr, reg); __ bind(deferred->check_maps()); } @@ -5649,7 +5026,6 @@ void LCodeGen::DoCheckMaps(LCheckMaps* instr) { void LCodeGen::DoClampDToUint8(LClampDToUint8* instr) { - CpuFeatureScope scope(masm(), SSE2); XMMRegister value_reg = ToDoubleRegister(instr->unclamped()); XMMRegister xmm_scratch = double_scratch0(); Register result_reg = ToRegister(instr->result()); @@ -5665,8 +5041,6 @@ void LCodeGen::DoClampIToUint8(LClampIToUint8* instr) { void LCodeGen::DoClampTToUint8(LClampTToUint8* instr) { - CpuFeatureScope scope(masm(), SSE2); - ASSERT(instr->unclamped()->Equals(instr->result())); Register input_reg = ToRegister(instr->unclamped()); XMMRegister temp_xmm_reg = ToDoubleRegister(instr->temp_xmm()); @@ -5701,130 +5075,7 @@ void LCodeGen::DoClampTToUint8(LClampTToUint8* instr) { } -void LCodeGen::DoClampTToUint8NoSSE2(LClampTToUint8NoSSE2* instr) { - Register input_reg = ToRegister(instr->unclamped()); - Register result_reg = ToRegister(instr->result()); - Register scratch = ToRegister(instr->scratch()); - Register scratch2 = ToRegister(instr->scratch2()); - Register scratch3 = ToRegister(instr->scratch3()); - Label is_smi, done, heap_number, valid_exponent, - largest_value, zero_result, maybe_nan_or_infinity; - - __ JumpIfSmi(input_reg, &is_smi); - - // Check for heap number - __ cmp(FieldOperand(input_reg, HeapObject::kMapOffset), - factory()->heap_number_map()); - __ j(equal, &heap_number, Label::kNear); - - 
// Check for undefined. Undefined is converted to zero for clamping - // conversions. - __ cmp(input_reg, factory()->undefined_value()); - DeoptimizeIf(not_equal, instr->environment()); - __ jmp(&zero_result, Label::kNear); - - // Heap number - __ bind(&heap_number); - - // Surprisingly, all of the hand-crafted bit-manipulations below are much - // faster than the x86 FPU built-in instruction, especially since "banker's - // rounding" would be additionally very expensive - - // Get exponent word. - __ mov(scratch, FieldOperand(input_reg, HeapNumber::kExponentOffset)); - __ mov(scratch3, FieldOperand(input_reg, HeapNumber::kMantissaOffset)); - - // Test for negative values --> clamp to zero - __ test(scratch, scratch); - __ j(negative, &zero_result, Label::kNear); - - // Get exponent alone in scratch2. - __ mov(scratch2, scratch); - __ and_(scratch2, HeapNumber::kExponentMask); - __ shr(scratch2, HeapNumber::kExponentShift); - __ j(zero, &zero_result, Label::kNear); - __ sub(scratch2, Immediate(HeapNumber::kExponentBias - 1)); - __ j(negative, &zero_result, Label::kNear); - - const uint32_t non_int8_exponent = 7; - __ cmp(scratch2, Immediate(non_int8_exponent + 1)); - // If the exponent is too big, check for special values. - __ j(greater, &maybe_nan_or_infinity, Label::kNear); - - __ bind(&valid_exponent); - // Exponent word in scratch, exponent in scratch2. We know that 0 <= exponent - // < 7. The shift bias is the number of bits to shift the mantissa such that - // with an exponent of 7 such the that top-most one is in bit 30, allowing - // detection the rounding overflow of a 255.5 to 256 (bit 31 goes from 0 to - // 1). - int shift_bias = (30 - HeapNumber::kExponentShift) - 7 - 1; - __ lea(result_reg, MemOperand(scratch2, shift_bias)); - // Here result_reg (ecx) is the shift, scratch is the exponent word. Get the - // top bits of the mantissa. - __ and_(scratch, HeapNumber::kMantissaMask); - // Put back the implicit 1 of the mantissa - __ or_(scratch, 1 << HeapNumber::kExponentShift); - // Shift up to round - __ shl_cl(scratch); - // Use "banker's rounding" to spec: If fractional part of number is 0.5, then - // use the bit in the "ones" place and add it to the "halves" place, which has - // the effect of rounding to even. - __ mov(scratch2, scratch); - const uint32_t one_half_bit_shift = 30 - sizeof(uint8_t) * 8; - const uint32_t one_bit_shift = one_half_bit_shift + 1; - __ and_(scratch2, Immediate((1 << one_bit_shift) - 1)); - __ cmp(scratch2, Immediate(1 << one_half_bit_shift)); - Label no_round; - __ j(less, &no_round, Label::kNear); - Label round_up; - __ mov(scratch2, Immediate(1 << one_half_bit_shift)); - __ j(greater, &round_up, Label::kNear); - __ test(scratch3, scratch3); - __ j(not_zero, &round_up, Label::kNear); - __ mov(scratch2, scratch); - __ and_(scratch2, Immediate(1 << one_bit_shift)); - __ shr(scratch2, 1); - __ bind(&round_up); - __ add(scratch, scratch2); - __ j(overflow, &largest_value, Label::kNear); - __ bind(&no_round); - __ shr(scratch, 23); - __ mov(result_reg, scratch); - __ jmp(&done, Label::kNear); - - __ bind(&maybe_nan_or_infinity); - // Check for NaN/Infinity, all other values map to 255 - __ cmp(scratch2, Immediate(HeapNumber::kInfinityOrNanExponent + 1)); - __ j(not_equal, &largest_value, Label::kNear); - - // Check for NaN, which differs from Infinity in that at least one mantissa - // bit is set. 
- __ and_(scratch, HeapNumber::kMantissaMask); - __ or_(scratch, FieldOperand(input_reg, HeapNumber::kMantissaOffset)); - __ j(not_zero, &zero_result, Label::kNear); // M!=0 --> NaN - // Infinity -> Fall through to map to 255. - - __ bind(&largest_value); - __ mov(result_reg, Immediate(255)); - __ jmp(&done, Label::kNear); - - __ bind(&zero_result); - __ xor_(result_reg, result_reg); - __ jmp(&done, Label::kNear); - - // smi - __ bind(&is_smi); - if (!input_reg.is(result_reg)) { - __ mov(result_reg, input_reg); - } - __ SmiUntag(result_reg); - __ ClampUint8(result_reg); - __ bind(&done); -} - - void LCodeGen::DoDoubleBits(LDoubleBits* instr) { - CpuFeatureScope scope(masm(), SSE2); XMMRegister value_reg = ToDoubleRegister(instr->value()); Register result_reg = ToRegister(instr->result()); if (instr->hydrogen()->bits() == HDoubleBits::HIGH) { @@ -5846,7 +5097,6 @@ void LCodeGen::DoConstructDouble(LConstructDouble* instr) { Register hi_reg = ToRegister(instr->hi()); Register lo_reg = ToRegister(instr->lo()); XMMRegister result_reg = ToDoubleRegister(instr->result()); - CpuFeatureScope scope(masm(), SSE2); if (CpuFeatures::IsSupported(SSE4_1)) { CpuFeatureScope scope2(masm(), SSE4_1); @@ -5865,10 +5115,8 @@ void LCodeGen::DoConstructDouble(LConstructDouble* instr) { void LCodeGen::DoAllocate(LAllocate* instr) { class DeferredAllocate V8_FINAL : public LDeferredCode { public: - DeferredAllocate(LCodeGen* codegen, - LAllocate* instr, - const X87Stack& x87_stack) - : LDeferredCode(codegen, x87_stack), instr_(instr) { } + DeferredAllocate(LCodeGen* codegen, LAllocate* instr) + : LDeferredCode(codegen), instr_(instr) { } virtual void Generate() V8_OVERRIDE { codegen()->DoDeferredAllocate(instr_); } @@ -5877,8 +5125,7 @@ void LCodeGen::DoAllocate(LAllocate* instr) { LAllocate* instr_; }; - DeferredAllocate* deferred = - new(zone()) DeferredAllocate(this, instr, x87_stack_); + DeferredAllocate* deferred = new(zone()) DeferredAllocate(this, instr); Register result = ToRegister(instr->result()); Register temp = ToRegister(instr->temp()); @@ -6243,10 +5490,8 @@ void LCodeGen::DoDeferredStackCheck(LStackCheck* instr) { void LCodeGen::DoStackCheck(LStackCheck* instr) { class DeferredStackCheck V8_FINAL : public LDeferredCode { public: - DeferredStackCheck(LCodeGen* codegen, - LStackCheck* instr, - const X87Stack& x87_stack) - : LDeferredCode(codegen, x87_stack), instr_(instr) { } + DeferredStackCheck(LCodeGen* codegen, LStackCheck* instr) + : LDeferredCode(codegen), instr_(instr) { } virtual void Generate() V8_OVERRIDE { codegen()->DoDeferredStackCheck(instr_); } @@ -6277,7 +5522,7 @@ void LCodeGen::DoStackCheck(LStackCheck* instr) { ASSERT(instr->hydrogen()->is_backwards_branch()); // Perform stack overflow check if this goto needs it before jumping. 
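// The back-edge stack check below compares esp against the isolate's stack limit and only
// enters the deferred code, which invokes the stack guard, when the limit has been
// crossed; otherwise the loop back-edge falls through. Conceptually (illustrative sketch
// only; both function names are made up):
#include <cstdint>

static void HandleStackGuardSketch() { /* stack guard / interrupt handling */ }

static void BackEdgeStackCheckSketch(uintptr_t esp, uintptr_t stack_limit) {
  if (esp < stack_limit) HandleStackGuardSketch();  // deferred->entry()
}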
DeferredStackCheck* deferred_stack_check = - new(zone()) DeferredStackCheck(this, instr, x87_stack_); + new(zone()) DeferredStackCheck(this, instr); ExternalReference stack_limit = ExternalReference::address_of_stack_limit(isolate()); __ cmp(esp, Operand::StaticVariable(stack_limit)); @@ -6391,9 +5636,8 @@ void LCodeGen::DoLoadFieldByIndex(LLoadFieldByIndex* instr) { DeferredLoadMutableDouble(LCodeGen* codegen, LLoadFieldByIndex* instr, Register object, - Register index, - const X87Stack& x87_stack) - : LDeferredCode(codegen, x87_stack), + Register index) + : LDeferredCode(codegen), instr_(instr), object_(object), index_(index) { @@ -6413,7 +5657,7 @@ void LCodeGen::DoLoadFieldByIndex(LLoadFieldByIndex* instr) { DeferredLoadMutableDouble* deferred; deferred = new(zone()) DeferredLoadMutableDouble( - this, instr, object, index, x87_stack_); + this, instr, object, index); Label out_of_object, done; __ test(index, Immediate(Smi::FromInt(1))); diff --git a/src/ia32/lithium-codegen-ia32.h b/src/ia32/lithium-codegen-ia32.h index f4542ee..d771893 100644 --- a/src/ia32/lithium-codegen-ia32.h +++ b/src/ia32/lithium-codegen-ia32.h @@ -38,7 +38,6 @@ class LCodeGen: public LCodeGenBase { support_aligned_spilled_doubles_(false), osr_pc_offset_(-1), frame_is_built_(false), - x87_stack_(assembler), safepoints_(info->zone()), resolver_(this), expected_safepoint_kind_(Safepoint::kSimple) { @@ -67,7 +66,6 @@ class LCodeGen: public LCodeGenBase { Operand ToOperand(LOperand* op) const; Register ToRegister(LOperand* op) const; XMMRegister ToDoubleRegister(LOperand* op) const; - X87Register ToX87Register(LOperand* op) const; bool IsInteger32(LConstantOperand* op) const; bool IsSmi(LConstantOperand* op) const; @@ -76,36 +74,6 @@ class LCodeGen: public LCodeGenBase { } double ToDouble(LConstantOperand* op) const; - // Support for non-sse2 (x87) floating point stack handling. - // These functions maintain the mapping of physical stack registers to our - // virtual registers between instructions. 
- enum X87OperandType { kX87DoubleOperand, kX87FloatOperand, kX87IntOperand }; - - void X87Mov(X87Register reg, Operand src, - X87OperandType operand = kX87DoubleOperand); - void X87Mov(Operand src, X87Register reg, - X87OperandType operand = kX87DoubleOperand); - - void X87PrepareBinaryOp( - X87Register left, X87Register right, X87Register result); - - void X87LoadForUsage(X87Register reg); - void X87LoadForUsage(X87Register reg1, X87Register reg2); - void X87PrepareToWrite(X87Register reg) { x87_stack_.PrepareToWrite(reg); } - void X87CommitWrite(X87Register reg) { x87_stack_.CommitWrite(reg); } - - void X87Fxch(X87Register reg, int other_slot = 0) { - x87_stack_.Fxch(reg, other_slot); - } - void X87Free(X87Register reg) { - x87_stack_.Free(reg); - } - - - bool X87StackEmpty() { - return x87_stack_.depth() == 0; - } - Handle ToHandle(LConstantOperand* op) const; // The operand denoting the second word (the one with a higher address) of @@ -265,7 +233,6 @@ class LCodeGen: public LCodeGenBase { Register ToRegister(int index) const; XMMRegister ToDoubleRegister(int index) const; - X87Register ToX87Register(int index) const; int32_t ToRepresentation(LConstantOperand* op, const Representation& r) const; int32_t ToInteger32(LConstantOperand* op) const; ExternalReference ToExternalReference(LConstantOperand* op) const; @@ -313,15 +280,6 @@ class LCodeGen: public LCodeGenBase { LEnvironment* env, NumberUntagDMode mode = NUMBER_CANDIDATE_IS_ANY_TAGGED); - void EmitNumberUntagDNoSSE2( - Register input, - Register temp, - X87Register res_reg, - bool allow_undefined_as_nan, - bool deoptimize_on_minus_zero, - LEnvironment* env, - NumberUntagDMode mode = NUMBER_CANDIDATE_IS_ANY_TAGGED); - // Emits optimized code for typeof x == "y". Modifies input register. // Returns the condition on which a final split to // true and false label should be made, to optimize fallthrough. @@ -369,12 +327,6 @@ class LCodeGen: public LCodeGenBase { // register, or a stack slot operand. 
void EmitPushTaggedOperand(LOperand* operand); - void X87Fld(Operand src, X87OperandType opts); - - void EmitFlushX87ForDeopt(); - void FlushX87StackIfNecessary(LInstruction* instr) { - x87_stack_.FlushIfNecessary(instr, this); - } friend class LGapResolver; #ifdef _MSC_VER @@ -397,56 +349,6 @@ class LCodeGen: public LCodeGenBase { int osr_pc_offset_; bool frame_is_built_; - class X87Stack { - public: - explicit X87Stack(MacroAssembler* masm) - : stack_depth_(0), is_mutable_(true), masm_(masm) { } - explicit X87Stack(const X87Stack& other) - : stack_depth_(other.stack_depth_), is_mutable_(false), masm_(masm()) { - for (int i = 0; i < stack_depth_; i++) { - stack_[i] = other.stack_[i]; - } - } - bool operator==(const X87Stack& other) const { - if (stack_depth_ != other.stack_depth_) return false; - for (int i = 0; i < stack_depth_; i++) { - if (!stack_[i].is(other.stack_[i])) return false; - } - return true; - } - bool Contains(X87Register reg); - void Fxch(X87Register reg, int other_slot = 0); - void Free(X87Register reg); - void PrepareToWrite(X87Register reg); - void CommitWrite(X87Register reg); - void FlushIfNecessary(LInstruction* instr, LCodeGen* cgen); - void LeavingBlock(int current_block_id, LGoto* goto_instr); - int depth() const { return stack_depth_; } - void pop() { - ASSERT(is_mutable_); - stack_depth_--; - } - void push(X87Register reg) { - ASSERT(is_mutable_); - ASSERT(stack_depth_ < X87Register::kNumAllocatableRegisters); - stack_[stack_depth_] = reg; - stack_depth_++; - } - - MacroAssembler* masm() const { return masm_; } - Isolate* isolate() const { return masm_->isolate(); } - - private: - int ArrayIndex(X87Register reg); - int st2idx(int pos); - - X87Register stack_[X87Register::kNumAllocatableRegisters]; - int stack_depth_; - bool is_mutable_; - MacroAssembler* masm_; - }; - X87Stack x87_stack_; - // Builder that keeps track of safepoints in the code. The table // itself is emitted at the end of the generated code. SafepointTableBuilder safepoints_; @@ -485,11 +387,10 @@ class LCodeGen: public LCodeGenBase { class LDeferredCode : public ZoneObject { public: - explicit LDeferredCode(LCodeGen* codegen, const LCodeGen::X87Stack& x87_stack) + explicit LDeferredCode(LCodeGen* codegen) : codegen_(codegen), external_exit_(NULL), - instruction_index_(codegen->current_instruction_), - x87_stack_(x87_stack) { + instruction_index_(codegen->current_instruction_) { codegen->AddDeferredCode(this); } @@ -502,7 +403,6 @@ class LDeferredCode : public ZoneObject { Label* exit() { return external_exit_ != NULL ? external_exit_ : &exit_; } Label* done() { return codegen_->NeedsDeferredFrame() ? 
&done_ : exit(); } int instruction_index() const { return instruction_index_; } - const LCodeGen::X87Stack& x87_stack() const { return x87_stack_; } protected: LCodeGen* codegen() const { return codegen_; } @@ -515,7 +415,6 @@ class LDeferredCode : public ZoneObject { Label* external_exit_; Label done_; int instruction_index_; - LCodeGen::X87Stack x87_stack_; }; } } // namespace v8::internal diff --git a/src/ia32/lithium-gap-resolver-ia32.cc b/src/ia32/lithium-gap-resolver-ia32.cc index 34b9490..c80f89b 100644 --- a/src/ia32/lithium-gap-resolver-ia32.cc +++ b/src/ia32/lithium-gap-resolver-ia32.cc @@ -295,22 +295,13 @@ void LGapResolver::EmitMove(int index) { uint64_t int_val = BitCast(v); int32_t lower = static_cast(int_val); int32_t upper = static_cast(int_val >> kBitsPerInt); - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope scope(cgen_->masm(), SSE2); - XMMRegister dst = cgen_->ToDoubleRegister(destination); - if (int_val == 0) { - __ xorps(dst, dst); - } else { - __ push(Immediate(upper)); - __ push(Immediate(lower)); - __ movsd(dst, Operand(esp, 0)); - __ add(esp, Immediate(kDoubleSize)); - } + XMMRegister dst = cgen_->ToDoubleRegister(destination); + if (int_val == 0) { + __ xorps(dst, dst); } else { __ push(Immediate(upper)); __ push(Immediate(lower)); - X87Register dst = cgen_->ToX87Register(destination); - cgen_->X87Mov(dst, MemOperand(esp, 0)); + __ movsd(dst, Operand(esp, 0)); __ add(esp, Immediate(kDoubleSize)); } } else { @@ -328,59 +319,27 @@ void LGapResolver::EmitMove(int index) { } } else if (source->IsDoubleRegister()) { - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope scope(cgen_->masm(), SSE2); - XMMRegister src = cgen_->ToDoubleRegister(source); - if (destination->IsDoubleRegister()) { - XMMRegister dst = cgen_->ToDoubleRegister(destination); - __ movaps(dst, src); - } else { - ASSERT(destination->IsDoubleStackSlot()); - Operand dst = cgen_->ToOperand(destination); - __ movsd(dst, src); - } + XMMRegister src = cgen_->ToDoubleRegister(source); + if (destination->IsDoubleRegister()) { + XMMRegister dst = cgen_->ToDoubleRegister(destination); + __ movaps(dst, src); } else { - // load from the register onto the stack, store in destination, which must - // be a double stack slot in the non-SSE2 case. ASSERT(destination->IsDoubleStackSlot()); Operand dst = cgen_->ToOperand(destination); - X87Register src = cgen_->ToX87Register(source); - cgen_->X87Mov(dst, src); + __ movsd(dst, src); } } else if (source->IsDoubleStackSlot()) { - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope scope(cgen_->masm(), SSE2); - ASSERT(destination->IsDoubleRegister() || - destination->IsDoubleStackSlot()); - Operand src = cgen_->ToOperand(source); - if (destination->IsDoubleRegister()) { - XMMRegister dst = cgen_->ToDoubleRegister(destination); - __ movsd(dst, src); - } else { - // We rely on having xmm0 available as a fixed scratch register. - Operand dst = cgen_->ToOperand(destination); - __ movsd(xmm0, src); - __ movsd(dst, xmm0); - } + ASSERT(destination->IsDoubleRegister() || + destination->IsDoubleStackSlot()); + Operand src = cgen_->ToOperand(source); + if (destination->IsDoubleRegister()) { + XMMRegister dst = cgen_->ToDoubleRegister(destination); + __ movsd(dst, src); } else { - // load from the stack slot on top of the floating point stack, and then - // store in destination. If destination is a double register, then it - // represents the top of the stack and nothing needs to be done. 
- if (destination->IsDoubleStackSlot()) { - Register tmp = EnsureTempRegister(); - Operand src0 = cgen_->ToOperand(source); - Operand src1 = cgen_->HighOperand(source); - Operand dst0 = cgen_->ToOperand(destination); - Operand dst1 = cgen_->HighOperand(destination); - __ mov(tmp, src0); // Then use tmp to copy source to destination. - __ mov(dst0, tmp); - __ mov(tmp, src1); - __ mov(dst1, tmp); - } else { - Operand src = cgen_->ToOperand(source); - X87Register dst = cgen_->ToX87Register(destination); - cgen_->X87Mov(dst, src); - } + // We rely on having xmm0 available as a fixed scratch register. + Operand dst = cgen_->ToOperand(destination); + __ movsd(xmm0, src); + __ movsd(dst, xmm0); } } else { UNREACHABLE(); @@ -445,7 +404,6 @@ void LGapResolver::EmitSwap(int index) { __ mov(src, tmp0); } } else if (source->IsDoubleRegister() && destination->IsDoubleRegister()) { - CpuFeatureScope scope(cgen_->masm(), SSE2); // XMM register-register swap. We rely on having xmm0 // available as a fixed scratch register. XMMRegister src = cgen_->ToDoubleRegister(source); @@ -454,7 +412,6 @@ void LGapResolver::EmitSwap(int index) { __ movaps(src, dst); __ movaps(dst, xmm0); } else if (source->IsDoubleRegister() || destination->IsDoubleRegister()) { - CpuFeatureScope scope(cgen_->masm(), SSE2); // XMM register-memory swap. We rely on having xmm0 // available as a fixed scratch register. ASSERT(source->IsDoubleStackSlot() || destination->IsDoubleStackSlot()); @@ -467,7 +424,6 @@ void LGapResolver::EmitSwap(int index) { __ movsd(other, reg); __ movaps(reg, xmm0); } else if (source->IsDoubleStackSlot() && destination->IsDoubleStackSlot()) { - CpuFeatureScope scope(cgen_->masm(), SSE2); // Double-width memory-to-memory. Spill on demand to use a general // purpose temporary register and also rely on having xmm0 available as // a fixed scratch register. diff --git a/src/ia32/lithium-ia32.cc b/src/ia32/lithium-ia32.cc index 00fd0e8..067863d 100644 --- a/src/ia32/lithium-ia32.cc +++ b/src/ia32/lithium-ia32.cc @@ -60,17 +60,6 @@ bool LInstruction::HasDoubleRegisterInput() { } -bool LInstruction::IsDoubleInput(X87Register reg, LCodeGen* cgen) { - for (int i = 0; i < InputCount(); i++) { - LOperand* op = InputAt(i); - if (op != NULL && op->IsDoubleRegister()) { - if (cgen->ToX87Register(op).is(reg)) return true; - } - } - return false; -} - - void LInstruction::PrintTo(StringStream* stream) { stream->Add("%s ", this->Mnemonic()); @@ -940,16 +929,6 @@ void LChunkBuilder::VisitInstruction(HInstruction* current) { if (FLAG_stress_environments && !instr->HasEnvironment()) { instr = AssignEnvironment(instr); } - if (!CpuFeatures::IsSafeForSnapshot(isolate(), SSE2) && instr->IsGoto() && - LGoto::cast(instr)->jumps_to_join()) { - // TODO(olivf) Since phis of spilled values are joined as registers - // (not in the stack slot), we need to allow the goto gaps to keep one - // x87 register alive. To ensure all other values are still spilled, we - // insert a fpu register barrier right before. - LClobberDoubles* clobber = new(zone()) LClobberDoubles(isolate()); - clobber->set_hydrogen_value(current); - chunk_->AddInstruction(clobber, current_block_); - } chunk_->AddInstruction(instr, current_block_); if (instr->IsCall()) { @@ -1918,9 +1897,7 @@ LInstruction* LChunkBuilder::DoChange(HChange* instr) { } else { LOperand* value = UseRegister(val); bool truncating = instr->CanTruncateToInt32(); - LOperand* xmm_temp = - (CpuFeatures::IsSafeForSnapshot(isolate(), SSE2) && !truncating) - ? 
FixedTemp(xmm1) : NULL; + LOperand* xmm_temp = !truncating ? FixedTemp(xmm1) : NULL; LInstruction* result = DefineSameAsFirst(new(zone()) LTaggedToI(value, xmm_temp)); if (!val->representation().IsSmi()) result = AssignEnvironment(result); @@ -1942,8 +1919,7 @@ LInstruction* LChunkBuilder::DoChange(HChange* instr) { } else { ASSERT(to.IsInteger32()); bool truncating = instr->CanTruncateToInt32(); - bool needs_temp = - CpuFeatures::IsSafeForSnapshot(isolate(), SSE2) && !truncating; + bool needs_temp = !truncating; LOperand* value = needs_temp ? UseTempRegister(val) : UseRegister(val); LOperand* temp = needs_temp ? TempRegister() : NULL; LInstruction* result = @@ -1960,8 +1936,7 @@ LInstruction* LChunkBuilder::DoChange(HChange* instr) { } else if (val->CheckFlag(HInstruction::kUint32)) { LOperand* value = UseRegister(val); LOperand* temp1 = TempRegister(); - LOperand* temp2 = - CpuFeatures::IsSupported(SSE2) ? FixedTemp(xmm1) : NULL; + LOperand* temp2 = FixedTemp(xmm1); LNumberTagU* result = new(zone()) LNumberTagU(value, temp1, temp2); return AssignPointerMap(DefineSameAsFirst(result)); } else { @@ -2049,20 +2024,12 @@ LInstruction* LChunkBuilder::DoClampToUint8(HClampToUint8* instr) { return DefineFixed(new(zone()) LClampIToUint8(reg), eax); } else { ASSERT(input_rep.IsSmiOrTagged()); - if (CpuFeatures::IsSupported(SSE2)) { - LOperand* reg = UseFixed(value, eax); - // Register allocator doesn't (yet) support allocation of double - // temps. Reserve xmm1 explicitly. - LOperand* temp = FixedTemp(xmm1); - LClampTToUint8* result = new(zone()) LClampTToUint8(reg, temp); - return AssignEnvironment(DefineFixed(result, eax)); - } else { - LOperand* value = UseRegister(instr->value()); - LClampTToUint8NoSSE2* res = - new(zone()) LClampTToUint8NoSSE2(value, TempRegister(), - TempRegister(), TempRegister()); - return AssignEnvironment(DefineFixed(res, ecx)); - } + LOperand* reg = UseFixed(value, eax); + // Register allocator doesn't (yet) support allocation of double + // temps. Reserve xmm1 explicitly. + LOperand* temp = FixedTemp(xmm1); + LClampTToUint8* result = new(zone()) LClampTToUint8(reg, temp); + return AssignEnvironment(DefineFixed(result, eax)); } } @@ -2258,11 +2225,6 @@ LOperand* LChunkBuilder::GetStoreKeyedValueOperand(HStoreKeyed* instr) { return UseFixed(instr->value(), eax); } - if (!CpuFeatures::IsSafeForSnapshot(isolate(), SSE2) && - IsDoubleOrFloatElementsKind(elements_kind)) { - return UseRegisterAtStart(instr->value()); - } - return UseRegister(instr->value()); } diff --git a/src/ia32/lithium-ia32.h b/src/ia32/lithium-ia32.h index 01ffaaf..ae80138 100644 --- a/src/ia32/lithium-ia32.h +++ b/src/ia32/lithium-ia32.h @@ -46,9 +46,7 @@ class LCodeGen; V(ClampDToUint8) \ V(ClampIToUint8) \ V(ClampTToUint8) \ - V(ClampTToUint8NoSSE2) \ V(ClassOfTestAndBranch) \ - V(ClobberDoubles) \ V(CompareMinusZeroAndBranch) \ V(CompareNumericAndBranch) \ V(CmpObjectEqAndBranch) \ @@ -240,10 +238,7 @@ class LInstruction : public ZoneObject { bool ClobbersTemps() const { return IsCall(); } bool ClobbersRegisters() const { return IsCall(); } virtual bool ClobbersDoubleRegisters(Isolate* isolate) const { - return IsCall() || - // We only have rudimentary X87Stack tracking, thus in general - // cannot handle phi-nodes. 
- (!CpuFeatures::IsSafeForSnapshot(isolate, SSE2) && IsControl()); + return IsCall(); } virtual bool HasResult() const = 0; @@ -251,7 +246,6 @@ class LInstruction : public ZoneObject { bool HasDoubleRegisterResult(); bool HasDoubleRegisterInput(); - bool IsDoubleInput(X87Register reg, LCodeGen* cgen); LOperand* FirstInput() { return InputAt(0); } LOperand* Output() { return HasResult() ? result() : NULL; } @@ -376,20 +370,6 @@ class LInstructionGap V8_FINAL : public LGap { }; -class LClobberDoubles V8_FINAL : public LTemplateInstruction<0, 0, 0> { - public: - explicit LClobberDoubles(Isolate* isolate) { - ASSERT(!CpuFeatures::IsSafeForSnapshot(isolate, SSE2)); - } - - virtual bool ClobbersDoubleRegisters(Isolate* isolate) const V8_OVERRIDE { - return true; - } - - DECLARE_CONCRETE_INSTRUCTION(ClobberDoubles, "clobber-d") -}; - - class LGoto V8_FINAL : public LTemplateInstruction<0, 0, 0> { public: explicit LGoto(HBasicBlock* block) : block_(block) { } @@ -2476,30 +2456,6 @@ class LClampTToUint8 V8_FINAL : public LTemplateInstruction<1, 1, 1> { }; -// Truncating conversion from a tagged value to an int32. -class LClampTToUint8NoSSE2 V8_FINAL : public LTemplateInstruction<1, 1, 3> { - public: - LClampTToUint8NoSSE2(LOperand* unclamped, - LOperand* temp1, - LOperand* temp2, - LOperand* temp3) { - inputs_[0] = unclamped; - temps_[0] = temp1; - temps_[1] = temp2; - temps_[2] = temp3; - } - - LOperand* unclamped() { return inputs_[0]; } - LOperand* scratch() { return temps_[0]; } - LOperand* scratch2() { return temps_[1]; } - LOperand* scratch3() { return temps_[2]; } - - DECLARE_CONCRETE_INSTRUCTION(ClampTToUint8NoSSE2, - "clamp-t-to-uint8-nosse2") - DECLARE_HYDROGEN_ACCESSOR(UnaryOperation) -}; - - class LCheckNonSmi V8_FINAL : public LTemplateInstruction<0, 1, 0> { public: explicit LCheckNonSmi(LOperand* value) { @@ -2794,7 +2750,6 @@ class LChunkBuilder V8_FINAL : public LChunkBuilderBase { // Methods for getting operands for Use / Define / Temp. LUnallocated* ToUnallocated(Register reg); LUnallocated* ToUnallocated(XMMRegister reg); - LUnallocated* ToUnallocated(X87Register reg); // Methods for setting up define-use relationships. MUST_USE_RESULT LOperand* Use(HValue* value, LUnallocated* operand); @@ -2856,7 +2811,6 @@ class LChunkBuilder V8_FINAL : public LChunkBuilderBase { Register reg); LInstruction* DefineFixedDouble(LTemplateResultInstruction<1>* instr, XMMRegister reg); - LInstruction* DefineX87TOS(LTemplateResultInstruction<1>* instr); // Assigns an environment to an instruction. An instruction which can // deoptimize must have an environment. 
LInstruction* AssignEnvironment(LInstruction* instr); diff --git a/src/ia32/macro-assembler-ia32.cc b/src/ia32/macro-assembler-ia32.cc index f27927d..6c97226 100644 --- a/src/ia32/macro-assembler-ia32.cc +++ b/src/ia32/macro-assembler-ia32.cc @@ -244,42 +244,6 @@ void MacroAssembler::TruncateDoubleToI(Register result_reg, } -void MacroAssembler::TruncateX87TOSToI(Register result_reg) { - sub(esp, Immediate(kDoubleSize)); - fst_d(MemOperand(esp, 0)); - SlowTruncateToI(result_reg, esp, 0); - add(esp, Immediate(kDoubleSize)); -} - - -void MacroAssembler::X87TOSToI(Register result_reg, - MinusZeroMode minus_zero_mode, - Label* conversion_failed, - Label::Distance dst) { - Label done; - sub(esp, Immediate(kPointerSize)); - fld(0); - fist_s(MemOperand(esp, 0)); - fild_s(MemOperand(esp, 0)); - pop(result_reg); - FCmp(); - j(not_equal, conversion_failed, dst); - j(parity_even, conversion_failed, dst); - if (minus_zero_mode == FAIL_ON_MINUS_ZERO) { - test(result_reg, Operand(result_reg)); - j(not_zero, &done, Label::kNear); - // To check for minus zero, we load the value again as float, and check - // if that is still 0. - sub(esp, Immediate(kPointerSize)); - fst_s(MemOperand(esp, 0)); - pop(result_reg); - test(result_reg, Operand(result_reg)); - j(not_zero, conversion_failed, dst); - } - bind(&done); -} - - void MacroAssembler::DoubleToI(Register result_reg, XMMRegister input_reg, XMMRegister scratch, @@ -347,8 +311,7 @@ void MacroAssembler::TruncateHeapNumberToI(Register result_reg, fstp(0); SlowTruncateToI(result_reg, input_reg); } - } else if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope scope(this, SSE2); + } else { movsd(xmm0, FieldOperand(input_reg, HeapNumber::kValueOffset)); cvttsd2si(result_reg, Operand(xmm0)); cmp(result_reg, 0x1); @@ -372,8 +335,6 @@ void MacroAssembler::TruncateHeapNumberToI(Register result_reg, } else { SlowTruncateToI(result_reg, input_reg); } - } else { - SlowTruncateToI(result_reg, input_reg); } bind(&done); } @@ -391,60 +352,23 @@ void MacroAssembler::TaggedToI(Register result_reg, isolate()->factory()->heap_number_map()); j(not_equal, lost_precision, Label::kNear); - if (CpuFeatures::IsSafeForSnapshot(isolate(), SSE2)) { - ASSERT(!temp.is(no_xmm_reg)); - CpuFeatureScope scope(this, SSE2); + ASSERT(!temp.is(no_xmm_reg)); - movsd(xmm0, FieldOperand(input_reg, HeapNumber::kValueOffset)); - cvttsd2si(result_reg, Operand(xmm0)); - Cvtsi2sd(temp, Operand(result_reg)); - ucomisd(xmm0, temp); - RecordComment("Deferred TaggedToI: lost precision"); - j(not_equal, lost_precision, Label::kNear); - RecordComment("Deferred TaggedToI: NaN"); - j(parity_even, lost_precision, Label::kNear); - if (minus_zero_mode == FAIL_ON_MINUS_ZERO) { - test(result_reg, Operand(result_reg)); - j(not_zero, &done, Label::kNear); - movmskpd(result_reg, xmm0); - and_(result_reg, 1); - RecordComment("Deferred TaggedToI: minus zero"); - j(not_zero, lost_precision, Label::kNear); - } - } else { - // TODO(olivf) Converting a number on the fpu is actually quite slow. We - // should first try a fast conversion and then bailout to this slow case. - Label lost_precision_pop, zero_check; - Label* lost_precision_int = (minus_zero_mode == FAIL_ON_MINUS_ZERO) - ? 
&lost_precision_pop : lost_precision; - sub(esp, Immediate(kPointerSize)); - fld_d(FieldOperand(input_reg, HeapNumber::kValueOffset)); - if (minus_zero_mode == FAIL_ON_MINUS_ZERO) fld(0); - fist_s(MemOperand(esp, 0)); - fild_s(MemOperand(esp, 0)); - FCmp(); - pop(result_reg); - j(not_equal, lost_precision_int, Label::kNear); - j(parity_even, lost_precision_int, Label::kNear); // NaN. - if (minus_zero_mode == FAIL_ON_MINUS_ZERO) { - test(result_reg, Operand(result_reg)); - j(zero, &zero_check, Label::kNear); - fstp(0); - jmp(&done, Label::kNear); - bind(&zero_check); - // To check for minus zero, we load the value again as float, and check - // if that is still 0. - sub(esp, Immediate(kPointerSize)); - fstp_s(Operand(esp, 0)); - pop(result_reg); - test(result_reg, Operand(result_reg)); - j(zero, &done, Label::kNear); - jmp(lost_precision, Label::kNear); - - bind(&lost_precision_pop); - fstp(0); - jmp(lost_precision, Label::kNear); - } + movsd(xmm0, FieldOperand(input_reg, HeapNumber::kValueOffset)); + cvttsd2si(result_reg, Operand(xmm0)); + Cvtsi2sd(temp, Operand(result_reg)); + ucomisd(xmm0, temp); + RecordComment("Deferred TaggedToI: lost precision"); + j(not_equal, lost_precision, Label::kNear); + RecordComment("Deferred TaggedToI: NaN"); + j(parity_even, lost_precision, Label::kNear); + if (minus_zero_mode == FAIL_ON_MINUS_ZERO) { + test(result_reg, Operand(result_reg)); + j(not_zero, &done, Label::kNear); + movmskpd(result_reg, xmm0); + and_(result_reg, 1); + RecordComment("Deferred TaggedToI: minus zero"); + j(not_zero, lost_precision, Label::kNear); } bind(&done); } @@ -465,21 +389,6 @@ void MacroAssembler::LoadUint32(XMMRegister dst, } -void MacroAssembler::LoadUint32NoSSE2(Register src) { - Label done; - push(src); - fild_s(Operand(esp, 0)); - cmp(src, Immediate(0)); - j(not_sign, &done, Label::kNear); - ExternalReference uint32_bias = - ExternalReference::address_of_uint32_bias(); - fld_d(Operand::StaticVariable(uint32_bias)); - faddp(1); - bind(&done); - add(esp, Immediate(kPointerSize)); -} - - void MacroAssembler::RecordWriteArray(Register object, Register value, Register index, @@ -794,7 +703,6 @@ void MacroAssembler::StoreNumberToDoubleElements( Register scratch1, XMMRegister scratch2, Label* fail, - bool specialize_for_processor, int elements_offset) { Label smi_value, done, maybe_nan, not_nan, is_nan, have_double_value; JumpIfSmi(maybe_number, &smi_value, Label::kNear); @@ -813,19 +721,11 @@ void MacroAssembler::StoreNumberToDoubleElements( bind(¬_nan); ExternalReference canonical_nan_reference = ExternalReference::address_of_canonical_non_hole_nan(); - if (CpuFeatures::IsSupported(SSE2) && specialize_for_processor) { - CpuFeatureScope use_sse2(this, SSE2); - movsd(scratch2, FieldOperand(maybe_number, HeapNumber::kValueOffset)); - bind(&have_double_value); - movsd(FieldOperand(elements, key, times_4, - FixedDoubleArray::kHeaderSize - elements_offset), - scratch2); - } else { - fld_d(FieldOperand(maybe_number, HeapNumber::kValueOffset)); - bind(&have_double_value); - fstp_d(FieldOperand(elements, key, times_4, - FixedDoubleArray::kHeaderSize - elements_offset)); - } + movsd(scratch2, FieldOperand(maybe_number, HeapNumber::kValueOffset)); + bind(&have_double_value); + movsd(FieldOperand(elements, key, times_4, + FixedDoubleArray::kHeaderSize - elements_offset), + scratch2); jmp(&done); bind(&maybe_nan); @@ -835,12 +735,7 @@ void MacroAssembler::StoreNumberToDoubleElements( cmp(FieldOperand(maybe_number, HeapNumber::kValueOffset), Immediate(0)); j(zero, ¬_nan); bind(&is_nan); 
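[Editorial note, not part of the patch] With the x87 fallback gone, TaggedToI above is reduced to the SSE2-only sequence: truncate with cvttsd2si, convert the result back with Cvtsi2sd, and compare with ucomisd to detect lost precision, NaN (parity flag) and, when requested, negative zero (movmskpd). A plain C++ sketch of that same check, with an invented helper name, is:

    // Illustrative sketch, not V8 code: the "truncate, convert back,
    // compare" check performed by the SSE2-only TaggedToI path above.
    #include <cmath>
    #include <cstdint>

    static bool DoubleToInt32Exact(double input, bool fail_on_minus_zero,
                                   int32_t* out) {
      if (std::isnan(input)) return false;                        // ucomisd sets parity for NaN
      if (input < -2147483648.0 || input > 2147483647.0)
        return false;                                             // would not fit in int32
      int32_t truncated = static_cast<int32_t>(input);            // cvttsd2si
      if (static_cast<double>(truncated) != input) return false;  // Cvtsi2sd + ucomisd mismatch
      if (fail_on_minus_zero && truncated == 0 && std::signbit(input))
        return false;                                             // movmskpd sign-bit check
      *out = truncated;
      return true;
    }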
- if (CpuFeatures::IsSupported(SSE2) && specialize_for_processor) { - CpuFeatureScope use_sse2(this, SSE2); - movsd(scratch2, Operand::StaticVariable(canonical_nan_reference)); - } else { - fld_d(Operand::StaticVariable(canonical_nan_reference)); - } + movsd(scratch2, Operand::StaticVariable(canonical_nan_reference)); jmp(&have_double_value, Label::kNear); bind(&smi_value); @@ -848,19 +743,10 @@ void MacroAssembler::StoreNumberToDoubleElements( // Preserve original value. mov(scratch1, maybe_number); SmiUntag(scratch1); - if (CpuFeatures::IsSupported(SSE2) && specialize_for_processor) { - CpuFeatureScope fscope(this, SSE2); - Cvtsi2sd(scratch2, scratch1); - movsd(FieldOperand(elements, key, times_4, - FixedDoubleArray::kHeaderSize - elements_offset), - scratch2); - } else { - push(scratch1); - fild_s(Operand(esp, 0)); - pop(scratch1); - fstp_d(FieldOperand(elements, key, times_4, - FixedDoubleArray::kHeaderSize - elements_offset)); - } + Cvtsi2sd(scratch2, scratch1); + movsd(FieldOperand(elements, key, times_4, + FixedDoubleArray::kHeaderSize - elements_offset), + scratch2); bind(&done); } @@ -1093,7 +979,6 @@ void MacroAssembler::EnterExitFramePrologue() { void MacroAssembler::EnterExitFrameEpilogue(int argc, bool save_doubles) { // Optionally save all XMM registers. if (save_doubles) { - CpuFeatureScope scope(this, SSE2); int space = XMMRegister::kNumRegisters * kDoubleSize + argc * kPointerSize; sub(esp, Immediate(space)); const int offset = -2 * kPointerSize; @@ -1139,7 +1024,6 @@ void MacroAssembler::EnterApiExitFrame(int argc) { void MacroAssembler::LeaveExitFrame(bool save_doubles) { // Optionally restore all XMM registers. if (save_doubles) { - CpuFeatureScope scope(this, SSE2); const int offset = -2 * kPointerSize; for (int i = 0; i < XMMRegister::kNumRegisters; i++) { XMMRegister reg = XMMRegister::from_code(i); @@ -2212,10 +2096,7 @@ void MacroAssembler::CallRuntime(const Runtime::Function* f, // smarter. Move(eax, Immediate(num_arguments)); mov(ebx, Immediate(ExternalReference(f, isolate()))); - CEntryStub ces(isolate(), - 1, - CpuFeatures::IsSupported(SSE2) ? save_doubles - : kDontSaveFPRegs); + CEntryStub ces(isolate(), 1, save_doubles); CallStub(&ces); } @@ -2764,27 +2645,6 @@ void MacroAssembler::Ret(int bytes_dropped, Register scratch) { } -void MacroAssembler::VerifyX87StackDepth(uint32_t depth) { - // Make sure the floating point stack is either empty or has depth items. - ASSERT(depth <= 7); - // This is very expensive. - ASSERT(FLAG_debug_code && FLAG_enable_slow_asserts); - - // The top-of-stack (tos) is 7 if there is one item pushed. - int tos = (8 - depth) % 8; - const int kTopMask = 0x3800; - push(eax); - fwait(); - fnstsw_ax(); - and_(eax, kTopMask); - shr(eax, 11); - cmp(eax, Immediate(tos)); - Check(equal, kUnexpectedFPUStackDepthAfterInstruction); - fnclex(); - pop(eax); -} - - void MacroAssembler::Drop(int stack_elements) { if (stack_elements > 0) { add(esp, Immediate(stack_elements * kPointerSize)); @@ -2815,7 +2675,6 @@ void MacroAssembler::Move(const Operand& dst, const Immediate& x) { void MacroAssembler::Move(XMMRegister dst, double val) { // TODO(titzer): recognize double constants with ExternalReferences. 
- CpuFeatureScope scope(this, SSE2); uint64_t int_val = BitCast(val); if (int_val == 0) { xorps(dst, dst); @@ -3075,15 +2934,8 @@ void MacroAssembler::LookupNumberStringCache(Register object, times_twice_pointer_size, FixedArray::kHeaderSize)); JumpIfSmi(probe, not_found); - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope fscope(this, SSE2); - movsd(xmm0, FieldOperand(object, HeapNumber::kValueOffset)); - ucomisd(xmm0, FieldOperand(probe, HeapNumber::kValueOffset)); - } else { - fld_d(FieldOperand(object, HeapNumber::kValueOffset)); - fld_d(FieldOperand(probe, HeapNumber::kValueOffset)); - FCmp(); - } + movsd(xmm0, FieldOperand(object, HeapNumber::kValueOffset)); + ucomisd(xmm0, FieldOperand(probe, HeapNumber::kValueOffset)); j(parity_even, not_found); // Bail out if NaN is involved. j(not_equal, not_found); // The cache did not contain this value. jmp(&load_result_from_cache, Label::kNear); diff --git a/src/ia32/macro-assembler-ia32.h b/src/ia32/macro-assembler-ia32.h index f8c2401..6a1d7ef 100644 --- a/src/ia32/macro-assembler-ia32.h +++ b/src/ia32/macro-assembler-ia32.h @@ -370,7 +370,6 @@ class MacroAssembler: public Assembler { Register scratch1, XMMRegister scratch2, Label* fail, - bool specialize_for_processor, int offset = 0); // Compare an object's map with the specified map. @@ -439,13 +438,10 @@ class MacroAssembler: public Assembler { void TruncateHeapNumberToI(Register result_reg, Register input_reg); void TruncateDoubleToI(Register result_reg, XMMRegister input_reg); - void TruncateX87TOSToI(Register result_reg); void DoubleToI(Register result_reg, XMMRegister input_reg, XMMRegister scratch, MinusZeroMode minus_zero_mode, Label* conversion_failed, Label::Distance dst = Label::kFar); - void X87TOSToI(Register result_reg, MinusZeroMode minus_zero_mode, - Label* conversion_failed, Label::Distance dst = Label::kFar); void TaggedToI(Register result_reg, Register input_reg, XMMRegister temp, MinusZeroMode minus_zero_mode, Label* lost_precision); @@ -469,7 +465,6 @@ class MacroAssembler: public Assembler { } void LoadUint32(XMMRegister dst, Register src, XMMRegister scratch); - void LoadUint32NoSSE2(Register src); // Jump the register contains a smi. inline void JumpIfSmi(Register value, @@ -831,9 +826,6 @@ class MacroAssembler: public Assembler { return code_object_; } - // Insert code to verify that the x87 stack has the specified depth (0-7) - void VerifyX87StackDepth(uint32_t depth); - // Emit code for a truncating division by a constant. The dividend register is // unchanged, the result is in edx, and eax gets clobbered. 
void TruncatingDiv(Register dividend, int32_t divisor); diff --git a/src/ia32/stub-cache-ia32.cc b/src/ia32/stub-cache-ia32.cc index adc8cd5..4083a9b 100644 --- a/src/ia32/stub-cache-ia32.cc +++ b/src/ia32/stub-cache-ia32.cc @@ -527,34 +527,17 @@ void StoreStubCompiler::GenerateStoreTransition(MacroAssembler* masm, __ JumpIfNotSmi(value_reg, &heap_number); __ SmiUntag(value_reg); - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope use_sse2(masm, SSE2); - __ Cvtsi2sd(xmm0, value_reg); - } else { - __ push(value_reg); - __ fild_s(Operand(esp, 0)); - __ pop(value_reg); - } + __ Cvtsi2sd(xmm0, value_reg); __ SmiTag(value_reg); __ jmp(&do_store); __ bind(&heap_number); __ CheckMap(value_reg, masm->isolate()->factory()->heap_number_map(), miss_label, DONT_DO_SMI_CHECK); - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope use_sse2(masm, SSE2); - __ movsd(xmm0, FieldOperand(value_reg, HeapNumber::kValueOffset)); - } else { - __ fld_d(FieldOperand(value_reg, HeapNumber::kValueOffset)); - } + __ movsd(xmm0, FieldOperand(value_reg, HeapNumber::kValueOffset)); __ bind(&do_store); - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope use_sse2(masm, SSE2); - __ movsd(FieldOperand(storage_reg, HeapNumber::kValueOffset), xmm0); - } else { - __ fstp_d(FieldOperand(storage_reg, HeapNumber::kValueOffset)); - } + __ movsd(FieldOperand(storage_reg, HeapNumber::kValueOffset), xmm0); } // Stub never generated for non-global objects that require access @@ -721,32 +704,15 @@ void StoreStubCompiler::GenerateStoreField(MacroAssembler* masm, Label do_store, heap_number; __ JumpIfNotSmi(value_reg, &heap_number); __ SmiUntag(value_reg); - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope use_sse2(masm, SSE2); - __ Cvtsi2sd(xmm0, value_reg); - } else { - __ push(value_reg); - __ fild_s(Operand(esp, 0)); - __ pop(value_reg); - } + __ Cvtsi2sd(xmm0, value_reg); __ SmiTag(value_reg); __ jmp(&do_store); __ bind(&heap_number); __ CheckMap(value_reg, masm->isolate()->factory()->heap_number_map(), miss_label, DONT_DO_SMI_CHECK); - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope use_sse2(masm, SSE2); - __ movsd(xmm0, FieldOperand(value_reg, HeapNumber::kValueOffset)); - } else { - __ fld_d(FieldOperand(value_reg, HeapNumber::kValueOffset)); - } + __ movsd(xmm0, FieldOperand(value_reg, HeapNumber::kValueOffset)); __ bind(&do_store); - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope use_sse2(masm, SSE2); - __ movsd(FieldOperand(scratch1, HeapNumber::kValueOffset), xmm0); - } else { - __ fstp_d(FieldOperand(scratch1, HeapNumber::kValueOffset)); - } + __ movsd(FieldOperand(scratch1, HeapNumber::kValueOffset), xmm0); // Return the value (register eax). ASSERT(value_reg.is(eax)); __ ret(0); diff --git a/src/ic.cc b/src/ic.cc index 3897f88..a6efdab 100644 --- a/src/ic.cc +++ b/src/ic.cc @@ -2107,9 +2107,6 @@ RUNTIME_FUNCTION(ElementsTransitionAndStoreIC_Miss) { BinaryOpIC::State::State(Isolate* isolate, ExtraICState extra_ic_state) : isolate_(isolate) { - // We don't deserialize the SSE2 Field, since this is only used to be able - // to include SSE2 as well as non-SSE2 versions in the snapshot. For code - // generation we always want it to reflect the current state. 
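[Editorial note, not part of the patch] The BinaryOpIC::State constructor below unpacks the operation, overwrite mode and operand kinds from the ExtraICState word, and the ic.h hunk further down renumbers those bit fields now that SSE2Field is gone. For reference, a minimal stand-alone sketch of the BitField encode/decode pattern — the template name and field positions are invented, not the real declarations:

    // Illustrative sketch of BitField-style packing used for ExtraICState.
    // Assumes 'value' fits in kSize bits (the real template asserts this).
    #include <cstdint>

    template <typename T, int kShift, int kSize>
    struct MiniBitField {
      static const uint32_t kMask = ((1u << kSize) - 1) << kShift;
      static uint32_t encode(T value) {
        return static_cast<uint32_t>(value) << kShift;
      }
      static T decode(uint32_t bits) {
        return static_cast<T>((bits & kMask) >> kShift);
      }
    };

    // E.g. a 4-bit op field at bit 0 and a 2-bit overwrite mode at bit 4:
    typedef MiniBitField<int, 0, 4> OpBits;
    typedef MiniBitField<int, 4, 2> ModeBits;
    // uint32_t state = OpBits::encode(3) | ModeBits::encode(1);
    // int op = OpBits::decode(state);   // yields 3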
op_ = static_cast( FIRST_TOKEN + OpField::decode(extra_ic_state)); mode_ = OverwriteModeField::decode(extra_ic_state); @@ -2129,10 +2126,7 @@ BinaryOpIC::State::State(Isolate* isolate, ExtraICState extra_ic_state) ExtraICState BinaryOpIC::State::GetExtraICState() const { - bool sse2 = (Max(result_kind_, Max(left_kind_, right_kind_)) > SMI && - CpuFeatures::IsSafeForSnapshot(isolate(), SSE2)); ExtraICState extra_ic_state = - SSE2Field::encode(sse2) | OpField::encode(op_ - FIRST_TOKEN) | OverwriteModeField::encode(mode_) | LeftKindField::encode(left_kind_) | @@ -2453,14 +2447,9 @@ void BinaryOpIC::State::Update(Handle left, // Tagged operations can lead to non-truncating HChanges if (left->IsUndefined() || left->IsBoolean()) { left_kind_ = GENERIC; - } else if (right->IsUndefined() || right->IsBoolean()) { - right_kind_ = GENERIC; } else { - // Since the X87 is too precise, we might bail out on numbers which - // actually would truncate with 64 bit precision. - ASSERT(!CpuFeatures::IsSupported(SSE2)); - ASSERT(result_kind_ < NUMBER); - result_kind_ = NUMBER; + ASSERT(right->IsUndefined() || right->IsBoolean()); + right_kind_ = GENERIC; } } } diff --git a/src/ic.h b/src/ic.h index 895c21e..197c32e 100644 --- a/src/ic.h +++ b/src/ic.h @@ -893,14 +893,13 @@ class BinaryOpIC: public IC { STATIC_ASSERT(LAST_TOKEN - FIRST_TOKEN < (1 << 4)); class OpField: public BitField {}; class OverwriteModeField: public BitField {}; - class SSE2Field: public BitField {}; - class ResultKindField: public BitField {}; - class LeftKindField: public BitField {}; + class ResultKindField: public BitField {}; + class LeftKindField: public BitField {}; // When fixed right arg is set, we don't need to store the right kind. // Thus the two fields can overlap. - class HasFixedRightArgField: public BitField {}; - class FixedRightArgValueField: public BitField {}; - class RightKindField: public BitField {}; + class HasFixedRightArgField: public BitField {}; + class FixedRightArgValueField: public BitField {}; + class RightKindField: public BitField {}; Token::Value op_; OverwriteMode mode_; diff --git a/src/mips/code-stubs-mips.cc b/src/mips/code-stubs-mips.cc index 79af219..486e4c2 100644 --- a/src/mips/code-stubs-mips.cc +++ b/src/mips/code-stubs-mips.cc @@ -4660,11 +4660,6 @@ void StoreBufferOverflowStub::GenerateFixedRegStubsAheadOfTime( } -bool CodeStub::CanUseFPRegisters() { - return true; // FPU is a base requirement for V8. -} - - // Takes the input in 3 registers: address_ value_ and object_. A pointer to // the value has just been written into the object, now this stub makes sure // we keep the GC informed. The word in the object where the value has been @@ -4922,7 +4917,7 @@ void StoreArrayLiteralElementStub::Generate(MacroAssembler* masm) { void StubFailureTrampolineStub::Generate(MacroAssembler* masm) { - CEntryStub ces(isolate(), 1, fp_registers_ ? 
kSaveFPRegs : kDontSaveFPRegs); + CEntryStub ces(isolate(), 1, kSaveFPRegs); __ Call(ces.GetCode(), RelocInfo::CODE_TARGET); int parameter_count_offset = StubFailureTrampolineFrame::kCallerStackParameterCountFrameOffset; diff --git a/src/mips/deoptimizer-mips.cc b/src/mips/deoptimizer-mips.cc index 4297ad1..efe1d3b 100644 --- a/src/mips/deoptimizer-mips.cc +++ b/src/mips/deoptimizer-mips.cc @@ -125,11 +125,6 @@ bool Deoptimizer::HasAlignmentPadding(JSFunction* function) { } -Code* Deoptimizer::NotifyStubFailureBuiltin() { - return isolate_->builtins()->builtin(Builtins::kNotifyStubFailureSaveDoubles); -} - - #define __ masm()-> diff --git a/src/platform-posix.cc b/src/platform-posix.cc index 143bf3c..1fb6da7 100644 --- a/src/platform-posix.cc +++ b/src/platform-posix.cc @@ -55,10 +55,10 @@ static const pthread_t kNoThread = (pthread_t) 0; uint64_t OS::CpuFeaturesImpliedByPlatform() { #if V8_OS_MACOSX - // Mac OS X requires all these to install so we can assume they are present. + // Mac OS X requires CMOV to install so we can assume it is present. // These constants are defined by the CPUid instructions. const uint64_t one = 1; - return (one << SSE2) | (one << CMOV); + return one << CMOV; #else return 0; // Nothing special about the other systems. #endif diff --git a/src/platform.h b/src/platform.h index 764bd54..492e5e4 100644 --- a/src/platform.h +++ b/src/platform.h @@ -268,7 +268,7 @@ class OS { static void SignalCodeMovingGC(); // The return value indicates the CPU features we are sure of because of the - // OS. For example MacOSX doesn't run on any x86 CPUs that don't have SSE2 + // OS. For example MacOSX doesn't run on any x86 CPUs that don't have CMOV // instructions. // This is a little messy because the interpretation is subject to the cross // of the CPU and the OS. The bits in the answer correspond to the bit diff --git a/src/runtime.cc b/src/runtime.cc index d2dc1eb..130afad 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -7907,7 +7907,7 @@ RUNTIME_FUNCTION(Runtime_MathFloor) { // Slow version of Math.pow. We check for fast paths for special cases. -// Used if SSE2/VFP3 is not available. +// Used if VFP3 is not available. RUNTIME_FUNCTION(RuntimeHidden_MathPowSlow) { HandleScope scope(isolate); ASSERT(args.length() == 2); diff --git a/src/v8globals.h b/src/v8globals.h index 0693223..f2ab24a 100644 --- a/src/v8globals.h +++ b/src/v8globals.h @@ -403,7 +403,6 @@ enum StateTag { // On X86/X64, values below 32 are bits in EDX, values above 32 are bits in ECX. enum CpuFeature { SSE4_1 = 32 + 19, // x86 SSE3 = 32 + 0, // x86 - SSE2 = 26, // x86 CMOV = 15, // x86 VFP3 = 1, // ARM ARMv7 = 2, // ARM diff --git a/src/x64/assembler-x64.cc b/src/x64/assembler-x64.cc index 306a54d..bdb39a5 100644 --- a/src/x64/assembler-x64.cc +++ b/src/x64/assembler-x64.cc @@ -51,7 +51,6 @@ void CpuFeatures::Probe(bool serializer_enabled) { // SSE2 must be available on every x64 CPU. ASSERT(cpu.has_sse2()); - probed_features |= static_cast(1) << SSE2; // CMOV must be available on every x64 CPU. ASSERT(cpu.has_cmov()); diff --git a/src/x64/code-stubs-x64.cc b/src/x64/code-stubs-x64.cc index 6f3fc83..f65a94a 100644 --- a/src/x64/code-stubs-x64.cc +++ b/src/x64/code-stubs-x64.cc @@ -4197,11 +4197,6 @@ void StoreBufferOverflowStub::GenerateFixedRegStubsAheadOfTime( } -bool CodeStub::CanUseFPRegisters() { - return true; // Always have SSE2 on x64. -} - - // Takes the input in 3 registers: address_ value_ and object_. 
A pointer to // the value has just been written into the object, now this stub makes sure // we keep the GC informed. The word in the object where the value has been @@ -4476,7 +4471,7 @@ void StoreArrayLiteralElementStub::Generate(MacroAssembler* masm) { void StubFailureTrampolineStub::Generate(MacroAssembler* masm) { - CEntryStub ces(isolate(), 1, fp_registers_ ? kSaveFPRegs : kDontSaveFPRegs); + CEntryStub ces(isolate(), 1, kSaveFPRegs); __ Call(ces.GetCode(), RelocInfo::CODE_TARGET); int parameter_count_offset = StubFailureTrampolineFrame::kCallerStackParameterCountFrameOffset; diff --git a/src/x64/deoptimizer-x64.cc b/src/x64/deoptimizer-x64.cc index 9016d4b..465a71c 100644 --- a/src/x64/deoptimizer-x64.cc +++ b/src/x64/deoptimizer-x64.cc @@ -129,11 +129,6 @@ bool Deoptimizer::HasAlignmentPadding(JSFunction* function) { } -Code* Deoptimizer::NotifyStubFailureBuiltin() { - return isolate_->builtins()->builtin(Builtins::kNotifyStubFailureSaveDoubles); -} - - #define __ masm()-> void Deoptimizer::EntryGenerator::Generate() { diff --git a/test/cctest/test-assembler-ia32.cc b/test/cctest/test-assembler-ia32.cc index ba83b3d..4e082a2 100644 --- a/test/cctest/test-assembler-ia32.cc +++ b/test/cctest/test-assembler-ia32.cc @@ -152,7 +152,6 @@ typedef int (*F3)(float x); TEST(AssemblerIa323) { CcTest::InitializeVM(); - if (!CpuFeatures::IsSupported(SSE2)) return; Isolate* isolate = reinterpret_cast(CcTest::isolate()); HandleScope scope(isolate); @@ -160,11 +159,8 @@ TEST(AssemblerIa323) { v8::internal::byte buffer[256]; Assembler assm(isolate, buffer, sizeof buffer); - CHECK(CpuFeatures::IsSupported(SSE2)); - { CpuFeatureScope fscope(&assm, SSE2); - __ cvttss2si(eax, Operand(esp, 4)); - __ ret(0); - } + __ cvttss2si(eax, Operand(esp, 4)); + __ ret(0); CodeDesc desc; assm.GetCode(&desc); @@ -186,7 +182,6 @@ typedef int (*F4)(double x); TEST(AssemblerIa324) { CcTest::InitializeVM(); - if (!CpuFeatures::IsSupported(SSE2)) return; Isolate* isolate = reinterpret_cast(CcTest::isolate()); HandleScope scope(isolate); @@ -194,8 +189,6 @@ TEST(AssemblerIa324) { v8::internal::byte buffer[256]; Assembler assm(isolate, buffer, sizeof buffer); - CHECK(CpuFeatures::IsSupported(SSE2)); - CpuFeatureScope fscope(&assm, SSE2); __ cvttsd2si(eax, Operand(esp, 4)); __ ret(0); @@ -241,14 +234,12 @@ typedef double (*F5)(double x, double y); TEST(AssemblerIa326) { CcTest::InitializeVM(); - if (!CpuFeatures::IsSupported(SSE2)) return; Isolate* isolate = reinterpret_cast(CcTest::isolate()); HandleScope scope(isolate); v8::internal::byte buffer[256]; Assembler assm(isolate, buffer, sizeof buffer); - CpuFeatureScope fscope(&assm, SSE2); __ movsd(xmm0, Operand(esp, 1 * kPointerSize)); __ movsd(xmm1, Operand(esp, 3 * kPointerSize)); __ addsd(xmm0, xmm1); @@ -285,13 +276,11 @@ typedef double (*F6)(int x); TEST(AssemblerIa328) { CcTest::InitializeVM(); - if (!CpuFeatures::IsSupported(SSE2)) return; Isolate* isolate = reinterpret_cast(CcTest::isolate()); HandleScope scope(isolate); v8::internal::byte buffer[256]; Assembler assm(isolate, buffer, sizeof buffer); - CpuFeatureScope fscope(&assm, SSE2); __ mov(eax, Operand(esp, 4)); __ cvtsi2sd(xmm0, eax); // Copy xmm0 to st(0) using eight bytes of stack. @@ -462,9 +451,6 @@ void DoSSE2(const v8::FunctionCallbackInfo& args) { v8::internal::byte buffer[256]; Assembler assm(isolate, buffer, sizeof buffer); - ASSERT(CpuFeatures::IsSupported(SSE2)); - CpuFeatureScope fscope(&assm, SSE2); - // Remove return address from the stack for fix stack frame alignment. 
__ pop(ecx); @@ -500,8 +486,6 @@ void DoSSE2(const v8::FunctionCallbackInfo& args) { TEST(StackAlignmentForSSE2) { CcTest::InitializeVM(); - if (!CpuFeatures::IsSupported(SSE2)) return; - CHECK_EQ(0, OS::ActivationFrameAlignment() % 16); v8::Isolate* isolate = CcTest::isolate(); @@ -540,15 +524,13 @@ TEST(StackAlignmentForSSE2) { TEST(AssemblerIa32Extractps) { CcTest::InitializeVM(); - if (!CpuFeatures::IsSupported(SSE2) || - !CpuFeatures::IsSupported(SSE4_1)) return; + if (!CpuFeatures::IsSupported(SSE4_1)) return; Isolate* isolate = reinterpret_cast(CcTest::isolate()); HandleScope scope(isolate); v8::internal::byte buffer[256]; MacroAssembler assm(isolate, buffer, sizeof buffer); - { CpuFeatureScope fscope2(&assm, SSE2); - CpuFeatureScope fscope41(&assm, SSE4_1); + { CpuFeatureScope fscope41(&assm, SSE4_1); __ movsd(xmm1, Operand(esp, 4)); __ extractps(eax, xmm1, 0x1); __ ret(0); @@ -573,14 +555,12 @@ TEST(AssemblerIa32Extractps) { typedef int (*F8)(float x, float y); TEST(AssemblerIa32SSE) { CcTest::InitializeVM(); - if (!CpuFeatures::IsSupported(SSE2)) return; Isolate* isolate = reinterpret_cast(CcTest::isolate()); HandleScope scope(isolate); v8::internal::byte buffer[256]; MacroAssembler assm(isolate, buffer, sizeof buffer); { - CpuFeatureScope fscope(&assm, SSE2); __ movss(xmm0, Operand(esp, kPointerSize)); __ movss(xmm1, Operand(esp, 2 * kPointerSize)); __ shufps(xmm0, xmm0, 0x0); diff --git a/test/cctest/test-disasm-ia32.cc b/test/cctest/test-disasm-ia32.cc index 6972aea..de2bbdf 100644 --- a/test/cctest/test-disasm-ia32.cc +++ b/test/cctest/test-disasm-ia32.cc @@ -364,58 +364,52 @@ TEST(DisasmIa320) { // SSE instruction { - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope fscope(&assm, SSE2); - // Move operation - __ movaps(xmm0, xmm1); - __ shufps(xmm0, xmm0, 0x0); - - // logic operation - __ andps(xmm0, xmm1); - __ andps(xmm0, Operand(ebx, ecx, times_4, 10000)); - __ orps(xmm0, xmm1); - __ orps(xmm0, Operand(ebx, ecx, times_4, 10000)); - __ xorps(xmm0, xmm1); - __ xorps(xmm0, Operand(ebx, ecx, times_4, 10000)); - - // Arithmetic operation - __ addps(xmm1, xmm0); - __ addps(xmm1, Operand(ebx, ecx, times_4, 10000)); - __ subps(xmm1, xmm0); - __ subps(xmm1, Operand(ebx, ecx, times_4, 10000)); - __ mulps(xmm1, xmm0); - __ mulps(xmm1, Operand(ebx, ecx, times_4, 10000)); - __ divps(xmm1, xmm0); - __ divps(xmm1, Operand(ebx, ecx, times_4, 10000)); - } + // Move operation + __ movaps(xmm0, xmm1); + __ shufps(xmm0, xmm0, 0x0); + + // logic operation + __ andps(xmm0, xmm1); + __ andps(xmm0, Operand(ebx, ecx, times_4, 10000)); + __ orps(xmm0, xmm1); + __ orps(xmm0, Operand(ebx, ecx, times_4, 10000)); + __ xorps(xmm0, xmm1); + __ xorps(xmm0, Operand(ebx, ecx, times_4, 10000)); + + // Arithmetic operation + __ addps(xmm1, xmm0); + __ addps(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ subps(xmm1, xmm0); + __ subps(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ mulps(xmm1, xmm0); + __ mulps(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ divps(xmm1, xmm0); + __ divps(xmm1, Operand(ebx, ecx, times_4, 10000)); } { - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope fscope(&assm, SSE2); - __ cvttss2si(edx, Operand(ebx, ecx, times_4, 10000)); - __ cvtsi2sd(xmm1, Operand(ebx, ecx, times_4, 10000)); - __ movsd(xmm1, Operand(ebx, ecx, times_4, 10000)); - __ movsd(Operand(ebx, ecx, times_4, 10000), xmm1); - // 128 bit move instructions. 
- __ movdqa(xmm0, Operand(ebx, ecx, times_4, 10000)); - __ movdqa(Operand(ebx, ecx, times_4, 10000), xmm0); - __ movdqu(xmm0, Operand(ebx, ecx, times_4, 10000)); - __ movdqu(Operand(ebx, ecx, times_4, 10000), xmm0); - - __ addsd(xmm1, xmm0); - __ mulsd(xmm1, xmm0); - __ subsd(xmm1, xmm0); - __ divsd(xmm1, xmm0); - __ ucomisd(xmm0, xmm1); - __ cmpltsd(xmm0, xmm1); - - __ andpd(xmm0, xmm1); - __ psllq(xmm0, 17); - __ psllq(xmm0, xmm1); - __ psrlq(xmm0, 17); - __ psrlq(xmm0, xmm1); - __ por(xmm0, xmm1); - } + __ cvttss2si(edx, Operand(ebx, ecx, times_4, 10000)); + __ cvtsi2sd(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ movsd(xmm1, Operand(ebx, ecx, times_4, 10000)); + __ movsd(Operand(ebx, ecx, times_4, 10000), xmm1); + // 128 bit move instructions. + __ movdqa(xmm0, Operand(ebx, ecx, times_4, 10000)); + __ movdqa(Operand(ebx, ecx, times_4, 10000), xmm0); + __ movdqu(xmm0, Operand(ebx, ecx, times_4, 10000)); + __ movdqu(Operand(ebx, ecx, times_4, 10000), xmm0); + + __ addsd(xmm1, xmm0); + __ mulsd(xmm1, xmm0); + __ subsd(xmm1, xmm0); + __ divsd(xmm1, xmm0); + __ ucomisd(xmm0, xmm1); + __ cmpltsd(xmm0, xmm1); + + __ andpd(xmm0, xmm1); + __ psllq(xmm0, 17); + __ psllq(xmm0, xmm1); + __ psrlq(xmm0, 17); + __ psrlq(xmm0, xmm1); + __ por(xmm0, xmm1); } // cmov. @@ -442,8 +436,7 @@ TEST(DisasmIa320) { } { - if (CpuFeatures::IsSupported(SSE2) && - CpuFeatures::IsSupported(SSE4_1)) { + if (CpuFeatures::IsSupported(SSE4_1)) { CpuFeatureScope scope(&assm, SSE4_1); __ pextrd(eax, xmm0, 1); __ pinsrd(xmm1, eax, 0); diff --git a/test/cctest/test-macro-assembler-ia32.cc b/test/cctest/test-macro-assembler-ia32.cc index 3ad5271..45af557 100644 --- a/test/cctest/test-macro-assembler-ia32.cc +++ b/test/cctest/test-macro-assembler-ia32.cc @@ -123,20 +123,17 @@ TEST(LoadAndStoreWithRepresentation) { __ j(not_equal, &exit); // Test 5. - if (CpuFeatures::IsSupported(SSE2)) { - CpuFeatureScope scope(masm, SSE2); - __ mov(eax, Immediate(5)); // Test XMM move immediate. - __ Move(xmm0, 0.0); - __ Move(xmm1, 0.0); - __ ucomisd(xmm0, xmm1); - __ j(not_equal, &exit); - __ Move(xmm2, 991.01); - __ ucomisd(xmm0, xmm2); - __ j(equal, &exit); - __ Move(xmm0, 991.01); - __ ucomisd(xmm0, xmm2); - __ j(not_equal, &exit); - } + __ mov(eax, Immediate(5)); // Test XMM move immediate. + __ Move(xmm0, 0.0); + __ Move(xmm1, 0.0); + __ ucomisd(xmm0, xmm1); + __ j(not_equal, &exit); + __ Move(xmm2, 991.01); + __ ucomisd(xmm0, xmm2); + __ j(equal, &exit); + __ Move(xmm0, 991.01); + __ ucomisd(xmm0, xmm2); + __ j(not_equal, &exit); // Test 6. __ mov(eax, Immediate(6));
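[Editorial note, not part of the patch] The test above now exercises Move(XMMRegister, double) unconditionally; as simplified earlier in macro-assembler-ia32.cc and lithium-gap-resolver-ia32.cc, that helper materialises a constant from its 64-bit bit pattern, using xorps for a zero pattern and otherwise pushing the two 32-bit halves before a movsd load. A small stand-alone sketch of that split — the struct and function names are invented:

    // Illustrative sketch, not V8 code: split a double constant into the
    // two 32-bit halves pushed before 'movsd', plus the zero-pattern test
    // that allows the 'xorps dst, dst' fast path.
    #include <cstdint>
    #include <cstring>

    struct DoubleHalves {
      uint32_t lower;   // pushed last, ends up at the lower address
      uint32_t upper;   // pushed first, ends up at the higher address
      bool is_zero;     // true only for +0.0, enabling the xorps path
    };

    static DoubleHalves SplitDouble(double value) {
      uint64_t bits;
      std::memcpy(&bits, &value, sizeof bits);  // same role as BitCast<uint64_t>(value)
      DoubleHalves halves;
      halves.lower = static_cast<uint32_t>(bits);
      halves.upper = static_cast<uint32_t>(bits >> 32);
      halves.is_zero = (bits == 0);
      return halves;
    }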