ARM64: Optimize generated code for gaps
author    Jacob.Bramley@arm.com <Jacob.Bramley@arm.com@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
          Mon, 23 Jun 2014 16:00:53 +0000
committer Jacob.Bramley@arm.com <Jacob.Bramley@arm.com@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
          Mon, 23 Jun 2014 16:00:53 +0000
R=ulan@chromium.org, jochen@chromium.org

Review URL: https://codereview.chromium.org/268673003

git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@21945 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

src/arm64/assembler-arm64.cc
src/arm64/assembler-arm64.h
src/arm64/delayed-masm-arm64-inl.h [new file with mode: 0644]
src/arm64/delayed-masm-arm64.cc [new file with mode: 0644]
src/arm64/delayed-masm-arm64.h [new file with mode: 0644]
src/arm64/lithium-gap-resolver-arm64.cc
src/arm64/lithium-gap-resolver-arm64.h
tools/gyp/v8.gyp

diff --git a/src/arm64/assembler-arm64.cc b/src/arm64/assembler-arm64.cc
index ff0ff52029b720d23096b8bc35ff94a1d2c0b6b7..484bf10fad61e1bc2bbf437557b1490434162759 100644
--- a/src/arm64/assembler-arm64.cc
+++ b/src/arm64/assembler-arm64.cc
@@ -462,6 +462,34 @@ void ConstPool::EmitMarker() {
 }
 
 
+MemOperand::PairResult MemOperand::AreConsistentForPair(
+    const MemOperand& operandA,
+    const MemOperand& operandB,
+    int access_size_log2) {
+  ASSERT(access_size_log2 >= 0);
+  ASSERT(access_size_log2 <= 3);
+  // Step one: check that they share the same base register, that the
+  // addressing mode is Offset and that the offset is a multiple of the
+  // access size.
+  if (!operandA.base().Is(operandB.base()) ||
+      (operandA.addrmode() != Offset) ||
+      (operandB.addrmode() != Offset) ||
+      ((operandA.offset() & ((1 << access_size_log2) - 1)) != 0)) {
+    return kNotPair;
+  }
+  // Step two: check that the offsets are contiguous and that the range
+  // is OK for ldp/stp.
+  if ((operandB.offset() == operandA.offset() + (1 << access_size_log2)) &&
+      is_int7(operandA.offset() >> access_size_log2)) {
+    return kPairAB;
+  }
+  if ((operandA.offset() == operandB.offset() + (1 << access_size_log2)) &&
+      is_int7(operandB.offset() >> access_size_log2)) {
+    return kPairBA;
+  }
+  return kNotPair;
+}
+
+
 void ConstPool::EmitGuard() {
 #ifdef DEBUG
   Instruction* instr = reinterpret_cast<Instruction*>(assm_->pc());
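As a worked illustration of the conditions above, here is a minimal standalone sketch (plain C++; integer ids and offsets stand in for MemOperand's base register and offset, and the addressing-mode check is omitted). It shows the alignment, adjacency, and 7-bit scaled-immediate constraints that ldp/stp impose:

#include <cassert>
#include <cstdint>
#include <cstdio>

enum PairResult { kNotPair, kPairAB, kPairBA };

static bool IsInt7(int64_t x) { return x >= -64 && x <= 63; }

// Standalone model of MemOperand::AreConsistentForPair: same base register,
// aligned offset, and a contiguous pair whose scaled offset fits in the
// 7-bit signed immediate of ldp/stp.
PairResult CheckPair(int base_a, int64_t off_a,
                     int base_b, int64_t off_b,
                     int access_size_log2) {
  assert(access_size_log2 >= 0 && access_size_log2 <= 3);
  int64_t size = int64_t{1} << access_size_log2;
  if (base_a != base_b || (off_a & (size - 1)) != 0) return kNotPair;
  if (off_b == off_a + size && IsInt7(off_a >> access_size_log2)) return kPairAB;
  if (off_a == off_b + size && IsInt7(off_b >> access_size_log2)) return kPairBA;
  return kNotPair;
}

int main() {
  // Two X-register-sized slots at [fp, #16] and [fp, #24] pair as AB.
  std::printf("%d\n", CheckPair(29, 16, 29, 24, 3));  // prints 1 (kPairAB)
  // Non-adjacent slots do not pair.
  std::printf("%d\n", CheckPair(29, 16, 29, 32, 3));  // prints 0 (kNotPair)
  return 0;
}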
diff --git a/src/arm64/assembler-arm64.h b/src/arm64/assembler-arm64.h
index 20c8d0951533a90a7179382f0786c947822157ec..f737931a9d783d98388b2461ad73f1c314af5a00 100644
--- a/src/arm64/assembler-arm64.h
+++ b/src/arm64/assembler-arm64.h
@@ -730,6 +730,16 @@ class MemOperand {
   // handle indexed modes.
   inline Operand OffsetAsOperand() const;
 
+  enum PairResult {
+    kNotPair,   // Can't use a pair instruction.
+    kPairAB,    // Can use a pair instruction (operandA has lower address).
+    kPairBA     // Can use a pair instruction (operandB has lower address).
+  };
+  // Check if two MemOperands are consistent for ldp/stp use.
+  static PairResult AreConsistentForPair(const MemOperand& operandA,
+                                         const MemOperand& operandB,
+                                         int access_size_log2 = kXRegSizeLog2);
+
  private:
   Register base_;
   Register regoffset_;
diff --git a/src/arm64/delayed-masm-arm64-inl.h b/src/arm64/delayed-masm-arm64-inl.h
new file mode 100644
index 0000000..17d6652
--- /dev/null
+++ b/src/arm64/delayed-masm-arm64-inl.h
@@ -0,0 +1,55 @@
+// Copyright 2013 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_ARM64_DELAYED_MASM_ARM64_INL_H_
+#define V8_ARM64_DELAYED_MASM_ARM64_INL_H_
+
+#include "src/arm64/delayed-masm-arm64.h"
+
+namespace v8 {
+namespace internal {
+
+#define __ ACCESS_MASM(masm_)
+
+
+void DelayedMasm::EndDelayedUse() {
+  EmitPending();
+  ASSERT(!scratch_register_acquired_);
+  ResetSavedValue();
+}
+
+
+void DelayedMasm::Mov(const Register& rd,
+                      const Operand& operand,
+                      DiscardMoveMode discard_mode) {
+  EmitPending();
+  ASSERT(!IsScratchRegister(rd) || scratch_register_acquired_);
+  __ Mov(rd, operand, discard_mode);
+}
+
+
+void DelayedMasm::Fmov(FPRegister fd, FPRegister fn) {
+  EmitPending();
+  __ Fmov(fd, fn);
+}
+
+
+void DelayedMasm::Fmov(FPRegister fd, double imm) {
+  EmitPending();
+  __ Fmov(fd, imm);
+}
+
+
+void DelayedMasm::LoadObject(Register result, Handle<Object> object) {
+  EmitPending();
+  ASSERT(!IsScratchRegister(result) || scratch_register_acquired_);
+  __ LoadObject(result, object);
+}
+
+
+#undef __
+
+} }  // namespace v8::internal
+
+#endif  // V8_ARM64_DELAYED_MASM_ARM64_INL_H_
diff --git a/src/arm64/delayed-masm-arm64.cc b/src/arm64/delayed-masm-arm64.cc
new file mode 100644
index 0000000..b7040e1
--- /dev/null
+++ b/src/arm64/delayed-masm-arm64.cc
@@ -0,0 +1,198 @@
+// Copyright 2013 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/v8.h"
+
+#if V8_TARGET_ARCH_ARM64
+
+#include "src/arm64/delayed-masm-arm64.h"
+#include "src/arm64/lithium-codegen-arm64.h"
+
+namespace v8 {
+namespace internal {
+
+#define __ ACCESS_MASM(masm_)
+
+
+void DelayedMasm::StackSlotMove(LOperand* src, LOperand* dst) {
+  ASSERT(src->IsStackSlot());
+  ASSERT(dst->IsStackSlot());
+  MemOperand src_operand = cgen_->ToMemOperand(src);
+  MemOperand dst_operand = cgen_->ToMemOperand(dst);
+  if (pending_ == kStackSlotMove) {
+    ASSERT(pending_pc_ == masm_->pc_offset());
+    UseScratchRegisterScope scope(masm_);
+    DoubleRegister temp1 = scope.AcquireD();
+    DoubleRegister temp2 = scope.AcquireD();
+    switch (MemOperand::AreConsistentForPair(pending_address_src_,
+                                             src_operand)) {
+      case MemOperand::kNotPair:
+        __ Ldr(temp1, pending_address_src_);
+        __ Ldr(temp2, src_operand);
+        break;
+      case MemOperand::kPairAB:
+        __ Ldp(temp1, temp2, pending_address_src_);
+        break;
+      case MemOperand::kPairBA:
+        __ Ldp(temp2, temp1, src_operand);
+        break;
+    }
+    switch (MemOperand::AreConsistentForPair(pending_address_dst_,
+                                             dst_operand)) {
+      case MemOperand::kNotPair:
+        __ Str(temp1, pending_address_dst_);
+        __ Str(temp2, dst_operand);
+        break;
+      case MemOperand::kPairAB:
+        __ Stp(temp1, temp2, pending_address_dst_);
+        break;
+      case MemOperand::kPairBA:
+        __ Stp(temp2, temp1, dst_operand);
+        break;
+    }
+    ResetPending();
+    return;
+  }
+
+  EmitPending();
+  pending_ = kStackSlotMove;
+  pending_address_src_ = src_operand;
+  pending_address_dst_ = dst_operand;
+#ifdef DEBUG
+  pending_pc_ = masm_->pc_offset();
+#endif
+}
+
+
+void DelayedMasm::StoreConstant(uint64_t value, const MemOperand& operand) {
+  ASSERT(!scratch_register_acquired_);
+  if ((pending_ == kStoreConstant) && (value == pending_value_)) {
+    MemOperand::PairResult result =
+        MemOperand::AreConsistentForPair(pending_address_dst_, operand);
+    if (result != MemOperand::kNotPair) {
+      const MemOperand& dst =
+          (result == MemOperand::kPairAB) ?
+              pending_address_dst_ :
+              operand;
+      ASSERT(pending_pc_ == masm_->pc_offset());
+      if (pending_value_ == 0) {
+        __ Stp(xzr, xzr, dst);
+      } else {
+        SetSavedValue(pending_value_);
+        __ Stp(ScratchRegister(), ScratchRegister(), dst);
+      }
+      ResetPending();
+      return;
+    }
+  }
+
+  EmitPending();
+  pending_ = kStoreConstant;
+  pending_address_dst_ = operand;
+  pending_value_ = value;
+#ifdef DEBUG
+  pending_pc_ = masm_->pc_offset();
+#endif
+}
+
+
+void DelayedMasm::Load(const CPURegister& rd, const MemOperand& operand) {
+  if ((pending_ == kLoad) &&
+      pending_register_.IsSameSizeAndType(rd)) {
+    switch (MemOperand::AreConsistentForPair(pending_address_src_, operand)) {
+      case MemOperand::kNotPair:
+        break;
+      case MemOperand::kPairAB:
+        ASSERT(pending_pc_ == masm_->pc_offset());
+        ASSERT(!IsScratchRegister(pending_register_) ||
+               scratch_register_acquired_);
+        ASSERT(!IsScratchRegister(rd) || scratch_register_acquired_);
+        __ Ldp(pending_register_, rd, pending_address_src_);
+        ResetPending();
+        return;
+      case MemOperand::kPairBA:
+        ASSERT(pending_pc_ == masm_->pc_offset());
+        ASSERT(!IsScratchRegister(pending_register_) ||
+               scratch_register_acquired_);
+        ASSERT(!IsScratchRegister(rd) || scratch_register_acquired_);
+        __ Ldp(rd, pending_register_, operand);
+        ResetPending();
+        return;
+    }
+  }
+
+  EmitPending();
+  pending_ = kLoad;
+  pending_register_ = rd;
+  pending_address_src_ = operand;
+#ifdef DEBUG
+  pending_pc_ = masm_->pc_offset();
+#endif
+}
+
+
+void DelayedMasm::Store(const CPURegister& rd, const MemOperand& operand) {
+  if ((pending_ == kStore) &&
+      pending_register_.IsSameSizeAndType(rd)) {
+    switch (MemOperand::AreConsistentForPair(pending_address_dst_, operand)) {
+      case MemOperand::kNotPair:
+        break;
+      case MemOperand::kPairAB:
+        ASSERT(pending_pc_ == masm_->pc_offset());
+        __ Stp(pending_register_, rd, pending_address_dst_);
+        ResetPending();
+        return;
+      case MemOperand::kPairBA:
+        ASSERT(pending_pc_ == masm_->pc_offset());
+        __ Stp(rd, pending_register_, operand);
+        ResetPending();
+        return;
+    }
+  }
+
+  EmitPending();
+  pending_ = kStore;
+  pending_register_ = rd;
+  pending_address_dst_ = operand;
+#ifdef DEBUG
+  pending_pc_ = masm_->pc_offset();
+#endif
+}
+
+
+void DelayedMasm::EmitPending() {
+  ASSERT((pending_ == kNone) || (pending_pc_ == masm_->pc_offset()));
+  switch (pending_) {
+    case kNone:
+      return;
+    case kStoreConstant:
+      if (pending_value_ == 0) {
+        __ Str(xzr, pending_address_dst_);
+      } else {
+        SetSavedValue(pending_value_);
+        __ Str(ScratchRegister(), pending_address_dst_);
+      }
+      break;
+    case kLoad:
+      ASSERT(!IsScratchRegister(pending_register_) ||
+              scratch_register_acquired_);
+      __ Ldr(pending_register_, pending_address_src_);
+      break;
+    case kStore:
+      __ Str(pending_register_, pending_address_dst_);
+      break;
+    case kStackSlotMove: {
+      UseScratchRegisterScope scope(masm_);
+      DoubleRegister temp = scope.AcquireD();
+      __ Ldr(temp, pending_address_src_);
+      __ Str(temp, pending_address_dst_);
+      break;
+    }
+  }
+  ResetPending();
+}
+
+} }  // namespace v8::internal
+
+#endif  // V8_TARGET_ARCH_ARM64
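To make the merging concrete, the sketch below models just the StoreConstant path above: a store stays pending, and a second store of the same value to the adjacent eight-byte slot collapses into one stp; the same pending-then-merge pattern drives Load, Store and StackSlotMove. The class name, the printf trace and the fixed eight-byte slot size are illustrative assumptions rather than V8 APIs, and the ldp/stp range check from AreConsistentForPair is omitted for brevity:

#include <cstdint>
#include <cstdio>

// Minimal model of the delayed-store idea: remember one pending
// (value, offset) store; if the next store has the same value and an
// adjacent offset, emit a single "stp", otherwise flush the pending
// "str" first. Offsets stand in for MemOperands.
class DelayedStore {
 public:
  void StoreConstant(uint64_t value, int64_t offset) {
    if (has_pending_ && value == pending_value_ &&
        (offset == pending_offset_ + 8 || offset + 8 == pending_offset_)) {
      int64_t lo = offset < pending_offset_ ? offset : pending_offset_;
      std::printf("stp #%llu, #%llu, [sp, #%lld]\n",
                  (unsigned long long)value, (unsigned long long)value,
                  (long long)lo);
      has_pending_ = false;  // merged: nothing left pending
      return;
    }
    Flush();
    has_pending_ = true;     // this store becomes the pending one
    pending_value_ = value;
    pending_offset_ = offset;
  }
  void Flush() {             // analogue of EmitPending()
    if (!has_pending_) return;
    std::printf("str #%llu, [sp, #%lld]\n",
                (unsigned long long)pending_value_,
                (long long)pending_offset_);
    has_pending_ = false;
  }
 private:
  bool has_pending_ = false;
  uint64_t pending_value_ = 0;
  int64_t pending_offset_ = 0;
};

int main() {
  DelayedStore d;
  d.StoreConstant(0, 0);   // becomes pending
  d.StoreConstant(0, 8);   // merges: stp #0, #0, [sp, #0]
  d.StoreConstant(1, 16);  // becomes pending
  d.StoreConstant(2, 24);  // different value: str #1, [sp, #16], then pending
  d.Flush();               // str #2, [sp, #24]
  return 0;
}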
diff --git a/src/arm64/delayed-masm-arm64.h b/src/arm64/delayed-masm-arm64.h
new file mode 100644
index 0000000..a8782c3
--- /dev/null
+++ b/src/arm64/delayed-masm-arm64.h
@@ -0,0 +1,164 @@
+// Copyright 2013 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_ARM64_DELAYED_MASM_ARM64_H_
+#define V8_ARM64_DELAYED_MASM_ARM64_H_
+
+#include "src/lithium.h"
+
+namespace v8 {
+namespace internal {
+
+class LCodeGen;
+
+// This class delays the generation of some instructions. This gives us a
+// chance to merge two consecutive memory accesses into a single load/store
+// pair instruction. Each new instruction must either:
+//  - merge with the pending instruction and generate just one instruction, or
+//  - emit the pending instruction first and then generate the new instruction
+//    (or make it the new pending instruction).
+class DelayedMasm BASE_EMBEDDED {
+ public:
+  DelayedMasm(LCodeGen* owner,
+              MacroAssembler* masm,
+              const Register& scratch_register)
+    : cgen_(owner), masm_(masm), scratch_register_(scratch_register),
+      scratch_register_used_(false), pending_(kNone), saved_value_(0) {
+#ifdef DEBUG
+    pending_register_ = no_reg;
+    pending_value_ = 0;
+    pending_pc_ = 0;
+    scratch_register_acquired_ = false;
+#endif
+  }
+  ~DelayedMasm() {
+    ASSERT(!scratch_register_acquired_);
+    ASSERT(!scratch_register_used_);
+    ASSERT(!pending());
+  }
+  inline void EndDelayedUse();
+
+  const Register& ScratchRegister() {
+    scratch_register_used_ = true;
+    return scratch_register_;
+  }
+  bool IsScratchRegister(const CPURegister& reg) {
+    return reg.Is(scratch_register_);
+  }
+  bool scratch_register_used() const { return scratch_register_used_; }
+  void reset_scratch_register_used() { scratch_register_used_ = false; }
+  // Acquire/Release scratch register for use outside this class.
+  void AcquireScratchRegister() {
+    EmitPending();
+    ResetSavedValue();
+#ifdef DEBUG
+    ASSERT(!scratch_register_acquired_);
+    scratch_register_acquired_ = true;
+#endif
+  }
+  void ReleaseScratchRegister() {
+#ifdef DEBUG
+    ASSERT(scratch_register_acquired_);
+    scratch_register_acquired_ = false;
+#endif
+  }
+  bool pending() { return pending_ != kNone; }
+
+  // Extra layer over the macro-assembler instructions (these emit any
+  // pending instruction before generating the requested one).
+  inline void Mov(const Register& rd,
+                  const Operand& operand,
+                  DiscardMoveMode discard_mode = kDontDiscardForSameWReg);
+  inline void Fmov(FPRegister fd, FPRegister fn);
+  inline void Fmov(FPRegister fd, double imm);
+  inline void LoadObject(Register result, Handle<Object> object);
+  // Instructions which try to merge with the pending instruction.
+  void StackSlotMove(LOperand* src, LOperand* dst);
+  // StoreConstant can only be used if the scratch register is not acquired.
+  void StoreConstant(uint64_t value, const MemOperand& operand);
+  void Load(const CPURegister& rd, const MemOperand& operand);
+  void Store(const CPURegister& rd, const MemOperand& operand);
+  // Emit the pending instruction, if any.
+  void EmitPending();
+  // Reset the pending state.
+  void ResetPending() {
+    pending_ = kNone;
+#ifdef DEBUG
+    pending_register_ = no_reg;
+    MemOperand tmp;
+    pending_address_src_ = tmp;
+    pending_address_dst_ = tmp;
+    pending_value_ = 0;
+    pending_pc_ = 0;
+#endif
+  }
+  void InitializeRootRegister() {
+    masm_->InitializeRootRegister();
+  }
+
+ private:
+  // Set the saved value and load the ScratchRegister with it.
+  void SetSavedValue(uint64_t saved_value) {
+    ASSERT(saved_value != 0);
+    if (saved_value_ != saved_value) {
+      masm_->Mov(ScratchRegister(), saved_value);
+      saved_value_ = saved_value;
+    }
+  }
+  // Reset the saved value (i.e. the value of ScratchRegister is no longer
+  // known).
+  void ResetSavedValue() {
+    saved_value_ = 0;
+  }
+
+  LCodeGen* cgen_;
+  MacroAssembler* masm_;
+
+  // Register used to store a constant.
+  Register scratch_register_;
+  bool scratch_register_used_;
+
+  // Sometimes we load or store two values from/to two contiguous stack slots.
+  // In this case, we try to use the ldp/stp instructions to reduce code size.
+  // To be able to do that, instead of generating each instruction directly,
+  // we record in the fields below that an instruction is pending. When the
+  // next instruction arrives, if it is consistent with the pending one for
+  // ldp/stp, we generate a single ldp/stp. Otherwise, we generate the pending
+  // instruction and record the new instruction (which becomes pending).
+
+  // Enumeration of instructions which can be pending.
+  enum Pending {
+    kNone,
+    kStoreConstant,
+    kLoad, kStore,
+    kStackSlotMove
+  };
+  // The pending instruction.
+  Pending pending_;
+  // For kLoad, kStore: register which must be loaded/stored.
+  CPURegister pending_register_;
+  // For kLoad, kStackSlotMove: address of the load.
+  MemOperand pending_address_src_;
+  // For kStoreConstant, kStore, kStackSlotMove: address of the store.
+  MemOperand pending_address_dst_;
+  // For kStoreConstant: value to be stored.
+  uint64_t pending_value_;
+  // Value held in the ScratchRegister if saved_value_ is not 0.
+  // For 0, we use xzr.
+  uint64_t saved_value_;
+#ifdef DEBUG
+  // Address where the pending instruction must be generated. It's only used to
+  // check that nothing else has been generated since we set the pending
+  // instruction.
+  int pending_pc_;
+  // If true, the scratch register has been acquired outside this class. The
+  // scratch register can no longer be used for constants.
+  bool scratch_register_acquired_;
+#endif
+};
+
+} }  // namespace v8::internal
+
+#endif  // V8_ARM64_DELAYED_MASM_ARM64_H_
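SetSavedValue/ResetSavedValue implement a small constant cache for the scratch register: repeated stores of the same non-zero constant reuse the already materialized value, and acquiring the scratch register for other uses invalidates the cache. A minimal standalone sketch of just that caching behaviour (illustrative names; output is a printed trace, not emitted code):

#include <cstdint>
#include <cstdio>

// Model of SetSavedValue/ResetSavedValue: the scratch register keeps the
// last constant materialized into it, so storing the same non-zero
// constant repeatedly needs only one "mov". Zero needs no mov at all
// because xzr is used instead.
class ScratchCache {
 public:
  void MaterializeConstant(uint64_t value) {
    if (value == 0) return;      // zero goes through xzr
    if (saved_value_ != value) {  // SetSavedValue
      std::printf("mov scratch, #%llu\n", (unsigned long long)value);
      saved_value_ = value;
    }
  }
  void AcquireScratch() { saved_value_ = 0; }  // ResetSavedValue
 private:
  uint64_t saved_value_ = 0;  // 0 means "contents unknown"
};

int main() {
  ScratchCache c;
  c.MaterializeConstant(42);  // mov scratch, #42
  c.MaterializeConstant(42);  // cached: no code emitted
  c.AcquireScratch();         // scratch clobbered elsewhere
  c.MaterializeConstant(42);  // mov scratch, #42 again
  return 0;
}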
diff --git a/src/arm64/lithium-gap-resolver-arm64.cc b/src/arm64/lithium-gap-resolver-arm64.cc
index d845e1efc06677a5cedf57914f51b4c1c8d8be5d..e5b55b9fb518b490a2f8d55c4ec2c92fe1bb3b44 100644
--- a/src/arm64/lithium-gap-resolver-arm64.cc
+++ b/src/arm64/lithium-gap-resolver-arm64.cc
@@ -4,36 +4,36 @@
 
 #include "src/v8.h"
 
+#include "src/arm64/delayed-masm-arm64-inl.h"
 #include "src/arm64/lithium-codegen-arm64.h"
 #include "src/arm64/lithium-gap-resolver-arm64.h"
 
 namespace v8 {
 namespace internal {
 
-// We use the root register to spill a value while breaking a cycle in parallel
-// moves. We don't need access to roots while resolving the move list and using
-// the root register has two advantages:
-//  - It is not in crankshaft allocatable registers list, so it can't interfere
-//    with any of the moves we are resolving.
-//  - We don't need to push it on the stack, as we can reload it with its value
-//    once we have resolved a cycle.
-#define kSavedValue root
+#define __ ACCESS_MASM((&masm_))
 
-// We use the MacroAssembler floating-point scratch register to break a cycle
-// involving double values as the MacroAssembler will not need it for the
-// operations performed by the gap resolver.
-#define kSavedDoubleValue fp_scratch
 
+void DelayedGapMasm::EndDelayedUse() {
+  DelayedMasm::EndDelayedUse();
+  if (scratch_register_used()) {
+    ASSERT(ScratchRegister().Is(root));
+    ASSERT(!pending());
+    InitializeRootRegister();
+    reset_scratch_register_used();
+  }
+}
 
-LGapResolver::LGapResolver(LCodeGen* owner)
-    : cgen_(owner), moves_(32, owner->zone()), root_index_(0), in_cycle_(false),
-      saved_destination_(NULL), need_to_restore_root_(false) { }
 
+LGapResolver::LGapResolver(LCodeGen* owner)
+    : cgen_(owner), masm_(owner, owner->masm()), moves_(32, owner->zone()),
+      root_index_(0), in_cycle_(false), saved_destination_(NULL) {
+}
 
-#define __ ACCESS_MASM(cgen_->masm())
 
 void LGapResolver::Resolve(LParallelMove* parallel_move) {
   ASSERT(moves_.is_empty());
+  ASSERT(!masm_.pending());
 
   // Build up a worklist of moves.
   BuildInitialMoveList(parallel_move);
@@ -61,11 +61,7 @@ void LGapResolver::Resolve(LParallelMove* parallel_move) {
     }
   }
 
-  if (need_to_restore_root_) {
-    ASSERT(kSavedValue.Is(root));
-    __ InitializeRootRegister();
-    need_to_restore_root_ = false;
-  }
+  __ EndDelayedUse();
 
   moves_.Rewind(0);
 }
@@ -152,11 +148,6 @@ void LGapResolver::BreakCycle(int index) {
   ASSERT(moves_[index].destination()->Equals(moves_[root_index_].source()));
   ASSERT(!in_cycle_);
 
-  // We use registers which are not allocatable by crankshaft to break the cycle
-  // to be sure they don't interfere with the moves we are resolving.
-  ASSERT(!kSavedValue.IsAllocatable());
-  ASSERT(!kSavedDoubleValue.IsAllocatable());
-
   // We save in a register the source of that move and we remember its
   // destination. Then we mark this move as resolved so the cycle is
   // broken and we can perform the other moves.
@@ -165,19 +156,15 @@ void LGapResolver::BreakCycle(int index) {
   saved_destination_ = moves_[index].destination();
 
   if (source->IsRegister()) {
-    need_to_restore_root_ = true;
-    __ Mov(kSavedValue, cgen_->ToRegister(source));
+    AcquireSavedValueRegister();
+    __ Mov(SavedValueRegister(), cgen_->ToRegister(source));
   } else if (source->IsStackSlot()) {
-    need_to_restore_root_ = true;
-    __ Ldr(kSavedValue, cgen_->ToMemOperand(source));
+    AcquireSavedValueRegister();
+    __ Load(SavedValueRegister(), cgen_->ToMemOperand(source));
   } else if (source->IsDoubleRegister()) {
-    ASSERT(cgen_->masm()->FPTmpList()->IncludesAliasOf(kSavedDoubleValue));
-    cgen_->masm()->FPTmpList()->Remove(kSavedDoubleValue);
-    __ Fmov(kSavedDoubleValue, cgen_->ToDoubleRegister(source));
+    __ Fmov(SavedFPValueRegister(), cgen_->ToDoubleRegister(source));
   } else if (source->IsDoubleStackSlot()) {
-    ASSERT(cgen_->masm()->FPTmpList()->IncludesAliasOf(kSavedDoubleValue));
-    cgen_->masm()->FPTmpList()->Remove(kSavedDoubleValue);
-    __ Ldr(kSavedDoubleValue, cgen_->ToMemOperand(source));
+    __ Load(SavedFPValueRegister(), cgen_->ToMemOperand(source));
   } else {
     UNREACHABLE();
   }
@@ -194,15 +181,16 @@ void LGapResolver::RestoreValue() {
   ASSERT(saved_destination_ != NULL);
 
   if (saved_destination_->IsRegister()) {
-    __ Mov(cgen_->ToRegister(saved_destination_), kSavedValue);
+    __ Mov(cgen_->ToRegister(saved_destination_), SavedValueRegister());
+    ReleaseSavedValueRegister();
   } else if (saved_destination_->IsStackSlot()) {
-    __ Str(kSavedValue, cgen_->ToMemOperand(saved_destination_));
+    __ Store(SavedValueRegister(), cgen_->ToMemOperand(saved_destination_));
+    ReleaseSavedValueRegister();
   } else if (saved_destination_->IsDoubleRegister()) {
-    __ Fmov(cgen_->ToDoubleRegister(saved_destination_), kSavedDoubleValue);
-    cgen_->masm()->FPTmpList()->Combine(kSavedDoubleValue);
+    __ Fmov(cgen_->ToDoubleRegister(saved_destination_),
+            SavedFPValueRegister());
   } else if (saved_destination_->IsDoubleStackSlot()) {
-    __ Str(kSavedDoubleValue, cgen_->ToMemOperand(saved_destination_));
-    cgen_->masm()->FPTmpList()->Combine(kSavedDoubleValue);
+    __ Store(SavedFPValueRegister(), cgen_->ToMemOperand(saved_destination_));
   } else {
     UNREACHABLE();
   }
@@ -225,13 +213,13 @@ void LGapResolver::EmitMove(int index) {
       __ Mov(cgen_->ToRegister(destination), source_register);
     } else {
       ASSERT(destination->IsStackSlot());
-      __ Str(source_register, cgen_->ToMemOperand(destination));
+      __ Store(source_register, cgen_->ToMemOperand(destination));
     }
 
   } else if (source->IsStackSlot()) {
     MemOperand source_operand = cgen_->ToMemOperand(source);
     if (destination->IsRegister()) {
-      __ Ldr(cgen_->ToRegister(destination), source_operand);
+      __ Load(cgen_->ToRegister(destination), source_operand);
     } else {
       ASSERT(destination->IsStackSlot());
       EmitStackSlotMove(index);
@@ -254,15 +242,28 @@ void LGapResolver::EmitMove(int index) {
     } else {
       ASSERT(destination->IsStackSlot());
       ASSERT(!in_cycle_);  // Constant moves happen after all cycles are gone.
-      need_to_restore_root_ = true;
       if (cgen_->IsSmi(constant_source)) {
-        __ Mov(kSavedValue, cgen_->ToSmi(constant_source));
+        Smi* smi = cgen_->ToSmi(constant_source);
+        __ StoreConstant(reinterpret_cast<intptr_t>(smi),
+                         cgen_->ToMemOperand(destination));
       } else if (cgen_->IsInteger32Constant(constant_source)) {
-        __ Mov(kSavedValue, cgen_->ToInteger32(constant_source));
+        __ StoreConstant(cgen_->ToInteger32(constant_source),
+                         cgen_->ToMemOperand(destination));
       } else {
-        __ LoadObject(kSavedValue, cgen_->ToHandle(constant_source));
+        Handle<Object> handle = cgen_->ToHandle(constant_source);
+        AllowDeferredHandleDereference smi_object_check;
+        if (handle->IsSmi()) {
+          Object* obj = *handle;
+          ASSERT(!obj->IsHeapObject());
+          __ StoreConstant(reinterpret_cast<intptr_t>(obj),
+                           cgen_->ToMemOperand(destination));
+        } else {
+          AcquireSavedValueRegister();
+          __ LoadObject(SavedValueRegister(), handle);
+          __ Store(SavedValueRegister(), cgen_->ToMemOperand(destination));
+          ReleaseSavedValueRegister();
+        }
       }
-      __ Str(kSavedValue, cgen_->ToMemOperand(destination));
     }
 
   } else if (source->IsDoubleRegister()) {
@@ -271,13 +272,13 @@ void LGapResolver::EmitMove(int index) {
       __ Fmov(cgen_->ToDoubleRegister(destination), src);
     } else {
       ASSERT(destination->IsDoubleStackSlot());
-      __ Str(src, cgen_->ToMemOperand(destination));
+      __ Store(src, cgen_->ToMemOperand(destination));
     }
 
   } else if (source->IsDoubleStackSlot()) {
     MemOperand src = cgen_->ToMemOperand(source);
     if (destination->IsDoubleRegister()) {
-      __ Ldr(cgen_->ToDoubleRegister(destination), src);
+      __ Load(cgen_->ToDoubleRegister(destination), src);
     } else {
       ASSERT(destination->IsDoubleStackSlot());
       EmitStackSlotMove(index);
@@ -291,21 +292,4 @@ void LGapResolver::EmitMove(int index) {
   moves_[index].Eliminate();
 }
 
-
-void LGapResolver::EmitStackSlotMove(int index) {
-  // We need a temp register to perform a stack slot to stack slot move, and
-  // the register must not be involved in breaking cycles.
-
-  // Use the Crankshaft double scratch register as the temporary.
-  DoubleRegister temp = crankshaft_fp_scratch;
-
-  LOperand* src = moves_[index].source();
-  LOperand* dst = moves_[index].destination();
-
-  ASSERT(src->IsStackSlot());
-  ASSERT(dst->IsStackSlot());
-  __ Ldr(temp, cgen_->ToMemOperand(src));
-  __ Str(temp, cgen_->ToMemOperand(dst));
-}
-
 } }  // namespace v8::internal
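A short aside on why the Smi case above can go through StoreConstant(uint64_t, ...): on 64-bit V8 of this era a Smi is encoded entirely within the tagged word (assumed here: the 32-bit value in the upper half of the word, tag bit 0 clear), so reinterpreting the Smi* yields a plain 64-bit constant that can be stored directly. A sketch of that encoding:

#include <cstdint>
#include <cstdio>

// Assumed 64-bit Smi encoding: signed 32-bit value shifted into the upper
// half of the word; the low bits (including the tag bit) are zero.
uint64_t SmiEncode(int32_t value) {
  return static_cast<uint64_t>(static_cast<int64_t>(value)) << 32;
}

int main() {
  std::printf("%#llx\n", (unsigned long long)SmiEncode(1));   // 0x100000000
  std::printf("%#llx\n", (unsigned long long)SmiEncode(-1));  // 0xffffffff00000000
  return 0;
}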
diff --git a/src/arm64/lithium-gap-resolver-arm64.h b/src/arm64/lithium-gap-resolver-arm64.h
index 55d4ecbf9d210865d074abd64d7e315bd3643937..3c4c200a51a4649b90e5e908937f798603b9b12f 100644
--- a/src/arm64/lithium-gap-resolver-arm64.h
+++ b/src/arm64/lithium-gap-resolver-arm64.h
@@ -7,6 +7,7 @@
 
 #include "src/v8.h"
 
+#include "src/arm64/delayed-masm-arm64.h"
 #include "src/lithium.h"
 
 namespace v8 {
@@ -15,6 +16,21 @@ namespace internal {
 class LCodeGen;
 class LGapResolver;
 
+class DelayedGapMasm : public DelayedMasm {
+ public:
+  DelayedGapMasm(LCodeGen* owner, MacroAssembler* masm)
+    : DelayedMasm(owner, masm, root) {
+    // We use the root register as an extra scratch register.
+    // The root register has two advantages:
+    //  - It is not in the Crankshaft allocatable registers list, so it can't
+    //    interfere with the allocatable registers.
+    //  - We don't need to push it on the stack, as we can reload it with its
+    //    value once we have finished.
+  }
+  void EndDelayedUse();
+};
+
+
 class LGapResolver BASE_EMBEDDED {
  public:
   explicit LGapResolver(LCodeGen* owner);
@@ -43,12 +59,32 @@ class LGapResolver BASE_EMBEDDED {
   void EmitMove(int index);
 
   // Emit a move from one stack slot to another.
-  void EmitStackSlotMove(int index);
+  void EmitStackSlotMove(int index) {
+    masm_.StackSlotMove(moves_[index].source(), moves_[index].destination());
+  }
 
   // Verify the move list before performing moves.
   void Verify();
 
+  // Registers used to break cycles.
+  const Register& SavedValueRegister() {
+    ASSERT(!masm_.ScratchRegister().IsAllocatable());
+    return masm_.ScratchRegister();
+  }
+  // The scratch register is used both to break cycles and to store constants.
+  // These two methods switch between the two modes.
+  void AcquireSavedValueRegister() { masm_.AcquireScratchRegister(); }
+  void ReleaseSavedValueRegister() { masm_.ReleaseScratchRegister(); }
+  const FPRegister& SavedFPValueRegister() {
+    // We use the Crankshaft floating-point scratch register to break a cycle
+    // involving double values as the MacroAssembler will not need it for the
+    // operations performed by the gap resolver.
+    ASSERT(!crankshaft_fp_scratch.IsAllocatable());
+    return crankshaft_fp_scratch;
+  }
+
   LCodeGen* cgen_;
+  DelayedGapMasm masm_;
 
   // List of moves not yet resolved.
   ZoneList<LMoveOperands> moves_;
@@ -56,10 +92,6 @@ class LGapResolver BASE_EMBEDDED {
   int root_index_;
   bool in_cycle_;
   LOperand* saved_destination_;
-
-  // We use the root register as a scratch in a few places. When that happens,
-  // this flag is set to indicate that it needs to be restored.
-  bool need_to_restore_root_;
 };
 
 } }  // namespace v8::internal
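For context, the cycle-breaking scheme that BreakCycle/RestoreValue implement can be modelled in a few lines: save one source of the cycle into the scratch ("saved value") register, perform the remaining move, then restore the saved value into its destination. The sketch below uses plain integers as locations and a printed trace instead of code emission; nothing here is V8 API:

#include <cstdio>

struct Move { int src, dst; };  // abstract "locations"

// Resolve a two-element cycle such as {loc0 -> loc1, loc1 -> loc0}.
void ResolveTwoCycle(Move a, Move b) {
  std::printf("mov scratch, loc%d\n", a.src);       // BreakCycle: save a.src
  std::printf("mov loc%d, loc%d\n", b.dst, b.src);  // the other move is now safe
  std::printf("mov loc%d, scratch\n", a.dst);       // RestoreValue
}

int main() {
  // Swap loc0 and loc1: the moves {0->1, 1->0} form a cycle.
  ResolveTwoCycle({0, 1}, {1, 0});
  return 0;
}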
diff --git a/tools/gyp/v8.gyp b/tools/gyp/v8.gyp
index 485fec18de09e5a6ccd0891864df5c9c6fef9a16..c6dcf69b54a563ec922518bdce271467a846ff20 100644
--- a/tools/gyp/v8.gyp
+++ b/tools/gyp/v8.gyp
             '../../src/arm64/decoder-arm64.cc',
             '../../src/arm64/decoder-arm64.h',
             '../../src/arm64/decoder-arm64-inl.h',
+            '../../src/arm64/delayed-masm-arm64.cc',
+            '../../src/arm64/delayed-masm-arm64.h',
+            '../../src/arm64/delayed-masm-arm64-inl.h',
             '../../src/arm64/deoptimizer-arm64.cc',
             '../../src/arm64/disasm-arm64.cc',
             '../../src/arm64/disasm-arm64.h',