src/v8/src/x64/assembler-x64.h

   1 // Copyright (c) 1994-2006 Sun Microsystems Inc.
   2 // All Rights Reserved.
   3 //
   4 // Redistribution and use in source and binary forms, with or without
   5 // modification, are permitted provided that the following conditions are
   6 // met:
   7 //
   8 // - Redistributions of source code must retain the above copyright notice,
   9 // this list of conditions and the following disclaimer.
  10 //
  11 // - Redistribution in binary form must reproduce the above copyright
  12 // notice, this list of conditions and the following disclaimer in the
  13 // documentation and/or other materials provided with the distribution.
  14 //
  15 // - Neither the name of Sun Microsystems or the names of contributors may
  16 // be used to endorse or promote products derived from this software without
  17 // specific prior written permission.
  18 //
  19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
  20 // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  21 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  22 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  24 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  25 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  26 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  27 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  28 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  29 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30
// The original source code covered by the above license has been
// modified significantly by Google Inc.
  33 // Copyright 2012 the V8 project authors. All rights reserved.
  34
  35 // A lightweight X64 Assembler.
  36
  37 #ifndef V8_X64_ASSEMBLER_X64_H_
  38 #define V8_X64_ASSEMBLER_X64_H_
  39
  40 #include "serialize.h"
  41
  42 namespace v8 {
  43 namespace internal {
  44
  45 // Utility functions
  46
  47 // Test whether a 64-bit value is in a specific range.
  48 inline bool is_uint32(int64_t x) {
  49   static const uint64_t kMaxUInt32 = V8_UINT64_C(0xffffffff);
  50   return static_cast<uint64_t>(x) <= kMaxUInt32;
  51 }
  52
  53 inline bool is_int32(int64_t x) {
  54   static const int64_t kMinInt32 = -V8_INT64_C(0x80000000);
  55   return is_uint32(x - kMinInt32);
  56 }
  57
  58 inline bool uint_is_int32(uint64_t x) {
  59   static const uint64_t kMaxInt32 = V8_UINT64_C(0x7fffffff);
  60   return x <= kMaxInt32;
  61 }
  62
  63 inline bool is_uint32(uint64_t x) {
  64   static const uint64_t kMaxUInt32 = V8_UINT64_C(0xffffffff);
  65   return x <= kMaxUInt32;
  66 }
  67
  68 // CPU Registers.
  69 //
  70 // 1) We would prefer to use an enum, but enum values are assignment-
  71 // compatible with int, which has caused code-generation bugs.
  72 //
  73 // 2) We would prefer to use a class instead of a struct but we don't like
  74 // the register initialization to depend on the particular initialization
  75 // order (which appears to be different on OS X, Linux, and Windows for the
  76 // installed versions of C++ we tried). Using a struct permits C-style
  77 // "initialization". Also, the Register objects cannot be const as this
  78 // forces initialization stubs in MSVC, making us dependent on initialization
  79 // order.
  80 //
  81 // 3) By not using an enum, we are possibly preventing the compiler from
  82 // doing certain constant folds, which may significantly reduce the
  83 // code generated for some assembly instructions (because they boil down
  84 // to a few constants). If this is a problem, we could change the code
  85 // such that we use an enum in optimized mode, and the struct in debug
  86 // mode. This way we get the compile-time error checking in debug mode
  87 // and best performance in optimized code.
  88 //
  89
  90 struct Register {
  91   // The non-allocatable registers are:
  92   //  rsp - stack pointer
  93   //  rbp - frame pointer
  94   //  r10 - fixed scratch register
  95   //  r12 - smi constant register
  96   //  r13 - root register
  97   static const int kMaxNumAllocatableRegisters = 11;
  98   static int NumAllocatableRegisters() {
  99     return kMaxNumAllocatableRegisters;
 100   }
 101   static const int kNumRegisters = 16;
 102
 103   static int ToAllocationIndex(Register reg) {
 104     return kAllocationIndexByRegisterCode[reg.code()];
 105   }
 106
 107   static Register FromAllocationIndex(int index) {
 108     ASSERT(index >= 0 && index < kMaxNumAllocatableRegisters);
 109     Register result = { kRegisterCodeByAllocationIndex[index] };
 110     return result;
 111   }
 112
 113   static const char* AllocationIndexToString(int index) {
 114     ASSERT(index >= 0 && index < kMaxNumAllocatableRegisters);
 115     const char* const names[] = {
 116       "rax",
 117       "rbx",
 118       "rdx",
 119       "rcx",
 120       "rsi",
 121       "rdi",
 122       "r8",
 123       "r9",
 124       "r11",
 125       "r14",
 126       "r15"
 127     };
 128     return names[index];
 129   }
 130
 131   static Register from_code(int code) {
 132     Register r = { code };
 133     return r;
 134   }
 135   bool is_valid() const { return 0 <= code_ && code_ < kNumRegisters; }
 136   bool is(Register reg) const { return code_ == reg.code_; }
 137   // rax, rbx, rcx and rdx are byte registers, the rest are not.
 138   bool is_byte_register() const { return code_ <= 3; }
 139   int code() const {
 140     ASSERT(is_valid());
 141     return code_;
 142   }
 143   int bit() const {
 144     return 1 << code_;
 145   }
 146
 147   // Return the high bit of the register code as a 0 or 1.  Used often
 148   // when constructing the REX prefix byte.
 149   int high_bit() const {
 150     return code_ >> 3;
 151   }
 152   // Return the 3 low bits of the register code.  Used when encoding registers
 153   // in modR/M, SIB, and opcode bytes.
 154   int low_bits() const {
 155     return code_ & 0x7;
 156   }
 157
 158   // Unfortunately we can't make this private in a struct when initializing
 159   // by assignment.
 160   int code_;
 161
 162  private:
 163   static const int kRegisterCodeByAllocationIndex[kMaxNumAllocatableRegisters];
 164   static const int kAllocationIndexByRegisterCode[kNumRegisters];
 165 };
 166
 167 const int kRegister_rax_Code = 0;
 168 const int kRegister_rcx_Code = 1;
 169 const int kRegister_rdx_Code = 2;
 170 const int kRegister_rbx_Code = 3;
 171 const int kRegister_rsp_Code = 4;
 172 const int kRegister_rbp_Code = 5;
 173 const int kRegister_rsi_Code = 6;
 174 const int kRegister_rdi_Code = 7;
 175 const int kRegister_r8_Code = 8;
 176 const int kRegister_r9_Code = 9;
 177 const int kRegister_r10_Code = 10;
 178 const int kRegister_r11_Code = 11;
 179 const int kRegister_r12_Code = 12;
 180 const int kRegister_r13_Code = 13;
 181 const int kRegister_r14_Code = 14;
 182 const int kRegister_r15_Code = 15;
 183 const int kRegister_no_reg_Code = -1;
 184
 185 const Register rax = { kRegister_rax_Code };
 186 const Register rcx = { kRegister_rcx_Code };
 187 const Register rdx = { kRegister_rdx_Code };
 188 const Register rbx = { kRegister_rbx_Code };
 189 const Register rsp = { kRegister_rsp_Code };
 190 const Register rbp = { kRegister_rbp_Code };
 191 const Register rsi = { kRegister_rsi_Code };
 192 const Register rdi = { kRegister_rdi_Code };
 193 const Register r8 = { kRegister_r8_Code };
 194 const Register r9 = { kRegister_r9_Code };
 195 const Register r10 = { kRegister_r10_Code };
 196 const Register r11 = { kRegister_r11_Code };
 197 const Register r12 = { kRegister_r12_Code };
 198 const Register r13 = { kRegister_r13_Code };
 199 const Register r14 = { kRegister_r14_Code };
 200 const Register r15 = { kRegister_r15_Code };
 201 const Register no_reg = { kRegister_no_reg_Code };
 202
 203 #ifdef _WIN64
 204   // Windows calling convention
 205   const Register arg_reg_1 = { kRegister_rcx_Code };
 206   const Register arg_reg_2 = { kRegister_rdx_Code };
 207   const Register arg_reg_3 = { kRegister_r8_Code };
 208   const Register arg_reg_4 = { kRegister_r9_Code };
 209 #else
 210   // AMD64 calling convention
 211   const Register arg_reg_1 = { kRegister_rdi_Code };
 212   const Register arg_reg_2 = { kRegister_rsi_Code };
 213   const Register arg_reg_3 = { kRegister_rdx_Code };
 214   const Register arg_reg_4 = { kRegister_rcx_Code };
 215 #endif  // _WIN64
 216
 217 struct XMMRegister {
 218   static const int kMaxNumRegisters = 16;
 219   static const int kMaxNumAllocatableRegisters = 15;
 220   static int NumAllocatableRegisters() {
 221     return kMaxNumAllocatableRegisters;
 222   }
 223
 224   static int ToAllocationIndex(XMMRegister reg) {
 225     ASSERT(reg.code() != 0);
 226     return reg.code() - 1;
 227   }
 228
 229   static XMMRegister FromAllocationIndex(int index) {
 230     ASSERT(0 <= index && index < kMaxNumAllocatableRegisters);
 231     XMMRegister result = { index + 1 };
 232     return result;
 233   }
 234
 235   static const char* AllocationIndexToString(int index) {
 236     ASSERT(index >= 0 && index < kMaxNumAllocatableRegisters);
 237     const char* const names[] = {
 238       "xmm1",
 239       "xmm2",
 240       "xmm3",
 241       "xmm4",
 242       "xmm5",
 243       "xmm6",
 244       "xmm7",
 245       "xmm8",
 246       "xmm9",
 247       "xmm10",
 248       "xmm11",
 249       "xmm12",
 250       "xmm13",
 251       "xmm14",
 252       "xmm15"
 253     };
 254     return names[index];
 255   }
 256
 257   static XMMRegister from_code(int code) {
 258     ASSERT(code >= 0);
 259     ASSERT(code < kMaxNumRegisters);
 260     XMMRegister r = { code };
 261     return r;
 262   }
 263   bool is_valid() const { return 0 <= code_ && code_ < kMaxNumRegisters; }
 264   bool is(XMMRegister reg) const { return code_ == reg.code_; }
 265   int code() const {
 266     ASSERT(is_valid());
 267     return code_;
 268   }
 269
 270   // Return the high bit of the register code as a 0 or 1.  Used often
 271   // when constructing the REX prefix byte.
 272   int high_bit() const {
 273     return code_ >> 3;
 274   }
 275   // Return the 3 low bits of the register code.  Used when encoding registers
 276   // in modR/M, SIB, and opcode bytes.
 277   int low_bits() const {
 278     return code_ & 0x7;
 279   }
 280
 281   int code_;
 282 };
 283
 284 const XMMRegister xmm0 = { 0 };
 285 const XMMRegister xmm1 = { 1 };
 286 const XMMRegister xmm2 = { 2 };
 287 const XMMRegister xmm3 = { 3 };
 288 const XMMRegister xmm4 = { 4 };
 289 const XMMRegister xmm5 = { 5 };
 290 const XMMRegister xmm6 = { 6 };
 291 const XMMRegister xmm7 = { 7 };
 292 const XMMRegister xmm8 = { 8 };
 293 const XMMRegister xmm9 = { 9 };
 294 const XMMRegister xmm10 = { 10 };
 295 const XMMRegister xmm11 = { 11 };
 296 const XMMRegister xmm12 = { 12 };
 297 const XMMRegister xmm13 = { 13 };
 298 const XMMRegister xmm14 = { 14 };
 299 const XMMRegister xmm15 = { 15 };
 300
 301
 302 typedef XMMRegister DoubleRegister;
 303 typedef XMMRegister SIMD128Register;
 304
 305
 306 enum Condition {
 307   // any value < 0 is considered no_condition
 308   no_condition  = -1,
 309
 310   overflow      =  0,
 311   no_overflow   =  1,
 312   below         =  2,
 313   above_equal   =  3,
 314   equal         =  4,
 315   not_equal     =  5,
 316   below_equal   =  6,
 317   above         =  7,
 318   negative      =  8,
 319   positive      =  9,
 320   parity_even   = 10,
 321   parity_odd    = 11,
 322   less          = 12,
 323   greater_equal = 13,
 324   less_equal    = 14,
 325   greater       = 15,
 326
 327   // Fake conditions that are handled by the
 328   // opcodes using them.
 329   always        = 16,
 330   never         = 17,
 331   // aliases
 332   carry         = below,
 333   not_carry     = above_equal,
 334   zero          = equal,
 335   not_zero      = not_equal,
 336   sign          = negative,
 337   not_sign      = positive,
 338   last_condition = greater
 339 };
 340
 341
 342 // Returns the equivalent of !cc.
 343 // Negation of the default no_condition (-1) results in a non-default
 344 // no_condition value (-2). As long as tests for no_condition check
 345 // for condition < 0, this will work as expected.
 346 inline Condition NegateCondition(Condition cc) {
 347   return static_cast<Condition>(cc ^ 1);
 348 }
 349
 350
 351 // Corresponds to transposing the operands of a comparison.
 352 inline Condition ReverseCondition(Condition cc) {
 353   switch (cc) {
 354     case below:
 355       return above;
 356     case above:
 357       return below;
 358     case above_equal:
 359       return below_equal;
 360     case below_equal:
 361       return above_equal;
 362     case less:
 363       return greater;
 364     case greater:
 365       return less;
 366     case greater_equal:
 367       return less_equal;
 368     case less_equal:
 369       return greater_equal;
 370     default:
 371       return cc;
 372   };
 373 }
 374
 375
 376 // -----------------------------------------------------------------------------
 377 // Machine instruction Immediates
 378
 379 class Immediate BASE_EMBEDDED {
 380  public:
 381   explicit Immediate(int32_t value) : value_(value) {}
 382
 383  private:
 384   int32_t value_;
 385
 386   friend class Assembler;
 387 };
 388
 389
 390 // -----------------------------------------------------------------------------
 391 // Machine instruction Operands
 392
 393 enum ScaleFactor {
 394   times_1 = 0,
 395   times_2 = 1,
 396   times_4 = 2,
 397   times_8 = 3,
 398   maximal_scale_factor = times_8,
 399   times_int_size = times_4,
 400   times_pointer_size = (kPointerSize == 8) ? times_8 : times_4
 401 };
 402
 403
 404 class Operand BASE_EMBEDDED {
 405  public:
 406   // [base + disp/r]
 407   Operand(Register base, int32_t disp);
 408
 409   // [base + index*scale + disp/r]
 410   Operand(Register base,
 411           Register index,
 412           ScaleFactor scale,
 413           int32_t disp);
 414
 415   // [index*scale + disp/r]
 416   Operand(Register index,
 417           ScaleFactor scale,
 418           int32_t disp);
 419
 420   // Offset from existing memory operand.
 421   // Offset is added to existing displacement as 32-bit signed values and
 422   // this must not overflow.
 423   Operand(const Operand& base, int32_t offset);
 424
 425   // Checks whether either base or index register is the given register.
 426   // Does not check the "reg" part of the Operand.
 427   bool AddressUsesRegister(Register reg) const;
 428
 429   // Queries related to the size of the generated instruction.
 430   // Whether the generated instruction will have a REX prefix.
 431   bool requires_rex() const { return rex_ != 0; }
 432   // Size of the ModR/M, SIB and displacement parts of the generated
 433   // instruction.
 434   int operand_size() const { return len_; }
 435
 436  private:
 437   byte rex_;
 438   byte buf_[6];
 439   // The number of bytes of buf_ in use.
 440   byte len_;
 441
 442   // Set the ModR/M byte without an encoded 'reg' register. The
 443   // register is encoded later as part of the emit_operand operation.
 444   // set_modrm can be called before or after set_sib and set_disp*.
 445   inline void set_modrm(int mod, Register rm);
 446
 447   // Set the SIB byte if one is needed. Sets the length to 2 rather than 1.
 448   inline void set_sib(ScaleFactor scale, Register index, Register base);
 449
 450   // Adds operand displacement fields (offsets added to the memory address).
 451   // Needs to be called after set_sib, not before it.
 452   inline void set_disp8(int disp);
 453   inline void set_disp32(int disp);
 454
 455   friend class Assembler;
 456 };
 457
 458
 459 // CpuFeatures keeps track of which features are supported by the target CPU.
 460 // Supported features must be enabled by a CpuFeatureScope before use.
 461 // Example:
 462 //   if (assembler->IsSupported(SSE3)) {
 463 //     CpuFeatureScope fscope(assembler, SSE3);
 464 //     // Generate SSE3 floating point code.
 465 //   } else {
 466 //     // Generate standard SSE2 floating point code.
 467 //   }
 468 class CpuFeatures : public AllStatic {
 469  public:
 470   // Detect features of the target CPU. Set safe defaults if the serializer
 471   // is enabled (snapshots must be portable).
 472   static void Probe();
 473
 474   // Check whether a feature is supported by the target CPU.
 475   static bool IsSupported(CpuFeature f) {
 476     if (Check(f, cross_compile_)) return true;
 477     ASSERT(initialized_);
 478     if (f == SSE3 && !FLAG_enable_sse3) return false;
 479     if (f == SSE4_1 && !FLAG_enable_sse4_1) return false;
 480     if (f == CMOV && !FLAG_enable_cmov) return false;
 481     if (f == SAHF && !FLAG_enable_sahf) return false;
 482     return Check(f, supported_);
 483   }
 484
 485   static bool IsFoundByRuntimeProbingOnly(CpuFeature f) {
 486     ASSERT(initialized_);
 487     return Check(f, found_by_runtime_probing_only_);
 488   }
 489
 490   static bool IsSafeForSnapshot(CpuFeature f) {
 491     return Check(f, cross_compile_) ||
 492            (IsSupported(f) &&
 493             (!Serializer::enabled() || !IsFoundByRuntimeProbingOnly(f)));
 494   }
 495
 496   static bool VerifyCrossCompiling() {
 497     return cross_compile_ == 0;
 498   }
 499
 500   static bool VerifyCrossCompiling(CpuFeature f) {
 501     uint64_t mask = flag2set(f);
 502     return cross_compile_ == 0 ||
 503            (cross_compile_ & mask) == mask;
 504   }
 505
 506  private:
 507   static bool Check(CpuFeature f, uint64_t set) {
 508     return (set & flag2set(f)) != 0;
 509   }
 510
 511   static uint64_t flag2set(CpuFeature f) {
 512     return static_cast<uint64_t>(1) << f;
 513   }
 514
 515   // Safe defaults include CMOV for X64. It is always available, if
 516   // anyone checks, but they shouldn't need to check.
 517   // The required user mode extensions in X64 are (from AMD64 ABI Table A.1):
 518   //   fpu, tsc, cx8, cmov, mmx, sse, sse2, fxsr, syscall
 519   static const uint64_t kDefaultCpuFeatures = (1 << CMOV);
 520
 521 #ifdef DEBUG
 522   static bool initialized_;
 523 #endif
 524   static uint64_t supported_;
 525   static uint64_t found_by_runtime_probing_only_;
 526
 527   static uint64_t cross_compile_;
 528
 529   friend class ExternalReference;
 530   friend class PlatformFeatureScope;
 531   DISALLOW_COPY_AND_ASSIGN(CpuFeatures);
 532 };
 533
 534
 535 #define ASSEMBLER_INSTRUCTION_LIST(V)  \
 536   V(mov)
 537
 538
 539 class Assembler : public AssemblerBase {
 540  private:
 541   // We check before assembling an instruction that there is sufficient
 542   // space to write an instruction and its relocation information.
 543   // The relocation writer's position must be kGap bytes above the end of
 544   // the generated instructions. This leaves enough space for the
 545   // longest possible x64 instruction, 15 bytes, and the longest possible
 546   // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
 547   // (There is a 15 byte limit on x64 instruction length that rules out some
 548   // otherwise valid instructions.)
 549   // This allows for a single, fast space check per instruction.
 550   static const int kGap = 32;
 551
 552  public:
 553   // Create an assembler. Instructions and relocation information are emitted
 554   // into a buffer, with the instructions starting from the beginning and the
 555   // relocation information starting from the end of the buffer. See CodeDesc
 556   // for a detailed comment on the layout (globals.h).
 557   //
 558   // If the provided buffer is NULL, the assembler allocates and grows its own
 559   // buffer, and buffer_size determines the initial buffer size. The buffer is
 560   // owned by the assembler and deallocated upon destruction of the assembler.
 561   //
 562   // If the provided buffer is not NULL, the assembler uses the provided buffer
 563   // for code generation and assumes its size to be buffer_size. If the buffer
 564   // is too small, a fatal error occurs. No deallocation of the buffer is done
 565   // upon destruction of the assembler.
 566   Assembler(Isolate* isolate, void* buffer, int buffer_size);
 567   virtual ~Assembler() { }
 568
 569   // GetCode emits any pending (non-emitted) code and fills the descriptor
 570   // desc. GetCode() is idempotent; it returns the same result if no other
 571   // Assembler functions are invoked in between GetCode() calls.
 572   void GetCode(CodeDesc* desc);
 573
 574   // Read/Modify the code target in the relative branch/call instruction at pc.
 575   // On the x64 architecture, we use relative jumps with a 32-bit displacement
 576   // to jump to other Code objects in the Code space in the heap.
 577   // Jumps to C functions are done indirectly through a 64-bit register holding
 578   // the absolute address of the target.
 579   // These functions convert between absolute Addresses of Code objects and
 580   // the relative displacements stored in the code.
 581   static inline Address target_address_at(Address pc);
 582   static inline void set_target_address_at(Address pc, Address target);
 583
 584   // Return the code target address at a call site from the return address
 585   // of that call in the instruction stream.
 586   static inline Address target_address_from_return_address(Address pc);
 587
 588   // This sets the branch destination (which is in the instruction on x64).
 589   // This is for calls and branches within generated code.
 590   inline static void deserialization_set_special_target_at(
 591       Address instruction_payload, Address target) {
 592     set_target_address_at(instruction_payload, target);
 593   }
 594
 595   static inline RelocInfo::Mode RelocInfoNone() {
 596     if (kPointerSize == kInt64Size) {
 597       return RelocInfo::NONE64;
 598     } else {
 599       ASSERT(kPointerSize == kInt32Size);
 600       return RelocInfo::NONE32;
 601     }
 602   }
 603
 604   inline Handle<Object> code_target_object_handle_at(Address pc);
 605   inline Address runtime_entry_at(Address pc);
 606   // Number of bytes taken up by the branch target in the code.
 607   static const int kSpecialTargetSize = 4;  // Use 32-bit displacement.
 608   // Distance between the address of the code target in the call instruction
 609   // and the return address pushed on the stack.
 610   static const int kCallTargetAddressOffset = 4;  // Use 32-bit displacement.
 611   // The length of call(kScratchRegister).
 612   static const int kCallScratchRegisterInstructionLength = 3;
 613   // The length of call(Immediate32).
 614   static const int kShortCallInstructionLength = 5;
 615   // The length of movq(kScratchRegister, address).
 616   static const int kMoveAddressIntoScratchRegisterInstructionLength =
 617       2 + kPointerSize;
 618   // The length of movq(kScratchRegister, address) and call(kScratchRegister).
 619   static const int kCallSequenceLength =
 620       kMoveAddressIntoScratchRegisterInstructionLength +
 621       kCallScratchRegisterInstructionLength;
 622
 623   // The js return and debug break slot must be able to contain an indirect
 624   // call sequence, some x64 JS code is padded with int3 to make it large
 625   // enough to hold an instruction when the debugger patches it.
 626   static const int kJSReturnSequenceLength = kCallSequenceLength;
 627   static const int kDebugBreakSlotLength = kCallSequenceLength;
 628   static const int kPatchDebugBreakSlotReturnOffset = kCallTargetAddressOffset;
 629   // Distance between the start of the JS return sequence and where the
 630   // 32-bit displacement of a short call would be. The short call is from
 631   // SetDebugBreakAtIC from debug-x64.cc.
 632   static const int kPatchReturnSequenceAddressOffset =
 633       kJSReturnSequenceLength - kPatchDebugBreakSlotReturnOffset;
 634   // Distance between the start of the JS return sequence and where the
 635   // 32-bit displacement of a short call would be. The short call is from
 636   // SetDebugBreakAtIC from debug-x64.cc.
 637   static const int kPatchDebugBreakSlotAddressOffset =
 638       kDebugBreakSlotLength - kPatchDebugBreakSlotReturnOffset;
 639   static const int kRealPatchReturnSequenceAddressOffset =
 640       kMoveAddressIntoScratchRegisterInstructionLength - kPointerSize;
 641
 642   // One byte opcode for test eax,0xXXXXXXXX.
 643   static const byte kTestEaxByte = 0xA9;
 644   // One byte opcode for test al, 0xXX.
 645   static const byte kTestAlByte = 0xA8;
 646   // One byte opcode for nop.
 647   static const byte kNopByte = 0x90;
 648
 649   // One byte prefix for a short conditional jump.
 650   static const byte kJccShortPrefix = 0x70;
 651   static const byte kJncShortOpcode = kJccShortPrefix | not_carry;
 652   static const byte kJcShortOpcode = kJccShortPrefix | carry;
 653   static const byte kJnzShortOpcode = kJccShortPrefix | not_zero;
 654   static const byte kJzShortOpcode = kJccShortPrefix | zero;
 655
 656
 657   // ---------------------------------------------------------------------------
 658   // Code generation
 659   //
 660   // Function names correspond one-to-one to x64 instruction mnemonics.
 661   // Unless specified otherwise, instructions operate on 64-bit operands.
 662   //
 663   // If we need versions of an assembly instruction that operate on different
 664   // width arguments, we add a single-letter suffix specifying the width.
 665   // This is done for the following instructions: mov, cmp, inc, dec,
 666   // add, sub, and test.
 667   // There are no versions of these instructions without the suffix.
 668   // - Instructions on 8-bit (byte) operands/registers have a trailing 'b'.
 669   // - Instructions on 16-bit (word) operands/registers have a trailing 'w'.
 670   // - Instructions on 32-bit (doubleword) operands/registers use 'l'.
 671   // - Instructions on 64-bit (quadword) operands/registers use 'q'.
 672   //
 673   // Some mnemonics, such as "and", are the same as C++ keywords.
 674   // Naming conflicts with C++ keywords are resolved by adding a trailing '_'.
 675
 676 #define DECLARE_INSTRUCTION(instruction)                \
 677   template<class P1, class P2>                          \
 678   void instruction##p(P1 p1, P2 p2) {                   \
 679     emit_##instruction(p1, p2, kPointerSize);           \
 680   }                                                     \
 681                                                         \
 682   template<class P1, class P2>                          \
 683   void instruction##l(P1 p1, P2 p2) {                   \
 684     emit_##instruction(p1, p2, kInt32Size);             \
 685   }                                                     \
 686                                                         \
 687   template<class P1, class P2>                          \
 688   void instruction##q(P1 p1, P2 p2) {                   \
 689     emit_##instruction(p1, p2, kInt64Size);             \
 690   }
 691   ASSEMBLER_INSTRUCTION_LIST(DECLARE_INSTRUCTION)
 692 #undef DECLARE_INSTRUCTION
 693
 694   // Insert the smallest number of nop instructions
 695   // possible to align the pc offset to a multiple
 696   // of m, where m must be a power of 2.
 697   void Align(int m);
 698   void Nop(int bytes = 1);
 699   // Aligns code to something that's optimal for a jump target for the platform.
 700   void CodeTargetAlign();
 701
 702   // Stack
 703   void pushfq();
 704   void popfq();
 705
 706   void push(Immediate value);
 707   // Push a 32 bit integer, and guarantee that it is actually pushed as a
 708   // 32 bit value, the normal push will optimize the 8 bit case.
 709   void push_imm32(int32_t imm32);
 710   void push(Register src);
 711   void push(const Operand& src);
 712
 713   void pop(Register dst);
 714   void pop(const Operand& dst);
 715
 716   void enter(Immediate size);
 717   void leave();
 718
 719   // Moves
 720   void movb(Register dst, const Operand& src);
 721   void movb(Register dst, Immediate imm);
 722   void movb(const Operand& dst, Register src);
 723   void movb(const Operand& dst, Immediate imm);
 724
 725   // Move the low 16 bits of a 64-bit register value to a 16-bit
 726   // memory location.
 727   void movw(Register dst, const Operand& src);
 728   void movw(const Operand& dst, Register src);
 729   void movw(const Operand& dst, Immediate imm);
 730
 731   // Move the offset of the label location relative to the current
 732   // position (after the move) to the destination.
 733   void movl(const Operand& dst, Label* src);
 734
 735   // Loads a pointer into a register with a relocation mode.
 736   void movp(Register dst, void* ptr, RelocInfo::Mode rmode);
 737
 738   // Loads a 64-bit immediate into a register.
 739   void movq(Register dst, int64_t value);
 740   void movq(Register dst, uint64_t value);
 741
 742   void movsxbq(Register dst, const Operand& src);
 743   void movsxwq(Register dst, const Operand& src);
 744   void movsxlq(Register dst, Register src);
 745   void movsxlq(Register dst, const Operand& src);
 746   void movzxbq(Register dst, const Operand& src);
 747   void movzxbl(Register dst, const Operand& src);
 748   void movzxwq(Register dst, const Operand& src);
 749   void movzxwl(Register dst, const Operand& src);
 750   void movzxwl(Register dst, Register src);
 751
 752   // Repeated moves.
 753
 754   void repmovsb();
 755   void repmovsw();
 756   void repmovsl();
 757   void repmovsq();
 758
 759   // Instruction to load from an immediate 64-bit pointer into RAX.
 760   void load_rax(void* ptr, RelocInfo::Mode rmode);
 761   void load_rax(ExternalReference ext);
 762
 763   // Conditional moves.
 764   void cmovq(Condition cc, Register dst, Register src);
 765   void cmovq(Condition cc, Register dst, const Operand& src);
 766   void cmovl(Condition cc, Register dst, Register src);
 767   void cmovl(Condition cc, Register dst, const Operand& src);
 768
 769   // Exchange two registers
 770   void xchgq(Register dst, Register src);
 771   void xchgl(Register dst, Register src);
 772
 773   // Arithmetics
 774   void addl(Register dst, Register src) {
 775     arithmetic_op_32(0x03, dst, src);
 776   }
 777
 778   void addl(Register dst, Immediate src) {
 779     immediate_arithmetic_op_32(0x0, dst, src);
 780   }
 781
 782   void addl(Register dst, const Operand& src) {
 783     arithmetic_op_32(0x03, dst, src);
 784   }
 785
 786   void addl(const Operand& dst, Immediate src) {
 787     immediate_arithmetic_op_32(0x0, dst, src);
 788   }
 789
 790   void addl(const Operand& dst, Register src) {
 791     arithmetic_op_32(0x01, src, dst);
 792   }
 793
 794   void addq(Register dst, Register src) {
 795     arithmetic_op(0x03, dst, src);
 796   }
 797
 798   void addq(Register dst, const Operand& src) {
 799     arithmetic_op(0x03, dst, src);
 800   }
 801
 802   void addq(const Operand& dst, Register src) {
 803     arithmetic_op(0x01, src, dst);
 804   }
 805
 806   void addq(Register dst, Immediate src) {
 807     immediate_arithmetic_op(0x0, dst, src);
 808   }
 809
 810   void addq(const Operand& dst, Immediate src) {
 811     immediate_arithmetic_op(0x0, dst, src);
 812   }
 813
 814   void sbbl(Register dst, Register src) {
 815     arithmetic_op_32(0x1b, dst, src);
 816   }
 817
 818   void sbbq(Register dst, Register src) {
 819     arithmetic_op(0x1b, dst, src);
 820   }
 821
 822   void cmpb(Register dst, Immediate src) {
         // Byte compare.  Immediate forms use the 8-bit immediate helper with
         // subcode 0x7 (the subcode every cmp* immediate form in this file uses).
 823     immediate_arithmetic_op_8(0x7, dst, src);
 824   }
 825
       // Compare against al with an immediate; defined out of line.
 826   void cmpb_al(Immediate src);
 827
 828   void cmpb(Register dst, Register src) {
         // NOTE(review): the register/memory byte forms go through arithmetic_op,
         // the same helper the 64-bit instructions use; only the byte opcodes
         // (0x3A / 0x38) distinguish them from cmpq.
 829     arithmetic_op(0x3A, dst, src);
 830   }
 831
 832   void cmpb(Register dst, const Operand& src) {
 833     arithmetic_op(0x3A, dst, src);
 834   }
 835
 836   void cmpb(const Operand& dst, Register src) {
         // Memory on the left-hand side: opcode 0x38, register passed first.
 837     arithmetic_op(0x38, src, dst);
 838   }
 839
 840   void cmpb(const Operand& dst, Immediate src) {
 841     immediate_arithmetic_op_8(0x7, dst, src);
 842   }
 843
 844   void cmpw(const Operand& dst, Immediate src) {
         // 16-bit compare.  Immediate forms use immediate_arithmetic_op_16 with
         // subcode 0x7; register/memory forms use arithmetic_op_16.
 845     immediate_arithmetic_op_16(0x7, dst, src);
 846   }
 847
 848   void cmpw(Register dst, Immediate src) {
 849     immediate_arithmetic_op_16(0x7, dst, src);
 850   }
 851
 852   void cmpw(Register dst, const Operand& src) {
 853     arithmetic_op_16(0x3B, dst, src);
 854   }
 855
 856   void cmpw(Register dst, Register src) {
 857     arithmetic_op_16(0x3B, dst, src);
 858   }
 859
 860   void cmpw(const Operand& dst, Register src) {
         // Memory on the left-hand side: opcode 0x39, register passed first.
 861     arithmetic_op_16(0x39, src, dst);
 862   }
 863
 864   void cmpl(Register dst, Register src) {
         // 32-bit compare: opcode 0x3B with a register left-hand side, 0x39 with a
         // memory left-hand side, subcode 0x7 for immediates.
 865     arithmetic_op_32(0x3B, dst, src);
 866   }
 867
 868   void cmpl(Register dst, const Operand& src) {
 869     arithmetic_op_32(0x3B, dst, src);
 870   }
 871
 872   void cmpl(const Operand& dst, Register src) {
         // Register is passed first (helper's `reg` parameter), operand second.
 873     arithmetic_op_32(0x39, src, dst);
 874   }
 875
 876   void cmpl(Register dst, Immediate src) {
 877     immediate_arithmetic_op_32(0x7, dst, src);
 878   }
 879
 880   void cmpl(const Operand& dst, Immediate src) {
 881     immediate_arithmetic_op_32(0x7, dst, src);
 882   }
 883
 884   void cmpq(Register dst, Register src) {
         // 64-bit compare; same opcodes/subcode as cmpl but via the helpers
         // without the _32 suffix.
 885     arithmetic_op(0x3B, dst, src);
 886   }
 887
 888   void cmpq(Register dst, const Operand& src) {
 889     arithmetic_op(0x3B, dst, src);
 890   }
 891
 892   void cmpq(const Operand& dst, Register src) {
         // Register is passed first (helper's `reg` parameter), operand second.
 893     arithmetic_op(0x39, src, dst);
 894   }
 895
 896   void cmpq(Register dst, Immediate src) {
 897     immediate_arithmetic_op(0x7, dst, src);
 898   }
 899
 900   void cmpq(const Operand& dst, Immediate src) {
 901     immediate_arithmetic_op(0x7, dst, src);
 902   }
 903
 904   void and_(Register dst, Register src) {
         // Bitwise AND via the 64-bit helpers.  (The trailing underscore is needed
         // because `and` is a reserved alternative token in C++.)
 905     arithmetic_op(0x23, dst, src);
 906   }
 907
 908   void and_(Register dst, const Operand& src) {
 909     arithmetic_op(0x23, dst, src);
 910   }
 911
 912   void and_(const Operand& dst, Register src) {
         // Memory destination: opcode 0x21, register passed in the `reg` position.
 913     arithmetic_op(0x21, src, dst);
 914   }
 915
 916   void and_(Register dst, Immediate src) {
         // Subcode 0x4 selects AND among the immediate-arithmetic operations.
 917     immediate_arithmetic_op(0x4, dst, src);
 918   }
 919
 920   void and_(const Operand& dst, Immediate src) {
 921     immediate_arithmetic_op(0x4, dst, src);
 922   }
 923
 924   void andl(Register dst, Immediate src) {
         // 32-bit AND (subcode 0x4 for immediates, opcode 0x23 otherwise).
 925     immediate_arithmetic_op_32(0x4, dst, src);
 926   }
 927
 928   void andl(Register dst, Register src) {
 929     arithmetic_op_32(0x23, dst, src);
 930   }
 931
 932   void andl(Register dst, const Operand& src) {
 933     arithmetic_op_32(0x23, dst, src);
 934   }
 935
 936   void andb(Register dst, Immediate src) {
         // Byte-sized AND with an immediate; only the register form is provided.
 937     immediate_arithmetic_op_8(0x4, dst, src);
 938   }
 939
 940   void decq(Register dst);  // Decrement; q/l/b presumably = 64/32/8-bit.
 941   void decq(const Operand& dst);
 942   void decl(Register dst);
 943   void decl(const Operand& dst);
 944   void decb(Register dst);
 945   void decb(const Operand& dst);
 946
 947   // Sign-extends rax into rdx:rax.
 948   void cqo();
 949   // Sign-extends eax into edx:eax.
 950   void cdq();
 951
       // The dividend register pairs below are the ones cqo / cdq fill in, so the
       // sign-extension is presumably emitted right before the division.
 952   // Divide rdx:rax by src.  Quotient in rax, remainder in rdx.
 953   void idivq(Register src);
 954   // Divide edx:eax by lower 32 bits of src.  Quotient in eax, rem. in edx.
 955   void idivl(Register src);
 956
 957   // Signed multiply instructions.
       // One-, two- and three-operand forms; the trailing comments give the
       // result location of each form.
 958   void imul(Register src);                               // rdx:rax = rax * src.
 959   void imul(Register dst, Register src);                 // dst = dst * src.
 960   void imul(Register dst, const Operand& src);           // dst = dst * src.
 961   void imul(Register dst, Register src, Immediate imm);  // dst = src * imm.
 962   // Signed 32-bit multiply instructions.
 963   void imull(Register dst, Register src);                 // dst = dst * src.
 964   void imull(Register dst, const Operand& src);           // dst = dst * src.
 965   void imull(Register dst, Register src, Immediate imm);  // dst = src * imm.
 966
 967   void incq(Register dst);  // Increment; q/l presumably = 64/32-bit.
 968   void incq(const Operand& dst);
 969   void incl(Register dst);
 970   void incl(const Operand& dst);
 971
       // Load effective address of src into dst (leal presumably the 32-bit form).
 972   void lea(Register dst, const Operand& src);
 973   void leal(Register dst, const Operand& src);
 974
 975   // Multiply rax by src, put the result in rdx:rax.
 976   void mul(Register src);
 977
       // Negation; neg presumably operates on 64 bits and negl on 32 bits.
 978   void neg(Register dst);
 979   void neg(const Operand& dst);
 980   void negl(Register dst);
 981
       // Bitwise complement.  Named not_ because `not` is a reserved alternative
       // token in C++; notl presumably is the 32-bit form.
 982   void not_(Register dst);
 983   void not_(const Operand& dst);
 984   void notl(Register dst);
 985
 986   void or_(Register dst, Register src) {
         // Bitwise OR.  Each or_ overload (64-bit helper) is immediately followed
         // by its orl counterpart (32-bit helper) with identical opcode/subcode.
 987     arithmetic_op(0x0B, dst, src);
 988   }
 989
 990   void orl(Register dst, Register src) {
 991     arithmetic_op_32(0x0B, dst, src);
 992   }
 993
 994   void or_(Register dst, const Operand& src) {
 995     arithmetic_op(0x0B, dst, src);
 996   }
 997
 998   void orl(Register dst, const Operand& src) {
 999     arithmetic_op_32(0x0B, dst, src);
1000   }
1001
1002   void or_(const Operand& dst, Register src) {
         // Memory destination: opcode 0x09, register passed in the `reg` position.
1003     arithmetic_op(0x09, src, dst);
1004   }
1005
1006   void orl(const Operand& dst, Register src) {
1007     arithmetic_op_32(0x09, src, dst);
1008   }
1009
1010   void or_(Register dst, Immediate src) {
         // Subcode 0x1 selects OR among the immediate-arithmetic operations.
1011     immediate_arithmetic_op(0x1, dst, src);
1012   }
1013
1014   void orl(Register dst, Immediate src) {
1015     immediate_arithmetic_op_32(0x1, dst, src);
1016   }
1017
1018   void or_(const Operand& dst, Immediate src) {
1019     immediate_arithmetic_op(0x1, dst, src);
1020   }
1021
1022   void orl(const Operand& dst, Immediate src) {
1023     immediate_arithmetic_op_32(0x1, dst, src);
1024   }
1025
1026   void rcl(Register dst, Immediate imm8) {
         // Rotates.  All forward to shift()/shift_32(); the last argument is the
         // subcode selecting the operation: 0x0 rol, 0x1 ror, 0x2 rcl, 0x3 rcr.
1027     shift(dst, imm8, 0x2);
1028   }
1029
1030   void rol(Register dst, Immediate imm8) {
1031     shift(dst, imm8, 0x0);
1032   }
1033
1034   void roll(Register dst, Immediate imm8) {
1035     shift_32(dst, imm8, 0x0);
1036   }
1037
1038   void rcr(Register dst, Immediate imm8) {
1039     shift(dst, imm8, 0x3);
1040   }
1041
1042   void ror(Register dst, Immediate imm8) {
1043     shift(dst, imm8, 0x1);
1044   }
1045
1046   void rorl(Register dst, Immediate imm8) {
1047     shift_32(dst, imm8, 0x1);
1048   }
1049
1050   void rorl_cl(Register dst) {
         // Uses the two-argument shift_32 overload, which takes its count from cl
         // instead of an immediate.
1051     shift_32(dst, 0x1);
1052   }
1053
1054   // Shifts dst:src left by cl bits, affecting only dst.
       // (Double-precision shift; src is not modified.)
1055   void shld(Register dst, Register src);
1056
1057   // Shifts src:dst right by cl bits, affecting only dst.
       // (Double-precision shift; src is not modified.)
1058   void shrd(Register dst, Register src);
1059
1060   // Shifts dst right, duplicating sign bit, by shift_amount bits.
1061   // Shifting by 1 is handled efficiently.
1062   void sar(Register dst, Immediate shift_amount) {
         // Subcode 0x7 selects the arithmetic right shift in shift()/shift_32().
1063     shift(dst, shift_amount, 0x7);
1064   }
1065
1066   // Shifts the 32-bit dst right, duplicating sign bit, by shift_amount bits.
1067   // Shifting by 1 is handled efficiently.
1068   void sarl(Register dst, Immediate shift_amount) {
1069     shift_32(dst, shift_amount, 0x7);
1070   }
1071
1072   // Shifts dst right, duplicating sign bit, by cl % 64 bits.
1073   void sar_cl(Register dst) {
1074     shift(dst, 0x7);
1075   }
1076
1077   // Shifts the 32-bit dst right, duplicating sign bit, by cl % 32 bits.
       // NOTE(review): this comment used to say "cl % 64"; for 32-bit operands x86
       // masks the count to 5 bits -- confirm against the instruction reference.
1078   void sarl_cl(Register dst) {
1079     shift_32(dst, 0x7);
1080   }
1081
1082   void shl(Register dst, Immediate shift_amount) {
         // Left shifts: subcode 0x4.  shl* use the 64-bit helper, shll* the 32-bit
         // one; the *_cl variants take the count from cl instead of an immediate.
1083     shift(dst, shift_amount, 0x4);
1084   }
1085
1086   void shl_cl(Register dst) {
1087     shift(dst, 0x4);
1088   }
1089
1090   void shll_cl(Register dst) {
1091     shift_32(dst, 0x4);
1092   }
1093
1094   void shll(Register dst, Immediate shift_amount) {
1095     shift_32(dst, shift_amount, 0x4);
1096   }
1097
1098   void shr(Register dst, Immediate shift_amount) {
         // Right shifts with subcode 0x5 (sar above uses 0x7 for the sign-
         // duplicating variant).  shr* use the 64-bit helper, shrl* the 32-bit one.
1099     shift(dst, shift_amount, 0x5);
1100   }
1101
1102   void shr_cl(Register dst) {
1103     shift(dst, 0x5);
1104   }
1105
1106   void shrl_cl(Register dst) {
1107     shift_32(dst, 0x5);
1108   }
1109
1110   void shrl(Register dst, Immediate shift_amount) {
1111     shift_32(dst, shift_amount, 0x5);
1112   }
1113
1114   void store_rax(void* dst, RelocInfo::Mode mode);  // Presumably stores rax at the absolute address dst, recorded with reloc mode `mode` -- confirm in the .cc.
1115   void store_rax(ExternalReference ref);  // Same, with the target given as an ExternalReference.
1116
1117   void subq(Register dst, Register src) {
         // 64-bit SUB: opcode 0x2B with a register destination, 0x29 with a memory
         // destination, subcode 0x5 for immediates.
1118     arithmetic_op(0x2B, dst, src);
1119   }
1120
1121   void subq(Register dst, const Operand& src) {
1122     arithmetic_op(0x2B, dst, src);
1123   }
1124
1125   void subq(const Operand& dst, Register src) {
         // Register is passed first (helper's `reg` parameter), operand second.
1126     arithmetic_op(0x29, src, dst);
1127   }
1128
1129   void subq(Register dst, Immediate src) {
1130     immediate_arithmetic_op(0x5, dst, src);
1131   }
1132
1133   void subq(const Operand& dst, Immediate src) {
1134     immediate_arithmetic_op(0x5, dst, src);
1135   }
1136
1137   void subl(Register dst, Register src) {
         // 32-bit SUB; same opcodes/subcode as subq but via the _32 helpers.
1138     arithmetic_op_32(0x2B, dst, src);
1139   }
1140
1141   void subl(Register dst, const Operand& src) {
1142     arithmetic_op_32(0x2B, dst, src);
1143   }
1144
1145   void subl(const Operand& dst, Register src) {
         // Register is passed first (helper's `reg` parameter), operand second.
1146     arithmetic_op_32(0x29, src, dst);
1147   }
1148
1149   void subl(const Operand& dst, Immediate src) {
1150     immediate_arithmetic_op_32(0x5, dst, src);
1151   }
1152
1153   void subl(Register dst, Immediate src) {
1154     immediate_arithmetic_op_32(0x5, dst, src);
1155   }
1156
1157   void subb(Register dst, Immediate src) {
         // Byte-sized SUB with an immediate; only the register form is provided.
1158     immediate_arithmetic_op_8(0x5, dst, src);
1159   }
1160
1161   void testb(Register dst, Register src);  // test*: b/l/q presumably = 8/32/64-bit.
1162   void testb(Register reg, Immediate mask);
1163   void testb(const Operand& op, Immediate mask);
1164   void testb(const Operand& op, Register reg);
1165   void testl(Register dst, Register src);
1166   void testl(Register reg, Immediate mask);
1167   void testl(const Operand& op, Register reg);
1168   void testl(const Operand& op, Immediate mask);
       // Note: no testq(const Operand&, Immediate) overload is declared here.
1169   void testq(const Operand& op, Register reg);
1170   void testq(Register dst, Register src);
1171   void testq(Register dst, Immediate mask);
1172
1173   void xor_(Register dst, Register src) {
         // 64-bit XOR of two registers (opcode 0x33).
1174     if (dst.code() != src.code()) {
1175       arithmetic_op(0x33, dst, src);
1176     } else {
           // Same register on both sides: the result is zero, so the 32-bit form
           // is emitted instead (presumably relying on 32-bit results being
           // zero-extended on x64 -- confirm).
1177       arithmetic_op_32(0x33, dst, src);
1178     }
1179   }
1180
1181   void xorl(Register dst, Register src) {
         // 32-bit XOR: opcode 0x33 with a register destination, 0x31 with a memory
         // destination, subcode 0x6 for immediates.
1182     arithmetic_op_32(0x33, dst, src);
1183   }
1184
1185   void xorl(Register dst, const Operand& src) {
1186     arithmetic_op_32(0x33, dst, src);
1187   }
1188
1189   void xorl(Register dst, Immediate src) {
1190     immediate_arithmetic_op_32(0x6, dst, src);
1191   }
1192
1193   void xorl(const Operand& dst, Register src) {
         // Register is passed first (helper's `reg` parameter), operand second.
1194     arithmetic_op_32(0x31, src, dst);
1195   }
1196
1197   void xorl(const Operand& dst, Immediate src) {
1198     immediate_arithmetic_op_32(0x6, dst, src);
1199   }
1200
1201   void xor_(Register dst, const Operand& src) {
         // Remaining 64-bit XOR forms (memory and immediate operands); unlike the
         // register-register overload these always use the 64-bit helpers.
1202     arithmetic_op(0x33, dst, src);
1203   }
1204
1205   void xor_(const Operand& dst, Register src) {
         // Memory destination: opcode 0x31, register passed in the `reg` position.
1206     arithmetic_op(0x31, src, dst);
1207   }
1208
1209   void xor_(Register dst, Immediate src) {
1210     immediate_arithmetic_op(0x6, dst, src);
1211   }
1212
1213   void xor_(const Operand& dst, Immediate src) {
1214     immediate_arithmetic_op(0x6, dst, src);
1215   }
1216
1217   // Bit operations.
       // Both take a memory operand and a register (presumably the bit index);
       // bt = bit test, bts = bit test and set.
1218   void bt(const Operand& dst, Register src);
1219   void bts(const Operand& dst, Register src);
1220
1221   // Miscellaneous
       // Emitters for single instructions with no explicit operands, plus
       // ret (takes imm16) and setcc (condition code + target register).
1222   void clc();
1223   void cld();
1224   void cpuid();
1225   void hlt();
1226   void int3();
1227   void nop();
1228   void ret(int imm16);
1229   void setcc(Condition cc, Register reg);
1230
1231   // Label operations & relative jumps (PPUM Appendix D)
1232   //
1233   // Takes a branch opcode (cc) and a label (L) and generates
1234   // either a backward branch or a forward branch and links it
1235   // to the label fixup chain. Usage:
1236   //
1237   // Label L;    // unbound label
1238   // j(cc, &L);  // forward branch to unbound label
1239   // bind(&L);   // bind label to the current pc
1240   // j(cc, &L);  // backward branch to bound label
1241   // bind(&L);   // illegal: a label may be bound only once
1242   //
1243   // Note: The same Label can be used for forward and backward branches
1244   // but it may be bound only once.
1245
       // Branches emitted before bind() are forward branches to L, branches
       // emitted after it are backward branches (see the usage sketch above).
1246   void bind(Label* L);  // binds an unbound label L to the current code position
1247
1248   // Calls
1249   // Call near relative 32-bit displacement, relative to next instruction.
1250   void call(Label* L);
1251   void call(Address entry, RelocInfo::Mode rmode);
       // rmode defaults to RelocInfo::CODE_TARGET and ast_id to
       // TypeFeedbackId::None().
1252   void call(Handle<Code> target,
1253             RelocInfo::Mode rmode = RelocInfo::CODE_TARGET,
1254             TypeFeedbackId ast_id = TypeFeedbackId::None());
1255
1256   // Calls directly to the given address using a relative offset.
1257   // Should only ever be used in Code objects for calls within the
1258   // same Code object. Should not be used when generating new code (use labels),
1259   // but only when patching existing code.
1260   void call(Address target);
1261
1262   // Call near absolute indirect, address in register
1263   void call(Register adr);
1264
1265   // Call near indirect, address read from the memory operand
1266   void call(const Operand& operand);
1267
1268   // Jumps
1269   // Jump short or near relative.
1270   // Use a 32-bit signed displacement.
1271   // Unconditional jump to L
       // `distance` defaults to Label::kFar; presumably a nearer distance lets the
       // assembler pick the short encoding -- confirm in the .cc.
1272   void jmp(Label* L, Label::Distance distance = Label::kFar);
1273   void jmp(Address entry, RelocInfo::Mode rmode);
1274   void jmp(Handle<Code> target, RelocInfo::Mode rmode);
1275
1276   // Jump near absolute indirect (r64)
1277   void jmp(Register adr);
1278
1279   // Jump near absolute indirect (m64)
1280   void jmp(const Operand& src);
1281
1282   // Conditional jumps
       // Same target kinds as the relative jmp overloads, with the condition cc
       // as an additional first argument.
1283   void j(Condition cc,
1284          Label* L,
1285          Label::Distance distance = Label::kFar);
1286   void j(Condition cc, Address entry, RelocInfo::Mode rmode);
1287   void j(Condition cc, Handle<Code> target, RelocInfo::Mode rmode);
1288
1289   // Floating-point operations
       // x87-style emitters.  Overloads taking `int i` / `int index` presumably
       // select FPU stack register st(i); the _s/_d suffix distinguishes the two
       // memory-operand widths (NOTE(review): confirm the exact widths in the
       // .cc).  Several *p forms default to i = 1.
1290   void fld(int i);
1291
       // Load constants.
1292   void fld1();
1293   void fldz();
1294   void fldpi();
1295   void fldln2();
1296
       // Loads / stores with a memory operand.
1297   void fld_s(const Operand& adr);
1298   void fld_d(const Operand& adr);
1299
1300   void fstp_s(const Operand& adr);
1301   void fstp_d(const Operand& adr);
1302   void fstp(int index);
1303
       // Integer loads / stores with a memory operand.
1304   void fild_s(const Operand& adr);
1305   void fild_d(const Operand& adr);
1306
1307   void fist_s(const Operand& adr);
1308
1309   void fistp_s(const Operand& adr);
1310   void fistp_d(const Operand& adr);
1311
1312   void fisttp_s(const Operand& adr);
1313   void fisttp_d(const Operand& adr);
1314
1315   void fabs();
1316   void fchs();
1317
       // Arithmetic.
1318   void fadd(int i);
1319   void fsub(int i);
1320   void fmul(int i);
1321   void fdiv(int i);
1322
1323   void fisub_s(const Operand& adr);
1324
1325   void faddp(int i = 1);
1326   void fsubp(int i = 1);
1327   void fsubrp(int i = 1);
1328   void fmulp(int i = 1);
1329   void fdivp(int i = 1);
1330   void fprem();
1331   void fprem1();
1332
       // Stack manipulation.
1333   void fxch(int i = 1);
1334   void fincstp();
1335   void ffree(int i = 0);
1336
       // Comparisons.
1337   void ftst();
1338   void fucomp(int i);
1339   void fucompp();
1340   void fucomi(int i);
1341   void fucomip();
1342
1343   void fcompp();
       // Status / control.
1344   void fnstsw_ax();
1345   void fwait();
1346   void fnclex();
1347
       // Transcendental and scaling operations.
1348   void fsin();
1349   void fcos();
1350   void fptan();
1351   void fyl2x();
1352   void f2xm1();
1353   void fscale();
1354   void fninit();
1355
1356   void frndint();
1357
1358   void sahf();
1359
1360   // SSE instructions
       // Moves.  Memory forms exist in both directions for movups and movss.
1361   void movaps(XMMRegister dst, XMMRegister src);
1362   void movups(XMMRegister dst, const Operand& src);
1363   void movups(const Operand& dst, XMMRegister src);
1364   void movss(XMMRegister dst, const Operand& src);
1365   void movss(const Operand& dst, XMMRegister src);
1366   void shufps(XMMRegister dst, XMMRegister src, byte imm8);
1367
       // Conversions between single-precision floats and integers.
1368   void cvttss2si(Register dst, const Operand& src);
1369   void cvttss2si(Register dst, XMMRegister src);
1370   void cvtlsi2ss(XMMRegister dst, Register src);
1371
       // Packed single-precision logical operations.
1372   void andps(XMMRegister dst, XMMRegister src);
1373   void andps(XMMRegister dst, const Operand& src);
1374   void orps(XMMRegister dst, XMMRegister src);
1375   void orps(XMMRegister dst, const Operand& src);
1376   void xorps(XMMRegister dst, XMMRegister src);
1377   void xorps(XMMRegister dst, const Operand& src);
1378
       // Packed single-precision arithmetic.
1379   void addps(XMMRegister dst, XMMRegister src);
1380   void addps(XMMRegister dst, const Operand& src);
1381   void subps(XMMRegister dst, XMMRegister src);
1382   void subps(XMMRegister dst, const Operand& src);
1383   void mulps(XMMRegister dst, XMMRegister src);
1384   void mulps(XMMRegister dst, const Operand& src);
1385   void divps(XMMRegister dst, XMMRegister src);
1386   void divps(XMMRegister dst, const Operand& src);
1387
       // Moves a mask derived from the XMM source into a general register.
1388   void movmskps(Register dst, XMMRegister src);
1389
1390   // SSE2 instructions
       // Moves between general-purpose and XMM registers.
1391   void movd(XMMRegister dst, Register src);
1392   void movd(Register dst, XMMRegister src);
1393   void movq(XMMRegister dst, Register src);
1394   void movq(Register dst, XMMRegister src);
1395   void movq(XMMRegister dst, XMMRegister src);
1396
1397   // Don't use this unless it's important to keep the
1398   // top half of the destination register unchanged.
1399   // Use movaps when moving double values and movq for integer
1400   // values in xmm registers.
1401   void movsd(XMMRegister dst, XMMRegister src);
1402
1403   void movsd(const Operand& dst, XMMRegister src);
1404   void movsd(XMMRegister dst, const Operand& src);
1405
1406   void movdqa(const Operand& dst, XMMRegister src);
1407   void movdqa(XMMRegister dst, const Operand& src);
1408
1409   void movdqu(const Operand& dst, XMMRegister src);
1410   void movdqu(XMMRegister dst, const Operand& src);
1411
1412   void movapd(XMMRegister dst, XMMRegister src);
1413
       // Conversions between doubles and integers.  The "tt" forms truncate;
       // a trailing q on the name presumably selects a 64-bit integer.
1414   void cvttsd2si(Register dst, const Operand& src);
1415   void cvttsd2si(Register dst, XMMRegister src);
1416   void cvttsd2siq(Register dst, XMMRegister src);
1417
1418   void cvtlsi2sd(XMMRegister dst, const Operand& src);
1419   void cvtlsi2sd(XMMRegister dst, Register src);
1420   void cvtqsi2sd(XMMRegister dst, const Operand& src);
1421   void cvtqsi2sd(XMMRegister dst, Register src);
1422
1423
       // Conversions between single and double precision.
1424   void cvtss2sd(XMMRegister dst, XMMRegister src);
1425   void cvtss2sd(XMMRegister dst, const Operand& src);
1426   void cvtsd2ss(XMMRegister dst, XMMRegister src);
1427
1428   void cvtsd2si(Register dst, XMMRegister src);
1429   void cvtsd2siq(Register dst, XMMRegister src);
1430
       // Scalar double arithmetic.  Only addsd and mulsd have a memory-source
       // overload in this list.
1431   void addsd(XMMRegister dst, XMMRegister src);
1432   void addsd(XMMRegister dst, const Operand& src);
1433   void subsd(XMMRegister dst, XMMRegister src);
1434   void mulsd(XMMRegister dst, XMMRegister src);
1435   void mulsd(XMMRegister dst, const Operand& src);
1436   void divsd(XMMRegister dst, XMMRegister src);
1437
1438   void andpd(XMMRegister dst, XMMRegister src);
1439   void orpd(XMMRegister dst, XMMRegister src);
1440   void xorpd(XMMRegister dst, XMMRegister src);
1441   void sqrtsd(XMMRegister dst, XMMRegister src);
1442
       // Comparisons.
1443   void ucomisd(XMMRegister dst, XMMRegister src);
1444   void ucomisd(XMMRegister dst, const Operand& src);
1445   void cmpltsd(XMMRegister dst, XMMRegister src);
1446
1447   void movmskpd(Register dst, XMMRegister src);
1448
1449   // SSE 4.1 instructions
1450   void extractps(Register dst, XMMRegister src, byte imm8);
1451   void insertps(XMMRegister dst, XMMRegister src, byte imm8);
1452   void pinsrd(XMMRegister dst, Register src, byte imm8);
1453
       // Packed float / packed 32-bit integer operations.
       // NOTE(review): this group sits under the "SSE 4.1" heading but also lists
       // instructions from earlier SSE levels (e.g. minps, paddd) -- confirm which
       // CPU feature checks callers need.
1454   void minps(XMMRegister dst, XMMRegister src);
1455   void minps(XMMRegister dst, const Operand& src);
1456   void maxps(XMMRegister dst, XMMRegister src);
1457   void maxps(XMMRegister dst, const Operand& src);
1458   void rcpps(XMMRegister dst, XMMRegister src);
1459   void rcpps(XMMRegister dst, const Operand& src);
1460   void rsqrtps(XMMRegister dst, XMMRegister src);
1461   void rsqrtps(XMMRegister dst, const Operand& src);
1462   void sqrtps(XMMRegister dst, XMMRegister src);
1463   void sqrtps(XMMRegister dst, const Operand& src);
1464   void paddd(XMMRegister dst, XMMRegister src);
1465   void paddd(XMMRegister dst, const Operand& src);
1466   void psubd(XMMRegister dst, XMMRegister src);
1467   void psubd(XMMRegister dst, const Operand& src);
1468   void pmulld(XMMRegister dst, XMMRegister src);
1469   void pmulld(XMMRegister dst, const Operand& src);
1470   void pmuludq(XMMRegister dst, XMMRegister src);
1471   void pmuludq(XMMRegister dst, const Operand& src);
1472   void punpackldq(XMMRegister dst, XMMRegister src);
1473   void punpackldq(XMMRegister dst, const Operand& src);
1474   void psrldq(XMMRegister dst, uint8_t shift);
1475   void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle);
1476   void cvtps2dq(XMMRegister dst, XMMRegister src);
1477   void cvtps2dq(XMMRegister dst, const Operand& src);
1478   void cvtdq2ps(XMMRegister dst, XMMRegister src);
1479   void cvtdq2ps(XMMRegister dst, const Operand& src);
1480
1481   enum RoundingMode {  // Rounding selector passed to roundsd below.
1482     kRoundToNearest = 0x0,
1483     kRoundDown      = 0x1,
1484     kRoundUp        = 0x2,
1485     kRoundToZero    = 0x3
1486   };
1487
       // Rounds src into dst using the given mode (presumably encoded in the
       // instruction's immediate byte -- confirm in the .cc).
1488   void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
1489
1490   void cmpps(XMMRegister dst, XMMRegister src, int8_t cmp);  // Packed compare; cmp selects the predicate.
       // Convenience forms for specific predicates (presumably wrappers around
       // cmpps with a fixed cmp value -- confirm in the .cc).
1491   void cmpeqps(XMMRegister dst, XMMRegister src);
1492   void cmpltps(XMMRegister dst, XMMRegister src);
1493   void cmpleps(XMMRegister dst, XMMRegister src);
1494   void cmpneqps(XMMRegister dst, XMMRegister src);
1495   void cmpnltps(XMMRegister dst, XMMRegister src);
1496   void cmpnleps(XMMRegister dst, XMMRegister src);
1497
       // Packed 32-bit shifts, with the count given either as an immediate
       // (int8_t shift) or in a second XMM register.
1498   void pslld(XMMRegister reg, int8_t shift);
1499   void pslld(XMMRegister dst, XMMRegister src);
1500   void psrld(XMMRegister reg, int8_t shift);
1501   void psrld(XMMRegister dst, XMMRegister src);
1502   void psrad(XMMRegister reg, int8_t shift);
1503   void psrad(XMMRegister dst, XMMRegister src);
1504
       // Packed 32-bit integer comparisons.
1505   void pcmpgtd(XMMRegister dst, XMMRegister src);
1506   void pcmpeqd(XMMRegister dst, XMMRegister src);
1507   void pcmpltd(XMMRegister dst, XMMRegister src);
1508
1509   // Debugging
1510   void Print();
1511
1512   // Check the code size generated from label to here.
       // Returns the current pc offset minus the label's position, i.e. the
       // number of bytes emitted since `label`.
1513   int SizeOfCodeGeneratedSince(Label* label) {
1514     return pc_offset() - label->pos();
1515   }
1516
1517   // Mark address of the ExitJSFrame code.
1518   void RecordJSReturn();
1519
1520   // Mark address of a debug break slot.
1521   void RecordDebugBreakSlot();
1522
1523   // Record a comment relocation entry that can be used by a disassembler.
1524   // Use --code-comments to enable.
       // `force` defaults to false (presumably records the comment even when the
       // flag is off -- confirm in the .cc).
1525   void RecordComment(const char* msg, bool force = false);
1526
1527   // Writes a single byte (db) or 32-bit word (dd) of data in the code stream.
1528   // Used for inline tables, e.g., jump-tables.
1529   void db(uint8_t data);
1530   void dd(uint32_t data);
1531
       // Accessor for the assembler's embedded PositionsRecorder.
1532   PositionsRecorder* positions_recorder() { return &positions_recorder_; }
1533
1534   // Check if there is less than kGap bytes available in the buffer.
1535   // If this is the case, we need to grow the buffer before emitting
1536   // an instruction or relocation information.
       // The free region is the span between pc_ and reloc_info_writer.pos().
1537   inline bool buffer_overflow() const {
1538     return pc_ >= reloc_info_writer.pos() - kGap;
1539   }
1540
1541   // Get the number of bytes available in the buffer.
       // Computed as reloc_info_writer.pos() - pc_, narrowed to int.
1542   inline int available_space() const {
1543     return static_cast<int>(reloc_info_writer.pos() - pc_);
1544   }
1545
1546   static bool IsNop(Address addr);  // Presumably: is there a nop instruction at addr?
1547
1548   // Avoid overflows for displacements etc.
1549   static const int kMaximalBufferSize = 512*MB;
1550
       // Raw byte access into the code buffer at offset pos.  No bounds check is
       // performed here.
1551   byte byte_at(int pos)  { return buffer_[pos]; }
1552   void set_byte_at(int pos, byte value) { buffer_[pos] = value; }
1553
1554  private:
1555   byte* addr_at(int pos)  { return buffer_ + pos; }  // Address of buffer offset pos.
       // Reads the 32-bit value stored at buffer offset pos.  The access goes
       // through a reinterpret_cast, with no alignment or bounds check.
1556   uint32_t long_at(int pos)  {
1557     return *reinterpret_cast<uint32_t*>(addr_at(pos));
1558   }
       // Overwrites the 32-bit value at buffer offset pos with x.
1559   void long_at_put(int pos, uint32_t x)  {
1560     *reinterpret_cast<uint32_t*>(addr_at(pos)) = x;
1561   }
1562
1563   // code emission
1564   void GrowBuffer();
1565
       // Writes one byte at pc_ and advances pc_.  No space check is done here;
       // see buffer_overflow().
1566   void emit(byte x) { *pc_++ = x; }
       // Wider emitters (l = uint32_t, q = uint64_t, w = uint16_t per their
       // parameter types); emitp additionally takes a relocation mode.
1567   inline void emitl(uint32_t x);
1568   inline void emitp(void* x, RelocInfo::Mode rmode);
1569   inline void emitq(uint64_t x);
1570   inline void emitw(uint16_t x);
1571   inline void emit_code_target(Handle<Code> target,
1572                                RelocInfo::Mode rmode,
1573                                TypeFeedbackId ast_id = TypeFeedbackId::None());
1574   inline void emit_runtime_entry(Address entry, RelocInfo::Mode rmode);
       // Emits the immediate's value_ via emitl (i.e. as a 32-bit quantity).
1575   void emit(Immediate x) { emitl(x.value_); }
1576
1577   // Emits a REX prefix that encodes a 64-bit operand size and
1578   // the top bit of both register codes.
1579   // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
1580   // REX.W is set.
1581   inline void emit_rex_64(XMMRegister reg, Register rm_reg);
1582   inline void emit_rex_64(Register reg, XMMRegister rm_reg);
1583   inline void emit_rex_64(Register reg, Register rm_reg);
1584
1585   // Emits a REX prefix that encodes a 64-bit operand size and
1586   // the top bit of the destination, index, and base register codes.
1587   // The high bit of reg is used for REX.R, the high bit of op's base
1588   // register is used for REX.B, and the high bit of op's index register
1589   // is used for REX.X.  REX.W is set.
1590   inline void emit_rex_64(Register reg, const Operand& op);
1591   inline void emit_rex_64(XMMRegister reg, const Operand& op);
1592
1593   // Emits a REX prefix that encodes a 64-bit operand size and
1594   // the top bit of the register code.
1595   // The high bit of register is used for REX.B.
1596   // REX.W is set and REX.R and REX.X are clear.
1597   inline void emit_rex_64(Register rm_reg);
1598
1599   // Emits a REX prefix that encodes a 64-bit operand size and
1600   // the top bit of the index and base register codes.
1601   // The high bit of op's base register is used for REX.B, and the high
1602   // bit of op's index register is used for REX.X.
1603   // REX.W is set and REX.R clear.
1604   inline void emit_rex_64(const Operand& op);
1605
1606   // Emit a REX prefix that only sets REX.W to choose a 64-bit operand size.
       // The prefix byte emitted is the constant 0x48.
1607   void emit_rex_64() { emit(0x48); }
1608
1609   // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
1610   // REX.W is clear.
       // Unlike emit_optional_rex_32 below, nothing here says the prefix is ever
       // skipped, so these presumably always emit the byte.
1611   inline void emit_rex_32(Register reg, Register rm_reg);
1612
1613   // The high bit of reg is used for REX.R, the high bit of op's base
1614   // register is used for REX.B, and the high bit of op's index register
1615   // is used for REX.X.  REX.W is cleared.
1616   inline void emit_rex_32(Register reg, const Operand& op);
1617
1618   // High bit of rm_reg goes to REX.B.
1619   // REX.W, REX.R and REX.X are clear.
1620   inline void emit_rex_32(Register rm_reg);
1621
1622   // High bit of base goes to REX.B and high bit of index to REX.X.
1623   // REX.W and REX.R are clear.
1624   inline void emit_rex_32(const Operand& op);
1625
1626   // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
1627   // REX.W is cleared.  If no REX bits are set, no byte is emitted.
1628   inline void emit_optional_rex_32(Register reg, Register rm_reg);
1629
1630   // The high bit of reg is used for REX.R, the high bit of op's base
1631   // register is used for REX.B, and the high bit of op's index register
1632   // is used for REX.X.  REX.W is cleared.  If no REX bits are set, nothing
1633   // is emitted.
1634   inline void emit_optional_rex_32(Register reg, const Operand& op);
1635
1636   // As for emit_optional_rex_32(Register, Register), except that
1637   // the registers are XMM registers.
1638   inline void emit_optional_rex_32(XMMRegister reg, XMMRegister base);
1639
1640   // As for emit_optional_rex_32(Register, Register), except that
1641   // the first register (reg) is an XMM register.
1642   inline void emit_optional_rex_32(XMMRegister reg, Register base);
1643
1644   // As for emit_optional_rex_32(Register, Register), except that
1645   // the second register (base) is an XMM register.
1646   inline void emit_optional_rex_32(Register reg, XMMRegister base);
1647
1648   // As for emit_optional_rex_32(Register, const Operand&), except that
1649   // the register is an XMM register.
1650   inline void emit_optional_rex_32(XMMRegister reg, const Operand& op);
1651
1652   // Optionally do as emit_rex_32(Register) if the register number has
1653   // the high bit set.
1654   inline void emit_optional_rex_32(Register rm_reg);
1655
1656   // As for emit_optional_rex_32(Register), except that the register is
1657   // an XMM register.
1658   inline void emit_optional_rex_32(XMMRegister rm_reg);
1659
1660   // Optionally do as emit_rex_32(const Operand&) if the operand register
1661   // numbers have a high bit set.
1662   inline void emit_optional_rex_32(const Operand& op);
1663
1664   template<class P1>
1665   void emit_rex(P1 p1, int size) {
         // Size-dispatched REX emission for a single operand: kInt64Size selects
         // the mandatory REX.W prefix, kInt32Size the optional 32-bit prefix.  Any
         // other size trips the ASSERT.
1666     if (size != kInt64Size) {
1667       ASSERT(size == kInt32Size);
1668       emit_optional_rex_32(p1);
1669     } else {
1670       emit_rex_64(p1);
1671     }
1672   }
1673
1674   template<class P1, class P2>
1675   void emit_rex(P1 p1, P2 p2, int size) {
         // Two-operand variant of the size-dispatched REX emitter: kInt64Size
         // selects emit_rex_64, kInt32Size selects emit_optional_rex_32.  Any other
         // size trips the ASSERT.
1676     if (size != kInt64Size) {
1677       ASSERT(size == kInt32Size);
1678       emit_optional_rex_32(p1, p2);
1679     } else {
1680       emit_rex_64(p1, p2);
1681     }
1682   }
1683
1684   // Emit the ModR/M byte, and optionally the SIB byte and
1685   // 1- or 4-byte offset for a memory operand.  Also encodes
1686   // the second operand of the operation, a register or operation
1687   // subcode, into the reg field of the ModR/M byte.
       // Only the register's low bits are passed on; the high bit is expected to
       // be carried by a REX prefix (see the emit_rex_* helpers).
1688   void emit_operand(Register reg, const Operand& adr) {
1689     emit_operand(reg.low_bits(), adr);
1690   }
1691
1692   // Emit the ModR/M byte, and optionally the SIB byte and
1693   // 1- or 4-byte offset for a memory operand.  Also used to encode
1694   // a three-bit opcode extension into the ModR/M byte.
1695   void emit_operand(int rm, const Operand& adr);
1696
1697   // Emits a ModR/M byte whose top two bits are set (0xC0), with the low bits
       // of reg in bits 3..5 and the low bits of rm_reg in bits 0..2.
1698   void emit_modrm(Register reg, Register rm_reg) {
1699     emit(0xC0 | (reg.low_bits() << 3) | rm_reg.low_bits());
1700   }
1701
1702   // Same layout, but bits 3..5 hold a three-bit operation subcode instead of
1703   // a register; the subcode must fit in three bits (checked by the ASSERT).
1704   void emit_modrm(int code, Register rm_reg) {
1705     ASSERT(is_uint3(code));
1706     emit(0xC0 | (code << 3) | rm_reg.low_bits());
1707   }
1708
1709   // Emit the code-object-relative offset of the label's position
1710   inline void emit_code_relative_offset(Label* label);
1711
1712   // The first argument is the reg field, the second argument is the r/m field.
       // Overloads cover XMM/XMM, XMM/memory and mixed XMM/general-register
       // pairs, plus a single-register form.
1713   void emit_sse_operand(XMMRegister dst, XMMRegister src);
1714   void emit_sse_operand(XMMRegister reg, const Operand& adr);
1715   void emit_sse_operand(XMMRegister dst, Register src);
1716   void emit_sse_operand(Register dst, XMMRegister src);
1717   void emit_sse_operand(XMMRegister dst);
1718
1719   // Emit machine code for one of the operations ADD, ADC, SUB, SBB,
1720   // AND, OR, XOR, or CMP.  The encodings of these operations are all
1721   // similar, differing just in the opcode or in the reg field of the
1722   // ModR/M byte.
       // The _16 / _32 suffix selects the operand size; the unsuffixed helpers
       // are the ones the 64-bit (q) instructions above call.
1723   void arithmetic_op_16(byte opcode, Register reg, Register rm_reg);
1724   void arithmetic_op_16(byte opcode, Register reg, const Operand& rm_reg);
1725   void arithmetic_op_32(byte opcode, Register reg, Register rm_reg);
1726   void arithmetic_op_32(byte opcode, Register reg, const Operand& rm_reg);
1727   void arithmetic_op(byte opcode, Register reg, Register rm_reg);
1728   void arithmetic_op(byte opcode, Register reg, const Operand& rm_reg);
       // Immediate forms: subcode selects the operation (the inline wrappers in
       // this class use 0x0 add, 0x1 or, 0x4 and, 0x5 sub, 0x6 xor, 0x7 cmp).
1729   void immediate_arithmetic_op(byte subcode, Register dst, Immediate src);
1730   void immediate_arithmetic_op(byte subcode, const Operand& dst, Immediate src);
1731   // Operate on a byte in memory or register.
1732   void immediate_arithmetic_op_8(byte subcode,
1733                                  Register dst,
1734                                  Immediate src);
1735   void immediate_arithmetic_op_8(byte subcode,
1736                                  const Operand& dst,
1737                                  Immediate src);
1738   // Operate on a word in memory or register.
1739   void immediate_arithmetic_op_16(byte subcode,
1740                                   Register dst,
1741                                   Immediate src);
1742   void immediate_arithmetic_op_16(byte subcode,
1743                                   const Operand& dst,
1744                                   Immediate src);
1745   // Operate on a 32-bit word in memory or register.
1746   void immediate_arithmetic_op_32(byte subcode,
1747                                   Register dst,
1748                                   Immediate src);
1749   void immediate_arithmetic_op_32(byte subcode,
1750                                   const Operand& dst,
1751                                   Immediate src);
1752
1753   // Emit machine code for a shift of dst by an immediate amount.
1754   void shift(Register dst, Immediate shift_amount, int subcode);
1755   void shift_32(Register dst, Immediate shift_amount, int subcode);
1756   // Shift dst by cl % 64 bits (shift_32: presumably cl % 32 -- confirm).
1757   void shift(Register dst, int subcode);
1758   void shift_32(Register dst, int subcode);
1759
1760   void emit_farith(int b1, int b2, int i);  // Presumably an x87 op; confirm.
1761
1762   // Label support.
1763   // void print(Label* L);  (declaration currently commented out)
1764   void bind_to(Label* L, int pos);  // Presumably binds L to offset pos.
1765
1766   // Record reloc info of mode rmode for the current pc_; data defaults to 0.
1767   void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);
1768
1769   void emit_mov(Register dst, const Operand& src, int size);  // reg <- mem
1770   void emit_mov(Register dst, Register src, int size);  // reg <- reg
1771   void emit_mov(const Operand& dst, Register src, int size);  // mem <- reg
1772   void emit_mov(Register dst, Immediate value, int size);  // reg <- imm
1773   void emit_mov(const Operand& dst, Immediate value, int size);  // mem <- imm
1774
1775   friend class CodePatcher;
1776   friend class EnsureSpace;  // Uses buffer_overflow(), GrowBuffer(), etc.
1777   friend class RegExpMacroAssemblerX64;
1778
1779   // Code generation state.
1780   RelocInfoWriter reloc_info_writer;
1781
1782   List< Handle<Code> > code_targets_;  // Handles to Code objects.
1783
1784   PositionsRecorder positions_recorder_;
1785   friend class PositionsRecorder;
1786 };
1787
1788
1789 // Helper that makes sure there is room for generating instructions and
1790 // relocation information.  The constructor grows the assembler's buffer
1791 // when buffer_overflow() says so; in DEBUG builds the destructor asserts
1792 // that fewer than kGap bytes were generated during its lifetime.
1793 class EnsureSpace BASE_EMBEDDED {
1794  public:
1795   explicit EnsureSpace(Assembler* assembler) : assembler_(assembler) {
1796     if (assembler->buffer_overflow()) assembler->GrowBuffer();
1797 #ifdef DEBUG
1798     space_before_ = assembler->available_space();
1799 #endif
1800   }
1801
1802 #ifdef DEBUG
1803   ~EnsureSpace() {
1804     const int emitted = space_before_ - assembler_->available_space();
1805     ASSERT(emitted < assembler_->kGap);
1806   }
1807 #endif
1808
1809  private:
1810   Assembler* assembler_;  // Assembler being guarded; not owned here.
1811 #ifdef DEBUG
1812   int space_before_;  // available_space() sampled by the constructor.
1813 #endif
1814 };
1815
1816 } }  // namespace v8::internal
1817
1818 #endif  // V8_X64_ASSEMBLER_X64_H_