src/jit/emitxarch.cpp

   1 // Licensed to the .NET Foundation under one or more agreements.
   2 // The .NET Foundation licenses this file to you under the MIT license.
   3 // See the LICENSE file in the project root for more information.
   4
   5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
   6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
   7 XX                                                                           XX
   8 XX                             emitX86.cpp                                   XX
   9 XX                                                                           XX
  10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
  11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
  12 */
  13
  14 #include "jitpch.h"
  15 #ifdef _MSC_VER
  16 #pragma hdrstop
  17 #endif
  18
  19 #if defined(_TARGET_XARCH_)
  20
  21 /*****************************************************************************/
  22 /*****************************************************************************/
  23
  24 #include "instr.h"
  25 #include "emit.h"
  26 #include "codegen.h"
  27
  28 bool IsSSEInstruction(instruction ins)
  29 {
  30     return (ins >= INS_FIRST_SSE_INSTRUCTION) && (ins <= INS_LAST_SSE_INSTRUCTION);
  31 }
  32
  33 bool IsSSEOrAVXInstruction(instruction ins)
  34 {
  35     return (ins >= INS_FIRST_SSE_INSTRUCTION) && (ins <= INS_LAST_AVX_INSTRUCTION);
  36 }
  37
  38 bool IsAVXOnlyInstruction(instruction ins)
  39 {
  40     return (ins >= INS_FIRST_AVX_INSTRUCTION) && (ins <= INS_LAST_AVX_INSTRUCTION);
  41 }
  42
  43 bool IsFMAInstruction(instruction ins)
  44 {
  45     return (ins >= INS_FIRST_FMA_INSTRUCTION) && (ins <= INS_LAST_FMA_INSTRUCTION);
  46 }
  47
  48 bool IsBMIInstruction(instruction ins)
  49 {
  50     return (ins >= INS_FIRST_BMI_INSTRUCTION) && (ins <= INS_LAST_BMI_INSTRUCTION);
  51 }
  52
  53 regNumber getBmiRegNumber(instruction ins)
  54 {
  55     switch (ins)
  56     {
  57         case INS_blsi:
  58         {
  59             return (regNumber)3;
  60         }
  61
  62         case INS_blsmsk:
  63         {
  64             return (regNumber)2;
  65         }
  66
  67         case INS_blsr:
  68         {
  69             return (regNumber)1;
  70         }
  71
  72         default:
  73         {
  74             assert(IsBMIInstruction(ins));
  75             return REG_NA;
  76         }
  77     }
  78 }
  79
  80 regNumber getSseShiftRegNumber(instruction ins)
  81 {
  82     switch (ins)
  83     {
  84         case INS_psrldq:
  85         {
  86             return (regNumber)3;
  87         }
  88
  89         case INS_pslldq:
  90         {
  91             return (regNumber)7;
  92         }
  93
  94         case INS_psrld:
  95         case INS_psrlw:
  96         case INS_psrlq:
  97         {
  98             return (regNumber)2;
  99         }
 100
 101         case INS_pslld:
 102         case INS_psllw:
 103         case INS_psllq:
 104         {
 105             return (regNumber)6;
 106         }
 107
 108         case INS_psrad:
 109         case INS_psraw:
 110         {
 111             return (regNumber)4;
 112         }
 113
 114         default:
 115         {
 116             assert(!"Invalid instruction for SSE2 instruction of the form: opcode reg, immed8");
 117             return REG_NA;
 118         }
 119     }
 120 }
 121
 122 bool emitter::IsAVXInstruction(instruction ins)
 123 {
 124     return UseVEXEncoding() && IsSSEOrAVXInstruction(ins);
 125 }
 126
 127 // Returns true if the AVX instruction is a binary operator that requires 3 operands.
 128 // When we emit an instruction with only two operands, we will duplicate the destination
 129 // as a source.
 130 // TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to
 131 // be formalized by adding an additional field to instruction table to
 132 // to indicate whether a 3-operand instruction.
 133 bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
 134 {
 135     return ((CodeGenInterface::instInfo[ins] & INS_Flags_IsDstDstSrcAVXInstruction) != 0) && IsAVXInstruction(ins);
 136 }
 137
 138 // Returns true if the AVX instruction requires 3 operands that duplicate the source
 139 // register in the vvvv field.
 140 // TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to
 141 // be formalized by adding an additional field to instruction table to
 142 // to indicate whether a 3-operand instruction.
 143 bool emitter::IsDstSrcSrcAVXInstruction(instruction ins)
 144 {
 145     return ((CodeGenInterface::instInfo[ins] & INS_Flags_IsDstSrcSrcAVXInstruction) != 0) && IsAVXInstruction(ins);
 146 }
 147
 148 //------------------------------------------------------------------------
 149 // AreUpper32BitsZero: check if some previously emitted
 150 //     instruction set the upper 32 bits of reg to zero.
 151 //
 152 // Arguments:
 153 //    reg - register of interest
 154 //
 155 // Return Value:
 156 //    true if previous instruction zeroed reg's upper 32 bits.
 157 //    false if it did not, or if we can't safely determine.
 158 //
 159 // Notes:
 160 //    Currently only looks back one instruction.
 161 //
 162 //    movsx eax, ... might seem viable but we always encode this
 163 //    instruction with a 64 bit destination. See TakesRexWPrefix.
 164
 165 bool emitter::AreUpper32BitsZero(regNumber reg)
 166 {
 167     // Don't look back across IG boundaries (possible control flow)
 168     if (emitCurIGinsCnt == 0)
 169     {
 170         return false;
 171     }
 172
 173     instrDesc* id  = emitLastIns;
 174     insFormat  fmt = id->idInsFmt();
 175
 176     // This isn't meant to be a comprehensive check. Just look for what
 177     // seems to be common.
 178     switch (fmt)
 179     {
 180         case IF_RWR_CNS:
 181         case IF_RRW_CNS:
 182         case IF_RRW_SHF:
 183         case IF_RWR_RRD:
 184         case IF_RRW_RRD:
 185         case IF_RWR_MRD:
 186         case IF_RWR_SRD:
 187         case IF_RWR_ARD:
 188
 189             // Bail if not writing to the right register
 190             if (id->idReg1() != reg)
 191             {
 192                 return false;
 193             }
 194
 195             // Bail if movsx, we always have movsx sign extend to 8 bytes
 196             if (id->idIns() == INS_movsx)
 197             {
 198                 return false;
 199             }
 200
 201             // movzx always zeroes the upper 32 bits.
 202             if (id->idIns() == INS_movzx)
 203             {
 204                 return true;
 205             }
 206
 207             // Else rely on operation size.
 208             return (id->idOpSize() == EA_4BYTE);
 209
 210         default:
 211             break;
 212     }
 213
 214     return false;
 215 }
 216
 217 #ifdef FEATURE_HW_INTRINSICS
 218 //------------------------------------------------------------------------
 219 // IsDstSrcImmAvxInstruction: Checks if the instruction has a "reg, reg/mem, imm" or
 220 //                            "reg/mem, reg, imm" form for the legacy, VEX, and EVEX
 221 //                            encodings.
 222 //
 223 // Arguments:
 224 //    instruction -- processor instruction to check
 225 //
 226 // Return Value:
 227 //    true if instruction has a "reg, reg/mem, imm" or "reg/mem, reg, imm" encoding
 228 //    form for the legacy, VEX, and EVEX encodings.
 229 //
 230 //    That is, the instruction takes two operands, one of which is immediate, and it
 231 //    does not need to encode any data in the VEX.vvvv field.
 232 //
 233 static bool IsDstSrcImmAvxInstruction(instruction ins)
 234 {
 235     switch (ins)
 236     {
 237         case INS_aeskeygenassist:
 238         case INS_extractps:
 239         case INS_pextrb:
 240         case INS_pextrw:
 241         case INS_pextrd:
 242         case INS_pextrq:
 243         case INS_pshufd:
 244         case INS_pshufhw:
 245         case INS_pshuflw:
 246         case INS_roundpd:
 247         case INS_roundps:
 248             return true;
 249         default:
 250             return false;
 251     }
 252 }
 253 #endif // FEATURE_HW_INTRINSICS
 254
 255 // -------------------------------------------------------------------
 256 // Is4ByteSSEInstruction: Returns true if the SSE instruction is a 4-byte opcode.
 257 //
 258 // Arguments:
 259 //    ins  -  instruction
 260 //
 261 // Note that this should be true for any of the instructions in instrsXArch.h
 262 // that use the SSE38 or SSE3A macro but returns false if the VEX encoding is
 263 // in use, since that encoding does not require an additional byte.
 264 bool emitter::Is4ByteSSEInstruction(instruction ins)
 265 {
 266     return !UseVEXEncoding() && EncodedBySSE38orSSE3A(ins);
 267 }
 268
 269 // Returns true if this instruction requires a VEX prefix
 270 // All AVX instructions require a VEX prefix
 271 bool emitter::TakesVexPrefix(instruction ins)
 272 {
 273     // special case vzeroupper as it requires 2-byte VEX prefix
 274     // special case the fencing, movnti and the prefetch instructions as they never take a VEX prefix
 275     switch (ins)
 276     {
 277         case INS_lfence:
 278         case INS_mfence:
 279         case INS_movnti:
 280         case INS_prefetchnta:
 281         case INS_prefetcht0:
 282         case INS_prefetcht1:
 283         case INS_prefetcht2:
 284         case INS_sfence:
 285         case INS_vzeroupper:
 286             return false;
 287         default:
 288             break;
 289     }
 290
 291     return IsAVXInstruction(ins);
 292 }
 293
 294 // Add base VEX prefix without setting W, R, X, or B bits
 295 // L bit will be set based on emitter attr.
 296 //
 297 // 2-byte VEX prefix = C5 <R,vvvv,L,pp>
 298 // 3-byte VEX prefix = C4 <R,X,B,m-mmmm> <W,vvvv,L,pp>
 299 //  - R, X, B, W - bits to express corresponding REX prefixes
 300 //  - m-mmmmm (5-bit)
 301 //    0-00001 - implied leading 0F opcode byte
 302 //    0-00010 - implied leading 0F 38 opcode bytes
 303 //    0-00011 - implied leading 0F 3A opcode bytes
 304 //    Rest    - reserved for future use and usage of them will uresult in Undefined instruction exception
 305 //
 306 // - vvvv (4-bits) - register specifier in 1's complement form; must be 1111 if unused
 307 // - L - scalar or AVX-128 bit operations (L=0),  256-bit operations (L=1)
 308 // - pp (2-bits) - opcode extension providing equivalent functionality of a SIMD size prefix
 309 //                 these prefixes are treated mandatory when used with escape opcode 0Fh for
 310 //                 some SIMD instructions
 311 //   00  - None   (0F    - packed float)
 312 //   01  - 66     (66 0F - packed double)
 313 //   10  - F3     (F3 0F - scalar float
 314 //   11  - F2     (F2 0F - scalar double)
 315 #define DEFAULT_3BYTE_VEX_PREFIX 0xC4E07800000000ULL
 316 #define DEFAULT_3BYTE_VEX_PREFIX_MASK 0xFFFFFF00000000ULL
 317 #define LBIT_IN_3BYTE_VEX_PREFIX 0x00000400000000ULL
 318 emitter::code_t emitter::AddVexPrefix(instruction ins, code_t code, emitAttr attr)
 319 {
 320     // The 2-byte VEX encoding is preferred when possible, but actually emitting
 321     // it depends on a number of factors that we may not know until much later.
 322     //
 323     // In order to handle this "easily", we just carry the 3-byte encoding all
 324     // the way through and "fix-up" the encoding when the VEX prefix is actually
 325     // emitted, by simply checking that all the requirements were met.
 326
 327     // Only AVX instructions require VEX prefix
 328     assert(IsAVXInstruction(ins));
 329
 330     // Shouldn't have already added VEX prefix
 331     assert(!hasVexPrefix(code));
 332
 333     assert((code & DEFAULT_3BYTE_VEX_PREFIX_MASK) == 0);
 334
 335     code |= DEFAULT_3BYTE_VEX_PREFIX;
 336
 337     if (attr == EA_32BYTE)
 338     {
 339         // Set L bit to 1 in case of instructions that operate on 256-bits.
 340         code |= LBIT_IN_3BYTE_VEX_PREFIX;
 341     }
 342
 343     return code;
 344 }
 345
 346 // Returns true if this instruction, for the given EA_SIZE(attr), will require a REX.W prefix
 347 bool TakesRexWPrefix(instruction ins, emitAttr attr)
 348 {
 349     // Because the current implementation of AVX does not have a way to distinguish between the register
 350     // size specification (128 vs. 256 bits) and the operand size specification (32 vs. 64 bits), where both are
 351     // required, the instruction must be created with the register size attribute (EA_16BYTE or EA_32BYTE),
 352     // and here we must special case these by the opcode.
 353     switch (ins)
 354     {
 355         case INS_vpermpd:
 356         case INS_vpermq:
 357         case INS_vpsrlvq:
 358         case INS_vpsllvq:
 359         case INS_pinsrq:
 360         case INS_pextrq:
 361         case INS_vfmadd132pd:
 362         case INS_vfmadd213pd:
 363         case INS_vfmadd231pd:
 364         case INS_vfmadd132sd:
 365         case INS_vfmadd213sd:
 366         case INS_vfmadd231sd:
 367         case INS_vfmaddsub132pd:
 368         case INS_vfmaddsub213pd:
 369         case INS_vfmaddsub231pd:
 370         case INS_vfmsubadd132pd:
 371         case INS_vfmsubadd213pd:
 372         case INS_vfmsubadd231pd:
 373         case INS_vfmsub132pd:
 374         case INS_vfmsub213pd:
 375         case INS_vfmsub231pd:
 376         case INS_vfmsub132sd:
 377         case INS_vfmsub213sd:
 378         case INS_vfmsub231sd:
 379         case INS_vfnmadd132pd:
 380         case INS_vfnmadd213pd:
 381         case INS_vfnmadd231pd:
 382         case INS_vfnmadd132sd:
 383         case INS_vfnmadd213sd:
 384         case INS_vfnmadd231sd:
 385         case INS_vfnmsub132pd:
 386         case INS_vfnmsub213pd:
 387         case INS_vfnmsub231pd:
 388         case INS_vfnmsub132sd:
 389         case INS_vfnmsub213sd:
 390         case INS_vfnmsub231sd:
 391         case INS_vpmaskmovq:
 392         case INS_vpgatherdq:
 393         case INS_vpgatherqq:
 394         case INS_vgatherdpd:
 395         case INS_vgatherqpd:
 396             return true;
 397         default:
 398             break;
 399     }
 400
 401 #ifdef _TARGET_AMD64_
 402     // movsx should always sign extend out to 8 bytes just because we don't track
 403     // whether the dest should be 4 bytes or 8 bytes (attr indicates the size
 404     // of the source, not the dest).
 405     // A 4-byte movzx is equivalent to an 8 byte movzx, so it is not special
 406     // cased here.
 407     //
 408     // Rex_jmp = jmp with rex prefix always requires rex.w prefix.
 409     if (ins == INS_movsx || ins == INS_rex_jmp)
 410     {
 411         return true;
 412     }
 413
 414     if (EA_SIZE(attr) != EA_8BYTE)
 415     {
 416         return false;
 417     }
 418
 419     if (IsSSEOrAVXInstruction(ins))
 420     {
 421         switch (ins)
 422         {
 423             case INS_andn:
 424             case INS_bextr:
 425             case INS_blsi:
 426             case INS_blsmsk:
 427             case INS_blsr:
 428             case INS_bzhi:
 429             case INS_cvttsd2si:
 430             case INS_cvttss2si:
 431             case INS_cvtsd2si:
 432             case INS_cvtss2si:
 433             case INS_cvtsi2sd:
 434             case INS_cvtsi2ss:
 435             case INS_mov_xmm2i:
 436             case INS_mov_i2xmm:
 437             case INS_movnti:
 438             case INS_mulx:
 439             case INS_pdep:
 440             case INS_pext:
 441                 return true;
 442             default:
 443                 return false;
 444         }
 445     }
 446
 447     // TODO-XArch-Cleanup: Better way to not emit REX.W when we don't need it, than just testing all these
 448     // opcodes...
 449     // These are all the instructions that default to 8-byte operand without the REX.W bit
 450     // With 1 special case: movzx because the 4 byte version still zeros-out the hi 4 bytes
 451     // so we never need it
 452     if ((ins != INS_push) && (ins != INS_pop) && (ins != INS_movq) && (ins != INS_movzx) && (ins != INS_push_hide) &&
 453         (ins != INS_pop_hide) && (ins != INS_ret) && (ins != INS_call) && !((ins >= INS_i_jmp) && (ins <= INS_l_jg)))
 454     {
 455         return true;
 456     }
 457     else
 458     {
 459         return false;
 460     }
 461 #else  //!_TARGET_AMD64 = _TARGET_X86_
 462     return false;
 463 #endif //!_TARGET_AMD64_
 464 }
 465
 466 // Returns true if using this register will require a REX.* prefix.
 467 // Since XMM registers overlap with YMM registers, this routine
 468 // can also be used to know whether a YMM register if the
 469 // instruction in question is AVX.
 470 bool IsExtendedReg(regNumber reg)
 471 {
 472 #ifdef _TARGET_AMD64_
 473     return ((reg >= REG_R8) && (reg <= REG_R15)) || ((reg >= REG_XMM8) && (reg <= REG_XMM15));
 474 #else
 475     // X86 JIT operates in 32-bit mode and hence extended reg are not available.
 476     return false;
 477 #endif
 478 }
 479
 480 // Returns true if using this register, for the given EA_SIZE(attr), will require a REX.* prefix
 481 bool IsExtendedReg(regNumber reg, emitAttr attr)
 482 {
 483 #ifdef _TARGET_AMD64_
 484     // Not a register, so doesn't need a prefix
 485     if (reg > REG_XMM15)
 486     {
 487         return false;
 488     }
 489
 490     // Opcode field only has 3 bits for the register, these high registers
 491     // need a 4th bit, that comes from the REX prefix (eiter REX.X, REX.R, or REX.B)
 492     if (IsExtendedReg(reg))
 493     {
 494         return true;
 495     }
 496
 497     if (EA_SIZE(attr) != EA_1BYTE)
 498     {
 499         return false;
 500     }
 501
 502     // There are 12 one byte registers addressible 'below' r8b:
 503     //     al, cl, dl, bl, ah, ch, dh, bh, spl, bpl, sil, dil.
 504     // The first 4 are always addressible, the last 8 are divided into 2 sets:
 505     //     ah,  ch,  dh,  bh
 506     //          -- or --
 507     //     spl, bpl, sil, dil
 508     // Both sets are encoded exactly the same, the difference is the presence
 509     // of a REX prefix, even a REX prefix with no other bits set (0x40).
 510     // So in order to get to the second set we need a REX prefix (but no bits).
 511     //
 512     // TODO-AMD64-CQ: if we ever want to start using the first set, we'll need a different way of
 513     // encoding/tracking/encoding registers.
 514     return (reg >= REG_RSP);
 515 #else
 516     // X86 JIT operates in 32-bit mode and hence extended reg are not available.
 517     return false;
 518 #endif
 519 }
 520
 521 // Since XMM registers overlap with YMM registers, this routine
 522 // can also used to know whether a YMM register in case of AVX instructions.
 523 bool IsXMMReg(regNumber reg)
 524 {
 525 #ifdef _TARGET_AMD64_
 526     return (reg >= REG_XMM0) && (reg <= REG_XMM15);
 527 #else  // !_TARGET_AMD64_
 528     return (reg >= REG_XMM0) && (reg <= REG_XMM7);
 529 #endif // !_TARGET_AMD64_
 530 }
 531
 532 // Returns bits to be encoded in instruction for the given register.
 533 unsigned RegEncoding(regNumber reg)
 534 {
 535     static_assert((REG_XMM0 & 0x7) == 0, "bad XMMBASE");
 536     return (unsigned)(reg & 0x7);
 537 }
 538
 539 // Utility routines that abstract the logic of adding REX.W, REX.R, REX.X, REX.B and REX prefixes
 540 // SSE2: separate 1-byte prefix gets added before opcode.
 541 // AVX:  specific bits within VEX prefix need to be set in bit-inverted form.
 542 emitter::code_t emitter::AddRexWPrefix(instruction ins, code_t code)
 543 {
 544     if (UseVEXEncoding() && IsAVXInstruction(ins))
 545     {
 546         if (TakesVexPrefix(ins))
 547         {
 548             // W-bit is available only in 3-byte VEX prefix that starts with byte C4.
 549             assert(hasVexPrefix(code));
 550
 551             // W-bit is the only bit that is added in non bit-inverted form.
 552             return emitter::code_t(code | 0x00008000000000ULL);
 553         }
 554     }
 555 #ifdef _TARGET_AMD64_
 556     return emitter::code_t(code | 0x4800000000ULL);
 557 #else
 558     assert(!"UNREACHED");
 559     return code;
 560 #endif
 561 }
 562
 563 #ifdef _TARGET_AMD64_
 564
 565 emitter::code_t emitter::AddRexRPrefix(instruction ins, code_t code)
 566 {
 567     if (UseVEXEncoding() && IsAVXInstruction(ins))
 568     {
 569         if (TakesVexPrefix(ins))
 570         {
 571             // R-bit is supported by both 2-byte and 3-byte VEX prefix
 572             assert(hasVexPrefix(code));
 573
 574             // R-bit is added in bit-inverted form.
 575             return code & 0xFF7FFFFFFFFFFFULL;
 576         }
 577     }
 578
 579     return code | 0x4400000000ULL;
 580 }
 581
 582 emitter::code_t emitter::AddRexXPrefix(instruction ins, code_t code)
 583 {
 584     if (UseVEXEncoding() && IsAVXInstruction(ins))
 585     {
 586         if (TakesVexPrefix(ins))
 587         {
 588             // X-bit is available only in 3-byte VEX prefix that starts with byte C4.
 589             assert(hasVexPrefix(code));
 590
 591             // X-bit is added in bit-inverted form.
 592             return code & 0xFFBFFFFFFFFFFFULL;
 593         }
 594     }
 595
 596     return code | 0x4200000000ULL;
 597 }
 598
 599 emitter::code_t emitter::AddRexBPrefix(instruction ins, code_t code)
 600 {
 601     if (UseVEXEncoding() && IsAVXInstruction(ins))
 602     {
 603         if (TakesVexPrefix(ins))
 604         {
 605             // B-bit is available only in 3-byte VEX prefix that starts with byte C4.
 606             assert(hasVexPrefix(code));
 607
 608             // B-bit is added in bit-inverted form.
 609             return code & 0xFFDFFFFFFFFFFFULL;
 610         }
 611     }
 612
 613     return code | 0x4100000000ULL;
 614 }
 615
 616 // Adds REX prefix (0x40) without W, R, X or B bits set
 617 emitter::code_t emitter::AddRexPrefix(instruction ins, code_t code)
 618 {
 619     assert(!UseVEXEncoding() || !IsAVXInstruction(ins));
 620     return code | 0x4000000000ULL;
 621 }
 622
 623 #endif //_TARGET_AMD64_
 624
 625 bool isPrefix(BYTE b)
 626 {
 627     assert(b != 0);    // Caller should check this
 628     assert(b != 0x67); // We don't use the address size prefix
 629     assert(b != 0x65); // The GS segment override prefix is emitted separately
 630     assert(b != 0x64); // The FS segment override prefix is emitted separately
 631     assert(b != 0xF0); // The lock prefix is emitted separately
 632     assert(b != 0x2E); // We don't use the CS segment override prefix
 633     assert(b != 0x3E); // Or the DS segment override prefix
 634     assert(b != 0x26); // Or the ES segment override prefix
 635     assert(b != 0x36); // Or the SS segment override prefix
 636
 637     // That just leaves the size prefixes used in SSE opcodes:
 638     //      Scalar Double  Scalar Single  Packed Double
 639     return ((b == 0xF2) || (b == 0xF3) || (b == 0x66));
 640 }
 641
 642 // Outputs VEX prefix (in case of AVX instructions) and REX.R/X/W/B otherwise.
 643 unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code)
 644 {
 645     if (hasVexPrefix(code))
 646     {
 647         // Only AVX instructions should have a VEX prefix
 648         assert(UseVEXEncoding() && IsAVXInstruction(ins));
 649         code_t vexPrefix = (code >> 32) & 0x00FFFFFF;
 650         code &= 0x00000000FFFFFFFFLL;
 651
 652         WORD leadingBytes = 0;
 653         BYTE check        = (code >> 24) & 0xFF;
 654         if (check != 0)
 655         {
 656             // 3-byte opcode: with the bytes ordered as 0x2211RM33 or
 657             // 4-byte opcode: with the bytes ordered as 0x22114433
 658             // check for a prefix in the 11 position
 659             BYTE sizePrefix = (code >> 16) & 0xFF;
 660             if ((sizePrefix != 0) && isPrefix(sizePrefix))
 661             {
 662                 // 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits
 663                 //
 664                 //   00  - None   (0F    - packed float)
 665                 //   01  - 66     (66 0F - packed double)
 666                 //   10  - F3     (F3 0F - scalar float
 667                 //   11  - F2     (F2 0F - scalar double)
 668                 switch (sizePrefix)
 669                 {
 670                     case 0x66:
 671                         if (IsBMIInstruction(ins))
 672                         {
 673                             switch (ins)
 674                             {
 675                                 case INS_pdep:
 676                                 case INS_mulx:
 677                                 {
 678                                     vexPrefix |= 0x03;
 679                                     break;
 680                                 }
 681
 682                                 case INS_pext:
 683                                 {
 684                                     vexPrefix |= 0x02;
 685                                     break;
 686                                 }
 687
 688                                 default:
 689                                 {
 690                                     vexPrefix |= 0x00;
 691                                     break;
 692                                 }
 693                             }
 694                         }
 695                         else
 696                         {
 697                             vexPrefix |= 0x01;
 698                         }
 699                         break;
 700                     case 0xF3:
 701                         vexPrefix |= 0x02;
 702                         break;
 703                     case 0xF2:
 704                         vexPrefix |= 0x03;
 705                         break;
 706                     default:
 707                         assert(!"unrecognized SIMD size prefix");
 708                         unreached();
 709                 }
 710
 711                 // Now the byte in the 22 position must be an escape byte 0F
 712                 leadingBytes = check;
 713                 assert(leadingBytes == 0x0F);
 714
 715                 // Get rid of both sizePrefix and escape byte
 716                 code &= 0x0000FFFFLL;
 717
 718                 // Check the byte in the 33 position to see if it is 3A or 38.
 719                 // In such a case escape bytes must be 0x0F3A or 0x0F38
 720                 check = code & 0xFF;
 721                 if (check == 0x3A || check == 0x38)
 722                 {
 723                     leadingBytes = (leadingBytes << 8) | check;
 724                     code &= 0x0000FF00LL;
 725                 }
 726             }
 727         }
 728         else
 729         {
 730             // 2-byte opcode with the bytes ordered as 0x0011RM22
 731             // the byte in position 11 must be an escape byte.
 732             leadingBytes = (code >> 16) & 0xFF;
 733             assert(leadingBytes == 0x0F || leadingBytes == 0x00);
 734             code &= 0xFFFF;
 735         }
 736
 737         // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38
 738         // m-mmmmm bits in byte 1 of VEX prefix allows us to encode these
 739         // implied leading bytes. 0x0F is supported by both the 2-byte and
 740         // 3-byte encoding. While 0x0F3A and 0x0F38 are only supported by
 741         // the 3-byte version.
 742
 743         switch (leadingBytes)
 744         {
 745             case 0x00:
 746                 // there is no leading byte
 747                 break;
 748             case 0x0F:
 749                 vexPrefix |= 0x0100;
 750                 break;
 751             case 0x0F38:
 752                 vexPrefix |= 0x0200;
 753                 break;
 754             case 0x0F3A:
 755                 vexPrefix |= 0x0300;
 756                 break;
 757             default:
 758                 assert(!"encountered unknown leading bytes");
 759                 unreached();
 760         }
 761
 762         // At this point
 763         //     VEX.2211RM33 got transformed as VEX.0000RM33
 764         //     VEX.0011RM22 got transformed as VEX.0000RM22
 765         //
 766         // Now output VEX prefix leaving the 4-byte opcode
 767
 768         // The 2-byte VEX encoding, requires that the X and B-bits are set (these
 769         // bits are inverted from the REX values so set means off), the W-bit is
 770         // not set (this bit is not inverted), and that the m-mmmm bits are 0-0001
 771         // (the 2-byte VEX encoding only supports the 0x0F leading byte). When these
 772         // conditions are met, we can change byte-0 from 0xC4 to 0xC5 and then
 773         // byte-1 is the logical-or of bit 7 from byte-1 and bits 0-6 from byte 2
 774         // from the 3-byte VEX encoding.
 775         //
 776         // Given the above, the check can be reduced to a simple mask and comparison.
 777         // * 0xFFFF7F80 is a mask that ignores any bits whose value we don't care about:
 778         //   * R can be set or unset              (0x7F ignores bit 7)
 779         //   * vvvv can be any value              (0x80 ignores bits 3-6)
 780         //   * L can be set or unset              (0x80 ignores bit 2)
 781         //   * pp can be any value                (0x80 ignores bits 0-1)
 782         // * 0x00C46100 is a value that signifies the requirements listed above were met:
 783         //   * We must be a three-byte VEX opcode (0x00C4)
 784         //   * X and B must be set                (0x61 validates bits 5-6)
 785         //   * m-mmmm must be 0-00001             (0x61 validates bits 0-4)
 786         //   * W must be unset                    (0x00 validates bit 7)
 787         if ((vexPrefix & 0xFFFF7F80) == 0x00C46100)
 788         {
 789             emitOutputByte(dst, 0xC5);
 790             emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0x80) | (vexPrefix & 0x7F));
 791             return 2;
 792         }
 793
 794         emitOutputByte(dst, ((vexPrefix >> 16) & 0xFF));
 795         emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0xFF));
 796         emitOutputByte(dst + 2, vexPrefix & 0xFF);
 797         return 3;
 798     }
 799
 800 #ifdef _TARGET_AMD64_
 801     if (code > 0x00FFFFFFFFLL)
 802     {
 803         BYTE prefix = (code >> 32) & 0xFF;
 804         noway_assert(prefix >= 0x40 && prefix <= 0x4F);
 805         code &= 0x00000000FFFFFFFFLL;
 806
 807         // TODO-AMD64-Cleanup: when we remove the prefixes (just the SSE opcodes right now)
 808         // we can remove this code as well
 809
 810         // The REX prefix is required to come after all other prefixes.
 811         // Some of our 'opcodes' actually include some prefixes, if that
 812         // is the case, shift them over and place the REX prefix after
 813         // the other prefixes, and emit any prefix that got moved out.
 814         BYTE check = (code >> 24) & 0xFF;
 815         if (check == 0)
 816         {
 817             // 3-byte opcode: with the bytes ordered as 0x00113322
 818             // check for a prefix in the 11 position
 819             check = (code >> 16) & 0xFF;
 820             if (check != 0 && isPrefix(check))
 821             {
 822                 // Swap the rex prefix and whatever this prefix is
 823                 code = (((DWORD)prefix << 16) | (code & 0x0000FFFFLL));
 824                 // and then emit the other prefix
 825                 return emitOutputByte(dst, check);
 826             }
 827         }
 828         else
 829         {
 830             // 4-byte opcode with the bytes ordered as 0x22114433
 831             // first check for a prefix in the 11 position
 832             BYTE check2 = (code >> 16) & 0xFF;
 833             if (isPrefix(check2))
 834             {
 835                 assert(!isPrefix(check)); // We currently don't use this, so it is untested
 836                 if (isPrefix(check))
 837                 {
 838                     // 3 prefixes were rex = rr, check = c1, check2 = c2 encoded as 0xrrc1c2XXXX
 839                     // Change to c2rrc1XXXX, and emit check2 now
 840                     code = (((code_t)prefix << 24) | ((code_t)check << 16) | (code & 0x0000FFFFLL));
 841                 }
 842                 else
 843                 {
 844                     // 2 prefixes were rex = rr, check2 = c2 encoded as 0xrrXXc2XXXX, (check is part of the opcode)
 845                     // Change to c2XXrrXXXX, and emit check2 now
 846                     code = (((code_t)check << 24) | ((code_t)prefix << 16) | (code & 0x0000FFFFLL));
 847                 }
 848                 return emitOutputByte(dst, check2);
 849             }
 850         }
 851
 852         return emitOutputByte(dst, prefix);
 853     }
 854 #endif // _TARGET_AMD64_
 855
 856     return 0;
 857 }
 858
 859 #ifdef _TARGET_AMD64_
 860 /*****************************************************************************
 861  * Is the last instruction emitted a call instruction?
 862  */
 863 bool emitter::emitIsLastInsCall()
 864 {
 865     if ((emitLastIns != nullptr) && (emitLastIns->idIns() == INS_call))
 866     {
 867         return true;
 868     }
 869
 870     return false;
 871 }
 872
 873 /*****************************************************************************
 874  * We're about to create an epilog. If the last instruction we output was a 'call',
 875  * then we need to insert a NOP, to allow for proper exception-handling behavior.
 876  */
 877 void emitter::emitOutputPreEpilogNOP()
 878 {
 879     if (emitIsLastInsCall())
 880     {
 881         emitIns(INS_nop);
 882     }
 883 }
 884
 885 #endif //_TARGET_AMD64_
 886
 887 // Size of rex prefix in bytes
 888 unsigned emitter::emitGetRexPrefixSize(instruction ins)
 889 {
 890     // In case of AVX instructions, REX prefixes are part of VEX prefix.
 891     // And hence requires no additional byte to encode REX prefixes.
 892     if (IsAVXInstruction(ins))
 893     {
 894         return 0;
 895     }
 896
 897     // If not AVX, then we would need 1-byte to encode REX prefix.
 898     return 1;
 899 }
 900
 901 // Size of vex prefix in bytes
 902 unsigned emitter::emitGetVexPrefixSize(instruction ins, emitAttr attr)
 903 {
 904     if (IsAVXInstruction(ins))
 905     {
 906         return 3;
 907     }
 908
 909     // If not AVX, then we don't need to encode vex prefix.
 910     return 0;
 911 }
 912
 913 // VEX prefix encodes some bytes of the opcode and as a result, overall size of the instruction reduces.
// Therefore, estimating the size by adding the VEX prefix size to the size of the instruction opcode bytes will
// always overestimate.
 915 // Instead this routine will adjust the size of VEX prefix based on the number of bytes of opcode it encodes so that
 916 // instruction size estimate will be accurate.
 917 // Basically this function will decrease the vexPrefixSize,
 918 // so that opcodeSize + vexPrefixAdjustedSize will be the right size.
 919 // rightOpcodeSize + vexPrefixSize
 920 //=(opcodeSize - ExtrabytesSize) + vexPrefixSize
 921 //=opcodeSize + (vexPrefixSize - ExtrabytesSize)
 922 //=opcodeSize + vexPrefixAdjustedSize
 923 unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, code_t code)
 924 {
 925     if (IsAVXInstruction(ins))
 926     {
 927         unsigned vexPrefixAdjustedSize = emitGetVexPrefixSize(ins, attr);
 928         assert(vexPrefixAdjustedSize == 3);
 929
 930         // In this case, opcode will contains escape prefix at least one byte,
 931         // vexPrefixAdjustedSize should be minus one.
 932         vexPrefixAdjustedSize -= 1;
 933
 934         // Get the fourth byte in Opcode.
 935         // If this byte is non-zero, then we should check whether the opcode contains SIMD prefix or not.
 936         BYTE check = (code >> 24) & 0xFF;
 937         if (check != 0)
 938         {
 939             // 3-byte opcode: with the bytes ordered as 0x2211RM33 or
 940             // 4-byte opcode: with the bytes ordered as 0x22114433
 941             // Simd prefix is at the first byte.
 942             BYTE sizePrefix = (code >> 16) & 0xFF;
 943             if (sizePrefix != 0 && isPrefix(sizePrefix))
 944             {
 945                 vexPrefixAdjustedSize -= 1;
 946             }
 947
 948             // If the opcode size is 4 bytes, then the second escape prefix is at fourth byte in opcode.
 949             // But in this case the opcode has not counted R\M part.
 950             // opcodeSize + VexPrefixAdjustedSize - ExtraEscapePrefixSize + ModR\MSize
 951             //=opcodeSize + VexPrefixAdjustedSize -1 + 1
 952             //=opcodeSize + VexPrefixAdjustedSize
 953             // So although we may have second byte escape prefix, we won't decrease vexPrefixAdjustedSize.
 954         }
 955
 956         return vexPrefixAdjustedSize;
 957     }
 958     return 0;
 959 }
 960
 961 // Get size of rex or vex prefix emitted in code
 962 unsigned emitter::emitGetPrefixSize(code_t code)
 963 {
 964     if (hasVexPrefix(code))
 965     {
 966         return 3;
 967     }
 968
 969     if (hasRexPrefix(code))
 970     {
 971         return 1;
 972     }
 973
 974     return 0;
 975 }
 976
 977 #ifdef _TARGET_X86_
 978 /*****************************************************************************
 979  *
 980  *  Record a non-empty stack
 981  */
 982
 983 void emitter::emitMarkStackLvl(unsigned stackLevel)
 984 {
 985     assert(int(stackLevel) >= 0);
 986     assert(emitCurStackLvl == 0);
 987     assert(emitCurIG->igStkLvl == 0);
 988     assert(emitCurIGfreeNext == emitCurIGfreeBase);
 989
 990     assert(stackLevel && stackLevel % sizeof(int) == 0);
 991
 992     emitCurStackLvl = emitCurIG->igStkLvl = stackLevel;
 993
 994     if (emitMaxStackDepth < emitCurStackLvl)
 995     {
 996         JITDUMP("Upping emitMaxStackDepth from %d to %d\n", emitMaxStackDepth, emitCurStackLvl);
 997         emitMaxStackDepth = emitCurStackLvl;
 998     }
 999 }
1000 #endif
1001
1002 /*****************************************************************************
1003  *
1004  *  Get hold of the address mode displacement value for an indirect call.
1005  */
1006
1007 inline ssize_t emitter::emitGetInsCIdisp(instrDesc* id)
1008 {
1009     if (id->idIsLargeCall())
1010     {
1011         return ((instrDescCGCA*)id)->idcDisp;
1012     }
1013     else
1014     {
1015         assert(!id->idIsLargeDsp());
1016         assert(!id->idIsLargeCns());
1017
1018         return id->idAddr()->iiaAddrMode.amDisp;
1019     }
1020 }
1021
1022 /** ***************************************************************************
1023  *
1024  *  The following table is used by the instIsFP()/instUse/DefFlags() helpers.
1025  */
1026
1027 // clang-format off
1028 const insFlags      CodeGenInterface::instInfo[] =
1029 {
1030     #define INST0(id, nm, um, mr,                 flags) static_cast<insFlags>(flags),
1031     #define INST1(id, nm, um, mr,                 flags) static_cast<insFlags>(flags),
1032     #define INST2(id, nm, um, mr, mi,             flags) static_cast<insFlags>(flags),
1033     #define INST3(id, nm, um, mr, mi, rm,         flags) static_cast<insFlags>(flags),
1034     #define INST4(id, nm, um, mr, mi, rm, a4,     flags) static_cast<insFlags>(flags),
1035     #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) static_cast<insFlags>(flags),
1036     #include "instrs.h"
1037     #undef  INST0
1038     #undef  INST1
1039     #undef  INST2
1040     #undef  INST3
1041     #undef  INST4
1042     #undef  INST5
1043 };
1044 // clang-format on
1045
1046 /*****************************************************************************
1047  *
1048  *  Initialize the table used by emitInsModeFormat().
1049  */
1050
1051 // clang-format off
1052 const BYTE          emitter::emitInsModeFmtTab[] =
1053 {
1054     #define INST0(id, nm, um, mr,                 flags) um,
1055     #define INST1(id, nm, um, mr,                 flags) um,
1056     #define INST2(id, nm, um, mr, mi,             flags) um,
1057     #define INST3(id, nm, um, mr, mi, rm,         flags) um,
1058     #define INST4(id, nm, um, mr, mi, rm, a4,     flags) um,
1059     #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) um,
1060     #include "instrs.h"
1061     #undef  INST0
1062     #undef  INST1
1063     #undef  INST2
1064     #undef  INST3
1065     #undef  INST4
1066     #undef  INST5
1067 };
1068 // clang-format on
1069
1070 #ifdef DEBUG
1071 unsigned const emitter::emitInsModeFmtCnt = _countof(emitInsModeFmtTab);
1072 #endif
1073
1074 /*****************************************************************************
1075  *
 *  Combine the given base format with the update mode of the instruction.
1077  */
1078
1079 inline emitter::insFormat emitter::emitInsModeFormat(instruction ins, insFormat base)
1080 {
1081     assert(IF_RRD + IUM_RD == IF_RRD);
1082     assert(IF_RRD + IUM_WR == IF_RWR);
1083     assert(IF_RRD + IUM_RW == IF_RRW);
1084
1085     return (insFormat)(base + emitInsUpdateMode(ins));
1086 }
1087
1088 // This is a helper we need due to Vs Whidbey #254016 in order to distinguish
1089 // if we can not possibly be updating an integer register. This is not the best
1090 // solution, but the other ones (see bug) are going to be much more complicated.
1091 bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
1092 {
1093     instruction ins = id->idIns();
1094
1095     if (!IsSSEOrAVXInstruction(ins))
1096     {
1097         return false;
1098     }
1099
1100     switch (ins)
1101     {
1102         case INS_andn:
1103         case INS_bextr:
1104         case INS_blsi:
1105         case INS_blsmsk:
1106         case INS_blsr:
1107         case INS_bzhi:
1108         case INS_cvttsd2si:
1109         case INS_cvttss2si:
1110         case INS_cvtsd2si:
1111         case INS_cvtss2si:
1112         case INS_extractps:
1113         case INS_mov_xmm2i:
1114         case INS_movmskpd:
1115         case INS_movmskps:
1116         case INS_mulx:
1117         case INS_pdep:
1118         case INS_pext:
1119         case INS_pmovmskb:
1120         case INS_pextrb:
1121         case INS_pextrd:
1122         case INS_pextrq:
1123         case INS_pextrw:
1124         case INS_pextrw_sse41:
1125         {
1126             // These SSE instructions write to a general purpose integer register.
1127             return false;
1128         }
1129
1130         default:
1131         {
1132             return true;
1133         }
1134     }
1135 }
1136
1137 /*****************************************************************************
1138  *
1139  *  Returns the base encoding of the given CPU instruction.
1140  */
1141
1142 inline size_t insCode(instruction ins)
1143 {
1144     // clang-format off
1145     const static
1146     size_t          insCodes[] =
1147     {
1148         #define INST0(id, nm, um, mr,                 flags) mr,
1149         #define INST1(id, nm, um, mr,                 flags) mr,
1150         #define INST2(id, nm, um, mr, mi,             flags) mr,
1151         #define INST3(id, nm, um, mr, mi, rm,         flags) mr,
1152         #define INST4(id, nm, um, mr, mi, rm, a4,     flags) mr,
1153         #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) mr,
1154         #include "instrs.h"
1155         #undef  INST0
1156         #undef  INST1
1157         #undef  INST2
1158         #undef  INST3
1159         #undef  INST4
1160         #undef  INST5
1161     };
1162     // clang-format on
1163
1164     assert((unsigned)ins < _countof(insCodes));
1165     assert((insCodes[ins] != BAD_CODE));
1166
1167     return insCodes[ins];
1168 }
1169
1170 /*****************************************************************************
1171  *
1172  *  Returns the "AL/AX/EAX, imm" accumulator encoding of the given instruction.
1173  */
1174
1175 inline size_t insCodeACC(instruction ins)
1176 {
1177     // clang-format off
1178     const static
1179     size_t          insCodesACC[] =
1180     {
1181         #define INST0(id, nm, um, mr,                 flags)
1182         #define INST1(id, nm, um, mr,                 flags)
1183         #define INST2(id, nm, um, mr, mi,             flags)
1184         #define INST3(id, nm, um, mr, mi, rm,         flags)
1185         #define INST4(id, nm, um, mr, mi, rm, a4,     flags) a4,
1186         #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) a4,
1187         #include "instrs.h"
1188         #undef  INST0
1189         #undef  INST1
1190         #undef  INST2
1191         #undef  INST3
1192         #undef  INST4
1193         #undef  INST5
1194     };
1195     // clang-format on
1196
1197     assert((unsigned)ins < _countof(insCodesACC));
1198     assert((insCodesACC[ins] != BAD_CODE));
1199
1200     return insCodesACC[ins];
1201 }
1202
1203 /*****************************************************************************
1204  *
1205  *  Returns the "register" encoding of the given CPU instruction.
1206  */
1207
1208 inline size_t insCodeRR(instruction ins)
1209 {
1210     // clang-format off
1211     const static
1212     size_t          insCodesRR[] =
1213     {
1214         #define INST0(id, nm, um, mr,                 flags)
1215         #define INST1(id, nm, um, mr,                 flags)
1216         #define INST2(id, nm, um, mr, mi,             flags)
1217         #define INST3(id, nm, um, mr, mi, rm,         flags)
1218         #define INST4(id, nm, um, mr, mi, rm, a4,     flags)
1219         #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) rr,
1220         #include "instrs.h"
1221         #undef  INST0
1222         #undef  INST1
1223         #undef  INST2
1224         #undef  INST3
1225         #undef  INST4
1226         #undef  INST5
1227     };
1228     // clang-format on
1229
1230     assert((unsigned)ins < _countof(insCodesRR));
1231     assert((insCodesRR[ins] != BAD_CODE));
1232
1233     return insCodesRR[ins];
1234 }
1235
1236 // clang-format off
1237 const static
1238 size_t          insCodesRM[] =
1239 {
1240     #define INST0(id, nm, um, mr,                 flags)
1241     #define INST1(id, nm, um, mr,                 flags)
1242     #define INST2(id, nm, um, mr, mi,             flags)
1243     #define INST3(id, nm, um, mr, mi, rm,         flags) rm,
1244     #define INST4(id, nm, um, mr, mi, rm, a4,     flags) rm,
1245     #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) rm,
1246     #include "instrs.h"
1247     #undef  INST0
1248     #undef  INST1
1249     #undef  INST2
1250     #undef  INST3
1251     #undef  INST4
1252     #undef  INST5
1253 };
1254 // clang-format on
1255
// Returns true iff the given CPU instruction has an RM encoding.
1257 inline bool hasCodeRM(instruction ins)
1258 {
1259     assert((unsigned)ins < _countof(insCodesRM));
1260     return ((insCodesRM[ins] != BAD_CODE));
1261 }
1262
1263 /*****************************************************************************
1264  *
1265  *  Returns the "reg, [r/m]" encoding of the given CPU instruction.
1266  */
1267
1268 inline size_t insCodeRM(instruction ins)
1269 {
1270     assert((unsigned)ins < _countof(insCodesRM));
1271     assert((insCodesRM[ins] != BAD_CODE));
1272
1273     return insCodesRM[ins];
1274 }
1275
1276 // clang-format off
1277 const static
1278 size_t          insCodesMI[] =
1279 {
1280     #define INST0(id, nm, um, mr,                 flags)
1281     #define INST1(id, nm, um, mr,                 flags)
1282     #define INST2(id, nm, um, mr, mi,             flags) mi,
1283     #define INST3(id, nm, um, mr, mi, rm,         flags) mi,
1284     #define INST4(id, nm, um, mr, mi, rm, a4,     flags) mi,
1285     #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) mi,
1286     #include "instrs.h"
1287     #undef  INST0
1288     #undef  INST1
1289     #undef  INST2
1290     #undef  INST3
1291     #undef  INST4
1292     #undef  INST5
1293 };
1294 // clang-format on
1295
// Returns true iff the given CPU instruction has an MI encoding.
1297 inline bool hasCodeMI(instruction ins)
1298 {
1299     assert((unsigned)ins < _countof(insCodesMI));
1300     return ((insCodesMI[ins] != BAD_CODE));
1301 }
1302
1303 /*****************************************************************************
1304  *
1305  *  Returns the "[r/m], 32-bit icon" encoding of the given CPU instruction.
1306  */
1307
1308 inline size_t insCodeMI(instruction ins)
1309 {
1310     assert((unsigned)ins < _countof(insCodesMI));
1311     assert((insCodesMI[ins] != BAD_CODE));
1312
1313     return insCodesMI[ins];
1314 }
1315
1316 // clang-format off
1317 const static
1318 size_t          insCodesMR[] =
1319 {
1320     #define INST0(id, nm, um, mr,                 flags)
1321     #define INST1(id, nm, um, mr,                 flags) mr,
1322     #define INST2(id, nm, um, mr, mi,             flags) mr,
1323     #define INST3(id, nm, um, mr, mi, rm,         flags) mr,
1324     #define INST4(id, nm, um, mr, mi, rm, a4,     flags) mr,
1325     #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) mr,
1326     #include "instrs.h"
1327     #undef  INST0
1328     #undef  INST1
1329     #undef  INST2
1330     #undef  INST3
1331     #undef  INST4
1332     #undef  INST5
1333 };
1334 // clang-format on
1335
// Returns true iff the given CPU instruction has an MR encoding.
1337 inline bool hasCodeMR(instruction ins)
1338 {
1339     assert((unsigned)ins < _countof(insCodesMR));
1340     return ((insCodesMR[ins] != BAD_CODE));
1341 }
1342
1343 /*****************************************************************************
1344  *
1345  *  Returns the "[r/m], reg" or "[r/m]" encoding of the given CPU instruction.
1346  */
1347
1348 inline size_t insCodeMR(instruction ins)
1349 {
1350     assert((unsigned)ins < _countof(insCodesMR));
1351     assert((insCodesMR[ins] != BAD_CODE));
1352
1353     return insCodesMR[ins];
1354 }
1355
1356 // Return true if the instruction uses the SSE38 or SSE3A macro in instrsXArch.h.
1357 bool emitter::EncodedBySSE38orSSE3A(instruction ins)
1358 {
1359     const size_t SSE38 = 0x0F660038;
1360     const size_t SSE3A = 0x0F66003A;
1361     const size_t MASK  = 0xFFFF00FF;
1362
1363     size_t insCode = 0;
1364
1365     if (!IsSSEOrAVXInstruction(ins))
1366     {
1367         return false;
1368     }
1369
1370     if (hasCodeRM(ins))
1371     {
1372         insCode = insCodeRM(ins);
1373     }
1374     else if (hasCodeMI(ins))
1375     {
1376         insCode = insCodeMI(ins);
1377     }
1378     else if (hasCodeMR(ins))
1379     {
1380         insCode = insCodeMR(ins);
1381     }
1382
1383     insCode &= MASK;
1384     return insCode == SSE38 || insCode == SSE3A;
1385 }
1386
1387 /*****************************************************************************
1388  *
1389  *  Returns an encoding for the specified register to be used in the bit0-2
1390  *  part of an opcode.
1391  */
1392
1393 inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAttr size, code_t* code)
1394 {
1395     assert(reg < REG_STK);
1396
1397 #ifdef _TARGET_AMD64_
1398     // Either code is not NULL or reg is not an extended reg.
1399     // If reg is an extended reg, instruction needs to be prefixed with 'REX'
1400     // which would require code != NULL.
1401     assert(code != nullptr || !IsExtendedReg(reg));
1402
1403     if (IsExtendedReg(reg))
1404     {
1405         *code = AddRexBPrefix(ins, *code); // REX.B
1406     }
1407     else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr))
1408     {
1409         // We are assuming that we only use/encode SPL, BPL, SIL and DIL
1410         // not the corresponding AH, CH, DH, or BH
1411         *code = AddRexPrefix(ins, *code); // REX
1412     }
1413 #endif // _TARGET_AMD64_
1414
1415     unsigned regBits = RegEncoding(reg);
1416
1417     assert(regBits < 8);
1418     return regBits;
1419 }
1420
1421 /*****************************************************************************
1422  *
1423  *  Returns an encoding for the specified register to be used in the bit3-5
1424  *  part of an opcode.
1425  */
1426
1427 inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAttr size, code_t* code)
1428 {
1429     assert(reg < REG_STK);
1430
1431 #ifdef _TARGET_AMD64_
1432     // Either code is not NULL or reg is not an extended reg.
1433     // If reg is an extended reg, instruction needs to be prefixed with 'REX'
1434     // which would require code != NULL.
1435     assert(code != nullptr || !IsExtendedReg(reg));
1436
1437     if (IsExtendedReg(reg))
1438     {
1439         *code = AddRexRPrefix(ins, *code); // REX.R
1440     }
1441     else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr))
1442     {
1443         // We are assuming that we only use/encode SPL, BPL, SIL and DIL
1444         // not the corresponding AH, CH, DH, or BH
1445         *code = AddRexPrefix(ins, *code); // REX
1446     }
1447 #endif // _TARGET_AMD64_
1448
1449     unsigned regBits = RegEncoding(reg);
1450
1451     assert(regBits < 8);
1452     return (regBits << 3);
1453 }
1454
1455 /***********************************************************************************
1456  *
1457  *  Returns modified AVX opcode with the specified register encoded in bits 3-6 of
1458  *  byte 2 of VEX prefix.
1459  */
1460 inline emitter::code_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, code_t code)
1461 {
1462     assert(reg < REG_STK);
1463     assert(IsAVXInstruction(ins));
1464     assert(hasVexPrefix(code));
1465
1466     // Get 4-bit register encoding
1467     // RegEncoding() gives lower 3 bits
1468     // IsExtendedReg() gives MSB.
1469     code_t regBits = RegEncoding(reg);
1470     if (IsExtendedReg(reg))
1471     {
1472         regBits |= 0x08;
1473     }
1474
1475     // VEX prefix encodes register operand in 1's complement form
1476     // Shift count = 4-bytes of opcode + 0-2 bits
1477     assert(regBits <= 0xF);
1478     regBits <<= 35;
1479     return code ^ regBits;
1480 }
1481
1482 /*****************************************************************************
1483  *
1484  *  Returns an encoding for the specified register to be used in the bit3-5
1485  *  part of an SIB byte (unshifted).
1486  *  Used exclusively to generate the REX.X bit and truncate the register.
1487  */
1488
1489 inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, code_t* code)
1490 {
1491     assert(reg < REG_STK);
1492
1493 #ifdef _TARGET_AMD64_
1494     // Either code is not NULL or reg is not an extended reg.
1495     // If reg is an extended reg, instruction needs to be prefixed with 'REX'
1496     // which would require code != NULL.
1497     assert(code != nullptr || reg < REG_R8 || (reg >= REG_XMM0 && reg < REG_XMM8));
1498
1499     if (IsExtendedReg(reg))
1500     {
1501         *code = AddRexXPrefix(ins, *code); // REX.X
1502     }
1503     unsigned regBits = RegEncoding(reg);
1504 #else  // !_TARGET_AMD64_
1505     unsigned regBits = reg;
1506 #endif // !_TARGET_AMD64_
1507
1508     assert(regBits < 8);
1509     return regBits;
1510 }
1511
1512 /*****************************************************************************
1513  *
1514  *  Returns the "[r/m]" opcode with the mod/RM field set to register.
1515  */
1516
1517 inline emitter::code_t emitter::insEncodeMRreg(instruction ins, code_t code)
1518 {
1519     // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes.
1520     // Otherwise, it will be placed after the 4 byte encoding.
1521     if ((code & 0xFF00) == 0)
1522     {
1523         assert((code & 0xC000) == 0);
1524         code |= 0xC000;
1525     }
1526
1527     return code;
1528 }
1529
1530 /*****************************************************************************
1531  *
1532  *  Returns the given "[r/m]" opcode with the mod/RM field set to register.
1533  */
1534
1535 inline emitter::code_t emitter::insEncodeRMreg(instruction ins, code_t code)
1536 {
1537     // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes.
1538     // Otherwise, it will be placed after the 4 byte encoding.
1539     if ((code & 0xFF00) == 0)
1540     {
1541         assert((code & 0xC000) == 0);
1542         code |= 0xC000;
1543     }
1544     return code;
1545 }
1546
1547 /*****************************************************************************
1548  *
1549  *  Returns the "byte ptr [r/m]" opcode with the mod/RM field set to
1550  *  the given register.
1551  */
1552
1553 inline emitter::code_t emitter::insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, code_t code)
1554 {
1555     assert((code & 0xC000) == 0);
1556     code |= 0xC000;
1557     unsigned regcode = insEncodeReg012(ins, reg, size, &code) << 8;
1558     code |= regcode;
1559     return code;
1560 }
1561
1562 /*****************************************************************************
1563  *
1564  *  Returns the "byte ptr [r/m], icon" opcode with the mod/RM field set to
1565  *  the given register.
1566  */
1567
1568 inline emitter::code_t emitter::insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, code_t code)
1569 {
1570     assert((code & 0xC000) == 0);
1571     code |= 0xC000;
1572     unsigned regcode = insEncodeReg012(ins, reg, size, &code) << 8;
1573     code |= regcode;
1574     return code;
1575 }
1576
1577 /*****************************************************************************
1578  *
1579  *  Returns true iff the given instruction does not have a "[r/m], icon" form, but *does* have a
1580  *  "reg,reg,imm8" form.
1581  */
1582 inline bool insNeedsRRIb(instruction ins)
1583 {
1584     // If this list gets longer, use a switch or a table.
1585     return ins == INS_imul;
1586 }
1587
1588 /*****************************************************************************
1589  *
 *  Returns the "reg,reg,imm8" opcode with both the reg's set to
 *  the given register.
1592  */
1593 inline emitter::code_t emitter::insEncodeRRIb(instruction ins, regNumber reg, emitAttr size)
1594 {
1595     assert(size == EA_4BYTE); // All we handle for now.
1596     assert(insNeedsRRIb(ins));
1597     // If this list gets longer, use a switch, or a table lookup.
1598     code_t   code    = 0x69c0;
1599     unsigned regcode = insEncodeReg012(ins, reg, size, &code);
1600     // We use the same register as source and destination.  (Could have another version that does both regs...)
1601     code |= regcode;
1602     code |= (regcode << 3);
1603     return code;
1604 }
1605
1606 /*****************************************************************************
1607  *
 *  Returns the "+reg" opcode with the given register set into the low
 *  nibble of the opcode
1610  */
1611
1612 inline emitter::code_t emitter::insEncodeOpreg(instruction ins, regNumber reg, emitAttr size)
1613 {
1614     code_t   code    = insCodeRR(ins);
1615     unsigned regcode = insEncodeReg012(ins, reg, size, &code);
1616     code |= regcode;
1617     return code;
1618 }
1619
1620 /*****************************************************************************
1621  *
1622  *  Return the 'SS' field value for the given index scale factor.
1623  */
1624
1625 inline unsigned emitter::insSSval(unsigned scale)
1626 {
1627     assert(scale == 1 || scale == 2 || scale == 4 || scale == 8);
1628
1629     const static BYTE scales[] = {
1630         0x00, // 1
1631         0x40, // 2
1632         0xFF, // 3
1633         0x80, // 4
1634         0xFF, // 5
1635         0xFF, // 6
1636         0xFF, // 7
1637         0xC0, // 8
1638     };
1639
1640     return scales[scale - 1];
1641 }
1642
1643 const instruction emitJumpKindInstructions[] = {INS_nop,
1644
1645 #define JMP_SMALL(en, rev, ins) INS_##ins,
1646 #include "emitjmps.h"
1647
1648                                                 INS_call};
1649
1650 const emitJumpKind emitReverseJumpKinds[] = {
1651     EJ_NONE,
1652
1653 #define JMP_SMALL(en, rev, ins) EJ_##rev,
1654 #include "emitjmps.h"
1655 };
1656
1657 /*****************************************************************************
1658  * Look up the instruction for a jump kind
1659  */
1660
1661 /*static*/ instruction emitter::emitJumpKindToIns(emitJumpKind jumpKind)
1662 {
1663     assert((unsigned)jumpKind < ArrLen(emitJumpKindInstructions));
1664     return emitJumpKindInstructions[jumpKind];
1665 }
1666
1667 /*****************************************************************************
1668  * Reverse the conditional jump
1669  */
1670
1671 /* static */ emitJumpKind emitter::emitReverseJumpKind(emitJumpKind jumpKind)
1672 {
1673     assert(jumpKind < EJ_COUNT);
1674     return emitReverseJumpKinds[jumpKind];
1675 }
1676
1677 /*****************************************************************************
 * Returns true if the given instruction is one for which no code is looked up;
 * at present only INS_align qualifies.
1680  */
1681
1682 inline bool emitInstHasNoCode(instruction ins)
1683 {
1684     if (ins == INS_align)
1685     {
1686         return true;
1687     }
1688
1689     return false;
1690 }
1691
1692 /*****************************************************************************
1693  * When encoding instructions that operate on byte registers
1694  * we have to ensure that we use a low register (EAX, EBX, ECX or EDX)
1695  * otherwise we will incorrectly encode the instruction
1696  */
1697
1698 bool emitter::emitVerifyEncodable(instruction ins, emitAttr size, regNumber reg1, regNumber reg2 /* = REG_NA */)
1699 {
1700 #if CPU_HAS_BYTE_REGS
1701     if (size != EA_1BYTE) // Not operating on a byte register is fine
1702     {
1703         return true;
1704     }
1705
1706     if ((ins != INS_movsx) && // These three instructions support high register
1707         (ins != INS_movzx)    // encodings for reg1
1708 #ifdef FEATURE_HW_INTRINSICS
1709         && (ins != INS_crc32)
1710 #endif
1711             )
1712     {
1713         // reg1 must be a byte-able register
1714         if ((genRegMask(reg1) & RBM_BYTE_REGS) == 0)
1715         {
1716             return false;
1717         }
1718     }
1719     // if reg2 is not REG_NA then reg2 must be a byte-able register
1720     if ((reg2 != REG_NA) && ((genRegMask(reg2) & RBM_BYTE_REGS) == 0))
1721     {
1722         return false;
1723     }
1724 #endif
1725     // The instruction can be encoded
1726     return true;
1727 }
1728
1729 /*****************************************************************************
1730  *
1731  *  Estimate the size (in bytes of generated code) of the given instruction.
1732  */
1733
1734 inline UNATIVE_OFFSET emitter::emitInsSize(code_t code)
1735 {
1736     UNATIVE_OFFSET size = (code & 0xFF000000) ? 4 : (code & 0x00FF0000) ? 3 : 2;
1737 #ifdef _TARGET_AMD64_
1738     size += emitGetPrefixSize(code);
1739 #endif
1740     return size;
1741 }
1742
1743 inline UNATIVE_OFFSET emitter::emitInsSizeRM(instruction ins)
1744 {
1745     return emitInsSize(insCodeRM(ins));
1746 }
1747
1748 inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr)
1749 {
1750     emitAttr size = EA_SIZE(attr);
1751
1752     UNATIVE_OFFSET sz;
1753
1754     // If Byte 4 (which is 0xFF00) is zero, that's where the RM encoding goes.
1755     // Otherwise, it will be placed after the 4 byte encoding, making the total 5 bytes.
1756     // This would probably be better expressed as a different format or something?
1757     code_t code = insCodeRM(ins);
1758
1759     if ((code & 0xFF00) != 0)
1760     {
1761         sz = 5;
1762     }
1763     else
1764     {
1765         sz = emitInsSize(insEncodeRMreg(ins, code));
1766     }
1767
1768     // Most 16-bit operand instructions will need a prefix
1769     if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx)
1770     {
1771         sz += 1;
1772     }
1773
1774     // VEX prefix
1775     sz += emitGetVexPrefixAdjustedSize(ins, size, insCodeRM(ins));
1776
1777     // REX prefix
1778     if (!hasRexPrefix(code))
1779     {
1780         if ((TakesRexWPrefix(ins, size) && ((ins != INS_xor) || (reg1 != reg2))) || IsExtendedReg(reg1, attr) ||
1781             IsExtendedReg(reg2, attr))
1782         {
1783             sz += emitGetRexPrefixSize(ins);
1784         }
1785     }
1786
1787     return sz;
1788 }
1789
1790 /*****************************************************************************/
1791
1792 inline UNATIVE_OFFSET emitter::emitInsSizeSV(code_t code, int var, int dsp)
1793 {
1794     UNATIVE_OFFSET size = emitInsSize(code);
1795     UNATIVE_OFFSET offs;
1796     bool           offsIsUpperBound = true;
1797     bool           EBPbased         = true;
1798
1799     /*  Is this a temporary? */
1800
1801     if (var < 0)
1802     {
1803         /* An address off of ESP takes an extra byte */
1804
1805         if (!emitHasFramePtr)
1806         {
1807             size++;
1808         }
1809
1810         // The offset is already assigned. Find the temp.
1811         TempDsc* tmp = codeGen->regSet.tmpFindNum(var, RegSet::TEMP_USAGE_USED);
1812         if (tmp == nullptr)
1813         {
1814             // It might be in the free lists, if we're working on zero initializing the temps.
1815             tmp = codeGen->regSet.tmpFindNum(var, RegSet::TEMP_USAGE_FREE);
1816         }
1817         assert(tmp != nullptr);
1818         offs = tmp->tdTempOffs();
1819
1820         // We only care about the magnitude of the offset here, to determine instruction size.
1821         if (emitComp->isFramePointerUsed())
1822         {
1823             if ((int)offs < 0)
1824             {
1825                 offs = -(int)offs;
1826             }
1827         }
1828         else
1829         {
1830             // SP-based offsets must already be positive.
1831             assert((int)offs >= 0);
1832         }
1833     }
1834     else
1835     {
1836
1837         /* Get the frame offset of the (non-temp) variable */
1838
1839         offs = dsp + emitComp->lvaFrameAddress(var, &EBPbased);
1840
1841         /* An address off of ESP takes an extra byte */
1842
1843         if (!EBPbased)
1844         {
1845             ++size;
1846         }
1847
1848         /* Is this a stack parameter reference? */
1849
1850         if (emitComp->lvaIsParameter(var)
1851 #if !defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI)
1852             && !emitComp->lvaIsRegArgument(var)
1853 #endif // !_TARGET_AMD64_ || UNIX_AMD64_ABI
1854                 )
1855         {
1856             /* If no EBP frame, arguments are off of ESP, above temps */
1857
1858             if (!EBPbased)
1859             {
1860                 assert((int)offs >= 0);
1861
1862                 offsIsUpperBound = false; // since #temps can increase
1863                 offs += emitMaxTmpSize;
1864             }
1865         }
1866         else
1867         {
1868             /* Locals off of EBP are at negative offsets */
1869
1870             if (EBPbased)
1871             {
1872 #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
1873                 // If localloc is not used, then ebp chaining is done and hence
1874                 // offset of locals will be at negative offsets, Otherwise offsets
1875                 // will be positive.  In future, when RBP gets positioned in the
1876                 // middle of the frame so as to optimize instruction encoding size,
1877                 // the below asserts needs to be modified appropriately.
1878                 // However, for Unix platforms, we always do frame pointer chaining,
1879                 // so offsets from the frame pointer will always be negative.
1880                 if (emitComp->compLocallocUsed || emitComp->opts.compDbgEnC)
1881                 {
1882                     noway_assert((int)offs >= 0);
1883                 }
1884                 else
1885 #endif
1886                 {
1887                     // Dev10 804810 - failing this assert can lead to bad codegen and runtime crashes
1888                     CLANG_FORMAT_COMMENT_ANCHOR;
1889
1890 #ifdef UNIX_AMD64_ABI
1891                     LclVarDsc* varDsc         = emitComp->lvaTable + var;
1892                     bool       isRegPassedArg = varDsc->lvIsParam && varDsc->lvIsRegArg;
1893                     // Register passed args could have a stack offset of 0.
1894                     noway_assert((int)offs < 0 || isRegPassedArg);
1895 #else  // !UNIX_AMD64_ABI
1896                     noway_assert((int)offs < 0);
1897 #endif // !UNIX_AMD64_ABI
1898                 }
1899
1900                 assert(emitComp->lvaTempsHaveLargerOffsetThanVars());
1901
1902                 // lvaInlinedPInvokeFrameVar and lvaStubArgumentVar are placed below the temps
1903                 if (unsigned(var) == emitComp->lvaInlinedPInvokeFrameVar ||
1904                     unsigned(var) == emitComp->lvaStubArgumentVar)
1905                 {
1906                     offs -= emitMaxTmpSize;
1907                 }
1908
1909                 if ((int)offs < 0)
1910                 {
1911                     // offset is negative
1912                     return size + ((int(offs) >= SCHAR_MIN) ? sizeof(char) : sizeof(int));
1913                 }
1914 #ifdef _TARGET_AMD64_
1915                 // This case arises for localloc frames
1916                 else
1917                 {
1918                     return size + ((offs <= SCHAR_MAX) ? sizeof(char) : sizeof(int));
1919                 }
1920 #endif
1921             }
1922
1923             if (emitComp->lvaTempsHaveLargerOffsetThanVars() == false)
1924             {
1925                 offs += emitMaxTmpSize;
1926             }
1927         }
1928     }
1929
1930     assert((int)offs >= 0);
1931
1932 #if !FEATURE_FIXED_OUT_ARGS
1933
1934     /* Are we addressing off of ESP? */
1935
1936     if (!emitHasFramePtr)
1937     {
1938         /* Adjust the effective offset if necessary */
1939
1940         if (emitCntStackDepth)
1941             offs += emitCurStackLvl;
1942
1943         // we could (and used to) check for the special case [sp] here but the stack offset
1944         // estimator was off, and there is very little harm in overestimating for such a
1945         // rare case.
1946     }
1947
1948 #endif // !FEATURE_FIXED_OUT_ARGS
1949
1950 //  printf("lcl = %04X, tmp = %04X, stk = %04X, offs = %04X\n",
1951 //         emitLclSize, emitMaxTmpSize, emitCurStackLvl, offs);
1952
1953 #ifdef _TARGET_AMD64_
1954     bool useSmallEncoding = (SCHAR_MIN <= (int)offs) && ((int)offs <= SCHAR_MAX);
1955 #else
1956     bool useSmallEncoding = (offs <= size_t(SCHAR_MAX));
1957 #endif
1958
1959     // If it is ESP based, and the offset is zero, we will not encode the disp part.
1960     if (!EBPbased && offs == 0)
1961     {
1962         return size;
1963     }
1964     else
1965     {
1966         return size + (useSmallEncoding ? sizeof(char) : sizeof(int));
1967     }
1968 }
1969
1970 inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp)
1971 {
1972     instruction    ins      = id->idIns();
1973     emitAttr       attrSize = id->idOpSize();
1974     UNATIVE_OFFSET prefix   = emitGetVexPrefixAdjustedSize(ins, attrSize, code);
1975     return prefix + emitInsSizeSV(code, var, dsp);
1976 }
1977
1978 inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp, int val)
1979 {
1980     instruction    ins       = id->idIns();
1981     emitAttr       attrSize  = id->idOpSize();
1982     UNATIVE_OFFSET valSize   = EA_SIZE_IN_BYTES(attrSize);
1983     UNATIVE_OFFSET prefix    = emitGetVexPrefixAdjustedSize(ins, attrSize, code);
1984     bool           valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
1985
1986 #ifdef _TARGET_AMD64_
1987     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
1988     // all other opcodes take a sign-extended 4-byte immediate
1989     noway_assert(valSize <= sizeof(int) || !id->idIsCnsReloc());
1990 #endif // _TARGET_AMD64_
1991
1992     if (valSize > sizeof(int))
1993     {
1994         valSize = sizeof(int);
1995     }
1996
1997     if (id->idIsCnsReloc())
1998     {
1999         valInByte = false; // relocs can't be placed in a byte
2000         assert(valSize == sizeof(int));
2001     }
2002
2003     if (valInByte)
2004     {
2005         valSize = sizeof(char);
2006     }
2007
2008     // 16-bit operand instructions need a prefix.
2009     // This referes to 66h size prefix override
2010     if (id->idOpSize() == EA_2BYTE)
2011     {
2012         prefix += 1;
2013     }
2014
2015     return prefix + valSize + emitInsSizeSV(code, var, dsp);
2016 }
2017
2018 /*****************************************************************************/
2019
2020 static bool baseRegisterRequiresSibByte(regNumber base)
2021 {
2022 #ifdef _TARGET_AMD64_
2023     return base == REG_ESP || base == REG_R12;
2024 #else
2025     return base == REG_ESP;
2026 #endif
2027 }
2028
2029 static bool baseRegisterRequiresDisplacement(regNumber base)
2030 {
2031 #ifdef _TARGET_AMD64_
2032     return base == REG_EBP || base == REG_R13;
2033 #else
2034     return base == REG_EBP;
2035 #endif
2036 }
2037
2038 UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
2039 {
2040     emitAttr    attrSize = id->idOpSize();
2041     instruction ins      = id->idIns();
2042     /* The displacement field is in an unusual place for calls */
2043     ssize_t        dsp       = (ins == INS_call) ? emitGetInsCIdisp(id) : emitGetInsAmdAny(id);
2044     bool           dspInByte = ((signed char)dsp == (ssize_t)dsp);
2045     bool           dspIsZero = (dsp == 0);
2046     UNATIVE_OFFSET size;
2047
2048     // Note that the values in reg and rgx are used in this method to decide
2049     // how many bytes will be needed by the address [reg+rgx+cns]
2050     // this includes the prefix bytes when reg or rgx are registers R8-R15
2051     regNumber reg;
2052     regNumber rgx;
2053
2054     // The idAddr field is a union and only some of the instruction formats use the iiaAddrMode variant
2055     // these are IF_AWR_*, IF_ARD_*, IF_ARW_* and IF_*_ARD
2056     // ideally these should really be the only idInsFmts that we see here
2057     //  but we have some outliers to deal with:
2058     //     emitIns_R_L adds IF_RWR_LABEL and calls emitInsSizeAM
2059     //     emitInsRMW adds IF_MRW_CNS, IF_MRW_RRD, IF_MRW_SHF, and calls emitInsSizeAM
2060
2061     switch (id->idInsFmt())
2062     {
2063         case IF_RWR_LABEL:
2064         case IF_MRW_CNS:
2065         case IF_MRW_RRD:
2066         case IF_MRW_SHF:
2067             reg = REG_NA;
2068             rgx = REG_NA;
2069             break;
2070
2071         default:
2072             reg = id->idAddr()->iiaAddrMode.amBaseReg;
2073             rgx = id->idAddr()->iiaAddrMode.amIndxReg;
2074             break;
2075     }
2076
2077     if (id->idIsDspReloc())
2078     {
2079         dspInByte = false; // relocs can't be placed in a byte
2080         dspIsZero = false; // relocs won't always be zero
2081     }
2082
2083     if (code & 0xFF000000)
2084     {
2085         size = 4;
2086     }
2087     else if (code & 0x00FF0000)
2088     {
2089         // BT supports 16 bit operands and this code doesn't handle the necessary 66 prefix.
2090         assert(ins != INS_bt);
2091
2092         assert((attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE)    // Only for x64
2093                || (attrSize == EA_16BYTE) || (attrSize == EA_32BYTE) // only for x64
2094                || (ins == INS_movzx) || (ins == INS_movsx)
2095                // The prefetch instructions are always 3 bytes and have part of their modr/m byte hardcoded
2096                || isPrefetch(ins));
2097         size = 3;
2098     }
2099     else
2100     {
2101         size = 2;
2102
2103         // Most 16-bit operands will require a size prefix.
2104         // This refers to 66h size prefix override.
2105
2106         if (attrSize == EA_2BYTE)
2107         {
2108             size++;
2109         }
2110     }
2111
2112     size += emitGetVexPrefixAdjustedSize(ins, attrSize, code);
2113
2114     if (hasRexPrefix(code))
2115     {
2116         // REX prefix
2117         size += emitGetRexPrefixSize(ins);
2118     }
2119     else if (TakesRexWPrefix(ins, attrSize))
2120     {
2121         // REX.W prefix
2122         size += emitGetRexPrefixSize(ins);
2123     }
2124     else if (IsExtendedReg(reg, EA_PTRSIZE) || IsExtendedReg(rgx, EA_PTRSIZE) ||
2125              ((ins != INS_call) && IsExtendedReg(id->idReg1(), attrSize)))
2126     {
2127         // Should have a REX byte
2128         size += emitGetRexPrefixSize(ins);
2129     }
2130
2131     size += emitAdjustSizeCrc32(ins, attrSize);
2132
2133     if (rgx == REG_NA)
2134     {
2135         /* The address is of the form "[reg+disp]" */
2136
2137         if (reg == REG_NA)
2138         {
2139             /* The address is of the form "[disp]" */
2140
2141             size += sizeof(INT32);
2142
2143 #ifdef _TARGET_AMD64_
2144             // If id is not marked for reloc, add 1 additional byte for SIB that follows disp32
2145             if (!id->idIsDspReloc())
2146             {
2147                 size++;
2148             }
2149 #endif
2150             return size;
2151         }
2152
2153         // If this is just "call reg", we're done.
2154         if (id->idIsCallRegPtr())
2155         {
2156             assert(ins == INS_call);
2157             assert(dsp == 0);
2158             return size;
2159         }
2160
2161         // If the base register is ESP (or R12 on 64-bit systems), a SIB byte must be used.
2162         if (baseRegisterRequiresSibByte(reg))
2163         {
2164             size++;
2165         }
2166
2167         // If the base register is EBP (or R13 on 64-bit systems), a displacement is required.
2168         // Otherwise, the displacement can be elided if it is zero.
2169         if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
2170         {
2171             return size;
2172         }
2173
2174         /* Does the offset fit in a byte? */
2175
2176         if (dspInByte)
2177         {
2178             size += sizeof(char);
2179         }
2180         else
2181         {
2182             size += sizeof(INT32);
2183         }
2184     }
2185     else
2186     {
2187         /* An index register is present */
2188
2189         size++;
2190
2191         /* Is the index value scaled? */
2192
2193         if (emitDecodeScale(id->idAddr()->iiaAddrMode.amScale) > 1)
2194         {
2195             /* Is there a base register? */
2196
2197             if (reg != REG_NA)
2198             {
2199                 /* The address is "[reg + {2/4/8} * rgx + icon]" */
2200
2201                 if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
2202                 {
2203                     /* The address is "[reg + {2/4/8} * rgx]" */
2204                 }
2205                 else
2206                 {
2207                     /* The address is "[reg + {2/4/8} * rgx + disp]" */
2208
2209                     if (dspInByte)
2210                     {
2211                         size += sizeof(char);
2212                     }
2213                     else
2214                     {
2215                         size += sizeof(int);
2216                     }
2217                 }
2218             }
2219             else
2220             {
2221                 /* The address is "[{2/4/8} * rgx + icon]" */
2222
2223                 size += sizeof(INT32);
2224             }
2225         }
2226         else
2227         {
2228             if (dspIsZero && baseRegisterRequiresDisplacement(reg) && !baseRegisterRequiresDisplacement(rgx))
2229             {
2230                 /* Swap reg and rgx, such that reg is not EBP/R13 */
2231                 regNumber tmp                       = reg;
2232                 id->idAddr()->iiaAddrMode.amBaseReg = reg = rgx;
2233                 id->idAddr()->iiaAddrMode.amIndxReg = rgx = tmp;
2234             }
2235
2236             /* The address is "[reg+rgx+dsp]" */
2237
2238             if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
2239             {
2240                 /* This is [reg+rgx]" */
2241             }
2242             else
2243             {
2244                 /* This is [reg+rgx+dsp]" */
2245
2246                 if (dspInByte)
2247                 {
2248                     size += sizeof(char);
2249                 }
2250                 else
2251                 {
2252                     size += sizeof(int);
2253                 }
2254             }
2255         }
2256     }
2257
2258     return size;
2259 }
2260
2261 inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code, int val)
2262 {
2263     instruction    ins       = id->idIns();
2264     UNATIVE_OFFSET valSize   = EA_SIZE_IN_BYTES(id->idOpSize());
2265     bool           valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
2266
2267     // We should never generate BT mem,reg because it has poor performance. BT mem,imm might be useful
2268     // but it requires special handling of the immediate value (it is always encoded in a byte).
2269     // Let's not complicate things until this is needed.
2270     assert(ins != INS_bt);
2271
2272 #ifdef _TARGET_AMD64_
2273     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
2274     // all other opcodes take a sign-extended 4-byte immediate
2275     noway_assert(valSize <= sizeof(INT32) || !id->idIsCnsReloc());
2276 #endif // _TARGET_AMD64_
2277
2278     if (valSize > sizeof(INT32))
2279     {
2280         valSize = sizeof(INT32);
2281     }
2282
2283     if (id->idIsCnsReloc())
2284     {
2285         valInByte = false; // relocs can't be placed in a byte
2286         assert(valSize == sizeof(INT32));
2287     }
2288
2289     if (valInByte)
2290     {
2291         valSize = sizeof(char);
2292     }
2293
2294     return valSize + emitInsSizeAM(id, code);
2295 }
2296
2297 inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code)
2298 {
2299     instruction ins      = id->idIns();
2300     emitAttr    attrSize = id->idOpSize();
2301
2302     // fgMorph changes any statics that won't fit into 32-bit addresses
2303     // into constants with an indir, rather than GT_CLS_VAR
2304     // so we should only hit this path for statics that are RIP-relative
2305     UNATIVE_OFFSET size = sizeof(INT32);
2306
2307     size += emitGetVexPrefixAdjustedSize(ins, attrSize, code);
2308     size += emitAdjustSizeCrc32(ins, attrSize);
2309
2310     // Most 16-bit operand instructions will need a prefix.
2311     // This refers to 66h size prefix override.
2312
2313     if (attrSize == EA_2BYTE && ins != INS_movzx && ins != INS_movsx)
2314     {
2315         size++;
2316     }
2317
2318     return size + emitInsSize(code);
2319 }
2320
2321 inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code, int val)
2322 {
2323     instruction    ins       = id->idIns();
2324     UNATIVE_OFFSET valSize   = EA_SIZE_IN_BYTES(id->idOpSize());
2325     bool           valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
2326
2327 #ifndef _TARGET_AMD64_
2328     // occasionally longs get here on x86
2329     if (valSize > sizeof(INT32))
2330         valSize = sizeof(INT32);
2331 #endif // !_TARGET_AMD64_
2332
2333     if (id->idIsCnsReloc())
2334     {
2335         valInByte = false; // relocs can't be placed in a byte
2336         assert(valSize == sizeof(INT32));
2337     }
2338
2339     if (valInByte)
2340     {
2341         valSize = sizeof(char);
2342     }
2343
2344     return valSize + emitInsSizeCV(id, code);
2345 }
2346
2347 /*****************************************************************************
2348  *
2349  *  Allocate instruction descriptors for instructions with address modes.
2350  */
2351
2352 inline emitter::instrDesc* emitter::emitNewInstrAmd(emitAttr size, ssize_t dsp)
2353 {
2354     if (dsp < AM_DISP_MIN || dsp > AM_DISP_MAX)
2355     {
2356         instrDescAmd* id = emitAllocInstrAmd(size);
2357
2358         id->idSetIsLargeDsp();
2359 #ifdef DEBUG
2360         id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2361 #endif
2362         id->idaAmdVal = dsp;
2363
2364         return id;
2365     }
2366     else
2367     {
2368         instrDesc* id = emitAllocInstr(size);
2369
2370         id->idAddr()->iiaAddrMode.amDisp = dsp;
2371         assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2372
2373         return id;
2374     }
2375 }
2376
2377 /*****************************************************************************
2378  *
2379  *  Set the displacement field in an instruction. Only handles instrDescAmd type.
2380  */
2381
2382 inline void emitter::emitSetAmdDisp(instrDescAmd* id, ssize_t dsp)
2383 {
2384     if (dsp < AM_DISP_MIN || dsp > AM_DISP_MAX)
2385     {
2386         id->idSetIsLargeDsp();
2387 #ifdef DEBUG
2388         id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2389 #endif
2390         id->idaAmdVal = dsp;
2391     }
2392     else
2393     {
2394         id->idSetIsSmallDsp();
2395         id->idAddr()->iiaAddrMode.amDisp = dsp;
2396         assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2397     }
2398 }
2399
2400 /*****************************************************************************
2401  *
2402  *  Allocate an instruction descriptor for an instruction that uses both
2403  *  an address mode displacement and a constant.
2404  */
2405
2406 emitter::instrDesc* emitter::emitNewInstrAmdCns(emitAttr size, ssize_t dsp, int cns)
2407 {
2408     if (dsp >= AM_DISP_MIN && dsp <= AM_DISP_MAX)
2409     {
2410         instrDesc* id                    = emitNewInstrCns(size, cns);
2411         id->idAddr()->iiaAddrMode.amDisp = dsp;
2412         assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2413
2414         return id;
2415     }
2416     else
2417     {
2418         if (instrDesc::fitsInSmallCns(cns))
2419         {
2420             instrDescAmd* id = emitAllocInstrAmd(size);
2421
2422             id->idSetIsLargeDsp();
2423 #ifdef DEBUG
2424             id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2425 #endif
2426             id->idaAmdVal = dsp;
2427
2428             id->idSmallCns(cns);
2429
2430             return id;
2431         }
2432         else
2433         {
2434             instrDescCnsAmd* id = emitAllocInstrCnsAmd(size);
2435
2436             id->idSetIsLargeCns();
2437             id->idacCnsVal = cns;
2438
2439             id->idSetIsLargeDsp();
2440 #ifdef DEBUG
2441             id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2442 #endif
2443             id->idacAmdVal = dsp;
2444
2445             return id;
2446         }
2447     }
2448 }
2449
2450 /*****************************************************************************
2451  *
2452  *  The next instruction will be a loop head entry point
2453  *  So insert a dummy instruction here to ensure that
2454  *  the x86 I-cache alignment rule is followed.
2455  */
2456
2457 void emitter::emitLoopAlign()
2458 {
2459     /* Insert a pseudo-instruction to ensure that we align
2460        the next instruction properly */
2461
2462     instrDesc* id = emitNewInstrSmall(EA_1BYTE);
2463     id->idIns(INS_align);
2464     id->idCodeSize(15); // We may need to skip up to 15 bytes of code
2465     emitCurIGsize += 15;
2466 }
2467
2468 /*****************************************************************************
2469  *
2470  *  Add a NOP instruction of the given size.
2471  */
2472
2473 void emitter::emitIns_Nop(unsigned size)
2474 {
2475     assert(size <= 15);
2476
2477     instrDesc* id = emitNewInstr();
2478     id->idIns(INS_nop);
2479     id->idInsFmt(IF_NONE);
2480     id->idCodeSize(size);
2481
2482     dispIns(id);
2483     emitCurIGsize += size;
2484 }
2485
2486 /*****************************************************************************
2487  *
2488  *  Add an instruction with no operands.
2489  */
2490 void emitter::emitIns(instruction ins)
2491 {
2492     UNATIVE_OFFSET sz;
2493     instrDesc*     id   = emitNewInstr();
2494     code_t         code = insCodeMR(ins);
2495
2496 #ifdef DEBUG
2497     {
2498         // We cannot have #ifdef inside macro expansion.
2499         bool assertCond =
2500             (ins == INS_cdq || ins == INS_int3 || ins == INS_lock || ins == INS_leave || ins == INS_movsb ||
2501              ins == INS_movsd || ins == INS_movsp || ins == INS_nop || ins == INS_r_movsb || ins == INS_r_movsd ||
2502              ins == INS_r_movsp || ins == INS_r_stosb || ins == INS_r_stosd || ins == INS_r_stosp || ins == INS_ret ||
2503              ins == INS_sahf || ins == INS_stosb || ins == INS_stosd || ins == INS_stosp
2504              // These instructions take zero operands
2505              || ins == INS_vzeroupper || ins == INS_lfence || ins == INS_mfence || ins == INS_sfence);
2506
2507         assert(assertCond);
2508     }
2509 #endif // DEBUG
2510
2511     assert(!hasRexPrefix(code)); // Can't have a REX bit with no operands, right?
2512
2513     if (code & 0xFF000000)
2514     {
2515         sz = 2; // TODO-XArch-Bug?: Shouldn't this be 4? Or maybe we should assert that we don't see this case.
2516     }
2517     else if (code & 0x00FF0000)
2518     {
2519         sz = 3;
2520     }
2521     else if (code & 0x0000FF00)
2522     {
2523         sz = 2;
2524     }
2525     else
2526     {
2527         sz = 1;
2528     }
2529
2530     // vzeroupper includes its 2-byte VEX prefix in its MR code.
2531     assert((ins != INS_vzeroupper) || (sz == 3));
2532
2533     insFormat fmt = IF_NONE;
2534
2535     id->idIns(ins);
2536     id->idInsFmt(fmt);
2537     id->idCodeSize(sz);
2538
2539     dispIns(id);
2540     emitCurIGsize += sz;
2541 }
2542
2543 // Add an instruction with no operands, but whose encoding depends on the size
2544 // (Only CDQ/CQO currently)
2545 void emitter::emitIns(instruction ins, emitAttr attr)
2546 {
2547     UNATIVE_OFFSET sz;
2548     instrDesc*     id   = emitNewInstr(attr);
2549     code_t         code = insCodeMR(ins);
2550     assert(ins == INS_cdq);
2551     assert((code & 0xFFFFFF00) == 0);
2552     sz = 1;
2553
2554     insFormat fmt = IF_NONE;
2555
2556     sz += emitGetVexPrefixAdjustedSize(ins, attr, code);
2557     if (TakesRexWPrefix(ins, attr))
2558     {
2559         sz += emitGetRexPrefixSize(ins);
2560     }
2561
2562     id->idIns(ins);
2563     id->idInsFmt(fmt);
2564     id->idCodeSize(sz);
2565
2566     dispIns(id);
2567     emitCurIGsize += sz;
2568 }
2569
2570 //------------------------------------------------------------------------
2571 // emitMapFmtForIns: map the instruction format based on the instruction.
2572 // Shift-by-a-constant instructions have a special format.
2573 //
2574 // Arguments:
2575 //    fmt - the instruction format to map
2576 //    ins - the instruction
2577 //
2578 // Returns:
2579 //    The mapped instruction format.
2580 //
2581 emitter::insFormat emitter::emitMapFmtForIns(insFormat fmt, instruction ins)
2582 {
2583     switch (ins)
2584     {
2585         case INS_rol_N:
2586         case INS_ror_N:
2587         case INS_rcl_N:
2588         case INS_rcr_N:
2589         case INS_shl_N:
2590         case INS_shr_N:
2591         case INS_sar_N:
2592         {
2593             switch (fmt)
2594             {
2595                 case IF_RRW_CNS:
2596                     return IF_RRW_SHF;
2597                 case IF_MRW_CNS:
2598                     return IF_MRW_SHF;
2599                 case IF_SRW_CNS:
2600                     return IF_SRW_SHF;
2601                 case IF_ARW_CNS:
2602                     return IF_ARW_SHF;
2603                 default:
2604                     unreached();
2605             }
2606         }
2607
2608         default:
2609             return fmt;
2610     }
2611 }
2612
2613 //------------------------------------------------------------------------
2614 // emitMapFmtAtoM: map the address mode formats ARD, ARW, and AWR to their direct address equivalents.
2615 //
2616 // Arguments:
2617 //    fmt - the instruction format to map
2618 //
2619 // Returns:
2620 //    The mapped instruction format.
2621 //
2622 emitter::insFormat emitter::emitMapFmtAtoM(insFormat fmt)
2623 {
2624     switch (fmt)
2625     {
2626         case IF_ARD:
2627             return IF_MRD;
2628         case IF_AWR:
2629             return IF_MWR;
2630         case IF_ARW:
2631             return IF_MRW;
2632
2633         case IF_RRD_ARD:
2634             return IF_RRD_MRD;
2635         case IF_RWR_ARD:
2636             return IF_RWR_MRD;
2637         case IF_RWR_ARD_CNS:
2638             return IF_RWR_MRD_CNS;
2639         case IF_RRW_ARD:
2640             return IF_RRW_MRD;
2641         case IF_RRW_ARD_CNS:
2642             return IF_RRW_MRD_CNS;
2643         case IF_RWR_RRD_ARD:
2644             return IF_RWR_RRD_MRD;
2645         case IF_RWR_RRD_ARD_CNS:
2646             return IF_RWR_RRD_MRD_CNS;
2647         case IF_RWR_RRD_ARD_RRD:
2648             return IF_RWR_RRD_MRD_RRD;
2649
2650         case IF_ARD_RRD:
2651             return IF_MRD_RRD;
2652         case IF_AWR_RRD:
2653             return IF_MWR_RRD;
2654         case IF_ARW_RRD:
2655             return IF_MRW_RRD;
2656
2657         case IF_ARD_CNS:
2658             return IF_MRD_CNS;
2659         case IF_AWR_CNS:
2660             return IF_MWR_CNS;
2661         case IF_ARW_CNS:
2662             return IF_MRW_CNS;
2663
2664         case IF_AWR_RRD_CNS:
2665             return IF_MWR_RRD_CNS;
2666
2667         case IF_ARW_SHF:
2668             return IF_MRW_SHF;
2669
2670         default:
2671             unreached();
2672     }
2673 }
2674
2675 //------------------------------------------------------------------------
2676 // emitHandleMemOp: For a memory operand, fill in the relevant fields of the instrDesc.
2677 //
2678 // Arguments:
2679 //    indir - the memory operand.
2680 //    id - the instrDesc to fill in.
2681 //    fmt - the instruction format to use. This must be one of the ARD, AWR, or ARW formats. If necessary (such as for
2682 //          GT_CLS_VAR_ADDR), this function will map it to the correct format.
2683 //    ins - the instruction we are generating. This might affect the instruction format we choose.
2684 //
2685 // Assumptions:
2686 //    The correctly sized instrDesc must already be created, e.g., via emitNewInstrAmd() or emitNewInstrAmdCns();
2687 //
2688 // Post-conditions:
2689 //    For base address of int constant:
2690 //        -- the caller must have added the int constant base to the instrDesc when creating it via
2691 //           emitNewInstrAmdCns().
2692 //    For simple address modes (base + scale * index + offset):
2693 //        -- the base register, index register, and scale factor are set.
2694 //        -- the caller must have added the addressing mode offset int constant to the instrDesc when creating it via
2695 //           emitNewInstrAmdCns().
2696 //
2697 //    The instruction format is set.
2698 //
2699 //    idSetIsDspReloc() is called if necessary.
2700 //
2701 void emitter::emitHandleMemOp(GenTreeIndir* indir, instrDesc* id, insFormat fmt, instruction ins)
2702 {
2703     assert(fmt != IF_NONE);
2704
2705     GenTree* memBase = indir->Base();
2706
2707     if ((memBase != nullptr) && memBase->isContained() && (memBase->OperGet() == GT_CLS_VAR_ADDR))
2708     {
2709         CORINFO_FIELD_HANDLE fldHnd = memBase->gtClsVar.gtClsVarHnd;
2710
2711         // Static always need relocs
2712         if (!jitStaticFldIsGlobAddr(fldHnd))
2713         {
2714             // Contract:
2715             // fgMorphField() changes any statics that won't fit into 32-bit addresses into
2716             // constants with an indir, rather than GT_CLS_VAR, based on reloc type hint given
2717             // by VM. Hence emitter should always mark GT_CLS_VAR_ADDR as relocatable.
2718             //
2719             // Data section constants: these get allocated close to code block of the method and
2720             // always addressable IP relative.  These too should be marked as relocatable.
2721
2722             id->idSetIsDspReloc();
2723         }
2724
2725         id->idAddr()->iiaFieldHnd = fldHnd;
2726         id->idInsFmt(emitMapFmtForIns(emitMapFmtAtoM(fmt), ins));
2727     }
2728     else if ((memBase != nullptr) && memBase->IsCnsIntOrI() && memBase->isContained())
2729     {
2730         // Absolute addresses marked as contained should fit within the base of addr mode.
2731         assert(memBase->AsIntConCommon()->FitsInAddrBase(emitComp));
2732
2733         // Either not generating relocatable code, or addr must be an icon handle, or the
2734         // constant is zero (which we won't generate a relocation for).
2735         assert(!emitComp->opts.compReloc || memBase->IsIconHandle() || memBase->IsIntegralConst(0));
2736
2737         if (memBase->AsIntConCommon()->AddrNeedsReloc(emitComp))
2738         {
2739             id->idSetIsDspReloc();
2740         }
2741
2742         id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
2743         id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
2744         id->idAddr()->iiaAddrMode.amScale   = emitter::OPSZ1; // for completeness
2745
2746         id->idInsFmt(emitMapFmtForIns(fmt, ins));
2747
2748         // Absolute address must have already been set in the instrDesc constructor.
2749         assert(emitGetInsAmdAny(id) == memBase->AsIntConCommon()->IconValue());
2750     }
2751     else
2752     {
2753         if (memBase != nullptr)
2754         {
2755             id->idAddr()->iiaAddrMode.amBaseReg = memBase->gtRegNum;
2756         }
2757         else
2758         {
2759             id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
2760         }
2761
2762         if (indir->HasIndex())
2763         {
2764             id->idAddr()->iiaAddrMode.amIndxReg = indir->Index()->gtRegNum;
2765         }
2766         else
2767         {
2768             id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
2769         }
2770         id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(indir->Scale());
2771
2772         id->idInsFmt(emitMapFmtForIns(fmt, ins));
2773
2774         // disp must have already been set in the instrDesc constructor.
2775         assert(emitGetInsAmdAny(id) == indir->Offset()); // make sure "disp" is stored properly
2776     }
2777 }
2778
2779 // Takes care of storing all incoming register parameters
2780 // into its corresponding shadow space (defined by the x64 ABI)
2781 void emitter::spillIntArgRegsToShadowSlots()
2782 {
2783     unsigned       argNum;
2784     instrDesc*     id;
2785     UNATIVE_OFFSET sz;
2786
2787     assert(emitComp->compGeneratingProlog);
2788
2789     for (argNum = 0; argNum < MAX_REG_ARG; ++argNum)
2790     {
2791         regNumber argReg = intArgRegs[argNum];
2792
2793         // The offsets for the shadow space start at RSP + 8
2794         // (right before the caller return address)
2795         int offset = (argNum + 1) * EA_PTRSIZE;
2796
2797         id = emitNewInstrAmd(EA_PTRSIZE, offset);
2798         id->idIns(INS_mov);
2799         id->idInsFmt(IF_AWR_RRD);
2800         id->idAddr()->iiaAddrMode.amBaseReg = REG_SPBASE;
2801         id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
2802         id->idAddr()->iiaAddrMode.amScale   = emitEncodeScale(1);
2803
2804         // The offset has already been set in the intrDsc ctor,
2805         // make sure we got it right.
2806         assert(emitGetInsAmdAny(id) == ssize_t(offset));
2807
2808         id->idReg1(argReg);
2809         sz = emitInsSizeAM(id, insCodeMR(INS_mov));
2810         id->idCodeSize(sz);
2811         emitCurIGsize += sz;
2812     }
2813 }
2814
2815 //------------------------------------------------------------------------
2816 // emitInsLoadInd: Emits a "mov reg, [mem]" (or a variant such as "movzx" or "movss")
2817 // instruction for a GT_IND node.
2818 //
2819 // Arguments:
2820 //    ins - the instruction to emit
2821 //    attr - the instruction operand size
2822 //    dstReg - the destination register
2823 //    mem - the GT_IND node
2824 //
2825 void emitter::emitInsLoadInd(instruction ins, emitAttr attr, regNumber dstReg, GenTreeIndir* mem)
2826 {
2827     assert(mem->OperIs(GT_IND));
2828
2829     GenTree* addr = mem->Addr();
2830
2831     if (addr->OperGet() == GT_CLS_VAR_ADDR)
2832     {
2833         emitIns_R_C(ins, attr, dstReg, addr->gtClsVar.gtClsVarHnd, 0);
2834         return;
2835     }
2836
2837     if (addr->OperGet() == GT_LCL_VAR_ADDR)
2838     {
2839         GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
2840         emitIns_R_S(ins, attr, dstReg, varNode->GetLclNum(), 0);
2841         codeGen->genUpdateLife(varNode);
2842         return;
2843     }
2844
2845     assert(addr->OperIsAddrMode() || (addr->IsCnsIntOrI() && addr->isContained()) || !addr->isContained());
2846     ssize_t    offset = mem->Offset();
2847     instrDesc* id     = emitNewInstrAmd(attr, offset);
2848     id->idIns(ins);
2849     id->idReg1(dstReg);
2850     emitHandleMemOp(mem, id, IF_RWR_ARD, ins);
2851     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
2852     id->idCodeSize(sz);
2853     dispIns(id);
2854     emitCurIGsize += sz;
2855 }
2856
2857 //------------------------------------------------------------------------
2858 // emitInsStoreInd: Emits a "mov [mem], reg/imm" (or a variant such as "movss")
2859 // instruction for a GT_STOREIND node.
2860 //
2861 // Arguments:
2862 //    ins - the instruction to emit
2863 //    attr - the instruction operand size
2864 //    mem - the GT_STOREIND node
2865 //
2866 void emitter::emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* mem)
2867 {
2868     assert(mem->OperIs(GT_STOREIND));
2869
2870     GenTree* addr = mem->Addr();
2871     GenTree* data = mem->Data();
2872
2873     if (addr->OperGet() == GT_CLS_VAR_ADDR)
2874     {
2875         if (data->isContainedIntOrIImmed())
2876         {
2877             emitIns_C_I(ins, attr, addr->gtClsVar.gtClsVarHnd, 0, (int)data->AsIntConCommon()->IconValue());
2878         }
2879         else
2880         {
2881             assert(!data->isContained());
2882             emitIns_C_R(ins, attr, addr->gtClsVar.gtClsVarHnd, data->gtRegNum, 0);
2883         }
2884         return;
2885     }
2886
2887     if (addr->OperGet() == GT_LCL_VAR_ADDR)
2888     {
2889         GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
2890         if (data->isContainedIntOrIImmed())
2891         {
2892             emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int)data->AsIntConCommon()->IconValue());
2893         }
2894         else
2895         {
2896             assert(!data->isContained());
2897             emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0);
2898         }
2899         codeGen->genUpdateLife(varNode);
2900         return;
2901     }
2902
2903     ssize_t        offset = mem->Offset();
2904     UNATIVE_OFFSET sz;
2905     instrDesc*     id;
2906
2907     if (data->isContainedIntOrIImmed())
2908     {
2909         int icon = (int)data->AsIntConCommon()->IconValue();
2910         id       = emitNewInstrAmdCns(attr, offset, icon);
2911         id->idIns(ins);
2912         emitHandleMemOp(mem, id, IF_AWR_CNS, ins);
2913         sz = emitInsSizeAM(id, insCodeMI(ins), icon);
2914         id->idCodeSize(sz);
2915     }
2916     else
2917     {
2918         assert(!data->isContained());
2919         id = emitNewInstrAmd(attr, offset);
2920         id->idIns(ins);
2921         emitHandleMemOp(mem, id, IF_AWR_RRD, ins);
2922         id->idReg1(data->gtRegNum);
2923         sz = emitInsSizeAM(id, insCodeMR(ins));
2924         id->idCodeSize(sz);
2925     }
2926
2927     dispIns(id);
2928     emitCurIGsize += sz;
2929 }
2930
2931 //------------------------------------------------------------------------
2932 // emitInsStoreLcl: Emits a "mov [mem], reg/imm" (or a variant such as "movss")
2933 // instruction for a GT_STORE_LCL_VAR node.
2934 //
2935 // Arguments:
2936 //    ins - the instruction to emit
2937 //    attr - the instruction operand size
2938 //    varNode - the GT_STORE_LCL_VAR node
2939 //
2940 void emitter::emitInsStoreLcl(instruction ins, emitAttr attr, GenTreeLclVarCommon* varNode)
2941 {
2942     assert(varNode->OperIs(GT_STORE_LCL_VAR));
2943     assert(varNode->gtRegNum == REG_NA); // stack store
2944
2945     GenTree* data = varNode->gtGetOp1();
2946     codeGen->inst_set_SV_var(varNode);
2947
2948     if (data->isContainedIntOrIImmed())
2949     {
2950         emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int)data->AsIntConCommon()->IconValue());
2951     }
2952     else
2953     {
2954         assert(!data->isContained());
2955         emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0);
2956     }
2957     codeGen->genUpdateLife(varNode);
2958 }
2959
2960 //------------------------------------------------------------------------
2961 // emitInsBinary: Emits an instruction for a node which takes two operands
2962 //
2963 // Arguments:
2964 //    ins - the instruction to emit
2965 //    attr - the instruction operand size
2966 //    dst - the destination and first source operand
2967 //    src - the second source operand
2968 //
2969 // Assumptions:
2970 //  i) caller of this routine needs to call genConsumeReg()
2971 // ii) caller of this routine needs to call genProduceReg()
2972 regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src)
2973 {
2974     // We can only have one memory operand and only src can be a constant operand
2975     // However, the handling for a given operand type (mem, cns, or other) is fairly
2976     // consistent regardless of whether they are src or dst. As such, we will find
2977     // the type of each operand and only check them against src/dst where relevant.
2978
2979     GenTree* memOp   = nullptr;
2980     GenTree* cnsOp   = nullptr;
2981     GenTree* otherOp = nullptr;
2982
2983     if (dst->isContained() || (dst->isLclField() && (dst->gtRegNum == REG_NA)) || dst->isUsedFromSpillTemp())
2984     {
2985         // dst can only be a modrm
2986         // dst on 3opImul isn't really the dst
2987         assert(dst->isUsedFromMemory() || (dst->gtRegNum == REG_NA) || instrIs3opImul(ins));
2988         assert(!src->isUsedFromMemory());
2989
2990         memOp = dst;
2991
2992         if (src->isContained())
2993         {
2994             assert(src->IsCnsIntOrI());
2995             cnsOp = src;
2996         }
2997         else
2998         {
2999             otherOp = src;
3000         }
3001     }
3002     else if (src->isContained() || src->isUsedFromSpillTemp())
3003     {
3004         assert(!dst->isUsedFromMemory());
3005         otherOp = dst;
3006
3007         if ((src->IsCnsIntOrI() || src->IsCnsFltOrDbl()) && !src->isUsedFromSpillTemp())
3008         {
3009             assert(!src->isUsedFromMemory() || src->IsCnsFltOrDbl());
3010             cnsOp = src;
3011         }
3012         else
3013         {
3014             assert(src->isUsedFromMemory());
3015             memOp = src;
3016         }
3017     }
3018
3019     // At this point, we either have a memory operand or we don't.
3020     //
3021     // If we don't then the logic is very simple and  we will either be emitting a
3022     // `reg, immed` instruction (if src is a cns) or a `reg, reg` instruction otherwise.
3023     //
3024     // If we do have a memory operand, the logic is a bit more complicated as we need
3025     // to do different things depending on the type of memory operand. These types include:
3026     //  * Spill temp
3027     //  * Indirect access
3028     //    * Local variable
3029     //    * Class variable
3030     //    * Addressing mode [base + index * scale + offset]
3031     //  * Local field
3032     //  * Local variable
3033     //
3034     // Most of these types (except Indirect: Class variable and Indirect: Addressing mode)
3035     // give us a a local variable number and an offset and access memory on the stack
3036     //
3037     // Indirect: Class variable is used for access static class variables and gives us a handle
3038     // to the memory location we read from
3039     //
3040     // Indirect: Addressing mode is used for the remaining memory accesses and will give us
3041     // a base address, an index, a scale, and an offset. These are combined to let us easily
3042     // access the given memory location.
3043     //
3044     // In all of the memory access cases, we determine which form to emit (e.g. `reg, [mem]`
3045     // or `[mem], reg`) by comparing memOp to src to determine which `emitIns_*` method needs
3046     // to be called. The exception is for the `[mem], immed` case (for Indirect: Class variable)
3047     // where only src can be the immediate.
3048
3049     if (memOp != nullptr)
3050     {
3051         TempDsc* tmpDsc = nullptr;
3052         unsigned varNum = BAD_VAR_NUM;
3053         unsigned offset = (unsigned)-1;
3054
3055         if (memOp->isUsedFromSpillTemp())
3056         {
3057             assert(memOp->IsRegOptional());
3058
3059             tmpDsc = codeGen->getSpillTempDsc(memOp);
3060             varNum = tmpDsc->tdTempNum();
3061             offset = 0;
3062
3063             codeGen->regSet.tmpRlsTemp(tmpDsc);
3064         }
3065         else if (memOp->isIndir())
3066         {
3067             GenTreeIndir* memIndir = memOp->AsIndir();
3068             GenTree*      memBase  = memIndir->gtOp1;
3069
3070             switch (memBase->OperGet())
3071             {
3072                 case GT_LCL_VAR_ADDR:
3073                 {
3074                     varNum = memBase->AsLclVarCommon()->GetLclNum();
3075                     offset = 0;
3076
3077                     // Ensure that all the GenTreeIndir values are set to their defaults.
3078                     assert(!memIndir->HasIndex());
3079                     assert(memIndir->Scale() == 1);
3080                     assert(memIndir->Offset() == 0);
3081
3082                     break;
3083                 }
3084
3085                 case GT_CLS_VAR_ADDR:
3086                 {
3087                     if (memOp == src)
3088                     {
3089                         assert(otherOp == dst);
3090                         assert(cnsOp == nullptr);
3091
3092                         if (instrHasImplicitRegPairDest(ins))
3093                         {
3094                             // src is a class static variable
3095                             // dst is implicit - RDX:RAX
3096                             emitIns_C(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0);
3097                         }
3098                         else
3099                         {
3100                             // src is a class static variable
3101                             // dst is a register
3102                             emitIns_R_C(ins, attr, dst->gtRegNum, memBase->gtClsVar.gtClsVarHnd, 0);
3103                         }
3104                     }
3105                     else
3106                     {
3107                         assert(memOp == dst);
3108
3109                         if (cnsOp != nullptr)
3110                         {
3111                             assert(cnsOp == src);
3112                             assert(otherOp == nullptr);
3113                             assert(src->IsCnsIntOrI());
3114
3115                             // src is an contained immediate
3116                             // dst is a class static variable
3117                             emitIns_C_I(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0,
3118                                         (int)src->gtIntConCommon.IconValue());
3119                         }
3120                         else
3121                         {
3122                             assert(otherOp == src);
3123
3124                             // src is a register
3125                             // dst is a class static variable
3126                             emitIns_C_R(ins, attr, memBase->gtClsVar.gtClsVarHnd, src->gtRegNum, 0);
3127                         }
3128                     }
3129
3130                     return dst->gtRegNum;
3131                 }
3132
3133                 default: // Addressing mode [base + index * scale + offset]
3134                 {
3135                     instrDesc* id = nullptr;
3136
3137                     if (cnsOp != nullptr)
3138                     {
3139                         assert(memOp == dst);
3140                         assert(cnsOp == src);
3141                         assert(otherOp == nullptr);
3142                         assert(src->IsCnsIntOrI());
3143
3144                         id = emitNewInstrAmdCns(attr, memIndir->Offset(), (int)src->gtIntConCommon.IconValue());
3145                     }
3146                     else
3147                     {
3148                         ssize_t offset = memIndir->Offset();
3149                         id             = emitNewInstrAmd(attr, offset);
3150                         id->idIns(ins);
3151
3152                         GenTree* regTree = (memOp == src) ? dst : src;
3153
3154                         // there must be one non-contained op
3155                         assert(!regTree->isContained());
3156                         id->idReg1(regTree->gtRegNum);
3157                     }
3158                     assert(id != nullptr);
3159
3160                     id->idIns(ins); // Set the instruction.
3161
3162                     // Determine the instruction format
3163                     insFormat fmt = IF_NONE;
3164
3165                     if (memOp == src)
3166                     {
3167                         assert(cnsOp == nullptr);
3168                         assert(otherOp == dst);
3169
3170                         if (instrHasImplicitRegPairDest(ins))
3171                         {
3172                             fmt = emitInsModeFormat(ins, IF_ARD);
3173                         }
3174                         else
3175                         {
3176                             fmt = emitInsModeFormat(ins, IF_RRD_ARD);
3177                         }
3178                     }
3179                     else
3180                     {
3181                         assert(memOp == dst);
3182
3183                         if (cnsOp != nullptr)
3184                         {
3185                             assert(cnsOp == src);
3186                             assert(otherOp == nullptr);
3187                             assert(src->IsCnsIntOrI());
3188
3189                             fmt = emitInsModeFormat(ins, IF_ARD_CNS);
3190                         }
3191                         else
3192                         {
3193                             assert(otherOp == src);
3194                             fmt = emitInsModeFormat(ins, IF_ARD_RRD);
3195                         }
3196                     }
3197                     assert(fmt != IF_NONE);
3198                     emitHandleMemOp(memIndir, id, fmt, ins);
3199
3200                     // Determine the instruction size
3201                     UNATIVE_OFFSET sz = 0;
3202
3203                     if (memOp == src)
3204                     {
3205                         assert(otherOp == dst);
3206                         assert(cnsOp == nullptr);
3207
3208                         if (instrHasImplicitRegPairDest(ins))
3209                         {
3210                             sz = emitInsSizeAM(id, insCode(ins));
3211                         }
3212                         else
3213                         {
3214                             sz = emitInsSizeAM(id, insCodeRM(ins));
3215                         }
3216                     }
3217                     else
3218                     {
3219                         assert(memOp == dst);
3220
3221                         if (cnsOp != nullptr)
3222                         {
3223                             assert(memOp == dst);
3224                             assert(cnsOp == src);
3225                             assert(otherOp == nullptr);
3226
3227                             sz = emitInsSizeAM(id, insCodeMI(ins), (int)src->gtIntConCommon.IconValue());
3228                         }
3229                         else
3230                         {
3231                             assert(otherOp == src);
3232                             sz = emitInsSizeAM(id, insCodeMR(ins));
3233                         }
3234                     }
3235                     assert(sz != 0);
3236
3237                     id->idCodeSize(sz);
3238
3239                     dispIns(id);
3240                     emitCurIGsize += sz;
3241
3242                     return (memOp == src) ? dst->gtRegNum : REG_NA;
3243                 }
3244             }
3245         }
3246         else
3247         {
3248             switch (memOp->OperGet())
3249             {
3250                 case GT_LCL_FLD:
3251                 case GT_STORE_LCL_FLD:
3252                 {
3253                     GenTreeLclFld* lclField = memOp->AsLclFld();
3254                     varNum                  = lclField->GetLclNum();
3255                     offset                  = lclField->gtLclFld.gtLclOffs;
3256                     break;
3257                 }
3258
3259                 case GT_LCL_VAR:
3260                 {
3261                     assert(memOp->IsRegOptional() || !emitComp->lvaTable[memOp->gtLclVar.gtLclNum].lvIsRegCandidate());
3262                     varNum = memOp->AsLclVar()->GetLclNum();
3263                     offset = 0;
3264                     break;
3265                 }
3266
3267                 default:
3268                     unreached();
3269                     break;
3270             }
3271         }
3272
3273         // Ensure we got a good varNum and offset.
3274         // We also need to check for `tmpDsc != nullptr` since spill temp numbers
3275         // are negative and start with -1, which also happens to be BAD_VAR_NUM.
3276         assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
3277         assert(offset != (unsigned)-1);
3278
3279         if (memOp == src)
3280         {
3281             assert(otherOp == dst);
3282             assert(cnsOp == nullptr);
3283
3284             if (instrHasImplicitRegPairDest(ins))
3285             {
3286                 // src is a stack based local variable
3287                 // dst is implicit - RDX:RAX
3288                 emitIns_S(ins, attr, varNum, offset);
3289             }
3290             else
3291             {
3292                 // src is a stack based local variable
3293                 // dst is a register
3294                 emitIns_R_S(ins, attr, dst->gtRegNum, varNum, offset);
3295             }
3296         }
3297         else
3298         {
3299             assert(memOp == dst);
3300             assert((dst->gtRegNum == REG_NA) || dst->IsRegOptional());
3301
3302             if (cnsOp != nullptr)
3303             {
3304                 assert(cnsOp == src);
3305                 assert(otherOp == nullptr);
3306                 assert(src->IsCnsIntOrI());
3307
3308                 // src is an contained immediate
3309                 // dst is a stack based local variable
3310                 emitIns_S_I(ins, attr, varNum, offset, (int)src->gtIntConCommon.IconValue());
3311             }
3312             else
3313             {
3314                 assert(otherOp == src);
3315                 assert(!src->isContained());
3316
3317                 // src is a register
3318                 // dst is a stack based local variable
3319                 emitIns_S_R(ins, attr, src->gtRegNum, varNum, offset);
3320             }
3321         }
3322     }
3323     else if (cnsOp != nullptr) // reg, immed
3324     {
3325         assert(cnsOp == src);
3326         assert(otherOp == dst);
3327
3328         if (src->IsCnsIntOrI())
3329         {
3330             assert(!dst->isContained());
3331             GenTreeIntConCommon* intCns = src->AsIntConCommon();
3332             emitIns_R_I(ins, attr, dst->gtRegNum, intCns->IconValue());
3333         }
3334         else
3335         {
3336             assert(src->IsCnsFltOrDbl());
3337             GenTreeDblCon* dblCns = src->AsDblCon();
3338
3339             CORINFO_FIELD_HANDLE hnd = emitFltOrDblConst(dblCns->gtDconVal, emitTypeSize(dblCns));
3340             emitIns_R_C(ins, attr, dst->gtRegNum, hnd, 0);
3341         }
3342     }
3343     else // reg, reg
3344     {
3345         assert(otherOp == nullptr);
3346         assert(!src->isContained() && !dst->isContained());
3347
3348         if (instrHasImplicitRegPairDest(ins))
3349         {
3350             emitIns_R(ins, attr, src->gtRegNum);
3351         }
3352         else
3353         {
3354             emitIns_R_R(ins, attr, dst->gtRegNum, src->gtRegNum);
3355         }
3356     }
3357
3358     return dst->gtRegNum;
3359 }
3360
3361 //------------------------------------------------------------------------
3362 // emitInsRMW: Emit logic for Read-Modify-Write binary instructions.
3363 //
3364 // Responsible for emitting a single instruction that will perform an operation of the form:
3365 //      *addr = *addr <BinOp> src
3366 // For example:
3367 //      ADD [RAX], RCX
3368 //
3369 // Arguments:
3370 //    ins - instruction to generate
3371 //    attr - emitter attribute for instruction
3372 //    storeInd - indir for RMW addressing mode
3373 //    src - source operand of instruction
3374 //
3375 // Assumptions:
3376 //    Lowering has taken care of recognizing the StoreInd pattern of:
3377 //          StoreInd( AddressTree, BinOp( Ind ( AddressTree ), Operand ) )
3378 //    The address to store is already sitting in a register.
3379 //
3380 // Notes:
3381 //    This is a no-produce operation, meaning that no register output will
3382 //    be produced for future use in the code stream.
3383 //
3384 void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeInd, GenTree* src)
3385 {
3386     GenTree* addr = storeInd->Addr();
3387     addr          = addr->gtSkipReloadOrCopy();
3388     assert(addr->OperGet() == GT_LCL_VAR || addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA ||
3389            addr->OperGet() == GT_CLS_VAR_ADDR || addr->OperGet() == GT_CNS_INT);
3390
3391     instrDesc*     id = nullptr;
3392     UNATIVE_OFFSET sz;
3393
3394     ssize_t offset = 0;
3395     if (addr->OperGet() != GT_CLS_VAR_ADDR)
3396     {
3397         offset = storeInd->Offset();
3398     }
3399
3400     if (src->isContainedIntOrIImmed())
3401     {
3402         GenTreeIntConCommon* intConst = src->AsIntConCommon();
3403         int                  iconVal  = (int)intConst->IconValue();
3404         switch (ins)
3405         {
3406             case INS_rcl_N:
3407             case INS_rcr_N:
3408             case INS_rol_N:
3409             case INS_ror_N:
3410             case INS_shl_N:
3411             case INS_shr_N:
3412             case INS_sar_N:
3413                 iconVal &= 0x7F;
3414                 break;
3415             default:
3416                 break;
3417         }
3418
3419         id = emitNewInstrAmdCns(attr, offset, iconVal);
3420         emitHandleMemOp(storeInd, id, IF_ARW_CNS, ins);
3421         id->idIns(ins);
3422         sz = emitInsSizeAM(id, insCodeMI(ins), iconVal);
3423     }
3424     else
3425     {
3426         assert(!src->isContained()); // there must be one non-contained src
3427
3428         // ind, reg
3429         id = emitNewInstrAmd(attr, offset);
3430         emitHandleMemOp(storeInd, id, IF_ARW_RRD, ins);
3431         id->idReg1(src->gtRegNum);
3432         id->idIns(ins);
3433         sz = emitInsSizeAM(id, insCodeMR(ins));
3434     }
3435
3436     id->idCodeSize(sz);
3437
3438     dispIns(id);
3439     emitCurIGsize += sz;
3440 }
3441
3442 //------------------------------------------------------------------------
3443 // emitInsRMW: Emit logic for Read-Modify-Write unary instructions.
3444 //
3445 // Responsible for emitting a single instruction that will perform an operation of the form:
3446 //      *addr = UnaryOp *addr
3447 // For example:
3448 //      NOT [RAX]
3449 //
3450 // Arguments:
3451 //    ins - instruction to generate
3452 //    attr - emitter attribute for instruction
3453 //    storeInd - indir for RMW addressing mode
3454 //
3455 // Assumptions:
3456 //    Lowering has taken care of recognizing the StoreInd pattern of:
3457 //          StoreInd( AddressTree, UnaryOp( Ind ( AddressTree ) ) )
3458 //    The address to store is already sitting in a register.
3459 //
3460 // Notes:
3461 //    This is a no-produce operation, meaning that no register output will
3462 //    be produced for future use in the code stream.
3463 //
3464 void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeInd)
3465 {
3466     GenTree* addr = storeInd->Addr();
3467     addr          = addr->gtSkipReloadOrCopy();
3468     assert(addr->OperGet() == GT_LCL_VAR || addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_CLS_VAR_ADDR ||
3469            addr->OperGet() == GT_LEA || addr->OperGet() == GT_CNS_INT);
3470
3471     ssize_t offset = 0;
3472     if (addr->OperGet() != GT_CLS_VAR_ADDR)
3473     {
3474         offset = storeInd->Offset();
3475     }
3476
3477     instrDesc* id = emitNewInstrAmd(attr, offset);
3478     emitHandleMemOp(storeInd, id, IF_ARW, ins);
3479     id->idIns(ins);
3480     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
3481     id->idCodeSize(sz);
3482
3483     dispIns(id);
3484     emitCurIGsize += sz;
3485 }
3486
3487 /*****************************************************************************
3488  *
3489  *  Add an instruction referencing a single register.
3490  */
3491
3492 void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
3493 {
3494     emitAttr size = EA_SIZE(attr);
3495
3496     assert(size <= EA_PTRSIZE);
3497     noway_assert(emitVerifyEncodable(ins, size, reg));
3498
3499     UNATIVE_OFFSET sz;
3500     instrDesc*     id = emitNewInstrSmall(attr);
3501
3502     switch (ins)
3503     {
3504         case INS_inc:
3505         case INS_dec:
3506 #ifdef _TARGET_AMD64_
3507
3508             sz = 2; // x64 has no 1-byte opcode (it is the same encoding as the REX prefix)
3509
3510 #else // !_TARGET_AMD64_
3511
3512             if (size == EA_1BYTE)
3513                 sz = 2; // Use the long form as the small one has no 'w' bit
3514             else
3515                 sz = 1; // Use short form
3516
3517 #endif // !_TARGET_AMD64_
3518
3519             break;
3520
3521         case INS_pop:
3522         case INS_pop_hide:
3523         case INS_push:
3524         case INS_push_hide:
3525
3526             /* We don't currently push/pop small values */
3527
3528             assert(size == EA_PTRSIZE);
3529
3530             sz = 1;
3531             break;
3532
3533         default:
3534
3535             /* All the sixteen INS_setCCs are contiguous. */
3536
3537             if (INS_seto <= ins && ins <= INS_setg)
3538             {
3539                 // Rough check that we used the endpoints for the range check
3540
3541                 assert(INS_seto + 0xF == INS_setg);
3542
3543                 // The caller must specify EA_1BYTE for 'attr'
3544
3545                 assert(attr == EA_1BYTE);
3546
3547                 /* We expect this to always be a 'big' opcode */
3548
3549                 assert(insEncodeMRreg(ins, reg, attr, insCodeMR(ins)) & 0x00FF0000);
3550
3551                 size = attr;
3552
3553                 sz = 3;
3554                 break;
3555             }
3556             else
3557             {
3558                 sz = 2;
3559                 break;
3560             }
3561     }
3562     insFormat fmt = emitInsModeFormat(ins, IF_RRD);
3563
3564     id->idIns(ins);
3565     id->idInsFmt(fmt);
3566     id->idReg1(reg);
3567
3568     // 16-bit operand instructions will need a prefix.
3569     // This refers to 66h size prefix override.
3570     if (size == EA_2BYTE)
3571     {
3572         sz += 1;
3573     }
3574
3575     // Vex bytes
3576     sz += emitGetVexPrefixAdjustedSize(ins, attr, insEncodeMRreg(ins, reg, attr, insCodeMR(ins)));
3577
3578     // REX byte
3579     if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, attr))
3580     {
3581         sz += emitGetRexPrefixSize(ins);
3582     }
3583
3584     id->idCodeSize(sz);
3585
3586     dispIns(id);
3587     emitCurIGsize += sz;
3588
3589     emitAdjustStackDepthPushPop(ins);
3590 }
3591
3592 /*****************************************************************************
3593  *
3594  *  Add an instruction referencing a register and a constant.
3595  */
3596
3597 void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t val)
3598 {
3599     emitAttr size = EA_SIZE(attr);
3600
3601     // Allow emitting SSE2/AVX SIMD instructions of R_I form that can specify EA_16BYTE or EA_32BYTE
3602     assert(size <= EA_PTRSIZE || IsSSEOrAVXInstruction(ins));
3603
3604     noway_assert(emitVerifyEncodable(ins, size, reg));
3605
3606 #ifdef _TARGET_AMD64_
3607     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
3608     // all other opcodes take a sign-extended 4-byte immediate
3609     noway_assert(size < EA_8BYTE || ins == INS_mov || ((int)val == val && !EA_IS_CNS_RELOC(attr)));
3610 #endif
3611
3612     UNATIVE_OFFSET sz;
3613     instrDesc*     id;
3614     insFormat      fmt       = emitInsModeFormat(ins, IF_RRD_CNS);
3615     bool           valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
3616
3617     // BT reg,imm might be useful but it requires special handling of the immediate value
3618     // (it is always encoded in a byte). Let's not complicate things until this is needed.
3619     assert(ins != INS_bt);
3620
3621     // Figure out the size of the instruction
3622     switch (ins)
3623     {
3624         case INS_mov:
3625 #ifdef _TARGET_AMD64_
3626             // mov reg, imm64 is equivalent to mov reg, imm32 if the high order bits are all 0
3627             // and this isn't a reloc constant.
3628             if (((size > EA_4BYTE) && (0 == (val & 0xFFFFFFFF00000000LL))) && !EA_IS_CNS_RELOC(attr))
3629             {
3630                 attr = size = EA_4BYTE;
3631             }
3632
3633             if (size > EA_4BYTE)
3634             {
3635                 sz = 9; // Really it is 10, but we'll add one more later
3636                 break;
3637             }
3638 #endif // _TARGET_AMD64_
3639             sz = 5;
3640             break;
3641
3642         case INS_rcl_N:
3643         case INS_rcr_N:
3644         case INS_rol_N:
3645         case INS_ror_N:
3646         case INS_shl_N:
3647         case INS_shr_N:
3648         case INS_sar_N:
3649             assert(val != 1);
3650             fmt = IF_RRW_SHF;
3651             sz  = 3;
3652             val &= 0x7F;
3653             valInByte = true; // shift amount always placed in a byte
3654             break;
3655
3656         default:
3657
3658             if (EA_IS_CNS_RELOC(attr))
3659             {
3660                 valInByte = false; // relocs can't be placed in a byte
3661             }
3662
3663             if (valInByte)
3664             {
3665                 if (IsSSEOrAVXInstruction(ins))
3666                 {
3667                     sz = 5;
3668                 }
3669                 else if (size == EA_1BYTE && reg == REG_EAX && !instrIs3opImul(ins))
3670                 {
3671                     sz = 2;
3672                 }
3673                 else
3674                 {
3675                     sz = 3;
3676                 }
3677             }
3678             else
3679             {
3680                 if (reg == REG_EAX && !instrIs3opImul(ins))
3681                 {
3682                     sz = 1;
3683                 }
3684                 else
3685                 {
3686                     sz = 2;
3687                 }
3688
3689 #ifdef _TARGET_AMD64_
3690                 if (size > EA_4BYTE)
3691                 {
3692                     // We special-case anything that takes a full 8-byte constant.
3693                     sz += 4;
3694                 }
3695                 else
3696 #endif // _TARGET_AMD64_
3697                 {
3698                     sz += EA_SIZE_IN_BYTES(attr);
3699                 }
3700             }
3701             break;
3702     }
3703
3704     // Vex prefix size
3705     sz += emitGetVexPrefixSize(ins, attr);
3706
3707     // Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a
3708     // 64-bit sized operand (REX.W). Note that IMUL in our encoding is special, with a "built-in", implicit, target
3709     // register. So we also need to check if that built-in register is an extended register.
3710     if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, size) || instrIsExtendedReg3opImul(ins))
3711     {
3712         sz += emitGetRexPrefixSize(ins);
3713     }
3714
3715     id = emitNewInstrSC(attr, val);
3716     id->idIns(ins);
3717     id->idInsFmt(fmt);
3718     id->idReg1(reg);
3719
3720     // 16-bit operand instructions will need a prefix
3721     if (size == EA_2BYTE)
3722     {
3723         sz += 1;
3724     }
3725
3726     id->idCodeSize(sz);
3727
3728     dispIns(id);
3729     emitCurIGsize += sz;
3730
3731     if (reg == REG_ESP)
3732     {
3733         emitAdjustStackDepth(ins, val);
3734     }
3735 }
3736
3737 /*****************************************************************************
3738  *
3739  *  Add an instruction referencing an integer constant.
3740  */
3741
3742 void emitter::emitIns_I(instruction ins, emitAttr attr, int val)
3743 {
3744     UNATIVE_OFFSET sz;
3745     instrDesc*     id;
3746     bool           valInByte = ((signed char)val == val);
3747
3748 #ifdef _TARGET_AMD64_
3749     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
3750     // all other opcodes take a sign-extended 4-byte immediate
3751     noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
3752 #endif
3753
3754     if (EA_IS_CNS_RELOC(attr))
3755     {
3756         valInByte = false; // relocs can't be placed in a byte
3757     }
3758
3759     switch (ins)
3760     {
3761         case INS_loop:
3762         case INS_jge:
3763             sz = 2;
3764             break;
3765
3766         case INS_ret:
3767             sz = 3;
3768             break;
3769
3770         case INS_push_hide:
3771         case INS_push:
3772             sz = valInByte ? 2 : 5;
3773             break;
3774
3775         default:
3776             NO_WAY("unexpected instruction");
3777     }
3778
3779     id = emitNewInstrSC(attr, val);
3780     id->idIns(ins);
3781     id->idInsFmt(IF_CNS);
3782     id->idCodeSize(sz);
3783
3784     dispIns(id);
3785     emitCurIGsize += sz;
3786
3787     emitAdjustStackDepthPushPop(ins);
3788 }
3789
3790 /*****************************************************************************
3791  *
3792  *  Add a "jump through a table" instruction.
3793  */
3794
3795 void emitter::emitIns_IJ(emitAttr attr, regNumber reg, unsigned base)
3796 {
3797     assert(EA_SIZE(attr) == EA_4BYTE);
3798
3799     UNATIVE_OFFSET    sz  = 3 + 4;
3800     const instruction ins = INS_i_jmp;
3801
3802     if (IsExtendedReg(reg, attr))
3803     {
3804         sz += emitGetRexPrefixSize(ins);
3805     }
3806
3807     instrDesc* id = emitNewInstrAmd(attr, base);
3808
3809     id->idIns(ins);
3810     id->idInsFmt(IF_ARD);
3811     id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
3812     id->idAddr()->iiaAddrMode.amIndxReg = reg;
3813     id->idAddr()->iiaAddrMode.amScale   = emitter::OPSZP;
3814
3815 #ifdef DEBUG
3816     id->idDebugOnlyInfo()->idMemCookie = base;
3817 #endif
3818
3819     id->idCodeSize(sz);
3820
3821     dispIns(id);
3822     emitCurIGsize += sz;
3823 }
3824
3825 /*****************************************************************************
3826  *
3827  *  Add an instruction with a static data member operand. If 'size' is 0, the
3828  *  instruction operates on the address of the static member instead of its
3829  *  value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]").
3830  */
3831
3832 void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs)
3833 {
3834     // Static always need relocs
3835     if (!jitStaticFldIsGlobAddr(fldHnd))
3836     {
3837         attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
3838     }
3839
3840     UNATIVE_OFFSET sz;
3841     instrDesc*     id;
3842
3843     /* Are we pushing the offset of the class variable? */
3844
3845     if (EA_IS_OFFSET(attr))
3846     {
3847         assert(ins == INS_push);
3848         sz = 1 + TARGET_POINTER_SIZE;
3849
3850         id = emitNewInstrDsp(EA_1BYTE, offs);
3851         id->idIns(ins);
3852         id->idInsFmt(IF_MRD_OFF);
3853     }
3854     else
3855     {
3856         insFormat fmt = emitInsModeFormat(ins, IF_MRD);
3857
3858         id = emitNewInstrDsp(attr, offs);
3859         id->idIns(ins);
3860         id->idInsFmt(fmt);
3861         sz = emitInsSizeCV(id, insCodeMR(ins));
3862     }
3863
3864     // Vex prefix size
3865     sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
3866
3867     if (TakesRexWPrefix(ins, attr))
3868     {
3869         // REX.W prefix
3870         sz += emitGetRexPrefixSize(ins);
3871     }
3872
3873     id->idAddr()->iiaFieldHnd = fldHnd;
3874
3875     id->idCodeSize(sz);
3876
3877     dispIns(id);
3878     emitCurIGsize += sz;
3879
3880     emitAdjustStackDepthPushPop(ins);
3881 }
3882
3883 /*****************************************************************************
3884  *
3885  *  Add an instruction with two register operands.
3886  */
3887
3888 void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2)
3889 {
3890     emitAttr size = EA_SIZE(attr);
3891
3892     /* We don't want to generate any useless mov instructions! */
3893     CLANG_FORMAT_COMMENT_ANCHOR;
3894
3895 #ifdef _TARGET_AMD64_
3896     // Same-reg 4-byte mov can be useful because it performs a
3897     // zero-extension to 8 bytes.
3898     assert(ins != INS_mov || reg1 != reg2 || size == EA_4BYTE);
3899 #else
3900     assert(ins != INS_mov || reg1 != reg2);
3901 #endif // _TARGET_AMD64_
3902
3903     assert(size <= EA_32BYTE);
3904     noway_assert(emitVerifyEncodable(ins, size, reg1, reg2));
3905
3906     UNATIVE_OFFSET sz = emitInsSizeRR(ins, reg1, reg2, attr);
3907
3908     if (Is4ByteSSEInstruction(ins))
3909     {
3910         // The 4-Byte SSE instructions require one additional byte
3911         sz += 1;
3912     }
3913
3914     /* Special case: "XCHG" uses a different format */
3915     insFormat fmt = (ins == INS_xchg) ? IF_RRW_RRW : emitInsModeFormat(ins, IF_RRD_RRD);
3916
3917     instrDesc* id = emitNewInstrSmall(attr);
3918     id->idIns(ins);
3919     id->idInsFmt(fmt);
3920     id->idReg1(reg1);
3921     id->idReg2(reg2);
3922     id->idCodeSize(sz);
3923
3924     dispIns(id);
3925     emitCurIGsize += sz;
3926 }
3927
3928 /*****************************************************************************
3929  *
3930  *  Add an instruction with two register operands and an integer constant.
3931  */
3932
3933 void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival)
3934 {
3935     // SSE2 version requires 5 bytes and some SSE/AVX version 6 bytes
3936     UNATIVE_OFFSET sz = 4;
3937     if (IsSSEOrAVXInstruction(ins))
3938     {
3939         // AVX: 3 byte VEX prefix + 1 byte opcode + 1 byte ModR/M + 1 byte immediate
3940         // SSE: 4 byte opcode + 1 byte ModR/M + 1 byte immediate
3941         // SSE: 3 byte opcode + 1 byte ModR/M + 1 byte immediate
3942         sz = (UseVEXEncoding() || Is4ByteSSEInstruction(ins)) ? 6 : 5;
3943     }
3944
3945 #ifdef _TARGET_AMD64_
3946     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
3947     // all other opcodes take a sign-extended 4-byte immediate
3948     noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
3949 #endif
3950
3951     instrDesc* id = emitNewInstrSC(attr, ival);
3952
3953     // REX prefix
3954     if (IsExtendedReg(reg1, attr) || IsExtendedReg(reg2, attr))
3955     {
3956         sz += emitGetRexPrefixSize(ins);
3957     }
3958
3959     if ((ins == INS_pextrq || ins == INS_pinsrq) && !UseVEXEncoding())
3960     {
3961         sz += 1;
3962     }
3963
3964     id->idIns(ins);
3965     id->idInsFmt(IF_RRW_RRW_CNS);
3966     id->idReg1(reg1);
3967     id->idReg2(reg2);
3968     id->idCodeSize(sz);
3969
3970     dispIns(id);
3971     emitCurIGsize += sz;
3972 }
3973
3974 void emitter::emitIns_AR(instruction ins, emitAttr attr, regNumber base, int offs)
3975 {
3976     assert(ins == INS_prefetcht0 || ins == INS_prefetcht1 || ins == INS_prefetcht2 || ins == INS_prefetchnta);
3977
3978     instrDesc* id = emitNewInstrAmd(attr, offs);
3979
3980     id->idIns(ins);
3981
3982     id->idInsFmt(IF_ARD);
3983     id->idAddr()->iiaAddrMode.amBaseReg = base;
3984     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
3985
3986     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
3987     id->idCodeSize(sz);
3988
3989     dispIns(id);
3990     emitCurIGsize += sz;
3991 }
3992
3993 //------------------------------------------------------------------------
3994 // emitIns_AR_R_R: emits the code for an instruction that takes a base memory register, two register operands
3995 //                 and that does not return a value
3996 //
3997 // Arguments:
3998 //    ins       -- The instruction being emitted
3999 //    attr      -- The emit attribute
4000 //    targetReg -- The target register
4001 //    op2Reg    -- The register of the second operand
4002 //    op3Reg    -- The register of the third operand
4003 //    base      -- The base register used for the memory address (first operand)
4004 //    offs      -- The offset from base
4005 //
4006 void emitter::emitIns_AR_R_R(
4007     instruction ins, emitAttr attr, regNumber op2Reg, regNumber op3Reg, regNumber base, int offs)
4008 {
4009     assert(IsSSEOrAVXInstruction(ins));
4010     assert(IsThreeOperandAVXInstruction(ins));
4011
4012     instrDesc* id = emitNewInstrAmd(attr, offs);
4013
4014     id->idIns(ins);
4015     id->idReg1(op2Reg);
4016     id->idReg2(op3Reg);
4017
4018     id->idInsFmt(IF_AWR_RRD_RRD);
4019     id->idAddr()->iiaAddrMode.amBaseReg = base;
4020     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4021
4022     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
4023     id->idCodeSize(sz);
4024
4025     dispIns(id);
4026     emitCurIGsize += sz;
4027 }
4028
4029 void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir)
4030 {
4031     ssize_t    offs = indir->Offset();
4032     instrDesc* id   = emitNewInstrAmd(attr, offs);
4033
4034     id->idIns(ins);
4035     id->idReg1(reg1);
4036
4037     emitHandleMemOp(indir, id, IF_RRW_ARD, ins);
4038
4039     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
4040     id->idCodeSize(sz);
4041
4042     dispIns(id);
4043     emitCurIGsize += sz;
4044 }
4045
4046 void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival)
4047 {
4048     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4049     assert(IsSSEOrAVXInstruction(ins));
4050
4051     ssize_t    offs = indir->Offset();
4052     instrDesc* id   = emitNewInstrAmdCns(attr, offs, ival);
4053
4054     id->idIns(ins);
4055     id->idReg1(reg1);
4056
4057     emitHandleMemOp(indir, id, IF_RRW_ARD_CNS, ins);
4058
4059     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
4060
4061     if (Is4ByteSSEInstruction(ins))
4062     {
4063         // The 4-Byte SSE instructions require two additional bytes
4064         sz += 2;
4065     }
4066
4067     id->idCodeSize(sz);
4068
4069     dispIns(id);
4070     emitCurIGsize += sz;
4071 }
4072
4073 void emitter::emitIns_R_AR_I(instruction ins, emitAttr attr, regNumber reg1, regNumber base, int offs, int ival)
4074 {
4075     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4076     assert(IsSSEOrAVXInstruction(ins));
4077
4078     instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4079
4080     id->idIns(ins);
4081     id->idReg1(reg1);
4082
4083     id->idInsFmt(IF_RRW_ARD_CNS);
4084     id->idAddr()->iiaAddrMode.amBaseReg = base;
4085     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4086
4087     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
4088
4089     if (Is4ByteSSEInstruction(ins))
4090     {
4091         // The 4-Byte SSE instructions require two additional bytes
4092         sz += 2;
4093     }
4094
4095     id->idCodeSize(sz);
4096
4097     dispIns(id);
4098     emitCurIGsize += sz;
4099 }
4100
4101 void emitter::emitIns_R_C_I(
4102     instruction ins, emitAttr attr, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
4103 {
4104     // Static always need relocs
4105     if (!jitStaticFldIsGlobAddr(fldHnd))
4106     {
4107         attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4108     }
4109
4110     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4111     assert(IsSSEOrAVXInstruction(ins));
4112
4113     instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs);
4114
4115     id->idIns(ins);
4116     id->idInsFmt(IF_RRW_MRD_CNS);
4117     id->idReg1(reg1);
4118     id->idAddr()->iiaFieldHnd = fldHnd;
4119
4120     UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival);
4121
4122     if (Is4ByteSSEInstruction(ins))
4123     {
4124         // The 4-Byte SSE instructions require two additional bytes
4125         sz += 2;
4126     }
4127
4128     id->idCodeSize(sz);
4129
4130     dispIns(id);
4131     emitCurIGsize += sz;
4132 }
4133
4134 void emitter::emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs, int ival)
4135 {
4136     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4137     assert(IsSSEOrAVXInstruction(ins));
4138
4139     instrDesc* id = emitNewInstrCns(attr, ival);
4140
4141     id->idIns(ins);
4142     id->idInsFmt(IF_RRW_SRD_CNS);
4143     id->idReg1(reg1);
4144     id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
4145
4146 #ifdef DEBUG
4147     id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4148 #endif
4149
4150     UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival);
4151
4152     if (Is4ByteSSEInstruction(ins))
4153     {
4154         // The 4-Byte SSE instructions require two additional bytes
4155         sz += 2;
4156     }
4157
4158     id->idCodeSize(sz);
4159
4160     dispIns(id);
4161     emitCurIGsize += sz;
4162 }
4163
4164 void emitter::emitIns_R_R_A(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir)
4165 {
4166     assert(IsSSEOrAVXInstruction(ins));
4167     assert(IsThreeOperandAVXInstruction(ins));
4168
4169     ssize_t    offs = indir->Offset();
4170     instrDesc* id   = emitNewInstrAmd(attr, offs);
4171
4172     id->idIns(ins);
4173     id->idReg1(reg1);
4174     id->idReg2(reg2);
4175
4176     emitHandleMemOp(indir, id, IF_RWR_RRD_ARD, ins);
4177
4178     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
4179     id->idCodeSize(sz);
4180
4181     dispIns(id);
4182     emitCurIGsize += sz;
4183 }
4184
4185 void emitter::emitIns_R_R_AR(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs)
4186 {
4187     assert(IsSSEOrAVXInstruction(ins));
4188     assert(IsThreeOperandAVXInstruction(ins));
4189
4190     instrDesc* id = emitNewInstrAmd(attr, offs);
4191
4192     id->idIns(ins);
4193     id->idReg1(reg1);
4194     id->idReg2(reg2);
4195
4196     id->idInsFmt(IF_RWR_RRD_ARD);
4197     id->idAddr()->iiaAddrMode.amBaseReg = base;
4198     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4199
4200     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
4201     id->idCodeSize(sz);
4202
4203     dispIns(id);
4204     emitCurIGsize += sz;
4205 }
4206
4207 //------------------------------------------------------------------------
4208 // IsAVX2GatherInstruction: return true if the instruction is AVX2 Gather
4209 //
4210 // Arguments:
4211 //    ins - the instruction to check
4212 // Return Value:
4213 //    true if the instruction is AVX2 Gather
4214 //
4215 bool IsAVX2GatherInstruction(instruction ins)
4216 {
4217     switch (ins)
4218     {
4219         case INS_vpgatherdd:
4220         case INS_vpgatherdq:
4221         case INS_vpgatherqd:
4222         case INS_vpgatherqq:
4223         case INS_vgatherdps:
4224         case INS_vgatherdpd:
4225         case INS_vgatherqps:
4226         case INS_vgatherqpd:
4227             return true;
4228         default:
4229             return false;
4230     }
4231 }
4232
4233 //------------------------------------------------------------------------
4234 // emitIns_R_AR_R: Emits an AVX2 Gather instructions
4235 //
4236 // Arguments:
4237 //    ins - the instruction to emit
4238 //    attr - the instruction operand size
4239 //    reg1 - the destination and first source operand
4240 //    reg2 - the mask operand (encoded in VEX.vvvv)
4241 //    base - the base register of address to load
4242 //    index - the index register of VSIB
4243 //    scale - the scale number of VSIB
4244 //    offs - the offset added to the memory address from base
4245 //
4246 void emitter::emitIns_R_AR_R(instruction ins,
4247                              emitAttr    attr,
4248                              regNumber   reg1,
4249                              regNumber   reg2,
4250                              regNumber   base,
4251                              regNumber   index,
4252                              int         scale,
4253                              int         offs)
4254 {
4255     assert(IsAVX2GatherInstruction(ins));
4256
4257     instrDesc* id = emitNewInstrAmd(attr, offs);
4258
4259     id->idIns(ins);
4260     id->idReg1(reg1);
4261     id->idReg2(reg2);
4262
4263     id->idInsFmt(IF_RWR_ARD_RRD);
4264     id->idAddr()->iiaAddrMode.amBaseReg = base;
4265     id->idAddr()->iiaAddrMode.amIndxReg = index;
4266     id->idAddr()->iiaAddrMode.amScale   = emitEncodeSize((emitAttr)scale);
4267
4268     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
4269     id->idCodeSize(sz);
4270
4271     dispIns(id);
4272     emitCurIGsize += sz;
4273 }
4274
4275 void emitter::emitIns_R_R_C(
4276     instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs)
4277 {
4278     assert(IsSSEOrAVXInstruction(ins));
4279     assert(IsThreeOperandAVXInstruction(ins));
4280
4281     // Static always need relocs
4282     if (!jitStaticFldIsGlobAddr(fldHnd))
4283     {
4284         attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4285     }
4286
4287     instrDesc* id = emitNewInstrDsp(attr, offs);
4288
4289     id->idIns(ins);
4290     id->idInsFmt(IF_RWR_RRD_MRD);
4291     id->idReg1(reg1);
4292     id->idReg2(reg2);
4293     id->idAddr()->iiaFieldHnd = fldHnd;
4294
4295     UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins));
4296     id->idCodeSize(sz);
4297
4298     dispIns(id);
4299     emitCurIGsize += sz;
4300 }
4301
4302 /*****************************************************************************
4303 *
4304 *  Add an instruction with three register operands.
4305 */
4306
4307 void emitter::emitIns_R_R_R(instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2)
4308 {
4309     assert(IsSSEOrAVXInstruction(ins));
4310     assert(IsThreeOperandAVXInstruction(ins));
4311     // Currently vex prefix only use three bytes mode.
4312     // size = vex + opcode + ModR/M = 3 + 1 + 1 = 5
4313     // TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future
4314     UNATIVE_OFFSET sz = 5;
4315
4316     instrDesc* id = emitNewInstr(attr);
4317     id->idIns(ins);
4318     id->idInsFmt(IF_RWR_RRD_RRD);
4319     id->idReg1(targetReg);
4320     id->idReg2(reg1);
4321     id->idReg3(reg2);
4322
4323     id->idCodeSize(sz);
4324     dispIns(id);
4325     emitCurIGsize += sz;
4326 }
4327
4328 void emitter::emitIns_R_R_S(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs)
4329 {
4330     assert(IsSSEOrAVXInstruction(ins));
4331     assert(IsThreeOperandAVXInstruction(ins));
4332
4333     instrDesc* id = emitNewInstr(attr);
4334
4335     id->idIns(ins);
4336     id->idInsFmt(IF_RWR_RRD_SRD);
4337     id->idReg1(reg1);
4338     id->idReg2(reg2);
4339     id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
4340
4341 #ifdef DEBUG
4342     id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4343 #endif
4344
4345     UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs);
4346     id->idCodeSize(sz);
4347
4348     dispIns(id);
4349     emitCurIGsize += sz;
4350 }
4351
4352 void emitter::emitIns_R_R_A_I(
4353     instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, int ival, insFormat fmt)
4354 {
4355     assert(IsSSEOrAVXInstruction(ins));
4356     assert(IsThreeOperandAVXInstruction(ins));
4357
4358     ssize_t    offs = indir->Offset();
4359     instrDesc* id   = emitNewInstrAmdCns(attr, offs, ival);
4360
4361     id->idIns(ins);
4362     id->idReg1(reg1);
4363     id->idReg2(reg2);
4364
4365     emitHandleMemOp(indir, id, fmt, ins);
4366
4367     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
4368     id->idCodeSize(sz);
4369
4370     dispIns(id);
4371     emitCurIGsize += sz;
4372 }
4373
4374 void emitter::emitIns_R_R_AR_I(
4375     instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs, int ival)
4376 {
4377     assert(IsSSEOrAVXInstruction(ins));
4378     assert(IsThreeOperandAVXInstruction(ins));
4379
4380     instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4381
4382     id->idIns(ins);
4383     id->idReg1(reg1);
4384     id->idReg2(reg2);
4385
4386     id->idInsFmt(IF_RWR_RRD_ARD_CNS);
4387     id->idAddr()->iiaAddrMode.amBaseReg = base;
4388     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4389
4390     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
4391     id->idCodeSize(sz);
4392
4393     dispIns(id);
4394     emitCurIGsize += sz;
4395 }
4396
4397 void emitter::emitIns_R_R_C_I(
4398     instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
4399 {
4400     assert(IsSSEOrAVXInstruction(ins));
4401     assert(IsThreeOperandAVXInstruction(ins));
4402
4403     // Static always need relocs
4404     if (!jitStaticFldIsGlobAddr(fldHnd))
4405     {
4406         attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4407     }
4408
4409     instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs);
4410
4411     id->idIns(ins);
4412     id->idInsFmt(IF_RWR_RRD_MRD_CNS);
4413     id->idReg1(reg1);
4414     id->idReg2(reg2);
4415     id->idAddr()->iiaFieldHnd = fldHnd;
4416
4417     UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival);
4418     id->idCodeSize(sz);
4419
4420     dispIns(id);
4421     emitCurIGsize += sz;
4422 }
4423
4424 /**********************************************************************************
4425 * emitIns_R_R_R_I: Add an instruction with three register operands and an immediate.
4426 *
4427 * Arguments:
4428 *    ins       - the instruction to add
4429 *    attr      - the emitter attribute for instruction
4430 *    targetReg - the target (destination) register
4431 *    reg1      - the first source register
4432 *    reg2      - the second source register
4433 *    ival      - the immediate value
4434 */
4435
4436 void emitter::emitIns_R_R_R_I(
4437     instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, int ival)
4438 {
4439     assert(IsSSEOrAVXInstruction(ins));
4440     assert(IsThreeOperandAVXInstruction(ins));
4441     // Currently vex prefix only use three bytes mode.
4442     // size = vex + opcode + ModR/M + 1-byte-cns = 3 + 1 + 1 + 1 = 6
4443     // TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future
4444     UNATIVE_OFFSET sz = 6;
4445
4446     instrDesc* id = emitNewInstrCns(attr, ival);
4447     id->idIns(ins);
4448     id->idInsFmt(IF_RWR_RRD_RRD_CNS);
4449     id->idReg1(targetReg);
4450     id->idReg2(reg1);
4451     id->idReg3(reg2);
4452
4453     id->idCodeSize(sz);
4454     dispIns(id);
4455     emitCurIGsize += sz;
4456 }
4457
4458 void emitter::emitIns_R_R_S_I(
4459     instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs, int ival)
4460 {
4461     assert(IsSSEOrAVXInstruction(ins));
4462     assert(IsThreeOperandAVXInstruction(ins));
4463
4464     instrDesc* id = emitNewInstrCns(attr, ival);
4465
4466     id->idIns(ins);
4467     id->idInsFmt(IF_RWR_RRD_SRD_CNS);
4468     id->idReg1(reg1);
4469     id->idReg2(reg2);
4470     id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
4471
4472 #ifdef DEBUG
4473     id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4474 #endif
4475
4476     UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival);
4477     id->idCodeSize(sz);
4478
4479     dispIns(id);
4480     emitCurIGsize += sz;
4481 }
4482
4483 //------------------------------------------------------------------------
4484 // encodeXmmRegAsIval: Encodes a XMM register into imm[7:4] for use by a SIMD instruction
4485 //
4486 // Arguments
4487 //    opReg -- The register being encoded
4488 //
4489 // Returns:
4490 //    opReg encoded in imm[7:4]
4491 static int encodeXmmRegAsIval(regNumber opReg)
4492 {
4493     // AVX/AVX2 supports 4-reg format for vblendvps/vblendvpd/vpblendvb,
4494     // which encodes the fourth register into imm8[7:4]
4495     assert(opReg >= XMMBASE);
4496     int ival = (opReg - XMMBASE) << 4;
4497
4498     assert((ival >= 0) && (ival <= 255));
4499     return (int8_t)ival;
4500 }
4501
4502 //------------------------------------------------------------------------
4503 // emitIns_R_R_A_R: emits the code for an instruction that takes a register operand, a GenTreeIndir address,
4504 //                  another register operand, and that returns a value in register
4505 //
4506 // Arguments:
4507 //    ins       -- The instruction being emitted
4508 //    attr      -- The emit attribute
4509 //    targetReg -- The target register
4510 //    op1Reg    -- The register of the first operand
4511 //    op3Reg    -- The register of the third operand
4512 //    indir     -- The GenTreeIndir used for the memory address
4513 //
4514 // Remarks:
4515 //    op2 is built from indir
4516 //
4517 void emitter::emitIns_R_R_A_R(
4518     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, GenTreeIndir* indir)
4519 {
4520     assert(isAvxBlendv(ins));
4521     assert(UseVEXEncoding());
4522
4523     int        ival = encodeXmmRegAsIval(op3Reg);
4524     ssize_t    offs = indir->Offset();
4525     instrDesc* id   = emitNewInstrAmdCns(attr, offs, ival);
4526
4527     id->idIns(ins);
4528     id->idReg1(targetReg);
4529     id->idReg2(op1Reg);
4530
4531     emitHandleMemOp(indir, id, IF_RWR_RRD_ARD_RRD, ins);
4532
4533     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
4534     id->idCodeSize(sz);
4535
4536     dispIns(id);
4537     emitCurIGsize += sz;
4538 }
4539
4540 //------------------------------------------------------------------------
4541 // emitIns_R_R_AR_R: emits the code for an instruction that takes a register operand, a base memory
4542 //                   register, another register operand, and that returns a value in register
4543 //
4544 // Arguments:
4545 //    ins       -- The instruction being emitted
4546 //    attr      -- The emit attribute
4547 //    targetReg -- The target register
4548     //    op1Reg    -- The register of the first operand
4549 //    op3Reg    -- The register of the third operand
4550 //    base      -- The base register used for the memory address
4551 //    offs      -- The offset added to the memory address from base
4552 //
4553 // Remarks:
4554 //    op2 is built from base + offs
4555 //
4556 void emitter::emitIns_R_R_AR_R(
4557     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, regNumber base, int offs)
4558 {
4559     assert(isAvxBlendv(ins));
4560     assert(UseVEXEncoding());
4561
4562     int        ival = encodeXmmRegAsIval(op3Reg);
4563     instrDesc* id   = emitNewInstrAmdCns(attr, offs, ival);
4564
4565     id->idIns(ins);
4566     id->idReg1(targetReg);
4567     id->idReg2(op1Reg);
4568
4569     id->idInsFmt(IF_RWR_RRD_ARD_RRD);
4570     id->idAddr()->iiaAddrMode.amBaseReg = base;
4571     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4572
4573     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
4574     id->idCodeSize(sz);
4575
4576     dispIns(id);
4577     emitCurIGsize += sz;
4578 }
4579
4580 //------------------------------------------------------------------------
4581 // emitIns_R_R_C_R: emits the code for an instruction that takes a register operand, a field handle +
4582 //                  offset,  another register operand, and that returns a value in register
4583 //
4584 // Arguments:
4585 //    ins       -- The instruction being emitted
4586 //    attr      -- The emit attribute
4587 //    targetReg -- The target register
4588 //    op1Reg    -- The register of the first operand
4589 //    op3Reg    -- The register of the third operand
4590 //    fldHnd    -- The CORINFO_FIELD_HANDLE used for the memory address
4591 //    offs      -- The offset added to the memory address from fldHnd
4592 //
4593 // Remarks:
4594 //    op2 is built from fldHnd + offs
4595 //
4596 void emitter::emitIns_R_R_C_R(instruction          ins,
4597                               emitAttr             attr,
4598                               regNumber            targetReg,
4599                               regNumber            op1Reg,
4600                               regNumber            op3Reg,
4601                               CORINFO_FIELD_HANDLE fldHnd,
4602                               int                  offs)
4603 {
4604     assert(isAvxBlendv(ins));
4605     assert(UseVEXEncoding());
4606
4607     // Static always need relocs
4608     if (!jitStaticFldIsGlobAddr(fldHnd))
4609     {
4610         attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4611     }
4612
4613     int        ival = encodeXmmRegAsIval(op3Reg);
4614     instrDesc* id   = emitNewInstrCnsDsp(attr, ival, offs);
4615
4616     id->idIns(ins);
4617     id->idReg1(targetReg);
4618     id->idReg2(op1Reg);
4619
4620     id->idInsFmt(IF_RWR_RRD_MRD_RRD);
4621     id->idAddr()->iiaFieldHnd = fldHnd;
4622
4623     UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival);
4624     id->idCodeSize(sz);
4625
4626     dispIns(id);
4627     emitCurIGsize += sz;
4628 }
4629
4630 //------------------------------------------------------------------------
4631 // emitIns_R_R_S_R: emits the code for an instruction that takes a register operand, a variable index +
4632 //                  offset, another register operand, and that returns a value in register
4633 //
4634 // Arguments:
4635 //    ins       -- The instruction being emitted
4636 //    attr      -- The emit attribute
4637 //    targetReg -- The target register
4638 //    op1Reg    -- The register of the first operand
4639 //    op3Reg    -- The register of the third operand
4640 //    varx      -- The variable index used for the memory address
4641 //    offs      -- The offset added to the memory address from varx
4642 //
4643 // Remarks:
4644 //    op2 is built from varx + offs
4645 //
4646 void emitter::emitIns_R_R_S_R(
4647     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, int varx, int offs)
4648 {
4649     assert(isAvxBlendv(ins));
4650     assert(UseVEXEncoding());
4651
4652     int        ival = encodeXmmRegAsIval(op3Reg);
4653     instrDesc* id   = emitNewInstrCns(attr, ival);
4654
4655     id->idIns(ins);
4656     id->idReg1(targetReg);
4657     id->idReg2(op1Reg);
4658
4659     id->idInsFmt(IF_RWR_RRD_SRD_RRD);
4660     id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
4661
4662     UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival);
4663     id->idCodeSize(sz);
4664
4665     dispIns(id);
4666     emitCurIGsize += sz;
4667 }
4668
4669 void emitter::emitIns_R_R_R_R(
4670     instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, regNumber reg3)
4671 {
4672     assert(isAvxBlendv(ins));
4673     assert(UseVEXEncoding());
4674     // Currently vex prefix only use three bytes mode.
4675     // size = vex + opcode + ModR/M + 1-byte-cns(Reg) = 3 + 1 + 1 + 1 = 6
4676     // TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future
4677     UNATIVE_OFFSET sz = 6;
4678
4679     int        ival = encodeXmmRegAsIval(reg3);
4680     instrDesc* id   = emitNewInstrCns(attr, ival);
4681
4682     id->idIns(ins);
4683     id->idInsFmt(IF_RWR_RRD_RRD_RRD);
4684     id->idReg1(targetReg);
4685     id->idReg2(reg1);
4686     id->idReg3(reg2);
4687     id->idReg4(reg3);
4688
4689     id->idCodeSize(sz);
4690     dispIns(id);
4691     emitCurIGsize += sz;
4692 }
4693
/*****************************************************************************
 *
 *  Add an instruction with a register + static member operands.
 *
 *  ins    -- the instruction being emitted
 *  attr   -- the emit attribute; EA_DSP_RELOC_FLG is added unless 'fldHnd'
 *            is a global address, and EA_IS_OFFSET(attr) selects the
 *            "offset of the class variable" form
 *  reg    -- the register operand (recorded as idReg1)
 *  fldHnd -- the static field handle used for the memory operand
 *  offs   -- the offset passed through to the instruction descriptor
 */
void emitter::emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO_FIELD_HANDLE fldHnd, int offs)
{
    // Static always need relocs
    if (!jitStaticFldIsGlobAddr(fldHnd))
    {
        attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
    }

    emitAttr size = EA_SIZE(attr);

    assert(size <= EA_32BYTE);
    noway_assert(emitVerifyEncodable(ins, size, reg));

    UNATIVE_OFFSET sz;
    instrDesc*     id;

    // Are we MOV'ing the offset of the class variable into EAX?
    if (EA_IS_OFFSET(attr))
    {
        // Note: the descriptor is created with EA_1BYTE rather than 'attr' on this path.
        id = emitNewInstrDsp(EA_1BYTE, offs);
        id->idIns(ins);
        id->idInsFmt(IF_RWR_MRD_OFF);

        // This form is only expected for "mov eax, ...".
        assert(ins == INS_mov && reg == REG_EAX);

        // Special case: "mov eax, [addr]" is smaller
        // (one byte plus a pointer-sized address; presumably the single opcode byte -- confirm)
        sz = 1 + TARGET_POINTER_SIZE;
    }
    else
    {
        insFormat fmt = emitInsModeFormat(ins, IF_RRD_MRD);

        id = emitNewInstrDsp(attr, offs);
        id->idIns(ins);
        id->idInsFmt(fmt);

#ifdef _TARGET_X86_
        // Special case: "mov eax, [addr]" is smaller.
        // This case is not enabled for amd64 as it always uses RIP relative addressing
        // and it results in smaller instruction size than encoding 64-bit addr in the
        // instruction.
        if (ins == INS_mov && reg == REG_EAX)
        {
            sz = 1 + TARGET_POINTER_SIZE;
            // NOTE(review): the extra byte for 2-byte operands is presumably the
            // operand-size prefix -- confirm.
            if (size == EA_2BYTE)
                sz += 1;
        }
        else
#endif //_TARGET_X86_
        {
            // General case (and the only case on non-x86 targets): size computed from the RM encoding.
            sz = emitInsSizeCV(id, insCodeRM(ins));
        }

        // Special case: mov reg, fs:[ddd]
        // (one extra byte; presumably the segment override prefix -- confirm)
        if (fldHnd == FLD_GLOBAL_FS)
        {
            sz += 1;
        }
    }

    // The prefix adjustments below apply to both paths above.

    // VEX prefix
    sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));

    // REX prefix
    if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr))
    {
        sz += emitGetRexPrefixSize(ins);
    }

    id->idReg1(reg);
    id->idCodeSize(sz);

    id->idAddr()->iiaFieldHnd = fldHnd;

    dispIns(id);
    emitCurIGsize += sz;
}
4775
/*****************************************************************************
 *
 *  Add an instruction with a static member + register operands.
 *
 *  ins    -- the instruction being emitted
 *  attr   -- the emit attribute; EA_DSP_RELOC_FLG is added unless 'fldHnd'
 *            is a global address
 *  fldHnd -- the static field handle used for the memory operand
 *  reg    -- the register operand (recorded as idReg1)
 *  offs   -- the offset passed through to the instruction descriptor
 */

void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs)
{
    // Static always need relocs
    if (!jitStaticFldIsGlobAddr(fldHnd))
    {
        attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
    }

    emitAttr size = EA_SIZE(attr);

#if defined(_TARGET_X86_)
    // For x86 it is valid to storeind a double sized operand in an xmm reg to memory
    assert(size <= EA_8BYTE);
#else
    assert(size <= EA_PTRSIZE);
#endif

    noway_assert(emitVerifyEncodable(ins, size, reg));

    instrDesc* id  = emitNewInstrDsp(attr, offs);
    insFormat  fmt = emitInsModeFormat(ins, IF_MRD_RRD);

    id->idIns(ins);
    id->idInsFmt(fmt);

    UNATIVE_OFFSET sz;

#ifdef _TARGET_X86_
    // Special case: "mov [addr], EAX" is smaller.
    // This case is not enabled for amd64 as it always uses RIP relative addressing
    // and it will result in smaller instruction size than encoding 64-bit addr in
    // the instruction.
    if (ins == INS_mov && reg == REG_EAX)
    {
        // One byte plus a pointer-sized address (presumably the single opcode byte -- confirm).
        sz = 1 + TARGET_POINTER_SIZE;
        // NOTE(review): the extra byte for 2-byte operands is presumably the
        // operand-size prefix -- confirm.
        if (size == EA_2BYTE)
            sz += 1;
    }
    else
#endif //_TARGET_X86_
    {
        // General case (and the only case on non-x86 targets): size computed from the MR encoding.
        sz = emitInsSizeCV(id, insCodeMR(ins));
    }

    // Special case: mov fs:[ddd], reg
    // (one extra byte; presumably the segment override prefix -- confirm)
    if (fldHnd == FLD_GLOBAL_FS)
    {
        sz += 1;
    }

    // VEX prefix
    sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));

    // REX prefix
    if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr))
    {
        sz += emitGetRexPrefixSize(ins);
    }

    id->idReg1(reg);
    id->idCodeSize(sz);

    id->idAddr()->iiaFieldHnd = fldHnd;

    dispIns(id);
    emitCurIGsize += sz;
}
4848
4849 /*****************************************************************************
4850  *
4851  *  Add an instruction with a static member + constant.
4852  */
4853
4854 void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, int val)
4855 {
4856     // Static always need relocs
4857     if (!jitStaticFldIsGlobAddr(fldHnd))
4858     {
4859         attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4860     }
4861
4862     insFormat fmt;
4863
4864     switch (ins)
4865     {
4866         case INS_rcl_N:
4867         case INS_rcr_N:
4868         case INS_rol_N:
4869         case INS_ror_N:
4870         case INS_shl_N:
4871         case INS_shr_N:
4872         case INS_sar_N:
4873             assert(val != 1);
4874             fmt = IF_MRW_SHF;
4875             val &= 0x7F;
4876             break;
4877
4878         default:
4879             fmt = emitInsModeFormat(ins, IF_MRD_CNS);
4880             break;
4881     }
4882
4883     instrDesc* id = emitNewInstrCnsDsp(attr, val, offs);
4884     id->idIns(ins);
4885     id->idInsFmt(fmt);
4886
4887     code_t         code = insCodeMI(ins);
4888     UNATIVE_OFFSET sz   = emitInsSizeCV(id, code, val);
4889
4890     // Vex prefix
4891     sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins));
4892
4893     // REX prefix, if not already included in "code"
4894     if (TakesRexWPrefix(ins, attr) && !hasRexPrefix(code))
4895     {
4896         sz += emitGetRexPrefixSize(ins);
4897     }
4898
4899     id->idAddr()->iiaFieldHnd = fldHnd;
4900     id->idCodeSize(sz);
4901
4902     dispIns(id);
4903     emitCurIGsize += sz;
4904 }
4905
//------------------------------------------------------------------------
// emitIns_J_S: emits a "mov" that references a basic block label and a stack local
//              (instruction format IF_SWR_LABEL)
//
// Arguments:
//    ins  -- The instruction being emitted; must be INS_mov
//    attr -- The emit attribute (not referenced by this function)
//    dst  -- The basic block whose label is referenced; must be flagged BBF_JMP_TARGET
//    varx -- The variable index used for the stack location
//    offs -- The offset added to the stack location from varx
//
void emitter::emitIns_J_S(instruction ins, emitAttr attr, BasicBlock* dst, int varx, int offs)
{
    assert(ins == INS_mov);
    assert(dst->bbFlags & BBF_JMP_TARGET);

    instrDescLbl* id = emitNewInstrLbl();

    id->idIns(ins);
    id->idInsFmt(IF_SWR_LABEL);
    id->idAddr()->iiaBBlabel = dst;

    /* The label reference is always long */

    id->idjShort    = 0;
    id->idjKeepLong = 1;

    /* Record the current IG and offset within it */

    id->idjIG   = emitCurIG;
    id->idjOffs = emitCurIGsize;

    /* Link this instruction in at the head of this IG's jump list */

    id->idjNext      = emitCurIGjmpList;
    emitCurIGjmpList = id;

    // Size is the stack-variable form of the MI encoding plus sizeof(INT32)
    // (presumably the 32-bit immediate that will hold the label address -- confirm).
    UNATIVE_OFFSET sz = sizeof(INT32) + emitInsSizeSV(id, insCodeMI(ins), varx, offs);
    id->dstLclVar.initLclVarAddr(varx, offs);
#ifdef DEBUG
    id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
#endif

#if EMITTER_STATS
    emitTotalIGjmps++;
#endif

    // Note: on AMD64 the "if" below is compiled out, so the reloc flag is set unconditionally.
#ifndef _TARGET_AMD64_
    // Storing the address of a basicBlock will need a reloc
    // as the instruction uses the absolute address,
    // not a relative address.
    //
    // On Amd64, absolute code addresses should always go through a reloc
    // to be encoded as RIP rel32 offset.
    if (emitComp->opts.compReloc)
#endif
    {
        id->idSetIsDspReloc();
    }

    id->idCodeSize(sz);

    dispIns(id);
    emitCurIGsize += sz;
}
4960
4961 /*****************************************************************************
4962  *
4963  *  Add a label instruction.
4964  */
4965 void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
4966 {
4967     assert(ins == INS_lea);
4968     assert(dst->bbFlags & BBF_JMP_TARGET);
4969
4970     instrDescJmp* id = emitNewInstrJmp();
4971
4972     id->idIns(ins);
4973     id->idReg1(reg);
4974     id->idInsFmt(IF_RWR_LABEL);
4975     id->idOpSize(EA_SIZE(attr)); // emitNewInstrJmp() sets the size (incorrectly) to EA_1BYTE
4976     id->idAddr()->iiaBBlabel = dst;
4977
4978     /* The label reference is always long */
4979
4980     id->idjShort    = 0;
4981     id->idjKeepLong = 1;
4982
4983     /* Record the current IG and offset within it */
4984
4985     id->idjIG   = emitCurIG;
4986     id->idjOffs = emitCurIGsize;
4987
4988     /* Append this instruction to this IG's jump list */
4989
4990     id->idjNext      = emitCurIGjmpList;
4991     emitCurIGjmpList = id;
4992
4993 #ifdef DEBUG
4994     // Mark the catch return
4995     if (emitComp->compCurBB->bbJumpKind == BBJ_EHCATCHRET)
4996     {
4997         id->idDebugOnlyInfo()->idCatchRet = true;
4998     }
4999 #endif // DEBUG
5000
5001 #if EMITTER_STATS
5002     emitTotalIGjmps++;
5003 #endif
5004
5005     // Set the relocation flags - these give hint to zap to perform
5006     // relocation of the specified 32bit address.
5007     //
5008     // Note the relocation flags influence the size estimate.
5009     id->idSetRelocFlags(attr);
5010
5011     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
5012     id->idCodeSize(sz);
5013
5014     dispIns(id);
5015     emitCurIGsize += sz;
5016 }
5017
5018 /*****************************************************************************
5019  *
5020  *  The following adds instructions referencing address modes.
5021  */
5022
5023 void emitter::emitIns_I_AR(instruction ins, emitAttr attr, int val, regNumber reg, int disp)
5024 {
5025     assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5026
5027 #ifdef _TARGET_AMD64_
5028     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5029     // all other opcodes take a sign-extended 4-byte immediate
5030     noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5031 #endif
5032
5033     insFormat fmt;
5034
5035     switch (ins)
5036     {
5037         case INS_rcl_N:
5038         case INS_rcr_N:
5039         case INS_rol_N:
5040         case INS_ror_N:
5041         case INS_shl_N:
5042         case INS_shr_N:
5043         case INS_sar_N:
5044             assert(val != 1);
5045             fmt = IF_ARW_SHF;
5046             val &= 0x7F;
5047             break;
5048
5049         default:
5050             fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5051             break;
5052     }
5053
5054     /*
5055     Useful if you want to trap moves with 0 constant
5056     if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE)
5057     {
5058         printf("MOV 0\n");
5059     }
5060     */
5061
5062     UNATIVE_OFFSET sz;
5063     instrDesc*     id = emitNewInstrAmdCns(attr, disp, val);
5064     id->idIns(ins);
5065     id->idInsFmt(fmt);
5066
5067     id->idAddr()->iiaAddrMode.amBaseReg = reg;
5068     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5069
5070     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5071
5072     sz = emitInsSizeAM(id, insCodeMI(ins), val);
5073     id->idCodeSize(sz);
5074
5075     dispIns(id);
5076     emitCurIGsize += sz;
5077 }
5078
5079 void emitter::emitIns_I_AI(instruction ins, emitAttr attr, int val, ssize_t disp)
5080 {
5081     assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5082
5083 #ifdef _TARGET_AMD64_
5084     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5085     // all other opcodes take a sign-extended 4-byte immediate
5086     noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5087 #endif
5088
5089     insFormat fmt;
5090
5091     switch (ins)
5092     {
5093         case INS_rcl_N:
5094         case INS_rcr_N:
5095         case INS_rol_N:
5096         case INS_ror_N:
5097         case INS_shl_N:
5098         case INS_shr_N:
5099         case INS_sar_N:
5100             assert(val != 1);
5101             fmt = IF_ARW_SHF;
5102             val &= 0x7F;
5103             break;
5104
5105         default:
5106             fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5107             break;
5108     }
5109
5110     /*
5111     Useful if you want to trap moves with 0 constant
5112     if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE)
5113     {
5114         printf("MOV 0\n");
5115     }
5116     */
5117
5118     UNATIVE_OFFSET sz;
5119     instrDesc*     id = emitNewInstrAmdCns(attr, disp, val);
5120     id->idIns(ins);
5121     id->idInsFmt(fmt);
5122
5123     id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5124     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5125
5126     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5127
5128     sz = emitInsSizeAM(id, insCodeMI(ins), val);
5129     id->idCodeSize(sz);
5130
5131     dispIns(id);
5132     emitCurIGsize += sz;
5133 }
5134
5135 void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp)
5136 {
5137     assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE) && (ireg != REG_NA));
5138     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5139
5140     if (ins == INS_lea)
5141     {
5142         if (ireg == base && disp == 0)
5143         {
5144             // Maybe the emitter is not the common place for this optimization, but it's a better choke point
5145             // for all the emitIns(ins, tree), we would have to be analyzing at each call site
5146             //
5147             return;
5148         }
5149     }
5150
5151     UNATIVE_OFFSET sz;
5152     instrDesc*     id  = emitNewInstrAmd(attr, disp);
5153     insFormat      fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5154
5155     id->idIns(ins);
5156     id->idInsFmt(fmt);
5157     id->idReg1(ireg);
5158
5159     id->idAddr()->iiaAddrMode.amBaseReg = base;
5160     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5161
5162     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5163
5164     sz = emitInsSizeAM(id, insCodeRM(ins));
5165
5166     if (Is4ByteSSEInstruction(ins))
5167     {
5168         // The 4-Byte SSE instructions require two additional bytes
5169         sz += 2;
5170     }
5171
5172     id->idCodeSize(sz);
5173
5174     dispIns(id);
5175     emitCurIGsize += sz;
5176 }
5177
5178 void emitter::emitIns_R_AI(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp)
5179 {
5180     assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5181     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5182
5183     UNATIVE_OFFSET sz;
5184     instrDesc*     id  = emitNewInstrAmd(attr, disp);
5185     insFormat      fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5186
5187     id->idIns(ins);
5188     id->idInsFmt(fmt);
5189     id->idReg1(ireg);
5190
5191     id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5192     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5193
5194     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5195
5196     sz = emitInsSizeAM(id, insCodeRM(ins));
5197     id->idCodeSize(sz);
5198
5199     dispIns(id);
5200     emitCurIGsize += sz;
5201 }
5202
5203 void emitter::emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp)
5204 {
5205     UNATIVE_OFFSET sz;
5206     instrDesc*     id = emitNewInstrAmd(attr, disp);
5207     insFormat      fmt;
5208
5209     if (ireg == REG_NA)
5210     {
5211         fmt = emitInsModeFormat(ins, IF_ARD);
5212     }
5213     else
5214     {
5215         fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5216
5217         assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE));
5218         noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5219
5220         id->idReg1(ireg);
5221     }
5222
5223     id->idIns(ins);
5224     id->idInsFmt(fmt);
5225
5226     id->idAddr()->iiaAddrMode.amBaseReg = base;
5227     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5228
5229     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5230
5231     sz = emitInsSizeAM(id, insCodeMR(ins));
5232     id->idCodeSize(sz);
5233
5234     dispIns(id);
5235     emitCurIGsize += sz;
5236
5237     emitAdjustStackDepthPushPop(ins);
5238 }
5239
5240 void emitter::emitIns_AR_R_I(instruction ins, emitAttr attr, regNumber base, int disp, regNumber ireg, int ival)
5241 {
5242     assert(ins == INS_vextracti128 || ins == INS_vextractf128);
5243     assert(base != REG_NA);
5244     assert(ireg != REG_NA);
5245     instrDesc* id = emitNewInstrAmdCns(attr, disp, ival);
5246
5247     id->idIns(ins);
5248     id->idInsFmt(IF_AWR_RRD_CNS);
5249     id->idAddr()->iiaAddrMode.amBaseReg = base;
5250     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5251     id->idReg1(ireg);
5252
5253     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5254
5255     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins), ival);
5256     id->idCodeSize(sz);
5257
5258     dispIns(id);
5259     emitCurIGsize += sz;
5260 }
5261
5262 void emitter::emitIns_AI_R(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp)
5263 {
5264     UNATIVE_OFFSET sz;
5265     instrDesc*     id = emitNewInstrAmd(attr, disp);
5266     insFormat      fmt;
5267
5268     if (ireg == REG_NA)
5269     {
5270         fmt = emitInsModeFormat(ins, IF_ARD);
5271     }
5272     else
5273     {
5274         fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5275
5276         assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5277         noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5278
5279         id->idReg1(ireg);
5280     }
5281
5282     id->idIns(ins);
5283     id->idInsFmt(fmt);
5284
5285     id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5286     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5287
5288     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5289
5290     sz = emitInsSizeAM(id, insCodeMR(ins));
5291     id->idCodeSize(sz);
5292
5293     dispIns(id);
5294     emitCurIGsize += sz;
5295
5296     emitAdjustStackDepthPushPop(ins);
5297 }
5298
5299 void emitter::emitIns_I_ARR(instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, int disp)
5300 {
5301     assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5302
5303 #ifdef _TARGET_AMD64_
5304     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5305     // all other opcodes take a sign-extended 4-byte immediate
5306     noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5307 #endif
5308
5309     insFormat fmt;
5310
5311     switch (ins)
5312     {
5313         case INS_rcl_N:
5314         case INS_rcr_N:
5315         case INS_rol_N:
5316         case INS_ror_N:
5317         case INS_shl_N:
5318         case INS_shr_N:
5319         case INS_sar_N:
5320             assert(val != 1);
5321             fmt = IF_ARW_SHF;
5322             val &= 0x7F;
5323             break;
5324
5325         default:
5326             fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5327             break;
5328     }
5329
5330     UNATIVE_OFFSET sz;
5331     instrDesc*     id = emitNewInstrAmdCns(attr, disp, val);
5332     id->idIns(ins);
5333     id->idInsFmt(fmt);
5334
5335     id->idAddr()->iiaAddrMode.amBaseReg = reg;
5336     id->idAddr()->iiaAddrMode.amIndxReg = rg2;
5337     id->idAddr()->iiaAddrMode.amScale   = emitter::OPSZ1;
5338
5339     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5340
5341     sz = emitInsSizeAM(id, insCodeMI(ins), val);
5342     id->idCodeSize(sz);
5343
5344     dispIns(id);
5345     emitCurIGsize += sz;
5346 }
5347
5348 void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, int disp)
5349 {
5350     assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5351     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5352
5353     UNATIVE_OFFSET sz;
5354     instrDesc*     id  = emitNewInstrAmd(attr, disp);
5355     insFormat      fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5356
5357     id->idIns(ins);
5358     id->idInsFmt(fmt);
5359     id->idReg1(ireg);
5360
5361     id->idAddr()->iiaAddrMode.amBaseReg = base;
5362     id->idAddr()->iiaAddrMode.amIndxReg = index;
5363     id->idAddr()->iiaAddrMode.amScale   = emitter::OPSZ1;
5364
5365     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5366
5367     sz = emitInsSizeAM(id, insCodeRM(ins));
5368     id->idCodeSize(sz);
5369
5370     dispIns(id);
5371     emitCurIGsize += sz;
5372 }
5373
5374 void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber index, int disp)
5375 {
5376     UNATIVE_OFFSET sz;
5377     instrDesc*     id = emitNewInstrAmd(attr, disp);
5378     insFormat      fmt;
5379
5380     if (ireg == REG_NA)
5381     {
5382         fmt = emitInsModeFormat(ins, IF_ARD);
5383     }
5384     else
5385     {
5386         fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5387
5388         assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5389         noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5390
5391         id->idReg1(ireg);
5392     }
5393
5394     id->idIns(ins);
5395     id->idInsFmt(fmt);
5396
5397     id->idAddr()->iiaAddrMode.amBaseReg = reg;
5398     id->idAddr()->iiaAddrMode.amIndxReg = index;
5399     id->idAddr()->iiaAddrMode.amScale   = emitEncodeScale(1);
5400
5401     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5402
5403     sz = emitInsSizeAM(id, insCodeMR(ins));
5404     id->idCodeSize(sz);
5405
5406     dispIns(id);
5407     emitCurIGsize += sz;
5408
5409     emitAdjustStackDepthPushPop(ins);
5410 }
5411
5412 void emitter::emitIns_I_ARX(
5413     instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, unsigned mul, int disp)
5414 {
5415     assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5416
5417 #ifdef _TARGET_AMD64_
5418     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5419     // all other opcodes take a sign-extended 4-byte immediate
5420     noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5421 #endif
5422
5423     insFormat fmt;
5424
5425     switch (ins)
5426     {
5427         case INS_rcl_N:
5428         case INS_rcr_N:
5429         case INS_rol_N:
5430         case INS_ror_N:
5431         case INS_shl_N:
5432         case INS_shr_N:
5433         case INS_sar_N:
5434             assert(val != 1);
5435             fmt = IF_ARW_SHF;
5436             val &= 0x7F;
5437             break;
5438
5439         default:
5440             fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5441             break;
5442     }
5443
5444     UNATIVE_OFFSET sz;
5445     instrDesc*     id = emitNewInstrAmdCns(attr, disp, val);
5446
5447     id->idIns(ins);
5448     id->idInsFmt(fmt);
5449
5450     id->idAddr()->iiaAddrMode.amBaseReg = reg;
5451     id->idAddr()->iiaAddrMode.amIndxReg = rg2;
5452     id->idAddr()->iiaAddrMode.amScale   = emitEncodeScale(mul);
5453
5454     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5455
5456     sz = emitInsSizeAM(id, insCodeMI(ins), val);
5457     id->idCodeSize(sz);
5458
5459     dispIns(id);
5460     emitCurIGsize += sz;
5461 }
5462
5463 void emitter::emitIns_R_ARX(
5464     instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp)
5465 {
5466     assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5467     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5468
5469     UNATIVE_OFFSET sz;
5470     instrDesc*     id  = emitNewInstrAmd(attr, disp);
5471     insFormat      fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5472
5473     id->idIns(ins);
5474     id->idInsFmt(fmt);
5475     id->idReg1(ireg);
5476
5477     id->idAddr()->iiaAddrMode.amBaseReg = base;
5478     id->idAddr()->iiaAddrMode.amIndxReg = index;
5479     id->idAddr()->iiaAddrMode.amScale   = emitEncodeScale(mul);
5480
5481     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5482
5483     sz = emitInsSizeAM(id, insCodeRM(ins));
5484     id->idCodeSize(sz);
5485
5486     dispIns(id);
5487     emitCurIGsize += sz;
5488 }
5489
5490 void emitter::emitIns_ARX_R(
5491     instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp)
5492 {
5493     UNATIVE_OFFSET sz;
5494     instrDesc*     id = emitNewInstrAmd(attr, disp);
5495     insFormat      fmt;
5496
5497     if (ireg == REG_NA)
5498     {
5499         fmt = emitInsModeFormat(ins, IF_ARD);
5500     }
5501     else
5502     {
5503         fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5504
5505         noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5506         assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5507
5508         id->idReg1(ireg);
5509     }
5510
5511     id->idIns(ins);
5512     id->idInsFmt(fmt);
5513
5514     id->idAddr()->iiaAddrMode.amBaseReg = base;
5515     id->idAddr()->iiaAddrMode.amIndxReg = index;
5516     id->idAddr()->iiaAddrMode.amScale   = emitEncodeScale(mul);
5517
5518     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5519
5520     sz = emitInsSizeAM(id, insCodeMR(ins));
5521     id->idCodeSize(sz);
5522
5523     dispIns(id);
5524     emitCurIGsize += sz;
5525
5526     emitAdjustStackDepthPushPop(ins);
5527 }
5528
5529 void emitter::emitIns_I_AX(instruction ins, emitAttr attr, int val, regNumber reg, unsigned mul, int disp)
5530 {
5531     assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5532
5533 #ifdef _TARGET_AMD64_
5534     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5535     // all other opcodes take a sign-extended 4-byte immediate
5536     noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5537 #endif
5538
5539     insFormat fmt;
5540
5541     switch (ins)
5542     {
5543         case INS_rcl_N:
5544         case INS_rcr_N:
5545         case INS_rol_N:
5546         case INS_ror_N:
5547         case INS_shl_N:
5548         case INS_shr_N:
5549         case INS_sar_N:
5550             assert(val != 1);
5551             fmt = IF_ARW_SHF;
5552             val &= 0x7F;
5553             break;
5554
5555         default:
5556             fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5557             break;
5558     }
5559
5560     UNATIVE_OFFSET sz;
5561     instrDesc*     id = emitNewInstrAmdCns(attr, disp, val);
5562     id->idIns(ins);
5563     id->idInsFmt(fmt);
5564
5565     id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5566     id->idAddr()->iiaAddrMode.amIndxReg = reg;
5567     id->idAddr()->iiaAddrMode.amScale   = emitEncodeScale(mul);
5568
5569     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5570
5571     sz = emitInsSizeAM(id, insCodeMI(ins), val);
5572     id->idCodeSize(sz);
5573
5574     dispIns(id);
5575     emitCurIGsize += sz;
5576 }
5577
5578 void emitter::emitIns_R_AX(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp)
5579 {
5580     assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5581     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5582
5583     UNATIVE_OFFSET sz;
5584     instrDesc*     id  = emitNewInstrAmd(attr, disp);
5585     insFormat      fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5586
5587     id->idIns(ins);
5588     id->idInsFmt(fmt);
5589     id->idReg1(ireg);
5590
5591     id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5592     id->idAddr()->iiaAddrMode.amIndxReg = reg;
5593     id->idAddr()->iiaAddrMode.amScale   = emitEncodeScale(mul);
5594
5595     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5596
5597     sz = emitInsSizeAM(id, insCodeRM(ins));
5598     id->idCodeSize(sz);
5599
5600     dispIns(id);
5601     emitCurIGsize += sz;
5602 }
5603
5604 void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp)
5605 {
5606     UNATIVE_OFFSET sz;
5607     instrDesc*     id = emitNewInstrAmd(attr, disp);
5608     insFormat      fmt;
5609
5610     if (ireg == REG_NA)
5611     {
5612         fmt = emitInsModeFormat(ins, IF_ARD);
5613     }
5614     else
5615     {
5616         fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5617         noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5618         assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5619
5620         id->idReg1(ireg);
5621     }
5622
5623     id->idIns(ins);
5624     id->idInsFmt(fmt);
5625
5626     id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5627     id->idAddr()->iiaAddrMode.amIndxReg = reg;
5628     id->idAddr()->iiaAddrMode.amScale   = emitEncodeScale(mul);
5629
5630     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5631
5632     sz = emitInsSizeAM(id, insCodeMR(ins));
5633     id->idCodeSize(sz);
5634
5635     dispIns(id);
5636     emitCurIGsize += sz;
5637
5638     emitAdjustStackDepthPushPop(ins);
5639 }
5640
5641 #ifdef FEATURE_HW_INTRINSICS
5642 //------------------------------------------------------------------------
5643 // emitIns_SIMD_R_R_I: emits the code for a SIMD instruction that takes a register operand, an immediate operand
5644 //                     and that returns a value in register
5645 //
5646 // Arguments:
5647 //    ins       -- The instruction being emitted
5648 //    attr      -- The emit attribute
5649 //    targetReg -- The target register
5650 //    op1Reg    -- The register of the first operand
5651 //    ival      -- The immediate value
5652 //
5653 void emitter::emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int ival)
5654 {
5655     if (UseVEXEncoding() || IsDstSrcImmAvxInstruction(ins))
5656     {
5657         emitIns_R_R_I(ins, attr, targetReg, op1Reg, ival);
5658     }
5659     else
5660     {
5661         if (op1Reg != targetReg)
5662         {
5663             emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5664         }
5665         emitIns_R_I(ins, attr, targetReg, ival);
5666     }
5667 }
5668
5669 //------------------------------------------------------------------------
5670 // emitIns_SIMD_R_R_A: emits the code for a SIMD instruction that takes a register operand, a GenTreeIndir address,
5671 //                     and that returns a value in register
5672 //
5673 // Arguments:
5674 //    ins       -- The instruction being emitted
5675 //    attr      -- The emit attribute
5676 //    targetReg -- The target register
5677 //    op1Reg    -- The register of the first operand
5678 //    indir     -- The GenTreeIndir used for the memory address
5679 //
5680 void emitter::emitIns_SIMD_R_R_A(
5681     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, GenTreeIndir* indir)
5682 {
5683     if (UseVEXEncoding())
5684     {
5685         emitIns_R_R_A(ins, attr, targetReg, op1Reg, indir);
5686     }
5687     else
5688     {
5689         if (op1Reg != targetReg)
5690         {
5691             emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5692         }
5693         emitIns_R_A(ins, attr, targetReg, indir);
5694     }
5695 }
5696
5697 //------------------------------------------------------------------------
5698 // emitIns_SIMD_R_R_AR: emits the code for a SIMD instruction that takes a register operand, a base memory register,
5699 //                      and that returns a value in register
5700 //
5701 // Arguments:
5702 //    ins       -- The instruction being emitted
5703 //    attr      -- The emit attribute
5704 //    targetReg -- The target register
5705 //    op1Reg    -- The register of the first operand
5706 //    base      -- The base register used for the memory address
5707 //
5708 void emitter::emitIns_SIMD_R_R_AR(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber base)
5709 {
5710     if (UseVEXEncoding())
5711     {
5712         emitIns_R_R_AR(ins, attr, targetReg, op1Reg, base, 0);
5713     }
5714     else
5715     {
5716         if (op1Reg != targetReg)
5717         {
5718             emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5719         }
5720         emitIns_R_AR(ins, attr, targetReg, base, 0);
5721     }
5722 }
5723
5724 //------------------------------------------------------------------------
5725 // emitIns_SIMD_R_R_C: emits the code for a SIMD instruction that takes a register operand, a field handle + offset,
5726 //                     and that returns a value in register
5727 //
5728 // Arguments:
5729 //    ins       -- The instruction being emitted
5730 //    attr      -- The emit attribute
5731 //    targetReg -- The target register
5732 //    op1Reg    -- The register of the first operand
5733 //    fldHnd    -- The CORINFO_FIELD_HANDLE used for the memory address
5734 //    offs      -- The offset added to the memory address from fldHnd
5735 //
5736 void emitter::emitIns_SIMD_R_R_C(
5737     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, CORINFO_FIELD_HANDLE fldHnd, int offs)
5738 {
5739     if (UseVEXEncoding())
5740     {
5741         emitIns_R_R_C(ins, attr, targetReg, op1Reg, fldHnd, offs);
5742     }
5743     else
5744     {
5745         if (op1Reg != targetReg)
5746         {
5747             emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5748         }
5749         emitIns_R_C(ins, attr, targetReg, fldHnd, offs);
5750     }
5751 }
5752
5753 //------------------------------------------------------------------------
5754 // emitIns_SIMD_R_R_R: emits the code for a SIMD instruction that takes two register operands, and that returns a
5755 //                     value in register
5756 //
5757 // Arguments:
5758 //    ins       -- The instruction being emitted
5759 //    attr      -- The emit attribute
5760 //    targetReg -- The target register
5761 //    op1Reg    -- The register of the first operand
5762 //    op2Reg    -- The register of the second operand
5763 //
5764 void emitter::emitIns_SIMD_R_R_R(
5765     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg)
5766 {
5767     if (UseVEXEncoding())
5768     {
5769         emitIns_R_R_R(ins, attr, targetReg, op1Reg, op2Reg);
5770     }
5771     else
5772     {
5773         if (op1Reg != targetReg)
5774         {
5775             // Ensure we aren't overwriting op2
5776             assert(op2Reg != targetReg);
5777
5778             emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5779         }
5780         emitIns_R_R(ins, attr, targetReg, op2Reg);
5781     }
5782 }
5783
5784 //------------------------------------------------------------------------
5785 // emitIns_SIMD_R_R_S: emits the code for a SIMD instruction that takes a register operand, a variable index + offset,
5786 //                     and that returns a value in register
5787 //
5788 // Arguments:
5789 //    ins       -- The instruction being emitted
5790 //    attr      -- The emit attribute
5791 //    targetReg -- The target register
5792 //    op1Reg    -- The register of the first operand
5793 //    varx      -- The variable index used for the memory address
5794 //    offs      -- The offset added to the memory address from varx
5795 //
5796 void emitter::emitIns_SIMD_R_R_S(
5797     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int varx, int offs)
5798 {
5799     if (UseVEXEncoding())
5800     {
5801         emitIns_R_R_S(ins, attr, targetReg, op1Reg, varx, offs);
5802     }
5803     else
5804     {
5805         if (op1Reg != targetReg)
5806         {
5807             emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5808         }
5809         emitIns_R_S(ins, attr, targetReg, varx, offs);
5810     }
5811 }
5812
5813 //------------------------------------------------------------------------
5814 // emitIns_SIMD_R_R_A_I: emits the code for a SIMD instruction that takes a register operand, a GenTreeIndir address,
5815 //                       an immediate operand, and that returns a value in register
5816 //
5817 // Arguments:
5818 //    ins       -- The instruction being emitted
5819 //    attr      -- The emit attribute
5820 //    targetReg -- The target register
5821 //    op1Reg    -- The register of the first operand
5822 //    indir     -- The GenTreeIndir used for the memory address
5823 //    ival      -- The immediate value
5824 //
5825 void emitter::emitIns_SIMD_R_R_A_I(
5826     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, GenTreeIndir* indir, int ival)
5827 {
5828     if (UseVEXEncoding())
5829     {
5830         emitIns_R_R_A_I(ins, attr, targetReg, op1Reg, indir, ival, IF_RWR_RRD_ARD_CNS);
5831     }
5832     else
5833     {
5834         if (op1Reg != targetReg)
5835         {
5836             emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5837         }
5838         emitIns_R_A_I(ins, attr, targetReg, indir, ival);
5839     }
5840 }
5841
5842 //------------------------------------------------------------------------
5843 // emitIns_SIMD_R_R_AR_I: emits the code for a SIMD instruction that takes a register operand, a base memory register,
5844 //                        an immediate operand, and that returns a value in register
5845 //
5846 // Arguments:
5847 //    ins       -- The instruction being emitted
5848 //    attr      -- The emit attribute
5849 //    targetReg -- The target register
5850 //    op1Reg    -- The register of the first operand
5851 //    base      -- The base register used for the memory address
5852 //    ival      -- The immediate value
5853 //
5854 void emitter::emitIns_SIMD_R_R_AR_I(
5855     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber base, int ival)
5856 {
5857     if (UseVEXEncoding())
5858     {
5859         emitIns_R_R_AR_I(ins, attr, targetReg, op1Reg, base, 0, ival);
5860     }
5861     else
5862     {
5863         if (op1Reg != targetReg)
5864         {
5865             emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5866         }
5867         emitIns_R_AR_I(ins, attr, targetReg, base, 0, ival);
5868     }
5869 }
5870
5871 //------------------------------------------------------------------------
5872 // emitIns_SIMD_R_R_C_I: emits the code for a SIMD instruction that takes a register operand, a field handle + offset,
5873 //                       an immediate operand, and that returns a value in register
5874 //
5875 // Arguments:
5876 //    ins       -- The instruction being emitted
5877 //    attr      -- The emit attribute
5878 //    targetReg -- The target register
5879 //    op1Reg    -- The register of the first operand
5880 //    fldHnd    -- The CORINFO_FIELD_HANDLE used for the memory address
5881 //    offs      -- The offset added to the memory address from fldHnd
5882 //    ival      -- The immediate value
5883 //
5884 void emitter::emitIns_SIMD_R_R_C_I(instruction          ins,
5885                                    emitAttr             attr,
5886                                    regNumber            targetReg,
5887                                    regNumber            op1Reg,
5888                                    CORINFO_FIELD_HANDLE fldHnd,
5889                                    int                  offs,
5890                                    int                  ival)
5891 {
5892     if (UseVEXEncoding())
5893     {
5894         emitIns_R_R_C_I(ins, attr, targetReg, op1Reg, fldHnd, offs, ival);
5895     }
5896     else
5897     {
5898         if (op1Reg != targetReg)
5899         {
5900             emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5901         }
5902         emitIns_R_C_I(ins, attr, targetReg, fldHnd, offs, ival);
5903     }
5904 }
5905
5906 //------------------------------------------------------------------------
5907 // emitIns_SIMD_R_R_R_I: emits the code for a SIMD instruction that takes two register operands, an immediate operand,
5908 //                       and that returns a value in register
5909 //
5910 // Arguments:
5911 //    ins       -- The instruction being emitted
5912 //    attr      -- The emit attribute
5913 //    targetReg -- The target register
5914 //    op1Reg    -- The register of the first operand
5915 //    op2Reg    -- The register of the second operand
5916 //    ival      -- The immediate value
5917 //
5918 void emitter::emitIns_SIMD_R_R_R_I(
5919     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int ival)
5920 {
5921     if (UseVEXEncoding())
5922     {
5923         emitIns_R_R_R_I(ins, attr, targetReg, op1Reg, op2Reg, ival);
5924     }
5925     else
5926     {
5927         if (op1Reg != targetReg)
5928         {
5929             // Ensure we aren't overwriting op2
5930             assert(op2Reg != targetReg);
5931
5932             emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5933         }
5934         emitIns_R_R_I(ins, attr, targetReg, op2Reg, ival);
5935     }
5936 }
5937
5938 //------------------------------------------------------------------------
5939 // emitIns_SIMD_R_R_S_I: emits the code for a SIMD instruction that takes a register operand, a variable index + offset,
5940 //                       an imediate operand, and that returns a value in register
5941 //
5942 // Arguments:
5943 //    ins       -- The instruction being emitted
5944 //    attr      -- The emit attribute
5945 //    targetReg -- The target register
5946 //    op1Reg    -- The register of the first operand
5947 //    varx      -- The variable index used for the memory address
5948 //    offs      -- The offset added to the memory address from varx
5949 //    ival      -- The immediate value
5950 //
5951 void emitter::emitIns_SIMD_R_R_S_I(
5952     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int varx, int offs, int ival)
5953 {
5954     if (UseVEXEncoding())
5955     {
5956         emitIns_R_R_S_I(ins, attr, targetReg, op1Reg, varx, offs, ival);
5957     }
5958     else
5959     {
5960         if (op1Reg != targetReg)
5961         {
5962             emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5963         }
5964         emitIns_R_S_I(ins, attr, targetReg, varx, offs, ival);
5965     }
5966 }
5967
5968 //------------------------------------------------------------------------
5969 // emitIns_SIMD_R_R_R_A: emits the code for a SIMD instruction that takes two register operands, a GenTreeIndir address,
5970 //                       and that returns a value in register
5971 //
5972 // Arguments:
5973 //    ins       -- The instruction being emitted
5974 //    attr      -- The emit attribute
5975 //    targetReg -- The target register
5976 //    op1Reg    -- The register of the first operand
5977 //    op2Reg    -- The register of the second operand
5978 //    indir     -- The GenTreeIndir used for the memory address
5979 //
5980 void emitter::emitIns_SIMD_R_R_R_A(
5981     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTreeIndir* indir)
5982 {
5983     assert(IsFMAInstruction(ins));
5984     assert(UseVEXEncoding());
5985
5986     if (op1Reg != targetReg)
5987     {
5988         // Ensure we aren't overwriting op2
5989         assert(op2Reg != targetReg);
5990
5991         emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5992     }
5993
5994     emitIns_R_R_A(ins, attr, targetReg, op2Reg, indir);
5995 }
5996
5997 //------------------------------------------------------------------------
5998 // emitIns_SIMD_R_R_R_AR: emits the code for a SIMD instruction that takes two register operands, a base memory
5999 //                        register, and that returns a value in register
6000 //
6001 // Arguments:
6002 //    ins       -- The instruction being emitted
6003 //    attr      -- The emit attribute
6004 //    targetReg -- The target register
6005 //    op1Reg    -- The register of the first operands
6006 //    op2Reg    -- The register of the second operand
6007 //    base      -- The base register used for the memory address
6008 //
6009 void emitter::emitIns_SIMD_R_R_R_AR(
6010     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber base)
6011 {
6012     assert(IsFMAInstruction(ins));
6013     assert(UseVEXEncoding());
6014
6015     if (op1Reg != targetReg)
6016     {
6017         // Ensure we aren't overwriting op2
6018         assert(op2Reg != targetReg);
6019
6020         emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6021     }
6022
6023     emitIns_R_R_AR(ins, attr, targetReg, op2Reg, base, 0);
6024 }
6025
6026 //------------------------------------------------------------------------
6027 // emitIns_SIMD_R_R_R_C: emits the code for a SIMD instruction that takes two register operands, a field handle +
6028 //                       offset, and that returns a value in register
6029 //
6030 // Arguments:
6031 //    ins       -- The instruction being emitted
6032 //    attr      -- The emit attribute
6033 //    targetReg -- The target register
6034 //    op1Reg    -- The register of the first operand
6035 //    op2Reg    -- The register of the second operand
6036 //    fldHnd    -- The CORINFO_FIELD_HANDLE used for the memory address
6037 //    offs      -- The offset added to the memory address from fldHnd
6038 //
6039 void emitter::emitIns_SIMD_R_R_R_C(instruction          ins,
6040                                    emitAttr             attr,
6041                                    regNumber            targetReg,
6042                                    regNumber            op1Reg,
6043                                    regNumber            op2Reg,
6044                                    CORINFO_FIELD_HANDLE fldHnd,
6045                                    int                  offs)
6046 {
6047     assert(IsFMAInstruction(ins));
6048     assert(UseVEXEncoding());
6049
6050     if (op1Reg != targetReg)
6051     {
6052         // Ensure we aren't overwriting op2
6053         assert(op2Reg != targetReg);
6054
6055         emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6056     }
6057
6058     emitIns_R_R_C(ins, attr, targetReg, op2Reg, fldHnd, offs);
6059 }
6060
6061 //------------------------------------------------------------------------
6062 // emitIns_SIMD_R_R_R_R: emits the code for a SIMD instruction that takes three register operands, and that returns a
6063 //                     value in register
6064 //
6065 // Arguments:
6066 //    ins       -- The instruction being emitted
6067 //    attr      -- The emit attribute
6068 //    targetReg -- The target register
6069 //    op1Reg    -- The register of the first operand
6070 //    op2Reg    -- The register of the second operand
6071 //    op3Reg    -- The register of the second operand
6072 //
6073 void emitter::emitIns_SIMD_R_R_R_R(
6074     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber op3Reg)
6075 {
6076     if (IsFMAInstruction(ins))
6077     {
6078         assert(UseVEXEncoding());
6079
6080         if (op1Reg != targetReg)
6081         {
6082             // Ensure we aren't overwriting op2 or op3
6083
6084             assert(op2Reg != targetReg);
6085             assert(op3Reg != targetReg);
6086
6087             emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6088         }
6089
6090         emitIns_R_R_R(ins, attr, targetReg, op2Reg, op3Reg);
6091     }
6092     else if (UseVEXEncoding())
6093     {
6094         assert(isAvxBlendv(ins) || isSse41Blendv(ins));
6095
6096         // convert SSE encoding of SSE4.1 instructions to VEX encoding
6097         switch (ins)
6098         {
6099             case INS_blendvps:
6100                 ins = INS_vblendvps;
6101                 break;
6102             case INS_blendvpd:
6103                 ins = INS_vblendvpd;
6104                 break;
6105             case INS_pblendvb:
6106                 ins = INS_vpblendvb;
6107                 break;
6108             default:
6109                 break;
6110         }
6111         emitIns_R_R_R_R(ins, attr, targetReg, op1Reg, op2Reg, op3Reg);
6112     }
6113     else
6114     {
6115         assert(isSse41Blendv(ins));
6116         // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
6117         if (op3Reg != REG_XMM0)
6118         {
6119             // Ensure we aren't overwriting op1 or op2
6120             assert(op1Reg != REG_XMM0);
6121             assert(op2Reg != REG_XMM0);
6122
6123             emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
6124         }
6125         if (op1Reg != targetReg)
6126         {
6127             // Ensure we aren't overwriting op2 or oop3 (which should be REG_XMM0)
6128             assert(op2Reg != targetReg);
6129             assert(targetReg != REG_XMM0);
6130
6131             emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6132         }
6133         emitIns_R_R(ins, attr, targetReg, op2Reg);
6134     }
6135 }
6136
6137 //------------------------------------------------------------------------
6138 // emitIns_SIMD_R_R_R_S: emits the code for a SIMD instruction that takes two register operands, a variable index +
6139 //                       offset, and that returns a value in register
6140 //
6141 // Arguments:
6142 //    ins       -- The instruction being emitted
6143 //    attr      -- The emit attribute
6144 //    targetReg -- The target register
6145 //    op1Reg    -- The register of the first operand
6146 //    op2Reg    -- The register of the second operand
6147 //    varx      -- The variable index used for the memory address
6148 //    offs      -- The offset added to the memory address from varx
6149 //
6150 void emitter::emitIns_SIMD_R_R_R_S(
6151     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int varx, int offs)
6152 {
6153     assert(IsFMAInstruction(ins));
6154     assert(UseVEXEncoding());
6155
6156     if (op1Reg != targetReg)
6157     {
6158         // Ensure we aren't overwriting op2
6159         assert(op2Reg != targetReg);
6160
6161         emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6162     }
6163
6164     emitIns_R_R_S(ins, attr, targetReg, op2Reg, varx, offs);
6165 }
6166
6167 //------------------------------------------------------------------------
6168 // emitIns_SIMD_R_R_A_R: emits the code for a SIMD instruction that takes a register operand, a GenTreeIndir address,
6169 //                       another register operand, and that returns a value in register
6170 //
6171 // Arguments:
6172 //    ins       -- The instruction being emitted
6173 //    attr      -- The emit attribute
6174 //    targetReg -- The target register
6175 //    op1Reg    -- The register of the first operand
6176 //    op3Reg    -- The register of the third operand
6177 //    indir     -- The GenTreeIndir used for the memory address
6178 //
6179 void emitter::emitIns_SIMD_R_R_A_R(
6180     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, GenTreeIndir* indir)
6181 {
6182     if (UseVEXEncoding())
6183     {
6184         assert(isAvxBlendv(ins) || isSse41Blendv(ins));
6185
6186         // convert SSE encoding of SSE4.1 instructions to VEX encoding
6187         switch (ins)
6188         {
6189             case INS_blendvps:
6190             {
6191                 ins = INS_vblendvps;
6192                 break;
6193             }
6194
6195             case INS_blendvpd:
6196             {
6197                 ins = INS_vblendvpd;
6198                 break;
6199             }
6200
6201             case INS_pblendvb:
6202             {
6203                 ins = INS_vpblendvb;
6204                 break;
6205             }
6206
6207             default:
6208             {
6209                 break;
6210             }
6211         }
6212
6213         emitIns_R_R_A_R(ins, attr, targetReg, op1Reg, op3Reg, indir);
6214     }
6215     else
6216     {
6217         assert(isSse41Blendv(ins));
6218
6219         // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
6220         if (op3Reg != REG_XMM0)
6221         {
6222             // Ensure we aren't overwriting op1
6223             assert(op1Reg != REG_XMM0);
6224
6225             emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
6226         }
6227         if (op1Reg != targetReg)
6228         {
6229             // Ensure we aren't overwriting op3 (which should be REG_XMM0)
6230             assert(targetReg != REG_XMM0);
6231
6232             emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6233         }
6234
6235         emitIns_R_A(ins, attr, targetReg, indir);
6236     }
6237 }
6238
6239 //------------------------------------------------------------------------
6240 // emitIns_SIMD_R_R_AR_R: emits the code for a SIMD instruction that takes a register operand, a base memory
6241 //                        register, another register operand, and that returns a value in register
6242 //
6243 // Arguments:
6244 //    ins       -- The instruction being emitted
6245 //    attr      -- The emit attribute
6246 //    targetReg -- The target register
6247 //    op1Reg    -- The register of the first operands
6248 //    op3Reg    -- The register of the third operand
6249 //    base      -- The base register used for the memory address
6250 //
6251 void emitter::emitIns_SIMD_R_R_AR_R(
6252     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, regNumber base)
6253 {
6254     if (UseVEXEncoding())
6255     {
6256         assert(isAvxBlendv(ins) || isSse41Blendv(ins));
6257
6258         // convert SSE encoding of SSE4.1 instructions to VEX encoding
6259         switch (ins)
6260         {
6261             case INS_blendvps:
6262             {
6263                 ins = INS_vblendvps;
6264                 break;
6265             }
6266
6267             case INS_blendvpd:
6268             {
6269                 ins = INS_vblendvpd;
6270                 break;
6271             }
6272
6273             case INS_pblendvb:
6274             {
6275                 ins = INS_vpblendvb;
6276                 break;
6277             }
6278
6279             default:
6280             {
6281                 break;
6282             }
6283         }
6284
6285         emitIns_R_R_AR_R(ins, attr, targetReg, op1Reg, op3Reg, base, 0);
6286     }
6287     else
6288     {
6289         assert(isSse41Blendv(ins));
6290
6291         // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
6292         if (op3Reg != REG_XMM0)
6293         {
6294             // Ensure we aren't overwriting op1
6295             assert(op1Reg != REG_XMM0);
6296
6297             emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
6298         }
6299         if (op1Reg != targetReg)
6300         {
6301             // Ensure we aren't overwriting op3 (which should be REG_XMM0)
6302             assert(targetReg != REG_XMM0);
6303
6304             emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6305         }
6306
6307         emitIns_R_AR(ins, attr, targetReg, base, 0);
6308     }
6309 }
6310
6311 //------------------------------------------------------------------------
6312 // emitIns_SIMD_R_R_C_R: emits the code for a SIMD instruction that takes a register operand, a field handle +
6313 //                       offset,  another register operand, and that returns a value in register
6314 //
6315 // Arguments:
6316 //    ins       -- The instruction being emitted
6317 //    attr      -- The emit attribute
6318 //    targetReg -- The target register
6319 //    op1Reg    -- The register of the first operand
6320 //    op3Reg    -- The register of the third operand
6321 //    fldHnd    -- The CORINFO_FIELD_HANDLE used for the memory address
6322 //    offs      -- The offset added to the memory address from fldHnd
6323 //
6324 void emitter::emitIns_SIMD_R_R_C_R(instruction          ins,
6325                                    emitAttr             attr,
6326                                    regNumber            targetReg,
6327                                    regNumber            op1Reg,
6328                                    regNumber            op3Reg,
6329                                    CORINFO_FIELD_HANDLE fldHnd,
6330                                    int                  offs)
6331 {
6332     if (UseVEXEncoding())
6333     {
6334         assert(isAvxBlendv(ins) || isSse41Blendv(ins));
6335
6336         // convert SSE encoding of SSE4.1 instructions to VEX encoding
6337         switch (ins)
6338         {
6339             case INS_blendvps:
6340             {
6341                 ins = INS_vblendvps;
6342                 break;
6343             }
6344
6345             case INS_blendvpd:
6346             {
6347                 ins = INS_vblendvpd;
6348                 break;
6349             }
6350
6351             case INS_pblendvb:
6352             {
6353                 ins = INS_vpblendvb;
6354                 break;
6355             }
6356
6357             default:
6358             {
6359                 break;
6360             }
6361         }
6362
6363         emitIns_R_R_C_R(ins, attr, targetReg, op1Reg, op3Reg, fldHnd, offs);
6364     }
6365     else
6366     {
6367         assert(isSse41Blendv(ins));
6368
6369         // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
6370         if (op3Reg != REG_XMM0)
6371         {
6372             // Ensure we aren't overwriting op1
6373             assert(op1Reg != REG_XMM0);
6374
6375             emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
6376         }
6377         if (op1Reg != targetReg)
6378         {
6379             // Ensure we aren't overwriting op3 (which should be REG_XMM0)
6380             assert(targetReg != REG_XMM0);
6381
6382             emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6383         }
6384
6385         emitIns_R_C(ins, attr, targetReg, fldHnd, offs);
6386     }
6387 }
6388
6389 //------------------------------------------------------------------------
6390 // emitIns_SIMD_R_R_S_R: emits the code for a SIMD instruction that takes a register operand, a variable index +
6391 //                       offset, another register operand, and that returns a value in register
6392 //
6393 // Arguments:
6394 //    ins       -- The instruction being emitted
6395 //    attr      -- The emit attribute
6396 //    targetReg -- The target register
6397 //    op1Reg    -- The register of the first operand
6398 //    op3Reg    -- The register of the third operand
6399 //    varx      -- The variable index used for the memory address
6400 //    offs      -- The offset added to the memory address from varx
6401 //
6402 void emitter::emitIns_SIMD_R_R_S_R(
6403     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, int varx, int offs)
6404 {
6405     if (UseVEXEncoding())
6406     {
6407         assert(isAvxBlendv(ins) || isSse41Blendv(ins));
6408
6409         // convert SSE encoding of SSE4.1 instructions to VEX encoding
6410         switch (ins)
6411         {
6412             case INS_blendvps:
6413             {
6414                 ins = INS_vblendvps;
6415                 break;
6416             }
6417
6418             case INS_blendvpd:
6419             {
6420                 ins = INS_vblendvpd;
6421                 break;
6422             }
6423
6424             case INS_pblendvb:
6425             {
6426                 ins = INS_vpblendvb;
6427                 break;
6428             }
6429
6430             default:
6431             {
6432                 break;
6433             }
6434         }
6435
6436         emitIns_R_R_S_R(ins, attr, targetReg, op1Reg, op3Reg, varx, offs);
6437     }
6438     else
6439     {
6440         assert(isSse41Blendv(ins));
6441
6442         // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
6443         if (op3Reg != REG_XMM0)
6444         {
6445             // Ensure we aren't overwriting op1
6446             assert(op1Reg != REG_XMM0);
6447
6448             emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
6449         }
6450         if (op1Reg != targetReg)
6451         {
6452             // Ensure we aren't overwriting op3 (which should be REG_XMM0)
6453             assert(targetReg != REG_XMM0);
6454
6455             emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6456         }
6457
6458         emitIns_R_S(ins, attr, targetReg, varx, offs);
6459     }
6460 }
6461 #endif // FEATURE_HW_INTRINSICS
6462
6463 /*****************************************************************************
6464  *
6465  *  The following add instructions referencing stack-based local variables.
6466  */
6467
6468 void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs)
6469 {
6470     instrDesc*     id  = emitNewInstr(attr);
6471     UNATIVE_OFFSET sz  = emitInsSizeSV(id, insCodeMR(ins), varx, offs);
6472     insFormat      fmt = emitInsModeFormat(ins, IF_SRD);
6473
6474     // 16-bit operand instructions will need a prefix
6475     if (EA_SIZE(attr) == EA_2BYTE)
6476     {
6477         sz += 1;
6478     }
6479
6480     // VEX prefix
6481     sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
6482
6483     // 64-bit operand instructions will need a REX.W prefix
6484     if (TakesRexWPrefix(ins, attr))
6485     {
6486         sz += emitGetRexPrefixSize(ins);
6487     }
6488
6489     id->idIns(ins);
6490     id->idInsFmt(fmt);
6491     id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
6492     id->idCodeSize(sz);
6493
6494 #ifdef DEBUG
6495     id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
6496 #endif
6497     dispIns(id);
6498     emitCurIGsize += sz;
6499
6500     emitAdjustStackDepthPushPop(ins);
6501 }
6502
6503 void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs)
6504 {
6505     instrDesc*     id  = emitNewInstr(attr);
6506     UNATIVE_OFFSET sz  = emitInsSizeSV(id, insCodeMR(ins), varx, offs);
6507     insFormat      fmt = emitInsModeFormat(ins, IF_SRD_RRD);
6508
6509 #ifdef _TARGET_X86_
6510     if (attr == EA_1BYTE)
6511     {
6512         assert(isByteReg(ireg));
6513     }
6514 #endif
6515     // 16-bit operand instructions will need a prefix
6516     if (EA_SIZE(attr) == EA_2BYTE)
6517     {
6518         sz++;
6519     }
6520
6521     // VEX prefix
6522     sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
6523
6524     // 64-bit operand instructions will need a REX.W prefix
6525     if (TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr))
6526     {
6527         sz += emitGetRexPrefixSize(ins);
6528     }
6529
6530     id->idIns(ins);
6531     id->idInsFmt(fmt);
6532     id->idReg1(ireg);
6533     id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
6534     id->idCodeSize(sz);
6535 #ifdef DEBUG
6536     id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
6537 #endif
6538     dispIns(id);
6539     emitCurIGsize += sz;
6540 }
6541
6542 void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs)
6543 {
6544     emitAttr size = EA_SIZE(attr);
6545     noway_assert(emitVerifyEncodable(ins, size, ireg));
6546
6547     instrDesc*     id  = emitNewInstr(attr);
6548     UNATIVE_OFFSET sz  = emitInsSizeSV(id, insCodeRM(ins), varx, offs);
6549     insFormat      fmt = emitInsModeFormat(ins, IF_RRD_SRD);
6550
6551     // Most 16-bit operand instructions need a prefix
6552     if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx)
6553     {
6554         sz++;
6555     }
6556
6557     // VEX prefix
6558     sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
6559
6560     // 64-bit operand instructions will need a REX.W prefix
6561     if (TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr))
6562     {
6563         sz += emitGetRexPrefixSize(ins);
6564     }
6565
6566     sz += emitAdjustSizeCrc32(ins, attr);
6567
6568     id->idIns(ins);
6569     id->idInsFmt(fmt);
6570     id->idReg1(ireg);
6571     id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
6572     id->idCodeSize(sz);
6573 #ifdef DEBUG
6574     id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
6575 #endif
6576     dispIns(id);
6577     emitCurIGsize += sz;
6578 }
6579
6580 void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val)
6581 {
6582 #ifdef _TARGET_AMD64_
6583     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
6584     // all other opcodes take a sign-extended 4-byte immediate
6585     noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
6586 #endif
6587
6588     insFormat fmt;
6589
6590     switch (ins)
6591     {
6592         case INS_rcl_N:
6593         case INS_rcr_N:
6594         case INS_rol_N:
6595         case INS_ror_N:
6596         case INS_shl_N:
6597         case INS_shr_N:
6598         case INS_sar_N:
6599             assert(val != 1);
6600             fmt = IF_SRW_SHF;
6601             val &= 0x7F;
6602             break;
6603
6604         default:
6605             fmt = emitInsModeFormat(ins, IF_SRD_CNS);
6606             break;
6607     }
6608
6609     instrDesc* id = emitNewInstrCns(attr, val);
6610     id->idIns(ins);
6611     id->idInsFmt(fmt);
6612     UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeMI(ins), varx, offs, val);
6613
6614     // VEX prefix
6615     sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins));
6616
6617     // 64-bit operand instructions will need a REX.W prefix
6618     if (TakesRexWPrefix(ins, attr))
6619     {
6620         sz += emitGetRexPrefixSize(ins);
6621     }
6622
6623     id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
6624     id->idCodeSize(sz);
6625 #ifdef DEBUG
6626     id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
6627 #endif
6628     dispIns(id);
6629     emitCurIGsize += sz;
6630 }
6631
6632 /*****************************************************************************
6633  *
6634  *  Record that a jump instruction uses the short encoding
6635  *
6636  */
6637 void emitter::emitSetShortJump(instrDescJmp* id)
6638 {
6639     if (id->idjKeepLong)
6640     {
6641         return;
6642     }
6643
6644     id->idjShort = true;
6645 }
6646
6647 /*****************************************************************************
6648  *
6649  *  Add a jmp instruction.
6650  */
6651
6652 void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0 */)
6653 {
6654     UNATIVE_OFFSET sz;
6655     instrDescJmp*  id = emitNewInstrJmp();
6656
6657     assert(dst->bbFlags & BBF_JMP_TARGET);
6658
6659     id->idIns(ins);
6660     id->idInsFmt(IF_LABEL);
6661     id->idAddr()->iiaBBlabel = dst;
6662
6663 #ifdef DEBUG
6664     // Mark the finally call
6665     if (ins == INS_call && emitComp->compCurBB->bbJumpKind == BBJ_CALLFINALLY)
6666     {
6667         id->idDebugOnlyInfo()->idFinallyCall = true;
6668     }
6669 #endif // DEBUG
6670
6671     /* Assume the jump will be long */
6672
6673     id->idjShort    = 0;
6674     id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
6675
6676     /* Record the jump's IG and offset within it */
6677
6678     id->idjIG   = emitCurIG;
6679     id->idjOffs = emitCurIGsize;
6680
6681     /* Append this jump to this IG's jump list */
6682
6683     id->idjNext      = emitCurIGjmpList;
6684     emitCurIGjmpList = id;
6685
6686 #if EMITTER_STATS
6687     emitTotalIGjmps++;
6688 #endif
6689
6690     /* Figure out the max. size of the jump/call instruction */
6691
6692     if (ins == INS_call)
6693     {
6694         sz = CALL_INST_SIZE;
6695     }
6696     else if (ins == INS_push || ins == INS_push_hide)
6697     {
6698         // Pushing the address of a basicBlock will need a reloc
6699         // as the instruction uses the absolute address,
6700         // not a relative address
6701         if (emitComp->opts.compReloc)
6702         {
6703             id->idSetIsDspReloc();
6704         }
6705         sz = PUSH_INST_SIZE;
6706     }
6707     else
6708     {
6709         insGroup* tgt;
6710
6711         /* This is a jump - assume the worst */
6712
6713         sz = (ins == INS_jmp) ? JMP_SIZE_LARGE : JCC_SIZE_LARGE;
6714
6715         /* Can we guess at the jump distance? */
6716
6717         tgt = (insGroup*)emitCodeGetCookie(dst);
6718
6719         if (tgt)
6720         {
6721             int            extra;
6722             UNATIVE_OFFSET srcOffs;
6723             int            jmpDist;
6724
6725             assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL);
6726
6727             /* This is a backward jump - figure out the distance */
6728
6729             srcOffs = emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL;
6730
6731             /* Compute the distance estimate */
6732
6733             jmpDist = srcOffs - tgt->igOffs;
6734             assert((int)jmpDist > 0);
6735
6736             /* How much beyond the max. short distance does the jump go? */
6737
6738             extra = jmpDist + JMP_DIST_SMALL_MAX_NEG;
6739
6740 #if DEBUG_EMIT
6741             if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
6742             {
6743                 if (INTERESTING_JUMP_NUM == 0)
6744                 {
6745                     printf("[0] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
6746                 }
6747                 printf("[0] Jump source is at %08X\n", srcOffs);
6748                 printf("[0] Label block is at %08X\n", tgt->igOffs);
6749                 printf("[0] Jump  distance  - %04X\n", jmpDist);
6750                 if (extra > 0)
6751                 {
6752                     printf("[0] Distance excess = %d  \n", extra);
6753                 }
6754             }
6755 #endif
6756
6757             if (extra <= 0 && !id->idjKeepLong)
6758             {
6759                 /* Wonderful - this jump surely will be short */
6760
6761                 emitSetShortJump(id);
6762                 sz = JMP_SIZE_SMALL;
6763             }
6764         }
6765 #if DEBUG_EMIT
6766         else
6767         {
6768             if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
6769             {
6770                 if (INTERESTING_JUMP_NUM == 0)
6771                 {
6772                     printf("[0] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
6773                 }
6774                 printf("[0] Jump source is at %04X/%08X\n", emitCurIGsize,
6775                        emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL);
6776                 printf("[0] Label block is unknown\n");
6777             }
6778         }
6779 #endif
6780     }
6781
6782     id->idCodeSize(sz);
6783
6784     dispIns(id);
6785     emitCurIGsize += sz;
6786
6787     emitAdjustStackDepthPushPop(ins);
6788 }
6789
6790 #if !FEATURE_FIXED_OUT_ARGS
6791
6792 //------------------------------------------------------------------------
6793 // emitAdjustStackDepthPushPop: Adjust the current and maximum stack depth.
6794 //
6795 // Arguments:
6796 //    ins - the instruction. Only INS_push and INS_pop adjust the stack depth.
6797 //
6798 // Notes:
6799 //    1. Alters emitCurStackLvl and possibly emitMaxStackDepth.
6800 //    2. emitCntStackDepth must be set (0 in prolog/epilog, one DWORD elsewhere)
6801 //
6802 void emitter::emitAdjustStackDepthPushPop(instruction ins)
6803 {
6804     if (ins == INS_push)
6805     {
6806         emitCurStackLvl += emitCntStackDepth;
6807
6808         if (emitMaxStackDepth < emitCurStackLvl)
6809         {
6810             JITDUMP("Upping emitMaxStackDepth from %d to %d\n", emitMaxStackDepth, emitCurStackLvl);
6811             emitMaxStackDepth = emitCurStackLvl;
6812         }
6813     }
6814     else if (ins == INS_pop)
6815     {
6816         emitCurStackLvl -= emitCntStackDepth;
6817         assert((int)emitCurStackLvl >= 0);
6818     }
6819 }
6820
6821 //------------------------------------------------------------------------
6822 // emitAdjustStackDepth: Adjust the current and maximum stack depth.
6823 //
6824 // Arguments:
6825 //    ins - the instruction. Only INS_add and INS_sub adjust the stack depth.
6826 //          It is assumed that the add/sub is on the stack pointer.
6827 //    val - the number of bytes to add to or subtract from the stack pointer.
6828 //
6829 // Notes:
6830 //    1. Alters emitCurStackLvl and possibly emitMaxStackDepth.
6831 //    2. emitCntStackDepth must be set (0 in prolog/epilog, one DWORD elsewhere)
6832 //
6833 void emitter::emitAdjustStackDepth(instruction ins, ssize_t val)
6834 {
6835     // If we're in the prolog or epilog, or otherwise not tracking the stack depth, just return.
6836     if (emitCntStackDepth == 0)
6837         return;
6838
6839     if (ins == INS_sub)
6840     {
6841         S_UINT32 newStackLvl(emitCurStackLvl);
6842         newStackLvl += S_UINT32(val);
6843         noway_assert(!newStackLvl.IsOverflow());
6844
6845         emitCurStackLvl = newStackLvl.Value();
6846
6847         if (emitMaxStackDepth < emitCurStackLvl)
6848         {
6849             JITDUMP("Upping emitMaxStackDepth from %d to %d\n", emitMaxStackDepth, emitCurStackLvl);
6850             emitMaxStackDepth = emitCurStackLvl;
6851         }
6852     }
6853     else if (ins == INS_add)
6854     {
6855         S_UINT32 newStackLvl = S_UINT32(emitCurStackLvl) - S_UINT32(val);
6856         noway_assert(!newStackLvl.IsOverflow());
6857
6858         emitCurStackLvl = newStackLvl.Value();
6859     }
6860 }
6861
#endif // !FEATURE_FIXED_OUT_ARGS
6863
/*****************************************************************************
 *
 *  Add a call instruction (direct or indirect).
 *      argSize<0 means that the caller will pop the arguments
 *
 * The other arguments are interpreted depending on callType as shown:
 * Unless otherwise specified, ireg,xreg,xmul,disp should have default values.
 *
 * EC_FUNC_TOKEN       : addr is the method address
 * EC_FUNC_TOKEN_INDIR : addr is the indirect method address
 * EC_FUNC_ADDR        : addr is the absolute address of the function
 * EC_FUNC_VIRTUAL     : "call [ireg+disp]"
 *
 * If callType is one of these emitCallTypes, addr has to be NULL.
 * EC_INDIR_R          : "call ireg".
 * EC_INDIR_SR         : "call lcl<disp>" (eg. call [ebp-8]).
 * EC_INDIR_C          : "call clsVar<disp>" (eg. call [clsVarAddr])
 * EC_INDIR_ARD        : "call [ireg+xreg*xmul+disp]"
 *
 * NOTE(review): the sanity asserts in the body only enforce "addr == nullptr"
 * for call types at or above EC_INDIR_R other than EC_INDIR_ARD and
 * EC_INDIR_C; confirm which of the two statements is intended.
 *
 * Remaining arguments, as used by the body:
 *
 * methHnd               : queried for the GC registers the callee saves or modifies
 *                         and for whether it is a no-GC helper; in DEBUG builds it
 *                         is also recorded on the descriptor
 * sigInfo               : (only when INDEBUG_LDISASM_COMMA keeps it) recorded on
 *                         the descriptor in DEBUG builds
 * argSize               : must be a multiple of REGSIZE_BYTES; when the stack depth
 *                         is tracked and argSize > 0 the current stack level is
 *                         reduced by it after the call
 * retSize/secondRetSize : forwarded to the call descriptor allocator
 * ptrVars               : becomes the emitter's current set of live GC variables
 * gcrefRegs/byrefRegs   : live GC/byref registers; masked by the callee
 *                         saved-or-modified set before being recorded
 * ilOffset              : if debug info is enabled and this is not BAD_IL_OFFSET,
 *                         an IP mapping is added for the call
 * isJump                : emit INS_l_jmp (EC_FUNC_TOKEN) or INS_i_jmp
 *                         (EC_FUNC_TOKEN_INDIR) instead of INS_call
 *
 */

// clang-format off
void emitter::emitIns_Call(EmitCallType          callType,
                           CORINFO_METHOD_HANDLE methHnd,
                           INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
                           void*                 addr,
                           ssize_t               argSize,
                           emitAttr              retSize
                           MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
                           VARSET_VALARG_TP      ptrVars,
                           regMaskTP             gcrefRegs,
                           regMaskTP             byrefRegs,
                           IL_OFFSETX            ilOffset, // = BAD_IL_OFFSET
                           regNumber             ireg,     // = REG_NA
                           regNumber             xreg,     // = REG_NA
                           unsigned              xmul,     // = 0
                           ssize_t               disp,     // = 0
                           bool                  isJump)   // = false
// clang-format on
{
    /* Sanity check the arguments depending on callType */

    assert(callType < EC_COUNT);
    // Direct calls take no address-mode arguments
    assert((callType != EC_FUNC_TOKEN && callType != EC_FUNC_TOKEN_INDIR && callType != EC_FUNC_ADDR) ||
           (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0));
    // Virtual calls use a base register (plus disp) only
    assert(callType != EC_FUNC_VIRTUAL || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0));
    assert(callType < EC_INDIR_R || callType == EC_INDIR_ARD || callType == EC_INDIR_C || addr == nullptr);
    // Register-indirect calls use only ireg
    assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0));
    // For EC_INDIR_SR, disp carries a local variable number
    assert(callType != EC_INDIR_SR ||
           (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp < (int)emitComp->lvaCount));
    // For EC_INDIR_C, only a non-zero disp is supplied
    assert(callType != EC_INDIR_C || (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp != 0));

    // Our stack level should be always greater than the bytes of arguments we push. Just
    // a sanity test.
    assert((unsigned)abs((signed)argSize) <= codeGen->genStackLevel);

    // Trim out any callee-trashed registers from the live set.
    regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd);
    gcrefRegs &= savedSet;
    byrefRegs &= savedSet;

#ifdef DEBUG
    // Dump the live GC state at the call when verbose GC output is enabled
    if (EMIT_GC_VERBOSE)
    {
        printf("\t\t\t\t\t\t\tCall: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars));
        dumpConvertedVarSet(emitComp, ptrVars);
        printf(", gcrefRegs=");
        printRegMaskInt(gcrefRegs);
        emitDispRegSet(gcrefRegs);
        printf(", byrefRegs=");
        printRegMaskInt(byrefRegs);
        emitDispRegSet(byrefRegs);
        printf("\n");
    }
#endif

    /* Managed RetVal: emit sequence point for the call */
    if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET)
    {
        codeGen->genIPmappingAdd(ilOffset, false);
    }

    /*
        We need to allocate the appropriate instruction descriptor based
        on whether this is a direct/indirect call, and whether we need to
        record an updated set of live GC variables.

        The stats for a ton of classes is as follows:

            Direct call w/o  GC vars        220,216
            Indir. call w/o  GC vars        144,781

            Direct call with GC vars          9,440
            Indir. call with GC vars          5,768
     */

    instrDesc* id;

    // The descriptor records the argument size as a count of register-sized slots
    assert(argSize % REGSIZE_BYTES == 0);
    int argCnt = (int)(argSize / (int)REGSIZE_BYTES); // we need a signed-divide

    if (callType >= EC_FUNC_VIRTUAL)
    {
        /* Indirect call, virtual calls */

        assert(callType == EC_FUNC_VIRTUAL || callType == EC_INDIR_R || callType == EC_INDIR_SR ||
               callType == EC_INDIR_C || callType == EC_INDIR_ARD);

        id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs,
                                 retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize));
    }
    else
    {
        // Helper/static/nonvirtual/function calls (direct or through handle),
        // and calls to an absolute addr.

        assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_TOKEN_INDIR || callType == EC_FUNC_ADDR);

        id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs,
                                 retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize));
    }

    /* Update the emitter's live GC ref sets */

    VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars);
    emitThisGCrefRegs = gcrefRegs;
    emitThisByrefRegs = byrefRegs;

    /* Set the instruction - special case jumping a function */
    instruction ins = INS_call;

    if (isJump)
    {
        // Only token-based call types may be turned into jumps
        assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_TOKEN_INDIR);
        if (callType == EC_FUNC_TOKEN)
        {
            ins = INS_l_jmp;
        }
        else
        {
            ins = INS_i_jmp;
        }
    }
    id->idIns(ins);

    id->idSetIsNoGC(emitNoGChelper(methHnd));

    // Encoded size of the instruction; assigned by exactly one of the cases below
    UNATIVE_OFFSET sz;

    // Record the address: method, indirection, or funcptr
    if (callType >= EC_FUNC_VIRTUAL)
    {
        // This is an indirect call (either a virtual call or func ptr call)

        switch (callType)
        {
            case EC_INDIR_C:
                // Indirect call using an absolute code address.
                // Must be marked as relocatable and is done at the
                // branch target location.
                goto CALL_ADDR_MODE;

            case EC_INDIR_R: // the address is in a register

                id->idSetIsCallRegPtr();

                __fallthrough;

            case EC_INDIR_ARD: // the address is an indirection

                goto CALL_ADDR_MODE;

            case EC_INDIR_SR: // the address is in a lcl var

                id->idInsFmt(IF_SRD);
                // disp is really a lclVarNum
                noway_assert((unsigned)disp == (size_t)disp);
                id->idAddr()->iiaLclVar.initLclVarAddr((unsigned)disp, 0);
                sz = emitInsSizeSV(id, insCodeMR(INS_call), (unsigned)disp, 0);

                break;

            case EC_FUNC_VIRTUAL:

            CALL_ADDR_MODE:

                // fall-through

                // The function is "ireg" if id->idIsCallRegPtr(),
                // else [ireg+xmul*xreg+disp]

                id->idInsFmt(IF_ARD);

                id->idAddr()->iiaAddrMode.amBaseReg = ireg;
                id->idAddr()->iiaAddrMode.amIndxReg = xreg;
                // A zero xmul (no index scaling requested) is recorded as OPSZ1
                id->idAddr()->iiaAddrMode.amScale   = xmul ? emitEncodeScale(xmul) : emitter::OPSZ1;

                sz = emitInsSizeAM(id, insCodeMR(INS_call));

                // No base and no index register: the operand is just the displacement
                if (ireg == REG_NA && xreg == REG_NA)
                {
                    if (codeGen->genCodeIndirAddrNeedsReloc(disp))
                    {
                        id->idSetIsDspReloc();
                    }
#ifdef _TARGET_AMD64_
                    else
                    {
                        // An absolute indir address that doesn't need reloc should fit within 32-bits
                        // to be encoded as offset relative to zero.  This addr mode requires an extra
                        // SIB byte
                        // NOTE(review): the reloc query above is made on "disp" but this check is
                        // made on "addr" - confirm that is intended.
                        noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (size_t)addr);
                        sz++;
                    }
#endif //_TARGET_AMD64_
                }

                break;

            default:
                NO_WAY("unexpected instruction");
                break;
        }
    }
    else if (callType == EC_FUNC_TOKEN_INDIR)
    {
        /* "call [method_addr]" */

        assert(addr != nullptr);

        id->idInsFmt(IF_METHPTR);
        id->idAddr()->iiaAddr = (BYTE*)addr;
        // Fixed size for this form (presumably opcode + ModRM + 32-bit address - TODO confirm)
        sz                    = 6;

        // Since this is an indirect call through a pointer and we don't
        // currently pass in emitAttr into this function, we query codegen
        // whether addr needs a reloc.
        if (codeGen->genCodeIndirAddrNeedsReloc((size_t)addr))
        {
            id->idSetIsDspReloc();
        }
#ifdef _TARGET_AMD64_
        else
        {
            // An absolute indir address that doesn't need reloc should fit within 32-bits
            // to be encoded as offset relative to zero.  This addr mode requires an extra
            // SIB byte
            noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (size_t)addr);
            sz++;
        }
#endif //_TARGET_AMD64_
    }
    else
    {
        /* This is a simple direct call: "call helper/method/addr" */

        assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR);

        assert(addr != nullptr);

        id->idInsFmt(IF_METHOD);
        // Fixed size for a direct call (presumably opcode + 32-bit operand - TODO confirm)
        sz = 5;

        id->idAddr()->iiaAddr = (BYTE*)addr;

        if (callType == EC_FUNC_ADDR)
        {
            id->idSetIsCallAddr();
        }

        // Direct call to a method and no addr indirection is needed.
        if (codeGen->genCodeAddrNeedsReloc((size_t)addr))
        {
            id->idSetIsDspReloc();
        }
    }

#ifdef DEBUG
    // This dump is disabled by the "&& 0"; note that both branches below are identical.
    if (emitComp->verbose && 0)
    {
        if (id->idIsLargeCall())
        {
            if (callType >= EC_FUNC_VIRTUAL)
            {
                printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
                       VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
            }
            else
            {
                printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
                       VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
            }
        }
    }

    id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token
    id->idDebugOnlyInfo()->idCallSig   = sigInfo;
#endif // DEBUG

#ifdef LATE_DISASM
    // Let the disassembler associate the target address with the method handle
    if (addr != nullptr)
    {
        codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd);
    }
#endif // LATE_DISASM

    id->idCodeSize(sz);

    dispIns(id);
    emitCurIGsize += sz;

#if !FEATURE_FIXED_OUT_ARGS

    /* The call will pop the arguments */

    // Only applies when the stack depth is being tracked and the callee pops (argSize > 0)
    if (emitCntStackDepth && argSize > 0)
    {
        noway_assert((ssize_t)emitCurStackLvl >= argSize);
        emitCurStackLvl -= (int)argSize;
        assert((int)emitCurStackLvl >= 0);
    }

#endif // !FEATURE_FIXED_OUT_ARGS
}
7188
7189 #ifdef DEBUG
7190 /*****************************************************************************
7191  *
7192  *  The following called for each recorded instruction -- use for debugging.
7193  */
7194 void emitter::emitInsSanityCheck(instrDesc* id)
7195 {
7196     // make certain you only try to put relocs on things that can have them.
7197     ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
7198     if ((idOp == ID_OP_SCNS) && id->idIsLargeCns())
7199     {
7200         idOp = ID_OP_CNS;
7201     }
7202
7203     if (id->idIsDspReloc())
7204     {
7205         assert(idOp == ID_OP_NONE || idOp == ID_OP_AMD || idOp == ID_OP_DSP || idOp == ID_OP_DSP_CNS ||
7206                idOp == ID_OP_AMD_CNS || idOp == ID_OP_SPEC || idOp == ID_OP_CALL || idOp == ID_OP_JMP ||
7207                idOp == ID_OP_LBL);
7208     }
7209
7210     if (id->idIsCnsReloc())
7211     {
7212         assert(idOp == ID_OP_CNS || idOp == ID_OP_AMD_CNS || idOp == ID_OP_DSP_CNS || idOp == ID_OP_SPEC ||
7213                idOp == ID_OP_CALL || idOp == ID_OP_JMP);
7214     }
7215 }
7216 #endif
7217
7218 /*****************************************************************************
7219  *
7220  *  Return the allocated size (in bytes) of the given instruction descriptor.
7221  */
7222
7223 size_t emitter::emitSizeOfInsDsc(instrDesc* id)
7224 {
7225     if (emitIsScnsInsDsc(id))
7226     {
7227         return SMALL_IDSC_SIZE;
7228     }
7229
7230     assert((unsigned)id->idInsFmt() < emitFmtCount);
7231
7232     ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
7233
7234     // An INS_call instruction may use a "fat" direct/indirect call descriptor
7235     // except for a local call to a label (i.e. call to a finally)
7236     // Only ID_OP_CALL and ID_OP_SPEC check for this, so we enforce that the
7237     //  INS_call instruction always uses one of these idOps
7238
7239     if (id->idIns() == INS_call)
7240     {
7241         assert(idOp == ID_OP_CALL || // is a direct   call
7242                idOp == ID_OP_SPEC || // is a indirect call
7243                idOp == ID_OP_JMP);   // is a local call to finally clause
7244     }
7245
7246     switch (idOp)
7247     {
7248         case ID_OP_NONE:
7249             break;
7250
7251         case ID_OP_LBL:
7252             return sizeof(instrDescLbl);
7253
7254         case ID_OP_JMP:
7255             return sizeof(instrDescJmp);
7256
7257         case ID_OP_CALL:
7258         case ID_OP_SPEC:
7259             if (id->idIsLargeCall())
7260             {
7261                 /* Must be a "fat" indirect call descriptor */
7262                 return sizeof(instrDescCGCA);
7263             }
7264
7265             __fallthrough;
7266
7267         case ID_OP_SCNS:
7268         case ID_OP_CNS:
7269         case ID_OP_DSP:
7270         case ID_OP_DSP_CNS:
7271         case ID_OP_AMD:
7272         case ID_OP_AMD_CNS:
7273             if (id->idIsLargeCns())
7274             {
7275                 if (id->idIsLargeDsp())
7276                 {
7277                     return sizeof(instrDescCnsDsp);
7278                 }
7279                 else
7280                 {
7281                     return sizeof(instrDescCns);
7282                 }
7283             }
7284             else
7285             {
7286                 if (id->idIsLargeDsp())
7287                 {
7288                     return sizeof(instrDescDsp);
7289                 }
7290                 else
7291                 {
7292                     return sizeof(instrDesc);
7293                 }
7294             }
7295
7296         default:
7297             NO_WAY("unexpected instruction descriptor format");
7298             break;
7299     }
7300
7301     return sizeof(instrDesc);
7302 }
7303
7304 /*****************************************************************************/
7305 #ifdef DEBUG
7306 /*****************************************************************************
7307  *
7308  *  Return a string that represents the given register.
7309  */
7310
const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName)
{
    // Returns a printable name for 'reg', adjusted for the operand size given by
    // EA_SIZE(attr). The starting point is the string returned by
    // emitComp->compRegVarName(reg, varName); depending on the size it is returned
    // unchanged, returned with its first character skipped, or rewritten into one
    // of the static scratch buffers below.
    //
    // 'rb' holds two scratch buffers and 'rbc' selects the one used most recently.
    // Every path that builds a name first advances 'rbc' (modulo 2), so a name built
    // here stays intact until two more names have been built. The returned pointer
    // may therefore refer to static storage that later calls overwrite.
    static char          rb[2][128];
    static unsigned char rbc = 0;

    const char* rn = emitComp->compRegVarName(reg, varName);

#ifdef _TARGET_AMD64_
    char suffix = '\0';

    switch (EA_SIZE(attr))
    {
        case EA_32BYTE:
            return emitYMMregName(reg);

        case EA_16BYTE:
            return emitXMMregName(reg);

        case EA_8BYTE:
            // Only XMM registers get a special name at this size; everything else
            // keeps the name obtained from compRegVarName.
            if ((REG_XMM0 <= reg) && (reg <= REG_XMM15))
            {
                return emitXMMregName(reg);
            }
            break;

        case EA_4BYTE:
            if ((REG_XMM0 <= reg) && (reg <= REG_XMM15))
            {
                return emitXMMregName(reg);
            }

            // Registers numbered above REG_R15 that are not XMM0..XMM15 are left unchanged.
            if (reg > REG_R15)
            {
                break;
            }

            // Registers numbered above REG_RDI (presumably r8..r15 - confirm against
            // register.h) get a 'd' appended by the shared code under EA_1BYTE.
            if (reg > REG_RDI)
            {
                suffix = 'd';
                goto APPEND_SUFFIX;
            }
            // Remaining registers: replace the first character with 'e' and keep the
            // next two. This assumes 'rn' is a three-character name such as "rax".
            rbc        = (rbc + 1) % 2;
            rb[rbc][0] = 'e';
            rb[rbc][1] = rn[1];
            rb[rbc][2] = rn[2];
            rb[rbc][3] = 0;
            rn         = rb[rbc];
            break;

        case EA_2BYTE:
            if (reg > REG_RDI)
            {
                suffix = 'w';
                goto APPEND_SUFFIX;
            }
            // Skip the first character of the name (no copy is made).
            rn++;
            break;

        case EA_1BYTE:
            if (reg > REG_RDI)
            {
                suffix = 'b';
            // Shared by the 4-, 2- and 1-byte cases: copy a two- or three-character
            // name into the next scratch buffer and append 'suffix'. Control then
            // falls out of the if/else to the "rn = rb[rbc]" assignment below.
            APPEND_SUFFIX:
                rbc        = (rbc + 1) % 2;
                rb[rbc][0] = rn[0];
                rb[rbc][1] = rn[1];
                if (rn[2])
                {
                    // Three-character name; anything longer is not expected here.
                    assert(rn[3] == 0);
                    rb[rbc][2] = rn[2];
                    rb[rbc][3] = suffix;
                    rb[rbc][4] = 0;
                }
                else
                {
                    // Two-character name.
                    rb[rbc][2] = suffix;
                    rb[rbc][3] = 0;
                }
            }
            else
            {
                // Registers up to REG_RDI: drop the first character and end with 'l'.
                // For reg < 4 only rn[1] is kept, otherwise rn[1] and rn[2] are kept.
                rbc        = (rbc + 1) % 2;
                rb[rbc][0] = rn[1];
                if (reg < 4)
                {
                    rb[rbc][1] = 'l';
                    rb[rbc][2] = 0;
                }
                else
                {
                    rb[rbc][1] = rn[2];
                    rb[rbc][2] = 'l';
                    rb[rbc][3] = 0;
                }
            }

            rn = rb[rbc];
            break;

        default:
            break;
    }
#endif // _TARGET_AMD64_

#ifdef _TARGET_X86_
    // The 1-byte case below reads rn[1] and copies from rn + 3.
    assert(strlen(rn) >= 3);

    switch (EA_SIZE(attr))
    {
        case EA_32BYTE:
            return emitYMMregName(reg);

        case EA_16BYTE:
            return emitXMMregName(reg);

        case EA_8BYTE:
            if ((REG_XMM0 <= reg) && (reg <= REG_XMM7))
            {
                return emitXMMregName(reg);
            }
            break;

        case EA_4BYTE:
            if ((REG_XMM0 <= reg) && (reg <= REG_XMM7))
            {
                return emitXMMregName(reg);
            }
            break;

        case EA_2BYTE:
            // Skip the first character of the name (no copy is made).
            rn++;
            break;

        case EA_1BYTE:
            // Build: second character of the name, then 'l', then whatever follows
            // the first three characters of the original string.
            rbc        = (rbc + 1) % 2;
            rb[rbc][0] = rn[1];
            rb[rbc][1] = 'l';
            strcpy_s(&rb[rbc][2], sizeof(rb[0]) - 2, rn + 3);

            rn = rb[rbc];
            break;

        default:
            break;
    }
#endif // _TARGET_X86_

#if 0
    // The following is useful if you want register names to be tagged with * or ^ representing gcref or byref, respectively,
    // however it's possibly not interesting most of the time.
    if (EA_IS_GCREF(attr) || EA_IS_BYREF(attr))
    {
        if (rn != rb[rbc])
        {
            rbc = (rbc+1)%2;
            strcpy_s(rb[rbc], sizeof(rb[rbc]), rn);
            rn = rb[rbc];
        }

        if (EA_IS_GCREF(attr))
        {
            strcat_s(rb[rbc], sizeof(rb[rbc]), "*");
        }
        else if (EA_IS_BYREF(attr))
        {
            strcat_s(rb[rbc], sizeof(rb[rbc]), "^");
        }
    }
#endif // 0

    return rn;
}
7483
7484 /*****************************************************************************
7485  *
7486  *  Return a string that represents the given FP register.
7487  */
7488
7489 const char* emitter::emitFPregName(unsigned reg, bool varName)
7490 {
7491     assert(reg < REG_COUNT);
7492
7493     return emitComp->compFPregVarName((regNumber)(reg), varName);
7494 }
7495
7496 /*****************************************************************************
7497  *
7498  *  Return a string that represents the given XMM register.
7499  */
7500
7501 const char* emitter::emitXMMregName(unsigned reg)
7502 {
7503     static const char* const regNames[] = {
7504 #define REGDEF(name, rnum, mask, sname) "x" sname,
7505 #include "register.h"
7506     };
7507
7508     assert(reg < REG_COUNT);
7509     assert(reg < _countof(regNames));
7510
7511     return regNames[reg];
7512 }
7513
7514 /*****************************************************************************
7515  *
7516  *  Return a string that represents the given YMM register.
7517  */
7518
7519 const char* emitter::emitYMMregName(unsigned reg)
7520 {
7521     static const char* const regNames[] = {
7522 #define REGDEF(name, rnum, mask, sname) "y" sname,
7523 #include "register.h"
7524     };
7525
7526     assert(reg < REG_COUNT);
7527     assert(reg < _countof(regNames));
7528
7529     return regNames[reg];
7530 }
7531
7532 /*****************************************************************************
7533  *
7534  *  Display a static data member reference.
7535  */
7536
7537 void emitter::emitDispClsVar(CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, bool reloc /* = false */)
7538 {
7539     int doffs;
7540
7541     /* Filter out the special case of fs:[offs] */
7542
7543     // Munge any pointers if we want diff-able disassembly
7544     if (emitComp->opts.disDiffable)
7545     {
7546         ssize_t top12bits = (offs >> 20);
7547         if ((top12bits != 0) && (top12bits != -1))
7548         {
7549             offs = 0xD1FFAB1E;
7550         }
7551     }
7552
7553     if (fldHnd == FLD_GLOBAL_FS)
7554     {
7555         printf("FS:[0x%04X]", offs);
7556         return;
7557     }
7558
7559     if (fldHnd == FLD_GLOBAL_DS)
7560     {
7561         printf("[0x%04X]", offs);
7562         return;
7563     }
7564
7565     printf("[");
7566
7567     doffs = Compiler::eeGetJitDataOffs(fldHnd);
7568
7569     if (reloc)
7570     {
7571         printf("reloc ");
7572     }
7573
7574     if (doffs >= 0)
7575     {
7576         if (doffs & 1)
7577         {
7578             printf("@CNS%02u", doffs - 1);
7579         }
7580         else
7581         {
7582             printf("@RWD%02u", doffs);
7583         }
7584
7585         if (offs)
7586         {
7587             printf("%+Id", offs);
7588         }
7589     }
7590     else
7591     {
7592         printf("classVar[%#x]", emitComp->dspPtr(fldHnd));
7593
7594         if (offs)
7595         {
7596             printf("%+Id", offs);
7597         }
7598     }
7599
7600     printf("]");
7601
7602     if (emitComp->opts.varNames && offs < 0)
7603     {
7604         printf("'%s", emitComp->eeGetFieldName(fldHnd));
7605         if (offs)
7606         {
7607             printf("%+Id", offs);
7608         }
7609         printf("'");
7610     }
7611 }
7612
7613 /*****************************************************************************
7614  *
7615  *  Display a stack frame reference.
7616  */
7617
7618 void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
7619 {
7620     int  addr;
7621     bool bEBP;
7622
7623     printf("[");
7624
7625     if (!asmfm || emitComp->lvaDoneFrameLayout == Compiler::NO_FRAME_LAYOUT)
7626     {
7627         if (varx < 0)
7628         {
7629             printf("TEMP_%02u", -varx);
7630         }
7631         else
7632         {
7633             printf("V%02u", +varx);
7634         }
7635
7636         if (disp < 0)
7637         {
7638             printf("-0x%X", -disp);
7639         }
7640         else if (disp > 0)
7641         {
7642             printf("+0x%X", +disp);
7643         }
7644     }
7645
7646     if (emitComp->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
7647     {
7648         if (!asmfm)
7649         {
7650             printf(" ");
7651         }
7652
7653         addr = emitComp->lvaFrameAddress(varx, &bEBP) + disp;
7654
7655         if (bEBP)
7656         {
7657             printf(STR_FPBASE);
7658
7659             if (addr < 0)
7660             {
7661                 printf("-%02XH", -addr);
7662             }
7663             else if (addr > 0)
7664             {
7665                 printf("+%02XH", addr);
7666             }
7667         }
7668         else
7669         {
7670             /* Adjust the offset by amount currently pushed on the stack */
7671
7672             printf(STR_SPBASE);
7673
7674             if (addr < 0)
7675             {
7676                 printf("-%02XH", -addr);
7677             }
7678             else if (addr > 0)
7679             {
7680                 printf("+%02XH", addr);
7681             }
7682
7683 #if !FEATURE_FIXED_OUT_ARGS
7684
7685             if (emitCurStackLvl)
7686                 printf("+%02XH", emitCurStackLvl);
7687
7688 #endif // !FEATURE_FIXED_OUT_ARGS
7689         }
7690     }
7691
7692     printf("]");
7693
7694     if (varx >= 0 && emitComp->opts.varNames)
7695     {
7696         LclVarDsc*  varDsc;
7697         const char* varName;
7698
7699         assert((unsigned)varx < emitComp->lvaCount);
7700         varDsc  = emitComp->lvaTable + varx;
7701         varName = emitComp->compLocalVarName(varx, offs);
7702
7703         if (varName)
7704         {
7705             printf("'%s", varName);
7706
7707             if (disp < 0)
7708             {
7709                 printf("-%d", -disp);
7710             }
7711             else if (disp > 0)
7712             {
7713                 printf("+%d", +disp);
7714             }
7715
7716             printf("'");
7717         }
7718     }
7719 }
7720
7721 /*****************************************************************************
7722  *
 *  Display a reloc value
7724  *  If we are formatting for an assembly listing don't print the hex value
7725  *  since it will prevent us from doing assembly diffs
7726  */
7727 void emitter::emitDispReloc(ssize_t value)
7728 {
7729     if (emitComp->opts.disAsm)
7730     {
7731         printf("(reloc)");
7732     }
7733     else
7734     {
7735         printf("(reloc 0x%Ix)", emitComp->dspPtr(value));
7736     }
7737 }
7738
7739 /*****************************************************************************
7740  *
7741  *  Display an address mode.
7742  */
7743
void emitter::emitDispAddrMode(instrDesc* id, bool noDetail)
{
    // Prints the address-mode operand of 'id' as "[base+scale*index+disp]".
    // For INS_i_jmp the operand is preceded by a jump table label and, unless
    // 'noDetail' is set, followed by a dump of the jump table entries.

    // 'nsep' records that something has already been printed inside the brackets,
    // so the next component needs a "+" separator.
    bool    nsep = false;
    ssize_t disp;

    // Only meaningful for INS_i_jmp: 'jdsc' is the matching data section entry
    // (nullptr if none was found) and 'jtno' counts the label tables seen so far.
    unsigned     jtno = 0;
    dataSection* jdsc = nullptr;

    /* The displacement field is in an unusual place for calls */

    disp = (id->idIns() == INS_call) ? emitGetInsCIdisp(id) : emitGetInsAmdAny(id);

    /* Display a jump table label if this is a switch table jump */

    if (id->idIns() == INS_i_jmp)
    {
        // Running offset of the current entry within the data section list.
        UNATIVE_OFFSET offs = 0;

        /* Find the appropriate entry in the data section list */

        for (jdsc = emitConsDsc.dsdList, jtno = 0; jdsc; jdsc = jdsc->dsNext)
        {
            UNATIVE_OFFSET size = jdsc->dsSize;

            /* Is this a label table? */

            // An odd size marks a label table; the low bit is removed to get the real size.
            if (size & 1)
            {
                size--;
                jtno++;

                // Stop at the label table that starts at the offset stored in idMemCookie.
                if (offs == id->idDebugOnlyInfo()->idMemCookie)
                {
                    break;
                }
            }

            offs += size;
        }

        /* If we've found a matching entry then it is a table jump */

        if (jdsc)
        {
            if (id->idIsDspReloc())
            {
                printf("reloc ");
            }
            // NOTE(review): this label is numbered with idMemCookie, while the table
            // header printed at the end of this function is numbered with jtno -
            // confirm whether the two are meant to match.
            printf("J_M%03u_DS%02u", Compiler::s_compMethodsCount, id->idDebugOnlyInfo()->idMemCookie);
        }

        // The cookie is subtracted from the displacement shown inside the brackets.
        disp -= id->idDebugOnlyInfo()->idMemCookie;
    }

    // Set when the base register is REG_ESP, or REG_EBP while a frame pointer is in
    // use. Frame references are never munged for diff-able output and their
    // displacement is always printed in the "%02XH" form.
    bool frameRef = false;

    printf("[");

    if (id->idAddr()->iiaAddrMode.amBaseReg != REG_NA)
    {
        printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg));
        nsep = true;
        if (id->idAddr()->iiaAddrMode.amBaseReg == REG_ESP)
        {
            frameRef = true;
        }
        else if (emitComp->isFramePointerUsed() && id->idAddr()->iiaAddrMode.amBaseReg == REG_EBP)
        {
            frameRef = true;
        }
    }

    if (id->idAddr()->iiaAddrMode.amIndxReg != REG_NA)
    {
        size_t scale = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale);

        if (nsep)
        {
            printf("+");
        }
        // A scale of 1 is implied and not printed.
        // NOTE(review): 'scale' is a size_t but is formatted with "%u".
        if (scale > 1)
        {
            printf("%u*", scale);
        }
        printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amIndxReg));
        nsep = true;
    }

    // Relocatable displacements are shown via emitDispReloc, except for table jumps.
    if ((id->idIsDspReloc()) && (id->idIns() != INS_i_jmp))
    {
        if (nsep)
        {
            printf("+");
        }
        emitDispReloc(disp);
    }
    else
    {
        // Munge any pointers if we want diff-able disassembly
        // It's assumed to be a pointer when disp is outside of the range (-1M, +1M); top bits are not 0 or -1
        // ((disp >> 20) + 1 is 1 or 0 exactly when the shifted value is 0 or -1;
        // as an unsigned quantity every other value compares greater than 1.)
        if (!frameRef && emitComp->opts.disDiffable && (static_cast<size_t>((disp >> 20) + 1) > 1))
        {
            if (nsep)
            {
                printf("+");
            }
            printf("D1FFAB1EH");
        }
        else if (disp > 0)
        {
            if (nsep)
            {
                printf("+");
            }
            // Positive displacement: decimal below 1000, otherwise hex of increasing width.
            if (frameRef)
            {
                printf("%02XH", disp);
            }
            else if (disp < 1000)
            {
                printf("%d", disp);
            }
            else if (disp <= 0xFFFF)
            {
                printf("%04XH", disp);
            }
            else
            {
                printf("%08XH", disp);
            }
        }
        else if (disp < 0)
        {
            // Negative displacement: the "-" sign doubles as the separator, so no "+"
            // is printed, except in the large-magnitude case below.
            if (frameRef)
            {
                printf("-%02XH", -disp);
            }
            else if (disp > -1000)
            {
                printf("-%d", -disp);
            }
            else if (disp >= -0xFFFF)
            {
                printf("-%04XH", -disp);
            }
            else if (disp < -0xFFFFFF)
            {
                // Very large negative values are printed as their raw hex bit pattern
                // rather than negated (presumably because they are really addresses).
                if (nsep)
                {
                    printf("+");
                }
                printf("%08XH", disp);
            }
            else
            {
                printf("-%08XH", -disp);
            }
        }
        else if (!nsep)
        {
            // Zero displacement is printed only when no register was printed.
            printf("%04XH", disp);
        }
    }

    printf("]");

    // pretty print string if it looks like one
    if ((id->idGCref() == GCT_GCREF) && (id->idIns() == INS_mov) && (id->idAddr()->iiaAddrMode.amBaseReg == REG_NA))
    {
        const wchar_t* str = emitComp->eeGetCPString(disp);
        if (str != nullptr)
        {
            printf("      '%S'", str);
        }
    }

    // Dump the jump table that was located above, unless the caller asked for no detail.
    if (jdsc && !noDetail)
    {
        // Number of entries: the size without its marker bit, divided by the pointer size.
        unsigned     cnt = (jdsc->dsSize - 1) / TARGET_POINTER_SIZE;
        BasicBlock** bbp = (BasicBlock**)jdsc->dsCont;

// SIZE_LETTER is defined here and is not #undef'd within this function.
#ifdef _TARGET_AMD64_
#define SIZE_LETTER "Q"
#else
#define SIZE_LETTER "D"
#endif
        printf("\n\n    J_M%03u_DS%02u LABEL   " SIZE_LETTER "WORD", Compiler::s_compMethodsCount, jtno);

        /* Display the label table (it's stored as "BasicBlock*" values) */

        // NOTE(review): the do/while form assumes the table has at least one entry;
        // with cnt == 0 the "--cnt" test would wrap around.
        do
        {
            insGroup* lab;

            /* Convert the BasicBlock* value to an IG address */

            lab = (insGroup*)emitCodeGetCookie(*bbp++);
            assert(lab);

            printf("\n            D" SIZE_LETTER "      G_M%03u_IG%02u", Compiler::s_compMethodsCount, lab->igNum);
        } while (--cnt);
    }
}
7947
7948 /*****************************************************************************
7949  *
7950  *  If the given instruction is a shift, display the 2nd operand.
7951  */
7952
7953 void emitter::emitDispShift(instruction ins, int cnt)
7954 {
7955     switch (ins)
7956     {
7957         case INS_rcl_1:
7958         case INS_rcr_1:
7959         case INS_rol_1:
7960         case INS_ror_1:
7961         case INS_shl_1:
7962         case INS_shr_1:
7963         case INS_sar_1:
7964             printf(", 1");
7965             break;
7966
7967         case INS_rcl:
7968         case INS_rcr:
7969         case INS_rol:
7970         case INS_ror:
7971         case INS_shl:
7972         case INS_shr:
7973         case INS_sar:
7974             printf(", cl");
7975             break;
7976
7977         case INS_rcl_N:
7978         case INS_rcr_N:
7979         case INS_rol_N:
7980         case INS_ror_N:
7981         case INS_shl_N:
7982         case INS_shr_N:
7983         case INS_sar_N:
7984             printf(", %d", cnt);
7985             break;
7986
7987         default:
7988             break;
7989     }
7990 }
7991
7992 /*****************************************************************************
7993  *
7994  *  Display (optionally) the bytes for the instruction encoding in hex
7995  */
7996
7997 void emitter::emitDispInsHex(BYTE* code, size_t sz)
7998 {
7999     // We do not display the instruction hex if we want diff-able disassembly
8000     if (!emitComp->opts.disDiffable)
8001     {
8002 #ifdef _TARGET_AMD64_
8003         // how many bytes per instruction we format for
8004         const size_t digits = 10;
8005 #else // _TARGET_X86
8006         const size_t digits = 6;
8007 #endif
8008         printf(" ");
8009         for (unsigned i = 0; i < sz; i++)
8010         {
8011             printf("%02X", (*((BYTE*)(code + i))));
8012         }
8013
8014         if (sz < digits)
8015         {
8016             printf("%.*s", 2 * (digits - sz), "                         ");
8017         }
8018     }
8019 }
8020
8021 /*****************************************************************************
8022  *
8023  *  Display the given instruction.
8024  */
8025
8026 void emitter::emitDispIns(
8027     instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* code, size_t sz, insGroup* ig)
8028 {
8029     emitAttr    attr;
8030     const char* sstr;
8031
8032     instruction ins = id->idIns();
8033
8034     if (emitComp->verbose)
8035     {
8036         unsigned idNum = id->idDebugOnlyInfo()->idNum;
8037         printf("IN%04x: ", idNum);
8038     }
8039
8040 #define ID_INFO_DSP_RELOC ((bool)(id->idIsDspReloc()))
8041
8042     /* Display a constant value if the instruction references one */
8043
8044     if (!isNew)
8045     {
8046         switch (id->idInsFmt())
8047         {
8048             int offs;
8049
8050             case IF_MRD_RRD:
8051             case IF_MWR_RRD:
8052             case IF_MRW_RRD:
8053
8054             case IF_RRD_MRD:
8055             case IF_RWR_MRD:
8056             case IF_RRW_MRD:
8057
8058             case IF_MRD_CNS:
8059             case IF_MWR_CNS:
8060             case IF_MRW_CNS:
8061             case IF_MRW_SHF:
8062
8063             case IF_MRD:
8064             case IF_MWR:
8065             case IF_MRW:
8066
8067             case IF_MRD_OFF:
8068
8069                 /* Is this actually a reference to a data section? */
8070
8071                 offs = Compiler::eeGetJitDataOffs(id->idAddr()->iiaFieldHnd);
8072
8073                 if (offs >= 0)
8074                 {
8075                     void* addr;
8076
8077                     /* Display a data section reference */
8078
8079                     assert((unsigned)offs < emitConsDsc.dsdOffs);
8080                     addr = emitConsBlock ? emitConsBlock + offs : nullptr;
8081
8082 #if 0
8083                 // TODO-XArch-Cleanup: Fix or remove this code.
8084                 /* Is the operand an integer or floating-point value? */
8085
8086                 bool isFP = false;
8087
8088                 if  (CodeGen::instIsFP(id->idIns()))
8089                 {
8090                     switch (id->idIns())
8091                     {
8092                     case INS_fild:
8093                     case INS_fildl:
8094                         break;
8095
8096                     default:
8097                         isFP = true;
8098                         break;
8099                     }
8100                 }
8101
8102                 if (offs & 1)
8103                     printf("@CNS%02u", offs);
8104                 else
8105                     printf("@RWD%02u", offs);
8106
8107                 printf("      ");
8108
8109                 if  (addr)
8110                 {
8111                     addr = 0;
8112                     // TODO-XArch-Bug?:
8113                     //          This was busted by switching the order
8114                     //          in which we output the code block vs.
8115                     //          the data blocks -- when we get here,
8116                     //          the data block has not been filled in
8117                     //          yet, so we'll display garbage.
8118
8119                     if  (isFP)
8120                     {
8121                         if  (id->idOpSize() == EA_4BYTE)
8122                             printf("DF      %f \n", addr ? *(float   *)addr : 0);
8123                         else
8124                             printf("DQ      %lf\n", addr ? *(double  *)addr : 0);
8125                     }
8126                     else
8127                     {
8128                         if  (id->idOpSize() <= EA_4BYTE)
8129                             printf("DD      %d \n", addr ? *(int     *)addr : 0);
8130                         else
8131                             printf("DQ      %D \n", addr ? *(__int64 *)addr : 0);
8132                     }
8133                 }
8134 #endif
8135                 }
8136                 break;
8137
8138             default:
8139                 break;
8140         }
8141     }
8142
8143     // printf("[F=%s] "   , emitIfName(id->idInsFmt()));
8144     // printf("INS#%03u: ", id->idDebugOnlyInfo()->idNum);
8145     // printf("[S=%02u] " , emitCurStackLvl); if (isNew) printf("[M=%02u] ", emitMaxStackDepth);
8146     // printf("[S=%02u] " , emitCurStackLvl/sizeof(INT32));
8147     // printf("[A=%08X] " , emitSimpleStkMask);
8148     // printf("[A=%08X] " , emitSimpleByrefStkMask);
8149     // printf("[L=%02u] " , id->idCodeSize());
8150
8151     if (!emitComp->opts.dspEmit && !isNew && !asmfm)
8152     {
8153         doffs = true;
8154     }
8155
8156     /* Display the instruction offset */
8157
8158     emitDispInsOffs(offset, doffs);
8159
8160     if (code != nullptr)
8161     {
8162         /* Display the instruction hex code */
8163
8164         emitDispInsHex(code, sz);
8165     }
8166
8167     /* Display the instruction name */
8168
8169     sstr = codeGen->genInsName(ins);
8170
8171     if (IsAVXInstruction(ins) && !IsBMIInstruction(ins))
8172     {
8173         printf(" v%-8s", sstr);
8174     }
8175     else
8176     {
8177         printf(" %-9s", sstr);
8178     }
8179 #ifndef FEATURE_PAL
8180     if (strnlen_s(sstr, 10) >= 8)
8181 #else  // FEATURE_PAL
8182     if (strnlen(sstr, 10) >= 8)
8183 #endif // FEATURE_PAL
8184     {
8185         printf(" ");
8186     }
8187
8188     /* By now the size better be set to something */
8189
8190     assert(emitInstCodeSz(id) || emitInstHasNoCode(ins));
8191
8192     /* Figure out the operand size */
8193
8194     if (id->idGCref() == GCT_GCREF)
8195     {
8196         attr = EA_GCREF;
8197         sstr = "gword ptr ";
8198     }
8199     else if (id->idGCref() == GCT_BYREF)
8200     {
8201         attr = EA_BYREF;
8202         sstr = "bword ptr ";
8203     }
8204     else
8205     {
8206         attr = id->idOpSize();
8207         sstr = codeGen->genSizeStr(attr);
8208
8209         if (ins == INS_lea)
8210         {
8211 #ifdef _TARGET_AMD64_
8212             assert((attr == EA_4BYTE) || (attr == EA_8BYTE));
8213 #else
8214             assert(attr == EA_4BYTE);
8215 #endif
8216             sstr = "";
8217         }
8218     }
8219
8220     /* Now see what instruction format we've got */
8221
8222     // First print the implicit register usage
8223     if (instrHasImplicitRegPairDest(ins))
8224     {
8225         printf("%s:%s, ", emitRegName(REG_EDX, id->idOpSize()), emitRegName(REG_EAX, id->idOpSize()));
8226     }
8227     else if (instrIs3opImul(ins))
8228     {
8229         regNumber tgtReg = inst3opImulReg(ins);
8230         printf("%s, ", emitRegName(tgtReg, id->idOpSize()));
8231     }
8232
8233     switch (id->idInsFmt())
8234     {
8235         ssize_t     val;
8236         ssize_t     offs;
8237         CnsVal      cnsVal;
8238         const char* methodName;
8239
8240         case IF_CNS:
8241             val = emitGetInsSC(id);
8242 #ifdef _TARGET_AMD64_
8243             // no 8-byte immediates allowed here!
8244             assert((val >= (ssize_t)0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
8245 #endif
8246             if (id->idIsCnsReloc())
8247             {
8248                 emitDispReloc(val);
8249             }
8250             else
8251             {
8252             PRINT_CONSTANT:
8253                 // Munge any pointers if we want diff-able disassembly
8254                 if (emitComp->opts.disDiffable)
8255                 {
8256                     ssize_t top14bits = (val >> 18);
8257                     if ((top14bits != 0) && (top14bits != -1))
8258                     {
8259                         val = 0xD1FFAB1E;
8260                     }
8261                 }
8262                 if ((val > -1000) && (val < 1000))
8263                 {
8264                     printf("%d", val);
8265                 }
8266                 else if ((val > 0) || (val < -0xFFFFFF))
8267                 {
8268                     printf("0x%IX", val);
8269                 }
8270                 else
8271                 { // (val < 0)
8272                     printf("-0x%IX", -val);
8273                 }
8274             }
8275             break;
8276
8277         case IF_ARD:
8278         case IF_AWR:
8279         case IF_ARW:
8280
8281             if (ins == INS_call && id->idIsCallRegPtr())
8282             {
8283                 printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg));
8284                 break;
8285             }
8286
8287             printf("%s", sstr);
8288             emitDispAddrMode(id, isNew);
8289             emitDispShift(ins);
8290
8291             if (ins == INS_call)
8292             {
8293                 assert(id->idInsFmt() == IF_ARD);
8294
8295                 /* Ignore indirect calls */
8296
8297                 if (id->idDebugOnlyInfo()->idMemCookie == 0)
8298                 {
8299                     break;
8300                 }
8301
8302                 assert(id->idDebugOnlyInfo()->idMemCookie);
8303
8304                 /* This is a virtual call */
8305
8306                 methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
8307                 printf("%s", methodName);
8308             }
8309             break;
8310
8311         case IF_RRD_ARD:
8312         case IF_RWR_ARD:
8313         case IF_RRW_ARD:
8314 #ifdef _TARGET_AMD64_
8315             if (ins == INS_movsxd)
8316             {
8317                 attr = EA_8BYTE;
8318             }
8319             else
8320 #endif
8321                 if (ins == INS_movsx || ins == INS_movzx)
8322             {
8323                 attr = EA_PTRSIZE;
8324             }
8325             else if ((ins == INS_crc32) && (attr != EA_8BYTE))
8326             {
8327                 // The idReg1 is always 4 bytes, but the size of idReg2 can vary.
8328                 // This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx`
8329                 attr = EA_4BYTE;
8330             }
8331             printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8332             emitDispAddrMode(id);
8333             break;
8334
8335         case IF_RRW_ARD_CNS:
8336         case IF_RWR_ARD_CNS:
8337         {
8338             printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8339             emitDispAddrMode(id);
8340             emitGetInsAmdCns(id, &cnsVal);
8341
8342             val = cnsVal.cnsVal;
8343             printf(", ");
8344
8345             if (cnsVal.cnsReloc)
8346             {
8347                 emitDispReloc(val);
8348             }
8349             else
8350             {
8351                 goto PRINT_CONSTANT;
8352             }
8353
8354             break;
8355         }
8356
8357         case IF_AWR_RRD_CNS:
8358         {
8359             assert(ins == INS_vextracti128 || ins == INS_vextractf128);
8360             // vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr"
8361             sstr = codeGen->genSizeStr(EA_ATTR(16));
8362             printf(sstr);
8363             emitDispAddrMode(id);
8364             printf(", %s", emitRegName(id->idReg1(), attr));
8365
8366             emitGetInsAmdCns(id, &cnsVal);
8367
8368             val = cnsVal.cnsVal;
8369             printf(", ");
8370
8371             if (cnsVal.cnsReloc)
8372             {
8373                 emitDispReloc(val);
8374             }
8375             else
8376             {
8377                 goto PRINT_CONSTANT;
8378             }
8379
8380             break;
8381         }
8382
8383         case IF_RWR_RRD_ARD:
8384             printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8385             emitDispAddrMode(id);
8386             break;
8387
8388         case IF_RWR_ARD_RRD:
8389             if (ins == INS_vpgatherqd || ins == INS_vgatherqps)
8390             {
8391                 attr = EA_16BYTE;
8392             }
8393             sstr = codeGen->genSizeStr(EA_ATTR(4));
8394             printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8395             emitDispAddrMode(id);
8396             printf(", %s", emitRegName(id->idReg2(), attr));
8397             break;
8398
8399         case IF_RWR_RRD_ARD_CNS:
8400         {
8401             printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8402             emitDispAddrMode(id);
8403             emitGetInsAmdCns(id, &cnsVal);
8404
8405             val = cnsVal.cnsVal;
8406             printf(", ");
8407
8408             if (cnsVal.cnsReloc)
8409             {
8410                 emitDispReloc(val);
8411             }
8412             else
8413             {
8414                 goto PRINT_CONSTANT;
8415             }
8416
8417             break;
8418         }
8419
8420         case IF_RWR_RRD_ARD_RRD:
8421         {
8422             printf("%s, ", emitRegName(id->idReg1(), attr));
8423             printf("%s, ", emitRegName(id->idReg2(), attr));
8424             emitDispAddrMode(id);
8425
8426             emitGetInsAmdCns(id, &cnsVal);
8427             val = (cnsVal.cnsVal >> 4) + XMMBASE;
8428             printf(", %s", emitRegName((regNumber)val, attr));
8429             break;
8430         }
8431
8432         case IF_ARD_RRD:
8433         case IF_AWR_RRD:
8434         case IF_ARW_RRD:
8435
8436             printf("%s", sstr);
8437             emitDispAddrMode(id);
8438             printf(", %s", emitRegName(id->idReg1(), attr));
8439             break;
8440
8441         case IF_AWR_RRD_RRD:
8442         {
8443             printf("%s", sstr);
8444             emitDispAddrMode(id);
8445             printf(", %s", emitRegName(id->idReg1(), attr));
8446             printf(", %s", emitRegName(id->idReg2(), attr));
8447             break;
8448         }
8449
8450         case IF_ARD_CNS:
8451         case IF_AWR_CNS:
8452         case IF_ARW_CNS:
8453         case IF_ARW_SHF:
8454
8455             printf("%s", sstr);
8456             emitDispAddrMode(id);
8457             emitGetInsAmdCns(id, &cnsVal);
8458             val = cnsVal.cnsVal;
8459 #ifdef _TARGET_AMD64_
8460             // no 8-byte immediates allowed here!
8461             assert((val >= (ssize_t)0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
8462 #endif
8463             if (id->idInsFmt() == IF_ARW_SHF)
8464             {
8465                 emitDispShift(ins, (BYTE)val);
8466             }
8467             else
8468             {
8469                 printf(", ");
8470                 if (cnsVal.cnsReloc)
8471                 {
8472                     emitDispReloc(val);
8473                 }
8474                 else
8475                 {
8476                     goto PRINT_CONSTANT;
8477                 }
8478             }
8479             break;
8480
8481         case IF_SRD:
8482         case IF_SWR:
8483         case IF_SRW:
8484
8485             printf("%s", sstr);
8486
8487 #if !FEATURE_FIXED_OUT_ARGS
8488             if (ins == INS_pop)
8489                 emitCurStackLvl -= sizeof(int);
8490 #endif
8491
8492             emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8493                              id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8494
8495 #if !FEATURE_FIXED_OUT_ARGS
8496             if (ins == INS_pop)
8497                 emitCurStackLvl += sizeof(int);
8498 #endif
8499
8500             emitDispShift(ins);
8501             break;
8502
8503         case IF_SRD_RRD:
8504         case IF_SWR_RRD:
8505         case IF_SRW_RRD:
8506
8507             printf("%s", sstr);
8508
8509             emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8510                              id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8511
8512             printf(", %s", emitRegName(id->idReg1(), attr));
8513             break;
8514
8515         case IF_SRD_CNS:
8516         case IF_SWR_CNS:
8517         case IF_SRW_CNS:
8518         case IF_SRW_SHF:
8519
8520             printf("%s", sstr);
8521
8522             emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8523                              id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8524
8525             emitGetInsCns(id, &cnsVal);
8526             val = cnsVal.cnsVal;
8527 #ifdef _TARGET_AMD64_
8528             // no 8-byte immediates allowed here!
8529             assert((val >= (ssize_t)0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
8530 #endif
8531             if (id->idInsFmt() == IF_SRW_SHF)
8532             {
8533                 emitDispShift(ins, (BYTE)val);
8534             }
8535             else
8536             {
8537                 printf(", ");
8538                 if (cnsVal.cnsReloc)
8539                 {
8540                     emitDispReloc(val);
8541                 }
8542                 else
8543                 {
8544                     goto PRINT_CONSTANT;
8545                 }
8546             }
8547             break;
8548
8549         case IF_RRD_SRD:
8550         case IF_RWR_SRD:
8551         case IF_RRW_SRD:
8552 #ifdef _TARGET_AMD64_
8553             if (ins == INS_movsxd)
8554             {
8555                 attr = EA_8BYTE;
8556             }
8557             else
8558 #endif
8559                 if (ins == INS_movsx || ins == INS_movzx)
8560             {
8561                 attr = EA_PTRSIZE;
8562             }
8563             else if ((ins == INS_crc32) && (attr != EA_8BYTE))
8564             {
8565                 // The idReg1 is always 4 bytes, but the size of idReg2 can vary.
8566                 // This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx`
8567                 attr = EA_4BYTE;
8568             }
8569
8570             printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8571             emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8572                              id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8573
8574             break;
8575
8576         case IF_RRW_SRD_CNS:
8577         case IF_RWR_SRD_CNS:
8578         {
8579             printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8580             emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8581                              id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8582             emitGetInsCns(id, &cnsVal);
8583
8584             val = cnsVal.cnsVal;
8585             printf(", ");
8586
8587             if (cnsVal.cnsReloc)
8588             {
8589                 emitDispReloc(val);
8590             }
8591             else
8592             {
8593                 goto PRINT_CONSTANT;
8594             }
8595             break;
8596         }
8597
8598         case IF_RWR_RRD_SRD:
8599             printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8600             emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8601                              id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8602             break;
8603
8604         case IF_RWR_RRD_SRD_CNS:
8605         {
8606             printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8607             emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8608                              id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8609             emitGetInsCns(id, &cnsVal);
8610
8611             val = cnsVal.cnsVal;
8612             printf(", ");
8613
8614             if (cnsVal.cnsReloc)
8615             {
8616                 emitDispReloc(val);
8617             }
8618             else
8619             {
8620                 goto PRINT_CONSTANT;
8621             }
8622             break;
8623         }
8624
8625         case IF_RWR_RRD_SRD_RRD:
8626         {
8627             printf("%s, ", emitRegName(id->idReg1(), attr));
8628             printf("%s, ", emitRegName(id->idReg2(), attr));
8629             emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8630                              id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8631
8632             emitGetInsCns(id, &cnsVal);
8633             val = (cnsVal.cnsVal >> 4) + XMMBASE;
8634             printf(", %s", emitRegName((regNumber)val, attr));
8635             break;
8636         }
8637
8638         case IF_RRD_RRD:
8639         case IF_RWR_RRD:
8640         case IF_RRW_RRD:
8641             if (ins == INS_mov_i2xmm)
8642             {
8643                 printf("%s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr));
8644             }
8645             else if (ins == INS_mov_xmm2i)
8646             {
8647                 printf("%s, %s", emitRegName(id->idReg2(), attr), emitRegName(id->idReg1(), EA_16BYTE));
8648             }
8649             else if (ins == INS_pmovmskb)
8650             {
8651                 printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
8652             }
8653             else if ((ins == INS_cvtsi2ss) || (ins == INS_cvtsi2sd))
8654             {
8655                 printf(" %s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr));
8656             }
8657             else if ((ins == INS_cvttsd2si) || (ins == INS_cvtss2si) || (ins == INS_cvtsd2si) || (ins == INS_cvttss2si))
8658             {
8659                 printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
8660             }
8661 #ifdef _TARGET_AMD64_
8662             else if (ins == INS_movsxd)
8663             {
8664                 printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), emitRegName(id->idReg2(), EA_4BYTE));
8665             }
8666 #endif // _TARGET_AMD64_
8667             else if (ins == INS_movsx || ins == INS_movzx)
8668             {
8669                 printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), emitRegName(id->idReg2(), attr));
8670             }
8671             else if (ins == INS_bt)
8672             {
8673                 // INS_bt operands are reversed. Display them in the normal order.
8674                 printf("%s, %s", emitRegName(id->idReg2(), attr), emitRegName(id->idReg1(), attr));
8675             }
8676 #ifdef FEATURE_HW_INTRINSICS
8677             else if (ins == INS_crc32 && attr != EA_8BYTE)
8678             {
8679                 // The idReg1 is always 4 bytes, but the size of idReg2 can vary.
8680                 // This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx`
8681                 printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
8682             }
8683 #endif // FEATURE_HW_INTRINSICS
8684             else
8685             {
8686                 printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr));
8687             }
8688             break;
8689
8690         case IF_RRW_RRW:
8691             assert(ins == INS_xchg);
8692             printf("%s,", emitRegName(id->idReg1(), attr));
8693             printf(" %s", emitRegName(id->idReg2(), attr));
8694             break;
8695
8696         case IF_RWR_RRD_RRD:
8697         {
8698             assert(IsAVXInstruction(ins));
8699             assert(IsThreeOperandAVXInstruction(ins));
8700             regNumber reg2 = id->idReg2();
8701             regNumber reg3 = id->idReg3();
8702             if (ins == INS_bextr || ins == INS_bzhi)
8703             {
8704                 // BMI bextr and bzhi encodes the reg2 in VEX.vvvv and reg3 in modRM,
8705                 // which is different from most of other instructions
8706                 regNumber tmp = reg2;
8707                 reg2          = reg3;
8708                 reg3          = tmp;
8709             }
8710             printf("%s, ", emitRegName(id->idReg1(), attr));
8711             printf("%s, ", emitRegName(reg2, attr));
8712             printf("%s", emitRegName(reg3, attr));
8713             break;
8714         }
8715
8716         case IF_RWR_RRD_RRD_CNS:
8717             assert(IsAVXInstruction(ins));
8718             assert(IsThreeOperandAVXInstruction(ins));
8719             printf("%s, ", emitRegName(id->idReg1(), attr));
8720             printf("%s, ", emitRegName(id->idReg2(), attr));
8721             printf("%s, ", emitRegName(id->idReg3(), attr));
8722             val = emitGetInsSC(id);
8723             goto PRINT_CONSTANT;
8724             break;
8725         case IF_RWR_RRD_RRD_RRD:
8726             assert(IsAVXOnlyInstruction(ins));
8727             assert(UseVEXEncoding());
8728             printf("%s, ", emitRegName(id->idReg1(), attr));
8729             printf("%s, ", emitRegName(id->idReg2(), attr));
8730             printf("%s, ", emitRegName(id->idReg3(), attr));
8731             printf("%s", emitRegName(id->idReg4(), attr));
8732             break;
8733         case IF_RRW_RRW_CNS:
8734             printf("%s,", emitRegName(id->idReg1(), attr));
8735             printf(" %s", emitRegName(id->idReg2(), attr));
8736             val = emitGetInsSC(id);
8737 #ifdef _TARGET_AMD64_
8738             // no 8-byte immediates allowed here!
8739             assert((val >= (ssize_t)0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
8740 #endif
8741             printf(", ");
8742             if (id->idIsCnsReloc())
8743             {
8744                 emitDispReloc(val);
8745             }
8746             else
8747             {
8748                 goto PRINT_CONSTANT;
8749             }
8750             break;
8751
8752         case IF_RRD:
8753         case IF_RWR:
8754         case IF_RRW:
8755             printf("%s", emitRegName(id->idReg1(), attr));
8756             emitDispShift(ins);
8757             break;
8758
8759         case IF_RRW_SHF:
8760             printf("%s", emitRegName(id->idReg1(), attr));
8761             emitDispShift(ins, (BYTE)emitGetInsSC(id));
8762             break;
8763
8764         case IF_RRD_MRD:
8765         case IF_RWR_MRD:
8766         case IF_RRW_MRD:
8767
8768             if (ins == INS_movsx || ins == INS_movzx)
8769             {
8770                 attr = EA_PTRSIZE;
8771             }
8772 #ifdef _TARGET_AMD64_
8773             else if (ins == INS_movsxd)
8774             {
8775                 attr = EA_PTRSIZE;
8776             }
8777 #endif
8778             else if ((ins == INS_crc32) && (attr != EA_8BYTE))
8779             {
8780                 // The idReg1 is always 4 bytes, but the size of idReg2 can vary.
8781                 // This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx`
8782                 attr = EA_4BYTE;
8783             }
8784             printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8785             offs = emitGetInsDsp(id);
8786             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8787             break;
8788
8789         case IF_RRW_MRD_CNS:
8790         case IF_RWR_MRD_CNS:
8791         {
8792             printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8793             offs = emitGetInsDsp(id);
8794             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8795             emitGetInsDcmCns(id, &cnsVal);
8796
8797             val = cnsVal.cnsVal;
8798             printf(", ");
8799
8800             if (cnsVal.cnsReloc)
8801             {
8802                 emitDispReloc(val);
8803             }
8804             else
8805             {
8806                 goto PRINT_CONSTANT;
8807             }
8808             break;
8809         }
8810
8811         case IF_MWR_RRD_CNS:
8812         {
8813             assert(ins == INS_vextracti128 || ins == INS_vextractf128);
8814             // vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr"
8815             sstr = codeGen->genSizeStr(EA_ATTR(16));
8816             printf(sstr);
8817             offs = emitGetInsDsp(id);
8818             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8819             printf(", %s", emitRegName(id->idReg1(), attr));
8820             emitGetInsDcmCns(id, &cnsVal);
8821
8822             val = cnsVal.cnsVal;
8823             printf(", ");
8824
8825             if (cnsVal.cnsReloc)
8826             {
8827                 emitDispReloc(val);
8828             }
8829             else
8830             {
8831                 goto PRINT_CONSTANT;
8832             }
8833
8834             break;
8835         }
8836
8837         case IF_RWR_RRD_MRD:
8838             printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8839             offs = emitGetInsDsp(id);
8840             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8841             break;
8842
8843         case IF_RWR_RRD_MRD_CNS:
8844         {
8845             printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8846             offs = emitGetInsDsp(id);
8847             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8848             emitGetInsDcmCns(id, &cnsVal);
8849
8850             val = cnsVal.cnsVal;
8851             printf(", ");
8852
8853             if (cnsVal.cnsReloc)
8854             {
8855                 emitDispReloc(val);
8856             }
8857             else
8858             {
8859                 goto PRINT_CONSTANT;
8860             }
8861             break;
8862         }
8863
8864         case IF_RWR_RRD_MRD_RRD:
8865         {
8866             printf("%s, ", emitRegName(id->idReg1(), attr));
8867             printf("%s, ", emitRegName(id->idReg2(), attr));
8868
8869             offs = emitGetInsDsp(id);
8870             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8871
8872             emitGetInsDcmCns(id, &cnsVal);
8873             val = (cnsVal.cnsVal >> 4) + XMMBASE;
8874             printf(", %s", emitRegName((regNumber)val, attr));
8875             break;
8876         }
8877
8878         case IF_RWR_MRD_OFF:
8879
8880             printf("%s, %s", emitRegName(id->idReg1(), attr), "offset");
8881             offs = emitGetInsDsp(id);
8882             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8883             break;
8884
8885         case IF_MRD_RRD:
8886         case IF_MWR_RRD:
8887         case IF_MRW_RRD:
8888
8889             printf("%s", sstr);
8890             offs = emitGetInsDsp(id);
8891             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8892             printf(", %s", emitRegName(id->idReg1(), attr));
8893             break;
8894
8895         case IF_MRD_CNS:
8896         case IF_MWR_CNS:
8897         case IF_MRW_CNS:
8898         case IF_MRW_SHF:
8899
8900             printf("%s", sstr);
8901             offs = emitGetInsDsp(id);
8902             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8903             emitGetInsDcmCns(id, &cnsVal);
8904             val = cnsVal.cnsVal;
8905 #ifdef _TARGET_AMD64_
8906             // no 8-byte immediates allowed here!
8907             assert((val >= (ssize_t)0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
8908 #endif
8909             if (cnsVal.cnsReloc)
8910             {
8911                 emitDispReloc(val);
8912             }
8913             else if (id->idInsFmt() == IF_MRW_SHF)
8914             {
8915                 emitDispShift(ins, (BYTE)val);
8916             }
8917             else
8918             {
8919                 printf(", ");
8920                 goto PRINT_CONSTANT;
8921             }
8922             break;
8923
8924         case IF_MRD:
8925         case IF_MWR:
8926         case IF_MRW:
8927
8928             printf("%s", sstr);
8929             offs = emitGetInsDsp(id);
8930             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8931             emitDispShift(ins);
8932             break;
8933
8934         case IF_MRD_OFF:
8935
8936             printf("offset ");
8937             offs = emitGetInsDsp(id);
8938             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8939             break;
8940
8941         case IF_RRD_CNS:
8942         case IF_RWR_CNS:
8943         case IF_RRW_CNS:
8944             printf("%s, ", emitRegName(id->idReg1(), attr));
8945             val = emitGetInsSC(id);
8946             if (id->idIsCnsReloc())
8947             {
8948                 emitDispReloc(val);
8949             }
8950             else
8951             {
8952                 goto PRINT_CONSTANT;
8953             }
8954             break;
8955
8956         case IF_LABEL:
8957         case IF_RWR_LABEL:
8958         case IF_SWR_LABEL:
8959
8960             if (ins == INS_lea)
8961             {
8962                 printf("%s, ", emitRegName(id->idReg1(), attr));
8963             }
8964             else if (ins == INS_mov)
8965             {
8966                 /* mov   dword ptr [frame.callSiteReturnAddress], label */
8967                 assert(id->idInsFmt() == IF_SWR_LABEL);
8968                 instrDescLbl* idlbl = (instrDescLbl*)id;
8969
8970                 emitDispFrameRef(idlbl->dstLclVar.lvaVarNum(), idlbl->dstLclVar.lvaOffset(), 0, asmfm);
8971
8972                 printf(", ");
8973             }
8974
8975             if (((instrDescJmp*)id)->idjShort)
8976             {
8977                 printf("SHORT ");
8978             }
8979
8980             if (id->idIsBound())
8981             {
8982                 printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum);
8983             }
8984             else
8985             {
8986                 printf("L_M%03u_" FMT_BB, Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum);
8987             }
8988             break;
8989
8990         case IF_METHOD:
8991         case IF_METHPTR:
8992             if (id->idIsCallAddr())
8993             {
8994                 offs       = (ssize_t)id->idAddr()->iiaAddr;
8995                 methodName = "";
8996             }
8997             else
8998             {
8999                 offs       = 0;
9000                 methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
9001             }
9002
9003             if (id->idInsFmt() == IF_METHPTR)
9004             {
9005                 printf("[");
9006             }
9007
9008             if (offs)
9009             {
9010                 if (id->idIsDspReloc())
9011                 {
9012                     printf("reloc ");
9013                 }
9014                 printf("%08X", offs);
9015             }
9016             else
9017             {
9018                 printf("%s", methodName);
9019             }
9020
9021             if (id->idInsFmt() == IF_METHPTR)
9022             {
9023                 printf("]");
9024             }
9025
9026             break;
9027
9028         case IF_NONE:
9029             break;
9030
9031         default:
9032             printf("unexpected format %s", emitIfName(id->idInsFmt()));
9033             assert(!"unexpectedFormat");
9034             break;
9035     }
9036
9037     if (sz != 0 && sz != id->idCodeSize() && (!asmfm || emitComp->verbose))
9038     {
9039         // Code size in the instrDesc is different from the actual code size we've been given!
9040         printf(" (ECS:%d, ACS:%d)", id->idCodeSize(), sz);
9041     }
9042
9043     printf("\n");
9044 }
9045
9046 /*****************************************************************************/
9047 #endif
9048
9049 /*****************************************************************************
9050  *
9051  *  Output nBytes bytes of NOP instructions
9052  */
9053
9054 static BYTE* emitOutputNOP(BYTE* dst, size_t nBytes)
9055 {
9056     assert(nBytes <= 15);
9057
9058 #ifndef _TARGET_AMD64_
9059     // TODO-X86-CQ: when VIA C3 CPU's are out of circulation, switch to the
9060     // more efficient real NOP: 0x0F 0x1F +modR/M
9061     // Also can't use AMD recommended, multiple size prefixes (i.e. 0x66 0x66 0x90 for 3 byte NOP)
9062     // because debugger and msdis don't like it, so maybe VIA doesn't either
9063     // So instead just stick to repeating single byte nops
9064
9065     switch (nBytes)
9066     {
9067         case 15:
9068             *dst++ = 0x90;
9069             __fallthrough;
9070         case 14:
9071             *dst++ = 0x90;
9072             __fallthrough;
9073         case 13:
9074             *dst++ = 0x90;
9075             __fallthrough;
9076         case 12:
9077             *dst++ = 0x90;
9078             __fallthrough;
9079         case 11:
9080             *dst++ = 0x90;
9081             __fallthrough;
9082         case 10:
9083             *dst++ = 0x90;
9084             __fallthrough;
9085         case 9:
9086             *dst++ = 0x90;
9087             __fallthrough;
9088         case 8:
9089             *dst++ = 0x90;
9090             __fallthrough;
9091         case 7:
9092             *dst++ = 0x90;
9093             __fallthrough;
9094         case 6:
9095             *dst++ = 0x90;
9096             __fallthrough;
9097         case 5:
9098             *dst++ = 0x90;
9099             __fallthrough;
9100         case 4:
9101             *dst++ = 0x90;
9102             __fallthrough;
9103         case 3:
9104             *dst++ = 0x90;
9105             __fallthrough;
9106         case 2:
9107             *dst++ = 0x90;
9108             __fallthrough;
9109         case 1:
9110             *dst++ = 0x90;
9111             break;
9112         case 0:
9113             break;
9114     }
9115 #else  // _TARGET_AMD64_
9116     switch (nBytes)
9117     {
9118         case 2:
9119             *dst++ = 0x66;
9120             __fallthrough;
9121         case 1:
9122             *dst++ = 0x90;
9123             break;
9124         case 0:
9125             break;
9126         case 3:
9127             *dst++ = 0x0F;
9128             *dst++ = 0x1F;
9129             *dst++ = 0x00;
9130             break;
9131         case 4:
9132             *dst++ = 0x0F;
9133             *dst++ = 0x1F;
9134             *dst++ = 0x40;
9135             *dst++ = 0x00;
9136             break;
9137         case 6:
9138             *dst++ = 0x66;
9139             __fallthrough;
9140         case 5:
9141             *dst++ = 0x0F;
9142             *dst++ = 0x1F;
9143             *dst++ = 0x44;
9144             *dst++ = 0x00;
9145             *dst++ = 0x00;
9146             break;
9147         case 7:
9148             *dst++ = 0x0F;
9149             *dst++ = 0x1F;
9150             *dst++ = 0x80;
9151             *dst++ = 0x00;
9152             *dst++ = 0x00;
9153             *dst++ = 0x00;
9154             *dst++ = 0x00;
9155             break;
9156         case 15:
9157             // More than 3 prefixes is slower than just 2 NOPs
9158             dst = emitOutputNOP(emitOutputNOP(dst, 7), 8);
9159             break;
9160         case 14:
9161             // More than 3 prefixes is slower than just 2 NOPs
9162             dst = emitOutputNOP(emitOutputNOP(dst, 7), 7);
9163             break;
9164         case 13:
9165             // More than 3 prefixes is slower than just 2 NOPs
9166             dst = emitOutputNOP(emitOutputNOP(dst, 5), 8);
9167             break;
9168         case 12:
9169             // More than 3 prefixes is slower than just 2 NOPs
9170             dst = emitOutputNOP(emitOutputNOP(dst, 4), 8);
9171             break;
9172         case 11:
9173             *dst++ = 0x66;
9174             __fallthrough;
9175         case 10:
9176             *dst++ = 0x66;
9177             __fallthrough;
9178         case 9:
9179             *dst++ = 0x66;
9180             __fallthrough;
9181         case 8:
9182             *dst++ = 0x0F;
9183             *dst++ = 0x1F;
9184             *dst++ = 0x84;
9185             *dst++ = 0x00;
9186             *dst++ = 0x00;
9187             *dst++ = 0x00;
9188             *dst++ = 0x00;
9189             *dst++ = 0x00;
9190             break;
9191     }
9192 #endif // _TARGET_AMD64_
9193
9194     return dst;
9195 }
9196
9197 /*****************************************************************************
9198  *
9199  *  Output an instruction involving an address mode.
9200  */
9201
9202 BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
9203 {
9204     regNumber reg;
9205     regNumber rgx;
9206     ssize_t   dsp;
9207     bool      dspInByte;
9208     bool      dspIsZero;
9209
9210     instruction ins  = id->idIns();
9211     emitAttr    size = id->idOpSize();
9212     size_t      opsz = EA_SIZE_IN_BYTES(size);
9213
9214     // Get the base/index registers
9215     reg = id->idAddr()->iiaAddrMode.amBaseReg;
9216     rgx = id->idAddr()->iiaAddrMode.amIndxReg;
9217
9218     // For INS_call the instruction size is actually the return value size
9219     if (ins == INS_call)
9220     {
9221         // Special case: call via a register
9222         if (id->idIsCallRegPtr())
9223         {
9224             code_t opcode = insEncodeMRreg(INS_call, reg, EA_PTRSIZE, insCodeMR(INS_call));
9225
9226             dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, opcode);
9227             dst += emitOutputWord(dst, opcode);
9228             goto DONE;
9229         }
9230
9231         // The displacement field is in an unusual place for calls
9232         dsp = emitGetInsCIdisp(id);
9233
9234 #ifdef _TARGET_AMD64_
9235
9236         // Compute the REX prefix if it exists
9237         if (IsExtendedReg(reg, EA_PTRSIZE))
9238         {
9239             insEncodeReg012(ins, reg, EA_PTRSIZE, &code);
9240             // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
9241             reg = (regNumber)RegEncoding(reg);
9242         }
9243
9244         if (IsExtendedReg(rgx, EA_PTRSIZE))
9245         {
9246             insEncodeRegSIB(ins, rgx, &code);
9247             // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
9248             rgx = (regNumber)RegEncoding(rgx);
9249         }
9250
9251         // And emit the REX prefix
9252         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9253
9254 #endif // _TARGET_AMD64_
9255
9256         goto GOT_DSP;
9257     }
9258
9259     // `addc` is used for two kinds of instructions
9260     // 1. ins like ADD that can have reg/mem and const versions both and const version needs to modify the opcode for
9261     // large constant operand (e.g., imm32)
9262     // 2. certain SSE/AVX ins have const operand as control bits that is always 1-Byte (imm8) even if `size` > 1-Byte
9263     if (addc && (size > EA_1BYTE))
9264     {
9265         ssize_t cval = addc->cnsVal;
9266
9267         // Does the constant fit in a byte?
9268         // SSE/AVX do not need to modify opcode
9269         if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
9270         {
9271             if (id->idInsFmt() != IF_ARW_SHF && !IsSSEOrAVXInstruction(ins))
9272             {
9273                 code |= 2;
9274             }
9275
9276             opsz = 1;
9277         }
9278     }
9279
9280     // Emit VEX prefix if required
9281     // There are some callers who already add VEX prefix and call this routine.
9282     // Therefore, add a VEX prefix if one is not already present.
9283     code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
9284
9285     // For this format, moves do not support a third operand, so we only need to handle the binary ops.
9286     if (TakesVexPrefix(ins))
9287     {
9288         if (IsDstDstSrcAVXInstruction(ins))
9289         {
9290             regNumber src1 = REG_NA;
9291
9292             switch (id->idInsFmt())
9293             {
9294                 case IF_RWR_RRD_ARD:
9295                 case IF_RWR_ARD_RRD:
9296                 case IF_RWR_RRD_ARD_CNS:
9297                 case IF_RWR_RRD_ARD_RRD:
9298                 {
9299                     src1 = id->idReg2();
9300                     break;
9301                 }
9302
9303                 default:
9304                 {
9305                     src1 = id->idReg1();
9306                     break;
9307                 }
9308             }
9309
9310             // encode source operand reg in 'vvvv' bits in 1's complement form
9311             code = insEncodeReg3456(ins, src1, size, code);
9312         }
9313         else if (IsDstSrcSrcAVXInstruction(ins))
9314         {
9315             code = insEncodeReg3456(ins, id->idReg2(), size, code);
9316         }
9317     }
9318
9319     // Emit the REX prefix if required
9320     if (TakesRexWPrefix(ins, size))
9321     {
9322         code = AddRexWPrefix(ins, code);
9323     }
9324
9325     if (IsExtendedReg(reg, EA_PTRSIZE))
9326     {
9327         insEncodeReg012(ins, reg, EA_PTRSIZE, &code);
9328         // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
9329         reg = (regNumber)RegEncoding(reg);
9330     }
9331
9332     if (IsExtendedReg(rgx, EA_PTRSIZE))
9333     {
9334         insEncodeRegSIB(ins, rgx, &code);
9335         // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
9336         rgx = (regNumber)RegEncoding(rgx);
9337     }
9338
9339     // Special case emitting AVX instructions
9340     if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9341     {
9342         if ((ins == INS_crc32) && (size > EA_1BYTE))
9343         {
9344             code |= 0x0100;
9345
9346             if (size == EA_2BYTE)
9347             {
9348                 dst += emitOutputByte(dst, 0x66);
9349             }
9350         }
9351
9352         regNumber reg345 = REG_NA;
9353         if (IsBMIInstruction(ins))
9354         {
9355             reg345 = getBmiRegNumber(ins);
9356         }
9357         if (reg345 == REG_NA)
9358         {
9359             switch (id->idInsFmt())
9360             {
9361                 case IF_AWR_RRD_RRD:
9362                 {
9363                     reg345 = id->idReg2();
9364                     break;
9365                 }
9366
9367                 default:
9368                 {
9369                     reg345 = id->idReg1();
9370                     break;
9371                 }
9372             }
9373         }
9374         unsigned regcode = insEncodeReg345(ins, reg345, size, &code);
9375
9376         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9377
9378         if (UseVEXEncoding() && (ins != INS_crc32))
9379         {
9380             // Emit last opcode byte
9381             // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
9382             assert((code & 0xFF) == 0);
9383             dst += emitOutputByte(dst, (code >> 8) & 0xFF);
9384         }
9385         else
9386         {
9387             dst += emitOutputWord(dst, code >> 16);
9388             dst += emitOutputWord(dst, code & 0xFFFF);
9389         }
9390
9391         code = regcode;
9392     }
9393     // Is this a 'big' opcode?
9394     else if (code & 0xFF000000)
9395     {
9396         // Output the REX prefix
9397         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9398
9399         // Output the highest word of the opcode
9400         // We need to check again as in case of AVX instructions leading opcode bytes are stripped off
9401         // and encoded as part of VEX prefix.
9402         if (code & 0xFF000000)
9403         {
9404             dst += emitOutputWord(dst, code >> 16);
9405             code &= 0x0000FFFF;
9406         }
9407     }
9408     else if (code & 0x00FF0000)
9409     {
9410         // BT supports 16 bit operands and this code doesn't handle the necessary 66 prefix.
9411         assert(ins != INS_bt);
9412
9413         // Output the REX prefix
9414         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9415
9416         // Output the highest byte of the opcode
9417         if (code & 0x00FF0000)
9418         {
9419             dst += emitOutputByte(dst, code >> 16);
9420             code &= 0x0000FFFF;
9421         }
9422
9423         // Use the large version if this is not a byte. This trick will not
9424         // work in case of SSE2 and AVX instructions.
9425         if ((size != EA_1BYTE) && (ins != INS_imul) && !IsSSEInstruction(ins) && !IsAVXInstruction(ins))
9426         {
9427             code++;
9428         }
9429     }
9430     else if (CodeGen::instIsFP(ins))
9431     {
9432         assert(size == EA_4BYTE || size == EA_8BYTE);
9433         if (size == EA_8BYTE)
9434         {
9435             code += 4;
9436         }
9437     }
9438     else if (!IsSSEInstruction(ins) && !IsAVXInstruction(ins))
9439     {
9440         /* Is the operand size larger than a byte? */
9441
9442         switch (size)
9443         {
9444             case EA_1BYTE:
9445                 break;
9446
9447             case EA_2BYTE:
9448
9449                 /* Output a size prefix for a 16-bit operand */
9450
9451                 dst += emitOutputByte(dst, 0x66);
9452
9453                 __fallthrough;
9454
9455             case EA_4BYTE:
9456 #ifdef _TARGET_AMD64_
9457             case EA_8BYTE:
9458 #endif
9459
9460                 /* Set the 'w' bit to get the large version */
9461
9462                 code |= 0x1;
9463                 break;
9464
9465 #ifdef _TARGET_X86_
9466             case EA_8BYTE:
9467
9468                 /* Double operand - set the appropriate bit */
9469
9470                 code |= 0x04;
9471                 break;
9472
9473 #endif // _TARGET_X86_
9474
9475             default:
9476                 NO_WAY("unexpected size");
9477                 break;
9478         }
9479     }
9480
9481     // Output the REX prefix
9482     dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9483
9484     // Get the displacement value
9485     dsp = emitGetInsAmdAny(id);
9486
9487 GOT_DSP:
9488
9489     dspInByte = ((signed char)dsp == (ssize_t)dsp);
9490     dspIsZero = (dsp == 0);
9491
9492     if (id->idIsDspReloc())
9493     {
9494         dspInByte = false; // relocs can't be placed in a byte
9495     }
9496
9497     // Is there a [scaled] index component?
9498     if (rgx == REG_NA)
9499     {
9500         // The address is of the form "[reg+disp]"
9501         switch (reg)
9502         {
9503             case REG_NA:
9504             {
9505                 if (id->idIsDspReloc())
9506                 {
9507                     INT32 addlDelta = 0;
9508
9509                     // The address is of the form "[disp]"
9510                     // On x86 - disp is relative to zero
9511                     // On Amd64 - disp is relative to RIP
9512                     if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9513                     {
9514                         dst += emitOutputByte(dst, code | 0x05);
9515                     }
9516                     else
9517                     {
9518                         dst += emitOutputWord(dst, code | 0x0500);
9519                     }
9520
9521                     if (addc)
9522                     {
9523                         // It is of the form "ins [disp], imm" or "ins reg, [disp], imm"
9524                         // When emitting the relocation, we also need to take into account the
9525                         // additional bytes of code emitted for the immediate value.
9526
9527                         ssize_t cval = addc->cnsVal;
9528
9529 #ifdef _TARGET_AMD64_
9530                         // all these opcodes only take a sign-extended 4-byte immediate
9531                         noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
9532 #else  //_TARGET_X86_
9533                         noway_assert(opsz <= 4);
9534 #endif //_TARGET_X86_
9535
9536                         switch (opsz)
9537                         {
9538                             case 0:
9539                             case 4:
9540                             case 8:
9541                                 addlDelta = -4;
9542                                 break;
9543                             case 2:
9544                                 addlDelta = -2;
9545                                 break;
9546                             case 1:
9547                                 addlDelta = -1;
9548                                 break;
9549
9550                             default:
9551                                 assert(!"unexpected operand size");
9552                                 unreached();
9553                         }
9554                     }
9555
9556 #ifdef _TARGET_AMD64_
9557                     // We emit zero on Amd64, to avoid the assert in emitOutputLong()
9558                     dst += emitOutputLong(dst, 0);
9559 #else
9560                     dst += emitOutputLong(dst, dsp);
9561 #endif
9562                     emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_DISP32, 0,
9563                                          addlDelta);
9564                 }
9565                 else
9566                 {
9567 #ifdef _TARGET_X86_
9568                     if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9569                     {
9570                         dst += emitOutputByte(dst, code | 0x05);
9571                     }
9572                     else
9573                     {
9574                         dst += emitOutputWord(dst, code | 0x0500);
9575                     }
9576 #else  //_TARGET_AMD64_
9577                     // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
9578                     // This addr mode should never be used while generating relocatable ngen code nor if
9579                     // the addr can be encoded as pc-relative address.
9580                     noway_assert(!emitComp->opts.compReloc);
9581                     noway_assert(codeGen->genAddrRelocTypeHint((size_t)dsp) != IMAGE_REL_BASED_REL32);
9582                     noway_assert((int)dsp == dsp);
9583
9584                     // This requires specifying a SIB byte after the ModRM byte.
9585                     if (EncodedBySSE38orSSE3A(ins))
9586                     {
9587                         dst += emitOutputByte(dst, code | 0x04);
9588                     }
9589                     else
9590                     {
9591                         dst += emitOutputWord(dst, code | 0x0400);
9592                     }
9593                     dst += emitOutputByte(dst, 0x25);
9594 #endif //_TARGET_AMD64_
9595                     dst += emitOutputLong(dst, dsp);
9596                 }
9597                 break;
9598             }
9599
9600             case REG_EBP:
9601             {
9602                 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9603                 {
9604                     // Does the offset fit in a byte?
9605                     if (dspInByte)
9606                     {
9607                         dst += emitOutputByte(dst, code | 0x45);
9608                         dst += emitOutputByte(dst, dsp);
9609                     }
9610                     else
9611                     {
9612                         dst += emitOutputByte(dst, code | 0x85);
9613                         dst += emitOutputLong(dst, dsp);
9614
9615                         if (id->idIsDspReloc())
9616                         {
9617                             emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9618                         }
9619                     }
9620                 }
9621                 else
9622                 {
9623                     // Does the offset fit in a byte?
9624                     if (dspInByte)
9625                     {
9626                         dst += emitOutputWord(dst, code | 0x4500);
9627                         dst += emitOutputByte(dst, dsp);
9628                     }
9629                     else
9630                     {
9631                         dst += emitOutputWord(dst, code | 0x8500);
9632                         dst += emitOutputLong(dst, dsp);
9633
9634                         if (id->idIsDspReloc())
9635                         {
9636                             emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9637                         }
9638                     }
9639                 }
9640                 break;
9641             }
9642
9643             case REG_ESP:
9644             {
9645                 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9646                 {
9647                     // Is the offset 0 or does it at least fit in a byte?
9648                     if (dspIsZero)
9649                     {
9650                         dst += emitOutputByte(dst, code | 0x04);
9651                         dst += emitOutputByte(dst, 0x24);
9652                     }
9653                     else if (dspInByte)
9654                     {
9655                         dst += emitOutputByte(dst, code | 0x44);
9656                         dst += emitOutputByte(dst, 0x24);
9657                         dst += emitOutputByte(dst, dsp);
9658                     }
9659                     else
9660                     {
9661                         dst += emitOutputByte(dst, code | 0x84);
9662                         dst += emitOutputByte(dst, 0x24);
9663                         dst += emitOutputLong(dst, dsp);
9664                         if (id->idIsDspReloc())
9665                         {
9666                             emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9667                         }
9668                     }
9669                 }
9670                 else
9671                 {
9672                     // Is the offset 0 or does it at least fit in a byte?
9673                     if (dspIsZero)
9674                     {
9675                         dst += emitOutputWord(dst, code | 0x0400);
9676                         dst += emitOutputByte(dst, 0x24);
9677                     }
9678                     else if (dspInByte)
9679                     {
9680                         dst += emitOutputWord(dst, code | 0x4400);
9681                         dst += emitOutputByte(dst, 0x24);
9682                         dst += emitOutputByte(dst, dsp);
9683                     }
9684                     else
9685                     {
9686                         dst += emitOutputWord(dst, code | 0x8400);
9687                         dst += emitOutputByte(dst, 0x24);
9688                         dst += emitOutputLong(dst, dsp);
9689                         if (id->idIsDspReloc())
9690                         {
9691                             emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9692                         }
9693                     }
9694                 }
9695                 break;
9696             }
9697
9698             default:
9699             {
9700                 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9701                 {
9702                     // Put the register in the opcode
9703                     code |= insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr);
9704
9705                     // Is there a displacement?
9706                     if (dspIsZero)
9707                     {
9708                         // This is simply "[reg]"
9709                         dst += emitOutputByte(dst, code);
9710                     }
9711                     else
9712                     {
9713                         // This is "[reg + dsp]" -- does the offset fit in a byte?
9714                         if (dspInByte)
9715                         {
9716                             dst += emitOutputByte(dst, code | 0x40);
9717                             dst += emitOutputByte(dst, dsp);
9718                         }
9719                         else
9720                         {
9721                             dst += emitOutputByte(dst, code | 0x80);
9722                             dst += emitOutputLong(dst, dsp);
9723                             if (id->idIsDspReloc())
9724                             {
9725                                 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9726                             }
9727                         }
9728                     }
9729                 }
9730                 else
9731                 {
9732                     // Put the register in the opcode
9733                     code |= insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) << 8;
9734
9735                     // Is there a displacement?
9736                     if (dspIsZero)
9737                     {
9738                         // This is simply "[reg]"
9739                         dst += emitOutputWord(dst, code);
9740                     }
9741                     else
9742                     {
9743                         // This is "[reg + dsp]" -- does the offset fit in a byte?
9744                         if (dspInByte)
9745                         {
9746                             dst += emitOutputWord(dst, code | 0x4000);
9747                             dst += emitOutputByte(dst, dsp);
9748                         }
9749                         else
9750                         {
9751                             dst += emitOutputWord(dst, code | 0x8000);
9752                             dst += emitOutputLong(dst, dsp);
9753                             if (id->idIsDspReloc())
9754                             {
9755                                 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9756                             }
9757                         }
9758                     }
9759                 }
9760
9761                 break;
9762             }
9763         }
9764     }
9765     else
9766     {
9767         unsigned regByte;
9768
9769         // We have a scaled index operand
9770         unsigned mul = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale);
9771
9772         // Is the index operand scaled?
9773         if (mul > 1)
9774         {
9775             // Is there a base register?
9776             if (reg != REG_NA)
9777             {
9778                 // The address is "[reg + {2/4/8} * rgx + icon]"
9779                 regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) |
9780                           insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul);
9781
9782                 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9783                 {
9784                     // Emit [ebp + {2/4/8} * rgx] as [ebp + {2/4/8} * rgx + 0]
9785                     if (dspIsZero && reg != REG_EBP)
9786                     {
9787                         // The address is "[reg + {2/4/8} * rgx]"
9788                         dst += emitOutputByte(dst, code | 0x04);
9789                         dst += emitOutputByte(dst, regByte);
9790                     }
9791                     else
9792                     {
9793                         // The address is "[reg + {2/4/8} * rgx + disp]"
9794                         if (dspInByte)
9795                         {
9796                             dst += emitOutputByte(dst, code | 0x44);
9797                             dst += emitOutputByte(dst, regByte);
9798                             dst += emitOutputByte(dst, dsp);
9799                         }
9800                         else
9801                         {
9802                             dst += emitOutputByte(dst, code | 0x84);
9803                             dst += emitOutputByte(dst, regByte);
9804                             dst += emitOutputLong(dst, dsp);
9805                             if (id->idIsDspReloc())
9806                             {
9807                                 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9808                             }
9809                         }
9810                     }
9811                 }
9812                 else
9813                 {
9814                     // Emit [ebp + {2/4/8} * rgx] as [ebp + {2/4/8} * rgx + 0]
9815                     if (dspIsZero && reg != REG_EBP)
9816                     {
9817                         // The address is "[reg + {2/4/8} * rgx]"
9818                         dst += emitOutputWord(dst, code | 0x0400);
9819                         dst += emitOutputByte(dst, regByte);
9820                     }
9821                     else
9822                     {
9823                         // The address is "[reg + {2/4/8} * rgx + disp]"
9824                         if (dspInByte)
9825                         {
9826                             dst += emitOutputWord(dst, code | 0x4400);
9827                             dst += emitOutputByte(dst, regByte);
9828                             dst += emitOutputByte(dst, dsp);
9829                         }
9830                         else
9831                         {
9832                             dst += emitOutputWord(dst, code | 0x8400);
9833                             dst += emitOutputByte(dst, regByte);
9834                             dst += emitOutputLong(dst, dsp);
9835                             if (id->idIsDspReloc())
9836                             {
9837                                 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9838                             }
9839                         }
9840                     }
9841                 }
9842             }
9843             else
9844             {
9845                 // The address is "[{2/4/8} * rgx + icon]"
9846                 regByte = insEncodeReg012(ins, REG_EBP, EA_PTRSIZE, nullptr) |
9847                           insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul);
9848
9849                 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9850                 {
9851                     dst += emitOutputByte(dst, code | 0x04);
9852                 }
9853                 else
9854                 {
9855                     dst += emitOutputWord(dst, code | 0x0400);
9856                 }
9857
9858                 dst += emitOutputByte(dst, regByte);
9859
9860                 // Special case: jump through a jump table
9861                 if (ins == INS_i_jmp)
9862                 {
9863                     dsp += (size_t)emitConsBlock;
9864                 }
9865
9866                 dst += emitOutputLong(dst, dsp);
9867                 if (id->idIsDspReloc())
9868                 {
9869                     emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9870                 }
9871             }
9872         }
9873         else
9874         {
9875             // The address is "[reg+rgx+dsp]"
9876             regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) | insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr);
9877
9878             if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9879             {
9880                 if (dspIsZero && reg != REG_EBP)
9881                 {
9882                     // This is "[reg+rgx]"
9883                     dst += emitOutputByte(dst, code | 0x04);
9884                     dst += emitOutputByte(dst, regByte);
9885                 }
9886                 else
9887                 {
9888                     // This is "[reg+rgx+dsp]" -- does the offset fit in a byte?
9889                     if (dspInByte)
9890                     {
9891                         dst += emitOutputByte(dst, code | 0x44);
9892                         dst += emitOutputByte(dst, regByte);
9893                         dst += emitOutputByte(dst, dsp);
9894                     }
9895                     else
9896                     {
9897                         dst += emitOutputByte(dst, code | 0x84);
9898                         dst += emitOutputByte(dst, regByte);
9899                         dst += emitOutputLong(dst, dsp);
9900                         if (id->idIsDspReloc())
9901                         {
9902                             emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9903                         }
9904                     }
9905                 }
9906             }
9907             else
9908             {
9909                 if (dspIsZero && reg != REG_EBP)
9910                 {
9911                     // This is "[reg+rgx]"
9912                     dst += emitOutputWord(dst, code | 0x0400);
9913                     dst += emitOutputByte(dst, regByte);
9914                 }
9915                 else
9916                 {
9917                     // This is "[reg+rgx+dsp]" -- does the offset fit in a byte?
9918                     if (dspInByte)
9919                     {
9920                         dst += emitOutputWord(dst, code | 0x4400);
9921                         dst += emitOutputByte(dst, regByte);
9922                         dst += emitOutputByte(dst, dsp);
9923                     }
9924                     else
9925                     {
9926                         dst += emitOutputWord(dst, code | 0x8400);
9927                         dst += emitOutputByte(dst, regByte);
9928                         dst += emitOutputLong(dst, dsp);
9929                         if (id->idIsDspReloc())
9930                         {
9931                             emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9932                         }
9933                     }
9934                 }
9935             }
9936         }
9937     }
9938
9939     // Now generate the constant value, if present
9940     if (addc)
9941     {
9942         ssize_t cval = addc->cnsVal;
9943
9944 #ifdef _TARGET_AMD64_
9945         // all these opcodes only take a sign-extended 4-byte immediate
9946         noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
9947 #endif
9948
9949         switch (opsz)
9950         {
9951             case 0:
9952             case 4:
9953             case 8:
9954                 dst += emitOutputLong(dst, cval);
9955                 break;
9956             case 2:
9957                 dst += emitOutputWord(dst, cval);
9958                 break;
9959             case 1:
9960                 dst += emitOutputByte(dst, cval);
9961                 break;
9962
9963             default:
9964                 assert(!"unexpected operand size");
9965         }
9966
9967         if (addc->cnsReloc)
9968         {
9969             emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
9970             assert(opsz == 4);
9971         }
9972     }
9973
9974 DONE:
9975
9976     // Does this instruction operate on a GC ref value?
9977     if (id->idGCref())
9978     {
9979         switch (id->idInsFmt())
9980         {
9981             case IF_ARD:
9982             case IF_AWR:
9983             case IF_ARW:
9984                 break;
9985
9986             case IF_RRD_ARD:
9987                 break;
9988
9989             case IF_RWR_ARD:
9990                 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
9991                 break;
9992
9993             case IF_RRW_ARD:
9994                 // Mark the destination register as holding a GCT_BYREF
9995                 assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
9996                 emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
9997                 break;
9998
9999             case IF_ARD_RRD:
10000             case IF_AWR_RRD:
10001                 break;
10002
10003             case IF_AWR_RRD_RRD:
10004                 break;
10005
10006             case IF_ARD_CNS:
10007             case IF_AWR_CNS:
10008                 break;
10009
10010             case IF_ARW_RRD:
10011             case IF_ARW_CNS:
10012                 assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
10013                 break;
10014
10015             default:
10016 #ifdef DEBUG
10017                 emitDispIns(id, false, false, false);
10018 #endif
10019                 assert(!"unexpected GC ref instruction format");
10020         }
10021
10022         // mul can never produce a GC ref
10023         assert(!instrIs3opImul(ins));
10024         assert(ins != INS_mulEAX && ins != INS_imulEAX);
10025     }
10026     else
10027     {
10028         if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
10029         {
10030             switch (id->idInsFmt())
10031             {
10032                 case IF_RWR_ARD:
10033                 case IF_RRW_ARD:
10034                 case IF_RWR_RRD_ARD:
10035                     emitGCregDeadUpd(id->idReg1(), dst);
10036                     break;
10037                 default:
10038                     break;
10039             }
10040
10041             if (ins == INS_mulEAX || ins == INS_imulEAX)
10042             {
10043                 emitGCregDeadUpd(REG_EAX, dst);
10044                 emitGCregDeadUpd(REG_EDX, dst);
10045             }
10046
10047             // For the three operand imul instruction the target register
10048             // is encoded in the opcode
10049
10050             if (instrIs3opImul(ins))
10051             {
10052                 regNumber tgtReg = inst3opImulReg(ins);
10053                 emitGCregDeadUpd(tgtReg, dst);
10054             }
10055         }
10056     }
10057
10058     return dst;
10059 }
10060
10061 /*****************************************************************************
10062  *
10063  *  Output an instruction involving a stack frame value.
10064  */
10065
10066 BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
10067 {
10068     int  adr;
10069     int  dsp;
10070     bool EBPbased;
10071     bool dspInByte;
10072     bool dspIsZero;
10073
10074     instruction ins  = id->idIns();
10075     emitAttr    size = id->idOpSize();
10076     size_t      opsz = EA_SIZE_IN_BYTES(size);
10077
10078     assert(ins != INS_imul || id->idReg1() == REG_EAX || size == EA_4BYTE || size == EA_8BYTE);
10079
10080     // `addc` is used for two kinds of instructions
10081     // 1. ins like ADD that can have reg/mem and const versions both and const version needs to modify the opcode for
10082     // large constant operand (e.g., imm32)
10083     // 2. certain SSE/AVX ins have const operand as control bits that is always 1-Byte (imm8) even if `size` > 1-Byte
10084     if (addc && (size > EA_1BYTE))
10085     {
10086         ssize_t cval = addc->cnsVal;
10087
10088         // Does the constant fit in a byte?
10089         // SSE/AVX do not need to modify opcode
10090         if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
10091         {
10092             if ((id->idInsFmt() != IF_SRW_SHF) && (id->idInsFmt() != IF_RRW_SRD_CNS) &&
10093                 (id->idInsFmt() != IF_RWR_RRD_SRD_CNS) && !IsSSEOrAVXInstruction(ins))
10094             {
10095                 code |= 2;
10096             }
10097
10098             opsz = 1;
10099         }
10100     }
10101
10102     // Add VEX prefix if required.
10103     // There are some callers who already add VEX prefix and call this routine.
10104     // Therefore, add VEX prefix if one is not already present.
10105     code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
10106
10107     // Compute the REX prefix
10108     if (TakesRexWPrefix(ins, size))
10109     {
10110         code = AddRexWPrefix(ins, code);
10111     }
10112
10113     // Special case emitting AVX instructions
10114     if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
10115     {
10116         if ((ins == INS_crc32) && (size > EA_1BYTE))
10117         {
10118             code |= 0x0100;
10119
10120             if (size == EA_2BYTE)
10121             {
10122                 dst += emitOutputByte(dst, 0x66);
10123             }
10124         }
10125
10126         regNumber reg345 = REG_NA;
10127         if (IsBMIInstruction(ins))
10128         {
10129             reg345 = getBmiRegNumber(ins);
10130         }
10131         if (reg345 == REG_NA)
10132         {
10133             reg345 = id->idReg1();
10134         }
10135         else
10136         {
10137             code = insEncodeReg3456(ins, id->idReg1(), size, code);
10138         }
10139         unsigned regcode = insEncodeReg345(ins, reg345, size, &code);
10140
10141         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10142
10143         if (UseVEXEncoding() && (ins != INS_crc32))
10144         {
10145             // Emit last opcode byte
10146             // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
10147             assert((code & 0xFF) == 0);
10148             dst += emitOutputByte(dst, (code >> 8) & 0xFF);
10149         }
10150         else
10151         {
10152             dst += emitOutputWord(dst, code >> 16);
10153             dst += emitOutputWord(dst, code & 0xFFFF);
10154         }
10155
10156         code = regcode;
10157     }
10158     // Is this a 'big' opcode?
10159     else if (code & 0xFF000000)
10160     {
10161         // Output the REX prefix
10162         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10163
10164         // Output the highest word of the opcode
10165         // We need to check again because in case of AVX instructions the leading
10166         // escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix.
10167         if (code & 0xFF000000)
10168         {
10169             dst += emitOutputWord(dst, code >> 16);
10170             code &= 0x0000FFFF;
10171         }
10172     }
10173     else if (code & 0x00FF0000)
10174     {
10175         // BT supports 16 bit operands and this code doesn't add the necessary 66 prefix.
10176         assert(ins != INS_bt);
10177
10178         // Output the REX prefix
10179         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10180
10181         // Output the highest byte of the opcode.
10182         // We need to check again because in case of AVX instructions the leading
10183         // escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix.
10184         if (code & 0x00FF0000)
10185         {
10186             dst += emitOutputByte(dst, code >> 16);
10187             code &= 0x0000FFFF;
10188         }
10189
10190         // Use the large version if this is not a byte
10191         if ((size != EA_1BYTE) && (ins != INS_imul) && (!insIsCMOV(ins)) && !IsSSEInstruction(ins) &&
10192             !IsAVXInstruction(ins))
10193         {
10194             code |= 0x1;
10195         }
10196     }
10197     else if (CodeGen::instIsFP(ins))
10198     {
10199         assert(size == EA_4BYTE || size == EA_8BYTE);
10200
10201         if (size == EA_8BYTE)
10202         {
10203             code += 4;
10204         }
10205     }
10206     else if (!IsSSEInstruction(ins) && !IsAVXInstruction(ins))
10207     {
10208         // Is the operand size larger than a byte?
10209         switch (size)
10210         {
10211             case EA_1BYTE:
10212                 break;
10213
10214             case EA_2BYTE:
10215                 // Output a size prefix for a 16-bit operand
10216                 dst += emitOutputByte(dst, 0x66);
10217                 __fallthrough;
10218
10219             case EA_4BYTE:
10220 #ifdef _TARGET_AMD64_
10221             case EA_8BYTE:
10222 #endif // _TARGET_AMD64_
10223
10224                 /* Set the 'w' size bit to indicate 32-bit operation
10225                  * Note that incrementing "code" for INS_call (0xFF) would
10226                  * overflow, whereas setting the lower bit to 1 just works out
10227                  */
10228
10229                 code |= 0x01;
10230                 break;
10231
10232 #ifdef _TARGET_X86_
10233             case EA_8BYTE:
10234
10235                 // Double operand - set the appropriate bit.
10236                 // I don't know what a legitimate reason to end up in this case would be
10237                 // considering that FP is taken care of above...
10238                 // what is an instruction that takes a double which is not covered by the
10239                 // above instIsFP? Of the list in instrsxarch, only INS_fprem
10240                 code |= 0x04;
10241                 NO_WAY("bad 8 byte op");
10242                 break;
10243 #endif // _TARGET_X86_
10244
10245             default:
10246                 NO_WAY("unexpected size");
10247                 break;
10248         }
10249     }
10250
10251     // Output the REX prefix
10252     dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10253
10254     // Figure out the variable's frame position
10255     int varNum = id->idAddr()->iiaLclVar.lvaVarNum();
10256
10257     adr = emitComp->lvaFrameAddress(varNum, &EBPbased);
10258     dsp = adr + id->idAddr()->iiaLclVar.lvaOffset();
10259
10260     dspInByte = ((signed char)dsp == (int)dsp);
10261     dspIsZero = (dsp == 0);
10262
10263     // for stack variables the dsp should never be a reloc
10264     assert(id->idIsDspReloc() == 0);
10265
10266     if (EBPbased)
10267     {
10268         // EBP-based variable: does the offset fit in a byte?
10269         if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
10270         {
10271             if (dspInByte)
10272             {
10273                 dst += emitOutputByte(dst, code | 0x45);
10274                 dst += emitOutputByte(dst, dsp);
10275             }
10276             else
10277             {
10278                 dst += emitOutputByte(dst, code | 0x85);
10279                 dst += emitOutputLong(dst, dsp);
10280             }
10281         }
10282         else
10283         {
10284             if (dspInByte)
10285             {
10286                 dst += emitOutputWord(dst, code | 0x4500);
10287                 dst += emitOutputByte(dst, dsp);
10288             }
10289             else
10290             {
10291                 dst += emitOutputWord(dst, code | 0x8500);
10292                 dst += emitOutputLong(dst, dsp);
10293             }
10294         }
10295     }
10296     else
10297     {
10298
10299 #if !FEATURE_FIXED_OUT_ARGS
10300         // Adjust the offset by the amount currently pushed on the CPU stack
10301         dsp += emitCurStackLvl;
10302 #endif
10303
10304         dspInByte = ((signed char)dsp == (int)dsp);
10305         dspIsZero = (dsp == 0);
10306
10307         // Does the offset fit in a byte?
10308         if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
10309         {
10310             if (dspInByte)
10311             {
10312                 if (dspIsZero)
10313                 {
10314                     dst += emitOutputByte(dst, code | 0x04);
10315                     dst += emitOutputByte(dst, 0x24);
10316                 }
10317                 else
10318                 {
10319                     dst += emitOutputByte(dst, code | 0x44);
10320                     dst += emitOutputByte(dst, 0x24);
10321                     dst += emitOutputByte(dst, dsp);
10322                 }
10323             }
10324             else
10325             {
10326                 dst += emitOutputByte(dst, code | 0x84);
10327                 dst += emitOutputByte(dst, 0x24);
10328                 dst += emitOutputLong(dst, dsp);
10329             }
10330         }
10331         else
10332         {
10333             if (dspInByte)
10334             {
10335                 if (dspIsZero)
10336                 {
10337                     dst += emitOutputWord(dst, code | 0x0400);
10338                     dst += emitOutputByte(dst, 0x24);
10339                 }
10340                 else
10341                 {
10342                     dst += emitOutputWord(dst, code | 0x4400);
10343                     dst += emitOutputByte(dst, 0x24);
10344                     dst += emitOutputByte(dst, dsp);
10345                 }
10346             }
10347             else
10348             {
10349                 dst += emitOutputWord(dst, code | 0x8400);
10350                 dst += emitOutputByte(dst, 0x24);
10351                 dst += emitOutputLong(dst, dsp);
10352             }
10353         }
10354     }
10355
10356     // Now generate the constant value, if present
10357     if (addc)
10358     {
10359         ssize_t cval = addc->cnsVal;
10360
10361 #ifdef _TARGET_AMD64_
10362         // all these opcodes only take a sign-extended 4-byte immediate
10363         noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
10364 #endif
10365
10366         switch (opsz)
10367         {
10368             case 0:
10369             case 4:
10370             case 8:
10371                 dst += emitOutputLong(dst, cval);
10372                 break;
10373             case 2:
10374                 dst += emitOutputWord(dst, cval);
10375                 break;
10376             case 1:
10377                 dst += emitOutputByte(dst, cval);
10378                 break;
10379
10380             default:
10381                 assert(!"unexpected operand size");
10382         }
10383
10384         if (addc->cnsReloc)
10385         {
10386             emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
10387             assert(opsz == 4);
10388         }
10389     }
10390
10391     // Does this instruction operate on a GC ref value?
10392     if (id->idGCref())
10393     {
10394         // Factor in the sub-variable offset
10395         adr += AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE);
10396
10397         switch (id->idInsFmt())
10398         {
10399             case IF_SRD:
10400                 // Read  stack                    -- no change
10401                 break;
10402
10403             case IF_SWR: // Stack Write (So we need to update GC live for stack var)
10404                 // Write stack                    -- GC var may be born
10405                 emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst);
10406                 break;
10407
10408             case IF_SRD_CNS:
10409                 // Read  stack                    -- no change
10410                 break;
10411
10412             case IF_SWR_CNS:
10413                 // Write stack                    -- no change
10414                 break;
10415
10416             case IF_SRD_RRD:
10417             case IF_RRD_SRD:
10418                 // Read  stack   , read  register -- no change
10419                 break;
10420
10421             case IF_RWR_SRD: // Register Write, Stack Read (So we need to update GC live for register)
10422
10423                 // Read  stack   , write register -- GC reg may be born
10424                 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
10425                 break;
10426
10427             case IF_SWR_RRD: // Stack Write, Register Read (So we need to update GC live for stack var)
10428                 // Read  register, write stack    -- GC var may be born
10429                 emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst);
10430                 break;
10431
10432             case IF_RRW_SRD: // Register Read/Write, Stack Read (So we need to update GC live for register)
10433
10434                 // reg could have been a GCREF as GCREF + int=BYREF
10435                 //                             or BYREF+/-int=BYREF
10436                 assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
10437                 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
10438                 break;
10439
10440             case IF_SRW_CNS:
10441             case IF_SRW_RRD:
10442             // += -= of a byref, no change
10443
10444             case IF_SRW:
10445                 break;
10446
10447             default:
10448 #ifdef DEBUG
10449                 emitDispIns(id, false, false, false);
10450 #endif
10451                 assert(!"unexpected GC ref instruction format");
10452         }
10453     }
10454     else
10455     {
10456         if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
10457         {
10458             switch (id->idInsFmt())
10459             {
10460                 case IF_RWR_SRD: // Register Write, Stack Read
10461                 case IF_RRW_SRD: // Register Read/Write, Stack Read
10462                 case IF_RWR_RRD_SRD:
10463                     emitGCregDeadUpd(id->idReg1(), dst);
10464                     break;
10465                 default:
10466                     break;
10467             }
10468
10469             if (ins == INS_mulEAX || ins == INS_imulEAX)
10470             {
10471                 emitGCregDeadUpd(REG_EAX, dst);
10472                 emitGCregDeadUpd(REG_EDX, dst);
10473             }
10474
10475             // For the three operand imul instruction the target register
10476             // is encoded in the opcode
10477
10478             if (instrIs3opImul(ins))
10479             {
10480                 regNumber tgtReg = inst3opImulReg(ins);
10481                 emitGCregDeadUpd(tgtReg, dst);
10482             }
10483         }
10484     }
10485
10486     return dst;
10487 }
10488
10489 /*****************************************************************************
10490  *
10491  *  Output an instruction with a static data member (class variable).
       *
       *  The operand address is formed from the field handle stored in the
       *  descriptor (id->idAddr()->iiaFieldHnd) plus emitGetInsDsp(id). The
       *  handle either maps to an offset into emitConsBlock (when
       *  Compiler::eeGetJitDataOffs returns a non-negative value) or names a
       *  static field / global address.
       *
       *  Arguments:
       *     dst  - position at which the instruction bytes are written
       *     id   - descriptor of the instruction being emitted
       *     code - opcode (and ModRM) bits supplied by the caller; prefix and
       *            operand-size bits are added to it here
       *     addc - immediate operand to emit after the address, or null when
       *            the instruction has none
       *
       *  Returns the write position just past the last emitted byte.
10492  */
10493
10494 BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
10495 {
10496     BYTE*                addr;
10497     CORINFO_FIELD_HANDLE fldh;
10498     ssize_t              offs;
10499     int                  doff;
10500
          // opsz is the number of immediate bytes emitted for `addc`; it is lowered to 1 below when the
          // constant fits in a byte. isMoffset is only ever set on x86 (see the mov/EAX special case).
10501     emitAttr    size      = id->idOpSize();
10502     size_t      opsz      = EA_SIZE_IN_BYTES(size);
10503     instruction ins       = id->idIns();
10504     bool        isMoffset = false;
10505
10506     // Get hold of the field handle and offset
10507     fldh = id->idAddr()->iiaFieldHnd;
10508     offs = emitGetInsDsp(id);
10509
10510     // Special case: mov reg, fs:[ddd] -- emit the FS segment override prefix (0x64) first
10511     if (fldh == FLD_GLOBAL_FS)
10512     {
10513         dst += emitOutputByte(dst, 0x64);
10514     }
10515
10516     // Compute VEX prefix
10517     // Some of its callers already add VEX prefix and then call this routine.
10518     // Therefore add the VEX prefix only if it is not already present.
10519     code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
10520
10521     // Compute the REX prefix
10522     if (TakesRexWPrefix(ins, size))
10523     {
10524         code = AddRexWPrefix(ins, code);
10525     }
10526
10527     // `addc` is used for two kinds of instructions
10528     // 1. ins like ADD that can have reg/mem and const versions both and const version needs to modify the opcode for
10529     // large constant operand (e.g., imm32)
10530     // 2. certain SSE/AVX ins have const operand as control bits that is always 1-Byte (imm8) even if `size` > 1-Byte
10531     if (addc && (size > EA_1BYTE))
10532     {
10533         ssize_t cval = addc->cnsVal;
10534         // Does the constant fit in a byte?
              // (relocatable constants, and mov/test, always keep the full-size immediate)
10535         if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
10536         {
10537             // SSE/AVX do not need to modify opcode
                  // (IF_MRW_SHF is likewise left with its opcode unchanged)
10538             if (id->idInsFmt() != IF_MRW_SHF && !IsSSEOrAVXInstruction(ins))
10539             {
10540                 code |= 2;
10541             }
10542
                  // Only a single immediate byte will be emitted
10543             opsz = 1;
10544         }
10545     }
10546 #ifdef _TARGET_X86_
10547     else
10548     {
10549         // Special case: "mov eax, [addr]" and "mov [addr], eax"
10550         // Amd64: this is one case where addr can be 64-bit in size.  This is
10551         // currently unused or not enabled on amd64 as it always uses RIP
10552         // relative addressing which results in smaller instruction size.
10553         if (ins == INS_mov && id->idReg1() == REG_EAX)
10554         {
10555             switch (id->idInsFmt())
10556             {
10557                 case IF_RWR_MRD:
10558
10559                     assert(code == (insCodeRM(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500));
10560
                          // Drop the opcode/ModRM bits checked above and use the one-byte 0xA0 opcode instead;
                          // isMoffset makes the code below emit a single opcode byte followed by the full address.
10561                     code &= ~((code_t)0xFFFFFFFF);
10562                     code |= 0xA0;
10563                     isMoffset = true;
10564                     break;
10565
10566                 case IF_MWR_RRD:
10567
10568                     assert(code == (insCodeMR(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500));
10569
                          // Same as above, for the store direction (opcode 0xA2)
10570                     code &= ~((code_t)0xFFFFFFFF);
10571                     code |= 0xA2;
10572                     isMoffset = true;
10573                     break;
10574
10575                 default:
10576                     break;
10577             }
10578         }
10579     }
10580 #endif //_TARGET_X86_
10581
10582     // Special case: instructions for which EncodedBySSE38orSSE3A() is true, plus crc32.
          // This path emits the prefix, the opcode bytes and the ModRM byte itself, then clears `code`.
10583     if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
10584     {
10585         if ((ins == INS_crc32) && (size > EA_1BYTE))
10586         {
                  // Non-byte crc32 sets an extra opcode bit; the 16-bit form also needs the 0x66 prefix
10587             code |= 0x0100;
10588
10589             if (size == EA_2BYTE)
10590             {
10591                 dst += emitOutputByte(dst, 0x66);
10592             }
10593         }
10594
              // For BMI instructions getBmiRegNumber() may supply the register for bits 3-5; in that case
              // idReg1 is encoded via insEncodeReg3456 instead. Otherwise idReg1 goes in bits 3-5.
10595         regNumber reg345 = REG_NA;
10596         if (IsBMIInstruction(ins))
10597         {
10598             reg345 = getBmiRegNumber(ins);
10599         }
10600         if (reg345 == REG_NA)
10601         {
10602             reg345 = id->idReg1();
10603         }
10604         else
10605         {
10606             code = insEncodeReg3456(ins, id->idReg1(), size, code);
10607         }
10608         unsigned regcode = insEncodeReg345(ins, reg345, size, &code);
10609
10610         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10611
10612         if (UseVEXEncoding() && (ins != INS_crc32))
10613         {
10614             // Emit last opcode byte
10615             // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
10616             assert((code & 0xFF) == 0);
10617             dst += emitOutputByte(dst, (code >> 8) & 0xFF);
10618         }
10619         else
10620         {
                  // No VEX encoding (or crc32): emit all four opcode bytes, high word first
10621             dst += emitOutputWord(dst, code >> 16);
10622             dst += emitOutputWord(dst, code & 0xFFFF);
10623         }
10624
10625         // Emit Mod,R/M byte (mod = 00, r/m = 101: a 32-bit displacement follows)
10626         dst += emitOutputByte(dst, regcode | 0x05);
              // Everything has been emitted; a zero `code` skips the opcode output further down
10627         code = 0;
10628     }
10629     // Is this a 'big' opcode?
10630     else if (code & 0xFF000000)
10631     {
10632         // Output the REX prefix
10633         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10634
10635         // Output the highest word of the opcode.
10636         // Check again since AVX instructions encode leading opcode bytes as part of VEX prefix.
10637         if (code & 0xFF000000)
10638         {
10639             dst += emitOutputWord(dst, code >> 16);
10640         }
10641         code &= 0x0000FFFF;
10642     }
10643     else if (code & 0x00FF0000)
10644     {
10645         // Output the REX prefix
10646         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10647
10648         // Check again as VEX prefix would have encoded leading opcode byte
10649         if (code & 0x00FF0000)
10650         {
10651             dst += emitOutputByte(dst, code >> 16);
10652             code &= 0x0000FFFF;
10653         }
10654
10655         if ((ins == INS_movsx || ins == INS_movzx || ins == INS_cmpxchg || ins == INS_xchg || ins == INS_xadd ||
10656              insIsCMOV(ins)) &&
10657             size != EA_1BYTE)
10658         {
10659             // movsx and movzx are 'big' opcodes but also have the 'w' bit
10660             code++;
10661         }
10662     }
10663     else if (CodeGen::instIsFP(ins))
10664     {
10665         assert(size == EA_4BYTE || size == EA_8BYTE);
10666
              // The 8-byte form of an FP instruction is selected by adding 4 to the opcode
10667         if (size == EA_8BYTE)
10668         {
10669             code += 4;
10670         }
10671     }
10672     else
10673     {
10674         // Is the operand size larger than a byte?
10675         switch (size)
10676         {
10677             case EA_1BYTE:
10678                 break;
10679
10680             case EA_2BYTE:
10681                 // Output a size prefix for a 16-bit operand
10682                 dst += emitOutputByte(dst, 0x66);
10683                 __fallthrough;
10684
10685             case EA_4BYTE:
10686 #ifdef _TARGET_AMD64_
10687             case EA_8BYTE:
10688 #endif
10689                 // Set the 'w' bit to get the large version
10690                 code |= 0x1;
10691                 break;
10692
10693 #ifdef _TARGET_X86_
10694             case EA_8BYTE:
10695                 // Double operand - set the appropriate bit
10696                 code |= 0x04;
10697                 break;
10698 #endif // _TARGET_X86_
10699
10700             default:
10701                 assert(!"unexpected size");
10702         }
10703     }
10704
10705     // Output the REX prefix
10706     dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10707
          // Emit whatever is left of the opcode: a single byte for the IF_MRD_OFF / IF_RWR_MRD_OFF
          // formats and the moffset form, otherwise two bytes (a word).
10708     if (code)
10709     {
10710         if (id->idInsFmt() == IF_MRD_OFF || id->idInsFmt() == IF_RWR_MRD_OFF || isMoffset)
10711         {
10712             dst += emitOutputByte(dst, code);
10713         }
10714         else
10715         {
10716             dst += emitOutputWord(dst, code);
10717         }
10718     }
10719
10720     // Do we have a constant or a static data member?
10721     doff = Compiler::eeGetJitDataOffs(fldh);
10722     if (doff >= 0)
10723     {
              // The handle refers to data at offset `doff` inside emitConsBlock
10724         addr = emitConsBlock + doff;
10725
10726         int byteSize = EA_SIZE_IN_BYTES(size);
10727
10728         // this instruction has a fixed size (4) src.
10729         if (ins == INS_cvttss2si || ins == INS_cvtss2sd || ins == INS_vbroadcastss)
10730         {
10731             byteSize = 4;
10732         }
10733         // This has a fixed size (8) source.
10734         if (ins == INS_vbroadcastsd)
10735         {
10736             byteSize = 8;
10737         }
10738
10739         // Check that the offset is properly aligned (i.e. the ddd in [ddd])
              // (skipped when emitChkAlign is false or the instruction is lea)
10740         assert((emitChkAlign == false) || (ins == INS_lea) || (((size_t)addr & (byteSize - 1)) == 0));
10741     }
10742     else
10743     {
10744         // Special case: mov reg, fs:[ddd] or mov reg, [ddd]
              // (the address is then just `offs`, since addr is null)
10745         if (jitStaticFldIsGlobAddr(fldh))
10746         {
10747             addr = nullptr;
10748         }
10749         else
10750         {
                  // Ask the EE interface for the address of the static field; a null result is fatal
10751             addr = (BYTE*)emitComp->info.compCompHnd->getFieldAddress(fldh, nullptr);
10752             if (addr == nullptr)
10753             {
10754                 NO_WAY("could not obtain address of static field");
10755             }
10756         }
10757     }
10758
          // Final address of the operand: base address plus the displacement from the descriptor
10759     BYTE* target = (addr + offs);
10760
10761     if (!isMoffset)
10762     {
              // addlDelta is passed to emitRecordRelocation: minus the number of immediate bytes
              // that follow the 32-bit displacement (0 when there is no immediate).
10763         INT32 addlDelta = 0;
10764
10765         if (addc)
10766         {
10767             // It is of the form "ins [disp], imm" or "ins reg, [disp], imm"
10768             // For emitting relocation, we also need to take into account of the
10769             // additional bytes of code emitted for immed val.
10770
10771             ssize_t cval = addc->cnsVal;
10772
10773 #ifdef _TARGET_AMD64_
10774             // all these opcodes only take a sign-extended 4-byte immediate
10775             noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
10776 #else  //_TARGET_X86_
10777             noway_assert(opsz <= 4);
10778 #endif //_TARGET_X86_
10779
10780             switch (opsz)
10781             {
10782                 case 0:
10783                 case 4:
10784                 case 8:
10785                     addlDelta = -4;
10786                     break;
10787                 case 2:
10788                     addlDelta = -2;
10789                     break;
10790                 case 1:
10791                     addlDelta = -1;
10792                     break;
10793
10794                 default:
10795                     assert(!"unexpected operand size");
10796                     unreached();
10797             }
10798         }
10799
10800 #ifdef _TARGET_AMD64_
10801         // All static field and data section constant accesses should be marked as relocatable
              // A zero placeholder is written here; the relocation for it is recorded just below.
10802         noway_assert(id->idIsDspReloc());
10803         dst += emitOutputLong(dst, 0);
10804 #else  //_TARGET_X86_
10805         dst += emitOutputLong(dst, (int)target);
10806 #endif //_TARGET_X86_
10807
10808         if (id->idIsDspReloc())
10809         {
10810             emitRecordRelocation((void*)(dst - sizeof(int)), target, IMAGE_REL_BASED_DISP32, 0, addlDelta);
10811         }
10812     }
10813     else
10814     {
10815 #ifdef _TARGET_AMD64_
10816         // This code path should never be hit on amd64 since it always uses RIP relative addressing.
10817         // In future if ever there is a need to enable this special case, also enable the logic
10818         // that sets isMoffset to true on amd64.
10819         unreached();
10820 #else //_TARGET_X86_
10821
              // moffset form: the pointer-sized absolute address directly follows the opcode byte
10822         dst += emitOutputSizeT(dst, (ssize_t)target);
10823
10824         if (id->idIsDspReloc())
10825         {
10826             emitRecordRelocation((void*)(dst - TARGET_POINTER_SIZE), target, IMAGE_REL_BASED_MOFFSET);
10827         }
10828
10829 #endif //_TARGET_X86_
10830     }
10831
10832     // Now generate the constant value, if present
10833     if (addc)
10834     {
10835         ssize_t cval = addc->cnsVal;
10836
10837 #ifdef _TARGET_AMD64_
10838         // all these opcodes only take a sign-extended 4-byte immediate
10839         noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
10840 #endif
10841
              // An 8-byte operand still emits a 4-byte immediate (see the assert above)
10842         switch (opsz)
10843         {
10844             case 0:
10845             case 4:
10846             case 8:
10847                 dst += emitOutputLong(dst, cval);
10848                 break;
10849             case 2:
10850                 dst += emitOutputWord(dst, cval);
10851                 break;
10852             case 1:
10853                 dst += emitOutputByte(dst, cval);
10854                 break;
10855
10856             default:
10857                 assert(!"unexpected operand size");
10858         }
              // A relocatable constant is always emitted as 4 bytes; record a relocation for those bytes
10859         if (addc->cnsReloc)
10860         {
10861             emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
10862             assert(opsz == 4);
10863         }
10864     }
10865
10866     // Does this instruction operate on a GC ref value?
10867     if (id->idGCref())
10868     {
              // Only the formats that write a register need a GC liveness update here;
              // the remaining listed formats leave the tracked state unchanged.
10869         switch (id->idInsFmt())
10870         {
10871             case IF_MRD:
10872             case IF_MRW:
10873             case IF_MWR:
10874                 break;
10875
10876             case IF_RRD_MRD:
10877                 break;
10878
10879             case IF_RWR_MRD:
                      // The written register now holds a value of the instruction's GC type
10880                 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
10881                 break;
10882
10883             case IF_MRD_RRD:
10884             case IF_MWR_RRD:
10885             case IF_MRW_RRD:
10886                 break;
10887
10888             case IF_MRD_CNS:
10889             case IF_MWR_CNS:
10890             case IF_MRW_CNS:
10891                 break;
10892
10893             case IF_RRW_MRD:
10894
10895                 assert(id->idGCref() == GCT_BYREF);
10896                 assert(ins == INS_add || ins == INS_sub);
10897
10898                 // Mark it as holding a GCT_BYREF
10899                 emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
10900                 break;
10901
10902             default:
10903 #ifdef DEBUG
10904                 emitDispIns(id, false, false, false);
10905 #endif
10906                 assert(!"unexpected GC ref instruction format");
10907         }
10908     }
10909     else
10910     {
              // Not a GC ref instruction: unless it can only write an SSE2/AVX register, report the
              // register(s) it writes via emitGCregDeadUpd.
10911         if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
10912         {
10913             switch (id->idInsFmt())
10914             {
10915                 case IF_RWR_MRD:
10916                 case IF_RRW_MRD:
10917                 case IF_RWR_RRD_MRD:
10918                     emitGCregDeadUpd(id->idReg1(), dst);
10919                     break;
10920                 default:
10921                     break;
10922             }
10923
                  // The EAX forms of mul/imul also get a dead update for EAX and EDX
10924             if (ins == INS_mulEAX || ins == INS_imulEAX)
10925             {
10926                 emitGCregDeadUpd(REG_EAX, dst);
10927                 emitGCregDeadUpd(REG_EDX, dst);
10928             }
10929
10930             // For the three operand imul instruction the target register
10931             // is encoded in the opcode
10932
10933             if (instrIs3opImul(ins))
10934             {
10935                 regNumber tgtReg = inst3opImulReg(ins);
10936                 emitGCregDeadUpd(tgtReg, dst);
10937             }
10938         }
10939     }
10940
10941     return dst;
10942 }
10943
10944 /*****************************************************************************
10945  *
10946  *  Output an instruction with one register operand.
10947  */
10948
10949 BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id)
10950 {
10951     code_t code;
10952
10953     instruction ins  = id->idIns();
10954     regNumber   reg  = id->idReg1();
10955     emitAttr    size = id->idOpSize();
10956
10957     // We would have to update GC info correctly
10958     assert(!IsSSEInstruction(ins));
10959     assert(!IsAVXInstruction(ins));
10960
10961     // Get the 'base' opcode
10962     switch (ins)
10963     {
10964         case INS_inc:
10965         case INS_dec:
10966
10967 #ifdef _TARGET_AMD64_
10968             if (true)
10969 #else
10970             if (size == EA_1BYTE)
10971 #endif
10972             {
10973                 assert(INS_inc_l == INS_inc + 1);
10974                 assert(INS_dec_l == INS_dec + 1);
10975
10976                 // Can't use the compact form, use the long form
10977                 ins = (instruction)(ins + 1);
10978                 if (size == EA_2BYTE)
10979                 {
10980                     // Output a size prefix for a 16-bit operand
10981                     dst += emitOutputByte(dst, 0x66);
10982                 }
10983
10984                 code = insCodeRR(ins);
10985                 if (size != EA_1BYTE)
10986                 {
10987                     // Set the 'w' bit to get the large version
10988                     code |= 0x1;
10989                 }
10990
10991                 if (TakesRexWPrefix(ins, size))
10992                 {
10993                     code = AddRexWPrefix(ins, code);
10994                 }
10995
10996                 // Register...
10997                 unsigned regcode = insEncodeReg012(ins, reg, size, &code);
10998
10999                 // Output the REX prefix
11000                 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11001
11002                 dst += emitOutputWord(dst, code | (regcode << 8));
11003             }
11004             else
11005             {
11006                 if (size == EA_2BYTE)
11007                 {
11008                     // Output a size prefix for a 16-bit operand
11009                     dst += emitOutputByte(dst, 0x66);
11010                 }
11011                 dst += emitOutputByte(dst, insCodeRR(ins) | insEncodeReg012(ins, reg, size, nullptr));
11012             }
11013             break;
11014
11015         case INS_pop:
11016         case INS_pop_hide:
11017         case INS_push:
11018         case INS_push_hide:
11019
11020             assert(size == EA_PTRSIZE);
11021             code = insEncodeOpreg(ins, reg, size);
11022
11023             assert(!TakesVexPrefix(ins));
11024             assert(!TakesRexWPrefix(ins, size));
11025
11026             // Output the REX prefix
11027             dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11028
11029             dst += emitOutputByte(dst, code);
11030             break;
11031
11032         case INS_bswap:
11033         {
11034             assert(size >= EA_4BYTE && size <= EA_PTRSIZE); // 16-bit BSWAP is undefined
11035
11036             // The Intel instruction set reference for BSWAP states that extended registers
11037             // should be enabled via REX.R, but per Vol. 2A, Sec. 2.2.1.2 (see also Figure 2-7),
11038             // REX.B should instead be used if the register is encoded in the opcode byte itself.
11039             // Therefore the default logic of insEncodeReg012 is correct for this case.
11040
11041             code = insCodeRR(ins);
11042
11043             if (TakesRexWPrefix(ins, size))
11044             {
11045                 code = AddRexWPrefix(ins, code);
11046             }
11047
11048             // Register...
11049             unsigned regcode = insEncodeReg012(ins, reg, size, &code);
11050
11051             // Output the REX prefix
11052             dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11053
11054             dst += emitOutputWord(dst, code | (regcode << 8));
11055             break;
11056         }
11057
11058         case INS_seto:
11059         case INS_setno:
11060         case INS_setb:
11061         case INS_setae:
11062         case INS_sete:
11063         case INS_setne:
11064         case INS_setbe:
11065         case INS_seta:
11066         case INS_sets:
11067         case INS_setns:
11068         case INS_setp:
11069         case INS_setnp:
11070         case INS_setl:
11071         case INS_setge:
11072         case INS_setle:
11073         case INS_setg:
11074
11075             assert(id->idGCref() == GCT_NONE);
11076             assert(size == EA_1BYTE);
11077
11078             code = insEncodeMRreg(ins, reg, EA_1BYTE, insCodeMR(ins));
11079
11080             // Output the REX prefix
11081             dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11082
11083             // We expect this to always be a 'big' opcode
11084             assert(code & 0x00FF0000);
11085
11086             dst += emitOutputByte(dst, code >> 16);
11087             dst += emitOutputWord(dst, code & 0x0000FFFF);
11088
11089             break;
11090
11091         case INS_mulEAX:
11092         case INS_imulEAX:
11093
11094             // Kill off any GC refs in EAX or EDX
11095             emitGCregDeadUpd(REG_EAX, dst);
11096             emitGCregDeadUpd(REG_EDX, dst);
11097
11098             __fallthrough;
11099
11100         default:
11101
11102             assert(id->idGCref() == GCT_NONE);
11103
11104             code = insEncodeMRreg(ins, reg, size, insCodeMR(ins));
11105
11106             if (size != EA_1BYTE)
11107             {
11108                 // Set the 'w' bit to get the large version
11109                 code |= 0x1;
11110
11111                 if (size == EA_2BYTE)
11112                 {
11113                     // Output a size prefix for a 16-bit operand
11114                     dst += emitOutputByte(dst, 0x66);
11115                 }
11116             }
11117
11118             code = AddVexPrefixIfNeeded(ins, code, size);
11119
11120             if (TakesRexWPrefix(ins, size))
11121             {
11122                 code = AddRexWPrefix(ins, code);
11123             }
11124
11125             // Output the REX prefix
11126             dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11127
11128             dst += emitOutputWord(dst, code);
11129             break;
11130     }
11131
11132     // Are we writing the register? if so then update the GC information
11133     switch (id->idInsFmt())
11134     {
11135         case IF_RRD:
11136             break;
11137         case IF_RWR:
11138             if (id->idGCref())
11139             {
11140                 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
11141             }
11142             else
11143             {
11144                 emitGCregDeadUpd(id->idReg1(), dst);
11145             }
11146             break;
11147         case IF_RRW:
11148         {
11149 #ifdef DEBUG
11150             regMaskTP regMask = genRegMask(reg);
11151 #endif
11152             if (id->idGCref())
11153             {
11154                 // The reg must currently be holding either a gcref or a byref
11155                 // and the instruction must be inc or dec
11156                 assert(((emitThisGCrefRegs | emitThisByrefRegs) & regMask) &&
11157                        (ins == INS_inc || ins == INS_dec || ins == INS_inc_l || ins == INS_dec_l));
11158                 assert(id->idGCref() == GCT_BYREF);
11159                 // Mark it as holding a GCT_BYREF
11160                 emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
11161             }
11162             else
11163             {
11164                 // Can't use RRW to trash a GC ref.  It's OK for unverifiable code
11165                 // to trash Byrefs.
11166                 assert((emitThisGCrefRegs & regMask) == 0);
11167             }
11168         }
11169         break;
11170         default:
11171 #ifdef DEBUG
11172             emitDispIns(id, false, false, false);
11173 #endif
11174             assert(!"unexpected instruction format");
11175             break;
11176     }
11177
11178     return dst;
11179 }
11180
11181 /*****************************************************************************
11182  *
11183  *  Output an instruction with two register operands.
       *
       *  Arguments:
       *     dst - location in the code buffer at which to start writing
       *     id  - descriptor of the instruction to encode; this function reads
       *           its instruction, both registers, operand size, format and
       *           GC type
       *
       *  Return Value:
       *     'dst' advanced past every byte that was written.
       *
       *  Notes:
       *     The work is done in four stages: pick and adjust the opcode for the
       *     instruction class, encode the two register fields (plus the VEX
       *     'vvvv' bits where applicable), write prefix/opcode/register bytes,
       *     and finally update the GC register tracking for the written register.
11184  */
11185
11186 BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
11187 {
11188     code_t code;
11189
11190     instruction ins  = id->idIns();
11191     regNumber   reg1 = id->idReg1();
11192     regNumber   reg2 = id->idReg2();
11193     emitAttr    size = id->idOpSize();
11194
          // Stage 1: select the opcode for this instruction class and fold in the
          // size-dependent bits.  Where a 0x66 operand-size prefix is required it is
          // written to 'dst' immediately, i.e. ahead of the REX/VEX prefix that is
          // emitted further down.
11195     // Get the 'base' opcode
11196     code = insCodeRM(ins);
11197     code = AddVexPrefixIfNeeded(ins, code, size);
11198     if (IsSSEOrAVXInstruction(ins))
11199     {
11200         code = insEncodeRMreg(ins, code);
11201
11202         if (TakesRexWPrefix(ins, size))
11203         {
11204             code = AddRexWPrefix(ins, code);
11205         }
11206     }
11207     else if ((ins == INS_movsx) || (ins == INS_movzx) || (insIsCMOV(ins)))
11208     {
              // Bit 0 of the opcode is set only when the operand size is 16 bits.
11209         code = insEncodeRMreg(ins, code) | (int)(size == EA_2BYTE);
              // NOTE: the AMD64-only region below opens inside this branch and closes
              // inside the 'movsxd' branch.  On other targets everything up to the
              // #endif is removed, so the brace that follows the #endif closes this
              // movsx/movzx/cmov branch instead.
11210 #ifdef _TARGET_AMD64_
11211
11212         assert((size < EA_4BYTE) || (insIsCMOV(ins)));
11213         if ((size == EA_8BYTE) || (ins == INS_movsx))
11214         {
11215             code = AddRexWPrefix(ins, code);
11216         }
11217     }
11218     else if (ins == INS_movsxd)
11219     {
11220         code = insEncodeRMreg(ins, code);
11221
11222 #endif // _TARGET_AMD64_
11223     }
11224 #ifdef FEATURE_HW_INTRINSICS
11225     else if ((ins == INS_crc32) || (ins == INS_lzcnt) || (ins == INS_popcnt) || (ins == INS_tzcnt))
11226     {
11227         code = insEncodeRMreg(ins, code);
              // crc32 with an operand wider than one byte gets bit 8 of the encoded
              // opcode set (presumably the opcode's width bit - confirm against the
              // instruction table).
11228         if ((ins == INS_crc32) && (size > EA_1BYTE))
11229         {
11230             code |= 0x0100;
11231         }
11232
              // Only crc32 is expected with a 16-bit operand here; 64-bit operands get REX.W.
11233         if (size == EA_2BYTE)
11234         {
11235             assert(ins == INS_crc32);
11236             dst += emitOutputByte(dst, 0x66);
11237         }
11238         else if (size == EA_8BYTE)
11239         {
11240             code = AddRexWPrefix(ins, code);
11241         }
11242     }
11243 #endif // FEATURE_HW_INTRINSICS
11244     else
11245     {
              // Everything else starts over from the MR-form opcode (the RM-form value
              // computed above is discarded).
11246         code = insEncodeMRreg(ins, insCodeMR(ins));
11247
              // Bit 1 of the opcode is set for every instruction except 'test'
              // (presumably the x86 direction bit - confirm).
11248         if (ins != INS_test)
11249         {
11250             code |= 2;
11251         }
11252
11253         switch (size)
11254         {
11255             case EA_1BYTE:
                      // Both operands must be byte-addressable registers.
11256                 noway_assert(RBM_BYTE_REGS & genRegMask(reg1));
11257                 noway_assert(RBM_BYTE_REGS & genRegMask(reg2));
11258                 break;
11259
11260             case EA_2BYTE:
11261                 // Output a size prefix for a 16-bit operand
11262                 dst += emitOutputByte(dst, 0x66);
11263                 __fallthrough;
11264
11265             case EA_4BYTE:
11266                 // Set the 'w' bit to get the large version
11267                 code |= 0x1;
11268                 break;
11269
11270 #ifdef _TARGET_AMD64_
11271             case EA_8BYTE:
11272                 // TODO-AMD64-CQ: Better way to not emit REX.W when we don't need it
11273                 // Don't need to zero out the high bits explicitly
                      // (REX.W is skipped only for 'xor reg, reg' with identical registers.)
11274                 if ((ins != INS_xor) || (reg1 != reg2))
11275                 {
11276                     code = AddRexWPrefix(ins, code);
11277                 }
11278
11279                 // Set the 'w' bit to get the large version
11280                 code |= 0x1;
11281                 break;
11282
11283 #endif // _TARGET_AMD64_
11284
11285             default:
11286                 assert(!"unexpected size");
11287         }
11288     }
11289
          // Stage 2: encode the register fields.  Bits 3-5 hold either the value
          // supplied by getBmiRegNumber (BMI instructions, unless it returns REG_NA)
          // or reg1; bits 0-2 always hold reg2.  Both encoders receive '&code' and so
          // may modify 'code' as well.
11290     regNumber reg345 = REG_NA;
11291     if (IsBMIInstruction(ins))
11292     {
11293         reg345 = getBmiRegNumber(ins);
11294     }
11295     if (reg345 == REG_NA)
11296     {
11297         reg345 = id->idReg1();
11298     }
11299     unsigned regCode = insEncodeReg345(ins, reg345, size, &code);
11300     regCode |= insEncodeReg012(ins, reg2, size, &code);
11301
11302     if (TakesVexPrefix(ins))
11303     {
11304         // In case of AVX instructions that take 3 operands, we generally want to encode reg1
11305         // as first source.  In this case, reg1 is both a source and a destination.
11306         // The exception is the "merge" 3-operand case, where we have a move instruction, such
11307         // as movss, and we want to merge the source with itself.
11308         //
11309         // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
11310         // now we use the single source as source1 and source2.
11311         if (IsDstDstSrcAVXInstruction(ins))
11312         {
11313             // encode source/dest operand reg in 'vvvv' bits in 1's complement form
11314             code = insEncodeReg3456(ins, reg1, size, code);
11315         }
11316         else if (IsDstSrcSrcAVXInstruction(ins))
11317         {
11318             // encode source operand reg in 'vvvv' bits in 1's complement form
11319             code = insEncodeReg3456(ins, reg2, size, code);
11320         }
11321     }
11322
          // Stage 3: write the bytes - prefix first, then any opcode bytes held above
          // the low 16 bits of 'code', then the remaining opcode plus the register byte.
11323     // Output the REX prefix
11324     dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11325
11326     if (code & 0xFF000000)
11327     {
11328         // Output the highest word of the opcode
11329         dst += emitOutputWord(dst, code >> 16);
11330         code &= 0x0000FFFF;
11331
11332         if (Is4ByteSSEInstruction(ins))
11333         {
                  // The low byte is written here and then cleared, which leaves only
                  // bits 8-15 of 'code' for the tail below.
11334             // Output 3rd byte of the opcode
11335             dst += emitOutputByte(dst, code);
11336             code &= 0xFF00;
11337         }
11338     }
11339     else if (code & 0x00FF0000)
11340     {
11341         dst += emitOutputByte(dst, code >> 16);
11342         code &= 0x0000FFFF;
11343     }
11344
11345     // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
11346     if ((code & 0xFF00) == 0xC000)
11347     {
              // The high byte of 'code' already is the 0xC0 register byte: merge the
              // register fields into it and write both bytes at once.
11348         dst += emitOutputWord(dst, code | (regCode << 8));
11349     }
11350     else if ((code & 0xFF) == 0x00)
11351     {
11352         // This case happens for some SSE/AVX instructions only
11353         assert(IsAVXInstruction(ins) || Is4ByteSSEInstruction(ins));
11354
              // Only bits 8-15 carry an opcode byte; follow it with the register byte.
11355         dst += emitOutputByte(dst, (code >> 8) & 0xFF);
11356         dst += emitOutputByte(dst, (0xC0 | regCode));
11357     }
11358     else
11359     {
              // Two opcode bytes remain; write them, then the register byte.
11360         dst += emitOutputWord(dst, code);
11361         dst += emitOutputByte(dst, (0xC0 | regCode));
11362     }
11363
          // Stage 4: GC tracking.  'dst' now points just past the instruction and is
          // the position handed to the emitGCreg* update routines.
11364     // Does this instruction operate on a GC ref value?
11365     if (id->idGCref())
11366     {
11367         switch (id->idInsFmt())
11368         {
11369             case IF_RRD_RRD:
                      // Both registers are only read: nothing to update.
11370                 break;
11371
11372             case IF_RWR_RRD:
11373
11374                 if (emitSyncThisObjReg != REG_NA && emitIGisInProlog(emitCurIG) && reg2 == (int)REG_ARG_0)
11375                 {
11376                     // We're relocating "this" in the prolog
11377                     assert(emitComp->lvaIsOriginalThisArg(0));
11378                     assert(emitComp->lvaTable[0].lvRegister);
11379                     assert(emitComp->lvaTable[0].lvRegNum == reg1);
11380
11381                     if (emitFullGCinfo)
11382                     {
                              // Mark reg1 live via emitGCregLiveSet and leave the switch,
                              // skipping the regular emitGCregLiveUpd below.
11383                         emitGCregLiveSet(id->idGCref(), genRegMask(reg1), dst, true);
11384                         break;
11385                     }
11386                     else
11387                     {
11388                         /* If emitFullGCinfo==false, then we don't use any
11389                            regPtrDsc's and so explicitly note the location
11390                            of "this" in GCEncode.cpp
11391                          */
11392                     }
11393                 }
11394
                      // reg1 now holds a value of the instruction's GC type.
11395                 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
11396                 break;
11397
11398             case IF_RRW_RRD:
11399
11400                 switch (id->idIns())
11401                 {
11402                     /*
11403                         This must be one of the following cases:
11404
11405                         xor reg, reg        to assign NULL
11406
11407                         and r1 , r2         if (ptr1 && ptr2) ...
11408                         or  r1 , r2         if (ptr1 || ptr2) ...
11409
11410                         add r1 , r2         to compute a normal byref
11411                         sub r1 , r2         to compute a strange byref (VC only)
11412
11413                     */
11414                     case INS_xor:
11415                         assert(id->idReg1() == id->idReg2());
11416                         emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
11417                         break;
11418
11419                     case INS_or:
11420                     case INS_and:
                              // The result is not tracked as a GC pointer.
11421                         emitGCregDeadUpd(id->idReg1(), dst);
11422                         break;
11423
11424                     case INS_add:
11425                     case INS_sub:
11426                         assert(id->idGCref() == GCT_BYREF);
11427
11428 #ifdef DEBUG
11429                         regMaskTP regMask;
11430                         regMask = genRegMask(reg1) | genRegMask(reg2);
11431
11432                         // r1/r2 could have been a GCREF as GCREF + int=BYREF
11433                         //                            or BYREF+/-int=BYREF
11434                         assert(((regMask & emitThisGCrefRegs) && (ins == INS_add)) ||
11435                                ((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub)));
11436 #endif
11437                         // Mark r1 as holding a byref
11438                         emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
11439                         break;
11440
11441                     default:
11442 #ifdef DEBUG
11443                         emitDispIns(id, false, false, false);
11444 #endif
11445                         assert(!"unexpected GC reg update instruction");
11446                 }
11447
11448                 break;
11449
11450             case IF_RRW_RRW:
11451                 // This must be "xchg reg1, reg2"
11452                 assert(id->idIns() == INS_xchg);
11453
11454                 // If we got here, the GC-ness of the registers doesn't match, so we have to "swap" them in the GC
11455                 // register pointer mask.
11456
11457                 GCtype gc1, gc2;
11458
11459                 gc1 = emitRegGCtype(reg1);
11460                 gc2 = emitRegGCtype(reg2);
11461
11462                 if (gc1 != gc2)
11463                 {
11464                     // Kill the GC-info about the GC registers
11465
11466                     if (needsGC(gc1))
11467                     {
11468                         emitGCregDeadUpd(reg1, dst);
11469                     }
11470
11471                     if (needsGC(gc2))
11472                     {
11473                         emitGCregDeadUpd(reg2, dst);
11474                     }
11475
11476                     // Now, swap the info
11477
11478                     if (needsGC(gc1))
11479                     {
11480                         emitGCregLiveUpd(gc1, reg2, dst);
11481                     }
11482
11483                     if (needsGC(gc2))
11484                     {
11485                         emitGCregLiveUpd(gc2, reg1, dst);
11486                     }
11487                 }
11488                 break;
11489
11490             default:
11491 #ifdef DEBUG
11492                 emitDispIns(id, false, false, false);
11493 #endif
11494                 assert(!"unexpected GC ref instruction format");
11495         }
11496     }
11497     else
11498     {
              // No GC value is produced: whichever register the instruction overwrites
              // is marked dead.  The whole update is skipped when
              // emitInsCanOnlyWriteSSE2OrAVXReg(id) reports true.
11499         if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
11500         {
11501             switch (id->idInsFmt())
11502             {
11503                 case IF_RRD_CNS:
11504                     // INS_mulEAX can not be used with any of these formats
11505                     assert(ins != INS_mulEAX && ins != INS_imulEAX);
11506
11507                     // For the three operand imul instruction the target
11508                     // register is encoded in the opcode
11509
11510                     if (instrIs3opImul(ins))
11511                     {
11512                         regNumber tgtReg = inst3opImulReg(ins);
11513                         emitGCregDeadUpd(tgtReg, dst);
11514                     }
11515                     break;
11516
11517                 case IF_RWR_RRD:
11518                 case IF_RRW_RRD:
11519                 case IF_RWR_RRD_RRD:
11520                     // INS_movxmm2i writes to reg2.
11521                     if (ins == INS_mov_xmm2i)
11522                     {
11523                         emitGCregDeadUpd(id->idReg2(), dst);
11524                     }
11525                     else
11526                     {
11527                         emitGCregDeadUpd(id->idReg1(), dst);
11528                     }
11529                     break;
11530
11531                 default:
                          // Other formats leave the GC register state untouched.
11532                     break;
11533             }
11534         }
11535     }
11536
11537     return dst;
11538 }
11539
11540 BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id)
11541 {
11542     code_t code;
11543
11544     instruction ins = id->idIns();
11545     assert(IsAVXInstruction(ins));
11546     assert(IsThreeOperandAVXInstruction(ins) || isAvxBlendv(ins));
11547     regNumber targetReg = id->idReg1();
11548     regNumber src1      = id->idReg2();
11549     regNumber src2      = id->idReg3();
11550     emitAttr  size      = id->idOpSize();
11551
11552     code = insCodeRM(ins);
11553     code = AddVexPrefixIfNeeded(ins, code, size);
11554     code = insEncodeRMreg(ins, code);
11555
11556     if (TakesRexWPrefix(ins, size))
11557     {
11558         code = AddRexWPrefix(ins, code);
11559     }
11560
11561     unsigned regCode = insEncodeReg345(ins, targetReg, size, &code);
11562     regCode |= insEncodeReg012(ins, src2, size, &code);
11563     // encode source operand reg in 'vvvv' bits in 1's complement form
11564     code = insEncodeReg3456(ins, src1, size, code);
11565
11566     // Output the REX prefix
11567     dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11568
11569     // Is this a 'big' opcode?
11570     if (code & 0xFF000000)
11571     {
11572         // Output the highest word of the opcode
11573         dst += emitOutputWord(dst, code >> 16);
11574         code &= 0x0000FFFF;
11575     }
11576     else if (code & 0x00FF0000)
11577     {
11578         dst += emitOutputByte(dst, code >> 16);
11579         code &= 0x0000FFFF;
11580     }
11581
11582     // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
11583     if ((code & 0xFF00) == 0xC000)
11584     {
11585         dst += emitOutputWord(dst, code | (regCode << 8));
11586     }
11587     else if ((code & 0xFF) == 0x00)
11588     {
11589         // This case happens for AVX instructions only
11590         assert(IsAVXInstruction(ins));
11591
11592         dst += emitOutputByte(dst, (code >> 8) & 0xFF);
11593         dst += emitOutputByte(dst, (0xC0 | regCode));
11594     }
11595     else
11596     {
11597         dst += emitOutputWord(dst, code);
11598         dst += emitOutputByte(dst, (0xC0 | regCode));
11599     }
11600
11601     noway_assert(!id->idGCref());
11602
11603     return dst;
11604 }
11605
11606 /*****************************************************************************
11607  *
11608  *  Output an instruction with a register operand and a constant operand.
       *
       *  Arguments:
       *     dst - location in the code buffer at which to start writing
       *     id  - descriptor of the instruction to encode; this function reads
       *           its instruction, register, operand size, constant, constant
       *           relocation flag, format and GC type
       *
       *  Return Value:
       *     'dst' advanced past every byte that was written.
       *
       *  Notes:
       *     Three encodings are handled in turn:
       *       - SSE/AVX "opcode reg, imm8": returns early, no GC update;
       *       - 'mov reg, imm': writes a full-size immediate and jumps to DONE;
       *       - everything else: chooses between the accumulator form, the
       *         sign-extended byte-immediate form and the full-size immediate.
       *     The code after DONE updates the GC register tracking.
11609  */
11610
11611 BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
11612 {
11613     code_t      code;
11614     emitAttr    size      = id->idOpSize();
11615     instruction ins       = id->idIns();
11616     regNumber   reg       = id->idReg1();
11617     ssize_t     val       = emitGetInsSC(id);
          // valInByte: the constant is unchanged by a round trip through a signed
          // byte.  It is never set for 'mov' or 'test'.
11618     bool        valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
11619
11620     // BT reg,imm might be useful but it requires special handling of the immediate value
11621     // (it is always encoded in a byte). Let's not complicate things until this is needed.
11622     assert(ins != INS_bt);
11623
11624     if (id->idIsCnsReloc())
11625     {
11626         valInByte = false; // relocs can't be placed in a byte
11627     }
11628
11629     noway_assert(emitVerifyEncodable(ins, size, reg));
11630
11631     if (IsSSEOrAVXInstruction(ins))
11632     {
11633         // Handle SSE2 instructions of the form "opcode reg, immed8"
11634
11635         assert(id->idGCref() == GCT_NONE);
11636         assert(valInByte);
11637
11638         // The left and right shifts use the same encoding, and are distinguished by the Reg/Opcode field.
11639         regNumber regOpcode = getSseShiftRegNumber(ins);
11640
11641         // Get the 'base' opcode.
11642         code = insCodeMI(ins);
11643         code = AddVexPrefixIfNeeded(ins, code, size);
11644         code = insEncodeMIreg(ins, reg, size, code);
11645         assert(code & 0x00FF0000);
11646         if (TakesVexPrefix(ins))
11647         {
11648             // The 'vvvv' bits encode the destination register, which for this case (RI)
11649             // is the same as the source.
11650             code = insEncodeReg3456(ins, reg, size, code);
11651         }
11652
              // Register fields (regOpcode in bits 3-5, reg in bits 0-2), pre-shifted
              // into the high byte of the 16-bit word written below.
11653         unsigned regcode = (insEncodeReg345(ins, regOpcode, size, &code) | insEncodeReg012(ins, reg, size, &code)) << 8;
11654
11655         // Output the REX prefix
11656         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11657
              // Opcode bytes held above the low 16 bits of 'code' come first.
11658         if (code & 0xFF000000)
11659         {
11660             dst += emitOutputWord(dst, code >> 16);
11661         }
11662         else if (code & 0xFF0000)
11663         {
11664             dst += emitOutputByte(dst, code >> 16);
11665         }
11666
11667         dst += emitOutputWord(dst, code | regcode);
11668
              // The constant always goes out as a single byte on this path.
11669         dst += emitOutputByte(dst, val);
11670
              // Early return: the GC update at DONE is not performed for this path.
11671         return dst;
11672     }
11673
11674     // The 'mov' opcode is special
11675     if (ins == INS_mov)
11676     {
11677         code = insCodeACC(ins);
11678         assert(code < 0x100);
11679
11680         code |= 0x08; // Set the 'w' bit
              // The register number is merged directly into the opcode byte.
11681         unsigned regcode = insEncodeReg012(ins, reg, size, &code);
11682         code |= regcode;
11683
11684         // This is INS_mov and will not take VEX prefix
11685         assert(!TakesVexPrefix(ins));
11686
11687         if (TakesRexWPrefix(ins, size))
11688         {
11689             code = AddRexWPrefix(ins, code);
11690         }
11691
11692         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11693
11694         dst += emitOutputByte(dst, code);
              // The immediate is written at full operand size.  Without
              // _TARGET_AMD64_ only the 4-byte case emits an immediate here.
11695         if (size == EA_4BYTE)
11696         {
11697             dst += emitOutputLong(dst, val);
11698         }
11699 #ifdef _TARGET_AMD64_
11700         else
11701         {
11702             assert(size == EA_PTRSIZE);
11703             dst += emitOutputSizeT(dst, val);
11704         }
11705 #endif
11706
11707         if (id->idIsCnsReloc())
11708         {
                  // The relocation is recorded at the start of the immediate that was
                  // just written ('dst' minus the operand size).
11709             emitRecordRelocation((void*)(dst - (unsigned)EA_SIZE(size)), (void*)(size_t)val, IMAGE_REL_BASED_MOFFSET);
11710         }
11711
11712         goto DONE;
11713     }
11714
11715     // Decide which encoding is the shortest
          // NOTE: the 'goto DONE' above jumps over these declarations, which C++ only
          // permits because they have no initializers.
11716     bool useSigned, useACC;
11717
11718     if (reg == REG_EAX && !instrIs3opImul(ins))
11719     {
11720         if (size == EA_1BYTE || (ins == INS_test))
11721         {
11722             // For al, ACC encoding is always the smallest
11723             useSigned = false;
11724             useACC    = true;
11725         }
11726         else
11727         {
11728             /* For ax/eax, we avoid ACC encoding for small constants as we
11729              * can emit the small constant and have it sign-extended.
11730              * For big constants, the ACC encoding is better as we can use
11731              * the 1 byte opcode
11732              */
11733
11734             if (valInByte)
11735             {
11736                 // avoid using ACC encoding
11737                 useSigned = true;
11738                 useACC    = false;
11739             }
11740             else
11741             {
11742                 useSigned = false;
11743                 useACC    = true;
11744             }
11745         }
11746     }
11747     else
11748     {
              // Not the accumulator (or a 3-operand imul): no ACC form; use the
              // sign-extended byte whenever the constant fits.
11749         useACC = false;
11750
11751         if (valInByte)
11752         {
11753             useSigned = true;
11754         }
11755         else
11756         {
11757             useSigned = false;
11758         }
11759     }
11760
11761     // "test" has no 's' bit
          // (valInByte is already forced to false for 'test' above, so useSigned is
          // already false here for 'test'; this assignment restates it.)
11762     if (ins == INS_test)
11763     {
11764         useSigned = false;
11765     }
11766
11767     // Get the 'base' opcode
11768     if (useACC)
11769     {
11770         assert(!useSigned);
11771         code = insCodeACC(ins);
11772     }
11773     else
11774     {
11775         assert(!useSigned || valInByte);
11776
11777         // Some instructions (at least 'imul') do not have a
11778         // r/m, immed form, but do have a dstReg,srcReg,imm8 form.
11779         if (valInByte && useSigned && insNeedsRRIb(ins))
11780         {
11781             code = insEncodeRRIb(ins, reg, size);
11782         }
11783         else
11784         {
11785             code = insCodeMI(ins);
11786             code = AddVexPrefixIfNeeded(ins, code, size);
11787             code = insEncodeMIreg(ins, reg, size, code);
11788         }
11789     }
11790
          // Fold the operand size into the opcode.  A 0x66 prefix, when needed, is
          // written to 'dst' here, ahead of the REX prefix emitted below.
11791     switch (size)
11792     {
11793         case EA_1BYTE:
11794             break;
11795
11796         case EA_2BYTE:
11797             // Output a size prefix for a 16-bit operand
11798             dst += emitOutputByte(dst, 0x66);
11799             __fallthrough;
11800
11801         case EA_4BYTE:
11802             // Set the 'w' bit to get the large version
11803             code |= 0x1;
11804             break;
11805
11806 #ifdef _TARGET_AMD64_
11807         case EA_8BYTE:
11808             /* Set the 'w' bit to get the large version */
11809             /* and the REX.W bit to get the really large version */
11810
11811             code = AddRexWPrefix(ins, code);
11812             code |= 0x1;
11813             break;
11814 #endif
11815
11816         default:
11817             assert(!"unexpected size");
11818     }
11819
11820     // Output the REX prefix
11821     dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11822
11823     // Does the value fit in a sign-extended byte?
11824     // Important!  Only set the 's' bit when we have a size larger than EA_1BYTE.
11825     // Note: A sign-extending immediate when (size == EA_1BYTE) is invalid in 64-bit mode.
11826
11827     if (useSigned && (size > EA_1BYTE))
11828     {
11829         // We can just set the 's' bit, and issue an immediate byte
11830
11831         code |= 0x2; // Set the 's' bit to use a sign-extended immediate byte.
11832         dst += emitOutputWord(dst, code);
11833         dst += emitOutputByte(dst, val);
11834     }
11835     else
11836     {
11837         // Can we use an accumulator (EAX) encoding?
              // The accumulator form writes a single opcode byte; otherwise two bytes
              // of 'code' are written.
11838         if (useACC)
11839         {
11840             dst += emitOutputByte(dst, code);
11841         }
11842         else
11843         {
11844             dst += emitOutputWord(dst, code);
11845         }
11846
              // Full-size immediate.  Note that an 8-byte operand still gets only a
              // 4-byte immediate.
11847         switch (size)
11848         {
11849             case EA_1BYTE:
11850                 dst += emitOutputByte(dst, val);
11851                 break;
11852             case EA_2BYTE:
11853                 dst += emitOutputWord(dst, val);
11854                 break;
11855             case EA_4BYTE:
11856                 dst += emitOutputLong(dst, val);
11857                 break;
11858 #ifdef _TARGET_AMD64_
11859             case EA_8BYTE:
11860                 dst += emitOutputLong(dst, val);
11861                 break;
11862 #endif // _TARGET_AMD64_
11863             default:
11864                 break;
11865         }
11866
11867         if (id->idIsCnsReloc())
11868         {
                  // The relocation covers the last four bytes written; the assert that
                  // follows checks that those bytes really are a 4-byte immediate.
11869             emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
11870             assert(size == EA_4BYTE);
11871         }
11872     }
11873
11874 DONE:
11875
          // 'dst' now points just past the instruction and is the position handed to
          // the emitGCreg* update routines.
11876     // Does this instruction operate on a GC ref value?
11877     if (id->idGCref())
11878     {
11879         switch (id->idInsFmt())
11880         {
11881             case IF_RRD_CNS:
                      // The register is only read: nothing to update.
11882                 break;
11883
11884             case IF_RWR_CNS:
11885                 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
11886                 break;
11887
11888             case IF_RRW_CNS:
11889                 assert(id->idGCref() == GCT_BYREF);
11890
11891 #ifdef DEBUG
11892                 regMaskTP regMask;
11893                 regMask = genRegMask(reg);
11894                 // FIXNOW review the other places and relax the assert there too
11895
11896                 // The reg must currently be holding either a gcref or a byref
11897                 // GCT_GCREF+int = GCT_BYREF, and GCT_BYREF+/-int = GCT_BYREF
11898                 if (emitThisGCrefRegs & regMask)
11899                 {
11900                     assert(ins == INS_add);
11901                 }
11902                 if (emitThisByrefRegs & regMask)
11903                 {
11904                     assert(ins == INS_add || ins == INS_sub);
11905                 }
11906 #endif
11907                 // Mark it as holding a GCT_BYREF
11908                 emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
11909                 break;
11910
11911             default:
11912 #ifdef DEBUG
11913                 emitDispIns(id, false, false, false);
11914 #endif
11915                 assert(!"unexpected GC ref instruction format");
11916         }
11917
11918         // mul can never produce a GC ref
11919         assert(!instrIs3opImul(ins));
11920         assert(ins != INS_mulEAX && ins != INS_imulEAX);
11921     }
11922     else
11923     {
              // No GC value is produced: whichever register the instruction overwrites
              // is marked dead.
11924         switch (id->idInsFmt())
11925         {
11926             case IF_RRD_CNS:
11927                 // INS_mulEAX can not be used with any of these formats
11928                 assert(ins != INS_mulEAX && ins != INS_imulEAX);
11929
11930                 // For the three operand imul instruction the target
11931                 // register is encoded in the opcode
11932
11933                 if (instrIs3opImul(ins))
11934                 {
11935                     regNumber tgtReg = inst3opImulReg(ins);
11936                     emitGCregDeadUpd(tgtReg, dst);
11937                 }
11938                 break;
11939
11940             case IF_RRW_CNS:
11941             case IF_RWR_CNS:
11942                 assert(!instrIs3opImul(ins));
11943
11944                 emitGCregDeadUpd(id->idReg1(), dst);
11945                 break;
11946
11947             default:
11948 #ifdef DEBUG
11949                 emitDispIns(id, false, false, false);
11950 #endif
11951                 assert(!"unexpected GC ref instruction format");
11952         }
11953     }
11954
11955     return dst;
11956 }
11957
11958 /*****************************************************************************
11959  *
11960  *  Output an instruction with a constant operand.
11961  */
11962
11963 BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id)
11964 {
11965     code_t      code;
11966     instruction ins       = id->idIns();
11967     emitAttr    size      = id->idOpSize();
11968     ssize_t     val       = emitGetInsSC(id);
11969     bool        valInByte = ((signed char)val == val);
11970
11971     // We would to update GC info correctly
11972     assert(!IsSSEInstruction(ins));
11973     assert(!IsAVXInstruction(ins));
11974
11975 #ifdef _TARGET_AMD64_
11976     // all these opcodes take a sign-extended 4-byte immediate, max
11977     noway_assert(size < EA_8BYTE || ((int)val == val && !id->idIsCnsReloc()));
11978 #endif
11979
11980     if (id->idIsCnsReloc())
11981     {
11982         valInByte = false; // relocs can't be placed in a byte
11983
11984         // Of these instructions only the push instruction can have reloc
11985         assert(ins == INS_push || ins == INS_push_hide);
11986     }
11987
11988     switch (ins)
11989     {
11990         case INS_jge:
11991             assert((val >= -128) && (val <= 127));
11992             dst += emitOutputByte(dst, insCode(ins));
11993             dst += emitOutputByte(dst, val);
11994             break;
11995
11996         case INS_loop:
11997             assert((val >= -128) && (val <= 127));
11998             dst += emitOutputByte(dst, insCodeMI(ins));
11999             dst += emitOutputByte(dst, val);
12000             break;
12001
12002         case INS_ret:
12003             assert(val);
12004             dst += emitOutputByte(dst, insCodeMI(ins));
12005             dst += emitOutputWord(dst, val);
12006             break;
12007
12008         case INS_push_hide:
12009         case INS_push:
12010             code = insCodeMI(ins);
12011
12012             // Does the operand fit in a byte?
12013             if (valInByte)
12014             {
12015                 dst += emitOutputByte(dst, code | 2);
12016                 dst += emitOutputByte(dst, val);
12017             }
12018             else
12019             {
12020                 if (TakesRexWPrefix(ins, size))
12021                 {
12022                     code = AddRexWPrefix(ins, code);
12023                     dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
12024                 }
12025
12026                 dst += emitOutputByte(dst, code);
12027                 dst += emitOutputLong(dst, val);
12028                 if (id->idIsCnsReloc())
12029                 {
12030                     emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
12031                 }
12032             }
12033
12034             // Did we push a GC ref value?
12035             if (id->idGCref())
12036             {
12037 #ifdef DEBUG
12038                 printf("UNDONE: record GCref push [cns]\n");
12039 #endif
12040             }
12041
12042             break;
12043
12044         default:
12045             assert(!"unexpected instruction");
12046     }
12047
12048     return dst;
12049 }
12050
12051 /*****************************************************************************
12052  *
12053  *  Output a local jump instruction.
12054  *  This function also handles non-jumps that have jump-like characteristics, like RIP-relative LEA of a label that
12055  *  needs to get bound to an actual address and processed by branch shortening.
12056  */
12057
12058 BYTE* emitter::emitOutputLJ(BYTE* dst, instrDesc* i)
12059 {
12060     unsigned srcOffs;
12061     unsigned dstOffs;
12062     ssize_t  distVal;
12063
12064     instrDescJmp* id  = (instrDescJmp*)i;
12065     instruction   ins = id->idIns();
12066     bool          jmp;
12067     bool          relAddr = true; // does the instruction use relative-addressing?
12068
12069     // SSE/AVX doesnt make any sense here
12070     assert(!IsSSEInstruction(ins));
12071     assert(!IsAVXInstruction(ins));
12072
12073     size_t ssz;
12074     size_t lsz;
12075
12076     switch (ins)
12077     {
12078         default:
12079             ssz = JCC_SIZE_SMALL;
12080             lsz = JCC_SIZE_LARGE;
12081             jmp = true;
12082             break;
12083
12084         case INS_jmp:
12085             ssz = JMP_SIZE_SMALL;
12086             lsz = JMP_SIZE_LARGE;
12087             jmp = true;
12088             break;
12089
12090         case INS_call:
12091             ssz = lsz = CALL_INST_SIZE;
12092             jmp       = false;
12093             break;
12094
12095         case INS_push_hide:
12096         case INS_push:
12097             ssz = lsz = 5;
12098             jmp       = false;
12099             relAddr   = false;
12100             break;
12101
12102         case INS_mov:
12103         case INS_lea:
12104             ssz = lsz = id->idCodeSize();
12105             jmp       = false;
12106             relAddr   = false;
12107             break;
12108     }
12109
12110     // Figure out the distance to the target
12111     srcOffs = emitCurCodeOffs(dst);
12112     dstOffs = id->idAddr()->iiaIGlabel->igOffs;
12113
12114     if (relAddr)
12115     {
12116         distVal = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs));
12117     }
12118     else
12119     {
12120         distVal = (ssize_t)emitOffsetToPtr(dstOffs);
12121     }
12122
12123     if (dstOffs <= srcOffs)
12124     {
12125         // This is a backward jump - distance is known at this point
12126         CLANG_FORMAT_COMMENT_ANCHOR;
12127
12128 #if DEBUG_EMIT
12129         if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
12130         {
12131             size_t blkOffs = id->idjIG->igOffs;
12132
12133             if (INTERESTING_JUMP_NUM == 0)
12134             {
12135                 printf("[3] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
12136             }
12137             printf("[3] Jump  block is at %08X - %02X = %08X\n", blkOffs, emitOffsAdj, blkOffs - emitOffsAdj);
12138             printf("[3] Jump        is at %08X - %02X = %08X\n", srcOffs, emitOffsAdj, srcOffs - emitOffsAdj);
12139             printf("[3] Label block is at %08X - %02X = %08X\n", dstOffs, emitOffsAdj, dstOffs - emitOffsAdj);
12140         }
12141 #endif
12142
12143         // Can we use a short jump?
12144         if (jmp && distVal - ssz >= (size_t)JMP_DIST_SMALL_MAX_NEG)
12145         {
12146             emitSetShortJump(id);
12147         }
12148     }
12149     else
12150     {
12151         // This is a  forward jump - distance will be an upper limit
12152         emitFwdJumps = true;
12153
12154         // The target offset will be closer by at least 'emitOffsAdj', but only if this
12155         // jump doesn't cross the hot-cold boundary.
12156         if (!emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
12157         {
12158             dstOffs -= emitOffsAdj;
12159             distVal -= emitOffsAdj;
12160         }
12161
12162         // Record the location of the jump for later patching
12163         id->idjOffs = dstOffs;
12164
12165         // Are we overflowing the id->idjOffs bitfield?
12166         if (id->idjOffs != dstOffs)
12167         {
12168             IMPL_LIMITATION("Method is too large");
12169         }
12170
12171 #if DEBUG_EMIT
12172         if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
12173         {
12174             size_t blkOffs = id->idjIG->igOffs;
12175
12176             if (INTERESTING_JUMP_NUM == 0)
12177             {
12178                 printf("[4] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
12179             }
12180             printf("[4] Jump  block is at %08X\n", blkOffs);
12181             printf("[4] Jump        is at %08X\n", srcOffs);
12182             printf("[4] Label block is at %08X - %02X = %08X\n", dstOffs + emitOffsAdj, emitOffsAdj, dstOffs);
12183         }
12184 #endif
12185
12186         // Can we use a short jump?
12187         if (jmp && distVal - ssz <= (size_t)JMP_DIST_SMALL_MAX_POS)
12188         {
12189             emitSetShortJump(id);
12190         }
12191     }
12192
12193     // Adjust the offset to emit relative to the end of the instruction
12194     if (relAddr)
12195     {
12196         distVal -= id->idjShort ? ssz : lsz;
12197     }
12198
12199 #ifdef DEBUG
12200     if (0 && emitComp->verbose)
12201     {
12202         size_t sz          = id->idjShort ? ssz : lsz;
12203         int    distValSize = id->idjShort ? 4 : 8;
12204         printf("; %s jump [%08X/%03u] from %0*X to %0*X: dist = %08XH\n", (dstOffs <= srcOffs) ? "Fwd" : "Bwd",
12205                emitComp->dspPtr(id), id->idDebugOnlyInfo()->idNum, distValSize, srcOffs + sz, distValSize, dstOffs,
12206                distVal);
12207     }
12208 #endif
12209
12210     // What size jump should we use?
12211     if (id->idjShort)
12212     {
12213         // Short jump
12214         assert(!id->idjKeepLong);
12215         assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false);
12216
12217         assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL);
12218         assert(JMP_SIZE_SMALL == 2);
12219
12220         assert(jmp);
12221
12222         if (emitInstCodeSz(id) != JMP_SIZE_SMALL)
12223         {
12224             emitOffsAdj += emitInstCodeSz(id) - JMP_SIZE_SMALL;
12225
12226 #ifdef DEBUG
12227             if (emitComp->verbose)
12228             {
12229                 printf("; NOTE: size of jump [%08X] mis-predicted\n", emitComp->dspPtr(id));
12230             }
12231 #endif
12232         }
12233
12234         dst += emitOutputByte(dst, insCode(ins));
12235
12236         // For forward jumps, record the address of the distance value
12237         id->idjTemp.idjAddr = (distVal > 0) ? dst : nullptr;
12238
12239         dst += emitOutputByte(dst, distVal);
12240     }
12241     else
12242     {
12243         code_t code;
12244
12245         // Long  jump
12246         if (jmp)
12247         {
12248             // clang-format off
12249             assert(INS_jmp + (INS_l_jmp - INS_jmp) == INS_l_jmp);
12250             assert(INS_jo  + (INS_l_jmp - INS_jmp) == INS_l_jo);
12251             assert(INS_jb  + (INS_l_jmp - INS_jmp) == INS_l_jb);
12252             assert(INS_jae + (INS_l_jmp - INS_jmp) == INS_l_jae);
12253             assert(INS_je  + (INS_l_jmp - INS_jmp) == INS_l_je);
12254             assert(INS_jne + (INS_l_jmp - INS_jmp) == INS_l_jne);
12255             assert(INS_jbe + (INS_l_jmp - INS_jmp) == INS_l_jbe);
12256             assert(INS_ja  + (INS_l_jmp - INS_jmp) == INS_l_ja);
12257             assert(INS_js  + (INS_l_jmp - INS_jmp) == INS_l_js);
12258             assert(INS_jns + (INS_l_jmp - INS_jmp) == INS_l_jns);
12259             assert(INS_jp  + (INS_l_jmp - INS_jmp) == INS_l_jp);
12260             assert(INS_jnp + (INS_l_jmp - INS_jmp) == INS_l_jnp);
12261             assert(INS_jl  + (INS_l_jmp - INS_jmp) == INS_l_jl);
12262             assert(INS_jge + (INS_l_jmp - INS_jmp) == INS_l_jge);
12263             assert(INS_jle + (INS_l_jmp - INS_jmp) == INS_l_jle);
12264             assert(INS_jg  + (INS_l_jmp - INS_jmp) == INS_l_jg);
12265             // clang-format on
12266
12267             code = insCode((instruction)(ins + (INS_l_jmp - INS_jmp)));
12268         }
12269         else if (ins == INS_push || ins == INS_push_hide)
12270         {
12271             assert(insCodeMI(INS_push) == 0x68);
12272             code = 0x68;
12273         }
12274         else if (ins == INS_mov)
12275         {
12276             // Make it look like IF_SWR_CNS so that emitOutputSV emits the r/m32 for us
12277             insFormat tmpInsFmt   = id->idInsFmt();
12278             insGroup* tmpIGlabel  = id->idAddr()->iiaIGlabel;
12279             bool      tmpDspReloc = id->idIsDspReloc();
12280
12281             id->idInsFmt(IF_SWR_CNS);
12282             id->idAddr()->iiaLclVar = ((instrDescLbl*)id)->dstLclVar;
12283             id->idSetIsDspReloc(false);
12284
12285             dst = emitOutputSV(dst, id, insCodeMI(ins));
12286
12287             // Restore id fields with original values
12288             id->idInsFmt(tmpInsFmt);
12289             id->idAddr()->iiaIGlabel = tmpIGlabel;
12290             id->idSetIsDspReloc(tmpDspReloc);
12291             code = 0xCC;
12292         }
12293         else if (ins == INS_lea)
12294         {
12295             // Make an instrDesc that looks like IF_RWR_ARD so that emitOutputAM emits the r/m32 for us.
12296             // We basically are doing what emitIns_R_AI does.
12297             // TODO-XArch-Cleanup: revisit this.
12298             instrDescAmd  idAmdStackLocal;
12299             instrDescAmd* idAmd = &idAmdStackLocal;
12300             *(instrDesc*)idAmd  = *(instrDesc*)id; // copy all the "core" fields
12301             memset((BYTE*)idAmd + sizeof(instrDesc), 0,
12302                    sizeof(instrDescAmd) - sizeof(instrDesc)); // zero out the tail that wasn't copied
12303
12304             idAmd->idInsFmt(IF_RWR_ARD);
12305             idAmd->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
12306             idAmd->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
12307             emitSetAmdDisp(idAmd, distVal); // set the displacement
12308             idAmd->idSetIsDspReloc(id->idIsDspReloc());
12309             assert(emitGetInsAmdAny(idAmd) == distVal); // make sure "disp" is stored properly
12310
12311             UNATIVE_OFFSET sz = emitInsSizeAM(idAmd, insCodeRM(ins));
12312             idAmd->idCodeSize(sz);
12313
12314             code = insCodeRM(ins);
12315             code |= (insEncodeReg345(ins, id->idReg1(), EA_PTRSIZE, &code) << 8);
12316
12317             dst = emitOutputAM(dst, idAmd, code, nullptr);
12318
12319             code = 0xCC;
12320
12321             // For forward jumps, record the address of the distance value
12322             // Hard-coded 4 here because we already output the displacement, as the last thing.
12323             id->idjTemp.idjAddr = (dstOffs > srcOffs) ? (dst - 4) : nullptr;
12324
12325             // We're done
12326             return dst;
12327         }
12328         else
12329         {
12330             code = 0xE8;
12331         }
12332
12333         if (ins != INS_mov)
12334         {
12335             dst += emitOutputByte(dst, code);
12336
12337             if (code & 0xFF00)
12338             {
12339                 dst += emitOutputByte(dst, code >> 8);
12340             }
12341         }
12342
12343         // For forward jumps, record the address of the distance value
12344         id->idjTemp.idjAddr = (dstOffs > srcOffs) ? dst : nullptr;
12345
12346         dst += emitOutputLong(dst, distVal);
12347
12348 #ifndef _TARGET_AMD64_ // all REL32 on AMD have to go through recordRelocation
12349         if (emitComp->opts.compReloc)
12350 #endif
12351         {
12352             if (!relAddr)
12353             {
12354                 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)distVal, IMAGE_REL_BASED_HIGHLOW);
12355             }
12356             else if (emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
12357             {
12358                 assert(id->idjKeepLong);
12359                 emitRecordRelocation((void*)(dst - sizeof(INT32)), dst + distVal, IMAGE_REL_BASED_REL32);
12360             }
12361         }
12362     }
12363
12364     // Local calls kill all registers
12365     if (ins == INS_call && (emitThisGCrefRegs | emitThisByrefRegs))
12366     {
12367         emitGCregDeadUpdMask(emitThisGCrefRegs | emitThisByrefRegs, dst);
12368     }
12369
12370     return dst;
12371 }
12372
12373 /*****************************************************************************
12374  *
12375  *  Append the machine code corresponding to the given instruction descriptor
12376  *  to the code block at '*dp'; the base of the code block is 'bp', and 'ig'
12377  *  is the instruction group that contains the instruction. Updates '*dp' to
12378  *  point past the generated code, and returns the size of the instruction
12379  *  descriptor in bytes.
12380  */
12381
12382 #ifdef _PREFAST_
12383 #pragma warning(push)
12384 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
12385 #endif
12386 size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
12387 {
12388     assert(emitIssuing);
12389
12390     BYTE*         dst           = *dp;
12391     size_t        sz            = sizeof(instrDesc);
12392     instruction   ins           = id->idIns();
12393     unsigned char callInstrSize = 0;
12394
12395 #ifdef DEBUG
12396     bool dspOffs = emitComp->opts.dspGCtbls;
12397 #endif // DEBUG
12398
12399     emitAttr size = id->idOpSize();
12400
12401     assert(REG_NA == (int)REG_NA);
12402
12403     assert(ins != INS_imul || size >= EA_4BYTE);                  // Has no 'w' bit
12404     assert(instrIs3opImul(id->idIns()) == 0 || size >= EA_4BYTE); // Has no 'w' bit
12405
12406     VARSET_TP GCvars(VarSetOps::UninitVal());
12407
12408     // What instruction format have we got?
12409     switch (id->idInsFmt())
12410     {
12411         code_t   code;
12412         unsigned regcode;
12413         int      args;
12414         CnsVal   cnsVal;
12415
12416         BYTE* addr;
12417         bool  recCall;
12418
12419         regMaskTP gcrefRegs;
12420         regMaskTP byrefRegs;
12421
12422         /********************************************************************/
12423         /*                        No operands                               */
12424         /********************************************************************/
12425         case IF_NONE:
12426             // the loop alignment pseudo instruction
12427             if (ins == INS_align)
12428             {
12429                 sz  = SMALL_IDSC_SIZE;
12430                 dst = emitOutputNOP(dst, (-(int)(size_t)dst) & 0x0f);
12431                 assert(((size_t)dst & 0x0f) == 0);
12432                 break;
12433             }
12434
12435             if (ins == INS_nop)
12436             {
12437                 dst = emitOutputNOP(dst, id->idCodeSize());
12438                 break;
12439             }
12440
12441             // the cdq instruction kills the EDX register implicitly
12442             if (ins == INS_cdq)
12443             {
12444                 emitGCregDeadUpd(REG_EDX, dst);
12445             }
12446
12447             assert(id->idGCref() == GCT_NONE);
12448
12449             code = insCodeMR(ins);
12450
12451 #ifdef _TARGET_AMD64_
12452             // Support only scalar AVX instructions and hence size is hard coded to 4-byte.
12453             code = AddVexPrefixIfNeeded(ins, code, EA_4BYTE);
12454
12455             if (ins == INS_cdq && TakesRexWPrefix(ins, id->idOpSize()))
12456             {
12457                 code = AddRexWPrefix(ins, code);
12458             }
12459             dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
12460 #endif
12461             // Is this a 'big' opcode?
12462             if (code & 0xFF000000)
12463             {
12464                 // The high word and then the low word
12465                 dst += emitOutputWord(dst, code >> 16);
12466                 code &= 0x0000FFFF;
12467                 dst += emitOutputWord(dst, code);
12468             }
12469             else if (code & 0x00FF0000)
12470             {
12471                 // The high byte and then the low word
12472                 dst += emitOutputByte(dst, code >> 16);
12473                 code &= 0x0000FFFF;
12474                 dst += emitOutputWord(dst, code);
12475             }
12476             else if (code & 0xFF00)
12477             {
12478                 // The 2 byte opcode
12479                 dst += emitOutputWord(dst, code);
12480             }
12481             else
12482             {
12483                 // The 1 byte opcode
12484                 dst += emitOutputByte(dst, code);
12485             }
12486
12487             break;
12488
12489         /********************************************************************/
12490         /*                Simple constant, local label, method              */
12491         /********************************************************************/
12492
12493         case IF_CNS:
12494             dst = emitOutputIV(dst, id);
12495             sz  = emitSizeOfInsDsc(id);
12496             break;
12497
12498         case IF_LABEL:
12499         case IF_RWR_LABEL:
12500         case IF_SWR_LABEL:
12501             assert(id->idGCref() == GCT_NONE);
12502             assert(id->idIsBound());
12503
12504             // TODO-XArch-Cleanup: handle IF_RWR_LABEL in emitOutputLJ() or change it to emitOutputAM()?
12505             dst = emitOutputLJ(dst, id);
12506             sz  = (id->idInsFmt() == IF_SWR_LABEL ? sizeof(instrDescLbl) : sizeof(instrDescJmp));
12507             break;
12508
12509         case IF_METHOD:
12510         case IF_METHPTR:
12511             // Assume we'll be recording this call
12512             recCall = true;
12513
12514             // Get hold of the argument count and field Handle
12515             args = emitGetInsCDinfo(id);
12516
12517             // Is this a "fat" call descriptor?
12518             if (id->idIsLargeCall())
12519             {
12520                 instrDescCGCA* idCall = (instrDescCGCA*)id;
12521                 gcrefRegs             = idCall->idcGcrefRegs;
12522                 byrefRegs             = idCall->idcByrefRegs;
12523                 VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
12524                 sz = sizeof(instrDescCGCA);
12525             }
12526             else
12527             {
12528                 assert(!id->idIsLargeDsp());
12529                 assert(!id->idIsLargeCns());
12530
12531                 gcrefRegs = emitDecodeCallGCregs(id);
12532                 byrefRegs = 0;
12533                 VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
12534                 sz = sizeof(instrDesc);
12535             }
12536
12537             addr = (BYTE*)id->idAddr()->iiaAddr;
12538             assert(addr != nullptr);
12539
12540             // Some helpers don't get recorded in GC tables
12541             if (id->idIsNoGC())
12542             {
12543                 recCall = false;
12544             }
12545
12546             // What kind of a call do we have here?
12547             if (id->idInsFmt() == IF_METHPTR)
12548             {
12549                 // This is call indirect via a method pointer
12550
12551                 code = insCodeMR(ins);
12552                 if (ins == INS_i_jmp)
12553                 {
12554                     code |= 1;
12555                 }
12556
12557                 if (id->idIsDspReloc())
12558                 {
12559                     dst += emitOutputWord(dst, code | 0x0500);
12560 #ifdef _TARGET_AMD64_
12561                     dst += emitOutputLong(dst, 0);
12562 #else
12563                     dst += emitOutputLong(dst, (int)addr);
12564 #endif
12565                     emitRecordRelocation((void*)(dst - sizeof(int)), addr, IMAGE_REL_BASED_DISP32);
12566                 }
12567                 else
12568                 {
12569 #ifdef _TARGET_X86_
12570                     dst += emitOutputWord(dst, code | 0x0500);
12571 #else  //_TARGET_AMD64_
12572                     // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
12573                     // This addr mode should never be used while generating relocatable ngen code nor if
12574                     // the addr can be encoded as pc-relative address.
12575                     noway_assert(!emitComp->opts.compReloc);
12576                     noway_assert(codeGen->genAddrRelocTypeHint((size_t)addr) != IMAGE_REL_BASED_REL32);
12577                     noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (ssize_t)addr);
12578
12579                     // This requires, specifying a SIB byte after ModRM byte.
12580                     dst += emitOutputWord(dst, code | 0x0400);
12581                     dst += emitOutputByte(dst, 0x25);
12582 #endif //_TARGET_AMD64_
12583                     dst += emitOutputLong(dst, static_cast<int>(reinterpret_cast<intptr_t>(addr)));
12584                 }
12585                 goto DONE_CALL;
12586             }
12587
12588             // Else
12589             // This is call direct where we know the target, thus we can
12590             // use a direct call; the target to jump to is in iiaAddr.
12591             assert(id->idInsFmt() == IF_METHOD);
12592
12593             // Output the call opcode followed by the target distance
12594             dst += (ins == INS_l_jmp) ? emitOutputByte(dst, insCode(ins)) : emitOutputByte(dst, insCodeMI(ins));
12595
12596             ssize_t offset;
12597 #ifdef _TARGET_AMD64_
12598             // All REL32 on Amd64 go through recordRelocation.  Here we will output zero to advance dst.
12599             offset = 0;
12600             assert(id->idIsDspReloc());
12601 #else
12602             // Calculate PC relative displacement.
12603             // Although you think we should be using sizeof(void*), the x86 and x64 instruction set
12604             // only allow a 32-bit offset, so we correctly use sizeof(INT32)
12605             offset = addr - (dst + sizeof(INT32));
12606 #endif
12607
12608             dst += emitOutputLong(dst, offset);
12609
12610             if (id->idIsDspReloc())
12611             {
12612                 emitRecordRelocation((void*)(dst - sizeof(INT32)), addr, IMAGE_REL_BASED_REL32);
12613             }
12614
12615         DONE_CALL:
12616
12617             /* We update the GC info before the call as the variables cannot be
12618                used by the call. Killing variables before the call helps with
12619                boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029.
12620                If we ever track aliased variables (which could be used by the
12621                call), we would have to keep them alive past the call.
12622              */
12623             assert(FitsIn<unsigned char>(dst - *dp));
12624             callInstrSize = static_cast<unsigned char>(dst - *dp);
12625             emitUpdateLiveGCvars(GCvars, *dp);
12626
12627             // If the method returns a GC ref, mark EAX appropriately
12628             if (id->idGCref() == GCT_GCREF)
12629             {
12630                 gcrefRegs |= RBM_EAX;
12631             }
12632             else if (id->idGCref() == GCT_BYREF)
12633             {
12634                 byrefRegs |= RBM_EAX;
12635             }
12636
12637 #ifdef UNIX_AMD64_ABI
12638             // If is a multi-register return method is called, mark RDX appropriately (for System V AMD64).
12639             if (id->idIsLargeCall())
12640             {
12641                 instrDescCGCA* idCall = (instrDescCGCA*)id;
12642                 if (idCall->idSecondGCref() == GCT_GCREF)
12643                 {
12644                     gcrefRegs |= RBM_RDX;
12645                 }
12646                 else if (idCall->idSecondGCref() == GCT_BYREF)
12647                 {
12648                     byrefRegs |= RBM_RDX;
12649                 }
12650             }
12651 #endif // UNIX_AMD64_ABI
12652
12653             // If the GC register set has changed, report the new set
12654             if (gcrefRegs != emitThisGCrefRegs)
12655             {
12656                 emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);
12657             }
12658
12659             if (byrefRegs != emitThisByrefRegs)
12660             {
12661                 emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);
12662             }
12663
12664             if (recCall || args)
12665             {
12666                 // For callee-pop, all arguments will be popped  after the call.
12667                 // For caller-pop, any GC arguments will go dead after the call.
12668
12669                 assert(callInstrSize != 0);
12670
12671                 if (args >= 0)
12672                 {
12673                     emitStackPop(dst, /*isCall*/ true, callInstrSize, args);
12674                 }
12675                 else
12676                 {
12677                     emitStackKillArgs(dst, -args, callInstrSize);
12678                 }
12679             }
12680
12681             // Do we need to record a call location for GC purposes?
12682             if (!emitFullGCinfo && recCall)
12683             {
12684                 assert(callInstrSize != 0);
12685                 emitRecordGCcall(dst, callInstrSize);
12686             }
12687
12688 #ifdef DEBUG
12689             if (ins == INS_call)
12690             {
12691                 emitRecordCallSite(emitCurCodeOffs(*dp), id->idDebugOnlyInfo()->idCallSig,
12692                                    (CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
12693             }
12694 #endif // DEBUG
12695
12696             break;
12697
12698         /********************************************************************/
12699         /*                      One register operand                        */
12700         /********************************************************************/
12701
12702         case IF_RRD:
12703         case IF_RWR:
12704         case IF_RRW:
12705             dst = emitOutputR(dst, id);
12706             sz  = SMALL_IDSC_SIZE;
12707             break;
12708
12709         /********************************************************************/
12710         /*                 Register and register/constant                   */
12711         /********************************************************************/
12712
12713         case IF_RRW_SHF:
12714             code = insCodeMR(ins);
12715             // Emit the VEX prefix if it exists
12716             code = AddVexPrefixIfNeeded(ins, code, size);
12717             code = insEncodeMRreg(ins, id->idReg1(), size, code);
12718
12719             // set the W bit
12720             if (size != EA_1BYTE)
12721             {
12722                 code |= 1;
12723             }
12724
12725             // Emit the REX prefix if it exists
12726             if (TakesRexWPrefix(ins, size))
12727             {
12728                 code = AddRexWPrefix(ins, code);
12729             }
12730
12731             // Output a size prefix for a 16-bit operand
12732             if (size == EA_2BYTE)
12733             {
12734                 dst += emitOutputByte(dst, 0x66);
12735             }
12736
12737             dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
12738             dst += emitOutputWord(dst, code);
12739             dst += emitOutputByte(dst, emitGetInsSC(id));
12740             sz = emitSizeOfInsDsc(id);
12741
12742             // Update GC info.
12743             assert(!id->idGCref());
12744             emitGCregDeadUpd(id->idReg1(), dst);
12745             break;
12746
12747         case IF_RRD_RRD:
12748         case IF_RWR_RRD:
12749         case IF_RRW_RRD:
12750         case IF_RRW_RRW:
12751             dst = emitOutputRR(dst, id);
12752             sz  = SMALL_IDSC_SIZE;
12753             break;
12754
12755         case IF_RRD_CNS:
12756         case IF_RWR_CNS:
12757         case IF_RRW_CNS:
12758             dst = emitOutputRI(dst, id);
12759             sz  = emitSizeOfInsDsc(id);
12760             break;
12761
12762         case IF_RWR_RRD_RRD:
12763             dst = emitOutputRRR(dst, id);
12764             sz  = emitSizeOfInsDsc(id);
12765             break;
12766         case IF_RWR_RRD_RRD_CNS:
12767         case IF_RWR_RRD_RRD_RRD:
12768             dst = emitOutputRRR(dst, id);
12769             sz  = emitSizeOfInsDsc(id);
12770             dst += emitOutputByte(dst, emitGetInsSC(id));
12771             break;
12772
12773         case IF_RRW_RRW_CNS:
12774             assert(id->idGCref() == GCT_NONE);
12775
12776             // Get the 'base' opcode (it's a big one)
12777             // Also, determine which operand goes where in the ModRM byte.
12778             regNumber mReg;
12779             regNumber rReg;
12780             if (hasCodeMR(ins))
12781             {
12782                 code = insCodeMR(ins);
12783                 // Emit the VEX prefix if it exists
12784                 code = AddVexPrefixIfNeeded(ins, code, size);
12785                 code = insEncodeMRreg(ins, code);
12786                 mReg = id->idReg1();
12787                 rReg = id->idReg2();
12788             }
12789             else if (hasCodeMI(ins))
12790             {
12791                 code = insCodeMI(ins);
12792
12793                 // Emit the VEX prefix if it exists
12794                 code = AddVexPrefixIfNeeded(ins, code, size);
12795
12796                 assert((code & 0xC000) == 0);
12797                 code |= 0xC000;
12798
12799                 mReg = id->idReg2();
12800
12801                 // The left and right shifts use the same encoding, and are distinguished by the Reg/Opcode field.
12802                 rReg = getSseShiftRegNumber(ins);
12803             }
12804             else
12805             {
12806                 code = insCodeRM(ins);
12807                 // Emit the VEX prefix if it exists
12808                 code = AddVexPrefixIfNeeded(ins, code, size);
12809                 code = insEncodeRMreg(ins, code);
12810                 mReg = id->idReg2();
12811                 rReg = id->idReg1();
12812             }
12813             assert(code & 0x00FF0000);
12814
12815             if (TakesRexWPrefix(ins, size))
12816             {
12817                 code = AddRexWPrefix(ins, code);
12818             }
12819
12820             if (TakesVexPrefix(ins))
12821             {
12822                 if (IsDstDstSrcAVXInstruction(ins))
12823                 {
12824                     // Encode source/dest operand reg in 'vvvv' bits in 1's complement form
12825                     // This code will have to change when we support 3 operands.
12826                     // For now, we always overload this source with the destination (always reg1).
12827                     // (Though we will need to handle the few ops that can have the 'vvvv' bits as destination,
12828                     // e.g. pslldq, when/if we support those instructions with 2 registers.)
12829                     // (see x64 manual Table 2-9. Instructions with a VEX.vvvv destination)
12830                     code = insEncodeReg3456(ins, id->idReg1(), size, code);
12831                 }
12832                 else if (IsDstSrcSrcAVXInstruction(ins))
12833                 {
12834                     // This is a "merge" move instruction.
12835                     // Encode source operand reg in 'vvvv' bits in 1's complement form
12836                     code = insEncodeReg3456(ins, id->idReg2(), size, code);
12837                 }
12838             }
12839
12840             regcode = (insEncodeReg345(ins, rReg, size, &code) | insEncodeReg012(ins, mReg, size, &code));
12841
12842             // Output the REX or VEX prefix, if one is needed
12843             dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
12844
12845             if (code & 0xFF000000)
12846             {
12847                 // Output the highest word of the opcode
12848                 dst += emitOutputWord(dst, code >> 16);
12849                 code &= 0x0000FFFF;
12850
12851                 if (Is4ByteSSEInstruction(ins))
12852                 {
12853                     // Output 3rd byte of the opcode
12854                     dst += emitOutputByte(dst, code);
12855                     code &= 0xFF00;
12856                 }
12857             }
12858             else if (code & 0x00FF0000)
12859             {
12860                 dst += emitOutputByte(dst, code >> 16);
12861                 code &= 0x0000FFFF;
12862             }
12863
12864             // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
12865             if ((code & 0xFF00) == 0xC000)
12866             {
12867                 dst += emitOutputWord(dst, code | (regcode << 8));
12868             }
12869             else if ((code & 0xFF) == 0x00)
12870             {
12871                 // This case happens for some SSE/AVX instructions only
12872                 assert(IsAVXInstruction(ins) || Is4ByteSSEInstruction(ins));
12873
12874                 dst += emitOutputByte(dst, (code >> 8) & 0xFF);
12875                 dst += emitOutputByte(dst, (0xC0 | regcode));
12876             }
12877             else
12878             {
12879                 dst += emitOutputWord(dst, code);
12880                 dst += emitOutputByte(dst, (0xC0 | regcode));
12881             }
12882
12883             dst += emitOutputByte(dst, emitGetInsSC(id));
12884             sz = emitSizeOfInsDsc(id);
12885
12886             // Kill any GC ref in the destination register if necessary.
12887             if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
12888             {
12889                 emitGCregDeadUpd(id->idReg1(), dst);
12890             }
12891             break;
12892
12893         /********************************************************************/
12894         /*                      Address mode operand                        */
12895         /********************************************************************/
12896
12897         case IF_ARD:
12898         case IF_AWR:
12899         case IF_ARW:
12900
12901             dst = emitCodeWithInstructionSize(dst, emitOutputAM(dst, id, insCodeMR(ins)), &callInstrSize);
12902
12903             switch (ins)
12904             {
12905                 case INS_call:
12906
12907                 IND_CALL:
12908                     // Get hold of the argument count and method handle
12909                     args = emitGetInsCIargs(id);
12910
12911                     // Is this a "fat" call descriptor?
12912                     if (id->idIsLargeCall())
12913                     {
12914                         instrDescCGCA* idCall = (instrDescCGCA*)id;
12915
12916                         gcrefRegs = idCall->idcGcrefRegs;
12917                         byrefRegs = idCall->idcByrefRegs;
12918                         VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
12919                         sz = sizeof(instrDescCGCA);
12920                     }
12921                     else
12922                     {
12923                         assert(!id->idIsLargeDsp());
12924                         assert(!id->idIsLargeCns());
12925
12926                         gcrefRegs = emitDecodeCallGCregs(id);
12927                         byrefRegs = 0;
12928                         VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
12929                         sz = sizeof(instrDesc);
12930                     }
12931
12932                     recCall = true;
12933
12934                     goto DONE_CALL;
12935
12936                 default:
12937                     sz = emitSizeOfInsDsc(id);
12938                     break;
12939             }
12940             break;
12941
12942         case IF_RRW_ARD_CNS:
12943         case IF_RWR_ARD_CNS:
12944             assert(IsSSEOrAVXInstruction(ins));
12945             emitGetInsAmdCns(id, &cnsVal);
12946             code = insCodeRM(ins);
12947
12948             // Special case 4-byte AVX instructions
12949             if (EncodedBySSE38orSSE3A(ins))
12950             {
12951                 dst = emitOutputAM(dst, id, code, &cnsVal);
12952             }
12953             else
12954             {
12955                 code    = AddVexPrefixIfNeeded(ins, code, size);
12956                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12957                 dst     = emitOutputAM(dst, id, code | regcode, &cnsVal);
12958             }
12959
12960             sz = emitSizeOfInsDsc(id);
12961             break;
12962
12963         case IF_AWR_RRD_CNS:
12964             assert(ins == INS_vextracti128 || ins == INS_vextractf128);
12965             assert(UseVEXEncoding());
12966             emitGetInsAmdCns(id, &cnsVal);
12967             code = insCodeMR(ins);
12968             dst  = emitOutputAM(dst, id, code, &cnsVal);
12969             sz   = emitSizeOfInsDsc(id);
12970             break;
12971
12972         case IF_RRD_ARD:
12973         case IF_RWR_ARD:
12974         case IF_RRW_ARD:
12975         case IF_RWR_RRD_ARD:
12976         {
12977             code = insCodeRM(ins);
12978             if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
12979             {
12980                 dst = emitOutputAM(dst, id, code);
12981             }
12982             else
12983             {
12984                 code    = AddVexPrefixIfNeeded(ins, code, size);
12985                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12986                 dst     = emitOutputAM(dst, id, code | regcode);
12987             }
12988             sz = emitSizeOfInsDsc(id);
12989             break;
12990         }
12991
12992         case IF_RWR_ARD_RRD:
12993         {
12994             assert(IsAVX2GatherInstruction(ins));
12995             code = insCodeRM(ins);
12996             dst  = emitOutputAM(dst, id, code);
12997             sz   = emitSizeOfInsDsc(id);
12998             break;
12999         }
13000
13001         case IF_RWR_RRD_ARD_CNS:
13002         case IF_RWR_RRD_ARD_RRD:
13003         {
13004             assert(IsSSEOrAVXInstruction(ins));
13005             emitGetInsAmdCns(id, &cnsVal);
13006             code = insCodeRM(ins);
13007             if (EncodedBySSE38orSSE3A(ins))
13008             {
13009                 dst = emitOutputAM(dst, id, code, &cnsVal);
13010             }
13011             else
13012             {
13013                 code    = AddVexPrefixIfNeeded(ins, code, size);
13014                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13015                 dst     = emitOutputAM(dst, id, code | regcode, &cnsVal);
13016             }
13017             sz = emitSizeOfInsDsc(id);
13018             break;
13019         }
13020
13021         case IF_ARD_RRD:
13022         case IF_AWR_RRD:
13023         case IF_ARW_RRD:
13024             code    = insCodeMR(ins);
13025             code    = AddVexPrefixIfNeeded(ins, code, size);
13026             regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13027             dst     = emitOutputAM(dst, id, code | regcode);
13028             sz      = emitSizeOfInsDsc(id);
13029             break;
13030
13031         case IF_AWR_RRD_RRD:
13032         {
13033             code = insCodeMR(ins);
13034             code = AddVexPrefixIfNeeded(ins, code, size);
13035             dst  = emitOutputAM(dst, id, code);
13036             sz   = emitSizeOfInsDsc(id);
13037             break;
13038         }
13039
13040         case IF_ARD_CNS:
13041         case IF_AWR_CNS:
13042         case IF_ARW_CNS:
13043             emitGetInsAmdCns(id, &cnsVal);
13044             dst = emitOutputAM(dst, id, insCodeMI(ins), &cnsVal);
13045             sz  = emitSizeOfInsDsc(id);
13046             break;
13047
13048         case IF_ARW_SHF:
13049             emitGetInsAmdCns(id, &cnsVal);
13050             dst = emitOutputAM(dst, id, insCodeMR(ins), &cnsVal);
13051             sz  = emitSizeOfInsDsc(id);
13052             break;
13053
13054         /********************************************************************/
13055         /*                      Stack-based operand                         */
13056         /********************************************************************/
13057
13058         case IF_SRD:
13059         case IF_SWR:
13060         case IF_SRW:
13061
13062             assert(ins != INS_pop_hide);
13063             if (ins == INS_pop)
13064             {
13065                 // The offset in "pop [ESP+xxx]" is relative to the new ESP value
13066                 CLANG_FORMAT_COMMENT_ANCHOR;
13067
13068 #if !FEATURE_FIXED_OUT_ARGS
13069                 emitCurStackLvl -= sizeof(int);
13070 #endif
13071                 dst = emitOutputSV(dst, id, insCodeMR(ins));
13072
13073 #if !FEATURE_FIXED_OUT_ARGS
13074                 emitCurStackLvl += sizeof(int);
13075 #endif
13076                 break;
13077             }
13078
13079             dst = emitCodeWithInstructionSize(dst, emitOutputSV(dst, id, insCodeMR(ins)), &callInstrSize);
13080
13081             if (ins == INS_call)
13082             {
13083                 goto IND_CALL;
13084             }
13085
13086             break;
13087
13088         case IF_SRD_CNS:
13089         case IF_SWR_CNS:
13090         case IF_SRW_CNS:
13091             emitGetInsCns(id, &cnsVal);
13092             dst = emitOutputSV(dst, id, insCodeMI(ins), &cnsVal);
13093             sz  = emitSizeOfInsDsc(id);
13094             break;
13095
13096         case IF_SRW_SHF:
13097             emitGetInsCns(id, &cnsVal);
13098             dst = emitOutputSV(dst, id, insCodeMR(ins), &cnsVal);
13099             sz  = emitSizeOfInsDsc(id);
13100             break;
13101
13102         case IF_RRW_SRD_CNS:
13103         case IF_RWR_SRD_CNS:
13104             assert(IsSSEOrAVXInstruction(ins));
13105             emitGetInsCns(id, &cnsVal);
13106             code = insCodeRM(ins);
13107
13108             // Special case 4-byte AVX instructions
13109             if (EncodedBySSE38orSSE3A(ins))
13110             {
13111                 dst = emitOutputSV(dst, id, code, &cnsVal);
13112             }
13113             else
13114             {
13115                 code = AddVexPrefixIfNeeded(ins, code, size);
13116
13117                 // In case of AVX instructions that take 3 operands, encode reg1 as first source.
13118                 // Note that reg1 is both a source and a destination.
13119                 //
13120                 // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
13121                 // now we use the single source as source1 and source2.
13122                 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
13123                 if (IsDstDstSrcAVXInstruction(ins))
13124                 {
13125                     // encode source operand reg in 'vvvv' bits in 1's complement form
13126                     code = insEncodeReg3456(ins, id->idReg1(), size, code);
13127                 }
13128
13129                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13130                 dst     = emitOutputSV(dst, id, code | regcode, &cnsVal);
13131             }
13132
13133             sz = emitSizeOfInsDsc(id);
13134             break;
13135
13136         case IF_RRD_SRD:
13137         case IF_RWR_SRD:
13138         case IF_RRW_SRD:
13139         {
13140             code = insCodeRM(ins);
13141
13142             // 4-byte AVX instructions are special cased inside emitOutputSV
13143             // since they do not have space to encode ModRM byte.
13144             if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
13145             {
13146                 dst = emitOutputSV(dst, id, code);
13147             }
13148             else
13149             {
13150                 code = AddVexPrefixIfNeeded(ins, code, size);
13151
13152                 if (IsDstDstSrcAVXInstruction(ins))
13153                 {
13154                     // encode source operand reg in 'vvvv' bits in 1's complement form
13155                     code = insEncodeReg3456(ins, id->idReg1(), size, code);
13156                 }
13157
13158                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13159                 dst     = emitOutputSV(dst, id, code | regcode);
13160             }
13161
13162             sz = emitSizeOfInsDsc(id);
13163             break;
13164         }
13165
13166         case IF_RWR_RRD_SRD:
13167         {
13168             // This should only be called on AVX instructions
13169             assert(IsAVXInstruction(ins));
13170
13171             code = insCodeRM(ins);
13172             code = AddVexPrefixIfNeeded(ins, code, size);
13173             code = insEncodeReg3456(ins, id->idReg2(), size,
13174                                     code); // encode source operand reg in 'vvvv' bits in 1's complement form
13175
13176             // 4-byte AVX instructions are special cased inside emitOutputSV
13177             // since they do not have space to encode ModRM byte.
13178             if (EncodedBySSE38orSSE3A(ins))
13179             {
13180                 dst = emitOutputSV(dst, id, code);
13181             }
13182             else
13183             {
13184                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13185                 dst     = emitOutputSV(dst, id, code | regcode);
13186             }
13187             break;
13188         }
13189
13190         case IF_RWR_RRD_SRD_CNS:
13191         case IF_RWR_RRD_SRD_RRD:
13192         {
13193             // This should only be called on AVX instructions
13194             assert(IsAVXInstruction(ins));
13195             emitGetInsCns(id, &cnsVal);
13196
13197             code = insCodeRM(ins);
13198             code = AddVexPrefixIfNeeded(ins, code, size);
13199             code = insEncodeReg3456(ins, id->idReg2(), size,
13200                                     code); // encode source operand reg in 'vvvv' bits in 1's complement form
13201
13202             // 4-byte AVX instructions are special cased inside emitOutputSV
13203             // since they do not have space to encode ModRM byte.
13204             if (EncodedBySSE38orSSE3A(ins))
13205             {
13206                 dst = emitOutputSV(dst, id, code, &cnsVal);
13207             }
13208             else
13209             {
13210                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13211                 dst     = emitOutputSV(dst, id, code | regcode, &cnsVal);
13212             }
13213
13214             sz = emitSizeOfInsDsc(id);
13215             break;
13216         }
13217
13218         case IF_SRD_RRD:
13219         case IF_SWR_RRD:
13220         case IF_SRW_RRD:
13221             code = insCodeMR(ins);
13222             code = AddVexPrefixIfNeeded(ins, code, size);
13223
13224             // In case of AVX instructions that take 3 operands, encode reg1 as first source.
13225             // Note that reg1 is both a source and a destination.
13226             //
13227             // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
13228             // now we use the single source as source1 and source2.
13229             // For this format, moves do not support a third operand, so we only need to handle the binary ops.
13230             if (IsDstDstSrcAVXInstruction(ins))
13231             {
13232                 // encode source operand reg in 'vvvv' bits in 1's complement form
13233                 code = insEncodeReg3456(ins, id->idReg1(), size, code);
13234             }
13235
13236             regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13237             dst     = emitOutputSV(dst, id, code | regcode);
13238             break;
13239
13240         /********************************************************************/
13241         /*                    Direct memory address                         */
13242         /********************************************************************/
13243
13244         case IF_MRD:
13245         case IF_MRW:
13246         case IF_MWR:
13247
13248             noway_assert(ins != INS_call);
13249             dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500);
13250             sz  = emitSizeOfInsDsc(id);
13251             break;
13252
13253         case IF_MRD_OFF:
13254             dst = emitOutputCV(dst, id, insCodeMI(ins));
13255             break;
13256
13257         case IF_RRW_MRD_CNS:
13258         case IF_RWR_MRD_CNS:
13259             assert(IsSSEOrAVXInstruction(ins));
13260             emitGetInsDcmCns(id, &cnsVal);
13261             code = insCodeRM(ins);
13262
13263             // Special case 4-byte AVX instructions
13264             if (EncodedBySSE38orSSE3A(ins))
13265             {
13266                 dst = emitOutputCV(dst, id, code, &cnsVal);
13267             }
13268             else
13269             {
13270                 code = AddVexPrefixIfNeeded(ins, code, size);
13271
13272                 // In case of AVX instructions that take 3 operands, encode reg1 as first source.
13273                 // Note that reg1 is both a source and a destination.
13274                 //
13275                 // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
13276                 // now we use the single source as source1 and source2.
13277                 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
13278                 if (IsDstDstSrcAVXInstruction(ins))
13279                 {
13280                     // encode source operand reg in 'vvvv' bits in 1's complement form
13281                     code = insEncodeReg3456(ins, id->idReg1(), size, code);
13282                 }
13283
13284                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13285                 dst     = emitOutputCV(dst, id, code | regcode | 0x0500, &cnsVal);
13286             }
13287
13288             sz = emitSizeOfInsDsc(id);
13289             break;
13290
13291         case IF_MWR_RRD_CNS:
13292             assert(ins == INS_vextracti128 || ins == INS_vextractf128);
13293             assert(UseVEXEncoding());
13294             emitGetInsDcmCns(id, &cnsVal);
13295             code = insCodeMR(ins);
13296             // only AVX2 vextracti128 and AVX vextractf128 can reach this path,
13297             // they do not need VEX.vvvv to encode the register operand
13298             dst = emitOutputCV(dst, id, code, &cnsVal);
13299             sz  = emitSizeOfInsDsc(id);
13300             break;
13301
13302         case IF_RRD_MRD:
13303         case IF_RWR_MRD:
13304         case IF_RRW_MRD:
13305         {
13306             code = insCodeRM(ins);
13307
13308             // Special case 4-byte AVX instructions
13309             if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
13310             {
13311                 dst = emitOutputCV(dst, id, code);
13312             }
13313             else
13314             {
13315                 code = AddVexPrefixIfNeeded(ins, code, size);
13316
13317                 if (IsDstDstSrcAVXInstruction(ins))
13318                 {
13319                     // encode source operand reg in 'vvvv' bits in 1's complement form
13320                     code = insEncodeReg3456(ins, id->idReg1(), size, code);
13321                 }
13322
13323                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13324                 dst     = emitOutputCV(dst, id, code | regcode | 0x0500);
13325             }
13326
13327             sz = emitSizeOfInsDsc(id);
13328             break;
13329         }
13330
13331         case IF_RWR_RRD_MRD:
13332         {
13333             // This should only be called on AVX instructions
13334             assert(IsAVXInstruction(ins));
13335
13336             code = insCodeRM(ins);
13337             code = AddVexPrefixIfNeeded(ins, code, size);
13338             code = insEncodeReg3456(ins, id->idReg2(), size,
13339                                     code); // encode source operand reg in 'vvvv' bits in 1's complement form
13340
13341             // Special case 4-byte AVX instructions
13342             if (EncodedBySSE38orSSE3A(ins))
13343             {
13344                 dst = emitOutputCV(dst, id, code);
13345             }
13346             else
13347             {
13348                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13349                 dst     = emitOutputCV(dst, id, code | regcode | 0x0500);
13350             }
13351             sz = emitSizeOfInsDsc(id);
13352             break;
13353         }
13354
13355         case IF_RWR_RRD_MRD_CNS:
13356         case IF_RWR_RRD_MRD_RRD:
13357         {
13358             // This should only be called on AVX instructions
13359             assert(IsAVXInstruction(ins));
13360             emitGetInsCns(id, &cnsVal);
13361
13362             code = insCodeRM(ins);
13363             code = AddVexPrefixIfNeeded(ins, code, size);
13364             code = insEncodeReg3456(ins, id->idReg2(), size,
13365                                     code); // encode source operand reg in 'vvvv' bits in 1's complement form
13366
13367             // Special case 4-byte AVX instructions
13368             if (EncodedBySSE38orSSE3A(ins))
13369             {
13370                 dst = emitOutputCV(dst, id, code, &cnsVal);
13371             }
13372             else
13373             {
13374                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13375                 dst     = emitOutputCV(dst, id, code | regcode | 0x0500, &cnsVal);
13376             }
13377             sz = emitSizeOfInsDsc(id);
13378             break;
13379         }
13380
13381         case IF_RWR_MRD_OFF:
13382             code = insCode(ins);
13383             code = AddVexPrefixIfNeeded(ins, code, size);
13384
13385             // In case of AVX instructions that take 3 operands, encode reg1 as first source.
13386             // Note that reg1 is both a source and a destination.
13387             //
13388             // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
13389             // now we use the single source as source1 and source2.
13390             // For this format, moves do not support a third operand, so we only need to handle the binary ops.
13391             if (IsDstDstSrcAVXInstruction(ins))
13392             {
13393                 // encode source operand reg in 'vvvv' bits in 1's complement form
13394                 code = insEncodeReg3456(ins, id->idReg1(), size, code);
13395             }
13396
13397             regcode = insEncodeReg012(id->idIns(), id->idReg1(), size, &code);
13398             dst     = emitOutputCV(dst, id, code | 0x30 | regcode);
13399             sz      = emitSizeOfInsDsc(id);
13400             break;
13401
13402         case IF_MRD_RRD:
13403         case IF_MWR_RRD:
13404         case IF_MRW_RRD:
13405             code = insCodeMR(ins);
13406             code = AddVexPrefixIfNeeded(ins, code, size);
13407
13408             // In case of AVX instructions that take 3 operands, encode reg1 as first source.
13409             // Note that reg1 is both a source and a destination.
13410             //
13411             // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
13412             // now we use the single source as source1 and source2.
13413             // For this format, moves do not support a third operand, so we only need to handle the binary ops.
13414             if (IsDstDstSrcAVXInstruction(ins))
13415             {
13416                 // encode source operand reg in 'vvvv' bits in 1's complement form
13417                 code = insEncodeReg3456(ins, id->idReg1(), size, code);
13418             }
13419
13420             regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13421             dst     = emitOutputCV(dst, id, code | regcode | 0x0500);
13422             sz      = emitSizeOfInsDsc(id);
13423             break;
13424
13425         case IF_MRD_CNS:
13426         case IF_MWR_CNS:
13427         case IF_MRW_CNS:
13428             emitGetInsDcmCns(id, &cnsVal);
13429             dst = emitOutputCV(dst, id, insCodeMI(ins) | 0x0500, &cnsVal);
13430             sz  = emitSizeOfInsDsc(id);
13431             break;
13432
13433         case IF_MRW_SHF:
13434             emitGetInsDcmCns(id, &cnsVal);
13435             dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500, &cnsVal);
13436             sz  = emitSizeOfInsDsc(id);
13437             break;
13438
13439         /********************************************************************/
13440         /*                            oops                                  */
13441         /********************************************************************/
13442
13443         default:
13444
13445 #ifdef DEBUG
13446             printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
13447             assert(!"don't know how to encode this instruction");
13448 #endif
13449             break;
13450     }
13451
13452     // Make sure we set the instruction descriptor size correctly
13453     assert(sz == emitSizeOfInsDsc(id));
13454
13455 #if !FEATURE_FIXED_OUT_ARGS
13456     bool updateStackLevel = !emitIGisInProlog(ig) && !emitIGisInEpilog(ig);
13457
13458 #if FEATURE_EH_FUNCLETS
13459     updateStackLevel = updateStackLevel && !emitIGisInFuncletProlog(ig) && !emitIGisInFuncletEpilog(ig);
13460 #endif // FEATURE_EH_FUNCLETS
13461
13462     // Make sure we keep the current stack level up to date
13463     if (updateStackLevel)
13464     {
13465         switch (ins)
13466         {
13467             case INS_push:
13468                 // Please note: {INS_push_hide,IF_LABEL} is used to push the address of the
13469                 // finally block for calling it locally for an op_leave.
13470                 emitStackPush(dst, id->idGCref());
13471                 break;
13472
13473             case INS_pop:
13474                 emitStackPop(dst, false, /*callInstrSize*/ 0, 1);
13475                 break;
13476
13477             case INS_sub:
13478                 // Check for "sub ESP, icon"
13479                 if (ins == INS_sub && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
13480                 {
13481                     assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
13482                     emitStackPushN(dst, (unsigned)(emitGetInsSC(id) / TARGET_POINTER_SIZE));
13483                 }
13484                 break;
13485
13486             case INS_add:
13487                 // Check for "add ESP, icon"
13488                 if (ins == INS_add && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
13489                 {
13490                     assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
13491                     emitStackPop(dst, /*isCall*/ false, /*callInstrSize*/ 0,
13492                                  (unsigned)(emitGetInsSC(id) / TARGET_POINTER_SIZE));
13493                 }
13494                 break;
13495
13496             default:
13497                 break;
13498         }
13499     }
13500
13501 #endif // !FEATURE_FIXED_OUT_ARGS
13502
13503     assert((int)emitCurStackLvl >= 0);
13504
13505     // Only epilog "instructions" and some pseudo-instrs
13506     // are allowed not to generate any code
13507
13508     assert(*dp != dst || emitInstHasNoCode(ins));
13509
13510 #ifdef DEBUG
13511     if (emitComp->opts.disAsm || emitComp->opts.dspEmit || emitComp->verbose)
13512     {
13513         emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(*dp), *dp, (dst - *dp));
13514     }
13515
13516     if (emitComp->compDebugBreak)
13517     {
13518         // Setting JitEmitPrintRefRegs=1 will print out emitThisGCrefRegs and emitThisByrefRegs
13519         // at the beginning of this method.
13520         if (JitConfig.JitEmitPrintRefRegs() != 0)
13521         {
13522             printf("Before emitOutputInstr for id->idDebugOnlyInfo()->idNum=0x%02x\n", id->idDebugOnlyInfo()->idNum);
13523             printf("  emitThisGCrefRegs(0x%p)=", emitComp->dspPtr(&emitThisGCrefRegs));
13524             printRegMaskInt(emitThisGCrefRegs);
13525             emitDispRegSet(emitThisGCrefRegs);
13526             printf("\n");
13527             printf("  emitThisByrefRegs(0x%p)=", emitComp->dspPtr(&emitThisByrefRegs));
13528             printRegMaskInt(emitThisByrefRegs);
13529             emitDispRegSet(emitThisByrefRegs);
13530             printf("\n");
13531         }
13532
13533         // For example, setting JitBreakEmitOutputInstr=a6 will break when this method is called to
13534         // emit instruction a6 (i.e. IN00a6 in jitdump).
13535         if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum)
13536         {
13537             assert(!"JitBreakEmitOutputInstr reached");
13538         }
13539     }
13540 #endif
13541
13542 #ifdef TRANSLATE_PDB
13543     if (*dp != dst)
13544     {
13545         // only map instruction groups to instruction groups
13546         MapCode(id->idDebugOnlyInfo()->idilStart, *dp);
13547     }
13548 #endif
13549
13550     *dp = dst;
13551
13552 #ifdef DEBUG
13553     if (ins == INS_mulEAX || ins == INS_imulEAX)
13554     {
13555         // INS_mulEAX has an implicit target of Edx:Eax. Make sure
13556         // that we detected this and cleared its GC-status.
13557
13558         assert(((RBM_EAX | RBM_EDX) & (emitThisGCrefRegs | emitThisByrefRegs)) == 0);
13559     }
13560
13561     if (instrIs3opImul(ins))
13562     {
13563         // The target of the 3-operand imul is implicitly encoded. Make sure
13564         // that we detected the implicit register and cleared its GC-status.
13565
13566         regMaskTP regMask = genRegMask(inst3opImulReg(ins));
13567         assert((regMask & (emitThisGCrefRegs | emitThisByrefRegs)) == 0);
13568     }
13569 #endif
13570
13571     return sz;
13572 }
13573 #ifdef _PREFAST_
13574 #pragma warning(pop)
13575 #endif
13576
13577 /*****************************************************************************/
13578 /*****************************************************************************/
13579
13580 #endif // defined(_TARGET_XARCH_)