src/jit/emitxarch.cpp

   1 // Licensed to the .NET Foundation under one or more agreements.
   2 // The .NET Foundation licenses this file to you under the MIT license.
   3 // See the LICENSE file in the project root for more information.
   4
   5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
   6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
   7 XX                                                                           XX
   8 XX                             emitX86.cpp                                   XX
   9 XX                                                                           XX
  10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
  11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
  12 */
  13
  14 #include "jitpch.h"
  15 #ifdef _MSC_VER
  16 #pragma hdrstop
  17 #endif
  18
  19 #if defined(_TARGET_XARCH_)
  20
  21 /*****************************************************************************/
  22 /*****************************************************************************/
  23
  24 #include "instr.h"
  25 #include "emit.h"
  26 #include "codegen.h"
  27
  28 bool IsSSE2Instruction(instruction ins)
  29 {
  30     return (ins >= INS_FIRST_SSE2_INSTRUCTION && ins <= INS_LAST_SSE2_INSTRUCTION);
  31 }
  32
  33 bool IsSSE4Instruction(instruction ins)
  34 {
  35 #ifdef LEGACY_BACKEND
  36     return false;
  37 #else
  38     return (ins >= INS_FIRST_SSE4_INSTRUCTION && ins <= INS_LAST_SSE4_INSTRUCTION);
  39 #endif
  40 }
  41
  42 bool IsSSEOrAVXInstruction(instruction ins)
  43 {
  44 #ifndef LEGACY_BACKEND
  45     return (ins >= INS_FIRST_SSE2_INSTRUCTION && ins <= INS_LAST_AVX_INSTRUCTION);
  46 #else  // !LEGACY_BACKEND
  47     return IsSSE2Instruction(ins);
  48 #endif // LEGACY_BACKEND
  49 }
  50
  51 bool IsAVXOnlyInstruction(instruction ins)
  52 {
  53 #ifndef LEGACY_BACKEND
  54     return (ins >= INS_FIRST_AVX_INSTRUCTION && ins <= INS_LAST_AVX_INSTRUCTION);
  55 #else
  56     return false;
  57 #endif
  58 }
  59
  60 bool emitter::IsAVXInstruction(instruction ins)
  61 {
  62 #ifndef LEGACY_BACKEND
  63     return (UseVEXEncoding() && IsSSEOrAVXInstruction(ins));
  64 #else
  65     return false;
  66 #endif
  67 }
  68
  69 #ifndef LEGACY_BACKEND
  70 // Returns true if the AVX instruction is a binary operator that requires 3 operands.
  71 // When we emit an instruction with only two operands, we will duplicate the destination
  72 // as a source.
  73 // TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to
  74 // be formalized by adding an additional field to instruction table to
  75 // to indicate whether a 3-operand instruction.
  76 bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
  77 {
  78     switch (ins)
  79     {
  80         case INS_addpd:
  81         case INS_addps:
  82         case INS_addsd:
  83         case INS_addss:
  84         case INS_addsubpd:
  85         case INS_addsubps:
  86         case INS_andnpd:
  87         case INS_andnps:
  88         case INS_andpd:
  89         case INS_andps:
  90         case INS_blendpd:
  91         case INS_blendps:
  92         case INS_cmppd:
  93         case INS_cmpps:
  94         case INS_cmpsd:
  95         case INS_cmpss:
  96         case INS_cvtsi2sd:
  97         case INS_cvtsi2ss:
  98         case INS_cvtsd2ss:
  99         case INS_cvtss2sd:
 100         case INS_divpd:
 101         case INS_divps:
 102         case INS_divsd:
 103         case INS_divss:
 104         case INS_dppd:
 105         case INS_dpps:
 106         case INS_haddpd:
 107         case INS_haddps:
 108         case INS_hsubpd:
 109         case INS_hsubps:
 110         case INS_insertps:
 111         case INS_maxpd:
 112         case INS_maxps:
 113         case INS_maxsd:
 114         case INS_maxss:
 115         case INS_minpd:
 116         case INS_minps:
 117         case INS_minsd:
 118         case INS_minss:
 119         case INS_movhlps:
 120         case INS_movlhps:
 121         case INS_mpsadbw:
 122         case INS_mulpd:
 123         case INS_mulps:
 124         case INS_mulsd:
 125         case INS_mulss:
 126         case INS_orpd:
 127         case INS_orps:
 128         case INS_packssdw:
 129         case INS_packsswb:
 130         case INS_packusdw:
 131         case INS_packuswb:
 132         case INS_paddb:
 133         case INS_paddd:
 134         case INS_paddq:
 135         case INS_paddsb:
 136         case INS_paddsw:
 137         case INS_paddusb:
 138         case INS_paddusw:
 139         case INS_paddw:
 140         case INS_palignr:
 141         case INS_pand:
 142         case INS_pandn:
 143         case INS_pavgb:
 144         case INS_pavgw:
 145         case INS_pblendw:
 146         case INS_pcmpeqb:
 147         case INS_pcmpeqd:
 148         case INS_pcmpeqq:
 149         case INS_pcmpeqw:
 150         case INS_pcmpgtb:
 151         case INS_pcmpgtd:
 152         case INS_pcmpgtq:
 153         case INS_pcmpgtw:
 154         case INS_phaddd:
 155         case INS_phaddsw:
 156         case INS_phaddw:
 157         case INS_phsubd:
 158         case INS_phsubsw:
 159         case INS_phsubw:
 160         case INS_pinsrb:
 161         case INS_pinsrw:
 162         case INS_pinsrd:
 163         case INS_pinsrq:
 164         case INS_pmaddubsw:
 165         case INS_pmaddwd:
 166         case INS_pmaxsb:
 167         case INS_pmaxsd:
 168         case INS_pmaxsw:
 169         case INS_pmaxub:
 170         case INS_pmaxud:
 171         case INS_pmaxuw:
 172         case INS_pminsb:
 173         case INS_pminsd:
 174         case INS_pminsw:
 175         case INS_pminub:
 176         case INS_pminud:
 177         case INS_pminuw:
 178         case INS_pmuldq:
 179         case INS_pmulhrsw:
 180         case INS_pmulhuw:
 181         case INS_pmulhw:
 182         case INS_pmulld:
 183         case INS_pmullw:
 184         case INS_pmuludq:
 185         case INS_por:
 186         case INS_psadbw:
 187         case INS_pshufb:
 188         case INS_psignb:
 189         case INS_psignd:
 190         case INS_psignw:
 191         case INS_psubb:
 192         case INS_psubd:
 193         case INS_psubq:
 194         case INS_psubsb:
 195         case INS_psubsw:
 196         case INS_psubusb:
 197         case INS_psubusw:
 198         case INS_psubw:
 199         case INS_pslld:
 200         case INS_psllq:
 201         case INS_psllw:
 202         case INS_psrld:
 203         case INS_psrlq:
 204         case INS_psrlw:
 205         case INS_psrad:
 206         case INS_psraw:
 207         case INS_punpckhbw:
 208         case INS_punpckhdq:
 209         case INS_punpckhqdq:
 210         case INS_punpckhwd:
 211         case INS_punpcklbw:
 212         case INS_punpckldq:
 213         case INS_punpcklqdq:
 214         case INS_punpcklwd:
 215         case INS_pxor:
 216         case INS_shufpd:
 217         case INS_shufps:
 218         case INS_subpd:
 219         case INS_subps:
 220         case INS_subsd:
 221         case INS_subss:
 222         case INS_unpckhps:
 223         case INS_unpcklps:
 224         case INS_unpckhpd:
 225         case INS_unpcklpd:
 226         case INS_vinsertf128:
 227         case INS_vinserti128:
 228         case INS_vmaskmovps:
 229         case INS_vmaskmovpd:
 230         case INS_vperm2i128:
 231         case INS_vperm2f128:
 232         case INS_vpermilpsvar:
 233         case INS_vpermilpdvar:
 234         case INS_vpsrlvd:
 235         case INS_vpsrlvq:
 236         case INS_vpsravd:
 237         case INS_vpsllvd:
 238         case INS_vpsllvq:
 239         case INS_xorpd:
 240         case INS_xorps:
 241             return IsAVXInstruction(ins);
 242         default:
 243             return false;
 244     }
 245 }
 246
 247 // Returns true if the AVX instruction requires 3 operands that duplicate the source
 248 // register in the vvvv field.
 249 // TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to
 250 // be formalized by adding an additional field to instruction table to
 251 // to indicate whether a 3-operand instruction.
 252 bool emitter::IsDstSrcSrcAVXInstruction(instruction ins)
 253 {
 254     switch (ins)
 255     {
 256         case INS_movhpd:
 257         case INS_movhps:
 258         case INS_movlpd:
 259         case INS_movlps:
 260         case INS_movsdsse2:
 261         case INS_movss:
 262         case INS_rcpss:
 263         case INS_roundsd:
 264         case INS_roundss:
 265         case INS_rsqrtss:
 266         case INS_sqrtsd:
 267         case INS_sqrtss:
 268             return IsAVXInstruction(ins);
 269         default:
 270             return false;
 271     }
 272 }
 273 #endif // !LEGACY_BACKEND
 274
 275 // -------------------------------------------------------------------
 276 // Is4ByteSSE4Instruction: Returns true if the SSE4 instruction
 277 // is a 4-byte opcode.
 278 //
 279 // Arguments:
 280 //    ins  -  instruction
 281 //
 282 // Note that this should be true for any of the instructions in instrsXArch.h
 283 // that use the SSE38 or SSE3A macro.
 284 bool emitter::Is4ByteSSE4Instruction(instruction ins)
 285 {
 286 #ifdef LEGACY_BACKEND
 287     // On legacy backend SSE4 is not enabled.
 288     return false;
 289 #else
 290     return UseSSE4() && IsSSE4Instruction(ins) && EncodedBySSE38orSSE3A(ins);
 291 #endif // LEGACY_BACKEND
 292 }
 293
 294 // ------------------------------------------------------------------------------
 295 // Is4ByteSSE4OrAVXInstruction: Returns true if the SSE4 or AVX instruction is a 4-byte opcode.
 296 //
 297 // Arguments:
 298 //    ins  -  instructions
 299 //
 300 // Note that this should be true for any of the instructions in instrsXArch.h
 301 // that use the SSE38 or SSE3A macro.
 302 bool emitter::Is4ByteSSE4OrAVXInstruction(instruction ins)
 303 {
 304 #ifdef LEGACY_BACKEND
 305     // On legacy backend SSE4 and AVX are not enabled.
 306     return false;
 307 #else
 308     return ((UseVEXEncoding() && (IsSSE4Instruction(ins) || IsAVXOnlyInstruction(ins))) ||
 309             (UseSSE4() && IsSSE4Instruction(ins))) &&
 310            EncodedBySSE38orSSE3A(ins);
 311 #endif // LEGACY_BACKEND
 312 }
 313
 314 #ifndef LEGACY_BACKEND
 315 // Returns true if this instruction requires a VEX prefix
 316 // All AVX instructions require a VEX prefix
 317 bool emitter::TakesVexPrefix(instruction ins)
 318 {
 319     // special case vzeroupper as it requires 2-byte VEX prefix
 320     // special case the fencing, movnti and the prefetch instructions as they never take a VEX prefix
 321     switch (ins)
 322     {
 323         case INS_lfence:
 324         case INS_mfence:
 325         case INS_movnti:
 326         case INS_prefetchnta:
 327         case INS_prefetcht0:
 328         case INS_prefetcht1:
 329         case INS_prefetcht2:
 330         case INS_sfence:
 331         case INS_vzeroupper:
 332             return false;
 333         default:
 334             break;
 335     }
 336
 337     return IsAVXInstruction(ins);
 338 }
 339
 340 // Add base VEX prefix without setting W, R, X, or B bits
 341 // L bit will be set based on emitter attr.
 342 //
 343 // 3-byte VEX prefix = C4 <R,X,B,m-mmmm> <W,vvvv,L,pp>
 344 //  - R, X, B, W - bits to express corresponding REX prefixes
 345 //  - m-mmmmm (5-bit)
 346 //    0-00001 - implied leading 0F opcode byte
 347 //    0-00010 - implied leading 0F 38 opcode bytes
 348 //    0-00011 - implied leading 0F 3A opcode bytes
 349 //    Rest    - reserved for future use and usage of them will uresult in Undefined instruction exception
 350 //
 351 // - vvvv (4-bits) - register specifier in 1's complement form; must be 1111 if unused
 352 // - L - scalar or AVX-128 bit operations (L=0),  256-bit operations (L=1)
 353 // - pp (2-bits) - opcode extension providing equivalent functionality of a SIMD size prefix
 354 //                 these prefixes are treated mandatory when used with escape opcode 0Fh for
 355 //                 some SIMD instructions
 356 //   00  - None   (0F    - packed float)
 357 //   01  - 66     (66 0F - packed double)
 358 //   10  - F3     (F3 0F - scalar float
 359 //   11  - F2     (F2 0F - scalar double)
 360 //
 361 // TODO-AMD64-CQ: for simplicity of implementation this routine always adds 3-byte VEX
 362 // prefix. Based on 'attr' param we could add 2-byte VEX prefix in case of scalar
 363 // and AVX-128 bit operations.
 364 #define DEFAULT_3BYTE_VEX_PREFIX 0xC4E07800000000ULL
 365 #define DEFAULT_3BYTE_VEX_PREFIX_MASK 0xFFFFFF00000000ULL
 366 #define LBIT_IN_3BYTE_VEX_PREFIX 0x00000400000000ULL
 367 emitter::code_t emitter::AddVexPrefix(instruction ins, code_t code, emitAttr attr)
 368 {
 369     // Only AVX instructions require VEX prefix
 370     assert(IsAVXInstruction(ins));
 371
 372     // Shouldn't have already added Vex prefix
 373     assert(!hasVexPrefix(code));
 374
 375     // Set L bit to 1 in case of instructions that operate on 256-bits.
 376     assert((code & DEFAULT_3BYTE_VEX_PREFIX_MASK) == 0);
 377     code |= DEFAULT_3BYTE_VEX_PREFIX;
 378     if (attr == EA_32BYTE)
 379     {
 380         code |= LBIT_IN_3BYTE_VEX_PREFIX;
 381     }
 382
 383     return code;
 384 }
 385 #endif // !LEGACY_BACKEND
 386
 387 // Returns true if this instruction, for the given EA_SIZE(attr), will require a REX.W prefix
 388 bool TakesRexWPrefix(instruction ins, emitAttr attr)
 389 {
 390 #ifndef LEGACY_BACKEND
 391     // Because the current implementation of AVX does not have a way to distinguish between the register
 392     // size specification (128 vs. 256 bits) and the operand size specification (32 vs. 64 bits), where both are
 393     // required, the instruction must be created with the register size attribute (EA_16BYTE or EA_32BYTE),
 394     // and here we must special case these by the opcode.
 395     switch (ins)
 396     {
 397         case INS_vpermq:
 398         case INS_vpsrlvq:
 399         case INS_vpsllvq:
 400         case INS_pinsrq:
 401         case INS_pextrq:
 402             return true;
 403         default:
 404             break;
 405     }
 406 #endif // !LEGACY_BACKEND
 407 #ifdef _TARGET_AMD64_
 408     // movsx should always sign extend out to 8 bytes just because we don't track
 409     // whether the dest should be 4 bytes or 8 bytes (attr indicates the size
 410     // of the source, not the dest).
 411     // A 4-byte movzx is equivalent to an 8 byte movzx, so it is not special
 412     // cased here.
 413     //
 414     // Rex_jmp = jmp with rex prefix always requires rex.w prefix.
 415     if (ins == INS_movsx || ins == INS_rex_jmp)
 416     {
 417         return true;
 418     }
 419
 420     if (EA_SIZE(attr) != EA_8BYTE)
 421     {
 422         return false;
 423     }
 424
 425     if (IsSSEOrAVXInstruction(ins))
 426     {
 427         switch (ins)
 428         {
 429             case INS_cvttsd2si:
 430             case INS_cvttss2si:
 431             case INS_cvtsd2si:
 432             case INS_cvtss2si:
 433             case INS_cvtsi2sd:
 434             case INS_cvtsi2ss:
 435             case INS_mov_xmm2i:
 436             case INS_mov_i2xmm:
 437             case INS_movnti:
 438                 return true;
 439             default:
 440                 return false;
 441         }
 442     }
 443
 444     // TODO-XArch-Cleanup: Better way to not emit REX.W when we don't need it, than just testing all these
 445     // opcodes...
 446     // These are all the instructions that default to 8-byte operand without the REX.W bit
 447     // With 1 special case: movzx because the 4 byte version still zeros-out the hi 4 bytes
 448     // so we never need it
 449     if ((ins != INS_push) && (ins != INS_pop) && (ins != INS_movq) && (ins != INS_movzx) && (ins != INS_push_hide) &&
 450         (ins != INS_pop_hide) && (ins != INS_ret) && (ins != INS_call) && !((ins >= INS_i_jmp) && (ins <= INS_l_jg)))
 451     {
 452         return true;
 453     }
 454     else
 455     {
 456         return false;
 457     }
 458 #else  //!_TARGET_AMD64 = _TARGET_X86_
 459     return false;
 460 #endif //!_TARGET_AMD64_
 461 }
 462
 463 // Returns true if using this register will require a REX.* prefix.
 464 // Since XMM registers overlap with YMM registers, this routine
 465 // can also be used to know whether a YMM register if the
 466 // instruction in question is AVX.
 467 bool IsExtendedReg(regNumber reg)
 468 {
 469 #ifdef _TARGET_AMD64_
 470     return ((reg >= REG_R8) && (reg <= REG_R15)) || ((reg >= REG_XMM8) && (reg <= REG_XMM15));
 471 #else
 472     // X86 JIT operates in 32-bit mode and hence extended reg are not available.
 473     return false;
 474 #endif
 475 }
 476
 477 // Returns true if using this register, for the given EA_SIZE(attr), will require a REX.* prefix
 478 bool IsExtendedReg(regNumber reg, emitAttr attr)
 479 {
 480 #ifdef _TARGET_AMD64_
 481     // Not a register, so doesn't need a prefix
 482     if (reg > REG_XMM15)
 483     {
 484         return false;
 485     }
 486
 487     // Opcode field only has 3 bits for the register, these high registers
 488     // need a 4th bit, that comes from the REX prefix (eiter REX.X, REX.R, or REX.B)
 489     if (IsExtendedReg(reg))
 490     {
 491         return true;
 492     }
 493
 494     if (EA_SIZE(attr) != EA_1BYTE)
 495     {
 496         return false;
 497     }
 498
 499     // There are 12 one byte registers addressible 'below' r8b:
 500     //     al, cl, dl, bl, ah, ch, dh, bh, spl, bpl, sil, dil.
 501     // The first 4 are always addressible, the last 8 are divided into 2 sets:
 502     //     ah,  ch,  dh,  bh
 503     //          -- or --
 504     //     spl, bpl, sil, dil
 505     // Both sets are encoded exactly the same, the difference is the presence
 506     // of a REX prefix, even a REX prefix with no other bits set (0x40).
 507     // So in order to get to the second set we need a REX prefix (but no bits).
 508     //
 509     // TODO-AMD64-CQ: if we ever want to start using the first set, we'll need a different way of
 510     // encoding/tracking/encoding registers.
 511     return (reg >= REG_RSP);
 512 #else
 513     // X86 JIT operates in 32-bit mode and hence extended reg are not available.
 514     return false;
 515 #endif
 516 }
 517
 518 // Since XMM registers overlap with YMM registers, this routine
 519 // can also used to know whether a YMM register in case of AVX instructions.
 520 //
 521 // Legacy X86: we have XMM0-XMM7 available but this routine cannot be used to
 522 // determine whether a reg is XMM because they share the same reg numbers
 523 // with integer registers.  Hence always return false.
 524 bool IsXMMReg(regNumber reg)
 525 {
 526 #ifndef LEGACY_BACKEND
 527 #ifdef _TARGET_AMD64_
 528     return (reg >= REG_XMM0) && (reg <= REG_XMM15);
 529 #else  // !_TARGET_AMD64_
 530     return (reg >= REG_XMM0) && (reg <= REG_XMM7);
 531 #endif // !_TARGET_AMD64_
 532 #else  // LEGACY_BACKEND
 533     return false;
 534 #endif // LEGACY_BACKEND
 535 }
 536
 537 // Returns bits to be encoded in instruction for the given register.
 538 unsigned RegEncoding(regNumber reg)
 539 {
 540 #ifndef LEGACY_BACKEND
 541     static_assert((REG_XMM0 & 0x7) == 0, "bad XMMBASE");
 542 #endif
 543     return (unsigned)(reg & 0x7);
 544 }
 545
 546 // Utility routines that abstract the logic of adding REX.W, REX.R, REX.X, REX.B and REX prefixes
 547 // SSE2: separate 1-byte prefix gets added before opcode.
 548 // AVX:  specific bits within VEX prefix need to be set in bit-inverted form.
 549 emitter::code_t emitter::AddRexWPrefix(instruction ins, code_t code)
 550 {
 551     if (UseVEXEncoding() && IsAVXInstruction(ins))
 552     {
 553         // W-bit is available only in 3-byte VEX prefix that starts with byte C4.
 554         if (TakesVexPrefix(ins))
 555         {
 556             assert(hasVexPrefix(code));
 557
 558             // W-bit is the only bit that is added in non bit-inverted form.
 559             return emitter::code_t(code | 0x00008000000000ULL);
 560         }
 561     }
 562 #ifdef _TARGET_AMD64_
 563     return emitter::code_t(code | 0x4800000000ULL);
 564 #else
 565     assert(!"UNREACHED");
 566     return code;
 567 #endif
 568 }
 569
 570 #ifdef _TARGET_AMD64_
 571
 572 emitter::code_t emitter::AddRexRPrefix(instruction ins, code_t code)
 573 {
 574     if (UseVEXEncoding() && IsAVXInstruction(ins))
 575     {
 576         // Right now support 3-byte VEX prefix
 577         if (TakesVexPrefix(ins))
 578         {
 579             assert(hasVexPrefix(code));
 580
 581             // R-bit is added in bit-inverted form.
 582             return code & 0xFF7FFFFFFFFFFFULL;
 583         }
 584     }
 585
 586     return code | 0x4400000000ULL;
 587 }
 588
 589 emitter::code_t emitter::AddRexXPrefix(instruction ins, code_t code)
 590 {
 591     if (UseVEXEncoding() && IsAVXInstruction(ins))
 592     {
 593         // Right now support 3-byte VEX prefix
 594         if (TakesVexPrefix(ins))
 595         {
 596             assert(hasVexPrefix(code));
 597
 598             // X-bit is added in bit-inverted form.
 599             return code & 0xFFBFFFFFFFFFFFULL;
 600         }
 601     }
 602
 603     return code | 0x4200000000ULL;
 604 }
 605
 606 emitter::code_t emitter::AddRexBPrefix(instruction ins, code_t code)
 607 {
 608     if (UseVEXEncoding() && IsAVXInstruction(ins))
 609     {
 610         // Right now support 3-byte VEX prefix
 611         if (TakesVexPrefix(ins))
 612         {
 613             assert(hasVexPrefix(code));
 614
 615             // B-bit is added in bit-inverted form.
 616             return code & 0xFFDFFFFFFFFFFFULL;
 617         }
 618     }
 619
 620     return code | 0x4100000000ULL;
 621 }
 622
 623 // Adds REX prefix (0x40) without W, R, X or B bits set
 624 emitter::code_t emitter::AddRexPrefix(instruction ins, code_t code)
 625 {
 626     assert(!UseVEXEncoding() || !IsAVXInstruction(ins));
 627     return code | 0x4000000000ULL;
 628 }
 629
 630 #endif //_TARGET_AMD64_
 631
 632 bool isPrefix(BYTE b)
 633 {
 634     assert(b != 0);    // Caller should check this
 635     assert(b != 0x67); // We don't use the address size prefix
 636     assert(b != 0x65); // The GS segment override prefix is emitted separately
 637     assert(b != 0x64); // The FS segment override prefix is emitted separately
 638     assert(b != 0xF0); // The lock prefix is emitted separately
 639     assert(b != 0x2E); // We don't use the CS segment override prefix
 640     assert(b != 0x3E); // Or the DS segment override prefix
 641     assert(b != 0x26); // Or the ES segment override prefix
 642     assert(b != 0x36); // Or the SS segment override prefix
 643
 644     // That just leaves the size prefixes used in SSE opcodes:
 645     //      Scalar Double  Scalar Single  Packed Double
 646     return ((b == 0xF2) || (b == 0xF3) || (b == 0x66));
 647 }
 648
 649 // Outputs VEX prefix (in case of AVX instructions) and REX.R/X/W/B otherwise.
 650 unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code)
 651 {
 652 #ifndef LEGACY_BACKEND
 653     if (hasVexPrefix(code))
 654     {
 655         // Only AVX instructions should have a VEX prefix
 656         assert(UseVEXEncoding() && IsAVXInstruction(ins));
 657         code_t vexPrefix = (code >> 32) & 0x00FFFFFF;
 658         code &= 0x00000000FFFFFFFFLL;
 659
 660         WORD leadingBytes = 0;
 661         BYTE check        = (code >> 24) & 0xFF;
 662         if (check != 0)
 663         {
 664             // 3-byte opcode: with the bytes ordered as 0x2211RM33 or
 665             // 4-byte opcode: with the bytes ordered as 0x22114433
 666             // check for a prefix in the 11 position
 667             BYTE sizePrefix = (code >> 16) & 0xFF;
 668             if (sizePrefix != 0 && isPrefix(sizePrefix))
 669             {
 670                 // 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits
 671                 //
 672                 //   00  - None   (0F    - packed float)
 673                 //   01  - 66     (66 0F - packed double)
 674                 //   10  - F3     (F3 0F - scalar float
 675                 //   11  - F2     (F2 0F - scalar double)
 676                 switch (sizePrefix)
 677                 {
 678                     case 0x66:
 679                         vexPrefix |= 0x01;
 680                         break;
 681                     case 0xF3:
 682                         vexPrefix |= 0x02;
 683                         break;
 684                     case 0xF2:
 685                         vexPrefix |= 0x03;
 686                         break;
 687                     default:
 688                         assert(!"unrecognized SIMD size prefix");
 689                         unreached();
 690                 }
 691
 692                 // Now the byte in the 22 position must be an escape byte 0F
 693                 leadingBytes = check;
 694                 assert(leadingBytes == 0x0F);
 695
 696                 // Get rid of both sizePrefix and escape byte
 697                 code &= 0x0000FFFFLL;
 698
 699                 // Check the byte in the 33 position to see if it is 3A or 38.
 700                 // In such a case escape bytes must be 0x0F3A or 0x0F38
 701                 check = code & 0xFF;
 702                 if (check == 0x3A || check == 0x38)
 703                 {
 704                     leadingBytes = (leadingBytes << 8) | check;
 705                     code &= 0x0000FF00LL;
 706                 }
 707             }
 708         }
 709         else
 710         {
 711             // 2-byte opcode with the bytes ordered as 0x0011RM22
 712             // the byte in position 11 must be an escape byte.
 713             leadingBytes = (code >> 16) & 0xFF;
 714             assert(leadingBytes == 0x0F || leadingBytes == 0x00);
 715             code &= 0xFFFF;
 716         }
 717
 718         // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38
 719         // m-mmmmm bits in byte 1 of VEX prefix allows us to encode these
 720         // implied leading bytes
 721         switch (leadingBytes)
 722         {
 723             case 0x00:
 724                 // there is no leading byte
 725                 break;
 726             case 0x0F:
 727                 vexPrefix |= 0x0100;
 728                 break;
 729             case 0x0F38:
 730                 vexPrefix |= 0x0200;
 731                 break;
 732             case 0x0F3A:
 733                 vexPrefix |= 0x0300;
 734                 break;
 735             default:
 736                 assert(!"encountered unknown leading bytes");
 737                 unreached();
 738         }
 739
 740         // At this point
 741         //     VEX.2211RM33 got transformed as VEX.0000RM33
 742         //     VEX.0011RM22 got transformed as VEX.0000RM22
 743         //
 744         // Now output VEX prefix leaving the 4-byte opcode
 745         emitOutputByte(dst, ((vexPrefix >> 16) & 0xFF));
 746         emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0xFF));
 747         emitOutputByte(dst + 2, vexPrefix & 0xFF);
 748         return 3;
 749     }
 750 #endif // !LEGACY_BACKEND
 751
 752 #ifdef _TARGET_AMD64_
 753     if (code > 0x00FFFFFFFFLL)
 754     {
 755         BYTE prefix = (code >> 32) & 0xFF;
 756         noway_assert(prefix >= 0x40 && prefix <= 0x4F);
 757         code &= 0x00000000FFFFFFFFLL;
 758
 759         // TODO-AMD64-Cleanup: when we remove the prefixes (just the SSE opcodes right now)
 760         // we can remove this code as well
 761
 762         // The REX prefix is required to come after all other prefixes.
 763         // Some of our 'opcodes' actually include some prefixes, if that
 764         // is the case, shift them over and place the REX prefix after
 765         // the other prefixes, and emit any prefix that got moved out.
 766         BYTE check = (code >> 24) & 0xFF;
 767         if (check == 0)
 768         {
 769             // 3-byte opcode: with the bytes ordered as 0x00113322
 770             // check for a prefix in the 11 position
 771             check = (code >> 16) & 0xFF;
 772             if (check != 0 && isPrefix(check))
 773             {
 774                 // Swap the rex prefix and whatever this prefix is
 775                 code = (((DWORD)prefix << 16) | (code & 0x0000FFFFLL));
 776                 // and then emit the other prefix
 777                 return emitOutputByte(dst, check);
 778             }
 779         }
 780         else
 781         {
 782             // 4-byte opcode with the bytes ordered as 0x22114433
 783             // first check for a prefix in the 11 position
 784             BYTE check2 = (code >> 16) & 0xFF;
 785             if (isPrefix(check2))
 786             {
 787                 assert(!isPrefix(check)); // We currently don't use this, so it is untested
 788                 if (isPrefix(check))
 789                 {
 790                     // 3 prefixes were rex = rr, check = c1, check2 = c2 encoded as 0xrrc1c2XXXX
 791                     // Change to c2rrc1XXXX, and emit check2 now
 792                     code = (((code_t)prefix << 24) | ((code_t)check << 16) | (code & 0x0000FFFFLL));
 793                 }
 794                 else
 795                 {
 796                     // 2 prefixes were rex = rr, check2 = c2 encoded as 0xrrXXc2XXXX, (check is part of the opcode)
 797                     // Change to c2XXrrXXXX, and emit check2 now
 798                     code = (((code_t)check << 24) | ((code_t)prefix << 16) | (code & 0x0000FFFFLL));
 799                 }
 800                 return emitOutputByte(dst, check2);
 801             }
 802         }
 803
 804         return emitOutputByte(dst, prefix);
 805     }
 806 #endif // _TARGET_AMD64_
 807
 808     return 0;
 809 }
 810
 811 #ifdef _TARGET_AMD64_
 812 /*****************************************************************************
 813  * Is the last instruction emitted a call instruction?
 814  */
 815 bool emitter::emitIsLastInsCall()
 816 {
 817     if ((emitLastIns != nullptr) && (emitLastIns->idIns() == INS_call))
 818     {
 819         return true;
 820     }
 821
 822     return false;
 823 }
 824
 825 /*****************************************************************************
 826  * We're about to create an epilog. If the last instruction we output was a 'call',
 827  * then we need to insert a NOP, to allow for proper exception-handling behavior.
 828  */
 829 void emitter::emitOutputPreEpilogNOP()
 830 {
 831     if (emitIsLastInsCall())
 832     {
 833         emitIns(INS_nop);
 834     }
 835 }
 836
 837 #endif //_TARGET_AMD64_
 838
 839 // Size of rex prefix in bytes
 840 unsigned emitter::emitGetRexPrefixSize(instruction ins)
 841 {
 842     // In case of AVX instructions, REX prefixes are part of VEX prefix.
 843     // And hence requires no additional byte to encode REX prefixes.
 844     if (IsAVXInstruction(ins))
 845     {
 846         return 0;
 847     }
 848
 849     // If not AVX, then we would need 1-byte to encode REX prefix.
 850     return 1;
 851 }
 852
 853 // Size of vex prefix in bytes
 854 unsigned emitter::emitGetVexPrefixSize(instruction ins, emitAttr attr)
 855 {
 856     // TODO-XArch-CQ: right now we default to 3-byte VEX prefix. There is a
 857     // scope for size win by using 2-byte vex prefix for some of the
 858     // scalar, avx-128 and most common avx-256 instructions.
 859     if (IsAVXInstruction(ins))
 860     {
 861         return 3;
 862     }
 863
 864     // If not AVX, then we don't need to encode vex prefix.
 865     return 0;
 866 }
 867
 868 // VEX prefix encodes some bytes of the opcode and as a result, overall size of the instruction reduces.
 869 // Therefore, estimating the size by adding the VEX prefix size to the size of the instruction opcode bytes will always overestimate.
 870 // Instead this routine will adjust the size of VEX prefix based on the number of bytes of opcode it encodes so that
 871 // instruction size estimate will be accurate.
 872 // Basically this function will decrease the vexPrefixSize,
 873 // so that opcodeSize + vexPrefixAdjustedSize will be the right size.
 874 // rightOpcodeSize + vexPrefixSize
 875 //=(opcodeSize - ExtrabytesSize) + vexPrefixSize
 876 //=opcodeSize + (vexPrefixSize - ExtrabytesSize)
 877 //=opcodeSize + vexPrefixAdjustedSize
 878 unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, code_t code)
 879 {
 880 #ifndef LEGACY_BACKEND
 881     if (IsAVXInstruction(ins))
 882     {
 883         unsigned vexPrefixAdjustedSize = emitGetVexPrefixSize(ins, attr);
 884         // Currently vex prefix size is hard coded as 3 bytes,
 885         // In future we should support 2 bytes vex prefix.
 886         assert(vexPrefixAdjustedSize == 3);
 887
 888         // In this case, opcode will contains escape prefix at least one byte,
 889         // vexPrefixAdjustedSize should be minus one.
 890         vexPrefixAdjustedSize -= 1;
 891
 892         // Get the fourth byte in Opcode.
 893         // If this byte is non-zero, then we should check whether the opcode contains SIMD prefix or not.
 894         BYTE check = (code >> 24) & 0xFF;
 895         if (check != 0)
 896         {
 897             // 3-byte opcode: with the bytes ordered as 0x2211RM33 or
 898             // 4-byte opcode: with the bytes ordered as 0x22114433
 899             // Simd prefix is at the first byte.
 900             BYTE sizePrefix = (code >> 16) & 0xFF;
 901             if (sizePrefix != 0 && isPrefix(sizePrefix))
 902             {
 903                 vexPrefixAdjustedSize -= 1;
 904             }
 905
 906             // If the opcode size is 4 bytes, then the second escape prefix is at fourth byte in opcode.
 907             // But in this case the opcode has not counted R\M part.
 908             // opcodeSize + VexPrefixAdjustedSize - ExtraEscapePrefixSize + ModR\MSize
 909             //=opcodeSize + VexPrefixAdjustedSize -1 + 1
 910             //=opcodeSize + VexPrefixAdjustedSize
 911             // So although we may have second byte escape prefix, we won't decrease vexPrefixAjustedSize.
 912         }
 913
 914         return vexPrefixAdjustedSize;
 915     }
 916 #endif // !LEGACY_BACKEND
 917     return 0;
 918 }
 919
 920 // Get size of rex or vex prefix emitted in code
 921 unsigned emitter::emitGetPrefixSize(code_t code)
 922 {
 923     if (hasVexPrefix(code))
 924     {
 925         return 3;
 926     }
 927
 928     if (hasRexPrefix(code))
 929     {
 930         return 1;
 931     }
 932
 933     return 0;
 934 }
 935
 936 #ifdef _TARGET_X86_
 937 /*****************************************************************************
 938  *
 939  *  Record a non-empty stack
 940  */
 941
 942 void emitter::emitMarkStackLvl(unsigned stackLevel)
 943 {
 944     assert(int(stackLevel) >= 0);
 945     assert(emitCurStackLvl == 0);
 946     assert(emitCurIG->igStkLvl == 0);
 947     assert(emitCurIGfreeNext == emitCurIGfreeBase);
 948
 949     assert(stackLevel && stackLevel % sizeof(int) == 0);
 950
 951     emitCurStackLvl = emitCurIG->igStkLvl = stackLevel;
 952
 953     if (emitMaxStackDepth < emitCurStackLvl)
 954     {
 955         JITDUMP("Upping emitMaxStackDepth from %d to %d\n", emitMaxStackDepth, emitCurStackLvl);
 956         emitMaxStackDepth = emitCurStackLvl;
 957     }
 958 }
 959 #endif
 960
 961 /*****************************************************************************
 962  *
 963  *  Get hold of the address mode displacement value for an indirect call.
 964  */
 965
 966 inline ssize_t emitter::emitGetInsCIdisp(instrDesc* id)
 967 {
 968     if (id->idIsLargeCall())
 969     {
 970         return ((instrDescCGCA*)id)->idcDisp;
 971     }
 972     else
 973     {
 974         assert(!id->idIsLargeDsp());
 975         assert(!id->idIsLargeCns());
 976
 977         return id->idAddr()->iiaAddrMode.amDisp;
 978     }
 979 }
 980
 981 /** ***************************************************************************
 982  *
 983  *  The following table is used by the instIsFP()/instUse/DefFlags() helpers.
 984  */
 985
 986 #define INST_DEF_FL 0x20 // does the instruction set flags?
 987 #define INST_USE_FL 0x40 // does the instruction use flags?
 988
 989 // clang-format off
// Per-instruction flag byte: one entry is produced for every INSTn(...) line that
// "instrs.h" expands. Each entry ORs together INST_USE_FL (multiplied by rf),
// INST_DEF_FL (multiplied by wf) and INST_FP (multiplied by fp); the remaining
// macro arguments (id, nm, um and the encodings) are ignored by this table.
const BYTE          CodeGenInterface::instInfo[] =
{
    #define INST0(id, nm, fp, um, rf, wf, mr                 ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
    #define INST1(id, nm, fp, um, rf, wf, mr                 ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
    #define INST2(id, nm, fp, um, rf, wf, mr, mi             ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
    #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm         ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
    #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4     ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
    #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
    #include "instrs.h"
    // The INSTn macros are scoped to this table only.
    #undef  INST0
    #undef  INST1
    #undef  INST2
    #undef  INST3
    #undef  INST4
    #undef  INST5
};
1006 // clang-format on
1007
1008 /*****************************************************************************
1009  *
1010  *  Initialize the table used by emitInsModeFormat().
1011  */
1012
1013 // clang-format off
// Per-instruction update mode: every INSTn(...) line expanded from "instrs.h"
// contributes its 'um' argument, so the table has one entry per listed instruction.
const BYTE          emitter::emitInsModeFmtTab[] =
{
    #define INST0(id, nm, fp, um, rf, wf, mr                ) um,
    #define INST1(id, nm, fp, um, rf, wf, mr                ) um,
    #define INST2(id, nm, fp, um, rf, wf, mr, mi            ) um,
    #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm        ) um,
    #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4    ) um,
    #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) um,
    #include "instrs.h"
    // The INSTn macros are scoped to this table only.
    #undef  INST0
    #undef  INST1
    #undef  INST2
    #undef  INST3
    #undef  INST4
    #undef  INST5
};
1030 // clang-format on
1031
1032 #ifdef DEBUG
// Number of entries in emitInsModeFmtTab (only defined in DEBUG builds).
unsigned const emitter::emitInsModeFmtCnt = _countof(emitInsModeFmtTab);
1034 #endif
1035
1036 /*****************************************************************************
1037  *
 *  Combine the given base format with the update mode of the instruction.
1039  */
1040
1041 inline emitter::insFormat emitter::emitInsModeFormat(instruction ins, insFormat base)
1042 {
1043     assert(IF_RRD + IUM_RD == IF_RRD);
1044     assert(IF_RRD + IUM_WR == IF_RWR);
1045     assert(IF_RRD + IUM_RW == IF_RRW);
1046
1047     return (insFormat)(base + emitInsUpdateMode(ins));
1048 }
1049
1050 /*****************************************************************************
1051  *
1052  *  A version of scInsModeFormat() that handles X87 floating-point instructions.
1053  */
1054
1055 #if FEATURE_STACK_FP_X87
1056 emitter::insFormat emitter::emitInsModeFormat(instruction ins, insFormat base, insFormat FPld, insFormat FPst)
1057 {
1058     if (CodeGen::instIsFP(ins))
1059     {
1060         assert(IF_TRD_SRD + 1 == IF_TWR_SRD);
1061         assert(IF_TRD_SRD + 2 == IF_TRW_SRD);
1062
1063         assert(IF_TRD_MRD + 1 == IF_TWR_MRD);
1064         assert(IF_TRD_MRD + 2 == IF_TRW_MRD);
1065
1066         assert(IF_TRD_ARD + 1 == IF_TWR_ARD);
1067         assert(IF_TRD_ARD + 2 == IF_TRW_ARD);
1068
1069         switch (ins)
1070         {
1071             case INS_fst:
1072             case INS_fstp:
1073             case INS_fistp:
1074             case INS_fistpl:
1075                 return (insFormat)(FPst);
1076
1077             case INS_fld:
1078             case INS_fild:
1079                 return (insFormat)(FPld + 1);
1080
1081             case INS_fcomp:
1082             case INS_fcompp:
1083             case INS_fcomip:
1084                 return (insFormat)(FPld);
1085
1086             default:
1087                 return (insFormat)(FPld + 2);
1088         }
1089     }
1090     else
1091     {
1092         return emitInsModeFormat(ins, base);
1093     }
1094 }
1095 #endif // FEATURE_STACK_FP_X87
1096
1097 // This is a helper we need due to Vs Whidbey #254016 in order to distinguish
1098 // if we can not possibly be updating an integer register. This is not the best
1099 // solution, but the other ones (see bug) are going to be much more complicated.
1100 // The issue here is that on legacy x86, the XMM registers use the same register numbers
1101 // as the general purpose registers, so we need to distinguish them.
1102 // We really only need this for x86 where this issue exists.
1103 bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
1104 {
1105     instruction ins = id->idIns();
1106
1107     // The following SSE2 instructions write to a general purpose integer register.
1108     if (!IsSSEOrAVXInstruction(ins) || ins == INS_mov_xmm2i || ins == INS_cvttsd2si
1109 #ifndef LEGACY_BACKEND
1110         || ins == INS_cvttss2si || ins == INS_cvtsd2si || ins == INS_cvtss2si || ins == INS_pmovmskb ||
1111         ins == INS_pextrw || ins == INS_pextrb || ins == INS_pextrd || ins == INS_pextrq || ins == INS_extractps
1112 #endif // !LEGACY_BACKEND
1113         )
1114     {
1115         return false;
1116     }
1117
1118     return true;
1119 }
1120
1121 /*****************************************************************************
1122  *
1123  *  Returns the base encoding of the given CPU instruction.
1124  */
1125
1126 inline size_t insCode(instruction ins)
1127 {
1128     // clang-format off
1129     const static
1130     size_t          insCodes[] =
1131     {
1132         #define INST0(id, nm, fp, um, rf, wf, mr                ) mr,
1133         #define INST1(id, nm, fp, um, rf, wf, mr                ) mr,
1134         #define INST2(id, nm, fp, um, rf, wf, mr, mi            ) mr,
1135         #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm        ) mr,
1136         #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4    ) mr,
1137         #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mr,
1138         #include "instrs.h"
1139         #undef  INST0
1140         #undef  INST1
1141         #undef  INST2
1142         #undef  INST3
1143         #undef  INST4
1144         #undef  INST5
1145     };
1146     // clang-format on
1147
1148     assert((unsigned)ins < _countof(insCodes));
1149     assert((insCodes[ins] != BAD_CODE));
1150
1151     return insCodes[ins];
1152 }
1153
1154 /*****************************************************************************
1155  *
1156  *  Returns the "AL/AX/EAX, imm" accumulator encoding of the given instruction.
1157  */
1158
1159 inline size_t insCodeACC(instruction ins)
1160 {
1161     // clang-format off
1162     const static
1163     size_t          insCodesACC[] =
1164     {
1165         #define INST0(id, nm, fp, um, rf, wf, mr                )
1166         #define INST1(id, nm, fp, um, rf, wf, mr                )
1167         #define INST2(id, nm, fp, um, rf, wf, mr, mi            )
1168         #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm        )
1169         #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4    ) a4,
1170         #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) a4,
1171         #include "instrs.h"
1172         #undef  INST0
1173         #undef  INST1
1174         #undef  INST2
1175         #undef  INST3
1176         #undef  INST4
1177         #undef  INST5
1178     };
1179     // clang-format on
1180
1181     assert((unsigned)ins < _countof(insCodesACC));
1182     assert((insCodesACC[ins] != BAD_CODE));
1183
1184     return insCodesACC[ins];
1185 }
1186
1187 /*****************************************************************************
1188  *
1189  *  Returns the "register" encoding of the given CPU instruction.
1190  */
1191
1192 inline size_t insCodeRR(instruction ins)
1193 {
1194     // clang-format off
1195     const static
1196     size_t          insCodesRR[] =
1197     {
1198         #define INST0(id, nm, fp, um, rf, wf, mr                )
1199         #define INST1(id, nm, fp, um, rf, wf, mr                )
1200         #define INST2(id, nm, fp, um, rf, wf, mr, mi            )
1201         #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm        )
1202         #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4    )
1203         #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) rr,
1204         #include "instrs.h"
1205         #undef  INST0
1206         #undef  INST1
1207         #undef  INST2
1208         #undef  INST3
1209         #undef  INST4
1210         #undef  INST5
1211     };
1212     // clang-format on
1213
1214     assert((unsigned)ins < _countof(insCodesRR));
1215     assert((insCodesRR[ins] != BAD_CODE));
1216
1217     return insCodesRR[ins];
1218 }
1219
1220 // clang-format off
// "reg, [r/m]" encodings: the 'rm' argument of every INST3, INST4 and INST5 line
// in "instrs.h". INST0..INST2 expand to nothing, so instructions declared with
// those macros have no entry here.
// NOTE(review): the accessors index this table directly by 'ins', which presumably
// relies on "instrs.h" listing the INST3+ instructions first -- confirm.
const static
size_t          insCodesRM[] =
{
    #define INST0(id, nm, fp, um, rf, wf, mr                )
    #define INST1(id, nm, fp, um, rf, wf, mr                )
    #define INST2(id, nm, fp, um, rf, wf, mr, mi            )
    #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm        ) rm,
    #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4    ) rm,
    #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) rm,
    #include "instrs.h"
    #undef  INST0
    #undef  INST1
    #undef  INST2
    #undef  INST3
    #undef  INST4
    #undef  INST5
};
1238 // clang-format on
1239
// Returns true iff the given CPU instruction has an RM encoding.
1241 inline bool hasCodeRM(instruction ins)
1242 {
1243     assert((unsigned)ins < _countof(insCodesRM));
1244     return ((insCodesRM[ins] != BAD_CODE));
1245 }
1246
1247 /*****************************************************************************
1248  *
1249  *  Returns the "reg, [r/m]" encoding of the given CPU instruction.
1250  */
1251
1252 inline size_t insCodeRM(instruction ins)
1253 {
1254     assert((unsigned)ins < _countof(insCodesRM));
1255     assert((insCodesRM[ins] != BAD_CODE));
1256
1257     return insCodesRM[ins];
1258 }
1259
1260 // clang-format off
// "[r/m], icon" encodings: the 'mi' argument of every INST2..INST5 line in
// "instrs.h". INST0 and INST1 expand to nothing, so instructions declared with
// those macros have no entry here.
// NOTE(review): the accessors index this table directly by 'ins', which presumably
// relies on "instrs.h" listing the INST2+ instructions first -- confirm.
const static
size_t          insCodesMI[] =
{
    #define INST0(id, nm, fp, um, rf, wf, mr                )
    #define INST1(id, nm, fp, um, rf, wf, mr                )
    #define INST2(id, nm, fp, um, rf, wf, mr, mi            ) mi,
    #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm        ) mi,
    #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4    ) mi,
    #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mi,
    #include "instrs.h"
    #undef  INST0
    #undef  INST1
    #undef  INST2
    #undef  INST3
    #undef  INST4
    #undef  INST5
};
1278 // clang-format on
1279
// Returns true iff the given CPU instruction has an MI encoding.
1281 inline bool hasCodeMI(instruction ins)
1282 {
1283     assert((unsigned)ins < _countof(insCodesMI));
1284     return ((insCodesMI[ins] != BAD_CODE));
1285 }
1286
1287 /*****************************************************************************
1288  *
1289  *  Returns the "[r/m], 32-bit icon" encoding of the given CPU instruction.
1290  */
1291
1292 inline size_t insCodeMI(instruction ins)
1293 {
1294     assert((unsigned)ins < _countof(insCodesMI));
1295     assert((insCodesMI[ins] != BAD_CODE));
1296
1297     return insCodesMI[ins];
1298 }
1299
1300 // clang-format off
// "[r/m], reg" / "[r/m]" encodings: the 'mr' argument of every INST1..INST5 line
// in "instrs.h". INST0 expands to nothing, so instructions declared with INST0
// have no entry here.
// NOTE(review): the accessors index this table directly by 'ins', which presumably
// relies on "instrs.h" listing the INST1+ instructions first -- confirm.
const static
size_t          insCodesMR[] =
{
    #define INST0(id, nm, fp, um, rf, wf, mr                )
    #define INST1(id, nm, fp, um, rf, wf, mr                ) mr,
    #define INST2(id, nm, fp, um, rf, wf, mr, mi            ) mr,
    #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm        ) mr,
    #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4    ) mr,
    #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mr,
    #include "instrs.h"
    #undef  INST0
    #undef  INST1
    #undef  INST2
    #undef  INST3
    #undef  INST4
    #undef  INST5
};
1318 // clang-format on
1319
// Returns true iff the given CPU instruction has an MR encoding.
1321 inline bool hasCodeMR(instruction ins)
1322 {
1323     assert((unsigned)ins < _countof(insCodesMR));
1324     return ((insCodesMR[ins] != BAD_CODE));
1325 }
1326
1327 /*****************************************************************************
1328  *
1329  *  Returns the "[r/m], reg" or "[r/m]" encoding of the given CPU instruction.
1330  */
1331
1332 inline size_t insCodeMR(instruction ins)
1333 {
1334     assert((unsigned)ins < _countof(insCodesMR));
1335     assert((insCodesMR[ins] != BAD_CODE));
1336
1337     return insCodesMR[ins];
1338 }
1339
1340 // Return true if the instruction uses the SSE38 or SSE3A macro in instrsXArch.h.
1341 bool emitter::EncodedBySSE38orSSE3A(instruction ins)
1342 {
1343     const size_t SSE38 = 0x0F660038;
1344     const size_t SSE3A = 0x0F66003A;
1345     const size_t MASK  = 0xFFFF00FF;
1346
1347     size_t insCode = 0;
1348
1349     if (hasCodeRM(ins))
1350     {
1351         insCode = insCodeRM(ins);
1352     }
1353     else if (hasCodeMI(ins))
1354     {
1355         insCode = insCodeMI(ins);
1356     }
1357     else if (hasCodeMR(ins))
1358     {
1359         insCode = insCodeMR(ins);
1360     }
1361
1362     insCode &= MASK;
1363     return insCode == SSE38 || insCode == SSE3A;
1364 }
1365
1366 /*****************************************************************************
1367  *
1368  *  Returns an encoding for the specified register to be used in the bit0-2
1369  *  part of an opcode.
1370  */
1371
1372 inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAttr size, code_t* code)
1373 {
1374     assert(reg < REG_STK);
1375
1376 #ifndef LEGACY_BACKEND
1377 #ifdef _TARGET_AMD64_
1378     // Either code is not NULL or reg is not an extended reg.
1379     // If reg is an extended reg, instruction needs to be prefixed with 'REX'
1380     // which would require code != NULL.
1381     assert(code != nullptr || !IsExtendedReg(reg));
1382
1383     if (IsExtendedReg(reg))
1384     {
1385         *code = AddRexBPrefix(ins, *code); // REX.B
1386     }
1387     else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr))
1388     {
1389         // We are assuming that we only use/encode SPL, BPL, SIL and DIL
1390         // not the corresponding AH, CH, DH, or BH
1391         *code = AddRexPrefix(ins, *code); // REX
1392     }
1393 #endif // _TARGET_AMD64_
1394
1395     unsigned regBits = RegEncoding(reg);
1396
1397 #else // LEGACY_BACKEND
1398
1399     unsigned regBits = reg;
1400
1401 #endif // LEGACY_BACKEND
1402
1403     assert(regBits < 8);
1404     return regBits;
1405 }
1406
1407 /*****************************************************************************
1408  *
1409  *  Returns an encoding for the specified register to be used in the bit3-5
1410  *  part of an opcode.
1411  */
1412
1413 inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAttr size, code_t* code)
1414 {
1415     assert(reg < REG_STK);
1416
1417 #ifndef LEGACY_BACKEND
1418 #ifdef _TARGET_AMD64_
1419     // Either code is not NULL or reg is not an extended reg.
1420     // If reg is an extended reg, instruction needs to be prefixed with 'REX'
1421     // which would require code != NULL.
1422     assert(code != nullptr || !IsExtendedReg(reg));
1423
1424     if (IsExtendedReg(reg))
1425     {
1426         *code = AddRexRPrefix(ins, *code); // REX.R
1427     }
1428     else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr))
1429     {
1430         // We are assuming that we only use/encode SPL, BPL, SIL and DIL
1431         // not the corresponding AH, CH, DH, or BH
1432         *code = AddRexPrefix(ins, *code); // REX
1433     }
1434 #endif // _TARGET_AMD64_
1435
1436     unsigned regBits = RegEncoding(reg);
1437
1438 #else // LEGACY_BACKEND
1439
1440     unsigned regBits = reg;
1441
1442 #endif // LEGACY_BACKEND
1443
1444     assert(regBits < 8);
1445     return (regBits << 3);
1446 }
1447
1448 /***********************************************************************************
1449  *
1450  *  Returns modified AVX opcode with the specified register encoded in bits 3-6 of
1451  *  byte 2 of VEX prefix.
1452  */
1453 inline emitter::code_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, code_t code)
1454 {
1455 #ifndef LEGACY_BACKEND
1456     assert(reg < REG_STK);
1457     assert(IsAVXInstruction(ins));
1458     assert(hasVexPrefix(code));
1459
1460     // Get 4-bit register encoding
1461     // RegEncoding() gives lower 3 bits
1462     // IsExtendedReg() gives MSB.
1463     code_t regBits = RegEncoding(reg);
1464     if (IsExtendedReg(reg))
1465     {
1466         regBits |= 0x08;
1467     }
1468
1469     // VEX prefix encodes register operand in 1's complement form
1470     // Shift count = 4-bytes of opcode + 0-2 bits
1471     assert(regBits <= 0xF);
1472     regBits <<= 35;
1473     return code ^ regBits;
1474
1475 #else
1476     return code;
1477 #endif
1478 }
1479
1480 /*****************************************************************************
1481  *
1482  *  Returns an encoding for the specified register to be used in the bit3-5
1483  *  part of an SIB byte (unshifted).
1484  *  Used exclusively to generate the REX.X bit and truncate the register.
1485  */
1486
1487 inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, code_t* code)
1488 {
1489     assert(reg < REG_STK);
1490
1491 #ifdef _TARGET_AMD64_
1492     // Either code is not NULL or reg is not an extended reg.
1493     // If reg is an extended reg, instruction needs to be prefixed with 'REX'
1494     // which would require code != NULL.
1495     assert(code != nullptr || reg < REG_R8 || (reg >= REG_XMM0 && reg < REG_XMM8));
1496
1497     if (IsExtendedReg(reg))
1498     {
1499         *code = AddRexXPrefix(ins, *code); // REX.X
1500     }
1501     unsigned regBits = RegEncoding(reg);
1502 #else  // !_TARGET_AMD64_
1503     unsigned regBits = reg;
1504 #endif // !_TARGET_AMD64_
1505
1506     assert(regBits < 8);
1507     return regBits;
1508 }
1509
1510 /*****************************************************************************
1511  *
1512  *  Returns the "[r/m]" opcode with the mod/RM field set to register.
1513  */
1514
1515 inline emitter::code_t emitter::insEncodeMRreg(instruction ins, code_t code)
1516 {
1517     // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes.
1518     // Otherwise, it will be placed after the 4 byte encoding.
1519     if ((code & 0xFF00) == 0)
1520     {
1521         assert((code & 0xC000) == 0);
1522         code |= 0xC000;
1523     }
1524
1525     return code;
1526 }
1527
1528 /*****************************************************************************
1529  *
1530  *  Returns the given "[r/m]" opcode with the mod/RM field set to register.
1531  */
1532
1533 inline emitter::code_t emitter::insEncodeRMreg(instruction ins, code_t code)
1534 {
1535     // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes.
1536     // Otherwise, it will be placed after the 4 byte encoding.
1537     if ((code & 0xFF00) == 0)
1538     {
1539         assert((code & 0xC000) == 0);
1540         code |= 0xC000;
1541     }
1542     return code;
1543 }
1544
1545 /*****************************************************************************
1546  *
1547  *  Returns the "byte ptr [r/m]" opcode with the mod/RM field set to
1548  *  the given register.
1549  */
1550
1551 inline emitter::code_t emitter::insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, code_t code)
1552 {
1553     assert((code & 0xC000) == 0);
1554     code |= 0xC000;
1555     unsigned regcode = insEncodeReg012(ins, reg, size, &code) << 8;
1556     code |= regcode;
1557     return code;
1558 }
1559
1560 /*****************************************************************************
1561  *
1562  *  Returns the "byte ptr [r/m], icon" opcode with the mod/RM field set to
1563  *  the given register.
1564  */
1565
1566 inline emitter::code_t emitter::insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, code_t code)
1567 {
1568     assert((code & 0xC000) == 0);
1569     code |= 0xC000;
1570     unsigned regcode = insEncodeReg012(ins, reg, size, &code) << 8;
1571     code |= regcode;
1572     return code;
1573 }
1574
1575 /*****************************************************************************
1576  *
1577  *  Returns true iff the given instruction does not have a "[r/m], icon" form, but *does* have a
1578  *  "reg,reg,imm8" form.
1579  */
1580 inline bool insNeedsRRIb(instruction ins)
1581 {
1582     // If this list gets longer, use a switch or a table.
1583     return ins == INS_imul;
1584 }
1585
1586 /*****************************************************************************
1587  *
 *  Returns the "reg,reg,imm8" opcode with both the reg's set to
 *  the given register.
1590  */
1591 inline emitter::code_t emitter::insEncodeRRIb(instruction ins, regNumber reg, emitAttr size)
1592 {
1593     assert(size == EA_4BYTE); // All we handle for now.
1594     assert(insNeedsRRIb(ins));
1595     // If this list gets longer, use a switch, or a table lookup.
1596     code_t   code    = 0x69c0;
1597     unsigned regcode = insEncodeReg012(ins, reg, size, &code);
1598     // We use the same register as source and destination.  (Could have another version that does both regs...)
1599     code |= regcode;
1600     code |= (regcode << 3);
1601     return code;
1602 }
1603
1604 /*****************************************************************************
1605  *
 *  Returns the "+reg" opcode with the given register set into the low
 *  three bits of the opcode
1608  */
1609
1610 inline emitter::code_t emitter::insEncodeOpreg(instruction ins, regNumber reg, emitAttr size)
1611 {
1612     code_t   code    = insCodeRR(ins);
1613     unsigned regcode = insEncodeReg012(ins, reg, size, &code);
1614     code |= regcode;
1615     return code;
1616 }
1617
1618 /*****************************************************************************
1619  *
1620  *  Return the 'SS' field value for the given index scale factor.
1621  */
1622
1623 inline unsigned emitter::insSSval(unsigned scale)
1624 {
1625     assert(scale == 1 || scale == 2 || scale == 4 || scale == 8);
1626
1627     const static BYTE scales[] = {
1628         0x00, // 1
1629         0x40, // 2
1630         0xFF, // 3
1631         0x80, // 4
1632         0xFF, // 5
1633         0xFF, // 6
1634         0xFF, // 7
1635         0xC0, // 8
1636     };
1637
1638     return scales[scale - 1];
1639 }
1640
// Instruction for each jump kind: INS_nop first, then one INS_<ins> entry for
// every JMP_SMALL(en, rev, ins) line in "emitjmps.h", and finally INS_call.
// NOTE(review): JMP_SMALL is not #undef'd here; presumably "emitjmps.h" does that -- confirm.
const instruction emitJumpKindInstructions[] = {INS_nop,

#define JMP_SMALL(en, rev, ins) INS_##ins,
#include "emitjmps.h"

                                                INS_call};
1647
// Reversed jump kind for each jump kind: EJ_NONE first, then one EJ_<rev> entry
// for every JMP_SMALL(en, rev, ins) line in "emitjmps.h".
// NOTE(review): JMP_SMALL is not #undef'd here; presumably "emitjmps.h" does that -- confirm.
const emitJumpKind emitReverseJumpKinds[] = {
    EJ_NONE,

#define JMP_SMALL(en, rev, ins) EJ_##rev,
#include "emitjmps.h"
};
1654
1655 /*****************************************************************************
1656  * Look up the instruction for a jump kind
1657  */
1658
1659 /*static*/ instruction emitter::emitJumpKindToIns(emitJumpKind jumpKind)
1660 {
1661     assert((unsigned)jumpKind < ArrLen(emitJumpKindInstructions));
1662     return emitJumpKindInstructions[jumpKind];
1663 }
1664
1665 /*****************************************************************************
1666  * Reverse the conditional jump
1667  */
1668
1669 /* static */ emitJumpKind emitter::emitReverseJumpKind(emitJumpKind jumpKind)
1670 {
1671     assert(jumpKind < EJ_COUNT);
1672     return emitReverseJumpKinds[jumpKind];
1673 }
1674
1675 /*****************************************************************************
 * Returns true for instructions that, going by the function's name, have no
 * code of their own; INS_align is the only one recognized.
1678  */
1679
1680 inline bool emitInstHasNoCode(instruction ins)
1681 {
1682     if (ins == INS_align)
1683     {
1684         return true;
1685     }
1686
1687     return false;
1688 }
1689
1690 /*****************************************************************************
1691  * When encoding instructions that operate on byte registers
1692  * we have to ensure that we use a low register (EAX, EBX, ECX or EDX)
1693  * otherwise we will incorrectly encode the instruction
1694  */
1695
1696 bool emitter::emitVerifyEncodable(instruction ins, emitAttr size, regNumber reg1, regNumber reg2 /* = REG_NA */)
1697 {
1698 #if CPU_HAS_BYTE_REGS
1699     if (size != EA_1BYTE) // Not operating on a byte register is fine
1700     {
1701         return true;
1702     }
1703
1704     if ((ins != INS_movsx) && // These three instructions support high register
1705         (ins != INS_movzx)    // encodings for reg1
1706 #ifdef FEATURE_HW_INTRINSICS
1707         && (ins != INS_crc32)
1708 #endif
1709             )
1710     {
1711         // reg1 must be a byte-able register
1712         if ((genRegMask(reg1) & RBM_BYTE_REGS) == 0)
1713         {
1714             return false;
1715         }
1716     }
1717     // if reg2 is not REG_NA then reg2 must be a byte-able register
1718     if ((reg2 != REG_NA) && ((genRegMask(reg2) & RBM_BYTE_REGS) == 0))
1719     {
1720         return false;
1721     }
1722 #endif
1723     // The instruction can be encoded
1724     return true;
1725 }
1726
1727 /*****************************************************************************
1728  *
1729  *  Estimate the size (in bytes of generated code) of the given instruction.
1730  */
1731
1732 inline UNATIVE_OFFSET emitter::emitInsSize(code_t code)
1733 {
1734     UNATIVE_OFFSET size = (code & 0xFF000000) ? 4 : (code & 0x00FF0000) ? 3 : 2;
1735 #ifdef _TARGET_AMD64_
1736     size += emitGetPrefixSize(code);
1737 #endif
1738     return size;
1739 }
1740
1741 inline UNATIVE_OFFSET emitter::emitInsSizeRM(instruction ins)
1742 {
1743     return emitInsSize(insCodeRM(ins));
1744 }
1745
1746 inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr)
1747 {
1748     emitAttr size = EA_SIZE(attr);
1749
1750     UNATIVE_OFFSET sz;
1751
1752     // If Byte 4 (which is 0xFF00) is zero, that's where the RM encoding goes.
1753     // Otherwise, it will be placed after the 4 byte encoding, making the total 5 bytes.
1754     // This would probably be better expressed as a different format or something?
1755     code_t code = insCodeRM(ins);
1756
1757     if ((code & 0xFF00) != 0)
1758     {
1759         sz = 5;
1760     }
1761     else
1762     {
1763         sz = emitInsSize(insEncodeRMreg(ins, code));
1764     }
1765
1766     // Most 16-bit operand instructions will need a prefix
1767     if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx)
1768     {
1769         sz += 1;
1770     }
1771
1772     // VEX prefix
1773     sz += emitGetVexPrefixAdjustedSize(ins, size, insCodeRM(ins));
1774
1775     // REX prefix
1776     if (!hasRexPrefix(code))
1777     {
1778         if ((TakesRexWPrefix(ins, size) && ((ins != INS_xor) || (reg1 != reg2))) || IsExtendedReg(reg1, attr) ||
1779             IsExtendedReg(reg2, attr))
1780         {
1781             sz += emitGetRexPrefixSize(ins);
1782         }
1783     }
1784
1785     return sz;
1786 }
1787
1788 /*****************************************************************************/
1789
1790 inline UNATIVE_OFFSET emitter::emitInsSizeSV(code_t code, int var, int dsp)
1791 {
1792     UNATIVE_OFFSET size = emitInsSize(code);
1793     UNATIVE_OFFSET offs;
1794     bool           offsIsUpperBound = true;
1795     bool           EBPbased         = true;
1796
1797     /*  Is this a temporary? */
1798
1799     if (var < 0)
1800     {
1801         /* An address off of ESP takes an extra byte */
1802
1803         if (!emitHasFramePtr)
1804         {
1805             size++;
1806         }
1807
1808 #ifndef LEGACY_BACKEND
1809         // The offset is already assigned. Find the temp.
1810         TempDsc* tmp = emitComp->tmpFindNum(var, Compiler::TEMP_USAGE_USED);
1811         if (tmp == nullptr)
1812         {
1813             // It might be in the free lists, if we're working on zero initializing the temps.
1814             tmp = emitComp->tmpFindNum(var, Compiler::TEMP_USAGE_FREE);
1815         }
1816         assert(tmp != nullptr);
1817         offs = tmp->tdTempOffs();
1818
1819         // We only care about the magnitude of the offset here, to determine instruction size.
1820         if (emitComp->isFramePointerUsed())
1821         {
1822             if ((int)offs < 0)
1823             {
1824                 offs = -(int)offs;
1825             }
1826         }
1827         else
1828         {
1829             // SP-based offsets must already be positive.
1830             assert((int)offs >= 0);
1831         }
1832 #else  // LEGACY_BACKEND
1833         /* We'll have to estimate the max. possible offset of this temp */
1834
1835         // TODO: Get an estimate of the temp offset instead of assuming
1836         // TODO: that any temp may be at the max. temp offset!!!!!!!!!!
1837
1838         if (emitComp->lvaTempsHaveLargerOffsetThanVars())
1839         {
1840             offs = emitLclSize + emitMaxTmpSize;
1841         }
1842         else
1843         {
1844             offs = emitMaxTmpSize;
1845         }
1846
1847         offsIsUpperBound = false;
1848 #endif // LEGACY_BACKEND
1849     }
1850     else
1851     {
1852
1853         /* Get the frame offset of the (non-temp) variable */
1854
1855         offs = dsp + emitComp->lvaFrameAddress(var, &EBPbased);
1856
1857         /* An address off of ESP takes an extra byte */
1858
1859         if (!EBPbased)
1860         {
1861             ++size;
1862         }
1863
1864         /* Is this a stack parameter reference? */
1865
1866         if (emitComp->lvaIsParameter(var)
1867 #if !defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI)
1868             && !emitComp->lvaIsRegArgument(var)
1869 #endif // !_TARGET_AMD64_ || UNIX_AMD64_ABI
1870                 )
1871         {
1872             /* If no EBP frame, arguments are off of ESP, above temps */
1873
1874             if (!EBPbased)
1875             {
1876                 assert((int)offs >= 0);
1877
1878                 offsIsUpperBound = false; // since #temps can increase
1879                 offs += emitMaxTmpSize;
1880             }
1881         }
1882         else
1883         {
1884             /* Locals off of EBP are at negative offsets */
1885
1886             if (EBPbased)
1887             {
1888 #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
1889                 // If localloc is not used, then ebp chaining is done and hence
1890                 // offset of locals will be at negative offsets, Otherwise offsets
1891                 // will be positive.  In future, when RBP gets positioned in the
1892                 // middle of the frame so as to optimize instruction encoding size,
1893                 // the below asserts needs to be modified appropriately.
1894                 // However, for Unix platforms, we always do frame pointer chaining,
1895                 // so offsets from the frame pointer will always be negative.
1896                 if (emitComp->compLocallocUsed || emitComp->opts.compDbgEnC)
1897                 {
1898                     noway_assert((int)offs >= 0);
1899                 }
1900                 else
1901 #endif
1902                 {
1903                     // Dev10 804810 - failing this assert can lead to bad codegen and runtime crashes
1904                     CLANG_FORMAT_COMMENT_ANCHOR;
1905
1906 #ifdef UNIX_AMD64_ABI
1907                     LclVarDsc* varDsc         = emitComp->lvaTable + var;
1908                     bool       isRegPassedArg = varDsc->lvIsParam && varDsc->lvIsRegArg;
1909                     // Register passed args could have a stack offset of 0.
1910                     noway_assert((int)offs < 0 || isRegPassedArg);
1911 #else  // !UNIX_AMD64_ABI
1912                     noway_assert((int)offs < 0);
1913 #endif // !UNIX_AMD64_ABI
1914                 }
1915
1916                 assert(emitComp->lvaTempsHaveLargerOffsetThanVars());
1917
1918                 // lvaInlinedPInvokeFrameVar and lvaStubArgumentVar are placed below the temps
1919                 if (unsigned(var) == emitComp->lvaInlinedPInvokeFrameVar ||
1920                     unsigned(var) == emitComp->lvaStubArgumentVar)
1921                 {
1922                     offs -= emitMaxTmpSize;
1923                 }
1924
1925                 if ((int)offs < 0)
1926                 {
1927                     // offset is negative
1928                     return size + ((int(offs) >= SCHAR_MIN) ? sizeof(char) : sizeof(int));
1929                 }
1930 #ifdef _TARGET_AMD64_
1931                 // This case arises for localloc frames
1932                 else
1933                 {
1934                     return size + ((offs <= SCHAR_MAX) ? sizeof(char) : sizeof(int));
1935                 }
1936 #endif
1937             }
1938
1939             if (emitComp->lvaTempsHaveLargerOffsetThanVars() == false)
1940             {
1941                 offs += emitMaxTmpSize;
1942             }
1943         }
1944     }
1945
1946     assert((int)offs >= 0);
1947
1948 #if !FEATURE_FIXED_OUT_ARGS
1949
1950     /* Are we addressing off of ESP? */
1951
1952     if (!emitHasFramePtr)
1953     {
1954         /* Adjust the effective offset if necessary */
1955
1956         if (emitCntStackDepth)
1957             offs += emitCurStackLvl;
1958
1959         // we could (and used to) check for the special case [sp] here but the stack offset
1960         // estimator was off, and there is very little harm in overestimating for such a
1961         // rare case.
1962     }
1963
1964 #endif // !FEATURE_FIXED_OUT_ARGS
1965
1966 //  printf("lcl = %04X, tmp = %04X, stk = %04X, offs = %04X\n",
1967 //         emitLclSize, emitMaxTmpSize, emitCurStackLvl, offs);
1968
1969 #ifdef _TARGET_AMD64_
1970     bool useSmallEncoding = (SCHAR_MIN <= (int)offs) && ((int)offs <= SCHAR_MAX);
1971 #else
1972     bool useSmallEncoding = (offs <= size_t(SCHAR_MAX));
1973 #endif
1974
1975 #ifdef LEGACY_BACKEND
1976     /* If we are using a small encoding, there is a danger that we might
1977        end up having to use a larger encoding. Record 'offs' so that
1978        we can detect if such a situation occurs */
1979
1980     if (useSmallEncoding && !offsIsUpperBound)
1981     {
1982         if (emitGrowableMaxByteOffs < offs)
1983         {
1984             emitGrowableMaxByteOffs = offs;
1985 #ifdef DEBUG
1986             // Remember which instruction this is
1987             emitMaxByteOffsIdNum = emitInsCount;
1988 #endif
1989         }
1990     }
1991 #endif // LEGACY_BACKEND
1992
1993     // If it is ESP based, and the offset is zero, we will not encode the disp part.
1994     if (!EBPbased && offs == 0)
1995     {
1996         return size;
1997     }
1998     else
1999     {
2000         return size + (useSmallEncoding ? sizeof(char) : sizeof(int));
2001     }
2002 }
2003
2004 inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, int var, int dsp, int val)
2005 {
2006     instruction    ins       = id->idIns();
2007     UNATIVE_OFFSET valSize   = EA_SIZE_IN_BYTES(id->idOpSize());
2008     UNATIVE_OFFSET prefix    = 0;
2009     bool           valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
2010
2011 #ifdef _TARGET_AMD64_
2012     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
2013     // all other opcodes take a sign-extended 4-byte immediate
2014     noway_assert(valSize <= sizeof(int) || !id->idIsCnsReloc());
2015 #endif // _TARGET_AMD64_
2016
2017     if (valSize > sizeof(int))
2018     {
2019         valSize = sizeof(int);
2020     }
2021
2022     if (id->idIsCnsReloc())
2023     {
2024         valInByte = false; // relocs can't be placed in a byte
2025         assert(valSize == sizeof(int));
2026     }
2027
2028     if (valInByte)
2029     {
2030         valSize = sizeof(char);
2031     }
2032
2033     // 16-bit operand instructions need a prefix.
2034     // This referes to 66h size prefix override
2035     if (id->idOpSize() == EA_2BYTE)
2036     {
2037         prefix = 1;
2038     }
2039
2040     return prefix + valSize + emitInsSizeSV(insCodeMI(ins), var, dsp);
2041 }
2042
2043 /*****************************************************************************/
2044
2045 static bool baseRegisterRequiresSibByte(regNumber base)
2046 {
2047 #ifdef _TARGET_AMD64_
2048     return base == REG_ESP || base == REG_R12;
2049 #else
2050     return base == REG_ESP;
2051 #endif
2052 }
2053
2054 static bool baseRegisterRequiresDisplacement(regNumber base)
2055 {
2056 #ifdef _TARGET_AMD64_
2057     return base == REG_EBP || base == REG_R13;
2058 #else
2059     return base == REG_EBP;
2060 #endif
2061 }
2062
2063 UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
2064 {
2065     emitAttr    attrSize = id->idOpSize();
2066     instruction ins      = id->idIns();
2067     /* The displacement field is in an unusual place for calls */
2068     ssize_t        dsp       = (ins == INS_call) ? emitGetInsCIdisp(id) : emitGetInsAmdAny(id);
2069     bool           dspInByte = ((signed char)dsp == (ssize_t)dsp);
2070     bool           dspIsZero = (dsp == 0);
2071     UNATIVE_OFFSET size;
2072
2073     // Note that the values in reg and rgx are used in this method to decide
2074     // how many bytes will be needed by the address [reg+rgx+cns]
2075     // this includes the prefix bytes when reg or rgx are registers R8-R15
2076     regNumber reg;
2077     regNumber rgx;
2078
2079     // The idAddr field is a union and only some of the instruction formats use the iiaAddrMode variant
2080     // these are IF_AWR_*, IF_ARD_*, IF_ARW_* and IF_*_ARD
2081     // ideally these should really be the only idInsFmts that we see here
2082     //  but we have some outliers to deal with:
2083     //     emitIns_R_L adds IF_RWR_LABEL and calls emitInsSizeAM
2084     //     emitInsRMW adds IF_MRW_CNS, IF_MRW_RRD, IF_MRW_SHF, and calls emitInsSizeAM
2085
2086     switch (id->idInsFmt())
2087     {
2088         case IF_RWR_LABEL:
2089         case IF_MRW_CNS:
2090         case IF_MRW_RRD:
2091         case IF_MRW_SHF:
2092             reg = REG_NA;
2093             rgx = REG_NA;
2094             break;
2095
2096         default:
2097             reg = id->idAddr()->iiaAddrMode.amBaseReg;
2098             rgx = id->idAddr()->iiaAddrMode.amIndxReg;
2099             break;
2100     }
2101
2102     if (id->idIsDspReloc())
2103     {
2104         dspInByte = false; // relocs can't be placed in a byte
2105         dspIsZero = false; // relocs won't always be zero
2106     }
2107
2108     if (code & 0xFF000000)
2109     {
2110         size = 4;
2111     }
2112     else if (code & 0x00FF0000)
2113     {
2114         // BT supports 16 bit operands and this code doesn't handle the necessary 66 prefix.
2115         assert(ins != INS_bt);
2116
2117         assert((attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE)    // Only for x64
2118                || (attrSize == EA_16BYTE) || (attrSize == EA_32BYTE) // only for x64
2119                || (ins == INS_movzx) || (ins == INS_movsx)
2120                // The prefetch instructions are always 3 bytes and have part of their modr/m byte hardcoded
2121                || isPrefetch(ins));
2122         size = 3;
2123     }
2124     else
2125     {
2126         size = 2;
2127
2128         // Most 16-bit operands will require a size prefix.
2129         // This refers to 66h size prefix override.
2130         CLANG_FORMAT_COMMENT_ANCHOR;
2131
2132 #if FEATURE_STACK_FP_X87
2133         if ((attrSize == EA_2BYTE) && (ins != INS_fldcw) && (ins != INS_fnstcw))
2134 #else  // FEATURE_STACK_FP_X87
2135         if (attrSize == EA_2BYTE)
2136 #endif // FEATURE_STACK_FP_X87
2137         {
2138             size++;
2139         }
2140     }
2141
2142     size += emitGetVexPrefixAdjustedSize(ins, attrSize, code);
2143
2144     if (hasRexPrefix(code))
2145     {
2146         // REX prefix
2147         size += emitGetRexPrefixSize(ins);
2148     }
2149     else if (TakesRexWPrefix(ins, attrSize))
2150     {
2151         // REX.W prefix
2152         size += emitGetRexPrefixSize(ins);
2153     }
2154     else if (IsExtendedReg(reg, EA_PTRSIZE) || IsExtendedReg(rgx, EA_PTRSIZE) ||
2155              ((ins != INS_call) && IsExtendedReg(id->idReg1(), attrSize)))
2156     {
2157         // Should have a REX byte
2158         size += emitGetRexPrefixSize(ins);
2159     }
2160
2161     if (rgx == REG_NA)
2162     {
2163         /* The address is of the form "[reg+disp]" */
2164
2165         if (reg == REG_NA)
2166         {
2167             /* The address is of the form "[disp]" */
2168
2169             size += sizeof(INT32);
2170
2171 #ifdef _TARGET_AMD64_
2172             // If id is not marked for reloc, add 1 additional byte for SIB that follows disp32
2173             if (!id->idIsDspReloc())
2174             {
2175                 size++;
2176             }
2177 #endif
2178             return size;
2179         }
2180
2181         // If this is just "call reg", we're done.
2182         if (id->idIsCallRegPtr())
2183         {
2184             assert(ins == INS_call);
2185             assert(dsp == 0);
2186             return size;
2187         }
2188
2189         // If the base register is ESP (or R12 on 64-bit systems), a SIB byte must be used.
2190         if (baseRegisterRequiresSibByte(reg))
2191         {
2192             size++;
2193         }
2194
2195         // If the base register is EBP (or R13 on 64-bit systems), a displacement is required.
2196         // Otherwise, the displacement can be elided if it is zero.
2197         if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
2198         {
2199             return size;
2200         }
2201
2202         /* Does the offset fit in a byte? */
2203
2204         if (dspInByte)
2205         {
2206             size += sizeof(char);
2207         }
2208         else
2209         {
2210             size += sizeof(INT32);
2211         }
2212     }
2213     else
2214     {
2215         /* An index register is present */
2216
2217         size++;
2218
2219         /* Is the index value scaled? */
2220
2221         if (emitDecodeScale(id->idAddr()->iiaAddrMode.amScale) > 1)
2222         {
2223             /* Is there a base register? */
2224
2225             if (reg != REG_NA)
2226             {
2227                 /* The address is "[reg + {2/4/8} * rgx + icon]" */
2228
2229                 if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
2230                 {
2231                     /* The address is "[reg + {2/4/8} * rgx]" */
2232                 }
2233                 else
2234                 {
2235                     /* The address is "[reg + {2/4/8} * rgx + disp]" */
2236
2237                     if (dspInByte)
2238                     {
2239                         size += sizeof(char);
2240                     }
2241                     else
2242                     {
2243                         size += sizeof(int);
2244                     }
2245                 }
2246             }
2247             else
2248             {
2249                 /* The address is "[{2/4/8} * rgx + icon]" */
2250
2251                 size += sizeof(INT32);
2252             }
2253         }
2254         else
2255         {
2256             if (dspIsZero && baseRegisterRequiresDisplacement(reg) && !baseRegisterRequiresDisplacement(rgx))
2257             {
2258                 /* Swap reg and rgx, such that reg is not EBP/R13 */
2259                 regNumber tmp                       = reg;
2260                 id->idAddr()->iiaAddrMode.amBaseReg = reg = rgx;
2261                 id->idAddr()->iiaAddrMode.amIndxReg = rgx = tmp;
2262             }
2263
2264             /* The address is "[reg+rgx+dsp]" */
2265
2266             if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
2267             {
2268                 /* This is [reg+rgx]" */
2269             }
2270             else
2271             {
2272                 /* This is [reg+rgx+dsp]" */
2273
2274                 if (dspInByte)
2275                 {
2276                     size += sizeof(char);
2277                 }
2278                 else
2279                 {
2280                     size += sizeof(int);
2281                 }
2282             }
2283         }
2284     }
2285
2286     return size;
2287 }
2288
2289 inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code, int val)
2290 {
2291     instruction    ins       = id->idIns();
2292     UNATIVE_OFFSET valSize   = EA_SIZE_IN_BYTES(id->idOpSize());
2293     bool           valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
2294
2295     // We should never generate BT mem,reg because it has poor performance. BT mem,imm might be useful
2296     // but it requires special handling of the immediate value (it is always encoded in a byte).
2297     // Let's not complicate things until this is needed.
2298     assert(ins != INS_bt);
2299
2300 #ifdef _TARGET_AMD64_
2301     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
2302     // all other opcodes take a sign-extended 4-byte immediate
2303     noway_assert(valSize <= sizeof(INT32) || !id->idIsCnsReloc());
2304 #endif // _TARGET_AMD64_
2305
2306     if (valSize > sizeof(INT32))
2307     {
2308         valSize = sizeof(INT32);
2309     }
2310
2311     if (id->idIsCnsReloc())
2312     {
2313         valInByte = false; // relocs can't be placed in a byte
2314         assert(valSize == sizeof(INT32));
2315     }
2316
2317     if (valInByte)
2318     {
2319         valSize = sizeof(char);
2320     }
2321
2322     return valSize + emitInsSizeAM(id, code);
2323 }
2324
2325 inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code)
2326 {
2327     instruction ins = id->idIns();
2328
2329     // fgMorph changes any statics that won't fit into 32-bit addresses
2330     // into constants with an indir, rather than GT_CLS_VAR
2331     // so we should only hit this path for statics that are RIP-relative
2332     UNATIVE_OFFSET size = sizeof(INT32);
2333
2334     // Most 16-bit operand instructions will need a prefix.
2335     // This refers to 66h size prefix override.
2336
2337     if (id->idOpSize() == EA_2BYTE && ins != INS_movzx && ins != INS_movsx)
2338     {
2339         size++;
2340     }
2341
2342     return size + emitInsSize(code);
2343 }
2344
2345 inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code, int val)
2346 {
2347     instruction    ins       = id->idIns();
2348     UNATIVE_OFFSET valSize   = EA_SIZE_IN_BYTES(id->idOpSize());
2349     bool           valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
2350
2351 #ifndef _TARGET_AMD64_
2352     // occasionally longs get here on x86
2353     if (valSize > sizeof(INT32))
2354         valSize = sizeof(INT32);
2355 #endif // !_TARGET_AMD64_
2356
2357     if (id->idIsCnsReloc())
2358     {
2359         valInByte = false; // relocs can't be placed in a byte
2360         assert(valSize == sizeof(INT32));
2361     }
2362
2363     if (valInByte)
2364     {
2365         valSize = sizeof(char);
2366     }
2367
2368     return valSize + emitInsSizeCV(id, code);
2369 }
2370
2371 /*****************************************************************************
2372  *
2373  *  Allocate instruction descriptors for instructions with address modes.
2374  */
2375
2376 inline emitter::instrDesc* emitter::emitNewInstrAmd(emitAttr size, ssize_t dsp)
2377 {
2378     if (dsp < AM_DISP_MIN || dsp > AM_DISP_MAX)
2379     {
2380         instrDescAmd* id = emitAllocInstrAmd(size);
2381
2382         id->idSetIsLargeDsp();
2383 #ifdef DEBUG
2384         id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2385 #endif
2386         id->idaAmdVal = dsp;
2387
2388         return id;
2389     }
2390     else
2391     {
2392         instrDesc* id = emitAllocInstr(size);
2393
2394         id->idAddr()->iiaAddrMode.amDisp = dsp;
2395         assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2396
2397         return id;
2398     }
2399 }
2400
2401 /*****************************************************************************
2402  *
2403  *  Set the displacement field in an instruction. Only handles instrDescAmd type.
2404  */
2405
2406 inline void emitter::emitSetAmdDisp(instrDescAmd* id, ssize_t dsp)
2407 {
2408     if (dsp < AM_DISP_MIN || dsp > AM_DISP_MAX)
2409     {
2410         id->idSetIsLargeDsp();
2411 #ifdef DEBUG
2412         id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2413 #endif
2414         id->idaAmdVal = dsp;
2415     }
2416     else
2417     {
2418         id->idSetIsSmallDsp();
2419         id->idAddr()->iiaAddrMode.amDisp = dsp;
2420         assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2421     }
2422 }
2423
2424 /*****************************************************************************
2425  *
2426  *  Allocate an instruction descriptor for an instruction that uses both
2427  *  an address mode displacement and a constant.
2428  */
2429
2430 emitter::instrDesc* emitter::emitNewInstrAmdCns(emitAttr size, ssize_t dsp, int cns)
2431 {
2432     if (dsp >= AM_DISP_MIN && dsp <= AM_DISP_MAX)
2433     {
2434         if (cns >= ID_MIN_SMALL_CNS && cns <= ID_MAX_SMALL_CNS)
2435         {
2436             instrDesc* id = emitAllocInstr(size);
2437
2438             id->idSmallCns(cns);
2439
2440             id->idAddr()->iiaAddrMode.amDisp = dsp;
2441             assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2442
2443             return id;
2444         }
2445         else
2446         {
2447             instrDescCns* id = emitAllocInstrCns(size);
2448
2449             id->idSetIsLargeCns();
2450             id->idcCnsVal = cns;
2451
2452             id->idAddr()->iiaAddrMode.amDisp = dsp;
2453             assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2454
2455             return id;
2456         }
2457     }
2458     else
2459     {
2460         if (cns >= ID_MIN_SMALL_CNS && cns <= ID_MAX_SMALL_CNS)
2461         {
2462             instrDescAmd* id = emitAllocInstrAmd(size);
2463
2464             id->idSetIsLargeDsp();
2465 #ifdef DEBUG
2466             id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2467 #endif
2468             id->idaAmdVal = dsp;
2469
2470             id->idSmallCns(cns);
2471
2472             return id;
2473         }
2474         else
2475         {
2476             instrDescCnsAmd* id = emitAllocInstrCnsAmd(size);
2477
2478             id->idSetIsLargeCns();
2479             id->idacCnsVal = cns;
2480
2481             id->idSetIsLargeDsp();
2482 #ifdef DEBUG
2483             id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2484 #endif
2485             id->idacAmdVal = dsp;
2486
2487             return id;
2488         }
2489     }
2490 }
2491
2492 /*****************************************************************************
2493  *
2494  *  The next instruction will be a loop head entry point
2495  *  So insert a dummy instruction here to ensure that
2496  *  the x86 I-cache alignment rule is followed.
2497  */
2498
2499 void emitter::emitLoopAlign()
2500 {
2501     /* Insert a pseudo-instruction to ensure that we align
2502        the next instruction properly */
2503
2504     instrDesc* id = emitNewInstrTiny(EA_1BYTE);
2505     id->idIns(INS_align);
2506     id->idCodeSize(15); // We may need to skip up to 15 bytes of code
2507     emitCurIGsize += 15;
2508 }
2509
2510 /*****************************************************************************
2511  *
2512  *  Add a NOP instruction of the given size.
2513  */
2514
2515 void emitter::emitIns_Nop(unsigned size)
2516 {
2517     assert(size <= 15);
2518
2519     instrDesc* id = emitNewInstr();
2520     id->idIns(INS_nop);
2521     id->idInsFmt(IF_NONE);
2522     id->idCodeSize(size);
2523
2524     dispIns(id);
2525     emitCurIGsize += size;
2526 }
2527
2528 /*****************************************************************************
2529  *
2530  *  Add an instruction with no operands.
2531  */
#ifdef DEBUG
// Returns true when 'ins' is one of the x87 instructions listed below, which emitIns()
// accepts as taking no operands. Always false when FEATURE_STACK_FP_X87 is not enabled.
static bool isX87InsWithNoOperands(instruction ins)
{
#if FEATURE_STACK_FP_X87
    // Each instruction is listed exactly once (INS_fld1 used to be tested twice).
    return (ins == INS_f2xm1 || ins == INS_fchs || ins == INS_fld1 || ins == INS_fldl2e || ins == INS_fldz ||
            ins == INS_fprem || ins == INS_frndint || ins == INS_fscale);
#else  // !FEATURE_STACK_FP_X87
    return false;
#endif // !FEATURE_STACK_FP_X87
}
#endif // DEBUG
2543
2544 void emitter::emitIns(instruction ins)
2545 {
2546     UNATIVE_OFFSET sz;
2547     instrDesc*     id   = emitNewInstr();
2548     code_t         code = insCodeMR(ins);
2549
2550 #ifdef DEBUG
2551 #if FEATURE_STACK_FP_X87
2552     if (ins != INS_fabs && ins != INS_fsqrt && ins != INS_fsin && ins != INS_fcos)
2553 #endif // FEATURE_STACK_FP_X87
2554
2555     {
2556         // We cannot have #ifdef inside macro expansion.
2557         bool assertCond = (ins == INS_cdq || isX87InsWithNoOperands(ins) || ins == INS_int3 || ins == INS_lock ||
2558                            ins == INS_leave || ins == INS_movsb || ins == INS_movsd || ins == INS_movsp ||
2559                            ins == INS_nop || ins == INS_r_movsb || ins == INS_r_movsd || ins == INS_r_movsp ||
2560                            ins == INS_r_stosb || ins == INS_r_stosd || ins == INS_r_stosp || ins == INS_ret ||
2561                            ins == INS_sahf || ins == INS_stosb || ins == INS_stosd || ins == INS_stosp
2562 #ifndef LEGACY_BACKEND
2563                            // These instructions take zero operands
2564                            || ins == INS_vzeroupper || ins == INS_lfence || ins == INS_mfence || ins == INS_sfence
2565 #endif
2566                            );
2567
2568         assert(assertCond);
2569     }
2570 #endif // DEBUG
2571
2572     assert(!hasRexPrefix(code)); // Can't have a REX bit with no operands, right?
2573
2574     if (code & 0xFF000000)
2575     {
2576         sz = 2; // TODO-XArch-Bug?: Shouldn't this be 4? Or maybe we should assert that we don't see this case.
2577     }
2578     else if (code & 0x00FF0000)
2579     {
2580         sz = 3;
2581     }
2582     else if (code & 0x0000FF00)
2583     {
2584         sz = 2;
2585     }
2586     else
2587     {
2588         sz = 1;
2589     }
2590
2591 #ifndef LEGACY_BACKEND
2592     // vzeroupper includes its 2-byte VEX prefix in its MR code.
2593     assert((ins != INS_vzeroupper) || (sz == 3));
2594 #endif
2595
2596     insFormat fmt = IF_NONE;
2597
2598 #if FEATURE_STACK_FP_X87
2599     if (CodeGen::instIsFP(ins))
2600     {
2601         fmt = emitInsModeFormat(ins, IF_TRD);
2602     }
2603 #endif // FEATURE_STACK_FP_X87
2604
2605     id->idIns(ins);
2606     id->idInsFmt(fmt);
2607     id->idCodeSize(sz);
2608
2609     dispIns(id);
2610     emitCurIGsize += sz;
2611 }
2612
2613 #if !defined(LEGACY_BACKEND)
2614 // Add an instruction with no operands, but whose encoding depends on the size
2615 // (Only CDQ/CQO currently)
2616 void emitter::emitIns(instruction ins, emitAttr attr)
2617 {
2618     UNATIVE_OFFSET sz;
2619     instrDesc*     id   = emitNewInstr(attr);
2620     code_t         code = insCodeMR(ins);
2621     assert(ins == INS_cdq);
2622     assert((code & 0xFFFFFF00) == 0);
2623     sz = 1;
2624
2625     insFormat fmt = IF_NONE;
2626
2627     sz += emitGetVexPrefixAdjustedSize(ins, attr, code);
2628     if (TakesRexWPrefix(ins, attr))
2629     {
2630         sz += emitGetRexPrefixSize(ins);
2631     }
2632
2633     id->idIns(ins);
2634     id->idInsFmt(fmt);
2635     id->idCodeSize(sz);
2636
2637     dispIns(id);
2638     emitCurIGsize += sz;
2639 }
2640
2641 //------------------------------------------------------------------------
2642 // emitMapFmtForIns: map the instruction format based on the instruction.
2643 // Shift-by-a-constant instructions have a special format.
2644 //
2645 // Arguments:
2646 //    fmt - the instruction format to map
2647 //    ins - the instruction
2648 //
2649 // Returns:
2650 //    The mapped instruction format.
2651 //
2652 emitter::insFormat emitter::emitMapFmtForIns(insFormat fmt, instruction ins)
2653 {
2654     switch (ins)
2655     {
2656         case INS_rol_N:
2657         case INS_ror_N:
2658         case INS_rcl_N:
2659         case INS_rcr_N:
2660         case INS_shl_N:
2661         case INS_shr_N:
2662         case INS_sar_N:
2663         {
2664             switch (fmt)
2665             {
2666                 case IF_RRW_CNS:
2667                     return IF_RRW_SHF;
2668                 case IF_MRW_CNS:
2669                     return IF_MRW_SHF;
2670                 case IF_SRW_CNS:
2671                     return IF_SRW_SHF;
2672                 case IF_ARW_CNS:
2673                     return IF_ARW_SHF;
2674                 default:
2675                     unreached();
2676             }
2677         }
2678
2679         default:
2680             return fmt;
2681     }
2682 }
2683
2684 //------------------------------------------------------------------------
2685 // emitMapFmtAtoM: map the address mode formats ARD, ARW, and AWR to their direct address equivalents.
2686 //
2687 // Arguments:
2688 //    fmt - the instruction format to map
2689 //
2690 // Returns:
2691 //    The mapped instruction format.
2692 //
2693 emitter::insFormat emitter::emitMapFmtAtoM(insFormat fmt)
2694 {
2695     switch (fmt)
2696     {
2697         case IF_ARD:
2698             return IF_MRD;
2699         case IF_AWR:
2700             return IF_MWR;
2701         case IF_ARW:
2702             return IF_MRW;
2703
2704         case IF_RRD_ARD:
2705             return IF_RRD_MRD;
2706         case IF_RWR_ARD:
2707             return IF_RWR_MRD;
2708         case IF_RWR_ARD_CNS:
2709             return IF_RWR_MRD_CNS;
2710         case IF_RRW_ARD:
2711             return IF_RRW_MRD;
2712         case IF_RRW_ARD_CNS:
2713             return IF_RRW_MRD_CNS;
2714         case IF_RWR_RRD_ARD:
2715             return IF_RWR_RRD_MRD;
2716         case IF_RWR_RRD_ARD_CNS:
2717             return IF_RWR_RRD_MRD_CNS;
2718
2719         case IF_ARD_RRD:
2720             return IF_MRD_RRD;
2721         case IF_AWR_RRD:
2722             return IF_MWR_RRD;
2723         case IF_ARW_RRD:
2724             return IF_MRW_RRD;
2725
2726         case IF_ARD_CNS:
2727             return IF_MRD_CNS;
2728         case IF_AWR_CNS:
2729             return IF_MWR_CNS;
2730         case IF_ARW_CNS:
2731             return IF_MRW_CNS;
2732
2733         case IF_AWR_RRD_CNS:
2734             return IF_MWR_RRD_CNS;
2735
2736         case IF_ARW_SHF:
2737             return IF_MRW_SHF;
2738
2739         default:
2740             unreached();
2741     }
2742 }
2743
2744 //------------------------------------------------------------------------
2745 // emitHandleMemOp: For a memory operand, fill in the relevant fields of the instrDesc.
2746 //
2747 // Arguments:
2748 //    indir - the memory operand.
2749 //    id - the instrDesc to fill in.
2750 //    fmt - the instruction format to use. This must be one of the ARD, AWR, or ARW formats. If necessary (such as for
2751 //          GT_CLS_VAR_ADDR), this function will map it to the correct format.
2752 //    ins - the instruction we are generating. This might affect the instruction format we choose.
2753 //
2754 // Assumptions:
2755 //    The correctly sized instrDesc must already be created, e.g., via emitNewInstrAmd() or emitNewInstrAmdCns();
2756 //
2757 // Post-conditions:
2758 //    For base address of int constant:
2759 //        -- the caller must have added the int constant base to the instrDesc when creating it via
2760 //           emitNewInstrAmdCns().
2761 //    For simple address modes (base + scale * index + offset):
2762 //        -- the base register, index register, and scale factor are set.
2763 //        -- the caller must have added the addressing mode offset int constant to the instrDesc when creating it via
2764 //           emitNewInstrAmdCns().
2765 //
2766 //    The instruction format is set.
2767 //
2768 //    idSetIsDspReloc() is called if necessary.
2769 //
2770 void emitter::emitHandleMemOp(GenTreeIndir* indir, instrDesc* id, insFormat fmt, instruction ins)
2771 {
2772     assert(fmt != IF_NONE);
2773
2774     GenTree* memBase = indir->Base();
2775
2776     if ((memBase != nullptr) && memBase->isContained() && (memBase->OperGet() == GT_CLS_VAR_ADDR))
2777     {
2778         CORINFO_FIELD_HANDLE fldHnd = memBase->gtClsVar.gtClsVarHnd;
2779
2780         // Static always need relocs
2781         if (!jitStaticFldIsGlobAddr(fldHnd))
2782         {
2783             // Contract:
2784             // fgMorphField() changes any statics that won't fit into 32-bit addresses into
2785             // constants with an indir, rather than GT_CLS_VAR, based on reloc type hint given
2786             // by VM. Hence emitter should always mark GT_CLS_VAR_ADDR as relocatable.
2787             //
2788             // Data section constants: these get allocated close to code block of the method and
2789             // always addressable IP relative.  These too should be marked as relocatable.
2790
2791             id->idSetIsDspReloc();
2792         }
2793
2794         id->idAddr()->iiaFieldHnd = fldHnd;
2795         id->idInsFmt(emitMapFmtForIns(emitMapFmtAtoM(fmt), ins));
2796     }
2797     else if ((memBase != nullptr) && memBase->IsCnsIntOrI() && memBase->isContained())
2798     {
2799         // Absolute addresses marked as contained should fit within the base of addr mode.
2800         assert(memBase->AsIntConCommon()->FitsInAddrBase(emitComp));
2801
2802         // Either not generating relocatable code, or addr must be an icon handle, or the
2803         // constant is zero (which we won't generate a relocation for).
2804         assert(!emitComp->opts.compReloc || memBase->IsIconHandle() || memBase->IsIntegralConst(0));
2805
2806         if (memBase->AsIntConCommon()->AddrNeedsReloc(emitComp))
2807         {
2808             id->idSetIsDspReloc();
2809         }
2810
2811         id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
2812         id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
2813         id->idAddr()->iiaAddrMode.amScale   = emitter::OPSZ1; // for completeness
2814
2815         id->idInsFmt(emitMapFmtForIns(fmt, ins));
2816
2817         // Absolute address must have already been set in the instrDesc constructor.
2818         assert(emitGetInsAmdAny(id) == memBase->AsIntConCommon()->IconValue());
2819     }
2820     else
2821     {
2822         if (memBase != nullptr)
2823         {
2824             id->idAddr()->iiaAddrMode.amBaseReg = memBase->gtRegNum;
2825         }
2826         else
2827         {
2828             id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
2829         }
2830
2831         if (indir->HasIndex())
2832         {
2833             id->idAddr()->iiaAddrMode.amIndxReg = indir->Index()->gtRegNum;
2834         }
2835         else
2836         {
2837             id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
2838         }
2839         id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(indir->Scale());
2840
2841         id->idInsFmt(emitMapFmtForIns(fmt, ins));
2842
2843         // disp must have already been set in the instrDesc constructor.
2844         assert(emitGetInsAmdAny(id) == indir->Offset()); // make sure "disp" is stored properly
2845     }
2846 }
2847
2848 // Takes care of storing all incoming register parameters
2849 // into its corresponding shadow space (defined by the x64 ABI)
2850 void emitter::spillIntArgRegsToShadowSlots()
2851 {
2852     unsigned       argNum;
2853     instrDesc*     id;
2854     UNATIVE_OFFSET sz;
2855
2856     assert(emitComp->compGeneratingProlog);
2857
2858     for (argNum = 0; argNum < MAX_REG_ARG; ++argNum)
2859     {
2860         regNumber argReg = intArgRegs[argNum];
2861
2862         // The offsets for the shadow space start at RSP + 8
2863         // (right before the caller return address)
2864         int offset = (argNum + 1) * EA_PTRSIZE;
2865
2866         id = emitNewInstrAmd(EA_PTRSIZE, offset);
2867         id->idIns(INS_mov);
2868         id->idInsFmt(IF_AWR_RRD);
2869         id->idAddr()->iiaAddrMode.amBaseReg = REG_SPBASE;
2870         id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
2871         id->idAddr()->iiaAddrMode.amScale   = emitEncodeScale(1);
2872
2873         // The offset has already been set in the intrDsc ctor,
2874         // make sure we got it right.
2875         assert(emitGetInsAmdAny(id) == ssize_t(offset));
2876
2877         id->idReg1(argReg);
2878         sz = emitInsSizeAM(id, insCodeMR(INS_mov));
2879         id->idCodeSize(sz);
2880         emitCurIGsize += sz;
2881     }
2882 }
2883
2884 //------------------------------------------------------------------------
2885 // emitInsLoadInd: Emits a "mov reg, [mem]" (or a variant such as "movzx" or "movss")
2886 // instruction for a GT_IND node.
2887 //
2888 // Arguments:
2889 //    ins - the instruction to emit
2890 //    attr - the instruction operand size
2891 //    dstReg - the destination register
2892 //    mem - the GT_IND node
2893 //
2894 void emitter::emitInsLoadInd(instruction ins, emitAttr attr, regNumber dstReg, GenTreeIndir* mem)
2895 {
2896     assert(mem->OperIs(GT_IND));
2897
2898     GenTree* addr = mem->Addr();
2899
2900     if (addr->OperGet() == GT_CLS_VAR_ADDR)
2901     {
2902         emitIns_R_C(ins, attr, dstReg, addr->gtClsVar.gtClsVarHnd, 0);
2903         return;
2904     }
2905
2906     if (addr->OperGet() == GT_LCL_VAR_ADDR)
2907     {
2908         GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
2909         emitIns_R_S(ins, attr, dstReg, varNode->GetLclNum(), 0);
2910         codeGen->genUpdateLife(varNode);
2911         return;
2912     }
2913
2914     assert(addr->OperIsAddrMode() || (addr->IsCnsIntOrI() && addr->isContained()) || !addr->isContained());
2915     ssize_t    offset = mem->Offset();
2916     instrDesc* id     = emitNewInstrAmd(attr, offset);
2917     id->idIns(ins);
2918     id->idReg1(dstReg);
2919     emitHandleMemOp(mem, id, IF_RWR_ARD, ins);
2920     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
2921     id->idCodeSize(sz);
2922     dispIns(id);
2923     emitCurIGsize += sz;
2924 }
2925
2926 //------------------------------------------------------------------------
2927 // emitInsStoreInd: Emits a "mov [mem], reg/imm" (or a variant such as "movss")
2928 // instruction for a GT_STOREIND node.
2929 //
2930 // Arguments:
2931 //    ins - the instruction to emit
2932 //    attr - the instruction operand size
2933 //    mem - the GT_STOREIND node
2934 //
2935 void emitter::emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* mem)
2936 {
2937     assert(mem->OperIs(GT_STOREIND));
2938
2939     GenTree* addr = mem->Addr();
2940     GenTree* data = mem->Data();
2941
2942     if (addr->OperGet() == GT_CLS_VAR_ADDR)
2943     {
2944         if (data->isContainedIntOrIImmed())
2945         {
2946             emitIns_C_I(ins, attr, addr->gtClsVar.gtClsVarHnd, 0, (int)data->AsIntConCommon()->IconValue());
2947         }
2948         else
2949         {
2950             assert(!data->isContained());
2951             emitIns_C_R(ins, attr, addr->gtClsVar.gtClsVarHnd, data->gtRegNum, 0);
2952         }
2953         return;
2954     }
2955
2956     if (addr->OperGet() == GT_LCL_VAR_ADDR)
2957     {
2958         GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
2959         if (data->isContainedIntOrIImmed())
2960         {
2961             emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int)data->AsIntConCommon()->IconValue());
2962         }
2963         else
2964         {
2965             assert(!data->isContained());
2966             emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0);
2967         }
2968         codeGen->genUpdateLife(varNode);
2969         return;
2970     }
2971
2972     ssize_t        offset = mem->Offset();
2973     UNATIVE_OFFSET sz;
2974     instrDesc*     id;
2975
2976     if (data->isContainedIntOrIImmed())
2977     {
2978         int icon = (int)data->AsIntConCommon()->IconValue();
2979         id       = emitNewInstrAmdCns(attr, offset, icon);
2980         id->idIns(ins);
2981         emitHandleMemOp(mem, id, IF_AWR_CNS, ins);
2982         sz = emitInsSizeAM(id, insCodeMI(ins), icon);
2983         id->idCodeSize(sz);
2984     }
2985     else
2986     {
2987         assert(!data->isContained());
2988         id = emitNewInstrAmd(attr, offset);
2989         id->idIns(ins);
2990         emitHandleMemOp(mem, id, IF_AWR_RRD, ins);
2991         id->idReg1(data->gtRegNum);
2992         sz = emitInsSizeAM(id, insCodeMR(ins));
2993         id->idCodeSize(sz);
2994     }
2995
2996     dispIns(id);
2997     emitCurIGsize += sz;
2998 }
2999
3000 //------------------------------------------------------------------------
3001 // emitInsStoreLcl: Emits a "mov [mem], reg/imm" (or a variant such as "movss")
3002 // instruction for a GT_STORE_LCL_VAR node.
3003 //
3004 // Arguments:
3005 //    ins - the instruction to emit
3006 //    attr - the instruction operand size
3007 //    varNode - the GT_STORE_LCL_VAR node
3008 //
3009 void emitter::emitInsStoreLcl(instruction ins, emitAttr attr, GenTreeLclVarCommon* varNode)
3010 {
3011     assert(varNode->OperIs(GT_STORE_LCL_VAR));
3012     assert(varNode->gtRegNum == REG_NA); // stack store
3013
3014     GenTree* data = varNode->gtGetOp1();
3015     codeGen->inst_set_SV_var(varNode);
3016
3017     if (data->isContainedIntOrIImmed())
3018     {
3019         emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int)data->AsIntConCommon()->IconValue());
3020     }
3021     else
3022     {
3023         assert(!data->isContained());
3024         emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0);
3025     }
3026     codeGen->genUpdateLife(varNode);
3027 }
3028
3029 //------------------------------------------------------------------------
3030 // emitInsBinary: Emits an instruction for a node which takes two operands
3031 //
3032 // Arguments:
3033 //    ins - the instruction to emit
3034 //    attr - the instruction operand size
3035 //    dst - the destination and first source operand
3036 //    src - the second source operand
3037 //
3038 // Assumptions:
3039 //  i) caller of this routine needs to call genConsumeReg()
3040 // ii) caller of this routine needs to call genProduceReg()
3041 regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src)
3042 {
3043     // We can only have one memory operand and only src can be a constant operand
3044     // However, the handling for a given operand type (mem, cns, or other) is fairly
3045     // consistent regardless of whether they are src or dst. As such, we will find
3046     // the type of each operand and only check them against src/dst where relevant.
3047
3048     GenTree* memOp   = nullptr;
3049     GenTree* cnsOp   = nullptr;
3050     GenTree* otherOp = nullptr;
3051
3052     if (dst->isContained() || (dst->isLclField() && (dst->gtRegNum == REG_NA)) || dst->isUsedFromSpillTemp())
3053     {
3054         // dst can only be a modrm
3055         assert(dst->isUsedFromMemory() || (dst->gtRegNum == REG_NA) ||
3056                instrIs3opImul(ins)); // dst on 3opImul isn't really the dst
3057         assert(!src->isUsedFromMemory());
3058
3059         memOp = dst;
3060
3061         if (src->isContained())
3062         {
3063             assert(src->IsCnsIntOrI());
3064             cnsOp = src;
3065         }
3066         else
3067         {
3068             otherOp = src;
3069         }
3070     }
3071     else if (src->isContained() || src->isUsedFromSpillTemp())
3072     {
3073         assert(!dst->isUsedFromMemory());
3074         otherOp = dst;
3075
3076         if ((src->IsCnsIntOrI() || src->IsCnsFltOrDbl()) && !src->isUsedFromSpillTemp())
3077         {
3078             assert(!src->isUsedFromMemory() || src->IsCnsFltOrDbl());
3079             cnsOp = src;
3080         }
3081         else
3082         {
3083             assert(src->isUsedFromMemory());
3084             memOp = src;
3085         }
3086     }
3087
3088     // At this point, we either have a memory operand or we don't.
3089     //
3090     // If we don't then the logic is very simple and  we will either be emitting a
3091     // `reg, immed` instruction (if src is a cns) or a `reg, reg` instruction otherwise.
3092     //
3093     // If we do have a memory operand, the logic is a bit more complicated as we need
3094     // to do different things depending on the type of memory operand. These types include:
3095     //  * Spill temp
3096     //  * Indirect access
3097     //    * Local variable
3098     //    * Class variable
3099     //    * Addressing mode [base + index * scale + offset]
3100     //  * Local field
3101     //  * Local variable
3102     //
3103     // Most of these types (except Indirect: Class variable and Indirect: Addressing mode)
3104     // give us a a local variable number and an offset and access memory on the stack
3105     //
3106     // Indirect: Class variable is used for access static class variables and gives us a handle
3107     // to the memory location we read from
3108     //
3109     // Indirect: Addressing mode is used for the remaining memory accesses and will give us
3110     // a base address, an index, a scale, and an offset. These are combined to let us easily
3111     // access the given memory location.
3112     //
3113     // In all of the memory access cases, we determine which form to emit (e.g. `reg, [mem]`
3114     // or `[mem], reg`) by comparing memOp to src to determine which `emitIns_*` method needs
3115     // to be called. The exception is for the `[mem], immed` case (for Indirect: Class variable)
3116     // where only src can be the immediate.
3117
3118     if (memOp != nullptr)
3119     {
3120         TempDsc* tmpDsc = nullptr;
3121         unsigned varNum = BAD_VAR_NUM;
3122         unsigned offset = (unsigned)-1;
3123
3124         if (memOp->isUsedFromSpillTemp())
3125         {
3126             assert(memOp->IsRegOptional());
3127
3128             tmpDsc = codeGen->getSpillTempDsc(memOp);
3129             varNum = tmpDsc->tdTempNum();
3130             offset = 0;
3131
3132             emitComp->tmpRlsTemp(tmpDsc);
3133         }
3134         else if (memOp->isIndir())
3135         {
3136             GenTreeIndir* memIndir = memOp->AsIndir();
3137             GenTree*      memBase  = memIndir->gtOp1;
3138
3139             switch (memBase->OperGet())
3140             {
3141                 case GT_LCL_VAR_ADDR:
3142                 {
3143                     varNum = memBase->AsLclVarCommon()->GetLclNum();
3144                     offset = 0;
3145
3146                     // Ensure that all the GenTreeIndir values are set to their defaults.
3147                     assert(!memIndir->HasIndex());
3148                     assert(memIndir->Scale() == 1);
3149                     assert(memIndir->Offset() == 0);
3150
3151                     break;
3152                 }
3153
3154                 case GT_CLS_VAR_ADDR:
3155                 {
3156                     if (memOp == src)
3157                     {
3158                         assert(otherOp == dst);
3159                         assert(cnsOp == nullptr);
3160
3161                         if (instrHasImplicitRegPairDest(ins))
3162                         {
3163                             // src is a class static variable
3164                             // dst is implicit - RDX:RAX
3165                             emitIns_C(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0);
3166                         }
3167                         else
3168                         {
3169                             // src is a class static variable
3170                             // dst is a register
3171                             emitIns_R_C(ins, attr, dst->gtRegNum, memBase->gtClsVar.gtClsVarHnd, 0);
3172                         }
3173                     }
3174                     else
3175                     {
3176                         assert(memOp == dst);
3177
3178                         if (cnsOp != nullptr)
3179                         {
3180                             assert(cnsOp == src);
3181                             assert(otherOp == nullptr);
3182                             assert(src->IsCnsIntOrI());
3183
3184                             // src is an contained immediate
3185                             // dst is a class static variable
3186                             emitIns_C_I(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0,
3187                                         (int)src->gtIntConCommon.IconValue());
3188                         }
3189                         else
3190                         {
3191                             assert(otherOp == src);
3192
3193                             // src is a register
3194                             // dst is a class static variable
3195                             emitIns_C_R(ins, attr, memBase->gtClsVar.gtClsVarHnd, src->gtRegNum, 0);
3196                         }
3197                     }
3198
3199                     return dst->gtRegNum;
3200                 }
3201
3202                 default: // Addressing mode [base + index * scale + offset]
3203                 {
3204                     instrDesc* id = nullptr;
3205
3206                     if (cnsOp != nullptr)
3207                     {
3208                         assert(memOp == dst);
3209                         assert(cnsOp == src);
3210                         assert(otherOp == nullptr);
3211                         assert(src->IsCnsIntOrI());
3212
3213                         id = emitNewInstrAmdCns(attr, memIndir->Offset(), (int)src->gtIntConCommon.IconValue());
3214                     }
3215                     else
3216                     {
3217                         ssize_t offset = memIndir->Offset();
3218                         id             = emitNewInstrAmd(attr, offset);
3219                         id->idIns(ins);
3220
3221                         GenTree* regTree = (memOp == src) ? dst : src;
3222
3223                         // there must be one non-contained op
3224                         assert(!regTree->isContained());
3225                         id->idReg1(regTree->gtRegNum);
3226                     }
3227                     assert(id != nullptr);
3228
3229                     id->idIns(ins); // Set the instruction.
3230
3231                     // Determine the instruction format
3232                     insFormat fmt = IF_NONE;
3233
3234                     if (memOp == src)
3235                     {
3236                         assert(cnsOp == nullptr);
3237                         assert(otherOp == dst);
3238
3239                         if (instrHasImplicitRegPairDest(ins))
3240                         {
3241                             fmt = emitInsModeFormat(ins, IF_ARD);
3242                         }
3243                         else
3244                         {
3245                             fmt = emitInsModeFormat(ins, IF_RRD_ARD);
3246                         }
3247                     }
3248                     else
3249                     {
3250                         assert(memOp == dst);
3251
3252                         if (cnsOp != nullptr)
3253                         {
3254                             assert(cnsOp == src);
3255                             assert(otherOp == nullptr);
3256                             assert(src->IsCnsIntOrI());
3257
3258                             fmt = emitInsModeFormat(ins, IF_ARD_CNS);
3259                         }
3260                         else
3261                         {
3262                             assert(otherOp == src);
3263                             fmt = emitInsModeFormat(ins, IF_ARD_RRD);
3264                         }
3265                     }
3266                     assert(fmt != IF_NONE);
3267                     emitHandleMemOp(memIndir, id, fmt, ins);
3268
3269                     // Determine the instruction size
3270                     UNATIVE_OFFSET sz = 0;
3271
3272                     if (memOp == src)
3273                     {
3274                         assert(otherOp == dst);
3275                         assert(cnsOp == nullptr);
3276
3277                         if (instrHasImplicitRegPairDest(ins))
3278                         {
3279                             sz = emitInsSizeAM(id, insCode(ins));
3280                         }
3281                         else
3282                         {
3283                             sz = emitInsSizeAM(id, insCodeRM(ins));
3284                         }
3285                     }
3286                     else
3287                     {
3288                         assert(memOp == dst);
3289
3290                         if (cnsOp != nullptr)
3291                         {
3292                             assert(memOp == dst);
3293                             assert(cnsOp == src);
3294                             assert(otherOp == nullptr);
3295
3296                             sz = emitInsSizeAM(id, insCodeMI(ins), (int)src->gtIntConCommon.IconValue());
3297                         }
3298                         else
3299                         {
3300                             assert(otherOp == src);
3301                             sz = emitInsSizeAM(id, insCodeMR(ins));
3302                         }
3303                     }
3304                     assert(sz != 0);
3305
3306                     id->idCodeSize(sz);
3307
3308                     dispIns(id);
3309                     emitCurIGsize += sz;
3310
3311                     return (memOp == src) ? dst->gtRegNum : REG_NA;
3312                 }
3313             }
3314         }
3315         else
3316         {
3317             switch (memOp->OperGet())
3318             {
3319                 case GT_LCL_FLD:
3320                 case GT_STORE_LCL_FLD:
3321                 {
3322                     GenTreeLclFld* lclField = memOp->AsLclFld();
3323                     varNum                  = lclField->GetLclNum();
3324                     offset                  = lclField->gtLclFld.gtLclOffs;
3325                     break;
3326                 }
3327
3328                 case GT_LCL_VAR:
3329                 {
3330                     assert(memOp->IsRegOptional() || !emitComp->lvaTable[memOp->gtLclVar.gtLclNum].lvIsRegCandidate());
3331                     varNum = memOp->AsLclVar()->GetLclNum();
3332                     offset = 0;
3333                     break;
3334                 }
3335
3336                 default:
3337                     unreached();
3338                     break;
3339             }
3340         }
3341
3342         // Ensure we got a good varNum and offset.
3343         // We also need to check for `tmpDsc != nullptr` since spill temp numbers
3344         // are negative and start with -1, which also happens to be BAD_VAR_NUM.
3345         assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
3346         assert(offset != (unsigned)-1);
3347
3348         if (memOp == src)
3349         {
3350             assert(otherOp == dst);
3351             assert(cnsOp == nullptr);
3352
3353             if (instrHasImplicitRegPairDest(ins))
3354             {
3355                 // src is a stack based local variable
3356                 // dst is implicit - RDX:RAX
3357                 emitIns_S(ins, attr, varNum, offset);
3358             }
3359             else
3360             {
3361                 // src is a stack based local variable
3362                 // dst is a register
3363                 emitIns_R_S(ins, attr, dst->gtRegNum, varNum, offset);
3364             }
3365         }
3366         else
3367         {
3368             assert(memOp == dst);
3369             assert((dst->gtRegNum == REG_NA) || dst->IsRegOptional());
3370
3371             if (cnsOp != nullptr)
3372             {
3373                 assert(cnsOp == src);
3374                 assert(otherOp == nullptr);
3375                 assert(src->IsCnsIntOrI());
3376
3377                 // src is an contained immediate
3378                 // dst is a stack based local variable
3379                 emitIns_S_I(ins, attr, varNum, offset, (int)src->gtIntConCommon.IconValue());
3380             }
3381             else
3382             {
3383                 assert(otherOp == src);
3384                 assert(!src->isContained());
3385
3386                 // src is a register
3387                 // dst is a stack based local variable
3388                 emitIns_S_R(ins, attr, src->gtRegNum, varNum, offset);
3389             }
3390         }
3391     }
3392     else if (cnsOp != nullptr) // reg, immed
3393     {
3394         assert(cnsOp == src);
3395         assert(otherOp == dst);
3396
3397         if (src->IsCnsIntOrI())
3398         {
3399             assert(!dst->isContained());
3400             GenTreeIntConCommon* intCns = src->AsIntConCommon();
3401             emitIns_R_I(ins, attr, dst->gtRegNum, intCns->IconValue());
3402         }
3403         else
3404         {
3405             assert(src->IsCnsFltOrDbl());
3406             GenTreeDblCon* dblCns = src->AsDblCon();
3407
3408             CORINFO_FIELD_HANDLE hnd = emitFltOrDblConst(dblCns->gtDconVal, emitTypeSize(dblCns));
3409             emitIns_R_C(ins, attr, dst->gtRegNum, hnd, 0);
3410         }
3411     }
3412     else // reg, reg
3413     {
3414         assert(otherOp == nullptr);
3415         assert(!src->isContained() && !dst->isContained());
3416
3417         if (instrHasImplicitRegPairDest(ins))
3418         {
3419             emitIns_R(ins, attr, src->gtRegNum);
3420         }
3421         else
3422         {
3423             emitIns_R_R(ins, attr, dst->gtRegNum, src->gtRegNum);
3424         }
3425     }
3426
3427     return dst->gtRegNum;
3428 }
3429
3430 //------------------------------------------------------------------------
3431 // emitInsRMW: Emit logic for Read-Modify-Write binary instructions.
3432 //
3433 // Responsible for emitting a single instruction that will perform an operation of the form:
3434 //      *addr = *addr <BinOp> src
3435 // For example:
3436 //      ADD [RAX], RCX
3437 //
3438 // Arguments:
3439 //    ins - instruction to generate
3440 //    attr - emitter attribute for instruction
3441 //    storeInd - indir for RMW addressing mode
3442 //    src - source operand of instruction
3443 //
3444 // Assumptions:
3445 //    Lowering has taken care of recognizing the StoreInd pattern of:
3446 //          StoreInd( AddressTree, BinOp( Ind ( AddressTree ), Operand ) )
3447 //    The address to store is already sitting in a register.
3448 //
3449 // Notes:
3450 //    This is a no-produce operation, meaning that no register output will
3451 //    be produced for future use in the code stream.
3452 //
3453 void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeInd, GenTree* src)
3454 {
3455     GenTree* addr = storeInd->Addr();
3456     addr          = addr->gtSkipReloadOrCopy();
3457     assert(addr->OperGet() == GT_LCL_VAR || addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA ||
3458            addr->OperGet() == GT_CLS_VAR_ADDR || addr->OperGet() == GT_CNS_INT);
3459
3460     instrDesc*     id = nullptr;
3461     UNATIVE_OFFSET sz;
3462
3463     ssize_t offset = 0;
3464     if (addr->OperGet() != GT_CLS_VAR_ADDR)
3465     {
3466         offset = storeInd->Offset();
3467     }
3468
3469     if (src->isContainedIntOrIImmed())
3470     {
3471         GenTreeIntConCommon* intConst = src->AsIntConCommon();
3472         id                            = emitNewInstrAmdCns(attr, offset, (int)intConst->IconValue());
3473         emitHandleMemOp(storeInd, id, IF_ARW_CNS, ins);
3474         id->idIns(ins);
3475         sz = emitInsSizeAM(id, insCodeMI(ins), (int)intConst->IconValue());
3476     }
3477     else
3478     {
3479         assert(!src->isContained()); // there must be one non-contained src
3480
3481         // ind, reg
3482         id = emitNewInstrAmd(attr, offset);
3483         emitHandleMemOp(storeInd, id, IF_ARW_RRD, ins);
3484         id->idReg1(src->gtRegNum);
3485         id->idIns(ins);
3486         sz = emitInsSizeAM(id, insCodeMR(ins));
3487     }
3488
3489     id->idCodeSize(sz);
3490
3491     dispIns(id);
3492     emitCurIGsize += sz;
3493 }
3494
3495 //------------------------------------------------------------------------
3496 // emitInsRMW: Emit logic for Read-Modify-Write unary instructions.
3497 //
3498 // Responsible for emitting a single instruction that will perform an operation of the form:
3499 //      *addr = UnaryOp *addr
3500 // For example:
3501 //      NOT [RAX]
3502 //
3503 // Arguments:
3504 //    ins - instruction to generate
3505 //    attr - emitter attribute for instruction
3506 //    storeInd - indir for RMW addressing mode
3507 //
3508 // Assumptions:
3509 //    Lowering has taken care of recognizing the StoreInd pattern of:
3510 //          StoreInd( AddressTree, UnaryOp( Ind ( AddressTree ) ) )
3511 //    The address to store is already sitting in a register.
3512 //
3513 // Notes:
3514 //    This is a no-produce operation, meaning that no register output will
3515 //    be produced for future use in the code stream.
3516 //
3517 void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeInd)
3518 {
3519     GenTree* addr = storeInd->Addr();
3520     addr          = addr->gtSkipReloadOrCopy();
3521     assert(addr->OperGet() == GT_LCL_VAR || addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_CLS_VAR_ADDR ||
3522            addr->OperGet() == GT_LEA || addr->OperGet() == GT_CNS_INT);
3523
3524     ssize_t offset = 0;
3525     if (addr->OperGet() != GT_CLS_VAR_ADDR)
3526     {
3527         offset = storeInd->Offset();
3528     }
3529
3530     instrDesc* id = emitNewInstrAmd(attr, offset);
3531     emitHandleMemOp(storeInd, id, IF_ARW, ins);
3532     id->idIns(ins);
3533     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
3534     id->idCodeSize(sz);
3535
3536     dispIns(id);
3537     emitCurIGsize += sz;
3538 }
3539
3540 #endif // !LEGACY_BACKEND
3541
3542 #if FEATURE_STACK_FP_X87
3543 /*****************************************************************************
3544  *
3545  *  Add an instruction of the form "op ST(0),ST(n)".
3546  */
3547
3548 void emitter::emitIns_F0_F(instruction ins, unsigned fpreg)
3549 {
3550     UNATIVE_OFFSET sz  = 2;
3551     instrDesc*     id  = emitNewInstr();
3552     insFormat      fmt = emitInsModeFormat(ins, IF_TRD_FRD);
3553
3554     id->idIns(ins);
3555     id->idInsFmt(fmt);
3556     id->idReg1((regNumber)fpreg);
3557     id->idCodeSize(sz);
3558
3559     dispIns(id);
3560     emitCurIGsize += sz;
3561 }
3562
3563 /*****************************************************************************
3564  *
3565  *  Add an instruction of the form "op ST(n),ST(0)".
3566  */
3567
3568 void emitter::emitIns_F_F0(instruction ins, unsigned fpreg)
3569 {
3570     UNATIVE_OFFSET sz  = 2;
3571     instrDesc*     id  = emitNewInstr();
3572     insFormat      fmt = emitInsModeFormat(ins, IF_FRD_TRD);
3573
3574     id->idIns(ins);
3575     id->idInsFmt(fmt);
3576     id->idReg1((regNumber)fpreg);
3577     id->idCodeSize(sz);
3578
3579     dispIns(id);
3580     emitCurIGsize += sz;
3581 }
3582 #endif // FEATURE_STACK_FP_X87
3583
3584 /*****************************************************************************
3585  *
3586  *  Add an instruction referencing a single register.
3587  */
3588
3589 void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
3590 {
3591     emitAttr size = EA_SIZE(attr);
3592
3593     assert(size <= EA_PTRSIZE);
3594     noway_assert(emitVerifyEncodable(ins, size, reg));
3595
3596     UNATIVE_OFFSET sz;
3597     instrDesc*     id = emitNewInstrTiny(attr);
3598
3599     switch (ins)
3600     {
3601         case INS_inc:
3602         case INS_dec:
3603 #ifdef _TARGET_AMD64_
3604
3605             sz = 2; // x64 has no 1-byte opcode (it is the same encoding as the REX prefix)
3606
3607 #else // !_TARGET_AMD64_
3608
3609             if (size == EA_1BYTE)
3610                 sz = 2; // Use the long form as the small one has no 'w' bit
3611             else
3612                 sz    = 1; // Use short form
3613
3614 #endif // !_TARGET_AMD64_
3615
3616             break;
3617
3618         case INS_pop:
3619         case INS_pop_hide:
3620         case INS_push:
3621         case INS_push_hide:
3622
3623             /* We don't currently push/pop small values */
3624
3625             assert(size == EA_PTRSIZE);
3626
3627             sz = 1;
3628             break;
3629
3630         default:
3631
3632             /* All the sixteen INS_setCCs are contiguous. */
3633
3634             if (INS_seto <= ins && ins <= INS_setg)
3635             {
3636                 // Rough check that we used the endpoints for the range check
3637
3638                 assert(INS_seto + 0xF == INS_setg);
3639
3640                 // The caller must specify EA_1BYTE for 'attr'
3641
3642                 assert(attr == EA_1BYTE);
3643
3644                 /* We expect this to always be a 'big' opcode */
3645
3646                 assert(insEncodeMRreg(ins, reg, attr, insCodeMR(ins)) & 0x00FF0000);
3647
3648                 size = attr;
3649
3650                 sz = 3;
3651                 break;
3652             }
3653             else
3654             {
3655                 sz = 2;
3656                 break;
3657             }
3658     }
3659     insFormat fmt = emitInsModeFormat(ins, IF_RRD);
3660
3661     id->idIns(ins);
3662     id->idInsFmt(fmt);
3663     id->idReg1(reg);
3664
3665     // 16-bit operand instructions will need a prefix.
3666     // This refers to 66h size prefix override.
3667     if (size == EA_2BYTE)
3668     {
3669         sz += 1;
3670     }
3671
3672     // Vex bytes
3673     sz += emitGetVexPrefixAdjustedSize(ins, attr, insEncodeMRreg(ins, reg, attr, insCodeMR(ins)));
3674
3675     // REX byte
3676     if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, attr))
3677     {
3678         sz += emitGetRexPrefixSize(ins);
3679     }
3680
3681     id->idCodeSize(sz);
3682
3683     dispIns(id);
3684     emitCurIGsize += sz;
3685
3686     emitAdjustStackDepthPushPop(ins);
3687 }
3688
3689 /*****************************************************************************
3690  *
3691  *  Add an instruction referencing a register and a constant.
3692  */
3693
3694 void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t val)
3695 {
3696     emitAttr size = EA_SIZE(attr);
3697
3698     // Allow emitting SSE2/AVX SIMD instructions of R_I form that can specify EA_16BYTE or EA_32BYTE
3699     assert(size <= EA_PTRSIZE || IsSSEOrAVXInstruction(ins));
3700
3701     noway_assert(emitVerifyEncodable(ins, size, reg));
3702
3703 #ifdef _TARGET_AMD64_
3704     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
3705     // all other opcodes take a sign-extended 4-byte immediate
3706     noway_assert(size < EA_8BYTE || ins == INS_mov || ((int)val == val && !EA_IS_CNS_RELOC(attr)));
3707 #endif
3708
3709     UNATIVE_OFFSET sz;
3710     instrDesc*     id;
3711     insFormat      fmt       = emitInsModeFormat(ins, IF_RRD_CNS);
3712     bool           valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
3713
3714     // BT reg,imm might be useful but it requires special handling of the immediate value
3715     // (it is always encoded in a byte). Let's not complicate things until this is needed.
3716     assert(ins != INS_bt);
3717
3718     // Figure out the size of the instruction
3719     switch (ins)
3720     {
3721         case INS_mov:
3722 #ifdef _TARGET_AMD64_
3723             // mov reg, imm64 is equivalent to mov reg, imm32 if the high order bits are all 0
3724             // and this isn't a reloc constant.
3725             if (((size > EA_4BYTE) && (0 == (val & 0xFFFFFFFF00000000LL))) && !EA_IS_CNS_RELOC(attr))
3726             {
3727                 attr = size = EA_4BYTE;
3728             }
3729
3730             if (size > EA_4BYTE)
3731             {
3732                 sz = 9; // Really it is 10, but we'll add one more later
3733                 break;
3734             }
3735 #endif // _TARGET_AMD64_
3736             sz = 5;
3737             break;
3738
3739         case INS_rcl_N:
3740         case INS_rcr_N:
3741         case INS_rol_N:
3742         case INS_ror_N:
3743         case INS_shl_N:
3744         case INS_shr_N:
3745         case INS_sar_N:
3746             assert(val != 1);
3747             fmt = IF_RRW_SHF;
3748             sz  = 3;
3749             val &= 0x7F;
3750             valInByte = true; // shift amount always placed in a byte
3751             break;
3752
3753         default:
3754
3755             if (EA_IS_CNS_RELOC(attr))
3756             {
3757                 valInByte = false; // relocs can't be placed in a byte
3758             }
3759
3760             if (valInByte)
3761             {
3762                 if (IsSSEOrAVXInstruction(ins))
3763                 {
3764                     sz = 5;
3765                 }
3766                 else if (size == EA_1BYTE && reg == REG_EAX && !instrIs3opImul(ins))
3767                 {
3768                     sz = 2;
3769                 }
3770                 else
3771                 {
3772                     sz = 3;
3773                 }
3774             }
3775             else
3776             {
3777                 if (reg == REG_EAX && !instrIs3opImul(ins))
3778                 {
3779                     sz = 1;
3780                 }
3781                 else
3782                 {
3783                     sz = 2;
3784                 }
3785
3786 #ifdef _TARGET_AMD64_
3787                 if (size > EA_4BYTE)
3788                 {
3789                     // We special-case anything that takes a full 8-byte constant.
3790                     sz += 4;
3791                 }
3792                 else
3793 #endif // _TARGET_AMD64_
3794                 {
3795                     sz += EA_SIZE_IN_BYTES(attr);
3796                 }
3797             }
3798             break;
3799     }
3800
3801     // Vex prefix size
3802     sz += emitGetVexPrefixSize(ins, attr);
3803
3804     // Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a
3805     // 64-bit sized operand (REX.W). Note that IMUL in our encoding is special, with a "built-in", implicit, target
3806     // register. So we also need to check if that built-in register is an extended register.
3807     if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, size) || instrIsExtendedReg3opImul(ins))
3808     {
3809         sz += emitGetRexPrefixSize(ins);
3810     }
3811
3812 #if defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
3813     assert(reg < 8);
3814 #endif
3815
3816     id = emitNewInstrSC(attr, val);
3817     id->idIns(ins);
3818     id->idInsFmt(fmt);
3819     id->idReg1(reg);
3820
3821     // 16-bit operand instructions will need a prefix
3822     if (size == EA_2BYTE)
3823     {
3824         sz += 1;
3825     }
3826
3827     id->idCodeSize(sz);
3828
3829     dispIns(id);
3830     emitCurIGsize += sz;
3831
3832     if (reg == REG_ESP)
3833     {
3834         emitAdjustStackDepth(ins, val);
3835     }
3836 }
3837
3838 /*****************************************************************************
3839  *
3840  *  Add an instruction referencing an integer constant.
3841  */
3842
3843 void emitter::emitIns_I(instruction ins, emitAttr attr, int val)
3844 {
3845     UNATIVE_OFFSET sz;
3846     instrDesc*     id;
3847     bool           valInByte = ((signed char)val == val);
3848
3849 #ifdef _TARGET_AMD64_
3850     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
3851     // all other opcodes take a sign-extended 4-byte immediate
3852     noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
3853 #endif
3854
3855     if (EA_IS_CNS_RELOC(attr))
3856     {
3857         valInByte = false; // relocs can't be placed in a byte
3858     }
3859
3860     switch (ins)
3861     {
3862         case INS_loop:
3863         case INS_jge:
3864             sz = 2;
3865             break;
3866
3867         case INS_ret:
3868             sz = 3;
3869             break;
3870
3871         case INS_push_hide:
3872         case INS_push:
3873             sz = valInByte ? 2 : 5;
3874             break;
3875
3876         default:
3877             NO_WAY("unexpected instruction");
3878     }
3879
3880     id = emitNewInstrSC(attr, val);
3881     id->idIns(ins);
3882     id->idInsFmt(IF_CNS);
3883     id->idCodeSize(sz);
3884
3885     dispIns(id);
3886     emitCurIGsize += sz;
3887
3888     emitAdjustStackDepthPushPop(ins);
3889 }
3890
3891 /*****************************************************************************
3892  *
3893  *  Add a "jump through a table" instruction.
3894  */
3895
3896 void emitter::emitIns_IJ(emitAttr attr, regNumber reg, unsigned base)
3897 {
3898     assert(EA_SIZE(attr) == EA_4BYTE);
3899
3900     UNATIVE_OFFSET    sz  = 3 + 4;
3901     const instruction ins = INS_i_jmp;
3902
3903     if (IsExtendedReg(reg, attr))
3904     {
3905         sz += emitGetRexPrefixSize(ins);
3906     }
3907
3908     instrDesc* id = emitNewInstrAmd(attr, base);
3909
3910     id->idIns(ins);
3911     id->idInsFmt(IF_ARD);
3912     id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
3913     id->idAddr()->iiaAddrMode.amIndxReg = reg;
3914     id->idAddr()->iiaAddrMode.amScale   = emitter::OPSZP;
3915
3916 #ifdef DEBUG
3917     id->idDebugOnlyInfo()->idMemCookie = base;
3918 #endif
3919
3920     id->idCodeSize(sz);
3921
3922     dispIns(id);
3923     emitCurIGsize += sz;
3924 }
3925
3926 /*****************************************************************************
3927  *
3928  *  Add an instruction with a static data member operand. If 'size' is 0, the
3929  *  instruction operates on the address of the static member instead of its
3930  *  value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]").
3931  */
3932
3933 void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs)
3934 {
3935     // Static always need relocs
3936     if (!jitStaticFldIsGlobAddr(fldHnd))
3937     {
3938         attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
3939     }
3940
3941     UNATIVE_OFFSET sz;
3942     instrDesc*     id;
3943
3944     /* Are we pushing the offset of the class variable? */
3945
3946     if (EA_IS_OFFSET(attr))
3947     {
3948         assert(ins == INS_push);
3949         sz = 1 + TARGET_POINTER_SIZE;
3950
3951         id = emitNewInstrDsp(EA_1BYTE, offs);
3952         id->idIns(ins);
3953         id->idInsFmt(IF_MRD_OFF);
3954     }
3955     else
3956     {
3957 #if FEATURE_STACK_FP_X87
3958         insFormat fmt = emitInsModeFormat(ins, IF_MRD, IF_TRD_MRD, IF_MWR_TRD);
3959 #else  // !FEATURE_STACK_FP_X87
3960         insFormat fmt = emitInsModeFormat(ins, IF_MRD);
3961 #endif // !FEATURE_STACK_FP_X87
3962
3963         id = emitNewInstrDsp(attr, offs);
3964         id->idIns(ins);
3965         id->idInsFmt(fmt);
3966         sz = emitInsSizeCV(id, insCodeMR(ins));
3967     }
3968
3969     // Vex prefix size
3970     sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
3971
3972     if (TakesRexWPrefix(ins, attr))
3973     {
3974         // REX.W prefix
3975         sz += emitGetRexPrefixSize(ins);
3976     }
3977
3978     id->idAddr()->iiaFieldHnd = fldHnd;
3979
3980     id->idCodeSize(sz);
3981
3982     dispIns(id);
3983     emitCurIGsize += sz;
3984
3985     emitAdjustStackDepthPushPop(ins);
3986 }
3987
3988 /*****************************************************************************
3989  *
3990  *  Add an instruction with two register operands.
3991  */
3992
3993 void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2)
3994 {
3995     emitAttr size = EA_SIZE(attr);
3996
3997     /* We don't want to generate any useless mov instructions! */
3998     CLANG_FORMAT_COMMENT_ANCHOR;
3999
4000 #ifdef _TARGET_AMD64_
4001     // Same-reg 4-byte mov can be useful because it performs a
4002     // zero-extension to 8 bytes.
4003     assert(ins != INS_mov || reg1 != reg2 || size == EA_4BYTE);
4004 #else
4005     assert(ins != INS_mov || reg1 != reg2);
4006 #endif // _TARGET_AMD64_
4007
4008     assert(size <= EA_32BYTE);
4009     noway_assert(emitVerifyEncodable(ins, size, reg1, reg2));
4010
4011     UNATIVE_OFFSET sz = emitInsSizeRR(ins, reg1, reg2, attr);
4012
4013     if (Is4ByteSSE4Instruction(ins))
4014     {
4015         // The 4-Byte SSE4 instructions require one additional byte
4016         sz += 1;
4017     }
4018
4019     /* Special case: "XCHG" uses a different format */
4020     insFormat fmt = (ins == INS_xchg) ? IF_RRW_RRW : emitInsModeFormat(ins, IF_RRD_RRD);
4021
4022     instrDesc* id = emitNewInstrTiny(attr);
4023     id->idIns(ins);
4024     id->idInsFmt(fmt);
4025     id->idReg1(reg1);
4026     id->idReg2(reg2);
4027     id->idCodeSize(sz);
4028
4029     dispIns(id);
4030     emitCurIGsize += sz;
4031 }
4032
4033 /*****************************************************************************
4034  *
4035  *  Add an instruction with two register operands and an integer constant.
4036  */
4037
4038 void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival)
4039 {
4040     // SSE2 version requires 5 bytes and SSE4/AVX version 6 bytes
4041     UNATIVE_OFFSET sz = 4;
4042     if (IsSSEOrAVXInstruction(ins))
4043     {
4044         // AVX: 3 byte VEX prefix + 1 byte opcode + 1 byte ModR/M + 1 byte immediate
4045         // SSE4: 4 byte opcode + 1 byte ModR/M + 1 byte immediate
4046         // SSE2: 3 byte opcode + 1 byte ModR/M + 1 byte immediate
4047         sz = (UseVEXEncoding() || UseSSE4()) ? 6 : 5;
4048     }
4049
4050 #ifdef _TARGET_AMD64_
4051     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
4052     // all other opcodes take a sign-extended 4-byte immediate
4053     noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
4054 #endif
4055
4056     instrDesc* id = emitNewInstrSC(attr, ival);
4057
4058     // REX prefix
4059     if (IsExtendedReg(reg1, attr) || IsExtendedReg(reg2, attr))
4060     {
4061         sz += emitGetRexPrefixSize(ins);
4062     }
4063
4064 #ifndef LEGACY_BACKEND
4065     if ((ins == INS_pextrq || ins == INS_pinsrq) && !UseVEXEncoding())
4066     {
4067         assert(UseSSE4());
4068         sz += 1;
4069     }
4070 #endif // !LEGACY_BACKEND
4071
4072     id->idIns(ins);
4073     id->idInsFmt(IF_RRW_RRW_CNS);
4074     id->idReg1(reg1);
4075     id->idReg2(reg2);
4076     id->idCodeSize(sz);
4077
4078     dispIns(id);
4079     emitCurIGsize += sz;
4080 }
4081
4082 #ifndef LEGACY_BACKEND
4083 void emitter::emitIns_AR(instruction ins, emitAttr attr, regNumber base, int offs)
4084 {
4085     assert(ins == INS_prefetcht0 || ins == INS_prefetcht1 || ins == INS_prefetcht2 || ins == INS_prefetchnta);
4086
4087     instrDesc* id = emitNewInstrAmd(attr, offs);
4088
4089     id->idIns(ins);
4090
4091     id->idInsFmt(IF_ARD);
4092     id->idAddr()->iiaAddrMode.amBaseReg = base;
4093     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4094
4095     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
4096     id->idCodeSize(sz);
4097
4098     dispIns(id);
4099     emitCurIGsize += sz;
4100 }
4101
4102 void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, insFormat fmt)
4103 {
4104     ssize_t    offs = indir->Offset();
4105     instrDesc* id   = emitNewInstrAmd(attr, offs);
4106
4107     id->idIns(ins);
4108     id->idReg1(reg1);
4109
4110     emitHandleMemOp(indir, id, fmt, ins);
4111
4112     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
4113     id->idCodeSize(sz);
4114
4115     dispIns(id);
4116     emitCurIGsize += sz;
4117 }
4118
4119 void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival)
4120 {
4121     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4122     assert(IsSSEOrAVXInstruction(ins));
4123
4124     ssize_t    offs = indir->Offset();
4125     instrDesc* id   = emitNewInstrAmdCns(attr, offs, ival);
4126
4127     id->idIns(ins);
4128     id->idReg1(reg1);
4129
4130     emitHandleMemOp(indir, id, IF_RRW_ARD_CNS, ins);
4131
4132     // Plus one for the 1-byte immediate (ival)
4133     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
4134
4135     if (Is4ByteSSE4Instruction(ins))
4136     {
4137         // The 4-Byte SSE4 instructions require two additional bytes
4138         sz += 2;
4139     }
4140
4141     id->idCodeSize(sz);
4142
4143     dispIns(id);
4144     emitCurIGsize += sz;
4145 }
4146
4147 void emitter::emitIns_R_AR_I(instruction ins, emitAttr attr, regNumber reg1, regNumber base, int offs, int ival)
4148 {
4149     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4150     assert(IsSSEOrAVXInstruction(ins));
4151
4152     instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4153
4154     id->idIns(ins);
4155     id->idReg1(reg1);
4156
4157     id->idInsFmt(IF_RRW_ARD_CNS);
4158     id->idAddr()->iiaAddrMode.amBaseReg = base;
4159     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4160
4161     // Plus one for the 1-byte immediate (ival)
4162     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
4163
4164     if (Is4ByteSSE4Instruction(ins))
4165     {
4166         // The 4-Byte SSE4 instructions require two additional bytes
4167         sz += 2;
4168     }
4169
4170     id->idCodeSize(sz);
4171
4172     dispIns(id);
4173     emitCurIGsize += sz;
4174 }
4175
4176 void emitter::emitIns_R_C_I(
4177     instruction ins, emitAttr attr, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
4178 {
4179     // Static always need relocs
4180     if (!jitStaticFldIsGlobAddr(fldHnd))
4181     {
4182         attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4183     }
4184
4185     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4186     assert(IsSSEOrAVXInstruction(ins));
4187
4188     instrDesc*     id = emitNewInstrCnsDsp(attr, ival, offs);
4189     UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
4190
4191     if (Is4ByteSSE4Instruction(ins))
4192     {
4193         // The 4-Byte SSE4 instructions require two additional bytes
4194         sz += 2;
4195     }
4196
4197     id->idIns(ins);
4198     id->idInsFmt(IF_RRW_MRD_CNS);
4199     id->idReg1(reg1);
4200     id->idAddr()->iiaFieldHnd = fldHnd;
4201
4202     id->idCodeSize(sz);
4203     dispIns(id);
4204     emitCurIGsize += sz;
4205 }
4206
4207 void emitter::emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs, int ival)
4208 {
4209     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4210     assert(IsSSEOrAVXInstruction(ins));
4211
4212     instrDesc*     id = emitNewInstrCns(attr, ival);
4213     UNATIVE_OFFSET sz =
4214         emitInsSizeSV(insCodeRM(ins), varx, offs) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
4215
4216     if (Is4ByteSSE4Instruction(ins))
4217     {
4218         // The 4-Byte SSE4 instructions require two additional bytes
4219         sz += 2;
4220     }
4221
4222     id->idIns(ins);
4223     id->idInsFmt(IF_RRW_SRD_CNS);
4224     id->idReg1(reg1);
4225     id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
4226
4227 #ifdef DEBUG
4228     id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4229 #endif
4230
4231     id->idCodeSize(sz);
4232
4233     dispIns(id);
4234     emitCurIGsize += sz;
4235 }
4236
4237 void emitter::emitIns_R_R_A(
4238     instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, insFormat fmt)
4239 {
4240     assert(IsSSEOrAVXInstruction(ins));
4241     assert(IsThreeOperandAVXInstruction(ins));
4242
4243     ssize_t    offs = indir->Offset();
4244     instrDesc* id   = emitNewInstrAmd(attr, offs);
4245
4246     id->idIns(ins);
4247     id->idReg1(reg1);
4248     id->idReg2(reg2);
4249
4250     emitHandleMemOp(indir, id, fmt, ins);
4251
4252     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
4253     id->idCodeSize(sz);
4254
4255     dispIns(id);
4256     emitCurIGsize += sz;
4257 }
4258 #endif // !LEGACY_BACKEND
4259
4260 void emitter::emitIns_R_R_AR(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs)
4261 {
4262     assert(IsSSEOrAVXInstruction(ins));
4263     assert(IsThreeOperandAVXInstruction(ins));
4264
4265     instrDesc* id = emitNewInstrAmd(attr, offs);
4266
4267     id->idIns(ins);
4268     id->idReg1(reg1);
4269     id->idReg2(reg2);
4270
4271     id->idInsFmt(IF_RWR_RRD_ARD);
4272     id->idAddr()->iiaAddrMode.amBaseReg = base;
4273     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4274
4275     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
4276     id->idCodeSize(sz);
4277
4278     dispIns(id);
4279     emitCurIGsize += sz;
4280 }
4281
4282 void emitter::emitIns_R_R_C(
4283     instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs)
4284 {
4285     assert(IsSSEOrAVXInstruction(ins));
4286     assert(IsThreeOperandAVXInstruction(ins));
4287
4288     // Static always need relocs
4289     if (!jitStaticFldIsGlobAddr(fldHnd))
4290     {
4291         attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4292     }
4293
4294     instrDesc*     id = emitNewInstrDsp(attr, offs);
4295     UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
4296
4297     id->idIns(ins);
4298     id->idInsFmt(IF_RWR_RRD_MRD);
4299     id->idReg1(reg1);
4300     id->idReg2(reg2);
4301     id->idAddr()->iiaFieldHnd = fldHnd;
4302
4303     id->idCodeSize(sz);
4304
4305     dispIns(id);
4306     emitCurIGsize += sz;
4307 }
4308
4309 /*****************************************************************************
4310 *
4311 *  Add an instruction with three register operands.
4312 */
4313
4314 void emitter::emitIns_R_R_R(instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2)
4315 {
4316     assert(IsSSEOrAVXInstruction(ins));
4317     assert(IsThreeOperandAVXInstruction(ins));
4318     // Currently vex prefix only use three bytes mode.
4319     // size = vex + opcode + ModR/M = 3 + 1 + 1 = 5
4320     // TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future
4321     UNATIVE_OFFSET sz = 5;
4322
4323     instrDesc* id = emitNewInstr(attr);
4324     id->idIns(ins);
4325     id->idInsFmt(IF_RWR_RRD_RRD);
4326     id->idReg1(targetReg);
4327     id->idReg2(reg1);
4328     id->idReg3(reg2);
4329
4330     id->idCodeSize(sz);
4331     dispIns(id);
4332     emitCurIGsize += sz;
4333 }
4334
4335 void emitter::emitIns_R_R_S(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs)
4336 {
4337     assert(IsSSEOrAVXInstruction(ins));
4338     assert(IsThreeOperandAVXInstruction(ins));
4339
4340     instrDesc*     id = emitNewInstr(attr);
4341     UNATIVE_OFFSET sz =
4342         emitInsSizeSV(insCodeRM(ins), varx, offs) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
4343
4344     id->idIns(ins);
4345     id->idInsFmt(IF_RWR_RRD_SRD);
4346     id->idReg1(reg1);
4347     id->idReg2(reg2);
4348     id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
4349
4350 #ifdef DEBUG
4351     id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4352 #endif
4353
4354     id->idCodeSize(sz);
4355     dispIns(id);
4356     emitCurIGsize += sz;
4357 }
4358
4359 #ifndef LEGACY_BACKEND
4360 void emitter::emitIns_R_R_A_I(
4361     instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, int ival, insFormat fmt)
4362 {
4363     assert(IsSSEOrAVXInstruction(ins));
4364     assert(IsThreeOperandAVXInstruction(ins));
4365
4366     ssize_t    offs = indir->Offset();
4367     instrDesc* id   = emitNewInstrAmdCns(attr, offs, ival);
4368
4369     id->idIns(ins);
4370     id->idReg1(reg1);
4371     id->idReg2(reg2);
4372
4373     emitHandleMemOp(indir, id, fmt, ins);
4374
4375     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
4376     id->idCodeSize(sz);
4377
4378     dispIns(id);
4379     emitCurIGsize += sz;
4380 }
4381
4382 void emitter::emitIns_R_R_AR_I(
4383     instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs, int ival)
4384 {
4385     assert(IsSSEOrAVXInstruction(ins));
4386     assert(IsThreeOperandAVXInstruction(ins));
4387
4388     instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4389
4390     id->idIns(ins);
4391     id->idReg1(reg1);
4392     id->idReg2(reg2);
4393
4394     id->idInsFmt(IF_RWR_RRD_ARD_CNS);
4395     id->idAddr()->iiaAddrMode.amBaseReg = base;
4396     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4397
4398     // Plus one for the 1-byte immediate (ival)
4399     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
4400     id->idCodeSize(sz);
4401
4402     dispIns(id);
4403     emitCurIGsize += sz;
4404 }
4405 #endif // !LEGACY_BACKEND
4406
4407 void emitter::emitIns_R_R_C_I(
4408     instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
4409 {
4410     assert(IsSSEOrAVXInstruction(ins));
4411     assert(IsThreeOperandAVXInstruction(ins));
4412
4413     // Static always need relocs
4414     if (!jitStaticFldIsGlobAddr(fldHnd))
4415     {
4416         attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4417     }
4418
4419     instrDesc*     id = emitNewInstrCnsDsp(attr, ival, offs);
4420     UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
4421
4422     id->idIns(ins);
4423     id->idInsFmt(IF_RWR_RRD_MRD_CNS);
4424     id->idReg1(reg1);
4425     id->idReg2(reg2);
4426     id->idAddr()->iiaFieldHnd = fldHnd;
4427
4428     id->idCodeSize(sz);
4429     dispIns(id);
4430     emitCurIGsize += sz;
4431 }
4432
4433 /**********************************************************************************
4434 * emitIns_R_R_R_I: Add an instruction with three register operands and an immediate.
4435 *
4436 * Arguments:
4437 *    ins       - the instruction to add
4438 *    attr      - the emitter attribute for instruction
4439 *    targetReg - the target (destination) register
4440 *    reg1      - the first source register
4441 *    reg2      - the second source register
4442 *    ival      - the immediate value
4443 */
4444
4445 void emitter::emitIns_R_R_R_I(
4446     instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, int ival)
4447 {
4448     assert(IsSSEOrAVXInstruction(ins));
4449     assert(IsThreeOperandAVXInstruction(ins));
4450     // Currently vex prefix only use three bytes mode.
4451     // size = vex + opcode + ModR/M + 1-byte-cns = 3 + 1 + 1 + 1 = 6
4452     // TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future
4453     UNATIVE_OFFSET sz = 6;
4454
4455     instrDesc* id = emitNewInstrCns(attr, ival);
4456     id->idIns(ins);
4457     id->idInsFmt(IF_RWR_RRD_RRD_CNS);
4458     id->idReg1(targetReg);
4459     id->idReg2(reg1);
4460     id->idReg3(reg2);
4461
4462     id->idCodeSize(sz);
4463     dispIns(id);
4464     emitCurIGsize += sz;
4465 }
4466
4467 void emitter::emitIns_R_R_S_I(
4468     instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs, int ival)
4469 {
4470     assert(IsSSEOrAVXInstruction(ins));
4471     assert(IsThreeOperandAVXInstruction(ins));
4472
4473     instrDesc*     id = emitNewInstrCns(attr, ival);
4474     UNATIVE_OFFSET sz =
4475         emitInsSizeSV(insCodeRM(ins), varx, offs) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
4476
4477     id->idIns(ins);
4478     id->idInsFmt(IF_RWR_RRD_SRD_CNS);
4479     id->idReg1(reg1);
4480     id->idReg2(reg2);
4481     id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
4482
4483 #ifdef DEBUG
4484     id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4485 #endif
4486
4487     id->idCodeSize(sz);
4488     dispIns(id);
4489     emitCurIGsize += sz;
4490 }
4491
4492 #ifndef LEGACY_BACKEND
4493
4494 void emitter::emitIns_R_R_R_R(
4495     instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, regNumber reg3)
4496 {
4497     assert(isAvxBlendv(ins));
4498     assert(UseVEXEncoding());
4499     // Currently vex prefix only use three bytes mode.
4500     // size = vex + opcode + ModR/M + 1-byte-cns(Reg) = 3 + 1 + 1 + 1 = 6
4501     // TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future
4502     UNATIVE_OFFSET sz = 6;
4503
4504     // AVX/AVX2 supports 4-reg format for vblendvps/vblendvpd/vpblendvb,
4505     // which encodes the fourth register into imm8[7:4]
4506     int ival = (reg3 - XMMBASE) << 4; // convert reg3 to ival
4507
4508     instrDesc* id = emitNewInstrCns(attr, ival);
4509     id->idIns(ins);
4510     id->idInsFmt(IF_RWR_RRD_RRD_RRD);
4511     id->idReg1(targetReg);
4512     id->idReg2(reg1);
4513     id->idReg3(reg2);
4514     id->idReg4(reg3);
4515
4516     id->idCodeSize(sz);
4517     dispIns(id);
4518     emitCurIGsize += sz;
4519 }
4520
4521 #endif // !LEGACY_BACKEND
4522
4523 /*****************************************************************************
4524  *
4525  *  Add an instruction with a register + static member operands.
4526  */
4527 void emitter::emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO_FIELD_HANDLE fldHnd, int offs)
4528 {
4529     // Static always need relocs
4530     if (!jitStaticFldIsGlobAddr(fldHnd))
4531     {
4532         attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4533     }
4534
4535     emitAttr size = EA_SIZE(attr);
4536
4537     assert(size <= EA_32BYTE);
4538     noway_assert(emitVerifyEncodable(ins, size, reg));
4539
4540     UNATIVE_OFFSET sz;
4541     instrDesc*     id;
4542
4543     // Are we MOV'ing the offset of the class variable into EAX?
4544     if (EA_IS_OFFSET(attr))
4545     {
4546         id = emitNewInstrDsp(EA_1BYTE, offs);
4547         id->idIns(ins);
4548         id->idInsFmt(IF_RWR_MRD_OFF);
4549
4550         assert(ins == INS_mov && reg == REG_EAX);
4551
4552         // Special case: "mov eax, [addr]" is smaller
4553         sz = 1 + TARGET_POINTER_SIZE;
4554     }
4555     else
4556     {
4557         insFormat fmt = emitInsModeFormat(ins, IF_RRD_MRD);
4558
4559         id = emitNewInstrDsp(attr, offs);
4560         id->idIns(ins);
4561         id->idInsFmt(fmt);
4562
4563 #ifdef _TARGET_X86_
4564         // Special case: "mov eax, [addr]" is smaller.
4565         // This case is not enabled for amd64 as it always uses RIP relative addressing
4566         // and it results in smaller instruction size than encoding 64-bit addr in the
4567         // instruction.
4568         if (ins == INS_mov && reg == REG_EAX)
4569         {
4570             sz = 1 + TARGET_POINTER_SIZE;
4571             if (size == EA_2BYTE)
4572                 sz += 1;
4573         }
4574         else
4575 #endif //_TARGET_X86_
4576         {
4577             sz = emitInsSizeCV(id, insCodeRM(ins));
4578         }
4579
4580         // Special case: mov reg, fs:[ddd]
4581         if (fldHnd == FLD_GLOBAL_FS)
4582         {
4583             sz += 1;
4584         }
4585     }
4586
4587     // VEX prefix
4588     sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
4589
4590     // REX prefix
4591     if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr))
4592     {
4593         sz += emitGetRexPrefixSize(ins);
4594     }
4595
4596     id->idReg1(reg);
4597     id->idCodeSize(sz);
4598
4599     id->idAddr()->iiaFieldHnd = fldHnd;
4600
4601     dispIns(id);
4602     emitCurIGsize += sz;
4603 }
4604
4605 /*****************************************************************************
4606  *
4607  *  Add an instruction with a static member + register operands.
4608  */
4609
4610 void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs)
4611 {
4612     // Static always need relocs
4613     if (!jitStaticFldIsGlobAddr(fldHnd))
4614     {
4615         attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4616     }
4617
4618     emitAttr size = EA_SIZE(attr);
4619
4620 #if defined(_TARGET_X86_) && !FEATURE_STACK_FP_X87
4621     // For x86 RyuJIT it is valid to storeind a double sized operand in an xmm reg to memory
4622     assert(size <= EA_8BYTE);
4623 #else
4624     assert(size <= EA_PTRSIZE);
4625 #endif
4626
4627     noway_assert(emitVerifyEncodable(ins, size, reg));
4628
4629     instrDesc* id  = emitNewInstrDsp(attr, offs);
4630     insFormat  fmt = emitInsModeFormat(ins, IF_MRD_RRD);
4631
4632     id->idIns(ins);
4633     id->idInsFmt(fmt);
4634
4635     UNATIVE_OFFSET sz;
4636
4637 #ifdef _TARGET_X86_
4638     // Special case: "mov [addr], EAX" is smaller.
4639     // This case is not enable for amd64 as it always uses RIP relative addressing
4640     // and it will result in smaller instruction size than encoding 64-bit addr in
4641     // the instruction.
4642     if (ins == INS_mov && reg == REG_EAX)
4643     {
4644         sz = 1 + TARGET_POINTER_SIZE;
4645         if (size == EA_2BYTE)
4646             sz += 1;
4647     }
4648     else
4649 #endif //_TARGET_X86_
4650     {
4651         sz = emitInsSizeCV(id, insCodeMR(ins));
4652     }
4653
4654     // Special case: mov reg, fs:[ddd]
4655     if (fldHnd == FLD_GLOBAL_FS)
4656     {
4657         sz += 1;
4658     }
4659
4660     // VEX prefix
4661     sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
4662
4663     // REX prefix
4664     if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr))
4665     {
4666         sz += emitGetRexPrefixSize(ins);
4667     }
4668
4669     id->idReg1(reg);
4670     id->idCodeSize(sz);
4671
4672     id->idAddr()->iiaFieldHnd = fldHnd;
4673
4674     dispIns(id);
4675     emitCurIGsize += sz;
4676 }
4677
4678 /*****************************************************************************
4679  *
4680  *  Add an instruction with a static member + constant.
4681  */
4682
4683 void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, int val)
4684 {
4685     // Static always need relocs
4686     if (!jitStaticFldIsGlobAddr(fldHnd))
4687     {
4688         attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4689     }
4690
4691     insFormat fmt;
4692
4693     switch (ins)
4694     {
4695         case INS_rcl_N:
4696         case INS_rcr_N:
4697         case INS_rol_N:
4698         case INS_ror_N:
4699         case INS_shl_N:
4700         case INS_shr_N:
4701         case INS_sar_N:
4702             assert(val != 1);
4703             fmt = IF_MRW_SHF;
4704             val &= 0x7F;
4705             break;
4706
4707         default:
4708             fmt = emitInsModeFormat(ins, IF_MRD_CNS);
4709             break;
4710     }
4711
4712     instrDesc* id = emitNewInstrCnsDsp(attr, val, offs);
4713     id->idIns(ins);
4714     id->idInsFmt(fmt);
4715
4716     code_t         code = insCodeMI(ins);
4717     UNATIVE_OFFSET sz   = emitInsSizeCV(id, code, val);
4718
4719     // Vex prefix
4720     sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins));
4721
4722     // REX prefix, if not already included in "code"
4723     if (TakesRexWPrefix(ins, attr) && !hasRexPrefix(code))
4724     {
4725         sz += emitGetRexPrefixSize(ins);
4726     }
4727
4728     id->idAddr()->iiaFieldHnd = fldHnd;
4729     id->idCodeSize(sz);
4730
4731     dispIns(id);
4732     emitCurIGsize += sz;
4733 }
4734
4735 void emitter::emitIns_J_S(instruction ins, emitAttr attr, BasicBlock* dst, int varx, int offs)
4736 {
4737     assert(ins == INS_mov);
4738     assert(dst->bbFlags & BBF_JMP_TARGET);
4739
4740     instrDescLbl* id = emitNewInstrLbl();
4741
4742     id->idIns(ins);
4743     id->idInsFmt(IF_SWR_LABEL);
4744     id->idAddr()->iiaBBlabel = dst;
4745
4746     /* The label reference is always long */
4747
4748     id->idjShort    = 0;
4749     id->idjKeepLong = 1;
4750
4751     /* Record the current IG and offset within it */
4752
4753     id->idjIG   = emitCurIG;
4754     id->idjOffs = emitCurIGsize;
4755
4756     /* Append this instruction to this IG's jump list */
4757
4758     id->idjNext      = emitCurIGjmpList;
4759     emitCurIGjmpList = id;
4760
4761     UNATIVE_OFFSET sz = sizeof(INT32) + emitInsSizeSV(insCodeMI(ins), varx, offs);
4762     id->dstLclVar.initLclVarAddr(varx, offs);
4763 #ifdef DEBUG
4764     id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4765 #endif
4766
4767 #if EMITTER_STATS
4768     emitTotalIGjmps++;
4769 #endif
4770
4771 #ifndef _TARGET_AMD64_
4772     // Storing the address of a basicBlock will need a reloc
4773     // as the instruction uses the absolute address,
4774     // not a relative address.
4775     //
4776     // On Amd64, Absolute code addresses should always go through a reloc to
4777     // to be encoded as RIP rel32 offset.
4778     if (emitComp->opts.compReloc)
4779 #endif
4780     {
4781         id->idSetIsDspReloc();
4782     }
4783
4784     id->idCodeSize(sz);
4785
4786     dispIns(id);
4787     emitCurIGsize += sz;
4788 }
4789
4790 /*****************************************************************************
4791  *
4792  *  Add a label instruction.
4793  */
4794 void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
4795 {
4796     assert(ins == INS_lea);
4797     assert(dst->bbFlags & BBF_JMP_TARGET);
4798
4799     instrDescJmp* id = emitNewInstrJmp();
4800
4801     id->idIns(ins);
4802     id->idReg1(reg);
4803     id->idInsFmt(IF_RWR_LABEL);
4804     id->idOpSize(EA_SIZE(attr)); // emitNewInstrJmp() sets the size (incorrectly) to EA_1BYTE
4805     id->idAddr()->iiaBBlabel = dst;
4806
4807     /* The label reference is always long */
4808
4809     id->idjShort    = 0;
4810     id->idjKeepLong = 1;
4811
4812     /* Record the current IG and offset within it */
4813
4814     id->idjIG   = emitCurIG;
4815     id->idjOffs = emitCurIGsize;
4816
4817     /* Append this instruction to this IG's jump list */
4818
4819     id->idjNext      = emitCurIGjmpList;
4820     emitCurIGjmpList = id;
4821
4822 #ifdef DEBUG
4823     // Mark the catch return
4824     if (emitComp->compCurBB->bbJumpKind == BBJ_EHCATCHRET)
4825     {
4826         id->idDebugOnlyInfo()->idCatchRet = true;
4827     }
4828 #endif // DEBUG
4829
4830 #if EMITTER_STATS
4831     emitTotalIGjmps++;
4832 #endif
4833
4834     // Set the relocation flags - these give hint to zap to perform
4835     // relocation of the specified 32bit address.
4836     //
4837     // Note the relocation flags influence the size estimate.
4838     id->idSetRelocFlags(attr);
4839
4840     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
4841     id->idCodeSize(sz);
4842
4843     dispIns(id);
4844     emitCurIGsize += sz;
4845 }
4846
4847 /*****************************************************************************
4848  *
4849  *  The following adds instructions referencing address modes.
4850  */
4851
4852 void emitter::emitIns_I_AR(instruction ins, emitAttr attr, int val, regNumber reg, int disp)
4853 {
4854     assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
4855
4856 #ifdef _TARGET_AMD64_
4857     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
4858     // all other opcodes take a sign-extended 4-byte immediate
4859     noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
4860 #endif
4861
4862     insFormat fmt;
4863
4864     switch (ins)
4865     {
4866         case INS_rcl_N:
4867         case INS_rcr_N:
4868         case INS_rol_N:
4869         case INS_ror_N:
4870         case INS_shl_N:
4871         case INS_shr_N:
4872         case INS_sar_N:
4873             assert(val != 1);
4874             fmt = IF_ARW_SHF;
4875             val &= 0x7F;
4876             break;
4877
4878         default:
4879             fmt = emitInsModeFormat(ins, IF_ARD_CNS);
4880             break;
4881     }
4882
4883     /*
4884     Useful if you want to trap moves with 0 constant
4885     if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE)
4886     {
4887         printf("MOV 0\n");
4888     }
4889     */
4890
4891     UNATIVE_OFFSET sz;
4892     instrDesc*     id = emitNewInstrAmdCns(attr, disp, val);
4893     id->idIns(ins);
4894     id->idInsFmt(fmt);
4895
4896     id->idAddr()->iiaAddrMode.amBaseReg = reg;
4897     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4898
4899     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
4900
4901     sz = emitInsSizeAM(id, insCodeMI(ins), val);
4902     id->idCodeSize(sz);
4903
4904     dispIns(id);
4905     emitCurIGsize += sz;
4906 }
4907
4908 void emitter::emitIns_I_AI(instruction ins, emitAttr attr, int val, ssize_t disp)
4909 {
4910     assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
4911
4912 #ifdef _TARGET_AMD64_
4913     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
4914     // all other opcodes take a sign-extended 4-byte immediate
4915     noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
4916 #endif
4917
4918     insFormat fmt;
4919
4920     switch (ins)
4921     {
4922         case INS_rcl_N:
4923         case INS_rcr_N:
4924         case INS_rol_N:
4925         case INS_ror_N:
4926         case INS_shl_N:
4927         case INS_shr_N:
4928         case INS_sar_N:
4929             assert(val != 1);
4930             fmt = IF_ARW_SHF;
4931             val &= 0x7F;
4932             break;
4933
4934         default:
4935             fmt = emitInsModeFormat(ins, IF_ARD_CNS);
4936             break;
4937     }
4938
4939     /*
4940     Useful if you want to trap moves with 0 constant
4941     if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE)
4942     {
4943         printf("MOV 0\n");
4944     }
4945     */
4946
4947     UNATIVE_OFFSET sz;
4948     instrDesc*     id = emitNewInstrAmdCns(attr, disp, val);
4949     id->idIns(ins);
4950     id->idInsFmt(fmt);
4951
4952     id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
4953     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4954
4955     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
4956
4957     sz = emitInsSizeAM(id, insCodeMI(ins), val);
4958     id->idCodeSize(sz);
4959
4960     dispIns(id);
4961     emitCurIGsize += sz;
4962 }
4963
4964 void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp)
4965 {
4966     assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE) && (ireg != REG_NA));
4967     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
4968
4969     if (ins == INS_lea)
4970     {
4971         if (ireg == base && disp == 0)
4972         {
4973             // Maybe the emitter is not the common place for this optimization, but it's a better choke point
4974             // for all the emitIns(ins, tree), we would have to be analyzing at each call site
4975             //
4976             return;
4977         }
4978     }
4979
4980     UNATIVE_OFFSET sz;
4981     instrDesc*     id  = emitNewInstrAmd(attr, disp);
4982     insFormat      fmt = emitInsModeFormat(ins, IF_RRD_ARD);
4983
4984     id->idIns(ins);
4985     id->idInsFmt(fmt);
4986     id->idReg1(ireg);
4987
4988     id->idAddr()->iiaAddrMode.amBaseReg = base;
4989     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4990
4991     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
4992
4993     sz = emitInsSizeAM(id, insCodeRM(ins));
4994
4995     if (Is4ByteSSE4Instruction(ins))
4996     {
4997         // The 4-Byte SSE4 instructions require two additional bytes
4998         sz += 2;
4999     }
5000
5001     id->idCodeSize(sz);
5002
5003     dispIns(id);
5004     emitCurIGsize += sz;
5005 }
5006
5007 void emitter::emitIns_R_AI(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp)
5008 {
5009     assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5010     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5011
5012     UNATIVE_OFFSET sz;
5013     instrDesc*     id  = emitNewInstrAmd(attr, disp);
5014     insFormat      fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5015
5016     id->idIns(ins);
5017     id->idInsFmt(fmt);
5018     id->idReg1(ireg);
5019
5020     id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5021     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5022
5023     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5024
5025     sz = emitInsSizeAM(id, insCodeRM(ins));
5026     id->idCodeSize(sz);
5027
5028     dispIns(id);
5029     emitCurIGsize += sz;
5030 }
5031
5032 void emitter::emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp)
5033 {
5034     UNATIVE_OFFSET sz;
5035     instrDesc*     id = emitNewInstrAmd(attr, disp);
5036     insFormat      fmt;
5037
5038     if (ireg == REG_NA)
5039     {
5040 #if FEATURE_STACK_FP_X87
5041         fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD);
5042 #else  // !FEATURE_STACK_FP_X87
5043         fmt       = emitInsModeFormat(ins, IF_ARD);
5044 #endif // !FEATURE_STACK_FP_X87
5045     }
5046     else
5047     {
5048         fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5049
5050         assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE));
5051         noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5052
5053         id->idReg1(ireg);
5054     }
5055
5056     id->idIns(ins);
5057     id->idInsFmt(fmt);
5058
5059     id->idAddr()->iiaAddrMode.amBaseReg = base;
5060     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5061
5062     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5063
5064     sz = emitInsSizeAM(id, insCodeMR(ins));
5065     id->idCodeSize(sz);
5066
5067     dispIns(id);
5068     emitCurIGsize += sz;
5069
5070     emitAdjustStackDepthPushPop(ins);
5071 }
5072
5073 #ifndef LEGACY_BACKEND
5074 void emitter::emitIns_AR_R_I(instruction ins, emitAttr attr, regNumber base, int disp, regNumber ireg, int ival)
5075 {
5076     assert(ins == INS_vextracti128 || ins == INS_vextractf128);
5077     assert(base != REG_NA);
5078     assert(ireg != REG_NA);
5079     instrDesc* id = emitNewInstrAmdCns(attr, disp, ival);
5080
5081     id->idIns(ins);
5082     id->idInsFmt(IF_AWR_RRD_CNS);
5083     id->idAddr()->iiaAddrMode.amBaseReg = base;
5084     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5085     id->idReg1(ireg);
5086
5087     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5088
5089     // Plus one for the 1-byte immediate (ival)
5090     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins)) + 1;
5091     id->idCodeSize(sz);
5092
5093     dispIns(id);
5094     emitCurIGsize += sz;
5095 }
5096 #endif
5097
5098 void emitter::emitIns_AI_R(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp)
5099 {
5100     UNATIVE_OFFSET sz;
5101     instrDesc*     id = emitNewInstrAmd(attr, disp);
5102     insFormat      fmt;
5103
5104     if (ireg == REG_NA)
5105     {
5106 #if FEATURE_STACK_FP_X87
5107         fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD);
5108 #else  // FEATURE_STACK_FP_X87
5109         fmt       = emitInsModeFormat(ins, IF_ARD);
5110 #endif // FEATURE_STACK_FP_X87
5111     }
5112     else
5113     {
5114         fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5115
5116         assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5117         noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5118
5119         id->idReg1(ireg);
5120     }
5121
5122     id->idIns(ins);
5123     id->idInsFmt(fmt);
5124
5125     id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5126     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5127
5128     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5129
5130     sz = emitInsSizeAM(id, insCodeMR(ins));
5131     id->idCodeSize(sz);
5132
5133     dispIns(id);
5134     emitCurIGsize += sz;
5135
5136     emitAdjustStackDepthPushPop(ins);
5137 }
5138
5139 void emitter::emitIns_I_ARR(instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, int disp)
5140 {
5141     assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5142
5143 #ifdef _TARGET_AMD64_
5144     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5145     // all other opcodes take a sign-extended 4-byte immediate
5146     noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5147 #endif
5148
5149     insFormat fmt;
5150
5151     switch (ins)
5152     {
5153         case INS_rcl_N:
5154         case INS_rcr_N:
5155         case INS_rol_N:
5156         case INS_ror_N:
5157         case INS_shl_N:
5158         case INS_shr_N:
5159         case INS_sar_N:
5160             assert(val != 1);
5161             fmt = IF_ARW_SHF;
5162             val &= 0x7F;
5163             break;
5164
5165         default:
5166             fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5167             break;
5168     }
5169
5170     UNATIVE_OFFSET sz;
5171     instrDesc*     id = emitNewInstrAmdCns(attr, disp, val);
5172     id->idIns(ins);
5173     id->idInsFmt(fmt);
5174
5175     id->idAddr()->iiaAddrMode.amBaseReg = reg;
5176     id->idAddr()->iiaAddrMode.amIndxReg = rg2;
5177     id->idAddr()->iiaAddrMode.amScale   = emitter::OPSZ1;
5178
5179     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5180
5181     sz = emitInsSizeAM(id, insCodeMI(ins), val);
5182     id->idCodeSize(sz);
5183
5184     dispIns(id);
5185     emitCurIGsize += sz;
5186 }
5187
5188 void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, int disp)
5189 {
5190     assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5191     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5192
5193     UNATIVE_OFFSET sz;
5194     instrDesc*     id  = emitNewInstrAmd(attr, disp);
5195     insFormat      fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5196
5197     id->idIns(ins);
5198     id->idInsFmt(fmt);
5199     id->idReg1(ireg);
5200
5201     id->idAddr()->iiaAddrMode.amBaseReg = base;
5202     id->idAddr()->iiaAddrMode.amIndxReg = index;
5203     id->idAddr()->iiaAddrMode.amScale   = emitter::OPSZ1;
5204
5205     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5206
5207     sz = emitInsSizeAM(id, insCodeRM(ins));
5208     id->idCodeSize(sz);
5209
5210     dispIns(id);
5211     emitCurIGsize += sz;
5212 }
5213
5214 void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber index, int disp)
5215 {
5216     UNATIVE_OFFSET sz;
5217     instrDesc*     id = emitNewInstrAmd(attr, disp);
5218     insFormat      fmt;
5219
5220     if (ireg == REG_NA)
5221     {
5222 #if FEATURE_STACK_FP_X87
5223         fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD);
5224 #else  // FEATURE_STACK_FP_X87
5225         fmt       = emitInsModeFormat(ins, IF_ARD);
5226 #endif // FEATURE_STACK_FP_X87
5227     }
5228     else
5229     {
5230         fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5231
5232         assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5233         noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5234
5235         id->idReg1(ireg);
5236     }
5237
5238     id->idIns(ins);
5239     id->idInsFmt(fmt);
5240
5241     id->idAddr()->iiaAddrMode.amBaseReg = reg;
5242     id->idAddr()->iiaAddrMode.amIndxReg = index;
5243     id->idAddr()->iiaAddrMode.amScale   = emitEncodeScale(1);
5244
5245     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5246
5247     sz = emitInsSizeAM(id, insCodeMR(ins));
5248     id->idCodeSize(sz);
5249
5250     dispIns(id);
5251     emitCurIGsize += sz;
5252
5253     emitAdjustStackDepthPushPop(ins);
5254 }
5255
5256 void emitter::emitIns_I_ARX(
5257     instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, unsigned mul, int disp)
5258 {
5259     assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5260
5261 #ifdef _TARGET_AMD64_
5262     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5263     // all other opcodes take a sign-extended 4-byte immediate
5264     noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5265 #endif
5266
5267     insFormat fmt;
5268
5269     switch (ins)
5270     {
5271         case INS_rcl_N:
5272         case INS_rcr_N:
5273         case INS_rol_N:
5274         case INS_ror_N:
5275         case INS_shl_N:
5276         case INS_shr_N:
5277         case INS_sar_N:
5278             assert(val != 1);
5279             fmt = IF_ARW_SHF;
5280             val &= 0x7F;
5281             break;
5282
5283         default:
5284             fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5285             break;
5286     }
5287
5288     UNATIVE_OFFSET sz;
5289     instrDesc*     id = emitNewInstrAmdCns(attr, disp, val);
5290
5291     id->idIns(ins);
5292     id->idInsFmt(fmt);
5293
5294     id->idAddr()->iiaAddrMode.amBaseReg = reg;
5295     id->idAddr()->iiaAddrMode.amIndxReg = rg2;
5296     id->idAddr()->iiaAddrMode.amScale   = emitEncodeScale(mul);
5297
5298     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5299
5300     sz = emitInsSizeAM(id, insCodeMI(ins), val);
5301     id->idCodeSize(sz);
5302
5303     dispIns(id);
5304     emitCurIGsize += sz;
5305 }
5306
5307 void emitter::emitIns_R_ARX(
5308     instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp)
5309 {
5310     assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5311     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5312
5313     UNATIVE_OFFSET sz;
5314     instrDesc*     id  = emitNewInstrAmd(attr, disp);
5315     insFormat      fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5316
5317     id->idIns(ins);
5318     id->idInsFmt(fmt);
5319     id->idReg1(ireg);
5320
5321     id->idAddr()->iiaAddrMode.amBaseReg = base;
5322     id->idAddr()->iiaAddrMode.amIndxReg = index;
5323     id->idAddr()->iiaAddrMode.amScale   = emitEncodeScale(mul);
5324
5325     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5326
5327     sz = emitInsSizeAM(id, insCodeRM(ins));
5328     id->idCodeSize(sz);
5329
5330     dispIns(id);
5331     emitCurIGsize += sz;
5332 }
5333
5334 void emitter::emitIns_ARX_R(
5335     instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp)
5336 {
5337     UNATIVE_OFFSET sz;
5338     instrDesc*     id = emitNewInstrAmd(attr, disp);
5339     insFormat      fmt;
5340
5341     if (ireg == REG_NA)
5342     {
5343 #if FEATURE_STACK_FP_X87
5344         fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD);
5345 #else  // !FEATURE_STACK_FP_X87
5346         fmt       = emitInsModeFormat(ins, IF_ARD);
5347 #endif // !FEATURE_STACK_FP_X87
5348     }
5349     else
5350     {
5351         fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5352
5353         noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5354         assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5355
5356         id->idReg1(ireg);
5357     }
5358
5359     id->idIns(ins);
5360     id->idInsFmt(fmt);
5361
5362     id->idAddr()->iiaAddrMode.amBaseReg = base;
5363     id->idAddr()->iiaAddrMode.amIndxReg = index;
5364     id->idAddr()->iiaAddrMode.amScale   = emitEncodeScale(mul);
5365
5366     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5367
5368     sz = emitInsSizeAM(id, insCodeMR(ins));
5369     id->idCodeSize(sz);
5370
5371     dispIns(id);
5372     emitCurIGsize += sz;
5373
5374     emitAdjustStackDepthPushPop(ins);
5375 }
5376
5377 void emitter::emitIns_I_AX(instruction ins, emitAttr attr, int val, regNumber reg, unsigned mul, int disp)
5378 {
5379     assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5380
5381 #ifdef _TARGET_AMD64_
5382     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5383     // all other opcodes take a sign-extended 4-byte immediate
5384     noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5385 #endif
5386
5387     insFormat fmt;
5388
5389     switch (ins)
5390     {
5391         case INS_rcl_N:
5392         case INS_rcr_N:
5393         case INS_rol_N:
5394         case INS_ror_N:
5395         case INS_shl_N:
5396         case INS_shr_N:
5397         case INS_sar_N:
5398             assert(val != 1);
5399             fmt = IF_ARW_SHF;
5400             val &= 0x7F;
5401             break;
5402
5403         default:
5404             fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5405             break;
5406     }
5407
5408     UNATIVE_OFFSET sz;
5409     instrDesc*     id = emitNewInstrAmdCns(attr, disp, val);
5410     id->idIns(ins);
5411     id->idInsFmt(fmt);
5412
5413     id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5414     id->idAddr()->iiaAddrMode.amIndxReg = reg;
5415     id->idAddr()->iiaAddrMode.amScale   = emitEncodeScale(mul);
5416
5417     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5418
5419     sz = emitInsSizeAM(id, insCodeMI(ins), val);
5420     id->idCodeSize(sz);
5421
5422     dispIns(id);
5423     emitCurIGsize += sz;
5424 }
5425
5426 void emitter::emitIns_R_AX(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp)
5427 {
5428     assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5429     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5430
5431     UNATIVE_OFFSET sz;
5432     instrDesc*     id  = emitNewInstrAmd(attr, disp);
5433     insFormat      fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5434
5435     id->idIns(ins);
5436     id->idInsFmt(fmt);
5437     id->idReg1(ireg);
5438
5439     id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5440     id->idAddr()->iiaAddrMode.amIndxReg = reg;
5441     id->idAddr()->iiaAddrMode.amScale   = emitEncodeScale(mul);
5442
5443     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5444
5445     sz = emitInsSizeAM(id, insCodeRM(ins));
5446     id->idCodeSize(sz);
5447
5448     dispIns(id);
5449     emitCurIGsize += sz;
5450 }
5451
5452 void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp)
5453 {
5454     UNATIVE_OFFSET sz;
5455     instrDesc*     id = emitNewInstrAmd(attr, disp);
5456     insFormat      fmt;
5457
5458     if (ireg == REG_NA)
5459     {
5460 #if FEATURE_STACK_FP_X87
5461         fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD);
5462 #else  // !FEATURE_STACK_FP_X87
5463         fmt       = emitInsModeFormat(ins, IF_ARD);
5464 #endif // !FEATURE_STACK_FP_X87
5465     }
5466     else
5467     {
5468         fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5469         noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5470         assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5471
5472         id->idReg1(ireg);
5473     }
5474
5475     id->idIns(ins);
5476     id->idInsFmt(fmt);
5477
5478     id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5479     id->idAddr()->iiaAddrMode.amIndxReg = reg;
5480     id->idAddr()->iiaAddrMode.amScale   = emitEncodeScale(mul);
5481
5482     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5483
5484     sz = emitInsSizeAM(id, insCodeMR(ins));
5485     id->idCodeSize(sz);
5486
5487     dispIns(id);
5488     emitCurIGsize += sz;
5489
5490     emitAdjustStackDepthPushPop(ins);
5491 }
5492
5493 #ifdef FEATURE_HW_INTRINSICS
5494 void emitter::emitIns_SIMD_R_R_A(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, GenTreeIndir* indir)
5495 {
5496     if (UseVEXEncoding())
5497     {
5498         emitIns_R_R_A(ins, attr, reg, reg1, indir, IF_RWR_RRD_ARD);
5499     }
5500     else
5501     {
5502         if (reg1 != reg)
5503         {
5504             emitIns_R_R(INS_movaps, attr, reg, reg1);
5505         }
5506         emitIns_R_A(ins, attr, reg, indir, IF_RRW_ARD);
5507     }
5508 }
5509
5510 void emitter::emitIns_SIMD_R_R_AR(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber base)
5511 {
5512     if (UseVEXEncoding())
5513     {
5514         emitIns_R_R_AR(ins, attr, reg, reg1, base, 0);
5515     }
5516     else
5517     {
5518         if (reg1 != reg)
5519         {
5520             emitIns_R_R(INS_movaps, attr, reg, reg1);
5521         }
5522         emitIns_R_AR(ins, attr, reg, base, 0);
5523     }
5524 }
5525
5526 void emitter::emitIns_SIMD_R_R_C(
5527     instruction ins, emitAttr attr, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs)
5528 {
5529     if (UseVEXEncoding())
5530     {
5531         emitIns_R_R_C(ins, attr, reg, reg1, fldHnd, offs);
5532     }
5533     else
5534     {
5535         if (reg1 != reg)
5536         {
5537             emitIns_R_R(INS_movaps, attr, reg, reg1);
5538         }
5539         emitIns_R_C(ins, attr, reg, fldHnd, offs);
5540     }
5541 }
5542
5543 void emitter::emitIns_SIMD_R_R_R(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2)
5544 {
5545     if (UseVEXEncoding())
5546     {
5547         emitIns_R_R_R(ins, attr, reg, reg1, reg2);
5548     }
5549     else
5550     {
5551         if (reg1 != reg)
5552         {
5553             // Ensure we aren't overwriting op2
5554             assert(reg2 != reg);
5555
5556             emitIns_R_R(INS_movaps, attr, reg, reg1);
5557         }
5558         emitIns_R_R(ins, attr, reg, reg2);
5559     }
5560 }
5561
5562 static bool isSseShift(instruction ins)
5563 {
5564     switch (ins)
5565     {
5566         case INS_psrldq:
5567         case INS_pslldq:
5568         case INS_psrld:
5569         case INS_psrlw:
5570         case INS_psrlq:
5571         case INS_pslld:
5572         case INS_psllw:
5573         case INS_psllq:
5574         case INS_psrad:
5575         case INS_psraw:
5576             return true;
5577         default:
5578             return false;
5579     }
5580 }
5581
5582 //------------------------------------------------------------------------
5583 // IsDstSrcImmAvxInstruction: check if instruction has "R(M) R(M) I" format
5584 // for EVEX, VEX and legacy SSE encodings and has no (E)VEX.NDS
5585 //
5586 // Arguments:
5587 //    ins -- processor instruction to check
5588 //
5589 // Return Value:
5590 //    true if instruction has "R(M) R(M) I" format and has no (E)VEX.NDS
5591 //
     // Notes:
     //    The set of such instructions is the explicit list in the switch below; any
     //    instruction not listed there yields false.
     //
5592 static bool IsDstSrcImmAvxInstruction(instruction ins)
5593 {
5594     switch (ins)
5595     {
5596         case INS_extractps:
5597         case INS_pextrb:
5598         case INS_pextrw:
5599         case INS_pextrd:
5600         case INS_pextrq:
5601         case INS_pshufd:
5602         case INS_pshufhw:
5603         case INS_pshuflw:
5604             return true;
5605         default:
5606             return false;
5607     }
5608 }
5609
//------------------------------------------------------------------------
// emitIns_SIMD_R_R_I: emit a SIMD instruction that takes a target register, a source
// register and an immediate.
//
// Arguments:
//    ins  -- the instruction to emit
//    attr -- the emit attribute forwarded to the underlying emitters
//    reg  -- the target register
//    reg1 -- the source register
//    ival -- the immediate value
//
// Notes:
//    emitIns_R_R_I is used when IsDstSrcImmAvxInstruction(ins) is true, or when VEX encoding
//    is in use and ins is not one of the isSseShift instructions. In every other case
//    (which includes the shift instructions even under VEX encoding) reg1 is copied into reg
//    with movaps if they differ, and emitIns_R_I is used with reg and the immediate.
//    On that second path a shift immediate greater than 127 is replaced by 127.
//
5610 void emitter::emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, int ival)
5611 {
5612     // TODO-XARCH refactoring emitIns_R_R_I to handle SSE2/AVX2 shift as well as emitIns_R_I
5613     bool isShift = isSseShift(ins);
5614     if (IsDstSrcImmAvxInstruction(ins) || (UseVEXEncoding() && !isShift))
5615     {
5616         emitIns_R_R_I(ins, attr, reg, reg1, ival);
5617     }
5618     else
5619     {
             // emitIns_R_I only names reg, so bring reg1 into it first.
5620         if (reg1 != reg)
5621         {
5622             emitIns_R_R(INS_movaps, attr, reg, reg1);
5623         }
5624         // TODO-XARCH-BUG emitOutputRI cannot work with SSE2 shift instruction on imm8 > 127, so we replace it by the
5625         // semantic alternatives. https://github.com/dotnet/coreclr/issues/16543
5626         if (isShift && ival > 127)
5627         {
5628             ival = 127;
5629         }
5630         emitIns_R_I(ins, attr, reg, ival);
5631     }
5632 }
5633
//------------------------------------------------------------------------
// emitIns_SIMD_R_R_R_R: emit a blendv-style SIMD instruction that takes a target register
// and three source registers.
//
// Arguments:
//    ins  -- the instruction to emit; must satisfy isAvxBlendv or isSse41Blendv
//    attr -- the emit attribute forwarded to the underlying emitters
//    reg  -- the target register
//    reg1 -- the first source register (op1)
//    reg2 -- the second source register (op2)
//    reg3 -- the third source register (op3, the mask vector)
//
// Notes:
//    With VEX encoding, blendvps/blendvpd/pblendvb are replaced by vblendvps/vblendvpd/
//    vpblendvb and the four-register emitter is called. Otherwise only the SSE4.1 blendv
//    instructions are accepted: reg3 is moved into XMM0 if it is not already there, reg1 is
//    copied into reg if they differ, and the two-operand form is emitted with reg and reg2.
//    The asserts check that neither copy overwrites a source register that is still needed.
//
5634 void emitter::emitIns_SIMD_R_R_R_R(
5635     instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2, regNumber reg3)
5636 {
5637     assert(isAvxBlendv(ins) || isSse41Blendv(ins));
5638     if (UseVEXEncoding())
5639     {
5640         // convert SSE encoding of SSE4.1 instructions to VEX encoding
5641         switch (ins)
5642         {
5643             case INS_blendvps:
5644                 ins = INS_vblendvps;
5645                 break;
5646             case INS_blendvpd:
5647                 ins = INS_vblendvpd;
5648                 break;
5649             case INS_pblendvb:
5650                 ins = INS_vpblendvb;
5651                 break;
5652             default:
5653                 break;
5654         }
5655         emitIns_R_R_R_R(ins, attr, reg, reg1, reg2, reg3);
5656     }
5657     else
5658     {
5659         assert(isSse41Blendv(ins));
5660         // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
5661         if (reg3 != REG_XMM0)
5662         {
5663             // Ensure we aren't overwriting op1 or op2
5664             assert(reg1 != REG_XMM0);
5665             assert(reg2 != REG_XMM0);
5666
5667             emitIns_R_R(INS_movaps, attr, REG_XMM0, reg3);
5668         }
5669         if (reg1 != reg)
5670         {
5671             // Ensure we aren't overwriting op2 or op3
5672             assert(reg2 != reg);
5673             assert((reg3 == REG_XMM0) || (reg != REG_XMM0));
5674
5675             emitIns_R_R(INS_movaps, attr, reg, reg1);
5676         }
5677         emitIns_R_R(ins, attr, reg, reg2);
5678     }
5679 }
5680
//------------------------------------------------------------------------
// emitIns_SIMD_R_R_S: emit a SIMD instruction that takes a target register, a source
// register and a stack-based local variable operand.
//
// Arguments:
//    ins  -- the instruction to emit
//    attr -- the emit attribute forwarded to the underlying emitters
//    reg  -- the target register
//    reg1 -- the first source register
//    varx -- the local variable number forwarded to the underlying emitter
//    offs -- the offset forwarded together with varx
//
// Notes:
//    With VEX encoding emitIns_R_R_S is called directly. Otherwise reg1 is copied into reg
//    with movaps (unless they are already the same register) and emitIns_R_S is called with
//    reg as its register operand.
//
5681 void emitter::emitIns_SIMD_R_R_S(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, int varx, int offs)
5682 {
5683     if (UseVEXEncoding())
5684     {
5685         emitIns_R_R_S(ins, attr, reg, reg1, varx, offs);
5686     }
5687     else
5688     {
             // The two-operand emitter below only names reg, so bring reg1 into it first.
5689         if (reg1 != reg)
5690         {
5691             emitIns_R_R(INS_movaps, attr, reg, reg1);
5692         }
5693         emitIns_R_S(ins, attr, reg, varx, offs);
5694     }
5695 }
5696
//------------------------------------------------------------------------
// emitIns_SIMD_R_R_A_I: emit a SIMD instruction that takes a target register, a source
// register, an operand described by an indirection node, and an immediate.
//
// Arguments:
//    ins   -- the instruction to emit
//    attr  -- the emit attribute forwarded to the underlying emitters
//    reg   -- the target register
//    reg1  -- the first source register
//    indir -- the indirection node forwarded to the underlying emitter
//    ival  -- the immediate value
//
// Notes:
//    With VEX encoding emitIns_R_R_A_I is called with the IF_RWR_RRD_ARD_CNS format.
//    Otherwise reg1 is copied into reg with movaps (unless they are already the same
//    register) and emitIns_R_A_I is called with reg as its register operand.
//
5697 void emitter::emitIns_SIMD_R_R_A_I(
5698     instruction ins, emitAttr attr, regNumber reg, regNumber reg1, GenTreeIndir* indir, int ival)
5699 {
5700     if (UseVEXEncoding())
5701     {
5702         emitIns_R_R_A_I(ins, attr, reg, reg1, indir, ival, IF_RWR_RRD_ARD_CNS);
5703     }
5704     else
5705     {
             // The emitter below only names reg, so bring reg1 into it first.
5706         if (reg1 != reg)
5707         {
5708             emitIns_R_R(INS_movaps, attr, reg, reg1);
5709         }
5710         emitIns_R_A_I(ins, attr, reg, indir, ival);
5711     }
5712 }
5713
//------------------------------------------------------------------------
// emitIns_SIMD_R_R_AR_I: emit a SIMD instruction that takes a target register, a source
// register, an address operand formed from a base register, and an immediate.
//
// Arguments:
//    ins  -- the instruction to emit
//    attr -- the emit attribute forwarded to the underlying emitters
//    reg  -- the target register
//    reg1 -- the first source register
//    base -- the register forwarded as the base of the address operand
//    ival -- the immediate value
//
// Notes:
//    With VEX encoding emitIns_R_R_AR_I is called directly. Otherwise reg1 is copied into
//    reg with movaps (unless they are already the same register) and emitIns_R_AR_I is
//    called with reg as its register operand. A literal 0 is passed between base and ival
//    to both emitters (presumably the displacement -- confirm against their declarations).
//
5714 void emitter::emitIns_SIMD_R_R_AR_I(
5715     instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber base, int ival)
5716 {
5717     if (UseVEXEncoding())
5718     {
5719         emitIns_R_R_AR_I(ins, attr, reg, reg1, base, 0, ival);
5720     }
5721     else
5722     {
             // The emitter below only names reg, so bring reg1 into it first.
5723         if (reg1 != reg)
5724         {
5725             emitIns_R_R(INS_movaps, attr, reg, reg1);
5726         }
5727         emitIns_R_AR_I(ins, attr, reg, base, 0, ival);
5728     }
5729 }
5730
//------------------------------------------------------------------------
// emitIns_SIMD_R_R_C_I: emit a SIMD instruction that takes a target register, a source
// register, an operand identified by a field handle plus offset, and an immediate.
//
// Arguments:
//    ins    -- the instruction to emit
//    attr   -- the emit attribute forwarded to the underlying emitters
//    reg    -- the target register
//    reg1   -- the first source register
//    fldHnd -- the field handle forwarded to the underlying emitter
//    offs   -- the offset forwarded together with fldHnd
//    ival   -- the immediate value
//
// Notes:
//    With VEX encoding emitIns_R_R_C_I is called directly. Otherwise reg1 is copied into reg
//    with movaps (unless they are already the same register) and emitIns_R_C_I is called
//    with reg as its register operand.
//
5731 void emitter::emitIns_SIMD_R_R_C_I(
5732     instruction ins, emitAttr attr, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
5733 {
5734     if (UseVEXEncoding())
5735     {
5736         emitIns_R_R_C_I(ins, attr, reg, reg1, fldHnd, offs, ival);
5737     }
5738     else
5739     {
             // The emitter below only names reg, so bring reg1 into it first.
5740         if (reg1 != reg)
5741         {
5742             emitIns_R_R(INS_movaps, attr, reg, reg1);
5743         }
5744         emitIns_R_C_I(ins, attr, reg, fldHnd, offs, ival);
5745     }
5746 }
5747
//------------------------------------------------------------------------
// emitIns_SIMD_R_R_R_I: emit a SIMD instruction that takes a target register, two source
// registers and an immediate.
//
// Arguments:
//    ins  -- the instruction to emit
//    attr -- the emit attribute forwarded to the underlying emitters
//    reg  -- the target register
//    reg1 -- the first source register
//    reg2 -- the second source register
//    ival -- the immediate value
//
// Notes:
//    With VEX encoding emitIns_R_R_R_I is called directly. Otherwise reg1 is copied into reg
//    with movaps (unless they are already the same register) and emitIns_R_R_I is called
//    with reg, reg2 and the immediate. When that copy is needed, reg2 must not be the same
//    register as reg, because the copy would overwrite it before it is read; this is asserted.
//
5748 void emitter::emitIns_SIMD_R_R_R_I(
5749     instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2, int ival)
5750 {
5751     if (UseVEXEncoding())
5752     {
5753         emitIns_R_R_R_I(ins, attr, reg, reg1, reg2, ival);
5754     }
5755     else
5756     {
5757         if (reg1 != reg)
5758         {
5759             // Ensure we aren't overwriting op2
5760             assert(reg2 != reg);
5761
5762             emitIns_R_R(INS_movaps, attr, reg, reg1);
5763         }
5764         emitIns_R_R_I(ins, attr, reg, reg2, ival);
5765     }
5766 }
5767
//------------------------------------------------------------------------
// emitIns_SIMD_R_R_S_I: emit a SIMD instruction that takes a target register, a source
// register, a stack-based local variable operand, and an immediate.
//
// Arguments:
//    ins  -- the instruction to emit
//    attr -- the emit attribute forwarded to the underlying emitters
//    reg  -- the target register
//    reg1 -- the first source register
//    varx -- the local variable number forwarded to the underlying emitter
//    offs -- the offset forwarded together with varx
//    ival -- the immediate value
//
// Notes:
//    With VEX encoding emitIns_R_R_S_I is called directly. Otherwise reg1 is copied into reg
//    with movaps (unless they are already the same register) and emitIns_R_S_I is called
//    with reg as its register operand.
//
5768 void emitter::emitIns_SIMD_R_R_S_I(
5769     instruction ins, emitAttr attr, regNumber reg, regNumber reg1, int varx, int offs, int ival)
5770 {
5771     if (UseVEXEncoding())
5772     {
5773         emitIns_R_R_S_I(ins, attr, reg, reg1, varx, offs, ival);
5774     }
5775     else
5776     {
             // The emitter below only names reg, so bring reg1 into it first.
5777         if (reg1 != reg)
5778         {
5779             emitIns_R_R(INS_movaps, attr, reg, reg1);
5780         }
5781         emitIns_R_S_I(ins, attr, reg, varx, offs, ival);
5782     }
5783 }
5784 #endif // FEATURE_HW_INTRINSICS
5785
5786 /*****************************************************************************
5787  *
5788  *  The following add instructions referencing stack-based local variables.
5789  */
5790
//------------------------------------------------------------------------
// emitIns_S: add an instruction whose only operand is a stack-based local variable.
//
// Arguments:
//    ins  -- the instruction to add
//    attr -- the emit attribute; its size drives the operand-size prefix handling below
//    varx -- the local variable number, recorded through initLclVarAddr
//    offs -- the offset recorded together with varx
//
// Notes:
//    The size estimate starts from emitInsSizeSV for the MR encoding of ins and is then
//    increased for a 16-bit operand (one byte), for a VEX prefix, and for a REX prefix when
//    TakesRexWPrefix(ins, attr) holds. The final size is stored on the descriptor and added
//    to emitCurIGsize. emitAdjustStackDepthPushPop is called last so that push/pop of a
//    local is reflected in the tracked stack depth.
//
5791 void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs)
5792 {
5793     instrDesc*     id = emitNewInstr(attr);
5794     UNATIVE_OFFSET sz = emitInsSizeSV(insCodeMR(ins), varx, offs);
5795 #if FEATURE_STACK_FP_X87
5796     insFormat fmt = emitInsModeFormat(ins, IF_SRD, IF_TRD_SRD, IF_SWR_TRD);
5797 #else  // !FEATURE_STACK_FP_X87
5798     insFormat fmt = emitInsModeFormat(ins, IF_SRD);
5799 #endif // !FEATURE_STACK_FP_X87
5800
5801     // 16-bit operand instructions will need a prefix
5802     if (EA_SIZE(attr) == EA_2BYTE)
5803     {
5804         sz += 1;
5805     }
5806
5807     // VEX prefix
5808     sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
5809
5810     // 64-bit operand instructions will need a REX.W prefix
5811     if (TakesRexWPrefix(ins, attr))
5812     {
5813         sz += emitGetRexPrefixSize(ins);
5814     }
5815
5816     id->idIns(ins);
5817     id->idInsFmt(fmt);
5818     id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
5819     id->idCodeSize(sz);
5820
5821 #ifdef DEBUG
5822     id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
5823 #endif
5824     dispIns(id);
5825     emitCurIGsize += sz;
5826
5827     emitAdjustStackDepthPushPop(ins);
5828 }
5829
//------------------------------------------------------------------------
// emitIns_S_R: add an instruction with a stack-based local variable operand and a
// register operand (format derived from IF_SRD_RRD).
//
// Arguments:
//    ins  -- the instruction to add
//    attr -- the emit attribute; its size drives the operand-size prefix handling below
//    ireg -- the register operand, stored as idReg1
//    varx -- the local variable number, recorded through initLclVarAddr
//    offs -- the offset recorded together with varx
//
// Notes:
//    The size estimate starts from emitInsSizeSV for the MR encoding of ins and is then
//    increased for a 16-bit operand (one byte), for a VEX prefix, and for a REX prefix when
//    either TakesRexWPrefix(ins, attr) or IsExtendedReg(ireg, attr) holds. On x86 a 1-byte
//    operation asserts that ireg is a byte-addressable register.
//
5830 void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs)
5831 {
5832     instrDesc*     id  = emitNewInstr(attr);
5833     UNATIVE_OFFSET sz  = emitInsSizeSV(insCodeMR(ins), varx, offs);
5834     insFormat      fmt = emitInsModeFormat(ins, IF_SRD_RRD);
5835
5836 #ifdef _TARGET_X86_
5837     if (attr == EA_1BYTE)
5838     {
5839         assert(isByteReg(ireg));
5840     }
5841 #endif
5842     // 16-bit operand instructions will need a prefix
5843     if (EA_SIZE(attr) == EA_2BYTE)
5844     {
5845         sz++;
5846     }
5847
5848     // VEX prefix
5849     sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
5850
5851     // 64-bit operand instructions will need a REX.W prefix
5852     if (TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr))
5853     {
5854         sz += emitGetRexPrefixSize(ins);
5855     }
5856
5857     id->idIns(ins);
5858     id->idInsFmt(fmt);
5859     id->idReg1(ireg);
5860     id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
5861     id->idCodeSize(sz);
5862 #ifdef DEBUG
5863     id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
5864 #endif
5865     dispIns(id);
5866     emitCurIGsize += sz;
5867 }
5868
//------------------------------------------------------------------------
// emitIns_R_S: add an instruction with a register operand and a stack-based local
// variable operand (format derived from IF_RRD_SRD).
//
// Arguments:
//    ins  -- the instruction to add
//    attr -- the emit attribute; its size drives the operand-size prefix handling below
//    ireg -- the register operand, stored as idReg1
//    varx -- the local variable number, recorded through initLclVarAddr
//    offs -- the offset recorded together with varx
//
// Notes:
//    emitVerifyEncodable(ins, size, ireg) must hold (noway_assert). The size estimate
//    starts from emitInsSizeSV for the RM encoding of ins and is then increased for a
//    16-bit operand (one byte, except for movsx/movzx), for a VEX prefix, and for a REX
//    prefix when either TakesRexWPrefix(ins, attr) or IsExtendedReg(ireg, attr) holds.
//
5869 void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs)
5870 {
5871     emitAttr size = EA_SIZE(attr);
5872     noway_assert(emitVerifyEncodable(ins, size, ireg));
5873
5874     instrDesc*     id  = emitNewInstr(attr);
5875     UNATIVE_OFFSET sz  = emitInsSizeSV(insCodeRM(ins), varx, offs);
5876     insFormat      fmt = emitInsModeFormat(ins, IF_RRD_SRD);
5877
5878     // Most 16-bit operand instructions need a prefix
5879     if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx)
5880     {
5881         sz++;
5882     }
5883
5884     // VEX prefix
5885     sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
5886
5887     // 64-bit operand instructions will need a REX.W prefix
5888     if (TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr))
5889     {
5890         sz += emitGetRexPrefixSize(ins);
5891     }
5892
5893     id->idIns(ins);
5894     id->idInsFmt(fmt);
5895     id->idReg1(ireg);
5896     id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
5897     id->idCodeSize(sz);
5898 #ifdef DEBUG
5899     id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
5900 #endif
5901     dispIns(id);
5902     emitCurIGsize += sz;
5903 }
5904
//------------------------------------------------------------------------
// emitIns_S_I: add an instruction with a stack-based local variable operand and an
// immediate operand.
//
// Arguments:
//    ins  -- the instruction to add
//    attr -- the emit attribute for the instruction
//    varx -- the local variable number, recorded through initLclVarAddr
//    offs -- the offset recorded together with varx
//    val  -- the immediate value
//
// Notes:
//    The "_N" rotate/shift instructions use the IF_SRW_SHF format; for them val must not
//    be 1 (asserted) and only its low 7 bits are kept. All other instructions use the
//    format derived from IF_SRD_CNS. The size estimate comes from the descriptor-based
//    emitInsSizeSV overload and is then increased for a VEX prefix and for a REX prefix
//    when TakesRexWPrefix(ins, attr) holds. On AMD64, an 8-byte attr must not also be a
//    relocatable constant (noway_assert).
//
5905 void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val)
5906 {
5907 #ifdef _TARGET_AMD64_
5908     // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5909     // all other opcodes take a sign-extended 4-byte immediate
5910     noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5911 #endif
5912
5913     insFormat fmt;
5914
5915     switch (ins)
5916     {
5917         case INS_rcl_N:
5918         case INS_rcr_N:
5919         case INS_rol_N:
5920         case INS_ror_N:
5921         case INS_shl_N:
5922         case INS_shr_N:
5923         case INS_sar_N:
                 // Rotate/shift by N: a count of 1 is not expected on this path, and only the
                 // low 7 bits of the count are kept.
5924             assert(val != 1);
5925             fmt = IF_SRW_SHF;
5926             val &= 0x7F;
5927             break;
5928
5929         default:
5930             fmt = emitInsModeFormat(ins, IF_SRD_CNS);
5931             break;
5932     }
5933
         // The instruction and format are stored on the descriptor before it is handed to
         // emitInsSizeSV (presumably that overload reads them -- confirm).
5934     instrDesc* id = emitNewInstrCns(attr, val);
5935     id->idIns(ins);
5936     id->idInsFmt(fmt);
5937     UNATIVE_OFFSET sz = emitInsSizeSV(id, varx, offs, val);
5938
5939     // VEX prefix
5940     sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins));
5941
5942     // 64-bit operand instructions will need a REX.W prefix
5943     if (TakesRexWPrefix(ins, attr))
5944     {
5945         sz += emitGetRexPrefixSize(ins);
5946     }
5947
5948     id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
5949     id->idCodeSize(sz);
5950 #ifdef DEBUG
5951     id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
5952 #endif
5953     dispIns(id);
5954     emitCurIGsize += sz;
5955 }
5956
5957 /*****************************************************************************
5958  *
5959  *  Record that a jump instruction uses the short encoding.
      *
      *  A jump whose idjKeepLong flag is set is left untouched; otherwise its
      *  idjShort flag is set to true.
5960  *
5961  */
5962 void emitter::emitSetShortJump(instrDescJmp* id)
5963 {
         // Jumps marked keep-long are never switched to the short encoding.
5964     if (id->idjKeepLong)
5965     {
5966         return;
5967     }
5968
5969     id->idjShort = true;
5970 }
5971
5972 /*****************************************************************************
5973  *
5974  *  Add a jmp instruction.
      *
      *  ins        - the instruction; INS_call, INS_push and INS_push_hide are sized
      *               specially below, anything else is sized as a jump
      *  dst        - the target basic block; it must have BBF_JMP_TARGET set
      *  instrCount - not referenced in this function body
      *
      *  The new descriptor is given the IF_LABEL format, records the current IG and
      *  the offset within it, and is linked onto the current IG's jump list. The jump
      *  is marked keep-long when the current block and dst are in different regions.
      *
      *  Size estimate: CALL_INST_SIZE for a call; PUSH_INST_SIZE for a push (which is
      *  also marked as needing a displacement reloc when compReloc is set); for a jump
      *  the large size is assumed unless the target's instruction group is already
      *  known (a backward jump), the estimated distance fits the short range and the
      *  jump is not marked keep-long, in which case JMP_SIZE_SMALL is used.
5975  */
5976
5977 void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0 */)
5978 {
5979     UNATIVE_OFFSET sz;
5980     instrDescJmp*  id = emitNewInstrJmp();
5981
5982     assert(dst->bbFlags & BBF_JMP_TARGET);
5983
5984     id->idIns(ins);
5985     id->idInsFmt(IF_LABEL);
5986     id->idAddr()->iiaBBlabel = dst;
5987
5988 #ifdef DEBUG
5989     // Mark the finally call
5990     if (ins == INS_call && emitComp->compCurBB->bbJumpKind == BBJ_CALLFINALLY)
5991     {
5992         id->idDebugOnlyInfo()->idFinallyCall = true;
5993     }
5994 #endif // DEBUG
5995
5996     /* Assume the jump will be long */
5997
5998     id->idjShort    = 0;
5999     id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
6000
6001     /* Record the jump's IG and offset within it */
6002
6003     id->idjIG   = emitCurIG;
6004     id->idjOffs = emitCurIGsize;
6005
6006     /* Append this jump to this IG's jump list */
6007
6008     id->idjNext      = emitCurIGjmpList;
6009     emitCurIGjmpList = id;
6010
6011 #if EMITTER_STATS
6012     emitTotalIGjmps++;
6013 #endif
6014
6015     /* Figure out the max. size of the jump/call instruction */
6016
6017     if (ins == INS_call)
6018     {
6019         sz = CALL_INST_SIZE;
6020     }
6021     else if (ins == INS_push || ins == INS_push_hide)
6022     {
6023         // Pushing the address of a basicBlock will need a reloc
6024         // as the instruction uses the absolute address,
6025         // not a relative address
6026         if (emitComp->opts.compReloc)
6027         {
6028             id->idSetIsDspReloc();
6029         }
6030         sz = PUSH_INST_SIZE;
6031     }
6032     else
6033     {
6034         insGroup* tgt;
6035
6036         /* This is a jump - assume the worst */
6037
6038         sz = (ins == INS_jmp) ? JMP_SIZE_LARGE : JCC_SIZE_LARGE;
6039
6040         /* Can we guess at the jump distance? */
6041
6042         tgt = (insGroup*)emitCodeGetCookie(dst);
6043
6044         if (tgt)
6045         {
6046             int            extra;
6047             UNATIVE_OFFSET srcOffs;
6048             int            jmpDist;
6049
6050             assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL);
6051
6052             /* This is a backward jump - figure out the distance */
6053
6054             srcOffs = emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL;
6055
6056             /* Compute the distance estimate */
6057
6058             jmpDist = srcOffs - tgt->igOffs;
6059             assert((int)jmpDist > 0);
6060
6061             /* How much beyond the max. short distance does the jump go? */
6062
6063             extra = jmpDist + JMP_DIST_SMALL_MAX_NEG;
6064
6065 #if DEBUG_EMIT
6066             if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
6067             {
6068                 if (INTERESTING_JUMP_NUM == 0)
6069                 {
6070                     printf("[0] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
6071                 }
6072                 printf("[0] Jump source is at %08X\n", srcOffs);
6073                 printf("[0] Label block is at %08X\n", tgt->igOffs);
6074                 printf("[0] Jump  distance  - %04X\n", jmpDist);
6075                 if (extra > 0)
6076                 {
6077                     printf("[0] Distance excess = %d  \n", extra);
6078                 }
6079             }
6080 #endif
6081
6082             if (extra <= 0 && !id->idjKeepLong)
6083             {
6084                 /* Wonderful - this jump surely will be short */
6085
6086                 emitSetShortJump(id);
6087                 sz = JMP_SIZE_SMALL;
6088             }
6089         }
6090 #if DEBUG_EMIT
6091         else
6092         {
6093             if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
6094             {
6095                 if (INTERESTING_JUMP_NUM == 0)
6096                 {
6097                     printf("[0] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
6098                 }
6099                 printf("[0] Jump source is at %04X/%08X\n", emitCurIGsize,
6100                        emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL);
6101                 printf("[0] Label block is unknown\n");
6102             }
6103         }
6104 #endif
6105     }
6106
6107     id->idCodeSize(sz);
6108
6109     dispIns(id);
6110     emitCurIGsize += sz;
6111
6112     emitAdjustStackDepthPushPop(ins);
6113 }
6114
6115 #if !FEATURE_FIXED_OUT_ARGS
6116
6117 //------------------------------------------------------------------------
6118 // emitAdjustStackDepthPushPop: Adjust the current and maximum stack depth.
6119 //
6120 // Arguments:
6121 //    ins - the instruction. Only INS_push and INS_pop adjust the stack depth.
6122 //
6123 // Notes:
6124 //    1. Alters emitCurStackLvl and possibly emitMaxStackDepth.
6125 //    2. emitCntStackDepth must be set (0 in prolog/epilog, one DWORD elsewhere)
     //    3. A push raises emitCurStackLvl by emitCntStackDepth and raises emitMaxStackDepth
     //       if the new level exceeds it; a pop lowers emitCurStackLvl by the same amount.
     //       Any other instruction leaves both values unchanged.
6126 //
6127 void emitter::emitAdjustStackDepthPushPop(instruction ins)
6128 {
6129     if (ins == INS_push)
6130     {
6131         emitCurStackLvl += emitCntStackDepth;
6132
6133         if (emitMaxStackDepth < emitCurStackLvl)
6134         {
6135             JITDUMP("Upping emitMaxStackDepth from %d to %d\n", emitMaxStackDepth, emitCurStackLvl);
6136             emitMaxStackDepth = emitCurStackLvl;
6137         }
6138     }
6139     else if (ins == INS_pop)
6140     {
6141         emitCurStackLvl -= emitCntStackDepth;
             // Viewed as a signed value the level must not have gone below zero.
6142         assert((int)emitCurStackLvl >= 0);
6143     }
6144 }
6145
6146 //------------------------------------------------------------------------
6147 // emitAdjustStackDepth: Adjust the current and maximum stack depth.
6148 //
6149 // Arguments:
6150 //    ins - the instruction. Only INS_add and INS_sub adjust the stack depth.
6151 //          It is assumed that the add/sub is on the stack pointer.
6152 //    val - the number of bytes to add to or subtract from the stack pointer.
6153 //
6154 // Notes:
6155 //    1. Alters emitCurStackLvl and possibly emitMaxStackDepth.
6156 //    2. emitCntStackDepth must be set (0 in prolog/epilog, one DWORD elsewhere)
     //    3. INS_sub raises the tracked level by val and INS_add lowers it by val. Both
     //       computations go through S_UINT32, and a noway_assert fires if it reports
     //       an overflow.
6157 //
6158 void emitter::emitAdjustStackDepth(instruction ins, ssize_t val)
6159 {
6160     // If we're in the prolog or epilog, or otherwise not tracking the stack depth, just return.
6161     if (emitCntStackDepth == 0)
6162         return;
6163
6164     if (ins == INS_sub)
6165     {
             // Subtracting from the stack pointer raises the tracked level.
6166         S_UINT32 newStackLvl(emitCurStackLvl);
6167         newStackLvl += S_UINT32(val);
6168         noway_assert(!newStackLvl.IsOverflow());
6169
6170         emitCurStackLvl = newStackLvl.Value();
6171
6172         if (emitMaxStackDepth < emitCurStackLvl)
6173         {
6174             JITDUMP("Upping emitMaxStackDepth from %d to %d\n", emitMaxStackDepth, emitCurStackLvl);
6175             emitMaxStackDepth = emitCurStackLvl;
6176         }
6177     }
6178     else if (ins == INS_add)
6179     {
             // Adding to the stack pointer lowers the tracked level.
6180         S_UINT32 newStackLvl = S_UINT32(emitCurStackLvl) - S_UINT32(val);
6181         noway_assert(!newStackLvl.IsOverflow());
6182
6183         emitCurStackLvl = newStackLvl.Value();
6184     }
6185 }
6186
6187 #endif // !FEATURE_FIXED_OUT_ARGS
6188
6189 /*****************************************************************************
6190  *
6191  *  Add a call instruction (direct or indirect).
6192  *      argSize<0 means that the caller will pop the arguments
6193  *
6194  * The other arguments are interpreted depending on callType as shown:
6195  * Unless otherwise specified, ireg,xreg,xmul,disp should have default values.
6196  *
6197  * EC_FUNC_TOKEN       : addr is the method address
6198  * EC_FUNC_TOKEN_INDIR : addr is the indirect method address
6199  * EC_FUNC_ADDR        : addr is the absolute address of the function
6200  * EC_FUNC_VIRTUAL     : "call [ireg+disp]"
6201  *
6202  * If callType is one of these emitCallTypes, addr has to be NULL.
6203  * EC_INDIR_R          : "call ireg".
6204  * EC_INDIR_SR         : "call lcl<disp>" (eg. call [ebp-8]).
6205  * EC_INDIR_C          : "call clsVar<disp>" (eg. call [clsVarAddr])
6206  * EC_INDIR_ARD        : "call [ireg+xreg*xmul+disp]"
6207  *
6208  */
6209
6210 // clang-format off
6211 void emitter::emitIns_Call(EmitCallType          callType,
6212                            CORINFO_METHOD_HANDLE methHnd,
6213                            INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
6214                            void*                 addr,
6215                            ssize_t               argSize,
6216                            emitAttr              retSize
6217                            MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
6218                            VARSET_VALARG_TP      ptrVars,
6219                            regMaskTP             gcrefRegs,
6220                            regMaskTP             byrefRegs,
6221                            IL_OFFSETX            ilOffset, // = BAD_IL_OFFSET
6222                            regNumber             ireg,     // = REG_NA
6223                            regNumber             xreg,     // = REG_NA
6224                            unsigned              xmul,     // = 0
6225                            ssize_t               disp,     // = 0
6226                            bool                  isJump,   // = false
6227                            bool                  isNoGC)   // = false
6228 // clang-format on
6229 {
6230     /* Sanity check the arguments depending on callType */
6231
6232     assert(callType < EC_COUNT);
6233     assert((callType != EC_FUNC_TOKEN && callType != EC_FUNC_TOKEN_INDIR && callType != EC_FUNC_ADDR) ||
6234            (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0));
6235     assert(callType != EC_FUNC_VIRTUAL || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0));
6236     assert(callType < EC_INDIR_R || callType == EC_INDIR_ARD || callType == EC_INDIR_C || addr == nullptr);
6237     assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0));
6238     assert(callType != EC_INDIR_SR ||
6239            (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp < (int)emitComp->lvaCount));
6240     assert(callType != EC_INDIR_C || (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp != 0));
6241
6242     // Our stack level should be always greater than the bytes of arguments we push. Just
6243     // a sanity test.
6244     assert((unsigned)abs((signed)argSize) <= codeGen->genStackLevel);
6245
6246 #if STACK_PROBES
6247     if (emitComp->opts.compNeedStackProbes)
6248     {
6249         // If we've pushed more than JIT_RESERVED_STACK allows, do an aditional stack probe
6250         // Else, just make sure the prolog does a probe for us. Invariant we're trying
6251         // to get is that at any point we go out to unmanaged code, there is at least
6252         // CORINFO_STACKPROBE_DEPTH bytes of stack available.
6253         //
6254         // The reason why we are not doing one probe for the max size at the prolog
6255         // is that when don't have the max depth precomputed (it can depend on codegen),
6256         // and we need it at the time we generate locallocs
6257         //
6258         // Compiler::lvaAssignFrameOffsets sets up compLclFrameSize, which takes in
6259         // account everything except for the arguments of a callee.
6260         //
6261         //
6262         //
6263         if ((TARGET_POINTER_SIZE + // return address for call
6264              emitComp->genStackLevel +
6265              // Current stack level. This gets resetted on every
6266              // localloc and on the prolog (invariant is that
6267              // genStackLevel is 0 on basic block entry and exit and
6268              // after any alloca). genStackLevel will include any arguments
6269              // to the call, so we will insert an aditional probe if
6270              // we've consumed more than JIT_RESERVED_STACK bytes
6271              // of stack, which is what the prolog probe covers (in
6272              // addition to the EE requested size)
6273              (emitComp->compHndBBtabCount * TARGET_POINTER_SIZE)
6274              // Hidden slots for calling finallys
6275              ) >= JIT_RESERVED_STACK)
6276         {
6277             // This happens when you have a call with a lot of arguments or a call is done
6278             // when there's a lot of stuff pushed on the stack (for example a call whos returned
6279             // value is an argument of another call that has pushed stuff on the stack)
6280             // This should't be very frequent.
6281             // For different values of JIT_RESERVED_STACK
6282             //
6283             // For mscorlib (109605 calls)
6284             //
6285             // 14190 probes in prologs (56760 bytes of code)
6286             //
6287             // JIT_RESERVED_STACK = 16 : 5452 extra probes
6288             // JIT_RESERVED_STACK = 32 : 1084 extra probes
6289             // JIT_RESERVED_STACK = 64 :    1 extra probes
6290             // JIT_RESERVED_STACK = 96 :    0 extra probes
6291             emitComp->genGenerateStackProbe();
6292         }
6293         else
6294         {
6295             if (emitComp->compGeneratingProlog || emitComp->compGeneratingEpilog)
6296             {
6297                 if (emitComp->compStackProbePrologDone)
6298                 {
6299                     // We already generated a probe and this call is not happening
6300                     // at a depth >= JIT_RESERVED_STACK, so nothing to do here
6301                 }
6302                 else
6303                 {
6304                     // 3 possible ways to get here:
6305                     // - We are in an epilog and haven't generated a probe in the prolog.
6306                     //   This shouldn't happen as we don't generate any calls in epilog.
6307                     // - We are in the prolog, but doing a call before generating the probe.
6308                     //   This shouldn't happen at all.
6309                     // - We are in the prolog, did not generate a probe but now we need
6310                     //   to generate a probe because we need a call (eg: profiler). We'll
6311                     //   need a probe.
6312                     //
6313                     // In any case, we need a probe
6314
6315                     // Ignore the profiler callback for now.
6316                     if (!emitComp->compIsProfilerHookNeeded())
6317                     {
6318                         assert(!"We do not expect to get here");
6319                         emitComp->genGenerateStackProbe();
6320                     }
6321                 }
6322             }
6323             else
6324             {
6325                 // We will need a probe and will generate it in the prolog
6326                 emitComp->genNeedPrologStackProbe = true;
6327             }
6328         }
6329     }
6330 #endif // STACK_PROBES
6331
6332     int argCnt;
6333
6334     UNATIVE_OFFSET sz;
6335     instrDesc*     id;
6336
6337     /* This is the saved set of registers after a normal call */
6338     unsigned savedSet = RBM_CALLEE_SAVED;
6339
6340     /* some special helper calls have a different saved set registers */
6341
6342     if (isNoGC)
6343     {
6344         // Get the set of registers that this call kills and remove it from the saved set.
6345         savedSet = RBM_ALLINT & ~emitComp->compNoGCHelperCallKillSet(Compiler::eeGetHelperNum(methHnd));
6346     }
6347     else
6348     {
6349         assert(!emitNoGChelper(Compiler::eeGetHelperNum(methHnd)));
6350     }
6351
6352     /* Trim out any callee-trashed registers from the live set */
6353
6354     gcrefRegs &= savedSet;
6355     byrefRegs &= savedSet;
6356
6357 #ifdef DEBUG
6358     if (EMIT_GC_VERBOSE)
6359     {
6360         printf("\t\t\t\t\t\t\tCall: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars));
6361         dumpConvertedVarSet(emitComp, ptrVars);
6362         printf(", gcrefRegs=");
6363         printRegMaskInt(gcrefRegs);
6364         emitDispRegSet(gcrefRegs);
6365         printf(", byrefRegs=");
6366         printRegMaskInt(byrefRegs);
6367         emitDispRegSet(byrefRegs);
6368         printf("\n");
6369     }
6370 #endif
6371
6372     assert(argSize % REGSIZE_BYTES == 0);
6373     argCnt = (int)(argSize / (int)REGSIZE_BYTES); // we need a signed-divide
6374
6375     /* Managed RetVal: emit sequence point for the call */
6376     if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET)
6377     {
6378         codeGen->genIPmappingAdd(ilOffset, false);
6379     }
6380
6381     /*
6382         We need to allocate the appropriate instruction descriptor based
6383         on whether this is a direct/indirect call, and whether we need to
6384         record an updated set of live GC variables.
6385
6386         The stats for a ton of classes is as follows:
6387
6388             Direct call w/o  GC vars        220,216
6389             Indir. call w/o  GC vars        144,781
6390
6391             Direct call with GC vars          9,440
6392             Indir. call with GC vars          5,768
6393      */
6394
6395     if (callType >= EC_FUNC_VIRTUAL)
6396     {
6397         /* Indirect call, virtual calls */
6398
6399         assert(callType == EC_FUNC_VIRTUAL || callType == EC_INDIR_R || callType == EC_INDIR_SR ||
6400                callType == EC_INDIR_C || callType == EC_INDIR_ARD);
6401
6402         id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs,
6403                                  retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize));
6404     }
6405     else
6406     {
6407         // Helper/static/nonvirtual/function calls (direct or through handle),
6408         // and calls to an absolute addr.
6409
6410         assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_TOKEN_INDIR || callType == EC_FUNC_ADDR);
6411
6412         id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs,
6413                                  retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize));
6414     }
6415
6416     /* Update the emitter's live GC ref sets */
6417
6418     VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars);
6419     emitThisGCrefRegs = gcrefRegs;
6420     emitThisByrefRegs = byrefRegs;
6421
6422     /* Set the instruction - special case jumping a function */
6423     instruction ins = INS_call;
6424
6425     if (isJump)
6426     {
6427         assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_TOKEN_INDIR);
6428         if (callType == EC_FUNC_TOKEN)
6429         {
6430             ins = INS_l_jmp;
6431         }
6432         else
6433         {
6434             ins = INS_i_jmp;
6435         }
6436     }
6437     id->idIns(ins);
6438
6439     id->idSetIsNoGC(isNoGC);
6440
6441     // Record the address: method, indirection, or funcptr
6442     if (callType >= EC_FUNC_VIRTUAL)
6443     {
6444         // This is an indirect call (either a virtual call or func ptr call)
6445
6446         switch (callType)
6447         {
6448             case EC_INDIR_C:
6449                 // Indirect call using an absolute code address.
6450                 // Must be marked as relocatable and is done at the
6451                 // branch target location.
6452                 goto CALL_ADDR_MODE;
6453
6454             case EC_INDIR_R: // the address is in a register
6455
6456                 id->idSetIsCallRegPtr();
6457
6458                 __fallthrough;
6459
6460             case EC_INDIR_ARD: // the address is an indirection
6461
6462                 goto CALL_ADDR_MODE;
6463
6464             case EC_INDIR_SR: // the address is in a lcl var
6465
6466                 id->idInsFmt(IF_SRD);
6467                 // disp is really a lclVarNum
6468                 noway_assert((unsigned)disp == (size_t)disp);
6469                 id->idAddr()->iiaLclVar.initLclVarAddr((unsigned)disp, 0);
6470                 sz = emitInsSizeSV(insCodeMR(INS_call), (unsigned)disp, 0);
6471
6472                 break;
6473
6474             case EC_FUNC_VIRTUAL:
6475
6476             CALL_ADDR_MODE:
6477
6478                 // fall-through
6479
6480                 // The function is "ireg" if id->idIsCallRegPtr(),
6481                 // else [ireg+xmul*xreg+disp]
6482
6483                 id->idInsFmt(IF_ARD);
6484
6485                 id->idAddr()->iiaAddrMode.amBaseReg = ireg;
6486                 id->idAddr()->iiaAddrMode.amIndxReg = xreg;
6487                 id->idAddr()->iiaAddrMode.amScale   = xmul ? emitEncodeScale(xmul) : emitter::OPSZ1;
6488
6489                 sz = emitInsSizeAM(id, insCodeMR(INS_call));
6490
6491                 if (ireg == REG_NA && xreg == REG_NA)
6492                 {
6493                     if (codeGen->genCodeIndirAddrNeedsReloc(disp))
6494                     {
6495                         id->idSetIsDspReloc();
6496                     }
6497 #ifdef _TARGET_AMD64_
6498                     else
6499                     {
6500                         // An absolute indir address that doesn't need reloc should fit within 32-bits
6501                         // to be encoded as offset relative to zero.  This addr mode requires an extra
6502                         // SIB byte
6503                         noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (size_t)addr);
6504                         sz++;
6505                     }
6506 #endif //_TARGET_AMD64_
6507                 }
6508
6509                 break;
6510
6511             default:
6512                 NO_WAY("unexpected instruction");
6513                 break;
6514         }
6515     }
6516     else if (callType == EC_FUNC_TOKEN_INDIR)
6517     {
6518         /* "call [method_addr]" */
6519
6520         assert(addr != nullptr);
6521
6522         id->idInsFmt(IF_METHPTR);
6523         id->idAddr()->iiaAddr = (BYTE*)addr;
6524         sz                    = 6;
6525
6526         // Since this is an indirect call through a pointer and we don't
6527         // currently pass in emitAttr into this function, we query codegen
6528         // whether addr needs a reloc.
6529         if (codeGen->genCodeIndirAddrNeedsReloc((size_t)addr))
6530         {
6531             id->idSetIsDspReloc();
6532         }
6533 #ifdef _TARGET_AMD64_
6534         else
6535         {
6536             // An absolute indir address that doesn't need reloc should fit within 32-bits
6537             // to be encoded as offset relative to zero.  This addr mode requires an extra
6538             // SIB byte
6539             noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (size_t)addr);
6540             sz++;
6541         }
6542 #endif //_TARGET_AMD64_
6543     }
6544     else
6545     {
6546         /* This is a simple direct call: "call helper/method/addr" */
6547
6548         assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR);
6549
6550         assert(addr != nullptr);
6551
6552         id->idInsFmt(IF_METHOD);
6553         sz = 5;
6554
6555         id->idAddr()->iiaAddr = (BYTE*)addr;
6556
6557         if (callType == EC_FUNC_ADDR)
6558         {
6559             id->idSetIsCallAddr();
6560         }
6561
6562         // Direct call to a method and no addr indirection is needed.
6563         if (codeGen->genCodeAddrNeedsReloc((size_t)addr))
6564         {
6565             id->idSetIsDspReloc();
6566         }
6567     }
6568
6569 #ifdef DEBUG
6570     if (emitComp->verbose && 0)
6571     {
6572         if (id->idIsLargeCall())
6573         {
6574             if (callType >= EC_FUNC_VIRTUAL)
6575             {
6576                 printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
6577                        VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
6578             }
6579             else
6580             {
6581                 printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
6582                        VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
6583             }
6584         }
6585     }
6586
6587     id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token
6588     id->idDebugOnlyInfo()->idCallSig   = sigInfo;
6589 #endif // DEBUG
6590
6591 #ifdef LATE_DISASM
6592     if (addr != nullptr)
6593     {
6594         codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd);
6595     }
6596 #endif // LATE_DISASM
6597
6598     id->idCodeSize(sz);
6599
6600     dispIns(id);
6601     emitCurIGsize += sz;
6602
6603 #if !FEATURE_FIXED_OUT_ARGS
6604
6605     /* The call will pop the arguments */
6606
6607     if (emitCntStackDepth && argSize > 0)
6608     {
6609         noway_assert((ssize_t)emitCurStackLvl >= argSize);
6610         emitCurStackLvl -= (int)argSize;
6611         assert((int)emitCurStackLvl >= 0);
6612     }
6613
6614 #endif // !FEATURE_FIXED_OUT_ARGS
6615 }
6616
6617 #ifdef DEBUG
6618 /*****************************************************************************
6619  *
6620  *  The following called for each recorded instruction -- use for debugging.
6621  */
6622 void emitter::emitInsSanityCheck(instrDesc* id)
6623 {
6624     // make certain you only try to put relocs on things that can have them.
6625     ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
6626     if ((idOp == ID_OP_SCNS) && id->idIsLargeCns())
6627     {
6628         idOp = ID_OP_CNS;
6629     }
6630
6631     if (!id->idIsTiny())
6632     {
6633         if (id->idIsDspReloc())
6634         {
6635             assert(idOp == ID_OP_NONE || idOp == ID_OP_AMD || idOp == ID_OP_DSP || idOp == ID_OP_DSP_CNS ||
6636                    idOp == ID_OP_AMD_CNS || idOp == ID_OP_SPEC || idOp == ID_OP_CALL || idOp == ID_OP_JMP ||
6637                    idOp == ID_OP_LBL);
6638         }
6639
6640         if (id->idIsCnsReloc())
6641         {
6642             assert(idOp == ID_OP_CNS || idOp == ID_OP_AMD_CNS || idOp == ID_OP_DSP_CNS || idOp == ID_OP_SPEC ||
6643                    idOp == ID_OP_CALL || idOp == ID_OP_JMP);
6644         }
6645     }
6646 }
6647 #endif
6648
6649 /*****************************************************************************
6650  *
6651  *  Return the allocated size (in bytes) of the given instruction descriptor.
6652  */
6653
6654 size_t emitter::emitSizeOfInsDsc(instrDesc* id)
6655 {
6656     if (emitIsTinyInsDsc(id))
6657     {
6658         return TINY_IDSC_SIZE;
6659     }
6660
6661     if (emitIsScnsInsDsc(id))
6662     {
6663         return SMALL_IDSC_SIZE;
6664     }
6665
6666     assert((unsigned)id->idInsFmt() < emitFmtCount);
6667
6668     ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
6669
6670     // An INS_call instruction may use a "fat" direct/indirect call descriptor
6671     // except for a local call to a label (i.e. call to a finally)
6672     // Only ID_OP_CALL and ID_OP_SPEC check for this, so we enforce that the
6673     //  INS_call instruction always uses one of these idOps
6674
6675     if (id->idIns() == INS_call)
6676     {
6677         assert(idOp == ID_OP_CALL || // is a direct   call
6678                idOp == ID_OP_SPEC || // is a indirect call
6679                idOp == ID_OP_JMP);   // is a local call to finally clause
6680     }
6681
6682     switch (idOp)
6683     {
6684         case ID_OP_NONE:
6685             break;
6686
6687         case ID_OP_LBL:
6688             return sizeof(instrDescLbl);
6689
6690         case ID_OP_JMP:
6691             return sizeof(instrDescJmp);
6692
6693         case ID_OP_CALL:
6694         case ID_OP_SPEC:
6695             if (id->idIsLargeCall())
6696             {
6697                 /* Must be a "fat" indirect call descriptor */
6698                 return sizeof(instrDescCGCA);
6699             }
6700
6701             __fallthrough;
6702
6703         case ID_OP_SCNS:
6704         case ID_OP_CNS:
6705         case ID_OP_DSP:
6706         case ID_OP_DSP_CNS:
6707         case ID_OP_AMD:
6708         case ID_OP_AMD_CNS:
6709             if (id->idIsLargeCns())
6710             {
6711                 if (id->idIsLargeDsp())
6712                 {
6713                     return sizeof(instrDescCnsDsp);
6714                 }
6715                 else
6716                 {
6717                     return sizeof(instrDescCns);
6718                 }
6719             }
6720             else
6721             {
6722                 if (id->idIsLargeDsp())
6723                 {
6724                     return sizeof(instrDescDsp);
6725                 }
6726                 else
6727                 {
6728                     return sizeof(instrDesc);
6729                 }
6730             }
6731
6732         default:
6733             NO_WAY("unexpected instruction descriptor format");
6734             break;
6735     }
6736
6737     return sizeof(instrDesc);
6738 }
6739
6740 /*****************************************************************************/
6741 #ifdef DEBUG
6742 /*****************************************************************************
6743  *
6744  *  Return a string that represents the given register.
6745  */
6746
6747 const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName)
6748 {
6749     static char          rb[2][128];
6750     static unsigned char rbc = 0;
6751
6752     const char* rn = emitComp->compRegVarName(reg, varName);
6753
6754 #ifdef _TARGET_AMD64_
6755     char suffix = '\0';
6756
6757     switch (EA_SIZE(attr))
6758     {
6759         case EA_32BYTE:
6760             return emitYMMregName(reg);
6761
6762         case EA_16BYTE:
6763             return emitXMMregName(reg);
6764
6765         case EA_8BYTE:
6766             if ((REG_XMM0 <= reg) && (reg <= REG_XMM15))
6767             {
6768                 return emitXMMregName(reg);
6769             }
6770             break;
6771
6772         case EA_4BYTE:
6773             if ((REG_XMM0 <= reg) && (reg <= REG_XMM15))
6774             {
6775                 return emitXMMregName(reg);
6776             }
6777
6778             if (reg > REG_R15)
6779             {
6780                 break;
6781             }
6782
6783             if (reg > REG_RDI)
6784             {
6785                 suffix = 'd';
6786                 goto APPEND_SUFFIX;
6787             }
6788             rbc        = (rbc + 1) % 2;
6789             rb[rbc][0] = 'e';
6790             rb[rbc][1] = rn[1];
6791             rb[rbc][2] = rn[2];
6792             rb[rbc][3] = 0;
6793             rn         = rb[rbc];
6794             break;
6795
6796         case EA_2BYTE:
6797             if (reg > REG_RDI)
6798             {
6799                 suffix = 'w';
6800                 goto APPEND_SUFFIX;
6801             }
6802             rn++;
6803             break;
6804
6805         case EA_1BYTE:
6806             if (reg > REG_RDI)
6807             {
6808                 suffix = 'b';
6809             APPEND_SUFFIX:
6810                 rbc        = (rbc + 1) % 2;
6811                 rb[rbc][0] = rn[0];
6812                 rb[rbc][1] = rn[1];
6813                 if (rn[2])
6814                 {
6815                     assert(rn[3] == 0);
6816                     rb[rbc][2] = rn[2];
6817                     rb[rbc][3] = suffix;
6818                     rb[rbc][4] = 0;
6819                 }
6820                 else
6821                 {
6822                     rb[rbc][2] = suffix;
6823                     rb[rbc][3] = 0;
6824                 }
6825             }
6826             else
6827             {
6828                 rbc        = (rbc + 1) % 2;
6829                 rb[rbc][0] = rn[1];
6830                 if (reg < 4)
6831                 {
6832                     rb[rbc][1] = 'l';
6833                     rb[rbc][2] = 0;
6834                 }
6835                 else
6836                 {
6837                     rb[rbc][1] = rn[2];
6838                     rb[rbc][2] = 'l';
6839                     rb[rbc][3] = 0;
6840                 }
6841             }
6842
6843             rn = rb[rbc];
6844             break;
6845
6846         default:
6847             break;
6848     }
6849 #endif // _TARGET_AMD64_
6850
6851 #ifdef _TARGET_X86_
6852     assert(strlen(rn) >= 3);
6853
6854     switch (EA_SIZE(attr))
6855     {
6856 #ifndef LEGACY_BACKEND
6857         case EA_32BYTE:
6858             return emitYMMregName(reg);
6859
6860         case EA_16BYTE:
6861             return emitXMMregName(reg);
6862
6863         case EA_8BYTE:
6864             if ((REG_XMM0 <= reg) && (reg <= REG_XMM7))
6865             {
6866                 return emitXMMregName(reg);
6867             }
6868             break;
6869
6870         case EA_4BYTE:
6871             if ((REG_XMM0 <= reg) && (reg <= REG_XMM7))
6872             {
6873                 return emitXMMregName(reg);
6874             }
6875             break;
6876 #else  // LEGACY_BACKEND
6877         case EA_4BYTE:
6878             break;
6879 #endif // LEGACY_BACKEND
6880
6881         case EA_2BYTE:
6882             rn++;
6883             break;
6884
6885         case EA_1BYTE:
6886             rbc        = (rbc + 1) % 2;
6887             rb[rbc][0] = rn[1];
6888             rb[rbc][1] = 'l';
6889             strcpy_s(&rb[rbc][2], sizeof(rb[0]) - 2, rn + 3);
6890
6891             rn = rb[rbc];
6892             break;
6893
6894         default:
6895             break;
6896     }
6897 #endif // _TARGET_X86_
6898
6899 #if 0
6900     // The following is useful if you want register names to be tagged with * or ^ representing gcref or byref, respectively,
6901     // however it's possibly not interesting most of the time.
6902     if (EA_IS_GCREF(attr) || EA_IS_BYREF(attr))
6903     {
6904         if (rn != rb[rbc])
6905         {
6906             rbc = (rbc+1)%2;
6907             strcpy_s(rb[rbc], sizeof(rb[rbc]), rn);
6908             rn = rb[rbc];
6909         }
6910
6911         if (EA_IS_GCREF(attr))
6912         {
6913             strcat_s(rb[rbc], sizeof(rb[rbc]), "*");
6914         }
6915         else if (EA_IS_BYREF(attr))
6916         {
6917             strcat_s(rb[rbc], sizeof(rb[rbc]), "^");
6918         }
6919     }
6920 #endif // 0
6921
6922     return rn;
6923 }
6924
6925 /*****************************************************************************
6926  *
6927  *  Return a string that represents the given FP register.
6928  */
6929
6930 const char* emitter::emitFPregName(unsigned reg, bool varName)
6931 {
6932     assert(reg < REG_COUNT);
6933
6934     return emitComp->compFPregVarName((regNumber)(reg), varName);
6935 }
6936
6937 /*****************************************************************************
6938  *
6939  *  Return a string that represents the given XMM register.
6940  */
6941
6942 const char* emitter::emitXMMregName(unsigned reg)
6943 {
6944     static const char* const regNames[] = {
6945 #define REGDEF(name, rnum, mask, sname) "x" sname,
6946 #ifndef LEGACY_BACKEND
6947 #include "register.h"
6948 #else // LEGACY_BACKEND
6949 #include "registerxmm.h"
6950 #endif // LEGACY_BACKEND
6951     };
6952
6953     assert(reg < REG_COUNT);
6954     assert(reg < _countof(regNames));
6955
6956     return regNames[reg];
6957 }
6958
6959 /*****************************************************************************
6960  *
6961  *  Return a string that represents the given YMM register.
6962  */
6963
6964 const char* emitter::emitYMMregName(unsigned reg)
6965 {
6966     static const char* const regNames[] = {
6967 #define REGDEF(name, rnum, mask, sname) "y" sname,
6968 #ifndef LEGACY_BACKEND
6969 #include "register.h"
6970 #else // LEGACY_BACKEND
6971 #include "registerxmm.h"
6972 #endif // LEGACY_BACKEND
6973     };
6974
6975     assert(reg < REG_COUNT);
6976     assert(reg < _countof(regNames));
6977
6978     return regNames[reg];
6979 }
6980
6981 /*****************************************************************************
6982  *
6983  *  Display a static data member reference.
6984  */
6985
6986 void emitter::emitDispClsVar(CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, bool reloc /* = false */)
6987 {
6988     int doffs;
6989
6990     /* Filter out the special case of fs:[offs] */
6991
6992     // Munge any pointers if we want diff-able disassembly
6993     if (emitComp->opts.disDiffable)
6994     {
6995         ssize_t top12bits = (offs >> 20);
6996         if ((top12bits != 0) && (top12bits != -1))
6997         {
6998             offs = 0xD1FFAB1E;
6999         }
7000     }
7001
7002     if (fldHnd == FLD_GLOBAL_FS)
7003     {
7004         printf("FS:[0x%04X]", offs);
7005         return;
7006     }
7007
7008     if (fldHnd == FLD_GLOBAL_DS)
7009     {
7010         printf("[0x%04X]", offs);
7011         return;
7012     }
7013
7014     printf("[");
7015
7016     doffs = Compiler::eeGetJitDataOffs(fldHnd);
7017
7018     if (reloc)
7019     {
7020         printf("reloc ");
7021     }
7022
7023     if (doffs >= 0)
7024     {
7025         if (doffs & 1)
7026         {
7027             printf("@CNS%02u", doffs - 1);
7028         }
7029         else
7030         {
7031             printf("@RWD%02u", doffs);
7032         }
7033
7034         if (offs)
7035         {
7036             printf("%+Id", offs);
7037         }
7038     }
7039     else
7040     {
7041         printf("classVar[%#x]", emitComp->dspPtr(fldHnd));
7042
7043         if (offs)
7044         {
7045             printf("%+Id", offs);
7046         }
7047     }
7048
7049     printf("]");
7050
7051     if (emitComp->opts.varNames && offs < 0)
7052     {
7053         printf("'%s", emitComp->eeGetFieldName(fldHnd));
7054         if (offs)
7055         {
7056             printf("%+Id", offs);
7057         }
7058         printf("'");
7059     }
7060 }
7061
7062 /*****************************************************************************
7063  *
7064  *  Display a stack frame reference.
7065  */
7066
7067 void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
7068 {
7069     int  addr;
7070     bool bEBP;
7071
7072     printf("[");
7073
7074     if (!asmfm || emitComp->lvaDoneFrameLayout == Compiler::NO_FRAME_LAYOUT)
7075     {
7076         if (varx < 0)
7077         {
7078             printf("TEMP_%02u", -varx);
7079         }
7080         else
7081         {
7082             printf("V%02u", +varx);
7083         }
7084
7085         if (disp < 0)
7086         {
7087             printf("-0x%X", -disp);
7088         }
7089         else if (disp > 0)
7090         {
7091             printf("+0x%X", +disp);
7092         }
7093     }
7094
7095     if (emitComp->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
7096     {
7097         if (!asmfm)
7098         {
7099             printf(" ");
7100         }
7101
7102         addr = emitComp->lvaFrameAddress(varx, &bEBP) + disp;
7103
7104         if (bEBP)
7105         {
7106             printf(STR_FPBASE);
7107
7108             if (addr < 0)
7109             {
7110                 printf("-%02XH", -addr);
7111             }
7112             else if (addr > 0)
7113             {
7114                 printf("+%02XH", addr);
7115             }
7116         }
7117         else
7118         {
7119             /* Adjust the offset by amount currently pushed on the stack */
7120
7121             printf(STR_SPBASE);
7122
7123             if (addr < 0)
7124             {
7125                 printf("-%02XH", -addr);
7126             }
7127             else if (addr > 0)
7128             {
7129                 printf("+%02XH", addr);
7130             }
7131
7132 #if !FEATURE_FIXED_OUT_ARGS
7133
7134             if (emitCurStackLvl)
7135                 printf("+%02XH", emitCurStackLvl);
7136
7137 #endif // !FEATURE_FIXED_OUT_ARGS
7138         }
7139     }
7140
7141     printf("]");
7142
7143     if (varx >= 0 && emitComp->opts.varNames)
7144     {
7145         LclVarDsc*  varDsc;
7146         const char* varName;
7147
7148         assert((unsigned)varx < emitComp->lvaCount);
7149         varDsc  = emitComp->lvaTable + varx;
7150         varName = emitComp->compLocalVarName(varx, offs);
7151
7152         if (varName)
7153         {
7154             printf("'%s", varName);
7155
7156             if (disp < 0)
7157             {
7158                 printf("-%d", -disp);
7159             }
7160             else if (disp > 0)
7161             {
7162                 printf("+%d", +disp);
7163             }
7164
7165             printf("'");
7166         }
7167     }
7168 }
7169
7170 /*****************************************************************************
7171  *
7172  *  Display an reloc value
7173  *  If we are formatting for an assembly listing don't print the hex value
7174  *  since it will prevent us from doing assembly diffs
7175  */
7176 void emitter::emitDispReloc(ssize_t value)
7177 {
7178     if (emitComp->opts.disAsm)
7179     {
7180         printf("(reloc)");
7181     }
7182     else
7183     {
7184         printf("(reloc 0x%Ix)", emitComp->dspPtr(value));
7185     }
7186 }
7187
7188 /*****************************************************************************
7189  *
7190  *  Display an address mode.
7191  */
7192
7193 void emitter::emitDispAddrMode(instrDesc* id, bool noDetail)
7194 {
7195     bool    nsep = false;
7196     ssize_t disp;
7197
7198     unsigned     jtno = 0;
7199     dataSection* jdsc = nullptr;
7200
7201     /* The displacement field is in an unusual place for calls */
7202
7203     disp = (id->idIns() == INS_call) ? emitGetInsCIdisp(id) : emitGetInsAmdAny(id);
7204
7205     /* Display a jump table label if this is a switch table jump */
7206
7207     if (id->idIns() == INS_i_jmp)
7208     {
7209         UNATIVE_OFFSET offs = 0;
7210
7211         /* Find the appropriate entry in the data section list */
7212
7213         for (jdsc = emitConsDsc.dsdList, jtno = 0; jdsc; jdsc = jdsc->dsNext)
7214         {
7215             UNATIVE_OFFSET size = jdsc->dsSize;
7216
7217             /* Is this a label table? */
7218
7219             if (size & 1)
7220             {
7221                 size--;
7222                 jtno++;
7223
7224                 if (offs == id->idDebugOnlyInfo()->idMemCookie)
7225                 {
7226                     break;
7227                 }
7228             }
7229
7230             offs += size;
7231         }
7232
7233         /* If we've found a matching entry then is a table jump */
7234
7235         if (jdsc)
7236         {
7237             if (id->idIsDspReloc())
7238             {
7239                 printf("reloc ");
7240             }
7241             printf("J_M%03u_DS%02u", Compiler::s_compMethodsCount, id->idDebugOnlyInfo()->idMemCookie);
7242         }
7243
7244         disp -= id->idDebugOnlyInfo()->idMemCookie;
7245     }
7246
7247     bool frameRef = false;
7248
7249     printf("[");
7250
7251     if (id->idAddr()->iiaAddrMode.amBaseReg != REG_NA)
7252     {
7253         printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg));
7254         nsep = true;
7255         if (id->idAddr()->iiaAddrMode.amBaseReg == REG_ESP)
7256         {
7257             frameRef = true;
7258         }
7259         else if (emitComp->isFramePointerUsed() && id->idAddr()->iiaAddrMode.amBaseReg == REG_EBP)
7260         {
7261             frameRef = true;
7262         }
7263     }
7264
7265     if (id->idAddr()->iiaAddrMode.amIndxReg != REG_NA)
7266     {
7267         size_t scale = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale);
7268
7269         if (nsep)
7270         {
7271             printf("+");
7272         }
7273         if (scale > 1)
7274         {
7275             printf("%u*", scale);
7276         }
7277         printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amIndxReg));
7278         nsep = true;
7279     }
7280
7281     if ((id->idIsDspReloc()) && (id->idIns() != INS_i_jmp))
7282     {
7283         if (nsep)
7284         {
7285             printf("+");
7286         }
7287         emitDispReloc(disp);
7288     }
7289     else
7290     {
7291         // Munge any pointers if we want diff-able disassembly
7292         if (emitComp->opts.disDiffable)
7293         {
7294             ssize_t top12bits = (disp >> 20);
7295             if ((top12bits != 0) && (top12bits != -1))
7296             {
7297                 disp = 0xD1FFAB1E;
7298             }
7299         }
7300
7301         if (disp > 0)
7302         {
7303             if (nsep)
7304             {
7305                 printf("+");
7306             }
7307             if (frameRef)
7308             {
7309                 printf("%02XH", disp);
7310             }
7311             else if (disp < 1000)
7312             {
7313                 printf("%d", disp);
7314             }
7315             else if (disp <= 0xFFFF)
7316             {
7317                 printf("%04XH", disp);
7318             }
7319             else
7320             {
7321                 printf("%08XH", disp);
7322             }
7323         }
7324         else if (disp < 0)
7325         {
7326             if (frameRef)
7327             {
7328                 printf("-%02XH", -disp);
7329             }
7330             else if (disp > -1000)
7331             {
7332                 printf("-%d", -disp);
7333             }
7334             else if (disp >= -0xFFFF)
7335             {
7336                 printf("-%04XH", -disp);
7337             }
7338             else if ((disp & 0x7F000000) != 0x7F000000)
7339             {
7340                 printf("%08XH", disp);
7341             }
7342             else
7343             {
7344                 printf("-%08XH", -disp);
7345             }
7346         }
7347         else if (!nsep)
7348         {
7349             printf("%04XH", disp);
7350         }
7351     }
7352
7353     printf("]");
7354
7355     // pretty print string if it looks like one
7356     if ((id->idGCref() == GCT_GCREF) && (id->idIns() == INS_mov) && (id->idAddr()->iiaAddrMode.amBaseReg == REG_NA))
7357     {
7358         const wchar_t* str = emitComp->eeGetCPString(disp);
7359         if (str != nullptr)
7360         {
7361             printf("      '%S'", str);
7362         }
7363     }
7364
7365     if (jdsc && !noDetail)
7366     {
7367         unsigned     cnt = (jdsc->dsSize - 1) / TARGET_POINTER_SIZE;
7368         BasicBlock** bbp = (BasicBlock**)jdsc->dsCont;
7369
7370 #ifdef _TARGET_AMD64_
7371 #define SIZE_LETTER "Q"
7372 #else
7373 #define SIZE_LETTER "D"
7374 #endif
7375         printf("\n\n    J_M%03u_DS%02u LABEL   " SIZE_LETTER "WORD", Compiler::s_compMethodsCount, jtno);
7376
7377         /* Display the label table (it's stored as "BasicBlock*" values) */
7378
7379         do
7380         {
7381             insGroup* lab;
7382
7383             /* Convert the BasicBlock* value to an IG address */
7384
7385             lab = (insGroup*)emitCodeGetCookie(*bbp++);
7386             assert(lab);
7387
7388             printf("\n            D" SIZE_LETTER "      G_M%03u_IG%02u", Compiler::s_compMethodsCount, lab->igNum);
7389         } while (--cnt);
7390     }
7391 }
7392
7393 /*****************************************************************************
7394  *
7395  *  If the given instruction is a shift, display the 2nd operand.
7396  */
7397
7398 void emitter::emitDispShift(instruction ins, int cnt)
7399 {
7400     switch (ins)
7401     {
7402         case INS_rcl_1:
7403         case INS_rcr_1:
7404         case INS_rol_1:
7405         case INS_ror_1:
7406         case INS_shl_1:
7407         case INS_shr_1:
7408         case INS_sar_1:
7409             printf(", 1");
7410             break;
7411
7412         case INS_rcl:
7413         case INS_rcr:
7414         case INS_rol:
7415         case INS_ror:
7416         case INS_shl:
7417         case INS_shr:
7418         case INS_sar:
7419             printf(", cl");
7420             break;
7421
7422         case INS_rcl_N:
7423         case INS_rcr_N:
7424         case INS_rol_N:
7425         case INS_ror_N:
7426         case INS_shl_N:
7427         case INS_shr_N:
7428         case INS_sar_N:
7429             printf(", %d", cnt);
7430             break;
7431
7432         default:
7433             break;
7434     }
7435 }
7436
7437 /*****************************************************************************
7438  *
7439  *  Display (optionally) the bytes for the instruction encoding in hex
7440  */
7441
7442 void emitter::emitDispInsHex(BYTE* code, size_t sz)
7443 {
7444     // We do not display the instruction hex if we want diff-able disassembly
7445     if (!emitComp->opts.disDiffable)
7446     {
7447 #ifdef _TARGET_AMD64_
7448         // how many bytes per instruction we format for
7449         const size_t digits = 10;
7450 #else // _TARGET_X86
7451         const size_t digits = 6;
7452 #endif
7453         printf(" ");
7454         for (unsigned i = 0; i < sz; i++)
7455         {
7456             printf("%02X", (*((BYTE*)(code + i))));
7457         }
7458
7459         if (sz < digits)
7460         {
7461             printf("%.*s", 2 * (digits - sz), "                         ");
7462         }
7463     }
7464 }
7465
7466 /*****************************************************************************
7467  *
7468  *  Display the given instruction.
7469  */
7470
7471 void emitter::emitDispIns(
7472     instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* code, size_t sz, insGroup* ig)
7473 {
7474     emitAttr    attr;
7475     const char* sstr;
7476
7477     instruction ins = id->idIns();
7478
7479     if (emitComp->verbose)
7480     {
7481         unsigned idNum = id->idDebugOnlyInfo()->idNum;
7482         printf("IN%04x: ", idNum);
7483     }
7484
7485 #define ID_INFO_DSP_RELOC ((bool)(id->idIsDspReloc()))
7486
7487     /* Display a constant value if the instruction references one */
7488
7489     if (!isNew)
7490     {
7491         switch (id->idInsFmt())
7492         {
7493             int offs;
7494
7495             case IF_MRD_RRD:
7496             case IF_MWR_RRD:
7497             case IF_MRW_RRD:
7498
7499             case IF_RRD_MRD:
7500             case IF_RWR_MRD:
7501             case IF_RRW_MRD:
7502
7503             case IF_MRD_CNS:
7504             case IF_MWR_CNS:
7505             case IF_MRW_CNS:
7506             case IF_MRW_SHF:
7507
7508             case IF_MRD:
7509             case IF_MWR:
7510             case IF_MRW:
7511
7512 #if FEATURE_STACK_FP_X87
7513
7514             case IF_TRD_MRD:
7515             case IF_TWR_MRD:
7516             case IF_TRW_MRD:
7517
7518             // case IF_MRD_TRD:
7519             // case IF_MRW_TRD:
7520             case IF_MWR_TRD:
7521
7522 #endif // FEATURE_STACK_FP_X87
7523             case IF_MRD_OFF:
7524
7525                 /* Is this actually a reference to a data section? */
7526
7527                 offs = Compiler::eeGetJitDataOffs(id->idAddr()->iiaFieldHnd);
7528
7529                 if (offs >= 0)
7530                 {
7531                     void* addr;
7532
7533                     /* Display a data section reference */
7534
7535                     assert((unsigned)offs < emitConsDsc.dsdOffs);
7536                     addr = emitConsBlock ? emitConsBlock + offs : nullptr;
7537
7538 #if 0
7539                 // TODO-XArch-Cleanup: Fix or remove this code.
7540                 /* Is the operand an integer or floating-point value? */
7541
7542                 bool isFP = false;
7543
7544                 if  (CodeGen::instIsFP(id->idIns()))
7545                 {
7546                     switch (id->idIns())
7547                     {
7548                     case INS_fild:
7549                     case INS_fildl:
7550                         break;
7551
7552                     default:
7553                         isFP = true;
7554                         break;
7555                     }
7556                 }
7557
7558                 if (offs & 1)
7559                     printf("@CNS%02u", offs);
7560                 else
7561                     printf("@RWD%02u", offs);
7562
7563                 printf("      ");
7564
7565                 if  (addr)
7566                 {
7567                     addr = 0;
7568                     // TODO-XArch-Bug?:
7569                     //          This was busted by switching the order
7570                     //          in which we output the code block vs.
7571                     //          the data blocks -- when we get here,
7572                     //          the data block has not been filled in
7573                     //          yet, so we'll display garbage.
7574
7575                     if  (isFP)
7576                     {
7577                         if  (id->idOpSize() == EA_4BYTE)
7578                             printf("DF      %f \n", addr ? *(float   *)addr : 0);
7579                         else
7580                             printf("DQ      %lf\n", addr ? *(double  *)addr : 0);
7581                     }
7582                     else
7583                     {
7584                         if  (id->idOpSize() <= EA_4BYTE)
7585                             printf("DD      %d \n", addr ? *(int     *)addr : 0);
7586                         else
7587                             printf("DQ      %D \n", addr ? *(__int64 *)addr : 0);
7588                     }
7589                 }
7590 #endif
7591                 }
7592                 break;
7593
7594             default:
7595                 break;
7596         }
7597     }
7598
7599     // printf("[F=%s] "   , emitIfName(id->idInsFmt()));
7600     // printf("INS#%03u: ", id->idDebugOnlyInfo()->idNum);
7601     // printf("[S=%02u] " , emitCurStackLvl); if (isNew) printf("[M=%02u] ", emitMaxStackDepth);
7602     // printf("[S=%02u] " , emitCurStackLvl/sizeof(INT32));
7603     // printf("[A=%08X] " , emitSimpleStkMask);
7604     // printf("[A=%08X] " , emitSimpleByrefStkMask);
7605     // printf("[L=%02u] " , id->idCodeSize());
7606
7607     if (!emitComp->opts.dspEmit && !isNew && !asmfm)
7608     {
7609         doffs = true;
7610     }
7611
7612     /* Display the instruction offset */
7613
7614     emitDispInsOffs(offset, doffs);
7615
7616     if (code != nullptr)
7617     {
7618         /* Display the instruction hex code */
7619
7620         emitDispInsHex(code, sz);
7621     }
7622
7623     /* Display the instruction name */
7624
7625     sstr = codeGen->genInsName(ins);
7626
7627     if (IsAVXInstruction(ins))
7628     {
7629         printf(" v%-8s", sstr);
7630     }
7631     else
7632     {
7633         printf(" %-9s", sstr);
7634     }
7635 #ifndef FEATURE_PAL
7636     if (strnlen_s(sstr, 10) >= 8)
7637 #else  // FEATURE_PAL
7638     if (strnlen(sstr, 10) >= 8)
7639 #endif // FEATURE_PAL
7640     {
7641         printf(" ");
7642     }
7643
7644     /* By now the size better be set to something */
7645
7646     assert(emitInstCodeSz(id) || emitInstHasNoCode(ins));
7647
7648     /* Figure out the operand size */
7649
7650     if (id->idGCref() == GCT_GCREF)
7651     {
7652         attr = EA_GCREF;
7653         sstr = "gword ptr ";
7654     }
7655     else if (id->idGCref() == GCT_BYREF)
7656     {
7657         attr = EA_BYREF;
7658         sstr = "bword ptr ";
7659     }
7660     else
7661     {
7662         attr = id->idOpSize();
7663         sstr = codeGen->genSizeStr(attr);
7664
7665         if (ins == INS_lea)
7666         {
7667 #ifdef _TARGET_AMD64_
7668             assert((attr == EA_4BYTE) || (attr == EA_8BYTE));
7669 #else
7670             assert(attr == EA_4BYTE);
7671 #endif
7672             sstr = "";
7673         }
7674     }
7675
7676     /* Now see what instruction format we've got */
7677
7678     // First print the implicit register usage
7679     if (instrHasImplicitRegPairDest(ins))
7680     {
7681         printf("%s:%s, ", emitRegName(REG_EDX, id->idOpSize()), emitRegName(REG_EAX, id->idOpSize()));
7682     }
7683     else if (instrIs3opImul(ins))
7684     {
7685         regNumber tgtReg = inst3opImulReg(ins);
7686         printf("%s, ", emitRegName(tgtReg, id->idOpSize()));
7687     }
7688
7689     switch (id->idInsFmt())
7690     {
7691         ssize_t     val;
7692         ssize_t     offs;
7693         CnsVal      cnsVal;
7694         const char* methodName;
7695
7696         case IF_CNS:
7697             val = emitGetInsSC(id);
7698 #ifdef _TARGET_AMD64_
7699             // no 8-byte immediates allowed here!
7700             assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
7701 #endif
7702             if (id->idIsCnsReloc())
7703             {
7704                 emitDispReloc(val);
7705             }
7706             else
7707             {
7708             PRINT_CONSTANT:
7709                 // Munge any pointers if we want diff-able disassembly
7710                 if (emitComp->opts.disDiffable)
7711                 {
7712                     ssize_t top12bits = (val >> 20);
7713                     if ((top12bits != 0) && (top12bits != -1))
7714                     {
7715                         val = 0xD1FFAB1E;
7716                     }
7717                 }
7718                 if ((val > -1000) && (val < 1000))
7719                 {
7720                     printf("%d", val);
7721                 }
7722                 else if ((val > 0) || ((val & 0x7F000000) != 0x7F000000))
7723                 {
7724                     printf("0x%IX", val);
7725                 }
7726                 else
7727                 { // (val < 0)
7728                     printf("-0x%IX", -val);
7729                 }
7730             }
7731             break;
7732
7733         case IF_ARD:
7734         case IF_AWR:
7735         case IF_ARW:
7736
7737 #if FEATURE_STACK_FP_X87
7738
7739         case IF_TRD_ARD:
7740         case IF_TWR_ARD:
7741         case IF_TRW_ARD:
7742
7743         // case IF_ARD_TRD:
7744         case IF_AWR_TRD:
7745 // case IF_ARW_TRD:
7746
7747 #endif // FEATURE_STACK_FP_X87
7748             if (ins == INS_call && id->idIsCallRegPtr())
7749             {
7750                 printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg));
7751                 break;
7752             }
7753
7754             printf("%s", sstr);
7755             emitDispAddrMode(id, isNew);
7756             emitDispShift(ins);
7757
7758             if (ins == INS_call)
7759             {
7760                 assert(id->idInsFmt() == IF_ARD);
7761
7762                 /* Ignore indirect calls */
7763
7764                 if (id->idDebugOnlyInfo()->idMemCookie == 0)
7765                 {
7766                     break;
7767                 }
7768
7769                 assert(id->idDebugOnlyInfo()->idMemCookie);
7770
7771                 /* This is a virtual call */
7772
7773                 methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
7774                 printf("%s", methodName);
7775             }
7776             break;
7777
7778         case IF_RRD_ARD:
7779         case IF_RWR_ARD:
7780         case IF_RRW_ARD:
7781 #ifdef _TARGET_AMD64_
7782             if (ins == INS_movsxd)
7783             {
7784                 printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr);
7785             }
7786             else
7787 #endif
7788                 if (ins == INS_movsx || ins == INS_movzx)
7789             {
7790                 printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), sstr);
7791             }
7792             else
7793             {
7794                 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
7795             }
7796             emitDispAddrMode(id);
7797             break;
7798
7799         case IF_RRW_ARD_CNS:
7800         case IF_RWR_ARD_CNS:
7801         {
7802             printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
7803             emitDispAddrMode(id);
7804             emitGetInsAmdCns(id, &cnsVal);
7805
7806             val = cnsVal.cnsVal;
7807             printf(", ");
7808
7809             if (cnsVal.cnsReloc)
7810             {
7811                 emitDispReloc(val);
7812             }
7813             else
7814             {
7815                 goto PRINT_CONSTANT;
7816             }
7817
7818             break;
7819         }
7820
7821         case IF_AWR_RRD_CNS:
7822         {
7823             assert(ins == INS_vextracti128 || ins == INS_vextractf128);
7824             // vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr"
7825             sstr = codeGen->genSizeStr(EA_ATTR(16));
7826             printf(sstr);
7827             emitDispAddrMode(id);
7828             printf(", %s", emitRegName(id->idReg1(), attr));
7829
7830             emitGetInsAmdCns(id, &cnsVal);
7831
7832             val = cnsVal.cnsVal;
7833             printf(", ");
7834
7835             if (cnsVal.cnsReloc)
7836             {
7837                 emitDispReloc(val);
7838             }
7839             else
7840             {
7841                 goto PRINT_CONSTANT;
7842             }
7843
7844             break;
7845         }
7846
7847         case IF_RWR_RRD_ARD:
7848             printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
7849             emitDispAddrMode(id);
7850             break;
7851
7852         case IF_RWR_RRD_ARD_CNS:
7853         {
7854             printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
7855             emitDispAddrMode(id);
7856             emitGetInsAmdCns(id, &cnsVal);
7857
7858             val = cnsVal.cnsVal;
7859             printf(", ");
7860
7861             if (cnsVal.cnsReloc)
7862             {
7863                 emitDispReloc(val);
7864             }
7865             else
7866             {
7867                 goto PRINT_CONSTANT;
7868             }
7869
7870             break;
7871         }
7872
7873         case IF_ARD_RRD:
7874         case IF_AWR_RRD:
7875         case IF_ARW_RRD:
7876
7877             printf("%s", sstr);
7878             emitDispAddrMode(id);
7879             printf(", %s", emitRegName(id->idReg1(), attr));
7880             break;
7881
7882         case IF_ARD_CNS:
7883         case IF_AWR_CNS:
7884         case IF_ARW_CNS:
7885         case IF_ARW_SHF:
7886
7887             printf("%s", sstr);
7888             emitDispAddrMode(id);
7889             emitGetInsAmdCns(id, &cnsVal);
7890             val = cnsVal.cnsVal;
7891 #ifdef _TARGET_AMD64_
7892             // no 8-byte immediates allowed here!
7893             assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
7894 #endif
7895             if (id->idInsFmt() == IF_ARW_SHF)
7896             {
7897                 emitDispShift(ins, (BYTE)val);
7898             }
7899             else
7900             {
7901                 printf(", ");
7902                 if (cnsVal.cnsReloc)
7903                 {
7904                     emitDispReloc(val);
7905                 }
7906                 else
7907                 {
7908                     goto PRINT_CONSTANT;
7909                 }
7910             }
7911             break;
7912
7913         case IF_SRD:
7914         case IF_SWR:
7915         case IF_SRW:
7916
7917 #if FEATURE_STACK_FP_X87
7918         case IF_TRD_SRD:
7919         case IF_TWR_SRD:
7920         case IF_TRW_SRD:
7921
7922         // case IF_SRD_TRD:
7923         // case IF_SRW_TRD:
7924         case IF_SWR_TRD:
7925
7926 #endif // FEATURE_STACK_FP_X87
7927
7928             printf("%s", sstr);
7929
7930 #if !FEATURE_FIXED_OUT_ARGS
7931             if (ins == INS_pop)
7932                 emitCurStackLvl -= sizeof(int);
7933 #endif
7934
7935             emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
7936                              id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
7937
7938 #if !FEATURE_FIXED_OUT_ARGS
7939             if (ins == INS_pop)
7940                 emitCurStackLvl += sizeof(int);
7941 #endif
7942
7943             emitDispShift(ins);
7944             break;
7945
7946         case IF_SRD_RRD:
7947         case IF_SWR_RRD:
7948         case IF_SRW_RRD:
7949
7950             printf("%s", sstr);
7951
7952             emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
7953                              id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
7954
7955             printf(", %s", emitRegName(id->idReg1(), attr));
7956             break;
7957
7958         case IF_SRD_CNS:
7959         case IF_SWR_CNS:
7960         case IF_SRW_CNS:
7961         case IF_SRW_SHF:
7962
7963             printf("%s", sstr);
7964
7965             emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
7966                              id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
7967
7968             emitGetInsCns(id, &cnsVal);
7969             val = cnsVal.cnsVal;
7970 #ifdef _TARGET_AMD64_
7971             // no 8-byte immediates allowed here!
7972             assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
7973 #endif
7974             if (id->idInsFmt() == IF_SRW_SHF)
7975             {
7976                 emitDispShift(ins, (BYTE)val);
7977             }
7978             else
7979             {
7980                 printf(", ");
7981                 if (cnsVal.cnsReloc)
7982                 {
7983                     emitDispReloc(val);
7984                 }
7985                 else
7986                 {
7987                     goto PRINT_CONSTANT;
7988                 }
7989             }
7990             break;
7991
7992         case IF_RRD_SRD:
7993         case IF_RWR_SRD:
7994         case IF_RRW_SRD:
7995 #ifdef _TARGET_AMD64_
7996             if (ins == INS_movsxd)
7997             {
7998                 printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr);
7999             }
8000             else
8001 #endif
8002                 if (ins == INS_movsx || ins == INS_movzx)
8003             {
8004                 printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), sstr);
8005             }
8006             else
8007             {
8008                 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8009             }
8010
8011             emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8012                              id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8013
8014             break;
8015
8016         case IF_RRW_SRD_CNS:
8017         case IF_RWR_SRD_CNS:
8018         {
8019             printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8020             emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8021                              id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8022             emitGetInsCns(id, &cnsVal);
8023
8024             val = cnsVal.cnsVal;
8025             printf(", ");
8026
8027             if (cnsVal.cnsReloc)
8028             {
8029                 emitDispReloc(val);
8030             }
8031             else
8032             {
8033                 goto PRINT_CONSTANT;
8034             }
8035             break;
8036         }
8037
8038         case IF_RWR_RRD_SRD:
8039             printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8040             emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8041                              id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8042             break;
8043
8044         case IF_RWR_RRD_SRD_CNS:
8045         {
8046             printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8047             emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8048                              id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8049             emitGetInsCns(id, &cnsVal);
8050
8051             val = cnsVal.cnsVal;
8052             printf(", ");
8053
8054             if (cnsVal.cnsReloc)
8055             {
8056                 emitDispReloc(val);
8057             }
8058             else
8059             {
8060                 goto PRINT_CONSTANT;
8061             }
8062             break;
8063         }
8064
8065         case IF_RRD_RRD:
8066         case IF_RWR_RRD:
8067         case IF_RRW_RRD:
8068             if (ins == INS_mov_i2xmm)
8069             {
8070                 printf("%s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr));
8071             }
8072             else if (ins == INS_mov_xmm2i)
8073             {
8074                 printf("%s, %s", emitRegName(id->idReg2(), attr), emitRegName(id->idReg1(), EA_16BYTE));
8075             }
8076             else if (ins == INS_pmovmskb)
8077             {
8078                 printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
8079             }
8080 #ifndef LEGACY_BACKEND
8081             else if ((ins == INS_cvtsi2ss) || (ins == INS_cvtsi2sd))
8082             {
8083                 printf(" %s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr));
8084             }
8085 #endif
8086             else if ((ins == INS_cvttsd2si)
8087 #ifndef LEGACY_BACKEND
8088                      || (ins == INS_cvtss2si) || (ins == INS_cvtsd2si) || (ins == INS_cvttss2si)
8089 #endif
8090                      || 0)
8091             {
8092                 printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
8093             }
8094 #ifdef _TARGET_AMD64_
8095             else if (ins == INS_movsxd)
8096             {
8097                 printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), emitRegName(id->idReg2(), EA_4BYTE));
8098             }
8099 #endif // _TARGET_AMD64_
8100             else if (ins == INS_movsx || ins == INS_movzx)
8101             {
8102                 printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), emitRegName(id->idReg2(), attr));
8103             }
8104             else if (ins == INS_bt)
8105             {
8106                 // INS_bt operands are reversed. Display them in the normal order.
8107                 printf("%s, %s", emitRegName(id->idReg2(), attr), emitRegName(id->idReg1(), attr));
8108             }
8109 #ifdef FEATURE_HW_INTRINSICS
8110             else if (ins == INS_crc32 && attr != EA_8BYTE)
8111             {
8112                 printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
8113             }
8114 #endif // FEATURE_HW_INTRINSICS
8115             else
8116             {
8117                 printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr));
8118             }
8119             break;
8120
8121         case IF_RRW_RRW:
8122             assert(ins == INS_xchg);
8123             printf("%s,", emitRegName(id->idReg1(), attr));
8124             printf(" %s", emitRegName(id->idReg2(), attr));
8125             break;
8126
8127         case IF_RWR_RRD_RRD:
8128             assert(IsAVXInstruction(ins));
8129             assert(IsThreeOperandAVXInstruction(ins));
8130             printf("%s, ", emitRegName(id->idReg1(), attr));
8131             printf("%s, ", emitRegName(id->idReg2(), attr));
8132             printf("%s", emitRegName(id->idReg3(), attr));
8133             break;
8134         case IF_RWR_RRD_RRD_CNS:
8135             assert(IsAVXInstruction(ins));
8136             assert(IsThreeOperandAVXInstruction(ins));
8137             printf("%s, ", emitRegName(id->idReg1(), attr));
8138             printf("%s, ", emitRegName(id->idReg2(), attr));
8139             printf("%s, ", emitRegName(id->idReg3(), attr));
8140             val = emitGetInsSC(id);
8141             goto PRINT_CONSTANT;
8142             break;
8143         case IF_RWR_RRD_RRD_RRD:
8144             assert(IsAVXOnlyInstruction(ins));
8145             assert(UseVEXEncoding());
8146             printf("%s, ", emitRegName(id->idReg1(), attr));
8147             printf("%s, ", emitRegName(id->idReg2(), attr));
8148             printf("%s, ", emitRegName(id->idReg3(), attr));
8149             printf("%s", emitRegName(id->idReg4(), attr));
8150             break;
8151         case IF_RRW_RRW_CNS:
8152             printf("%s,", emitRegName(id->idReg1(), attr));
8153             printf(" %s", emitRegName(id->idReg2(), attr));
8154             val = emitGetInsSC(id);
8155 #ifdef _TARGET_AMD64_
8156             // no 8-byte immediates allowed here!
8157             assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
8158 #endif
8159             printf(", ");
8160             if (id->idIsCnsReloc())
8161             {
8162                 emitDispReloc(val);
8163             }
8164             else
8165             {
8166                 goto PRINT_CONSTANT;
8167             }
8168             break;
8169
8170         case IF_RRD:
8171         case IF_RWR:
8172         case IF_RRW:
8173             printf("%s", emitRegName(id->idReg1(), attr));
8174             emitDispShift(ins);
8175             break;
8176
8177         case IF_RRW_SHF:
8178             printf("%s", emitRegName(id->idReg1(), attr));
8179             emitDispShift(ins, (BYTE)emitGetInsSC(id));
8180             break;
8181
8182         case IF_RRD_MRD:
8183         case IF_RWR_MRD:
8184         case IF_RRW_MRD:
8185
8186             if (ins == INS_movsx || ins == INS_movzx)
8187             {
8188                 attr = EA_PTRSIZE;
8189             }
8190 #ifdef _TARGET_AMD64_
8191             else if (ins == INS_movsxd)
8192             {
8193                 attr = EA_PTRSIZE;
8194             }
8195 #endif
8196             printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8197             offs = emitGetInsDsp(id);
8198             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8199             break;
8200
8201         case IF_RRW_MRD_CNS:
8202         case IF_RWR_MRD_CNS:
8203         {
8204             printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8205             offs = emitGetInsDsp(id);
8206             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8207             emitGetInsDcmCns(id, &cnsVal);
8208
8209             val = cnsVal.cnsVal;
8210             printf(", ");
8211
8212             if (cnsVal.cnsReloc)
8213             {
8214                 emitDispReloc(val);
8215             }
8216             else
8217             {
8218                 goto PRINT_CONSTANT;
8219             }
8220             break;
8221         }
8222
8223         case IF_MWR_RRD_CNS:
8224         {
8225             assert(ins == INS_vextracti128 || ins == INS_vextractf128);
8226             // vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr"
8227             sstr = codeGen->genSizeStr(EA_ATTR(16));
8228             printf(sstr);
8229             offs = emitGetInsDsp(id);
8230             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8231             printf(", %s", emitRegName(id->idReg1(), attr));
8232             emitGetInsDcmCns(id, &cnsVal);
8233
8234             val = cnsVal.cnsVal;
8235             printf(", ");
8236
8237             if (cnsVal.cnsReloc)
8238             {
8239                 emitDispReloc(val);
8240             }
8241             else
8242             {
8243                 goto PRINT_CONSTANT;
8244             }
8245
8246             break;
8247         }
8248
8249         case IF_RWR_RRD_MRD:
8250             printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8251             offs = emitGetInsDsp(id);
8252             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8253             break;
8254
8255         case IF_RWR_RRD_MRD_CNS:
8256         {
8257             printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8258             offs = emitGetInsDsp(id);
8259             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8260             emitGetInsDcmCns(id, &cnsVal);
8261
8262             val = cnsVal.cnsVal;
8263             printf(", ");
8264
8265             if (cnsVal.cnsReloc)
8266             {
8267                 emitDispReloc(val);
8268             }
8269             else
8270             {
8271                 goto PRINT_CONSTANT;
8272             }
8273             break;
8274         }
8275
8276         case IF_RWR_MRD_OFF:
8277
8278             printf("%s, %s", emitRegName(id->idReg1(), attr), "offset");
8279             offs = emitGetInsDsp(id);
8280             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8281             break;
8282
8283         case IF_MRD_RRD:
8284         case IF_MWR_RRD:
8285         case IF_MRW_RRD:
8286
8287             printf("%s", sstr);
8288             offs = emitGetInsDsp(id);
8289             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8290             printf(", %s", emitRegName(id->idReg1(), attr));
8291             break;
8292
8293         case IF_MRD_CNS:
8294         case IF_MWR_CNS:
8295         case IF_MRW_CNS:
8296         case IF_MRW_SHF:
8297
8298             printf("%s", sstr);
8299             offs = emitGetInsDsp(id);
8300             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8301             emitGetInsDcmCns(id, &cnsVal);
8302             val = cnsVal.cnsVal;
8303 #ifdef _TARGET_AMD64_
8304             // no 8-byte immediates allowed here!
8305             assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
8306 #endif
8307             if (cnsVal.cnsReloc)
8308             {
8309                 emitDispReloc(val);
8310             }
8311             else if (id->idInsFmt() == IF_MRW_SHF)
8312             {
8313                 emitDispShift(ins, (BYTE)val);
8314             }
8315             else
8316             {
8317                 printf(", ");
8318                 goto PRINT_CONSTANT;
8319             }
8320             break;
8321
8322         case IF_MRD:
8323         case IF_MWR:
8324         case IF_MRW:
8325
8326 #if FEATURE_STACK_FP_X87
8327
8328         case IF_TRD_MRD:
8329         case IF_TWR_MRD:
8330         case IF_TRW_MRD:
8331
8332         // case IF_MRD_TRD:
8333         // case IF_MRW_TRD:
8334         case IF_MWR_TRD:
8335
8336 #endif // FEATURE_STACK_FP_X87
8337
8338             printf("%s", sstr);
8339             offs = emitGetInsDsp(id);
8340             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8341             emitDispShift(ins);
8342             break;
8343
8344         case IF_MRD_OFF:
8345
8346             printf("offset ");
8347             offs = emitGetInsDsp(id);
8348             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8349             break;
8350
8351         case IF_RRD_CNS:
8352         case IF_RWR_CNS:
8353         case IF_RRW_CNS:
8354             printf("%s, ", emitRegName(id->idReg1(), attr));
8355             val = emitGetInsSC(id);
8356             if (id->idIsCnsReloc())
8357             {
8358                 emitDispReloc(val);
8359             }
8360             else
8361             {
8362                 goto PRINT_CONSTANT;
8363             }
8364             break;
8365
8366 #if FEATURE_STACK_FP_X87
8367         case IF_TRD_FRD:
8368         case IF_TWR_FRD:
8369         case IF_TRW_FRD:
8370             switch (ins)
8371             {
8372                 case INS_fld:
8373                 case INS_fxch:
8374                     break;
8375
8376                 default:
8377                     printf("%s, ", emitFPregName(0));
8378                     break;
8379             }
8380             printf("%s", emitFPregName((unsigned)id->idReg1()));
8381             break;
8382
8383         case IF_FRD_TRD:
8384         case IF_FWR_TRD:
8385         case IF_FRW_TRD:
8386             printf("%s", emitFPregName((unsigned)id->idReg1()));
8387             if (ins != INS_fst && ins != INS_fstp)
8388                 printf(", %s", emitFPregName(0));
8389             break;
8390 #endif // FEATURE_STACK_FP_X87
8391
8392         case IF_LABEL:
8393         case IF_RWR_LABEL:
8394         case IF_SWR_LABEL:
8395
8396             if (ins == INS_lea)
8397             {
8398                 printf("%s, ", emitRegName(id->idReg1(), attr));
8399             }
8400             else if (ins == INS_mov)
8401             {
8402                 /* mov   dword ptr [frame.callSiteReturnAddress], label */
8403                 assert(id->idInsFmt() == IF_SWR_LABEL);
8404                 instrDescLbl* idlbl = (instrDescLbl*)id;
8405
8406                 emitDispFrameRef(idlbl->dstLclVar.lvaVarNum(), idlbl->dstLclVar.lvaOffset(), 0, asmfm);
8407
8408                 printf(", ");
8409             }
8410
8411             if (((instrDescJmp*)id)->idjShort)
8412             {
8413                 printf("SHORT ");
8414             }
8415
8416             if (id->idIsBound())
8417             {
8418                 printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum);
8419             }
8420             else
8421             {
8422                 printf("L_M%03u_BB%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum);
8423             }
8424             break;
8425
8426         case IF_METHOD:
8427         case IF_METHPTR:
8428             if (id->idIsCallAddr())
8429             {
8430                 offs       = (ssize_t)id->idAddr()->iiaAddr;
8431                 methodName = "";
8432             }
8433             else
8434             {
8435                 offs       = 0;
8436                 methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
8437             }
8438
8439             if (id->idInsFmt() == IF_METHPTR)
8440             {
8441                 printf("[");
8442             }
8443
8444             if (offs)
8445             {
8446                 if (id->idIsDspReloc())
8447                 {
8448                     printf("reloc ");
8449                 }
8450                 printf("%08X", offs);
8451             }
8452             else
8453             {
8454                 printf("%s", methodName);
8455             }
8456
8457             if (id->idInsFmt() == IF_METHPTR)
8458             {
8459                 printf("]");
8460             }
8461
8462             break;
8463
8464 #if FEATURE_STACK_FP_X87
8465         case IF_TRD:
8466         case IF_TWR:
8467         case IF_TRW:
8468 #endif // FEATURE_STACK_FP_X87
8469         case IF_NONE:
8470             break;
8471
8472         default:
8473             printf("unexpected format %s", emitIfName(id->idInsFmt()));
8474             assert(!"unexpectedFormat");
8475             break;
8476     }
8477
8478     if (sz != 0 && sz != id->idCodeSize() && (!asmfm || emitComp->verbose))
8479     {
8480         // Code size in the instrDesc is different from the actual code size we've been given!
8481         printf(" (ECS:%d, ACS:%d)", id->idCodeSize(), sz);
8482     }
8483
8484     printf("\n");
8485 }
8486
8487 /*****************************************************************************/
8488 #endif
8489
8490 /*****************************************************************************
8491  *
8492  *  Output nBytes bytes of NOP instructions
8493  */
8494
8495 static BYTE* emitOutputNOP(BYTE* dst, size_t nBytes)
8496 {
8497     assert(nBytes <= 15);
8498
8499 #ifndef _TARGET_AMD64_
8500     // TODO-X86-CQ: when VIA C3 CPUs are out of circulation, switch to the
8501     // more efficient real NOP: 0x0F 0x1F +modR/M.
8502     // We also can't use the AMD-recommended multiple size prefixes (e.g. 0x66 0x66 0x90 for a 3-byte NOP)
8503     // because the debugger and msdis don't like them, so maybe VIA doesn't either.
8504     // So instead just stick to repeating single-byte NOPs.
8505
8506     switch (nBytes)
8507     {
8508         case 15:
8509             *dst++ = 0x90;
8510             __fallthrough;
8511         case 14:
8512             *dst++ = 0x90;
8513             __fallthrough;
8514         case 13:
8515             *dst++ = 0x90;
8516             __fallthrough;
8517         case 12:
8518             *dst++ = 0x90;
8519             __fallthrough;
8520         case 11:
8521             *dst++ = 0x90;
8522             __fallthrough;
8523         case 10:
8524             *dst++ = 0x90;
8525             __fallthrough;
8526         case 9:
8527             *dst++ = 0x90;
8528             __fallthrough;
8529         case 8:
8530             *dst++ = 0x90;
8531             __fallthrough;
8532         case 7:
8533             *dst++ = 0x90;
8534             __fallthrough;
8535         case 6:
8536             *dst++ = 0x90;
8537             __fallthrough;
8538         case 5:
8539             *dst++ = 0x90;
8540             __fallthrough;
8541         case 4:
8542             *dst++ = 0x90;
8543             __fallthrough;
8544         case 3:
8545             *dst++ = 0x90;
8546             __fallthrough;
8547         case 2:
8548             *dst++ = 0x90;
8549             __fallthrough;
8550         case 1:
8551             *dst++ = 0x90;
8552             break;
8553         case 0:
8554             break;
8555     }
8556 #else  // _TARGET_AMD64_
8557     switch (nBytes)
8558     {
8559         case 2:
8560             *dst++ = 0x66;
8561             __fallthrough;
8562         case 1:
8563             *dst++ = 0x90;
8564             break;
8565         case 0:
8566             break;
8567         case 3:
8568             *dst++ = 0x0F;
8569             *dst++ = 0x1F;
8570             *dst++ = 0x00;
8571             break;
8572         case 4:
8573             *dst++ = 0x0F;
8574             *dst++ = 0x1F;
8575             *dst++ = 0x40;
8576             *dst++ = 0x00;
8577             break;
8578         case 6:
8579             *dst++ = 0x66;
8580             __fallthrough;
8581         case 5:
8582             *dst++ = 0x0F;
8583             *dst++ = 0x1F;
8584             *dst++ = 0x44;
8585             *dst++ = 0x00;
8586             *dst++ = 0x00;
8587             break;
8588         case 7:
8589             *dst++ = 0x0F;
8590             *dst++ = 0x1F;
8591             *dst++ = 0x80;
8592             *dst++ = 0x00;
8593             *dst++ = 0x00;
8594             *dst++ = 0x00;
8595             *dst++ = 0x00;
8596             break;
8597         case 15:
8598             // More than 3 prefixes is slower than just 2 NOPs
8599             dst = emitOutputNOP(emitOutputNOP(dst, 7), 8);
8600             break;
8601         case 14:
8602             // More than 3 prefixes is slower than just 2 NOPs
8603             dst = emitOutputNOP(emitOutputNOP(dst, 7), 7);
8604             break;
8605         case 13:
8606             // More than 3 prefixes is slower than just 2 NOPs
8607             dst = emitOutputNOP(emitOutputNOP(dst, 5), 8);
8608             break;
8609         case 12:
8610             // More than 3 prefixes is slower than just 2 NOPs
8611             dst = emitOutputNOP(emitOutputNOP(dst, 4), 8);
8612             break;
8613         case 11:
8614             *dst++ = 0x66;
8615             __fallthrough;
8616         case 10:
8617             *dst++ = 0x66;
8618             __fallthrough;
8619         case 9:
8620             *dst++ = 0x66;
8621             __fallthrough;
8622         case 8:
8623             *dst++ = 0x0F;
8624             *dst++ = 0x1F;
8625             *dst++ = 0x84;
8626             *dst++ = 0x00;
8627             *dst++ = 0x00;
8628             *dst++ = 0x00;
8629             *dst++ = 0x00;
8630             *dst++ = 0x00;
8631             break;
8632     }
8633 #endif // _TARGET_AMD64_
8634
8635     return dst;
8636 }
8637
8638 /*****************************************************************************
8639  *
8640  *  Output an instruction involving an address mode.
8641  */
8642
8643 BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
8644 {
8645     regNumber reg;
8646     regNumber rgx;
8647     ssize_t   dsp;
8648     bool      dspInByte;
8649     bool      dspIsZero;
8650
8651     instruction ins  = id->idIns();
8652     emitAttr    size = id->idOpSize();
8653     size_t      opsz = EA_SIZE_IN_BYTES(size);
8654
8655     // Get the base/index registers
8656     reg = id->idAddr()->iiaAddrMode.amBaseReg;
8657     rgx = id->idAddr()->iiaAddrMode.amIndxReg;
8658
8659     // For INS_call the instruction size is actually the return value size
8660     if (ins == INS_call)
8661     {
8662         // Special case: call via a register
8663         if (id->idIsCallRegPtr())
8664         {
8665             code_t opcode = insEncodeMRreg(INS_call, reg, EA_PTRSIZE, insCodeMR(INS_call));
8666
8667             dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, opcode);
8668             dst += emitOutputWord(dst, opcode);
8669             goto DONE;
8670         }
8671
8672         // The displacement field is in an unusual place for calls
8673         dsp = emitGetInsCIdisp(id);
8674
8675 #ifdef _TARGET_AMD64_
8676
8677         // Compute the REX prefix if it exists
8678         if (IsExtendedReg(reg, EA_PTRSIZE))
8679         {
8680             insEncodeReg012(ins, reg, EA_PTRSIZE, &code);
8681             // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
8682             reg = (regNumber)RegEncoding(reg);
8683         }
8684
8685         if (IsExtendedReg(rgx, EA_PTRSIZE))
8686         {
8687             insEncodeRegSIB(ins, rgx, &code);
8688             // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
8689             rgx = (regNumber)RegEncoding(rgx);
8690         }
8691
8692         // And emit the REX prefix
8693         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
8694
8695 #endif // _TARGET_AMD64_
8696
8697         goto GOT_DSP;
8698     }
8699
8700     // Is there a large constant operand?
8701     if (addc && (size > EA_1BYTE))
8702     {
8703         ssize_t cval = addc->cnsVal;
8704
8705         // Does the constant fit in a byte?
8706         if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
8707         {
8708             if (id->idInsFmt() != IF_ARW_SHF)
8709             {
8710                 code |= 2;
8711             }
8712
8713             opsz = 1;
8714         }
8715     }
8716
8717     // Emit VEX prefix if required
8718     // There are some callers who already add VEX prefix and call this routine.
8719     // Therefore, add VEX prefix is one is not already present.
8720     code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
8721
8722     // For this format, moves do not support a third operand, so we only need to handle the binary ops.
8723     if (TakesVexPrefix(ins))
8724     {
8725         if (IsDstDstSrcAVXInstruction(ins))
8726         {
8727             regNumber src1 = id->idReg2();
8728
8729             if ((id->idInsFmt() != IF_RWR_RRD_ARD) && (id->idInsFmt() != IF_RWR_RRD_ARD_CNS))
8730             {
8731                 src1 = id->idReg1();
8732             }
8733
8734             // encode source operand reg in 'vvvv' bits in 1's complement form
8735             code = insEncodeReg3456(ins, src1, size, code);
8736         }
8737         else if (IsDstSrcSrcAVXInstruction(ins))
8738         {
8739             code = insEncodeReg3456(ins, id->idReg2(), size, code);
8740         }
8741     }
8742
8743     // Emit the REX prefix if required
8744     if (TakesRexWPrefix(ins, size))
8745     {
8746         code = AddRexWPrefix(ins, code);
8747     }
8748
8749     if (IsExtendedReg(reg, EA_PTRSIZE))
8750     {
8751         insEncodeReg012(ins, reg, EA_PTRSIZE, &code);
8752         // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
8753         reg = (regNumber)RegEncoding(reg);
8754     }
8755
8756     if (IsExtendedReg(rgx, EA_PTRSIZE))
8757     {
8758         insEncodeRegSIB(ins, rgx, &code);
8759         // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
8760         rgx = (regNumber)RegEncoding(rgx);
8761     }
8762
8763     // Special case emitting AVX instructions
8764     if (Is4ByteSSE4OrAVXInstruction(ins))
8765     {
8766         unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
8767         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
8768
8769         if (UseVEXEncoding())
8770         {
8771             // Emit last opcode byte
8772             // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
8773             assert((code & 0xFF) == 0);
8774             dst += emitOutputByte(dst, (code >> 8) & 0xFF);
8775         }
8776         else
8777         {
8778             dst += emitOutputWord(dst, code >> 16);
8779             dst += emitOutputWord(dst, code & 0xFFFF);
8780         }
8781
8782         code = regcode;
8783     }
8784     // Is this a 'big' opcode?
8785     else if (code & 0xFF000000)
8786     {
8787         // Output the REX prefix
8788         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
8789
8790         // Output the highest word of the opcode
8791         // We need to check again as in case of AVX instructions leading opcode bytes are stripped off
8792         // and encoded as part of VEX prefix.
8793         if (code & 0xFF000000)
8794         {
8795             dst += emitOutputWord(dst, code >> 16);
8796             code &= 0x0000FFFF;
8797         }
8798     }
8799     else if (code & 0x00FF0000)
8800     {
8801         // BT supports 16 bit operands and this code doesn't handle the necessary 66 prefix.
8802         assert(ins != INS_bt);
8803
8804         // Output the REX prefix
8805         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
8806
8807         // Output the highest byte of the opcode
8808         if (code & 0x00FF0000)
8809         {
8810             dst += emitOutputByte(dst, code >> 16);
8811             code &= 0x0000FFFF;
8812         }
8813
8814         // Use the large version if this is not a byte. This trick will not
8815         // work in case of SSE2 and AVX instructions.
8816         if ((size != EA_1BYTE) && (ins != INS_imul) && !IsSSE2Instruction(ins) && !IsAVXInstruction(ins))
8817         {
8818             code++;
8819         }
8820     }
8821     else if (CodeGen::instIsFP(ins))
8822     {
8823 #if FEATURE_STACK_FP_X87
8824         assert(size == EA_4BYTE || size == EA_8BYTE || ins == INS_fldcw || ins == INS_fnstcw);
8825 #else  // !FEATURE_STACK_FP_X87
8826         assert(size == EA_4BYTE || size == EA_8BYTE);
8827 #endif // ! FEATURE_STACK_FP_X87
8828
8829         if (size == EA_8BYTE)
8830         {
8831             code += 4;
8832         }
8833     }
8834     else if (!IsSSE2Instruction(ins) && !IsAVXInstruction(ins))
8835     {
8836         /* Is the operand size larger than a byte? */
8837
8838         switch (size)
8839         {
8840             case EA_1BYTE:
8841                 break;
8842
8843             case EA_2BYTE:
8844
8845                 /* Output a size prefix for a 16-bit operand */
8846
8847                 dst += emitOutputByte(dst, 0x66);
8848
8849                 __fallthrough;
8850
8851             case EA_4BYTE:
8852 #ifdef _TARGET_AMD64_
8853             case EA_8BYTE:
8854 #endif
8855
8856                 /* Set the 'w' bit to get the large version */
8857
8858                 code |= 0x1;
8859                 break;
8860
8861 #ifdef _TARGET_X86_
8862             case EA_8BYTE:
8863
8864                 /* Double operand - set the appropriate bit */
8865
8866                 code |= 0x04;
8867                 break;
8868
8869 #endif // _TARGET_X86_
8870
8871             default:
8872                 NO_WAY("unexpected size");
8873                 break;
8874         }
8875     }
8876
8877     // Output the REX prefix
8878     dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
8879
8880     // Get the displacement value
8881     dsp = emitGetInsAmdAny(id);
8882
8883 GOT_DSP:
8884
8885     dspInByte = ((signed char)dsp == (ssize_t)dsp);
8886     dspIsZero = (dsp == 0);
8887
8888     if (id->idIsDspReloc())
8889     {
8890         dspInByte = false; // relocs can't be placed in a byte
8891     }
8892
8893     // Is there a [scaled] index component?
8894     if (rgx == REG_NA)
8895     {
8896         // The address is of the form "[reg+disp]"
8897         switch (reg)
8898         {
8899             case REG_NA:
8900                 if (id->idIsDspReloc())
8901                 {
8902                     INT32 addlDelta = 0;
8903
8904                     // The address is of the form "[disp]"
8905                     // On x86 - disp is relative to zero
8906                     // On Amd64 - disp is relative to RIP
8907                     if (Is4ByteSSE4OrAVXInstruction(ins))
8908                     {
8909                         dst += emitOutputByte(dst, code | 0x05);
8910                     }
8911                     else
8912                     {
8913                         dst += emitOutputWord(dst, code | 0x0500);
8914                     }
8915
8916                     if (addc)
8917                     {
8918                         // It is of the form "ins [disp], immed"
8919                         // For emitting relocation, we also need to take into account of the
8920                         // additional bytes of code emitted for immed val.
8921
8922                         ssize_t cval = addc->cnsVal;
8923
8924 #ifdef _TARGET_AMD64_
8925                         // all these opcodes only take a sign-extended 4-byte immediate
8926                         noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
8927 #else
8928                         noway_assert(opsz <= 4);
8929 #endif
8930
8931                         switch (opsz)
8932                         {
8933                             case 0:
8934                             case 4:
8935                             case 8:
8936                                 addlDelta = -4;
8937                                 break;
8938                             case 2:
8939                                 addlDelta = -2;
8940                                 break;
8941                             case 1:
8942                                 addlDelta = -1;
8943                                 break;
8944
8945                             default:
8946                                 assert(!"unexpected operand size");
8947                                 unreached();
8948                         }
8949                     }
8950
8951 #ifdef _TARGET_AMD64_
8952                     // We emit zero on Amd64, to avoid the assert in emitOutputLong()
8953                     dst += emitOutputLong(dst, 0);
8954 #else
8955                     dst += emitOutputLong(dst, dsp);
8956 #endif
8957                     emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_DISP32, 0,
8958                                          addlDelta);
8959                 }
8960                 else
8961                 {
8962 #ifdef _TARGET_X86_
8963                     if (Is4ByteSSE4OrAVXInstruction(ins))
8964                     {
8965                         dst += emitOutputByte(dst, code | 0x05);
8966                     }
8967                     else
8968                     {
8969                         dst += emitOutputWord(dst, code | 0x0500);
8970                     }
8971 #else  //_TARGET_AMD64_
8972                     // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
8973                     // This addr mode should never be used while generating relocatable ngen code nor if
8974                     // the addr can be encoded as pc-relative address.
8975                     noway_assert(!emitComp->opts.compReloc);
8976                     noway_assert(codeGen->genAddrRelocTypeHint((size_t)dsp) != IMAGE_REL_BASED_REL32);
8977                     noway_assert((int)dsp == dsp);
8978
8979                     // This requires, specifying a SIB byte after ModRM byte.
8980                     if (Is4ByteSSE4OrAVXInstruction(ins))
8981                     {
8982                         dst += emitOutputByte(dst, code | 0x04);
8983                     }
8984                     else
8985                     {
8986                         dst += emitOutputWord(dst, code | 0x0400);
8987                     }
8988                     dst += emitOutputByte(dst, 0x25);
8989 #endif //_TARGET_AMD64_
8990                     dst += emitOutputLong(dst, dsp);
8991                 }
8992                 break;
8993
8994             case REG_EBP:
8995                 if (Is4ByteSSE4OrAVXInstruction(ins))
8996                 {
8997                     // Does the offset fit in a byte?
8998                     if (dspInByte)
8999                     {
9000                         dst += emitOutputByte(dst, code | 0x45);
9001                         dst += emitOutputByte(dst, dsp);
9002                     }
9003                     else
9004                     {
9005                         dst += emitOutputByte(dst, code | 0x85);
9006                         dst += emitOutputLong(dst, dsp);
9007
9008                         if (id->idIsDspReloc())
9009                         {
9010                             emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9011                         }
9012                     }
9013                 }
9014                 else
9015                 {
9016                     // Does the offset fit in a byte?
9017                     if (dspInByte)
9018                     {
9019                         dst += emitOutputWord(dst, code | 0x4500);
9020                         dst += emitOutputByte(dst, dsp);
9021                     }
9022                     else
9023                     {
9024                         dst += emitOutputWord(dst, code | 0x8500);
9025                         dst += emitOutputLong(dst, dsp);
9026
9027                         if (id->idIsDspReloc())
9028                         {
9029                             emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9030                         }
9031                     }
9032                 }
9033                 break;
9034
9035             case REG_ESP:
9036 #ifdef LEGACY_BACKEND
9037                 // REG_ESP could be REG_R12, which applies to any instruction
9038                 //
9039                 // This assert isn't too helpful from the OptJit point of view
9040                 //
9041                 // a better question is why is it here at all
9042                 //
9043                 assert((ins == INS_lea) || (ins == INS_mov) || (ins == INS_test) || (ins == INS_cmp) ||
9044                        (ins == INS_fld && dspIsZero) || (ins == INS_fstp && dspIsZero) ||
9045                        (ins == INS_fistp && dspIsZero) || IsSSE2Instruction(ins) || IsAVXInstruction(ins) ||
9046                        (ins == INS_or));
9047 #endif // LEGACY_BACKEND
9048
9049                 if (Is4ByteSSE4OrAVXInstruction(ins))
9050                 {
9051                     // Is the offset 0 or does it at least fit in a byte?
9052                     if (dspIsZero)
9053                     {
9054                         dst += emitOutputByte(dst, code | 0x04);
9055                         dst += emitOutputByte(dst, 0x24);
9056                     }
9057                     else if (dspInByte)
9058                     {
9059                         dst += emitOutputByte(dst, code | 0x44);
9060                         dst += emitOutputByte(dst, 0x24);
9061                         dst += emitOutputByte(dst, dsp);
9062                     }
9063                     else
9064                     {
9065                         dst += emitOutputByte(dst, code | 0x84);
9066                         dst += emitOutputByte(dst, 0x24);
9067                         dst += emitOutputLong(dst, dsp);
9068                         if (id->idIsDspReloc())
9069                         {
9070                             emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9071                         }
9072                     }
9073                 }
9074                 else
9075                 {
9076                     // Is the offset 0 or does it at least fit in a byte?
9077                     if (dspIsZero)
9078                     {
9079                         dst += emitOutputWord(dst, code | 0x0400);
9080                         dst += emitOutputByte(dst, 0x24);
9081                     }
9082                     else if (dspInByte)
9083                     {
9084                         dst += emitOutputWord(dst, code | 0x4400);
9085                         dst += emitOutputByte(dst, 0x24);
9086                         dst += emitOutputByte(dst, dsp);
9087                     }
9088                     else
9089                     {
9090                         dst += emitOutputWord(dst, code | 0x8400);
9091                         dst += emitOutputByte(dst, 0x24);
9092                         dst += emitOutputLong(dst, dsp);
9093                         if (id->idIsDspReloc())
9094                         {
9095                             emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9096                         }
9097                     }
9098                 }
9099                 break;
9100
9101             default:
9102                 if (Is4ByteSSE4OrAVXInstruction(ins))
9103                 {
9104                     // Put the register in the opcode
9105                     code |= insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr);
9106
9107                     // Is there a displacement?
9108                     if (dspIsZero)
9109                     {
9110                         // This is simply "[reg]"
9111                         dst += emitOutputByte(dst, code);
9112                     }
9113                     else
9114                     {
9115                         // This is [reg + dsp]" -- does the offset fit in a byte?
9116                         if (dspInByte)
9117                         {
9118                             dst += emitOutputByte(dst, code | 0x40);
9119                             dst += emitOutputByte(dst, dsp);
9120                         }
9121                         else
9122                         {
9123                             dst += emitOutputByte(dst, code | 0x80);
9124                             dst += emitOutputLong(dst, dsp);
9125                             if (id->idIsDspReloc())
9126                             {
9127                                 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9128                             }
9129                         }
9130                     }
9131                 }
9132                 else
9133                 {
9134                     // Put the register in the opcode
9135                     code |= insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) << 8;
9136
9137                     // Is there a displacement?
9138                     if (dspIsZero)
9139                     {
9140                         // This is simply "[reg]"
9141                         dst += emitOutputWord(dst, code);
9142                     }
9143                     else
9144                     {
9145                         // This is [reg + dsp]" -- does the offset fit in a byte?
9146                         if (dspInByte)
9147                         {
9148                             dst += emitOutputWord(dst, code | 0x4000);
9149                             dst += emitOutputByte(dst, dsp);
9150                         }
9151                         else
9152                         {
9153                             dst += emitOutputWord(dst, code | 0x8000);
9154                             dst += emitOutputLong(dst, dsp);
9155                             if (id->idIsDspReloc())
9156                             {
9157                                 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9158                             }
9159                         }
9160                     }
9161                 }
9162
9163                 break;
9164         }
9165     }
9166     else
9167     {
9168         unsigned regByte;
9169
9170         // We have a scaled index operand
9171         unsigned mul = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale);
9172
9173         // Is the index operand scaled?
9174         if (mul > 1)
9175         {
9176             // Is there a base register?
9177             if (reg != REG_NA)
9178             {
9179                 // The address is "[reg + {2/4/8} * rgx + icon]"
9180                 regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) |
9181                           insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul);
9182
9183                 if (Is4ByteSSE4OrAVXInstruction(ins))
9184                 {
9185                     // Emit [ebp + {2/4/8} * rgz] as [ebp + {2/4/8} * rgx + 0]
9186                     if (dspIsZero && reg != REG_EBP)
9187                     {
9188                         // The address is "[reg + {2/4/8} * rgx]"
9189                         dst += emitOutputByte(dst, code | 0x04);
9190                         dst += emitOutputByte(dst, regByte);
9191                     }
9192                     else
9193                     {
9194                         // The address is "[reg + {2/4/8} * rgx + disp]"
9195                         if (dspInByte)
9196                         {
9197                             dst += emitOutputByte(dst, code | 0x44);
9198                             dst += emitOutputByte(dst, regByte);
9199                             dst += emitOutputByte(dst, dsp);
9200                         }
9201                         else
9202                         {
9203                             dst += emitOutputByte(dst, code | 0x84);
9204                             dst += emitOutputByte(dst, regByte);
9205                             dst += emitOutputLong(dst, dsp);
9206                             if (id->idIsDspReloc())
9207                             {
9208                                 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9209                             }
9210                         }
9211                     }
9212                 }
9213                 else
9214                 {
9215                     // Emit [ebp + {2/4/8} * rgz] as [ebp + {2/4/8} * rgx + 0]
9216                     if (dspIsZero && reg != REG_EBP)
9217                     {
9218                         // The address is "[reg + {2/4/8} * rgx]"
9219                         dst += emitOutputWord(dst, code | 0x0400);
9220                         dst += emitOutputByte(dst, regByte);
9221                     }
9222                     else
9223                     {
9224                         // The address is "[reg + {2/4/8} * rgx + disp]"
9225                         if (dspInByte)
9226                         {
9227                             dst += emitOutputWord(dst, code | 0x4400);
9228                             dst += emitOutputByte(dst, regByte);
9229                             dst += emitOutputByte(dst, dsp);
9230                         }
9231                         else
9232                         {
9233                             dst += emitOutputWord(dst, code | 0x8400);
9234                             dst += emitOutputByte(dst, regByte);
9235                             dst += emitOutputLong(dst, dsp);
9236                             if (id->idIsDspReloc())
9237                             {
9238                                 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9239                             }
9240                         }
9241                     }
9242                 }
9243             }
9244             else
9245             {
9246                 // The address is "[{2/4/8} * rgx + icon]"
9247                 regByte = insEncodeReg012(ins, REG_EBP, EA_PTRSIZE, nullptr) |
9248                           insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul);
9249
9250                 if (Is4ByteSSE4OrAVXInstruction(ins))
9251                 {
9252                     dst += emitOutputByte(dst, code | 0x04);
9253                 }
9254                 else
9255                 {
9256                     dst += emitOutputWord(dst, code | 0x0400);
9257                 }
9258
9259                 dst += emitOutputByte(dst, regByte);
9260
9261                 // Special case: jump through a jump table
9262                 if (ins == INS_i_jmp)
9263                 {
9264                     dsp += (size_t)emitConsBlock;
9265                 }
9266
9267                 dst += emitOutputLong(dst, dsp);
9268                 if (id->idIsDspReloc())
9269                 {
9270                     emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9271                 }
9272             }
9273         }
9274         else
9275         {
9276             // The address is "[reg+rgx+dsp]"
9277             regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) | insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr);
9278
9279             if (Is4ByteSSE4OrAVXInstruction(ins))
9280             {
9281                 if (dspIsZero && reg != REG_EBP)
9282                 {
9283                     // This is [reg+rgx]"
9284                     dst += emitOutputByte(dst, code | 0x04);
9285                     dst += emitOutputByte(dst, regByte);
9286                 }
9287                 else
9288                 {
9289                     // This is [reg+rgx+dsp]" -- does the offset fit in a byte?
9290                     if (dspInByte)
9291                     {
9292                         dst += emitOutputByte(dst, code | 0x44);
9293                         dst += emitOutputByte(dst, regByte);
9294                         dst += emitOutputByte(dst, dsp);
9295                     }
9296                     else
9297                     {
9298                         dst += emitOutputByte(dst, code | 0x84);
9299                         dst += emitOutputByte(dst, regByte);
9300                         dst += emitOutputLong(dst, dsp);
9301                         if (id->idIsDspReloc())
9302                         {
9303                             emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9304                         }
9305                     }
9306                 }
9307             }
9308             else
9309             {
9310                 if (dspIsZero && reg != REG_EBP)
9311                 {
9312                     // This is [reg+rgx]"
9313                     dst += emitOutputWord(dst, code | 0x0400);
9314                     dst += emitOutputByte(dst, regByte);
9315                 }
9316                 else
9317                 {
9318                     // This is [reg+rgx+dsp]" -- does the offset fit in a byte?
9319                     if (dspInByte)
9320                     {
9321                         dst += emitOutputWord(dst, code | 0x4400);
9322                         dst += emitOutputByte(dst, regByte);
9323                         dst += emitOutputByte(dst, dsp);
9324                     }
9325                     else
9326                     {
9327                         dst += emitOutputWord(dst, code | 0x8400);
9328                         dst += emitOutputByte(dst, regByte);
9329                         dst += emitOutputLong(dst, dsp);
9330                         if (id->idIsDspReloc())
9331                         {
9332                             emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9333                         }
9334                     }
9335                 }
9336             }
9337         }
9338     }
9339
9340     // Now generate the constant value, if present
9341     if (addc)
9342     {
9343         ssize_t cval = addc->cnsVal;
9344
9345 #ifdef _TARGET_AMD64_
9346         // all these opcodes only take a sign-extended 4-byte immediate
9347         noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
9348 #endif
9349
9350         switch (opsz)
9351         {
9352             case 0:
9353             case 4:
9354             case 8:
9355                 dst += emitOutputLong(dst, cval);
9356                 break;
9357             case 2:
9358                 dst += emitOutputWord(dst, cval);
9359                 break;
9360             case 1:
9361                 dst += emitOutputByte(dst, cval);
9362                 break;
9363
9364             default:
9365                 assert(!"unexpected operand size");
9366         }
9367
9368         if (addc->cnsReloc)
9369         {
9370             emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
9371             assert(opsz == 4);
9372         }
9373     }
9374
9375 DONE:
9376
9377     // Does this instruction operate on a GC ref value?
9378     if (id->idGCref())
9379     {
9380         switch (id->idInsFmt())
9381         {
9382             case IF_ARD:
9383             case IF_AWR:
9384             case IF_ARW:
9385                 break;
9386
9387             case IF_RRD_ARD:
9388                 break;
9389
9390             case IF_RWR_ARD:
9391                 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
9392                 break;
9393
9394             case IF_RRW_ARD:
9395                 // Mark the destination register as holding a GCT_BYREF
9396                 assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
9397                 emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
9398                 break;
9399
9400             case IF_ARD_RRD:
9401             case IF_AWR_RRD:
9402                 break;
9403
9404             case IF_ARD_CNS:
9405             case IF_AWR_CNS:
9406                 break;
9407
9408             case IF_ARW_RRD:
9409             case IF_ARW_CNS:
9410                 assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
9411                 break;
9412
9413             default:
9414 #ifdef DEBUG
9415                 emitDispIns(id, false, false, false);
9416 #endif
9417                 assert(!"unexpected GC ref instruction format");
9418         }
9419
9420         // mul can never produce a GC ref
9421         assert(!instrIs3opImul(ins));
9422         assert(ins != INS_mulEAX && ins != INS_imulEAX);
9423     }
9424     else
9425     {
9426         if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
9427         {
9428             switch (id->idInsFmt())
9429             {
9430                 case IF_RWR_ARD:
9431                     emitGCregDeadUpd(id->idReg1(), dst);
9432                     break;
9433                 default:
9434                     break;
9435             }
9436
9437             if (ins == INS_mulEAX || ins == INS_imulEAX)
9438             {
9439                 emitGCregDeadUpd(REG_EAX, dst);
9440                 emitGCregDeadUpd(REG_EDX, dst);
9441             }
9442
9443             // For the three operand imul instruction the target register
9444             // is encoded in the opcode
9445
9446             if (instrIs3opImul(ins))
9447             {
9448                 regNumber tgtReg = inst3opImulReg(ins);
9449                 emitGCregDeadUpd(tgtReg, dst);
9450             }
9451         }
9452     }
9453
9454     return dst;
9455 }
9456
9457 /*****************************************************************************
9458  *
9459  *  Output an instruction involving a stack frame value.
      *
      *  Arguments:
      *     dst  - position in the output buffer at which to start writing
      *     id   - descriptor of the instruction; supplies the instruction kind,
      *            operand size, format, register 1 and the local variable
      *            number/offset that is being addressed
      *     code - opcode bits to emit; prefixes, the size bit and the ModRM
      *            mod/rm bits for the frame location are merged into it here
      *     addc - optional immediate operand (nullptr when there is none)
      *
      *  Return Value:
      *     dst advanced past every byte that was written.
      *
      *  Notes:
      *     The frame location is addressed as [EBP+dsp] when lvaFrameAddress
      *     reports the variable as EBP-based, otherwise as [ESP+dsp] through a
      *     SIB byte. Once the bytes are written, GC liveness of the affected
      *     stack slot / register is updated based on id->idGCref() and the
      *     instruction format.
9460  */
9461
9462 BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
9463 {
9464     int  adr;       // frame address of the variable as returned by lvaFrameAddress
9465     int  dsp;       // displacement actually encoded (adr + offset within the variable)
9466     bool EBPbased;  // set by lvaFrameAddress
9467     bool dspInByte; // true when dsp can be encoded as a sign-extended 8-bit displacement
9468     bool dspIsZero; // true when no displacement is needed at all
9469
9470     instruction ins  = id->idIns();
9471     emitAttr    size = id->idOpSize();
9472     size_t      opsz = EA_SIZE_IN_BYTES(size); // number of immediate bytes to write for 'addc' (may shrink to 1 below)
9473
9474     assert(ins != INS_imul || id->idReg1() == REG_EAX || size == EA_4BYTE || size == EA_8BYTE);
9475
9476     // Is there a large constant operand?
9477     if (addc && (size > EA_1BYTE))
9478     {
9479         ssize_t cval = addc->cnsVal;
9480
9481         // Does the constant fit in a signed byte? (Relocatable constants, mov and test never use the short form.)
9482         if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
9483         {
                 // NOTE(review): bit 1 presumably selects the "sign-extended 8-bit immediate" form of the
                 // opcode; shift formats (IF_SRW_SHF) are left unchanged -- confirm against the opcode tables.
9484             if (id->idInsFmt() != IF_SRW_SHF)
9485             {
9486                 code |= 2;
9487             }
9488
                 // Only a single immediate byte is written at the end of this function
9489             opsz = 1;
9490         }
9491     }
9492
9493     // Add VEX prefix if required.
9494     // There are some callers who already add VEX prefix and call this routine.
9495     // Therefore, add the VEX prefix only if one is not already present.
9496     code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
9497
9498     // Compute the REX prefix
9499     if (TakesRexWPrefix(ins, size))
9500     {
9501         code = AddRexWPrefix(ins, code);
9502     }
9503
9504     // Special case emitting AVX instructions
9505     if (Is4ByteSSE4OrAVXInstruction(ins))
9506     {
             // Encode register 1 into the ModRM 'reg' field; 'code' is passed by address so the
             // encoder can also update prefix bits in it.
9507         unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
9508         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9509
9510         if (UseVEXEncoding())
9511         {
9512             // Emit last opcode byte
9513             // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
9514             assert((code & 0xFF) == 0);
9515             dst += emitOutputByte(dst, (code >> 8) & 0xFF);
9516         }
9517         else
9518         {
                 // No VEX prefix: write all four opcode bytes, upper word first
9519             dst += emitOutputWord(dst, code >> 16);
9520             dst += emitOutputWord(dst, code & 0xFFFF);
9521         }
9522
             // The whole opcode has been written; from here on 'code' only carries the ModRM 'reg'
             // bits, and the addressing code below emits the ModRM as a single byte.
9523         code = regcode;
9524     }
9525     // Is this a 'big' opcode?
9526     else if (code & 0xFF000000)
9527     {
9528         // Output the REX prefix
9529         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9530
9531         // Output the highest word of the opcode
9532         // We need to check again because in case of AVX instructions the leading
9533         // escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix.
9534         if (code & 0xFF000000)
9535         {
9536             dst += emitOutputWord(dst, code >> 16);
9537             code &= 0x0000FFFF;
9538         }
9539     }
9540     else if (code & 0x00FF0000)
9541     {
9542         // BT supports 16 bit operands and this code doesn't add the necessary 66 prefix.
9543         assert(ins != INS_bt);
9544
9545         // Output the REX prefix
9546         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9547
9548         // Output the highest byte of the opcode.
9549         // We need to check again because in case of AVX instructions the leading
9550         // escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix.
9551         if (code & 0x00FF0000)
9552         {
9553             dst += emitOutputByte(dst, code >> 16);
9554             code &= 0x0000FFFF;
9555         }
9556
9557         // Use the large version if this is not a byte
             // (imul, cmov and SSE2/AVX instructions do not get the size bit set here)
9558         if ((size != EA_1BYTE) && (ins != INS_imul) && (!insIsCMOV(ins)) && !IsSSE2Instruction(ins) &&
9559             !IsAVXInstruction(ins))
9560         {
9561             code |= 0x1;
9562         }
9563     }
9564     else if (CodeGen::instIsFP(ins))
9565     {
9566         assert(size == EA_4BYTE || size == EA_8BYTE);
9567
             // 8-byte floating point operands use the opcode that is 4 higher than the 4-byte one
9568         if (size == EA_8BYTE)
9569         {
9570             code += 4;
9571         }
9572     }
9573     else if (!IsSSE2Instruction(ins) && !IsAVXInstruction(ins))
9574     {
9575         // Is the operand size larger than a byte?
9576         switch (size)
9577         {
9578             case EA_1BYTE:
9579                 break;
9580
9581             case EA_2BYTE:
9582                 // Output a size prefix for a 16-bit operand
9583                 dst += emitOutputByte(dst, 0x66);
9584                 __fallthrough;
9585
9586             case EA_4BYTE:
9587 #ifdef _TARGET_AMD64_
9588             case EA_8BYTE:
9589 #endif // _TARGET_AMD64_
9590
9591                 /* Set the 'w' size bit to indicate 32-bit operation
9592                  * Note that incrementing "code" for INS_call (0xFF) would
9593                  * overflow, whereas setting the lower bit to 1 just works out
9594                  */
9595
9596                 code |= 0x01;
9597                 break;
9598
9599 #ifdef _TARGET_X86_
9600             case EA_8BYTE:
9601
9602                 // Double operand - set the appropriate bit.
9603                 // I don't know what a legitimate reason to end up in this case would be
9604                 // considering that FP is taken care of above...
9605                 // what is an instruction that takes a double which is not covered by the
9606                 // above instIsFP? Of the list in instrsxarch, only INS_fprem
9607                 code |= 0x04;
9608                 NO_WAY("bad 8 byte op");
9609                 break;
9610 #endif // _TARGET_X86_
9611
9612             default:
9613                 NO_WAY("unexpected size");
9614                 break;
9615         }
9616     }
9617
9618     // Output the REX prefix
         // (called unconditionally; the paths above that already wrote the prefix call the same helper)
9619     dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9620
9621     // Figure out the variable's frame position
9622     int varNum = id->idAddr()->iiaLclVar.lvaVarNum();
9623
9624     adr = emitComp->lvaFrameAddress(varNum, &EBPbased);
9625     dsp = adr + id->idAddr()->iiaLclVar.lvaOffset();
9626
9627     dspInByte = ((signed char)dsp == (int)dsp);
9628     dspIsZero = (dsp == 0);
9629
9630     // for stack variables the dsp should never be a reloc
9631     assert(id->idIsDspReloc() == 0);
9632
9633     if (EBPbased)
9634     {
9635         // EBP-based variable: does the offset fit in a byte?
             // ModRM 0x45 = mod 01, rm 101 -> [EBP + disp8]; 0x85 = mod 10, rm 101 -> [EBP + disp32].
             // A displacement is always emitted here, even when dsp is zero.
9636         if (Is4ByteSSE4OrAVXInstruction(ins))
9637         {
                 // Opcode bytes were already written above, so only the ModRM byte is emitted here
9638             if (dspInByte)
9639             {
9640                 dst += emitOutputByte(dst, code | 0x45);
9641                 dst += emitOutputByte(dst, dsp);
9642             }
9643             else
9644             {
9645                 dst += emitOutputByte(dst, code | 0x85);
9646                 dst += emitOutputLong(dst, dsp);
9647             }
9648         }
9649         else
9650         {
                 // 'code' holds the final opcode byte in its low byte and the ModRM byte in the next
                 // byte, so the same mod/rm bits are OR'ed in shifted left by 8.
9651             if (dspInByte)
9652             {
9653                 dst += emitOutputWord(dst, code | 0x4500);
9654                 dst += emitOutputByte(dst, dsp);
9655             }
9656             else
9657             {
9658                 dst += emitOutputWord(dst, code | 0x8500);
9659                 dst += emitOutputLong(dst, dsp);
9660             }
9661         }
9662     }
9663     else
9664     {
9665
9666 #if !FEATURE_FIXED_OUT_ARGS
9667         // Adjust the offset by the amount currently pushed on the CPU stack
9668         dsp += emitCurStackLvl;
9669 #endif
9670
             // Recompute, since dsp may just have been adjusted
9671         dspInByte = ((signed char)dsp == (int)dsp);
9672         dspIsZero = (dsp == 0);
9673
9674         // Does the offset fit in a byte?
             // ModRM rm 100 means a SIB byte follows: 0x04 = no displacement, 0x44 = disp8, 0x84 = disp32.
             // The SIB byte 0x24 encodes base = ESP with no index register.
9675         if (Is4ByteSSE4OrAVXInstruction(ins))
9676         {
9677             if (dspInByte)
9678             {
9679                 if (dspIsZero)
9680                 {
9681                     dst += emitOutputByte(dst, code | 0x04);
9682                     dst += emitOutputByte(dst, 0x24);
9683                 }
9684                 else
9685                 {
9686                     dst += emitOutputByte(dst, code | 0x44);
9687                     dst += emitOutputByte(dst, 0x24);
9688                     dst += emitOutputByte(dst, dsp);
9689                 }
9690             }
9691             else
9692             {
9693                 dst += emitOutputByte(dst, code | 0x84);
9694                 dst += emitOutputByte(dst, 0x24);
9695                 dst += emitOutputLong(dst, dsp);
9696             }
9697         }
9698         else
9699         {
                 // Same encodings as above, with the ModRM bits placed in the second byte of 'code'
9700             if (dspInByte)
9701             {
9702                 if (dspIsZero)
9703                 {
9704                     dst += emitOutputWord(dst, code | 0x0400);
9705                     dst += emitOutputByte(dst, 0x24);
9706                 }
9707                 else
9708                 {
9709                     dst += emitOutputWord(dst, code | 0x4400);
9710                     dst += emitOutputByte(dst, 0x24);
9711                     dst += emitOutputByte(dst, dsp);
9712                 }
9713             }
9714             else
9715             {
9716                 dst += emitOutputWord(dst, code | 0x8400);
9717                 dst += emitOutputByte(dst, 0x24);
9718                 dst += emitOutputLong(dst, dsp);
9719             }
9720         }
9721     }
9722
9723     // Now generate the constant value, if present
9724     if (addc)
9725     {
9726         ssize_t cval = addc->cnsVal;
9727
9728 #ifdef _TARGET_AMD64_
9729         // all these opcodes only take a sign-extended 4-byte immediate
9730         noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
9731 #endif
9732
             // opsz was reduced to 1 above when the short immediate form was selected;
             // 8-byte operations still write a 4-byte immediate.
9733         switch (opsz)
9734         {
9735             case 0:
9736             case 4:
9737             case 8:
9738                 dst += emitOutputLong(dst, cval);
9739                 break;
9740             case 2:
9741                 dst += emitOutputWord(dst, cval);
9742                 break;
9743             case 1:
9744                 dst += emitOutputByte(dst, cval);
9745                 break;
9746
9747             default:
9748                 assert(!"unexpected operand size");
9749         }
9750
             // A relocatable constant is recorded against the 4 immediate bytes that were just written
9751         if (addc->cnsReloc)
9752         {
9753             emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
9754             assert(opsz == 4);
9755         }
9756     }
9757
9758     // Does this instruction operate on a GC ref value?
9759     if (id->idGCref())
9760     {
9761         // Factor in the sub-variable offset
             // (rounded down to a pointer-size boundary)
9762         adr += AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE);
9763
9764         switch (id->idInsFmt())
9765         {
9766             case IF_SRD:
9767                 // Read  stack                    -- no change
9768                 break;
9769
9770             case IF_SWR: // Stack Write (So we need to update GC live for stack var)
9771                 // Write stack                    -- GC var may be born
9772                 emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst);
9773                 break;
9774
9775             case IF_SRD_CNS:
9776                 // Read  stack                    -- no change
9777                 break;
9778
9779             case IF_SWR_CNS:
9780                 // Write stack                    -- no change
9781                 break;
9782
9783             case IF_SRD_RRD:
9784             case IF_RRD_SRD:
9785                 // Read  stack   , read  register -- no change
9786                 break;
9787
9788             case IF_RWR_SRD: // Register Write, Stack Read (So we need to update GC live for register)
9789
9790                 // Read  stack   , write register -- GC reg may be born
9791                 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
9792                 break;
9793
9794             case IF_SWR_RRD: // Stack Write, Register Read (So we need to update GC live for stack var)
9795                 // Read  register, write stack    -- GC var may be born
9796                 emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst);
9797                 break;
9798
9799             case IF_RRW_SRD: // Register Read/Write, Stack Read (So we need to update GC live for register)
9800
9801                 // reg could have been a GCREF as GCREF + int=BYREF
9802                 //                             or BYREF+/-int=BYREF
9803                 assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
9804                 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
9805                 break;
9806
9807             case IF_SRW_CNS:
9808             case IF_SRW_RRD:
9809             // += -= of a byref, no change
                 // (these two cases deliberately share the 'break' of IF_SRW below)
9810
9811             case IF_SRW:
9812                 break;
9813
9814             default:
9815 #ifdef DEBUG
9816                 emitDispIns(id, false, false, false);
9817 #endif
9818                 assert(!"unexpected GC ref instruction format");
9819         }
9820     }
9821     else
9822     {
             // Not a GC ref instruction. Unless the instruction can only write an SSE2/AVX register,
             // report the registers it writes to emitGCregDeadUpd.
9823         if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
9824         {
9825             switch (id->idInsFmt())
9826             {
9827                 case IF_RWR_SRD: // Register Write, Stack Read
9828                 case IF_RRW_SRD: // Register Read/Write, Stack Read
9829                     emitGCregDeadUpd(id->idReg1(), dst);
9830                     break;
9831                 default:
9832                     break;
9833             }
9834
                 // The one-operand multiplies are reported as updating both EAX and EDX
9835             if (ins == INS_mulEAX || ins == INS_imulEAX)
9836             {
9837                 emitGCregDeadUpd(REG_EAX, dst);
9838                 emitGCregDeadUpd(REG_EDX, dst);
9839             }
9840
9841             // For the three operand imul instruction the target register
9842             // is encoded in the opcode
9843
9844             if (instrIs3opImul(ins))
9845             {
9846                 regNumber tgtReg = inst3opImulReg(ins);
9847                 emitGCregDeadUpd(tgtReg, dst);
9848             }
9849         }
9850     }
9851
9852     return dst;
9853 }
9854
9855 /*****************************************************************************
9856  *
9857  *  Output an instruction with a static data member (class variable).
      *
      *  Arguments:
      *     dst  - position in the output buffer at which to start writing
      *     id   - descriptor of the instruction; supplies the instruction kind,
      *            operand size, format, register 1, the field handle and the
      *            displacement within the field
      *     code - opcode bits to emit; prefixes and the size bit are merged into
      *            it here
      *     addc - optional immediate operand (nullptr when there is none)
      *
      *  Return Value:
      *     dst advanced past every byte that was written.
      *
      *  Notes:
      *     The operand address is either a location in the JIT data section
      *     (emitConsBlock), a "global address" field (base address of null), or
      *     the address returned by the EE for the static field. On amd64 the
      *     32-bit displacement is written as zero and a relocation is recorded;
      *     on x86 the absolute address is written. x86 additionally uses the
      *     short "moffset" encoding for "mov eax, [addr]" / "mov [addr], eax".
9858  */
9859
9860 BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
9861 {
9862     BYTE*                addr; // base address of the field / data section entry (may be nullptr)
9863     CORINFO_FIELD_HANDLE fldh;
9864     ssize_t              offs; // displacement added to 'addr'
9865     int                  doff; // offset into the JIT data section, or negative if fldh is not a data section entry
9866
9867     emitAttr    size      = id->idOpSize();
9868     size_t      opsz      = EA_SIZE_IN_BYTES(size); // number of immediate bytes to write for 'addc' (may shrink to 1 below)
9869     instruction ins       = id->idIns();
9870     bool        isMoffset = false; // set (x86 only) when the "mov eax <-> [addr]" short form is used
9871
9872     // Get hold of the field handle and offset
9873     fldh = id->idAddr()->iiaFieldHnd;
9874     offs = emitGetInsDsp(id);
9875
9876     // Special case: mov reg, fs:[ddd]
         // (0x64 is the FS segment override prefix)
9877     if (fldh == FLD_GLOBAL_FS)
9878     {
9879         dst += emitOutputByte(dst, 0x64);
9880     }
9881
9882     // Compute VEX prefix
9883     // Some of its callers already add VEX prefix and then call this routine.
9884     // Therefore add the VEX prefix only if it is not already present.
9885     code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
9886
9887     // Compute the REX prefix
9888     if (TakesRexWPrefix(ins, size))
9889     {
9890         code = AddRexWPrefix(ins, code);
9891     }
9892
9893     // Is there a large constant operand?
9894     if (addc && (size > EA_1BYTE))
9895     {
9896         ssize_t cval = addc->cnsVal;
9897         // Does the constant fit in a byte?
             // (Relocatable constants, mov and test never use the short immediate form.)
9898         if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
9899         {
                 // NOTE(review): bit 1 presumably selects the "sign-extended 8-bit immediate" form of the
                 // opcode; shift formats (IF_MRW_SHF) are left unchanged -- confirm against the opcode tables.
9900             if (id->idInsFmt() != IF_MRW_SHF)
9901             {
9902                 code |= 2;
9903             }
9904
                 // Only a single immediate byte is written at the end of this function
9905             opsz = 1;
9906         }
9907     }
9908 #ifdef _TARGET_X86_
9909     else
9910     {
9911         // Special case: "mov eax, [addr]" and "mov [addr], eax"
9912         // Amd64: this is one case where addr can be 64-bit in size.  This is
9913         // currently unused or not enabled on amd64 as it always uses RIP
9914         // relative addressing which results in smaller instruction size.
9915         if (ins == INS_mov && id->idReg1() == REG_EAX)
9916         {
9917             switch (id->idInsFmt())
9918             {
9919                 case IF_RWR_MRD:
9920
9921                     assert(code == (insCodeRM(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500));
9922
                         // Replace the low 32 bits (opcode + ModRM) with the one-byte load opcode 0xA0;
                         // the size switch further down ORs in the 'w' bit for non-byte operands.
9923                     code &= ~((code_t)0xFFFFFFFF);
9924                     code |= 0xA0;
9925                     isMoffset = true;
9926                     break;
9927
9928                 case IF_MWR_RRD:
9929
9930                     assert(code == (insCodeMR(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500));
9931
                         // Same as above, using the one-byte store opcode 0xA2
9932                     code &= ~((code_t)0xFFFFFFFF);
9933                     code |= 0xA2;
9934                     isMoffset = true;
9935                     break;
9936
9937                 default:
9938                     break;
9939             }
9940         }
9941     }
9942 #endif //_TARGET_X86_
9943
9944     // Special case emitting AVX instructions
9945     if (Is4ByteSSE4OrAVXInstruction(ins))
9946     {
             // Encode register 1 into the ModRM 'reg' field; 'code' is passed by address so the
             // encoder can also update prefix bits in it.
9947         unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
9948         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9949
9950         if (UseVEXEncoding())
9951         {
9952             // Emit last opcode byte
9953             // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
9954             assert((code & 0xFF) == 0);
9955             dst += emitOutputByte(dst, (code >> 8) & 0xFF);
9956         }
9957         else
9958         {
                 // No VEX prefix: write all four opcode bytes, upper word first
9959             dst += emitOutputWord(dst, code >> 16);
9960             dst += emitOutputWord(dst, code & 0xFFFF);
9961         }
9962
9963         // Emit Mod,R/M byte
             // (0x05 = mod 00, rm 101: the operand is addressed by a 32-bit displacement only)
9964         dst += emitOutputByte(dst, regcode | 0x05);
             // Everything up to the displacement is written; zero 'code' so the "if (code)" below emits nothing
9965         code = 0;
9966     }
9967     // Is this a 'big' opcode?
9968     else if (code & 0xFF000000)
9969     {
9970         // Output the REX prefix
9971         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9972
9973         // Output the highest word of the opcode.
9974         // Check again since AVX instructions encode leading opcode bytes as part of VEX prefix.
9975         if (code & 0xFF000000)
9976         {
9977             dst += emitOutputWord(dst, code >> 16);
9978         }
9979         code &= 0x0000FFFF;
9980     }
9981     else if (code & 0x00FF0000)
9982     {
9983         // Output the REX prefix
9984         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9985
9986         // Check again as VEX prefix would have encoded leading opcode byte
9987         if (code & 0x00FF0000)
9988         {
9989             dst += emitOutputByte(dst, code >> 16);
9990             code &= 0x0000FFFF;
9991         }
9992
9993         if ((ins == INS_movsx || ins == INS_movzx || ins == INS_cmpxchg || ins == INS_xchg || ins == INS_xadd ||
9994              insIsCMOV(ins)) &&
9995             size != EA_1BYTE)
9996         {
9997             // movsx and movzx are 'big' opcodes but also have the 'w' bit
9998             code++;
9999         }
10000     }
10001     else if (CodeGen::instIsFP(ins))
10002     {
10003         assert(size == EA_4BYTE || size == EA_8BYTE);
10004
              // 8-byte floating point operands use the opcode that is 4 higher than the 4-byte one
10005         if (size == EA_8BYTE)
10006         {
10007             code += 4;
10008         }
10009     }
10010     else
10011     {
10012         // Is the operand size larger than a byte?
10013         switch (size)
10014         {
10015             case EA_1BYTE:
10016                 break;
10017
10018             case EA_2BYTE:
10019                 // Output a size prefix for a 16-bit operand
10020                 dst += emitOutputByte(dst, 0x66);
10021                 __fallthrough;
10022
10023             case EA_4BYTE:
10024 #ifdef _TARGET_AMD64_
10025             case EA_8BYTE:
10026 #endif
10027                 // Set the 'w' bit to get the large version
10028                 code |= 0x1;
10029                 break;
10030
10031 #ifdef _TARGET_X86_
10032             case EA_8BYTE:
10033                 // Double operand - set the appropriate bit
10034                 code |= 0x04;
10035                 break;
10036 #endif // _TARGET_X86_
10037
10038             default:
10039                 assert(!"unexpected size");
10040         }
10041     }
10042
10043     // Output the REX prefix
          // (called unconditionally; the paths above that already wrote the prefix call the same helper)
10044     dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10045
          // 'code' is zero when the 4-byte SSE4/AVX path above already wrote the opcode and ModRM byte
10046     if (code)
10047     {
              // The *_OFF formats and the moffset form emit only a single opcode byte (no ModRM byte);
              // every other format emits the opcode byte together with its ModRM byte.
10048         if (id->idInsFmt() == IF_MRD_OFF || id->idInsFmt() == IF_RWR_MRD_OFF || isMoffset)
10049         {
10050             dst += emitOutputByte(dst, code);
10051         }
10052         else
10053         {
10054             dst += emitOutputWord(dst, code);
10055         }
10056     }
10057
10058     // Do we have a constant or a static data member?
          // (a non-negative data offset means fldh refers to an entry in the JIT data section)
10059     doff = Compiler::eeGetJitDataOffs(fldh);
10060     if (doff >= 0)
10061     {
10062         addr = emitConsBlock + doff;
10063
10064         int byteSize = EA_SIZE_IN_BYTES(size);
10065
10066 #ifndef LEGACY_BACKEND
10067         // this instruction has a fixed size (4) src.
10068         if (ins == INS_cvttss2si || ins == INS_cvtss2sd || ins == INS_vbroadcastss)
10069         {
10070             byteSize = 4;
10071         }
10072         // This has a fixed size (8) source.
10073         if (ins == INS_vbroadcastsd)
10074         {
10075             byteSize = 8;
10076         }
10077 #endif // !LEGACY_BACKEND
10078
10079         // Check that the offset is properly aligned (i.e. the ddd in [ddd])
              // (skipped when alignment checking is off or for lea; byteSize is only used by this assert)
10080         assert((emitChkAlign == false) || (ins == INS_lea) || (((size_t)addr & (byteSize - 1)) == 0));
10081     }
10082     else
10083     {
10084         // Special case: mov reg, fs:[ddd] or mov reg, [ddd]
              // (the field has no base address of its own; only 'offs' contributes to the target)
10085         if (jitStaticFldIsGlobAddr(fldh))
10086         {
10087             addr = nullptr;
10088         }
10089         else
10090         {
                  // Ask the EE for the address of the static field; failing to get one is fatal
10091             addr = (BYTE*)emitComp->info.compCompHnd->getFieldAddress(fldh, nullptr);
10092             if (addr == nullptr)
10093             {
10094                 NO_WAY("could not obtain address of static field");
10095             }
10096         }
10097     }
10098
          // Final address of the operand
10099     BYTE* target = (addr + offs);
10100
10101     if (!isMoffset)
10102     {
              // Extra delta handed to the relocation: minus the size of the immediate that will
              // follow the displacement (stays zero when there is no immediate)
10103         INT32 addlDelta = 0;
10104
10105         if (addc)
10106         {
10107             // It is of the form "ins [disp], immed"
10108             // For emitting relocation, we also need to take into account of the
10109             // additional bytes of code emitted for immed val.
10110
10111             ssize_t cval = addc->cnsVal;
10112
10113 #ifdef _TARGET_AMD64_
10114             // all these opcodes only take a sign-extended 4-byte immediate
10115             noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
10116 #else
10117             noway_assert(opsz <= 4);
10118 #endif
10119
                  // Must mirror the immediate sizes written by the switch near the end of this function
10120             switch (opsz)
10121             {
10122                 case 0:
10123                 case 4:
10124                 case 8:
10125                     addlDelta = -4;
10126                     break;
10127                 case 2:
10128                     addlDelta = -2;
10129                     break;
10130                 case 1:
10131                     addlDelta = -1;
10132                     break;
10133
10134                 default:
10135                     assert(!"unexpected operand size");
10136                     unreached();
10137             }
10138         }
10139
10140 #ifdef _TARGET_AMD64_
10141         // All static field and data section constant accesses should be marked as relocatable
              // A zero placeholder is written; the relocation recorded below refers to 'target'.
10142         noway_assert(id->idIsDspReloc());
10143         dst += emitOutputLong(dst, 0);
10144 #else  //_TARGET_X86_
              // x86: the absolute 32-bit address is written directly
10145         dst += emitOutputLong(dst, (int)target);
10146 #endif //_TARGET_X86_
10147
10148         if (id->idIsDspReloc())
10149         {
                  // The relocation is recorded against the 4 displacement bytes that were just written
10150             emitRecordRelocation((void*)(dst - sizeof(int)), target, IMAGE_REL_BASED_DISP32, 0, addlDelta);
10151         }
10152     }
10153     else
10154     {
10155 #ifdef _TARGET_AMD64_
10156         // This code path should never be hit on amd64 since it always uses RIP relative addressing.
10157         // In future if ever there is a need to enable this special case, also enable the logic
10158         // that sets isMoffset to true on amd64.
10159         unreached();
10160 #else //_TARGET_X86_
10161
              // moffset form: the address is written via emitOutputSizeT right after the opcode byte
10162         dst += emitOutputSizeT(dst, (ssize_t)target);
10163
10164         if (id->idIsDspReloc())
10165         {
10166             emitRecordRelocation((void*)(dst - TARGET_POINTER_SIZE), target, IMAGE_REL_BASED_MOFFSET);
10167         }
10168
10169 #endif //_TARGET_X86_
10170     }
10171
10172     // Now generate the constant value, if present
10173     if (addc)
10174     {
10175         ssize_t cval = addc->cnsVal;
10176
10177 #ifdef _TARGET_AMD64_
10178         // all these opcodes only take a sign-extended 4-byte immediate
10179         noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
10180 #endif
10181
              // opsz was reduced to 1 above when the short immediate form was selected;
              // 8-byte operations still write a 4-byte immediate.
10182         switch (opsz)
10183         {
10184             case 0:
10185             case 4:
10186             case 8:
10187                 dst += emitOutputLong(dst, cval);
10188                 break;
10189             case 2:
10190                 dst += emitOutputWord(dst, cval);
10191                 break;
10192             case 1:
10193                 dst += emitOutputByte(dst, cval);
10194                 break;
10195
10196             default:
10197                 assert(!"unexpected operand size");
10198         }
              // A relocatable constant is recorded against the 4 immediate bytes that were just written
10199         if (addc->cnsReloc)
10200         {
10201             emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
10202             assert(opsz == 4);
10203         }
10204     }
10205
10206     // Does this instruction operate on a GC ref value?
10207     if (id->idGCref())
10208     {
10209         switch (id->idInsFmt())
10210         {
                  // Formats that only touch memory, or only read the register: no register liveness change
10211             case IF_MRD:
10212             case IF_MRW:
10213             case IF_MWR:
10214                 break;
10215
10216             case IF_RRD_MRD:
10217                 break;
10218
10219             case IF_RWR_MRD:
                      // Register is written from memory: it now holds a GC ref of the instruction's GC type
10220                 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
10221                 break;
10222
10223             case IF_MRD_RRD:
10224             case IF_MWR_RRD:
10225             case IF_MRW_RRD:
10226                 break;
10227
10228             case IF_MRD_CNS:
10229             case IF_MWR_CNS:
10230             case IF_MRW_CNS:
10231                 break;
10232
10233             case IF_RRW_MRD:
10234
10235                 assert(id->idGCref() == GCT_BYREF);
10236                 assert(ins == INS_add || ins == INS_sub);
10237
10238                 // Mark it as holding a GCT_BYREF
10239                 emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
10240                 break;
10241
10242             default:
10243 #ifdef DEBUG
10244                 emitDispIns(id, false, false, false);
10245 #endif
10246                 assert(!"unexpected GC ref instruction format");
10247         }
10248     }
10249     else
10250     {
              // Not a GC ref instruction. Unless the instruction can only write an SSE2/AVX register,
              // report the registers it writes to emitGCregDeadUpd.
10251         if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
10252         {
10253             switch (id->idInsFmt())
10254             {
10255                 case IF_RWR_MRD:
10256                     emitGCregDeadUpd(id->idReg1(), dst);
10257                     break;
10258                 default:
10259                     break;
10260             }
10261
                  // The one-operand multiplies are reported as updating both EAX and EDX
10262             if (ins == INS_mulEAX || ins == INS_imulEAX)
10263             {
10264                 emitGCregDeadUpd(REG_EAX, dst);
10265                 emitGCregDeadUpd(REG_EDX, dst);
10266             }
10267
10268             // For the three operand imul instruction the target register
10269             // is encoded in the opcode
10270
10271             if (instrIs3opImul(ins))
10272             {
10273                 regNumber tgtReg = inst3opImulReg(ins);
10274                 emitGCregDeadUpd(tgtReg, dst);
10275             }
10276         }
10277     }
10278
10279     return dst;
10280 }
10281
10282 /*****************************************************************************
10283  *
10284  *  Output an instruction with one register operand.
       *
       *  The instruction described by 'id' is encoded starting at 'dst'. Every
       *  emitOutput* call's result is added to 'dst', and the advanced pointer is
       *  returned to the caller.
       *
       *  The encoding depends on the instruction:
       *    - inc/dec use either the long (two byte) form or the compact form;
       *    - push/pop are emitted as a single opcode byte;
       *    - the setcc family is emitted as one byte followed by a 16-bit word;
       *    - everything else is emitted as a 16-bit word (mulEAX/imulEAX first
       *      mark EAX and EDX as no longer holding GC refs).
       *
       *  Once the bytes are written, the GC register tracking is updated based on
       *  the instruction format (IF_RRD, IF_RWR or IF_RRW).
       *
       *  SSE2 and AVX instructions must not be passed to this method (asserted).
10285  */
10286
10287 BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id)
10288 {
10289     code_t code;
10290
10291     instruction ins  = id->idIns();
10292     regNumber   reg  = id->idReg1();
10293     emitAttr    size = id->idOpSize();
10294
10295     // SSE2/AVX instructions are not expected here; supporting them would
          // require updating the GC info correctly.
10296     assert(!IsSSE2Instruction(ins));
10297     assert(!IsAVXInstruction(ins));
10298
10299     // Get the 'base' opcode
10300     switch (ins)
10301     {
10302         case INS_inc:
10303         case INS_dec:
10304
              // On AMD64 the long form is always used; on other targets it is used
              // only for byte-sized operands.
              // NOTE(review): presumably because the compact one-byte inc/dec opcodes
              // are reused as REX prefixes in 64-bit mode - confirm against the ISA manual.
10305 #ifdef _TARGET_AMD64_
10306             if (true)
10307 #else
10308             if (size == EA_1BYTE)
10309 #endif
10310             {
                  // The long-form instruction is expected to directly follow the
                  // compact one in the instruction enumeration.
10311                 assert(INS_inc_l == INS_inc + 1);
10312                 assert(INS_dec_l == INS_dec + 1);
10313
10314                 // Can't use the compact form, use the long form
10315                 ins = (instruction)(ins + 1);
10316                 if (size == EA_2BYTE)
10317                 {
10318                     // Output a size prefix for a 16-bit operand
10319                     dst += emitOutputByte(dst, 0x66);
10320                 }
10321
10322                 code = insCodeRR(ins);
10323                 if (size != EA_1BYTE)
10324                 {
10325                     // Set the 'w' bit to get the large version
10326                     code |= 0x1;
10327                 }
10328
10329                 if (TakesRexWPrefix(ins, size))
10330                 {
10331                     code = AddRexWPrefix(ins, code);
10332                 }
10333
10334                 // Register...
                  // 'code' is passed by address, so the helper may update it as well as
                  // returning the register bits.
10335                 unsigned regcode = insEncodeReg012(ins, reg, size, &code);
10336
10337                 // Output the REX prefix
10338                 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10339
                  // The register bits are merged into bits 8 and up of the 16-bit value.
10340                 dst += emitOutputWord(dst, code | (regcode << 8));
10341             }
10342             else
10343             {
                  // Compact form: the register bits are OR'ed directly into the single
                  // opcode byte.
10344                 if (size == EA_2BYTE)
10345                 {
10346                     // Output a size prefix for a 16-bit operand
10347                     dst += emitOutputByte(dst, 0x66);
10348                 }
10349                 dst += emitOutputByte(dst, insCodeRR(ins) | insEncodeReg012(ins, reg, size, nullptr));
10350             }
10351             break;
10352
10353         case INS_pop:
10354         case INS_pop_hide:
10355         case INS_push:
10356         case INS_push_hide:
10357
              // push/pop are only expected with pointer-sized operands and are
              // emitted as a single opcode byte (after an optional prefix).
10358             assert(size == EA_PTRSIZE);
10359             code = insEncodeOpreg(ins, reg, size);
10360
10361             assert(!TakesVexPrefix(ins));
10362             assert(!TakesRexWPrefix(ins, size));
10363
10364             // Output the REX prefix
10365             dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10366
10367             dst += emitOutputByte(dst, code);
10368             break;
10369
10370         case INS_seto:
10371         case INS_setno:
10372         case INS_setb:
10373         case INS_setae:
10374         case INS_sete:
10375         case INS_setne:
10376         case INS_setbe:
10377         case INS_seta:
10378         case INS_sets:
10379         case INS_setns:
10380         case INS_setpe:
10381         case INS_setpo:
10382         case INS_setl:
10383         case INS_setge:
10384         case INS_setle:
10385         case INS_setg:
10386
              // The setcc family writes a byte register and never produces a GC ref.
10387             assert(id->idGCref() == GCT_NONE);
10388             assert(size == EA_1BYTE);
10389
10390             code = insEncodeMRreg(ins, reg, EA_1BYTE, insCodeMR(ins));
10391
10392             // Output the REX prefix
10393             dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10394
10395             // We expect this to always be a 'big' opcode
10396             assert(code & 0x00FF0000);
10397
              // Emit bits 16-23 first, followed by the low 16 bits.
10398             dst += emitOutputByte(dst, code >> 16);
10399             dst += emitOutputWord(dst, code & 0x0000FFFF);
10400
10401             break;
10402
10403         case INS_mulEAX:
10404         case INS_imulEAX:
10405
10406             // Kill off any GC refs in EAX or EDX
10407             emitGCregDeadUpd(REG_EAX, dst);
10408             emitGCregDeadUpd(REG_EDX, dst);
10409
10410             __fallthrough;
10411
10412         default:
10413
              // Generic single-register form; the instruction must not be tagged as
              // operating on a GC ref.
10414             assert(id->idGCref() == GCT_NONE);
10415
10416             code = insEncodeMRreg(ins, reg, size, insCodeMR(ins));
10417
10418             if (size != EA_1BYTE)
10419             {
10420                 // Set the 'w' bit to get the large version
10421                 code |= 0x1;
10422
10423                 if (size == EA_2BYTE)
10424                 {
10425                     // Output a size prefix for a 16-bit operand
10426                     dst += emitOutputByte(dst, 0x66);
10427                 }
10428             }
10429
10430             code = AddVexPrefixIfNeeded(ins, code, size);
10431
10432             if (TakesRexWPrefix(ins, size))
10433             {
10434                 code = AddRexWPrefix(ins, code);
10435             }
10436
10437             // Output the REX prefix
10438             dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10439
10440             dst += emitOutputWord(dst, code);
10441             break;
10442     }
10443
10444     // Are we writing the register? if so then update the GC information
10445     switch (id->idInsFmt())
10446     {
10447         case IF_RRD:
              // The register is only read: nothing to update.
10448             break;
10449         case IF_RWR:
              // The register is overwritten: it becomes live with the instruction's
              // GC type, or is marked dead when the instruction has no GC type.
10450             if (id->idGCref())
10451             {
10452                 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
10453             }
10454             else
10455             {
10456                 emitGCregDeadUpd(id->idReg1(), dst);
10457             }
10458             break;
10459         case IF_RRW:
10460         {
              // 'regMask' is only consumed by the asserts below, hence DEBUG-only.
10461 #ifdef DEBUG
10462             regMaskTP regMask = genRegMask(reg);
10463 #endif
10464             if (id->idGCref())
10465             {
10466                 // The reg must currently be holding either a gcref or a byref
10467                 // and the instruction must be inc or dec
10468                 assert(((emitThisGCrefRegs | emitThisByrefRegs) & regMask) &&
10469                        (ins == INS_inc || ins == INS_dec || ins == INS_inc_l || ins == INS_dec_l));
10470                 assert(id->idGCref() == GCT_BYREF);
10471                 // Mark it as holding a GCT_BYREF
10472                 emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
10473             }
10474             else
10475             {
10476                 // Can't use RRW to trash a GC ref.  It's OK for unverifiable code
10477                 // to trash Byrefs.
10478                 assert((emitThisGCrefRegs & regMask) == 0);
10479             }
10480         }
10481         break;
10482         default:
              // Any other format is unexpected for a one-register instruction.
10483 #ifdef DEBUG
10484             emitDispIns(id, false, false, false);
10485 #endif
10486             assert(!"unexpected instruction format");
10487             break;
10488     }
10489
10490     return dst;
10491 }
10492
10493 /*****************************************************************************
10494  *
10495  *  Output an instruction with two register operands.
       *
       *  The instruction described by 'id' is encoded starting at 'dst', and the
       *  advanced output pointer is returned. The work is done in these steps:
       *
       *    1. Pick the base opcode and adjust it for the instruction class
       *       (SSE/AVX, movsx/movzx/cmov, movsxd on AMD64, crc32/lzcnt/popcnt when
       *       FEATURE_HW_INTRINSICS is defined, or the generic integer form) and
       *       for the operand size.
       *    2. Encode idReg1 and idReg2 into the register fields of the last
       *       opcode byte ('regCode').
       *    3. For VEX-encoded instructions, encode one of the registers in the
       *       'vvvv' bits.
       *    4. Emit the prefix, the opcode bytes and the register byte.
       *    5. Update the GC register tracking according to the instruction format.
10496  */
10497
10498 BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
10499 {
10500     code_t code;
10501
10502     instruction ins  = id->idIns();
10503     regNumber   reg1 = id->idReg1();
10504     regNumber   reg2 = id->idReg2();
10505     emitAttr    size = id->idOpSize();
10506
10507     // Get the 'base' opcode
10508     code = insCodeRM(ins);
10509     code = AddVexPrefixIfNeeded(ins, code, size);
10510     if (IsSSEOrAVXInstruction(ins))
10511     {
10512         code = insEncodeRMreg(ins, code);
10513
10514         if (TakesRexWPrefix(ins, size))
10515         {
10516             code = AddRexWPrefix(ins, code);
10517         }
10518     }
10519     else if ((ins == INS_movsx) || (ins == INS_movzx) || (insIsCMOV(ins)))
10520     {
              // Bit 0 of the opcode is set when the operand size is 2 bytes.
10521         code = insEncodeRMreg(ins, code) | (int)(size == EA_2BYTE);
              // NOTE: the AMD64-only region below closes this block and contains the
              // whole 'movsxd' branch; on other targets the '}' that follows the
              // #endif closes this block instead.
10522 #ifdef _TARGET_AMD64_
10523
10524         assert((size < EA_4BYTE) || (insIsCMOV(ins)));
10525         if ((size == EA_8BYTE) || (ins == INS_movsx))
10526         {
10527             code = AddRexWPrefix(ins, code);
10528         }
10529     }
10530     else if (ins == INS_movsxd)
10531     {
10532         code = insEncodeRMreg(ins, code);
10533
10534 #endif // _TARGET_AMD64_
10535     }
10536 #ifdef FEATURE_HW_INTRINSICS
10537     else if ((ins == INS_crc32) || (ins == INS_lzcnt) || (ins == INS_popcnt))
10538     {
10539         code = insEncodeRMreg(ins, code);
              // crc32 with an operand wider than one byte uses the opcode variant
              // that has bit 8 set.
10540         if ((ins == INS_crc32) && (size > EA_1BYTE))
10541         {
10542             code |= 0x0100;
10543         }
10544
10545         if (size == EA_2BYTE)
10546         {
                  // Only crc32 is expected with a 16-bit operand; it needs the
                  // operand-size prefix.
10547             assert(ins == INS_crc32);
10548             dst += emitOutputByte(dst, 0x66);
10549         }
10550         else if (size == EA_8BYTE)
10551         {
10552             code = AddRexWPrefix(ins, code);
10553         }
10554     }
10555 #endif // FEATURE_HW_INTRINSICS
10556     else
10557     {
              // Generic integer instruction: restart from the MR form of the opcode.
10558         code = insEncodeMRreg(ins, insCodeMR(ins));
10559
              // NOTE(review): bit 1 is presumably the direction ('d') bit selecting
              // the "reg <- r/m" form, which 'test' does not have - confirm.
10560         if (ins != INS_test)
10561         {
10562             code |= 2;
10563         }
10564
10565         switch (size)
10566         {
10567             case EA_1BYTE:
                      // Both registers must be byte-addressable.
10568                 noway_assert(RBM_BYTE_REGS & genRegMask(reg1));
10569                 noway_assert(RBM_BYTE_REGS & genRegMask(reg2));
10570                 break;
10571
10572             case EA_2BYTE:
10573                 // Output a size prefix for a 16-bit operand
10574                 dst += emitOutputByte(dst, 0x66);
10575                 __fallthrough;
10576
10577             case EA_4BYTE:
10578                 // Set the 'w' bit to get the large version
10579                 code |= 0x1;
10580                 break;
10581
10582 #ifdef _TARGET_AMD64_
10583             case EA_8BYTE:
10584                 // TODO-AMD64-CQ: Better way to not emit REX.W when we don't need it
10585                 // Don't need to zero out the high bits explicitly
                      // (REX.W is skipped only for "xor reg, reg" with identical registers.)
10586                 if ((ins != INS_xor) || (reg1 != reg2))
10587                 {
10588                     code = AddRexWPrefix(ins, code);
10589                 }
10590
10591                 // Set the 'w' bit to get the large version
10592                 code |= 0x1;
10593                 break;
10594
10595 #endif // _TARGET_AMD64_
10596
10597             default:
10598                 assert(!"unexpected size");
10599         }
10600     }
10601
          // Combine both register encodings into 'regCode'. 'code' is passed by
          // address, so the helpers may also update the opcode/prefix bits.
10602     unsigned regCode = insEncodeReg345(ins, reg1, size, &code);
10603     regCode |= insEncodeReg012(ins, reg2, size, &code);
10604
10605     if (TakesVexPrefix(ins))
10606     {
10607         // In case of AVX instructions that take 3 operands, we generally want to encode reg1
10608         // as first source.  In this case, reg1 is both a source and a destination.
10609         // The exception is the "merge" 3-operand case, where we have a move instruction, such
10610         // as movss, and we want to merge the source with itself.
10611         //
10612         // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
10613         // now we use the single source as source1 and source2.
10614         if (IsDstDstSrcAVXInstruction(ins))
10615         {
10616             // encode source/dest operand reg in 'vvvv' bits in 1's complement form
10617             code = insEncodeReg3456(ins, reg1, size, code);
10618         }
10619         else if (IsDstSrcSrcAVXInstruction(ins))
10620         {
10621             // encode source operand reg in 'vvvv' bits in 1's complement form
10622             code = insEncodeReg3456(ins, reg2, size, code);
10623         }
10624     }
10625
10626     // Output the REX prefix
10627     dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10628
          // Emit any opcode bytes held above the low 16 bits of 'code', leaving only
          // the low 16 bits (or less) for the final step below.
10629     if (code & 0xFF000000)
10630     {
10631         // Output the highest word of the opcode
10632         dst += emitOutputWord(dst, code >> 16);
10633         code &= 0x0000FFFF;
10634
10635         if (Is4ByteSSE4Instruction(ins))
10636         {
10637             // Output 3rd byte of the opcode
10638             dst += emitOutputByte(dst, code);
10639             code &= 0xFF00;
10640         }
10641     }
10642     else if (code & 0x00FF0000)
10643     {
10644         dst += emitOutputByte(dst, code >> 16);
10645         code &= 0x0000FFFF;
10646     }
10647
10648     // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
10649     if ((code & 0xFF00) == 0xC000)
10650     {
              // The upper byte of 'code' already holds the 0xC0 register-form pattern:
              // merge the register bits into it and emit both bytes together.
10651         dst += emitOutputWord(dst, code | (regCode << 8));
10652     }
10653     else if ((code & 0xFF) == 0x00)
10654     {
10655         // This case happens for SSE4/AVX instructions only
10656         assert(IsAVXInstruction(ins) || IsSSE4Instruction(ins));
10657
              // Only bits 8-15 hold an opcode byte; emit it, then the register byte.
10658         dst += emitOutputByte(dst, (code >> 8) & 0xFF);
10659         dst += emitOutputByte(dst, (0xC0 | regCode));
10660     }
10661     else
10662     {
              // Emit the remaining 16 bits of the opcode, then the register byte.
10663         dst += emitOutputWord(dst, code);
10664         dst += emitOutputByte(dst, (0xC0 | regCode));
10665     }
10666
10667     // Does this instruction operate on a GC ref value?
10668     if (id->idGCref())
10669     {
10670         switch (id->idInsFmt())
10671         {
10672             case IF_RRD_RRD:
                      // Both registers are only read: nothing to update.
10673                 break;
10674
10675             case IF_RWR_RRD:
10676
10677                 if (emitSyncThisObjReg != REG_NA && emitIGisInProlog(emitCurIG) && reg2 == (int)REG_ARG_0)
10678                 {
10679                     // We're relocating "this" in the prolog
10680                     assert(emitComp->lvaIsOriginalThisArg(0));
10681                     assert(emitComp->lvaTable[0].lvRegister);
10682                     assert(emitComp->lvaTable[0].lvRegNum == reg1);
10683
10684                     if (emitFullGCinfo)
10685                     {
10686                         emitGCregLiveSet(id->idGCref(), genRegMask(reg1), dst, true);
10687                         break;
10688                     }
10689                     else
10690                     {
10691                         /* If emitFullGCinfo==false, then we don't use any
10692                            regPtrDsc's and so explicitly note the location
10693                            of "this" in GCEncode.cpp
10694                          */
                              // Nothing to do here: control continues with the regular
                              // live update below.
10695                     }
10696                 }
10697
10698                 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
10699                 break;
10700
10701             case IF_RRW_RRD:
10702
10703                 switch (id->idIns())
10704                 {
10705                     /*
10706                         This must be one of the following cases:
10707
10708                         xor reg, reg        to assign NULL
10709
10710                         and r1 , r2         if (ptr1 && ptr2) ...
10711                         or  r1 , r2         if (ptr1 || ptr2) ...
10712
10713                         add r1 , r2         to compute a normal byref
10714                         sub r1 , r2         to compute a strange byref (VC only)
10715
10716                     */
10717                     case INS_xor:
10718                         assert(id->idReg1() == id->idReg2());
10719                         emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
10720                         break;
10721
10722                     case INS_or:
10723                     case INS_and:
                              // The result of and/or is not tracked as a GC pointer.
10724                         emitGCregDeadUpd(id->idReg1(), dst);
10725                         break;
10726
10727                     case INS_add:
10728                     case INS_sub:
10729                         assert(id->idGCref() == GCT_BYREF);
10730
10731 #ifdef DEBUG
10732                         regMaskTP regMask;
10733                         regMask = genRegMask(reg1) | genRegMask(reg2);
10734
10735                         // r1/r2 could have been a GCREF as GCREF + int=BYREF
10736                         //                            or BYREF+/-int=BYREF
10737                         assert(((regMask & emitThisGCrefRegs) && (ins == INS_add)) ||
10738                                ((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub)));
10739 #endif
10740                         // Mark r1 as holding a byref
10741                         emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
10742                         break;
10743
10744                     default:
10745 #ifdef DEBUG
10746                         emitDispIns(id, false, false, false);
10747 #endif
10748                         assert(!"unexpected GC reg update instruction");
10749                 }
10750
10751                 break;
10752
10753             case IF_RRW_RRW:
10754                 // This must be "xchg reg1, reg2"
10755                 assert(id->idIns() == INS_xchg);
10756
10757                 // If we got here, the GC-ness of the registers doesn't match, so we have to "swap" them in the GC
10758                 // register pointer mask.
                      // Note that with LEGACY_BACKEND defined no GC update is performed here.
10759                 CLANG_FORMAT_COMMENT_ANCHOR;
10760
10761 #ifndef LEGACY_BACKEND
10762                 GCtype gc1, gc2;
10763
                      // Capture the current GC type of each register before changing anything.
10764                 gc1 = emitRegGCtype(reg1);
10765                 gc2 = emitRegGCtype(reg2);
10766
10767                 if (gc1 != gc2)
10768                 {
10769                     // Kill the GC-info about the GC registers
10770
10771                     if (needsGC(gc1))
10772                     {
10773                         emitGCregDeadUpd(reg1, dst);
10774                     }
10775
10776                     if (needsGC(gc2))
10777                     {
10778                         emitGCregDeadUpd(reg2, dst);
10779                     }
10780
10781                     // Now, swap the info
10782
10783                     if (needsGC(gc1))
10784                     {
10785                         emitGCregLiveUpd(gc1, reg2, dst);
10786                     }
10787
10788                     if (needsGC(gc2))
10789                     {
10790                         emitGCregLiveUpd(gc2, reg1, dst);
10791                     }
10792                 }
10793 #endif // !LEGACY_BACKEND
10794                 break;
10795
10796             default:
10797 #ifdef DEBUG
10798                 emitDispIns(id, false, false, false);
10799 #endif
10800                 assert(!"unexpected GC ref instruction format");
10801         }
10802     }
10803     else
10804     {
              // The instruction does not produce a GC ref: any general-purpose register
              // it overwrites no longer holds one. Instructions that can only write
              // SSE2/AVX registers are skipped.
10805         if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
10806         {
10807             switch (id->idInsFmt())
10808             {
10809                 case IF_RRD_CNS:
10810                     // INS_mulEAX can not be used with any of these formats
10811                     assert(ins != INS_mulEAX && ins != INS_imulEAX);
10812
10813                     // For the three operand imul instruction the target
10814                     // register is encoded in the opcode
10815
10816                     if (instrIs3opImul(ins))
10817                     {
10818                         regNumber tgtReg = inst3opImulReg(ins);
10819                         emitGCregDeadUpd(tgtReg, dst);
10820                     }
10821                     break;
10822
10823                 case IF_RWR_RRD:
10824                 case IF_RRW_RRD:
10825                     // INS_mov_xmm2i writes to reg2.
10826                     if (ins == INS_mov_xmm2i)
10827                     {
10828                         emitGCregDeadUpd(id->idReg2(), dst);
10829                     }
10830                     else
10831                     {
10832                         emitGCregDeadUpd(id->idReg1(), dst);
10833                     }
10834                     break;
10835
10836                 default:
10837                     break;
10838             }
10839         }
10840     }
10841
10842     return dst;
10843 }
10844
/*****************************************************************************
 *
 *  Output an AVX instruction with three register operands.
 *
 *  idReg1 is the target register, idReg2 is the first source (encoded in the
 *  VEX 'vvvv' bits) and idReg3 is the second source. The instruction is encoded
 *  starting at 'dst' and the advanced output pointer is returned.
 *
 *  The instruction must not be tagged with a GC ref type (checked with
 *  noway_assert), so no GC register tracking is updated here.
 */

10845 BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id)
10846 {
10847     code_t code;
10848
10849     instruction ins = id->idIns();
          // Only three-operand AVX instructions (or the AVX blendv forms) are expected.
10850     assert(IsAVXInstruction(ins));
10851     assert(IsThreeOperandAVXInstruction(ins) || isAvxBlendv(ins));
10852     regNumber targetReg = id->idReg1();
10853     regNumber src1      = id->idReg2();
10854     regNumber src2      = id->idReg3();
10855     emitAttr  size      = id->idOpSize();
10856
          // Build the base opcode, including the VEX prefix bits.
10857     code = insCodeRM(ins);
10858     code = AddVexPrefixIfNeeded(ins, code, size);
10859     code = insEncodeRMreg(ins, code);
10860
10861     if (TakesRexWPrefix(ins, size))
10862     {
10863         code = AddRexWPrefix(ins, code);
10864     }
10865
          // The target and the second source go into the register byte; 'code' is
          // passed by address, so the helpers may also update the prefix bits.
10866     unsigned regCode = insEncodeReg345(ins, targetReg, size, &code);
10867     regCode |= insEncodeReg012(ins, src2, size, &code);
10868     // encode source operand reg in 'vvvv' bits in 1's complement form
10869     code = insEncodeReg3456(ins, src1, size, code);
10870
10871     // Output the REX prefix
10872     dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10873
10874     // Is this a 'big' opcode?
10875     if (code & 0xFF000000)
10876     {
10877         // Output the highest word of the opcode
10878         dst += emitOutputWord(dst, code >> 16);
10879         code &= 0x0000FFFF;
10880     }
10881     else if (code & 0x00FF0000)
10882     {
              // Only bits 16-23 hold an extra opcode byte.
10883         dst += emitOutputByte(dst, code >> 16);
10884         code &= 0x0000FFFF;
10885     }
10886
10887     // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
10888     if ((code & 0xFF00) == 0xC000)
10889     {
              // The upper byte of 'code' already holds the 0xC0 register-form pattern:
              // merge the register bits into it and emit both bytes together.
10890         dst += emitOutputWord(dst, code | (regCode << 8));
10891     }
10892     else if ((code & 0xFF) == 0x00)
10893     {
10894         // This case happens for AVX instructions only
10895         assert(IsAVXInstruction(ins));
10896
              // Only bits 8-15 hold an opcode byte; emit it, then the register byte.
10897         dst += emitOutputByte(dst, (code >> 8) & 0xFF);
10898         dst += emitOutputByte(dst, (0xC0 | regCode));
10899     }
10900     else
10901     {
              // Emit the remaining 16 bits of the opcode, then the register byte.
10902         dst += emitOutputWord(dst, code);
10903         dst += emitOutputByte(dst, (0xC0 | regCode));
10904     }
10905
          // GC refs are never expected in the operands of these instructions.
10906     noway_assert(!id->idGCref());
10907
10908     return dst;
10909 }
10910
10911 /*****************************************************************************
10912  *
10913  *  Output an instruction with a register and constant operands.
10914  */
10915
10916 BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
10917 {
10918     code_t      code;
10919     emitAttr    size      = id->idOpSize();
10920     instruction ins       = id->idIns();
10921     regNumber   reg       = id->idReg1();
10922     ssize_t     val       = emitGetInsSC(id);
10923     bool        valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
10924
10925     // BT reg,imm might be useful but it requires special handling of the immediate value
10926     // (it is always encoded in a byte). Let's not complicate things until this is needed.
10927     assert(ins != INS_bt);
10928
10929     if (id->idIsCnsReloc())
10930     {
10931         valInByte = false; // relocs can't be placed in a byte
10932     }
10933
10934     noway_assert(emitVerifyEncodable(ins, size, reg));
10935
10936 #ifndef LEGACY_BACKEND
10937     if (IsSSEOrAVXInstruction(ins))
10938     {
10939         // Handle SSE2 instructions of the form "opcode reg, immed8"
10940
10941         assert(id->idGCref() == GCT_NONE);
10942         assert(valInByte);
10943         // The left and right shifts use the same encoding, and are distinguished by the Reg/Opcode field.
10944         regNumber regOpcode;
10945         switch (ins)
10946         {
10947             case INS_psrldq:
10948                 regOpcode = (regNumber)3;
10949                 break;
10950             case INS_pslldq:
10951                 regOpcode = (regNumber)7;
10952                 break;
10953             case INS_psrld:
10954             case INS_psrlw:
10955             case INS_psrlq:
10956                 regOpcode = (regNumber)2;
10957                 break;
10958             case INS_pslld:
10959             case INS_psllw:
10960             case INS_psllq:
10961                 regOpcode = (regNumber)6;
10962                 break;
10963             case INS_psrad:
10964             case INS_psraw:
10965                 regOpcode = (regNumber)4;
10966                 break;
10967             default:
10968                 assert(!"Invalid instruction for SSE2 instruction of the form: opcode reg, immed8");
10969                 regOpcode = REG_NA;
10970                 break;
10971         }
10972
10973         // Get the 'base' opcode.
10974         code = insCodeMI(ins);
10975         code = AddVexPrefixIfNeeded(ins, code, size);
10976         code = insEncodeMIreg(ins, reg, size, code);
10977         assert(code & 0x00FF0000);
10978         if (TakesVexPrefix(ins))
10979         {
10980             // The 'vvvv' bits encode the destination register, which for this case (RI)
10981             // is the same as the source.
10982             code = insEncodeReg3456(ins, reg, size, code);
10983         }
10984
10985         unsigned regcode = (insEncodeReg345(ins, regOpcode, size, &code) | insEncodeReg012(ins, reg, size, &code)) << 8;
10986
10987         // Output the REX prefix
10988         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10989
10990         if (code & 0xFF000000)
10991         {
10992             dst += emitOutputWord(dst, code >> 16);
10993         }
10994         else if (code & 0xFF0000)
10995         {
10996             dst += emitOutputByte(dst, code >> 16);
10997         }
10998
10999         dst += emitOutputWord(dst, code | regcode);
11000
11001         dst += emitOutputByte(dst, val);
11002
11003         return dst;
11004     }
11005 #endif // !LEGACY_BACKEND
11006
11007     // The 'mov' opcode is special
11008     if (ins == INS_mov)
11009     {
11010         code = insCodeACC(ins);
11011         assert(code < 0x100);
11012
11013         code |= 0x08; // Set the 'w' bit
11014         unsigned regcode = insEncodeReg012(ins, reg, size, &code);
11015         code |= regcode;
11016
11017         // This is INS_mov and will not take VEX prefix
11018         assert(!TakesVexPrefix(ins));
11019
11020         if (TakesRexWPrefix(ins, size))
11021         {
11022             code = AddRexWPrefix(ins, code);
11023         }
11024
11025         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11026
11027         dst += emitOutputByte(dst, code);
11028         if (size == EA_4BYTE)
11029         {
11030             dst += emitOutputLong(dst, val);
11031         }
11032 #ifdef _TARGET_AMD64_
11033         else
11034         {
11035             assert(size == EA_PTRSIZE);
11036             dst += emitOutputSizeT(dst, val);
11037         }
11038 #endif
11039
11040         if (id->idIsCnsReloc())
11041         {
11042             emitRecordRelocation((void*)(dst - (unsigned)EA_SIZE(size)), (void*)(size_t)val, IMAGE_REL_BASED_MOFFSET);
11043         }
11044
11045         goto DONE;
11046     }
11047
11048     // Decide which encoding is the shortest
11049     bool useSigned, useACC;
11050
11051     if (reg == REG_EAX && !instrIs3opImul(ins))
11052     {
11053         if (size == EA_1BYTE || (ins == INS_test))
11054         {
11055             // For al, ACC encoding is always the smallest
11056             useSigned = false;
11057             useACC    = true;
11058         }
11059         else
11060         {
11061             /* For ax/eax, we avoid ACC encoding for small constants as we
11062              * can emit the small constant and have it sign-extended.
11063              * For big constants, the ACC encoding is better as we can use
11064              * the 1 byte opcode
11065              */
11066
11067             if (valInByte)
11068             {
11069                 // avoid using ACC encoding
11070                 useSigned = true;
11071                 useACC    = false;
11072             }
11073             else
11074             {
11075                 useSigned = false;
11076                 useACC    = true;
11077             }
11078         }
11079     }
11080     else
11081     {
11082         useACC = false;
11083
11084         if (valInByte)
11085         {
11086             useSigned = true;
11087         }
11088         else
11089         {
11090             useSigned = false;
11091         }
11092     }
11093
11094     // "test" has no 's' bit
11095     if (ins == INS_test)
11096     {
11097         useSigned = false;
11098     }
11099
11100     // Get the 'base' opcode
11101     if (useACC)
11102     {
11103         assert(!useSigned);
11104         code = insCodeACC(ins);
11105     }
11106     else
11107     {
11108         assert(!useSigned || valInByte);
11109
11110         // Some instructions (at least 'imul') do not have a
11111         // r/m, immed form, but do have a dstReg,srcReg,imm8 form.
11112         if (valInByte && useSigned && insNeedsRRIb(ins))
11113         {
11114             code = insEncodeRRIb(ins, reg, size);
11115         }
11116         else
11117         {
11118             code = insCodeMI(ins);
11119             code = AddVexPrefixIfNeeded(ins, code, size);
11120             code = insEncodeMIreg(ins, reg, size, code);
11121         }
11122     }
11123
11124     switch (size)
11125     {
11126         case EA_1BYTE:
11127             break;
11128
11129         case EA_2BYTE:
11130             // Output a size prefix for a 16-bit operand
11131             dst += emitOutputByte(dst, 0x66);
11132             __fallthrough;
11133
11134         case EA_4BYTE:
11135             // Set the 'w' bit to get the large version
11136             code |= 0x1;
11137             break;
11138
11139 #ifdef _TARGET_AMD64_
11140         case EA_8BYTE:
11141             /* Set the 'w' bit to get the large version */
11142             /* and the REX.W bit to get the really large version */
11143
11144             code = AddRexWPrefix(ins, code);
11145             code |= 0x1;
11146             break;
11147 #endif
11148
11149         default:
11150             assert(!"unexpected size");
11151     }
11152
11153     // Output the REX prefix
11154     dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11155
11156     // Does the value fit in a sign-extended byte?
11157     // Important!  Only set the 's' bit when we have a size larger than EA_1BYTE.
11158     // Note: A sign-extending immediate when (size == EA_1BYTE) is invalid in 64-bit mode.
11159
11160     if (useSigned && (size > EA_1BYTE))
11161     {
11162         // We can just set the 's' bit, and issue an immediate byte
11163
11164         code |= 0x2; // Set the 's' bit to use a sign-extended immediate byte.
11165         dst += emitOutputWord(dst, code);
11166         dst += emitOutputByte(dst, val);
11167     }
11168     else
11169     {
11170         // Can we use an accumulator (EAX) encoding?
11171         if (useACC)
11172         {
11173             dst += emitOutputByte(dst, code);
11174         }
11175         else
11176         {
11177             dst += emitOutputWord(dst, code);
11178         }
11179
11180         switch (size)
11181         {
11182             case EA_1BYTE:
11183                 dst += emitOutputByte(dst, val);
11184                 break;
11185             case EA_2BYTE:
11186                 dst += emitOutputWord(dst, val);
11187                 break;
11188             case EA_4BYTE:
11189                 dst += emitOutputLong(dst, val);
11190                 break;
11191 #ifdef _TARGET_AMD64_
11192             case EA_8BYTE:
11193                 dst += emitOutputLong(dst, val);
11194                 break;
11195 #endif // _TARGET_AMD64_
11196             default:
11197                 break;
11198         }
11199
11200         if (id->idIsCnsReloc())
11201         {
11202             emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
11203             assert(size == EA_4BYTE);
11204         }
11205     }
11206
11207 DONE:
11208
11209     // Does this instruction operate on a GC ref value?
11210     if (id->idGCref())
11211     {
11212         switch (id->idInsFmt())
11213         {
11214             case IF_RRD_CNS:
11215                 break;
11216
11217             case IF_RWR_CNS:
11218                 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
11219                 break;
11220
11221             case IF_RRW_CNS:
11222                 assert(id->idGCref() == GCT_BYREF);
11223
11224 #ifdef DEBUG
11225                 regMaskTP regMask;
11226                 regMask = genRegMask(reg);
11227                 // FIXNOW review the other places and relax the assert there too
11228
11229                 // The reg must currently be holding either a gcref or a byref
11230                 // GCT_GCREF+int = GCT_BYREF, and GCT_BYREF+/-int = GCT_BYREF
11231                 if (emitThisGCrefRegs & regMask)
11232                 {
11233                     assert(ins == INS_add);
11234                 }
11235                 if (emitThisByrefRegs & regMask)
11236                 {
11237                     assert(ins == INS_add || ins == INS_sub);
11238                 }
11239 #endif
11240                 // Mark it as holding a GCT_BYREF
11241                 emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
11242                 break;
11243
11244             default:
11245 #ifdef DEBUG
11246                 emitDispIns(id, false, false, false);
11247 #endif
11248                 assert(!"unexpected GC ref instruction format");
11249         }
11250
11251         // mul can never produce a GC ref
11252         assert(!instrIs3opImul(ins));
11253         assert(ins != INS_mulEAX && ins != INS_imulEAX);
11254     }
11255     else
11256     {
11257         switch (id->idInsFmt())
11258         {
11259             case IF_RRD_CNS:
11260                 // INS_mulEAX can not be used with any of these formats
11261                 assert(ins != INS_mulEAX && ins != INS_imulEAX);
11262
11263                 // For the three operand imul instruction the target
11264                 // register is encoded in the opcode
11265
11266                 if (instrIs3opImul(ins))
11267                 {
11268                     regNumber tgtReg = inst3opImulReg(ins);
11269                     emitGCregDeadUpd(tgtReg, dst);
11270                 }
11271                 break;
11272
11273             case IF_RRW_CNS:
11274             case IF_RWR_CNS:
11275                 assert(!instrIs3opImul(ins));
11276
11277                 emitGCregDeadUpd(id->idReg1(), dst);
11278                 break;
11279
11280             default:
11281 #ifdef DEBUG
11282                 emitDispIns(id, false, false, false);
11283 #endif
11284                 assert(!"unexpected GC ref instruction format");
11285         }
11286     }
11287
11288     return dst;
11289 }
11290
11291 /*****************************************************************************
11292  *
11293  *  Output an instruction with a constant operand.
11294  */
11295
11296 BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id)
11297 {
11298     code_t      code;
11299     instruction ins       = id->idIns();
11300     emitAttr    size      = id->idOpSize();
11301     ssize_t     val       = emitGetInsSC(id);
11302     bool        valInByte = ((signed char)val == val);
11303
11304     // We would to update GC info correctly
11305     assert(!IsSSE2Instruction(ins));
11306     assert(!IsAVXInstruction(ins));
11307
11308 #ifdef _TARGET_AMD64_
11309     // all these opcodes take a sign-extended 4-byte immediate, max
11310     noway_assert(size < EA_8BYTE || ((int)val == val && !id->idIsCnsReloc()));
11311 #endif
11312
11313     if (id->idIsCnsReloc())
11314     {
11315         valInByte = false; // relocs can't be placed in a byte
11316
11317         // Of these instructions only the push instruction can have reloc
11318         assert(ins == INS_push || ins == INS_push_hide);
11319     }
11320
11321     switch (ins)
11322     {
11323         case INS_jge:
11324             assert((val >= -128) && (val <= 127));
11325             dst += emitOutputByte(dst, insCode(ins));
11326             dst += emitOutputByte(dst, val);
11327             break;
11328
11329         case INS_loop:
11330             assert((val >= -128) && (val <= 127));
11331             dst += emitOutputByte(dst, insCodeMI(ins));
11332             dst += emitOutputByte(dst, val);
11333             break;
11334
11335         case INS_ret:
11336             assert(val);
11337             dst += emitOutputByte(dst, insCodeMI(ins));
11338             dst += emitOutputWord(dst, val);
11339             break;
11340
11341         case INS_push_hide:
11342         case INS_push:
11343             code = insCodeMI(ins);
11344
11345             // Does the operand fit in a byte?
11346             if (valInByte)
11347             {
11348                 dst += emitOutputByte(dst, code | 2);
11349                 dst += emitOutputByte(dst, val);
11350             }
11351             else
11352             {
11353                 if (TakesRexWPrefix(ins, size))
11354                 {
11355                     code = AddRexWPrefix(ins, code);
11356                     dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11357                 }
11358
11359                 dst += emitOutputByte(dst, code);
11360                 dst += emitOutputLong(dst, val);
11361                 if (id->idIsCnsReloc())
11362                 {
11363                     emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
11364                 }
11365             }
11366
11367             // Did we push a GC ref value?
11368             if (id->idGCref())
11369             {
11370 #ifdef DEBUG
11371                 printf("UNDONE: record GCref push [cns]\n");
11372 #endif
11373             }
11374
11375             break;
11376
11377         default:
11378             assert(!"unexpected instruction");
11379     }
11380
11381     return dst;
11382 }
11383
11384 /*****************************************************************************
11385  *
11386  *  Output a local jump instruction.
11387  *  This function also handles non-jumps that have jump-like characteristics, like RIP-relative LEA of a label that
11388  *  needs to get bound to an actual address and processed by branch shortening.
11389  */
11390
11391 BYTE* emitter::emitOutputLJ(BYTE* dst, instrDesc* i)
11392 {
11393     unsigned srcOffs;
11394     unsigned dstOffs;
11395     ssize_t  distVal;
11396
11397     instrDescJmp* id  = (instrDescJmp*)i;
11398     instruction   ins = id->idIns();
11399     bool          jmp;
11400     bool          relAddr = true; // does the instruction use relative-addressing?
11401
11402     // SSE2 doesnt make any sense here
11403     assert(!IsSSE2Instruction(ins));
11404     assert(!IsAVXInstruction(ins));
11405
11406     size_t ssz;
11407     size_t lsz;
11408
11409     switch (ins)
11410     {
11411         default:
11412             ssz = JCC_SIZE_SMALL;
11413             lsz = JCC_SIZE_LARGE;
11414             jmp = true;
11415             break;
11416
11417         case INS_jmp:
11418             ssz = JMP_SIZE_SMALL;
11419             lsz = JMP_SIZE_LARGE;
11420             jmp = true;
11421             break;
11422
11423         case INS_call:
11424             ssz = lsz = CALL_INST_SIZE;
11425             jmp       = false;
11426             break;
11427
11428         case INS_push_hide:
11429         case INS_push:
11430             ssz = lsz = 5;
11431             jmp       = false;
11432             relAddr   = false;
11433             break;
11434
11435         case INS_mov:
11436         case INS_lea:
11437             ssz = lsz = id->idCodeSize();
11438             jmp       = false;
11439             relAddr   = false;
11440             break;
11441     }
11442
11443     // Figure out the distance to the target
11444     srcOffs = emitCurCodeOffs(dst);
11445     dstOffs = id->idAddr()->iiaIGlabel->igOffs;
11446
11447     if (relAddr)
11448     {
11449         distVal = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs));
11450     }
11451     else
11452     {
11453         distVal = (ssize_t)emitOffsetToPtr(dstOffs);
11454     }
11455
11456     if (dstOffs <= srcOffs)
11457     {
11458         // This is a backward jump - distance is known at this point
11459         CLANG_FORMAT_COMMENT_ANCHOR;
11460
11461 #if DEBUG_EMIT
11462         if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
11463         {
11464             size_t blkOffs = id->idjIG->igOffs;
11465
11466             if (INTERESTING_JUMP_NUM == 0)
11467             {
11468                 printf("[3] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
11469             }
11470             printf("[3] Jump  block is at %08X - %02X = %08X\n", blkOffs, emitOffsAdj, blkOffs - emitOffsAdj);
11471             printf("[3] Jump        is at %08X - %02X = %08X\n", srcOffs, emitOffsAdj, srcOffs - emitOffsAdj);
11472             printf("[3] Label block is at %08X - %02X = %08X\n", dstOffs, emitOffsAdj, dstOffs - emitOffsAdj);
11473         }
11474 #endif
11475
11476         // Can we use a short jump?
11477         if (jmp && distVal - ssz >= (size_t)JMP_DIST_SMALL_MAX_NEG)
11478         {
11479             emitSetShortJump(id);
11480         }
11481     }
11482     else
11483     {
11484         // This is a  forward jump - distance will be an upper limit
11485         emitFwdJumps = true;
11486
11487         // The target offset will be closer by at least 'emitOffsAdj', but only if this
11488         // jump doesn't cross the hot-cold boundary.
11489         if (!emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
11490         {
11491             dstOffs -= emitOffsAdj;
11492             distVal -= emitOffsAdj;
11493         }
11494
11495         // Record the location of the jump for later patching
11496         id->idjOffs = dstOffs;
11497
11498         // Are we overflowing the id->idjOffs bitfield?
11499         if (id->idjOffs != dstOffs)
11500         {
11501             IMPL_LIMITATION("Method is too large");
11502         }
11503
11504 #if DEBUG_EMIT
11505         if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
11506         {
11507             size_t blkOffs = id->idjIG->igOffs;
11508
11509             if (INTERESTING_JUMP_NUM == 0)
11510             {
11511                 printf("[4] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
11512             }
11513             printf("[4] Jump  block is at %08X\n", blkOffs);
11514             printf("[4] Jump        is at %08X\n", srcOffs);
11515             printf("[4] Label block is at %08X - %02X = %08X\n", dstOffs + emitOffsAdj, emitOffsAdj, dstOffs);
11516         }
11517 #endif
11518
11519         // Can we use a short jump?
11520         if (jmp && distVal - ssz <= (size_t)JMP_DIST_SMALL_MAX_POS)
11521         {
11522             emitSetShortJump(id);
11523         }
11524     }
11525
11526     // Adjust the offset to emit relative to the end of the instruction
11527     if (relAddr)
11528     {
11529         distVal -= id->idjShort ? ssz : lsz;
11530     }
11531
11532 #ifdef DEBUG
11533     if (0 && emitComp->verbose)
11534     {
11535         size_t sz          = id->idjShort ? ssz : lsz;
11536         int    distValSize = id->idjShort ? 4 : 8;
11537         printf("; %s jump [%08X/%03u] from %0*X to %0*X: dist = %08XH\n", (dstOffs <= srcOffs) ? "Fwd" : "Bwd",
11538                emitComp->dspPtr(id), id->idDebugOnlyInfo()->idNum, distValSize, srcOffs + sz, distValSize, dstOffs,
11539                distVal);
11540     }
11541 #endif
11542
11543     // What size jump should we use?
11544     if (id->idjShort)
11545     {
11546         // Short jump
11547         assert(!id->idjKeepLong);
11548         assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false);
11549
11550         assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL);
11551         assert(JMP_SIZE_SMALL == 2);
11552
11553         assert(jmp);
11554
11555         if (emitInstCodeSz(id) != JMP_SIZE_SMALL)
11556         {
11557             emitOffsAdj += emitInstCodeSz(id) - JMP_SIZE_SMALL;
11558
11559 #ifdef DEBUG
11560             if (emitComp->verbose)
11561             {
11562                 printf("; NOTE: size of jump [%08X] mis-predicted\n", emitComp->dspPtr(id));
11563             }
11564 #endif
11565         }
11566
11567         dst += emitOutputByte(dst, insCode(ins));
11568
11569         // For forward jumps, record the address of the distance value
11570         id->idjTemp.idjAddr = (distVal > 0) ? dst : nullptr;
11571
11572         dst += emitOutputByte(dst, distVal);
11573     }
11574     else
11575     {
11576         code_t code;
11577
11578         // Long  jump
11579         if (jmp)
11580         {
11581             // clang-format off
11582             assert(INS_jmp + (INS_l_jmp - INS_jmp) == INS_l_jmp);
11583             assert(INS_jo  + (INS_l_jmp - INS_jmp) == INS_l_jo);
11584             assert(INS_jb  + (INS_l_jmp - INS_jmp) == INS_l_jb);
11585             assert(INS_jae + (INS_l_jmp - INS_jmp) == INS_l_jae);
11586             assert(INS_je  + (INS_l_jmp - INS_jmp) == INS_l_je);
11587             assert(INS_jne + (INS_l_jmp - INS_jmp) == INS_l_jne);
11588             assert(INS_jbe + (INS_l_jmp - INS_jmp) == INS_l_jbe);
11589             assert(INS_ja  + (INS_l_jmp - INS_jmp) == INS_l_ja);
11590             assert(INS_js  + (INS_l_jmp - INS_jmp) == INS_l_js);
11591             assert(INS_jns + (INS_l_jmp - INS_jmp) == INS_l_jns);
11592             assert(INS_jpe + (INS_l_jmp - INS_jmp) == INS_l_jpe);
11593             assert(INS_jpo + (INS_l_jmp - INS_jmp) == INS_l_jpo);
11594             assert(INS_jl  + (INS_l_jmp - INS_jmp) == INS_l_jl);
11595             assert(INS_jge + (INS_l_jmp - INS_jmp) == INS_l_jge);
11596             assert(INS_jle + (INS_l_jmp - INS_jmp) == INS_l_jle);
11597             assert(INS_jg  + (INS_l_jmp - INS_jmp) == INS_l_jg);
11598             // clang-format on
11599
11600             code = insCode((instruction)(ins + (INS_l_jmp - INS_jmp)));
11601         }
11602         else if (ins == INS_push || ins == INS_push_hide)
11603         {
11604             assert(insCodeMI(INS_push) == 0x68);
11605             code = 0x68;
11606         }
11607         else if (ins == INS_mov)
11608         {
11609             // Make it look like IF_SWR_CNS so that emitOutputSV emits the r/m32 for us
11610             insFormat tmpInsFmt   = id->idInsFmt();
11611             insGroup* tmpIGlabel  = id->idAddr()->iiaIGlabel;
11612             bool      tmpDspReloc = id->idIsDspReloc();
11613
11614             id->idInsFmt(IF_SWR_CNS);
11615             id->idAddr()->iiaLclVar = ((instrDescLbl*)id)->dstLclVar;
11616             id->idSetIsDspReloc(false);
11617
11618             dst = emitOutputSV(dst, id, insCodeMI(ins));
11619
11620             // Restore id fields with original values
11621             id->idInsFmt(tmpInsFmt);
11622             id->idAddr()->iiaIGlabel = tmpIGlabel;
11623             id->idSetIsDspReloc(tmpDspReloc);
11624             code = 0xCC;
11625         }
11626         else if (ins == INS_lea)
11627         {
11628             // Make an instrDesc that looks like IF_RWR_ARD so that emitOutputAM emits the r/m32 for us.
11629             // We basically are doing what emitIns_R_AI does.
11630             // TODO-XArch-Cleanup: revisit this.
11631             instrDescAmd  idAmdStackLocal;
11632             instrDescAmd* idAmd = &idAmdStackLocal;
11633             *(instrDesc*)idAmd  = *(instrDesc*)id; // copy all the "core" fields
11634             memset((BYTE*)idAmd + sizeof(instrDesc), 0,
11635                    sizeof(instrDescAmd) - sizeof(instrDesc)); // zero out the tail that wasn't copied
11636
11637             idAmd->idInsFmt(IF_RWR_ARD);
11638             idAmd->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
11639             idAmd->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
11640             emitSetAmdDisp(idAmd, distVal); // set the displacement
11641             idAmd->idSetIsDspReloc(id->idIsDspReloc());
11642             assert(emitGetInsAmdAny(idAmd) == distVal); // make sure "disp" is stored properly
11643
11644             UNATIVE_OFFSET sz = emitInsSizeAM(idAmd, insCodeRM(ins));
11645             idAmd->idCodeSize(sz);
11646
11647             code = insCodeRM(ins);
11648             code |= (insEncodeReg345(ins, id->idReg1(), EA_PTRSIZE, &code) << 8);
11649
11650             dst = emitOutputAM(dst, idAmd, code, nullptr);
11651
11652             code = 0xCC;
11653
11654             // For forward jumps, record the address of the distance value
11655             // Hard-coded 4 here because we already output the displacement, as the last thing.
11656             id->idjTemp.idjAddr = (dstOffs > srcOffs) ? (dst - 4) : nullptr;
11657
11658             // We're done
11659             return dst;
11660         }
11661         else
11662         {
11663             code = 0xE8;
11664         }
11665
11666         if (ins != INS_mov)
11667         {
11668             dst += emitOutputByte(dst, code);
11669
11670             if (code & 0xFF00)
11671             {
11672                 dst += emitOutputByte(dst, code >> 8);
11673             }
11674         }
11675
11676         // For forward jumps, record the address of the distance value
11677         id->idjTemp.idjAddr = (dstOffs > srcOffs) ? dst : nullptr;
11678
11679         dst += emitOutputLong(dst, distVal);
11680
11681 #ifndef _TARGET_AMD64_ // all REL32 on AMD have to go through recordRelocation
11682         if (emitComp->opts.compReloc)
11683 #endif
11684         {
11685             if (!relAddr)
11686             {
11687                 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)distVal, IMAGE_REL_BASED_HIGHLOW);
11688             }
11689             else if (emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
11690             {
11691                 assert(id->idjKeepLong);
11692                 emitRecordRelocation((void*)(dst - sizeof(INT32)), dst + distVal, IMAGE_REL_BASED_REL32);
11693             }
11694         }
11695     }
11696
11697     // Local calls kill all registers
11698     if (ins == INS_call && (emitThisGCrefRegs | emitThisByrefRegs))
11699     {
11700         emitGCregDeadUpdMask(emitThisGCrefRegs | emitThisByrefRegs, dst);
11701     }
11702
11703     return dst;
11704 }
11705
11706 /*****************************************************************************
11707  *
11708  *  Append the machine code corresponding to the given instruction descriptor
11709  *  to the code block at '*dp'; the base of the code block is 'bp', and 'ig'
11710  *  is the instruction group that contains the instruction. Updates '*dp' to
11711  *  point past the generated code, and returns the size of the instruction
11712  *  descriptor in bytes.
11713  */
11714
11715 #ifdef _PREFAST_
11716 #pragma warning(push)
11717 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
11718 #endif
11719 size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
11720 {
11721     assert(emitIssuing);
11722
11723     BYTE*         dst           = *dp;
11724     size_t        sz            = sizeof(instrDesc);
11725     instruction   ins           = id->idIns();
11726     unsigned char callInstrSize = 0;
11727
11728 #ifdef DEBUG
11729     bool dspOffs = emitComp->opts.dspGCtbls;
11730 #endif // DEBUG
11731
11732     emitAttr size = id->idOpSize();
11733
11734     assert(REG_NA == (int)REG_NA);
11735
11736     assert(ins != INS_imul || size >= EA_4BYTE);                  // Has no 'w' bit
11737     assert(instrIs3opImul(id->idIns()) == 0 || size >= EA_4BYTE); // Has no 'w' bit
11738
11739     VARSET_TP GCvars(VarSetOps::UninitVal());
11740
11741     // What instruction format have we got?
11742     switch (id->idInsFmt())
11743     {
11744         code_t   code;
11745         unsigned regcode;
11746         int      args;
11747         CnsVal   cnsVal;
11748
11749         BYTE* addr;
11750         bool  recCall;
11751
11752         regMaskTP gcrefRegs;
11753         regMaskTP byrefRegs;
11754
11755         /********************************************************************/
11756         /*                        No operands                               */
11757         /********************************************************************/
11758         case IF_NONE:
11759             // the loop alignment pseudo instruction
11760             if (ins == INS_align)
11761             {
11762                 sz  = TINY_IDSC_SIZE;
11763                 dst = emitOutputNOP(dst, (-(int)(size_t)dst) & 0x0f);
11764                 assert(((size_t)dst & 0x0f) == 0);
11765                 break;
11766             }
11767
11768             if (ins == INS_nop)
11769             {
11770                 dst = emitOutputNOP(dst, id->idCodeSize());
11771                 break;
11772             }
11773
11774             // the cdq instruction kills the EDX register implicitly
11775             if (ins == INS_cdq)
11776             {
11777                 emitGCregDeadUpd(REG_EDX, dst);
11778             }
11779
11780             __fallthrough;
11781
11782 #if FEATURE_STACK_FP_X87
11783         case IF_TRD:
11784         case IF_TWR:
11785         case IF_TRW:
11786 #endif // FEATURE_STACK_FP_X87
11787
11788             assert(id->idGCref() == GCT_NONE);
11789
11790             code = insCodeMR(ins);
11791
11792 #ifdef _TARGET_AMD64_
11793             // Support only scalar AVX instructions and hence size is hard coded to 4-byte.
11794             code = AddVexPrefixIfNeeded(ins, code, EA_4BYTE);
11795
11796             if (ins == INS_cdq && TakesRexWPrefix(ins, id->idOpSize()))
11797             {
11798                 code = AddRexWPrefix(ins, code);
11799             }
11800             dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11801 #endif
11802             // Is this a 'big' opcode?
11803             if (code & 0xFF000000)
11804             {
11805                 // The high word and then the low word
11806                 dst += emitOutputWord(dst, code >> 16);
11807                 code &= 0x0000FFFF;
11808                 dst += emitOutputWord(dst, code);
11809             }
11810             else if (code & 0x00FF0000)
11811             {
11812                 // The high byte and then the low word
11813                 dst += emitOutputByte(dst, code >> 16);
11814                 code &= 0x0000FFFF;
11815                 dst += emitOutputWord(dst, code);
11816             }
11817             else if (code & 0xFF00)
11818             {
11819                 // The 2 byte opcode
11820                 dst += emitOutputWord(dst, code);
11821             }
11822             else
11823             {
11824                 // The 1 byte opcode
11825                 dst += emitOutputByte(dst, code);
11826             }
11827
11828             break;
11829
11830         /********************************************************************/
11831         /*                Simple constant, local label, method              */
11832         /********************************************************************/
11833
11834         case IF_CNS:
11835             dst = emitOutputIV(dst, id);
11836             sz  = emitSizeOfInsDsc(id);
11837             break;
11838
11839         case IF_LABEL:
11840         case IF_RWR_LABEL:
11841         case IF_SWR_LABEL:
11842             assert(id->idGCref() == GCT_NONE);
11843             assert(id->idIsBound());
11844
11845             // TODO-XArch-Cleanup: handle IF_RWR_LABEL in emitOutputLJ() or change it to emitOutputAM()?
11846             dst = emitOutputLJ(dst, id);
11847             sz  = (id->idInsFmt() == IF_SWR_LABEL ? sizeof(instrDescLbl) : sizeof(instrDescJmp));
11848             break;
11849
11850         case IF_METHOD:
11851         case IF_METHPTR:
11852             // Assume we'll be recording this call
11853             recCall = true;
11854
11855             // Get hold of the argument count and field Handle
11856             args = emitGetInsCDinfo(id);
11857
11858             // Is this a "fat" call descriptor?
11859             if (id->idIsLargeCall())
11860             {
11861                 instrDescCGCA* idCall = (instrDescCGCA*)id;
11862                 gcrefRegs             = idCall->idcGcrefRegs;
11863                 byrefRegs             = idCall->idcByrefRegs;
11864                 VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
11865                 sz = sizeof(instrDescCGCA);
11866             }
11867             else
11868             {
11869                 assert(!id->idIsLargeDsp());
11870                 assert(!id->idIsLargeCns());
11871
11872                 gcrefRegs = emitDecodeCallGCregs(id);
11873                 byrefRegs = 0;
11874                 VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
11875                 sz = sizeof(instrDesc);
11876             }
11877
11878             addr = (BYTE*)id->idAddr()->iiaAddr;
11879             assert(addr != nullptr);
11880
11881             // Some helpers don't get recorded in GC tables
11882             if (id->idIsNoGC())
11883             {
11884                 recCall = false;
11885             }
11886
11887             // What kind of a call do we have here?
11888             if (id->idInsFmt() == IF_METHPTR)
11889             {
11890                 // This is call indirect via a method pointer
11891
11892                 code = insCodeMR(ins);
11893                 if (ins == INS_i_jmp)
11894                 {
11895                     code |= 1;
11896                 }
11897
11898                 if (id->idIsDspReloc())
11899                 {
11900                     dst += emitOutputWord(dst, code | 0x0500);
11901 #ifdef _TARGET_AMD64_
11902                     dst += emitOutputLong(dst, 0);
11903 #else
11904                     dst += emitOutputLong(dst, (int)addr);
11905 #endif
11906                     emitRecordRelocation((void*)(dst - sizeof(int)), addr, IMAGE_REL_BASED_DISP32);
11907                 }
11908                 else
11909                 {
11910 #ifdef _TARGET_X86_
11911                     dst += emitOutputWord(dst, code | 0x0500);
11912 #else  //_TARGET_AMD64_
11913                     // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
11914                     // This addr mode should never be used while generating relocatable ngen code nor if
11915                     // the addr can be encoded as pc-relative address.
11916                     noway_assert(!emitComp->opts.compReloc);
11917                     noway_assert(codeGen->genAddrRelocTypeHint((size_t)addr) != IMAGE_REL_BASED_REL32);
11918                     noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (ssize_t)addr);
11919
11920                     // This requires, specifying a SIB byte after ModRM byte.
11921                     dst += emitOutputWord(dst, code | 0x0400);
11922                     dst += emitOutputByte(dst, 0x25);
11923 #endif //_TARGET_AMD64_
11924                     dst += emitOutputLong(dst, static_cast<int>(reinterpret_cast<intptr_t>(addr)));
11925                 }
11926                 goto DONE_CALL;
11927             }
11928
11929             // Else
11930             // This is call direct where we know the target, thus we can
11931             // use a direct call; the target to jump to is in iiaAddr.
11932             assert(id->idInsFmt() == IF_METHOD);
11933
11934             // Output the call opcode followed by the target distance
11935             dst += (ins == INS_l_jmp) ? emitOutputByte(dst, insCode(ins)) : emitOutputByte(dst, insCodeMI(ins));
11936
11937             ssize_t offset;
11938 #ifdef _TARGET_AMD64_
11939             // All REL32 on Amd64 go through recordRelocation.  Here we will output zero to advance dst.
11940             offset = 0;
11941             assert(id->idIsDspReloc());
11942 #else
11943             // Calculate PC relative displacement.
11944             // Although you think we should be using sizeof(void*), the x86 and x64 instruction set
11945             // only allow a 32-bit offset, so we correctly use sizeof(INT32)
11946             offset = addr - (dst + sizeof(INT32));
11947 #endif
11948
11949             dst += emitOutputLong(dst, offset);
11950
11951             if (id->idIsDspReloc())
11952             {
11953                 emitRecordRelocation((void*)(dst - sizeof(INT32)), addr, IMAGE_REL_BASED_REL32);
11954             }
11955
11956         DONE_CALL:
11957
11958             /* We update the GC info before the call as the variables cannot be
11959                used by the call. Killing variables before the call helps with
11960                boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029.
11961                If we ever track aliased variables (which could be used by the
11962                call), we would have to keep them alive past the call.
11963              */
11964             assert(FitsIn<unsigned char>(dst - *dp));
11965             callInstrSize = static_cast<unsigned char>(dst - *dp);
11966             emitUpdateLiveGCvars(GCvars, *dp);
11967
11968             // If the method returns a GC ref, mark EAX appropriately
11969             if (id->idGCref() == GCT_GCREF)
11970             {
11971                 gcrefRegs |= RBM_EAX;
11972             }
11973             else if (id->idGCref() == GCT_BYREF)
11974             {
11975                 byrefRegs |= RBM_EAX;
11976             }
11977
11978 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
11979             // If is a multi-register return method is called, mark RDX appropriately (for System V AMD64).
11980             if (id->idIsLargeCall())
11981             {
11982                 instrDescCGCA* idCall = (instrDescCGCA*)id;
11983                 if (idCall->idSecondGCref() == GCT_GCREF)
11984                 {
11985                     gcrefRegs |= RBM_RDX;
11986                 }
11987                 else if (idCall->idSecondGCref() == GCT_BYREF)
11988                 {
11989                     byrefRegs |= RBM_RDX;
11990                 }
11991             }
11992 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
11993
11994             // If the GC register set has changed, report the new set
11995             if (gcrefRegs != emitThisGCrefRegs)
11996             {
11997                 emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);
11998             }
11999
12000             if (byrefRegs != emitThisByrefRegs)
12001             {
12002                 emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);
12003             }
12004
12005             if (recCall || args)
12006             {
12007                 // For callee-pop, all arguments will be popped  after the call.
12008                 // For caller-pop, any GC arguments will go dead after the call.
12009
12010                 assert(callInstrSize != 0);
12011
12012                 if (args >= 0)
12013                 {
12014                     emitStackPop(dst, /*isCall*/ true, callInstrSize, args);
12015                 }
12016                 else
12017                 {
12018                     emitStackKillArgs(dst, -args, callInstrSize);
12019                 }
12020             }
12021
12022             // Do we need to record a call location for GC purposes?
12023             if (!emitFullGCinfo && recCall)
12024             {
12025                 assert(callInstrSize != 0);
12026                 emitRecordGCcall(dst, callInstrSize);
12027             }
12028
12029 #ifdef DEBUG
12030             if (ins == INS_call)
12031             {
12032                 emitRecordCallSite(emitCurCodeOffs(*dp), id->idDebugOnlyInfo()->idCallSig,
12033                                    (CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
12034             }
12035 #endif // DEBUG
12036
12037             break;
12038
12039         /********************************************************************/
12040         /*                      One register operand                        */
12041         /********************************************************************/
12042
12043         case IF_RRD:
12044         case IF_RWR:
12045         case IF_RRW:
12046             dst = emitOutputR(dst, id);
12047             sz  = TINY_IDSC_SIZE;
12048             break;
12049
12050         /********************************************************************/
12051         /*                 Register and register/constant                   */
12052         /********************************************************************/
12053
12054         case IF_RRW_SHF:
12055             code = insCodeMR(ins);
12056             // Emit the VEX prefix if it exists
12057             code = AddVexPrefixIfNeeded(ins, code, size);
12058             code = insEncodeMRreg(ins, id->idReg1(), size, code);
12059
12060             // set the W bit
12061             if (size != EA_1BYTE)
12062             {
12063                 code |= 1;
12064             }
12065
12066             // Emit the REX prefix if it exists
12067             if (TakesRexWPrefix(ins, size))
12068             {
12069                 code = AddRexWPrefix(ins, code);
12070             }
12071
12072             // Output a size prefix for a 16-bit operand
12073             if (size == EA_2BYTE)
12074             {
12075                 dst += emitOutputByte(dst, 0x66);
12076             }
12077
12078             dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
12079             dst += emitOutputWord(dst, code);
12080             dst += emitOutputByte(dst, emitGetInsSC(id));
12081             sz = emitSizeOfInsDsc(id);
12082
12083             // Update GC info.
12084             assert(!id->idGCref());
12085             emitGCregDeadUpd(id->idReg1(), dst);
12086             break;
12087
12088         case IF_RRD_RRD:
12089         case IF_RWR_RRD:
12090         case IF_RRW_RRD:
12091         case IF_RRW_RRW:
12092             dst = emitOutputRR(dst, id);
12093             sz  = TINY_IDSC_SIZE;
12094             break;
12095
12096         case IF_RRD_CNS:
12097         case IF_RWR_CNS:
12098         case IF_RRW_CNS:
12099             dst = emitOutputRI(dst, id);
12100             sz  = emitSizeOfInsDsc(id);
12101             break;
12102
12103         case IF_RWR_RRD_RRD:
12104             dst = emitOutputRRR(dst, id);
12105             sz  = emitSizeOfInsDsc(id);
12106             break;
12107         case IF_RWR_RRD_RRD_CNS:
12108         case IF_RWR_RRD_RRD_RRD:
12109             dst = emitOutputRRR(dst, id);
12110             sz  = emitSizeOfInsDsc(id);
12111             dst += emitOutputByte(dst, emitGetInsSC(id));
12112             break;
12113
12114         case IF_RRW_RRW_CNS:
12115             assert(id->idGCref() == GCT_NONE);
12116
12117             // Get the 'base' opcode (it's a big one)
12118             // Also, determine which operand goes where in the ModRM byte.
12119             regNumber mReg;
12120             regNumber rReg;
12121             // if (ins == INS_shld || ins == INS_shrd || ins == INS_vextractf128 || ins == INS_vinsertf128)
12122             if (hasCodeMR(ins))
12123             {
12124                 code = insCodeMR(ins);
12125                 // Emit the VEX prefix if it exists
12126                 code = AddVexPrefixIfNeeded(ins, code, size);
12127                 code = insEncodeMRreg(ins, code);
12128                 mReg = id->idReg1();
12129                 rReg = id->idReg2();
12130             }
12131             else
12132             {
12133                 code = insCodeRM(ins);
12134                 // Emit the VEX prefix if it exists
12135                 code = AddVexPrefixIfNeeded(ins, code, size);
12136                 code = insEncodeRMreg(ins, code);
12137                 mReg = id->idReg2();
12138                 rReg = id->idReg1();
12139             }
12140             assert(code & 0x00FF0000);
12141
12142             if (TakesRexWPrefix(ins, size))
12143             {
12144                 code = AddRexWPrefix(ins, code);
12145             }
12146
12147             if (TakesVexPrefix(ins))
12148             {
12149                 if (IsDstDstSrcAVXInstruction(ins))
12150                 {
12151                     // Encode source/dest operand reg in 'vvvv' bits in 1's complement form
12152                     // This code will have to change when we support 3 operands.
12153                     // For now, we always overload this source with the destination (always reg1).
12154                     // (Though we will need to handle the few ops that can have the 'vvvv' bits as destination,
12155                     // e.g. pslldq, when/if we support those instructions with 2 registers.)
12156                     // (see x64 manual Table 2-9. Instructions with a VEX.vvvv destination)
12157                     code = insEncodeReg3456(ins, id->idReg1(), size, code);
12158                 }
12159                 else if (IsDstSrcSrcAVXInstruction(ins))
12160                 {
12161                     // This is a "merge" move instruction.
12162                     // Encode source operand reg in 'vvvv' bits in 1's complement form
12163                     code = insEncodeReg3456(ins, id->idReg2(), size, code);
12164                 }
12165             }
12166
12167             regcode = (insEncodeReg345(ins, rReg, size, &code) | insEncodeReg012(ins, mReg, size, &code));
12168
            // Output the REX or VEX prefix, if one is needed
12170             dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
12171
12172             if (code & 0xFF000000)
12173             {
12174                 // Output the highest word of the opcode
12175                 dst += emitOutputWord(dst, code >> 16);
12176                 code &= 0x0000FFFF;
12177
12178                 if (Is4ByteSSE4Instruction(ins))
12179                 {
12180                     // Output 3rd byte of the opcode
12181                     dst += emitOutputByte(dst, code);
12182                     code &= 0xFF00;
12183                 }
12184             }
12185             else if (code & 0x00FF0000)
12186             {
12187                 dst += emitOutputByte(dst, code >> 16);
12188                 code &= 0x0000FFFF;
12189             }
12190
            // TODO-XArch-CQ: Right now we support 4-byte opcode instructions only
12192             if ((code & 0xFF00) == 0xC000)
12193             {
12194                 dst += emitOutputWord(dst, code | (regcode << 8));
12195             }
12196             else if ((code & 0xFF) == 0x00)
12197             {
12198                 // This case happens for SSE4/AVX instructions only
12199                 assert(IsAVXInstruction(ins) || IsSSE4Instruction(ins));
12200
12201                 dst += emitOutputByte(dst, (code >> 8) & 0xFF);
12202                 dst += emitOutputByte(dst, (0xC0 | regcode));
12203             }
12204             else
12205             {
12206                 dst += emitOutputWord(dst, code);
12207                 dst += emitOutputByte(dst, (0xC0 | regcode));
12208             }
12209
12210             dst += emitOutputByte(dst, emitGetInsSC(id));
12211             sz = emitSizeOfInsDsc(id);
12212
12213             // Kill any GC ref in the destination register if necessary.
12214             if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
12215             {
12216                 emitGCregDeadUpd(id->idReg1(), dst);
12217             }
12218             break;
12219
12220         /********************************************************************/
12221         /*                      Address mode operand                        */
12222         /********************************************************************/
12223
12224         case IF_ARD:
12225         case IF_AWR:
12226         case IF_ARW:
12227
12228 #if FEATURE_STACK_FP_X87
12229
12230         case IF_TRD_ARD:
12231         case IF_TWR_ARD:
12232         case IF_TRW_ARD:
12233
12234         // case IF_ARD_TRD:
12235         // case IF_ARW_TRD:
12236         case IF_AWR_TRD:
12237
12238 #endif // FEATURE_STACK_FP_X87
12239
12240             dst = emitCodeWithInstructionSize(dst, emitOutputAM(dst, id, insCodeMR(ins)), &callInstrSize);
12241
12242             switch (ins)
12243             {
12244                 case INS_call:
12245
12246                 IND_CALL:
12247                     // Get hold of the argument count and method handle
12248                     args = emitGetInsCIargs(id);
12249
12250                     // Is this a "fat" call descriptor?
12251                     if (id->idIsLargeCall())
12252                     {
12253                         instrDescCGCA* idCall = (instrDescCGCA*)id;
12254
12255                         gcrefRegs = idCall->idcGcrefRegs;
12256                         byrefRegs = idCall->idcByrefRegs;
12257                         VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
12258                         sz = sizeof(instrDescCGCA);
12259                     }
12260                     else
12261                     {
12262                         assert(!id->idIsLargeDsp());
12263                         assert(!id->idIsLargeCns());
12264
12265                         gcrefRegs = emitDecodeCallGCregs(id);
12266                         byrefRegs = 0;
12267                         VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
12268                         sz = sizeof(instrDesc);
12269                     }
12270
12271                     recCall = true;
12272
12273                     goto DONE_CALL;
12274
12275                 default:
12276                     sz = emitSizeOfInsDsc(id);
12277                     break;
12278             }
12279             break;
12280
12281         case IF_RRW_ARD_CNS:
12282         case IF_RWR_ARD_CNS:
12283             emitGetInsAmdCns(id, &cnsVal);
12284             code = insCodeRM(ins);
12285
12286             // Special case 4-byte AVX instructions
12287             if (Is4ByteSSE4OrAVXInstruction(ins))
12288             {
12289                 dst = emitOutputAM(dst, id, code, &cnsVal);
12290             }
12291             else
12292             {
12293                 code    = AddVexPrefixIfNeeded(ins, code, size);
12294                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12295                 dst     = emitOutputAM(dst, id, code | regcode, &cnsVal);
12296             }
12297
12298             sz = emitSizeOfInsDsc(id);
12299             break;
12300
12301         case IF_AWR_RRD_CNS:
12302             assert(ins == INS_vextracti128 || ins == INS_vextractf128);
12303             assert(UseVEXEncoding());
12304             emitGetInsAmdCns(id, &cnsVal);
12305             code = insCodeMR(ins);
12306             dst  = emitOutputAM(dst, id, code, &cnsVal);
12307             sz   = emitSizeOfInsDsc(id);
12308             break;
12309
12310         case IF_RRD_ARD:
12311         case IF_RWR_ARD:
12312         case IF_RRW_ARD:
12313         case IF_RWR_RRD_ARD:
12314             code = insCodeRM(ins);
12315             if (Is4ByteSSE4OrAVXInstruction(ins))
12316             {
12317                 dst = emitOutputAM(dst, id, code);
12318             }
12319             else
12320             {
12321                 code    = AddVexPrefixIfNeeded(ins, code, size);
12322                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12323                 dst     = emitOutputAM(dst, id, code | regcode);
12324             }
12325             sz = emitSizeOfInsDsc(id);
12326             break;
12327
12328         case IF_RWR_RRD_ARD_CNS:
12329         {
12330             emitGetInsAmdCns(id, &cnsVal);
12331             code = insCodeRM(ins);
12332             if (Is4ByteSSE4OrAVXInstruction(ins))
12333             {
12334                 dst = emitOutputAM(dst, id, code, &cnsVal);
12335             }
12336             else
12337             {
12338                 code    = AddVexPrefixIfNeeded(ins, code, size);
12339                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12340                 dst     = emitOutputAM(dst, id, code | regcode, &cnsVal);
12341             }
12342             sz = emitSizeOfInsDsc(id);
12343             break;
12344         }
12345
12346         case IF_ARD_RRD:
12347         case IF_AWR_RRD:
12348         case IF_ARW_RRD:
12349             code    = insCodeMR(ins);
12350             code    = AddVexPrefixIfNeeded(ins, code, size);
12351             regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12352             dst     = emitOutputAM(dst, id, code | regcode);
12353             sz      = emitSizeOfInsDsc(id);
12354             break;
12355
12356         case IF_ARD_CNS:
12357         case IF_AWR_CNS:
12358         case IF_ARW_CNS:
12359             emitGetInsAmdCns(id, &cnsVal);
12360             dst = emitOutputAM(dst, id, insCodeMI(ins), &cnsVal);
12361             sz  = emitSizeOfInsDsc(id);
12362             break;
12363
12364         case IF_ARW_SHF:
12365             emitGetInsAmdCns(id, &cnsVal);
12366             dst = emitOutputAM(dst, id, insCodeMR(ins), &cnsVal);
12367             sz  = emitSizeOfInsDsc(id);
12368             break;
12369
12370         /********************************************************************/
12371         /*                      Stack-based operand                         */
12372         /********************************************************************/
12373
12374         case IF_SRD:
12375         case IF_SWR:
12376         case IF_SRW:
12377
12378 #if FEATURE_STACK_FP_X87
12379
12380         case IF_TRD_SRD:
12381         case IF_TWR_SRD:
12382         case IF_TRW_SRD:
12383
12384         // case IF_SRD_TRD:
12385         // case IF_SRW_TRD:
12386         case IF_SWR_TRD:
12387
12388 #endif // FEATURE_STACK_FP_X87
12389
12390             assert(ins != INS_pop_hide);
12391             if (ins == INS_pop)
12392             {
12393                 // The offset in "pop [ESP+xxx]" is relative to the new ESP value
12394                 CLANG_FORMAT_COMMENT_ANCHOR;
12395
12396 #if !FEATURE_FIXED_OUT_ARGS
12397                 emitCurStackLvl -= sizeof(int);
12398 #endif
12399                 dst = emitOutputSV(dst, id, insCodeMR(ins));
12400
12401 #if !FEATURE_FIXED_OUT_ARGS
12402                 emitCurStackLvl += sizeof(int);
12403 #endif
12404                 break;
12405             }
12406
12407             dst = emitCodeWithInstructionSize(dst, emitOutputSV(dst, id, insCodeMR(ins)), &callInstrSize);
12408
12409             if (ins == INS_call)
12410             {
12411                 goto IND_CALL;
12412             }
12413
12414             break;
12415
12416         case IF_SRD_CNS:
12417         case IF_SWR_CNS:
12418         case IF_SRW_CNS:
12419             emitGetInsCns(id, &cnsVal);
12420             dst = emitOutputSV(dst, id, insCodeMI(ins), &cnsVal);
12421             sz  = emitSizeOfInsDsc(id);
12422             break;
12423
12424         case IF_SRW_SHF:
12425             emitGetInsCns(id, &cnsVal);
12426             dst = emitOutputSV(dst, id, insCodeMR(ins), &cnsVal);
12427             sz  = emitSizeOfInsDsc(id);
12428             break;
12429
12430         case IF_RRW_SRD_CNS:
12431         case IF_RWR_SRD_CNS:
12432             emitGetInsCns(id, &cnsVal);
12433             code = insCodeRM(ins);
12434
12435             // Special case 4-byte AVX instructions
12436             if (Is4ByteSSE4OrAVXInstruction(ins))
12437             {
12438                 dst = emitOutputSV(dst, id, code, &cnsVal);
12439             }
12440             else
12441             {
12442                 code = AddVexPrefixIfNeeded(ins, code, size);
12443
12444                 // In case of AVX instructions that take 3 operands, encode reg1 as first source.
12445                 // Note that reg1 is both a source and a destination.
12446                 //
12447                 // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
12448                 // now we use the single source as source1 and source2.
12449                 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
12450                 if (IsDstDstSrcAVXInstruction(ins))
12451                 {
12452                     // encode source operand reg in 'vvvv' bits in 1's complement form
12453                     code = insEncodeReg3456(ins, id->idReg1(), size, code);
12454                 }
12455
12456                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12457                 dst     = emitOutputSV(dst, id, code | regcode, &cnsVal);
12458             }
12459
12460             sz = emitSizeOfInsDsc(id);
12461             break;
12462
12463         case IF_RRD_SRD:
12464         case IF_RWR_SRD:
12465         case IF_RRW_SRD:
12466             code = insCodeRM(ins);
12467
12468             // 4-byte AVX instructions are special cased inside emitOutputSV
12469             // since they do not have space to encode ModRM byte.
12470             if (Is4ByteSSE4OrAVXInstruction(ins))
12471             {
12472                 dst = emitOutputSV(dst, id, code);
12473             }
12474             else
12475             {
12476                 code = AddVexPrefixIfNeeded(ins, code, size);
12477
12478                 if (IsDstDstSrcAVXInstruction(ins))
12479                 {
12480                     // encode source operand reg in 'vvvv' bits in 1's complement form
12481                     code = insEncodeReg3456(ins, id->idReg1(), size, code);
12482                 }
12483
12484                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12485                 dst     = emitOutputSV(dst, id, code | regcode);
12486             }
12487             break;
12488
12489         case IF_RWR_RRD_SRD:
12490         {
12491             // This should only be called on AVX instructions
12492             assert(IsAVXInstruction(ins));
12493
12494             code = insCodeRM(ins);
12495             code = AddVexPrefixIfNeeded(ins, code, size);
12496             code = insEncodeReg3456(ins, id->idReg2(), size,
12497                                     code); // encode source operand reg in 'vvvv' bits in 1's complement form
12498
12499             // 4-byte AVX instructions are special cased inside emitOutputSV
12500             // since they do not have space to encode ModRM byte.
12501             if (Is4ByteSSE4OrAVXInstruction(ins))
12502             {
12503                 dst = emitOutputSV(dst, id, code);
12504             }
12505             else
12506             {
12507                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12508                 dst     = emitOutputSV(dst, id, code | regcode);
12509             }
12510             break;
12511         }
12512
12513         case IF_RWR_RRD_SRD_CNS:
12514         {
12515             // This should only be called on AVX instructions
12516             assert(IsAVXInstruction(ins));
12517             emitGetInsCns(id, &cnsVal);
12518
12519             code = insCodeRM(ins);
12520             code = AddVexPrefixIfNeeded(ins, code, size);
12521             code = insEncodeReg3456(ins, id->idReg2(), size,
12522                                     code); // encode source operand reg in 'vvvv' bits in 1's complement form
12523
12524             // 4-byte AVX instructions are special cased inside emitOutputSV
12525             // since they do not have space to encode ModRM byte.
12526             if (Is4ByteSSE4OrAVXInstruction(ins))
12527             {
12528                 dst = emitOutputSV(dst, id, code, &cnsVal);
12529             }
12530             else
12531             {
12532                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12533                 dst     = emitOutputSV(dst, id, code | regcode, &cnsVal);
12534             }
12535             break;
12536         }
12537
12538         case IF_SRD_RRD:
12539         case IF_SWR_RRD:
12540         case IF_SRW_RRD:
12541             code = insCodeMR(ins);
12542             code = AddVexPrefixIfNeeded(ins, code, size);
12543
12544             // In case of AVX instructions that take 3 operands, encode reg1 as first source.
12545             // Note that reg1 is both a source and a destination.
12546             //
12547             // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
12548             // now we use the single source as source1 and source2.
12549             // For this format, moves do not support a third operand, so we only need to handle the binary ops.
12550             if (IsDstDstSrcAVXInstruction(ins))
12551             {
12552                 // encode source operand reg in 'vvvv' bits in 1's complement form
12553                 code = insEncodeReg3456(ins, id->idReg1(), size, code);
12554             }
12555
12556             regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12557             dst     = emitOutputSV(dst, id, code | regcode);
12558             break;
12559
12560         /********************************************************************/
12561         /*                    Direct memory address                         */
12562         /********************************************************************/
12563
12564         case IF_MRD:
12565         case IF_MRW:
12566         case IF_MWR:
12567
12568 #if FEATURE_STACK_FP_X87
12569
12570         case IF_TRD_MRD:
12571         case IF_TWR_MRD:
12572         case IF_TRW_MRD:
12573
12574         // case IF_MRD_TRD:
12575         // case IF_MRW_TRD:
12576         case IF_MWR_TRD:
12577
12578 #endif // FEATURE_STACK_FP_X87
12579
12580             noway_assert(ins != INS_call);
12581             dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500);
12582             sz  = emitSizeOfInsDsc(id);
12583             break;
12584
12585         case IF_MRD_OFF:
12586             dst = emitOutputCV(dst, id, insCodeMI(ins));
12587             break;
12588
12589         case IF_RRW_MRD_CNS:
12590         case IF_RWR_MRD_CNS:
12591             emitGetInsDcmCns(id, &cnsVal);
12592             code = insCodeRM(ins);
12593
12594             // Special case 4-byte AVX instructions
12595             if (Is4ByteSSE4OrAVXInstruction(ins))
12596             {
12597                 dst = emitOutputCV(dst, id, code, &cnsVal);
12598             }
12599             else
12600             {
12601                 code = AddVexPrefixIfNeeded(ins, code, size);
12602
12603                 // In case of AVX instructions that take 3 operands, encode reg1 as first source.
12604                 // Note that reg1 is both a source and a destination.
12605                 //
12606                 // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
12607                 // now we use the single source as source1 and source2.
12608                 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
12609                 if (IsDstDstSrcAVXInstruction(ins))
12610                 {
12611                     // encode source operand reg in 'vvvv' bits in 1's complement form
12612                     code = insEncodeReg3456(ins, id->idReg1(), size, code);
12613                 }
12614
12615                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12616                 dst     = emitOutputCV(dst, id, code | regcode | 0x0500, &cnsVal);
12617             }
12618
12619             sz = emitSizeOfInsDsc(id);
12620             break;
12621
12622         case IF_MWR_RRD_CNS:
12623             assert(ins == INS_vextracti128 || ins == INS_vextractf128);
12624             assert(UseVEXEncoding());
12625             emitGetInsDcmCns(id, &cnsVal);
12626             code = insCodeMR(ins);
12627             // only AVX2 vextracti128 and AVX vextractf128 can reach this path,
12628             // they do not need VEX.vvvv to encode the register operand
12629             dst = emitOutputCV(dst, id, code, &cnsVal);
12630             sz  = emitSizeOfInsDsc(id);
12631             break;
12632
12633         case IF_RRD_MRD:
12634         case IF_RWR_MRD:
12635         case IF_RRW_MRD:
12636             code = insCodeRM(ins);
12637             // Special case 4-byte AVX instructions
12638             if (Is4ByteSSE4OrAVXInstruction(ins))
12639             {
12640                 dst = emitOutputCV(dst, id, code);
12641             }
12642             else
12643             {
12644                 code = AddVexPrefixIfNeeded(ins, code, size);
12645
12646                 if (IsDstDstSrcAVXInstruction(ins))
12647                 {
12648                     // encode source operand reg in 'vvvv' bits in 1's complement form
12649                     code = insEncodeReg3456(ins, id->idReg1(), size, code);
12650                 }
12651
12652                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12653                 dst     = emitOutputCV(dst, id, code | regcode | 0x0500);
12654             }
12655             sz = emitSizeOfInsDsc(id);
12656             break;
12657
12658         case IF_RWR_RRD_MRD:
12659         {
12660             // This should only be called on AVX instructions
12661             assert(IsAVXInstruction(ins));
12662
12663             code = insCodeRM(ins);
12664             code = AddVexPrefixIfNeeded(ins, code, size);
12665             code = insEncodeReg3456(ins, id->idReg2(), size,
12666                                     code); // encode source operand reg in 'vvvv' bits in 1's complement form
12667
12668             // Special case 4-byte AVX instructions
12669             if (Is4ByteSSE4OrAVXInstruction(ins))
12670             {
12671                 dst = emitOutputCV(dst, id, code);
12672             }
12673             else
12674             {
12675                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12676                 dst     = emitOutputCV(dst, id, code | regcode | 0x0500);
12677             }
12678             sz = emitSizeOfInsDsc(id);
12679             break;
12680         }
12681
12682         case IF_RWR_RRD_MRD_CNS:
12683         {
12684             // This should only be called on AVX instructions
12685             assert(IsAVXInstruction(ins));
12686             emitGetInsCns(id, &cnsVal);
12687
12688             code = insCodeRM(ins);
12689             code = AddVexPrefixIfNeeded(ins, code, size);
12690             code = insEncodeReg3456(ins, id->idReg2(), size,
12691                                     code); // encode source operand reg in 'vvvv' bits in 1's complement form
12692
12693             // Special case 4-byte AVX instructions
12694             if (Is4ByteSSE4OrAVXInstruction(ins))
12695             {
12696                 dst = emitOutputCV(dst, id, code, &cnsVal);
12697             }
12698             else
12699             {
12700                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12701                 dst     = emitOutputCV(dst, id, code | regcode | 0x0500, &cnsVal);
12702             }
12703             sz = emitSizeOfInsDsc(id);
12704             break;
12705         }
12706
12707         case IF_RWR_MRD_OFF:
12708             code = insCode(ins);
12709             code = AddVexPrefixIfNeeded(ins, code, size);
12710
12711             // In case of AVX instructions that take 3 operands, encode reg1 as first source.
12712             // Note that reg1 is both a source and a destination.
12713             //
12714             // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
12715             // now we use the single source as source1 and source2.
12716             // For this format, moves do not support a third operand, so we only need to handle the binary ops.
12717             if (IsDstDstSrcAVXInstruction(ins))
12718             {
12719                 // encode source operand reg in 'vvvv' bits in 1's complement form
12720                 code = insEncodeReg3456(ins, id->idReg1(), size, code);
12721             }
12722
12723             regcode = insEncodeReg012(id->idIns(), id->idReg1(), size, &code);
12724             dst     = emitOutputCV(dst, id, code | 0x30 | regcode);
12725             sz      = emitSizeOfInsDsc(id);
12726             break;
12727
12728         case IF_MRD_RRD:
12729         case IF_MWR_RRD:
12730         case IF_MRW_RRD:
12731             code = insCodeMR(ins);
12732             code = AddVexPrefixIfNeeded(ins, code, size);
12733
12734             // In case of AVX instructions that take 3 operands, encode reg1 as first source.
12735             // Note that reg1 is both a source and a destination.
12736             //
12737             // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
12738             // now we use the single source as source1 and source2.
12739             // For this format, moves do not support a third operand, so we only need to handle the binary ops.
12740             if (IsDstDstSrcAVXInstruction(ins))
12741             {
12742                 // encode source operand reg in 'vvvv' bits in 1's complement form
12743                 code = insEncodeReg3456(ins, id->idReg1(), size, code);
12744             }
12745
12746             regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12747             dst     = emitOutputCV(dst, id, code | regcode | 0x0500);
12748             sz      = emitSizeOfInsDsc(id);
12749             break;
12750
12751         case IF_MRD_CNS:
12752         case IF_MWR_CNS:
12753         case IF_MRW_CNS:
12754             emitGetInsDcmCns(id, &cnsVal);
12755             dst = emitOutputCV(dst, id, insCodeMI(ins) | 0x0500, &cnsVal);
12756             sz  = emitSizeOfInsDsc(id);
12757             break;
12758
12759         case IF_MRW_SHF:
12760             emitGetInsDcmCns(id, &cnsVal);
12761             dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500, &cnsVal);
12762             sz  = emitSizeOfInsDsc(id);
12763             break;
12764
12765 #if FEATURE_STACK_FP_X87
12766
12767         /********************************************************************/
12768         /*                  FP coprocessor stack operands                   */
12769         /********************************************************************/
12770
12771         case IF_TRD_FRD:
12772         case IF_TWR_FRD:
12773         case IF_TRW_FRD:
12774             assert(id->idGCref() == GCT_NONE);
12775             dst += emitOutputWord(dst, insCodeMR(ins) | 0xC000 | (id->idReg1() << 8));
12776             break;
12777
12778         case IF_FRD_TRD:
12779         case IF_FWR_TRD:
12780         case IF_FRW_TRD:
12781             assert(id->idGCref() == GCT_NONE);
12782             dst += emitOutputWord(dst, insCodeMR(ins) | 0xC004 | (id->idReg1() << 8));
12783             break;
12784
12785 #endif // FEATURE_STACK_FP_X87
12786
12787         /********************************************************************/
12788         /*                            oops                                  */
12789         /********************************************************************/
12790
12791         default:
12792
12793 #ifdef DEBUG
12794             printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
12795             assert(!"don't know how to encode this instruction");
12796 #endif
12797             break;
12798     }
12799
12800     // Make sure we set the instruction descriptor size correctly
12801     assert(sz == emitSizeOfInsDsc(id));
12802
12803 #if !FEATURE_FIXED_OUT_ARGS
12804     bool updateStackLevel = !emitIGisInProlog(ig) && !emitIGisInEpilog(ig);
12805
12806 #if FEATURE_EH_FUNCLETS
12807     updateStackLevel = updateStackLevel && !emitIGisInFuncletProlog(ig) && !emitIGisInFuncletEpilog(ig);
12808 #endif // FEATURE_EH_FUNCLETS
12809
12810     // Make sure we keep the current stack level up to date
12811     if (updateStackLevel)
12812     {
12813         switch (ins)
12814         {
12815             case INS_push:
12816                 // Please note: {INS_push_hide,IF_LABEL} is used to push the address of the
12817                 // finally block for calling it locally for an op_leave.
12818                 emitStackPush(dst, id->idGCref());
12819                 break;
12820
12821             case INS_pop:
12822                 emitStackPop(dst, false, /*callInstrSize*/ 0, 1);
12823                 break;
12824
12825             case INS_sub:
12826                 // Check for "sub ESP, icon"
12827                 if (ins == INS_sub && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
12828                 {
12829                     assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
12830                     emitStackPushN(dst, (unsigned)(emitGetInsSC(id) / TARGET_POINTER_SIZE));
12831                 }
12832                 break;
12833
12834             case INS_add:
12835                 // Check for "add ESP, icon"
12836                 if (ins == INS_add && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
12837                 {
12838                     assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
12839                     emitStackPop(dst, /*isCall*/ false, /*callInstrSize*/ 0,
12840                                  (unsigned)(emitGetInsSC(id) / TARGET_POINTER_SIZE));
12841                 }
12842                 break;
12843
12844             default:
12845                 break;
12846         }
12847     }
12848
12849 #endif // !FEATURE_FIXED_OUT_ARGS
12850
12851     assert((int)emitCurStackLvl >= 0);
12852
12853     // Only epilog "instructions" and some pseudo-instrs
12854     // are allowed not to generate any code
12855
12856     assert(*dp != dst || emitInstHasNoCode(ins));
12857
12858 #ifdef DEBUG
12859     if (emitComp->opts.disAsm || emitComp->opts.dspEmit || emitComp->verbose)
12860     {
12861         emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(*dp), *dp, (dst - *dp));
12862     }
12863
12864     if (emitComp->compDebugBreak)
12865     {
12866         // Setting JitEmitPrintRefRegs=1 will print out emitThisGCrefRegs and emitThisByrefRegs
12867         // at the beginning of this method.
12868         if (JitConfig.JitEmitPrintRefRegs() != 0)
12869         {
12870             printf("Before emitOutputInstr for id->idDebugOnlyInfo()->idNum=0x%02x\n", id->idDebugOnlyInfo()->idNum);
12871             printf("  emitThisGCrefRegs(0x%p)=", emitComp->dspPtr(&emitThisGCrefRegs));
12872             printRegMaskInt(emitThisGCrefRegs);
12873             emitDispRegSet(emitThisGCrefRegs);
12874             printf("\n");
12875             printf("  emitThisByrefRegs(0x%p)=", emitComp->dspPtr(&emitThisByrefRegs));
12876             printRegMaskInt(emitThisByrefRegs);
12877             emitDispRegSet(emitThisByrefRegs);
12878             printf("\n");
12879         }
12880
12881         // For example, setting JitBreakEmitOutputInstr=a6 will break when this method is called for
12882         // emitting instruction a6 (i.e. IN00a6 in jitdump).
12883         if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum)
12884         {
12885             assert(!"JitBreakEmitOutputInstr reached");
12886         }
12887     }
12888 #endif
12889
12890 #ifdef TRANSLATE_PDB
12891     if (*dp != dst)
12892     {
12893         // only map instruction groups to instruction groups
12894         MapCode(id->idDebugOnlyInfo()->idilStart, *dp);
12895     }
12896 #endif
12897
12898     *dp = dst;
12899
12900 #ifdef DEBUG
12901     if (ins == INS_mulEAX || ins == INS_imulEAX)
12902     {
12903         // INS_mulEAX has an implicit target of Edx:Eax. Make sure
12904         // that we detected this and cleared its GC-status.
12905
12906         assert(((RBM_EAX | RBM_EDX) & (emitThisGCrefRegs | emitThisByrefRegs)) == 0);
12907     }
12908
12909     if (instrIs3opImul(ins))
12910     {
12911         // The target of the 3-operand imul is implicitly encoded. Make sure
12912         // that we detected the implicit register and cleared its GC-status.
12913
12914         regMaskTP regMask = genRegMask(inst3opImulReg(ins));
12915         assert((regMask & (emitThisGCrefRegs | emitThisByrefRegs)) == 0);
12916     }
12917 #endif
12918
12919     return sz;
12920 }
12921 #ifdef _PREFAST_
12922 #pragma warning(pop)
12923 #endif
12924
12925 /*****************************************************************************/
12926 /*****************************************************************************/
12927
12928 #endif // defined(_TARGET_XARCH_)