1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
19 #if defined(_TARGET_XARCH_)
21 /*****************************************************************************/
22 /*****************************************************************************/
28 bool IsSSE2Instruction(instruction ins)
30 return (ins >= INS_FIRST_SSE2_INSTRUCTION && ins <= INS_LAST_SSE2_INSTRUCTION);
33 bool IsSSE4Instruction(instruction ins)
38 return (ins >= INS_FIRST_SSE4_INSTRUCTION && ins <= INS_LAST_SSE4_INSTRUCTION);
42 bool IsSSEOrAVXInstruction(instruction ins)
44 #ifndef LEGACY_BACKEND
45 return (ins >= INS_FIRST_SSE2_INSTRUCTION && ins <= INS_LAST_AVX_INSTRUCTION);
46 #else // !LEGACY_BACKEND
47 return IsSSE2Instruction(ins);
48 #endif // LEGACY_BACKEND
// Returns true if 'ins' lies in the AVX-only range of the instruction table
// (instructions with no SSE counterpart).
bool IsAVXOnlyInstruction(instruction ins)
#ifndef LEGACY_BACKEND
    return (ins >= INS_FIRST_AVX_INSTRUCTION && ins <= INS_LAST_AVX_INSTRUCTION);
// Returns true if 'ins' will actually be emitted with a VEX (AVX) encoding:
// VEX encoding must be enabled on this emitter and 'ins' must be in the SSE/AVX range.
bool emitter::IsAVXInstruction(instruction ins)
#ifndef LEGACY_BACKEND
    return (UseVEXEncoding() && IsSSEOrAVXInstruction(ins));
#ifndef LEGACY_BACKEND
// Returns true if the AVX instruction is a binary operator that requires 3 operands.
// When we emit an instruction with only two operands, we will duplicate the destination
// register in the vvvv field of the VEX prefix.
// TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to
//                     be formalized by adding an additional field to the instruction
//                     table to indicate whether it is a 3-operand instruction.
bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
    case INS_vinsertf128:
    case INS_vinserti128:
    case INS_vpermilpsvar:
    case INS_vpermilpdvar:
        // dst-dst-src form only applies when the instruction is VEX-encoded.
        return IsAVXInstruction(ins);
// Returns true if the AVX instruction requires 3 operands that duplicate the source
// register in the vvvv field.
// TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to
//                     be formalized by adding an additional field to the instruction
//                     table to indicate whether it is a 3-operand instruction.
bool emitter::IsDstSrcSrcAVXInstruction(instruction ins)
        // dst-src-src form only applies when the instruction is VEX-encoded.
        return IsAVXInstruction(ins);
#endif // !LEGACY_BACKEND
// -------------------------------------------------------------------
// Is4ByteSSE4Instruction: Returns true if the SSE4 instruction
// is a 4-byte opcode.
//
// Arguments:
//    ins  -  instruction
//
// Note that this should be true for any of the instructions in instrsXArch.h
// that use the SSE38 or SSE3A macro.
bool emitter::Is4ByteSSE4Instruction(instruction ins)
#ifdef LEGACY_BACKEND
    // On legacy backend SSE4 is not enabled.
    return UseSSE4() && IsSSE4Instruction(ins) && EncodedBySSE38orSSE3A(ins);
#endif // LEGACY_BACKEND
// ------------------------------------------------------------------------------
// Is4ByteSSE4OrAVXInstruction: Returns true if the SSE4 or AVX instruction is a 4-byte opcode.
//
// Arguments:
//    ins  -  instruction
//
// Note that this should be true for any of the instructions in instrsXArch.h
// that use the SSE38 or SSE3A macro.
bool emitter::Is4ByteSSE4OrAVXInstruction(instruction ins)
#ifdef LEGACY_BACKEND
    // On legacy backend SSE4 and AVX are not enabled.
    // Either the VEX path (SSE4 or AVX-only) or the legacy-encoded SSE4 path
    // applies; in both cases the opcode must use the 0F38/0F3A escape.
    return ((UseVEXEncoding() && (IsSSE4Instruction(ins) || IsAVXOnlyInstruction(ins))) ||
            (UseSSE4() && IsSSE4Instruction(ins))) &&
           EncodedBySSE38orSSE3A(ins);
#endif // LEGACY_BACKEND
#ifndef LEGACY_BACKEND
// Returns true if this instruction requires a VEX prefix.
// All AVX instructions require a VEX prefix.
bool emitter::TakesVexPrefix(instruction ins)
    // special case vzeroupper as it requires 2-byte VEX prefix
    // special case the fencing, movnti and the prefetch instructions as they never take a VEX prefix
    case INS_prefetchnta:
        // Everything else takes a VEX prefix exactly when it is an AVX instruction.
        return IsAVXInstruction(ins);
// Add base VEX prefix without setting W, R, X, or B bits
// L bit will be set based on emitter attr.
//
// 3-byte VEX prefix = C4 <R,X,B,m-mmmm> <W,vvvv,L,pp>
//  - R, X, B, W - bits to express corresponding REX prefixes
//  - m-mmmm (5-bits):
//    0-00001 - implied leading 0F opcode byte
//    0-00010 - implied leading 0F 38 opcode bytes
//    0-00011 - implied leading 0F 3A opcode bytes
//    Rest - reserved for future use and usage of them will result in Undefined instruction exception
//  - vvvv (4-bits) - register specifier in 1's complement form; must be 1111 if unused
//  - L - scalar or AVX-128 bit operations (L=0), 256-bit operations (L=1)
//  - pp (2-bits) - opcode extension providing equivalent functionality of a SIMD size prefix
//                  these prefixes are treated mandatory when used with escape opcode 0Fh for
//                  some SIMD instructions
//    00 - None (0F    - packed float)
//    01 - 66   (66 0F - packed double)
//    10 - F3   (F3 0F - scalar float)
//    11 - F2   (F2 0F - scalar double)
//
// TODO-AMD64-CQ: for simplicity of implementation this routine always adds 3-byte VEX
// prefix. Based on 'attr' param we could add 2-byte VEX prefix in case of scalar
// and AVX-128 bit operations.
#define DEFAULT_3BYTE_VEX_PREFIX 0xC4E07800000000ULL
#define DEFAULT_3BYTE_VEX_PREFIX_MASK 0xFFFFFF00000000ULL
#define LBIT_IN_3BYTE_VEX_PREFIX 0x00000400000000ULL
emitter::code_t emitter::AddVexPrefix(instruction ins, code_t code, emitAttr attr)
    // Only AVX instructions require VEX prefix
    assert(IsAVXInstruction(ins));

    // Shouldn't have already added Vex prefix
    assert(!hasVexPrefix(code));

    // Set L bit to 1 in case of instructions that operate on 256-bits.
    assert((code & DEFAULT_3BYTE_VEX_PREFIX_MASK) == 0);
    code |= DEFAULT_3BYTE_VEX_PREFIX;
    if (attr == EA_32BYTE)
        code |= LBIT_IN_3BYTE_VEX_PREFIX;
#endif // !LEGACY_BACKEND
// Returns true if this instruction, for the given EA_SIZE(attr), will require a REX.W prefix
bool TakesRexWPrefix(instruction ins, emitAttr attr)
#ifndef LEGACY_BACKEND
    // Because the current implementation of AVX does not have a way to distinguish between the register
    // size specification (128 vs. 256 bits) and the operand size specification (32 vs. 64 bits), where both are
    // required, the instruction must be created with the register size attribute (EA_16BYTE or EA_32BYTE),
    // and here we must special case these by the opcode.
#endif // !LEGACY_BACKEND
#ifdef _TARGET_AMD64_
    // movsx should always sign extend out to 8 bytes just because we don't track
    // whether the dest should be 4 bytes or 8 bytes (attr indicates the size
    // of the source, not the dest).
    // A 4-byte movzx is equivalent to an 8 byte movzx, so it is not special
    // cased here.
    //
    // Rex_jmp = jmp with rex prefix always requires rex.w prefix.
    if (ins == INS_movsx || ins == INS_rex_jmp)
    if (EA_SIZE(attr) != EA_8BYTE)
    if (IsSSEOrAVXInstruction(ins))
    // TODO-XArch-Cleanup: Better way to not emit REX.W when we don't need it, than just testing all these
    // opcodes...
    // These are all the instructions that default to 8-byte operand without the REX.W bit
    // With 1 special case: movzx because the 4 byte version still zeros-out the hi 4 bytes
    // so we never need it
    if ((ins != INS_push) && (ins != INS_pop) && (ins != INS_movq) && (ins != INS_movzx) && (ins != INS_push_hide) &&
        (ins != INS_pop_hide) && (ins != INS_ret) && (ins != INS_call) && !((ins >= INS_i_jmp) && (ins <= INS_l_jg)))
#else //!_TARGET_AMD64 = _TARGET_X86_
#endif //!_TARGET_AMD64_
// Returns true if using this register will require a REX.* prefix.
// Since XMM registers overlap with YMM registers, this routine
// can also be used to know whether a YMM register needs a REX.* prefix
// when the instruction in question is AVX.
bool IsExtendedReg(regNumber reg)
#ifdef _TARGET_AMD64_
    // The extended registers are r8-r15 and xmm8-xmm15.
    return ((reg >= REG_R8) && (reg <= REG_R15)) || ((reg >= REG_XMM8) && (reg <= REG_XMM15));
    // X86 JIT operates in 32-bit mode and hence extended reg are not available.
// Returns true if using this register, for the given EA_SIZE(attr), will require a REX.* prefix
bool IsExtendedReg(regNumber reg, emitAttr attr)
#ifdef _TARGET_AMD64_
    // Not a register, so doesn't need a prefix
    // Opcode field only has 3 bits for the register, these high registers
    // need a 4th bit, that comes from the REX prefix (either REX.X, REX.R, or REX.B)
    if (IsExtendedReg(reg))
    if (EA_SIZE(attr) != EA_1BYTE)
    // There are 12 one byte registers addressible 'below' r8b:
    //     al, cl, dl, bl, ah, ch, dh, bh, spl, bpl, sil, dil.
    // The first 4 are always addressible, the last 8 are divided into 2 sets:
    //     ah, ch, dh, bh
    // -- or --
    //     spl, bpl, sil, dil
    // Both sets are encoded exactly the same, the difference is the presence
    // of a REX prefix, even a REX prefix with no other bits set (0x40).
    // So in order to get to the second set we need a REX prefix (but no bits).
    //
    // TODO-AMD64-CQ: if we ever want to start using the first set, we'll need a different way of
    // encoding/tracking/encoding registers.
    return (reg >= REG_RSP);
    // X86 JIT operates in 32-bit mode and hence extended reg are not available.
// Returns true if 'reg' is an XMM register.
// Since XMM registers overlap with YMM registers, this routine
// can also be used to know whether a YMM register in case of AVX instructions.
//
// Legacy X86: we have XMM0-XMM7 available but this routine cannot be used to
// determine whether a reg is XMM because they share the same reg numbers
// with integer registers. Hence always return false.
bool IsXMMReg(regNumber reg)
#ifndef LEGACY_BACKEND
#ifdef _TARGET_AMD64_
    return (reg >= REG_XMM0) && (reg <= REG_XMM15);
#else  // !_TARGET_AMD64_
    return (reg >= REG_XMM0) && (reg <= REG_XMM7);
#endif // !_TARGET_AMD64_
#else // LEGACY_BACKEND
#endif // LEGACY_BACKEND
// Returns bits to be encoded in instruction for the given register.
// Only the low 3 bits are encoded here; the 4th bit for extended registers
// comes from a REX/VEX prefix (see IsExtendedReg).
unsigned RegEncoding(regNumber reg)
#ifndef LEGACY_BACKEND
    static_assert((REG_XMM0 & 0x7) == 0, "bad XMMBASE");
    return (unsigned)(reg & 0x7);
// Utility routines that abstract the logic of adding REX.W, REX.R, REX.X, REX.B and REX prefixes
// SSE2: separate 1-byte prefix gets added before opcode.
// AVX:  specific bits within VEX prefix need to be set in bit-inverted form.
emitter::code_t emitter::AddRexWPrefix(instruction ins, code_t code)
    if (UseVEXEncoding() && IsAVXInstruction(ins))
        // W-bit is available only in 3-byte VEX prefix that starts with byte C4.
        if (TakesVexPrefix(ins))
            assert(hasVexPrefix(code));

            // W-bit is the only bit that is added in non bit-inverted form.
            return emitter::code_t(code | 0x00008000000000ULL);
#ifdef _TARGET_AMD64_
    // Non-VEX case: set REX.W (0x48) in the prefix byte above bit 32.
    return emitter::code_t(code | 0x4800000000ULL);
    // x86 has no REX prefixes.
    assert(!"UNREACHED");
#ifdef _TARGET_AMD64_

// Adds the REX.R bit (or clears the bit-inverted VEX.R bit) to 'code'.
emitter::code_t emitter::AddRexRPrefix(instruction ins, code_t code)
    if (UseVEXEncoding() && IsAVXInstruction(ins))
        // Right now support 3-byte VEX prefix
        if (TakesVexPrefix(ins))
            assert(hasVexPrefix(code));

            // R-bit is added in bit-inverted form.
            return code & 0xFF7FFFFFFFFFFFULL;

    // Non-VEX case: REX prefix with the R bit (0x44).
    return code | 0x4400000000ULL;
// Adds the REX.X bit (or clears the bit-inverted VEX.X bit) to 'code'.
emitter::code_t emitter::AddRexXPrefix(instruction ins, code_t code)
    if (UseVEXEncoding() && IsAVXInstruction(ins))
        // Right now support 3-byte VEX prefix
        if (TakesVexPrefix(ins))
            assert(hasVexPrefix(code));

            // X-bit is added in bit-inverted form.
            return code & 0xFFBFFFFFFFFFFFULL;

    // Non-VEX case: REX prefix with the X bit (0x42).
    return code | 0x4200000000ULL;
// Adds the REX.B bit (or clears the bit-inverted VEX.B bit) to 'code'.
emitter::code_t emitter::AddRexBPrefix(instruction ins, code_t code)
    if (UseVEXEncoding() && IsAVXInstruction(ins))
        // Right now support 3-byte VEX prefix
        if (TakesVexPrefix(ins))
            assert(hasVexPrefix(code));

            // B-bit is added in bit-inverted form.
            return code & 0xFFDFFFFFFFFFFFULL;

    // Non-VEX case: REX prefix with the B bit (0x41).
    return code | 0x4100000000ULL;
623 // Adds REX prefix (0x40) without W, R, X or B bits set
624 emitter::code_t emitter::AddRexPrefix(instruction ins, code_t code)
626 assert(!UseVEXEncoding() || !IsAVXInstruction(ins));
627 return code | 0x4000000000ULL;
630 #endif //_TARGET_AMD64_
632 bool isPrefix(BYTE b)
634 assert(b != 0); // Caller should check this
635 assert(b != 0x67); // We don't use the address size prefix
636 assert(b != 0x65); // The GS segment override prefix is emitted separately
637 assert(b != 0x64); // The FS segment override prefix is emitted separately
638 assert(b != 0xF0); // The lock prefix is emitted separately
639 assert(b != 0x2E); // We don't use the CS segment override prefix
640 assert(b != 0x3E); // Or the DS segment override prefix
641 assert(b != 0x26); // Or the ES segment override prefix
642 assert(b != 0x36); // Or the SS segment override prefix
644 // That just leaves the size prefixes used in SSE opcodes:
645 // Scalar Double Scalar Single Packed Double
646 return ((b == 0xF2) || (b == 0xF3) || (b == 0x66));
// Outputs VEX prefix (in case of AVX instructions) and REX.R/X/W/B otherwise.
// Returns the number of bytes written to 'dst'; 'code' is updated in place so
// that the prefix bytes (now emitted) are stripped out of it.
unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code)
#ifndef LEGACY_BACKEND
    if (hasVexPrefix(code))
        // Only AVX instructions should have a VEX prefix
        assert(UseVEXEncoding() && IsAVXInstruction(ins));
        code_t vexPrefix = (code >> 32) & 0x00FFFFFF;
        code &= 0x00000000FFFFFFFFLL;

        WORD leadingBytes = 0;
        BYTE check = (code >> 24) & 0xFF;
        // 3-byte opcode: with the bytes ordered as 0x2211RM33 or
        // 4-byte opcode: with the bytes ordered as 0x22114433
        // check for a prefix in the 11 position
        BYTE sizePrefix = (code >> 16) & 0xFF;
        if (sizePrefix != 0 && isPrefix(sizePrefix))
            // 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits
            //
            //   00 - None (0F    - packed float)
            //   01 - 66   (66 0F - packed double)
            //   10 - F3   (F3 0F - scalar float)
            //   11 - F2   (F2 0F - scalar double)
            assert(!"unrecognized SIMD size prefix");

        // Now the byte in the 22 position must be an escape byte 0F
        leadingBytes = check;
        assert(leadingBytes == 0x0F);

        // Get rid of both sizePrefix and escape byte
        code &= 0x0000FFFFLL;

        // Check the byte in the 33 position to see if it is 3A or 38.
        // In such a case escape bytes must be 0x0F3A or 0x0F38
        if (check == 0x3A || check == 0x38)
            leadingBytes = (leadingBytes << 8) | check;
            code &= 0x0000FF00LL;
        // 2-byte opcode with the bytes ordered as 0x0011RM22
        // the byte in position 11 must be an escape byte.
        leadingBytes = (code >> 16) & 0xFF;
        assert(leadingBytes == 0x0F || leadingBytes == 0x00);
        // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38
        // m-mmmmm bits in byte 1 of VEX prefix allows us to encode these
        // implied leading bytes
        switch (leadingBytes)
            // there is no leading byte
            assert(!"encountered unknown leading bytes");
        // VEX.2211RM33 got transformed as VEX.0000RM33
        // VEX.0011RM22 got transformed as VEX.0000RM22
        //
        // Now output VEX prefix leaving the 4-byte opcode
        emitOutputByte(dst, ((vexPrefix >> 16) & 0xFF));
        emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0xFF));
        emitOutputByte(dst + 2, vexPrefix & 0xFF);
#endif // !LEGACY_BACKEND

#ifdef _TARGET_AMD64_
    // A REX prefix (if any) lives above bit 32 of 'code'.
    if (code > 0x00FFFFFFFFLL)
        BYTE prefix = (code >> 32) & 0xFF;
        noway_assert(prefix >= 0x40 && prefix <= 0x4F);
        code &= 0x00000000FFFFFFFFLL;

        // TODO-AMD64-Cleanup: when we remove the prefixes (just the SSE opcodes right now)
        // we can remove this code as well
        //
        // The REX prefix is required to come after all other prefixes.
        // Some of our 'opcodes' actually include some prefixes, if that
        // is the case, shift them over and place the REX prefix after
        // the other prefixes, and emit any prefix that got moved out.
        BYTE check = (code >> 24) & 0xFF;
        // 3-byte opcode: with the bytes ordered as 0x00113322
        // check for a prefix in the 11 position
        check = (code >> 16) & 0xFF;
        if (check != 0 && isPrefix(check))
            // Swap the rex prefix and whatever this prefix is
            code = (((DWORD)prefix << 16) | (code & 0x0000FFFFLL));
            // and then emit the other prefix
            return emitOutputByte(dst, check);
        // 4-byte opcode with the bytes ordered as 0x22114433
        // first check for a prefix in the 11 position
        BYTE check2 = (code >> 16) & 0xFF;
        if (isPrefix(check2))
            assert(!isPrefix(check)); // We currently don't use this, so it is untested
            // 3 prefixes were rex = rr, check = c1, check2 = c2 encoded as 0xrrc1c2XXXX
            // Change to c2rrc1XXXX, and emit check2 now
            code = (((code_t)prefix << 24) | ((code_t)check << 16) | (code & 0x0000FFFFLL));
            // 2 prefixes were rex = rr, check2 = c2 encoded as 0xrrXXc2XXXX, (check is part of the opcode)
            // Change to c2XXrrXXXX, and emit check2 now
            code = (((code_t)check << 24) | ((code_t)prefix << 16) | (code & 0x0000FFFFLL));
            return emitOutputByte(dst, check2);

        return emitOutputByte(dst, prefix);
#endif // _TARGET_AMD64_
#ifdef _TARGET_AMD64_
/*****************************************************************************
 *  Is the last instruction emitted a call instruction?
 */
bool emitter::emitIsLastInsCall()
    // emitLastIns may be null (e.g. at the start of an instruction group).
    if ((emitLastIns != nullptr) && (emitLastIns->idIns() == INS_call))
/*****************************************************************************
 *  We're about to create an epilog. If the last instruction we output was a 'call',
 *  then we need to insert a NOP, to allow for proper exception-handling behavior.
 */
void emitter::emitOutputPreEpilogNOP()
    if (emitIsLastInsCall())
#endif //_TARGET_AMD64_
// Size of rex prefix in bytes
unsigned emitter::emitGetRexPrefixSize(instruction ins)
    // In case of AVX instructions, REX prefixes are part of VEX prefix.
    // And hence requires no additional byte to encode REX prefixes.
    if (IsAVXInstruction(ins))

    // If not AVX, then we would need 1-byte to encode REX prefix.
// Size of vex prefix in bytes
unsigned emitter::emitGetVexPrefixSize(instruction ins, emitAttr attr)
    // TODO-XArch-CQ: right now we default to 3-byte VEX prefix. There is a
    // scope for size win by using 2-byte vex prefix for some of the
    // scalar, avx-128 and most common avx-256 instructions.
    if (IsAVXInstruction(ins))

    // If not AVX, then we don't need to encode vex prefix.
// VEX prefix encodes some bytes of the opcode and as a result, overall size of the instruction reduces.
// Therefore, to estimate the size adding VEX prefix size and size of instruction opcode bytes will always overestimate.
// Instead this routine will adjust the size of VEX prefix based on the number of bytes of opcode it encodes so that
// instruction size estimate will be accurate.
// Basically this function will decrease the vexPrefixSize,
// so that opcodeSize + vexPrefixAdjustedSize will be the right size.
//
//   rightOpcodeSize + vexPrefixSize
// = (opcodeSize - ExtrabytesSize) + vexPrefixSize
// = opcodeSize + (vexPrefixSize - ExtrabytesSize)
// = opcodeSize + vexPrefixAdjustedSize
unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, code_t code)
#ifndef LEGACY_BACKEND
    if (IsAVXInstruction(ins))
        unsigned vexPrefixAdjustedSize = emitGetVexPrefixSize(ins, attr);
        // Currently vex prefix size is hard coded as 3 bytes,
        // In future we should support 2 bytes vex prefix.
        assert(vexPrefixAdjustedSize == 3);

        // In this case, opcode will contains escape prefix at least one byte,
        // vexPrefixAdjustedSize should be minus one.
        vexPrefixAdjustedSize -= 1;

        // Get the fourth byte in Opcode.
        // If this byte is non-zero, then we should check whether the opcode contains SIMD prefix or not.
        BYTE check = (code >> 24) & 0xFF;
        // 3-byte opcode: with the bytes ordered as 0x2211RM33 or
        // 4-byte opcode: with the bytes ordered as 0x22114433
        // Simd prefix is at the first byte.
        BYTE sizePrefix = (code >> 16) & 0xFF;
        if (sizePrefix != 0 && isPrefix(sizePrefix))
            // The SIMD size prefix is folded into the VEX 'pp' bits, saving one more byte.
            vexPrefixAdjustedSize -= 1;
        // If the opcode size is 4 bytes, then the second escape prefix is at fourth byte in opcode.
        // But in this case the opcode has not counted R\M part.
        //   opcodeSize + VexPrefixAdjustedSize - ExtraEscapePrefixSize + ModR\MSize
        // = opcodeSize + VexPrefixAdjustedSize - 1 + 1
        // = opcodeSize + VexPrefixAdjustedSize
        // So although we may have second byte escape prefix, we won't decrease vexPrefixAdjustedSize.
        return vexPrefixAdjustedSize;
#endif // !LEGACY_BACKEND
// Get size of rex or vex prefix emitted in code
// (VEX prefixes are currently 3 bytes — see emitGetVexPrefixSize.)
unsigned emitter::emitGetPrefixSize(code_t code)
    if (hasVexPrefix(code))

    if (hasRexPrefix(code))
/*****************************************************************************
 *
 *  Record a non-empty stack level for the current instruction group.
 */
void emitter::emitMarkStackLvl(unsigned stackLevel)
    assert(int(stackLevel) >= 0);
    // Must only be called once per IG, before anything else was recorded.
    assert(emitCurStackLvl == 0);
    assert(emitCurIG->igStkLvl == 0);
    assert(emitCurIGfreeNext == emitCurIGfreeBase);

    // A non-empty stack level must be a positive multiple of the slot size.
    assert(stackLevel && stackLevel % sizeof(int) == 0);

    emitCurStackLvl = emitCurIG->igStkLvl = stackLevel;

    // Track the maximum stack depth seen so far.
    if (emitMaxStackDepth < emitCurStackLvl)
        JITDUMP("Upping emitMaxStackDepth from %d to %d\n", emitMaxStackDepth, emitCurStackLvl);
        emitMaxStackDepth = emitCurStackLvl;
/*****************************************************************************
 *
 *  Get hold of the address mode displacement value for an indirect call.
 */
inline ssize_t emitter::emitGetInsCIdisp(instrDesc* id)
    if (id->idIsLargeCall())
        // Large call descriptors store the displacement out-of-line.
        return ((instrDescCGCA*)id)->idcDisp;
        // Small descriptors keep the displacement inline in the address mode.
        assert(!id->idIsLargeDsp());
        assert(!id->idIsLargeCns());

        return id->idAddr()->iiaAddrMode.amDisp;
/** ***************************************************************************
 *
 *  The following table is used by the instIsFP()/instUse/DefFlags() helpers.
 *  Each entry packs the instruction's FP-ness and flag read/write behavior
 *  into one byte, generated from instrsXArch.h via the INSTn macros below.
 */
#define INST_DEF_FL 0x20 // does the instruction set flags?
#define INST_USE_FL 0x40 // does the instruction use flags?

const BYTE CodeGenInterface::instInfo[] =
#define INST0(id, nm, fp, um, rf, wf, mr ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
#define INST1(id, nm, fp, um, rf, wf, mr ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
#define INST2(id, nm, fp, um, rf, wf, mr, mi ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
#define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
#define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
#define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
/*****************************************************************************
 *
 *  Initialize the table used by emitInsModeFormat().
 *  Each entry is the instruction's update mode ('um'), generated from
 *  instrsXArch.h via the INSTn macros below.
 */
const BYTE emitter::emitInsModeFmtTab[] =
#define INST0(id, nm, fp, um, rf, wf, mr ) um,
#define INST1(id, nm, fp, um, rf, wf, mr ) um,
#define INST2(id, nm, fp, um, rf, wf, mr, mi ) um,
#define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) um,
#define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) um,
#define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) um,
// Number of entries in the update-mode table (used for bounds asserts).
unsigned const emitter::emitInsModeFmtCnt = _countof(emitInsModeFmtTab);
/*****************************************************************************
 *
 *  Combine the given base format with the update mode of the instruction.
 */
inline emitter::insFormat emitter::emitInsModeFormat(instruction ins, insFormat base)
    // The RRD/RWR/RRW formats are laid out consecutively, so the update mode
    // (IUM_RD/IUM_WR/IUM_RW) can simply be added to the base format.
    assert(IF_RRD + IUM_RD == IF_RRD);
    assert(IF_RRD + IUM_WR == IF_RWR);
    assert(IF_RRD + IUM_RW == IF_RRW);

    return (insFormat)(base + emitInsUpdateMode(ins));
/*****************************************************************************
 *
 *  A version of scInsModeFormat() that handles X87 floating-point instructions.
 */
#if FEATURE_STACK_FP_X87
emitter::insFormat emitter::emitInsModeFormat(instruction ins, insFormat base, insFormat FPld, insFormat FPst)
    if (CodeGen::instIsFP(ins))
        // The TRD/TWR/TRW variants of each FP format are laid out consecutively,
        // which the load/store adjustments below rely on.
        assert(IF_TRD_SRD + 1 == IF_TWR_SRD);
        assert(IF_TRD_SRD + 2 == IF_TRW_SRD);

        assert(IF_TRD_MRD + 1 == IF_TWR_MRD);
        assert(IF_TRD_MRD + 2 == IF_TRW_MRD);

        assert(IF_TRD_ARD + 1 == IF_TWR_ARD);
        assert(IF_TRD_ARD + 2 == IF_TRW_ARD);
                return (insFormat)(FPst);
                return (insFormat)(FPld + 1);
                return (insFormat)(FPld);
                return (insFormat)(FPld + 2);
    // Not an FP instruction: fall back to the integer version.
    return emitInsModeFormat(ins, base);
#endif // FEATURE_STACK_FP_X87
// This is a helper we need due to Vs Whidbey #254016 in order to distinguish
// if we can not possibly be updating an integer register. This is not the best
// solution, but the other ones (see bug) are going to be much more complicated.
// The issue here is that on legacy x86, the XMM registers use the same register numbers
// as the general purpose registers, so we need to distinguish them.
// We really only need this for x86 where this issue exists.
bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
    instruction ins = id->idIns();

    // The following SSE2 instructions write to a general purpose integer register.
    if (!IsSSEOrAVXInstruction(ins) || ins == INS_mov_xmm2i || ins == INS_cvttsd2si
#ifndef LEGACY_BACKEND
        || ins == INS_cvttss2si || ins == INS_cvtsd2si || ins == INS_cvtss2si || ins == INS_pmovmskb ||
        ins == INS_pextrw || ins == INS_pextrb || ins == INS_pextrd || ins == INS_pextrq || ins == INS_extractps
#endif // !LEGACY_BACKEND
/*****************************************************************************
 *
 *  Returns the base encoding of the given CPU instruction.
 */
inline size_t insCode(instruction ins)
    // Table of base ('mr') encodings, generated from instrsXArch.h.
#define INST0(id, nm, fp, um, rf, wf, mr ) mr,
#define INST1(id, nm, fp, um, rf, wf, mr ) mr,
#define INST2(id, nm, fp, um, rf, wf, mr, mi ) mr,
#define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) mr,
#define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) mr,
#define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mr,
    assert((unsigned)ins < _countof(insCodes));
    assert((insCodes[ins] != BAD_CODE));

    return insCodes[ins];
/*****************************************************************************
 *
 *  Returns the "AL/AX/EAX, imm" accumulator encoding of the given instruction.
 */
inline size_t insCodeACC(instruction ins)
    // Table of accumulator ('a4') encodings; only INST4/INST5 rows supply one.
    size_t insCodesACC[] =
#define INST0(id, nm, fp, um, rf, wf, mr )
#define INST1(id, nm, fp, um, rf, wf, mr )
#define INST2(id, nm, fp, um, rf, wf, mr, mi )
#define INST3(id, nm, fp, um, rf, wf, mr, mi, rm )
#define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) a4,
#define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) a4,
    assert((unsigned)ins < _countof(insCodesACC));
    assert((insCodesACC[ins] != BAD_CODE));

    return insCodesACC[ins];
/*****************************************************************************
 *
 *  Returns the "register" encoding of the given CPU instruction.
 */
inline size_t insCodeRR(instruction ins)
    // Table of register ('rr') encodings; only INST5 rows supply one.
    size_t insCodesRR[] =
#define INST0(id, nm, fp, um, rf, wf, mr )
#define INST1(id, nm, fp, um, rf, wf, mr )
#define INST2(id, nm, fp, um, rf, wf, mr, mi )
#define INST3(id, nm, fp, um, rf, wf, mr, mi, rm )
#define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 )
#define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) rr,
    assert((unsigned)ins < _countof(insCodesRR));
    assert((insCodesRR[ins] != BAD_CODE));

    return insCodesRR[ins];
// Table of "reg, [r/m]" ('rm') encodings, generated from instrsXArch.h;
// only INST3/INST4/INST5 rows supply one.
size_t insCodesRM[] =
#define INST0(id, nm, fp, um, rf, wf, mr )
#define INST1(id, nm, fp, um, rf, wf, mr )
#define INST2(id, nm, fp, um, rf, wf, mr, mi )
#define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) rm,
#define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) rm,
#define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) rm,
1240 // Returns true iff the give CPU instruction has an RM encoding.
1241 inline bool hasCodeRM(instruction ins)
1243 assert((unsigned)ins < _countof(insCodesRM));
1244 return ((insCodesRM[ins] != BAD_CODE));
1247 /*****************************************************************************
1249 * Returns the "reg, [r/m]" encoding of the given CPU instruction.
1252 inline size_t insCodeRM(instruction ins)
1254 assert((unsigned)ins < _countof(insCodesRM));
1255 assert((insCodesRM[ins] != BAD_CODE));
1257 return insCodesRM[ins];
// Table of "[r/m], imm" ('mi') encodings, generated from instrsXArch.h;
// only INST2 and higher rows supply one.
size_t insCodesMI[] =
#define INST0(id, nm, fp, um, rf, wf, mr )
#define INST1(id, nm, fp, um, rf, wf, mr )
#define INST2(id, nm, fp, um, rf, wf, mr, mi ) mi,
#define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) mi,
#define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) mi,
#define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mi,
// Returns true iff the given CPU instruction has an MI ("[r/m], imm") encoding.
inline bool hasCodeMI(instruction ins)
    assert((unsigned)ins < _countof(insCodesMI));
    return ((insCodesMI[ins] != BAD_CODE));
/*****************************************************************************
 *
 *  Returns the "[r/m], 32-bit icon" encoding of the given CPU instruction.
 */
inline size_t insCodeMI(instruction ins)
    assert((unsigned)ins < _countof(insCodesMI));
    assert((insCodesMI[ins] != BAD_CODE));

    return insCodesMI[ins];
// Table of "[r/m], reg" ('mr') encodings, generated from instrsXArch.h;
// INST1 and higher rows supply one.
size_t insCodesMR[] =
#define INST0(id, nm, fp, um, rf, wf, mr )
#define INST1(id, nm, fp, um, rf, wf, mr ) mr,
#define INST2(id, nm, fp, um, rf, wf, mr, mi ) mr,
#define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) mr,
#define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) mr,
#define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mr,
// Returns true iff the given CPU instruction has an MR ("[r/m], reg") encoding.
inline bool hasCodeMR(instruction ins)
    assert((unsigned)ins < _countof(insCodesMR));
    return ((insCodesMR[ins] != BAD_CODE));
/*****************************************************************************
 *
 *  Returns the "[r/m], reg" or "[r/m]" encoding of the given CPU instruction.
 */
inline size_t insCodeMR(instruction ins)
    assert((unsigned)ins < _countof(insCodesMR));
    assert((insCodesMR[ins] != BAD_CODE));

    return insCodesMR[ins];
// Return true if the instruction uses the SSE38 or SSE3A macro in instrsXArch.h.
bool emitter::EncodedBySSE38orSSE3A(instruction ins)
    // Canonical 0F 38 / 0F 3A opcode patterns and the mask used to compare them.
    const size_t SSE38 = 0x0F660038;
    const size_t SSE3A = 0x0F66003A;
    const size_t MASK  = 0xFFFF00FF;

        insCode = insCodeRM(ins);
    else if (hasCodeMI(ins))
        insCode = insCodeMI(ins);
    else if (hasCodeMR(ins))
        insCode = insCodeMR(ins);

    return insCode == SSE38 || insCode == SSE3A;
/*****************************************************************************
 *
 *  Returns an encoding for the specified register to be used in the bit0-2
 *  part of an opcode.
 */
inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAttr size, code_t* code)
    assert(reg < REG_STK);

#ifndef LEGACY_BACKEND
#ifdef _TARGET_AMD64_
    // Either code is not NULL or reg is not an extended reg.
    // If reg is an extended reg, instruction needs to be prefixed with 'REX'
    // which would require code != NULL.
    assert(code != nullptr || !IsExtendedReg(reg));

    if (IsExtendedReg(reg))
        *code = AddRexBPrefix(ins, *code); // REX.B
    else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr))
        // We are assuming that we only use/encode SPL, BPL, SIL and DIL
        // not the corresponding AH, CH, DH, or BH
        *code = AddRexPrefix(ins, *code); // REX
#endif // _TARGET_AMD64_

    unsigned regBits = RegEncoding(reg);

#else // LEGACY_BACKEND

    // Legacy backend: register numbers map directly to encoding bits.
    unsigned regBits = reg;

#endif // LEGACY_BACKEND

    assert(regBits < 8);
1407 /*****************************************************************************
1409 * Returns an encoding for the specified register to be used in the bit3-5
1410 * part of an opcode.
1413 inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAttr size, code_t* code)
1415 assert(reg < REG_STK);
1417 #ifndef LEGACY_BACKEND
1418 #ifdef _TARGET_AMD64_
1419 // Either code is not NULL or reg is not an extended reg.
1420 // If reg is an extended reg, instruction needs to be prefixed with 'REX'
1421 // which would require code != NULL.
1422 assert(code != nullptr || !IsExtendedReg(reg));
1424 if (IsExtendedReg(reg))
1426 *code = AddRexRPrefix(ins, *code); // REX.R
1428 else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr))
1430 // We are assuming that we only use/encode SPL, BPL, SIL and DIL
1431 // not the corresponding AH, CH, DH, or BH
1432 *code = AddRexPrefix(ins, *code); // REX
1434 #endif // _TARGET_AMD64_
1436 unsigned regBits = RegEncoding(reg);
1438 #else // LEGACY_BACKEND
1440 unsigned regBits = reg;
1442 #endif // LEGACY_BACKEND
1444 assert(regBits < 8);
1445 return (regBits << 3);
1448 /***********************************************************************************
1450  *  Returns modified AVX opcode with the specified register encoded in bits 3-6 of
1451  *  byte 2 of VEX prefix.
 */
// Only meaningful for VEX-encoded (AVX) instructions: 'code' must already
// carry a VEX prefix, and the encoded register lands in the vvvv field.
1453 inline emitter::code_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, code_t code)
1455 #ifndef LEGACY_BACKEND
1456 assert(reg < REG_STK);
1457 assert(IsAVXInstruction(ins));
1458 assert(hasVexPrefix(code));
1460 // Get 4-bit register encoding
1461 // RegEncoding() gives lower 3 bits
1462 // IsExtendedReg() gives MSB.
1463 code_t regBits = RegEncoding(reg);
1464 if (IsExtendedReg(reg))
// NOTE(review): the branch body is elided in this excerpt — presumably it ORs
// in the 4th bit (0x8) for extended registers; confirm against full source.
1469 // VEX prefix encodes register operand in 1's complement form
1470 // Shift count = 4-bytes of opcode + 0-2 bits
1471 assert(regBits <= 0xF);
// NOTE(review): a shift of regBits into the vvvv bit position appears to be
// elided before this XOR — confirm against full source.
1473 return code ^ regBits;
1480 /*****************************************************************************
1482 * Returns an encoding for the specified register to be used in the bit3-5
1483 * part of an SIB byte (unshifted).
1484 * Used exclusively to generate the REX.X bit and truncate the register.
1487 inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, code_t* code)
1489 assert(reg < REG_STK);
1491 #ifdef _TARGET_AMD64_
1492 // Either code is not NULL or reg is not an extended reg.
1493 // If reg is an extended reg, instruction needs to be prefixed with 'REX'
1494 // which would require code != NULL.
1495 assert(code != nullptr || reg < REG_R8 || (reg >= REG_XMM0 && reg < REG_XMM8));
1497 if (IsExtendedReg(reg))
1499 *code = AddRexXPrefix(ins, *code); // REX.X
1501 unsigned regBits = RegEncoding(reg);
1502 #else // !_TARGET_AMD64_
1503 unsigned regBits = reg;
1504 #endif // !_TARGET_AMD64_
1506 assert(regBits < 8);
1510 /*****************************************************************************
1512 * Returns the "[r/m]" opcode with the mod/RM field set to register.
1515 inline emitter::code_t emitter::insEncodeMRreg(instruction ins, code_t code)
1517 // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes.
1518 // Otherwise, it will be placed after the 4 byte encoding.
1519 if ((code & 0xFF00) == 0)
1521 assert((code & 0xC000) == 0);
1528 /*****************************************************************************
1530 * Returns the given "[r/m]" opcode with the mod/RM field set to register.
1533 inline emitter::code_t emitter::insEncodeRMreg(instruction ins, code_t code)
1535 // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes.
1536 // Otherwise, it will be placed after the 4 byte encoding.
1537 if ((code & 0xFF00) == 0)
1539 assert((code & 0xC000) == 0);
1545 /*****************************************************************************
1547 * Returns the "byte ptr [r/m]" opcode with the mod/RM field set to
1548 * the given register.
1551 inline emitter::code_t emitter::insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, code_t code)
1553 assert((code & 0xC000) == 0);
1555 unsigned regcode = insEncodeReg012(ins, reg, size, &code) << 8;
1560 /*****************************************************************************
1562 * Returns the "byte ptr [r/m], icon" opcode with the mod/RM field set to
1563 * the given register.
1566 inline emitter::code_t emitter::insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, code_t code)
1568 assert((code & 0xC000) == 0);
1570 unsigned regcode = insEncodeReg012(ins, reg, size, &code) << 8;
1575 /*****************************************************************************
1577 * Returns true iff the given instruction does not have a "[r/m], icon" form, but *does* have a
1578 * "reg,reg,imm8" form.
1580 inline bool insNeedsRRIb(instruction ins)
1582 // If this list gets longer, use a switch or a table.
1583 return ins == INS_imul;
1586 /*****************************************************************************
1588 * Returns the "reg,reg,imm8" opcode with both the reg's set to the
1589 * the given register.
1591 inline emitter::code_t emitter::insEncodeRRIb(instruction ins, regNumber reg, emitAttr size)
1593 assert(size == EA_4BYTE); // All we handle for now.
1594 assert(insNeedsRRIb(ins));
1595 // If this list gets longer, use a switch, or a table lookup.
1596 code_t code = 0x69c0;
1597 unsigned regcode = insEncodeReg012(ins, reg, size, &code);
1598 // We use the same register as source and destination. (Could have another version that does both regs...)
1600 code |= (regcode << 3);
1604 /*****************************************************************************
1606 * Returns the "+reg" opcode with the the given register set into the low
1607 * nibble of the opcode
1610 inline emitter::code_t emitter::insEncodeOpreg(instruction ins, regNumber reg, emitAttr size)
1612 code_t code = insCodeRR(ins);
1613 unsigned regcode = insEncodeReg012(ins, reg, size, &code);
1618 /*****************************************************************************
1620 * Return the 'SS' field value for the given index scale factor.
1623 inline unsigned emitter::insSSval(unsigned scale)
1625 assert(scale == 1 || scale == 2 || scale == 4 || scale == 8);
1627 const static BYTE scales[] = {
1638 return scales[scale - 1];
// Table mapping an emitJumpKind to its x86 jump instruction; the leading
// INS_nop fills the slot for the "no jump" kind, and the remaining entries
// are generated from emitjmps.h via the JMP_SMALL macro.
1641 const instruction emitJumpKindInstructions[] = {INS_nop,
1643 #define JMP_SMALL(en, rev, ins) INS_##ins,
1644 #include "emitjmps.h"
// Table mapping each emitJumpKind to the jump kind with the reversed
// condition, generated from the 'rev' column of emitjmps.h.
1648 const emitJumpKind emitReverseJumpKinds[] = {
1651 #define JMP_SMALL(en, rev, ins) EJ_##rev,
1652 #include "emitjmps.h"
1655 /*****************************************************************************
1656 * Look up the instruction for a jump kind
1659 /*static*/ instruction emitter::emitJumpKindToIns(emitJumpKind jumpKind)
1661 assert((unsigned)jumpKind < ArrLen(emitJumpKindInstructions));
1662 return emitJumpKindInstructions[jumpKind];
1665 /*****************************************************************************
1666 * Reverse the conditional jump
1669 /* static */ emitJumpKind emitter::emitReverseJumpKind(emitJumpKind jumpKind)
1671 assert(jumpKind < EJ_COUNT);
1672 return emitReverseJumpKinds[jumpKind];
1675 /*****************************************************************************
1676 * The size for these instructions is less than EA_4BYTE,
1677 * but the target register need not be byte-addressable
1680 inline bool emitInstHasNoCode(instruction ins)
1682 if (ins == INS_align)
1690 /*****************************************************************************
1691 * When encoding instructions that operate on byte registers
1692 * we have to ensure that we use a low register (EAX, EBX, ECX or EDX)
1693 * otherwise we will incorrectly encode the instruction
1696 bool emitter::emitVerifyEncodable(instruction ins, emitAttr size, regNumber reg1, regNumber reg2 /* = REG_NA */)
1698 #if CPU_HAS_BYTE_REGS
1699 if (size != EA_1BYTE) // Not operating on a byte register is fine
1704 if ((ins != INS_movsx) && // These three instructions support high register
1705 (ins != INS_movzx) // encodings for reg1
1706 #ifdef FEATURE_HW_INTRINSICS
1707 && (ins != INS_crc32)
1711 // reg1 must be a byte-able register
1712 if ((genRegMask(reg1) & RBM_BYTE_REGS) == 0)
1717 // if reg2 is not REG_NA then reg2 must be a byte-able register
1718 if ((reg2 != REG_NA) && ((genRegMask(reg2) & RBM_BYTE_REGS) == 0))
1723 // The instruction can be encoded
1727 /*****************************************************************************
1729 * Estimate the size (in bytes of generated code) of the given instruction.
1732 inline UNATIVE_OFFSET emitter::emitInsSize(code_t code)
1734 UNATIVE_OFFSET size = (code & 0xFF000000) ? 4 : (code & 0x00FF0000) ? 3 : 2;
1735 #ifdef _TARGET_AMD64_
1736 size += emitGetPrefixSize(code);
1741 inline UNATIVE_OFFSET emitter::emitInsSizeRM(instruction ins)
1743 return emitInsSize(insCodeRM(ins));
1746 inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr)
1748 emitAttr size = EA_SIZE(attr);
1752 // If Byte 4 (which is 0xFF00) is zero, that's where the RM encoding goes.
1753 // Otherwise, it will be placed after the 4 byte encoding, making the total 5 bytes.
1754 // This would probably be better expressed as a different format or something?
1755 code_t code = insCodeRM(ins);
1757 if ((code & 0xFF00) != 0)
1763 sz = emitInsSize(insEncodeRMreg(ins, code));
1766 // Most 16-bit operand instructions will need a prefix
1767 if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx)
1773 sz += emitGetVexPrefixAdjustedSize(ins, size, insCodeRM(ins));
1776 if (!hasRexPrefix(code))
1778 if ((TakesRexWPrefix(ins, size) && ((ins != INS_xor) || (reg1 != reg2))) || IsExtendedReg(reg1, attr) ||
1779 IsExtendedReg(reg2, attr))
1781 sz += emitGetRexPrefixSize(ins);
1788 /*****************************************************************************/

// Estimate the code size of an instruction referencing a stack-based local
// variable or temp 'var' at displacement 'dsp': the base opcode size plus the
// number of displacement bytes (0, 1 or 4) needed for the frame offset.
// When the final frame layout is not yet known, a conservative upper bound
// for the offset is computed instead.
1790 inline UNATIVE_OFFSET emitter::emitInsSizeSV(code_t code, int var, int dsp)
1792     UNATIVE_OFFSET size = emitInsSize(code);
1793     UNATIVE_OFFSET offs;
1794     bool           offsIsUpperBound = true;
1795     bool           EBPbased         = true;
1797     /* Is this a temporary? */
1801         /* An address off of ESP takes an extra byte */
1803         if (!emitHasFramePtr)
1808 #ifndef LEGACY_BACKEND
1809         // The offset is already assigned. Find the temp.
1810         TempDsc* tmp = emitComp->tmpFindNum(var, Compiler::TEMP_USAGE_USED);
1813             // It might be in the free lists, if we're working on zero initializing the temps.
1814             tmp = emitComp->tmpFindNum(var, Compiler::TEMP_USAGE_FREE);
1816         assert(tmp != nullptr);
1817         offs = tmp->tdTempOffs();
1819         // We only care about the magnitude of the offset here, to determine instruction size.
1820         if (emitComp->isFramePointerUsed())
1829             // SP-based offsets must already be positive.
1830             assert((int)offs >= 0);
1832 #else // LEGACY_BACKEND
1833         /* We'll have to estimate the max. possible offset of this temp */
1835         // TODO: Get an estimate of the temp offset instead of assuming
1836         // TODO: that any temp may be at the max. temp offset!!!!!!!!!!
1838         if (emitComp->lvaTempsHaveLargerOffsetThanVars())
1840             offs = emitLclSize + emitMaxTmpSize;
1844             offs = emitMaxTmpSize;
1847         offsIsUpperBound = false;
1848 #endif // LEGACY_BACKEND
1853         /* Get the frame offset of the (non-temp) variable */
1855         offs = dsp + emitComp->lvaFrameAddress(var, &EBPbased);
1857         /* An address off of ESP takes an extra byte */
1864         /* Is this a stack parameter reference? */
1866         if (emitComp->lvaIsParameter(var)
1867 #if !defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI)
1868             && !emitComp->lvaIsRegArgument(var)
1869 #endif // !_TARGET_AMD64_ || UNIX_AMD64_ABI
1872             /* If no EBP frame, arguments are off of ESP, above temps */
1876                 assert((int)offs >= 0);
1878                 offsIsUpperBound = false; // since #temps can increase
1879                 offs += emitMaxTmpSize;
1884             /* Locals off of EBP are at negative offsets */
1888 #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
1889             // If localloc is not used, then ebp chaining is done and hence
1890             // offset of locals will be at negative offsets, Otherwise offsets
1891             // will be positive.  In future, when RBP gets positioned in the
1892             // middle of the frame so as to optimize instruction encoding size,
1893             // the below asserts needs to be modified appropriately.
1894             // However, for Unix platforms, we always do frame pointer chaining,
1895             // so offsets from the frame pointer will always be negative.
1896             if (emitComp->compLocallocUsed || emitComp->opts.compDbgEnC)
1898                 noway_assert((int)offs >= 0);
1903                 // Dev10 804810 - failing this assert can lead to bad codegen and runtime crashes
1904                 CLANG_FORMAT_COMMENT_ANCHOR;
1906 #ifdef UNIX_AMD64_ABI
1907                 LclVarDsc* varDsc         = emitComp->lvaTable + var;
1908                 bool       isRegPassedArg = varDsc->lvIsParam && varDsc->lvIsRegArg;
1909                 // Register passed args could have a stack offset of 0.
1910                 noway_assert((int)offs < 0 || isRegPassedArg);
1911 #else  // !UNIX_AMD64_ABI
1912                 noway_assert((int)offs < 0);
1913 #endif // !UNIX_AMD64_ABI
1916             assert(emitComp->lvaTempsHaveLargerOffsetThanVars());
1918             // lvaInlinedPInvokeFrameVar and lvaStubArgumentVar are placed below the temps
1919             if (unsigned(var) == emitComp->lvaInlinedPInvokeFrameVar ||
1920                 unsigned(var) == emitComp->lvaStubArgumentVar)
1922                 offs -= emitMaxTmpSize;
1927             // offset is negative
1928             return size + ((int(offs) >= SCHAR_MIN) ? sizeof(char) : sizeof(int));
1930 #ifdef _TARGET_AMD64_
1931             // This case arises for localloc frames
1934             return size + ((offs <= SCHAR_MAX) ? sizeof(char) : sizeof(int));
1939         if (emitComp->lvaTempsHaveLargerOffsetThanVars() == false)
1941             offs += emitMaxTmpSize;
1946     assert((int)offs >= 0);
1948 #if !FEATURE_FIXED_OUT_ARGS
1950     /* Are we addressing off of ESP? */
1952     if (!emitHasFramePtr)
1954         /* Adjust the effective offset if necessary */
1956         if (emitCntStackDepth)
1957             offs += emitCurStackLvl;
1959         // we could (and used to) check for the special case [sp] here but the stack offset
1960         // estimator was off, and there is very little harm in overestimating for such a
// rare case.
1964 #endif // !FEATURE_FIXED_OUT_ARGS
1966     //  printf("lcl = %04X, tmp = %04X, stk = %04X, offs = %04X\n",
1967     //         emitLclSize, emitMaxTmpSize, emitCurStackLvl, offs);
1969 #ifdef _TARGET_AMD64_
1970     bool useSmallEncoding = (SCHAR_MIN <= (int)offs) && ((int)offs <= SCHAR_MAX);
// On x86 the offset is known non-negative here, so only the upper bound matters.
1972     bool useSmallEncoding = (offs <= size_t(SCHAR_MAX));
1975 #ifdef LEGACY_BACKEND
1976     /* If we are using a small encoding, there is a danger that we might
1977        end up having to use a larger encoding. Record 'offs' so that
1978        we can detect if such a situation occurs */
1980     if (useSmallEncoding && !offsIsUpperBound)
1982         if (emitGrowableMaxByteOffs < offs)
1984             emitGrowableMaxByteOffs = offs;
1986             // Remember which instruction this is
1987             emitMaxByteOffsIdNum = emitInsCount;
1991 #endif // LEGACY_BACKEND
1993     // If it is ESP based, and the offset is zero, we will not encode the disp part.
1994     if (!EBPbased && offs == 0)
2000         return size + (useSmallEncoding ? sizeof(char) : sizeof(int));
2004 inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, int var, int dsp, int val)
2006 instruction ins = id->idIns();
2007 UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize());
2008 UNATIVE_OFFSET prefix = 0;
2009 bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
2011 #ifdef _TARGET_AMD64_
2012 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
2013 // all other opcodes take a sign-extended 4-byte immediate
2014 noway_assert(valSize <= sizeof(int) || !id->idIsCnsReloc());
2015 #endif // _TARGET_AMD64_
2017 if (valSize > sizeof(int))
2019 valSize = sizeof(int);
2022 if (id->idIsCnsReloc())
2024 valInByte = false; // relocs can't be placed in a byte
2025 assert(valSize == sizeof(int));
2030 valSize = sizeof(char);
2033 // 16-bit operand instructions need a prefix.
2034 // This referes to 66h size prefix override
2035 if (id->idOpSize() == EA_2BYTE)
2040 return prefix + valSize + emitInsSizeSV(insCodeMI(ins), var, dsp);
2043 /*****************************************************************************/
2045 static bool baseRegisterRequiresSibByte(regNumber base)
2047 #ifdef _TARGET_AMD64_
2048 return base == REG_ESP || base == REG_R12;
2050 return base == REG_ESP;
2054 static bool baseRegisterRequiresDisplacement(regNumber base)
2056 #ifdef _TARGET_AMD64_
2057 return base == REG_EBP || base == REG_R13;
2059 return base == REG_EBP;
// Estimate the code size of an instruction that uses an address mode
// [reg + scale*rgx + disp]: opcode bytes, prefixes (66h/VEX/REX), plus the
// ModRM/SIB and displacement bytes implied by the base/index registers and
// the displacement magnitude.
2063 UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
2065     emitAttr    attrSize = id->idOpSize();
2066     instruction ins      = id->idIns();
2067     /* The displacement field is in an unusual place for calls */
2068     ssize_t        dsp       = (ins == INS_call) ? emitGetInsCIdisp(id) : emitGetInsAmdAny(id);
2069     bool           dspInByte = ((signed char)dsp == (ssize_t)dsp);
2070     bool           dspIsZero = (dsp == 0);
2071     UNATIVE_OFFSET size;
2073     // Note that the values in reg and rgx are used in this method to decide
2074     // how many bytes will be needed by the address [reg+rgx+cns]
2075     // this includes the prefix bytes when reg or rgx are registers R8-R15
2079     // The idAddr field is a union and only some of the instruction formats use the iiaAddrMode variant
2080     // these are IF_AWR_*, IF_ARD_*, IF_ARW_* and IF_*_ARD
2081     // ideally these should really be the only idInsFmts that we see here
2082     //  but we have some outliers to deal with:
2083     //     emitIns_R_L adds IF_RWR_LABEL and calls emitInsSizeAM
2084     //     emitInsRMW adds IF_MRW_CNS, IF_MRW_RRD, IF_MRW_SHF, and calls emitInsSizeAM
2086     switch (id->idInsFmt())
2097             reg = id->idAddr()->iiaAddrMode.amBaseReg;
2098             rgx = id->idAddr()->iiaAddrMode.amIndxReg;
2102     if (id->idIsDspReloc())
2104         dspInByte = false; // relocs can't be placed in a byte
2105         dspIsZero = false; // relocs won't always be zero
2108     if (code & 0xFF000000)
2112     else if (code & 0x00FF0000)
2114         // BT supports 16 bit operands and this code doesn't handle the necessary 66 prefix.
2115         assert(ins != INS_bt);
2117         assert((attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE)    // Only for x64
2118                || (attrSize == EA_16BYTE) || (attrSize == EA_32BYTE) // only for x64
2119                || (ins == INS_movzx) || (ins == INS_movsx)
2120                // The prefetch instructions are always 3 bytes and have part of their modr/m byte hardcoded
2121                || isPrefetch(ins));
2128         // Most 16-bit operands will require a size prefix.
2129         // This refers to 66h size prefix override.
2130         CLANG_FORMAT_COMMENT_ANCHOR;
2132 #if FEATURE_STACK_FP_X87
2133         if ((attrSize == EA_2BYTE) && (ins != INS_fldcw) && (ins != INS_fnstcw))
2134 #else // FEATURE_STACK_FP_X87
2135         if (attrSize == EA_2BYTE)
2136 #endif // FEATURE_STACK_FP_X87
2142     size += emitGetVexPrefixAdjustedSize(ins, attrSize, code);
2144     if (hasRexPrefix(code))
2147         size += emitGetRexPrefixSize(ins);
2149     else if (TakesRexWPrefix(ins, attrSize))
2152         size += emitGetRexPrefixSize(ins);
2154     else if (IsExtendedReg(reg, EA_PTRSIZE) || IsExtendedReg(rgx, EA_PTRSIZE) ||
2155              ((ins != INS_call) && IsExtendedReg(id->idReg1(), attrSize)))
2157         // Should have a REX byte
2158         size += emitGetRexPrefixSize(ins);
2163         /* The address is of the form "[reg+disp]" */
2167             /* The address is of the form "[disp]" */
2169             size += sizeof(INT32);
2171 #ifdef _TARGET_AMD64_
2172             // If id is not marked for reloc, add 1 additional byte for SIB that follows disp32
2173             if (!id->idIsDspReloc())
2181         // If this is just "call reg", we're done.
2182         if (id->idIsCallRegPtr())
2184             assert(ins == INS_call);
2189         // If the base register is ESP (or R12 on 64-bit systems), a SIB byte must be used.
2190         if (baseRegisterRequiresSibByte(reg))
2195         // If the base register is EBP (or R13 on 64-bit systems), a displacement is required.
2196         // Otherwise, the displacement can be elided if it is zero.
2197         if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
2202         /* Does the offset fit in a byte? */
2206             size += sizeof(char);
2210             size += sizeof(INT32);
2215     /* An index register is present */
2219     /* Is the index value scaled? */
2221     if (emitDecodeScale(id->idAddr()->iiaAddrMode.amScale) > 1)
2223         /* Is there a base register? */
2227             /* The address is "[reg + {2/4/8} * rgx + icon]" */
2229             if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
2231                 /* The address is "[reg + {2/4/8} * rgx]" */
2235                 /* The address is "[reg + {2/4/8} * rgx + disp]" */
2239                     size += sizeof(char);
2243                     size += sizeof(int);
2249             /* The address is "[{2/4/8} * rgx + icon]" */
2251             size += sizeof(INT32);
2256         if (dspIsZero && baseRegisterRequiresDisplacement(reg) && !baseRegisterRequiresDisplacement(rgx))
2258             /* Swap reg and rgx, such that reg is not EBP/R13 */
2259             regNumber tmp                       = reg;
2260             id->idAddr()->iiaAddrMode.amBaseReg = reg = rgx;
2261             id->idAddr()->iiaAddrMode.amIndxReg = rgx = tmp;
2264         /* The address is "[reg+rgx+dsp]" */
2266         if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
2268             /* This is [reg+rgx]" */
2272             /* This is [reg+rgx+dsp]" */
2276                 size += sizeof(char);
2280                 size += sizeof(int);
2289 inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code, int val)
2291 instruction ins = id->idIns();
2292 UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize());
2293 bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
2295 // We should never generate BT mem,reg because it has poor performance. BT mem,imm might be useful
2296 // but it requires special handling of the immediate value (it is always encoded in a byte).
2297 // Let's not complicate things until this is needed.
2298 assert(ins != INS_bt);
2300 #ifdef _TARGET_AMD64_
2301 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
2302 // all other opcodes take a sign-extended 4-byte immediate
2303 noway_assert(valSize <= sizeof(INT32) || !id->idIsCnsReloc());
2304 #endif // _TARGET_AMD64_
2306 if (valSize > sizeof(INT32))
2308 valSize = sizeof(INT32);
2311 if (id->idIsCnsReloc())
2313 valInByte = false; // relocs can't be placed in a byte
2314 assert(valSize == sizeof(INT32));
2319 valSize = sizeof(char);
2322 return valSize + emitInsSizeAM(id, code);
2325 inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code)
2327 instruction ins = id->idIns();
2329 // fgMorph changes any statics that won't fit into 32-bit addresses
2330 // into constants with an indir, rather than GT_CLS_VAR
2331 // so we should only hit this path for statics that are RIP-relative
2332 UNATIVE_OFFSET size = sizeof(INT32);
2334 // Most 16-bit operand instructions will need a prefix.
2335 // This refers to 66h size prefix override.
2337 if (id->idOpSize() == EA_2BYTE && ins != INS_movzx && ins != INS_movsx)
2342 return size + emitInsSize(code);
2345 inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code, int val)
2347 instruction ins = id->idIns();
2348 UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize());
2349 bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
2351 #ifndef _TARGET_AMD64_
2352 // occasionally longs get here on x86
2353 if (valSize > sizeof(INT32))
2354 valSize = sizeof(INT32);
2355 #endif // !_TARGET_AMD64_
2357 if (id->idIsCnsReloc())
2359 valInByte = false; // relocs can't be placed in a byte
2360 assert(valSize == sizeof(INT32));
2365 valSize = sizeof(char);
2368 return valSize + emitInsSizeCV(id, code);
2371 /*****************************************************************************
2373 * Allocate instruction descriptors for instructions with address modes.
2376 inline emitter::instrDesc* emitter::emitNewInstrAmd(emitAttr size, ssize_t dsp)
2378 if (dsp < AM_DISP_MIN || dsp > AM_DISP_MAX)
2380 instrDescAmd* id = emitAllocInstrAmd(size);
2382 id->idSetIsLargeDsp();
2384 id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2386 id->idaAmdVal = dsp;
2392 instrDesc* id = emitAllocInstr(size);
2394 id->idAddr()->iiaAddrMode.amDisp = dsp;
2395 assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2401 /*****************************************************************************
2403 * Set the displacement field in an instruction. Only handles instrDescAmd type.
2406 inline void emitter::emitSetAmdDisp(instrDescAmd* id, ssize_t dsp)
2408 if (dsp < AM_DISP_MIN || dsp > AM_DISP_MAX)
2410 id->idSetIsLargeDsp();
2412 id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2414 id->idaAmdVal = dsp;
2418 id->idSetIsSmallDsp();
2419 id->idAddr()->iiaAddrMode.amDisp = dsp;
2420 assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2424 /*****************************************************************************
2426 * Allocate an instruction descriptor for an instruction that uses both
2427 * an address mode displacement and a constant.
2430 emitter::instrDesc* emitter::emitNewInstrAmdCns(emitAttr size, ssize_t dsp, int cns)
2432 if (dsp >= AM_DISP_MIN && dsp <= AM_DISP_MAX)
2434 if (cns >= ID_MIN_SMALL_CNS && cns <= ID_MAX_SMALL_CNS)
2436 instrDesc* id = emitAllocInstr(size);
2438 id->idSmallCns(cns);
2440 id->idAddr()->iiaAddrMode.amDisp = dsp;
2441 assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2447 instrDescCns* id = emitAllocInstrCns(size);
2449 id->idSetIsLargeCns();
2450 id->idcCnsVal = cns;
2452 id->idAddr()->iiaAddrMode.amDisp = dsp;
2453 assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2460 if (cns >= ID_MIN_SMALL_CNS && cns <= ID_MAX_SMALL_CNS)
2462 instrDescAmd* id = emitAllocInstrAmd(size);
2464 id->idSetIsLargeDsp();
2466 id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2468 id->idaAmdVal = dsp;
2470 id->idSmallCns(cns);
2476 instrDescCnsAmd* id = emitAllocInstrCnsAmd(size);
2478 id->idSetIsLargeCns();
2479 id->idacCnsVal = cns;
2481 id->idSetIsLargeDsp();
2483 id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2485 id->idacAmdVal = dsp;
2492 /*****************************************************************************
2494 * The next instruction will be a loop head entry point
2495 * So insert a dummy instruction here to ensure that
2496 * the x86 I-cache alignment rule is followed.
2499 void emitter::emitLoopAlign()
2501 /* Insert a pseudo-instruction to ensure that we align
2502 the next instruction properly */
2504 instrDesc* id = emitNewInstrTiny(EA_1BYTE);
2505 id->idIns(INS_align);
2506 id->idCodeSize(15); // We may need to skip up to 15 bytes of code
2507 emitCurIGsize += 15;
2510 /*****************************************************************************
2512 * Add a NOP instruction of the given size.
2515 void emitter::emitIns_Nop(unsigned size)
2519 instrDesc* id = emitNewInstr();
2521 id->idInsFmt(IF_NONE);
2522 id->idCodeSize(size);
2525 emitCurIGsize += size;
2528 /*****************************************************************************
2530 * Add an instruction with no operands.
2533 static bool isX87InsWithNoOperands(instruction ins)
2535 #if FEATURE_STACK_FP_X87
2536 return (ins == INS_f2xm1 || ins == INS_fchs || ins == INS_fld1 || ins == INS_fld1 || ins == INS_fldl2e ||
2537 ins == INS_fldz || ins == INS_fprem || ins == INS_frndint || ins == INS_fscale);
2538 #else // !FEATURE_STACK_FP_X87
2540 #endif // !FEATURE_STACK_FP_X87
// Emit an instruction that takes no operands (e.g. nop, ret, cdq, fences).
// Computes the encoded size from the packed opcode bytes and appends the
// descriptor to the current instruction group.
2544 void emitter::emitIns(instruction ins)
2547     instrDesc* id   = emitNewInstr();
2548     code_t     code = insCodeMR(ins);
2551 #if FEATURE_STACK_FP_X87
2552     if (ins != INS_fabs && ins != INS_fsqrt && ins != INS_fsin && ins != INS_fcos)
2553 #endif // FEATURE_STACK_FP_X87
2556         // We cannot have #ifdef inside macro expansion.
2557         bool assertCond = (ins == INS_cdq || isX87InsWithNoOperands(ins) || ins == INS_int3 || ins == INS_lock ||
2558                            ins == INS_leave || ins == INS_movsb || ins == INS_movsd || ins == INS_movsp ||
2559                            ins == INS_nop || ins == INS_r_movsb || ins == INS_r_movsd || ins == INS_r_movsp ||
2560                            ins == INS_r_stosb || ins == INS_r_stosd || ins == INS_r_stosp || ins == INS_ret ||
2561                            ins == INS_sahf || ins == INS_stosb || ins == INS_stosd || ins == INS_stosp
2562 #ifndef LEGACY_BACKEND
2563                            // These instructions take zero operands
2564                            || ins == INS_vzeroupper || ins == INS_lfence || ins == INS_mfence || ins == INS_sfence
2572     assert(!hasRexPrefix(code)); // Can't have a REX bit with no operands, right?
// Size is derived from which bytes of the packed opcode are populated.
2574     if (code & 0xFF000000)
2576         sz = 2; // TODO-XArch-Bug?: Shouldn't this be 4? Or maybe we should assert that we don't see this case.
2578     else if (code & 0x00FF0000)
2582     else if (code & 0x0000FF00)
2591 #ifndef LEGACY_BACKEND
2592     // vzeroupper includes its 2-byte VEX prefix in its MR code.
2593     assert((ins != INS_vzeroupper) || (sz == 3));
2596     insFormat fmt = IF_NONE;
2598 #if FEATURE_STACK_FP_X87
2599     if (CodeGen::instIsFP(ins))
2601         fmt = emitInsModeFormat(ins, IF_TRD);
2603 #endif // FEATURE_STACK_FP_X87
2610     emitCurIGsize += sz;
2613 #if !defined(LEGACY_BACKEND)
2614 // Add an instruction with no operands, but whose encoding depends on the size
2615 // (Only CDQ/CQO currently)
2616 void emitter::emitIns(instruction ins, emitAttr attr)
2619 instrDesc* id = emitNewInstr(attr);
2620 code_t code = insCodeMR(ins);
2621 assert(ins == INS_cdq);
2622 assert((code & 0xFFFFFF00) == 0);
2625 insFormat fmt = IF_NONE;
2627 sz += emitGetVexPrefixAdjustedSize(ins, attr, code);
2628 if (TakesRexWPrefix(ins, attr))
2630 sz += emitGetRexPrefixSize(ins);
2638 emitCurIGsize += sz;
2641 //------------------------------------------------------------------------
2642 // emitMapFmtForIns: map the instruction format based on the instruction.
2643 // Shift-by-a-constant instructions have a special format.
//
// Arguments:
2646 //    fmt - the instruction format to map
2647 //    ins - the instruction
//
// Return Value:
2650 //    The mapped instruction format.
//
2652 emitter::insFormat emitter::emitMapFmtForIns(insFormat fmt, instruction ins)
2684 //------------------------------------------------------------------------
2685 // emitMapFmtAtoM: map the address mode formats ARD, ARW, and AWR to their direct address equivalents.
//
// Arguments:
2688 //    fmt - the instruction format to map
//
// Return Value:
2691 //    The mapped instruction format.
//
2693 emitter::insFormat emitter::emitMapFmtAtoM(insFormat fmt)
// Each A* (address-mode) format is translated 1:1 to its M* (direct/static
// address) counterpart; only a subset of the switch cases is shown here.
2708         case IF_RWR_ARD_CNS:
2709             return IF_RWR_MRD_CNS;
2712         case IF_RRW_ARD_CNS:
2713             return IF_RRW_MRD_CNS;
2714         case IF_RWR_RRD_ARD:
2715             return IF_RWR_RRD_MRD;
2716         case IF_RWR_RRD_ARD_CNS:
2717             return IF_RWR_RRD_MRD_CNS;
2733         case IF_AWR_RRD_CNS:
2734             return IF_MWR_RRD_CNS;
2744 //------------------------------------------------------------------------
2745 // emitHandleMemOp: For a memory operand, fill in the relevant fields of the instrDesc.
2748 // indir - the memory operand.
2749 // id - the instrDesc to fill in.
2750 // fmt - the instruction format to use. This must be one of the ARD, AWR, or ARW formats. If necessary (such as for
2751 // GT_CLS_VAR_ADDR), this function will map it to the correct format.
2752 // ins - the instruction we are generating. This might affect the instruction format we choose.
2755 // The correctly sized instrDesc must already be created, e.g., via emitNewInstrAmd() or emitNewInstrAmdCns();
2758 // For base address of int constant:
2759 // -- the caller must have added the int constant base to the instrDesc when creating it via
2760 // emitNewInstrAmdCns().
2761 // For simple address modes (base + scale * index + offset):
2762 // -- the base register, index register, and scale factor are set.
2763 // -- the caller must have added the addressing mode offset int constant to the instrDesc when creating it via
2764 // emitNewInstrAmdCns().
2766 // The instruction format is set.
2768 // idSetIsDspReloc() is called if necessary.
2770 void emitter::emitHandleMemOp(GenTreeIndir* indir, instrDesc* id, insFormat fmt, instruction ins)
2772 assert(fmt != IF_NONE);
2774 GenTree* memBase = indir->Base();
// Case 1: address of a static class variable. Record the field handle and
// remap the A* format to its M* (direct memory) equivalent.
2776 if ((memBase != nullptr) && memBase->isContained() && (memBase->OperGet() == GT_CLS_VAR_ADDR))
2778 CORINFO_FIELD_HANDLE fldHnd = memBase->gtClsVar.gtClsVarHnd;
2780 // Static always need relocs
2781 if (!jitStaticFldIsGlobAddr(fldHnd))
2784 // fgMorphField() changes any statics that won't fit into 32-bit addresses into
2785 // constants with an indir, rather than GT_CLS_VAR, based on reloc type hint given
2786 // by VM. Hence emitter should always mark GT_CLS_VAR_ADDR as relocatable.
2788 // Data section constants: these get allocated close to code block of the method and
2789 // always addressable IP relative. These too should be marked as relocatable.
2791 id->idSetIsDspReloc();
2794 id->idAddr()->iiaFieldHnd = fldHnd;
2795 id->idInsFmt(emitMapFmtForIns(emitMapFmtAtoM(fmt), ins));
// Case 2: absolute address given by a contained integer constant base.
// No base/index registers are used; the address lives in the amd displacement.
2797 else if ((memBase != nullptr) && memBase->IsCnsIntOrI() && memBase->isContained())
2799 // Absolute addresses marked as contained should fit within the base of addr mode.
2800 assert(memBase->AsIntConCommon()->FitsInAddrBase(emitComp));
2802 // Either not generating relocatable code, or addr must be an icon handle, or the
2803 // constant is zero (which we won't generate a relocation for).
2804 assert(!emitComp->opts.compReloc || memBase->IsIconHandle() || memBase->IsIntegralConst(0));
2806 if (memBase->AsIntConCommon()->AddrNeedsReloc(emitComp))
2808 id->idSetIsDspReloc();
2811 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
2812 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
2813 id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1; // for completeness
2815 id->idInsFmt(emitMapFmtForIns(fmt, ins));
2817 // Absolute address must have already been set in the instrDesc constructor.
2818 assert(emitGetInsAmdAny(id) == memBase->AsIntConCommon()->IconValue());
// Case 3: general addressing mode [base + index*scale + offset].
// Copy base/index registers and the encoded scale into the instrDesc.
2822 if (memBase != nullptr)
2824 id->idAddr()->iiaAddrMode.amBaseReg = memBase->gtRegNum;
2828 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
2831 if (indir->HasIndex())
2833 id->idAddr()->iiaAddrMode.amIndxReg = indir->Index()->gtRegNum;
2837 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
2839 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(indir->Scale());
2841 id->idInsFmt(emitMapFmtForIns(fmt, ins));
2843 // disp must have already been set in the instrDesc constructor.
2844 assert(emitGetInsAmdAny(id) == indir->Offset()); // make sure "disp" is stored properly
2848 // Takes care of storing all incoming register parameters
2849 // into its corresponding shadow space (defined by the x64 ABI)
// Emits one "mov [rsp + (argNum+1)*8], argReg" per integer argument register.
// Only valid while generating the prolog (asserted below). The shadow space
// slots start at [RSP + 8], just below the caller's return address.
2850 void emitter::spillIntArgRegsToShadowSlots()
2856 assert(emitComp->compGeneratingProlog);
2858 for (argNum = 0; argNum < MAX_REG_ARG; ++argNum)
2860 regNumber argReg = intArgRegs[argNum];
2862 // The offsets for the shadow space start at RSP + 8
2863 // (right before the caller return address)
2864 int offset = (argNum + 1) * EA_PTRSIZE;
2866 id = emitNewInstrAmd(EA_PTRSIZE, offset);
2868 id->idInsFmt(IF_AWR_RRD);
2869 id->idAddr()->iiaAddrMode.amBaseReg = REG_SPBASE;
2870 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
2871 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(1);
2873 // The offset has already been set in the intrDsc ctor,
2874 // make sure we got it right.
2875 assert(emitGetInsAmdAny(id) == ssize_t(offset));
2878 sz = emitInsSizeAM(id, insCodeMR(INS_mov));
2880 emitCurIGsize += sz;
2884 //------------------------------------------------------------------------
2885 // emitInsLoadInd: Emits a "mov reg, [mem]" (or a variant such as "movzx" or "movss")
2886 // instruction for a GT_IND node.
2889 // ins - the instruction to emit
2890 // attr - the instruction operand size
2891 // dstReg - the destination register
2892 // mem - the GT_IND node
2894 void emitter::emitInsLoadInd(instruction ins, emitAttr attr, regNumber dstReg, GenTreeIndir* mem)
2896 assert(mem->OperIs(GT_IND));
2898 GenTree* addr = mem->Addr();
// Special case: load from a static class variable (field-handle addressed).
2900 if (addr->OperGet() == GT_CLS_VAR_ADDR)
2902 emitIns_R_C(ins, attr, dstReg, addr->gtClsVar.gtClsVarHnd, 0);
// Special case: load from a stack-based local variable.
2906 if (addr->OperGet() == GT_LCL_VAR_ADDR)
2908 GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
2909 emitIns_R_S(ins, attr, dstReg, varNode->GetLclNum(), 0);
2910 codeGen->genUpdateLife(varNode);
// General case: an addressing mode (or contained absolute address);
// emitHandleMemOp fills in the base/index/scale/disp fields.
2914 assert(addr->OperIsAddrMode() || (addr->IsCnsIntOrI() && addr->isContained()) || !addr->isContained());
2915 ssize_t offset = mem->Offset();
2916 instrDesc* id = emitNewInstrAmd(attr, offset);
2919 emitHandleMemOp(mem, id, IF_RWR_ARD, ins);
2920 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
2923 emitCurIGsize += sz;
2926 //------------------------------------------------------------------------
2927 // emitInsStoreInd: Emits a "mov [mem], reg/imm" (or a variant such as "movss")
2928 // instruction for a GT_STOREIND node.
2931 // ins - the instruction to emit
2932 // attr - the instruction operand size
2933 // mem - the GT_STOREIND node
2935 void emitter::emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* mem)
2937 assert(mem->OperIs(GT_STOREIND));
2939 GenTree* addr = mem->Addr();
2940 GenTree* data = mem->Data();
// Special case: store to a static class variable, either an immediate or
// a register source.
2942 if (addr->OperGet() == GT_CLS_VAR_ADDR)
2944 if (data->isContainedIntOrIImmed())
2946 emitIns_C_I(ins, attr, addr->gtClsVar.gtClsVarHnd, 0, (int)data->AsIntConCommon()->IconValue());
2950 assert(!data->isContained());
2951 emitIns_C_R(ins, attr, addr->gtClsVar.gtClsVarHnd, data->gtRegNum, 0);
// Special case: store to a stack-based local variable.
2956 if (addr->OperGet() == GT_LCL_VAR_ADDR)
2958 GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
2959 if (data->isContainedIntOrIImmed())
2961 emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int)data->AsIntConCommon()->IconValue());
2965 assert(!data->isContained());
2966 emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0);
2968 codeGen->genUpdateLife(varNode);
// General case: addressing mode; pick AWR_CNS vs AWR_RRD based on whether
// the source is a contained immediate or a register.
2972 ssize_t offset = mem->Offset();
2976 if (data->isContainedIntOrIImmed())
2978 int icon = (int)data->AsIntConCommon()->IconValue();
2979 id = emitNewInstrAmdCns(attr, offset, icon);
2981 emitHandleMemOp(mem, id, IF_AWR_CNS, ins);
2982 sz = emitInsSizeAM(id, insCodeMI(ins), icon);
2987 assert(!data->isContained());
2988 id = emitNewInstrAmd(attr, offset);
2990 emitHandleMemOp(mem, id, IF_AWR_RRD, ins);
2991 id->idReg1(data->gtRegNum);
2992 sz = emitInsSizeAM(id, insCodeMR(ins));
2997 emitCurIGsize += sz;
3000 //------------------------------------------------------------------------
3001 // emitInsStoreLcl: Emits a "mov [mem], reg/imm" (or a variant such as "movss")
3002 // instruction for a GT_STORE_LCL_VAR node.
3005 // ins - the instruction to emit
3006 // attr - the instruction operand size
3007 // varNode - the GT_STORE_LCL_VAR node
3009 void emitter::emitInsStoreLcl(instruction ins, emitAttr attr, GenTreeLclVarCommon* varNode)
3011 assert(varNode->OperIs(GT_STORE_LCL_VAR));
3012 assert(varNode->gtRegNum == REG_NA); // stack store
3014 GenTree* data = varNode->gtGetOp1();
3015 codeGen->inst_set_SV_var(varNode);
// Store either a contained immediate or a register value into the local's
// stack slot, then update liveness for the local.
3017 if (data->isContainedIntOrIImmed())
3019 emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int)data->AsIntConCommon()->IconValue());
3023 assert(!data->isContained());
3024 emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0);
3026 codeGen->genUpdateLife(varNode);
3029 //------------------------------------------------------------------------
3030 // emitInsBinary: Emits an instruction for a node which takes two operands
3033 // ins - the instruction to emit
3034 // attr - the instruction operand size
3035 // dst - the destination and first source operand
3036 // src - the second source operand
3039 // i) caller of this routine needs to call genConsumeReg()
3040 // ii) caller of this routine needs to call genProduceReg()
3041 regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src)
3043 // We can only have one memory operand and only src can be a constant operand
3044 // However, the handling for a given operand type (mem, cns, or other) is fairly
3045 // consistent regardless of whether they are src or dst. As such, we will find
3046 // the type of each operand and only check them against src/dst where relevant.
3048 GenTree* memOp = nullptr;
3049 GenTree* cnsOp = nullptr;
3050 GenTree* otherOp = nullptr;
// Classify dst: if it is contained / an in-memory local field / a spill temp,
// it is the (single allowed) memory operand.
3052 if (dst->isContained() || (dst->isLclField() && (dst->gtRegNum == REG_NA)) || dst->isUsedFromSpillTemp())
3054 // dst can only be a modrm
3055 assert(dst->isUsedFromMemory() || (dst->gtRegNum == REG_NA) ||
3056 instrIs3opImul(ins)); // dst on 3opImul isn't really the dst
3057 assert(!src->isUsedFromMemory());
3061 if (src->isContained())
3063 assert(src->IsCnsIntOrI());
// Otherwise classify src: a contained constant, a spill temp, or memory.
3071 else if (src->isContained() || src->isUsedFromSpillTemp())
3073 assert(!dst->isUsedFromMemory());
3076 if ((src->IsCnsIntOrI() || src->IsCnsFltOrDbl()) && !src->isUsedFromSpillTemp())
3078 assert(!src->isUsedFromMemory() || src->IsCnsFltOrDbl());
3083 assert(src->isUsedFromMemory());
3088 // At this point, we either have a memory operand or we don't.
3090 // If we don't then the logic is very simple and we will either be emitting a
3091 // `reg, immed` instruction (if src is a cns) or a `reg, reg` instruction otherwise.
3093 // If we do have a memory operand, the logic is a bit more complicated as we need
3094 // to do different things depending on the type of memory operand. These types include:
3096 // * Indirect access
3099 // * Addressing mode [base + index * scale + offset]
3103 // Most of these types (except Indirect: Class variable and Indirect: Addressing mode)
3104 // give us a local variable number and an offset and access memory on the stack
3106 // Indirect: Class variable is used for accessing static class variables and gives us a handle
3107 // to the memory location we read from
3109 // Indirect: Addressing mode is used for the remaining memory accesses and will give us
3110 // a base address, an index, a scale, and an offset. These are combined to let us easily
3111 // access the given memory location.
3113 // In all of the memory access cases, we determine which form to emit (e.g. `reg, [mem]`
3114 // or `[mem], reg`) by comparing memOp to src to determine which `emitIns_*` method needs
3115 // to be called. The exception is for the `[mem], immed` case (for Indirect: Class variable)
3116 // where only src can be the immediate.
3118 if (memOp != nullptr)
3120 TempDsc* tmpDsc = nullptr;
3121 unsigned varNum = BAD_VAR_NUM;
3122 unsigned offset = (unsigned)-1;
// Memory operand is a spill temp: resolve it to a temp number, then
// release the temp back to the pool.
3124 if (memOp->isUsedFromSpillTemp())
3126 assert(memOp->IsRegOptional());
3128 tmpDsc = codeGen->getSpillTempDsc(memOp);
3129 varNum = tmpDsc->tdTempNum();
3132 emitComp->tmpRlsTemp(tmpDsc);
// Memory operand is an indirection: dispatch on the kind of base address.
3134 else if (memOp->isIndir())
3136 GenTreeIndir* memIndir = memOp->AsIndir();
3137 GenTree* memBase = memIndir->gtOp1;
3139 switch (memBase->OperGet())
3141 case GT_LCL_VAR_ADDR:
3143 varNum = memBase->AsLclVarCommon()->GetLclNum();
3146 // Ensure that all the GenTreeIndir values are set to their defaults.
3147 assert(!memIndir->HasIndex());
3148 assert(memIndir->Scale() == 1);
3149 assert(memIndir->Offset() == 0);
// Base is a static class variable address: emit the C-form directly.
3154 case GT_CLS_VAR_ADDR:
3158 assert(otherOp == dst);
3159 assert(cnsOp == nullptr);
3161 if (instrHasImplicitRegPairDest(ins))
3163 // src is a class static variable
3164 // dst is implicit - RDX:RAX
3165 emitIns_C(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0);
3169 // src is a class static variable
3170 // dst is a register
3171 emitIns_R_C(ins, attr, dst->gtRegNum, memBase->gtClsVar.gtClsVarHnd, 0);
3176 assert(memOp == dst);
3178 if (cnsOp != nullptr)
3180 assert(cnsOp == src);
3181 assert(otherOp == nullptr);
3182 assert(src->IsCnsIntOrI());
3184 // src is a contained immediate
3185 // dst is a class static variable
3186 emitIns_C_I(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0,
3187 (int)src->gtIntConCommon.IconValue());
3191 assert(otherOp == src);
3193 // src is a register
3194 // dst is a class static variable
3195 emitIns_C_R(ins, attr, memBase->gtClsVar.gtClsVarHnd, src->gtRegNum, 0);
3199 return dst->gtRegNum;
// General addressing mode: build the instrDesc here and use
// emitHandleMemOp to fill the address fields.
3202 default: // Addressing mode [base + index * scale + offset]
3204 instrDesc* id = nullptr;
3206 if (cnsOp != nullptr)
3208 assert(memOp == dst);
3209 assert(cnsOp == src);
3210 assert(otherOp == nullptr);
3211 assert(src->IsCnsIntOrI());
3213 id = emitNewInstrAmdCns(attr, memIndir->Offset(), (int)src->gtIntConCommon.IconValue());
3217 ssize_t offset = memIndir->Offset();
3218 id = emitNewInstrAmd(attr, offset);
3221 GenTree* regTree = (memOp == src) ? dst : src;
3223 // there must be one non-contained op
3224 assert(!regTree->isContained());
3225 id->idReg1(regTree->gtRegNum);
3227 assert(id != nullptr);
3229 id->idIns(ins); // Set the instruction.
3231 // Determine the instruction format
3232 insFormat fmt = IF_NONE;
3236 assert(cnsOp == nullptr);
3237 assert(otherOp == dst);
3239 if (instrHasImplicitRegPairDest(ins))
3241 fmt = emitInsModeFormat(ins, IF_ARD);
3245 fmt = emitInsModeFormat(ins, IF_RRD_ARD);
3250 assert(memOp == dst);
3252 if (cnsOp != nullptr)
3254 assert(cnsOp == src);
3255 assert(otherOp == nullptr);
3256 assert(src->IsCnsIntOrI());
3258 fmt = emitInsModeFormat(ins, IF_ARD_CNS);
3262 assert(otherOp == src);
3263 fmt = emitInsModeFormat(ins, IF_ARD_RRD);
3266 assert(fmt != IF_NONE);
3267 emitHandleMemOp(memIndir, id, fmt, ins);
3269 // Determine the instruction size
3270 UNATIVE_OFFSET sz = 0;
3274 assert(otherOp == dst);
3275 assert(cnsOp == nullptr);
3277 if (instrHasImplicitRegPairDest(ins))
3279 sz = emitInsSizeAM(id, insCode(ins));
3283 sz = emitInsSizeAM(id, insCodeRM(ins));
3288 assert(memOp == dst);
3290 if (cnsOp != nullptr)
3292 assert(memOp == dst);
3293 assert(cnsOp == src);
3294 assert(otherOp == nullptr);
3296 sz = emitInsSizeAM(id, insCodeMI(ins), (int)src->gtIntConCommon.IconValue());
3300 assert(otherOp == src);
3301 sz = emitInsSizeAM(id, insCodeMR(ins));
3309 emitCurIGsize += sz;
// When the memory operand is the source, the result lands in dst's register;
// otherwise the result goes to memory and no register is produced.
3311 return (memOp == src) ? dst->gtRegNum : REG_NA;
// Memory operand is a local variable or local field: extract var number
// and offset for the S-form emit calls below.
3317 switch (memOp->OperGet())
3320 case GT_STORE_LCL_FLD:
3322 GenTreeLclFld* lclField = memOp->AsLclFld();
3323 varNum = lclField->GetLclNum();
3324 offset = lclField->gtLclFld.gtLclOffs;
3330 assert(memOp->IsRegOptional() || !emitComp->lvaTable[memOp->gtLclVar.gtLclNum].lvIsRegCandidate());
3331 varNum = memOp->AsLclVar()->GetLclNum();
3342 // Ensure we got a good varNum and offset.
3343 // We also need to check for `tmpDsc != nullptr` since spill temp numbers
3344 // are negative and start with -1, which also happens to be BAD_VAR_NUM.
3345 assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
3346 assert(offset != (unsigned)-1);
3350 assert(otherOp == dst);
3351 assert(cnsOp == nullptr);
3353 if (instrHasImplicitRegPairDest(ins))
3355 // src is a stack based local variable
3356 // dst is implicit - RDX:RAX
3357 emitIns_S(ins, attr, varNum, offset);
3361 // src is a stack based local variable
3362 // dst is a register
3363 emitIns_R_S(ins, attr, dst->gtRegNum, varNum, offset);
3368 assert(memOp == dst);
3369 assert((dst->gtRegNum == REG_NA) || dst->IsRegOptional());
3371 if (cnsOp != nullptr)
3373 assert(cnsOp == src);
3374 assert(otherOp == nullptr);
3375 assert(src->IsCnsIntOrI());
3377 // src is a contained immediate
3378 // dst is a stack based local variable
3379 emitIns_S_I(ins, attr, varNum, offset, (int)src->gtIntConCommon.IconValue());
3383 assert(otherOp == src);
3384 assert(!src->isContained());
3386 // src is a register
3387 // dst is a stack based local variable
3388 emitIns_S_R(ins, attr, src->gtRegNum, varNum, offset);
// No memory operand: either `reg, immed` (int immediates directly; float/double
// constants are materialized in the data section and loaded via their handle)...
3392 else if (cnsOp != nullptr) // reg, immed
3394 assert(cnsOp == src);
3395 assert(otherOp == dst);
3397 if (src->IsCnsIntOrI())
3399 assert(!dst->isContained());
3400 GenTreeIntConCommon* intCns = src->AsIntConCommon();
3401 emitIns_R_I(ins, attr, dst->gtRegNum, intCns->IconValue());
3405 assert(src->IsCnsFltOrDbl());
3406 GenTreeDblCon* dblCns = src->AsDblCon();
3408 CORINFO_FIELD_HANDLE hnd = emitFltOrDblConst(dblCns->gtDconVal, emitTypeSize(dblCns));
3409 emitIns_R_C(ins, attr, dst->gtRegNum, hnd, 0);
// ...or `reg, reg`.
3414 assert(otherOp == nullptr);
3415 assert(!src->isContained() && !dst->isContained());
3417 if (instrHasImplicitRegPairDest(ins))
3419 emitIns_R(ins, attr, src->gtRegNum);
3423 emitIns_R_R(ins, attr, dst->gtRegNum, src->gtRegNum);
3427 return dst->gtRegNum;
3430 //------------------------------------------------------------------------
3431 // emitInsRMW: Emit logic for Read-Modify-Write binary instructions.
3433 // Responsible for emitting a single instruction that will perform an operation of the form:
3434 // *addr = *addr <BinOp> src
3439 // ins - instruction to generate
3440 // attr - emitter attribute for instruction
3441 // storeInd - indir for RMW addressing mode
3442 // src - source operand of instruction
3445 // Lowering has taken care of recognizing the StoreInd pattern of:
3446 // StoreInd( AddressTree, BinOp( Ind ( AddressTree ), Operand ) )
3447 // The address to store is already sitting in a register.
3450 // This is a no-produce operation, meaning that no register output will
3451 // be produced for future use in the code stream.
3453 void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeInd, GenTree* src)
3455 GenTree* addr = storeInd->Addr();
3456 addr = addr->gtSkipReloadOrCopy();
3457 assert(addr->OperGet() == GT_LCL_VAR || addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA ||
3458 addr->OperGet() == GT_CLS_VAR_ADDR || addr->OperGet() == GT_CNS_INT);
3460 instrDesc* id = nullptr;
// Class-variable addresses carry no addressing-mode offset; for everything
// else use the indirection's offset.
3464 if (addr->OperGet() != GT_CLS_VAR_ADDR)
3466 offset = storeInd->Offset();
// Immediate source -> ARW_CNS form; register source -> ARW_RRD form.
3469 if (src->isContainedIntOrIImmed())
3471 GenTreeIntConCommon* intConst = src->AsIntConCommon();
3472 id = emitNewInstrAmdCns(attr, offset, (int)intConst->IconValue());
3473 emitHandleMemOp(storeInd, id, IF_ARW_CNS, ins);
3475 sz = emitInsSizeAM(id, insCodeMI(ins), (int)intConst->IconValue());
3479 assert(!src->isContained()); // there must be one non-contained src
3482 id = emitNewInstrAmd(attr, offset);
3483 emitHandleMemOp(storeInd, id, IF_ARW_RRD, ins);
3484 id->idReg1(src->gtRegNum);
3486 sz = emitInsSizeAM(id, insCodeMR(ins));
3492 emitCurIGsize += sz;
3495 //------------------------------------------------------------------------
3496 // emitInsRMW: Emit logic for Read-Modify-Write unary instructions.
3498 // Responsible for emitting a single instruction that will perform an operation of the form:
3499 // *addr = UnaryOp *addr
3504 // ins - instruction to generate
3505 // attr - emitter attribute for instruction
3506 // storeInd - indir for RMW addressing mode
3509 // Lowering has taken care of recognizing the StoreInd pattern of:
3510 // StoreInd( AddressTree, UnaryOp( Ind ( AddressTree ) ) )
3511 // The address to store is already sitting in a register.
3514 // This is a no-produce operation, meaning that no register output will
3515 // be produced for future use in the code stream.
3517 void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeInd)
3519 GenTree* addr = storeInd->Addr();
3520 addr = addr->gtSkipReloadOrCopy();
3521 assert(addr->OperGet() == GT_LCL_VAR || addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_CLS_VAR_ADDR ||
3522 addr->OperGet() == GT_LEA || addr->OperGet() == GT_CNS_INT);
// Class-variable addresses carry no addressing-mode offset (same convention
// as the binary RMW overload above).
3525 if (addr->OperGet() != GT_CLS_VAR_ADDR)
3527 offset = storeInd->Offset();
3530 instrDesc* id = emitNewInstrAmd(attr, offset);
3531 emitHandleMemOp(storeInd, id, IF_ARW, ins);
3533 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
3537 emitCurIGsize += sz;
3540 #endif // !LEGACY_BACKEND
3542 #if FEATURE_STACK_FP_X87
3543 /*****************************************************************************
3545 * Add an instruction of the form "op ST(0),ST(n)".
3548 void emitter::emitIns_F0_F(instruction ins, unsigned fpreg)
// x87 stack-register form: destination is ST(0), source is ST(fpreg).
// Encoding is a fixed 2 bytes.
3550 UNATIVE_OFFSET sz = 2;
3551 instrDesc* id = emitNewInstr();
3552 insFormat fmt = emitInsModeFormat(ins, IF_TRD_FRD);
3556 id->idReg1((regNumber)fpreg);
3560 emitCurIGsize += sz;
3563 /*****************************************************************************
3565 * Add an instruction of the form "op ST(n),ST(0)".
3568 void emitter::emitIns_F_F0(instruction ins, unsigned fpreg)
// x87 stack-register form: destination is ST(fpreg), source is ST(0).
// Encoding is a fixed 2 bytes.
3570 UNATIVE_OFFSET sz = 2;
3571 instrDesc* id = emitNewInstr();
3572 insFormat fmt = emitInsModeFormat(ins, IF_FRD_TRD);
3576 id->idReg1((regNumber)fpreg);
3580 emitCurIGsize += sz;
3582 #endif // FEATURE_STACK_FP_X87
3584 /*****************************************************************************
3586 * Add an instruction referencing a single register.
3589 void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
3591 emitAttr size = EA_SIZE(attr);
3593 assert(size <= EA_PTRSIZE);
3594 noway_assert(emitVerifyEncodable(ins, size, reg));
3597 instrDesc* id = emitNewInstrTiny(attr);
// Instruction-size estimation. The visible cases (elided switch) size
// inc/dec-style, push/pop-style, and setcc encodings.
3603 #ifdef _TARGET_AMD64_
3605 sz = 2; // x64 has no 1-byte opcode (it is the same encoding as the REX prefix)
3607 #else // !_TARGET_AMD64_
3609 if (size == EA_1BYTE)
3610 sz = 2; // Use the long form as the small one has no 'w' bit
3612 sz = 1; // Use short form
3614 #endif // !_TARGET_AMD64_
3623 /* We don't currently push/pop small values */
3625 assert(size == EA_PTRSIZE);
3632 /* All the sixteen INS_setCCs are contiguous. */
3634 if (INS_seto <= ins && ins <= INS_setg)
3636 // Rough check that we used the endpoints for the range check
3638 assert(INS_seto + 0xF == INS_setg);
3640 // The caller must specify EA_1BYTE for 'attr'
3642 assert(attr == EA_1BYTE);
3644 /* We expect this to always be a 'big' opcode */
3646 assert(insEncodeMRreg(ins, reg, attr, insCodeMR(ins)) & 0x00FF0000);
3659 insFormat fmt = emitInsModeFormat(ins, IF_RRD);
3665 // 16-bit operand instructions will need a prefix.
3666 // This refers to 66h size prefix override.
3667 if (size == EA_2BYTE)
// Account for VEX and REX prefix bytes where required.
3673 sz += emitGetVexPrefixAdjustedSize(ins, attr, insEncodeMRreg(ins, reg, attr, insCodeMR(ins)));
3676 if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, attr))
3678 sz += emitGetRexPrefixSize(ins);
3684 emitCurIGsize += sz;
// push/pop adjust the emitter's tracked stack depth.
3686 emitAdjustStackDepthPushPop(ins);
3689 /*****************************************************************************
3691 * Add an instruction referencing a register and a constant.
3694 void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t val)
3696 emitAttr size = EA_SIZE(attr);
3698 // Allow emitting SSE2/AVX SIMD instructions of R_I form that can specify EA_16BYTE or EA_32BYTE
3699 assert(size <= EA_PTRSIZE || IsSSEOrAVXInstruction(ins));
3701 noway_assert(emitVerifyEncodable(ins, size, reg));
3703 #ifdef _TARGET_AMD64_
3704 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
3705 // all other opcodes take a sign-extended 4-byte immediate
3706 noway_assert(size < EA_8BYTE || ins == INS_mov || ((int)val == val && !EA_IS_CNS_RELOC(attr)));
3711 insFormat fmt = emitInsModeFormat(ins, IF_RRD_CNS);
3712 bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
3714 // BT reg,imm might be useful but it requires special handling of the immediate value
3715 // (it is always encoded in a byte). Let's not complicate things until this is needed.
3716 assert(ins != INS_bt);
3718 // Figure out the size of the instruction
3722 #ifdef _TARGET_AMD64_
3723 // mov reg, imm64 is equivalent to mov reg, imm32 if the high order bits are all 0
3724 // and this isn't a reloc constant.
3725 if (((size > EA_4BYTE) && (0 == (val & 0xFFFFFFFF00000000LL))) && !EA_IS_CNS_RELOC(attr))
3727 attr = size = EA_4BYTE;
3730 if (size > EA_4BYTE)
3732 sz = 9; // Really it is 10, but we'll add one more later
3735 #endif // _TARGET_AMD64_
// (elided switch) Shift-style and ALU-style instructions are sized below.
3750 valInByte = true; // shift amount always placed in a byte
3755 if (EA_IS_CNS_RELOC(attr))
3757 valInByte = false; // relocs can't be placed in a byte
3762 if (IsSSEOrAVXInstruction(ins))
// AL/AX/EAX/RAX short-form encodings exist for many ALU opcodes (except
// 3-operand imul, where EAX is the implicit destination rather than a source).
3766 else if (size == EA_1BYTE && reg == REG_EAX && !instrIs3opImul(ins))
3777 if (reg == REG_EAX && !instrIs3opImul(ins))
3786 #ifdef _TARGET_AMD64_
3787 if (size > EA_4BYTE)
3789 // We special-case anything that takes a full 8-byte constant.
3793 #endif // _TARGET_AMD64_
3795 sz += EA_SIZE_IN_BYTES(attr);
3802 sz += emitGetVexPrefixSize(ins, attr);
3804 // Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a
3805 // 64-bit sized operand (REX.W). Note that IMUL in our encoding is special, with a "built-in", implicit, target
3806 // register. So we also need to check if that built-in register is an extended register.
3807 if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, size) || instrIsExtendedReg3opImul(ins))
3809 sz += emitGetRexPrefixSize(ins);
3812 #if defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
3816 id = emitNewInstrSC(attr, val);
3821 // 16-bit operand instructions will need a prefix
3822 if (size == EA_2BYTE)
3830 emitCurIGsize += sz;
// Immediate forms of push/sub/add etc. change the tracked stack depth.
3834 emitAdjustStackDepth(ins, val);
3838 /*****************************************************************************
3840 * Add an instruction referencing an integer constant.
3843 void emitter::emitIns_I(instruction ins, emitAttr attr, int val)
3847 bool valInByte = ((signed char)val == val);
3849 #ifdef _TARGET_AMD64_
3850 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
3851 // all other opcodes take a sign-extended 4-byte immediate
3852 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
3855 if (EA_IS_CNS_RELOC(attr))
3857 valInByte = false; // relocs can't be placed in a byte
// (elided switch on ins) sizes the encoding; a byte-sized immediate uses
// the short 2-byte form, otherwise the full 5-byte form.
3873 sz = valInByte ? 2 : 5;
3877 NO_WAY("unexpected instruction");
3880 id = emitNewInstrSC(attr, val);
3882 id->idInsFmt(IF_CNS);
3886 emitCurIGsize += sz;
// push imm adjusts the tracked stack depth.
3888 emitAdjustStackDepthPushPop(ins);
3891 /*****************************************************************************
3893 * Add a "jump through a table" instruction.
3896 void emitter::emitIns_IJ(emitAttr attr, regNumber reg, unsigned base)
3898 assert(EA_SIZE(attr) == EA_4BYTE);
// Indirect jmp through a jump table: [base + reg*POINTER_SIZE].
// Base size: 3 bytes opcode/modrm/sib + 4 bytes displacement.
3900 UNATIVE_OFFSET sz = 3 + 4;
3901 const instruction ins = INS_i_jmp;
3903 if (IsExtendedReg(reg, attr))
3905 sz += emitGetRexPrefixSize(ins);
3908 instrDesc* id = emitNewInstrAmd(attr, base);
3911 id->idInsFmt(IF_ARD);
3912 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
3913 id->idAddr()->iiaAddrMode.amIndxReg = reg;
3914 id->idAddr()->iiaAddrMode.amScale = emitter::OPSZP;
3917 id->idDebugOnlyInfo()->idMemCookie = base;
3923 emitCurIGsize += sz;
3926 /*****************************************************************************
3928 * Add an instruction with a static data member operand. If 'size' is 0, the
3929 * instruction operates on the address of the static member instead of its
3930 * value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]").
3933 void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs)
3935 // Static always need relocs
3936 if (!jitStaticFldIsGlobAddr(fldHnd))
3938 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
3944 /* Are we pushing the offset of the class variable? */
3946 if (EA_IS_OFFSET(attr))
3948 assert(ins == INS_push);
3949 sz = 1 + TARGET_POINTER_SIZE;
3951 id = emitNewInstrDsp(EA_1BYTE, offs);
3953 id->idInsFmt(IF_MRD_OFF);
// Otherwise operate on the static's value via a direct-memory (M*) format.
3957 #if FEATURE_STACK_FP_X87
3958 insFormat fmt = emitInsModeFormat(ins, IF_MRD, IF_TRD_MRD, IF_MWR_TRD);
3959 #else // !FEATURE_STACK_FP_X87
3960 insFormat fmt = emitInsModeFormat(ins, IF_MRD);
3961 #endif // !FEATURE_STACK_FP_X87
3963 id = emitNewInstrDsp(attr, offs);
3966 sz = emitInsSizeCV(id, insCodeMR(ins));
3970 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
3972 if (TakesRexWPrefix(ins, attr))
3975 sz += emitGetRexPrefixSize(ins);
3978 id->idAddr()->iiaFieldHnd = fldHnd;
3983 emitCurIGsize += sz;
// push/pop adjust the tracked stack depth.
3985 emitAdjustStackDepthPushPop(ins);
3988 /*****************************************************************************
3990 * Add an instruction with two register operands.
3993 void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2)
3995 emitAttr size = EA_SIZE(attr);
3997 /* We don't want to generate any useless mov instructions! */
3998 CLANG_FORMAT_COMMENT_ANCHOR;
4000 #ifdef _TARGET_AMD64_
4001 // Same-reg 4-byte mov can be useful because it performs a
4002 // zero-extension to 8 bytes.
4003 assert(ins != INS_mov || reg1 != reg2 || size == EA_4BYTE);
4005 assert(ins != INS_mov || reg1 != reg2);
4006 #endif // _TARGET_AMD64_
4008 assert(size <= EA_32BYTE);
4009 noway_assert(emitVerifyEncodable(ins, size, reg1, reg2));
4011 UNATIVE_OFFSET sz = emitInsSizeRR(ins, reg1, reg2, attr);
4013 if (Is4ByteSSE4Instruction(ins))
4015 // The 4-Byte SSE4 instructions require one additional byte
4019 /* Special case: "XCHG" uses a different format */
// xchg reads and writes both operands, hence RRW_RRW rather than the
// mode-derived format.
4020 insFormat fmt = (ins == INS_xchg) ? IF_RRW_RRW : emitInsModeFormat(ins, IF_RRD_RRD);
4022 instrDesc* id = emitNewInstrTiny(attr);
4030 emitCurIGsize += sz;
4033 /*****************************************************************************
4035 * Add an instruction with two register operands and an integer constant.
4038 void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival)
4040 // SSE2 version requires 5 bytes and SSE4/AVX version 6 bytes
4041 UNATIVE_OFFSET sz = 4;
4042 if (IsSSEOrAVXInstruction(ins))
4044 // AVX: 3 byte VEX prefix + 1 byte opcode + 1 byte ModR/M + 1 byte immediate
4045 // SSE4: 4 byte opcode + 1 byte ModR/M + 1 byte immediate
4046 // SSE2: 3 byte opcode + 1 byte ModR/M + 1 byte immediate
4047 sz = (UseVEXEncoding() || UseSSE4()) ? 6 : 5;
4050 #ifdef _TARGET_AMD64_
4051 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
4052 // all other opcodes take a sign-extended 4-byte immediate
4053 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
4056 instrDesc* id = emitNewInstrSC(attr, ival);
4059 if (IsExtendedReg(reg1, attr) || IsExtendedReg(reg2, attr))
4061 sz += emitGetRexPrefixSize(ins);
4064 #ifndef LEGACY_BACKEND
// pextrq/pinsrq require a REX.W prefix when not using VEX encoding
// (elided: the size adjustment for that prefix lives in the hidden lines).
4065 if ((ins == INS_pextrq || ins == INS_pinsrq) && !UseVEXEncoding())
4070 #endif // !LEGACY_BACKEND
4073 id->idInsFmt(IF_RRW_RRW_CNS);
4079 emitCurIGsize += sz;
4082 #ifndef LEGACY_BACKEND
// Emit an instruction with a [base + offs] memory operand and no other
// operands; currently only used for the prefetch family (asserted below).
4083 void emitter::emitIns_AR(instruction ins, emitAttr attr, regNumber base, int offs)
4085 assert(ins == INS_prefetcht0 || ins == INS_prefetcht1 || ins == INS_prefetcht2 || ins == INS_prefetchnta);
4087 instrDesc* id = emitNewInstrAmd(attr, offs);
4091 id->idInsFmt(IF_ARD);
4092 id->idAddr()->iiaAddrMode.amBaseReg = base;
4093 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4095 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
4099 emitCurIGsize += sz;
// Emit an instruction with a register destination and a GenTreeIndir memory
// source; 'fmt' selects the exact register/memory format (caller-provided).
4102 void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, insFormat fmt)
4104 ssize_t offs = indir->Offset();
4105 instrDesc* id = emitNewInstrAmd(attr, offs);
4110 emitHandleMemOp(indir, id, fmt, ins);
4112 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
4116 emitCurIGsize += sz;
// Emit an SSE/AVX instruction with a register destination, a GenTreeIndir
// memory source, and a 1-byte immediate.
4119 void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival)
4121 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4122 assert(IsSSEOrAVXInstruction(ins));
4124 ssize_t offs = indir->Offset();
4125 instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4130 emitHandleMemOp(indir, id, IF_RRW_ARD_CNS, ins);
4132 // Plus one for the 1-byte immediate (ival)
4133 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
4135 if (Is4ByteSSE4Instruction(ins))
4137 // The 4-Byte SSE4 instructions require two additional bytes
4144 emitCurIGsize += sz;
// Emit an SSE/AVX instruction with a register destination, a [base + offs]
// memory source, and a 1-byte immediate.
4147 void emitter::emitIns_R_AR_I(instruction ins, emitAttr attr, regNumber reg1, regNumber base, int offs, int ival)
4149 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4150 assert(IsSSEOrAVXInstruction(ins));
4152 instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4157 id->idInsFmt(IF_RRW_ARD_CNS);
4158 id->idAddr()->iiaAddrMode.amBaseReg = base;
4159 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4161 // Plus one for the 1-byte immediate (ival)
4162 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
4164 if (Is4ByteSSE4Instruction(ins))
4166 // The 4-Byte SSE4 instructions require two additional bytes
4173 emitCurIGsize += sz;
// Emit an SSE/AVX instruction with a register destination, a static-field
// (class variable) memory source, and a 1-byte immediate.
4176 void emitter::emitIns_R_C_I(
4177 instruction ins, emitAttr attr, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
4179 // Static always need relocs
4180 if (!jitStaticFldIsGlobAddr(fldHnd))
4182 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4185 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4186 assert(IsSSEOrAVXInstruction(ins));
4188 instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs);
// "+ 1" accounts for the 1-byte immediate (ival).
4189 UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
4191 if (Is4ByteSSE4Instruction(ins))
4193 // The 4-Byte SSE4 instructions require two additional bytes
4198 id->idInsFmt(IF_RRW_MRD_CNS);
4200 id->idAddr()->iiaFieldHnd = fldHnd;
4204 emitCurIGsize += sz;
// emitIns_R_S_I: emit an SSE/AVX instruction with a register, a stack-local operand
// (varx/offs), and a 1-byte immediate.
4207 void emitter::emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs, int ival)
4209 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4210 assert(IsSSEOrAVXInstruction(ins));
4212 instrDesc* id = emitNewInstrCns(attr, ival);
// (listing gap: the 'UNATIVE_OFFSET sz =' head of this expression is elided)
4214 emitInsSizeSV(insCodeRM(ins), varx, offs) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
4216 if (Is4ByteSSE4Instruction(ins))
4218 // The 4-Byte SSE4 instructions require two additional bytes
4223 id->idInsFmt(IF_RRW_SRD_CNS);
4225 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
// Debug-only bookkeeping: remember the IL offset of the variable reference.
4228 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4234 emitCurIGsize += sz;
// emitIns_R_R_A: emit a three-operand AVX instruction with two registers and an
// address-mode operand from a GenTreeIndir.
4237 void emitter::emitIns_R_R_A(
4238 instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, insFormat fmt)
4240 assert(IsSSEOrAVXInstruction(ins));
4241 assert(IsThreeOperandAVXInstruction(ins));
4243 ssize_t offs = indir->Offset();
4244 instrDesc* id = emitNewInstrAmd(attr, offs);
4250 emitHandleMemOp(indir, id, fmt, ins);
4252 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
4256 emitCurIGsize += sz;
4258 #endif // !LEGACY_BACKEND
// emitIns_R_R_AR: emit a three-operand AVX instruction with two registers and a
// [base+offs] address-mode operand.
4260 void emitter::emitIns_R_R_AR(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs)
4262 assert(IsSSEOrAVXInstruction(ins));
4263 assert(IsThreeOperandAVXInstruction(ins));
4265 instrDesc* id = emitNewInstrAmd(attr, offs);
4271 id->idInsFmt(IF_RWR_RRD_ARD);
4272 id->idAddr()->iiaAddrMode.amBaseReg = base;
4273 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4275 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
4279 emitCurIGsize += sz;
// emitIns_R_R_C: emit a three-operand AVX instruction with two registers and a
// static-field operand.
4282 void emitter::emitIns_R_R_C(
4283 instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs)
4285 assert(IsSSEOrAVXInstruction(ins));
4286 assert(IsThreeOperandAVXInstruction(ins));
4288 // Static always need relocs
4289 if (!jitStaticFldIsGlobAddr(fldHnd))
4291 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4294 instrDesc* id = emitNewInstrDsp(attr, offs);
4295 UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
4298 id->idInsFmt(IF_RWR_RRD_MRD);
4301 id->idAddr()->iiaFieldHnd = fldHnd;
4306 emitCurIGsize += sz;
4309 /*****************************************************************************
4311 * Add an instruction with three register operands.
4314 void emitter::emitIns_R_R_R(instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2)
4316 assert(IsSSEOrAVXInstruction(ins));
4317 assert(IsThreeOperandAVXInstruction(ins));
// Currently the VEX prefix only uses the three-byte mode.
4318 // Currently vex prefix only use three bytes mode.
4319 // size = vex + opcode + ModR/M = 3 + 1 + 1 = 5
4320 // TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future
4321 UNATIVE_OFFSET sz = 5;
4323 instrDesc* id = emitNewInstr(attr);
4325 id->idInsFmt(IF_RWR_RRD_RRD);
4326 id->idReg1(targetReg);
4332 emitCurIGsize += sz;
// emitIns_R_R_S: emit a three-operand AVX instruction with two registers and a
// stack-local operand (varx/offs).
4335 void emitter::emitIns_R_R_S(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs)
4337 assert(IsSSEOrAVXInstruction(ins));
4338 assert(IsThreeOperandAVXInstruction(ins));
4340 instrDesc* id = emitNewInstr(attr);
// (listing gap: the 'UNATIVE_OFFSET sz =' head of this expression is elided)
4342 emitInsSizeSV(insCodeRM(ins), varx, offs) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
4345 id->idInsFmt(IF_RWR_RRD_SRD);
4348 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
4351 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4356 emitCurIGsize += sz;
4359 #ifndef LEGACY_BACKEND
// emitIns_R_R_A_I: emit a three-operand AVX instruction with two registers, an
// address-mode operand from a GenTreeIndir, and a 1-byte immediate.
4360 void emitter::emitIns_R_R_A_I(
4361 instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, int ival, insFormat fmt)
4363 assert(IsSSEOrAVXInstruction(ins));
4364 assert(IsThreeOperandAVXInstruction(ins));
4366 ssize_t offs = indir->Offset();
4367 instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4373 emitHandleMemOp(indir, id, fmt, ins);
// The trailing "+ 1" accounts for the 1-byte immediate (ival).
4375 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
4379 emitCurIGsize += sz;
// emitIns_R_R_AR_I: emit a three-operand AVX instruction with two registers, a
// [base+offs] address-mode operand, and a 1-byte immediate.
4382 void emitter::emitIns_R_R_AR_I(
4383 instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs, int ival)
4385 assert(IsSSEOrAVXInstruction(ins));
4386 assert(IsThreeOperandAVXInstruction(ins));
4388 instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4394 id->idInsFmt(IF_RWR_RRD_ARD_CNS);
4395 id->idAddr()->iiaAddrMode.amBaseReg = base;
4396 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4398 // Plus one for the 1-byte immediate (ival)
4399 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
4403 emitCurIGsize += sz;
4405 #endif // !LEGACY_BACKEND
// emitIns_R_R_C_I: emit a three-operand AVX instruction with two registers, a
// static-field operand, and a 1-byte immediate.
4407 void emitter::emitIns_R_R_C_I(
4408 instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
4410 assert(IsSSEOrAVXInstruction(ins));
4411 assert(IsThreeOperandAVXInstruction(ins));
4413 // Static always need relocs
4414 if (!jitStaticFldIsGlobAddr(fldHnd))
4416 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4419 instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs);
// Size = memory-form size + VEX prefix adjustment + 1 for the imm8.
4420 UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
4423 id->idInsFmt(IF_RWR_RRD_MRD_CNS);
4426 id->idAddr()->iiaFieldHnd = fldHnd;
4430 emitCurIGsize += sz;
4433 /**********************************************************************************
4434 * emitIns_R_R_R_I: Add an instruction with three register operands and an immediate.
4437 * ins - the instruction to add
4438 * attr - the emitter attribute for instruction
4439 * targetReg - the target (destination) register
4440 * reg1 - the first source register
4441 * reg2 - the second source register
4442 * ival - the immediate value
4445 void emitter::emitIns_R_R_R_I(
4446 instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, int ival)
4448 assert(IsSSEOrAVXInstruction(ins));
4449 assert(IsThreeOperandAVXInstruction(ins));
// Currently the VEX prefix only uses the three-byte mode.
4450 // Currently vex prefix only use three bytes mode.
4451 // size = vex + opcode + ModR/M + 1-byte-cns = 3 + 1 + 1 + 1 = 6
4452 // TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future
4453 UNATIVE_OFFSET sz = 6;
4455 instrDesc* id = emitNewInstrCns(attr, ival);
4457 id->idInsFmt(IF_RWR_RRD_RRD_CNS);
4458 id->idReg1(targetReg);
4464 emitCurIGsize += sz;
// emitIns_R_R_S_I: emit a three-operand AVX instruction with two registers, a
// stack-local operand, and a 1-byte immediate.
4467 void emitter::emitIns_R_R_S_I(
4468 instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs, int ival)
4470 assert(IsSSEOrAVXInstruction(ins));
4471 assert(IsThreeOperandAVXInstruction(ins));
4473 instrDesc* id = emitNewInstrCns(attr, ival);
// (listing gap: the 'UNATIVE_OFFSET sz =' head of this expression is elided; "+ 1" is the imm8)
4475 emitInsSizeSV(insCodeRM(ins), varx, offs) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
4478 id->idInsFmt(IF_RWR_RRD_SRD_CNS);
4481 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
4484 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4489 emitCurIGsize += sz;
4492 #ifndef LEGACY_BACKEND
// emitIns_R_R_R_R: emit a four-register AVX blendv-style instruction; the fourth
// register is encoded in the upper nibble of an imm8 (see comment below).
4494 void emitter::emitIns_R_R_R_R(
4495 instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, regNumber reg3)
4497 assert(isAvxBlendv(ins));
4498 assert(UseVEXEncoding());
// Currently the VEX prefix only uses the three-byte mode.
4499 // Currently vex prefix only use three bytes mode.
4500 // size = vex + opcode + ModR/M + 1-byte-cns(Reg) = 3 + 1 + 1 + 1 = 6
4501 // TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future
4502 UNATIVE_OFFSET sz = 6;
4504 // AVX/AVX2 supports 4-reg format for vblendvps/vblendvpd/vpblendvb,
4505 // which encodes the fourth register into imm8[7:4]
4506 int ival = (reg3 - XMMBASE) << 4; // convert reg3 to ival
4508 instrDesc* id = emitNewInstrCns(attr, ival);
4510 id->idInsFmt(IF_RWR_RRD_RRD_RRD);
4511 id->idReg1(targetReg);
4518 emitCurIGsize += sz;
4521 #endif // !LEGACY_BACKEND
4523 /*****************************************************************************
4525 * Add an instruction with a register + static member operands.
4527 void emitter::emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO_FIELD_HANDLE fldHnd, int offs)
4529 // Static always need relocs
4530 if (!jitStaticFldIsGlobAddr(fldHnd))
4532 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4535 emitAttr size = EA_SIZE(attr);
4537 assert(size <= EA_32BYTE);
4538 noway_assert(emitVerifyEncodable(ins, size, reg));
4543 // Are we MOV'ing the offset of the class variable into EAX?
4544 if (EA_IS_OFFSET(attr))
4546 id = emitNewInstrDsp(EA_1BYTE, offs);
4548 id->idInsFmt(IF_RWR_MRD_OFF);
// Only "mov eax, <offset>" is expected in the offset-load case.
4550 assert(ins == INS_mov && reg == REG_EAX);
4552 // Special case: "mov eax, [addr]" is smaller
4553 sz = 1 + TARGET_POINTER_SIZE;
4557 insFormat fmt = emitInsModeFormat(ins, IF_RRD_MRD);
4559 id = emitNewInstrDsp(attr, offs);
4564 // Special case: "mov eax, [addr]" is smaller.
4565 // This case is not enabled for amd64 as it always uses RIP relative addressing
4566 // and it results in smaller instruction size than encoding 64-bit addr in the
4568 if (ins == INS_mov && reg == REG_EAX)
4570 sz = 1 + TARGET_POINTER_SIZE;
4571 if (size == EA_2BYTE)
4575 #endif //_TARGET_X86_
4577 sz = emitInsSizeCV(id, insCodeRM(ins));
4580 // Special case: mov reg, fs:[ddd]
4581 if (fldHnd == FLD_GLOBAL_FS)
// Account for a VEX prefix when one will be emitted.
4588 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
4591 if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr))
4593 sz += emitGetRexPrefixSize(ins);
4599 id->idAddr()->iiaFieldHnd = fldHnd;
4602 emitCurIGsize += sz;
4605 /*****************************************************************************
4607 * Add an instruction with a static member + register operands.
4610 void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs)
4612 // Static always need relocs
4613 if (!jitStaticFldIsGlobAddr(fldHnd))
4615 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4618 emitAttr size = EA_SIZE(attr);
4620 #if defined(_TARGET_X86_) && !FEATURE_STACK_FP_X87
4621 // For x86 RyuJIT it is valid to storeind a double sized operand in an xmm reg to memory
4622 assert(size <= EA_8BYTE);
4624 assert(size <= EA_PTRSIZE);
4627 noway_assert(emitVerifyEncodable(ins, size, reg));
4629 instrDesc* id = emitNewInstrDsp(attr, offs);
4630 insFormat fmt = emitInsModeFormat(ins, IF_MRD_RRD);
4638 // Special case: "mov [addr], EAX" is smaller.
4639 // This case is not enabled for amd64 as it always uses RIP relative addressing
4640 // and it will result in smaller instruction size than encoding 64-bit addr in
4642 if (ins == INS_mov && reg == REG_EAX)
4644 sz = 1 + TARGET_POINTER_SIZE;
4645 if (size == EA_2BYTE)
4649 #endif //_TARGET_X86_
4651 sz = emitInsSizeCV(id, insCodeMR(ins));
4654 // Special case: mov reg, fs:[ddd]
4655 if (fldHnd == FLD_GLOBAL_FS)
4661 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
4664 if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr))
4666 sz += emitGetRexPrefixSize(ins);
4672 id->idAddr()->iiaFieldHnd = fldHnd;
4675 emitCurIGsize += sz;
4678 /*****************************************************************************
4680 * Add an instruction with a static member + constant.
4683 void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, int val)
4685 // Static always need relocs
4686 if (!jitStaticFldIsGlobAddr(fldHnd))
4688 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4708 fmt = emitInsModeFormat(ins, IF_MRD_CNS);
4712 instrDesc* id = emitNewInstrCnsDsp(attr, val, offs);
4716 code_t code = insCodeMI(ins);
4717 UNATIVE_OFFSET sz = emitInsSizeCV(id, code, val);
4720 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins));
4722 // REX prefix, if not already included in "code"
4723 if (TakesRexWPrefix(ins, attr) && !hasRexPrefix(code))
4725 sz += emitGetRexPrefixSize(ins);
4728 id->idAddr()->iiaFieldHnd = fldHnd;
4732 emitCurIGsize += sz;
// emitIns_J_S: store the address of a basic-block label into a stack local
// ("mov [lcl], label"); the instruction is appended to the current IG's jump list
// so the label can be resolved later.
4735 void emitter::emitIns_J_S(instruction ins, emitAttr attr, BasicBlock* dst, int varx, int offs)
4737 assert(ins == INS_mov);
4738 assert(dst->bbFlags & BBF_JMP_TARGET);
4740 instrDescLbl* id = emitNewInstrLbl();
4743 id->idInsFmt(IF_SWR_LABEL);
4744 id->idAddr()->iiaBBlabel = dst;
4746 /* The label reference is always long */
4749 id->idjKeepLong = 1;
4751 /* Record the current IG and offset within it */
4753 id->idjIG = emitCurIG;
4754 id->idjOffs = emitCurIGsize;
4756 /* Append this instruction to this IG's jump list */
4758 id->idjNext = emitCurIGjmpList;
4759 emitCurIGjmpList = id;
// Estimated size: 4-byte address immediate plus the stack-local addressing form.
4761 UNATIVE_OFFSET sz = sizeof(INT32) + emitInsSizeSV(insCodeMI(ins), varx, offs);
4762 id->dstLclVar.initLclVarAddr(varx, offs);
4764 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4771 #ifndef _TARGET_AMD64_
4772 // Storing the address of a basicBlock will need a reloc
4773 // as the instruction uses the absolute address,
4774 // not a relative address.
4776 // On Amd64, Absolute code addresses should always go through a reloc
4777 // to be encoded as RIP rel32 offset.
4778 if (emitComp->opts.compReloc)
4781 id->idSetIsDspReloc();
4787 emitCurIGsize += sz;
4790 /*****************************************************************************
4792 * Add a label instruction.
4794 void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
4796 assert(ins == INS_lea);
4797 assert(dst->bbFlags & BBF_JMP_TARGET);
4799 instrDescJmp* id = emitNewInstrJmp();
4803 id->idInsFmt(IF_RWR_LABEL);
4804 id->idOpSize(EA_SIZE(attr)); // emitNewInstrJmp() sets the size (incorrectly) to EA_1BYTE
4805 id->idAddr()->iiaBBlabel = dst;
4807 /* The label reference is always long */
4810 id->idjKeepLong = 1;
4812 /* Record the current IG and offset within it */
4814 id->idjIG = emitCurIG;
4815 id->idjOffs = emitCurIGsize;
4817 /* Append this instruction to this IG's jump list */
4819 id->idjNext = emitCurIGjmpList;
4820 emitCurIGjmpList = id;
4823 // Mark the catch return
4824 if (emitComp->compCurBB->bbJumpKind == BBJ_EHCATCHRET)
4826 id->idDebugOnlyInfo()->idCatchRet = true;
4834 // Set the relocation flags - these give hint to zap to perform
4835 // relocation of the specified 32bit address.
4837 // Note the relocation flags influence the size estimate.
4838 id->idSetRelocFlags(attr);
4840 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
4844 emitCurIGsize += sz;
4847 /*****************************************************************************
4849 * The following adds instructions referencing address modes.
// emitIns_I_AR: emit an instruction with an immediate and a [reg+disp] address mode.
4852 void emitter::emitIns_I_AR(instruction ins, emitAttr attr, int val, regNumber reg, int disp)
4854 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
4856 #ifdef _TARGET_AMD64_
4857 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
4858 // all other opcodes take a sign-extended 4-byte immediate
4859 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
4879 fmt = emitInsModeFormat(ins, IF_ARD_CNS);
4884 Useful if you want to trap moves with 0 constant
4885 if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE)
4892 instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
4896 id->idAddr()->iiaAddrMode.amBaseReg = reg;
4897 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4899 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
4901 sz = emitInsSizeAM(id, insCodeMI(ins), val);
4905 emitCurIGsize += sz;
// emitIns_I_AI: emit an instruction with an immediate and an absolute [disp] address
// (no base or index register).
4908 void emitter::emitIns_I_AI(instruction ins, emitAttr attr, int val, ssize_t disp)
4910 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
4912 #ifdef _TARGET_AMD64_
4913 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
4914 // all other opcodes take a sign-extended 4-byte immediate
4915 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
4935 fmt = emitInsModeFormat(ins, IF_ARD_CNS);
4940 Useful if you want to trap moves with 0 constant
4941 if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE)
4948 instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
4952 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
4953 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4955 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
4957 sz = emitInsSizeAM(id, insCodeMI(ins), val);
4961 emitCurIGsize += sz;
// emitIns_R_AR: emit an instruction with a register and a [base+disp] address mode.
4964 void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp)
4966 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE) && (ireg != REG_NA));
4967 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
// Elide no-op forms such as "mov reg, [reg+0]" (condition visible; action elided in listing).
4971 if (ireg == base && disp == 0)
4973 // Maybe the emitter is not the common place for this optimization, but it's a better choke point
4974 // for all the emitIns(ins, tree), we would have to be analyzing at each call site
4981 instrDesc* id = emitNewInstrAmd(attr, disp);
4982 insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
4988 id->idAddr()->iiaAddrMode.amBaseReg = base;
4989 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4991 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
4993 sz = emitInsSizeAM(id, insCodeRM(ins));
4995 if (Is4ByteSSE4Instruction(ins))
4997 // The 4-Byte SSE4 instructions require two additional bytes
5004 emitCurIGsize += sz;
// emitIns_R_AI: emit an instruction with a register and an absolute [disp] address
// (no base or index register).
5007 void emitter::emitIns_R_AI(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp)
5009 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5010 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5013 instrDesc* id = emitNewInstrAmd(attr, disp);
5014 insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5020 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5021 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5023 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5025 sz = emitInsSizeAM(id, insCodeRM(ins));
5029 emitCurIGsize += sz;
// emitIns_AR_R: emit an instruction with a [base+disp] address mode and a register,
// or an address-only form when ireg is REG_NA (format selection is partly elided here).
5032 void emitter::emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp)
5035 instrDesc* id = emitNewInstrAmd(attr, disp);
5040 #if FEATURE_STACK_FP_X87
5041 fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD);
5042 #else // !FEATURE_STACK_FP_X87
5043 fmt = emitInsModeFormat(ins, IF_ARD);
5044 #endif // !FEATURE_STACK_FP_X87
5048 fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5050 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE));
5051 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5059 id->idAddr()->iiaAddrMode.amBaseReg = base;
5060 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5062 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5064 sz = emitInsSizeAM(id, insCodeMR(ins));
5068 emitCurIGsize += sz;
// push/pop through memory still adjusts the emitter's tracked stack depth.
5070 emitAdjustStackDepthPushPop(ins);
5073 #ifndef LEGACY_BACKEND
// emitIns_AR_R_I: emit vextracti128/vextractf128 storing the upper/lower 128 bits of a
// YMM register to [base+disp], with the lane selector in a 1-byte immediate.
5074 void emitter::emitIns_AR_R_I(instruction ins, emitAttr attr, regNumber base, int disp, regNumber ireg, int ival)
5076 assert(ins == INS_vextracti128 || ins == INS_vextractf128);
5077 assert(base != REG_NA);
5078 assert(ireg != REG_NA);
5079 instrDesc* id = emitNewInstrAmdCns(attr, disp, ival);
5082 id->idInsFmt(IF_AWR_RRD_CNS);
5083 id->idAddr()->iiaAddrMode.amBaseReg = base;
5084 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5087 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5089 // Plus one for the 1-byte immediate (ival)
5090 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins)) + 1;
5094 emitCurIGsize += sz;
// emitIns_AI_R: emit an instruction with an absolute [disp] address and a register,
// or an address-only form when ireg is REG_NA (format selection is partly elided here).
5098 void emitter::emitIns_AI_R(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp)
5101 instrDesc* id = emitNewInstrAmd(attr, disp);
5106 #if FEATURE_STACK_FP_X87
5107 fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD);
5108 #else // FEATURE_STACK_FP_X87
5109 fmt = emitInsModeFormat(ins, IF_ARD);
5110 #endif // FEATURE_STACK_FP_X87
5114 fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5116 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5117 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5125 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5126 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5128 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5130 sz = emitInsSizeAM(id, insCodeMR(ins));
5134 emitCurIGsize += sz;
5136 emitAdjustStackDepthPushPop(ins);
// emitIns_I_ARR: emit an instruction with an immediate and a [reg+rg2*1+disp]
// address mode (scale fixed at 1).
5139 void emitter::emitIns_I_ARR(instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, int disp)
5141 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5143 #ifdef _TARGET_AMD64_
5144 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5145 // all other opcodes take a sign-extended 4-byte immediate
5146 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5166 fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5171 instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
5175 id->idAddr()->iiaAddrMode.amBaseReg = reg;
5176 id->idAddr()->iiaAddrMode.amIndxReg = rg2;
5177 id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1;
5179 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5181 sz = emitInsSizeAM(id, insCodeMI(ins), val);
5185 emitCurIGsize += sz;
// emitIns_R_ARR: emit an instruction with a register and a [base+index*1+disp]
// address mode (scale fixed at 1).
5188 void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, int disp)
5190 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5191 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5194 instrDesc* id = emitNewInstrAmd(attr, disp);
5195 insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5201 id->idAddr()->iiaAddrMode.amBaseReg = base;
5202 id->idAddr()->iiaAddrMode.amIndxReg = index;
5203 id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1;
5205 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5207 sz = emitInsSizeAM(id, insCodeRM(ins));
5211 emitCurIGsize += sz;
// emitIns_ARR_R: emit an instruction with a [reg+index*1+disp] address mode and a
// register, or an address-only form when ireg is REG_NA (selection partly elided here).
5214 void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber index, int disp)
5217 instrDesc* id = emitNewInstrAmd(attr, disp);
5222 #if FEATURE_STACK_FP_X87
5223 fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD);
5224 #else // FEATURE_STACK_FP_X87
5225 fmt = emitInsModeFormat(ins, IF_ARD);
5226 #endif // FEATURE_STACK_FP_X87
5230 fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5232 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5233 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5241 id->idAddr()->iiaAddrMode.amBaseReg = reg;
5242 id->idAddr()->iiaAddrMode.amIndxReg = index;
5243 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(1);
5245 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5247 sz = emitInsSizeAM(id, insCodeMR(ins));
5251 emitCurIGsize += sz;
5253 emitAdjustStackDepthPushPop(ins);
// emitIns_I_ARX: emit an instruction with an immediate and a [reg+rg2*mul+disp]
// address mode with an arbitrary scale.
5256 void emitter::emitIns_I_ARX(
5257 instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, unsigned mul, int disp)
5259 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5261 #ifdef _TARGET_AMD64_
5262 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5263 // all other opcodes take a sign-extended 4-byte immediate
5264 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5284 fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5289 instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
5294 id->idAddr()->iiaAddrMode.amBaseReg = reg;
5295 id->idAddr()->iiaAddrMode.amIndxReg = rg2;
5296 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5298 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5300 sz = emitInsSizeAM(id, insCodeMI(ins), val);
5304 emitCurIGsize += sz;
// emitIns_R_ARX: emit an instruction with a register and a [base+index*mul+disp]
// address mode with an arbitrary scale.
5307 void emitter::emitIns_R_ARX(
5308 instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp)
5310 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5311 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5314 instrDesc* id = emitNewInstrAmd(attr, disp);
5315 insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5321 id->idAddr()->iiaAddrMode.amBaseReg = base;
5322 id->idAddr()->iiaAddrMode.amIndxReg = index;
5323 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5325 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5327 sz = emitInsSizeAM(id, insCodeRM(ins));
5331 emitCurIGsize += sz;
// emitIns_ARX_R: emit an instruction with a [base+index*mul+disp] address mode and a
// register, or an address-only form when ireg is REG_NA (selection partly elided here).
5334 void emitter::emitIns_ARX_R(
5335 instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp)
5338 instrDesc* id = emitNewInstrAmd(attr, disp);
5343 #if FEATURE_STACK_FP_X87
5344 fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD);
5345 #else // !FEATURE_STACK_FP_X87
5346 fmt = emitInsModeFormat(ins, IF_ARD);
5347 #endif // !FEATURE_STACK_FP_X87
5351 fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5353 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5354 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5362 id->idAddr()->iiaAddrMode.amBaseReg = base;
5363 id->idAddr()->iiaAddrMode.amIndxReg = index;
5364 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5366 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5368 sz = emitInsSizeAM(id, insCodeMR(ins));
5372 emitCurIGsize += sz;
5374 emitAdjustStackDepthPushPop(ins);
// emitIns_I_AX: emit an instruction with an immediate and an index-only
// [reg*mul+disp] address mode (no base register).
5377 void emitter::emitIns_I_AX(instruction ins, emitAttr attr, int val, regNumber reg, unsigned mul, int disp)
5379 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5381 #ifdef _TARGET_AMD64_
5382 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5383 // all other opcodes take a sign-extended 4-byte immediate
5384 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5404 fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5409 instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
5413 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5414 id->idAddr()->iiaAddrMode.amIndxReg = reg;
5415 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5417 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5419 sz = emitInsSizeAM(id, insCodeMI(ins), val);
5423 emitCurIGsize += sz;
// emitIns_R_AX: emit an instruction with a register and an index-only
// [reg*mul+disp] address mode (no base register).
5426 void emitter::emitIns_R_AX(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp)
5428 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5429 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5432 instrDesc* id = emitNewInstrAmd(attr, disp);
5433 insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5439 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5440 id->idAddr()->iiaAddrMode.amIndxReg = reg;
5441 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5443 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5445 sz = emitInsSizeAM(id, insCodeRM(ins));
5449 emitCurIGsize += sz;
// emitIns_AX_R: emit an instruction with an index-only [reg*mul+disp] address mode and
// a register, or an address-only form when ireg is REG_NA (selection partly elided here).
5452 void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp)
5455 instrDesc* id = emitNewInstrAmd(attr, disp);
5460 #if FEATURE_STACK_FP_X87
5461 fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD);
5462 #else // !FEATURE_STACK_FP_X87
5463 fmt = emitInsModeFormat(ins, IF_ARD);
5464 #endif // !FEATURE_STACK_FP_X87
5468 fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5469 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5470 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5478 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5479 id->idAddr()->iiaAddrMode.amIndxReg = reg;
5480 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5482 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5484 sz = emitInsSizeAM(id, insCodeMR(ins));
5488 emitCurIGsize += sz;
5490 emitAdjustStackDepthPushPop(ins);
5493 #ifdef FEATURE_HW_INTRINSICS
// emitIns_SIMD_R_R_A: VEX path emits the 3-operand form directly; otherwise fall back to
// SSE by copying reg1 into the destination (movaps) and using the 2-operand memory form.
5494 void emitter::emitIns_SIMD_R_R_A(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, GenTreeIndir* indir)
5496 if (UseVEXEncoding())
5498 emitIns_R_R_A(ins, attr, reg, reg1, indir, IF_RWR_RRD_ARD);
5504 emitIns_R_R(INS_movaps, attr, reg, reg1);
5506 emitIns_R_A(ins, attr, reg, indir, IF_RRW_ARD);
// emitIns_SIMD_R_R_AR: VEX 3-operand [base] form, or SSE fallback via movaps + 2-operand form.
5510 void emitter::emitIns_SIMD_R_R_AR(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber base)
5512 if (UseVEXEncoding())
5514 emitIns_R_R_AR(ins, attr, reg, reg1, base, 0);
5520 emitIns_R_R(INS_movaps, attr, reg, reg1);
5522 emitIns_R_AR(ins, attr, reg, base, 0);
// emitIns_SIMD_R_R_C: VEX 3-operand static-field form, or SSE fallback via movaps.
5526 void emitter::emitIns_SIMD_R_R_C(
5527 instruction ins, emitAttr attr, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs)
5529 if (UseVEXEncoding())
5531 emitIns_R_R_C(ins, attr, reg, reg1, fldHnd, offs);
5537 emitIns_R_R(INS_movaps, attr, reg, reg1);
5539 emitIns_R_C(ins, attr, reg, fldHnd, offs);
// emitIns_SIMD_R_R_R: VEX 3-register form, or SSE fallback via movaps + 2-register form.
5543 void emitter::emitIns_SIMD_R_R_R(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2)
5545 if (UseVEXEncoding())
5547 emitIns_R_R_R(ins, attr, reg, reg1, reg2);
// In the SSE fallback, the movaps into 'reg' would clobber reg2 if they alias.
5553 // Ensure we aren't overwriting op2
5554 assert(reg2 != reg);
5556 emitIns_R_R(INS_movaps, attr, reg, reg1);
5558 emitIns_R_R(ins, attr, reg, reg2);
// isSseShift: presumably returns true for SSE2 packed-shift instructions — body is
// elided from this listing, so verify against the full source.
5562 static bool isSseShift(instruction ins)
5582 //------------------------------------------------------------------------
5583 // IsDstSrcImmAvxInstruction: check if instruction has "R(M) R(M) I" format
5584 // for EVEX, VEX and legacy SSE encodings and has no (E)VEX.NDS
5587 // instruction -- processor instruction to check
5590 // true if instruction has "R(M) R(M) I" format and has no (E)VEX.NDS
// NOTE(review): the function body is elided from this listing; verify against full source.
5592 static bool IsDstSrcImmAvxInstruction(instruction ins)
// emitIns_SIMD_R_R_I: 2-register + imm8 form for dst/src-imm instructions or under VEX;
// otherwise SSE fallback via movaps. Shift instructions get special handling (see below).
5610 void emitter::emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, int ival)
5612 // TODO-XARCH refactoring emitIns_R_R_I to handle SSE2/AVX2 shift as well as emitIns_R_I
5613 bool isShift = isSseShift(ins);
5614 if (IsDstSrcImmAvxInstruction(ins) || (UseVEXEncoding() && !isShift))
5616 emitIns_R_R_I(ins, attr, reg, reg1, ival);
5622 emitIns_R_R(INS_movaps, attr, reg, reg1);
5624 // TODO-XARCH-BUG emitOutputRI cannot work with SSE2 shift instruction on imm8 > 127, so we replace it by the
5625 // semantic alternatives. https://github.com/dotnet/coreclr/issues/16543
5626 if (isShift && ival > 127)
5630 emitIns_R_I(ins, attr, reg, ival);
// emitIns_SIMD_R_R_R_R: blendv with an explicit mask register. Under VEX, upgrade the
// SSE4.1 opcode to its 4-operand AVX equivalent; otherwise use the SSE4.1 form, which
// hardcodes the mask in XMM0, moving operands into place as needed.
5634 void emitter::emitIns_SIMD_R_R_R_R(
5635 instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2, regNumber reg3)
5637 assert(isAvxBlendv(ins) || isSse41Blendv(ins));
5638 if (UseVEXEncoding())
5640 // convert SSE encoding of SSE4.1 instructions to VEX encoding
5644 ins = INS_vblendvps;
5647 ins = INS_vblendvpd;
5650 ins = INS_vpblendvb;
5655 emitIns_R_R_R_R(ins, attr, reg, reg1, reg2, reg3);
5659 assert(isSse41Blendv(ins));
5660 // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
5661 if (reg3 != REG_XMM0)
5663 // Ensure we aren't overwriting op1 or op2
5664 assert(reg1 != REG_XMM0);
5665 assert(reg2 != REG_XMM0);
// Move the mask into the hardcoded XMM0 operand.
5667 emitIns_R_R(INS_movaps, attr, REG_XMM0, reg3);
5671 // Ensure we aren't overwriting op2 or op3
5672 assert(reg2 != reg);
5673 assert((reg3 == REG_XMM0) || (reg != REG_XMM0));
5675 emitIns_R_R(INS_movaps, attr, reg, reg1);
5677 emitIns_R_R(ins, attr, reg, reg2);
// emitIns_SIMD_R_R_S: VEX 3-operand stack-local form, or SSE fallback via movaps.
5681 void emitter::emitIns_SIMD_R_R_S(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, int varx, int offs)
5683 if (UseVEXEncoding())
5685 emitIns_R_R_S(ins, attr, reg, reg1, varx, offs)
5691 emitIns_R_R(INS_movaps, attr, reg, reg1);
5693 emitIns_R_S(ins, attr, reg, varx, offs);
// emitIns_SIMD_R_R_A_I: emit "reg = reg1 <ins> [indir], imm". VEX path uses
// the 3-operand + imm form; the SSE fallback copies reg1 into reg first.
5697 void emitter::emitIns_SIMD_R_R_A_I(
5698 instruction ins, emitAttr attr, regNumber reg, regNumber reg1, GenTreeIndir* indir, int ival)
5700 if (UseVEXEncoding())
5702 emitIns_R_R_A_I(ins, attr, reg, reg1, indir, ival, IF_RWR_RRD_ARD_CNS);
5708 emitIns_R_R(INS_movaps, attr, reg, reg1);
5710 emitIns_R_A_I(ins, attr, reg, indir, ival);
// emitIns_SIMD_R_R_AR_I: emit "reg = reg1 <ins> [base], imm" (zero
// displacement). VEX path is 3-operand; SSE fallback copies reg1 first.
5714 void emitter::emitIns_SIMD_R_R_AR_I(
5715 instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber base, int ival)
5717 if (UseVEXEncoding())
5719 emitIns_R_R_AR_I(ins, attr, reg, reg1, base, 0, ival);
5725 emitIns_R_R(INS_movaps, attr, reg, reg1);
5727 emitIns_R_AR_I(ins, attr, reg, base, 0, ival);
// emitIns_SIMD_R_R_C_I: emit "reg = reg1 <ins> [static field + offs], imm".
// VEX path is 3-operand; SSE fallback copies reg1 into reg first.
5731 void emitter::emitIns_SIMD_R_R_C_I(
5732 instruction ins, emitAttr attr, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
5734 if (UseVEXEncoding())
5736 emitIns_R_R_C_I(ins, attr, reg, reg1, fldHnd, offs, ival);
5742 emitIns_R_R(INS_movaps, attr, reg, reg1);
5744 emitIns_R_C_I(ins, attr, reg, fldHnd, offs, ival);
// emitIns_SIMD_R_R_R_I: emit "reg = reg1 <ins> reg2, imm". VEX path is
// 3-operand; the SSE fallback must not clobber op2 before using it.
5748 void emitter::emitIns_SIMD_R_R_R_I(
5749 instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2, int ival)
5751 if (UseVEXEncoding())
5753 emitIns_R_R_R_I(ins, attr, reg, reg1, reg2, ival);
5759 // Ensure we aren't overwriting op2
5760 assert(reg2 != reg);
5762 emitIns_R_R(INS_movaps, attr, reg, reg1);
5764 emitIns_R_R_I(ins, attr, reg, reg2, ival);
// emitIns_SIMD_R_R_S_I: emit "reg = reg1 <ins> [stack local], imm". VEX path
// is 3-operand; SSE fallback copies reg1 into reg first.
5768 void emitter::emitIns_SIMD_R_R_S_I(
5769 instruction ins, emitAttr attr, regNumber reg, regNumber reg1, int varx, int offs, int ival)
5771 if (UseVEXEncoding())
5773 emitIns_R_R_S_I(ins, attr, reg, reg1, varx, offs, ival);
5779 emitIns_R_R(INS_movaps, attr, reg, reg1);
5781 emitIns_R_S_I(ins, attr, reg, varx, offs, ival);
5784 #endif // FEATURE_HW_INTRINSICS
5786 /*****************************************************************************
5788 * The following add instructions referencing stack-based local variables.
// emitIns_S: emit an instruction with a single stack-local operand.
// Computes the base instruction size, then adds prefix bytes (operand-size
// prefix for 16-bit, VEX adjustment, REX.W for 64-bit) before recording the
// local-var address and growing the current IG. (Some lines elided.)
5791 void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs)
5793 instrDesc* id = emitNewInstr(attr);
5794 UNATIVE_OFFSET sz = emitInsSizeSV(insCodeMR(ins), varx, offs);
5795 #if FEATURE_STACK_FP_X87
5796 insFormat fmt = emitInsModeFormat(ins, IF_SRD, IF_TRD_SRD, IF_SWR_TRD);
5797 #else // !FEATURE_STACK_FP_X87
5798 insFormat fmt = emitInsModeFormat(ins, IF_SRD);
5799 #endif // !FEATURE_STACK_FP_X87
5801 // 16-bit operand instructions will need a prefix
5802 if (EA_SIZE(attr) == EA_2BYTE)
5808 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
5810 // 64-bit operand instructions will need a REX.W prefix
5811 if (TakesRexWPrefix(ins, attr))
5813 sz += emitGetRexPrefixSize(ins);
5818 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
5822 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
5825 emitCurIGsize += sz;
// push/pop against a local also changes the tracked x86 stack depth.
5827 emitAdjustStackDepthPushPop(ins);
// emitIns_S_R: emit "ins [stack local], reg". Byte-sized operands require a
// byte-addressable register; prefix sizing mirrors emitIns_S, additionally
// accounting for extended (R8-R15/XMM8+) registers. (Some lines elided.)
5830 void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs)
5832 instrDesc* id = emitNewInstr(attr);
5833 UNATIVE_OFFSET sz = emitInsSizeSV(insCodeMR(ins), varx, offs);
5834 insFormat fmt = emitInsModeFormat(ins, IF_SRD_RRD);
5837 if (attr == EA_1BYTE)
5839 assert(isByteReg(ireg));
5842 // 16-bit operand instructions will need a prefix
5843 if (EA_SIZE(attr) == EA_2BYTE)
5849 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
5851 // 64-bit operand instructions will need a REX.W prefix
5852 if (TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr))
5854 sz += emitGetRexPrefixSize(ins);
5860 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
5863 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
5866 emitCurIGsize += sz;
// emitIns_R_S: emit "ins reg, [stack local]". movsx/movzx encode the operand
// size themselves, so they skip the 16-bit operand-size prefix.
// (Some lines elided in this sampled listing.)
5869 void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs)
5871 emitAttr size = EA_SIZE(attr);
5872 noway_assert(emitVerifyEncodable(ins, size, ireg));
5874 instrDesc* id = emitNewInstr(attr);
5875 UNATIVE_OFFSET sz = emitInsSizeSV(insCodeRM(ins), varx, offs);
5876 insFormat fmt = emitInsModeFormat(ins, IF_RRD_SRD);
5878 // Most 16-bit operand instructions need a prefix
5879 if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx)
5885 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
5887 // 64-bit operand instructions will need a REX.W prefix
5888 if (TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr))
5890 sz += emitGetRexPrefixSize(ins);
5896 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
5899 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
5902 emitCurIGsize += sz;
// emitIns_S_I: emit "ins [stack local], imm". On AMD64 only "mov reg, imm64"
// takes an 8-byte immediate, so 8-byte-immediate forms with relocs are
// rejected here. (The fmt selection context is elided in this listing.)
5905 void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val)
5907 #ifdef _TARGET_AMD64_
5908 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5909 // all other opcodes take a sign-extended 4-byte immediate
5910 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5930 fmt = emitInsModeFormat(ins, IF_SRD_CNS);
5934 instrDesc* id = emitNewInstrCns(attr, val);
5937 UNATIVE_OFFSET sz = emitInsSizeSV(id, varx, offs, val);
5940 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins));
5942 // 64-bit operand instructions will need a REX.W prefix
5943 if (TakesRexWPrefix(ins, attr))
5945 sz += emitGetRexPrefixSize(ins);
5948 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
5951 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
5954 emitCurIGsize += sz;
5957 /*****************************************************************************
5959 * Record that a jump instruction uses the short encoding
// A jump flagged idjKeepLong (e.g. crossing hot/cold regions) must stay long,
// so the early-out (elided here) skips the short-encoding mark for those.
5962 void emitter::emitSetShortJump(instrDescJmp* id)
5964 if (id->idjKeepLong)
5969 id->idjShort = true;
5972 /*****************************************************************************
5974 * Add a jmp instruction.
// emitIns_J: record a jump/call/push-of-label to basic block 'dst'. The jump
// is linked into the current IG's jump list, sized pessimistically (large),
// and then shrunk to the short encoding when the target IG is already known
// (a backward jump) and within short range. (Braces/else lines elided.)
5977 void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0 */)
5980 instrDescJmp* id = emitNewInstrJmp();
5982 assert(dst->bbFlags & BBF_JMP_TARGET);
5985 id->idInsFmt(IF_LABEL);
5986 id->idAddr()->iiaBBlabel = dst;
5989 // Mark the finally call
5990 if (ins == INS_call && emitComp->compCurBB->bbJumpKind == BBJ_CALLFINALLY)
5992 id->idDebugOnlyInfo()->idFinallyCall = true;
5996 /* Assume the jump will be long */
// Keep jumps between different code regions (hot/cold split) long.
5999 id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
6001 /* Record the jump's IG and offset within it */
6003 id->idjIG = emitCurIG;
6004 id->idjOffs = emitCurIGsize;
6006 /* Append this jump to this IG's jump list */
6008 id->idjNext = emitCurIGjmpList;
6009 emitCurIGjmpList = id;
6015 /* Figure out the max. size of the jump/call instruction */
6017 if (ins == INS_call)
6019 sz = CALL_INST_SIZE;
6021 else if (ins == INS_push || ins == INS_push_hide)
6023 // Pushing the address of a basicBlock will need a reloc
6024 // as the instruction uses the absolute address,
6025 // not a relative address
6026 if (emitComp->opts.compReloc)
6028 id->idSetIsDspReloc();
6030 sz = PUSH_INST_SIZE;
6036 /* This is a jump - assume the worst */
6038 sz = (ins == INS_jmp) ? JMP_SIZE_LARGE : JCC_SIZE_LARGE;
6040 /* Can we guess at the jump distance? */
// emitCodeGetCookie returns the target's insGroup once emitted; non-null
// means 'dst' is behind us and the distance can be estimated now.
6042 tgt = (insGroup*)emitCodeGetCookie(dst);
6047 UNATIVE_OFFSET srcOffs;
6050 assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL);
6052 /* This is a backward jump - figure out the distance */
6054 srcOffs = emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL;
6056 /* Compute the distance estimate */
6058 jmpDist = srcOffs - tgt->igOffs;
6059 assert((int)jmpDist > 0);
6061 /* How much beyond the max. short distance does the jump go? */
6063 extra = jmpDist + JMP_DIST_SMALL_MAX_NEG;
6066 if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
6068 if (INTERESTING_JUMP_NUM == 0)
6070 printf("[0] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
6072 printf("[0] Jump source is at %08X\n", srcOffs);
6073 printf("[0] Label block is at %08X\n", tgt->igOffs);
6074 printf("[0] Jump distance - %04X\n", jmpDist);
6077 printf("[0] Distance excess = %d \n", extra);
6082 if (extra <= 0 && !id->idjKeepLong)
6084 /* Wonderful - this jump surely will be short */
6086 emitSetShortJump(id);
6087 sz = JMP_SIZE_SMALL;
6093 if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
6095 if (INTERESTING_JUMP_NUM == 0)
6097 printf("[0] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
6099 printf("[0] Jump source is at %04X/%08X\n", emitCurIGsize,
6100 emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL);
6101 printf("[0] Label block is unknown\n");
6110 emitCurIGsize += sz;
6112 emitAdjustStackDepthPushPop(ins);
6115 #if !FEATURE_FIXED_OUT_ARGS
6117 //------------------------------------------------------------------------
6118 // emitAdjustStackDepthPushPop: Adjust the current and maximum stack depth.
6121 // ins - the instruction. Only INS_push and INS_pop adjust the stack depth.
6124 // 1. Alters emitCurStackLvl and possibly emitMaxStackDepth.
6125 // 2. emitCntStackDepth must be set (0 in prolog/epilog, one DWORD elsewhere)
6127 void emitter::emitAdjustStackDepthPushPop(instruction ins)
6129 if (ins == INS_push)
6131 emitCurStackLvl += emitCntStackDepth;
// Track the high-water mark so the frame can reserve enough stack.
6133 if (emitMaxStackDepth < emitCurStackLvl)
6135 JITDUMP("Upping emitMaxStackDepth from %d to %d\n", emitMaxStackDepth, emitCurStackLvl);
6136 emitMaxStackDepth = emitCurStackLvl;
6139 else if (ins == INS_pop)
6141 emitCurStackLvl -= emitCntStackDepth;
6142 assert((int)emitCurStackLvl >= 0);
6146 //------------------------------------------------------------------------
6147 // emitAdjustStackDepth: Adjust the current and maximum stack depth.
6150 // ins - the instruction. Only INS_add and INS_sub adjust the stack depth.
6151 // It is assumed that the add/sub is on the stack pointer.
6152 // val - the number of bytes to add to or subtract from the stack pointer.
6155 // 1. Alters emitCurStackLvl and possibly emitMaxStackDepth.
6156 // 2. emitCntStackDepth must be set (0 in prolog/epilog, one DWORD elsewhere)
6158 void emitter::emitAdjustStackDepth(instruction ins, ssize_t val)
6160 // If we're in the prolog or epilog, or otherwise not tracking the stack depth, just return.
6161 if (emitCntStackDepth == 0)
// NOTE(review): the "if (ins == INS_sub)" branch header is elided in this
// sampled listing; the S_UINT32 addition below belongs to the INS_sub case
// (sub from SP grows the tracked depth) -- confirm against the full source.
6166 S_UINT32 newStackLvl(emitCurStackLvl);
6167 newStackLvl += S_UINT32(val);
6168 noway_assert(!newStackLvl.IsOverflow());
6170 emitCurStackLvl = newStackLvl.Value();
6172 if (emitMaxStackDepth < emitCurStackLvl)
6174 JITDUMP("Upping emitMaxStackDepth from %d to %d\n", emitMaxStackDepth, emitCurStackLvl);
6175 emitMaxStackDepth = emitCurStackLvl;
6178 else if (ins == INS_add)
6180 S_UINT32 newStackLvl = S_UINT32(emitCurStackLvl) - S_UINT32(val);
6181 noway_assert(!newStackLvl.IsOverflow());
6183 emitCurStackLvl = newStackLvl.Value();
6187 #endif // EMIT_TRACK_STACK_DEPTH
6189 /*****************************************************************************
6191 * Add a call instruction (direct or indirect).
6192 * argSize<0 means that the caller will pop the arguments
6194 * The other arguments are interpreted depending on callType as shown:
6195 * Unless otherwise specified, ireg,xreg,xmul,disp should have default values.
6197 * EC_FUNC_TOKEN : addr is the method address
6198 * EC_FUNC_TOKEN_INDIR : addr is the indirect method address
6199 * EC_FUNC_ADDR : addr is the absolute address of the function
6200 * EC_FUNC_VIRTUAL : "call [ireg+disp]"
6202 * If callType is one of these emitCallTypes, addr has to be NULL.
6203 * EC_INDIR_R : "call ireg".
6204 * EC_INDIR_SR : "call lcl<disp>" (eg. call [ebp-8]).
6205 * EC_INDIR_C : "call clsVar<disp>" (eg. call [clsVarAddr])
6206 * EC_INDIR_ARD : "call [ireg+xreg*xmul+disp]"
// NOTE(review): many brace/else/intermediate lines of this large function are
// elided in this sampled listing; the comments below annotate only what the
// visible lines establish.
6211 void emitter::emitIns_Call(EmitCallType callType,
6212 CORINFO_METHOD_HANDLE methHnd,
6213 INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
6217 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
6218 VARSET_VALARG_TP ptrVars,
6219 regMaskTP gcrefRegs,
6220 regMaskTP byrefRegs,
6221 IL_OFFSETX ilOffset, // = BAD_IL_OFFSET
6222 regNumber ireg, // = REG_NA
6223 regNumber xreg, // = REG_NA
6224 unsigned xmul, // = 0
6225 ssize_t disp, // = 0
6226 bool isJump, // = false
6227 bool isNoGC) // = false
6230 /* Sanity check the arguments depending on callType */
6232 assert(callType < EC_COUNT);
6233 assert((callType != EC_FUNC_TOKEN && callType != EC_FUNC_TOKEN_INDIR && callType != EC_FUNC_ADDR) ||
6234 (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0));
6235 assert(callType != EC_FUNC_VIRTUAL || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0));
6236 assert(callType < EC_INDIR_R || callType == EC_INDIR_ARD || callType == EC_INDIR_C || addr == nullptr);
6237 assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0));
6238 assert(callType != EC_INDIR_SR ||
6239 (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp < (int)emitComp->lvaCount));
6240 assert(callType != EC_INDIR_C || (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp != 0));
6242 // Our stack level should be always greater than the bytes of arguments we push. Just
6244 assert((unsigned)abs((signed)argSize) <= codeGen->genStackLevel);
6247 if (emitComp->opts.compNeedStackProbes)
6249 // If we've pushed more than JIT_RESERVED_STACK allows, do an aditional stack probe
6250 // Else, just make sure the prolog does a probe for us. Invariant we're trying
6251 // to get is that at any point we go out to unmanaged code, there is at least
6252 // CORINFO_STACKPROBE_DEPTH bytes of stack available.
6254 // The reason why we are not doing one probe for the max size at the prolog
6255 // is that when don't have the max depth precomputed (it can depend on codegen),
6256 // and we need it at the time we generate locallocs
6258 // Compiler::lvaAssignFrameOffsets sets up compLclFrameSize, which takes in
6259 // account everything except for the arguments of a callee.
6263 if ((TARGET_POINTER_SIZE + // return address for call
6264 emitComp->genStackLevel +
6265 // Current stack level. This gets resetted on every
6266 // localloc and on the prolog (invariant is that
6267 // genStackLevel is 0 on basic block entry and exit and
6268 // after any alloca). genStackLevel will include any arguments
6269 // to the call, so we will insert an aditional probe if
6270 // we've consumed more than JIT_RESERVED_STACK bytes
6271 // of stack, which is what the prolog probe covers (in
6272 // addition to the EE requested size)
6273 (emitComp->compHndBBtabCount * TARGET_POINTER_SIZE)
6274 // Hidden slots for calling finallys
6275 ) >= JIT_RESERVED_STACK)
6277 // This happens when you have a call with a lot of arguments or a call is done
6278 // when there's a lot of stuff pushed on the stack (for example a call whos returned
6279 // value is an argument of another call that has pushed stuff on the stack)
6280 // This should't be very frequent.
6281 // For different values of JIT_RESERVED_STACK
6283 // For mscorlib (109605 calls)
6285 // 14190 probes in prologs (56760 bytes of code)
6287 // JIT_RESERVED_STACK = 16 : 5452 extra probes
6288 // JIT_RESERVED_STACK = 32 : 1084 extra probes
6289 // JIT_RESERVED_STACK = 64 : 1 extra probes
6290 // JIT_RESERVED_STACK = 96 : 0 extra probes
6291 emitComp->genGenerateStackProbe();
6295 if (emitComp->compGeneratingProlog || emitComp->compGeneratingEpilog)
6297 if (emitComp->compStackProbePrologDone)
6299 // We already generated a probe and this call is not happening
6300 // at a depth >= JIT_RESERVED_STACK, so nothing to do here
6304 // 3 possible ways to get here:
6305 // - We are in an epilog and haven't generated a probe in the prolog.
6306 // This shouldn't happen as we don't generate any calls in epilog.
6307 // - We are in the prolog, but doing a call before generating the probe.
6308 // This shouldn't happen at all.
6309 // - We are in the prolog, did not generate a probe but now we need
6310 // to generate a probe because we need a call (eg: profiler). We'll
6313 // In any case, we need a probe
6315 // Ignore the profiler callback for now.
6316 if (!emitComp->compIsProfilerHookNeeded())
6318 assert(!"We do not expect to get here");
6319 emitComp->genGenerateStackProbe();
6325 // We will need a probe and will generate it in the prolog
6326 emitComp->genNeedPrologStackProbe = true;
6330 #endif // STACK_PROBES
6337 /* This is the saved set of registers after a normal call */
6338 unsigned savedSet = RBM_CALLEE_SAVED;
6340 /* some special helper calls have a different saved set registers */
6344 // Get the set of registers that this call kills and remove it from the saved set.
6345 savedSet = RBM_ALLINT & ~emitComp->compNoGCHelperCallKillSet(Compiler::eeGetHelperNum(methHnd));
6349 assert(!emitNoGChelper(Compiler::eeGetHelperNum(methHnd)));
6352 /* Trim out any callee-trashed registers from the live set */
6354 gcrefRegs &= savedSet;
6355 byrefRegs &= savedSet;
6358 if (EMIT_GC_VERBOSE)
6360 printf("\t\t\t\t\t\t\tCall: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars));
6361 dumpConvertedVarSet(emitComp, ptrVars);
6362 printf(", gcrefRegs=");
6363 printRegMaskInt(gcrefRegs);
6364 emitDispRegSet(gcrefRegs);
6365 printf(", byrefRegs=");
6366 printRegMaskInt(byrefRegs);
6367 emitDispRegSet(byrefRegs);
6372 assert(argSize % REGSIZE_BYTES == 0);
6373 argCnt = (int)(argSize / (int)REGSIZE_BYTES); // we need a signed-divide
6375 /* Managed RetVal: emit sequence point for the call */
6376 if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET)
6378 codeGen->genIPmappingAdd(ilOffset, false);
6382 We need to allocate the appropriate instruction descriptor based
6383 on whether this is a direct/indirect call, and whether we need to
6384 record an updated set of live GC variables.
6386 The stats for a ton of classes is as follows:
6388 Direct call w/o GC vars 220,216
6389 Indir. call w/o GC vars 144,781
6391 Direct call with GC vars 9,440
6392 Indir. call with GC vars 5,768
6395 if (callType >= EC_FUNC_VIRTUAL)
6397 /* Indirect call, virtual calls */
6399 assert(callType == EC_FUNC_VIRTUAL || callType == EC_INDIR_R || callType == EC_INDIR_SR ||
6400 callType == EC_INDIR_C || callType == EC_INDIR_ARD);
6402 id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs,
6403 retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize));
6407 // Helper/static/nonvirtual/function calls (direct or through handle),
6408 // and calls to an absolute addr.
6410 assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_TOKEN_INDIR || callType == EC_FUNC_ADDR);
6412 id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs,
6413 retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize));
6416 /* Update the emitter's live GC ref sets */
6418 VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars);
6419 emitThisGCrefRegs = gcrefRegs;
6420 emitThisByrefRegs = byrefRegs;
6422 /* Set the instruction - special case jumping a function */
6423 instruction ins = INS_call;
6427 assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_TOKEN_INDIR);
6428 if (callType == EC_FUNC_TOKEN)
6439 id->idSetIsNoGC(isNoGC);
6441 // Record the address: method, indirection, or funcptr
6442 if (callType >= EC_FUNC_VIRTUAL)
6444 // This is an indirect call (either a virtual call or func ptr call)
6449 // Indirect call using an absolute code address.
6450 // Must be marked as relocatable and is done at the
6451 // branch target location.
6452 goto CALL_ADDR_MODE;
6454 case EC_INDIR_R: // the address is in a register
6456 id->idSetIsCallRegPtr();
6460 case EC_INDIR_ARD: // the address is an indirection
6462 goto CALL_ADDR_MODE;
6464 case EC_INDIR_SR: // the address is in a lcl var
6466 id->idInsFmt(IF_SRD);
6467 // disp is really a lclVarNum
6468 noway_assert((unsigned)disp == (size_t)disp);
6469 id->idAddr()->iiaLclVar.initLclVarAddr((unsigned)disp, 0);
6470 sz = emitInsSizeSV(insCodeMR(INS_call), (unsigned)disp, 0);
6474 case EC_FUNC_VIRTUAL:
6480 // The function is "ireg" if id->idIsCallRegPtr(),
6481 // else [ireg+xmul*xreg+disp]
6483 id->idInsFmt(IF_ARD);
6485 id->idAddr()->iiaAddrMode.amBaseReg = ireg;
6486 id->idAddr()->iiaAddrMode.amIndxReg = xreg;
6487 id->idAddr()->iiaAddrMode.amScale = xmul ? emitEncodeScale(xmul) : emitter::OPSZ1;
6489 sz = emitInsSizeAM(id, insCodeMR(INS_call));
6491 if (ireg == REG_NA && xreg == REG_NA)
6493 if (codeGen->genCodeIndirAddrNeedsReloc(disp))
6495 id->idSetIsDspReloc();
6497 #ifdef _TARGET_AMD64_
6500 // An absolute indir address that doesn't need reloc should fit within 32-bits
6501 // to be encoded as offset relative to zero. This addr mode requires an extra
6503 noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (size_t)addr);
6506 #endif //_TARGET_AMD64_
6512 NO_WAY("unexpected instruction");
6516 else if (callType == EC_FUNC_TOKEN_INDIR)
6518 /* "call [method_addr]" */
6520 assert(addr != nullptr);
6522 id->idInsFmt(IF_METHPTR);
6523 id->idAddr()->iiaAddr = (BYTE*)addr;
6526 // Since this is an indirect call through a pointer and we don't
6527 // currently pass in emitAttr into this function, we query codegen
6528 // whether addr needs a reloc.
6529 if (codeGen->genCodeIndirAddrNeedsReloc((size_t)addr))
6531 id->idSetIsDspReloc();
6533 #ifdef _TARGET_AMD64_
6536 // An absolute indir address that doesn't need reloc should fit within 32-bits
6537 // to be encoded as offset relative to zero. This addr mode requires an extra
6539 noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (size_t)addr);
6542 #endif //_TARGET_AMD64_
6546 /* This is a simple direct call: "call helper/method/addr" */
6548 assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR);
6550 assert(addr != nullptr);
6552 id->idInsFmt(IF_METHOD);
6555 id->idAddr()->iiaAddr = (BYTE*)addr;
6557 if (callType == EC_FUNC_ADDR)
6559 id->idSetIsCallAddr();
6562 // Direct call to a method and no addr indirection is needed.
6563 if (codeGen->genCodeAddrNeedsReloc((size_t)addr))
6565 id->idSetIsDspReloc();
6570 if (emitComp->verbose && 0)
6572 if (id->idIsLargeCall())
6574 if (callType >= EC_FUNC_VIRTUAL)
6576 printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
6577 VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
6581 printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
6582 VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
6587 id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token
6588 id->idDebugOnlyInfo()->idCallSig = sigInfo;
6592 if (addr != nullptr)
6594 codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd);
6596 #endif // LATE_DISASM
6601 emitCurIGsize += sz;
6603 #if !FEATURE_FIXED_OUT_ARGS
6605 /* The call will pop the arguments */
6607 if (emitCntStackDepth && argSize > 0)
6609 noway_assert((ssize_t)emitCurStackLvl >= argSize);
6610 emitCurStackLvl -= (int)argSize;
6611 assert((int)emitCurStackLvl >= 0);
6614 #endif // !FEATURE_FIXED_OUT_ARGS
6618 /*****************************************************************************
6620 * The following called for each recorded instruction -- use for debugging.
// emitInsSanityCheck: verify that reloc flags are only set on instruction
// formats that can carry a displacement/constant reloc. (Some assert
// continuation lines are elided in this listing.)
6622 void emitter::emitInsSanityCheck(instrDesc* id)
6624 // make certain you only try to put relocs on things that can have them.
6625 ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
6626 if ((idOp == ID_OP_SCNS) && id->idIsLargeCns())
6631 if (!id->idIsTiny())
6633 if (id->idIsDspReloc())
6635 assert(idOp == ID_OP_NONE || idOp == ID_OP_AMD || idOp == ID_OP_DSP || idOp == ID_OP_DSP_CNS ||
6636 idOp == ID_OP_AMD_CNS || idOp == ID_OP_SPEC || idOp == ID_OP_CALL || idOp == ID_OP_JMP ||
6640 if (id->idIsCnsReloc())
6642 assert(idOp == ID_OP_CNS || idOp == ID_OP_AMD_CNS || idOp == ID_OP_DSP_CNS || idOp == ID_OP_SPEC ||
6643 idOp == ID_OP_CALL || idOp == ID_OP_JMP);
6649 /*****************************************************************************
6651 * Return the allocated size (in bytes) of the given instruction descriptor.
// Dispatches by descriptor flavor: tiny/small fast paths first, then by the
// instruction format's ID_OPS class (switch labels elided in this listing).
6654 size_t emitter::emitSizeOfInsDsc(instrDesc* id)
6656 if (emitIsTinyInsDsc(id))
6658 return TINY_IDSC_SIZE;
6661 if (emitIsScnsInsDsc(id))
6663 return SMALL_IDSC_SIZE;
6666 assert((unsigned)id->idInsFmt() < emitFmtCount);
6668 ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
6670 // An INS_call instruction may use a "fat" direct/indirect call descriptor
6671 // except for a local call to a label (i.e. call to a finally)
6672 // Only ID_OP_CALL and ID_OP_SPEC check for this, so we enforce that the
6673 // INS_call instruction always uses one of these idOps
6675 if (id->idIns() == INS_call)
6677 assert(idOp == ID_OP_CALL || // is a direct call
6678 idOp == ID_OP_SPEC || // is a indirect call
6679 idOp == ID_OP_JMP); // is a local call to finally clause
6688 return sizeof(instrDescLbl);
6691 return sizeof(instrDescJmp);
6695 if (id->idIsLargeCall())
6697 /* Must be a "fat" indirect call descriptor */
6698 return sizeof(instrDescCGCA);
6709 if (id->idIsLargeCns())
6711 if (id->idIsLargeDsp())
6713 return sizeof(instrDescCnsDsp);
6717 return sizeof(instrDescCns);
6722 if (id->idIsLargeDsp())
6724 return sizeof(instrDescDsp);
6728 return sizeof(instrDesc);
6733 NO_WAY("unexpected instruction descriptor format");
6737 return sizeof(instrDesc);
6740 /*****************************************************************************/
6742 /*****************************************************************************
6744 * Return a string that represents the given register.
// Uses a 2-entry rotating static buffer (rb/rbc) so two results can be alive
// at once in a single printf call. XMM/YMM registers get their own name
// tables; smaller integer sizes patch a size suffix into the buffer.
// (Many case labels and buffer-building lines are elided in this listing.)
6747 const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName)
6749 static char rb[2][128];
6750 static unsigned char rbc = 0;
6752 const char* rn = emitComp->compRegVarName(reg, varName);
6754 #ifdef _TARGET_AMD64_
6757 switch (EA_SIZE(attr))
6760 return emitYMMregName(reg);
6763 return emitXMMregName(reg);
6766 if ((REG_XMM0 <= reg) && (reg <= REG_XMM15))
6768 return emitXMMregName(reg);
6773 if ((REG_XMM0 <= reg) && (reg <= REG_XMM15))
6775 return emitXMMregName(reg);
6788 rbc = (rbc + 1) % 2;
6810 rbc = (rbc + 1) % 2;
6817 rb[rbc][3] = suffix;
6822 rb[rbc][2] = suffix;
6828 rbc = (rbc + 1) % 2;
6849 #endif // _TARGET_AMD64_
6852 assert(strlen(rn) >= 3);
6854 switch (EA_SIZE(attr))
6856 #ifndef LEGACY_BACKEND
6858 return emitYMMregName(reg);
6861 return emitXMMregName(reg);
6864 if ((REG_XMM0 <= reg) && (reg <= REG_XMM7))
6866 return emitXMMregName(reg);
6871 if ((REG_XMM0 <= reg) && (reg <= REG_XMM7))
6873 return emitXMMregName(reg);
6876 #else // LEGACY_BACKEND
6879 #endif // LEGACY_BACKEND
6886 rbc = (rbc + 1) % 2;
6889 strcpy_s(&rb[rbc][2], sizeof(rb[0]) - 2, rn + 3);
6897 #endif // _TARGET_X86_
6900 // The following is useful if you want register names to be tagged with * or ^ representing gcref or byref, respectively,
6901 // however it's possibly not interesting most of the time.
6902 if (EA_IS_GCREF(attr) || EA_IS_BYREF(attr))
6907 strcpy_s(rb[rbc], sizeof(rb[rbc]), rn);
6911 if (EA_IS_GCREF(attr))
6913 strcat_s(rb[rbc], sizeof(rb[rbc]), "*");
6915 else if (EA_IS_BYREF(attr))
6917 strcat_s(rb[rbc], sizeof(rb[rbc]), "^");
6925 /*****************************************************************************
6927 * Return a string that represents the given FP register.
// Thin wrapper over the compiler's FP register-variable naming.
6930 const char* emitter::emitFPregName(unsigned reg, bool varName)
6932 assert(reg < REG_COUNT);
6934 return emitComp->compFPregVarName((regNumber)(reg), varName);
6937 /*****************************************************************************
6939 * Return a string that represents the given XMM register.
// Builds a static name table via the REGDEF X-macro, prefixing each short
// register name with "x" (e.g. "xmm0"-style names from register.h).
6942 const char* emitter::emitXMMregName(unsigned reg)
6944 static const char* const regNames[] = {
6945 #define REGDEF(name, rnum, mask, sname) "x" sname,
6946 #ifndef LEGACY_BACKEND
6947 #include "register.h"
6948 #else // LEGACY_BACKEND
6949 #include "registerxmm.h"
6950 #endif // LEGACY_BACKEND
6953 assert(reg < REG_COUNT);
6954 assert(reg < _countof(regNames));
6956 return regNames[reg];
6959 /*****************************************************************************
6961 * Return a string that represents the given YMM register.
// Same X-macro table as emitXMMregName, but with a "y" prefix.
6964 const char* emitter::emitYMMregName(unsigned reg)
6966 static const char* const regNames[] = {
6967 #define REGDEF(name, rnum, mask, sname) "y" sname,
6968 #ifndef LEGACY_BACKEND
6969 #include "register.h"
6970 #else // LEGACY_BACKEND
6971 #include "registerxmm.h"
6972 #endif // LEGACY_BACKEND
6975 assert(reg < REG_COUNT);
6976 assert(reg < _countof(regNames));
6978 return regNames[reg];
6981 /*****************************************************************************
6983 * Display a static data member reference.
// Handles the special FS/DS global pseudo-handles, JIT data-section offsets
// (@CNS/@RWD labels), and ordinary class-variable handles; offsets are
// munged for diff-able disassembly. (Braces/else lines elided.)
6986 void emitter::emitDispClsVar(CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, bool reloc /* = false */)
6990 /* Filter out the special case of fs:[offs] */
6992 // Munge any pointers if we want diff-able disassembly
6993 if (emitComp->opts.disDiffable)
6995 ssize_t top12bits = (offs >> 20);
6996 if ((top12bits != 0) && (top12bits != -1))
7002 if (fldHnd == FLD_GLOBAL_FS)
7004 printf("FS:[0x%04X]", offs);
7008 if (fldHnd == FLD_GLOBAL_DS)
7010 printf("[0x%04X]", offs);
7016 doffs = Compiler::eeGetJitDataOffs(fldHnd);
7027 printf("@CNS%02u", doffs - 1);
7031 printf("@RWD%02u", doffs);
7036 printf("%+Id", offs);
7041 printf("classVar[%#x]", emitComp->dspPtr(fldHnd));
7045 printf("%+Id", offs);
7051 if (emitComp->opts.varNames && offs < 0)
7053 printf("'%s", emitComp->eeGetFieldName(fldHnd));
7056 printf("%+Id", offs);
7062 /*****************************************************************************
7064 * Display a stack frame reference.
// Prints either the symbolic form (V##/TEMP_## plus disp) or, once the final
// frame layout is known, the actual EBP/ESP-relative address; appends the
// local's name when variable names are enabled. (Braces/else lines elided.)
7067 void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
7074 if (!asmfm || emitComp->lvaDoneFrameLayout == Compiler::NO_FRAME_LAYOUT)
// Negative varx denotes a compiler temp rather than an IL local.
7078 printf("TEMP_%02u", -varx);
7082 printf("V%02u", +varx);
7087 printf("-0x%X", -disp);
7091 printf("+0x%X", +disp);
7095 if (emitComp->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
7102 addr = emitComp->lvaFrameAddress(varx, &bEBP) + disp;
7110 printf("-%02XH", -addr);
7114 printf("+%02XH", addr);
7119 /* Adjust the offset by amount currently pushed on the stack */
7125 printf("-%02XH", -addr);
7129 printf("+%02XH", addr);
7132 #if !FEATURE_FIXED_OUT_ARGS
7134 if (emitCurStackLvl)
7135 printf("+%02XH", emitCurStackLvl);
7137 #endif // !FEATURE_FIXED_OUT_ARGS
7143 if (varx >= 0 && emitComp->opts.varNames)
7146 const char* varName;
7148 assert((unsigned)varx < emitComp->lvaCount);
7149 varDsc = emitComp->lvaTable + varx;
7150 varName = emitComp->compLocalVarName(varx, offs);
7154 printf("'%s", varName);
7158 printf("-%d", -disp);
7162 printf("+%d", +disp);
7170 /*****************************************************************************
7172 * Display a reloc value
7173 * If we are formatting for an assembly listing don't print the hex value
7174 * since it will prevent us from doing assembly diffs
// (The disAsm branch body is elided in this listing; only the non-disAsm
// hex-printing path is visible.)
7176 void emitter::emitDispReloc(ssize_t value)
7178 if (emitComp->opts.disAsm)
7184 printf("(reloc 0x%Ix)", emitComp->dspPtr(value));
7188 /*****************************************************************************
7190 * Display an address mode.
// Prints "[base + scale*index + disp]" for an instruction's address mode.
// Special cases: calls store their displacement differently; switch-table
// jumps (INS_i_jmp) are matched against the data-section list so the
// jump-table label and its entries can be displayed; GC-ref movs try to
// pretty-print a string literal. (Braces/else/loop lines elided.)
7193 void emitter::emitDispAddrMode(instrDesc* id, bool noDetail)
7199 dataSection* jdsc = nullptr;
7201 /* The displacement field is in an unusual place for calls */
7203 disp = (id->idIns() == INS_call) ? emitGetInsCIdisp(id) : emitGetInsAmdAny(id);
7205 /* Display a jump table label if this is a switch table jump */
7207 if (id->idIns() == INS_i_jmp)
7209 UNATIVE_OFFSET offs = 0;
7211 /* Find the appropriate entry in the data section list */
7213 for (jdsc = emitConsDsc.dsdList, jtno = 0; jdsc; jdsc = jdsc->dsNext)
7215 UNATIVE_OFFSET size = jdsc->dsSize;
7217 /* Is this a label table? */
7224 if (offs == id->idDebugOnlyInfo()->idMemCookie)
7233 /* If we've found a matching entry then is a table jump */
7237 if (id->idIsDspReloc())
7241 printf("J_M%03u_DS%02u", Compiler::s_compMethodsCount, id->idDebugOnlyInfo()->idMemCookie);
7244 disp -= id->idDebugOnlyInfo()->idMemCookie;
7247 bool frameRef = false;
7251 if (id->idAddr()->iiaAddrMode.amBaseReg != REG_NA)
7253 printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg));
7255 if (id->idAddr()->iiaAddrMode.amBaseReg == REG_ESP)
7259 else if (emitComp->isFramePointerUsed() && id->idAddr()->iiaAddrMode.amBaseReg == REG_EBP)
7265 if (id->idAddr()->iiaAddrMode.amIndxReg != REG_NA)
7267 size_t scale = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale);
7275 printf("%u*", scale);
7277 printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amIndxReg));
7281 if ((id->idIsDspReloc()) && (id->idIns() != INS_i_jmp))
7287 emitDispReloc(disp);
7291 // Munge any pointers if we want diff-able disassembly
7292 if (emitComp->opts.disDiffable)
7294 ssize_t top12bits = (disp >> 20);
7295 if ((top12bits != 0) && (top12bits != -1))
// Displacements are formatted in progressively wider hex fields by range
// (positive and negative cases; several branch headers elided here).
7309 printf("%02XH", disp);
7311 else if (disp < 1000)
7315 else if (disp <= 0xFFFF)
7317 printf("%04XH", disp);
7321 printf("%08XH", disp);
7328 printf("-%02XH", -disp);
7330 else if (disp > -1000)
7332 printf("-%d", -disp);
7334 else if (disp >= -0xFFFF)
7336 printf("-%04XH", -disp);
7338 else if ((disp & 0x7F000000) != 0x7F000000)
7340 printf("%08XH", disp);
7344 printf("-%08XH", -disp);
7349 printf("%04XH", disp);
7355 // pretty print string if it looks like one
7356 if ((id->idGCref() == GCT_GCREF) && (id->idIns() == INS_mov) && (id->idAddr()->iiaAddrMode.amBaseReg == REG_NA))
7358 const wchar_t* str = emitComp->eeGetCPString(disp);
7361 printf(" '%S'", str);
7365 if (jdsc && !noDetail)
7367 unsigned cnt = (jdsc->dsSize - 1) / TARGET_POINTER_SIZE;
7368 BasicBlock** bbp = (BasicBlock**)jdsc->dsCont;
7370 #ifdef _TARGET_AMD64_
7371 #define SIZE_LETTER "Q"
7373 #define SIZE_LETTER "D"
7375 printf("\n\n J_M%03u_DS%02u LABEL " SIZE_LETTER "WORD", Compiler::s_compMethodsCount, jtno);
7377 /* Display the label table (it's stored as "BasicBlock*" values) */
7383 /* Convert the BasicBlock* value to an IG address */
7385 lab = (insGroup*)emitCodeGetCookie(*bbp++);
7388 printf("\n D" SIZE_LETTER " G_M%03u_IG%02u", Compiler::s_compMethodsCount, lab->igNum);
7393 /*****************************************************************************
7395 * If the given instruction is a shift, display the 2nd operand.
7398 void emitter::emitDispShift(instruction ins, int cnt)
7429 printf(", %d", cnt);
7437 /*****************************************************************************
7439 * Display (optionally) the bytes for the instruction encoding in hex
7442 void emitter::emitDispInsHex(BYTE* code, size_t sz)
7444 // We do not display the instruction hex if we want diff-able disassembly
7445 if (!emitComp->opts.disDiffable)
7447 #ifdef _TARGET_AMD64_
7448 // how many bytes per instruction we format for
7449 const size_t digits = 10;
7450 #else // _TARGET_X86
7451 const size_t digits = 6;
7454 for (unsigned i = 0; i < sz; i++)
7456 printf("%02X", (*((BYTE*)(code + i))));
7461 printf("%.*s", 2 * (digits - sz), " ");
7466 /*****************************************************************************
7468 * Display the given instruction.
7471 void emitter::emitDispIns(
7472 instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* code, size_t sz, insGroup* ig)
7477 instruction ins = id->idIns();
7479 if (emitComp->verbose)
7481 unsigned idNum = id->idDebugOnlyInfo()->idNum;
7482 printf("IN%04x: ", idNum);
7485 #define ID_INFO_DSP_RELOC ((bool)(id->idIsDspReloc()))
7487 /* Display a constant value if the instruction references one */
7491 switch (id->idInsFmt())
7512 #if FEATURE_STACK_FP_X87
7522 #endif // FEATURE_STACK_FP_X87
7525 /* Is this actually a reference to a data section? */
7527 offs = Compiler::eeGetJitDataOffs(id->idAddr()->iiaFieldHnd);
7533 /* Display a data section reference */
7535 assert((unsigned)offs < emitConsDsc.dsdOffs);
7536 addr = emitConsBlock ? emitConsBlock + offs : nullptr;
7539 // TODO-XArch-Cleanup: Fix or remove this code.
7540 /* Is the operand an integer or floating-point value? */
7544 if (CodeGen::instIsFP(id->idIns()))
7546 switch (id->idIns())
7559 printf("@CNS%02u", offs);
7561 printf("@RWD%02u", offs);
7569 // This was busted by switching the order
7570 // in which we output the code block vs.
7571 // the data blocks -- when we get here,
7572 // the data block has not been filled in
7573 // yet, so we'll display garbage.
7577 if (id->idOpSize() == EA_4BYTE)
7578 printf("DF %f \n", addr ? *(float *)addr : 0);
7580 printf("DQ %lf\n", addr ? *(double *)addr : 0);
7584 if (id->idOpSize() <= EA_4BYTE)
7585 printf("DD %d \n", addr ? *(int *)addr : 0);
7587 printf("DQ %D \n", addr ? *(__int64 *)addr : 0);
7599 // printf("[F=%s] " , emitIfName(id->idInsFmt()));
7600 // printf("INS#%03u: ", id->idDebugOnlyInfo()->idNum);
7601 // printf("[S=%02u] " , emitCurStackLvl); if (isNew) printf("[M=%02u] ", emitMaxStackDepth);
7602 // printf("[S=%02u] " , emitCurStackLvl/sizeof(INT32));
7603 // printf("[A=%08X] " , emitSimpleStkMask);
7604 // printf("[A=%08X] " , emitSimpleByrefStkMask);
7605 // printf("[L=%02u] " , id->idCodeSize());
7607 if (!emitComp->opts.dspEmit && !isNew && !asmfm)
7612 /* Display the instruction offset */
7614 emitDispInsOffs(offset, doffs);
7616 if (code != nullptr)
7618 /* Display the instruction hex code */
7620 emitDispInsHex(code, sz);
7623 /* Display the instruction name */
7625 sstr = codeGen->genInsName(ins);
7627 if (IsAVXInstruction(ins))
7629 printf(" v%-8s", sstr);
7633 printf(" %-9s", sstr);
7636 if (strnlen_s(sstr, 10) >= 8)
7637 #else // FEATURE_PAL
7638 if (strnlen(sstr, 10) >= 8)
7639 #endif // FEATURE_PAL
7644 /* By now the size better be set to something */
7646 assert(emitInstCodeSz(id) || emitInstHasNoCode(ins));
7648 /* Figure out the operand size */
7650 if (id->idGCref() == GCT_GCREF)
7653 sstr = "gword ptr ";
7655 else if (id->idGCref() == GCT_BYREF)
7658 sstr = "bword ptr ";
7662 attr = id->idOpSize();
7663 sstr = codeGen->genSizeStr(attr);
7667 #ifdef _TARGET_AMD64_
7668 assert((attr == EA_4BYTE) || (attr == EA_8BYTE));
7670 assert(attr == EA_4BYTE);
7676 /* Now see what instruction format we've got */
7678 // First print the implicit register usage
7679 if (instrHasImplicitRegPairDest(ins))
7681 printf("%s:%s, ", emitRegName(REG_EDX, id->idOpSize()), emitRegName(REG_EAX, id->idOpSize()));
7683 else if (instrIs3opImul(ins))
7685 regNumber tgtReg = inst3opImulReg(ins);
7686 printf("%s, ", emitRegName(tgtReg, id->idOpSize()));
7689 switch (id->idInsFmt())
7694 const char* methodName;
7697 val = emitGetInsSC(id);
7698 #ifdef _TARGET_AMD64_
7699 // no 8-byte immediates allowed here!
7700 assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
7702 if (id->idIsCnsReloc())
7709 // Munge any pointers if we want diff-able disassembly
7710 if (emitComp->opts.disDiffable)
7712 ssize_t top12bits = (val >> 20);
7713 if ((top12bits != 0) && (top12bits != -1))
7718 if ((val > -1000) && (val < 1000))
7722 else if ((val > 0) || ((val & 0x7F000000) != 0x7F000000))
7724 printf("0x%IX", val);
7728 printf("-0x%IX", -val);
7737 #if FEATURE_STACK_FP_X87
7747 #endif // FEATURE_STACK_FP_X87
7748 if (ins == INS_call && id->idIsCallRegPtr())
7750 printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg));
7755 emitDispAddrMode(id, isNew);
7758 if (ins == INS_call)
7760 assert(id->idInsFmt() == IF_ARD);
7762 /* Ignore indirect calls */
7764 if (id->idDebugOnlyInfo()->idMemCookie == 0)
7769 assert(id->idDebugOnlyInfo()->idMemCookie);
7771 /* This is a virtual call */
7773 methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
7774 printf("%s", methodName);
7781 #ifdef _TARGET_AMD64_
7782 if (ins == INS_movsxd)
7784 printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr);
7788 if (ins == INS_movsx || ins == INS_movzx)
7790 printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), sstr);
7794 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
7796 emitDispAddrMode(id);
7799 case IF_RRW_ARD_CNS:
7800 case IF_RWR_ARD_CNS:
7802 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
7803 emitDispAddrMode(id);
7804 emitGetInsAmdCns(id, &cnsVal);
7806 val = cnsVal.cnsVal;
7809 if (cnsVal.cnsReloc)
7815 goto PRINT_CONSTANT;
7821 case IF_AWR_RRD_CNS:
7823 assert(ins == INS_vextracti128 || ins == INS_vextractf128);
7824 // vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr"
7825 sstr = codeGen->genSizeStr(EA_ATTR(16));
7827 emitDispAddrMode(id);
7828 printf(", %s", emitRegName(id->idReg1(), attr));
7830 emitGetInsAmdCns(id, &cnsVal);
7832 val = cnsVal.cnsVal;
7835 if (cnsVal.cnsReloc)
7841 goto PRINT_CONSTANT;
7847 case IF_RWR_RRD_ARD:
7848 printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
7849 emitDispAddrMode(id);
7852 case IF_RWR_RRD_ARD_CNS:
7854 printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
7855 emitDispAddrMode(id);
7856 emitGetInsAmdCns(id, &cnsVal);
7858 val = cnsVal.cnsVal;
7861 if (cnsVal.cnsReloc)
7867 goto PRINT_CONSTANT;
7878 emitDispAddrMode(id);
7879 printf(", %s", emitRegName(id->idReg1(), attr));
7888 emitDispAddrMode(id);
7889 emitGetInsAmdCns(id, &cnsVal);
7890 val = cnsVal.cnsVal;
7891 #ifdef _TARGET_AMD64_
7892 // no 8-byte immediates allowed here!
7893 assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
7895 if (id->idInsFmt() == IF_ARW_SHF)
7897 emitDispShift(ins, (BYTE)val);
7902 if (cnsVal.cnsReloc)
7908 goto PRINT_CONSTANT;
7917 #if FEATURE_STACK_FP_X87
7926 #endif // FEATURE_STACK_FP_X87
7930 #if !FEATURE_FIXED_OUT_ARGS
7932 emitCurStackLvl -= sizeof(int);
7935 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
7936 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
7938 #if !FEATURE_FIXED_OUT_ARGS
7940 emitCurStackLvl += sizeof(int);
7952 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
7953 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
7955 printf(", %s", emitRegName(id->idReg1(), attr));
7965 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
7966 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
7968 emitGetInsCns(id, &cnsVal);
7969 val = cnsVal.cnsVal;
7970 #ifdef _TARGET_AMD64_
7971 // no 8-byte immediates allowed here!
7972 assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
7974 if (id->idInsFmt() == IF_SRW_SHF)
7976 emitDispShift(ins, (BYTE)val);
7981 if (cnsVal.cnsReloc)
7987 goto PRINT_CONSTANT;
7995 #ifdef _TARGET_AMD64_
7996 if (ins == INS_movsxd)
7998 printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr);
8002 if (ins == INS_movsx || ins == INS_movzx)
8004 printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), sstr);
8008 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8011 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8012 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8016 case IF_RRW_SRD_CNS:
8017 case IF_RWR_SRD_CNS:
8019 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8020 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8021 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8022 emitGetInsCns(id, &cnsVal);
8024 val = cnsVal.cnsVal;
8027 if (cnsVal.cnsReloc)
8033 goto PRINT_CONSTANT;
8038 case IF_RWR_RRD_SRD:
8039 printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8040 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8041 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8044 case IF_RWR_RRD_SRD_CNS:
8046 printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8047 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8048 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8049 emitGetInsCns(id, &cnsVal);
8051 val = cnsVal.cnsVal;
8054 if (cnsVal.cnsReloc)
8060 goto PRINT_CONSTANT;
8068 if (ins == INS_mov_i2xmm)
8070 printf("%s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr));
8072 else if (ins == INS_mov_xmm2i)
8074 printf("%s, %s", emitRegName(id->idReg2(), attr), emitRegName(id->idReg1(), EA_16BYTE));
8076 else if (ins == INS_pmovmskb)
8078 printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
8080 #ifndef LEGACY_BACKEND
8081 else if ((ins == INS_cvtsi2ss) || (ins == INS_cvtsi2sd))
8083 printf(" %s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr));
8086 else if ((ins == INS_cvttsd2si)
8087 #ifndef LEGACY_BACKEND
8088 || (ins == INS_cvtss2si) || (ins == INS_cvtsd2si) || (ins == INS_cvttss2si)
8092 printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
8094 #ifdef _TARGET_AMD64_
8095 else if (ins == INS_movsxd)
8097 printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), emitRegName(id->idReg2(), EA_4BYTE));
8099 #endif // _TARGET_AMD64_
8100 else if (ins == INS_movsx || ins == INS_movzx)
8102 printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), emitRegName(id->idReg2(), attr));
8104 else if (ins == INS_bt)
8106 // INS_bt operands are reversed. Display them in the normal order.
8107 printf("%s, %s", emitRegName(id->idReg2(), attr), emitRegName(id->idReg1(), attr));
8109 #ifdef FEATURE_HW_INTRINSICS
8110 else if (ins == INS_crc32 && attr != EA_8BYTE)
8112 printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
8114 #endif // FEATURE_HW_INTRINSICS
8117 printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr));
8122 assert(ins == INS_xchg);
8123 printf("%s,", emitRegName(id->idReg1(), attr));
8124 printf(" %s", emitRegName(id->idReg2(), attr));
8127 case IF_RWR_RRD_RRD:
8128 assert(IsAVXInstruction(ins));
8129 assert(IsThreeOperandAVXInstruction(ins));
8130 printf("%s, ", emitRegName(id->idReg1(), attr));
8131 printf("%s, ", emitRegName(id->idReg2(), attr));
8132 printf("%s", emitRegName(id->idReg3(), attr));
8134 case IF_RWR_RRD_RRD_CNS:
8135 assert(IsAVXInstruction(ins));
8136 assert(IsThreeOperandAVXInstruction(ins));
8137 printf("%s, ", emitRegName(id->idReg1(), attr));
8138 printf("%s, ", emitRegName(id->idReg2(), attr));
8139 printf("%s, ", emitRegName(id->idReg3(), attr));
8140 val = emitGetInsSC(id);
8141 goto PRINT_CONSTANT;
8143 case IF_RWR_RRD_RRD_RRD:
8144 assert(IsAVXOnlyInstruction(ins));
8145 assert(UseVEXEncoding());
8146 printf("%s, ", emitRegName(id->idReg1(), attr));
8147 printf("%s, ", emitRegName(id->idReg2(), attr));
8148 printf("%s, ", emitRegName(id->idReg3(), attr));
8149 printf("%s", emitRegName(id->idReg4(), attr));
8151 case IF_RRW_RRW_CNS:
8152 printf("%s,", emitRegName(id->idReg1(), attr));
8153 printf(" %s", emitRegName(id->idReg2(), attr));
8154 val = emitGetInsSC(id);
8155 #ifdef _TARGET_AMD64_
8156 // no 8-byte immediates allowed here!
8157 assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
8160 if (id->idIsCnsReloc())
8166 goto PRINT_CONSTANT;
8173 printf("%s", emitRegName(id->idReg1(), attr));
8178 printf("%s", emitRegName(id->idReg1(), attr));
8179 emitDispShift(ins, (BYTE)emitGetInsSC(id));
8186 if (ins == INS_movsx || ins == INS_movzx)
8190 #ifdef _TARGET_AMD64_
8191 else if (ins == INS_movsxd)
8196 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8197 offs = emitGetInsDsp(id);
8198 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8201 case IF_RRW_MRD_CNS:
8202 case IF_RWR_MRD_CNS:
8204 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8205 offs = emitGetInsDsp(id);
8206 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8207 emitGetInsDcmCns(id, &cnsVal);
8209 val = cnsVal.cnsVal;
8212 if (cnsVal.cnsReloc)
8218 goto PRINT_CONSTANT;
8223 case IF_MWR_RRD_CNS:
8225 assert(ins == INS_vextracti128 || ins == INS_vextractf128);
8226 // vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr"
8227 sstr = codeGen->genSizeStr(EA_ATTR(16));
8229 offs = emitGetInsDsp(id);
8230 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8231 printf(", %s", emitRegName(id->idReg1(), attr));
8232 emitGetInsDcmCns(id, &cnsVal);
8234 val = cnsVal.cnsVal;
8237 if (cnsVal.cnsReloc)
8243 goto PRINT_CONSTANT;
8249 case IF_RWR_RRD_MRD:
8250 printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8251 offs = emitGetInsDsp(id);
8252 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8255 case IF_RWR_RRD_MRD_CNS:
8257 printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8258 offs = emitGetInsDsp(id);
8259 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8260 emitGetInsDcmCns(id, &cnsVal);
8262 val = cnsVal.cnsVal;
8265 if (cnsVal.cnsReloc)
8271 goto PRINT_CONSTANT;
8276 case IF_RWR_MRD_OFF:
8278 printf("%s, %s", emitRegName(id->idReg1(), attr), "offset");
8279 offs = emitGetInsDsp(id);
8280 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8288 offs = emitGetInsDsp(id);
8289 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8290 printf(", %s", emitRegName(id->idReg1(), attr));
8299 offs = emitGetInsDsp(id);
8300 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8301 emitGetInsDcmCns(id, &cnsVal);
8302 val = cnsVal.cnsVal;
8303 #ifdef _TARGET_AMD64_
8304 // no 8-byte immediates allowed here!
8305 assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
8307 if (cnsVal.cnsReloc)
8311 else if (id->idInsFmt() == IF_MRW_SHF)
8313 emitDispShift(ins, (BYTE)val);
8318 goto PRINT_CONSTANT;
8326 #if FEATURE_STACK_FP_X87
8336 #endif // FEATURE_STACK_FP_X87
8339 offs = emitGetInsDsp(id);
8340 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8347 offs = emitGetInsDsp(id);
8348 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8354 printf("%s, ", emitRegName(id->idReg1(), attr));
8355 val = emitGetInsSC(id);
8356 if (id->idIsCnsReloc())
8362 goto PRINT_CONSTANT;
8366 #if FEATURE_STACK_FP_X87
8377 printf("%s, ", emitFPregName(0));
8380 printf("%s", emitFPregName((unsigned)id->idReg1()));
8386 printf("%s", emitFPregName((unsigned)id->idReg1()));
8387 if (ins != INS_fst && ins != INS_fstp)
8388 printf(", %s", emitFPregName(0));
8390 #endif // FEATURE_STACK_FP_X87
8398 printf("%s, ", emitRegName(id->idReg1(), attr));
8400 else if (ins == INS_mov)
8402 /* mov dword ptr [frame.callSiteReturnAddress], label */
8403 assert(id->idInsFmt() == IF_SWR_LABEL);
8404 instrDescLbl* idlbl = (instrDescLbl*)id;
8406 emitDispFrameRef(idlbl->dstLclVar.lvaVarNum(), idlbl->dstLclVar.lvaOffset(), 0, asmfm);
8411 if (((instrDescJmp*)id)->idjShort)
8416 if (id->idIsBound())
8418 printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum);
8422 printf("L_M%03u_BB%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum);
8428 if (id->idIsCallAddr())
8430 offs = (ssize_t)id->idAddr()->iiaAddr;
8436 methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
8439 if (id->idInsFmt() == IF_METHPTR)
8446 if (id->idIsDspReloc())
8450 printf("%08X", offs);
8454 printf("%s", methodName);
8457 if (id->idInsFmt() == IF_METHPTR)
8464 #if FEATURE_STACK_FP_X87
8468 #endif // FEATURE_STACK_FP_X87
8473 printf("unexpected format %s", emitIfName(id->idInsFmt()));
8474 assert(!"unexpectedFormat");
8478 if (sz != 0 && sz != id->idCodeSize() && (!asmfm || emitComp->verbose))
8480 // Code size in the instrDesc is different from the actual code size we've been given!
8481 printf(" (ECS:%d, ACS:%d)", id->idCodeSize(), sz);
8487 /*****************************************************************************/
8490 /*****************************************************************************
8492 * Output nBytes bytes of NOP instructions
8495 static BYTE* emitOutputNOP(BYTE* dst, size_t nBytes)
8497 assert(nBytes <= 15);
8499 #ifndef _TARGET_AMD64_
8500 // TODO-X86-CQ: when VIA C3 CPU's are out of circulation, switch to the
8501 // more efficient real NOP: 0x0F 0x1F +modR/M
8502 // Also can't use AMD recommended, multiple size prefixes (i.e. 0x66 0x66 0x90 for 3 byte NOP)
8503 // because debugger and msdis don't like it, so maybe VIA doesn't either
8504 // So instead just stick to repeating single byte nops
8556 #else // _TARGET_AMD64_
8598 // More than 3 prefixes is slower than just 2 NOPs
8599 dst = emitOutputNOP(emitOutputNOP(dst, 7), 8);
8602 // More than 3 prefixes is slower than just 2 NOPs
8603 dst = emitOutputNOP(emitOutputNOP(dst, 7), 7);
8606 // More than 3 prefixes is slower than just 2 NOPs
8607 dst = emitOutputNOP(emitOutputNOP(dst, 5), 8);
8610 // More than 3 prefixes is slower than just 2 NOPs
8611 dst = emitOutputNOP(emitOutputNOP(dst, 4), 8);
8633 #endif // _TARGET_AMD64_
8638 /*****************************************************************************
8640 * Output an instruction involving an address mode.
8643 BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
8651 instruction ins = id->idIns();
8652 emitAttr size = id->idOpSize();
8653 size_t opsz = EA_SIZE_IN_BYTES(size);
8655 // Get the base/index registers
8656 reg = id->idAddr()->iiaAddrMode.amBaseReg;
8657 rgx = id->idAddr()->iiaAddrMode.amIndxReg;
8659 // For INS_call the instruction size is actually the return value size
8660 if (ins == INS_call)
8662 // Special case: call via a register
8663 if (id->idIsCallRegPtr())
8665 code_t opcode = insEncodeMRreg(INS_call, reg, EA_PTRSIZE, insCodeMR(INS_call));
8667 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, opcode);
8668 dst += emitOutputWord(dst, opcode);
8672 // The displacement field is in an unusual place for calls
8673 dsp = emitGetInsCIdisp(id);
8675 #ifdef _TARGET_AMD64_
8677 // Compute the REX prefix if it exists
8678 if (IsExtendedReg(reg, EA_PTRSIZE))
8680 insEncodeReg012(ins, reg, EA_PTRSIZE, &code);
8681 // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
8682 reg = (regNumber)RegEncoding(reg);
8685 if (IsExtendedReg(rgx, EA_PTRSIZE))
8687 insEncodeRegSIB(ins, rgx, &code);
8688 // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
8689 rgx = (regNumber)RegEncoding(rgx);
8692 // And emit the REX prefix
8693 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
8695 #endif // _TARGET_AMD64_
8700 // Is there a large constant operand?
8701 if (addc && (size > EA_1BYTE))
8703 ssize_t cval = addc->cnsVal;
8705 // Does the constant fit in a byte?
8706 if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
8708 if (id->idInsFmt() != IF_ARW_SHF)
8717 // Emit VEX prefix if required
8718 // There are some callers who already add VEX prefix and call this routine.
8719 // Therefore, add VEX prefix is one is not already present.
8720 code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
8722 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
8723 if (TakesVexPrefix(ins))
8725 if (IsDstDstSrcAVXInstruction(ins))
8727 regNumber src1 = id->idReg2();
8729 if ((id->idInsFmt() != IF_RWR_RRD_ARD) && (id->idInsFmt() != IF_RWR_RRD_ARD_CNS))
8731 src1 = id->idReg1();
8734 // encode source operand reg in 'vvvv' bits in 1's complement form
8735 code = insEncodeReg3456(ins, src1, size, code);
8737 else if (IsDstSrcSrcAVXInstruction(ins))
8739 code = insEncodeReg3456(ins, id->idReg2(), size, code);
8743 // Emit the REX prefix if required
8744 if (TakesRexWPrefix(ins, size))
8746 code = AddRexWPrefix(ins, code);
8749 if (IsExtendedReg(reg, EA_PTRSIZE))
8751 insEncodeReg012(ins, reg, EA_PTRSIZE, &code);
8752 // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
8753 reg = (regNumber)RegEncoding(reg);
8756 if (IsExtendedReg(rgx, EA_PTRSIZE))
8758 insEncodeRegSIB(ins, rgx, &code);
8759 // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
8760 rgx = (regNumber)RegEncoding(rgx);
8763 // Special case emitting AVX instructions
8764 if (Is4ByteSSE4OrAVXInstruction(ins))
8766 unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
8767 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
8769 if (UseVEXEncoding())
8771 // Emit last opcode byte
8772 // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
8773 assert((code & 0xFF) == 0);
8774 dst += emitOutputByte(dst, (code >> 8) & 0xFF);
8778 dst += emitOutputWord(dst, code >> 16);
8779 dst += emitOutputWord(dst, code & 0xFFFF);
8784 // Is this a 'big' opcode?
8785 else if (code & 0xFF000000)
8787 // Output the REX prefix
8788 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
8790 // Output the highest word of the opcode
8791 // We need to check again as in case of AVX instructions leading opcode bytes are stripped off
8792 // and encoded as part of VEX prefix.
8793 if (code & 0xFF000000)
8795 dst += emitOutputWord(dst, code >> 16);
8799 else if (code & 0x00FF0000)
8801 // BT supports 16 bit operands and this code doesn't handle the necessary 66 prefix.
8802 assert(ins != INS_bt);
8804 // Output the REX prefix
8805 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
8807 // Output the highest byte of the opcode
8808 if (code & 0x00FF0000)
8810 dst += emitOutputByte(dst, code >> 16);
8814 // Use the large version if this is not a byte. This trick will not
8815 // work in case of SSE2 and AVX instructions.
8816 if ((size != EA_1BYTE) && (ins != INS_imul) && !IsSSE2Instruction(ins) && !IsAVXInstruction(ins))
8821 else if (CodeGen::instIsFP(ins))
8823 #if FEATURE_STACK_FP_X87
8824 assert(size == EA_4BYTE || size == EA_8BYTE || ins == INS_fldcw || ins == INS_fnstcw);
8825 #else // !FEATURE_STACK_FP_X87
8826 assert(size == EA_4BYTE || size == EA_8BYTE);
8827 #endif // ! FEATURE_STACK_FP_X87
8829 if (size == EA_8BYTE)
8834 else if (!IsSSE2Instruction(ins) && !IsAVXInstruction(ins))
8836 /* Is the operand size larger than a byte? */
8845 /* Output a size prefix for a 16-bit operand */
8847 dst += emitOutputByte(dst, 0x66);
8852 #ifdef _TARGET_AMD64_
8856 /* Set the 'w' bit to get the large version */
8864 /* Double operand - set the appropriate bit */
8869 #endif // _TARGET_X86_
8872 NO_WAY("unexpected size");
8877 // Output the REX prefix
8878 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
8880 // Get the displacement value
8881 dsp = emitGetInsAmdAny(id);
8885 dspInByte = ((signed char)dsp == (ssize_t)dsp);
8886 dspIsZero = (dsp == 0);
8888 if (id->idIsDspReloc())
8890 dspInByte = false; // relocs can't be placed in a byte
8893 // Is there a [scaled] index component?
8896 // The address is of the form "[reg+disp]"
8900 if (id->idIsDspReloc())
8902 INT32 addlDelta = 0;
8904 // The address is of the form "[disp]"
8905 // On x86 - disp is relative to zero
8906 // On Amd64 - disp is relative to RIP
8907 if (Is4ByteSSE4OrAVXInstruction(ins))
8909 dst += emitOutputByte(dst, code | 0x05);
8913 dst += emitOutputWord(dst, code | 0x0500);
8918 // It is of the form "ins [disp], immed"
8919 // For emitting relocation, we also need to take into account of the
8920 // additional bytes of code emitted for immed val.
8922 ssize_t cval = addc->cnsVal;
8924 #ifdef _TARGET_AMD64_
8925 // all these opcodes only take a sign-extended 4-byte immediate
8926 noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
8928 noway_assert(opsz <= 4);
8946 assert(!"unexpected operand size");
8951 #ifdef _TARGET_AMD64_
8952 // We emit zero on Amd64, to avoid the assert in emitOutputLong()
8953 dst += emitOutputLong(dst, 0);
8955 dst += emitOutputLong(dst, dsp);
8957 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_DISP32, 0,
8963 if (Is4ByteSSE4OrAVXInstruction(ins))
8965 dst += emitOutputByte(dst, code | 0x05);
8969 dst += emitOutputWord(dst, code | 0x0500);
8971 #else //_TARGET_AMD64_
8972 // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
8973 // This addr mode should never be used while generating relocatable ngen code nor if
8974 // the addr can be encoded as pc-relative address.
8975 noway_assert(!emitComp->opts.compReloc);
8976 noway_assert(codeGen->genAddrRelocTypeHint((size_t)dsp) != IMAGE_REL_BASED_REL32);
8977 noway_assert((int)dsp == dsp);
8979 // This requires, specifying a SIB byte after ModRM byte.
8980 if (Is4ByteSSE4OrAVXInstruction(ins))
8982 dst += emitOutputByte(dst, code | 0x04);
8986 dst += emitOutputWord(dst, code | 0x0400);
8988 dst += emitOutputByte(dst, 0x25);
8989 #endif //_TARGET_AMD64_
8990 dst += emitOutputLong(dst, dsp);
8995 if (Is4ByteSSE4OrAVXInstruction(ins))
8997 // Does the offset fit in a byte?
9000 dst += emitOutputByte(dst, code | 0x45);
9001 dst += emitOutputByte(dst, dsp);
9005 dst += emitOutputByte(dst, code | 0x85);
9006 dst += emitOutputLong(dst, dsp);
9008 if (id->idIsDspReloc())
9010 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9016 // Does the offset fit in a byte?
9019 dst += emitOutputWord(dst, code | 0x4500);
9020 dst += emitOutputByte(dst, dsp);
9024 dst += emitOutputWord(dst, code | 0x8500);
9025 dst += emitOutputLong(dst, dsp);
9027 if (id->idIsDspReloc())
9029 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9036 #ifdef LEGACY_BACKEND
9037 // REG_ESP could be REG_R12, which applies to any instruction
9039 // This assert isn't too helpful from the OptJit point of view
9041 // a better question is why is it here at all
9043 assert((ins == INS_lea) || (ins == INS_mov) || (ins == INS_test) || (ins == INS_cmp) ||
9044 (ins == INS_fld && dspIsZero) || (ins == INS_fstp && dspIsZero) ||
9045 (ins == INS_fistp && dspIsZero) || IsSSE2Instruction(ins) || IsAVXInstruction(ins) ||
9047 #endif // LEGACY_BACKEND
9049 if (Is4ByteSSE4OrAVXInstruction(ins))
9051 // Is the offset 0 or does it at least fit in a byte?
9054 dst += emitOutputByte(dst, code | 0x04);
9055 dst += emitOutputByte(dst, 0x24);
9059 dst += emitOutputByte(dst, code | 0x44);
9060 dst += emitOutputByte(dst, 0x24);
9061 dst += emitOutputByte(dst, dsp);
9065 dst += emitOutputByte(dst, code | 0x84);
9066 dst += emitOutputByte(dst, 0x24);
9067 dst += emitOutputLong(dst, dsp);
9068 if (id->idIsDspReloc())
9070 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9076 // Is the offset 0 or does it at least fit in a byte?
9079 dst += emitOutputWord(dst, code | 0x0400);
9080 dst += emitOutputByte(dst, 0x24);
9084 dst += emitOutputWord(dst, code | 0x4400);
9085 dst += emitOutputByte(dst, 0x24);
9086 dst += emitOutputByte(dst, dsp);
9090 dst += emitOutputWord(dst, code | 0x8400);
9091 dst += emitOutputByte(dst, 0x24);
9092 dst += emitOutputLong(dst, dsp);
9093 if (id->idIsDspReloc())
9095 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9102 if (Is4ByteSSE4OrAVXInstruction(ins))
9104 // Put the register in the opcode
9105 code |= insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr);
9107 // Is there a displacement?
9110 // This is simply "[reg]"
9111 dst += emitOutputByte(dst, code);
9115 // This is [reg + dsp]" -- does the offset fit in a byte?
9118 dst += emitOutputByte(dst, code | 0x40);
9119 dst += emitOutputByte(dst, dsp);
9123 dst += emitOutputByte(dst, code | 0x80);
9124 dst += emitOutputLong(dst, dsp);
9125 if (id->idIsDspReloc())
9127 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9134 // Put the register in the opcode
9135 code |= insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) << 8;
9137 // Is there a displacement?
9140 // This is simply "[reg]"
9141 dst += emitOutputWord(dst, code);
9145 // This is [reg + dsp]" -- does the offset fit in a byte?
9148 dst += emitOutputWord(dst, code | 0x4000);
9149 dst += emitOutputByte(dst, dsp);
9153 dst += emitOutputWord(dst, code | 0x8000);
9154 dst += emitOutputLong(dst, dsp);
9155 if (id->idIsDspReloc())
9157 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9170 // We have a scaled index operand
9171 unsigned mul = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale);
9173 // Is the index operand scaled?
9176 // Is there a base register?
9179 // The address is "[reg + {2/4/8} * rgx + icon]"
9180 regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) |
9181 insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul);
9183 if (Is4ByteSSE4OrAVXInstruction(ins))
9185 // Emit [ebp + {2/4/8} * rgz] as [ebp + {2/4/8} * rgx + 0]
9186 if (dspIsZero && reg != REG_EBP)
9188 // The address is "[reg + {2/4/8} * rgx]"
9189 dst += emitOutputByte(dst, code | 0x04);
9190 dst += emitOutputByte(dst, regByte);
9194 // The address is "[reg + {2/4/8} * rgx + disp]"
9197 dst += emitOutputByte(dst, code | 0x44);
9198 dst += emitOutputByte(dst, regByte);
9199 dst += emitOutputByte(dst, dsp);
9203 dst += emitOutputByte(dst, code | 0x84);
9204 dst += emitOutputByte(dst, regByte);
9205 dst += emitOutputLong(dst, dsp);
9206 if (id->idIsDspReloc())
9208 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9215 // Emit [ebp + {2/4/8} * rgz] as [ebp + {2/4/8} * rgx + 0]
9216 if (dspIsZero && reg != REG_EBP)
9218 // The address is "[reg + {2/4/8} * rgx]"
9219 dst += emitOutputWord(dst, code | 0x0400);
9220 dst += emitOutputByte(dst, regByte);
9224 // The address is "[reg + {2/4/8} * rgx + disp]"
9227 dst += emitOutputWord(dst, code | 0x4400);
9228 dst += emitOutputByte(dst, regByte);
9229 dst += emitOutputByte(dst, dsp);
9233 dst += emitOutputWord(dst, code | 0x8400);
9234 dst += emitOutputByte(dst, regByte);
9235 dst += emitOutputLong(dst, dsp);
9236 if (id->idIsDspReloc())
9238 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9246 // The address is "[{2/4/8} * rgx + icon]"
9247 regByte = insEncodeReg012(ins, REG_EBP, EA_PTRSIZE, nullptr) |
9248 insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul);
9250 if (Is4ByteSSE4OrAVXInstruction(ins))
9252 dst += emitOutputByte(dst, code | 0x04);
9256 dst += emitOutputWord(dst, code | 0x0400);
9259 dst += emitOutputByte(dst, regByte);
9261 // Special case: jump through a jump table
9262 if (ins == INS_i_jmp)
9264 dsp += (size_t)emitConsBlock;
9267 dst += emitOutputLong(dst, dsp);
9268 if (id->idIsDspReloc())
9270 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9276 // The address is "[reg+rgx+dsp]"
9277 regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) | insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr);
9279 if (Is4ByteSSE4OrAVXInstruction(ins))
9281 if (dspIsZero && reg != REG_EBP)
9283 // This is [reg+rgx]"
9284 dst += emitOutputByte(dst, code | 0x04);
9285 dst += emitOutputByte(dst, regByte);
9289 // This is [reg+rgx+dsp]" -- does the offset fit in a byte?
9292 dst += emitOutputByte(dst, code | 0x44);
9293 dst += emitOutputByte(dst, regByte);
9294 dst += emitOutputByte(dst, dsp);
9298 dst += emitOutputByte(dst, code | 0x84);
9299 dst += emitOutputByte(dst, regByte);
9300 dst += emitOutputLong(dst, dsp);
9301 if (id->idIsDspReloc())
9303 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9310 if (dspIsZero && reg != REG_EBP)
9312 // This is [reg+rgx]"
9313 dst += emitOutputWord(dst, code | 0x0400);
9314 dst += emitOutputByte(dst, regByte);
9318 // This is [reg+rgx+dsp]" -- does the offset fit in a byte?
9321 dst += emitOutputWord(dst, code | 0x4400);
9322 dst += emitOutputByte(dst, regByte);
9323 dst += emitOutputByte(dst, dsp);
9327 dst += emitOutputWord(dst, code | 0x8400);
9328 dst += emitOutputByte(dst, regByte);
9329 dst += emitOutputLong(dst, dsp);
9330 if (id->idIsDspReloc())
9332 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9340 // Now generate the constant value, if present
9343 ssize_t cval = addc->cnsVal;
9345 #ifdef _TARGET_AMD64_
9346 // all these opcodes only take a sign-extended 4-byte immediate
9347 noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
9355 dst += emitOutputLong(dst, cval);
9358 dst += emitOutputWord(dst, cval);
9361 dst += emitOutputByte(dst, cval);
9365 assert(!"unexpected operand size");
9370 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
9377 // Does this instruction operate on a GC ref value?
9380 switch (id->idInsFmt())
9391 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
9395 // Mark the destination register as holding a GCT_BYREF
9396 assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
9397 emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
9410 assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
9415 emitDispIns(id, false, false, false);
9417 assert(!"unexpected GC ref instruction format");
9420 // mul can never produce a GC ref
9421 assert(!instrIs3opImul(ins));
9422 assert(ins != INS_mulEAX && ins != INS_imulEAX);
9426 if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
9428 switch (id->idInsFmt())
9431 emitGCregDeadUpd(id->idReg1(), dst);
9437 if (ins == INS_mulEAX || ins == INS_imulEAX)
9439 emitGCregDeadUpd(REG_EAX, dst);
9440 emitGCregDeadUpd(REG_EDX, dst);
9443 // For the three operand imul instruction the target register
9444 // is encoded in the opcode
9446 if (instrIs3opImul(ins))
9448 regNumber tgtReg = inst3opImulReg(ins);
9449 emitGCregDeadUpd(tgtReg, dst);
9457 /*****************************************************************************
9459 * Output an instruction involving a stack frame value.
9462 BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
// Output an instruction whose memory operand is a stack-frame local variable.
//
// Arguments:
//    dst  - current position in the output code buffer
//    id   - the instruction descriptor being emitted
//    code - the base opcode bits for the instruction (may already carry a VEX prefix)
//    addc - optional immediate-constant operand (nullptr if the instruction has none)
//
// Returns the updated output-buffer pointer (advanced past the bytes emitted).
9470 instruction ins = id->idIns();
9471 emitAttr size = id->idOpSize();
9472 size_t opsz = EA_SIZE_IN_BYTES(size);
9474 assert(ins != INS_imul || id->idReg1() == REG_EAX || size == EA_4BYTE || size == EA_8BYTE);
9476 // Is there a large constant operand?
9477 if (addc && (size > EA_1BYTE))
9479 ssize_t cval = addc->cnsVal;
9481 // Does the constant fit in a byte?
// mov/test have no sign-extended byte-immediate forms, and a reloc'd constant
// must keep its full 4-byte slot, so those cases can't be shrunk.
9482 if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
9484 if (id->idInsFmt() != IF_SRW_SHF)
9493 // Add VEX prefix if required.
9494 // There are some callers who already add VEX prefix and call this routine.
9495 // Therefore, add the VEX prefix only if one is not already present.
9496 code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
9498 // Compute the REX prefix
9499 if (TakesRexWPrefix(ins, size))
9501 code = AddRexWPrefix(ins, code);
9504 // Special case emitting AVX instructions
9505 if (Is4ByteSSE4OrAVXInstruction(ins))
9507 unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
9508 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9510 if (UseVEXEncoding())
9512 // Emit last opcode byte
9513 // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
9514 assert((code & 0xFF) == 0);
9515 dst += emitOutputByte(dst, (code >> 8) & 0xFF);
// Non-VEX path: emit the full 4-byte opcode as two words.
9519 dst += emitOutputWord(dst, code >> 16);
9520 dst += emitOutputWord(dst, code & 0xFFFF);
9525 // Is this a 'big' opcode?
9526 else if (code & 0xFF000000)
9528 // Output the REX prefix
9529 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9531 // Output the highest word of the opcode
9532 // We need to check again because in case of AVX instructions the leading
9533 // escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix.
9534 if (code & 0xFF000000)
9536 dst += emitOutputWord(dst, code >> 16);
9540 else if (code & 0x00FF0000)
9542 // BT supports 16 bit operands and this code doesn't add the necessary 66 prefix.
9543 assert(ins != INS_bt);
9545 // Output the REX prefix
9546 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9548 // Output the highest byte of the opcode.
9549 // We need to check again because in case of AVX instructions the leading
9550 // escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix.
9551 if (code & 0x00FF0000)
9553 dst += emitOutputByte(dst, code >> 16);
9557 // Use the large version if this is not a byte
// imul/cmov and SSE/AVX opcodes don't follow the w-bit convention, so they
// are excluded from the size-bit adjustment here.
9558 if ((size != EA_1BYTE) && (ins != INS_imul) && (!insIsCMOV(ins)) && !IsSSE2Instruction(ins) &&
9559 !IsAVXInstruction(ins))
9564 else if (CodeGen::instIsFP(ins))
9566 assert(size == EA_4BYTE || size == EA_8BYTE);
9568 if (size == EA_8BYTE)
9573 else if (!IsSSE2Instruction(ins) && !IsAVXInstruction(ins))
9575 // Is the operand size larger than a byte?
9582 // Output a size prefix for a 16-bit operand
9583 dst += emitOutputByte(dst, 0x66);
9587 #ifdef _TARGET_AMD64_
9589 #endif // _TARGET_AMD64_
9591 /* Set the 'w' size bit to indicate 32-bit operation
9592 * Note that incrementing "code" for INS_call (0xFF) would
9593 * overflow, whereas setting the lower bit to 1 just works out
9602 // Double operand - set the appropriate bit.
9603 // I don't know what a legitimate reason to end up in this case would be
9604 // considering that FP is taken care of above...
9605 // what is an instruction that takes a double which is not covered by the
9606 // above instIsFP? Of the list in instrsxarch, only INS_fprem
9608 NO_WAY("bad 8 byte op");
9610 #endif // _TARGET_X86_
9613 NO_WAY("unexpected size");
9618 // Output the REX prefix
9619 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9621 // Figure out the variable's frame position
9622 int varNum = id->idAddr()->iiaLclVar.lvaVarNum();
9624 adr = emitComp->lvaFrameAddress(varNum, &EBPbased);
9625 dsp = adr + id->idAddr()->iiaLclVar.lvaOffset();
// Decide between the byte-displacement and dword-displacement addressing forms.
9627 dspInByte = ((signed char)dsp == (int)dsp);
9628 dspIsZero = (dsp == 0);
9630 // for stack variables the dsp should never be a reloc
9631 assert(id->idIsDspReloc() == 0);
9635 // EBP-based variable: does the offset fit in a byte?
9636 if (Is4ByteSSE4OrAVXInstruction(ins))
// ModR/M 0x45 = mod 01 (disp8) with EBP as the base register.
9640 dst += emitOutputByte(dst, code | 0x45);
9641 dst += emitOutputByte(dst, dsp);
// ModR/M 0x85 = mod 10 (disp32) with EBP as the base register.
9645 dst += emitOutputByte(dst, code | 0x85);
9646 dst += emitOutputLong(dst, dsp);
// Non-4-byte-opcode path: the ModR/M bits are folded into the second opcode byte.
9653 dst += emitOutputWord(dst, code | 0x4500);
9654 dst += emitOutputByte(dst, dsp);
9658 dst += emitOutputWord(dst, code | 0x8500);
9659 dst += emitOutputLong(dst, dsp);
9666 #if !FEATURE_FIXED_OUT_ARGS
9667 // Adjust the offset by the amount currently pushed on the CPU stack
9668 dsp += emitCurStackLvl;
// Recompute: the stack-level adjustment may have changed which form fits.
9671 dspInByte = ((signed char)dsp == (int)dsp);
9672 dspIsZero = (dsp == 0);
9674 // Does the offset fit in a byte?
9675 if (Is4ByteSSE4OrAVXInstruction(ins))
// ESP-based addressing always needs a SIB byte; 0x24 = SIB for [esp].
9681 dst += emitOutputByte(dst, code | 0x04);
9682 dst += emitOutputByte(dst, 0x24);
9686 dst += emitOutputByte(dst, code | 0x44);
9687 dst += emitOutputByte(dst, 0x24);
9688 dst += emitOutputByte(dst, dsp);
9693 dst += emitOutputByte(dst, code | 0x84);
9694 dst += emitOutputByte(dst, 0x24);
9695 dst += emitOutputLong(dst, dsp);
9704 dst += emitOutputWord(dst, code | 0x0400);
9705 dst += emitOutputByte(dst, 0x24);
9709 dst += emitOutputWord(dst, code | 0x4400);
9710 dst += emitOutputByte(dst, 0x24);
9711 dst += emitOutputByte(dst, dsp);
9716 dst += emitOutputWord(dst, code | 0x8400);
9717 dst += emitOutputByte(dst, 0x24);
9718 dst += emitOutputLong(dst, dsp);
9723 // Now generate the constant value, if present
9726 ssize_t cval = addc->cnsVal;
9728 #ifdef _TARGET_AMD64_
9729 // all these opcodes only take a sign-extended 4-byte immediate
9730 noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
9738 dst += emitOutputLong(dst, cval);
9741 dst += emitOutputWord(dst, cval);
9744 dst += emitOutputByte(dst, cval);
9748 assert(!"unexpected operand size");
// A reloc'd immediate was just emitted as 4 bytes; record its location.
9753 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
9758 // Does this instruction operate on a GC ref value?
9761 // Factor in the sub-variable offset
9762 adr += AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE);
9764 switch (id->idInsFmt())
9767 // Read stack -- no change
9770 case IF_SWR: // Stack Write (So we need to update GC live for stack var)
9771 // Write stack -- GC var may be born
9772 emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst);
9776 // Read stack -- no change
9780 // Write stack -- no change
9785 // Read stack , read register -- no change
9788 case IF_RWR_SRD: // Register Write, Stack Read (So we need to update GC live for register)
9790 // Read stack , write register -- GC reg may be born
9791 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
9794 case IF_SWR_RRD: // Stack Write, Register Read (So we need to update GC live for stack var)
9795 // Read register, write stack -- GC var may be born
9796 emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst);
9799 case IF_RRW_SRD: // Register Read/Write, Stack Read (So we need to update GC live for register)
9801 // reg could have been a GCREF as GCREF + int=BYREF
9802 // or BYREF+/-int=BYREF
9803 assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
9804 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
9809 // += -= of a byref, no change
9816 emitDispIns(id, false, false, false);
9818 assert(!"unexpected GC ref instruction format");
// Non-GC path: registers written by this instruction no longer hold GC refs.
9823 if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
9825 switch (id->idInsFmt())
9827 case IF_RWR_SRD: // Register Write, Stack Read
9828 case IF_RRW_SRD: // Register Read/Write, Stack Read
9829 emitGCregDeadUpd(id->idReg1(), dst);
// mul/imul with implicit operands write EDX:EAX.
9835 if (ins == INS_mulEAX || ins == INS_imulEAX)
9837 emitGCregDeadUpd(REG_EAX, dst);
9838 emitGCregDeadUpd(REG_EDX, dst);
9841 // For the three operand imul instruction the target register
9842 // is encoded in the opcode
9844 if (instrIs3opImul(ins))
9846 regNumber tgtReg = inst3opImulReg(ins);
9847 emitGCregDeadUpd(tgtReg, dst);
9855 /*****************************************************************************
9857 * Output an instruction with a static data member (class variable).
9860 BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
// Output an instruction whose memory operand is a static data member
// (class variable) or a data-section constant.
//
// Arguments:
//    dst  - current position in the output code buffer
//    id   - the instruction descriptor being emitted
//    code - the base opcode bits for the instruction
//    addc - optional immediate-constant operand (nullptr if none)
//
// Returns the updated output-buffer pointer.
9863 CORINFO_FIELD_HANDLE fldh;
9867 emitAttr size = id->idOpSize();
9868 size_t opsz = EA_SIZE_IN_BYTES(size);
9869 instruction ins = id->idIns();
9870 bool isMoffset = false;
9872 // Get hold of the field handle and offset
9873 fldh = id->idAddr()->iiaFieldHnd;
9874 offs = emitGetInsDsp(id);
9876 // Special case: mov reg, fs:[ddd]
9877 if (fldh == FLD_GLOBAL_FS)
// 0x64 is the FS segment-override prefix.
9879 dst += emitOutputByte(dst, 0x64);
9882 // Compute VEX prefix
9883 // Some of its callers already add VEX prefix and then call this routine.
9884 // Therefore add the VEX prefix only if it is not already present.
9885 code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
9887 // Compute the REX prefix
9888 if (TakesRexWPrefix(ins, size))
9890 code = AddRexWPrefix(ins, code);
9893 // Is there a large constant operand?
9894 if (addc && (size > EA_1BYTE))
9896 ssize_t cval = addc->cnsVal;
9897 // Does the constant fit in a byte?
// mov/test have no byte-immediate forms; reloc'd constants need a full dword.
9898 if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
9900 if (id->idInsFmt() != IF_MRW_SHF)
9911 // Special case: "mov eax, [addr]" and "mov [addr], eax"
9912 // Amd64: this is one case where addr can be 64-bit in size. This is
9913 // currently unused or not enabled on amd64 as it always uses RIP
9914 // relative addressing which results in smaller instruction size.
9915 if (ins == INS_mov && id->idReg1() == REG_EAX)
9917 switch (id->idInsFmt())
// Verify the generic encoding before switching to the short moffset form.
9921 assert(code == (insCodeRM(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500));
// Clear the generic opcode bits; the moffset opcode replaces them.
9923 code &= ~((code_t)0xFFFFFFFF);
9930 assert(code == (insCodeMR(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500));
9932 code &= ~((code_t)0xFFFFFFFF);
9942 #endif //_TARGET_X86_
9944 // Special case emitting AVX instructions
9945 if (Is4ByteSSE4OrAVXInstruction(ins))
9947 unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
9948 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9950 if (UseVEXEncoding())
9952 // Emit last opcode byte
9953 // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
9954 assert((code & 0xFF) == 0);
9955 dst += emitOutputByte(dst, (code >> 8) & 0xFF);
9959 dst += emitOutputWord(dst, code >> 16);
9960 dst += emitOutputWord(dst, code & 0xFFFF);
9963 // Emit Mod,R/M byte
// 0x05 = mod 00, r/m 101: [disp32] absolute (or RIP-relative) addressing.
9964 dst += emitOutputByte(dst, regcode | 0x05);
9967 // Is this a 'big' opcode?
9968 else if (code & 0xFF000000)
9970 // Output the REX prefix
9971 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9973 // Output the highest word of the opcode.
9974 // Check again since AVX instructions encode leading opcode bytes as part of VEX prefix.
9975 if (code & 0xFF000000)
9977 dst += emitOutputWord(dst, code >> 16);
9981 else if (code & 0x00FF0000)
9983 // Output the REX prefix
9984 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9986 // Check again as VEX prefix would have encoded leading opcode byte
9987 if (code & 0x00FF0000)
9989 dst += emitOutputByte(dst, code >> 16);
9993 if ((ins == INS_movsx || ins == INS_movzx || ins == INS_cmpxchg || ins == INS_xchg || ins == INS_xadd ||
9997 // movsx and movzx are 'big' opcodes but also have the 'w' bit
10001 else if (CodeGen::instIsFP(ins))
10003 assert(size == EA_4BYTE || size == EA_8BYTE);
10005 if (size == EA_8BYTE)
10012 // Is the operand size larger than a byte?
10019 // Output a size prefix for a 16-bit operand
10020 dst += emitOutputByte(dst, 0x66);
10024 #ifdef _TARGET_AMD64_
10027 // Set the 'w' bit to get the large version
10031 #ifdef _TARGET_X86_
10033 // Double operand - set the appropriate bit
10036 #endif // _TARGET_X86_
10039 assert(!"unexpected size");
10043 // Output the REX prefix
10044 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
// The offset-only and moffset forms carry a one-byte opcode; all other
// forms emitted here use the two-byte (opcode + ModR/M) form.
10048 if (id->idInsFmt() == IF_MRD_OFF || id->idInsFmt() == IF_RWR_MRD_OFF || isMoffset)
10050 dst += emitOutputByte(dst, code)
10054 dst += emitOutputWord(dst, code);
10058 // Do we have a constant or a static data member?
10059 doff = Compiler::eeGetJitDataOffs(fldh);
// Data-section constant: address is an offset into the emitted constant block.
10062 addr = emitConsBlock + doff;
10064 int byteSize = EA_SIZE_IN_BYTES(size);
10066 #ifndef LEGACY_BACKEND
10067 // this instruction has a fixed size (4) src.
10068 if (ins == INS_cvttss2si || ins == INS_cvtss2sd || ins == INS_vbroadcastss)
10072 // This has a fixed size (8) source.
10073 if (ins == INS_vbroadcastsd)
10077 #endif // !LEGACY_BACKEND
10079 // Check that the offset is properly aligned (i.e. the ddd in [ddd])
10080 assert((emitChkAlign == false) || (ins == INS_lea) || (((size_t)addr & (byteSize - 1)) == 0));
10084 // Special case: mov reg, fs:[ddd] or mov reg, [ddd]
10085 if (jitStaticFldIsGlobAddr(fldh))
// Static field: ask the EE (VM) for the field's runtime address.
10091 addr = (BYTE*)emitComp->info.compCompHnd->getFieldAddress(fldh, nullptr);
10092 if (addr == nullptr)
10094 NO_WAY("could not obtain address of static field");
10099 BYTE* target = (addr + offs);
// addlDelta accounts for immediate bytes emitted after the displacement,
// which shift where the reloc'd displacement sits relative to the next instruction.
10103 INT32 addlDelta = 0;
10107 // It is of the form "ins [disp], immed"
10108 // For emitting relocation, we also need to take into account of the
10109 // additional bytes of code emitted for immed val.
10111 ssize_t cval = addc->cnsVal;
10113 #ifdef _TARGET_AMD64_
10114 // all these opcodes only take a sign-extended 4-byte immediate
10115 noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
10117 noway_assert(opsz <= 4);
10135 assert(!"unexpected operand size");
10140 #ifdef _TARGET_AMD64_
10141 // All static field and data section constant accesses should be marked as relocatable
10142 noway_assert(id->idIsDspReloc());
// Emit a zero placeholder; the reloc below fills in the RIP-relative disp.
10143 dst += emitOutputLong(dst, 0);
10144 #else //_TARGET_X86_
10145 dst += emitOutputLong(dst, (int)target);
10146 #endif //_TARGET_X86_
10148 if (id->idIsDspReloc())
10150 emitRecordRelocation((void*)(dst - sizeof(int)), target, IMAGE_REL_BASED_DISP32, 0, addlDelta);
10155 #ifdef _TARGET_AMD64_
10156 // This code path should never be hit on amd64 since it always uses RIP relative addressing.
10157 // In future if ever there is a need to enable this special case, also enable the logic
10158 // that sets isMoffset to true on amd64.
10160 #else //_TARGET_X86_
// x86 moffset form: the full pointer-sized absolute address follows the opcode.
10162 dst += emitOutputSizeT(dst, (ssize_t)target);
10164 if (id->idIsDspReloc())
10166 emitRecordRelocation((void*)(dst - TARGET_POINTER_SIZE), target, IMAGE_REL_BASED_MOFFSET);
10169 #endif //_TARGET_X86_
10172 // Now generate the constant value, if present
10175 ssize_t cval = addc->cnsVal;
10177 #ifdef _TARGET_AMD64_
10178 // all these opcodes only take a sign-extended 4-byte immediate
10179 noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
10187 dst += emitOutputLong(dst, cval);
10190 dst += emitOutputWord(dst, cval);
10193 dst += emitOutputByte(dst, cval);
10197 assert(!"unexpected operand size");
10199 if (addc->cnsReloc)
10201 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
10206 // Does this instruction operate on a GC ref value?
10209 switch (id->idInsFmt())
// Register write from a GC-ref load: the target register becomes live GC.
10220 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
// Read/modify/write of a GC value: only byref arithmetic (add/sub) is legal.
10235 assert(id->idGCref() == GCT_BYREF);
10236 assert(ins == INS_add || ins == INS_sub);
10238 // Mark it as holding a GCT_BYREF
10239 emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
10244 emitDispIns(id, false, false, false);
10246 assert(!"unexpected GC ref instruction format");
// Non-GC path: any register written here no longer holds a GC ref.
10251 if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
10253 switch (id->idInsFmt())
10256 emitGCregDeadUpd(id->idReg1(), dst);
// mul/imul with implicit operands write EDX:EAX.
10262 if (ins == INS_mulEAX || ins == INS_imulEAX)
10264 emitGCregDeadUpd(REG_EAX, dst);
10265 emitGCregDeadUpd(REG_EDX, dst);
10268 // For the three operand imul instruction the target register
10269 // is encoded in the opcode
10271 if (instrIs3opImul(ins))
10273 regNumber tgtReg = inst3opImulReg(ins);
10274 emitGCregDeadUpd(tgtReg, dst);
10282 /*****************************************************************************
10284 * Output an instruction with one register operand.
10287 BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id)
// Output an instruction that takes a single register operand
// (e.g. inc/dec, push/pop, and other one-register forms).
//
// Arguments:
//    dst - current position in the output code buffer
//    id  - the instruction descriptor being emitted
//
// Returns the updated output-buffer pointer.
10291 instruction ins = id->idIns();
10292 regNumber reg = id->idReg1();
10293 emitAttr size = id->idOpSize();
10295 // We need to update GC info correctly for these instructions, so
10296 assert(!IsSSE2Instruction(ins));
10297 assert(!IsAVXInstruction(ins));
10299 // Get the 'base' opcode
10305 #ifdef _TARGET_AMD64_
10308 if (size == EA_1BYTE)
// The "long" (ModR/M) forms of inc/dec are defined to immediately follow
// the compact forms in the instruction enum; these asserts pin that layout.
10311 assert(INS_inc_l == INS_inc + 1);
10312 assert(INS_dec_l == INS_dec + 1);
10314 // Can't use the compact form, use the long form
10315 ins = (instruction)(ins + 1);
10316 if (size == EA_2BYTE)
10318 // Output a size prefix for a 16-bit operand
10319 dst += emitOutputByte(dst, 0x66);
10322 code = insCodeRR(ins);
10323 if (size != EA_1BYTE)
10325 // Set the 'w' bit to get the large version
10329 if (TakesRexWPrefix(ins, size))
10331 code = AddRexWPrefix(ins, code);
10335 unsigned regcode = insEncodeReg012(ins, reg, size, &code);
10337 // Output the REX prefix
10338 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10340 dst += emitOutputWord(dst, code | (regcode << 8));
// Compact (single-byte, register-in-opcode) encoding path.
10344 if (size == EA_2BYTE)
10346 // Output a size prefix for a 16-bit operand
10347 dst += emitOutputByte(dst, 0x66);
10349 dst += emitOutputByte(dst, insCodeRR(ins) | insEncodeReg012(ins, reg, size, nullptr));
10356 case INS_push_hide:
// push/pop encode the register directly in the opcode byte.
10358 assert(size == EA_PTRSIZE);
10359 code = insEncodeOpreg(ins, reg, size);
10361 assert(!TakesVexPrefix(ins));
10362 assert(!TakesRexWPrefix(ins, size));
10364 // Output the REX prefix
10365 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10367 dst += emitOutputByte(dst, code);
// Byte-register form (no GC ref can live in a 1-byte register).
10387 assert(id->idGCref() == GCT_NONE);
10388 assert(size == EA_1BYTE);
10390 code = insEncodeMRreg(ins, reg, EA_1BYTE, insCodeMR(ins));
10392 // Output the REX prefix
10393 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10395 // We expect this to always be a 'big' opcode
10396 assert(code & 0x00FF0000);
10398 dst += emitOutputByte(dst, code >> 16);
10399 dst += emitOutputWord(dst, code & 0x0000FFFF);
10406 // Kill off any GC refs in EAX or EDX
// (the implicit-operand multiply/divide forms write both registers)
10407 emitGCregDeadUpd(REG_EAX, dst);
10408 emitGCregDeadUpd(REG_EDX, dst);
// General single-register ModR/M form.
10414 assert(id->idGCref() == GCT_NONE);
10416 code = insEncodeMRreg(ins, reg, size, insCodeMR(ins));
10418 if (size != EA_1BYTE)
10420 // Set the 'w' bit to get the large version
10423 if (size == EA_2BYTE)
10425 // Output a size prefix for a 16-bit operand
10426 dst += emitOutputByte(dst, 0x66);
10430 code = AddVexPrefixIfNeeded(ins, code, size);
10432 if (TakesRexWPrefix(ins, size))
10434 code = AddRexWPrefix(ins, code);
10437 // Output the REX prefix
10438 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10440 dst += emitOutputWord(dst, code);
10444 // Are we writing the register? if so then update the GC information
10445 switch (id->idInsFmt())
10452 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
10456 emitGCregDeadUpd(id->idReg1(), dst);
10462 regMaskTP regMask = genRegMask(reg);
10466 // The reg must currently be holding either a gcref or a byref
10467 // and the instruction must be inc or dec
10468 assert(((emitThisGCrefRegs | emitThisByrefRegs) & regMask) &&
10469 (ins == INS_inc || ins == INS_dec || ins == INS_inc_l || ins == INS_dec_l));
// inc/dec of a GC pointer produces an interior pointer, i.e. a byref.
10470 assert(id->idGCref() == GCT_BYREF);
10471 // Mark it as holding a GCT_BYREF
10472 emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
10476 // Can't use RRW to trash a GC ref. It's OK for unverifiable code
10477 // to trash Byrefs.
10478 assert((emitThisGCrefRegs & regMask) == 0);
10484 emitDispIns(id, false, false, false);
10486 assert(!"unexpected instruction format");
10493 /*****************************************************************************
10495 * Output an instruction with two register operands.
10498 BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
// Output an instruction with two register operands.
//
// Arguments:
//    dst - current position in the output code buffer
//    id  - the instruction descriptor being emitted
//
// Returns the updated output-buffer pointer.
10502 instruction ins = id->idIns();
10503 regNumber reg1 = id->idReg1();
10504 regNumber reg2 = id->idReg2();
10505 emitAttr size = id->idOpSize();
10507 // Get the 'base' opcode
10508 code = insCodeRM(ins);
10509 code = AddVexPrefixIfNeeded(ins, code, size);
10510 if (IsSSEOrAVXInstruction(ins))
10512 code = insEncodeRMreg(ins, code);
10514 if (TakesRexWPrefix(ins, size))
10516 code = AddRexWPrefix(ins, code);
10519 else if ((ins == INS_movsx) || (ins == INS_movzx) || (insIsCMOV(ins)))
// For these, bit 0 selects the 16-bit source-size variant.
10521 code = insEncodeRMreg(ins, code) | (int)(size == EA_2BYTE);
10522 #ifdef _TARGET_AMD64_
10524 assert((size < EA_4BYTE) || (insIsCMOV(ins)));
// movsx to a 64-bit destination needs REX.W to sign-extend through bit 63.
10525 if ((size == EA_8BYTE) || (ins == INS_movsx))
10527 code = AddRexWPrefix(ins, code);
10530 else if (ins == INS_movsxd)
10532 code = insEncodeRMreg(ins, code);
10534 #endif // _TARGET_AMD64_
10536 #ifdef FEATURE_HW_INTRINSICS
10537 else if ((ins == INS_crc32) || (ins == INS_lzcnt) || (ins == INS_popcnt))
10539 code = insEncodeRMreg(ins, code);
10540 if ((ins == INS_crc32) && (size > EA_1BYTE))
10545 if (size == EA_2BYTE)
// Only crc32 supports a 16-bit operand here and needs the 0x66 prefix for it.
10547 assert(ins == INS_crc32)
10548 dst += emitOutputByte(dst, 0x66);
10550 else if (size == EA_8BYTE)
10552 code = AddRexWPrefix(ins, code);
10555 #endif // FEATURE_HW_INTRINSICS
// General integer two-register form, built from the MR opcode.
10558 code = insEncodeMRreg(ins, insCodeMR(ins));
10560 if (ins != INS_test)
// Byte-sized operands require both registers to be byte-addressable.
10568 noway_assert(RBM_BYTE_REGS & genRegMask(reg1));
10569 noway_assert(RBM_BYTE_REGS & genRegMask(reg2));
10573 // Output a size prefix for a 16-bit operand
10574 dst += emitOutputByte(dst, 0x66);
10578 // Set the 'w' bit to get the large version
10582 #ifdef _TARGET_AMD64_
10584 // TODO-AMD64-CQ: Better way to not emit REX.W when we don't need it
10585 // Don't need to zero out the high bits explicitly
// "xor reg, reg" zero-extends to 64 bits anyway, so REX.W would be wasted.
10586 if ((ins != INS_xor) || (reg1 != reg2))
10588 code = AddRexWPrefix(ins, code);
10591 // Set the 'w' bit to get the large version
10595 #endif // _TARGET_AMD64_
10598 assert(!"unexpected size");
10602 unsigned regCode = insEncodeReg345(ins, reg1, size, &code);
10603 regCode |= insEncodeReg012(ins, reg2, size, &code);
10605 if (TakesVexPrefix(ins))
10607 // In case of AVX instructions that take 3 operands, we generally want to encode reg1
10608 // as first source. In this case, reg1 is both a source and a destination.
10609 // The exception is the "merge" 3-operand case, where we have a move instruction, such
10610 // as movss, and we want to merge the source with itself.
10612 // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
10613 // now we use the single source as source1 and source2.
10614 if (IsDstDstSrcAVXInstruction(ins))
10616 // encode source/dest operand reg in 'vvvv' bits in 1's complement form
10617 code = insEncodeReg3456(ins, reg1, size, code);
10619 else if (IsDstSrcSrcAVXInstruction(ins))
10621 // encode source operand reg in 'vvvv' bits in 1's complement form
10622 code = insEncodeReg3456(ins, reg2, size, code);
10626 // Output the REX prefix
10627 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10629 if (code & 0xFF000000)
10631 // Output the highest word of the opcode
10632 dst += emitOutputWord(dst, code >> 16);
10633 code &= 0x0000FFFF;
10635 if (Is4ByteSSE4Instruction(ins))
10637 // Output 3rd byte of the opcode
10638 dst += emitOutputByte(dst, code);
10642 else if (code & 0x00FF0000)
10644 dst += emitOutputByte(dst, code >> 16);
10645 code &= 0x0000FFFF;
10648 // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
// 0xC0 in the low ModR/M bits = mod 11: register-direct operands.
10649 if ((code & 0xFF00) == 0xC000)
10651 dst += emitOutputWord(dst, code | (regCode << 8));
10653 else if ((code & 0xFF) == 0x00)
10655 // This case happens for SSE4/AVX instructions only
10656 assert(IsAVXInstruction(ins) || IsSSE4Instruction(ins));
10658 dst += emitOutputByte(dst, (code >> 8) & 0xFF);
10659 dst += emitOutputByte(dst, (0xC0 | regCode));
10663 dst += emitOutputWord(dst, code);
10664 dst += emitOutputByte(dst, (0xC0 | regCode));
10667 // Does this instruction operate on a GC ref value?
10670 switch (id->idInsFmt())
10677 if (emitSyncThisObjReg != REG_NA && emitIGisInProlog(emitCurIG) && reg2 == (int)REG_ARG_0)
10679 // We're relocating "this" in the prolog
10680 assert(emitComp->lvaIsOriginalThisArg(0));
10681 assert(emitComp->lvaTable[0].lvRegister);
10682 assert(emitComp->lvaTable[0].lvRegNum == reg1);
10684 if (emitFullGCinfo)
10686 emitGCregLiveSet(id->idGCref(), genRegMask(reg1), dst, true);
10691 /* If emitFullGCinfo==false, then we don't use any
10692 regPtrDsc's and so explicitly note the location
10693 of "this" in GCEncode.cpp
10698 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
10703 switch (id->idIns())
10706 This must be one of the following cases:
10708 xor reg, reg to assign NULL
10710 and r1 , r2 if (ptr1 && ptr2) ...
10711 or r1 , r2 if (ptr1 || ptr2) ...
10713 add r1 , r2 to compute a normal byref
10714 sub r1 , r2 to compute a strange byref (VC only)
10718 assert(id->idReg1() == id->idReg2());
10719 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
10724 emitGCregDeadUpd(id->idReg1(), dst);
10729 assert(id->idGCref() == GCT_BYREF);
10733 regMask = genRegMask(reg1) | genRegMask(reg2);
10735 // r1/r2 could have been a GCREF as GCREF + int=BYREF
10736 // or BYREF+/-int=BYREF
10737 assert(((regMask & emitThisGCrefRegs) && (ins == INS_add)) ||
10738 ((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub)));
10740 // Mark r1 as holding a byref
10741 emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
10746 emitDispIns(id, false, false, false);
10748 assert(!"unexpected GC reg update instruction");
10754 // This must be "xchg reg1, reg2"
10755 assert(id->idIns() == INS_xchg);
10757 // If we got here, the GC-ness of the registers doesn't match, so we have to "swap" them in the GC
10758 // register pointer mask.
10759 CLANG_FORMAT_COMMENT_ANCHOR;
10761 #ifndef LEGACY_BACKEND
// Record each register's GC type before killing, then re-register them swapped.
10764 gc1 = emitRegGCtype(reg1);
10765 gc2 = emitRegGCtype(reg2);
10769 // Kill the GC-info about the GC registers
10773 emitGCregDeadUpd(reg1, dst);
10778 emitGCregDeadUpd(reg2, dst);
10781 // Now, swap the info
10785 emitGCregLiveUpd(gc1, reg2, dst);
10790 emitGCregLiveUpd(gc2, reg1, dst);
10793 #endif // !LEGACY_BACKEND
10798 emitDispIns(id, false, false, false);
10800 assert(!"unexpected GC ref instruction format");
// Non-GC path: any register written here no longer holds a GC ref.
10805 if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
10807 switch (id->idInsFmt())
10810 // INS_mulEAX can not be used with any of these formats
10811 assert(ins != INS_mulEAX && ins != INS_imulEAX);
10813 // For the three operand imul instruction the target
10814 // register is encoded in the opcode
10816 if (instrIs3opImul(ins))
10818 regNumber tgtReg = inst3opImulReg(ins);
10819 emitGCregDeadUpd(tgtReg, dst);
10825 // INS_movxmm2i writes to reg2.
10826 if (ins == INS_mov_xmm2i)
10828 emitGCregDeadUpd(id->idReg2(), dst);
10832 emitGCregDeadUpd(id->idReg1(), dst);
10845 BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id)
// Output a three-register (VEX-encoded) AVX instruction:
//    targetReg = op(src1, src2)
//
// Arguments:
//    dst - current position in the output code buffer
//    id  - the instruction descriptor being emitted
//
// Returns the updated output-buffer pointer.
10849 instruction ins = id->idIns();
10850 assert(IsAVXInstruction(ins));
10851 assert(IsThreeOperandAVXInstruction(ins) || isAvxBlendv(ins));
10852 regNumber targetReg = id->idReg1();
10853 regNumber src1 = id->idReg2();
10854 regNumber src2 = id->idReg3();
10855 emitAttr size = id->idOpSize();
10857 code = insCodeRM(ins);
10858 code = AddVexPrefixIfNeeded(ins, code, size);
10859 code = insEncodeRMreg(ins, code);
10861 if (TakesRexWPrefix(ins, size))
10863 code = AddRexWPrefix(ins, code);
// ModR/M: destination in reg (bits 5:3), second source in r/m (bits 2:0).
10866 unsigned regCode = insEncodeReg345(ins, targetReg, size, &code);
10867 regCode |= insEncodeReg012(ins, src2, size, &code);
10868 // encode source operand reg in 'vvvv' bits in 1's complement form
10869 code = insEncodeReg3456(ins, src1, size, code);
10871 // Output the REX prefix
10872 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10874 // Is this a 'big' opcode?
10875 if (code & 0xFF000000)
10877 // Output the highest word of the opcode
10878 dst += emitOutputWord(dst, code >> 16);
10879 code &= 0x0000FFFF;
10881 else if (code & 0x00FF0000)
10883 dst += emitOutputByte(dst, code >> 16);
10884 code &= 0x0000FFFF;
10887 // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
// 0xC0 in the ModR/M bits = mod 11: register-direct operands.
10888 if ((code & 0xFF00) == 0xC000)
10890 dst += emitOutputWord(dst, code | (regCode << 8));
10892 else if ((code & 0xFF) == 0x00)
10894 // This case happens for AVX instructions only
10895 assert(IsAVXInstruction(ins));
10897 dst += emitOutputByte(dst, (code >> 8) & 0xFF);
10898 dst += emitOutputByte(dst, (0xC0 | regCode));
10902 dst += emitOutputWord(dst, code);
10903 dst += emitOutputByte(dst, (0xC0 | regCode));
// AVX instructions never produce GC refs, so no GC tracking update is needed.
10906 noway_assert(!id->idGCref());
10911 /*****************************************************************************
10913 * Output an instruction with a register and constant operands.
// Emit an instruction with a register operand and an immediate constant
// (e.g. "add reg, imm", "mov reg, imm", SSE shift-by-imm8). Chooses the
// shortest legal encoding (sign-extended imm8, accumulator form, or full
// immediate), emits the bytes at 'dst', records any relocation, and updates
// GC liveness for the destination register. Returns the advanced pointer.
// NOTE(review): this extract is elided — switch/case labels and braces between
// some visible lines are not shown here.
10916 BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
10919 emitAttr size = id->idOpSize();
10920 instruction ins = id->idIns();
10921 regNumber reg = id->idReg1();
10922 ssize_t val = emitGetInsSC(id);
// 'mov' and 'test' have no sign-extending imm8 form, so never treat their
// immediates as byte-sized.
10923 bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
10925 // BT reg,imm might be useful but it requires special handling of the immediate value
10926 // (it is always encoded in a byte). Let's not complicate things until this is needed.
10927 assert(ins != INS_bt);
10929 if (id->idIsCnsReloc())
10931 valInByte = false; // relocs can't be placed in a byte
10934 noway_assert(emitVerifyEncodable(ins, size, reg));
10936 #ifndef LEGACY_BACKEND
10937 if (IsSSEOrAVXInstruction(ins))
10939 // Handle SSE2 instructions of the form "opcode reg, immed8"
10941 assert(id->idGCref() == GCT_NONE);
10943 // The left and right shifts use the same encoding, and are distinguished by the Reg/Opcode field.
// The ModRM 'reg' field acts as an opcode extension here; the values below
// select the specific shift/shuffle operation (per the instruction switched
// on in the elided case labels).
10944 regNumber regOpcode;
10948 regOpcode = (regNumber)3;
10951 regOpcode = (regNumber)7;
10956 regOpcode = (regNumber)2;
10961 regOpcode = (regNumber)6;
10965 regOpcode = (regNumber)4;
10968 assert(!"Invalid instruction for SSE2 instruction of the form: opcode reg, immed8");
10969 regOpcode = REG_NA;
10973 // Get the 'base' opcode.
10974 code = insCodeMI(ins);
10975 code = AddVexPrefixIfNeeded(ins, code, size);
10976 code = insEncodeMIreg(ins, reg, size, code);
10977 assert(code & 0x00FF0000);
10978 if (TakesVexPrefix(ins))
10980 // The 'vvvv' bits encode the destination register, which for this case (RI)
10981 // is the same as the source.
10982 code = insEncodeReg3456(ins, reg, size, code);
10985 unsigned regcode = (insEncodeReg345(ins, regOpcode, size, &code) | insEncodeReg012(ins, reg, size, &code)) << 8;
10987 // Output the REX prefix
10988 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10990 if (code & 0xFF000000)
10992 dst += emitOutputWord(dst, code >> 16);
10994 else if (code & 0xFF0000)
10996 dst += emitOutputByte(dst, code >> 16);
10999 dst += emitOutputWord(dst, code | regcode);
// SSE shift-by-immediate always takes a one-byte immediate.
11001 dst += emitOutputByte(dst, val);
11005 #endif // !LEGACY_BACKEND
11007 // The 'mov' opcode is special
// 'mov reg, imm' uses the short B8+reg "accumulator" form with a full-width
// immediate (4 bytes, or 8 with REX.W on AMD64).
11008 if (ins == INS_mov)
11010 code = insCodeACC(ins);
11011 assert(code < 0x100);
11013 code |= 0x08; // Set the 'w' bit
11014 unsigned regcode = insEncodeReg012(ins, reg, size, &code);
11017 // This is INS_mov and will not take VEX prefix
11018 assert(!TakesVexPrefix(ins));
11020 if (TakesRexWPrefix(ins, size))
11022 code = AddRexWPrefix(ins, code);
11025 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11027 dst += emitOutputByte(dst, code);
11028 if (size == EA_4BYTE)
11030 dst += emitOutputLong(dst, val);
11032 #ifdef _TARGET_AMD64_
11035 assert(size == EA_PTRSIZE);
11036 dst += emitOutputSizeT(dst, val);
11040 if (id->idIsCnsReloc())
11042 emitRecordRelocation((void*)(dst - (unsigned)EA_SIZE(size)), (void*)(size_t)val, IMAGE_REL_BASED_MOFFSET);
11048 // Decide which encoding is the shortest
11049 bool useSigned, useACC;
// The accumulator (EAX) forms save the ModRM byte; 3-operand imul encodes its
// target register in the opcode, so it can't use them.
11051 if (reg == REG_EAX && !instrIs3opImul(ins))
11053 if (size == EA_1BYTE || (ins == INS_test))
11055 // For al, ACC encoding is always the smallest
11061 /* For ax/eax, we avoid ACC encoding for small constants as we
11062 * can emit the small constant and have it sign-extended.
11063 * For big constants, the ACC encoding is better as we can use
11064 * the 1 byte opcode
11069 // avoid using ACC encoding
11094 // "test" has no 's' bit
11095 if (ins == INS_test)
11100 // Get the 'base' opcode
11103 assert(!useSigned);
11104 code = insCodeACC(ins);
11108 assert(!useSigned || valInByte);
11110 // Some instructions (at least 'imul') do not have a
11111 // r/m, immed form, but do have a dstReg,srcReg,imm8 form.
11112 if (valInByte && useSigned && insNeedsRRIb(ins))
11114 code = insEncodeRRIb(ins, reg, size);
11118 code = insCodeMI(ins);
11119 code = AddVexPrefixIfNeeded(ins, code, size);
11120 code = insEncodeMIreg(ins, reg, size, code);
11130 // Output a size prefix for a 16-bit operand
11131 dst += emitOutputByte(dst, 0x66);
11135 // Set the 'w' bit to get the large version
11139 #ifdef _TARGET_AMD64_
11141 /* Set the 'w' bit to get the large version */
11142 /* and the REX.W bit to get the really large version */
11144 code = AddRexWPrefix(ins, code);
11150 assert(!"unexpected size");
11153 // Output the REX prefix
11154 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11156 // Does the value fit in a sign-extended byte?
11157 // Important! Only set the 's' bit when we have a size larger than EA_1BYTE.
11158 // Note: A sign-extending immediate when (size == EA_1BYTE) is invalid in 64-bit mode.
11160 if (useSigned && (size > EA_1BYTE))
11162 // We can just set the 's' bit, and issue an immediate byte
11164 code |= 0x2; // Set the 's' bit to use a sign-extended immediate byte.
11165 dst += emitOutputWord(dst, code);
11166 dst += emitOutputByte(dst, val);
11170 // Can we use an accumulator (EAX) encoding?
11173 dst += emitOutputByte(dst, code);
11177 dst += emitOutputWord(dst, code);
// Emit the full-size immediate; the branch taken (byte/word/long) depends on
// the operand size cases elided from this extract.
11183 dst += emitOutputByte(dst, val);
11186 dst += emitOutputWord(dst, val);
11189 dst += emitOutputLong(dst, val);
11191 #ifdef _TARGET_AMD64_
11193 dst += emitOutputLong(dst, val);
11195 #endif // _TARGET_AMD64_
11200 if (id->idIsCnsReloc())
11202 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
11203 assert(size == EA_4BYTE);
11209 // Does this instruction operate on a GC ref value?
11212 switch (id->idInsFmt())
11218 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
11222 assert(id->idGCref() == GCT_BYREF);
11226 regMask = genRegMask(reg);
11227 // FIXNOW review the other places and relax the assert there too
11229 // The reg must currently be holding either a gcref or a byref
11230 // GCT_GCREF+int = GCT_BYREF, and GCT_BYREF+/-int = GCT_BYREF
11231 if (emitThisGCrefRegs & regMask)
11233 assert(ins == INS_add);
11235 if (emitThisByrefRegs & regMask)
11237 assert(ins == INS_add || ins == INS_sub);
11240 // Mark it as holding a GCT_BYREF
11241 emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
11246 emitDispIns(id, false, false, false);
11248 assert(!"unexpected GC ref instruction format");
11251 // mul can never produce a GC ref
11252 assert(!instrIs3opImul(ins));
11253 assert(ins != INS_mulEAX && ins != INS_imulEAX);
// Non-GC result: kill any stale GC-ness recorded for the written register.
11257 switch (id->idInsFmt())
11260 // INS_mulEAX can not be used with any of these formats
11261 assert(ins != INS_mulEAX && ins != INS_imulEAX);
11263 // For the three operand imul instruction the target
11264 // register is encoded in the opcode
11266 if (instrIs3opImul(ins))
11268 regNumber tgtReg = inst3opImulReg(ins);
11269 emitGCregDeadUpd(tgtReg, dst);
11275 assert(!instrIs3opImul(ins));
11277 emitGCregDeadUpd(id->idReg1(), dst);
11282 emitDispIns(id, false, false, false);
11284 assert(!"unexpected GC ref instruction format");
11291 /*****************************************************************************
11293 * Output an instruction with a constant operand.
// Emit an instruction whose only operand is an immediate constant
// (push imm, ret imm16, loop-style imm8, etc.). Picks the byte or full-size
// immediate form, records a relocation for relocatable push constants, and
// returns the advanced output pointer.
// NOTE(review): this extract is elided — the switch/case labels that select
// between the branches below are not shown here.
11296 BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id)
11299 instruction ins = id->idIns();
11300 emitAttr size = id->idOpSize();
11301 ssize_t val = emitGetInsSC(id);
11302 bool valInByte = ((signed char)val == val);
// We would need to update GC info correctly for SSE/AVX here, so those
// instructions are excluded from this path.
11305 assert(!IsSSE2Instruction(ins));
11306 assert(!IsAVXInstruction(ins));
11308 #ifdef _TARGET_AMD64_
11309 // all these opcodes take a sign-extended 4-byte immediate, max
11310 noway_assert(size < EA_8BYTE || ((int)val == val && !id->idIsCnsReloc()));
11313 if (id->idIsCnsReloc())
11315 valInByte = false; // relocs can't be placed in a byte
11317 // Of these instructions only the push instruction can have reloc
11318 assert(ins == INS_push || ins == INS_push_hide);
// imm8 form: opcode byte followed by the one-byte immediate.
11324 assert((val >= -128) && (val <= 127));
11325 dst += emitOutputByte(dst, insCode(ins));
11326 dst += emitOutputByte(dst, val);
11330 assert((val >= -128) && (val <= 127));
11331 dst += emitOutputByte(dst, insCodeMI(ins));
11332 dst += emitOutputByte(dst, val);
// imm16 form (e.g. "ret imm16").
11337 dst += emitOutputByte(dst, insCodeMI(ins));
11338 dst += emitOutputWord(dst, val);
11341 case INS_push_hide:
11343 code = insCodeMI(ins);
11345 // Does the operand fit in a byte?
// Setting bit 1 of the push opcode (0x68 -> 0x6A) selects the imm8 form.
11348 dst += emitOutputByte(dst, code | 2);
11349 dst += emitOutputByte(dst, val);
11353 if (TakesRexWPrefix(ins, size))
11355 code = AddRexWPrefix(ins, code);
11356 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11359 dst += emitOutputByte(dst, code);
11360 dst += emitOutputLong(dst, val);
11361 if (id->idIsCnsReloc())
11363 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
11367 // Did we push a GC ref value?
// Pushing a GC ref constant is not tracked here yet — just report it.
11371 printf("UNDONE: record GCref push [cns]\n");
11378 assert(!"unexpected instruction");
11384 /*****************************************************************************
11386 * Output a local jump instruction.
11387 * This function also handles non-jumps that have jump-like characteristics, like RIP-relative LEA of a label that
11388 * needs to get bound to an actual address and processed by branch shortening.
// Output a local jump (or jump-like) instruction bound to an in-method label:
// conditional/unconditional jumps, calls to local labels, "push label", and
// RIP-relative "mov"/"lea" of a label address. Computes the distance to the
// target, chooses the short or long encoding (recording forward jumps for
// later patching), emits the bytes, records relocations for cross-boundary
// or relocatable code, and updates GC state for local calls.
// NOTE(review): this extract is elided — the switch on 'ins', several braces,
// and some intermediate statements are not shown here.
11391 BYTE* emitter::emitOutputLJ(BYTE* dst, instrDesc* i)
11397 instrDescJmp* id = (instrDescJmp*)i;
11398 instruction ins = id->idIns();
11400 bool relAddr = true; // does the instruction use relative-addressing?
11402 // SSE2 doesn't make any sense here
11403 assert(!IsSSE2Instruction(ins));
11404 assert(!IsAVXInstruction(ins));
// Short/long encoding sizes per instruction class (conditional jump, plain
// jump, call, or a pre-computed size for the non-jump forms).
11412 ssz = JCC_SIZE_SMALL;
11413 lsz = JCC_SIZE_LARGE;
11418 ssz = JMP_SIZE_SMALL;
11419 lsz = JMP_SIZE_LARGE;
11424 ssz = lsz = CALL_INST_SIZE;
11428 case INS_push_hide:
11437 ssz = lsz = id->idCodeSize();
11443 // Figure out the distance to the target
11444 srcOffs = emitCurCodeOffs(dst);
11445 dstOffs = id->idAddr()->iiaIGlabel->igOffs;
// Relative forms use the pc-relative delta; absolute forms (push/mov of a
// label) use the target address itself.
11449 distVal = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs));
11453 distVal = (ssize_t)emitOffsetToPtr(dstOffs);
11456 if (dstOffs <= srcOffs)
11458 // This is a backward jump - distance is known at this point
11459 CLANG_FORMAT_COMMENT_ANCHOR;
11462 if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
11464 size_t blkOffs = id->idjIG->igOffs;
11466 if (INTERESTING_JUMP_NUM == 0)
11468 printf("[3] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
11470 printf("[3] Jump block is at %08X - %02X = %08X\n", blkOffs, emitOffsAdj, blkOffs - emitOffsAdj);
11471 printf("[3] Jump is at %08X - %02X = %08X\n", srcOffs, emitOffsAdj, srcOffs - emitOffsAdj);
11472 printf("[3] Label block is at %08X - %02X = %08X\n", dstOffs, emitOffsAdj, dstOffs - emitOffsAdj);
11476 // Can we use a short jump?
// Backward distance is exact, so compare against the most-negative imm8 reach.
11477 if (jmp && distVal - ssz >= (size_t)JMP_DIST_SMALL_MAX_NEG)
11479 emitSetShortJump(id);
11484 // This is a forward jump - distance will be an upper limit
11485 emitFwdJumps = true;
11487 // The target offset will be closer by at least 'emitOffsAdj', but only if this
11488 // jump doesn't cross the hot-cold boundary.
11489 if (!emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
11491 dstOffs -= emitOffsAdj;
11492 distVal -= emitOffsAdj;
11495 // Record the location of the jump for later patching
11496 id->idjOffs = dstOffs;
11498 // Are we overflowing the id->idjOffs bitfield?
11499 if (id->idjOffs != dstOffs)
11501 IMPL_LIMITATION("Method is too large");
11505 if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
11507 size_t blkOffs = id->idjIG->igOffs;
11509 if (INTERESTING_JUMP_NUM == 0)
11511 printf("[4] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
11513 printf("[4] Jump block is at %08X\n", blkOffs);
11514 printf("[4] Jump is at %08X\n", srcOffs);
11515 printf("[4] Label block is at %08X - %02X = %08X\n", dstOffs + emitOffsAdj, emitOffsAdj, dstOffs);
11519 // Can we use a short jump?
// Forward distance is an upper bound, so compare against the positive imm8 reach.
11520 if (jmp && distVal - ssz <= (size_t)JMP_DIST_SMALL_MAX_POS)
11522 emitSetShortJump(id);
11526 // Adjust the offset to emit relative to the end of the instruction
11529 distVal -= id->idjShort ? ssz : lsz;
// Dead debug trace (disabled via 'if (0 && ...)').
// NOTE(review): the "Fwd"/"Bwd" selector looks inverted — dstOffs <= srcOffs is
// a *backward* jump; harmless while the block stays disabled, but verify before
// ever re-enabling it.
11533 if (0 && emitComp->verbose)
11535 size_t sz = id->idjShort ? ssz : lsz;
11536 int distValSize = id->idjShort ? 4 : 8;
11537 printf("; %s jump [%08X/%03u] from %0*X to %0*X: dist = %08XH\n", (dstOffs <= srcOffs) ? "Fwd" : "Bwd",
11538 emitComp->dspPtr(id), id->idDebugOnlyInfo()->idNum, distValSize, srcOffs + sz, distValSize, dstOffs,
11543 // What size jump should we use?
// Short-jump path: 1-byte opcode + 1-byte signed displacement.
11547 assert(!id->idjKeepLong);
11548 assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false);
11550 assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL);
11551 assert(JMP_SIZE_SMALL == 2);
// If this jump was earlier estimated larger than it turned out, accumulate the
// shrinkage so later offsets can be adjusted.
11555 if (emitInstCodeSz(id) != JMP_SIZE_SMALL)
11557 emitOffsAdj += emitInstCodeSz(id) - JMP_SIZE_SMALL;
11560 if (emitComp->verbose)
11562 printf("; NOTE: size of jump [%08X] mis-predicted\n", emitComp->dspPtr(id));
11567 dst += emitOutputByte(dst, insCode(ins));
11569 // For forward jumps, record the address of the distance value
11570 id->idjTemp.idjAddr = (distVal > 0) ? dst : nullptr;
11572 dst += emitOutputByte(dst, distVal);
// Long-jump path: conditional jumps map to their 2-byte 0F 8x long forms via a
// fixed opcode offset; the asserts below pin that enum layout.
11581 // clang-format off
11582 assert(INS_jmp + (INS_l_jmp - INS_jmp) == INS_l_jmp);
11583 assert(INS_jo + (INS_l_jmp - INS_jmp) == INS_l_jo);
11584 assert(INS_jb + (INS_l_jmp - INS_jmp) == INS_l_jb);
11585 assert(INS_jae + (INS_l_jmp - INS_jmp) == INS_l_jae);
11586 assert(INS_je + (INS_l_jmp - INS_jmp) == INS_l_je);
11587 assert(INS_jne + (INS_l_jmp - INS_jmp) == INS_l_jne);
11588 assert(INS_jbe + (INS_l_jmp - INS_jmp) == INS_l_jbe);
11589 assert(INS_ja + (INS_l_jmp - INS_jmp) == INS_l_ja);
11590 assert(INS_js + (INS_l_jmp - INS_jmp) == INS_l_js);
11591 assert(INS_jns + (INS_l_jmp - INS_jmp) == INS_l_jns);
11592 assert(INS_jpe + (INS_l_jmp - INS_jmp) == INS_l_jpe);
11593 assert(INS_jpo + (INS_l_jmp - INS_jmp) == INS_l_jpo);
11594 assert(INS_jl + (INS_l_jmp - INS_jmp) == INS_l_jl);
11595 assert(INS_jge + (INS_l_jmp - INS_jmp) == INS_l_jge);
11596 assert(INS_jle + (INS_l_jmp - INS_jmp) == INS_l_jle);
11597 assert(INS_jg + (INS_l_jmp - INS_jmp) == INS_l_jg);
11600 code = insCode((instruction)(ins + (INS_l_jmp - INS_jmp)));
11602 else if (ins == INS_push || ins == INS_push_hide)
11604 assert(insCodeMI(INS_push) == 0x68);
11607 else if (ins == INS_mov)
11609 // Make it look like IF_SWR_CNS so that emitOutputSV emits the r/m32 for us
// Temporarily retarget the instrDesc, emit via emitOutputSV, then restore it.
11610 insFormat tmpInsFmt = id->idInsFmt();
11611 insGroup* tmpIGlabel = id->idAddr()->iiaIGlabel;
11612 bool tmpDspReloc = id->idIsDspReloc();
11614 id->idInsFmt(IF_SWR_CNS);
11615 id->idAddr()->iiaLclVar = ((instrDescLbl*)id)->dstLclVar;
11616 id->idSetIsDspReloc(false);
11618 dst = emitOutputSV(dst, id, insCodeMI(ins));
11620 // Restore id fields with original values
11621 id->idInsFmt(tmpInsFmt);
11622 id->idAddr()->iiaIGlabel = tmpIGlabel;
11623 id->idSetIsDspReloc(tmpDspReloc);
11626 else if (ins == INS_lea)
11628 // Make an instrDesc that looks like IF_RWR_ARD so that emitOutputAM emits the r/m32 for us.
11629 // We basically are doing what emitIns_R_AI does.
11630 // TODO-XArch-Cleanup: revisit this.
11631 instrDescAmd idAmdStackLocal;
11632 instrDescAmd* idAmd = &idAmdStackLocal;
11633 *(instrDesc*)idAmd = *(instrDesc*)id; // copy all the "core" fields
11634 memset((BYTE*)idAmd + sizeof(instrDesc), 0,
11635 sizeof(instrDescAmd) - sizeof(instrDesc)); // zero out the tail that wasn't copied
11637 idAmd->idInsFmt(IF_RWR_ARD);
11638 idAmd->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
11639 idAmd->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
11640 emitSetAmdDisp(idAmd, distVal); // set the displacement
11641 idAmd->idSetIsDspReloc(id->idIsDspReloc());
11642 assert(emitGetInsAmdAny(idAmd) == distVal); // make sure "disp" is stored properly
11644 UNATIVE_OFFSET sz = emitInsSizeAM(idAmd, insCodeRM(ins));
11645 idAmd->idCodeSize(sz);
11647 code = insCodeRM(ins);
11648 code |= (insEncodeReg345(ins, id->idReg1(), EA_PTRSIZE, &code) << 8);
11650 dst = emitOutputAM(dst, idAmd, code, nullptr);
11654 // For forward jumps, record the address of the distance value
11655 // Hard-coded 4 here because we already output the displacement, as the last thing.
11656 id->idjTemp.idjAddr = (dstOffs > srcOffs) ? (dst - 4) : nullptr;
// Common long-form emission: opcode byte(s) then the 32-bit displacement.
11666 if (ins != INS_mov)
11668 dst += emitOutputByte(dst, code);
11672 dst += emitOutputByte(dst, code >> 8);
11676 // For forward jumps, record the address of the distance value
11677 id->idjTemp.idjAddr = (dstOffs > srcOffs) ? dst : nullptr;
11679 dst += emitOutputLong(dst, distVal);
11681 #ifndef _TARGET_AMD64_ // all REL32 on AMD have to go through recordRelocation
11682 if (emitComp->opts.compReloc)
11687 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)distVal, IMAGE_REL_BASED_HIGHLOW);
11689 else if (emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
11691 assert(id->idjKeepLong);
11692 emitRecordRelocation((void*)(dst - sizeof(INT32)), dst + distVal, IMAGE_REL_BASED_REL32);
11697 // Local calls kill all registers
11698 if (ins == INS_call && (emitThisGCrefRegs | emitThisByrefRegs))
11700 emitGCregDeadUpdMask(emitThisGCrefRegs | emitThisByrefRegs, dst);
11706 /*****************************************************************************
11708 * Append the machine code corresponding to the given instruction descriptor
11709 * to the code block at '*dp'; the base of the code block is 'bp', and 'ig'
11710 * is the instruction group that contains the instruction. Updates '*dp' to
11711 * point past the generated code, and returns the size of the instruction
11712 * descriptor in bytes.
11716 #pragma warning(push)
11717 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
11719 size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
11721 assert(emitIssuing);
11724 size_t sz = sizeof(instrDesc);
11725 instruction ins = id->idIns();
11726 unsigned char callInstrSize = 0;
11729 bool dspOffs = emitComp->opts.dspGCtbls;
11732 emitAttr size = id->idOpSize();
11734 assert(REG_NA == (int)REG_NA);
11736 assert(ins != INS_imul || size >= EA_4BYTE); // Has no 'w' bit
11737 assert(instrIs3opImul(id->idIns()) == 0 || size >= EA_4BYTE); // Has no 'w' bit
11739 VARSET_TP GCvars(VarSetOps::UninitVal());
11741 // What instruction format have we got?
11742 switch (id->idInsFmt())
11752 regMaskTP gcrefRegs;
11753 regMaskTP byrefRegs;
11755 /********************************************************************/
11757 /********************************************************************/
11759 // the loop alignment pseudo instruction
11760 if (ins == INS_align)
11762 sz = TINY_IDSC_SIZE;
11763 dst = emitOutputNOP(dst, (-(int)(size_t)dst) & 0x0f);
11764 assert(((size_t)dst & 0x0f) == 0);
11768 if (ins == INS_nop)
11770 dst = emitOutputNOP(dst, id->idCodeSize());
11774 // the cdq instruction kills the EDX register implicitly
11775 if (ins == INS_cdq)
11777 emitGCregDeadUpd(REG_EDX, dst);
11782 #if FEATURE_STACK_FP_X87
11786 #endif // FEATURE_STACK_FP_X87
11788 assert(id->idGCref() == GCT_NONE);
11790 code = insCodeMR(ins);
11792 #ifdef _TARGET_AMD64_
11793 // Support only scalar AVX instructions and hence size is hard coded to 4-byte.
11794 code = AddVexPrefixIfNeeded(ins, code, EA_4BYTE);
11796 if (ins == INS_cdq && TakesRexWPrefix(ins, id->idOpSize()))
11798 code = AddRexWPrefix(ins, code);
11800 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11802 // Is this a 'big' opcode?
11803 if (code & 0xFF000000)
11805 // The high word and then the low word
11806 dst += emitOutputWord(dst, code >> 16);
11807 code &= 0x0000FFFF;
11808 dst += emitOutputWord(dst, code);
11810 else if (code & 0x00FF0000)
11812 // The high byte and then the low word
11813 dst += emitOutputByte(dst, code >> 16);
11814 code &= 0x0000FFFF;
11815 dst += emitOutputWord(dst, code);
11817 else if (code & 0xFF00)
11819 // The 2 byte opcode
11820 dst += emitOutputWord(dst, code);
11824 // The 1 byte opcode
11825 dst += emitOutputByte(dst, code);
11830 /********************************************************************/
11831 /* Simple constant, local label, method */
11832 /********************************************************************/
11835 dst = emitOutputIV(dst, id);
11836 sz = emitSizeOfInsDsc(id);
11842 assert(id->idGCref() == GCT_NONE);
11843 assert(id->idIsBound());
11845 // TODO-XArch-Cleanup: handle IF_RWR_LABEL in emitOutputLJ() or change it to emitOutputAM()?
11846 dst = emitOutputLJ(dst, id);
11847 sz = (id->idInsFmt() == IF_SWR_LABEL ? sizeof(instrDescLbl) : sizeof(instrDescJmp));
11852 // Assume we'll be recording this call
11855 // Get hold of the argument count and field Handle
11856 args = emitGetInsCDinfo(id);
11858 // Is this a "fat" call descriptor?
11859 if (id->idIsLargeCall())
11861 instrDescCGCA* idCall = (instrDescCGCA*)id;
11862 gcrefRegs = idCall->idcGcrefRegs;
11863 byrefRegs = idCall->idcByrefRegs;
11864 VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
11865 sz = sizeof(instrDescCGCA);
11869 assert(!id->idIsLargeDsp());
11870 assert(!id->idIsLargeCns());
11872 gcrefRegs = emitDecodeCallGCregs(id);
11874 VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
11875 sz = sizeof(instrDesc);
11878 addr = (BYTE*)id->idAddr()->iiaAddr;
11879 assert(addr != nullptr);
11881 // Some helpers don't get recorded in GC tables
11882 if (id->idIsNoGC())
11887 // What kind of a call do we have here?
11888 if (id->idInsFmt() == IF_METHPTR)
11890 // This is call indirect via a method pointer
11892 code = insCodeMR(ins);
11893 if (ins == INS_i_jmp)
11898 if (id->idIsDspReloc())
11900 dst += emitOutputWord(dst, code | 0x0500);
11901 #ifdef _TARGET_AMD64_
11902 dst += emitOutputLong(dst, 0);
11904 dst += emitOutputLong(dst, (int)addr);
11906 emitRecordRelocation((void*)(dst - sizeof(int)), addr, IMAGE_REL_BASED_DISP32);
11910 #ifdef _TARGET_X86_
11911 dst += emitOutputWord(dst, code | 0x0500);
11912 #else //_TARGET_AMD64_
11913 // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
11914 // This addr mode should never be used while generating relocatable ngen code nor if
11915 // the addr can be encoded as pc-relative address.
11916 noway_assert(!emitComp->opts.compReloc);
11917 noway_assert(codeGen->genAddrRelocTypeHint((size_t)addr) != IMAGE_REL_BASED_REL32);
11918 noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (ssize_t)addr);
11920 // This requires, specifying a SIB byte after ModRM byte.
11921 dst += emitOutputWord(dst, code | 0x0400);
11922 dst += emitOutputByte(dst, 0x25);
11923 #endif //_TARGET_AMD64_
11924 dst += emitOutputLong(dst, static_cast<int>(reinterpret_cast<intptr_t>(addr)));
11930 // This is call direct where we know the target, thus we can
11931 // use a direct call; the target to jump to is in iiaAddr.
11932 assert(id->idInsFmt() == IF_METHOD);
11934 // Output the call opcode followed by the target distance
11935 dst += (ins == INS_l_jmp) ? emitOutputByte(dst, insCode(ins)) : emitOutputByte(dst, insCodeMI(ins));
11938 #ifdef _TARGET_AMD64_
11939 // All REL32 on Amd64 go through recordRelocation. Here we will output zero to advance dst.
11941 assert(id->idIsDspReloc());
11943 // Calculate PC relative displacement.
11944 // Although you think we should be using sizeof(void*), the x86 and x64 instruction set
11945 // only allow a 32-bit offset, so we correctly use sizeof(INT32)
11946 offset = addr - (dst + sizeof(INT32));
11949 dst += emitOutputLong(dst, offset);
11951 if (id->idIsDspReloc())
11953 emitRecordRelocation((void*)(dst - sizeof(INT32)), addr, IMAGE_REL_BASED_REL32);
11958 /* We update the GC info before the call as the variables cannot be
11959 used by the call. Killing variables before the call helps with
11960 boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029.
11961 If we ever track aliased variables (which could be used by the
11962 call), we would have to keep them alive past the call.
11964 assert(FitsIn<unsigned char>(dst - *dp));
11965 callInstrSize = static_cast<unsigned char>(dst - *dp);
11966 emitUpdateLiveGCvars(GCvars, *dp);
11968 // If the method returns a GC ref, mark EAX appropriately
11969 if (id->idGCref() == GCT_GCREF)
11971 gcrefRegs |= RBM_EAX;
11973 else if (id->idGCref() == GCT_BYREF)
11975 byrefRegs |= RBM_EAX;
11978 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
11979 // If is a multi-register return method is called, mark RDX appropriately (for System V AMD64).
11980 if (id->idIsLargeCall())
11982 instrDescCGCA* idCall = (instrDescCGCA*)id;
11983 if (idCall->idSecondGCref() == GCT_GCREF)
11985 gcrefRegs |= RBM_RDX;
11987 else if (idCall->idSecondGCref() == GCT_BYREF)
11989 byrefRegs |= RBM_RDX;
11992 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
11994 // If the GC register set has changed, report the new set
11995 if (gcrefRegs != emitThisGCrefRegs)
11997 emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);
12000 if (byrefRegs != emitThisByrefRegs)
12002 emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);
12005 if (recCall || args)
12007 // For callee-pop, all arguments will be popped after the call.
12008 // For caller-pop, any GC arguments will go dead after the call.
12010 assert(callInstrSize != 0);
12014 emitStackPop(dst, /*isCall*/ true, callInstrSize, args);
12018 emitStackKillArgs(dst, -args, callInstrSize);
12022 // Do we need to record a call location for GC purposes?
12023 if (!emitFullGCinfo && recCall)
12025 assert(callInstrSize != 0);
12026 emitRecordGCcall(dst, callInstrSize);
12030 if (ins == INS_call)
12032 emitRecordCallSite(emitCurCodeOffs(*dp), id->idDebugOnlyInfo()->idCallSig,
12033 (CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
12039 /********************************************************************/
12040 /* One register operand */
12041 /********************************************************************/
12046 dst = emitOutputR(dst, id);
12047 sz = TINY_IDSC_SIZE;
12050 /********************************************************************/
12051 /* Register and register/constant */
12052 /********************************************************************/
12055 code = insCodeMR(ins);
12056 // Emit the VEX prefix if it exists
12057 code = AddVexPrefixIfNeeded(ins, code, size);
12058 code = insEncodeMRreg(ins, id->idReg1(), size, code);
12061 if (size != EA_1BYTE)
12066 // Emit the REX prefix if it exists
12067 if (TakesRexWPrefix(ins, size))
12069 code = AddRexWPrefix(ins, code);
12072 // Output a size prefix for a 16-bit operand
12073 if (size == EA_2BYTE)
12075 dst += emitOutputByte(dst, 0x66);
12078 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
12079 dst += emitOutputWord(dst, code);
12080 dst += emitOutputByte(dst, emitGetInsSC(id));
12081 sz = emitSizeOfInsDsc(id);
12084 assert(!id->idGCref());
12085 emitGCregDeadUpd(id->idReg1(), dst);
12092 dst = emitOutputRR(dst, id);
12093 sz = TINY_IDSC_SIZE;
12099 dst = emitOutputRI(dst, id);
12100 sz = emitSizeOfInsDsc(id);
12103 case IF_RWR_RRD_RRD:
12104 dst = emitOutputRRR(dst, id);
12105 sz = emitSizeOfInsDsc(id);
12107 case IF_RWR_RRD_RRD_CNS:
12108 case IF_RWR_RRD_RRD_RRD:
12109 dst = emitOutputRRR(dst, id);
12110 sz = emitSizeOfInsDsc(id);
12111 dst += emitOutputByte(dst, emitGetInsSC(id));
12114 case IF_RRW_RRW_CNS:
12115 assert(id->idGCref() == GCT_NONE);
12117 // Get the 'base' opcode (it's a big one)
12118 // Also, determine which operand goes where in the ModRM byte.
12121 // if (ins == INS_shld || ins == INS_shrd || ins == INS_vextractf128 || ins == INS_vinsertf128)
12122 if (hasCodeMR(ins))
12124 code = insCodeMR(ins);
12125 // Emit the VEX prefix if it exists
12126 code = AddVexPrefixIfNeeded(ins, code, size);
12127 code = insEncodeMRreg(ins, code);
12128 mReg = id->idReg1();
12129 rReg = id->idReg2();
12133 code = insCodeRM(ins);
12134 // Emit the VEX prefix if it exists
12135 code = AddVexPrefixIfNeeded(ins, code, size);
12136 code = insEncodeRMreg(ins, code);
12137 mReg = id->idReg2();
12138 rReg = id->idReg1();
12140 assert(code & 0x00FF0000);
12142 if (TakesRexWPrefix(ins, size))
12144 code = AddRexWPrefix(ins, code);
12147 if (TakesVexPrefix(ins))
12149 if (IsDstDstSrcAVXInstruction(ins))
12151 // Encode source/dest operand reg in 'vvvv' bits in 1's complement form
12152 // This code will have to change when we support 3 operands.
12153 // For now, we always overload this source with the destination (always reg1).
12154 // (Though we will need to handle the few ops that can have the 'vvvv' bits as destination,
12155 // e.g. pslldq, when/if we support those instructions with 2 registers.)
12156 // (see x64 manual Table 2-9. Instructions with a VEX.vvvv destination)
12157 code = insEncodeReg3456(ins, id->idReg1(), size, code);
12159 else if (IsDstSrcSrcAVXInstruction(ins))
12161 // This is a "merge" move instruction.
12162 // Encode source operand reg in 'vvvv' bits in 1's complement form
12163 code = insEncodeReg3456(ins, id->idReg2(), size, code);
12167 regcode = (insEncodeReg345(ins, rReg, size, &code) | insEncodeReg012(ins, mReg, size, &code));
12169 // Output the REX prefix
12170 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
12172 if (code & 0xFF000000)
12174 // Output the highest word of the opcode
12175 dst += emitOutputWord(dst, code >> 16);
12176 code &= 0x0000FFFF;
12178 if (Is4ByteSSE4Instruction(ins))
12180 // Output 3rd byte of the opcode
12181 dst += emitOutputByte(dst, code);
12185 else if (code & 0x00FF0000)
12187 dst += emitOutputByte(dst, code >> 16);
12188 code &= 0x0000FFFF;
12191 // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
12192 if ((code & 0xFF00) == 0xC000)
12194 dst += emitOutputWord(dst, code | (regcode << 8));
12196 else if ((code & 0xFF) == 0x00)
12198 // This case happens for SSE4/AVX instructions only
12199 assert(IsAVXInstruction(ins) || IsSSE4Instruction(ins));
12201 dst += emitOutputByte(dst, (code >> 8) & 0xFF);
12202 dst += emitOutputByte(dst, (0xC0 | regcode));
12206 dst += emitOutputWord(dst, code);
12207 dst += emitOutputByte(dst, (0xC0 | regcode));
12210 dst += emitOutputByte(dst, emitGetInsSC(id));
12211 sz = emitSizeOfInsDsc(id);
12213 // Kill any GC ref in the destination register if necessary.
12214 if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
12216 emitGCregDeadUpd(id->idReg1(), dst);
12220 /********************************************************************/
12221 /* Address mode operand */
12222 /********************************************************************/
12228 #if FEATURE_STACK_FP_X87
12234 // case IF_ARD_TRD:
12235 // case IF_ARW_TRD:
12238 #endif // FEATURE_STACK_FP_X87
12240 dst = emitCodeWithInstructionSize(dst, emitOutputAM(dst, id, insCodeMR(ins)), &callInstrSize);
12247 // Get hold of the argument count and method handle
12248 args = emitGetInsCIargs(id);
12250 // Is this a "fat" call descriptor?
12251 if (id->idIsLargeCall())
12253 instrDescCGCA* idCall = (instrDescCGCA*)id;
12255 gcrefRegs = idCall->idcGcrefRegs;
12256 byrefRegs = idCall->idcByrefRegs;
12257 VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
12258 sz = sizeof(instrDescCGCA);
12262 assert(!id->idIsLargeDsp());
12263 assert(!id->idIsLargeCns());
12265 gcrefRegs = emitDecodeCallGCregs(id);
12267 VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
12268 sz = sizeof(instrDesc);
12276 sz = emitSizeOfInsDsc(id);
12281 case IF_RRW_ARD_CNS:
12282 case IF_RWR_ARD_CNS:
12283 emitGetInsAmdCns(id, &cnsVal);
12284 code = insCodeRM(ins);
12286 // Special case 4-byte AVX instructions
12287 if (Is4ByteSSE4OrAVXInstruction(ins))
12289 dst = emitOutputAM(dst, id, code, &cnsVal);
12293 code = AddVexPrefixIfNeeded(ins, code, size);
12294 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12295 dst = emitOutputAM(dst, id, code | regcode, &cnsVal);
12298 sz = emitSizeOfInsDsc(id);
12301 case IF_AWR_RRD_CNS:
12302 assert(ins == INS_vextracti128 || ins == INS_vextractf128);
12303 assert(UseVEXEncoding());
12304 emitGetInsAmdCns(id, &cnsVal);
12305 code = insCodeMR(ins);
12306 dst = emitOutputAM(dst, id, code, &cnsVal);
12307 sz = emitSizeOfInsDsc(id);
12313 case IF_RWR_RRD_ARD:
12314 code = insCodeRM(ins);
12315 if (Is4ByteSSE4OrAVXInstruction(ins))
12317 dst = emitOutputAM(dst, id, code);
12321 code = AddVexPrefixIfNeeded(ins, code, size);
12322 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12323 dst = emitOutputAM(dst, id, code | regcode);
12325 sz = emitSizeOfInsDsc(id);
12328 case IF_RWR_RRD_ARD_CNS:
12330 emitGetInsAmdCns(id, &cnsVal);
12331 code = insCodeRM(ins);
12332 if (Is4ByteSSE4OrAVXInstruction(ins))
12334 dst = emitOutputAM(dst, id, code, &cnsVal);
12338 code = AddVexPrefixIfNeeded(ins, code, size);
12339 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12340 dst = emitOutputAM(dst, id, code | regcode, &cnsVal);
12342 sz = emitSizeOfInsDsc(id);
12349 code = insCodeMR(ins);
12350 code = AddVexPrefixIfNeeded(ins, code, size);
12351 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12352 dst = emitOutputAM(dst, id, code | regcode);
12353 sz = emitSizeOfInsDsc(id);
12359 emitGetInsAmdCns(id, &cnsVal);
12360 dst = emitOutputAM(dst, id, insCodeMI(ins), &cnsVal);
12361 sz = emitSizeOfInsDsc(id);
12365 emitGetInsAmdCns(id, &cnsVal);
12366 dst = emitOutputAM(dst, id, insCodeMR(ins), &cnsVal);
12367 sz = emitSizeOfInsDsc(id);
12370 /********************************************************************/
12371 /* Stack-based operand */
12372 /********************************************************************/
12378 #if FEATURE_STACK_FP_X87
12384 // case IF_SRD_TRD:
12385 // case IF_SRW_TRD:
12388 #endif // FEATURE_STACK_FP_X87
12390 assert(ins != INS_pop_hide);
12391 if (ins == INS_pop)
12393 // The offset in "pop [ESP+xxx]" is relative to the new ESP value
12394 CLANG_FORMAT_COMMENT_ANCHOR;
12396 #if !FEATURE_FIXED_OUT_ARGS
12397 emitCurStackLvl -= sizeof(int);
12399 dst = emitOutputSV(dst, id, insCodeMR(ins));
12401 #if !FEATURE_FIXED_OUT_ARGS
12402 emitCurStackLvl += sizeof(int);
12407 dst = emitCodeWithInstructionSize(dst, emitOutputSV(dst, id, insCodeMR(ins)), &callInstrSize);
12409 if (ins == INS_call)
12419 emitGetInsCns(id, &cnsVal);
12420 dst = emitOutputSV(dst, id, insCodeMI(ins), &cnsVal);
12421 sz = emitSizeOfInsDsc(id);
12425 emitGetInsCns(id, &cnsVal);
12426 dst = emitOutputSV(dst, id, insCodeMR(ins), &cnsVal);
12427 sz = emitSizeOfInsDsc(id);
12430 case IF_RRW_SRD_CNS:
12431 case IF_RWR_SRD_CNS:
12432 emitGetInsCns(id, &cnsVal);
12433 code = insCodeRM(ins);
12435 // Special case 4-byte AVX instructions
12436 if (Is4ByteSSE4OrAVXInstruction(ins))
12438 dst = emitOutputSV(dst, id, code, &cnsVal);
12442 code = AddVexPrefixIfNeeded(ins, code, size);
12444 // In case of AVX instructions that take 3 operands, encode reg1 as first source.
12445 // Note that reg1 is both a source and a destination.
12447 // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
12448 // now we use the single source as source1 and source2.
12449 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
12450 if (IsDstDstSrcAVXInstruction(ins))
12452 // encode source operand reg in 'vvvv' bits in 1's complement form
12453 code = insEncodeReg3456(ins, id->idReg1(), size, code);
12456 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12457 dst = emitOutputSV(dst, id, code | regcode, &cnsVal);
12460 sz = emitSizeOfInsDsc(id);
12466 code = insCodeRM(ins);
12468 // 4-byte AVX instructions are special cased inside emitOutputSV
12469 // since they do not have space to encode ModRM byte.
12470 if (Is4ByteSSE4OrAVXInstruction(ins))
12472 dst = emitOutputSV(dst, id, code);
12476 code = AddVexPrefixIfNeeded(ins, code, size);
12478 if (IsDstDstSrcAVXInstruction(ins))
12480 // encode source operand reg in 'vvvv' bits in 1's complement form
12481 code = insEncodeReg3456(ins, id->idReg1(), size, code);
12484 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12485 dst = emitOutputSV(dst, id, code | regcode);
12489 case IF_RWR_RRD_SRD:
12491 // This should only be called on AVX instructions
12492 assert(IsAVXInstruction(ins));
12494 code = insCodeRM(ins);
12495 code = AddVexPrefixIfNeeded(ins, code, size);
12496 code = insEncodeReg3456(ins, id->idReg2(), size,
12497 code); // encode source operand reg in 'vvvv' bits in 1's complement form
12499 // 4-byte AVX instructions are special cased inside emitOutputSV
12500 // since they do not have space to encode ModRM byte.
12501 if (Is4ByteSSE4OrAVXInstruction(ins))
12503 dst = emitOutputSV(dst, id, code);
12507 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12508 dst = emitOutputSV(dst, id, code | regcode);
12513 case IF_RWR_RRD_SRD_CNS:
12515 // This should only be called on AVX instructions
12516 assert(IsAVXInstruction(ins));
12517 emitGetInsCns(id, &cnsVal);
12519 code = insCodeRM(ins);
12520 code = AddVexPrefixIfNeeded(ins, code, size);
12521 code = insEncodeReg3456(ins, id->idReg2(), size,
12522 code); // encode source operand reg in 'vvvv' bits in 1's complement form
12524 // 4-byte AVX instructions are special cased inside emitOutputSV
12525 // since they do not have space to encode ModRM byte.
12526 if (Is4ByteSSE4OrAVXInstruction(ins))
12528 dst = emitOutputSV(dst, id, code, &cnsVal);
12532 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12533 dst = emitOutputSV(dst, id, code | regcode, &cnsVal);
12541 code = insCodeMR(ins);
12542 code = AddVexPrefixIfNeeded(ins, code, size);
12544 // In case of AVX instructions that take 3 operands, encode reg1 as first source.
12545 // Note that reg1 is both a source and a destination.
12547 // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
12548 // now we use the single source as source1 and source2.
12549 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
12550 if (IsDstDstSrcAVXInstruction(ins))
12552 // encode source operand reg in 'vvvv' bits in 1's complement form
12553 code = insEncodeReg3456(ins, id->idReg1(), size, code);
12556 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12557 dst = emitOutputSV(dst, id, code | regcode);
12560 /********************************************************************/
12561 /* Direct memory address */
12562 /********************************************************************/
12568 #if FEATURE_STACK_FP_X87
12574 // case IF_MRD_TRD:
12575 // case IF_MRW_TRD:
12578 #endif // FEATURE_STACK_FP_X87
12580 noway_assert(ins != INS_call);
12581 dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500);
12582 sz = emitSizeOfInsDsc(id);
12586 dst = emitOutputCV(dst, id, insCodeMI(ins));
12589 case IF_RRW_MRD_CNS:
12590 case IF_RWR_MRD_CNS:
12591 emitGetInsDcmCns(id, &cnsVal);
12592 code = insCodeRM(ins);
12594 // Special case 4-byte AVX instructions
12595 if (Is4ByteSSE4OrAVXInstruction(ins))
12597 dst = emitOutputCV(dst, id, code, &cnsVal);
12601 code = AddVexPrefixIfNeeded(ins, code, size);
12603 // In case of AVX instructions that take 3 operands, encode reg1 as first source.
12604 // Note that reg1 is both a source and a destination.
12606 // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
12607 // now we use the single source as source1 and source2.
12608 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
12609 if (IsDstDstSrcAVXInstruction(ins))
12611 // encode source operand reg in 'vvvv' bits in 1's complement form
12612 code = insEncodeReg3456(ins, id->idReg1(), size, code);
12615 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12616 dst = emitOutputCV(dst, id, code | regcode | 0x0500, &cnsVal);
12619 sz = emitSizeOfInsDsc(id);
12622 case IF_MWR_RRD_CNS:
12623 assert(ins == INS_vextracti128 || ins == INS_vextractf128);
12624 assert(UseVEXEncoding());
12625 emitGetInsDcmCns(id, &cnsVal);
12626 code = insCodeMR(ins);
12627 // only AVX2 vextracti128 and AVX vextractf128 can reach this path,
12628 // they do not need VEX.vvvv to encode the register operand
12629 dst = emitOutputCV(dst, id, code, &cnsVal);
12630 sz = emitSizeOfInsDsc(id);
12636 code = insCodeRM(ins);
12637 // Special case 4-byte AVX instructions
12638 if (Is4ByteSSE4OrAVXInstruction(ins))
12640 dst = emitOutputCV(dst, id, code);
12644 code = AddVexPrefixIfNeeded(ins, code, size);
12646 if (IsDstDstSrcAVXInstruction(ins))
12648 // encode source operand reg in 'vvvv' bits in 1's complement form
12649 code = insEncodeReg3456(ins, id->idReg1(), size, code);
12652 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12653 dst = emitOutputCV(dst, id, code | regcode | 0x0500);
12655 sz = emitSizeOfInsDsc(id);
12658 case IF_RWR_RRD_MRD:
12660 // This should only be called on AVX instructions
12661 assert(IsAVXInstruction(ins));
12663 code = insCodeRM(ins);
12664 code = AddVexPrefixIfNeeded(ins, code, size);
12665 code = insEncodeReg3456(ins, id->idReg2(), size,
12666 code); // encode source operand reg in 'vvvv' bits in 1's complement form
12668 // Special case 4-byte AVX instructions
12669 if (Is4ByteSSE4OrAVXInstruction(ins))
12671 dst = emitOutputCV(dst, id, code);
12675 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12676 dst = emitOutputCV(dst, id, code | regcode | 0x0500);
12678 sz = emitSizeOfInsDsc(id);
12682 case IF_RWR_RRD_MRD_CNS:
12684 // This should only be called on AVX instructions
12685 assert(IsAVXInstruction(ins));
12686 emitGetInsCns(id, &cnsVal);
12688 code = insCodeRM(ins);
12689 code = AddVexPrefixIfNeeded(ins, code, size);
12690 code = insEncodeReg3456(ins, id->idReg2(), size,
12691 code); // encode source operand reg in 'vvvv' bits in 1's complement form
12693 // Special case 4-byte AVX instructions
12694 if (Is4ByteSSE4OrAVXInstruction(ins))
12696 dst = emitOutputCV(dst, id, code, &cnsVal);
12700 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12701 dst = emitOutputCV(dst, id, code | regcode | 0x0500, &cnsVal);
12703 sz = emitSizeOfInsDsc(id);
12707 case IF_RWR_MRD_OFF:
12708 code = insCode(ins);
12709 code = AddVexPrefixIfNeeded(ins, code, size);
12711 // In case of AVX instructions that take 3 operands, encode reg1 as first source.
12712 // Note that reg1 is both a source and a destination.
12714 // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
12715 // now we use the single source as source1 and source2.
12716 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
12717 if (IsDstDstSrcAVXInstruction(ins))
12719 // encode source operand reg in 'vvvv' bits in 1's complement form
12720 code = insEncodeReg3456(ins, id->idReg1(), size, code);
12723 regcode = insEncodeReg012(id->idIns(), id->idReg1(), size, &code);
12724 dst = emitOutputCV(dst, id, code | 0x30 | regcode);
12725 sz = emitSizeOfInsDsc(id);
12731 code = insCodeMR(ins);
12732 code = AddVexPrefixIfNeeded(ins, code, size);
12734 // In case of AVX instructions that take 3 operands, encode reg1 as first source.
12735 // Note that reg1 is both a source and a destination.
12737 // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
12738 // now we use the single source as source1 and source2.
12739 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
12740 if (IsDstDstSrcAVXInstruction(ins))
12742 // encode source operand reg in 'vvvv' bits in 1's complement form
12743 code = insEncodeReg3456(ins, id->idReg1(), size, code);
12746 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12747 dst = emitOutputCV(dst, id, code | regcode | 0x0500);
12748 sz = emitSizeOfInsDsc(id);
12754 emitGetInsDcmCns(id, &cnsVal);
12755 dst = emitOutputCV(dst, id, insCodeMI(ins) | 0x0500, &cnsVal);
12756 sz = emitSizeOfInsDsc(id);
12760 emitGetInsDcmCns(id, &cnsVal);
12761 dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500, &cnsVal);
12762 sz = emitSizeOfInsDsc(id);
12765 #if FEATURE_STACK_FP_X87
12767 /********************************************************************/
12768 /* FP coprocessor stack operands */
12769 /********************************************************************/
12774 assert(id->idGCref() == GCT_NONE);
12775 dst += emitOutputWord(dst, insCodeMR(ins) | 0xC000 | (id->idReg1() << 8));
12781 assert(id->idGCref() == GCT_NONE);
12782 dst += emitOutputWord(dst, insCodeMR(ins) | 0xC004 | (id->idReg1() << 8));
12785 #endif // FEATURE_STACK_FP_X87
12787 /********************************************************************/
12789 /********************************************************************/
12794 printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
12795 assert(!"don't know how to encode this instruction");
12800 // Make sure we set the instruction descriptor size correctly
12801 assert(sz == emitSizeOfInsDsc(id));
12803 #if !FEATURE_FIXED_OUT_ARGS
12804 bool updateStackLevel = !emitIGisInProlog(ig) && !emitIGisInEpilog(ig);
12806 #if FEATURE_EH_FUNCLETS
12807 updateStackLevel = updateStackLevel && !emitIGisInFuncletProlog(ig) && !emitIGisInFuncletEpilog(ig);
12808 #endif // FEATURE_EH_FUNCLETS
12810 // Make sure we keep the current stack level up to date
12811 if (updateStackLevel)
12816 // Please note: {INS_push_hide,IF_LABEL} is used to push the address of the
12817 // finally block for calling it locally for an op_leave.
12818 emitStackPush(dst, id->idGCref());
12822 emitStackPop(dst, false, /*callInstrSize*/ 0, 1);
12826 // Check for "sub ESP, icon"
12827 if (ins == INS_sub && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
12829 assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
12830 emitStackPushN(dst, (unsigned)(emitGetInsSC(id) / TARGET_POINTER_SIZE));
12835 // Check for "add ESP, icon"
12836 if (ins == INS_add && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
12838 assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
12839 emitStackPop(dst, /*isCall*/ false, /*callInstrSize*/ 0,
12840 (unsigned)(emitGetInsSC(id) / TARGET_POINTER_SIZE));
12849 #endif // !FEATURE_FIXED_OUT_ARGS
12851 assert((int)emitCurStackLvl >= 0);
12853 // Only epilog "instructions" and some pseudo-instrs
12854 // are allowed not to generate any code
12856 assert(*dp != dst || emitInstHasNoCode(ins));
12859 if (emitComp->opts.disAsm || emitComp->opts.dspEmit || emitComp->verbose)
12861 emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(*dp), *dp, (dst - *dp));
12864 if (emitComp->compDebugBreak)
12866 // set JitEmitPrintRefRegs=1 will print out emitThisGCrefRegs and emitThisByrefRegs
12867 // at the beginning of this method.
12868 if (JitConfig.JitEmitPrintRefRegs() != 0)
12870 printf("Before emitOutputInstr for id->idDebugOnlyInfo()->idNum=0x%02x\n", id->idDebugOnlyInfo()->idNum);
12871 printf(" emitThisGCrefRegs(0x%p)=", emitComp->dspPtr(&emitThisGCrefRegs));
12872 printRegMaskInt(emitThisGCrefRegs);
12873 emitDispRegSet(emitThisGCrefRegs);
12875 printf(" emitThisByrefRegs(0x%p)=", emitComp->dspPtr(&emitThisByrefRegs));
12876 printRegMaskInt(emitThisByrefRegs);
12877 emitDispRegSet(emitThisByrefRegs);
12881 // For example, set JitBreakEmitOutputInstr=a6 will break when this method is called for
12882 // emitting instruction a6, (i.e. IN00a6 in jitdump).
12883 if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum)
12885 assert(!"JitBreakEmitOutputInstr reached");
12890 #ifdef TRANSLATE_PDB
12893 // only map instruction groups to instruction groups
12894 MapCode(id->idDebugOnlyInfo()->idilStart, *dp);
12901 if (ins == INS_mulEAX || ins == INS_imulEAX)
12903 // INS_mulEAX has implicit target of Edx:Eax. Make sure
12904 // that we detected this cleared its GC-status.
12906 assert(((RBM_EAX | RBM_EDX) & (emitThisGCrefRegs | emitThisByrefRegs)) == 0);
12909 if (instrIs3opImul(ins))
12911 // The target of the 3-operand imul is implicitly encoded. Make sure
12912 // that we detected the implicit register and cleared its GC-status.
12914 regMaskTP regMask = genRegMask(inst3opImulReg(ins));
12915 assert((regMask & (emitThisGCrefRegs | emitThisByrefRegs)) == 0);
12922 #pragma warning(pop)
12925 /*****************************************************************************/
12926 /*****************************************************************************/
12928 #endif // defined(_TARGET_XARCH_)