1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
19 #if defined(_TARGET_XARCH_)
21 /*****************************************************************************/
22 /*****************************************************************************/
28 bool IsSSEInstruction(instruction ins)
30 return (ins >= INS_FIRST_SSE_INSTRUCTION) && (ins <= INS_LAST_SSE_INSTRUCTION);
33 bool IsSSEOrAVXInstruction(instruction ins)
35 return (ins >= INS_FIRST_SSE_INSTRUCTION) && (ins <= INS_LAST_AVX_INSTRUCTION);
38 bool IsAVXOnlyInstruction(instruction ins)
40 return (ins >= INS_FIRST_AVX_INSTRUCTION) && (ins <= INS_LAST_AVX_INSTRUCTION);
43 bool IsFMAInstruction(instruction ins)
45 return (ins >= INS_FIRST_FMA_INSTRUCTION) && (ins <= INS_LAST_FMA_INSTRUCTION);
48 bool IsBMIInstruction(instruction ins)
50 return (ins >= INS_FIRST_BMI_INSTRUCTION) && (ins <= INS_LAST_BMI_INSTRUCTION);
53 regNumber getBmiRegNumber(instruction ins)
74 assert(IsBMIInstruction(ins));
80 regNumber getSseShiftRegNumber(instruction ins)
116 assert(!"Invalid instruction for SSE2 instruction of the form: opcode reg, immed8");
122 bool emitter::IsAVXInstruction(instruction ins)
124 return UseVEXEncoding() && IsSSEOrAVXInstruction(ins);
127 // Returns true if the AVX instruction is a binary operator that requires 3 operands.
128 // When we emit an instruction with only two operands, we will duplicate the destination
130 // TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to
131 // be formalized by adding an additional field to instruction table to
132 // to indicate whether a 3-operand instruction.
133 bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
135 return ((CodeGenInterface::instInfo[ins] & INS_Flags_IsDstDstSrcAVXInstruction) != 0) && IsAVXInstruction(ins);
138 // Returns true if the AVX instruction requires 3 operands that duplicate the source
139 // register in the vvvv field.
140 // TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to
141 // be formalized by adding an additional field to instruction table to
142 // to indicate whether a 3-operand instruction.
143 bool emitter::IsDstSrcSrcAVXInstruction(instruction ins)
145 return ((CodeGenInterface::instInfo[ins] & INS_Flags_IsDstSrcSrcAVXInstruction) != 0) && IsAVXInstruction(ins);
148 //------------------------------------------------------------------------
149 // AreUpper32BitsZero: check if some previously emitted
150 // instruction set the upper 32 bits of reg to zero.
153 // reg - register of interest
156 // true if previous instruction zeroed reg's upper 32 bits.
157 // false if it did not, or if we can't safely determine.
160 // Currently only looks back one instruction.
162 // movsx eax, ... might seem viable but we always encode this
163 // instruction with a 64 bit destination. See TakesRexWPrefix.
165 bool emitter::AreUpper32BitsZero(regNumber reg)
167 // Don't look back across IG boundaries (possible control flow)
168 if (emitCurIGinsCnt == 0)
173 instrDesc* id = emitLastIns;
174 insFormat fmt = id->idInsFmt();
176 // This isn't meant to be a comprehensive check. Just look for what
177 // seems to be common.
189 // Bail if not writing to the right register
190 if (id->idReg1() != reg)
195 // Bail if movsx, we always have movsx sign extend to 8 bytes
196 if (id->idIns() == INS_movsx)
201 // movzx always zeroes the upper 32 bits.
202 if (id->idIns() == INS_movzx)
207 // Else rely on operation size.
208 return (id->idOpSize() == EA_4BYTE);
217 #ifdef FEATURE_HW_INTRINSICS
218 //------------------------------------------------------------------------
219 // IsDstSrcImmAvxInstruction: Checks if the instruction has a "reg, reg/mem, imm" or
220 // "reg/mem, reg, imm" form for the legacy, VEX, and EVEX
224 // instruction -- processor instruction to check
227 // true if instruction has a "reg, reg/mem, imm" or "reg/mem, reg, imm" encoding
228 // form for the legacy, VEX, and EVEX encodings.
230 // That is, the instruction takes two operands, one of which is immediate, and it
231 // does not need to encode any data in the VEX.vvvv field.
233 static bool IsDstSrcImmAvxInstruction(instruction ins)
237 case INS_aeskeygenassist:
253 #endif // FEATURE_HW_INTRINSICS
255 // -------------------------------------------------------------------
256 // Is4ByteSSEInstruction: Returns true if the SSE instruction is a 4-byte opcode.
261 // Note that this should be true for any of the instructions in instrsXArch.h
262 // that use the SSE38 or SSE3A macro but returns false if the VEX encoding is
263 // in use, since that encoding does not require an additional byte.
264 bool emitter::Is4ByteSSEInstruction(instruction ins)
266 return !UseVEXEncoding() && EncodedBySSE38orSSE3A(ins);
269 // Returns true if this instruction requires a VEX prefix
270 // All AVX instructions require a VEX prefix
271 bool emitter::TakesVexPrefix(instruction ins)
273 // special case vzeroupper as it requires 2-byte VEX prefix
274 // special case the fencing, movnti and the prefetch instructions as they never take a VEX prefix
280 case INS_prefetchnta:
291 return IsAVXInstruction(ins);
294 // Add base VEX prefix without setting W, R, X, or B bits
295 // L bit will be set based on emitter attr.
297 // 2-byte VEX prefix = C5 <R,vvvv,L,pp>
298 // 3-byte VEX prefix = C4 <R,X,B,m-mmmm> <W,vvvv,L,pp>
299 // - R, X, B, W - bits to express corresponding REX prefixes
301 // 0-00001 - implied leading 0F opcode byte
302 // 0-00010 - implied leading 0F 38 opcode bytes
303 // 0-00011 - implied leading 0F 3A opcode bytes
304 // Rest - reserved for future use; using them will result in an undefined-instruction exception
306 // - vvvv (4-bits) - register specifier in 1's complement form; must be 1111 if unused
307 // - L - scalar or AVX-128 bit operations (L=0), 256-bit operations (L=1)
308 // - pp (2-bits) - opcode extension providing equivalent functionality of a SIMD size prefix
309 // these prefixes are treated mandatory when used with escape opcode 0Fh for
310 // some SIMD instructions
311 // 00 - None (0F - packed float)
312 // 01 - 66 (66 0F - packed double)
313 // 10 - F3 (F3 0F - scalar float)
314 // 11 - F2 (F2 0F - scalar double)
315 #define DEFAULT_3BYTE_VEX_PREFIX 0xC4E07800000000ULL
316 #define DEFAULT_3BYTE_VEX_PREFIX_MASK 0xFFFFFF00000000ULL
317 #define LBIT_IN_3BYTE_VEX_PREFIX 0x00000400000000ULL
318 emitter::code_t emitter::AddVexPrefix(instruction ins, code_t code, emitAttr attr)
320 // The 2-byte VEX encoding is preferred when possible, but actually emitting
321 // it depends on a number of factors that we may not know until much later.
323 // In order to handle this "easily", we just carry the 3-byte encoding all
324 // the way through and "fix-up" the encoding when the VEX prefix is actually
325 // emitted, by simply checking that all the requirements were met.
327 // Only AVX instructions require VEX prefix
328 assert(IsAVXInstruction(ins));
330 // Shouldn't have already added VEX prefix
331 assert(!hasVexPrefix(code));
333 assert((code & DEFAULT_3BYTE_VEX_PREFIX_MASK) == 0);
335 code |= DEFAULT_3BYTE_VEX_PREFIX;
337 if (attr == EA_32BYTE)
339 // Set L bit to 1 in case of instructions that operate on 256-bits.
340 code |= LBIT_IN_3BYTE_VEX_PREFIX;
346 // Returns true if this instruction, for the given EA_SIZE(attr), will require a REX.W prefix
347 bool TakesRexWPrefix(instruction ins, emitAttr attr)
349 // Because the current implementation of AVX does not have a way to distinguish between the register
350 // size specification (128 vs. 256 bits) and the operand size specification (32 vs. 64 bits), where both are
351 // required, the instruction must be created with the register size attribute (EA_16BYTE or EA_32BYTE),
352 // and here we must special case these by the opcode.
361 case INS_vfmadd132pd:
362 case INS_vfmadd213pd:
363 case INS_vfmadd231pd:
364 case INS_vfmadd132sd:
365 case INS_vfmadd213sd:
366 case INS_vfmadd231sd:
367 case INS_vfmaddsub132pd:
368 case INS_vfmaddsub213pd:
369 case INS_vfmaddsub231pd:
370 case INS_vfmsubadd132pd:
371 case INS_vfmsubadd213pd:
372 case INS_vfmsubadd231pd:
373 case INS_vfmsub132pd:
374 case INS_vfmsub213pd:
375 case INS_vfmsub231pd:
376 case INS_vfmsub132sd:
377 case INS_vfmsub213sd:
378 case INS_vfmsub231sd:
379 case INS_vfnmadd132pd:
380 case INS_vfnmadd213pd:
381 case INS_vfnmadd231pd:
382 case INS_vfnmadd132sd:
383 case INS_vfnmadd213sd:
384 case INS_vfnmadd231sd:
385 case INS_vfnmsub132pd:
386 case INS_vfnmsub213pd:
387 case INS_vfnmsub231pd:
388 case INS_vfnmsub132sd:
389 case INS_vfnmsub213sd:
390 case INS_vfnmsub231sd:
401 #ifdef _TARGET_AMD64_
402 // movsx should always sign extend out to 8 bytes just because we don't track
403 // whether the dest should be 4 bytes or 8 bytes (attr indicates the size
404 // of the source, not the dest).
405 // A 4-byte movzx is equivalent to an 8-byte movzx, so it is not special-cased.
408 // Rex_jmp = jmp with rex prefix always requires rex.w prefix.
409 if (ins == INS_movsx || ins == INS_rex_jmp)
414 if (EA_SIZE(attr) != EA_8BYTE)
419 if (IsSSEOrAVXInstruction(ins))
447 // TODO-XArch-Cleanup: Better way to not emit REX.W when we don't need it, than just testing all these
449 // These are all the instructions that default to 8-byte operand without the REX.W bit
450 // With 1 special case: movzx because the 4 byte version still zeros-out the hi 4 bytes
451 // so we never need it
452 if ((ins != INS_push) && (ins != INS_pop) && (ins != INS_movq) && (ins != INS_movzx) && (ins != INS_push_hide) &&
453 (ins != INS_pop_hide) && (ins != INS_ret) && (ins != INS_call) && !((ins >= INS_i_jmp) && (ins <= INS_l_jg)))
461 #else //!_TARGET_AMD64 = _TARGET_X86_
463 #endif //!_TARGET_AMD64_
466 // Returns true if using this register will require a REX.* prefix.
467 // Since XMM registers overlap with YMM registers, this routine
468 // can also be used to check a YMM register when the
469 // instruction in question is AVX.
470 bool IsExtendedReg(regNumber reg)
472 #ifdef _TARGET_AMD64_
473 return ((reg >= REG_R8) && (reg <= REG_R15)) || ((reg >= REG_XMM8) && (reg <= REG_XMM15));
475 // X86 JIT operates in 32-bit mode and hence extended reg are not available.
480 // Returns true if using this register, for the given EA_SIZE(attr), will require a REX.* prefix
481 bool IsExtendedReg(regNumber reg, emitAttr attr)
483 #ifdef _TARGET_AMD64_
484 // Not a register, so doesn't need a prefix
490 // Opcode field only has 3 bits for the register, these high registers
491 // need a 4th bit, that comes from the REX prefix (either REX.X, REX.R, or REX.B)
492 if (IsExtendedReg(reg))
497 if (EA_SIZE(attr) != EA_1BYTE)
502 // There are 12 one byte registers addressable 'below' r8b:
503 // al, cl, dl, bl, ah, ch, dh, bh, spl, bpl, sil, dil.
504 // The first 4 are always addressable, the last 8 are divided into 2 sets:
507 // spl, bpl, sil, dil
508 // Both sets are encoded exactly the same, the difference is the presence
509 // of a REX prefix, even a REX prefix with no other bits set (0x40).
510 // So in order to get to the second set we need a REX prefix (but no bits).
512 // TODO-AMD64-CQ: if we ever want to start using the first set, we'll need a different way of
513 // encoding/tracking registers.
514 return (reg >= REG_RSP);
516 // X86 JIT operates in 32-bit mode and hence extended reg are not available.
521 // Since XMM registers overlap with YMM registers, this routine
522 // can also used to know whether a YMM register in case of AVX instructions.
523 bool IsXMMReg(regNumber reg)
525 #ifdef _TARGET_AMD64_
526 return (reg >= REG_XMM0) && (reg <= REG_XMM15);
527 #else // !_TARGET_AMD64_
528 return (reg >= REG_XMM0) && (reg <= REG_XMM7);
529 #endif // !_TARGET_AMD64_
532 // Returns bits to be encoded in instruction for the given register.
533 unsigned RegEncoding(regNumber reg)
535 static_assert((REG_XMM0 & 0x7) == 0, "bad XMMBASE");
536 return (unsigned)(reg & 0x7);
539 // Utility routines that abstract the logic of adding REX.W, REX.R, REX.X, REX.B and REX prefixes
540 // SSE2: separate 1-byte prefix gets added before opcode.
541 // AVX: specific bits within VEX prefix need to be set in bit-inverted form.
542 emitter::code_t emitter::AddRexWPrefix(instruction ins, code_t code)
544 if (UseVEXEncoding() && IsAVXInstruction(ins))
546 if (TakesVexPrefix(ins))
548 // W-bit is available only in 3-byte VEX prefix that starts with byte C4.
549 assert(hasVexPrefix(code));
551 // W-bit is the only bit that is added in non bit-inverted form.
552 return emitter::code_t(code | 0x00008000000000ULL);
555 #ifdef _TARGET_AMD64_
556 return emitter::code_t(code | 0x4800000000ULL);
558 assert(!"UNREACHED");
563 #ifdef _TARGET_AMD64_
565 emitter::code_t emitter::AddRexRPrefix(instruction ins, code_t code)
567 if (UseVEXEncoding() && IsAVXInstruction(ins))
569 if (TakesVexPrefix(ins))
571 // R-bit is supported by both 2-byte and 3-byte VEX prefix
572 assert(hasVexPrefix(code));
574 // R-bit is added in bit-inverted form.
575 return code & 0xFF7FFFFFFFFFFFULL;
579 return code | 0x4400000000ULL;
582 emitter::code_t emitter::AddRexXPrefix(instruction ins, code_t code)
584 if (UseVEXEncoding() && IsAVXInstruction(ins))
586 if (TakesVexPrefix(ins))
588 // X-bit is available only in 3-byte VEX prefix that starts with byte C4.
589 assert(hasVexPrefix(code));
591 // X-bit is added in bit-inverted form.
592 return code & 0xFFBFFFFFFFFFFFULL;
596 return code | 0x4200000000ULL;
599 emitter::code_t emitter::AddRexBPrefix(instruction ins, code_t code)
601 if (UseVEXEncoding() && IsAVXInstruction(ins))
603 if (TakesVexPrefix(ins))
605 // B-bit is available only in 3-byte VEX prefix that starts with byte C4.
606 assert(hasVexPrefix(code));
608 // B-bit is added in bit-inverted form.
609 return code & 0xFFDFFFFFFFFFFFULL;
613 return code | 0x4100000000ULL;
616 // Adds REX prefix (0x40) without W, R, X or B bits set
617 emitter::code_t emitter::AddRexPrefix(instruction ins, code_t code)
619 assert(!UseVEXEncoding() || !IsAVXInstruction(ins));
620 return code | 0x4000000000ULL;
623 #endif //_TARGET_AMD64_
625 bool isPrefix(BYTE b)
627 assert(b != 0); // Caller should check this
628 assert(b != 0x67); // We don't use the address size prefix
629 assert(b != 0x65); // The GS segment override prefix is emitted separately
630 assert(b != 0x64); // The FS segment override prefix is emitted separately
631 assert(b != 0xF0); // The lock prefix is emitted separately
632 assert(b != 0x2E); // We don't use the CS segment override prefix
633 assert(b != 0x3E); // Or the DS segment override prefix
634 assert(b != 0x26); // Or the ES segment override prefix
635 assert(b != 0x36); // Or the SS segment override prefix
637 // That just leaves the size prefixes used in SSE opcodes:
638 // Scalar Double Scalar Single Packed Double
639 return ((b == 0xF2) || (b == 0xF3) || (b == 0x66));
642 // Outputs VEX prefix (in case of AVX instructions) and REX.R/X/W/B otherwise.
643 unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code)
645 if (hasVexPrefix(code))
647 // Only AVX instructions should have a VEX prefix
648 assert(UseVEXEncoding() && IsAVXInstruction(ins));
649 code_t vexPrefix = (code >> 32) & 0x00FFFFFF;
650 code &= 0x00000000FFFFFFFFLL;
652 WORD leadingBytes = 0;
653 BYTE check = (code >> 24) & 0xFF;
656 // 3-byte opcode: with the bytes ordered as 0x2211RM33 or
657 // 4-byte opcode: with the bytes ordered as 0x22114433
658 // check for a prefix in the 11 position
659 BYTE sizePrefix = (code >> 16) & 0xFF;
660 if ((sizePrefix != 0) && isPrefix(sizePrefix))
662 // 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits
664 // 00 - None (0F - packed float)
665 // 01 - 66 (66 0F - packed double)
666 // 10 - F3 (F3 0F - scalar float)
667 // 11 - F2 (F2 0F - scalar double)
671 if (IsBMIInstruction(ins))
707 assert(!"unrecognized SIMD size prefix");
711 // Now the byte in the 22 position must be an escape byte 0F
712 leadingBytes = check;
713 assert(leadingBytes == 0x0F);
715 // Get rid of both sizePrefix and escape byte
716 code &= 0x0000FFFFLL;
718 // Check the byte in the 33 position to see if it is 3A or 38.
719 // In such a case escape bytes must be 0x0F3A or 0x0F38
721 if (check == 0x3A || check == 0x38)
723 leadingBytes = (leadingBytes << 8) | check;
724 code &= 0x0000FF00LL;
730 // 2-byte opcode with the bytes ordered as 0x0011RM22
731 // the byte in position 11 must be an escape byte.
732 leadingBytes = (code >> 16) & 0xFF;
733 assert(leadingBytes == 0x0F || leadingBytes == 0x00);
737 // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38
738 // m-mmmmm bits in byte 1 of VEX prefix allows us to encode these
739 // implied leading bytes. 0x0F is supported by both the 2-byte and
740 // 3-byte encoding. While 0x0F3A and 0x0F38 are only supported by
741 // the 3-byte version.
743 switch (leadingBytes)
746 // there is no leading byte
758 assert(!"encountered unknown leading bytes");
763 // VEX.2211RM33 got transformed as VEX.0000RM33
764 // VEX.0011RM22 got transformed as VEX.0000RM22
766 // Now output VEX prefix leaving the 4-byte opcode
768 // The 2-byte VEX encoding, requires that the X and B-bits are set (these
769 // bits are inverted from the REX values so set means off), the W-bit is
770 // not set (this bit is not inverted), and that the m-mmmm bits are 0-0001
771 // (the 2-byte VEX encoding only supports the 0x0F leading byte). When these
772 // conditions are met, we can change byte-0 from 0xC4 to 0xC5 and then
773 // byte-1 is the logical-or of bit 7 from byte-1 and bits 0-6 from byte 2
774 // from the 3-byte VEX encoding.
776 // Given the above, the check can be reduced to a simple mask and comparison.
777 // * 0xFFFF7F80 is a mask that ignores any bits whose value we don't care about:
778 // * R can be set or unset (0x7F ignores bit 7)
779 // * vvvv can be any value (0x80 ignores bits 3-6)
780 // * L can be set or unset (0x80 ignores bit 2)
781 // * pp can be any value (0x80 ignores bits 0-1)
782 // * 0x00C46100 is a value that signifies the requirements listed above were met:
783 // * We must be a three-byte VEX opcode (0x00C4)
784 // * X and B must be set (0x61 validates bits 5-6)
785 // * m-mmmm must be 0-00001 (0x61 validates bits 0-4)
786 // * W must be unset (0x00 validates bit 7)
787 if ((vexPrefix & 0xFFFF7F80) == 0x00C46100)
789 emitOutputByte(dst, 0xC5);
790 emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0x80) | (vexPrefix & 0x7F));
794 emitOutputByte(dst, ((vexPrefix >> 16) & 0xFF));
795 emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0xFF));
796 emitOutputByte(dst + 2, vexPrefix & 0xFF);
800 #ifdef _TARGET_AMD64_
801 if (code > 0x00FFFFFFFFLL)
803 BYTE prefix = (code >> 32) & 0xFF;
804 noway_assert(prefix >= 0x40 && prefix <= 0x4F);
805 code &= 0x00000000FFFFFFFFLL;
807 // TODO-AMD64-Cleanup: when we remove the prefixes (just the SSE opcodes right now)
808 // we can remove this code as well
810 // The REX prefix is required to come after all other prefixes.
811 // Some of our 'opcodes' actually include some prefixes, if that
812 // is the case, shift them over and place the REX prefix after
813 // the other prefixes, and emit any prefix that got moved out.
814 BYTE check = (code >> 24) & 0xFF;
817 // 3-byte opcode: with the bytes ordered as 0x00113322
818 // check for a prefix in the 11 position
819 check = (code >> 16) & 0xFF;
820 if (check != 0 && isPrefix(check))
822 // Swap the rex prefix and whatever this prefix is
823 code = (((DWORD)prefix << 16) | (code & 0x0000FFFFLL));
824 // and then emit the other prefix
825 return emitOutputByte(dst, check);
830 // 4-byte opcode with the bytes ordered as 0x22114433
831 // first check for a prefix in the 11 position
832 BYTE check2 = (code >> 16) & 0xFF;
833 if (isPrefix(check2))
835 assert(!isPrefix(check)); // We currently don't use this, so it is untested
838 // 3 prefixes were rex = rr, check = c1, check2 = c2 encoded as 0xrrc1c2XXXX
839 // Change to c2rrc1XXXX, and emit check2 now
840 code = (((code_t)prefix << 24) | ((code_t)check << 16) | (code & 0x0000FFFFLL));
844 // 2 prefixes were rex = rr, check2 = c2 encoded as 0xrrXXc2XXXX, (check is part of the opcode)
845 // Change to c2XXrrXXXX, and emit check2 now
846 code = (((code_t)check << 24) | ((code_t)prefix << 16) | (code & 0x0000FFFFLL));
848 return emitOutputByte(dst, check2);
852 return emitOutputByte(dst, prefix);
854 #endif // _TARGET_AMD64_
859 #ifdef _TARGET_AMD64_
860 /*****************************************************************************
861 * Is the last instruction emitted a call instruction?
863 bool emitter::emitIsLastInsCall()
865 if ((emitLastIns != nullptr) && (emitLastIns->idIns() == INS_call))
873 /*****************************************************************************
874 * We're about to create an epilog. If the last instruction we output was a 'call',
875 * then we need to insert a NOP, to allow for proper exception-handling behavior.
877 void emitter::emitOutputPreEpilogNOP()
879 if (emitIsLastInsCall())
885 #endif //_TARGET_AMD64_
887 // Size of rex prefix in bytes
888 unsigned emitter::emitGetRexPrefixSize(instruction ins)
890 // In case of AVX instructions, REX prefixes are part of VEX prefix.
891 // And hence requires no additional byte to encode REX prefixes.
892 if (IsAVXInstruction(ins))
897 // If not AVX, then we would need 1-byte to encode REX prefix.
901 // Size of vex prefix in bytes
902 unsigned emitter::emitGetVexPrefixSize(instruction ins, emitAttr attr)
904 if (IsAVXInstruction(ins))
909 // If not AVX, then we don't need to encode vex prefix.
913 // VEX prefix encodes some bytes of the opcode and as a result, overall size of the instruction reduces.
914 // Therefore, estimating the size by adding the VEX prefix size to the size of the instruction opcode bytes will always overestimate.
915 // Instead this routine will adjust the size of VEX prefix based on the number of bytes of opcode it encodes so that
916 // instruction size estimate will be accurate.
917 // Basically this function will decrease the vexPrefixSize,
918 // so that opcodeSize + vexPrefixAdjustedSize will be the right size.
919 // rightOpcodeSize + vexPrefixSize
920 //=(opcodeSize - ExtrabytesSize) + vexPrefixSize
921 //=opcodeSize + (vexPrefixSize - ExtrabytesSize)
922 //=opcodeSize + vexPrefixAdjustedSize
923 unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, code_t code)
925 if (IsAVXInstruction(ins))
927 unsigned vexPrefixAdjustedSize = emitGetVexPrefixSize(ins, attr);
928 assert(vexPrefixAdjustedSize == 3);
930 // In this case the opcode contains at least one escape-prefix byte,
931 // so vexPrefixAdjustedSize is decremented by one.
932 vexPrefixAdjustedSize -= 1;
934 // Get the fourth byte in Opcode.
935 // If this byte is non-zero, then we should check whether the opcode contains SIMD prefix or not.
936 BYTE check = (code >> 24) & 0xFF;
939 // 3-byte opcode: with the bytes ordered as 0x2211RM33 or
940 // 4-byte opcode: with the bytes ordered as 0x22114433
941 // Simd prefix is at the first byte.
942 BYTE sizePrefix = (code >> 16) & 0xFF;
943 if (sizePrefix != 0 && isPrefix(sizePrefix))
945 vexPrefixAdjustedSize -= 1;
948 // If the opcode size is 4 bytes, then the second escape prefix is at fourth byte in opcode.
949 // But in this case the opcode has not counted R\M part.
950 // opcodeSize + VexPrefixAdjustedSize - ExtraEscapePrefixSize + ModR\MSize
951 //=opcodeSize + VexPrefixAdjustedSize -1 + 1
952 //=opcodeSize + VexPrefixAdjustedSize
953 // So although we may have second byte escape prefix, we won't decrease vexPrefixAdjustedSize.
956 return vexPrefixAdjustedSize;
961 // Get size of rex or vex prefix emitted in code
962 unsigned emitter::emitGetPrefixSize(code_t code)
964 if (hasVexPrefix(code))
969 if (hasRexPrefix(code))
978 /*****************************************************************************
980 * Record a non-empty stack
983 void emitter::emitMarkStackLvl(unsigned stackLevel)
985 assert(int(stackLevel) >= 0);
986 assert(emitCurStackLvl == 0);
987 assert(emitCurIG->igStkLvl == 0);
988 assert(emitCurIGfreeNext == emitCurIGfreeBase);
990 assert(stackLevel && stackLevel % sizeof(int) == 0);
992 emitCurStackLvl = emitCurIG->igStkLvl = stackLevel;
994 if (emitMaxStackDepth < emitCurStackLvl)
996 JITDUMP("Upping emitMaxStackDepth from %d to %d\n", emitMaxStackDepth, emitCurStackLvl);
997 emitMaxStackDepth = emitCurStackLvl;
1002 /*****************************************************************************
1004 * Get hold of the address mode displacement value for an indirect call.
1007 inline ssize_t emitter::emitGetInsCIdisp(instrDesc* id)
1009 if (id->idIsLargeCall())
1011 return ((instrDescCGCA*)id)->idcDisp;
1015 assert(!id->idIsLargeDsp());
1016 assert(!id->idIsLargeCns());
1018 return id->idAddr()->iiaAddrMode.amDisp;
1022 /** ***************************************************************************
1024 * The following table is used by the instIsFP()/instUse/DefFlags() helpers.
1028 const insFlags CodeGenInterface::instInfo[] =
1030 #define INST0(id, nm, um, mr, flags) static_cast<insFlags>(flags),
1031 #define INST1(id, nm, um, mr, flags) static_cast<insFlags>(flags),
1032 #define INST2(id, nm, um, mr, mi, flags) static_cast<insFlags>(flags),
1033 #define INST3(id, nm, um, mr, mi, rm, flags) static_cast<insFlags>(flags),
1034 #define INST4(id, nm, um, mr, mi, rm, a4, flags) static_cast<insFlags>(flags),
1035 #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) static_cast<insFlags>(flags),
1046 /*****************************************************************************
1048 * Initialize the table used by emitInsModeFormat().
1052 const BYTE emitter::emitInsModeFmtTab[] =
1054 #define INST0(id, nm, um, mr, flags) um,
1055 #define INST1(id, nm, um, mr, flags) um,
1056 #define INST2(id, nm, um, mr, mi, flags) um,
1057 #define INST3(id, nm, um, mr, mi, rm, flags) um,
1058 #define INST4(id, nm, um, mr, mi, rm, a4, flags) um,
1059 #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) um,
1071 unsigned const emitter::emitInsModeFmtCnt = _countof(emitInsModeFmtTab);
1074 /*****************************************************************************
1076 * Combine the given base format with the update mode of the instuction.
1079 inline emitter::insFormat emitter::emitInsModeFormat(instruction ins, insFormat base)
1081 assert(IF_RRD + IUM_RD == IF_RRD);
1082 assert(IF_RRD + IUM_WR == IF_RWR);
1083 assert(IF_RRD + IUM_RW == IF_RRW);
1085 return (insFormat)(base + emitInsUpdateMode(ins));
1088 // This is a helper we need due to Vs Whidbey #254016 in order to distinguish
1089 // if we can not possibly be updating an integer register. This is not the best
1090 // solution, but the other ones (see bug) are going to be much more complicated.
1091 bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
1093 instruction ins = id->idIns();
1095 if (!IsSSEOrAVXInstruction(ins))
1124 case INS_pextrw_sse41:
1126 // These SSE instructions write to a general purpose integer register.
1137 /*****************************************************************************
1139 * Returns the base encoding of the given CPU instruction.
1142 inline size_t insCode(instruction ins)
1148 #define INST0(id, nm, um, mr, flags) mr,
1149 #define INST1(id, nm, um, mr, flags) mr,
1150 #define INST2(id, nm, um, mr, mi, flags) mr,
1151 #define INST3(id, nm, um, mr, mi, rm, flags) mr,
1152 #define INST4(id, nm, um, mr, mi, rm, a4, flags) mr,
1153 #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) mr,
1164 assert((unsigned)ins < _countof(insCodes));
1165 assert((insCodes[ins] != BAD_CODE));
1167 return insCodes[ins];
1170 /*****************************************************************************
1172 * Returns the "AL/AX/EAX, imm" accumulator encoding of the given instruction.
1175 inline size_t insCodeACC(instruction ins)
1179 size_t insCodesACC[] =
1181 #define INST0(id, nm, um, mr, flags)
1182 #define INST1(id, nm, um, mr, flags)
1183 #define INST2(id, nm, um, mr, mi, flags)
1184 #define INST3(id, nm, um, mr, mi, rm, flags)
1185 #define INST4(id, nm, um, mr, mi, rm, a4, flags) a4,
1186 #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) a4,
1197 assert((unsigned)ins < _countof(insCodesACC));
1198 assert((insCodesACC[ins] != BAD_CODE));
1200 return insCodesACC[ins];
1203 /*****************************************************************************
1205 * Returns the "register" encoding of the given CPU instruction.
1208 inline size_t insCodeRR(instruction ins)
1212 size_t insCodesRR[] =
1214 #define INST0(id, nm, um, mr, flags)
1215 #define INST1(id, nm, um, mr, flags)
1216 #define INST2(id, nm, um, mr, mi, flags)
1217 #define INST3(id, nm, um, mr, mi, rm, flags)
1218 #define INST4(id, nm, um, mr, mi, rm, a4, flags)
1219 #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) rr,
1230 assert((unsigned)ins < _countof(insCodesRR));
1231 assert((insCodesRR[ins] != BAD_CODE));
1233 return insCodesRR[ins];
1238 size_t insCodesRM[] =
1240 #define INST0(id, nm, um, mr, flags)
1241 #define INST1(id, nm, um, mr, flags)
1242 #define INST2(id, nm, um, mr, mi, flags)
1243 #define INST3(id, nm, um, mr, mi, rm, flags) rm,
1244 #define INST4(id, nm, um, mr, mi, rm, a4, flags) rm,
1245 #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) rm,
1256 // Returns true iff the give CPU instruction has an RM encoding.
1257 inline bool hasCodeRM(instruction ins)
1259 assert((unsigned)ins < _countof(insCodesRM));
1260 return ((insCodesRM[ins] != BAD_CODE));
1263 /*****************************************************************************
1265 * Returns the "reg, [r/m]" encoding of the given CPU instruction.
1268 inline size_t insCodeRM(instruction ins)
1270 assert((unsigned)ins < _countof(insCodesRM));
1271 assert((insCodesRM[ins] != BAD_CODE));
1273 return insCodesRM[ins];
1278 size_t insCodesMI[] =
1280 #define INST0(id, nm, um, mr, flags)
1281 #define INST1(id, nm, um, mr, flags)
1282 #define INST2(id, nm, um, mr, mi, flags) mi,
1283 #define INST3(id, nm, um, mr, mi, rm, flags) mi,
1284 #define INST4(id, nm, um, mr, mi, rm, a4, flags) mi,
1285 #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) mi,
1296 // Returns true iff the give CPU instruction has an MI encoding.
1297 inline bool hasCodeMI(instruction ins)
1299 assert((unsigned)ins < _countof(insCodesMI));
1300 return ((insCodesMI[ins] != BAD_CODE));
1303 /*****************************************************************************
1305 * Returns the "[r/m], 32-bit icon" encoding of the given CPU instruction.
1308 inline size_t insCodeMI(instruction ins)
1310 assert((unsigned)ins < _countof(insCodesMI));
1311 assert((insCodesMI[ins] != BAD_CODE));
1313 return insCodesMI[ins];
1318 size_t insCodesMR[] =
1320 #define INST0(id, nm, um, mr, flags)
1321 #define INST1(id, nm, um, mr, flags) mr,
1322 #define INST2(id, nm, um, mr, mi, flags) mr,
1323 #define INST3(id, nm, um, mr, mi, rm, flags) mr,
1324 #define INST4(id, nm, um, mr, mi, rm, a4, flags) mr,
1325 #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) mr,
1336 // Returns true iff the give CPU instruction has an MR encoding.
1337 inline bool hasCodeMR(instruction ins)
1339 assert((unsigned)ins < _countof(insCodesMR));
1340 return ((insCodesMR[ins] != BAD_CODE));
1343 /*****************************************************************************
1345 * Returns the "[r/m], reg" or "[r/m]" encoding of the given CPU instruction.
// Callers are expected to pass an instruction that has this form: the second
// assert rejects a BAD_CODE table entry (the same test hasCodeMR performs).
1348 inline size_t insCodeMR(instruction ins)
1350 assert((unsigned)ins < _countof(insCodesMR));
1351 assert((insCodesMR[ins] != BAD_CODE));
1353 return insCodesMR[ins];
1356 // Return true if the instruction uses the SSE38 or SSE3A macro in instrsXArch.h.
1357 bool emitter::EncodedBySSE38orSSE3A(instruction ins)
// Opcode patterns that the instruction's encoding is compared against below.
1359 const size_t SSE38 = 0x0F660038;
1360 const size_t SSE3A = 0x0F66003A;
1361 const size_t MASK = 0xFFFF00FF;
// Instructions outside the SSE/AVX range are handled up front.
1365 if (!IsSSEOrAVXInstruction(ins))
// Use whichever encoding form the instruction defines, preferring RM, then MI,
// then MR.
1372 insCode = insCodeRM(ins);
1374 else if (hasCodeMI(ins))
1376 insCode = insCodeMI(ins);
1378 else if (hasCodeMR(ins))
1380 insCode = insCodeMR(ins);
1384 return insCode == SSE38 || insCode == SSE3A;
1387 /*****************************************************************************
1389 * Returns an encoding for the specified register to be used in the bit0-2
1390 * part of an opcode.
// ins  - instruction being encoded; forwarded to the REX prefix helpers.
// reg  - register to encode; asserted to be below REG_STK.
// size - operand size; on AMD64 an EA_1BYTE access may need a REX prefix.
// code - in/out opcode that receives any REX bits on AMD64. May be nullptr only
//        when reg is not an extended register.
1393 inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAttr size, code_t* code)
1395 assert(reg < REG_STK);
1397 #ifdef _TARGET_AMD64_
1398 // Either code is not NULL or reg is not an extended reg.
1399 // If reg is an extended reg, instruction needs to be prefixed with 'REX'
1400 // which would require code != NULL.
1401 assert(code != nullptr || !IsExtendedReg(reg));
1403 if (IsExtendedReg(reg))
1405 *code = AddRexBPrefix(ins, *code); // REX.B
// Byte-sized access to a register numbered above REG_RBX: add a plain REX
// prefix, but only when the caller supplied an opcode to update.
1407 else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr))
1409 // We are assuming that we only use/encode SPL, BPL, SIL and DIL
1410 // not the corresponding AH, CH, DH, or BH
1411 *code = AddRexPrefix(ins, *code); // REX
1413 #endif // _TARGET_AMD64_
// The encoding produced by RegEncoding() is asserted to fit in three bits.
1415 unsigned regBits = RegEncoding(reg);
1417 assert(regBits < 8);
1421 /*****************************************************************************
1423 * Returns an encoding for the specified register to be used in the bit3-5
1424 * part of an opcode.
// ins  - instruction being encoded; forwarded to the REX prefix helpers.
// reg  - register to encode; asserted to be below REG_STK.
// size - operand size; on AMD64 an EA_1BYTE access may need a REX prefix.
// code - in/out opcode that receives any REX bits on AMD64. May be nullptr only
//        when reg is not an extended register.
1427 inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAttr size, code_t* code)
1429 assert(reg < REG_STK);
1431 #ifdef _TARGET_AMD64_
1432 // Either code is not NULL or reg is not an extended reg.
1433 // If reg is an extended reg, instruction needs to be prefixed with 'REX'
1434 // which would require code != NULL.
1435 assert(code != nullptr || !IsExtendedReg(reg));
1437 if (IsExtendedReg(reg))
1439 *code = AddRexRPrefix(ins, *code); // REX.R
// Byte-sized access to a register numbered above REG_RBX: add a plain REX
// prefix, but only when the caller supplied an opcode to update.
1441 else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr))
1443 // We are assuming that we only use/encode SPL, BPL, SIL and DIL
1444 // not the corresponding AH, CH, DH, or BH
1445 *code = AddRexPrefix(ins, *code); // REX
1447 #endif // _TARGET_AMD64_
1449 unsigned regBits = RegEncoding(reg);
1451 assert(regBits < 8);
// Move the three register bits into bit positions 3-5.
1452 return (regBits << 3);
1455 /***********************************************************************************
1457 * Returns modified AVX opcode with the specified register encoded in bits 3-6 of
1458 * byte 2 of VEX prefix.
// Preconditions (asserted): reg is below REG_STK, ins is an AVX instruction,
// and `code` already carries a VEX prefix. `code` is taken by value; the
// updated opcode is the return value.
1460 inline emitter::code_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, code_t code)
1462 assert(reg < REG_STK);
1463 assert(IsAVXInstruction(ins));
1464 assert(hasVexPrefix(code));
1466 // Get 4-bit register encoding
1467 // RegEncoding() gives lower 3 bits
1468 // IsExtendedReg() gives MSB.
1469 code_t regBits = RegEncoding(reg);
1470 if (IsExtendedReg(reg))
1475 // VEX prefix encodes register operand in 1's complement form
1476 // Shift count = 4-bytes of opcode + 0-2 bits
1477 assert(regBits <= 0xF);
// The register bits are folded into the opcode with XOR rather than OR.
1479 return code ^ regBits;
1482 /*****************************************************************************
1484 * Returns an encoding for the specified register to be used in the bit3-5
1485 * part of an SIB byte (unshifted).
1486 * Used exclusively to generate the REX.X bit and truncate the register.
// On AMD64 an extended register adds REX.X to the opcode behind `code` and the
// register number is reduced with RegEncoding(); on other targets the register
// number is used as is. Either way the result is asserted to be below 8.
1489 inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, code_t* code)
1491 assert(reg < REG_STK);
1493 #ifdef _TARGET_AMD64_
1494 // Either code is not NULL or reg is not an extended reg.
1495 // If reg is an extended reg, instruction needs to be prefixed with 'REX'
1496 // which would require code != NULL.
// NOTE(review): this assert spells out register ranges where the sibling
// encoders test !IsExtendedReg(reg) - confirm the two conditions are equivalent.
1497 assert(code != nullptr || reg < REG_R8 || (reg >= REG_XMM0 && reg < REG_XMM8));
1499 if (IsExtendedReg(reg))
1501 *code = AddRexXPrefix(ins, *code); // REX.X
1503 unsigned regBits = RegEncoding(reg);
1504 #else // !_TARGET_AMD64_
1505 unsigned regBits = reg;
1506 #endif // !_TARGET_AMD64_
1508 assert(regBits < 8);
1512 /*****************************************************************************
1514 * Returns the "[r/m]" opcode with the mod/RM field set to register.
// `code` is taken by value; the adjusted opcode is the return value.
1517 inline emitter::code_t emitter::insEncodeMRreg(instruction ins, code_t code)
1519 // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes.
1520 // Otherwise, it will be placed after the 4 byte encoding.
1521 if ((code & 0xFF00) == 0)
// Bits 0xC000 of the opcode must still be clear at this point.
1523 assert((code & 0xC000) == 0);
1530 /*****************************************************************************
1532 * Returns the given "[r/m]" opcode with the mod/RM field set to register.
// `code` is taken by value; the adjusted opcode is the return value.
1535 inline emitter::code_t emitter::insEncodeRMreg(instruction ins, code_t code)
1537 // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes.
1538 // Otherwise, it will be placed after the 4 byte encoding.
1539 if ((code & 0xFF00) == 0)
// Bits 0xC000 of the opcode must still be clear at this point.
1541 assert((code & 0xC000) == 0);
1547 /*****************************************************************************
1549 * Returns the "byte ptr [r/m]" opcode with the mod/RM field set to
1550 * the given register.
// Bits 0xC000 of `code` are asserted to be clear on entry.
1553 inline emitter::code_t emitter::insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, code_t code)
1555 assert((code & 0xC000) == 0);
// The register's bit0-2 encoding is shifted up by one byte (into bits 8-10).
// insEncodeReg012 receives the address of `code`, so on AMD64 it may also add a
// REX prefix to the local opcode.
1557 unsigned regcode = insEncodeReg012(ins, reg, size, &code) << 8;
1562 /*****************************************************************************
1564 * Returns the "byte ptr [r/m], icon" opcode with the mod/RM field set to
1565 * the given register.
// Bits 0xC000 of `code` are asserted to be clear on entry.
1568 inline emitter::code_t emitter::insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, code_t code)
1570 assert((code & 0xC000) == 0);
// The register's bit0-2 encoding is shifted up by one byte (into bits 8-10).
// insEncodeReg012 receives the address of `code`, so on AMD64 it may also add a
// REX prefix to the local opcode.
1572 unsigned regcode = insEncodeReg012(ins, reg, size, &code) << 8;
1577 /*****************************************************************************
1579 * Returns true iff the given instruction does not have a "[r/m], icon" form, but *does* have a
1580 * "reg,reg,imm8" form.
// INS_imul is currently the only instruction for which this returns true.
1582 inline bool insNeedsRRIb(instruction ins)
1584 // If this list gets longer, use a switch or a table.
1585 return ins == INS_imul;
1588 /*****************************************************************************
1590 * Returns the "reg,reg,imm8" opcode with both the reg's set to
1591 * the given register.
// Preconditions (asserted): size is EA_4BYTE and insNeedsRRIb(ins) holds, which
// currently means ins is INS_imul.
1593 inline emitter::code_t emitter::insEncodeRRIb(instruction ins, regNumber reg, emitAttr size)
1595 assert(size == EA_4BYTE); // All we handle for now.
1596 assert(insNeedsRRIb(ins));
1597 // If this list gets longer, use a switch, or a table lookup.
// 0x69c0 is the fixed base opcode the register bits are merged into.
1598 code_t code = 0x69c0;
1599 unsigned regcode = insEncodeReg012(ins, reg, size, &code);
1600 // We use the same register as source and destination. (Could have another version that does both regs...)
// Place the same register encoding in the bit3-5 field as well.
1602 code |= (regcode << 3);
1606 /*****************************************************************************
1608 * Returns the "+reg" opcode with the given register set into the low
1609 * nibble of the opcode
// Starts from the instruction's RR opcode (insCodeRR). insEncodeReg012 supplies
// the register's bit0-2 encoding and, on AMD64, may add a REX prefix to `code`.
1612 inline emitter::code_t emitter::insEncodeOpreg(instruction ins, regNumber reg, emitAttr size)
1614 code_t code = insCodeRR(ins);
1615 unsigned regcode = insEncodeReg012(ins, reg, size, &code);
1620 /*****************************************************************************
1622 * Return the 'SS' field value for the given index scale factor.
// scale - index scale factor; asserted to be 1, 2, 4 or 8.
1625 inline unsigned emitter::insSSval(unsigned scale)
1627 assert(scale == 1 || scale == 2 || scale == 4 || scale == 8);
// Lookup table indexed by (scale - 1); with the assert above only indices
// 0, 1, 3 and 7 are ever read.
1629 const static BYTE scales[] = {
1640 return scales[scale - 1];
// Jump kind -> instruction table. Entry 0 is INS_nop; each JMP_SMALL(en, rev, ins)
// row of emitjmps.h then contributes INS_<ins>.
1643 const instruction emitJumpKindInstructions[] = {INS_nop,
1645 #define JMP_SMALL(en, rev, ins) INS_##ins,
1646 #include "emitjmps.h"
// Jump kind -> reversed jump kind table. Each JMP_SMALL(en, rev, ins) row of
// emitjmps.h contributes EJ_<rev>.
1650 const emitJumpKind emitReverseJumpKinds[] = {
1653 #define JMP_SMALL(en, rev, ins) EJ_##rev,
1654 #include "emitjmps.h"
1657 /*****************************************************************************
1658 * Look up the instruction for a jump kind
// Plain lookup in emitJumpKindInstructions; jumpKind is asserted to be a valid
// index into that table.
1661 /*static*/ instruction emitter::emitJumpKindToIns(emitJumpKind jumpKind)
1663 assert((unsigned)jumpKind < ArrLen(emitJumpKindInstructions));
1664 return emitJumpKindInstructions[jumpKind];
1667 /*****************************************************************************
1668 * Reverse the conditional jump
// Plain lookup in emitReverseJumpKinds; jumpKind is asserted to be below EJ_COUNT.
// NOTE(review): the bound here is EJ_COUNT while emitJumpKindToIns checks against
// the table length - presumably the same value; confirm.
1671 /* static */ emitJumpKind emitter::emitReverseJumpKind(emitJumpKind jumpKind)
1673 assert(jumpKind < EJ_COUNT);
1674 return emitReverseJumpKinds[jumpKind];
1677 /*****************************************************************************
1678 * The size for these instructions is less than EA_4BYTE,
1679 * but the target register need not be byte-addressable
// NOTE(review): the header text above talks about operand sizes and byte-addressable
// registers, which does not match what this function tests (whether `ins` is
// INS_align). It looks like it belongs to a different function - confirm and replace.
1682 inline bool emitInstHasNoCode(instruction ins)
1684 if (ins == INS_align)
1692 /*****************************************************************************
1693 * When encoding instructions that operate on byte registers
1694 * we have to ensure that we use a low register (EAX, EBX, ECX or EDX)
1695 * otherwise we will incorrectly encode the instruction
// ins  - instruction to be encoded.
// size - operand size; the register checks only apply to EA_1BYTE.
// reg1 - first register operand.
// reg2 - optional second register operand; REG_NA means there is none to check.
// The register checks are compiled only when CPU_HAS_BYTE_REGS is set.
1698 bool emitter::emitVerifyEncodable(instruction ins, emitAttr size, regNumber reg1, regNumber reg2 /* = REG_NA */)
1700 #if CPU_HAS_BYTE_REGS
1701 if (size != EA_1BYTE) // Not operating on a byte register is fine
1706 if ((ins != INS_movsx) && // These three instructions support high register
1707 (ins != INS_movzx) // encodings for reg1
1708 #ifdef FEATURE_HW_INTRINSICS
1709 && (ins != INS_crc32)
1713 // reg1 must be a byte-able register
1714 if ((genRegMask(reg1) & RBM_BYTE_REGS) == 0)
1719 // if reg2 is not REG_NA then reg2 must be a byte-able register
1720 if ((reg2 != REG_NA) && ((genRegMask(reg2) & RBM_BYTE_REGS) == 0))
1725 // The instruction can be encoded
1729 /*****************************************************************************
1731 * Estimate the size (in bytes of generated code) of the given instruction.
// The base size depends on which opcode bytes are populated: 4 when the top byte
// (0xFF000000) is non-zero, otherwise 3 when byte 0x00FF0000 is non-zero,
// otherwise 2. On AMD64 the size reported by emitGetPrefixSize(code) is added.
1734 inline UNATIVE_OFFSET emitter::emitInsSize(code_t code)
1736 UNATIVE_OFFSET size = (code & 0xFF000000) ? 4 : (code & 0x00FF0000) ? 3 : 2;
1737 #ifdef _TARGET_AMD64_
1738 size += emitGetPrefixSize(code);
// Size estimate for the RM-form opcode of `ins`, as computed by emitInsSize().
1743 inline UNATIVE_OFFSET emitter::emitInsSizeRM(instruction ins)
1745 return emitInsSize(insCodeRM(ins));
// Estimates the size of a register-to-register instruction.
// ins        - the instruction; its RM opcode is the basis of the estimate.
// reg1, reg2 - the register operands, inspected for extended registers.
// attr       - operand attributes; EA_SIZE(attr) gives the operand size used below.
1748 inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr)
1750 emitAttr size = EA_SIZE(attr);
1754 // If Byte 4 (which is 0xFF00) is zero, that's where the RM encoding goes.
1755 // Otherwise, it will be placed after the 4 byte encoding, making the total 5 bytes.
1756 // This would probably be better expressed as a different format or something?
1757 code_t code = insCodeRM(ins);
1759 if ((code & 0xFF00) != 0)
1765 sz = emitInsSize(insEncodeRMreg(ins, code));
1768 // Most 16-bit operand instructions will need a prefix
1769 if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx)
1775 sz += emitGetVexPrefixAdjustedSize(ins, size, insCodeRM(ins));
// A REX prefix is accounted for only if the opcode does not already carry one.
// The REX.W test is skipped for xor of a register with itself; extended
// registers still count in that case.
1778 if (!hasRexPrefix(code))
1780 if ((TakesRexWPrefix(ins, size) && ((ins != INS_xor) || (reg1 != reg2))) || IsExtendedReg(reg1, attr) ||
1781 IsExtendedReg(reg2, attr))
1783 sz += emitGetRexPrefixSize(ins);
1790 /*****************************************************************************/
// Estimates the size of an instruction with opcode `code` that references the
// stack variable or temp `var` at additional offset `dsp`: the opcode size from
// emitInsSize(code) plus the bytes needed to encode the frame displacement
// (one byte when it fits in a signed byte, four otherwise).
1792 inline UNATIVE_OFFSET emitter::emitInsSizeSV(code_t code, int var, int dsp)
1794 UNATIVE_OFFSET size = emitInsSize(code);
1795 UNATIVE_OFFSET offs;
1796 bool offsIsUpperBound = true;
1797 bool EBPbased = true;
1799 /* Is this a temporary? */
1803 /* An address off of ESP takes an extra byte */
1805 if (!emitHasFramePtr)
1810 // The offset is already assigned. Find the temp.
1811 TempDsc* tmp = codeGen->regSet.tmpFindNum(var, RegSet::TEMP_USAGE_USED);
1814 // It might be in the free lists, if we're working on zero initializing the temps.
1815 tmp = codeGen->regSet.tmpFindNum(var, RegSet::TEMP_USAGE_FREE);
1817 assert(tmp != nullptr);
1818 offs = tmp->tdTempOffs();
1820 // We only care about the magnitude of the offset here, to determine instruction size.
1821 if (emitComp->isFramePointerUsed())
1830 // SP-based offsets must already be positive.
1831 assert((int)offs >= 0);
1837 /* Get the frame offset of the (non-temp) variable */
// lvaFrameAddress also reports, through EBPbased, whether the offset is
// relative to the frame pointer.
1839 offs = dsp + emitComp->lvaFrameAddress(var, &EBPbased);
1841 /* An address off of ESP takes an extra byte */
1848 /* Is this a stack parameter reference? */
1850 if (emitComp->lvaIsParameter(var)
1851 #if !defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI)
1852 && !emitComp->lvaIsRegArgument(var)
1853 #endif // !_TARGET_AMD64_ || UNIX_AMD64_ABI
1856 /* If no EBP frame, arguments are off of ESP, above temps */
1860 assert((int)offs >= 0);
1862 offsIsUpperBound = false; // since #temps can increase
1863 offs += emitMaxTmpSize;
1868 /* Locals off of EBP are at negative offsets */
1872 #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
1873 // If localloc is not used, then ebp chaining is done and hence
1874 // offset of locals will be at negative offsets; otherwise offsets
1875 // will be positive. In future, when RBP gets positioned in the
1876 // middle of the frame so as to optimize instruction encoding size,
1877 // the below asserts need to be modified appropriately.
1878 // However, for Unix platforms, we always do frame pointer chaining,
1879 // so offsets from the frame pointer will always be negative.
1880 if (emitComp->compLocallocUsed || emitComp->opts.compDbgEnC)
1882 noway_assert((int)offs >= 0);
1887 // Dev10 804810 - failing this assert can lead to bad codegen and runtime crashes
1888 CLANG_FORMAT_COMMENT_ANCHOR;
1890 #ifdef UNIX_AMD64_ABI
1891 LclVarDsc* varDsc = emitComp->lvaTable + var;
1892 bool isRegPassedArg = varDsc->lvIsParam && varDsc->lvIsRegArg;
1893 // Register passed args could have a stack offset of 0.
1894 noway_assert((int)offs < 0 || isRegPassedArg);
1895 #else // !UNIX_AMD64_ABI
1896 noway_assert((int)offs < 0);
1897 #endif // !UNIX_AMD64_ABI
1900 assert(emitComp->lvaTempsHaveLargerOffsetThanVars());
1902 // lvaInlinedPInvokeFrameVar and lvaStubArgumentVar are placed below the temps
1903 if (unsigned(var) == emitComp->lvaInlinedPInvokeFrameVar ||
1904 unsigned(var) == emitComp->lvaStubArgumentVar)
1906 offs -= emitMaxTmpSize;
1911 // offset is negative
1912 return size + ((int(offs) >= SCHAR_MIN) ? sizeof(char) : sizeof(int));
1914 #ifdef _TARGET_AMD64_
1915 // This case arises for localloc frames
1918 return size + ((offs <= SCHAR_MAX) ? sizeof(char) : sizeof(int));
1923 if (emitComp->lvaTempsHaveLargerOffsetThanVars() == false)
1925 offs += emitMaxTmpSize;
1930 assert((int)offs >= 0);
1932 #if !FEATURE_FIXED_OUT_ARGS
1934 /* Are we addressing off of ESP? */
1936 if (!emitHasFramePtr)
1938 /* Adjust the effective offset if necessary */
1940 if (emitCntStackDepth)
1941 offs += emitCurStackLvl;
1943 // we could (and used to) check for the special case [sp] here but the stack offset
1944 // estimator was off, and there is very little harm in overestimating for such a
1948 #endif // !FEATURE_FIXED_OUT_ARGS
1950 // printf("lcl = %04X, tmp = %04X, stk = %04X, offs = %04X\n",
1951 // emitLclSize, emitMaxTmpSize, emitCurStackLvl, offs);
// Two alternative definitions follow: the AMD64 one treats offs as signed, the
// other only bounds it from above.
1953 #ifdef _TARGET_AMD64_
1954 bool useSmallEncoding = (SCHAR_MIN <= (int)offs) && ((int)offs <= SCHAR_MAX);
1956 bool useSmallEncoding = (offs <= size_t(SCHAR_MAX));
1959 // If it is ESP based, and the offset is zero, we will not encode the disp part.
1960 if (!EBPbased && offs == 0)
1966 return size + (useSmallEncoding ? sizeof(char) : sizeof(int));
// Stack-variable size estimate for an instruction descriptor: the result of
// emitInsSizeSV(code, var, dsp) plus the VEX-prefix adjustment for the
// descriptor's instruction and operand size.
1970 inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp)
1972 instruction ins = id->idIns();
1973 emitAttr attrSize = id->idOpSize();
1974 UNATIVE_OFFSET prefix = emitGetVexPrefixAdjustedSize(ins, attrSize, code);
1975 return prefix + emitInsSizeSV(code, var, dsp);
// Stack-variable size estimate for an instruction that also carries the
// immediate `val`: VEX-prefix adjustment + immediate size + the result of
// emitInsSizeSV(code, var, dsp).
1978 inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp, int val)
1980 instruction ins = id->idIns();
1981 emitAttr attrSize = id->idOpSize();
1982 UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(attrSize);
1983 UNATIVE_OFFSET prefix = emitGetVexPrefixAdjustedSize(ins, attrSize, code);
// The immediate qualifies for a one-byte encoding only if it survives a round
// trip through signed char and the instruction is neither mov nor test.
1984 bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
1986 #ifdef _TARGET_AMD64_
1987 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
1988 // all other opcodes take a sign-extended 4-byte immediate
1989 noway_assert(valSize <= sizeof(int) || !id->idIsCnsReloc());
1990 #endif // _TARGET_AMD64_
// Immediates wider than four bytes are sized as four bytes.
1992 if (valSize > sizeof(int))
1994 valSize = sizeof(int);
1997 if (id->idIsCnsReloc())
1999 valInByte = false; // relocs can't be placed in a byte
2000 assert(valSize == sizeof(int));
2005 valSize = sizeof(char);
2008 // 16-bit operand instructions need a prefix.
2009 // This refers to 66h size prefix override
2010 if (id->idOpSize() == EA_2BYTE)
2015 return prefix + valSize + emitInsSizeSV(code, var, dsp);
2018 /*****************************************************************************/
// True when `base` is REG_ESP, or additionally REG_R12 on AMD64.
2020 static bool baseRegisterRequiresSibByte(regNumber base)
2022 #ifdef _TARGET_AMD64_
2023 return base == REG_ESP || base == REG_R12;
2025 return base == REG_ESP;
// True when `base` is REG_EBP, or additionally REG_R13 on AMD64.
2029 static bool baseRegisterRequiresDisplacement(regNumber base)
2031 #ifdef _TARGET_AMD64_
2032 return base == REG_EBP || base == REG_R13;
2034 return base == REG_EBP;
// Estimates the size of an instruction whose operand is an address mode.
// id   - instruction descriptor supplying the instruction, operand size,
//        displacement and (for the address-mode formats) base/index registers.
// code - the opcode being emitted.
// Note that in one case below this function rewrites the address mode stored in
// `id` (it swaps the base and index registers).
2038 UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
2040 emitAttr attrSize = id->idOpSize();
2041 instruction ins = id->idIns();
2042 /* The displacement field is in an unusual place for calls */
2043 ssize_t dsp = (ins == INS_call) ? emitGetInsCIdisp(id) : emitGetInsAmdAny(id);
// dspInByte: the displacement survives a round trip through signed char.
2044 bool dspInByte = ((signed char)dsp == (ssize_t)dsp);
2045 bool dspIsZero = (dsp == 0);
2046 UNATIVE_OFFSET size;
2048 // Note that the values in reg and rgx are used in this method to decide
2049 // how many bytes will be needed by the address [reg+rgx+cns]
2050 // this includes the prefix bytes when reg or rgx are registers R8-R15
2054 // The idAddr field is a union and only some of the instruction formats use the iiaAddrMode variant
2055 // these are IF_AWR_*, IF_ARD_*, IF_ARW_* and IF_*_ARD
2056 // ideally these should really be the only idInsFmts that we see here
2057 // but we have some outliers to deal with:
2058 // emitIns_R_L adds IF_RWR_LABEL and calls emitInsSizeAM
2059 // emitInsRMW adds IF_MRW_CNS, IF_MRW_RRD, IF_MRW_SHF, and calls emitInsSizeAM
2061 switch (id->idInsFmt())
2072 reg = id->idAddr()->iiaAddrMode.amBaseReg;
2073 rgx = id->idAddr()->iiaAddrMode.amIndxReg;
2077 if (id->idIsDspReloc())
2079 dspInByte = false; // relocs can't be placed in a byte
2080 dspIsZero = false; // relocs won't always be zero
2083 if (code & 0xFF000000)
2087 else if (code & 0x00FF0000)
2089 // BT supports 16 bit operands and this code doesn't handle the necessary 66 prefix.
2090 assert(ins != INS_bt);
2092 assert((attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE) // Only for x64
2093 || (attrSize == EA_16BYTE) || (attrSize == EA_32BYTE) // only for x64
2094 || (ins == INS_movzx) || (ins == INS_movsx)
2095 // The prefetch instructions are always 3 bytes and have part of their modr/m byte hardcoded
2096 || isPrefetch(ins));
2103 // Most 16-bit operands will require a size prefix.
2104 // This refers to 66h size prefix override.
2106 if (attrSize == EA_2BYTE)
2112 size += emitGetVexPrefixAdjustedSize(ins, attrSize, code);
// Each branch of the chain below adds the REX prefix size once: the opcode
// already has a REX prefix, the instruction takes REX.W, or one of the
// registers involved is an extended register.
2114 if (hasRexPrefix(code))
2117 size += emitGetRexPrefixSize(ins);
2119 else if (TakesRexWPrefix(ins, attrSize))
2122 size += emitGetRexPrefixSize(ins);
2124 else if (IsExtendedReg(reg, EA_PTRSIZE) || IsExtendedReg(rgx, EA_PTRSIZE) ||
2125 ((ins != INS_call) && IsExtendedReg(id->idReg1(), attrSize)))
2127 // Should have a REX byte
2128 size += emitGetRexPrefixSize(ins);
2131 size += emitAdjustSizeCrc32(ins, attrSize);
2135 /* The address is of the form "[reg+disp]" */
2139 /* The address is of the form "[disp]" */
2141 size += sizeof(INT32);
2143 #ifdef _TARGET_AMD64_
2144 // If id is not marked for reloc, add 1 additional byte for SIB that follows disp32
2145 if (!id->idIsDspReloc())
2153 // If this is just "call reg", we're done.
2154 if (id->idIsCallRegPtr())
2156 assert(ins == INS_call);
2161 // If the base register is ESP (or R12 on 64-bit systems), a SIB byte must be used.
2162 if (baseRegisterRequiresSibByte(reg))
2167 // If the base register is EBP (or R13 on 64-bit systems), a displacement is required.
2168 // Otherwise, the displacement can be elided if it is zero.
2169 if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
2174 /* Does the offset fit in a byte? */
2178 size += sizeof(char);
2182 size += sizeof(INT32);
2187 /* An index register is present */
2191 /* Is the index value scaled? */
2193 if (emitDecodeScale(id->idAddr()->iiaAddrMode.amScale) > 1)
2195 /* Is there a base register? */
2199 /* The address is "[reg + {2/4/8} * rgx + icon]" */
2201 if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
2203 /* The address is "[reg + {2/4/8} * rgx]" */
2207 /* The address is "[reg + {2/4/8} * rgx + disp]" */
2211 size += sizeof(char);
2215 size += sizeof(int);
2221 /* The address is "[{2/4/8} * rgx + icon]" */
2223 size += sizeof(INT32);
// With a zero displacement, a base register that would force a displacement
// and an index register that would not, the two are exchanged. The swap is
// written back into id's address mode, not just into the locals.
2228 if (dspIsZero && baseRegisterRequiresDisplacement(reg) && !baseRegisterRequiresDisplacement(rgx))
2230 /* Swap reg and rgx, such that reg is not EBP/R13 */
2231 regNumber tmp = reg;
2232 id->idAddr()->iiaAddrMode.amBaseReg = reg = rgx;
2233 id->idAddr()->iiaAddrMode.amIndxReg = rgx = tmp;
2236 /* The address is "[reg+rgx+dsp]" */
2238 if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
2240 /* This is "[reg+rgx]" */
2244 /* This is "[reg+rgx+dsp]" */
2248 size += sizeof(char);
2252 size += sizeof(int);
// Address-mode size estimate for an instruction that also carries the immediate
// `val`: the immediate's size plus emitInsSizeAM(id, code).
2261 inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code, int val)
2263 instruction ins = id->idIns();
2264 UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize());
// The immediate qualifies for a one-byte encoding only if it survives a round
// trip through signed char and the instruction is neither mov nor test.
2265 bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
2267 // We should never generate BT mem,reg because it has poor performance. BT mem,imm might be useful
2268 // but it requires special handling of the immediate value (it is always encoded in a byte).
2269 // Let's not complicate things until this is needed.
2270 assert(ins != INS_bt);
2272 #ifdef _TARGET_AMD64_
2273 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
2274 // all other opcodes take a sign-extended 4-byte immediate
2275 noway_assert(valSize <= sizeof(INT32) || !id->idIsCnsReloc());
2276 #endif // _TARGET_AMD64_
// Immediates wider than four bytes are sized as four bytes.
2278 if (valSize > sizeof(INT32))
2280 valSize = sizeof(INT32);
2283 if (id->idIsCnsReloc())
2285 valInByte = false; // relocs can't be placed in a byte
2286 assert(valSize == sizeof(INT32));
2291 valSize = sizeof(char);
2294 return valSize + emitInsSizeAM(id, code);
// Estimates the size of an instruction that references a static (class variable)
// address: a four-byte displacement, prefix adjustments, and the opcode size
// from emitInsSize(code).
2297 inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code)
2299 instruction ins = id->idIns();
2300 emitAttr attrSize = id->idOpSize();
2302 // fgMorph changes any statics that won't fit into 32-bit addresses
2303 // into constants with an indir, rather than GT_CLS_VAR
2304 // so we should only hit this path for statics that are RIP-relative
2305 UNATIVE_OFFSET size = sizeof(INT32);
2307 size += emitGetVexPrefixAdjustedSize(ins, attrSize, code);
2308 size += emitAdjustSizeCrc32(ins, attrSize);
2310 // Most 16-bit operand instructions will need a prefix.
2311 // This refers to 66h size prefix override.
2313 if (attrSize == EA_2BYTE && ins != INS_movzx && ins != INS_movsx)
2318 return size + emitInsSize(code);
// Static-address size estimate for an instruction that also carries the immediate
// `val`: the immediate's size plus emitInsSizeCV(id, code).
2321 inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code, int val)
2323 instruction ins = id->idIns();
2324 UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize());
// The immediate qualifies for a one-byte encoding only if it survives a round
// trip through signed char and the instruction is neither mov nor test.
2325 bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
// Unlike the address-mode and stack-variable variants, the clamp to four bytes
// is compiled only for non-AMD64 targets here.
2327 #ifndef _TARGET_AMD64_
2328 // occasionally longs get here on x86
2329 if (valSize > sizeof(INT32))
2330 valSize = sizeof(INT32);
2331 #endif // !_TARGET_AMD64_
2333 if (id->idIsCnsReloc())
2335 valInByte = false; // relocs can't be placed in a byte
2336 assert(valSize == sizeof(INT32));
2341 valSize = sizeof(char);
2344 return valSize + emitInsSizeCV(id, code);
2347 /*****************************************************************************
2349 * Allocate instruction descriptors for instructions with address modes.
// size - operand size passed to the allocator.
// dsp  - address-mode displacement to record in the descriptor.
// A displacement outside [AM_DISP_MIN, AM_DISP_MAX] gets an instrDescAmd: the
// inline amDisp field holds the marker AM_DISP_BIG_VAL and the real value goes
// into idaAmdVal. Otherwise a plain instrDesc stores the displacement inline.
2352 inline emitter::instrDesc* emitter::emitNewInstrAmd(emitAttr size, ssize_t dsp)
2354 if (dsp < AM_DISP_MIN || dsp > AM_DISP_MAX)
2356 instrDescAmd* id = emitAllocInstrAmd(size);
2358 id->idSetIsLargeDsp();
2360 id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2362 id->idaAmdVal = dsp;
2368 instrDesc* id = emitAllocInstr(size);
2370 id->idAddr()->iiaAddrMode.amDisp = dsp;
2371 assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2377 /*****************************************************************************
2379 * Set the displacement field in an instruction. Only handles instrDescAmd type.
// A displacement outside [AM_DISP_MIN, AM_DISP_MAX] marks the descriptor as
// large-displacement, stores the marker AM_DISP_BIG_VAL inline and the real
// value in idaAmdVal. Otherwise the descriptor is marked small-displacement and
// the value is stored inline.
2382 inline void emitter::emitSetAmdDisp(instrDescAmd* id, ssize_t dsp)
2384 if (dsp < AM_DISP_MIN || dsp > AM_DISP_MAX)
2386 id->idSetIsLargeDsp();
2388 id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2390 id->idaAmdVal = dsp;
2394 id->idSetIsSmallDsp();
2395 id->idAddr()->iiaAddrMode.amDisp = dsp;
2396 assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2400 /*****************************************************************************
2402 * Allocate an instruction descriptor for an instruction that uses both
2403 * an address mode displacement and a constant.
// Three descriptor shapes are used, depending on what fits inline:
//   - displacement within [AM_DISP_MIN, AM_DISP_MAX]: descriptor from
//     emitNewInstrCns(size, cns) with the displacement stored inline;
//   - large displacement, small constant: instrDescAmd with the constant stored
//     via idSmallCns();
//   - large displacement, large constant: instrDescCnsAmd holding both values in
//     its own fields.
2406 emitter::instrDesc* emitter::emitNewInstrAmdCns(emitAttr size, ssize_t dsp, int cns)
2408 if (dsp >= AM_DISP_MIN && dsp <= AM_DISP_MAX)
2410 instrDesc* id = emitNewInstrCns(size, cns);
2411 id->idAddr()->iiaAddrMode.amDisp = dsp;
2412 assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2418 if (instrDesc::fitsInSmallCns(cns))
2420 instrDescAmd* id = emitAllocInstrAmd(size);
2422 id->idSetIsLargeDsp();
2424 id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2426 id->idaAmdVal = dsp;
2428 id->idSmallCns(cns);
2434 instrDescCnsAmd* id = emitAllocInstrCnsAmd(size);
2436 id->idSetIsLargeCns();
2437 id->idacCnsVal = cns;
2439 id->idSetIsLargeDsp();
2441 id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2443 id->idacAmdVal = dsp;
2450 /*****************************************************************************
2452 * The next instruction will be a loop head entry point
2453 * So insert a dummy instruction here to ensure that
2454 * the x86 I-cache alignment rule is followed.
2457 void emitter::emitLoopAlign()
2459 /* Insert a pseudo-instruction to ensure that we align
2460 the next instruction properly */
// The INS_align pseudo-instruction is recorded with a code size of 15 bytes,
// and the current instruction group's size grows by the same amount.
2462 instrDesc* id = emitNewInstrSmall(EA_1BYTE);
2463 id->idIns(INS_align);
2464 id->idCodeSize(15); // We may need to skip up to 15 bytes of code
2465 emitCurIGsize += 15;
2468 /*****************************************************************************
2470 * Add a NOP instruction of the given size.
// size - number of bytes recorded as the descriptor's code size and added to
//        the current instruction group's size. The descriptor uses format IF_NONE.
2473 void emitter::emitIns_Nop(unsigned size)
2477 instrDesc* id = emitNewInstr();
2479 id->idInsFmt(IF_NONE);
2480 id->idCodeSize(size);
2483 emitCurIGsize += size;
2486 /*****************************************************************************
2488 * Add an instruction with no operands.
// The descriptor uses format IF_NONE. The instruction's size is derived from
// which bytes of its MR opcode are populated and is added to the current
// instruction group's size.
2490 void emitter::emitIns(instruction ins)
2493 instrDesc* id = emitNewInstr();
2494 code_t code = insCodeMR(ins);
2498 // We cannot have #ifdef inside macro expansion.
2500 (ins == INS_cdq || ins == INS_int3 || ins == INS_lock || ins == INS_leave || ins == INS_movsb ||
2501 ins == INS_movsd || ins == INS_movsp || ins == INS_nop || ins == INS_r_movsb || ins == INS_r_movsd ||
2502 ins == INS_r_movsp || ins == INS_r_stosb || ins == INS_r_stosd || ins == INS_r_stosp || ins == INS_ret ||
2503 ins == INS_sahf || ins == INS_stosb || ins == INS_stosd || ins == INS_stosp
2504 // These instructions take zero operands
2505 || ins == INS_vzeroupper || ins == INS_lfence || ins == INS_mfence || ins == INS_sfence);
2511 assert(!hasRexPrefix(code)); // Can't have a REX bit with no operands, right?
2513 if (code & 0xFF000000)
2515 sz = 2; // TODO-XArch-Bug?: Shouldn't this be 4? Or maybe we should assert that we don't see this case.
2517 else if (code & 0x00FF0000)
2521 else if (code & 0x0000FF00)
2530 // vzeroupper includes its 2-byte VEX prefix in its MR code.
2531 assert((ins != INS_vzeroupper) || (sz == 3));
2533 insFormat fmt = IF_NONE;
2540 emitCurIGsize += sz;
2543 // Add an instruction with no operands, but whose encoding depends on the size
2544 // (Only CDQ/CQO currently)
// ins  - the instruction; asserted to be INS_cdq.
// attr - operand size; determines the VEX adjustment and whether a REX.W prefix
//        is counted in the size.
2545 void emitter::emitIns(instruction ins, emitAttr attr)
2548 instrDesc* id = emitNewInstr(attr);
2549 code_t code = insCodeMR(ins);
2550 assert(ins == INS_cdq);
// The MR opcode must fit in its lowest byte.
2551 assert((code & 0xFFFFFF00) == 0);
2554 insFormat fmt = IF_NONE;
2556 sz += emitGetVexPrefixAdjustedSize(ins, attr, code);
2557 if (TakesRexWPrefix(ins, attr))
2559 sz += emitGetRexPrefixSize(ins);
2567 emitCurIGsize += sz;
2570 //------------------------------------------------------------------------
2571 // emitMapFmtForIns: map the instruction format based on the instruction.
2572 // Shift-by-a-constant instructions have a special format.
2575 // fmt - the instruction format to map
2576 // ins - the instruction
2579 // The mapped instruction format.
2581 emitter::insFormat emitter::emitMapFmtForIns(insFormat fmt, instruction ins)
2613 //------------------------------------------------------------------------
2614 // emitMapFmtAtoM: map the address mode formats ARD, ARW, and AWR to their direct address equivalents.
2617 // fmt - the instruction format to map
2620 // The mapped instruction format.
2622 emitter::insFormat emitter::emitMapFmtAtoM(insFormat fmt)
// Each case returns the format with the same operand pattern in which the
// address-mode component (ARD/AWR) is replaced by the corresponding memory
// component (MRD/MWR).
2637 case IF_RWR_ARD_CNS:
2638 return IF_RWR_MRD_CNS;
2641 case IF_RRW_ARD_CNS:
2642 return IF_RRW_MRD_CNS;
2643 case IF_RWR_RRD_ARD:
2644 return IF_RWR_RRD_MRD;
2645 case IF_RWR_RRD_ARD_CNS:
2646 return IF_RWR_RRD_MRD_CNS;
2647 case IF_RWR_RRD_ARD_RRD:
2648 return IF_RWR_RRD_MRD_RRD;
2664 case IF_AWR_RRD_CNS:
2665 return IF_MWR_RRD_CNS;
2675 //------------------------------------------------------------------------
2676 // emitHandleMemOp: For a memory operand, fill in the relevant fields of the instrDesc.
2679 // indir - the memory operand.
2680 // id - the instrDesc to fill in.
2681 // fmt - the instruction format to use. This must be one of the ARD, AWR, or ARW formats. If necessary (such as for
2682 // GT_CLS_VAR_ADDR), this function will map it to the correct format.
2683 // ins - the instruction we are generating. This might affect the instruction format we choose.
2686 // The correctly sized instrDesc must already be created, e.g., via emitNewInstrAmd() or emitNewInstrAmdCns();
2689 // For base address of int constant:
2690 // -- the caller must have added the int constant base to the instrDesc when creating it via
2691 // emitNewInstrAmdCns().
2692 // For simple address modes (base + scale * index + offset):
2693 // -- the base register, index register, and scale factor are set.
2694 // -- the caller must have added the addressing mode offset int constant to the instrDesc when creating it via
2695 // emitNewInstrAmdCns().
2697 // The instruction format is set.
2699 // idSetIsDspReloc() is called if necessary.
2701 void emitter::emitHandleMemOp(GenTreeIndir* indir, instrDesc* id, insFormat fmt, instruction ins)
2703 assert(fmt != IF_NONE);
2705 GenTree* memBase = indir->Base();
// Case 1: the base is a contained GT_CLS_VAR_ADDR. The field handle is stored in
// the descriptor and the format is first mapped to its direct-address equivalent.
2707 if ((memBase != nullptr) && memBase->isContained() && (memBase->OperGet() == GT_CLS_VAR_ADDR))
2709 CORINFO_FIELD_HANDLE fldHnd = memBase->gtClsVar.gtClsVarHnd;
2711 // Static always need relocs
2712 if (!jitStaticFldIsGlobAddr(fldHnd))
2715 // fgMorphField() changes any statics that won't fit into 32-bit addresses into
2716 // constants with an indir, rather than GT_CLS_VAR, based on reloc type hint given
2717 // by VM. Hence emitter should always mark GT_CLS_VAR_ADDR as relocatable.
2719 // Data section constants: these get allocated close to code block of the method and
2720 // always addressable IP relative. These too should be marked as relocatable.
2722 id->idSetIsDspReloc();
2725 id->idAddr()->iiaFieldHnd = fldHnd;
2726 id->idInsFmt(emitMapFmtForIns(emitMapFmtAtoM(fmt), ins));
// Case 2: the base is a contained integer constant, i.e. an absolute address.
// No base or index register is recorded.
2728 else if ((memBase != nullptr) && memBase->IsCnsIntOrI() && memBase->isContained())
2730 // Absolute addresses marked as contained should fit within the base of addr mode.
2731 assert(memBase->AsIntConCommon()->FitsInAddrBase(emitComp));
2733 // Either not generating relocatable code, or addr must be an icon handle, or the
2734 // constant is zero (which we won't generate a relocation for).
2735 assert(!emitComp->opts.compReloc || memBase->IsIconHandle() || memBase->IsIntegralConst(0));
2737 if (memBase->AsIntConCommon()->AddrNeedsReloc(emitComp))
2739 id->idSetIsDspReloc();
2742 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
2743 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
2744 id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1; // for completeness
2746 id->idInsFmt(emitMapFmtForIns(fmt, ins));
2748 // Absolute address must have already been set in the instrDesc constructor.
2749 assert(emitGetInsAmdAny(id) == memBase->AsIntConCommon()->IconValue());
// Remaining case: a general address mode. Base and index registers come from the
// indir's operands (REG_NA when absent) and the scale from indir->Scale().
2753 if (memBase != nullptr)
2755 id->idAddr()->iiaAddrMode.amBaseReg = memBase->gtRegNum;
2759 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
2762 if (indir->HasIndex())
2764 id->idAddr()->iiaAddrMode.amIndxReg = indir->Index()->gtRegNum;
2768 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
2770 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(indir->Scale());
2772 id->idInsFmt(emitMapFmtForIns(fmt, ins));
2774 // disp must have already been set in the instrDesc constructor.
2775 assert(emitGetInsAmdAny(id) == indir->Offset()); // make sure "disp" is stored properly
2779 // Takes care of storing all incoming register parameters
2780 // into its corresponding shadow space (defined by the x64 ABI)
2781 void emitter::spillIntArgRegsToShadowSlots()
// This may only be called while the prolog is being generated.
2787 assert(emitComp->compGeneratingProlog);
// One pointer-sized store is created per integer argument register.
2789 for (argNum = 0; argNum < MAX_REG_ARG; ++argNum)
2791 regNumber argReg = intArgRegs[argNum];
2793 // The offsets for the shadow space start at RSP + 8
2794 // (right before the caller return address)
2795 int offset = (argNum + 1) * EA_PTRSIZE;
2797 id = emitNewInstrAmd(EA_PTRSIZE, offset);
// The destination is the address mode [REG_SPBASE + offset], with no index register.
2799 id->idInsFmt(IF_AWR_RRD);
2800 id->idAddr()->iiaAddrMode.amBaseReg = REG_SPBASE;
2801 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
2802 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(1);
2804 // The offset has already been set in the instrDesc ctor,
2805 // make sure we got it right.
2806 assert(emitGetInsAmdAny(id) == ssize_t(offset));
// The size is computed for INS_mov in its MR encoding and added to the current
// instruction group's size.
2809 sz = emitInsSizeAM(id, insCodeMR(INS_mov));
2811 emitCurIGsize += sz;
2815 //------------------------------------------------------------------------
2816 // emitInsLoadInd: Emits a "mov reg, [mem]" (or a variant such as "movzx" or "movss")
2817 // instruction for a GT_IND node.
2820 // ins - the instruction to emit
2821 // attr - the instruction operand size
2822 // dstReg - the destination register
2823 // mem - the GT_IND node
2825 void emitter::emitInsLoadInd(instruction ins, emitAttr attr, regNumber dstReg, GenTreeIndir* mem)
2827 assert(mem->OperIs(GT_IND));
2829 GenTree* addr = mem->Addr();
// Address is a static field: load through the field handle at offset 0.
2831 if (addr->OperGet() == GT_CLS_VAR_ADDR)
2833 emitIns_R_C(ins, attr, dstReg, addr->gtClsVar.gtClsVarHnd, 0);
// Address is a local variable's address: load from that local at offset 0 and
// update liveness for the local node.
2837 if (addr->OperGet() == GT_LCL_VAR_ADDR)
2839 GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
2840 emitIns_R_S(ins, attr, dstReg, varNode->GetLclNum(), 0);
2841 codeGen->genUpdateLife(varNode);
// Otherwise the address is an address mode, a contained integer constant, or a value that is
// not contained; build an instrDesc that carries the indir's offset and let
// emitHandleMemOp() fill in the memory operand.
2845 assert(addr->OperIsAddrMode() || (addr->IsCnsIntOrI() && addr->isContained()) || !addr->isContained());
2846 ssize_t offset = mem->Offset();
2847 instrDesc* id = emitNewInstrAmd(attr, offset);
2850 emitHandleMemOp(mem, id, IF_RWR_ARD, ins);
2851 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
2854 emitCurIGsize += sz;
2857 //------------------------------------------------------------------------
2858 // emitInsStoreInd: Emits a "mov [mem], reg/imm" (or a variant such as "movss")
2859 // instruction for a GT_STOREIND node.
2862 // ins - the instruction to emit
2863 // attr - the instruction operand size
2864 // mem - the GT_STOREIND node
2866 void emitter::emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* mem)
2868 assert(mem->OperIs(GT_STOREIND));
2870 GenTree* addr = mem->Addr();
2871 GenTree* data = mem->Data();
// Destination is a static field: store either a contained immediate or a register
// through the field handle at offset 0.
2873 if (addr->OperGet() == GT_CLS_VAR_ADDR)
2875 if (data->isContainedIntOrIImmed())
2877 emitIns_C_I(ins, attr, addr->gtClsVar.gtClsVarHnd, 0, (int)data->AsIntConCommon()->IconValue());
2881 assert(!data->isContained());
2882 emitIns_C_R(ins, attr, addr->gtClsVar.gtClsVarHnd, data->gtRegNum, 0);
// Destination is a local variable's address: store either a contained immediate or a
// register to that local at offset 0, then update liveness for the local node.
2887 if (addr->OperGet() == GT_LCL_VAR_ADDR)
2889 GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
2890 if (data->isContainedIntOrIImmed())
2892 emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int)data->AsIntConCommon()->IconValue());
2896 assert(!data->isContained());
2897 emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0);
2899 codeGen->genUpdateLife(varNode);
// Remaining destinations go through emitHandleMemOp(); the instrDesc is created with the
// indir's offset (and with the immediate when the data is a contained constant).
2903 ssize_t offset = mem->Offset();
2907 if (data->isContainedIntOrIImmed())
// Note that the constant is truncated to 'int' before it is stored in the instrDesc.
2909 int icon = (int)data->AsIntConCommon()->IconValue();
2910 id = emitNewInstrAmdCns(attr, offset, icon);
2912 emitHandleMemOp(mem, id, IF_AWR_CNS, ins);
2913 sz = emitInsSizeAM(id, insCodeMI(ins), icon);
2918 assert(!data->isContained());
2919 id = emitNewInstrAmd(attr, offset);
2921 emitHandleMemOp(mem, id, IF_AWR_RRD, ins);
2922 id->idReg1(data->gtRegNum);
2923 sz = emitInsSizeAM(id, insCodeMR(ins));
2928 emitCurIGsize += sz;
2931 //------------------------------------------------------------------------
2932 // emitInsStoreLcl: Emits a "mov [mem], reg/imm" (or a variant such as "movss")
2933 // instruction for a GT_STORE_LCL_VAR node.
2936 // ins - the instruction to emit
2937 // attr - the instruction operand size
2938 // varNode - the GT_STORE_LCL_VAR node
2940 void emitter::emitInsStoreLcl(instruction ins, emitAttr attr, GenTreeLclVarCommon* varNode)
2942 assert(varNode->OperIs(GT_STORE_LCL_VAR));
2943 assert(varNode->gtRegNum == REG_NA); // stack store
// The value being stored is the node's first operand.
2945 GenTree* data = varNode->gtGetOp1();
2946 codeGen->inst_set_SV_var(varNode);
// Store either a contained immediate (truncated to 'int') or the data register to the
// local's stack location at offset 0.
2948 if (data->isContainedIntOrIImmed())
2950 emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int)data->AsIntConCommon()->IconValue());
2954 assert(!data->isContained());
2955 emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0);
2957 codeGen->genUpdateLife(varNode);
2960 //------------------------------------------------------------------------
2961 // emitInsBinary: Emits an instruction for a node which takes two operands
2964 // ins - the instruction to emit
2965 // attr - the instruction operand size
2966 // dst - the destination and first source operand
2967 // src - the second source operand
2970 // i) caller of this routine needs to call genConsumeReg()
2971 // ii) caller of this routine needs to call genProduceReg()
2972 regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src)
2974 // We can only have one memory operand and only src can be a constant operand
2975 // However, the handling for a given operand type (mem, cns, or other) is fairly
2976 // consistent regardless of whether they are src or dst. As such, we will find
2977 // the type of each operand and only check them against src/dst where relevant.
2979 GenTree* memOp = nullptr;
2980 GenTree* cnsOp = nullptr;
2981 GenTree* otherOp = nullptr;
// First classification case: dst is contained, is a local field with no register assigned,
// or is used from a spill temp. src must then not be used from memory.
2983 if (dst->isContained() || (dst->isLclField() && (dst->gtRegNum == REG_NA)) || dst->isUsedFromSpillTemp())
2985 // dst can only be a modrm
2986 // dst on 3opImul isn't really the dst
2987 assert(dst->isUsedFromMemory() || (dst->gtRegNum == REG_NA) || instrIs3opImul(ins));
2988 assert(!src->isUsedFromMemory());
2992 if (src->isContained())
2994 assert(src->IsCnsIntOrI());
// Second classification case: src is contained or used from a spill temp. dst must then
// not be used from memory.
3002 else if (src->isContained() || src->isUsedFromSpillTemp())
3004 assert(!dst->isUsedFromMemory());
3007 if ((src->IsCnsIntOrI() || src->IsCnsFltOrDbl()) && !src->isUsedFromSpillTemp())
3009 assert(!src->isUsedFromMemory() || src->IsCnsFltOrDbl());
3014 assert(src->isUsedFromMemory());
3019 // At this point, we either have a memory operand or we don't.
3021 // If we don't then the logic is very simple and we will either be emitting a
3022 // `reg, immed` instruction (if src is a cns) or a `reg, reg` instruction otherwise.
3024 // If we do have a memory operand, the logic is a bit more complicated as we need
3025 // to do different things depending on the type of memory operand. These types include:
3027 // * Indirect access
3030 // * Addressing mode [base + index * scale + offset]
3034 // Most of these types (except Indirect: Class variable and Indirect: Addressing mode)
3035 // give us a local variable number and an offset and access memory on the stack
3037 // Indirect: Class variable is used for access static class variables and gives us a handle
3038 // to the memory location we read from
3040 // Indirect: Addressing mode is used for the remaining memory accesses and will give us
3041 // a base address, an index, a scale, and an offset. These are combined to let us easily
3042 // access the given memory location.
3044 // In all of the memory access cases, we determine which form to emit (e.g. `reg, [mem]`
3045 // or `[mem], reg`) by comparing memOp to src to determine which `emitIns_*` method needs
3046 // to be called. The exception is for the `[mem], immed` case (for Indirect: Class variable)
3047 // where only src can be the immediate.
3049 if (memOp != nullptr)
// varNum/offset start out as sentinels (BAD_VAR_NUM / (unsigned)-1) and are validated by
// the asserts that follow the operand-kind dispatch below.
3051 TempDsc* tmpDsc = nullptr;
3052 unsigned varNum = BAD_VAR_NUM;
3053 unsigned offset = (unsigned)-1;
// Memory operand is a spill temp: take the variable number from the temp descriptor and
// release the temp.
3055 if (memOp->isUsedFromSpillTemp())
3057 assert(memOp->IsRegOptional());
3059 tmpDsc = codeGen->getSpillTempDsc(memOp);
3060 varNum = tmpDsc->tdTempNum();
3063 codeGen->regSet.tmpRlsTemp(tmpDsc);
// Memory operand is an indirection: dispatch on the kind of its address operand.
3065 else if (memOp->isIndir())
3067 GenTreeIndir* memIndir = memOp->AsIndir();
3068 GenTree* memBase = memIndir->gtOp1;
3070 switch (memBase->OperGet())
// Address of a local variable: handled as a stack access using the local's number.
3072 case GT_LCL_VAR_ADDR:
3074 varNum = memBase->AsLclVarCommon()->GetLclNum();
3077 // Ensure that all the GenTreeIndir values are set to their defaults.
3078 assert(!memIndir->HasIndex());
3079 assert(memIndir->Scale() == 1);
3080 assert(memIndir->Offset() == 0);
// Address of a static field: emitted through the field-handle forms
// (emitIns_C, emitIns_R_C, emitIns_C_I, emitIns_C_R).
3085 case GT_CLS_VAR_ADDR:
3089 assert(otherOp == dst);
3090 assert(cnsOp == nullptr);
3092 if (instrHasImplicitRegPairDest(ins))
3094 // src is a class static variable
3095 // dst is implicit - RDX:RAX
3096 emitIns_C(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0);
3100 // src is a class static variable
3101 // dst is a register
3102 emitIns_R_C(ins, attr, dst->gtRegNum, memBase->gtClsVar.gtClsVarHnd, 0);
3107 assert(memOp == dst);
3109 if (cnsOp != nullptr)
3111 assert(cnsOp == src);
3112 assert(otherOp == nullptr);
3113 assert(src->IsCnsIntOrI());
3115 // src is a contained immediate
3116 // dst is a class static variable
3117 emitIns_C_I(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0,
3118 (int)src->gtIntConCommon.IconValue());
3122 assert(otherOp == src);
3124 // src is a register
3125 // dst is a class static variable
3126 emitIns_C_R(ins, attr, memBase->gtClsVar.gtClsVarHnd, src->gtRegNum, 0);
3130 return dst->gtRegNum;
3133 default: // Addressing mode [base + index * scale + offset]
3135 instrDesc* id = nullptr;
// With an immediate, the instrDesc must carry both the displacement and the constant;
// otherwise it carries the displacement and one register operand.
3137 if (cnsOp != nullptr)
3139 assert(memOp == dst);
3140 assert(cnsOp == src);
3141 assert(otherOp == nullptr);
3142 assert(src->IsCnsIntOrI());
3144 id = emitNewInstrAmdCns(attr, memIndir->Offset(), (int)src->gtIntConCommon.IconValue());
3148 ssize_t offset = memIndir->Offset();
3149 id = emitNewInstrAmd(attr, offset);
// The register operand is whichever of dst/src is not the memory operand.
3152 GenTree* regTree = (memOp == src) ? dst : src;
3154 // there must be one non-contained op
3155 assert(!regTree->isContained());
3156 id->idReg1(regTree->gtRegNum);
3158 assert(id != nullptr);
3160 id->idIns(ins); // Set the instruction.
3162 // Determine the instruction format
3163 insFormat fmt = IF_NONE;
3167 assert(cnsOp == nullptr);
3168 assert(otherOp == dst);
3170 if (instrHasImplicitRegPairDest(ins))
3172 fmt = emitInsModeFormat(ins, IF_ARD);
3176 fmt = emitInsModeFormat(ins, IF_RRD_ARD);
3181 assert(memOp == dst);
3183 if (cnsOp != nullptr)
3185 assert(cnsOp == src);
3186 assert(otherOp == nullptr);
3187 assert(src->IsCnsIntOrI());
3189 fmt = emitInsModeFormat(ins, IF_ARD_CNS);
3193 assert(otherOp == src);
3194 fmt = emitInsModeFormat(ins, IF_ARD_RRD);
3197 assert(fmt != IF_NONE);
3198 emitHandleMemOp(memIndir, id, fmt, ins);
3200 // Determine the instruction size
3201 UNATIVE_OFFSET sz = 0;
3205 assert(otherOp == dst);
3206 assert(cnsOp == nullptr);
3208 if (instrHasImplicitRegPairDest(ins))
3210 sz = emitInsSizeAM(id, insCode(ins));
3214 sz = emitInsSizeAM(id, insCodeRM(ins));
3219 assert(memOp == dst);
3221 if (cnsOp != nullptr)
3223 assert(memOp == dst);
3224 assert(cnsOp == src);
3225 assert(otherOp == nullptr);
3227 sz = emitInsSizeAM(id, insCodeMI(ins), (int)src->gtIntConCommon.IconValue());
3231 assert(otherOp == src);
3232 sz = emitInsSizeAM(id, insCodeMR(ins));
3240 emitCurIGsize += sz;
// dst's register is returned only when the memory operand was src; otherwise REG_NA.
3242 return (memOp == src) ? dst->gtRegNum : REG_NA;
// Memory operand is not an indirection: dispatch on the node kind itself
// (local field or local variable) to obtain varNum and offset.
3248 switch (memOp->OperGet())
3251 case GT_STORE_LCL_FLD:
3253 GenTreeLclFld* lclField = memOp->AsLclFld();
3254 varNum = lclField->GetLclNum();
3255 offset = lclField->gtLclFld.gtLclOffs;
3261 assert(memOp->IsRegOptional() || !emitComp->lvaTable[memOp->gtLclVar.gtLclNum].lvIsRegCandidate());
3262 varNum = memOp->AsLclVar()->GetLclNum();
3273 // Ensure we got a good varNum and offset.
3274 // We also need to check for `tmpDsc != nullptr` since spill temp numbers
3275 // are negative and start with -1, which also happens to be BAD_VAR_NUM.
3276 assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
3277 assert(offset != (unsigned)-1);
3281 assert(otherOp == dst);
3282 assert(cnsOp == nullptr);
3284 if (instrHasImplicitRegPairDest(ins))
3286 // src is a stack based local variable
3287 // dst is implicit - RDX:RAX
3288 emitIns_S(ins, attr, varNum, offset);
3292 // src is a stack based local variable
3293 // dst is a register
3294 emitIns_R_S(ins, attr, dst->gtRegNum, varNum, offset);
3299 assert(memOp == dst);
3300 assert((dst->gtRegNum == REG_NA) || dst->IsRegOptional());
3302 if (cnsOp != nullptr)
3304 assert(cnsOp == src);
3305 assert(otherOp == nullptr);
3306 assert(src->IsCnsIntOrI());
3308 // src is a contained immediate
3309 // dst is a stack based local variable
3310 emitIns_S_I(ins, attr, varNum, offset, (int)src->gtIntConCommon.IconValue());
3314 assert(otherOp == src);
3315 assert(!src->isContained());
3317 // src is a register
3318 // dst is a stack based local variable
3319 emitIns_S_R(ins, attr, src->gtRegNum, varNum, offset);
3323 else if (cnsOp != nullptr) // reg, immed
3325 assert(cnsOp == src);
3326 assert(otherOp == dst);
3328 if (src->IsCnsIntOrI())
3330 assert(!dst->isContained());
3331 GenTreeIntConCommon* intCns = src->AsIntConCommon();
3332 emitIns_R_I(ins, attr, dst->gtRegNum, intCns->IconValue());
// A floating-point constant is turned into a field handle via emitFltOrDblConst() and
// then read through that handle like a static field.
3336 assert(src->IsCnsFltOrDbl());
3337 GenTreeDblCon* dblCns = src->AsDblCon();
3339 CORINFO_FIELD_HANDLE hnd = emitFltOrDblConst(dblCns->gtDconVal, emitTypeSize(dblCns));
3340 emitIns_R_C(ins, attr, dst->gtRegNum, hnd, 0);
// No memory operand and no constant: both operands are in registers.
3345 assert(otherOp == nullptr);
3346 assert(!src->isContained() && !dst->isContained());
3348 if (instrHasImplicitRegPairDest(ins))
3350 emitIns_R(ins, attr, src->gtRegNum);
3354 emitIns_R_R(ins, attr, dst->gtRegNum, src->gtRegNum);
3358 return dst->gtRegNum;
3361 //------------------------------------------------------------------------
3362 // emitInsRMW: Emit logic for Read-Modify-Write binary instructions.
3364 // Responsible for emitting a single instruction that will perform an operation of the form:
3365 // *addr = *addr <BinOp> src
3370 // ins - instruction to generate
3371 // attr - emitter attribute for instruction
3372 // storeInd - indir for RMW addressing mode
3373 // src - source operand of instruction
3376 // Lowering has taken care of recognizing the StoreInd pattern of:
3377 // StoreInd( AddressTree, BinOp( Ind ( AddressTree ), Operand ) )
3378 // The address to store is already sitting in a register.
3381 // This is a no-produce operation, meaning that no register output will
3382 // be produced for future use in the code stream.
3384 void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeInd, GenTree* src)
// Look through any reload/copy node to check which kind of address node is being used.
3386 GenTree* addr = storeInd->Addr();
3387 addr = addr->gtSkipReloadOrCopy();
3388 assert(addr->OperGet() == GT_LCL_VAR || addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA ||
3389 addr->OperGet() == GT_CLS_VAR_ADDR || addr->OperGet() == GT_CNS_INT);
3391 instrDesc* id = nullptr;
// The indir's offset is only read when the address is not a static field address.
3395 if (addr->OperGet() != GT_CLS_VAR_ADDR)
3397 offset = storeInd->Offset();
// Source is a contained immediate: "[mem], imm" form; the constant is truncated to 'int'.
3400 if (src->isContainedIntOrIImmed())
3402 GenTreeIntConCommon* intConst = src->AsIntConCommon();
3403 int iconVal = (int)intConst->IconValue();
3419 id = emitNewInstrAmdCns(attr, offset, iconVal);
3420 emitHandleMemOp(storeInd, id, IF_ARW_CNS, ins);
3422 sz = emitInsSizeAM(id, insCodeMI(ins), iconVal);
// Source is in a register: "[mem], reg" form.
3426 assert(!src->isContained()); // there must be one non-contained src
3429 id = emitNewInstrAmd(attr, offset);
3430 emitHandleMemOp(storeInd, id, IF_ARW_RRD, ins);
3431 id->idReg1(src->gtRegNum);
3433 sz = emitInsSizeAM(id, insCodeMR(ins));
3439 emitCurIGsize += sz;
3442 //------------------------------------------------------------------------
3443 // emitInsRMW: Emit logic for Read-Modify-Write unary instructions.
3445 // Responsible for emitting a single instruction that will perform an operation of the form:
3446 // *addr = UnaryOp *addr
3451 // ins - instruction to generate
3452 // attr - emitter attribute for instruction
3453 // storeInd - indir for RMW addressing mode
3456 // Lowering has taken care of recognizing the StoreInd pattern of:
3457 // StoreInd( AddressTree, UnaryOp( Ind ( AddressTree ) ) )
3458 // The address to store is already sitting in a register.
3461 // This is a no-produce operation, meaning that no register output will
3462 // be produced for future use in the code stream.
3464 void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeInd)
// Look through any reload/copy node to check which kind of address node is being used.
3466 GenTree* addr = storeInd->Addr();
3467 addr = addr->gtSkipReloadOrCopy();
3468 assert(addr->OperGet() == GT_LCL_VAR || addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_CLS_VAR_ADDR ||
3469 addr->OperGet() == GT_LEA || addr->OperGet() == GT_CNS_INT);
// The indir's offset is only read when the address is not a static field address.
3472 if (addr->OperGet() != GT_CLS_VAR_ADDR)
3474 offset = storeInd->Offset();
// The only operand is the memory location, which is both read and written (IF_ARW).
3477 instrDesc* id = emitNewInstrAmd(attr, offset);
3478 emitHandleMemOp(storeInd, id, IF_ARW, ins);
3480 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
3484 emitCurIGsize += sz;
3487 /*****************************************************************************
3489 * Add an instruction referencing a single register.
3492 void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
// Only the size part of 'attr' is used for the checks below; it may not exceed pointer size.
3494 emitAttr size = EA_SIZE(attr);
3496 assert(size <= EA_PTRSIZE);
3497 noway_assert(emitVerifyEncodable(ins, size, reg));
3500 instrDesc* id = emitNewInstrSmall(attr);
3506 #ifdef _TARGET_AMD64_
3508 sz = 2; // x64 has no 1-byte opcode (it is the same encoding as the REX prefix)
3510 #else // !_TARGET_AMD64_
3512 if (size == EA_1BYTE)
3513 sz = 2; // Use the long form as the small one has no 'w' bit
3515 sz = 1; // Use short form
3517 #endif // !_TARGET_AMD64_
3526 /* We don't currently push/pop small values */
3528 assert(size == EA_PTRSIZE);
3535 /* All the sixteen INS_setCCs are contiguous. */
3537 if (INS_seto <= ins && ins <= INS_setg)
3539 // Rough check that we used the endpoints for the range check
3541 assert(INS_seto + 0xF == INS_setg);
3543 // The caller must specify EA_1BYTE for 'attr'
3545 assert(attr == EA_1BYTE);
3547 /* We expect this to always be a 'big' opcode */
3549 assert(insEncodeMRreg(ins, reg, attr, insCodeMR(ins)) & 0x00FF0000);
// NOTE(review): emitInsModeFormat presumably adjusts IF_RRD to the form matching how 'ins'
// uses its operand - confirm against its definition.
3562 insFormat fmt = emitInsModeFormat(ins, IF_RRD);
3568 // 16-bit operand instructions will need a prefix.
3569 // This refers to 66h size prefix override.
3570 if (size == EA_2BYTE)
3576 sz += emitGetVexPrefixAdjustedSize(ins, attr, insEncodeMRreg(ins, reg, attr, insCodeMR(ins)));
// A REX prefix is accounted for when the register is an extended register or the
// instruction takes REX.W for this operand size.
3579 if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, attr))
3581 sz += emitGetRexPrefixSize(ins);
3587 emitCurIGsize += sz;
3589 emitAdjustStackDepthPushPop(ins);
3592 /*****************************************************************************
3594 * Add an instruction referencing a register and a constant.
3597 void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t val)
3599 emitAttr size = EA_SIZE(attr);
3601 // Allow emitting SSE2/AVX SIMD instructions of R_I form that can specify EA_16BYTE or EA_32BYTE
3602 assert(size <= EA_PTRSIZE || IsSSEOrAVXInstruction(ins));
3604 noway_assert(emitVerifyEncodable(ins, size, reg));
3606 #ifdef _TARGET_AMD64_
3607 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
3608 // all other opcodes take a sign-extended 4-byte immediate
3609 noway_assert(size < EA_8BYTE || ins == INS_mov || ((int)val == val && !EA_IS_CNS_RELOC(attr)));
3614 insFormat fmt = emitInsModeFormat(ins, IF_RRD_CNS);
// The immediate is a candidate for the one-byte encoding when it survives a round trip through
// 'signed char'; INS_mov and INS_test are excluded from that encoding.
3615 bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
3617 // BT reg,imm might be useful but it requires special handling of the immediate value
3618 // (it is always encoded in a byte). Let's not complicate things until this is needed.
3619 assert(ins != INS_bt);
3621 // Figure out the size of the instruction
3625 #ifdef _TARGET_AMD64_
3626 // mov reg, imm64 is equivalent to mov reg, imm32 if the high order bits are all 0
3627 // and this isn't a reloc constant.
3628 if (((size > EA_4BYTE) && (0 == (val & 0xFFFFFFFF00000000LL))) && !EA_IS_CNS_RELOC(attr))
3630 attr = size = EA_4BYTE;
3633 if (size > EA_4BYTE)
3635 sz = 9; // Really it is 10, but we'll add one more later
3638 #endif // _TARGET_AMD64_
3653 valInByte = true; // shift amount always placed in a byte
3658 if (EA_IS_CNS_RELOC(attr))
3660 valInByte = false; // relocs can't be placed in a byte
3665 if (IsSSEOrAVXInstruction(ins))
3669 else if (size == EA_1BYTE && reg == REG_EAX && !instrIs3opImul(ins))
3680 if (reg == REG_EAX && !instrIs3opImul(ins))
3689 #ifdef _TARGET_AMD64_
3690 if (size > EA_4BYTE)
3692 // We special-case anything that takes a full 8-byte constant.
3696 #endif // _TARGET_AMD64_
3698 sz += EA_SIZE_IN_BYTES(attr);
3705 sz += emitGetVexPrefixSize(ins, attr);
3707 // Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a
3708 // 64-bit sized operand (REX.W). Note that IMUL in our encoding is special, with a "built-in", implicit, target
3709 // register. So we also need to check if that built-in register is an extended register.
3710 if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, size) || instrIsExtendedReg3opImul(ins))
3712 sz += emitGetRexPrefixSize(ins);
// The instrDesc is created only after 'attr' may have been narrowed to EA_4BYTE above.
3715 id = emitNewInstrSC(attr, val);
3720 // 16-bit operand instructions will need a prefix
3721 if (size == EA_2BYTE)
3729 emitCurIGsize += sz;
3733 emitAdjustStackDepth(ins, val);
3737 /*****************************************************************************
3739 * Add an instruction referencing an integer constant.
3742 void emitter::emitIns_I(instruction ins, emitAttr attr, int val)
// The immediate is a candidate for the one-byte encoding when it survives a round trip
// through 'signed char'.
3746 bool valInByte = ((signed char)val == val);
3748 #ifdef _TARGET_AMD64_
3749 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
3750 // all other opcodes take a sign-extended 4-byte immediate
3751 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
3754 if (EA_IS_CNS_RELOC(attr))
3756 valInByte = false; // relocs can't be placed in a byte
// 2 bytes when the immediate fits in a byte, 5 bytes otherwise.
3772 sz = valInByte ? 2 : 5;
3776 NO_WAY("unexpected instruction");
3779 id = emitNewInstrSC(attr, val);
3781 id->idInsFmt(IF_CNS);
3785 emitCurIGsize += sz;
3787 emitAdjustStackDepthPushPop(ins);
3790 /*****************************************************************************
3792 * Add a "jump through a table" instruction.
3795 void emitter::emitIns_IJ(emitAttr attr, regNumber reg, unsigned base)
3797 assert(EA_SIZE(attr) == EA_4BYTE);
// Base size of the INS_i_jmp encoding used here; a REX prefix is added below when 'reg'
// is an extended register.
3799 UNATIVE_OFFSET sz = 3 + 4;
3800 const instruction ins = INS_i_jmp;
3802 if (IsExtendedReg(reg, attr))
3804 sz += emitGetRexPrefixSize(ins);
// 'base' is stored as the displacement of the instrDesc.
3807 instrDesc* id = emitNewInstrAmd(attr, base);
// The address mode has no base register; 'reg' is the index register, with the OPSZP scale.
3810 id->idInsFmt(IF_ARD);
3811 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
3812 id->idAddr()->iiaAddrMode.amIndxReg = reg;
3813 id->idAddr()->iiaAddrMode.amScale = emitter::OPSZP;
3816 id->idDebugOnlyInfo()->idMemCookie = base;
3822 emitCurIGsize += sz;
3825 /*****************************************************************************
3827 * Add an instruction with a static data member operand. If 'attr' is marked as
3828 * an offset (EA_IS_OFFSET), the instruction operates on the address of the static
3829 * member instead of its value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]").
3832 void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs)
3834 // Statics always need relocs
3835 if (!jitStaticFldIsGlobAddr(fldHnd))
3837 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
3843 /* Are we pushing the offset of the class variable? */
3845 if (EA_IS_OFFSET(attr))
// Only INS_push is supported in the "offset of" form; its size is one byte plus a
// pointer-sized operand.
3847 assert(ins == INS_push);
3848 sz = 1 + TARGET_POINTER_SIZE;
3850 id = emitNewInstrDsp(EA_1BYTE, offs);
3852 id->idInsFmt(IF_MRD_OFF);
// Otherwise the instruction operates on the value of the static member.
3856 insFormat fmt = emitInsModeFormat(ins, IF_MRD);
3858 id = emitNewInstrDsp(attr, offs);
3861 sz = emitInsSizeCV(id, insCodeMR(ins));
3865 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
3867 if (TakesRexWPrefix(ins, attr))
3870 sz += emitGetRexPrefixSize(ins);
3873 id->idAddr()->iiaFieldHnd = fldHnd;
3878 emitCurIGsize += sz;
3880 emitAdjustStackDepthPushPop(ins);
3883 /*****************************************************************************
3885 * Add an instruction with two register operands.
3888 void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2)
3890 emitAttr size = EA_SIZE(attr);
3892 /* We don't want to generate any useless mov instructions! */
3893 CLANG_FORMAT_COMMENT_ANCHOR;
3895 #ifdef _TARGET_AMD64_
3896 // Same-reg 4-byte mov can be useful because it performs a
3897 // zero-extension to 8 bytes.
3898 assert(ins != INS_mov || reg1 != reg2 || size == EA_4BYTE);
3900 assert(ins != INS_mov || reg1 != reg2);
3901 #endif // _TARGET_AMD64_
// Operand sizes up to EA_32BYTE are accepted here.
3903 assert(size <= EA_32BYTE);
3904 noway_assert(emitVerifyEncodable(ins, size, reg1, reg2));
3906 UNATIVE_OFFSET sz = emitInsSizeRR(ins, reg1, reg2, attr);
3908 if (Is4ByteSSEInstruction(ins))
3910 // The 4-Byte SSE instructions require one additional byte
3914 /* Special case: "XCHG" uses a different format */
3915 insFormat fmt = (ins == INS_xchg) ? IF_RRW_RRW : emitInsModeFormat(ins, IF_RRD_RRD);
3917 instrDesc* id = emitNewInstrSmall(attr);
3925 emitCurIGsize += sz;
3928 /*****************************************************************************
3930 * Add an instruction with two register operands and an integer constant.
3933 void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival)
3935 // SSE2 version requires 5 bytes and some SSE/AVX version 6 bytes
// The default of 4 bytes applies to instructions that are neither SSE nor AVX.
3936 UNATIVE_OFFSET sz = 4;
3937 if (IsSSEOrAVXInstruction(ins))
3939 // AVX: 3 byte VEX prefix + 1 byte opcode + 1 byte ModR/M + 1 byte immediate
3940 // SSE (Is4ByteSSEInstruction): 4 byte opcode + 1 byte ModR/M + 1 byte immediate
3941 // SSE (all others): 3 byte opcode + 1 byte ModR/M + 1 byte immediate
3942 sz = (UseVEXEncoding() || Is4ByteSSEInstruction(ins)) ? 6 : 5;
3945 #ifdef _TARGET_AMD64_
3946 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
3947 // all other opcodes take a sign-extended 4-byte immediate
3948 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
3951 instrDesc* id = emitNewInstrSC(attr, ival);
// A REX prefix is accounted for when either register is an extended register.
3954 if (IsExtendedReg(reg1, attr) || IsExtendedReg(reg2, attr))
3956 sz += emitGetRexPrefixSize(ins);
// pextrq/pinsrq get additional handling when VEX encoding is not in use.
3959 if ((ins == INS_pextrq || ins == INS_pinsrq) && !UseVEXEncoding())
3965 id->idInsFmt(IF_RRW_RRW_CNS);
3971 emitCurIGsize += sz;
//------------------------------------------------------------------------
// emitIns_AR: Emits a prefetch instruction whose only operand is the memory at [base + offs].
//
// Arguments:
//    ins  - the instruction to emit; asserted to be one of prefetcht0, prefetcht1,
//           prefetcht2 or prefetchnta
//    attr - the emit attribute
//    base - the base register of the address (no index register is used)
//    offs - the displacement from base
//
3974 void emitter::emitIns_AR(instruction ins, emitAttr attr, regNumber base, int offs)
3976 assert(ins == INS_prefetcht0 || ins == INS_prefetcht1 || ins == INS_prefetcht2 || ins == INS_prefetchnta);
3978 instrDesc* id = emitNewInstrAmd(attr, offs);
3982 id->idInsFmt(IF_ARD);
3983 id->idAddr()->iiaAddrMode.amBaseReg = base;
3984 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
3986 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
3990 emitCurIGsize += sz;
3993 //------------------------------------------------------------------------
3994 // emitIns_AR_R_R: emits the code for an instruction that takes a base memory register, two register operands
3995 // and that does not return a value
3998 // ins -- The instruction being emitted
3999 // attr -- The emit attribute
4000 // (there is no target register argument: the memory at [base + offs] is the first operand)
4001 // op2Reg -- The register of the second operand
4002 // op3Reg -- The register of the third operand
4003 // base -- The base register used for the memory address (first operand)
4004 // offs -- The offset from base
4006 void emitter::emitIns_AR_R_R(
4007 instruction ins, emitAttr attr, regNumber op2Reg, regNumber op3Reg, regNumber base, int offs)
// Only three-operand SSE/AVX instructions are accepted.
4009 assert(IsSSEOrAVXInstruction(ins));
4010 assert(IsThreeOperandAVXInstruction(ins));
4012 instrDesc* id = emitNewInstrAmd(attr, offs);
// The memory operand is [base + offs] with no index register.
4018 id->idInsFmt(IF_AWR_RRD_RRD);
4019 id->idAddr()->iiaAddrMode.amBaseReg = base;
4020 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4022 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
4026 emitCurIGsize += sz;
//------------------------------------------------------------------------
// emitIns_R_A: Emits an instruction with a register operand and a memory operand that is
//              described by an indirection node (IF_RRW_ARD format).
//
// Arguments:
//    ins   - the instruction to emit
//    attr  - the emit attribute
//    reg1  - the register operand
//    indir - the indirection node describing the memory operand; its offset is stored in
//            the instrDesc and the rest is filled in by emitHandleMemOp()
//
4029 void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir)
4031 ssize_t offs = indir->Offset();
4032 instrDesc* id = emitNewInstrAmd(attr, offs);
4037 emitHandleMemOp(indir, id, IF_RRW_ARD, ins);
4039 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
4043 emitCurIGsize += sz;
//------------------------------------------------------------------------
// emitIns_R_A_I: Emits an SSE/AVX instruction with a register operand, a memory operand that
//                is described by an indirection node, and an immediate (IF_RRW_ARD_CNS format).
//
// Arguments:
//    ins   - the instruction to emit; asserted to be an SSE or AVX instruction
//    attr  - the emit attribute
//    reg1  - the register operand
//    indir - the indirection node describing the memory operand
//    ival  - the immediate value
//
4046 void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival)
4048 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4049 assert(IsSSEOrAVXInstruction(ins));
// The instrDesc carries both the indir's offset and the immediate.
4051 ssize_t offs = indir->Offset();
4052 instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4057 emitHandleMemOp(indir, id, IF_RRW_ARD_CNS, ins);
4059 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
4061 if (Is4ByteSSEInstruction(ins))
4063 // The 4-Byte SSE instructions require two additional bytes
4070 emitCurIGsize += sz;
//------------------------------------------------------------------------
// emitIns_R_AR_I: Emits an SSE/AVX instruction with a register operand, a memory operand at
//                 [base + offs], and an immediate (IF_RRW_ARD_CNS format).
//
// Arguments:
//    ins  - the instruction to emit; asserted to be an SSE or AVX instruction
//    attr - the emit attribute
//    reg1 - the register operand
//    base - the base register of the address (no index register is used)
//    offs - the displacement from base
//    ival - the immediate value
//
4073 void emitter::emitIns_R_AR_I(instruction ins, emitAttr attr, regNumber reg1, regNumber base, int offs, int ival)
4075 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4076 assert(IsSSEOrAVXInstruction(ins));
4078 instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4083 id->idInsFmt(IF_RRW_ARD_CNS);
4084 id->idAddr()->iiaAddrMode.amBaseReg = base;
4085 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4087 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
4089 if (Is4ByteSSEInstruction(ins))
4091 // The 4-Byte SSE instructions require two additional bytes
4098 emitCurIGsize += sz;
//------------------------------------------------------------------------
// emitIns_R_C_I: Emits an SSE/AVX instruction with a register operand, a static field memory
//                operand, and an immediate (IF_RRW_MRD_CNS format).
//
// Arguments:
//    ins    - the instruction to emit; asserted to be an SSE or AVX instruction
//    attr   - the emit attribute; the displacement-reloc flag is added unless
//             jitStaticFldIsGlobAddr(fldHnd) is true
//    reg1   - the register operand
//    fldHnd - the handle of the static field
//    offs   - the offset stored as the displacement of the instrDesc
//    ival   - the immediate value
//
4101 void emitter::emitIns_R_C_I(
4102 instruction ins, emitAttr attr, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
4104 // Statics always need relocs
4105 if (!jitStaticFldIsGlobAddr(fldHnd))
4107 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4110 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4111 assert(IsSSEOrAVXInstruction(ins));
// Note the argument order of emitNewInstrCnsDsp: the constant comes before the displacement.
4113 instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs);
4116 id->idInsFmt(IF_RRW_MRD_CNS);
4118 id->idAddr()->iiaFieldHnd = fldHnd;
4120 UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival);
4122 if (Is4ByteSSEInstruction(ins))
4124 // The 4-Byte SSE instructions require two additional bytes
4131 emitCurIGsize += sz;
//------------------------------------------------------------------------
// emitIns_R_S_I: Emits an SSE/AVX instruction with a register operand, a stack-based memory
//                operand, and an immediate (IF_RRW_SRD_CNS format).
//
// Arguments:
//    ins  - the instruction to emit; asserted to be an SSE or AVX instruction
//    attr - the emit attribute
//    reg1 - the register operand
//    varx - together with offs, identifies the stack location via initLclVarAddr(varx, offs)
//    offs - the offset passed along with varx
//    ival - the immediate value
//
4134 void emitter::emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs, int ival)
4136 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4137 assert(IsSSEOrAVXInstruction(ins));
4139 instrDesc* id = emitNewInstrCns(attr, ival);
4142 id->idInsFmt(IF_RRW_SRD_CNS);
4144 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
4147 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4150 UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival);
4152 if (Is4ByteSSEInstruction(ins))
4154 // The 4-Byte SSE instructions require two additional bytes
4161 emitCurIGsize += sz;
// emitIns_R_R_A: adds a three-operand AVX instruction whose operands are two registers
// and a memory operand described by a GenTreeIndir.
//   ins   - the instruction (asserted to be SSE/AVX and a three-operand AVX instruction)
//   attr  - the emit attribute
//   reg1  - the first register operand
//   reg2  - the second register operand
//   indir - the indirection; its Offset() is passed on as the displacement
4164 void emitter::emitIns_R_R_A(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir)
4166 assert(IsSSEOrAVXInstruction(ins));
4167 assert(IsThreeOperandAVXInstruction(ins));
4169 ssize_t offs = indir->Offset();
4170 instrDesc* id = emitNewInstrAmd(attr, offs);
// The descriptor, the indirection and the IF_RWR_RRD_ARD format are handed to emitHandleMemOp.
4176 emitHandleMemOp(indir, id, IF_RWR_RRD_ARD, ins);
4178 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
// Add this instruction's estimated size to the running size of the current instruction group.
4182 emitCurIGsize += sz;
// emitIns_R_R_AR: adds a three-operand AVX instruction whose operands are two registers
// and a [base + offs] memory operand.
//   ins  - the instruction (asserted to be SSE/AVX and a three-operand AVX instruction)
//   attr - the emit attribute
//   reg1 - the first register operand
//   reg2 - the second register operand
//   base - the base register of the address (no index register is used)
//   offs - the displacement from base
4185 void emitter::emitIns_R_R_AR(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs)
4187 assert(IsSSEOrAVXInstruction(ins));
4188 assert(IsThreeOperandAVXInstruction(ins));
4190 instrDesc* id = emitNewInstrAmd(attr, offs);
4196 id->idInsFmt(IF_RWR_RRD_ARD);
// Address mode: base register only.
4197 id->idAddr()->iiaAddrMode.amBaseReg = base;
4198 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4200 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
// Add this instruction's estimated size to the running size of the current instruction group.
4204 emitCurIGsize += sz;
4207 //------------------------------------------------------------------------
4208 // IsAVX2GatherInstruction: return true if the instruction is AVX2 Gather
4211 // ins - the instruction to check
4213 // true if the instruction is AVX2 Gather
// The recognized set is exactly the eight opcodes listed below: the four integer
// gathers (vpgather*) and the four floating-point gathers (vgather*).
4215 bool IsAVX2GatherInstruction(instruction ins)
4219 case INS_vpgatherdd:
4220 case INS_vpgatherdq:
4221 case INS_vpgatherqd:
4222 case INS_vpgatherqq:
4223 case INS_vgatherdps:
4224 case INS_vgatherdpd:
4225 case INS_vgatherqps:
4226 case INS_vgatherqpd:
4233 //------------------------------------------------------------------------
4234 // emitIns_R_AR_R: Emits an AVX2 Gather instruction
4237 // ins - the instruction to emit
4238 // attr - the instruction operand size
4239 // reg1 - the destination and first source operand
4240 // reg2 - the mask operand (encoded in VEX.vvvv)
4241 // base - the base register of address to load
4242 // index - the index register of VSIB
4243 // scale - the scale number of VSIB
4244 // offs - the offset added to the memory address from base
4246 void emitter::emitIns_R_AR_R(instruction ins,
// Only the AVX2 gather opcodes are accepted here.
4255 assert(IsAVX2GatherInstruction(ins));
4257 instrDesc* id = emitNewInstrAmd(attr, offs);
4263 id->idInsFmt(IF_RWR_ARD_RRD);
4264 id->idAddr()->iiaAddrMode.amBaseReg = base;
4265 id->idAddr()->iiaAddrMode.amIndxReg = index;
// The scale is stored in encoded form: 'scale' is cast to emitAttr and run through emitEncodeSize.
4266 id->idAddr()->iiaAddrMode.amScale = emitEncodeSize((emitAttr)scale);
4268 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
// Add this instruction's estimated size to the running size of the current instruction group.
4272 emitCurIGsize += sz;
// emitIns_R_R_C: adds a three-operand AVX instruction whose operands are two registers
// and a static-field memory operand (fldHnd + offs).
//   ins    - the instruction (asserted to be SSE/AVX and a three-operand AVX instruction)
//   attr   - the emit attribute; EA_DSP_RELOC_FLG is added unless
//            jitStaticFldIsGlobAddr(fldHnd) is true
//   reg1   - the first register operand
//   reg2   - the second register operand
//   fldHnd - the field handle identifying the memory operand
//   offs   - the offset passed to emitNewInstrDsp
4275 void emitter::emitIns_R_R_C(
4276 instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs)
4278 assert(IsSSEOrAVXInstruction(ins));
4279 assert(IsThreeOperandAVXInstruction(ins));
4281 // Static always need relocs
4282 if (!jitStaticFldIsGlobAddr(fldHnd))
4284 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4287 instrDesc* id = emitNewInstrDsp(attr, offs);
4290 id->idInsFmt(IF_RWR_RRD_MRD);
4293 id->idAddr()->iiaFieldHnd = fldHnd;
4295 UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins));
// Add this instruction's estimated size to the running size of the current instruction group.
4299 emitCurIGsize += sz;
4302 /*****************************************************************************
4304 * Add an instruction with three register operands.
// emitIns_R_R_R: adds a three-operand AVX instruction with a target register and two
// source registers.
//   ins       - the instruction (asserted to be SSE/AVX and a three-operand AVX instruction)
//   attr      - the emit attribute
//   targetReg - the target register, stored as the descriptor's first register
//   reg1      - the first source register
//   reg2      - the second source register
// The size is not computed from the operands: a fixed 5-byte estimate is used (see below).
4307 void emitter::emitIns_R_R_R(instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2)
4309 assert(IsSSEOrAVXInstruction(ins));
4310 assert(IsThreeOperandAVXInstruction(ins));
4311 // Currently vex prefix only use three bytes mode.
4312 // size = vex + opcode + ModR/M = 3 + 1 + 1 = 5
4313 // TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future
4314 UNATIVE_OFFSET sz = 5;
4316 instrDesc* id = emitNewInstr(attr);
4318 id->idInsFmt(IF_RWR_RRD_RRD);
4319 id->idReg1(targetReg);
// Add this instruction's estimated size to the running size of the current instruction group.
4325 emitCurIGsize += sz;
// emitIns_R_R_S: adds a three-operand AVX instruction whose operands are two registers
// and a local-variable memory operand (varx + offs).
//   ins  - the instruction (asserted to be SSE/AVX and a three-operand AVX instruction)
//   attr - the emit attribute
//   reg1 - the first register operand
//   reg2 - the second register operand
//   varx - the variable index used for the memory address
//   offs - the offset added to the address of varx
4328 void emitter::emitIns_R_R_S(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs)
4330 assert(IsSSEOrAVXInstruction(ins));
4331 assert(IsThreeOperandAVXInstruction(ins));
4333 instrDesc* id = emitNewInstr(attr);
4336 id->idInsFmt(IF_RWR_RRD_SRD);
4339 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
// Copies emitVarRefOffs into the descriptor's debug-only info.
4342 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4345 UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs);
// Add this instruction's estimated size to the running size of the current instruction group.
4349 emitCurIGsize += sz;
// emitIns_R_R_A_I: adds a three-operand AVX instruction whose operands are two registers,
// a memory operand described by a GenTreeIndir, and an immediate.
//   ins   - the instruction (asserted to be SSE/AVX and a three-operand AVX instruction)
//   attr  - the emit attribute
//   reg1  - the first register operand
//   reg2  - the second register operand
//   indir - the indirection; its Offset() is passed on as the displacement
//   ival  - the immediate value
//   fmt   - the instruction format, supplied by the caller and forwarded to emitHandleMemOp
4352 void emitter::emitIns_R_R_A_I(
4353 instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, int ival, insFormat fmt)
4355 assert(IsSSEOrAVXInstruction(ins));
4356 assert(IsThreeOperandAVXInstruction(ins));
4358 ssize_t offs = indir->Offset();
4359 instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4365 emitHandleMemOp(indir, id, fmt, ins);
4367 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
// Add this instruction's estimated size to the running size of the current instruction group.
4371 emitCurIGsize += sz;
// emitIns_R_R_AR_I: adds a three-operand AVX instruction whose operands are two registers,
// a [base + offs] memory operand, and an immediate.
//   ins  - the instruction (asserted to be SSE/AVX and a three-operand AVX instruction)
//   attr - the emit attribute
//   reg1 - the first register operand
//   reg2 - the second register operand
//   base - the base register of the address (no index register is used)
//   offs - the displacement from base
//   ival - the immediate value
4374 void emitter::emitIns_R_R_AR_I(
4375 instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs, int ival)
4377 assert(IsSSEOrAVXInstruction(ins));
4378 assert(IsThreeOperandAVXInstruction(ins));
4380 instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4386 id->idInsFmt(IF_RWR_RRD_ARD_CNS);
// Address mode: base register only.
4387 id->idAddr()->iiaAddrMode.amBaseReg = base;
4388 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4390 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
// Add this instruction's estimated size to the running size of the current instruction group.
4394 emitCurIGsize += sz;
// emitIns_R_R_C_I: adds a three-operand AVX instruction whose operands are two registers,
// a static-field memory operand (fldHnd + offs), and an immediate.
//   ins    - the instruction (asserted to be SSE/AVX and a three-operand AVX instruction)
//   attr   - the emit attribute; EA_DSP_RELOC_FLG is added unless
//            jitStaticFldIsGlobAddr(fldHnd) is true
//   reg1   - the first register operand
//   reg2   - the second register operand
//   fldHnd - the field handle identifying the memory operand
//   offs   - the offset passed to emitNewInstrCnsDsp along with the immediate
//   ival   - the immediate value
4397 void emitter::emitIns_R_R_C_I(
4398 instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
4400 assert(IsSSEOrAVXInstruction(ins));
4401 assert(IsThreeOperandAVXInstruction(ins));
4403 // Static always need relocs
4404 if (!jitStaticFldIsGlobAddr(fldHnd))
4406 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4409 instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs);
4412 id->idInsFmt(IF_RWR_RRD_MRD_CNS);
4415 id->idAddr()->iiaFieldHnd = fldHnd;
4417 UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival);
// Add this instruction's estimated size to the running size of the current instruction group.
4421 emitCurIGsize += sz;
4424 /**********************************************************************************
4425 * emitIns_R_R_R_I: Add an instruction with three register operands and an immediate.
4428 * ins - the instruction to add
4429 * attr - the emitter attribute for instruction
4430 * targetReg - the target (destination) register
4431 * reg1 - the first source register
4432 * reg2 - the second source register
4433 * ival - the immediate value
// Adds a three-operand AVX instruction with a target register, two source registers and
// an immediate. 'ins' is asserted to be SSE/AVX and a three-operand AVX instruction.
// The size is not computed from the operands: a fixed 6-byte estimate is used (see below).
4436 void emitter::emitIns_R_R_R_I(
4437 instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, int ival)
4439 assert(IsSSEOrAVXInstruction(ins));
4440 assert(IsThreeOperandAVXInstruction(ins));
4441 // Currently vex prefix only use three bytes mode.
4442 // size = vex + opcode + ModR/M + 1-byte-cns = 3 + 1 + 1 + 1 = 6
4443 // TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future
4444 UNATIVE_OFFSET sz = 6;
4446 instrDesc* id = emitNewInstrCns(attr, ival);
4448 id->idInsFmt(IF_RWR_RRD_RRD_CNS);
4449 id->idReg1(targetReg);
// Add this instruction's estimated size to the running size of the current instruction group.
4455 emitCurIGsize += sz;
// emitIns_R_R_S_I: adds a three-operand AVX instruction whose operands are two registers,
// a local-variable memory operand (varx + offs), and an immediate.
//   ins  - the instruction (asserted to be SSE/AVX and a three-operand AVX instruction)
//   attr - the emit attribute
//   reg1 - the first register operand
//   reg2 - the second register operand
//   varx - the variable index used for the memory address
//   offs - the offset added to the address of varx
//   ival - the immediate value
4458 void emitter::emitIns_R_R_S_I(
4459 instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs, int ival)
4461 assert(IsSSEOrAVXInstruction(ins));
4462 assert(IsThreeOperandAVXInstruction(ins));
4464 instrDesc* id = emitNewInstrCns(attr, ival);
4467 id->idInsFmt(IF_RWR_RRD_SRD_CNS);
4470 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
// Copies emitVarRefOffs into the descriptor's debug-only info.
4473 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4476 UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival);
// Add this instruction's estimated size to the running size of the current instruction group.
4480 emitCurIGsize += sz;
4483 //------------------------------------------------------------------------
4484 // encodeXmmRegAsIval: Encodes a XMM register into imm[7:4] for use by a SIMD instruction
4487 // opReg -- The register being encoded
4490 // opReg encoded in imm[7:4]
// Note: the result is returned through an int8_t cast, so an encoding of 128 or more
// (register index 8 and up relative to XMMBASE) comes back as a negative int on the
// usual two's-complement targets; the low eight bits still hold the encoding.
4491 static int encodeXmmRegAsIval(regNumber opReg)
4493 // AVX/AVX2 supports 4-reg format for vblendvps/vblendvpd/vpblendvb,
4494 // which encodes the fourth register into imm8[7:4]
4495 assert(opReg >= XMMBASE);
// Register index relative to XMMBASE, shifted into the upper nibble.
4496 int ival = (opReg - XMMBASE) << 4;
// The shifted value must fit in a single byte.
4498 assert((ival >= 0) && (ival <= 255));
4499 return (int8_t)ival;
4502 //------------------------------------------------------------------------
4503 // emitIns_R_R_A_R: emits the code for an instruction that takes a register operand, a GenTreeIndir address,
4504 // another register operand, and that returns a value in register
4507 // ins -- The instruction being emitted
4508 // attr -- The emit attribute
4509 // targetReg -- The target register
4510 // op1Reg -- The register of the first operand
4511 // op3Reg -- The register of the third operand
4512 // indir -- The GenTreeIndir used for the memory address
4515 // op2 is built from indir
4517 void emitter::emitIns_R_R_A_R(
4518 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, GenTreeIndir* indir)
// Only the AVX blendv instructions are accepted, and only when VEX encoding is in use.
4520 assert(isAvxBlendv(ins));
4521 assert(UseVEXEncoding());
// The third register operand travels as the instruction's immediate.
4523 int ival = encodeXmmRegAsIval(op3Reg);
4524 ssize_t offs = indir->Offset();
4525 instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4528 id->idReg1(targetReg);
4531 emitHandleMemOp(indir, id, IF_RWR_RRD_ARD_RRD, ins);
4533 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
// Add this instruction's estimated size to the running size of the current instruction group.
4537 emitCurIGsize += sz;
4540 //------------------------------------------------------------------------
4541 // emitIns_R_R_AR_R: emits the code for an instruction that takes a register operand, a base memory
4542 // register, another register operand, and that returns a value in register
4545 // ins -- The instruction being emitted
4546 // attr -- The emit attribute
4547 // targetReg -- The target register
4548 // op1Reg -- The register of the first operand
4549 // op3Reg -- The register of the third operand
4550 // base -- The base register used for the memory address
4551 // offs -- The offset added to the memory address from base
4554 // op2 is built from base + offs
4556 void emitter::emitIns_R_R_AR_R(
4557 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, regNumber base, int offs)
// Only the AVX blendv instructions are accepted, and only when VEX encoding is in use.
4559 assert(isAvxBlendv(ins));
4560 assert(UseVEXEncoding());
// The third register operand travels as the instruction's immediate.
4562 int ival = encodeXmmRegAsIval(op3Reg);
4563 instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4566 id->idReg1(targetReg);
4569 id->idInsFmt(IF_RWR_RRD_ARD_RRD);
// Address mode: base register only.
4570 id->idAddr()->iiaAddrMode.amBaseReg = base;
4571 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4573 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
// Add this instruction's estimated size to the running size of the current instruction group.
4577 emitCurIGsize += sz;
4580 //------------------------------------------------------------------------
4581 // emitIns_R_R_C_R: emits the code for an instruction that takes a register operand, a field handle +
4582 // offset, another register operand, and that returns a value in register
4585 // ins -- The instruction being emitted
4586 // attr -- The emit attribute
4587 // targetReg -- The target register
4588 // op1Reg -- The register of the first operand
4589 // op3Reg -- The register of the third operand
4590 // fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address
4591 // offs -- The offset added to the memory address from fldHnd
4594 // op2 is built from fldHnd + offs
4596 void emitter::emitIns_R_R_C_R(instruction ins,
4598 regNumber targetReg,
4601 CORINFO_FIELD_HANDLE fldHnd,
// Only the AVX blendv instructions are accepted, and only when VEX encoding is in use.
4604 assert(isAvxBlendv(ins));
4605 assert(UseVEXEncoding());
4607 // Static always need relocs
4608 if (!jitStaticFldIsGlobAddr(fldHnd))
4610 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
// The third register operand travels as the instruction's immediate.
4613 int ival = encodeXmmRegAsIval(op3Reg);
4614 instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs);
4617 id->idReg1(targetReg);
4620 id->idInsFmt(IF_RWR_RRD_MRD_RRD);
4621 id->idAddr()->iiaFieldHnd = fldHnd;
4623 UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival);
// Add this instruction's estimated size to the running size of the current instruction group.
4627 emitCurIGsize += sz;
4630 //------------------------------------------------------------------------
4631 // emitIns_R_R_S_R: emits the code for an instruction that takes a register operand, a variable index +
4632 // offset, another register operand, and that returns a value in register
4635 // ins -- The instruction being emitted
4636 // attr -- The emit attribute
4637 // targetReg -- The target register
4638 // op1Reg -- The register of the first operand
4639 // op3Reg -- The register of the third operand
4640 // varx -- The variable index used for the memory address
4641 // offs -- The offset added to the memory address from varx
4644 // op2 is built from varx + offs
4646 void emitter::emitIns_R_R_S_R(
4647 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, int varx, int offs)
// Only the AVX blendv instructions are accepted, and only when VEX encoding is in use.
4649 assert(isAvxBlendv(ins));
4650 assert(UseVEXEncoding());
// The third register operand travels as the instruction's immediate.
4652 int ival = encodeXmmRegAsIval(op3Reg);
4653 instrDesc* id = emitNewInstrCns(attr, ival);
4656 id->idReg1(targetReg);
4659 id->idInsFmt(IF_RWR_RRD_SRD_RRD);
4660 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
4662 UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival);
// Add this instruction's estimated size to the running size of the current instruction group.
4666 emitCurIGsize += sz;
// emitIns_R_R_R_R: adds an AVX blendv instruction with a target register and three
// source registers.
//   ins       - the instruction (asserted to be an AVX blendv; VEX encoding must be in use)
//   attr      - the emit attribute
//   targetReg - the target register, stored as the descriptor's first register
//   reg1      - the first source register
//   reg2      - the second source register
//   reg3      - the third source register, encoded into the immediate by encodeXmmRegAsIval
// The size is not computed from the operands: a fixed 6-byte estimate is used (see below).
4669 void emitter::emitIns_R_R_R_R(
4670 instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, regNumber reg3)
4672 assert(isAvxBlendv(ins));
4673 assert(UseVEXEncoding());
4674 // Currently vex prefix only use three bytes mode.
4675 // size = vex + opcode + ModR/M + 1-byte-cns(Reg) = 3 + 1 + 1 + 1 = 6
4676 // TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future
4677 UNATIVE_OFFSET sz = 6;
4679 int ival = encodeXmmRegAsIval(reg3);
4680 instrDesc* id = emitNewInstrCns(attr, ival);
4683 id->idInsFmt(IF_RWR_RRD_RRD_RRD);
4684 id->idReg1(targetReg);
// Add this instruction's estimated size to the running size of the current instruction group.
4691 emitCurIGsize += sz;
4694 /*****************************************************************************
4696 * Add an instruction with a register + static member operands.
// emitIns_R_C: adds an instruction with a register operand and a static-field memory
// operand (fldHnd + offs).
//   ins    - the instruction
//   attr   - the emit attribute; EA_DSP_RELOC_FLG is added unless
//            jitStaticFldIsGlobAddr(fldHnd) is true. Its size must not exceed EA_32BYTE.
//   reg    - the register operand
//   fldHnd - the field handle identifying the memory operand
//   offs   - the offset passed to emitNewInstrDsp
// Two descriptor shapes are built below: one for the "offset of the variable" case
// (EA_IS_OFFSET) and one for the ordinary memory-read case.
4698 void emitter::emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO_FIELD_HANDLE fldHnd, int offs)
4700 // Static always need relocs
4701 if (!jitStaticFldIsGlobAddr(fldHnd))
4703 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4706 emitAttr size = EA_SIZE(attr);
4708 assert(size <= EA_32BYTE);
4709 noway_assert(emitVerifyEncodable(ins, size, reg));
4714 // Are we MOV'ing the offset of the class variable into EAX?
4715 if (EA_IS_OFFSET(attr))
4717 id = emitNewInstrDsp(EA_1BYTE, offs);
4719 id->idInsFmt(IF_RWR_MRD_OFF);
// The offset form is only expected for "mov eax, ...".
4721 assert(ins == INS_mov && reg == REG_EAX);
4723 // Special case: "mov eax, [addr]" is smaller
4724 sz = 1 + TARGET_POINTER_SIZE;
4728 insFormat fmt = emitInsModeFormat(ins, IF_RRD_MRD);
4730 id = emitNewInstrDsp(attr, offs);
4735 // Special case: "mov eax, [addr]" is smaller.
4736 // This case is not enabled for amd64 as it always uses RIP relative addressing
4737 // and it results in smaller instruction size than encoding 64-bit addr in the
4739 if (ins == INS_mov && reg == REG_EAX)
4741 sz = 1 + TARGET_POINTER_SIZE;
4742 if (size == EA_2BYTE)
4746 #endif //_TARGET_X86_
// General case: size estimate for the class-variable form.
4748 sz = emitInsSizeCV(id, insCodeRM(ins));
4751 // Special case: mov reg, fs:[ddd]
4752 if (fldHnd == FLD_GLOBAL_FS)
// Prefix adjustments: VEX first, then REX when REX.W or an extended register is involved.
4759 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
4762 if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr))
4764 sz += emitGetRexPrefixSize(ins);
4770 id->idAddr()->iiaFieldHnd = fldHnd;
// Add this instruction's estimated size to the running size of the current instruction group.
4773 emitCurIGsize += sz;
4776 /*****************************************************************************
4778 * Add an instruction with a static member + register operands.
// emitIns_C_R: adds an instruction with a static-field memory operand (fldHnd + offs)
// and a register operand.
//   ins    - the instruction
//   attr   - the emit attribute; EA_DSP_RELOC_FLG is added unless
//            jitStaticFldIsGlobAddr(fldHnd) is true. The size limit asserted below is
//            EA_8BYTE on x86 and EA_PTRSIZE in the other branch.
//   fldHnd - the field handle identifying the memory operand
//   reg    - the register operand
//   offs   - the offset passed to emitNewInstrDsp
4781 void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs)
4783 // Static always need relocs
4784 if (!jitStaticFldIsGlobAddr(fldHnd))
4786 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4789 emitAttr size = EA_SIZE(attr);
4791 #if defined(_TARGET_X86_)
4792 // For x86 it is valid to storeind a double sized operand in an xmm reg to memory
4793 assert(size <= EA_8BYTE);
4795 assert(size <= EA_PTRSIZE);
4798 noway_assert(emitVerifyEncodable(ins, size, reg));
4800 instrDesc* id = emitNewInstrDsp(attr, offs);
4801 insFormat fmt = emitInsModeFormat(ins, IF_MRD_RRD);
4809 // Special case: "mov [addr], EAX" is smaller.
4810 // This case is not enabled for amd64 as it always uses RIP relative addressing
4811 // and it will result in smaller instruction size than encoding 64-bit addr in
4813 if (ins == INS_mov && reg == REG_EAX)
4815 sz = 1 + TARGET_POINTER_SIZE;
4816 if (size == EA_2BYTE)
4820 #endif //_TARGET_X86_
// General case: size estimate for the class-variable form, using the memory-destination opcode.
4822 sz = emitInsSizeCV(id, insCodeMR(ins));
4825 // Special case: mov reg, fs:[ddd]
4826 if (fldHnd == FLD_GLOBAL_FS)
// Prefix adjustments: VEX first, then REX when REX.W or an extended register is involved.
4832 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
4835 if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr))
4837 sz += emitGetRexPrefixSize(ins);
4843 id->idAddr()->iiaFieldHnd = fldHnd;
// Add this instruction's estimated size to the running size of the current instruction group.
4846 emitCurIGsize += sz;
4849 /*****************************************************************************
4851 * Add an instruction with a static member + constant.
// emitIns_C_I: adds an instruction with a static-field memory operand (fldHnd + offs)
// and a constant.
//   ins    - the instruction
//   attr   - the emit attribute; EA_DSP_RELOC_FLG is added unless
//            jitStaticFldIsGlobAddr(fldHnd) is true
//   fldHnd - the field handle identifying the memory operand
//   offs   - the offset passed to emitNewInstrCnsDsp along with the constant
//   val    - the constant value
4854 void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, int val)
4856 // Static always need relocs
4857 if (!jitStaticFldIsGlobAddr(fldHnd))
4859 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4879 fmt = emitInsModeFormat(ins, IF_MRD_CNS);
4883 instrDesc* id = emitNewInstrCnsDsp(attr, val, offs);
// The memory/immediate opcode is kept in 'code' so the REX check below can inspect it.
4887 code_t code = insCodeMI(ins);
4888 UNATIVE_OFFSET sz = emitInsSizeCV(id, code, val);
4891 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins));
4893 // REX prefix, if not already included in "code"
4894 if (TakesRexWPrefix(ins, attr) && !hasRexPrefix(code))
4896 sz += emitGetRexPrefixSize(ins);
4899 id->idAddr()->iiaFieldHnd = fldHnd;
// Add this instruction's estimated size to the running size of the current instruction group.
4903 emitCurIGsize += sz;
// emitIns_J_S: adds a "mov" that references the basic block 'dst' as a label and a
// local-variable location (varx + offs).
//   ins  - the instruction; asserted to be INS_mov
//   attr - the emit attribute (not referenced by the statements shown here)
//   dst  - the referenced block; asserted to carry BBF_JMP_TARGET
//   varx - the variable index of the stack location
//   offs - the offset added to the address of varx
// The descriptor is a label descriptor (emitNewInstrLbl) that is linked into the current
// instruction group's jump list.
4906 void emitter::emitIns_J_S(instruction ins, emitAttr attr, BasicBlock* dst, int varx, int offs)
4908 assert(ins == INS_mov);
4909 assert(dst->bbFlags & BBF_JMP_TARGET);
4911 instrDescLbl* id = emitNewInstrLbl();
4914 id->idInsFmt(IF_SWR_LABEL);
4915 id->idAddr()->iiaBBlabel = dst;
4917 /* The label reference is always long */
4920 id->idjKeepLong = 1;
4922 /* Record the current IG and offset within it */
4924 id->idjIG = emitCurIG;
4925 id->idjOffs = emitCurIGsize;
4927 /* Append this instruction to this IG's jump list */
4929 id->idjNext = emitCurIGjmpList;
4930 emitCurIGjmpList = id;
// Size: a 32-bit value (sizeof(INT32)) on top of the stack-variable form's estimate.
4932 UNATIVE_OFFSET sz = sizeof(INT32) + emitInsSizeSV(id, insCodeMI(ins), varx, offs);
4933 id->dstLclVar.initLclVarAddr(varx, offs);
4935 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4942 #ifndef _TARGET_AMD64_
4943 // Storing the address of a basicBlock will need a reloc
4944 // as the instruction uses the absolute address,
4945 // not a relative address.
4947 // On Amd64, Absolute code addresses should always go through a reloc to
4948 // be encoded as RIP rel32 offset.
4949 if (emitComp->opts.compReloc)
4952 id->idSetIsDspReloc();
// Add this instruction's estimated size to the running size of the current instruction group.
4958 emitCurIGsize += sz;
4961 /*****************************************************************************
4963 * Add a label instruction.
// emitIns_R_L: adds a "lea" that references the basic block 'dst' as a label.
//   ins  - the instruction; asserted to be INS_lea
//   attr - the emit attribute; supplies the operand size and the relocation flags
//   dst  - the referenced block; asserted to carry BBF_JMP_TARGET
//   reg  - the register operand
// The descriptor is a jump descriptor (emitNewInstrJmp) that is linked into the current
// instruction group's jump list.
4965 void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
4967 assert(ins == INS_lea);
4968 assert(dst->bbFlags & BBF_JMP_TARGET);
4970 instrDescJmp* id = emitNewInstrJmp();
4974 id->idInsFmt(IF_RWR_LABEL);
4975 id->idOpSize(EA_SIZE(attr)); // emitNewInstrJmp() sets the size (incorrectly) to EA_1BYTE
4976 id->idAddr()->iiaBBlabel = dst;
4978 /* The label reference is always long */
4981 id->idjKeepLong = 1;
4983 /* Record the current IG and offset within it */
4985 id->idjIG = emitCurIG;
4986 id->idjOffs = emitCurIGsize;
4988 /* Append this instruction to this IG's jump list */
4990 id->idjNext = emitCurIGjmpList;
4991 emitCurIGjmpList = id;
4994 // Mark the catch return
4995 if (emitComp->compCurBB->bbJumpKind == BBJ_EHCATCHRET)
4997 id->idDebugOnlyInfo()->idCatchRet = true;
5005 // Set the relocation flags - these give hint to zap to perform
5006 // relocation of the specified 32bit address.
5008 // Note the relocation flags influence the size estimate.
5009 id->idSetRelocFlags(attr);
// The size is computed after the relocation flags are set, per the note above.
5011 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
// Add this instruction's estimated size to the running size of the current instruction group.
5015 emitCurIGsize += sz;
5018 /*****************************************************************************
5020 * The following adds instructions referencing address modes.
// emitIns_I_AR: adds an instruction with an immediate and a [reg + disp] memory operand.
//   ins  - the instruction; must not be a floating-point instruction (instIsFP)
//   attr - the emit attribute; its size must not exceed EA_8BYTE
//   val  - the immediate value
//   reg  - the base register of the address (no index register is used)
//   disp - the displacement from reg
5023 void emitter::emitIns_I_AR(instruction ins, emitAttr attr, int val, regNumber reg, int disp)
5025 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5027 #ifdef _TARGET_AMD64_
5028 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5029 // all other opcodes take a sign-extended 4-byte immediate
5030 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5050 fmt = emitInsModeFormat(ins, IF_ARD_CNS);
// NOTE(review): the next two lines read like the remains of a disabled debugging aid;
// confirm against the full source.
5055 Useful if you want to trap moves with 0 constant
5056 if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE)
5063 instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
// Address mode: base register only.
5067 id->idAddr()->iiaAddrMode.amBaseReg = reg;
5068 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5070 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5072 sz = emitInsSizeAM(id, insCodeMI(ins), val);
// Add this instruction's estimated size to the running size of the current instruction group.
5076 emitCurIGsize += sz;
// emitIns_I_AI: adds an instruction with an immediate and a memory operand addressed by
// displacement only (neither a base nor an index register is set).
//   ins  - the instruction; must not be a floating-point instruction (instIsFP)
//   attr - the emit attribute; its size must not exceed EA_8BYTE
//   val  - the immediate value
//   disp - the displacement
5079 void emitter::emitIns_I_AI(instruction ins, emitAttr attr, int val, ssize_t disp)
5081 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5083 #ifdef _TARGET_AMD64_
5084 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5085 // all other opcodes take a sign-extended 4-byte immediate
5086 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5106 fmt = emitInsModeFormat(ins, IF_ARD_CNS);
// NOTE(review): the next two lines read like the remains of a disabled debugging aid;
// confirm against the full source.
5111 Useful if you want to trap moves with 0 constant
5112 if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE)
5119 instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
// Address mode: no base and no index register.
5123 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5124 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5126 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5128 sz = emitInsSizeAM(id, insCodeMI(ins), val);
// Add this instruction's estimated size to the running size of the current instruction group.
5132 emitCurIGsize += sz;
// emitIns_R_AR: adds an instruction with a register operand and a [base + disp] memory
// operand.
//   ins  - the instruction; must not be a floating-point instruction (instIsFP)
//   attr - the emit attribute; its size must not exceed EA_32BYTE
//   ireg - the register operand; must not be REG_NA
//   base - the base register of the address (no index register is used)
//   disp - the displacement from base
5135 void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp)
5137 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE) && (ireg != REG_NA));
5138 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
// NOTE(review): the enclosing condition and the action taken for this case are not
// visible in this excerpt; confirm against the full source.
5142 if (ireg == base && disp == 0)
5144 // Maybe the emitter is not the common place for this optimization, but it's a better choke point
5145 // for all the emitIns(ins, tree), we would have to be analyzing at each call site
5152 instrDesc* id = emitNewInstrAmd(attr, disp);
5153 insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
// Address mode: base register only.
5159 id->idAddr()->iiaAddrMode.amBaseReg = base;
5160 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5162 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5164 sz = emitInsSizeAM(id, insCodeRM(ins));
5166 if (Is4ByteSSEInstruction(ins))
5168 // The 4-Byte SSE instructions require two additional bytes
// Add this instruction's estimated size to the running size of the current instruction group.
5175 emitCurIGsize += sz;
// emitIns_R_AI: adds an instruction with a register operand and a memory operand
// addressed by displacement only (neither a base nor an index register is set).
//   ins  - the instruction; must not be a floating-point instruction (instIsFP)
//   attr - the emit attribute; its size must not exceed EA_8BYTE
//   ireg - the register operand; must not be REG_NA
//   disp - the displacement
5178 void emitter::emitIns_R_AI(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp)
5180 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5181 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5184 instrDesc* id = emitNewInstrAmd(attr, disp);
5185 insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
// Address mode: no base and no index register.
5191 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5192 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5194 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5196 sz = emitInsSizeAM(id, insCodeRM(ins));
// Add this instruction's estimated size to the running size of the current instruction group.
5200 emitCurIGsize += sz;
// emitIns_AR_R: adds an instruction with a [base + disp] memory operand and a register
// operand.
//   ins  - the instruction; must not be a floating-point instruction (instIsFP)
//   attr - the emit attribute; its size must not exceed EA_32BYTE
//   ireg - the register operand
//   base - the base register of the address (no index register is used)
//   disp - the displacement from base
5203 void emitter::emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp)
5206 instrDesc* id = emitNewInstrAmd(attr, disp);
// Two candidate formats follow (IF_ARD and IF_ARD_RRD). NOTE(review): the condition that
// selects between them is not visible in this excerpt.
5211 fmt = emitInsModeFormat(ins, IF_ARD);
5215 fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5217 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE));
5218 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
// Address mode: base register only.
5226 id->idAddr()->iiaAddrMode.amBaseReg = base;
5227 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5229 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5231 sz = emitInsSizeAM(id, insCodeMR(ins));
// Add this instruction's estimated size to the running size of the current instruction group.
5235 emitCurIGsize += sz;
// Let the stack-depth tracking see the instruction in case it is a push or pop.
5237 emitAdjustStackDepthPushPop(ins);
// emitIns_AR_R_I: adds a vextracti128/vextractf128 with a [base + disp] memory operand,
// a register operand and an immediate.
//   ins  - the instruction; asserted to be INS_vextracti128 or INS_vextractf128
//   attr - the emit attribute
//   base - the base register of the address; must not be REG_NA
//   disp - the displacement from base
//   ireg - the register operand; must not be REG_NA
//   ival - the immediate value
5240 void emitter::emitIns_AR_R_I(instruction ins, emitAttr attr, regNumber base, int disp, regNumber ireg, int ival)
5242 assert(ins == INS_vextracti128 || ins == INS_vextractf128);
5243 assert(base != REG_NA);
5244 assert(ireg != REG_NA);
5245 instrDesc* id = emitNewInstrAmdCns(attr, disp, ival);
5248 id->idInsFmt(IF_AWR_RRD_CNS);
// Address mode: base register only.
5249 id->idAddr()->iiaAddrMode.amBaseReg = base;
5250 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5253 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5255 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins), ival);
// Add this instruction's estimated size to the running size of the current instruction group.
5259 emitCurIGsize += sz;
// emitIns_AI_R: adds an instruction with a memory operand addressed by displacement only
// (neither a base nor an index register is set) and a register operand.
//   ins  - the instruction; must not be a floating-point instruction (instIsFP)
//   attr - the emit attribute; its size must not exceed EA_8BYTE
//   ireg - the register operand
//   disp - the displacement
5262 void emitter::emitIns_AI_R(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp)
5265 instrDesc* id = emitNewInstrAmd(attr, disp);
// Two candidate formats follow (IF_ARD and IF_ARD_RRD). NOTE(review): the condition that
// selects between them is not visible in this excerpt.
5270 fmt = emitInsModeFormat(ins, IF_ARD);
5274 fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5276 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5277 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
// Address mode: no base and no index register.
5285 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5286 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5288 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5290 sz = emitInsSizeAM(id, insCodeMR(ins));
// Add this instruction's estimated size to the running size of the current instruction group.
5294 emitCurIGsize += sz;
// Let the stack-depth tracking see the instruction in case it is a push or pop.
5296 emitAdjustStackDepthPushPop(ins);
// emitIns_I_ARR: adds an instruction with an immediate and a [reg + rg2 + disp] memory
// operand (index scale fixed at OPSZ1).
//   ins  - the instruction; must not be a floating-point instruction (instIsFP)
//   attr - the emit attribute; its size must not exceed EA_8BYTE
//   val  - the immediate value
//   reg  - the base register
//   rg2  - the index register
//   disp - the displacement
5299 void emitter::emitIns_I_ARR(instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, int disp)
5301 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5303 #ifdef _TARGET_AMD64_
5304 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5305 // all other opcodes take a sign-extended 4-byte immediate
5306 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5326 fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5331 instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
// Address mode: base + index with scale OPSZ1.
5335 id->idAddr()->iiaAddrMode.amBaseReg = reg;
5336 id->idAddr()->iiaAddrMode.amIndxReg = rg2;
5337 id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1;
5339 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5341 sz = emitInsSizeAM(id, insCodeMI(ins), val);
// Add this instruction's estimated size to the running size of the current instruction group.
5345 emitCurIGsize += sz;
// emitIns_R_ARR: adds an instruction with a register operand and a [base + index + disp]
// memory operand (index scale fixed at OPSZ1).
//   ins   - the instruction; must not be a floating-point instruction (instIsFP)
//   attr  - the emit attribute; its size must not exceed EA_8BYTE
//   ireg  - the register operand; must not be REG_NA
//   base  - the base register
//   index - the index register
//   disp  - the displacement
5348 void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, int disp)
5350 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5351 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5354 instrDesc* id = emitNewInstrAmd(attr, disp);
5355 insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
// Address mode: base + index with scale OPSZ1.
5361 id->idAddr()->iiaAddrMode.amBaseReg = base;
5362 id->idAddr()->iiaAddrMode.amIndxReg = index;
5363 id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1;
5365 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5367 sz = emitInsSizeAM(id, insCodeRM(ins));
// Add this instruction's estimated size to the running size of the current instruction group.
5371 emitCurIGsize += sz;
// emitIns_ARR_R: adds an instruction with a [reg + index + disp] memory operand and a
// register operand. The index scale is the encoding of 1 (emitEncodeScale(1)).
//   ins   - the instruction; must not be a floating-point instruction (instIsFP)
//   attr  - the emit attribute; its size must not exceed EA_8BYTE
//   ireg  - the register operand
//   reg   - the base register
//   index - the index register
//   disp  - the displacement
5374 void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber index, int disp)
5377 instrDesc* id = emitNewInstrAmd(attr, disp);
// Two candidate formats follow (IF_ARD and IF_ARD_RRD). NOTE(review): the condition that
// selects between them is not visible in this excerpt.
5382 fmt = emitInsModeFormat(ins, IF_ARD);
5386 fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5388 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5389 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5397 id->idAddr()->iiaAddrMode.amBaseReg = reg;
5398 id->idAddr()->iiaAddrMode.amIndxReg = index;
5399 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(1);
5401 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5403 sz = emitInsSizeAM(id, insCodeMR(ins));
// Add this instruction's estimated size to the running size of the current instruction group.
5407 emitCurIGsize += sz;
// Let the stack-depth tracking see the instruction in case it is a push or pop.
5409 emitAdjustStackDepthPushPop(ins);
// emitIns_I_ARX: adds an instruction with an immediate and a [reg + rg2*mul + disp]
// memory operand.
//   ins  - the instruction; must not be a floating-point instruction (instIsFP)
//   attr - the emit attribute; its size must not exceed EA_8BYTE
//   val  - the immediate value
//   reg  - the base register
//   rg2  - the index register
//   mul  - the index multiplier, stored in encoded form via emitEncodeScale
//   disp - the displacement
5412 void emitter::emitIns_I_ARX(
5413 instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, unsigned mul, int disp)
5415 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5417 #ifdef _TARGET_AMD64_
5418 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5419 // all other opcodes take a sign-extended 4-byte immediate
5420 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5440 fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5445 instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
// Address mode: base + scaled index.
5450 id->idAddr()->iiaAddrMode.amBaseReg = reg;
5451 id->idAddr()->iiaAddrMode.amIndxReg = rg2;
5452 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5454 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5456 sz = emitInsSizeAM(id, insCodeMI(ins), val);
// Add this instruction's estimated size to the running size of the current instruction group.
5460 emitCurIGsize += sz;
// emitIns_R_ARX: adds an instruction with a register operand and a
// [base + index*mul + disp] memory operand.
//   ins   - the instruction; must not be a floating-point instruction (instIsFP)
//   attr  - the emit attribute; its size must not exceed EA_8BYTE
//   ireg  - the register operand; must not be REG_NA
//   base  - the base register
//   index - the index register
//   mul   - the index multiplier, stored in encoded form via emitEncodeScale
//   disp  - the displacement
5463 void emitter::emitIns_R_ARX(
5464 instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp)
5466 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5467 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5470 instrDesc* id = emitNewInstrAmd(attr, disp);
5471 insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
// Address mode: base + scaled index.
5477 id->idAddr()->iiaAddrMode.amBaseReg = base;
5478 id->idAddr()->iiaAddrMode.amIndxReg = index;
5479 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5481 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5483 sz = emitInsSizeAM(id, insCodeRM(ins));
// Add this instruction's estimated size to the running size of the current instruction group.
5487 emitCurIGsize += sz;
// emitIns_ARX_R: adds an instruction with a [base + index*mul + disp] memory operand and
// a register operand.
//   ins   - the instruction; must not be a floating-point instruction (instIsFP)
//   attr  - the emit attribute; its size must not exceed EA_8BYTE
//   ireg  - the register operand
//   base  - the base register
//   index - the index register
//   mul   - the index multiplier, stored in encoded form via emitEncodeScale
//   disp  - the displacement
5490 void emitter::emitIns_ARX_R(
5491 instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp)
5494 instrDesc* id = emitNewInstrAmd(attr, disp);
// Two candidate formats follow (IF_ARD and IF_ARD_RRD). NOTE(review): the condition that
// selects between them is not visible in this excerpt.
5499 fmt = emitInsModeFormat(ins, IF_ARD);
5503 fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5505 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5506 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
// Address mode: base + scaled index.
5514 id->idAddr()->iiaAddrMode.amBaseReg = base;
5515 id->idAddr()->iiaAddrMode.amIndxReg = index;
5516 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5518 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5520 sz = emitInsSizeAM(id, insCodeMR(ins));
// Add this instruction's estimated size to the running size of the current instruction group.
5524 emitCurIGsize += sz;
// Let the stack-depth tracking see the instruction in case it is a push or pop.
5526 emitAdjustStackDepthPushPop(ins);
// emitIns_I_AX: adds an instruction with an immediate and a [reg*mul + disp] memory
// operand (scaled index only; no base register).
//   ins  - the instruction; must not be a floating-point instruction (instIsFP)
//   attr - the emit attribute; its size must not exceed EA_8BYTE
//   val  - the immediate value
//   reg  - the index register
//   mul  - the index multiplier, stored in encoded form via emitEncodeScale
//   disp - the displacement
5529 void emitter::emitIns_I_AX(instruction ins, emitAttr attr, int val, regNumber reg, unsigned mul, int disp)
5531 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5533 #ifdef _TARGET_AMD64_
5534 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5535 // all other opcodes take a sign-extended 4-byte immediate
5536 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5556 fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5561 instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
// Address mode: scaled index without a base register.
5565 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5566 id->idAddr()->iiaAddrMode.amIndxReg = reg;
5567 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5569 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5571 sz = emitInsSizeAM(id, insCodeMI(ins), val);
// Add this instruction's estimated size to the running size of the current instruction group.
5575 emitCurIGsize += sz;
// emitIns_R_AX: adds an instruction with a register operand and a [reg*mul + disp]
// memory operand (scaled index only; no base register).
//   ins  - the instruction; must not be a floating-point instruction (instIsFP)
//   attr - the emit attribute; its size must not exceed EA_8BYTE
//   ireg - the register operand; must not be REG_NA
//   reg  - the index register
//   mul  - the index multiplier, stored in encoded form via emitEncodeScale
//   disp - the displacement
5578 void emitter::emitIns_R_AX(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp)
5580 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5581 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5584 instrDesc* id = emitNewInstrAmd(attr, disp);
5585 insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
// Address mode: scaled index without a base register.
5591 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5592 id->idAddr()->iiaAddrMode.amIndxReg = reg;
5593 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5595 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5597 sz = emitInsSizeAM(id, insCodeRM(ins));
// Add this instruction's estimated size to the running size of the current instruction group.
5601 emitCurIGsize += sz;
//------------------------------------------------------------------------
// emitIns_AX_R: add an instruction whose memory operand is addressed through an index
//               register only ([reg * mul + disp]), optionally with a register operand (ireg).
//
// Two base formats are used: IF_ARD (memory operand only) and IF_ARD_RRD (memory operand
// plus register). On the IF_ARD_RRD path ireg is verified to be encodable. The size is
// computed from the MR opcode form, and stack-depth tracking is updated for push/pop.
//
5604 void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp)
5607 instrDesc* id = emitNewInstrAmd(attr, disp);
5612 fmt = emitInsModeFormat(ins, IF_ARD);
5616 fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5617 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5618 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
// Index-only address mode: no base register, reg scaled by mul.
5626 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5627 id->idAddr()->iiaAddrMode.amIndxReg = reg;
5628 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5630 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5632 sz = emitInsSizeAM(id, insCodeMR(ins));
// Grow the running size of the current IG by the size of this instruction.
5636 emitCurIGsize += sz;
// Only INS_push and INS_pop change the tracked stack depth.
5638 emitAdjustStackDepthPushPop(ins);
5641 #ifdef FEATURE_HW_INTRINSICS
5642 //------------------------------------------------------------------------
5643 // emitIns_SIMD_R_R_I: emits the code for a SIMD instruction that takes a register operand, an immediate operand
5644 // and that returns a value in register
5647 // ins -- The instruction being emitted
5648 // attr -- The emit attribute
5649 // targetReg -- The target register
5650 // op1Reg -- The register of the first operand
5651 // ival -- The immediate value
// Notes:
//    With VEX encoding, or when IsDstSrcImmAvxInstruction(ins) holds, the
//    "targetReg, op1Reg, ival" form is emitted directly. Otherwise op1Reg is first copied
//    into targetReg with movaps (when they differ) and the "targetReg, ival" form is emitted.
5653 void emitter::emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int ival)
5655 if (UseVEXEncoding() || IsDstSrcImmAvxInstruction(ins))
5657 emitIns_R_R_I(ins, attr, targetReg, op1Reg, ival);
// Two-operand form: targetReg is both source and destination, so seed it with op1 first.
5661 if (op1Reg != targetReg)
5663 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5665 emitIns_R_I(ins, attr, targetReg, ival);
5669 //------------------------------------------------------------------------
5670 // emitIns_SIMD_R_R_A: emits the code for a SIMD instruction that takes a register operand, a GenTreeIndir address,
5671 // and that returns a value in register
5674 // ins -- The instruction being emitted
5675 // attr -- The emit attribute
5676 // targetReg -- The target register
5677 // op1Reg -- The register of the first operand
5678 // indir -- The GenTreeIndir used for the memory address
// Notes:
//    With VEX encoding the three-operand form is emitted directly. Otherwise op1Reg is
//    copied into targetReg with movaps (when they differ) and the two-operand
//    "targetReg, [indir]" form is emitted.
5680 void emitter::emitIns_SIMD_R_R_A(
5681 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, GenTreeIndir* indir)
5683 if (UseVEXEncoding())
5685 emitIns_R_R_A(ins, attr, targetReg, op1Reg, indir);
// Two-operand form: targetReg doubles as the first source, so seed it with op1 first.
5689 if (op1Reg != targetReg)
5691 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5693 emitIns_R_A(ins, attr, targetReg, indir);
5697 //------------------------------------------------------------------------
5698 // emitIns_SIMD_R_R_AR: emits the code for a SIMD instruction that takes a register operand, a base memory register,
5699 // and that returns a value in register
5702 // ins -- The instruction being emitted
5703 // attr -- The emit attribute
5704 // targetReg -- The target register
5705 // op1Reg -- The register of the first operand
5706 // base -- The base register used for the memory address
// Notes:
//    The memory operand is always [base + 0]. With VEX encoding the three-operand form is
//    emitted directly. Otherwise op1Reg is copied into targetReg with movaps (when they
//    differ) and the two-operand form is emitted.
5708 void emitter::emitIns_SIMD_R_R_AR(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber base)
5710 if (UseVEXEncoding())
5712 emitIns_R_R_AR(ins, attr, targetReg, op1Reg, base, 0);
// Two-operand form: targetReg doubles as the first source, so seed it with op1 first.
5716 if (op1Reg != targetReg)
5718 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5720 emitIns_R_AR(ins, attr, targetReg, base, 0);
5724 //------------------------------------------------------------------------
5725 // emitIns_SIMD_R_R_C: emits the code for a SIMD instruction that takes a register operand, a field handle + offset,
5726 // and that returns a value in register
5729 // ins -- The instruction being emitted
5730 // attr -- The emit attribute
5731 // targetReg -- The target register
5732 // op1Reg -- The register of the first operand
5733 // fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address
5734 // offs -- The offset added to the memory address from fldHnd
// Notes:
//    With VEX encoding the three-operand form is emitted directly. Otherwise op1Reg is
//    copied into targetReg with movaps (when they differ) and the two-operand form is emitted.
5736 void emitter::emitIns_SIMD_R_R_C(
5737 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, CORINFO_FIELD_HANDLE fldHnd, int offs)
5739 if (UseVEXEncoding())
5741 emitIns_R_R_C(ins, attr, targetReg, op1Reg, fldHnd, offs);
// Two-operand form: targetReg doubles as the first source, so seed it with op1 first.
5745 if (op1Reg != targetReg)
5747 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5749 emitIns_R_C(ins, attr, targetReg, fldHnd, offs);
5753 //------------------------------------------------------------------------
5754 // emitIns_SIMD_R_R_R: emits the code for a SIMD instruction that takes two register operands, and that returns a
5755 // value in register
5758 // ins -- The instruction being emitted
5759 // attr -- The emit attribute
5760 // targetReg -- The target register
5761 // op1Reg -- The register of the first operand
5762 // op2Reg -- The register of the second operand
// Notes:
//    With VEX encoding the three-operand form is emitted directly. Otherwise op1Reg is
//    copied into targetReg with movaps (when they differ) and "ins targetReg, op2Reg" is
//    emitted. In that case op2Reg must not be targetReg, because the copy would destroy it.
5764 void emitter::emitIns_SIMD_R_R_R(
5765 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg)
5767 if (UseVEXEncoding())
5769 emitIns_R_R_R(ins, attr, targetReg, op1Reg, op2Reg);
5773 if (op1Reg != targetReg)
5775 // Ensure we aren't overwriting op2
5776 assert(op2Reg != targetReg);
5778 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5780 emitIns_R_R(ins, attr, targetReg, op2Reg);
5784 //------------------------------------------------------------------------
5785 // emitIns_SIMD_R_R_S: emits the code for a SIMD instruction that takes a register operand, a variable index + offset,
5786 // and that returns a value in register
5789 // ins -- The instruction being emitted
5790 // attr -- The emit attribute
5791 // targetReg -- The target register
5792 // op1Reg -- The register of the first operand
5793 // varx -- The variable index used for the memory address
5794 // offs -- The offset added to the memory address from varx
// Notes:
//    With VEX encoding the three-operand form is emitted directly. Otherwise op1Reg is
//    copied into targetReg with movaps (when they differ) and the two-operand form is emitted.
5796 void emitter::emitIns_SIMD_R_R_S(
5797 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int varx, int offs)
5799 if (UseVEXEncoding())
5801 emitIns_R_R_S(ins, attr, targetReg, op1Reg, varx, offs);
// Two-operand form: targetReg doubles as the first source, so seed it with op1 first.
5805 if (op1Reg != targetReg)
5807 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5809 emitIns_R_S(ins, attr, targetReg, varx, offs);
5813 //------------------------------------------------------------------------
5814 // emitIns_SIMD_R_R_A_I: emits the code for a SIMD instruction that takes a register operand, a GenTreeIndir address,
5815 // an immediate operand, and that returns a value in register
5818 // ins -- The instruction being emitted
5819 // attr -- The emit attribute
5820 // targetReg -- The target register
5821 // op1Reg -- The register of the first operand
5822 // indir -- The GenTreeIndir used for the memory address
5823 // ival -- The immediate value
// Notes:
//    With VEX encoding the "targetReg, op1Reg, [indir], ival" form is emitted directly
//    using the IF_RWR_RRD_ARD_CNS format. Otherwise op1Reg is copied into targetReg with
//    movaps (when they differ) and the "targetReg, [indir], ival" form is emitted.
5825 void emitter::emitIns_SIMD_R_R_A_I(
5826 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, GenTreeIndir* indir, int ival)
5828 if (UseVEXEncoding())
5830 emitIns_R_R_A_I(ins, attr, targetReg, op1Reg, indir, ival, IF_RWR_RRD_ARD_CNS);
// targetReg doubles as the first source below, so seed it with op1 first.
5834 if (op1Reg != targetReg)
5836 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5838 emitIns_R_A_I(ins, attr, targetReg, indir, ival);
5842 //------------------------------------------------------------------------
5843 // emitIns_SIMD_R_R_AR_I: emits the code for a SIMD instruction that takes a register operand, a base memory register,
5844 // an immediate operand, and that returns a value in register
5847 // ins -- The instruction being emitted
5848 // attr -- The emit attribute
5849 // targetReg -- The target register
5850 // op1Reg -- The register of the first operand
5851 // base -- The base register used for the memory address
5852 // ival -- The immediate value
// Notes:
//    The memory operand is always [base + 0]. With VEX encoding the form that names op1Reg
//    separately is emitted directly. Otherwise op1Reg is copied into targetReg with movaps
//    (when they differ) and the form without op1Reg is emitted.
5854 void emitter::emitIns_SIMD_R_R_AR_I(
5855 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber base, int ival)
5857 if (UseVEXEncoding())
5859 emitIns_R_R_AR_I(ins, attr, targetReg, op1Reg, base, 0, ival);
// targetReg doubles as the first source below, so seed it with op1 first.
5863 if (op1Reg != targetReg)
5865 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5867 emitIns_R_AR_I(ins, attr, targetReg, base, 0, ival);
5871 //------------------------------------------------------------------------
5872 // emitIns_SIMD_R_R_C_I: emits the code for a SIMD instruction that takes a register operand, a field handle + offset,
5873 // an immediate operand, and that returns a value in register
5876 // ins -- The instruction being emitted
5877 // attr -- The emit attribute
5878 // targetReg -- The target register
5879 // op1Reg -- The register of the first operand
5880 // fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address
5881 // offs -- The offset added to the memory address from fldHnd
5882 // ival -- The immediate value
// Notes:
//    With VEX encoding the form that names op1Reg separately is emitted directly. Otherwise
//    op1Reg is copied into targetReg with movaps (when they differ) and the form without
//    op1Reg is emitted.
5884 void emitter::emitIns_SIMD_R_R_C_I(instruction ins,
5886 regNumber targetReg,
5888 CORINFO_FIELD_HANDLE fldHnd,
5892 if (UseVEXEncoding())
5894 emitIns_R_R_C_I(ins, attr, targetReg, op1Reg, fldHnd, offs, ival);
// targetReg doubles as the first source below, so seed it with op1 first.
5898 if (op1Reg != targetReg)
5900 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5902 emitIns_R_C_I(ins, attr, targetReg, fldHnd, offs, ival);
5906 //------------------------------------------------------------------------
5907 // emitIns_SIMD_R_R_R_I: emits the code for a SIMD instruction that takes two register operands, an immediate operand,
5908 // and that returns a value in register
5911 // ins -- The instruction being emitted
5912 // attr -- The emit attribute
5913 // targetReg -- The target register
5914 // op1Reg -- The register of the first operand
5915 // op2Reg -- The register of the second operand
5916 // ival -- The immediate value
// Notes:
//    With VEX encoding the "targetReg, op1Reg, op2Reg, ival" form is emitted directly.
//    Otherwise op1Reg is copied into targetReg with movaps (when they differ) and
//    "ins targetReg, op2Reg, ival" is emitted. In that case op2Reg must not be targetReg,
//    because the copy would destroy it.
5918 void emitter::emitIns_SIMD_R_R_R_I(
5919 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int ival)
5921 if (UseVEXEncoding())
5923 emitIns_R_R_R_I(ins, attr, targetReg, op1Reg, op2Reg, ival);
5927 if (op1Reg != targetReg)
5929 // Ensure we aren't overwriting op2
5930 assert(op2Reg != targetReg);
5932 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5934 emitIns_R_R_I(ins, attr, targetReg, op2Reg, ival);
5938 //------------------------------------------------------------------------
5939 // emitIns_SIMD_R_R_S_I: emits the code for a SIMD instruction that takes a register operand, a variable index + offset,
5940 // an immediate operand, and that returns a value in register
5943 // ins -- The instruction being emitted
5944 // attr -- The emit attribute
5945 // targetReg -- The target register
5946 // op1Reg -- The register of the first operand
5947 // varx -- The variable index used for the memory address
5948 // offs -- The offset added to the memory address from varx
5949 // ival -- The immediate value
// Notes:
//    With VEX encoding the form that names op1Reg separately is emitted directly. Otherwise
//    op1Reg is copied into targetReg with movaps (when they differ) and the form without
//    op1Reg is emitted.
5951 void emitter::emitIns_SIMD_R_R_S_I(
5952 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int varx, int offs, int ival)
5954 if (UseVEXEncoding())
5956 emitIns_R_R_S_I(ins, attr, targetReg, op1Reg, varx, offs, ival);
// targetReg doubles as the first source below, so seed it with op1 first.
5960 if (op1Reg != targetReg)
5962 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5964 emitIns_R_S_I(ins, attr, targetReg, varx, offs, ival);
5968 //------------------------------------------------------------------------
5969 // emitIns_SIMD_R_R_R_A: emits the code for a SIMD instruction that takes two register operands, a GenTreeIndir address,
5970 // and that returns a value in register
5973 // ins -- The instruction being emitted
5974 // attr -- The emit attribute
5975 // targetReg -- The target register
5976 // op1Reg -- The register of the first operand
5977 // op2Reg -- The register of the second operand
5978 // indir -- The GenTreeIndir used for the memory address
// Notes:
//    Only FMA instructions are accepted and VEX encoding is required (both asserted).
//    op1Reg is copied into targetReg with movaps when they differ (op2Reg must not be
//    targetReg in that case), then "ins targetReg, op2Reg, [indir]" is emitted.
5980 void emitter::emitIns_SIMD_R_R_R_A(
5981 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTreeIndir* indir)
5983 assert(IsFMAInstruction(ins));
5984 assert(UseVEXEncoding());
5986 if (op1Reg != targetReg)
5988 // Ensure we aren't overwriting op2
5989 assert(op2Reg != targetReg);
5991 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
// targetReg now holds op1 and also receives the result.
5994 emitIns_R_R_A(ins, attr, targetReg, op2Reg, indir);
5997 //------------------------------------------------------------------------
5998 // emitIns_SIMD_R_R_R_AR: emits the code for a SIMD instruction that takes two register operands, a base memory
5999 // register, and that returns a value in register
6002 // ins -- The instruction being emitted
6003 // attr -- The emit attribute
6004 // targetReg -- The target register
6005 // op1Reg -- The register of the first operand
6006 // op2Reg -- The register of the second operand
6007 // base -- The base register used for the memory address
// Notes:
//    Only FMA instructions are accepted and VEX encoding is required (both asserted).
//    op1Reg is copied into targetReg with movaps when they differ (op2Reg must not be
//    targetReg in that case), then "ins targetReg, op2Reg, [base + 0]" is emitted.
6009 void emitter::emitIns_SIMD_R_R_R_AR(
6010 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber base)
6012 assert(IsFMAInstruction(ins));
6013 assert(UseVEXEncoding());
6015 if (op1Reg != targetReg)
6017 // Ensure we aren't overwriting op2
6018 assert(op2Reg != targetReg);
6020 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
// targetReg now holds op1 and also receives the result.
6023 emitIns_R_R_AR(ins, attr, targetReg, op2Reg, base, 0);
6026 //------------------------------------------------------------------------
6027 // emitIns_SIMD_R_R_R_C: emits the code for a SIMD instruction that takes two register operands, a field handle +
6028 // offset, and that returns a value in register
6031 // ins -- The instruction being emitted
6032 // attr -- The emit attribute
6033 // targetReg -- The target register
6034 // op1Reg -- The register of the first operand
6035 // op2Reg -- The register of the second operand
6036 // fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address
6037 // offs -- The offset added to the memory address from fldHnd
// Notes:
//    Only FMA instructions are accepted and VEX encoding is required (both asserted).
//    op1Reg is copied into targetReg with movaps when they differ (op2Reg must not be
//    targetReg in that case), then the form using targetReg, op2Reg and the field address
//    is emitted.
6039 void emitter::emitIns_SIMD_R_R_R_C(instruction ins,
6041 regNumber targetReg,
6044 CORINFO_FIELD_HANDLE fldHnd,
6047 assert(IsFMAInstruction(ins));
6048 assert(UseVEXEncoding());
6050 if (op1Reg != targetReg)
6052 // Ensure we aren't overwriting op2
6053 assert(op2Reg != targetReg);
6055 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
// targetReg now holds op1 and also receives the result.
6058 emitIns_R_R_C(ins, attr, targetReg, op2Reg, fldHnd, offs);
6061 //------------------------------------------------------------------------
6062 // emitIns_SIMD_R_R_R_R: emits the code for a SIMD instruction that takes three register operands, and that returns a
6063 // value in register
6066 // ins -- The instruction being emitted
6067 // attr -- The emit attribute
6068 // targetReg -- The target register
6069 // op1Reg -- The register of the first operand
6070 // op2Reg -- The register of the second operand
6071 // op3Reg -- The register of the third operand
// Notes:
//    Three cases are handled:
//    1. FMA instructions (VEX required): op1Reg is copied into targetReg when they differ,
//       then "ins targetReg, op2Reg, op3Reg" is emitted.
//    2. Other instructions under VEX encoding: ins must be an AVX or SSE4.1 blendv. It may
//       be rewritten to INS_vblendvps, INS_vblendvpd or INS_vpblendvb before the
//       four-register form is emitted.
//    3. Without VEX encoding: ins must be an SSE4.1 blendv, whose mask (op3) is implicitly
//       XMM0. op3Reg is moved into XMM0, op1Reg into targetReg, and
//       "ins targetReg, op2Reg" is emitted.
6073 void emitter::emitIns_SIMD_R_R_R_R(
6074 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber op3Reg)
6076 if (IsFMAInstruction(ins))
6078 assert(UseVEXEncoding());
6080 if (op1Reg != targetReg)
6082 // Ensure we aren't overwriting op2 or op3
6084 assert(op2Reg != targetReg);
6085 assert(op3Reg != targetReg);
6087 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6090 emitIns_R_R_R(ins, attr, targetReg, op2Reg, op3Reg);
6092 else if (UseVEXEncoding())
6094 assert(isAvxBlendv(ins) || isSse41Blendv(ins));
6096 // convert SSE encoding of SSE4.1 instructions to VEX encoding
6100 ins = INS_vblendvps;
6103 ins = INS_vblendvpd;
6106 ins = INS_vpblendvb;
6111 emitIns_R_R_R_R(ins, attr, targetReg, op1Reg, op2Reg, op3Reg);
6115 assert(isSse41Blendv(ins));
6116 // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
6117 if (op3Reg != REG_XMM0)
6119 // Ensure we aren't overwriting op1 or op2
6120 assert(op1Reg != REG_XMM0);
6121 assert(op2Reg != REG_XMM0);
6123 emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
6125 if (op1Reg != targetReg)
6127 // Ensure we aren't overwriting op2 or op3 (which should be REG_XMM0)
6128 assert(op2Reg != targetReg);
6129 assert(targetReg != REG_XMM0);
6131 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6133 emitIns_R_R(ins, attr, targetReg, op2Reg);
6137 //------------------------------------------------------------------------
6138 // emitIns_SIMD_R_R_R_S: emits the code for a SIMD instruction that takes two register operands, a variable index +
6139 // offset, and that returns a value in register
6142 // ins -- The instruction being emitted
6143 // attr -- The emit attribute
6144 // targetReg -- The target register
6145 // op1Reg -- The register of the first operand
6146 // op2Reg -- The register of the second operand
6147 // varx -- The variable index used for the memory address
6148 // offs -- The offset added to the memory address from varx
// Notes:
//    Only FMA instructions are accepted and VEX encoding is required (both asserted).
//    op1Reg is copied into targetReg with movaps when they differ (op2Reg must not be
//    targetReg in that case), then the form using targetReg, op2Reg and the local
//    variable address is emitted.
6150 void emitter::emitIns_SIMD_R_R_R_S(
6151 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int varx, int offs)
6153 assert(IsFMAInstruction(ins));
6154 assert(UseVEXEncoding());
6156 if (op1Reg != targetReg)
6158 // Ensure we aren't overwriting op2
6159 assert(op2Reg != targetReg);
6161 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
// targetReg now holds op1 and also receives the result.
6164 emitIns_R_R_S(ins, attr, targetReg, op2Reg, varx, offs);
6167 //------------------------------------------------------------------------
6168 // emitIns_SIMD_R_R_A_R: emits the code for a SIMD instruction that takes a register operand, a GenTreeIndir address,
6169 // another register operand, and that returns a value in register
6172 // ins -- The instruction being emitted
6173 // attr -- The emit attribute
6174 // targetReg -- The target register
6175 // op1Reg -- The register of the first operand
6176 // op3Reg -- The register of the third operand
6177 // indir -- The GenTreeIndir used for the memory address
// Notes:
//    ins must be a blendv instruction. Under VEX encoding it may be rewritten to
//    INS_vblendvps, INS_vblendvpd or INS_vpblendvb and is emitted with op3Reg as an
//    explicit operand. Without VEX encoding the SSE4.1 form takes the mask (op3) in XMM0,
//    so op3Reg is moved into XMM0 and op1Reg into targetReg before "ins targetReg, [indir]"
//    is emitted.
6179 void emitter::emitIns_SIMD_R_R_A_R(
6180 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, GenTreeIndir* indir)
6182 if (UseVEXEncoding())
6184 assert(isAvxBlendv(ins) || isSse41Blendv(ins));
6186 // convert SSE encoding of SSE4.1 instructions to VEX encoding
6191 ins = INS_vblendvps;
6197 ins = INS_vblendvpd;
6203 ins = INS_vpblendvb;
6213 emitIns_R_R_A_R(ins, attr, targetReg, op1Reg, op3Reg, indir);
6217 assert(isSse41Blendv(ins));
6219 // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
6220 if (op3Reg != REG_XMM0)
6222 // Ensure we aren't overwriting op1
6223 assert(op1Reg != REG_XMM0);
6225 emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
6227 if (op1Reg != targetReg)
6229 // Ensure we aren't overwriting op3 (which should be REG_XMM0)
6230 assert(targetReg != REG_XMM0);
6232 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6235 emitIns_R_A(ins, attr, targetReg, indir);
6239 //------------------------------------------------------------------------
6240 // emitIns_SIMD_R_R_AR_R: emits the code for a SIMD instruction that takes a register operand, a base memory
6241 // register, another register operand, and that returns a value in register
6244 // ins -- The instruction being emitted
6245 // attr -- The emit attribute
6246 // targetReg -- The target register
6247 // op1Reg -- The register of the first operand
6248 // op3Reg -- The register of the third operand
6249 // base -- The base register used for the memory address
// Notes:
//    The memory operand is always [base + 0]. ins must be a blendv instruction. Under VEX
//    encoding it may be rewritten to INS_vblendvps, INS_vblendvpd or INS_vpblendvb and is
//    emitted with op3Reg as an explicit operand. Without VEX encoding the SSE4.1 form takes
//    the mask (op3) in XMM0, so op3Reg is moved into XMM0 and op1Reg into targetReg before
//    the two-operand form is emitted.
6251 void emitter::emitIns_SIMD_R_R_AR_R(
6252 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, regNumber base)
6254 if (UseVEXEncoding())
6256 assert(isAvxBlendv(ins) || isSse41Blendv(ins));
6258 // convert SSE encoding of SSE4.1 instructions to VEX encoding
6263 ins = INS_vblendvps;
6269 ins = INS_vblendvpd;
6275 ins = INS_vpblendvb;
6285 emitIns_R_R_AR_R(ins, attr, targetReg, op1Reg, op3Reg, base, 0);
6289 assert(isSse41Blendv(ins));
6291 // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
6292 if (op3Reg != REG_XMM0)
6294 // Ensure we aren't overwriting op1
6295 assert(op1Reg != REG_XMM0);
6297 emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
6299 if (op1Reg != targetReg)
6301 // Ensure we aren't overwriting op3 (which should be REG_XMM0)
6302 assert(targetReg != REG_XMM0);
6304 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6307 emitIns_R_AR(ins, attr, targetReg, base, 0);
6311 //------------------------------------------------------------------------
6312 // emitIns_SIMD_R_R_C_R: emits the code for a SIMD instruction that takes a register operand, a field handle +
6313 // offset, another register operand, and that returns a value in register
6316 // ins -- The instruction being emitted
6317 // attr -- The emit attribute
6318 // targetReg -- The target register
6319 // op1Reg -- The register of the first operand
6320 // op3Reg -- The register of the third operand
6321 // fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address
6322 // offs -- The offset added to the memory address from fldHnd
// Notes:
//    ins must be a blendv instruction. Under VEX encoding it may be rewritten to
//    INS_vblendvps, INS_vblendvpd or INS_vpblendvb and is emitted with op3Reg as an
//    explicit operand. Without VEX encoding the SSE4.1 form takes the mask (op3) in XMM0,
//    so op3Reg is moved into XMM0 and op1Reg into targetReg before the two-operand form
//    is emitted.
6324 void emitter::emitIns_SIMD_R_R_C_R(instruction ins,
6326 regNumber targetReg,
6329 CORINFO_FIELD_HANDLE fldHnd,
6332 if (UseVEXEncoding())
6334 assert(isAvxBlendv(ins) || isSse41Blendv(ins));
6336 // convert SSE encoding of SSE4.1 instructions to VEX encoding
6341 ins = INS_vblendvps;
6347 ins = INS_vblendvpd;
6353 ins = INS_vpblendvb;
6363 emitIns_R_R_C_R(ins, attr, targetReg, op1Reg, op3Reg, fldHnd, offs);
6367 assert(isSse41Blendv(ins));
6369 // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
6370 if (op3Reg != REG_XMM0)
6372 // Ensure we aren't overwriting op1
6373 assert(op1Reg != REG_XMM0);
6375 emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
6377 if (op1Reg != targetReg)
6379 // Ensure we aren't overwriting op3 (which should be REG_XMM0)
6380 assert(targetReg != REG_XMM0);
6382 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6385 emitIns_R_C(ins, attr, targetReg, fldHnd, offs);
6389 //------------------------------------------------------------------------
6390 // emitIns_SIMD_R_R_S_R: emits the code for a SIMD instruction that takes a register operand, a variable index +
6391 // offset, another register operand, and that returns a value in register
6394 // ins -- The instruction being emitted
6395 // attr -- The emit attribute
6396 // targetReg -- The target register
6397 // op1Reg -- The register of the first operand
6398 // op3Reg -- The register of the third operand
6399 // varx -- The variable index used for the memory address
6400 // offs -- The offset added to the memory address from varx
// Notes:
//    ins must be a blendv instruction. Under VEX encoding it may be rewritten to
//    INS_vblendvps, INS_vblendvpd or INS_vpblendvb and is emitted with op3Reg as an
//    explicit operand. Without VEX encoding the SSE4.1 form takes the mask (op3) in XMM0,
//    so op3Reg is moved into XMM0 and op1Reg into targetReg before the two-operand form
//    is emitted.
6402 void emitter::emitIns_SIMD_R_R_S_R(
6403 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, int varx, int offs)
6405 if (UseVEXEncoding())
6407 assert(isAvxBlendv(ins) || isSse41Blendv(ins));
6409 // convert SSE encoding of SSE4.1 instructions to VEX encoding
6414 ins = INS_vblendvps;
6420 ins = INS_vblendvpd;
6426 ins = INS_vpblendvb;
6436 emitIns_R_R_S_R(ins, attr, targetReg, op1Reg, op3Reg, varx, offs);
6440 assert(isSse41Blendv(ins));
6442 // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
6443 if (op3Reg != REG_XMM0)
6445 // Ensure we aren't overwriting op1
6446 assert(op1Reg != REG_XMM0);
6448 emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
6450 if (op1Reg != targetReg)
6452 // Ensure we aren't overwriting op3 (which should be REG_XMM0)
6453 assert(targetReg != REG_XMM0);
6455 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6458 emitIns_R_S(ins, attr, targetReg, varx, offs);
6461 #endif // FEATURE_HW_INTRINSICS
6463 /*****************************************************************************
6465 * The following add instructions referencing stack-based local variables.
//------------------------------------------------------------------------
// emitIns_S: add an instruction whose only operand is a stack-based local variable
//            (variable index varx plus byte offset offs).
//
// The size starts from the MR opcode form for the stack operand and is then adjusted for
// the operand-size, VEX and REX.W prefixes. Push/pop forms also update stack-depth tracking.
//
6468 void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs)
6470 instrDesc* id = emitNewInstr(attr);
6471 UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeMR(ins), varx, offs);
6472 insFormat fmt = emitInsModeFormat(ins, IF_SRD);
6474 // 16-bit operand instructions will need a prefix
6475 if (EA_SIZE(attr) == EA_2BYTE)
6481 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
6483 // 64-bit operand instructions will need a REX.W prefix
6484 if (TakesRexWPrefix(ins, attr))
6486 sz += emitGetRexPrefixSize(ins);
// Record which local (and offset within it) the instruction refers to.
6491 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
6495 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
// Grow the running size of the current IG by the size of this instruction.
6498 emitCurIGsize += sz;
// Only INS_push and INS_pop change the tracked stack depth.
6500 emitAdjustStackDepthPushPop(ins);
//------------------------------------------------------------------------
// emitIns_S_R: add an instruction with a stack-based local variable operand
//              (varx + offs) and a register operand (ireg), using the IF_SRD_RRD base format.
//
// The size starts from the MR opcode form for the stack operand and is then adjusted for
// the operand-size, VEX and REX prefixes.
//
6503 void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs)
6505 instrDesc* id = emitNewInstr(attr);
6506 UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeMR(ins), varx, offs);
6507 insFormat fmt = emitInsModeFormat(ins, IF_SRD_RRD);
// A byte-sized operand requires a register that has a byte form.
6510 if (attr == EA_1BYTE)
6512 assert(isByteReg(ireg));
6515 // 16-bit operand instructions will need a prefix
6516 if (EA_SIZE(attr) == EA_2BYTE)
6522 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
6524 // 64-bit operand instructions will need a REX.W prefix
// (a REX prefix is also accounted for when ireg is an extended register)
6525 if (TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr))
6527 sz += emitGetRexPrefixSize(ins);
// Record which local (and offset within it) the instruction refers to.
6533 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
6536 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
// Grow the running size of the current IG by the size of this instruction.
6539 emitCurIGsize += sz;
//------------------------------------------------------------------------
// emitIns_R_S: add an instruction with a register operand (ireg) and a stack-based local
//              variable operand (varx + offs), using the IF_RRD_SRD base format.
//
// ireg must be encodable for ins at the given operand size. The size starts from the RM
// opcode form for the stack operand and is then adjusted for the operand-size, VEX and
// REX prefixes and for crc32.
//
6542 void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs)
6544 emitAttr size = EA_SIZE(attr);
6545 noway_assert(emitVerifyEncodable(ins, size, ireg));
6547 instrDesc* id = emitNewInstr(attr);
6548 UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs);
6549 insFormat fmt = emitInsModeFormat(ins, IF_RRD_SRD);
6551 // Most 16-bit operand instructions need a prefix
// (movsx and movzx are excluded from this adjustment)
6552 if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx)
6558 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
6560 // 64-bit operand instructions will need a REX.W prefix
// (a REX prefix is also accounted for when ireg is an extended register)
6561 if (TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr))
6563 sz += emitGetRexPrefixSize(ins);
// Additional size adjustment computed by emitAdjustSizeCrc32.
6566 sz += emitAdjustSizeCrc32(ins, attr);
// Record which local (and offset within it) the instruction refers to.
6571 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
6574 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
// Grow the running size of the current IG by the size of this instruction.
6577 emitCurIGsize += sz;
//------------------------------------------------------------------------
// emitIns_S_I: add an instruction with a stack-based local variable operand (varx + offs)
//              and an immediate operand (val).
//
// The size is computed from the MI opcode form, including the immediate, and is then
// adjusted for the VEX and REX.W prefixes.
//
6580 void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val)
6582 #ifdef _TARGET_AMD64_
6583 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
6584 // all other opcodes take a sign-extended 4-byte immediate
6585 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
6605 fmt = emitInsModeFormat(ins, IF_SRD_CNS);
// The descriptor carries the immediate value.
6609 instrDesc* id = emitNewInstrCns(attr, val);
6612 UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeMI(ins), varx, offs, val);
6615 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins));
6617 // 64-bit operand instructions will need a REX.W prefix
6618 if (TakesRexWPrefix(ins, attr))
6620 sz += emitGetRexPrefixSize(ins);
// Record which local (and offset within it) the instruction refers to.
6623 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
6626 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
// Grow the running size of the current IG by the size of this instruction.
6629 emitCurIGsize += sz;
6632 /*****************************************************************************
6634 * Record that a jump instruction uses the short encoding
// emitSetShortJump: flag the jump descriptor id as using the short encoding (idjShort).
// The idjKeepLong flag is consulted first; NOTE(review): presumably a jump that must stay
// long is left untouched - confirm against the full body.
6637 void emitter::emitSetShortJump(instrDescJmp* id)
6639 if (id->idjKeepLong)
6644 id->idjShort = true;
6647 /*****************************************************************************
6649 * Add a jmp instruction.
//------------------------------------------------------------------------
// emitIns_J: add a jump-style instruction (jmp, conditional jump, call, or push of a
//            label address) that targets a basic block.
//
//    ins        -- The instruction being emitted
//    dst        -- The target basic block; it must be flagged BBF_JMP_TARGET
//    instrCount -- Optional, defaults to 0
//
// Notes:
//    The jump is recorded as an IF_LABEL instruction and prepended to the jump list of
//    the current IG, together with its IG and offset.
//    Size estimate: calls use CALL_INST_SIZE; push and push_hide use PUSH_INST_SIZE (and
//    are marked as needing a displacement reloc when compReloc is set); any other jump
//    starts at the large size. When the cookie lookup for dst yields a target IG and the
//    estimated backward distance fits in a short jump (and idjKeepLong is not set), the
//    jump is marked short and sized as JMP_SIZE_SMALL.
//
6652 void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0 */)
6655 instrDescJmp* id = emitNewInstrJmp();
6657 assert(dst->bbFlags & BBF_JMP_TARGET);
6660 id->idInsFmt(IF_LABEL);
6661 id->idAddr()->iiaBBlabel = dst;
6664 // Mark the finally call
6665 if (ins == INS_call && emitComp->compCurBB->bbJumpKind == BBJ_CALLFINALLY)
6667 id->idDebugOnlyInfo()->idFinallyCall = true;
6671 /* Assume the jump will be long */
// A jump whose source and target blocks are in different regions must be kept long.
6674 id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
6676 /* Record the jump's IG and offset within it */
6678 id->idjIG = emitCurIG;
6679 id->idjOffs = emitCurIGsize;
6681 /* Append this jump to this IG's jump list */
6683 id->idjNext = emitCurIGjmpList;
6684 emitCurIGjmpList = id;
6690 /* Figure out the max. size of the jump/call instruction */
6692 if (ins == INS_call)
6694 sz = CALL_INST_SIZE;
6696 else if (ins == INS_push || ins == INS_push_hide)
6698 // Pushing the address of a basicBlock will need a reloc
6699 // as the instruction uses the absolute address,
6700 // not a relative address
6701 if (emitComp->opts.compReloc)
6703 id->idSetIsDspReloc();
6705 sz = PUSH_INST_SIZE;
6711 /* This is a jump - assume the worst */
6713 sz = (ins == INS_jmp) ? JMP_SIZE_LARGE : JCC_SIZE_LARGE;
6715 /* Can we guess at the jump distance? */
6717 tgt = (insGroup*)emitCodeGetCookie(dst);
6722 UNATIVE_OFFSET srcOffs;
6725 assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL);
6727 /* This is a backward jump - figure out the distance */
// The source offset is taken at the end of a short-form jump placed at the current position.
6729 srcOffs = emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL;
6731 /* Compute the distance estimate */
6733 jmpDist = srcOffs - tgt->igOffs;
6734 assert((int)jmpDist > 0);
6736 /* How much beyond the max. short distance does the jump go? */
6738 extra = jmpDist + JMP_DIST_SMALL_MAX_NEG;
// Diagnostic output for the jump selected by INTERESTING_JUMP_NUM (0 selects every jump).
6741 if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
6743 if (INTERESTING_JUMP_NUM == 0)
6745 printf("[0] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
6747 printf("[0] Jump source is at %08X\n", srcOffs);
6748 printf("[0] Label block is at %08X\n", tgt->igOffs);
6749 printf("[0] Jump distance - %04X\n", jmpDist);
6752 printf("[0] Distance excess = %d \n", extra);
6757 if (extra <= 0 && !id->idjKeepLong)
6759 /* Wonderful - this jump surely will be short */
6761 emitSetShortJump(id);
6762 sz = JMP_SIZE_SMALL;
// Diagnostic output for the case where the target label block is not yet known.
6768 if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
6770 if (INTERESTING_JUMP_NUM == 0)
6772 printf("[0] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
6774 printf("[0] Jump source is at %04X/%08X\n", emitCurIGsize,
6775 emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL);
6776 printf("[0] Label block is unknown\n");
// Grow the running size of the current IG by the estimated size of this instruction.
6785 emitCurIGsize += sz;
// Only INS_push and INS_pop change the tracked stack depth.
6787 emitAdjustStackDepthPushPop(ins);
6790 #if !FEATURE_FIXED_OUT_ARGS
6792 //------------------------------------------------------------------------
6793 // emitAdjustStackDepthPushPop: Adjust the current and maximum stack depth.
6796 // ins - the instruction. Only INS_push and INS_pop adjust the stack depth.
6799 // 1. Alters emitCurStackLvl and possibly emitMaxStackDepth.
6800 // 2. emitCntStackDepth must be set (0 in prolog/epilog, one DWORD elsewhere)
6802 void emitter::emitAdjustStackDepthPushPop(instruction ins)
6804 if (ins == INS_push)
// A push grows the tracked level by emitCntStackDepth and may raise the recorded maximum.
6806 emitCurStackLvl += emitCntStackDepth;
6808 if (emitMaxStackDepth < emitCurStackLvl)
6810 JITDUMP("Upping emitMaxStackDepth from %d to %d\n", emitMaxStackDepth, emitCurStackLvl);
6811 emitMaxStackDepth = emitCurStackLvl;
6814 else if (ins == INS_pop)
// A pop shrinks the tracked level by the same amount; it must never go negative.
6816 emitCurStackLvl -= emitCntStackDepth;
6817 assert((int)emitCurStackLvl >= 0);
6821 //------------------------------------------------------------------------
6822 // emitAdjustStackDepth: Adjust the current and maximum stack depth.
6825 // ins - the instruction. Only INS_add and INS_sub adjust the stack depth.
6826 // It is assumed that the add/sub is on the stack pointer.
6827 // val - the number of bytes to add to or subtract from the stack pointer.
6830 // 1. Alters emitCurStackLvl and possibly emitMaxStackDepth.
6831 // 2. emitCntStackDepth must be set (0 in prolog/epilog, one DWORD elsewhere)
6833 void emitter::emitAdjustStackDepth(instruction ins, ssize_t val)
6835 // If we're in the prolog or epilog, or otherwise not tracking the stack depth, just return.
6836 if (emitCntStackDepth == 0)
// Growing case: val bytes are added to the tracked level using S_UINT32 so that an
// overflow is detected and reported through noway_assert.
6841 S_UINT32 newStackLvl(emitCurStackLvl);
6842 newStackLvl += S_UINT32(val);
6843 noway_assert(!newStackLvl.IsOverflow());
6845 emitCurStackLvl = newStackLvl.Value();
// Keep the recorded maximum depth in step with the new level.
6847 if (emitMaxStackDepth < emitCurStackLvl)
6849 JITDUMP("Upping emitMaxStackDepth from %d to %d\n", emitMaxStackDepth, emitCurStackLvl);
6850 emitMaxStackDepth = emitCurStackLvl;
6853 else if (ins == INS_add)
// Shrinking case: val bytes are subtracted from the tracked level, again overflow-checked.
6855 S_UINT32 newStackLvl = S_UINT32(emitCurStackLvl) - S_UINT32(val);
6856 noway_assert(!newStackLvl.IsOverflow());
6858 emitCurStackLvl = newStackLvl.Value();
6862 #endif // !FEATURE_FIXED_OUT_ARGS
6864 /*****************************************************************************
6866 * Add a call instruction (direct or indirect).
6867 * argSize<0 means that the caller will pop the arguments
6869 * The other arguments are interpreted depending on callType as shown:
6870 * Unless otherwise specified, ireg,xreg,xmul,disp should have default values.
6872 * EC_FUNC_TOKEN : addr is the method address
6873 * EC_FUNC_TOKEN_INDIR : addr is the indirect method address
6874 * EC_FUNC_ADDR : addr is the absolute address of the function
6875 * EC_FUNC_VIRTUAL : "call [ireg+disp]"
6877 * If callType is one of these emitCallTypes, addr has to be NULL.
6878 * EC_INDIR_R : "call ireg".
6879 * EC_INDIR_SR : "call lcl<disp>" (eg. call [ebp-8]).
6880 * EC_INDIR_C : "call clsVar<disp>" (eg. call [clsVarAddr])
6881 * EC_INDIR_ARD : "call [ireg+xreg*xmul+disp]"
6886 void emitter::emitIns_Call(EmitCallType callType,
6887 CORINFO_METHOD_HANDLE methHnd,
6888 INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
6892 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
6893 VARSET_VALARG_TP ptrVars,
6894 regMaskTP gcrefRegs,
6895 regMaskTP byrefRegs,
6896 IL_OFFSETX ilOffset, // = BAD_IL_OFFSET
6897 regNumber ireg, // = REG_NA
6898 regNumber xreg, // = REG_NA
6899 unsigned xmul, // = 0
6900 ssize_t disp, // = 0
6901 bool isJump) // = false
6904 /* Sanity check the arguments depending on callType */
6906 assert(callType < EC_COUNT);
6907 assert((callType != EC_FUNC_TOKEN && callType != EC_FUNC_TOKEN_INDIR && callType != EC_FUNC_ADDR) ||
6908 (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0));
6909 assert(callType != EC_FUNC_VIRTUAL || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0));
6910 assert(callType < EC_INDIR_R || callType == EC_INDIR_ARD || callType == EC_INDIR_C || addr == nullptr);
6911 assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0));
6912 assert(callType != EC_INDIR_SR ||
6913 (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp < (int)emitComp->lvaCount));
6914 assert(callType != EC_INDIR_C || (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp != 0));
6916 // Our stack level should always be at least the number of argument bytes we push. Just
6918 assert((unsigned)abs((signed)argSize) <= codeGen->genStackLevel);
6920 // Trim out any callee-trashed registers from the live set.
6921 regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd);
6922 gcrefRegs &= savedSet;
6923 byrefRegs &= savedSet;
// Optional GC tracing: dump the live GC variable set and both register masks for this call.
6926 if (EMIT_GC_VERBOSE)
6928 printf("\t\t\t\t\t\t\tCall: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars));
6929 dumpConvertedVarSet(emitComp, ptrVars);
6930 printf(", gcrefRegs=");
6931 printRegMaskInt(gcrefRegs);
6932 emitDispRegSet(gcrefRegs);
6933 printf(", byrefRegs=");
6934 printRegMaskInt(byrefRegs);
6935 emitDispRegSet(byrefRegs);
6940 /* Managed RetVal: emit sequence point for the call */
6941 if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET)
6943 codeGen->genIPmappingAdd(ilOffset, false);
6947 We need to allocate the appropriate instruction descriptor based
6948 on whether this is a direct/indirect call, and whether we need to
6949 record an updated set of live GC variables.
6951 The stats for a ton of classes is as follows:
6953 Direct call w/o GC vars 220,216
6954 Indir. call w/o GC vars 144,781
6956 Direct call with GC vars 9,440
6957 Indir. call with GC vars 5,768
// argSize must be a whole number of register-sized slots; argCnt keeps the sign of argSize.
6962 assert(argSize % REGSIZE_BYTES == 0);
6963 int argCnt = (int)(argSize / (int)REGSIZE_BYTES); // we need a signed-divide
// Call types at or above EC_FUNC_VIRTUAL get an indirect-call descriptor (which also
// records disp); all other call types get a direct-call descriptor.
6965 if (callType >= EC_FUNC_VIRTUAL)
6967 /* Indirect call, virtual calls */
6969 assert(callType == EC_FUNC_VIRTUAL || callType == EC_INDIR_R || callType == EC_INDIR_SR ||
6970 callType == EC_INDIR_C || callType == EC_INDIR_ARD);
6972 id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs,
6973 retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize));
6977 // Helper/static/nonvirtual/function calls (direct or through handle),
6978 // and calls to an absolute addr.
6980 assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_TOKEN_INDIR || callType == EC_FUNC_ADDR);
6982 id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs,
6983 retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize));
6986 /* Update the emitter's live GC ref sets */
6988 VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars);
6989 emitThisGCrefRegs = gcrefRegs;
6990 emitThisByrefRegs = byrefRegs;
6992 /* Set the instruction - special case jumping a function */
6993 instruction ins = INS_call;
6997 assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_TOKEN_INDIR);
6998 if (callType == EC_FUNC_TOKEN)
// Mark the descriptor as a no-GC call when emitNoGChelper reports that for methHnd.
7009 id->idSetIsNoGC(emitNoGChelper(methHnd));
7013 // Record the address: method, indirection, or funcptr
7014 if (callType >= EC_FUNC_VIRTUAL)
7016 // This is an indirect call (either a virtual call or func ptr call)
7021 // Indirect call using an absolute code address.
7022 // Must be marked as relocatable and is done at the
7023 // branch target location.
7024 goto CALL_ADDR_MODE;
7026 case EC_INDIR_R: // the address is in a register
7028 id->idSetIsCallRegPtr();
7032 case EC_INDIR_ARD: // the address is an indirection
7034 goto CALL_ADDR_MODE;
7036 case EC_INDIR_SR: // the address is in a lcl var
7038 id->idInsFmt(IF_SRD);
7039 // disp is really a lclVarNum
7040 noway_assert((unsigned)disp == (size_t)disp);
7041 id->idAddr()->iiaLclVar.initLclVarAddr((unsigned)disp, 0);
7042 sz = emitInsSizeSV(id, insCodeMR(INS_call), (unsigned)disp, 0);
7046 case EC_FUNC_VIRTUAL:
7052 // The function is "ireg" if id->idIsCallRegPtr(),
7053 // else [ireg+xmul*xreg+disp]
7055 id->idInsFmt(IF_ARD);
7057 id->idAddr()->iiaAddrMode.amBaseReg = ireg;
7058 id->idAddr()->iiaAddrMode.amIndxReg = xreg;
// A zero xmul is stored as the OPSZ1 scale; any other value is encoded via emitEncodeScale.
7059 id->idAddr()->iiaAddrMode.amScale = xmul ? emitEncodeScale(xmul) : emitter::OPSZ1;
7061 sz = emitInsSizeAM(id, insCodeMR(INS_call));
// With neither a base nor an index register the operand is an absolute address.
7063 if (ireg == REG_NA && xreg == REG_NA)
7065 if (codeGen->genCodeIndirAddrNeedsReloc(disp))
7067 id->idSetIsDspReloc();
7069 #ifdef _TARGET_AMD64_
7072 // An absolute indir address that doesn't need reloc should fit within 32-bits
7073 // to be encoded as offset relative to zero. This addr mode requires an extra
// NOTE(review): the reloc query above is made on 'disp', but this range check is on 'addr' - confirm which is intended.
7075 noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (size_t)addr);
7078 #endif //_TARGET_AMD64_
7084 NO_WAY("unexpected instruction");
7088 else if (callType == EC_FUNC_TOKEN_INDIR)
7090 /* "call [method_addr]" */
7092 assert(addr != nullptr);
7094 id->idInsFmt(IF_METHPTR);
7095 id->idAddr()->iiaAddr = (BYTE*)addr;
7098 // Since this is an indirect call through a pointer and we don't
7099 // currently pass in emitAttr into this function, we query codegen
7100 // whether addr needs a reloc.
7101 if (codeGen->genCodeIndirAddrNeedsReloc((size_t)addr))
7103 id->idSetIsDspReloc();
7105 #ifdef _TARGET_AMD64_
7108 // An absolute indir address that doesn't need reloc should fit within 32-bits
7109 // to be encoded as offset relative to zero. This addr mode requires an extra
7111 noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (size_t)addr);
7114 #endif //_TARGET_AMD64_
7118 /* This is a simple direct call: "call helper/method/addr" */
7120 assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR);
7122 assert(addr != nullptr);
7124 id->idInsFmt(IF_METHOD);
7127 id->idAddr()->iiaAddr = (BYTE*)addr;
7129 if (callType == EC_FUNC_ADDR)
7131 id->idSetIsCallAddr();
7134 // Direct call to a method and no addr indirection is needed.
7135 if (codeGen->genCodeAddrNeedsReloc((size_t)addr))
7137 id->idSetIsDspReloc();
// Dump of the recorded GC vars; the '&& 0' keeps this block permanently disabled.
// NOTE(review): both branches below print the same text and both cast to instrDescCGCA.
7142 if (emitComp->verbose && 0)
7144 if (id->idIsLargeCall())
7146 if (callType >= EC_FUNC_VIRTUAL)
7148 printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
7149 VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
7153 printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
7154 VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
// Stash the method handle and signature on the descriptor's debug-only info.
7159 id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token
7160 id->idDebugOnlyInfo()->idCallSig = sigInfo;
// Late disassembly support: associate the target address with its method handle.
7164 if (addr != nullptr)
7166 codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd);
7168 #endif // LATE_DISASM
// Account for this instruction's encoded size in the current instruction group.
7173 emitCurIGsize += sz;
7175 #if !FEATURE_FIXED_OUT_ARGS
7177 /* The call will pop the arguments */
// Only applies when stack depth is being tracked and argSize is positive.
7179 if (emitCntStackDepth && argSize > 0)
7181 noway_assert((ssize_t)emitCurStackLvl >= argSize);
7182 emitCurStackLvl -= (int)argSize;
7183 assert((int)emitCurStackLvl >= 0);
7186 #endif // !FEATURE_FIXED_OUT_ARGS
7190 /*****************************************************************************
7192 * The following is called for each recorded instruction -- use for debugging.
7194 void emitter::emitInsSanityCheck(instrDesc* id)
7196 // make certain you only try to put relocs on things that can have them.
// idOp is the operand-kind class that the emitFmtToOps table assigns to this instruction format.
7197 ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
7198 if ((idOp == ID_OP_SCNS) && id->idIsLargeCns())
// A displacement reloc is only accepted on the operand kinds listed in this assert.
7203 if (id->idIsDspReloc())
7205 assert(idOp == ID_OP_NONE || idOp == ID_OP_AMD || idOp == ID_OP_DSP || idOp == ID_OP_DSP_CNS ||
7206 idOp == ID_OP_AMD_CNS || idOp == ID_OP_SPEC || idOp == ID_OP_CALL || idOp == ID_OP_JMP ||
// A constant reloc is only accepted on the operand kinds listed in this assert.
7210 if (id->idIsCnsReloc())
7212 assert(idOp == ID_OP_CNS || idOp == ID_OP_AMD_CNS || idOp == ID_OP_DSP_CNS || idOp == ID_OP_SPEC ||
7213 idOp == ID_OP_CALL || idOp == ID_OP_JMP);
7218 /*****************************************************************************
7220 * Return the allocated size (in bytes) of the given instruction descriptor.
7223 size_t emitter::emitSizeOfInsDsc(instrDesc* id)
// Small-constant descriptors have a fixed, reduced size and need no further inspection.
7225 if (emitIsScnsInsDsc(id))
7227 return SMALL_IDSC_SIZE;
7230 assert((unsigned)id->idInsFmt() < emitFmtCount);
// The operand-kind class for the format determines which descriptor layout was allocated.
7232 ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
7234 // An INS_call instruction may use a "fat" direct/indirect call descriptor
7235 // except for a local call to a label (i.e. call to a finally)
7236 // Only ID_OP_CALL and ID_OP_SPEC check for this, so we enforce that the
7237 // INS_call instruction always uses one of these idOps
7239 if (id->idIns() == INS_call)
7241 assert(idOp == ID_OP_CALL || // is a direct call
7242 idOp == ID_OP_SPEC || // is an indirect call
7243 idOp == ID_OP_JMP); // is a local call to finally clause
7252 return sizeof(instrDescLbl);
7255 return sizeof(instrDescJmp);
7259 if (id->idIsLargeCall())
7261 /* Must be a "fat" indirect call descriptor */
7262 return sizeof(instrDescCGCA);
// Otherwise the size depends on whether a large constant and/or a large displacement is attached.
7273 if (id->idIsLargeCns())
7275 if (id->idIsLargeDsp())
7277 return sizeof(instrDescCnsDsp);
7281 return sizeof(instrDescCns);
7286 if (id->idIsLargeDsp())
7288 return sizeof(instrDescDsp);
7292 return sizeof(instrDesc);
7297 NO_WAY("unexpected instruction descriptor format");
7301 return sizeof(instrDesc);
7304 /*****************************************************************************/
7306 /*****************************************************************************
7308 * Return a string that represents the given register.
7311 const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName)
// Two static scratch buffers are used in rotation (rbc selects the current one), so a
// name built here stays valid across one further call - presumably so that two
// results can be passed to a single printf; confirm against callers.
// NOTE(review): the buffers are shared static state, so returned pointers are only
// valid until they are reused, and concurrent callers would race - confirm usage.
7313 static char rb[2][128];
7314 static unsigned char rbc = 0;
// Start from the compiler's name for the register; the size-specific cases below refine it.
7316 const char* rn = emitComp->compRegVarName(reg, varName);
7318 #ifdef _TARGET_AMD64_
7321 switch (EA_SIZE(attr))
7324 return emitYMMregName(reg);
7327 return emitXMMregName(reg);
7330 if ((REG_XMM0 <= reg) && (reg <= REG_XMM15))
7332 return emitXMMregName(reg);
7337 if ((REG_XMM0 <= reg) && (reg <= REG_XMM15))
7339 return emitXMMregName(reg);
7352 rbc = (rbc + 1) % 2;
7374 rbc = (rbc + 1) % 2;
7381 rb[rbc][3] = suffix;
7386 rb[rbc][2] = suffix;
7392 rbc = (rbc + 1) % 2;
7413 #endif // _TARGET_AMD64_
// The x86 path indexes into rn at offset 3 below, hence the minimum-length check.
7416 assert(strlen(rn) >= 3);
7418 switch (EA_SIZE(attr))
7421 return emitYMMregName(reg);
7424 return emitXMMregName(reg);
7427 if ((REG_XMM0 <= reg) && (reg <= REG_XMM7))
7429 return emitXMMregName(reg);
7434 if ((REG_XMM0 <= reg) && (reg <= REG_XMM7))
7436 return emitXMMregName(reg);
7445 rbc = (rbc + 1) % 2;
7448 strcpy_s(&rb[rbc][2], sizeof(rb[0]) - 2, rn + 3);
7456 #endif // _TARGET_X86_
7459 // The following is useful if you want register names to be tagged with * or ^ representing gcref or byref, respectively,
7460 // however it's possibly not interesting most of the time.
7461 if (EA_IS_GCREF(attr) || EA_IS_BYREF(attr))
// Copy the name into the current scratch buffer so the tag character can be appended.
7466 strcpy_s(rb[rbc], sizeof(rb[rbc]), rn);
7470 if (EA_IS_GCREF(attr))
7472 strcat_s(rb[rbc], sizeof(rb[rbc]), "*");
7474 else if (EA_IS_BYREF(attr))
7476 strcat_s(rb[rbc], sizeof(rb[rbc]), "^");
7484 /*****************************************************************************
7486 * Return a string that represents the given FP register.
7489 const char* emitter::emitFPregName(unsigned reg, bool varName)
// reg must be a valid register number; it is cast to regNumber and the name
// lookup is delegated to the compiler's compFPregVarName.
7491 assert(reg < REG_COUNT);
7493 return emitComp->compFPregVarName((regNumber)(reg), varName);
7496 /*****************************************************************************
7498 * Return a string that represents the given XMM register.
7501 const char* emitter::emitXMMregName(unsigned reg)
// The name table is generated by expanding REGDEF over register.h: each entry is
// the register's short name (sname) with an "x" prefix.
7503 static const char* const regNames[] = {
7504 #define REGDEF(name, rnum, mask, sname) "x" sname,
7505 #include "register.h"
// reg is used directly as the table index, so it must be within both the register count and the table.
7508 assert(reg < REG_COUNT);
7509 assert(reg < _countof(regNames));
7511 return regNames[reg];
7514 /*****************************************************************************
7516 * Return a string that represents the given YMM register.
7519 const char* emitter::emitYMMregName(unsigned reg)
// The name table is generated by expanding REGDEF over register.h: each entry is
// the register's short name (sname) with a "y" prefix.
7521 static const char* const regNames[] = {
7522 #define REGDEF(name, rnum, mask, sname) "y" sname,
7523 #include "register.h"
// reg is used directly as the table index, so it must be within both the register count and the table.
7526 assert(reg < REG_COUNT);
7527 assert(reg < _countof(regNames));
7529 return regNames[reg];
7532 /*****************************************************************************
7534 * Display a static data member reference.
7537 void emitter::emitDispClsVar(CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, bool reloc /* = false */)
7541 /* Filter out the special case of fs:[offs] */
7543 // Munge any pointers if we want diff-able disassembly
7544 if (emitComp->opts.disDiffable)
// An offset whose bits above bit 19 are neither all zero nor all one is treated as pointer-like.
7546 ssize_t top12bits = (offs >> 20);
7547 if ((top12bits != 0) && (top12bits != -1))
// NOTE(review): offs is ssize_t but is printed with %04X in the two cases below - confirm on 64-bit targets.
7553 if (fldHnd == FLD_GLOBAL_FS)
7555 printf("FS:[0x%04X]", offs);
7559 if (fldHnd == FLD_GLOBAL_DS)
7561 printf("[0x%04X]", offs);
// Ask whether the handle refers to the JIT's own data section; the @CNS / @RWD labels below print that offset.
7567 doffs = Compiler::eeGetJitDataOffs(fldHnd);
7578 printf("@CNS%02u", doffs - 1);
7582 printf("@RWD%02u", doffs);
7587 printf("%+Id", offs);
7592 printf("classVar[%#x]", emitComp->dspPtr(fldHnd));
7596 printf("%+Id", offs);
7602 if (emitComp->opts.varNames && offs < 0)
7604 printf("'%s", emitComp->eeGetFieldName(fldHnd));
7607 printf("%+Id", offs);
7613 /*****************************************************************************
7615 * Display a stack frame reference.
7618 void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
// Symbolic form: used when not formatting for an assembly listing, or when no frame layout exists yet.
7625 if (!asmfm || emitComp->lvaDoneFrameLayout == Compiler::NO_FRAME_LAYOUT)
// The operand is labelled TEMP_nn (from the negated varx) or Vnn (from varx) -
// presumably chosen by the sign of varx; confirm.
7629 printf("TEMP_%02u", -varx);
7633 printf("V%02u", +varx);
// The displacement is printed as a signed hexadecimal magnitude.
7638 printf("-0x%X", -disp);
7642 printf("+0x%X", +disp);
// Once the final frame layout is known, the frame address plus disp is printed.
7646 if (emitComp->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
7653 addr = emitComp->lvaFrameAddress(varx, &bEBP) + disp;
7661 printf("-%02XH", -addr);
7665 printf("+%02XH", addr);
7670 /* Adjust the offset by amount currently pushed on the stack */
7676 printf("-%02XH", -addr);
7680 printf("+%02XH", addr);
7683 #if !FEATURE_FIXED_OUT_ARGS
// A non-zero current stack level is appended as an extra positive hex term.
7685 if (emitCurStackLvl)
7686 printf("+%02XH", emitCurStackLvl);
7688 #endif // !FEATURE_FIXED_OUT_ARGS
// When variable names are enabled, a real local (varx >= 0) is also annotated with its source-level name.
7694 if (varx >= 0 && emitComp->opts.varNames)
7697 const char* varName;
7699 assert((unsigned)varx < emitComp->lvaCount);
7700 varDsc = emitComp->lvaTable + varx;
7701 varName = emitComp->compLocalVarName(varx, offs);
7705 printf("'%s", varName);
7709 printf("-%d", -disp);
7713 printf("+%d", +disp);
7721 /*****************************************************************************
7723 * Display a reloc value
7724 * If we are formatting for an assembly listing don't print the hex value
7725 * since it will prevent us from doing assembly diffs
7727 void emitter::emitDispReloc(ssize_t value)
// opts.disAsm selects the assembly-listing form described in the header.
7729 if (emitComp->opts.disAsm)
// Otherwise the value is passed through dspPtr and printed in hex.
7735 printf("(reloc 0x%Ix)", emitComp->dspPtr(value));
7739 /*****************************************************************************
7741 * Display an address mode.
7744 void emitter::emitDispAddrMode(instrDesc* id, bool noDetail)
// jdsc stays null unless a jump-table data section is matched below; a non-null
// value later triggers the label-table dump (unless noDetail is set).
7750 dataSection* jdsc = nullptr;
7752 /* The displacement field is in an unusual place for calls */
7754 disp = (id->idIns() == INS_call) ? emitGetInsCIdisp(id) : emitGetInsAmdAny(id);
7756 /* Display a jump table label if this is a switch table jump */
7758 if (id->idIns() == INS_i_jmp)
7760 UNATIVE_OFFSET offs = 0;
7762 /* Find the appropriate entry in the data section list */
7764 for (jdsc = emitConsDsc.dsdList, jtno = 0; jdsc; jdsc = jdsc->dsNext)
7766 UNATIVE_OFFSET size = jdsc->dsSize;
7768 /* Is this a label table? */
// The section is identified by comparing the running offset with the instruction's memory cookie.
7775 if (offs == id->idDebugOnlyInfo()->idMemCookie)
7784 /* If we've found a matching entry then it is a table jump */
7788 if (id->idIsDspReloc())
7792 printf("J_M%03u_DS%02u", Compiler::s_compMethodsCount, id->idDebugOnlyInfo()->idMemCookie);
// The cookie has been shown as part of the label, so it is removed from the displacement.
7795 disp -= id->idDebugOnlyInfo()->idMemCookie;
7798 bool frameRef = false;
7802 if (id->idAddr()->iiaAddrMode.amBaseReg != REG_NA)
7804 printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg));
7806 if (id->idAddr()->iiaAddrMode.amBaseReg == REG_ESP)
7810 else if (emitComp->isFramePointerUsed() && id->idAddr()->iiaAddrMode.amBaseReg == REG_EBP)
7816 if (id->idAddr()->iiaAddrMode.amIndxReg != REG_NA)
7818 size_t scale = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale);
7826 printf("%u*", scale);
7828 printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amIndxReg));
// A relocatable displacement (other than on a table jump) is shown via emitDispReloc.
7832 if ((id->idIsDspReloc()) && (id->idIns() != INS_i_jmp))
7838 emitDispReloc(disp);
7842 // Munge any pointers if we want diff-able disassembly
7843 // It's assumed to be a pointer when disp is outside of the range (-1M, +1M); top bits are not 0 or -1
7844 if (!frameRef && emitComp->opts.disDiffable && (static_cast<size_t>((disp >> 20) + 1) > 1))
7850 printf("D1FFAB1EH");
// Non-negative displacements: the output format widens with the magnitude.
7860 printf("%02XH", disp);
7862 else if (disp < 1000)
7866 else if (disp <= 0xFFFF)
7868 printf("%04XH", disp);
7872 printf("%08XH", disp);
// Negative displacements: printed as a negated magnitude, again widening with size.
7879 printf("-%02XH", -disp);
7881 else if (disp > -1000)
7883 printf("-%d", -disp);
7885 else if (disp >= -0xFFFF)
7887 printf("-%04XH", -disp);
7889 else if (disp < -0xFFFFFF)
7895 printf("%08XH", disp);
7899 printf("-%08XH", -disp);
7904 printf("%04XH", disp);
7910 // pretty print string if it looks like one
7911 if ((id->idGCref() == GCT_GCREF) && (id->idIns() == INS_mov) && (id->idAddr()->iiaAddrMode.amBaseReg == REG_NA))
7913 const wchar_t* str = emitComp->eeGetCPString(disp);
7916 printf(" '%S'", str);
// For a matched jump table, also dump the label table itself unless the caller asked for no detail.
7920 if (jdsc && !noDetail)
7922 unsigned cnt = (jdsc->dsSize - 1) / TARGET_POINTER_SIZE;
7923 BasicBlock** bbp = (BasicBlock**)jdsc->dsCont;
// SIZE_LETTER selects the "QWORD"/"DQ" or "DWORD"/"DD" spelling used in the table dump.
7925 #ifdef _TARGET_AMD64_
7926 #define SIZE_LETTER "Q"
7928 #define SIZE_LETTER "D"
7930 printf("\n\n J_M%03u_DS%02u LABEL " SIZE_LETTER "WORD", Compiler::s_compMethodsCount, jtno);
7932 /* Display the label table (it's stored as "BasicBlock*" values) */
7938 /* Convert the BasicBlock* value to an IG address */
7940 lab = (insGroup*)emitCodeGetCookie(*bbp++);
7943 printf("\n D" SIZE_LETTER " G_M%03u_IG%02u", Compiler::s_compMethodsCount, lab->igNum);
7948 /*****************************************************************************
7950 * If the given instruction is a shift, display the 2nd operand.
7953 void emitter::emitDispShift(instruction ins, int cnt)
// The shift count is printed as a decimal operand after a comma separator.
7984 printf(", %d", cnt);
7992 /*****************************************************************************
7994 * Display (optionally) the bytes for the instruction encoding in hex
7997 void emitter::emitDispInsHex(BYTE* code, size_t sz)
7999 // We do not display the instruction hex if we want diff-able disassembly
8000 if (!emitComp->opts.disDiffable)
8002 #ifdef _TARGET_AMD64_
8003 // how many bytes per instruction we format for
8004 const size_t digits = 10;
8005 #else // _TARGET_X86
8006 const size_t digits = 6;
// Print each encoded byte as two upper-case hex digits.
8009 for (unsigned i = 0; i < sz; i++)
8011 printf("%02X", (*((BYTE*)(code + i))));
// Pad with two characters per missing byte so shorter encodings line up.
// NOTE(review): digits - sz is unsigned, so this relies on sz < digits being
// checked first - confirm. The '*' precision argument is a size_t here, whereas
// printf expects an int.
8016 printf("%.*s", 2 * (digits - sz), " ");
8021 /*****************************************************************************
8023 * Display the given instruction.
8026 void emitter::emitDispIns(
8027 instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* code, size_t sz, insGroup* ig)
8032 instruction ins = id->idIns();
8034 if (emitComp->verbose)
8036 unsigned idNum = id->idDebugOnlyInfo()->idNum;
8037 printf("IN%04x: ", idNum);
8040 #define ID_INFO_DSP_RELOC ((bool)(id->idIsDspReloc()))
8042 /* Display a constant value if the instruction references one */
8046 switch (id->idInsFmt())
8069 /* Is this actually a reference to a data section? */
8071 offs = Compiler::eeGetJitDataOffs(id->idAddr()->iiaFieldHnd);
8077 /* Display a data section reference */
8079 assert((unsigned)offs < emitConsDsc.dsdOffs);
8080 addr = emitConsBlock ? emitConsBlock + offs : nullptr;
8083 // TODO-XArch-Cleanup: Fix or remove this code.
8084 /* Is the operand an integer or floating-point value? */
8088 if (CodeGen::instIsFP(id->idIns()))
8090 switch (id->idIns())
8103 printf("@CNS%02u", offs);
8105 printf("@RWD%02u", offs);
8113 // This was busted by switching the order
8114 // in which we output the code block vs.
8115 // the data blocks -- when we get here,
8116 // the data block has not been filled in
8117 // yet, so we'll display garbage.
8121 if (id->idOpSize() == EA_4BYTE)
8122 printf("DF %f \n", addr ? *(float *)addr : 0);
8124 printf("DQ %lf\n", addr ? *(double *)addr : 0);
8128 if (id->idOpSize() <= EA_4BYTE)
8129 printf("DD %d \n", addr ? *(int *)addr : 0);
8131 printf("DQ %D \n", addr ? *(__int64 *)addr : 0);
8143 // printf("[F=%s] " , emitIfName(id->idInsFmt()));
8144 // printf("INS#%03u: ", id->idDebugOnlyInfo()->idNum);
8145 // printf("[S=%02u] " , emitCurStackLvl); if (isNew) printf("[M=%02u] ", emitMaxStackDepth);
8146 // printf("[S=%02u] " , emitCurStackLvl/sizeof(INT32));
8147 // printf("[A=%08X] " , emitSimpleStkMask);
8148 // printf("[A=%08X] " , emitSimpleByrefStkMask);
8149 // printf("[L=%02u] " , id->idCodeSize());
8151 if (!emitComp->opts.dspEmit && !isNew && !asmfm)
8156 /* Display the instruction offset */
8158 emitDispInsOffs(offset, doffs);
8160 if (code != nullptr)
8162 /* Display the instruction hex code */
8164 emitDispInsHex(code, sz);
8167 /* Display the instruction name */
8169 sstr = codeGen->genInsName(ins);
8171 if (IsAVXInstruction(ins) && !IsBMIInstruction(ins))
8173 printf(" v%-8s", sstr);
8177 printf(" %-9s", sstr);
8180 if (strnlen_s(sstr, 10) >= 8)
8181 #else // FEATURE_PAL
8182 if (strnlen(sstr, 10) >= 8)
8183 #endif // FEATURE_PAL
8188 /* By now the size better be set to something */
8190 assert(emitInstCodeSz(id) || emitInstHasNoCode(ins));
8192 /* Figure out the operand size */
8194 if (id->idGCref() == GCT_GCREF)
8197 sstr = "gword ptr ";
8199 else if (id->idGCref() == GCT_BYREF)
8202 sstr = "bword ptr ";
8206 attr = id->idOpSize();
8207 sstr = codeGen->genSizeStr(attr);
8211 #ifdef _TARGET_AMD64_
8212 assert((attr == EA_4BYTE) || (attr == EA_8BYTE));
8214 assert(attr == EA_4BYTE);
8220 /* Now see what instruction format we've got */
8222 // First print the implicit register usage
8223 if (instrHasImplicitRegPairDest(ins))
8225 printf("%s:%s, ", emitRegName(REG_EDX, id->idOpSize()), emitRegName(REG_EAX, id->idOpSize()));
8227 else if (instrIs3opImul(ins))
8229 regNumber tgtReg = inst3opImulReg(ins);
8230 printf("%s, ", emitRegName(tgtReg, id->idOpSize()));
8233 switch (id->idInsFmt())
8238 const char* methodName;
8241 val = emitGetInsSC(id);
8242 #ifdef _TARGET_AMD64_
8243 // no 8-byte immediates allowed here!
8244 assert((val >= (ssize_t)0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
8246 if (id->idIsCnsReloc())
8253 // Munge any pointers if we want diff-able disassembly
8254 if (emitComp->opts.disDiffable)
8256 ssize_t top14bits = (val >> 18);
8257 if ((top14bits != 0) && (top14bits != -1))
8262 if ((val > -1000) && (val < 1000))
8266 else if ((val > 0) || (val < -0xFFFFFF))
8268 printf("0x%IX", val);
8272 printf("-0x%IX", -val);
8281 if (ins == INS_call && id->idIsCallRegPtr())
8283 printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg));
8288 emitDispAddrMode(id, isNew);
8291 if (ins == INS_call)
8293 assert(id->idInsFmt() == IF_ARD);
8295 /* Ignore indirect calls */
8297 if (id->idDebugOnlyInfo()->idMemCookie == 0)
8302 assert(id->idDebugOnlyInfo()->idMemCookie);
8304 /* This is a virtual call */
8306 methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
8307 printf("%s", methodName);
8314 #ifdef _TARGET_AMD64_
8315 if (ins == INS_movsxd)
8321 if (ins == INS_movsx || ins == INS_movzx)
8325 else if ((ins == INS_crc32) && (attr != EA_8BYTE))
8327 // The idReg1 is always 4 bytes, but the size of idReg2 can vary.
8328 // This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx`
8331 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8332 emitDispAddrMode(id);
8335 case IF_RRW_ARD_CNS:
8336 case IF_RWR_ARD_CNS:
8338 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8339 emitDispAddrMode(id);
8340 emitGetInsAmdCns(id, &cnsVal);
8342 val = cnsVal.cnsVal;
8345 if (cnsVal.cnsReloc)
8351 goto PRINT_CONSTANT;
8357 case IF_AWR_RRD_CNS:
8359 assert(ins == INS_vextracti128 || ins == INS_vextractf128);
8360 // vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr"
8361 sstr = codeGen->genSizeStr(EA_ATTR(16));
8363 emitDispAddrMode(id);
8364 printf(", %s", emitRegName(id->idReg1(), attr));
8366 emitGetInsAmdCns(id, &cnsVal);
8368 val = cnsVal.cnsVal;
8371 if (cnsVal.cnsReloc)
8377 goto PRINT_CONSTANT;
8383 case IF_RWR_RRD_ARD:
8384 printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8385 emitDispAddrMode(id);
8388 case IF_RWR_ARD_RRD:
8389 if (ins == INS_vpgatherqd || ins == INS_vgatherqps)
8393 sstr = codeGen->genSizeStr(EA_ATTR(4));
8394 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8395 emitDispAddrMode(id);
8396 printf(", %s", emitRegName(id->idReg2(), attr));
8399 case IF_RWR_RRD_ARD_CNS:
8401 printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8402 emitDispAddrMode(id);
8403 emitGetInsAmdCns(id, &cnsVal);
8405 val = cnsVal.cnsVal;
8408 if (cnsVal.cnsReloc)
8414 goto PRINT_CONSTANT;
8420 case IF_RWR_RRD_ARD_RRD:
8422 printf("%s, ", emitRegName(id->idReg1(), attr));
8423 printf("%s, ", emitRegName(id->idReg2(), attr));
8424 emitDispAddrMode(id);
8426 emitGetInsAmdCns(id, &cnsVal);
8427 val = (cnsVal.cnsVal >> 4) + XMMBASE;
8428 printf(", %s", emitRegName((regNumber)val, attr));
8437 emitDispAddrMode(id);
8438 printf(", %s", emitRegName(id->idReg1(), attr));
8441 case IF_AWR_RRD_RRD:
8444 emitDispAddrMode(id);
8445 printf(", %s", emitRegName(id->idReg1(), attr));
8446 printf(", %s", emitRegName(id->idReg2(), attr));
8456 emitDispAddrMode(id);
8457 emitGetInsAmdCns(id, &cnsVal);
8458 val = cnsVal.cnsVal;
8459 #ifdef _TARGET_AMD64_
8460 // no 8-byte immediates allowed here!
8461 assert((val >= (ssize_t)0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
8463 if (id->idInsFmt() == IF_ARW_SHF)
8465 emitDispShift(ins, (BYTE)val);
8470 if (cnsVal.cnsReloc)
8476 goto PRINT_CONSTANT;
8487 #if !FEATURE_FIXED_OUT_ARGS
8489 emitCurStackLvl -= sizeof(int);
8492 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8493 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8495 #if !FEATURE_FIXED_OUT_ARGS
8497 emitCurStackLvl += sizeof(int);
8509 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8510 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8512 printf(", %s", emitRegName(id->idReg1(), attr));
8522 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8523 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8525 emitGetInsCns(id, &cnsVal);
8526 val = cnsVal.cnsVal;
8527 #ifdef _TARGET_AMD64_
8528 // no 8-byte immediates allowed here!
8529 assert((val >= (ssize_t)0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
8531 if (id->idInsFmt() == IF_SRW_SHF)
8533 emitDispShift(ins, (BYTE)val);
8538 if (cnsVal.cnsReloc)
8544 goto PRINT_CONSTANT;
8552 #ifdef _TARGET_AMD64_
8553 if (ins == INS_movsxd)
8559 if (ins == INS_movsx || ins == INS_movzx)
8563 else if ((ins == INS_crc32) && (attr != EA_8BYTE))
8565 // The idReg1 is always 4 bytes, but the size of idReg2 can vary.
8566 // This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx`
8570 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8571 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8572 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8576 case IF_RRW_SRD_CNS:
8577 case IF_RWR_SRD_CNS:
8579 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8580 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8581 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8582 emitGetInsCns(id, &cnsVal);
8584 val = cnsVal.cnsVal;
8587 if (cnsVal.cnsReloc)
8593 goto PRINT_CONSTANT;
8598 case IF_RWR_RRD_SRD:
8599 printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8600 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8601 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8604 case IF_RWR_RRD_SRD_CNS:
8606 printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8607 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8608 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8609 emitGetInsCns(id, &cnsVal);
8611 val = cnsVal.cnsVal;
8614 if (cnsVal.cnsReloc)
8620 goto PRINT_CONSTANT;
8625 case IF_RWR_RRD_SRD_RRD:
8627 printf("%s, ", emitRegName(id->idReg1(), attr));
8628 printf("%s, ", emitRegName(id->idReg2(), attr));
8629 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8630 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8632 emitGetInsCns(id, &cnsVal);
8633 val = (cnsVal.cnsVal >> 4) + XMMBASE;
8634 printf(", %s", emitRegName((regNumber)val, attr));
8641 if (ins == INS_mov_i2xmm)
8643 printf("%s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr));
8645 else if (ins == INS_mov_xmm2i)
8647 printf("%s, %s", emitRegName(id->idReg2(), attr), emitRegName(id->idReg1(), EA_16BYTE));
8649 else if (ins == INS_pmovmskb)
8651 printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
8653 else if ((ins == INS_cvtsi2ss) || (ins == INS_cvtsi2sd))
8655 printf(" %s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr));
8657 else if ((ins == INS_cvttsd2si) || (ins == INS_cvtss2si) || (ins == INS_cvtsd2si) || (ins == INS_cvttss2si))
8659 printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
8661 #ifdef _TARGET_AMD64_
8662 else if (ins == INS_movsxd)
8664 printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), emitRegName(id->idReg2(), EA_4BYTE));
8666 #endif // _TARGET_AMD64_
8667 else if (ins == INS_movsx || ins == INS_movzx)
8669 printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), emitRegName(id->idReg2(), attr));
8671 else if (ins == INS_bt)
8673 // INS_bt operands are reversed. Display them in the normal order.
8674 printf("%s, %s", emitRegName(id->idReg2(), attr), emitRegName(id->idReg1(), attr));
8676 #ifdef FEATURE_HW_INTRINSICS
8677 else if (ins == INS_crc32 && attr != EA_8BYTE)
8679 // The idReg1 is always 4 bytes, but the size of idReg2 can vary.
8680 // This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx`
8681 printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
8683 #endif // FEATURE_HW_INTRINSICS
8686 printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr));
8691 assert(ins == INS_xchg);
8692 printf("%s,", emitRegName(id->idReg1(), attr));
8693 printf(" %s", emitRegName(id->idReg2(), attr));
8696 case IF_RWR_RRD_RRD:
8698 assert(IsAVXInstruction(ins));
8699 assert(IsThreeOperandAVXInstruction(ins));
8700 regNumber reg2 = id->idReg2();
8701 regNumber reg3 = id->idReg3();
8702 if (ins == INS_bextr || ins == INS_bzhi)
8704 // BMI bextr and bzhi encodes the reg2 in VEX.vvvv and reg3 in modRM,
8705 // which is different from most of other instructions
8706 regNumber tmp = reg2;
8710 printf("%s, ", emitRegName(id->idReg1(), attr));
8711 printf("%s, ", emitRegName(reg2, attr));
8712 printf("%s", emitRegName(reg3, attr));
8716 case IF_RWR_RRD_RRD_CNS:
8717 assert(IsAVXInstruction(ins));
8718 assert(IsThreeOperandAVXInstruction(ins));
8719 printf("%s, ", emitRegName(id->idReg1(), attr));
8720 printf("%s, ", emitRegName(id->idReg2(), attr));
8721 printf("%s, ", emitRegName(id->idReg3(), attr));
8722 val = emitGetInsSC(id);
8723 goto PRINT_CONSTANT;
8725 case IF_RWR_RRD_RRD_RRD:
8726 assert(IsAVXOnlyInstruction(ins));
8727 assert(UseVEXEncoding());
8728 printf("%s, ", emitRegName(id->idReg1(), attr));
8729 printf("%s, ", emitRegName(id->idReg2(), attr));
8730 printf("%s, ", emitRegName(id->idReg3(), attr));
8731 printf("%s", emitRegName(id->idReg4(), attr));
8733 case IF_RRW_RRW_CNS:
8734 printf("%s,", emitRegName(id->idReg1(), attr));
8735 printf(" %s", emitRegName(id->idReg2(), attr));
8736 val = emitGetInsSC(id);
8737 #ifdef _TARGET_AMD64_
8738 // no 8-byte immediates allowed here!
8739 assert((val >= (ssize_t)0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
8742 if (id->idIsCnsReloc())
8748 goto PRINT_CONSTANT;
8755 printf("%s", emitRegName(id->idReg1(), attr));
8760 printf("%s", emitRegName(id->idReg1(), attr));
8761 emitDispShift(ins, (BYTE)emitGetInsSC(id));
8768 if (ins == INS_movsx || ins == INS_movzx)
8772 #ifdef _TARGET_AMD64_
8773 else if (ins == INS_movsxd)
8778 else if ((ins == INS_crc32) && (attr != EA_8BYTE))
8780 // The idReg1 is always 4 bytes, but the size of idReg2 can vary.
8781 // This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx`
8784 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8785 offs = emitGetInsDsp(id);
8786 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8789 case IF_RRW_MRD_CNS:
8790 case IF_RWR_MRD_CNS:
8792 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8793 offs = emitGetInsDsp(id);
8794 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8795 emitGetInsDcmCns(id, &cnsVal);
8797 val = cnsVal.cnsVal;
8800 if (cnsVal.cnsReloc)
8806 goto PRINT_CONSTANT;
8811 case IF_MWR_RRD_CNS:
8813 assert(ins == INS_vextracti128 || ins == INS_vextractf128);
8814 // vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr"
8815 sstr = codeGen->genSizeStr(EA_ATTR(16));
8817 offs = emitGetInsDsp(id);
8818 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8819 printf(", %s", emitRegName(id->idReg1(), attr));
8820 emitGetInsDcmCns(id, &cnsVal);
8822 val = cnsVal.cnsVal;
8825 if (cnsVal.cnsReloc)
8831 goto PRINT_CONSTANT;
8837 case IF_RWR_RRD_MRD:
8838 printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8839 offs = emitGetInsDsp(id);
8840 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8843 case IF_RWR_RRD_MRD_CNS:
8845 printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8846 offs = emitGetInsDsp(id);
8847 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8848 emitGetInsDcmCns(id, &cnsVal);
8850 val = cnsVal.cnsVal;
8853 if (cnsVal.cnsReloc)
8859 goto PRINT_CONSTANT;
8864 case IF_RWR_RRD_MRD_RRD:
8866 printf("%s, ", emitRegName(id->idReg1(), attr));
8867 printf("%s, ", emitRegName(id->idReg2(), attr));
8869 offs = emitGetInsDsp(id);
8870 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8872 emitGetInsDcmCns(id, &cnsVal);
8873 val = (cnsVal.cnsVal >> 4) + XMMBASE;
8874 printf(", %s", emitRegName((regNumber)val, attr));
8878 case IF_RWR_MRD_OFF:
8880 printf("%s, %s", emitRegName(id->idReg1(), attr), "offset");
8881 offs = emitGetInsDsp(id);
8882 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8890 offs = emitGetInsDsp(id);
8891 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8892 printf(", %s", emitRegName(id->idReg1(), attr));
8901 offs = emitGetInsDsp(id);
8902 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8903 emitGetInsDcmCns(id, &cnsVal);
8904 val = cnsVal.cnsVal;
8905 #ifdef _TARGET_AMD64_
8906 // no 8-byte immediates allowed here!
8907 assert((val >= (ssize_t)0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
8909 if (cnsVal.cnsReloc)
8913 else if (id->idInsFmt() == IF_MRW_SHF)
8915 emitDispShift(ins, (BYTE)val);
8920 goto PRINT_CONSTANT;
8929 offs = emitGetInsDsp(id);
8930 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8937 offs = emitGetInsDsp(id);
8938 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8944 printf("%s, ", emitRegName(id->idReg1(), attr));
8945 val = emitGetInsSC(id);
8946 if (id->idIsCnsReloc())
8952 goto PRINT_CONSTANT;
8962 printf("%s, ", emitRegName(id->idReg1(), attr));
8964 else if (ins == INS_mov)
8966 /* mov dword ptr [frame.callSiteReturnAddress], label */
8967 assert(id->idInsFmt() == IF_SWR_LABEL);
8968 instrDescLbl* idlbl = (instrDescLbl*)id;
8970 emitDispFrameRef(idlbl->dstLclVar.lvaVarNum(), idlbl->dstLclVar.lvaOffset(), 0, asmfm);
8975 if (((instrDescJmp*)id)->idjShort)
8980 if (id->idIsBound())
8982 printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum);
8986 printf("L_M%03u_" FMT_BB, Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum);
8992 if (id->idIsCallAddr())
8994 offs = (ssize_t)id->idAddr()->iiaAddr;
9000 methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
9003 if (id->idInsFmt() == IF_METHPTR)
9010 if (id->idIsDspReloc())
9014 printf("%08X", offs);
9018 printf("%s", methodName);
9021 if (id->idInsFmt() == IF_METHPTR)
9032 printf("unexpected format %s", emitIfName(id->idInsFmt()));
9033 assert(!"unexpectedFormat");
9037 if (sz != 0 && sz != id->idCodeSize() && (!asmfm || emitComp->verbose))
9039 // Code size in the instrDesc is different from the actual code size we've been given!
9040 printf(" (ECS:%d, ACS:%d)", id->idCodeSize(), sz);
9046 /*****************************************************************************/
9049 /*****************************************************************************
9051 * Output nBytes bytes of NOP instructions
// Pads the code stream at dst with nBytes bytes of NOP encoding. The assert below caps
// a single request at 15 bytes.
//
//   dst    - position in the code buffer where the padding is written
//   nBytes - number of padding bytes requested (at most 15)
//
// NOTE(review): this listing shows only part of the body. The per-length encodings and
// the return statement are not visible, so the return value is presumably the advanced
// dst (the nested calls below feed one result into the next) -- confirm against the
// full source.
9054 static BYTE* emitOutputNOP(BYTE* dst, size_t nBytes)
9056 assert(nBytes <= 15);
9058 #ifndef _TARGET_AMD64_
9059 // TODO-X86-CQ: when VIA C3 CPU's are out of circulation, switch to the
9060 // more efficient real NOP: 0x0F 0x1F +modR/M
9061 // Also can't use AMD recommended, multiple size prefixes (i.e. 0x66 0x66 0x90 for 3 byte NOP)
9062 // because debugger and msdis don't like it, so maybe VIA doesn't either
9063 // So instead just stick to repeating single byte nops
9115 #else // _TARGET_AMD64_
// The paddings below are each built from two shorter requests: the inner call runs
// first and its result becomes the destination of the outer call.
9157 // More than 3 prefixes is slower than just 2 NOPs
// 7 bytes followed by 8 bytes (15 in total).
9158 dst = emitOutputNOP(emitOutputNOP(dst, 7), 8);
9161 // More than 3 prefixes is slower than just 2 NOPs
// 7 bytes followed by 7 bytes (14 in total).
9162 dst = emitOutputNOP(emitOutputNOP(dst, 7), 7);
9165 // More than 3 prefixes is slower than just 2 NOPs
// 5 bytes followed by 8 bytes (13 in total).
9166 dst = emitOutputNOP(emitOutputNOP(dst, 5), 8);
9169 // More than 3 prefixes is slower than just 2 NOPs
// 4 bytes followed by 8 bytes (12 in total).
9170 dst = emitOutputNOP(emitOutputNOP(dst, 4), 8);
9192 #endif // _TARGET_AMD64_
9197 /*****************************************************************************
9199 * Output an instruction involving an address mode.
// emitOutputAM: encodes the instruction described by id, whose memory operand is an
// address mode (base register amBaseReg, index register amIndxReg, scale amScale, plus
// a displacement), and writes the resulting bytes at dst.
//
//   dst  - current write position in the code buffer; advanced as bytes are emitted
//   id   - instruction descriptor supplying the instruction, operand size, registers,
//          address mode, displacement and GC information
//   code - opcode bits to emit; prefix and register encodings are merged into it here
//   addc - optional immediate operand (tested for null at its first use); when present
//          its value is written after the address mode bytes
//
// After the bytes are written, the GC register liveness is updated according to the
// instruction format.
//
// NOTE(review): this listing omits some lines of the body (braces, else and case
// labels, and the return statement are not visible). The return value is presumably
// the advanced dst -- confirm against the full source.
9202 BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
9210 instruction ins = id->idIns();
9211 emitAttr size = id->idOpSize();
9212 size_t opsz = EA_SIZE_IN_BYTES(size);
9214 // Get the base/index registers
9215 reg = id->idAddr()->iiaAddrMode.amBaseReg;
9216 rgx = id->idAddr()->iiaAddrMode.amIndxReg;
9218 // For INS_call the instruction size is actually the return value size
9219 if (ins == INS_call)
9221 // Special case: call via a register
9222 if (id->idIsCallRegPtr())
// The register call builds its own opcode from the base register and emits it as
// a prefix (if needed) plus one word.
9224 code_t opcode = insEncodeMRreg(INS_call, reg, EA_PTRSIZE, insCodeMR(INS_call));
9226 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, opcode);
9227 dst += emitOutputWord(dst, opcode);
9231 // The displacement field is in an unusual place for calls
9232 dsp = emitGetInsCIdisp(id);
9234 #ifdef _TARGET_AMD64_
9236 // Compute the REX prefix if it exists
9237 if (IsExtendedReg(reg, EA_PTRSIZE))
9239 insEncodeReg012(ins, reg, EA_PTRSIZE, &code);
9240 // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
9241 reg = (regNumber)RegEncoding(reg);
9244 if (IsExtendedReg(rgx, EA_PTRSIZE))
9246 insEncodeRegSIB(ins, rgx, &code);
9247 // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
9248 rgx = (regNumber)RegEncoding(rgx);
9251 // And emit the REX prefix
9252 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9254 #endif // _TARGET_AMD64_
9259 // `addc` is used for two kinds of instructions
9260 // 1. ins like ADD that can have reg/mem and const versions both and const version needs to modify the opcode for
9261 // large constant operand (e.g., imm32)
9262 // 2. certain SSE/AVX ins have const operand as control bits that is always 1-Byte (imm8) even if `size` > 1-Byte
9263 if (addc && (size > EA_1BYTE))
9265 ssize_t cval = addc->cnsVal;
9267 // Does the constant fit in a byte?
9268 // SSE/AVX do not need to modify opcode
// The byte test is a round trip through signed char; relocatable constants and
// INS_mov / INS_test are excluded from this path.
9269 if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
9271 if (id->idInsFmt() != IF_ARW_SHF && !IsSSEOrAVXInstruction(ins))
9280 // Emit VEX prefix if required
9281 // There are some callers who already add VEX prefix and call this routine.
9282 // Therefore, add VEX prefix if one is not already present.
9283 code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
9285 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
9286 if (TakesVexPrefix(ins))
9288 if (IsDstDstSrcAVXInstruction(ins))
9290 regNumber src1 = REG_NA;
// Pick the register that goes into the vvvv field: idReg2 for the formats
// listed here, idReg1 on the other visible assignment.
9292 switch (id->idInsFmt())
9294 case IF_RWR_RRD_ARD:
9295 case IF_RWR_ARD_RRD:
9296 case IF_RWR_RRD_ARD_CNS:
9297 case IF_RWR_RRD_ARD_RRD:
9299 src1 = id->idReg2();
9305 src1 = id->idReg1();
9310 // encode source operand reg in 'vvvv' bits in 1's complement form
9311 code = insEncodeReg3456(ins, src1, size, code);
9313 else if (IsDstSrcSrcAVXInstruction(ins))
9315 code = insEncodeReg3456(ins, id->idReg2(), size, code);
9319 // Emit the REX prefix if required
9320 if (TakesRexWPrefix(ins, size))
9322 code = AddRexWPrefix(ins, code);
9325 if (IsExtendedReg(reg, EA_PTRSIZE))
9327 insEncodeReg012(ins, reg, EA_PTRSIZE, &code);
9328 // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
9329 reg = (regNumber)RegEncoding(reg);
9332 if (IsExtendedReg(rgx, EA_PTRSIZE))
9334 insEncodeRegSIB(ins, rgx, &code);
9335 // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
9336 rgx = (regNumber)RegEncoding(rgx);
9339 // Special case emitting AVX instructions
9340 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9342 if ((ins == INS_crc32) && (size > EA_1BYTE))
// crc32 with a 2-byte operand gets the 0x66 prefix byte emitted first.
9346 if (size == EA_2BYTE)
9348 dst += emitOutputByte(dst, 0x66);
// Choose the register encoded by insEncodeReg345: BMI instructions supply it via
// getBmiRegNumber; when that leaves REG_NA the register comes from the descriptor
// (idReg2 for IF_AWR_RRD_RRD, idReg1 on the other visible assignment).
9352 regNumber reg345 = REG_NA;
9353 if (IsBMIInstruction(ins))
9355 reg345 = getBmiRegNumber(ins);
9357 if (reg345 == REG_NA)
9359 switch (id->idInsFmt())
9361 case IF_AWR_RRD_RRD:
9363 reg345 = id->idReg2();
9369 reg345 = id->idReg1();
9374 unsigned regcode = insEncodeReg345(ins, reg345, size, &code);
9376 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9378 if (UseVEXEncoding() && (ins != INS_crc32))
9380 // Emit last opcode byte
9381 // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
// The low byte of code must be empty here; only bits 8..15 are written.
9382 assert((code & 0xFF) == 0);
9383 dst += emitOutputByte(dst, (code >> 8) & 0xFF);
// Otherwise the opcode is written as two words, high word first.
9387 dst += emitOutputWord(dst, code >> 16);
9388 dst += emitOutputWord(dst, code & 0xFFFF);
9393 // Is this a 'big' opcode?
9394 else if (code & 0xFF000000)
9396 // Output the REX prefix
9397 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9399 // Output the highest word of the opcode
9400 // We need to check again as in case of AVX instructions leading opcode bytes are stripped off
9401 // and encoded as part of VEX prefix.
9402 if (code & 0xFF000000)
9404 dst += emitOutputWord(dst, code >> 16);
9408 else if (code & 0x00FF0000)
9410 // BT supports 16 bit operands and this code doesn't handle the necessary 66 prefix.
9411 assert(ins != INS_bt);
9413 // Output the REX prefix
9414 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9416 // Output the highest byte of the opcode
9417 if (code & 0x00FF0000)
9419 dst += emitOutputByte(dst, code >> 16);
9423 // Use the large version if this is not a byte. This trick will not
9424 // work in case of SSE2 and AVX instructions.
9425 if ((size != EA_1BYTE) && (ins != INS_imul) && !IsSSEInstruction(ins) && !IsAVXInstruction(ins))
9430 else if (CodeGen::instIsFP(ins))
9432 assert(size == EA_4BYTE || size == EA_8BYTE);
9433 if (size == EA_8BYTE)
9438 else if (!IsSSEInstruction(ins) && !IsAVXInstruction(ins))
9440 /* Is the operand size larger than a byte? */
9449 /* Output a size prefix for a 16-bit operand */
9451 dst += emitOutputByte(dst, 0x66);
9456 #ifdef _TARGET_AMD64_
9460 /* Set the 'w' bit to get the large version */
9468 /* Double operand - set the appropriate bit */
9473 #endif // _TARGET_X86_
9476 NO_WAY("unexpected size");
9481 // Output the REX prefix
9482 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9484 // Get the displacement value
9485 dsp = emitGetInsAmdAny(id);
// dspInByte: the displacement survives a round trip through signed char.
// dspIsZero: the displacement can be left out where the encoding allows it.
9489 dspInByte = ((signed char)dsp == (ssize_t)dsp);
9490 dspIsZero = (dsp == 0);
9492 if (id->idIsDspReloc())
9494 dspInByte = false; // relocs can't be placed in a byte
// From here on each addressing form is emitted twice over: the SSE38/SSE3A/crc32 path
// writes the address mode bits as a single byte (code | 0xNN), the other path writes
// the same bits shifted left by 8 as part of a word (code | 0xNN00).
9497 // Is there a [scaled] index component?
9500 // The address is of the form "[reg+disp]"
9505 if (id->idIsDspReloc())
9507 INT32 addlDelta = 0;
9509 // The address is of the form "[disp]"
9510 // On x86 - disp is relative to zero
9511 // On Amd64 - disp is relative to RIP
9512 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9514 dst += emitOutputByte(dst, code | 0x05);
9518 dst += emitOutputWord(dst, code | 0x0500);
9523 // It is of the form "ins [disp], imm" or "ins reg, [disp], imm"
9524 // For emitting relocation, we also need to take into account of the
9525 // additional bytes of code emitted for immed val.
9527 ssize_t cval = addc->cnsVal;
9529 #ifdef _TARGET_AMD64_
9530 // all these opcodes only take a sign-extended 4-byte immediate
9531 noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
9532 #else //_TARGET_X86_
9533 noway_assert(opsz <= 4);
9534 #endif //_TARGET_X86_
9551 assert(!"unexpected operand size");
9556 #ifdef _TARGET_AMD64_
9557 // We emit zero on Amd64, to avoid the assert in emitOutputLong()
9558 dst += emitOutputLong(dst, 0);
9560 dst += emitOutputLong(dst, dsp);
// The relocation is recorded against the 4 bytes that were just written.
9562 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_DISP32, 0,
9568 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9570 dst += emitOutputByte(dst, code | 0x05);
9574 dst += emitOutputWord(dst, code | 0x0500);
9576 #else //_TARGET_AMD64_
9577 // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
9578 // This addr mode should never be used while generating relocatable ngen code nor if
9579 // the addr can be encoded as pc-relative address.
9580 noway_assert(!emitComp->opts.compReloc);
9581 noway_assert(codeGen->genAddrRelocTypeHint((size_t)dsp) != IMAGE_REL_BASED_REL32);
9582 noway_assert((int)dsp == dsp);
9584 // This requires, specifying a SIB byte after ModRM byte.
// NOTE(review): every other address mode emission site visible in this function also
// tests ins == INS_crc32; this one does not. Confirm whether crc32 can reach this path.
9585 if (EncodedBySSE38orSSE3A(ins))
9587 dst += emitOutputByte(dst, code | 0x04);
9591 dst += emitOutputWord(dst, code | 0x0400);
// 0x25 is the extra byte written after the address mode bits (see the SIB note above).
9593 dst += emitOutputByte(dst, 0x25);
9594 #endif //_TARGET_AMD64_
9595 dst += emitOutputLong(dst, dsp);
9602 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9604 // Does the offset fit in a byte?
// 0x45 is followed by a one-byte displacement, 0x85 by a four-byte displacement.
9607 dst += emitOutputByte(dst, code | 0x45);
9608 dst += emitOutputByte(dst, dsp);
9612 dst += emitOutputByte(dst, code | 0x85);
9613 dst += emitOutputLong(dst, dsp);
9615 if (id->idIsDspReloc())
9617 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9623 // Does the offset fit in a byte?
9626 dst += emitOutputWord(dst, code | 0x4500);
9627 dst += emitOutputByte(dst, dsp);
9631 dst += emitOutputWord(dst, code | 0x8500);
9632 dst += emitOutputLong(dst, dsp);
9634 if (id->idIsDspReloc())
9636 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9645 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9647 // Is the offset 0 or does it at least fit in a byte?
// In this form a fixed 0x24 byte always follows the address mode bits; it is then
// followed by nothing, a one-byte displacement, or a four-byte displacement.
9650 dst += emitOutputByte(dst, code | 0x04);
9651 dst += emitOutputByte(dst, 0x24);
9655 dst += emitOutputByte(dst, code | 0x44);
9656 dst += emitOutputByte(dst, 0x24);
9657 dst += emitOutputByte(dst, dsp);
9661 dst += emitOutputByte(dst, code | 0x84);
9662 dst += emitOutputByte(dst, 0x24);
9663 dst += emitOutputLong(dst, dsp);
9664 if (id->idIsDspReloc())
9666 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9672 // Is the offset 0 or does it at least fit in a byte?
9675 dst += emitOutputWord(dst, code | 0x0400);
9676 dst += emitOutputByte(dst, 0x24);
9680 dst += emitOutputWord(dst, code | 0x4400);
9681 dst += emitOutputByte(dst, 0x24);
9682 dst += emitOutputByte(dst, dsp);
9686 dst += emitOutputWord(dst, code | 0x8400);
9687 dst += emitOutputByte(dst, 0x24);
9688 dst += emitOutputLong(dst, dsp);
9689 if (id->idIsDspReloc())
9691 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9700 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9702 // Put the register in the opcode
9703 code |= insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr);
9705 // Is there a displacement?
9708 // This is simply "[reg]"
9709 dst += emitOutputByte(dst, code);
9713 // This is [reg + dsp]" -- does the offset fit in a byte?
9716 dst += emitOutputByte(dst, code | 0x40);
9717 dst += emitOutputByte(dst, dsp);
9721 dst += emitOutputByte(dst, code | 0x80);
9722 dst += emitOutputLong(dst, dsp);
9723 if (id->idIsDspReloc())
9725 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9732 // Put the register in the opcode
// Same register bits as the byte path, shifted left by 8 for the word form.
9733 code |= insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) << 8;
9735 // Is there a displacement?
9738 // This is simply "[reg]"
9739 dst += emitOutputWord(dst, code);
9743 // This is [reg + dsp]" -- does the offset fit in a byte?
9746 dst += emitOutputWord(dst, code | 0x4000);
9747 dst += emitOutputByte(dst, dsp);
9751 dst += emitOutputWord(dst, code | 0x8000);
9752 dst += emitOutputLong(dst, dsp);
9753 if (id->idIsDspReloc())
9755 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9769 // We have a scaled index operand
9770 unsigned mul = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale);
9772 // Is the index operand scaled?
9775 // Is there a base register?
9778 // The address is "[reg + {2/4/8} * rgx + icon]"
// regByte combines the base register, the index register and the scale bits; it is
// written as its own byte right after the address mode bits.
9779 regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) |
9780 insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul);
9782 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9784 // Emit [ebp + {2/4/8} * rgx] as [ebp + {2/4/8} * rgx + 0]
9785 if (dspIsZero && reg != REG_EBP)
9787 // The address is "[reg + {2/4/8} * rgx]"
9788 dst += emitOutputByte(dst, code | 0x04);
9789 dst += emitOutputByte(dst, regByte);
9793 // The address is "[reg + {2/4/8} * rgx + disp]"
9796 dst += emitOutputByte(dst, code | 0x44);
9797 dst += emitOutputByte(dst, regByte);
9798 dst += emitOutputByte(dst, dsp);
9802 dst += emitOutputByte(dst, code | 0x84);
9803 dst += emitOutputByte(dst, regByte);
9804 dst += emitOutputLong(dst, dsp);
9805 if (id->idIsDspReloc())
9807 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9814 // Emit [ebp + {2/4/8} * rgx] as [ebp + {2/4/8} * rgx + 0]
9815 if (dspIsZero && reg != REG_EBP)
9817 // The address is "[reg + {2/4/8} * rgx]"
9818 dst += emitOutputWord(dst, code | 0x0400);
9819 dst += emitOutputByte(dst, regByte);
9823 // The address is "[reg + {2/4/8} * rgx + disp]"
9826 dst += emitOutputWord(dst, code | 0x4400);
9827 dst += emitOutputByte(dst, regByte);
9828 dst += emitOutputByte(dst, dsp);
9832 dst += emitOutputWord(dst, code | 0x8400);
9833 dst += emitOutputByte(dst, regByte);
9834 dst += emitOutputLong(dst, dsp);
9835 if (id->idIsDspReloc())
9837 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9845 // The address is "[{2/4/8} * rgx + icon]"
// With no base register, REG_EBP is encoded in the base field and a four-byte
// displacement is always written.
9846 regByte = insEncodeReg012(ins, REG_EBP, EA_PTRSIZE, nullptr) |
9847 insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul);
9849 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9851 dst += emitOutputByte(dst, code | 0x04);
9855 dst += emitOutputWord(dst, code | 0x0400);
9858 dst += emitOutputByte(dst, regByte);
9860 // Special case: jump through a jump table
9861 if (ins == INS_i_jmp)
// The displacement is rebased onto emitConsBlock before being written.
9863 dsp += (size_t)emitConsBlock;
9866 dst += emitOutputLong(dst, dsp);
9867 if (id->idIsDspReloc())
9869 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9875 // The address is "[reg+rgx+dsp]"
// Unscaled index: regByte carries only the base and index register bits.
9876 regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) | insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr);
9878 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9880 if (dspIsZero && reg != REG_EBP)
9882 // This is [reg+rgx]"
9883 dst += emitOutputByte(dst, code | 0x04);
9884 dst += emitOutputByte(dst, regByte);
9888 // This is [reg+rgx+dsp]" -- does the offset fit in a byte?
9891 dst += emitOutputByte(dst, code | 0x44);
9892 dst += emitOutputByte(dst, regByte);
9893 dst += emitOutputByte(dst, dsp);
9897 dst += emitOutputByte(dst, code | 0x84);
9898 dst += emitOutputByte(dst, regByte);
9899 dst += emitOutputLong(dst, dsp);
9900 if (id->idIsDspReloc())
9902 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9909 if (dspIsZero && reg != REG_EBP)
9911 // This is [reg+rgx]"
9912 dst += emitOutputWord(dst, code | 0x0400);
9913 dst += emitOutputByte(dst, regByte);
9917 // This is [reg+rgx+dsp]" -- does the offset fit in a byte?
9920 dst += emitOutputWord(dst, code | 0x4400);
9921 dst += emitOutputByte(dst, regByte);
9922 dst += emitOutputByte(dst, dsp);
9926 dst += emitOutputWord(dst, code | 0x8400);
9927 dst += emitOutputByte(dst, regByte);
9928 dst += emitOutputLong(dst, dsp);
9929 if (id->idIsDspReloc())
9931 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9939 // Now generate the constant value, if present
9942 ssize_t cval = addc->cnsVal;
9944 #ifdef _TARGET_AMD64_
9945 // all these opcodes only take a sign-extended 4-byte immediate
9946 noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
// The immediate is written as a long, a word or a byte; any other operand size
// trips the assert below.
9954 dst += emitOutputLong(dst, cval);
9957 dst += emitOutputWord(dst, cval);
9960 dst += emitOutputByte(dst, cval);
9964 assert(!"unexpected operand size");
// The relocation for the immediate is recorded against the last 4 bytes written.
9969 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
9976 // Does this instruction operate on a GC ref value?
9979 switch (id->idInsFmt())
9990 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
9994 // Mark the destination register as holding a GCT_BYREF
9995 assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
9996 emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
10003 case IF_AWR_RRD_RRD:
10012 assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
// Unrecognized format for a GC ref instruction: the instruction is displayed before
// the assert fires.
10017 emitDispIns(id, false, false, false);
10019 assert(!"unexpected GC ref instruction format");
10022 // mul can never produce a GC ref
10023 assert(!instrIs3opImul(ins));
10024 assert(ins != INS_mulEAX && ins != INS_imulEAX);
// The updates below mark registers as no longer holding a GC ref; they are skipped
// when emitInsCanOnlyWriteSSE2OrAVXReg reports true for this instruction.
10028 if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
10030 switch (id->idInsFmt())
10034 case IF_RWR_RRD_ARD:
10035 emitGCregDeadUpd(id->idReg1(), dst);
10041 if (ins == INS_mulEAX || ins == INS_imulEAX)
10043 emitGCregDeadUpd(REG_EAX, dst);
10044 emitGCregDeadUpd(REG_EDX, dst);
10047 // For the three operand imul instruction the target register
10048 // is encoded in the opcode
10050 if (instrIs3opImul(ins))
10052 regNumber tgtReg = inst3opImulReg(ins);
10053 emitGCregDeadUpd(tgtReg, dst);
10061 /*****************************************************************************
10063 * Output an instruction involving a stack frame value.
// emitOutputSV: encodes the instruction described by id, whose memory operand is a
// stack frame location (a local variable number plus offset read from iiaLclVar), and
// writes the resulting bytes at dst.
//
//   dst  - current write position in the code buffer; advanced as bytes are emitted
//   id   - instruction descriptor supplying the instruction, operand size, registers,
//          the local variable reference and GC information
//   code - opcode bits to emit; prefix and register encodings are merged into it here
//   addc - optional immediate operand (tested for null at its first use); when present
//          its value is written after the address bytes
//
// After the bytes are written, GC liveness of the stack variable and of the affected
// registers is updated according to the instruction format.
//
// NOTE(review): this listing omits some lines of the body (braces, else and case
// labels, and the return statement are not visible). The return value is presumably
// the advanced dst -- confirm against the full source.
10066 BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
10074 instruction ins = id->idIns();
10075 emitAttr size = id->idOpSize();
10076 size_t opsz = EA_SIZE_IN_BYTES(size);
// imul is only accepted here with EAX as the first register or a 4/8-byte operand.
10078 assert(ins != INS_imul || id->idReg1() == REG_EAX || size == EA_4BYTE || size == EA_8BYTE);
10080 // `addc` is used for two kinds of instructions
10081 // 1. ins like ADD that can have reg/mem and const versions both and const version needs to modify the opcode for
10082 // large constant operand (e.g., imm32)
10083 // 2. certain SSE/AVX ins have const operand as control bits that is always 1-Byte (imm8) even if `size` > 1-Byte
10084 if (addc && (size > EA_1BYTE))
10086 ssize_t cval = addc->cnsVal;
10088 // Does the constant fit in a byte?
10089 // SSE/AVX do not need to modify opcode
// The byte test is a round trip through signed char; relocatable constants and
// INS_mov / INS_test are excluded from this path.
10090 if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
10092 if ((id->idInsFmt() != IF_SRW_SHF) && (id->idInsFmt() != IF_RRW_SRD_CNS) &&
10093 (id->idInsFmt() != IF_RWR_RRD_SRD_CNS) && !IsSSEOrAVXInstruction(ins))
10102 // Add VEX prefix if required.
10103 // There are some callers who already add VEX prefix and call this routine.
10104 // Therefore, add VEX prefix if one is not already present.
10105 code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
10107 // Compute the REX prefix
10108 if (TakesRexWPrefix(ins, size))
10110 code = AddRexWPrefix(ins, code);
10113 // Special case emitting AVX instructions
10114 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
10116 if ((ins == INS_crc32) && (size > EA_1BYTE))
// crc32 with a 2-byte operand gets the 0x66 prefix byte emitted first.
10120 if (size == EA_2BYTE)
10122 dst += emitOutputByte(dst, 0x66);
// Choose the register encoded by insEncodeReg345: BMI instructions supply it via
// getBmiRegNumber; when that leaves REG_NA, idReg1 is used.
10126 regNumber reg345 = REG_NA;
10127 if (IsBMIInstruction(ins))
10129 reg345 = getBmiRegNumber(ins);
10131 if (reg345 == REG_NA)
10133 reg345 = id->idReg1();
10137 code = insEncodeReg3456(ins, id->idReg1(), size, code);
10139 unsigned regcode = insEncodeReg345(ins, reg345, size, &code);
10141 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10143 if (UseVEXEncoding() && (ins != INS_crc32))
10145 // Emit last opcode byte
10146 // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
// The low byte of code must be empty here; only bits 8..15 are written.
10147 assert((code & 0xFF) == 0);
10148 dst += emitOutputByte(dst, (code >> 8) & 0xFF);
// Otherwise the opcode is written as two words, high word first.
10152 dst += emitOutputWord(dst, code >> 16);
10153 dst += emitOutputWord(dst, code & 0xFFFF);
10158 // Is this a 'big' opcode?
10159 else if (code & 0xFF000000)
10161 // Output the REX prefix
10162 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10164 // Output the highest word of the opcode
10165 // We need to check again because in case of AVX instructions the leading
10166 // escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix.
10167 if (code & 0xFF000000)
10169 dst += emitOutputWord(dst, code >> 16);
// Keep only the low word, which has not been written yet.
10170 code &= 0x0000FFFF;
10173 else if (code & 0x00FF0000)
10175 // BT supports 16 bit operands and this code doesn't add the necessary 66 prefix.
10176 assert(ins != INS_bt);
10178 // Output the REX prefix
10179 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10181 // Output the highest byte of the opcode.
10182 // We need to check again because in case of AVX instructions the leading
10183 // escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix.
10184 if (code & 0x00FF0000)
10186 dst += emitOutputByte(dst, code >> 16);
10187 code &= 0x0000FFFF;
10190 // Use the large version if this is not a byte
10191 if ((size != EA_1BYTE) && (ins != INS_imul) && (!insIsCMOV(ins)) && !IsSSEInstruction(ins) &&
10192 !IsAVXInstruction(ins))
10197 else if (CodeGen::instIsFP(ins))
10199 assert(size == EA_4BYTE || size == EA_8BYTE);
10201 if (size == EA_8BYTE)
10206 else if (!IsSSEInstruction(ins) && !IsAVXInstruction(ins))
10208 // Is the operand size larger than a byte?
10215 // Output a size prefix for a 16-bit operand
10216 dst += emitOutputByte(dst, 0x66);
10220 #ifdef _TARGET_AMD64_
10222 #endif // _TARGET_AMD64_
10224 /* Set the 'w' size bit to indicate 32-bit operation
10225 * Note that incrementing "code" for INS_call (0xFF) would
10226 * overflow, whereas setting the lower bit to 1 just works out
10232 #ifdef _TARGET_X86_
10235 // Double operand - set the appropriate bit.
10236 // I don't know what a legitimate reason to end up in this case would be
10237 // considering that FP is taken care of above...
10238 // what is an instruction that takes a double which is not covered by the
10239 // above instIsFP? Of the list in instrsxarch, only INS_fprem
10241 NO_WAY("bad 8 byte op");
10243 #endif // _TARGET_X86_
10246 NO_WAY("unexpected size");
10251 // Output the REX prefix
10252 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10254 // Figure out the variable's frame position
10255 int varNum = id->idAddr()->iiaLclVar.lvaVarNum();
// lvaFrameAddress yields the frame address of the variable and fills in EBPbased; the
// displacement is that address plus the offset within the variable.
10257 adr = emitComp->lvaFrameAddress(varNum, &EBPbased);
10258 dsp = adr + id->idAddr()->iiaLclVar.lvaOffset();
// dspInByte: the displacement survives a round trip through signed char.
10260 dspInByte = ((signed char)dsp == (int)dsp);
10261 dspIsZero = (dsp == 0);
10263 // for stack variables the dsp should never be a reloc
10264 assert(id->idIsDspReloc() == 0);
// As elsewhere in the emitter, the SSE38/SSE3A/crc32 path writes the address bits as a
// single byte (code | 0xNN) and the other path writes them shifted left by 8 as part of
// a word (code | 0xNN00).
10268 // EBP-based variable: does the offset fit in a byte?
10269 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
// 0x45 is followed by a one-byte displacement, 0x85 by a four-byte displacement.
10273 dst += emitOutputByte(dst, code | 0x45);
10274 dst += emitOutputByte(dst, dsp);
10278 dst += emitOutputByte(dst, code | 0x85);
10279 dst += emitOutputLong(dst, dsp);
10286 dst += emitOutputWord(dst, code | 0x4500);
10287 dst += emitOutputByte(dst, dsp);
10291 dst += emitOutputWord(dst, code | 0x8500);
10292 dst += emitOutputLong(dst, dsp);
// NOTE(review): what follows is presumably the path for variables that are not
// EBP-based (the branch itself is not visible in this listing) -- confirm.
10299 #if !FEATURE_FIXED_OUT_ARGS
10300 // Adjust the offset by the amount currently pushed on the CPU stack
10301 dsp += emitCurStackLvl;
// Recomputed because dsp may have just been adjusted.
10304 dspInByte = ((signed char)dsp == (int)dsp);
10305 dspIsZero = (dsp == 0);
10307 // Does the offset fit in a byte?
10308 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
// In this form a fixed 0x24 byte always follows the address bits; it is then followed
// by nothing, a one-byte displacement, or a four-byte displacement.
10314 dst += emitOutputByte(dst, code | 0x04);
10315 dst += emitOutputByte(dst, 0x24);
10319 dst += emitOutputByte(dst, code | 0x44);
10320 dst += emitOutputByte(dst, 0x24);
10321 dst += emitOutputByte(dst, dsp);
10326 dst += emitOutputByte(dst, code | 0x84);
10327 dst += emitOutputByte(dst, 0x24);
10328 dst += emitOutputLong(dst, dsp);
10337 dst += emitOutputWord(dst, code | 0x0400);
10338 dst += emitOutputByte(dst, 0x24);
10342 dst += emitOutputWord(dst, code | 0x4400);
10343 dst += emitOutputByte(dst, 0x24);
10344 dst += emitOutputByte(dst, dsp);
10349 dst += emitOutputWord(dst, code | 0x8400);
10350 dst += emitOutputByte(dst, 0x24);
10351 dst += emitOutputLong(dst, dsp);
10356 // Now generate the constant value, if present
10359 ssize_t cval = addc->cnsVal;
10361 #ifdef _TARGET_AMD64_
10362 // all these opcodes only take a sign-extended 4-byte immediate
10363 noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
// The immediate is written as a long, a word or a byte; any other operand size
// trips the assert below.
10371 dst += emitOutputLong(dst, cval);
10374 dst += emitOutputWord(dst, cval);
10377 dst += emitOutputByte(dst, cval);
10381 assert(!"unexpected operand size");
10384 if (addc->cnsReloc)
// The relocation for the immediate is recorded against the last 4 bytes written.
10386 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
10391 // Does this instruction operate on a GC ref value?
10394 // Factor in the sub-variable offset
// The offset is rounded down to a multiple of TARGET_POINTER_SIZE before being added.
10395 adr += AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE);
10397 switch (id->idInsFmt())
10400 // Read stack -- no change
10403 case IF_SWR: // Stack Write (So we need to update GC live for stack var)
10404 // Write stack -- GC var may be born
10405 emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst);
10409 // Read stack -- no change
10413 // Write stack -- no change
10418 // Read stack , read register -- no change
10421 case IF_RWR_SRD: // Register Write, Stack Read (So we need to update GC live for register)
10423 // Read stack , write register -- GC reg may be born
10424 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
10427 case IF_SWR_RRD: // Stack Write, Register Read (So we need to update GC live for stack var)
10428 // Read register, write stack -- GC var may be born
10429 emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst);
10432 case IF_RRW_SRD: // Register Read/Write, Stack Read (So we need to update GC live for register)
10434 // reg could have been a GCREF as GCREF + int=BYREF
10435 // or BYREF+/-int=BYREF
10436 assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
10437 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
10442 // += -= of a byref, no change
// Unrecognized format for a GC ref instruction: the instruction is displayed before
// the assert fires.
10449 emitDispIns(id, false, false, false);
10451 assert(!"unexpected GC ref instruction format");
// The updates below mark registers as no longer holding a GC ref; they are skipped
// when emitInsCanOnlyWriteSSE2OrAVXReg reports true for this instruction.
10456 if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
10458 switch (id->idInsFmt())
10460 case IF_RWR_SRD: // Register Write, Stack Read
10461 case IF_RRW_SRD: // Register Read/Write, Stack Read
10462 case IF_RWR_RRD_SRD:
10463 emitGCregDeadUpd(id->idReg1(), dst);
10469 if (ins == INS_mulEAX || ins == INS_imulEAX)
10471 emitGCregDeadUpd(REG_EAX, dst);
10472 emitGCregDeadUpd(REG_EDX, dst);
10475 // For the three operand imul instruction the target register
10476 // is encoded in the opcode
10478 if (instrIs3opImul(ins))
10480 regNumber tgtReg = inst3opImulReg(ins);
10481 emitGCregDeadUpd(tgtReg, dst);
10489 /*****************************************************************************
10491 * Output an instruction with a static data member (class variable).
// emitOutputCV: encodes one instruction whose memory operand is identified by a field
// handle plus offset (a static data member or a JIT data-section constant).
//   dst  - output cursor; every emitOutput* call below advances it.
//   id   - instruction descriptor supplying the instruction, operand size, format,
//          registers, field handle and displacement.
//   code - base opcode bits; VEX/REX prefix bits are merged into it here.
//   addc - optional constant operand; it is null-checked before its first use.
// After the bytes are written, GC register liveness is updated based on the format.
// NOTE(review): the return statement is not visible in this excerpt; presumably the
// advanced dst is returned - confirm against the full file.
10494 BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
10497 CORINFO_FIELD_HANDLE fldh;
10501 emitAttr size = id->idOpSize();
10502 size_t opsz = EA_SIZE_IN_BYTES(size);
10503 instruction ins = id->idIns();
10504 bool isMoffset = false;
10506 // Get hold of the field handle and offset
10507 fldh = id->idAddr()->iiaFieldHnd;
10508 offs = emitGetInsDsp(id);
10510 // Special case: mov reg, fs:[ddd]
10511 if (fldh == FLD_GLOBAL_FS)
// 0x64 is the x86 FS segment-override prefix; it is written ahead of everything else.
10513 dst += emitOutputByte(dst, 0x64);
10516 // Compute VEX prefix
10517 // Some of its callers already add VEX prefix and then call this routine.
10518 // Therefore add the VEX prefix only if it is not already present.
10519 code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
10521 // Compute the REX prefix
10522 if (TakesRexWPrefix(ins, size))
10524 code = AddRexWPrefix(ins, code);
10527 // `addc` is used for two kinds of instructions
10528 // 1. ins like ADD that can have reg/mem and const versions both and const version needs to modify the opcode for
10529 // large constant operand (e.g., imm32)
10530 // 2. certain SSE/AVX ins have const operand as control bits that is always 1-Byte (imm8) even if `size` > 1-Byte
10531 if (addc && (size > EA_1BYTE))
10533 ssize_t cval = addc->cnsVal;
10534 // Does the constant fit in a byte?
// The byte form is only considered for non-relocatable constants and never for mov or test.
10535 if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
10537 // SSE/AVX do not need to modify opcode
10538 if (id->idInsFmt() != IF_MRW_SHF && !IsSSEOrAVXInstruction(ins))
10546 #ifdef _TARGET_X86_
10549 // Special case: "mov eax, [addr]" and "mov [addr], eax"
10550 // Amd64: this is one case where addr can be 64-bit in size. This is
10551 // currently unused or not enabled on amd64 as it always uses RIP
10552 // relative addressing which results in smaller instruction size.
10553 if (ins == INS_mov && id->idReg1() == REG_EAX)
10555 switch (id->idInsFmt())
10559 assert(code == (insCodeRM(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500));
// Clear the low 32 bits of code; any higher bits are left intact.
10561 code &= ~((code_t)0xFFFFFFFF);
10568 assert(code == (insCodeMR(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500));
// Same masking as above, this time for the MR-form opcode.
10570 code &= ~((code_t)0xFFFFFFFF);
10580 #endif //_TARGET_X86_
10582 // Special case emitting AVX instructions
10583 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
10585 if ((ins == INS_crc32) && (size > EA_1BYTE))
10589 if (size == EA_2BYTE)
10591 dst += emitOutputByte(dst, 0x66);
// BMI instructions may dictate the register used for the 345 encoding; when
// getBmiRegNumber yields REG_NA the first register operand is used instead.
10595 regNumber reg345 = REG_NA;
10596 if (IsBMIInstruction(ins))
10598 reg345 = getBmiRegNumber(ins);
10600 if (reg345 == REG_NA)
10602 reg345 = id->idReg1();
10606 code = insEncodeReg3456(ins, id->idReg1(), size, code);
10608 unsigned regcode = insEncodeReg345(ins, reg345, size, &code);
10610 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10612 if (UseVEXEncoding() && (ins != INS_crc32))
10614 // Emit last opcode byte
10615 // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
10616 assert((code & 0xFF) == 0);
10617 dst += emitOutputByte(dst, (code >> 8) & 0xFF);
// Otherwise both 16-bit halves of code are written, upper half first.
10621 dst += emitOutputWord(dst, code >> 16);
10622 dst += emitOutputWord(dst, code & 0xFFFF);
10625 // Emit Mod,R/M byte
10626 dst += emitOutputByte(dst, regcode | 0x05);
10629 // Is this a 'big' opcode?
10630 else if (code & 0xFF000000)
10632 // Output the REX prefix
10633 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10635 // Output the highest word of the opcode.
10636 // Check again since AVX instructions encode leading opcode bytes as part of VEX prefix.
10637 if (code & 0xFF000000)
10639 dst += emitOutputWord(dst, code >> 16);
10641 code &= 0x0000FFFF;
10643 else if (code & 0x00FF0000)
10645 // Output the REX prefix
10646 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10648 // Check again as VEX prefix would have encoded leading opcode byte
10649 if (code & 0x00FF0000)
10651 dst += emitOutputByte(dst, code >> 16);
10652 code &= 0x0000FFFF;
10655 if ((ins == INS_movsx || ins == INS_movzx || ins == INS_cmpxchg || ins == INS_xchg || ins == INS_xadd ||
10659 // movsx and movzx are 'big' opcodes but also have the 'w' bit
10663 else if (CodeGen::instIsFP(ins))
10665 assert(size == EA_4BYTE || size == EA_8BYTE);
10667 if (size == EA_8BYTE)
10674 // Is the operand size larger than a byte?
10681 // Output a size prefix for a 16-bit operand
10682 dst += emitOutputByte(dst, 0x66);
10686 #ifdef _TARGET_AMD64_
10689 // Set the 'w' bit to get the large version
10693 #ifdef _TARGET_X86_
10695 // Double operand - set the appropriate bit
10698 #endif // _TARGET_X86_
10701 assert(!"unexpected size");
10705 // Output the REX prefix
10706 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
// The remaining opcode is one byte for the offset-only formats and the moffset
// special case, and a two-byte word otherwise.
10710 if (id->idInsFmt() == IF_MRD_OFF || id->idInsFmt() == IF_RWR_MRD_OFF || isMoffset)
10712 dst += emitOutputByte(dst, code);
10716 dst += emitOutputWord(dst, code);
10720 // Do we have a constant or a static data member?
10721 doff = Compiler::eeGetJitDataOffs(fldh);
// Data-section constant: its address is the constant block base plus the data offset.
10724 addr = emitConsBlock + doff;
10726 int byteSize = EA_SIZE_IN_BYTES(size);
10728 // this instruction has a fixed size (4) src.
10729 if (ins == INS_cvttss2si || ins == INS_cvtss2sd || ins == INS_vbroadcastss)
10733 // This has a fixed size (8) source.
10734 if (ins == INS_vbroadcastsd)
10739 // Check that the offset is properly aligned (i.e. the ddd in [ddd])
10740 assert((emitChkAlign == false) || (ins == INS_lea) || (((size_t)addr & (byteSize - 1)) == 0));
10744 // Special case: mov reg, fs:[ddd] or mov reg, [ddd]
10745 if (jitStaticFldIsGlobAddr(fldh))
// Otherwise ask the EE for the field address; a null result is a fatal JIT error.
10751 addr = (BYTE*)emitComp->info.compCompHnd->getFieldAddress(fldh, nullptr);
10752 if (addr == nullptr)
10754 NO_WAY("could not obtain address of static field");
10759 BYTE* target = (addr + offs);
// NOTE(review): addlDelta starts at 0; the statements that adjust it for a trailing
// immediate are not visible in this excerpt.
10763 INT32 addlDelta = 0;
10767 // It is of the form "ins [disp], imm" or "ins reg, [disp], imm"
10768 // For emitting relocation, we also need to take into account of the
10769 // additional bytes of code emitted for immed val.
10771 ssize_t cval = addc->cnsVal;
10773 #ifdef _TARGET_AMD64_
10774 // all these opcodes only take a sign-extended 4-byte immediate
10775 noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
10776 #else //_TARGET_X86_
10777 noway_assert(opsz <= 4);
10778 #endif //_TARGET_X86_
10795 assert(!"unexpected operand size");
10800 #ifdef _TARGET_AMD64_
10801 // All static field and data section constant accesses should be marked as relocatable
10802 noway_assert(id->idIsDspReloc());
// A zero 32-bit displacement is written; the relocation recorded below targets these bytes.
10803 dst += emitOutputLong(dst, 0);
10804 #else //_TARGET_X86_
10805 dst += emitOutputLong(dst, (int)target);
10806 #endif //_TARGET_X86_
10808 if (id->idIsDspReloc())
// The relocation covers the 4 displacement bytes just written; addlDelta is the last argument.
10810 emitRecordRelocation((void*)(dst - sizeof(int)), target, IMAGE_REL_BASED_DISP32, 0, addlDelta);
10815 #ifdef _TARGET_AMD64_
10816 // This code path should never be hit on amd64 since it always uses RIP relative addressing.
10817 // In future if ever there is a need to enable this special case, also enable the logic
10818 // that sets isMoffset to true on amd64.
10820 #else //_TARGET_X86_
10822 dst += emitOutputSizeT(dst, (ssize_t)target);
10824 if (id->idIsDspReloc())
// The relocation covers the pointer-sized address written by emitOutputSizeT above.
10826 emitRecordRelocation((void*)(dst - TARGET_POINTER_SIZE), target, IMAGE_REL_BASED_MOFFSET);
10829 #endif //_TARGET_X86_
10832 // Now generate the constant value, if present
10835 ssize_t cval = addc->cnsVal;
10837 #ifdef _TARGET_AMD64_
10838 // all these opcodes only take a sign-extended 4-byte immediate
10839 noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
10847 dst += emitOutputLong(dst, cval);
10850 dst += emitOutputWord(dst, cval);
10853 dst += emitOutputByte(dst, cval);
10857 assert(!"unexpected operand size");
10859 if (addc->cnsReloc)
// NOTE(review): the relocation address assumes the immediate just written was 4 bytes - confirm.
10861 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
10866 // Does this instruction operate on a GC ref value?
10869 switch (id->idInsFmt())
10880 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
10895 assert(id->idGCref() == GCT_BYREF);
10896 assert(ins == INS_add || ins == INS_sub);
10898 // Mark it as holding a GCT_BYREF
10899 emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
10904 emitDispIns(id, false, false, false);
10906 assert(!"unexpected GC ref instruction format");
// Instructions that are not limited to writing SSE2/AVX registers may overwrite a
// general-purpose register, so any GC pointer tracked in the written register is killed.
10911 if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
10913 switch (id->idInsFmt())
10917 case IF_RWR_RRD_MRD:
10918 emitGCregDeadUpd(id->idReg1(), dst);
10924 if (ins == INS_mulEAX || ins == INS_imulEAX)
// GC liveness is cleared for both EAX and EDX after these multiply forms.
10926 emitGCregDeadUpd(REG_EAX, dst);
10927 emitGCregDeadUpd(REG_EDX, dst);
10930 // For the three operand imul instruction the target register
10931 // is encoded in the opcode
10933 if (instrIs3opImul(ins))
10935 regNumber tgtReg = inst3opImulReg(ins);
10936 emitGCregDeadUpd(tgtReg, dst);
10944 /*****************************************************************************
10946 * Output an instruction with one register operand.
// emitOutputR: encodes an instruction whose only operand is the register idReg1, then
// updates GC register liveness according to the instruction format.
//   dst - output cursor; every emitOutput* call below advances it.
//   id  - instruction descriptor supplying the instruction, register and operand size.
// SSE and AVX instructions are rejected by the asserts at the top.
// NOTE(review): the switch/case labels that select between the paths below, and the
// return statement, are not visible in this excerpt.
10949 BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id)
10953 instruction ins = id->idIns();
10954 regNumber reg = id->idReg1();
10955 emitAttr size = id->idOpSize();
10957 // We would need to update GC info correctly (so SSE/AVX instructions are not expected here)
10958 assert(!IsSSEInstruction(ins));
10959 assert(!IsAVXInstruction(ins));
10961 // Get the 'base' opcode
10967 #ifdef _TARGET_AMD64_
10970 if (size == EA_1BYTE)
// These asserts pin the enum layout that makes the ins + 1 conversion below valid:
// each long form immediately follows its compact form.
10973 assert(INS_inc_l == INS_inc + 1);
10974 assert(INS_dec_l == INS_dec + 1);
10976 // Can't use the compact form, use the long form
10977 ins = (instruction)(ins + 1);
10978 if (size == EA_2BYTE)
10980 // Output a size prefix for a 16-bit operand
10981 dst += emitOutputByte(dst, 0x66);
10984 code = insCodeRR(ins);
10985 if (size != EA_1BYTE)
10987 // Set the 'w' bit to get the large version
10991 if (TakesRexWPrefix(ins, size))
10993 code = AddRexWPrefix(ins, code);
10997 unsigned regcode = insEncodeReg012(ins, reg, size, &code);
10999 // Output the REX prefix
11000 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
// The register encoding is merged into bits 8-15 of the 16-bit value written.
11002 dst += emitOutputWord(dst, code | (regcode << 8));
11006 if (size == EA_2BYTE)
11008 // Output a size prefix for a 16-bit operand
11009 dst += emitOutputByte(dst, 0x66);
// Compact form: a single byte with the register encoding ORed into the opcode.
11011 dst += emitOutputByte(dst, insCodeRR(ins) | insEncodeReg012(ins, reg, size, nullptr));
11018 case INS_push_hide:
11020 assert(size == EA_PTRSIZE);
11021 code = insEncodeOpreg(ins, reg, size);
11023 assert(!TakesVexPrefix(ins));
11024 assert(!TakesRexWPrefix(ins, size));
11026 // Output the REX prefix
11027 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11029 dst += emitOutputByte(dst, code);
11034 assert(size >= EA_4BYTE && size <= EA_PTRSIZE); // 16-bit BSWAP is undefined
11036 // The Intel instruction set reference for BSWAP states that extended registers
11037 // should be enabled via REX.R, but per Vol. 2A, Sec. 2.2.1.2 (see also Figure 2-7),
11038 // REX.B should instead be used if the register is encoded in the opcode byte itself.
11039 // Therefore the default logic of insEncodeReg012 is correct for this case.
11041 code = insCodeRR(ins);
11043 if (TakesRexWPrefix(ins, size))
11045 code = AddRexWPrefix(ins, code);
11049 unsigned regcode = insEncodeReg012(ins, reg, size, &code);
11051 // Output the REX prefix
11052 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11054 dst += emitOutputWord(dst, code | (regcode << 8));
// This path only handles byte-sized operands that are not GC references.
11075 assert(id->idGCref() == GCT_NONE);
11076 assert(size == EA_1BYTE);
11078 code = insEncodeMRreg(ins, reg, EA_1BYTE, insCodeMR(ins));
11080 // Output the REX prefix
11081 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11083 // We expect this to always be a 'big' opcode
11084 assert(code & 0x00FF0000);
// Bits 16-23 of code go out as one byte, followed by the low 16 bits as a word.
11086 dst += emitOutputByte(dst, code >> 16);
11087 dst += emitOutputWord(dst, code & 0x0000FFFF);
11094 // Kill off any GC refs in EAX or EDX
11095 emitGCregDeadUpd(REG_EAX, dst);
11096 emitGCregDeadUpd(REG_EDX, dst);
11102 assert(id->idGCref() == GCT_NONE);
11104 code = insEncodeMRreg(ins, reg, size, insCodeMR(ins));
11106 if (size != EA_1BYTE)
11108 // Set the 'w' bit to get the large version
11111 if (size == EA_2BYTE)
11113 // Output a size prefix for a 16-bit operand
11114 dst += emitOutputByte(dst, 0x66);
11118 code = AddVexPrefixIfNeeded(ins, code, size);
11120 if (TakesRexWPrefix(ins, size))
11122 code = AddRexWPrefix(ins, code);
11125 // Output the REX prefix
11126 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11128 dst += emitOutputWord(dst, code);
11132 // Are we writing the register? if so then update the GC information
11133 switch (id->idInsFmt())
11140 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
11144 emitGCregDeadUpd(id->idReg1(), dst);
11150 regMaskTP regMask = genRegMask(reg);
11154 // The reg must currently be holding either a gcref or a byref
11155 // and the instruction must be inc or dec
11156 assert(((emitThisGCrefRegs | emitThisByrefRegs) & regMask) &&
11157 (ins == INS_inc || ins == INS_dec || ins == INS_inc_l || ins == INS_dec_l));
11158 assert(id->idGCref() == GCT_BYREF);
11159 // Mark it as holding a GCT_BYREF
11160 emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
11164 // Can't use RRW to trash a GC ref. It's OK for unverifiable code
11165 // to trash Byrefs.
11166 assert((emitThisGCrefRegs & regMask) == 0);
// Unrecognized format: display the instruction (emitDispIns) and then assert.
11172 emitDispIns(id, false, false, false);
11174 assert(!"unexpected instruction format");
11181 /*****************************************************************************
11183 * Output an instruction with two register operands.
// emitOutputRR: encodes an instruction with two register operands (idReg1 and idReg2)
// and then updates GC register liveness for whichever register the instruction writes.
//   dst - output cursor; every emitOutput* call below advances it.
//   id  - instruction descriptor supplying the instruction, both registers and the size.
// NOTE(review): several else lines, case labels and the return statement are not
// visible in this excerpt.
11186 BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
11190 instruction ins = id->idIns();
11191 regNumber reg1 = id->idReg1();
11192 regNumber reg2 = id->idReg2();
11193 emitAttr size = id->idOpSize();
11195 // Get the 'base' opcode
11196 code = insCodeRM(ins);
11197 code = AddVexPrefixIfNeeded(ins, code, size);
11198 if (IsSSEOrAVXInstruction(ins))
11200 code = insEncodeRMreg(ins, code);
11202 if (TakesRexWPrefix(ins, size))
11204 code = AddRexWPrefix(ins, code);
11207 else if ((ins == INS_movsx) || (ins == INS_movzx) || (insIsCMOV(ins)))
// The low bit of code is set exactly when the operand size is 2 bytes.
11209 code = insEncodeRMreg(ins, code) | (int)(size == EA_2BYTE);
11210 #ifdef _TARGET_AMD64_
11212 assert((size < EA_4BYTE) || (insIsCMOV(ins)));
11213 if ((size == EA_8BYTE) || (ins == INS_movsx))
11215 code = AddRexWPrefix(ins, code);
11218 else if (ins == INS_movsxd)
11220 code = insEncodeRMreg(ins, code);
11222 #endif // _TARGET_AMD64_
11224 #ifdef FEATURE_HW_INTRINSICS
11225 else if ((ins == INS_crc32) || (ins == INS_lzcnt) || (ins == INS_popcnt) || (ins == INS_tzcnt))
11227 code = insEncodeRMreg(ins, code);
11228 if ((ins == INS_crc32) && (size > EA_1BYTE))
11233 if (size == EA_2BYTE)
// Of the instructions handled by this branch, only crc32 is expected with a 16-bit operand.
11235 assert(ins == INS_crc32);
11236 dst += emitOutputByte(dst, 0x66);
11238 else if (size == EA_8BYTE)
11240 code = AddRexWPrefix(ins, code);
11243 #endif // FEATURE_HW_INTRINSICS
// NOTE(review): presumably the fallback branch of the chain above (its else line is not
// visible here); it restarts from the MR opcode form instead of the RM form fetched at the top.
11246 code = insEncodeMRreg(ins, insCodeMR(ins));
11248 if (ins != INS_test)
11256 noway_assert(RBM_BYTE_REGS & genRegMask(reg1));
11257 noway_assert(RBM_BYTE_REGS & genRegMask(reg2));
11261 // Output a size prefix for a 16-bit operand
11262 dst += emitOutputByte(dst, 0x66);
11266 // Set the 'w' bit to get the large version
11270 #ifdef _TARGET_AMD64_
11272 // TODO-AMD64-CQ: Better way to not emit REX.W when we don't need it
11273 // Don't need to zero out the high bits explicitly
11274 if ((ins != INS_xor) || (reg1 != reg2))
11276 code = AddRexWPrefix(ins, code);
11279 // Set the 'w' bit to get the large version
11283 #endif // _TARGET_AMD64_
11286 assert(!"unexpected size");
// BMI instructions may dictate the register used for the 345 encoding; when
// getBmiRegNumber yields REG_NA the first register operand is used instead.
11290 regNumber reg345 = REG_NA;
11291 if (IsBMIInstruction(ins))
11293 reg345 = getBmiRegNumber(ins);
11295 if (reg345 == REG_NA)
11297 reg345 = id->idReg1();
// regCode combines the 345 encoding of reg345 with the 012 encoding of reg2.
11299 unsigned regCode = insEncodeReg345(ins, reg345, size, &code);
11300 regCode |= insEncodeReg012(ins, reg2, size, &code);
11302 if (TakesVexPrefix(ins))
11304 // In case of AVX instructions that take 3 operands, we generally want to encode reg1
11305 // as first source. In this case, reg1 is both a source and a destination.
11306 // The exception is the "merge" 3-operand case, where we have a move instruction, such
11307 // as movss, and we want to merge the source with itself.
11309 // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
11310 // now we use the single source as source1 and source2.
11311 if (IsDstDstSrcAVXInstruction(ins))
11313 // encode source/dest operand reg in 'vvvv' bits in 1's complement form
11314 code = insEncodeReg3456(ins, reg1, size, code);
11316 else if (IsDstSrcSrcAVXInstruction(ins))
11318 // encode source operand reg in 'vvvv' bits in 1's complement form
11319 code = insEncodeReg3456(ins, reg2, size, code);
11323 // Output the REX prefix
11324 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11326 if (code & 0xFF000000)
11328 // Output the highest word of the opcode
11329 dst += emitOutputWord(dst, code >> 16);
11330 code &= 0x0000FFFF;
11332 if (Is4ByteSSEInstruction(ins))
11334 // Output 3rd byte of the opcode
11335 dst += emitOutputByte(dst, code);
11339 else if (code & 0x00FF0000)
11341 dst += emitOutputByte(dst, code >> 16);
11342 code &= 0x0000FFFF;
11345 // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
// Three layouts for the tail of the instruction:
//  - second byte of code already equals 0xC0: merge regCode into it and emit one word;
//  - low byte of code is zero: emit the second byte, then a separate 0xC0 | regCode byte;
//  - otherwise: emit the full low word, then a separate 0xC0 | regCode byte.
11346 if ((code & 0xFF00) == 0xC000)
11348 dst += emitOutputWord(dst, code | (regCode << 8));
11350 else if ((code & 0xFF) == 0x00)
11352 // This case happens for some SSE/AVX instructions only
11353 assert(IsAVXInstruction(ins) || Is4ByteSSEInstruction(ins));
11355 dst += emitOutputByte(dst, (code >> 8) & 0xFF);
11356 dst += emitOutputByte(dst, (0xC0 | regCode));
11360 dst += emitOutputWord(dst, code);
11361 dst += emitOutputByte(dst, (0xC0 | regCode));
11364 // Does this instruction operate on a GC ref value?
11367 switch (id->idInsFmt())
11374 if (emitSyncThisObjReg != REG_NA && emitIGisInProlog(emitCurIG) && reg2 == (int)REG_ARG_0)
11376 // We're relocating "this" in the prolog
11377 assert(emitComp->lvaIsOriginalThisArg(0));
11378 assert(emitComp->lvaTable[0].lvRegister);
11379 assert(emitComp->lvaTable[0].lvRegNum == reg1);
11381 if (emitFullGCinfo)
11383 emitGCregLiveSet(id->idGCref(), genRegMask(reg1), dst, true);
11388 /* If emitFullGCinfo==false, the we don't use any
11389 regPtrDsc's and so explictly note the location
11390 of "this" in GCEncode.cpp
11395 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
11400 switch (id->idIns())
11403 This must be one of the following cases:
11405 xor reg, reg to assign NULL
11407 and r1 , r2 if (ptr1 && ptr2) ...
11408 or r1 , r2 if (ptr1 || ptr2) ...
11410 add r1 , r2 to compute a normal byref
11411 sub r1 , r2 to compute a strange byref (VC only)
11415 assert(id->idReg1() == id->idReg2());
11416 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
11421 emitGCregDeadUpd(id->idReg1(), dst);
11426 assert(id->idGCref() == GCT_BYREF);
11430 regMask = genRegMask(reg1) | genRegMask(reg2);
11432 // r1/r2 could have been a GCREF as GCREF + int=BYREF
11433 // or BYREF+/-int=BYREF
11434 assert(((regMask & emitThisGCrefRegs) && (ins == INS_add)) ||
11435 ((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub)));
11437 // Mark r1 as holding a byref
11438 emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
11443 emitDispIns(id, false, false, false);
11445 assert(!"unexpected GC reg update instruction");
11451 // This must be "xchg reg1, reg2"
11452 assert(id->idIns() == INS_xchg);
11454 // If we got here, the GC-ness of the registers doesn't match, so we have to "swap" them in the GC
11455 // register pointer mask.
11459 gc1 = emitRegGCtype(reg1);
11460 gc2 = emitRegGCtype(reg2);
11464 // Kill the GC-info about the GC registers
11468 emitGCregDeadUpd(reg1, dst);
11473 emitGCregDeadUpd(reg2, dst);
11476 // Now, swap the info
11480 emitGCregLiveUpd(gc1, reg2, dst);
11485 emitGCregLiveUpd(gc2, reg1, dst);
11492 emitDispIns(id, false, false, false);
11494 assert(!"unexpected GC ref instruction format");
// Instructions that are not limited to writing SSE2/AVX registers may overwrite a
// general-purpose register, so any GC pointer tracked in the written register is killed.
11499 if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
11501 switch (id->idInsFmt())
11504 // INS_mulEAX can not be used with any of these formats
11505 assert(ins != INS_mulEAX && ins != INS_imulEAX);
11507 // For the three operand imul instruction the target
11508 // register is encoded in the opcode
11510 if (instrIs3opImul(ins))
11512 regNumber tgtReg = inst3opImulReg(ins);
11513 emitGCregDeadUpd(tgtReg, dst);
11519 case IF_RWR_RRD_RRD:
11520 // INS_movxmm2i writes to reg2.
11521 if (ins == INS_mov_xmm2i)
11523 emitGCregDeadUpd(id->idReg2(), dst);
11527 emitGCregDeadUpd(id->idReg1(), dst);
// emitOutputRRR: encodes a three-register AVX instruction.
//   dst - output cursor; every emitOutput* call below advances it.
//   id  - instruction descriptor; idReg1 is the target, idReg2 is the first source and
//         idReg3 is the second source.
// Register placement: the target goes through insEncodeReg345, the second source through
// insEncodeReg012, and the first source through insEncodeReg3456 (the vvvv bits).
// The instruction must not be marked as producing a GC ref (see the final noway_assert).
// NOTE(review): the return statement is not visible in this excerpt.
11540 BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id)
11544 instruction ins = id->idIns();
// Only AVX instructions that take three operands, or the AVX blendv forms, are accepted.
11545 assert(IsAVXInstruction(ins));
11546 assert(IsThreeOperandAVXInstruction(ins) || isAvxBlendv(ins));
11547 regNumber targetReg = id->idReg1();
11548 regNumber src1 = id->idReg2();
11549 regNumber src2 = id->idReg3();
11550 emitAttr size = id->idOpSize();
// Start from the RM form of the opcode and fold in the VEX prefix bits.
11552 code = insCodeRM(ins);
11553 code = AddVexPrefixIfNeeded(ins, code, size);
11554 code = insEncodeRMreg(ins, code);
11556 if (TakesRexWPrefix(ins, size))
11558 code = AddRexWPrefix(ins, code);
11561 unsigned regCode = insEncodeReg345(ins, targetReg, size, &code);
11562 regCode |= insEncodeReg012(ins, src2, size, &code);
11563 // encode source operand reg in 'vvvv' bits in 1's complement form
11564 code = insEncodeReg3456(ins, src1, size, code);
11566 // Output the REX prefix
11567 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11569 // Is this a 'big' opcode?
11570 if (code & 0xFF000000)
11572 // Output the highest word of the opcode
11573 dst += emitOutputWord(dst, code >> 16);
11574 code &= 0x0000FFFF;
11576 else if (code & 0x00FF0000)
11578 dst += emitOutputByte(dst, code >> 16);
11579 code &= 0x0000FFFF;
11582 // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
// Three layouts for the tail of the instruction:
//  - second byte of code already equals 0xC0: merge regCode into it and emit one word;
//  - low byte of code is zero: emit the second byte, then a separate 0xC0 | regCode byte;
//  - otherwise: emit the full low word, then a separate 0xC0 | regCode byte.
11583 if ((code & 0xFF00) == 0xC000)
11585 dst += emitOutputWord(dst, code | (regCode << 8));
11587 else if ((code & 0xFF) == 0x00)
11589 // This case happens for AVX instructions only
11590 assert(IsAVXInstruction(ins));
11592 dst += emitOutputByte(dst, (code >> 8) & 0xFF);
11593 dst += emitOutputByte(dst, (0xC0 | regCode));
11597 dst += emitOutputWord(dst, code);
11598 dst += emitOutputByte(dst, (0xC0 | regCode));
// No GC liveness update is done in this routine, so a GC-typed result is rejected outright.
11601 noway_assert(!id->idGCref());
11606 /*****************************************************************************
11608 * Output an instruction with a register and constant operands.
// emitOutputRI: encodes an instruction with one register operand (idReg1) and one
// constant operand (emitGetInsSC), then updates GC register liveness.
//   dst - output cursor; every emitOutput* call below advances it.
//   id  - instruction descriptor supplying the instruction, register, size and constant.
// Three encodings are visible below: the SSE/AVX "opcode reg, immed8" form, the mov
// form with a full-width immediate, and the general form that picks between the
// sign-extended byte, accumulator and r/m-immediate encodings.
// NOTE(review): several case labels, else lines and the return statements are not
// visible in this excerpt.
11611 BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
11614 emitAttr size = id->idOpSize();
11615 instruction ins = id->idIns();
11616 regNumber reg = id->idReg1();
11617 ssize_t val = emitGetInsSC(id);
// valInByte: the constant survives a round trip through signed char, and the
// instruction is neither mov nor test.
11618 bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
11620 // BT reg,imm might be useful but it requires special handling of the immediate value
11621 // (it is always encoded in a byte). Let's not complicate things until this is needed.
11622 assert(ins != INS_bt);
11624 if (id->idIsCnsReloc())
11626 valInByte = false; // relocs can't be placed in a byte
11629 noway_assert(emitVerifyEncodable(ins, size, reg));
11631 if (IsSSEOrAVXInstruction(ins))
11633 // Handle SSE2 instructions of the form "opcode reg, immed8"
11635 assert(id->idGCref() == GCT_NONE);
11638 // The left and right shifts use the same encoding, and are distinguished by the Reg/Opcode field.
11639 regNumber regOpcode = getSseShiftRegNumber(ins);
11641 // Get the 'base' opcode.
11642 code = insCodeMI(ins);
11643 code = AddVexPrefixIfNeeded(ins, code, size);
11644 code = insEncodeMIreg(ins, reg, size, code);
11645 assert(code & 0x00FF0000);
11646 if (TakesVexPrefix(ins))
11648 // The 'vvvv' bits encode the destination register, which for this case (RI)
11649 // is the same as the source.
11650 code = insEncodeReg3456(ins, reg, size, code);
// The combined register bits are shifted left by 8 so that they land in the second
// byte of the word emitted below.
11653 unsigned regcode = (insEncodeReg345(ins, regOpcode, size, &code) | insEncodeReg012(ins, reg, size, &code)) << 8;
11655 // Output the REX prefix
11656 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11658 if (code & 0xFF000000)
11660 dst += emitOutputWord(dst, code >> 16);
11662 else if (code & 0xFF0000)
11664 dst += emitOutputByte(dst, code >> 16);
11667 dst += emitOutputWord(dst, code | regcode);
// On this path the immediate is always written as a single byte.
11669 dst += emitOutputByte(dst, val);
11674 // The 'mov' opcode is special
11675 if (ins == INS_mov)
11677 code = insCodeACC(ins);
11678 assert(code < 0x100);
11680 code |= 0x08; // Set the 'w' bit
11681 unsigned regcode = insEncodeReg012(ins, reg, size, &code);
11684 // This is INS_mov and will not take VEX prefix
11685 assert(!TakesVexPrefix(ins));
11687 if (TakesRexWPrefix(ins, size))
11689 code = AddRexWPrefix(ins, code);
11692 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11694 dst += emitOutputByte(dst, code);
// The immediate is written at full operand width: 4 bytes, or pointer size on AMD64.
11695 if (size == EA_4BYTE)
11697 dst += emitOutputLong(dst, val);
11699 #ifdef _TARGET_AMD64_
11702 assert(size == EA_PTRSIZE);
11703 dst += emitOutputSizeT(dst, val);
11707 if (id->idIsCnsReloc())
// The relocation address is dst minus the operand size, i.e. the start of the immediate just written.
11709 emitRecordRelocation((void*)(dst - (unsigned)EA_SIZE(size)), (void*)(size_t)val, IMAGE_REL_BASED_MOFFSET);
11715 // Decide which encoding is the shortest
11716 bool useSigned, useACC;
11718 if (reg == REG_EAX && !instrIs3opImul(ins))
11720 if (size == EA_1BYTE || (ins == INS_test))
11722 // For al, ACC encoding is always the smallest
11728 /* For ax/eax, we avoid ACC encoding for small constants as we
11729 * can emit the small constant and have it sign-extended.
11730 * For big constants, the ACC encoding is better as we can use
11731 * the 1 byte opcode
11736 // avoid using ACC encoding
11761 // "test" has no 's' bit
11762 if (ins == INS_test)
11767 // Get the 'base' opcode
11770 assert(!useSigned);
11771 code = insCodeACC(ins);
11775 assert(!useSigned || valInByte);
11777 // Some instructions (at least 'imul') do not have a
11778 // r/m, immed form, but do have a dstReg,srcReg,imm8 form.
11779 if (valInByte && useSigned && insNeedsRRIb(ins))
11781 code = insEncodeRRIb(ins, reg, size);
11785 code = insCodeMI(ins);
11786 code = AddVexPrefixIfNeeded(ins, code, size);
11787 code = insEncodeMIreg(ins, reg, size, code);
11797 // Output a size prefix for a 16-bit operand
11798 dst += emitOutputByte(dst, 0x66);
11802 // Set the 'w' bit to get the large version
11806 #ifdef _TARGET_AMD64_
11808 /* Set the 'w' bit to get the large version */
11809 /* and the REX.W bit to get the really large version */
11811 code = AddRexWPrefix(ins, code);
11817 assert(!"unexpected size");
11820 // Output the REX prefix
11821 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11823 // Does the value fit in a sign-extended byte?
11824 // Important! Only set the 's' bit when we have a size larger than EA_1BYTE.
11825 // Note: A sign-extending immediate when (size == EA_1BYTE) is invalid in 64-bit mode.
11827 if (useSigned && (size > EA_1BYTE))
11829 // We can just set the 's' bit, and issue an immediate byte
11831 code |= 0x2; // Set the 's' bit to use a sign-extended immediate byte.
11832 dst += emitOutputWord(dst, code);
11833 dst += emitOutputByte(dst, val);
11837 // Can we use an accumulator (EAX) encoding?
// The accumulator form writes code as one byte; the other form writes it as a word.
11840 dst += emitOutputByte(dst, code);
11844 dst += emitOutputWord(dst, code);
// The immediate follows; the writes below emit it as 1, 2 or 4 bytes.
11850 dst += emitOutputByte(dst, val);
11853 dst += emitOutputWord(dst, val);
11856 dst += emitOutputLong(dst, val);
11858 #ifdef _TARGET_AMD64_
// The AMD64-only case still writes a 4-byte immediate.
11860 dst += emitOutputLong(dst, val);
11862 #endif // _TARGET_AMD64_
11867 if (id->idIsCnsReloc())
// The relocation covers the last 4 bytes written; the assert below requires a 4-byte operand.
11869 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
11870 assert(size == EA_4BYTE);
11876 // Does this instruction operate on a GC ref value?
11879 switch (id->idInsFmt())
11885 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
11889 assert(id->idGCref() == GCT_BYREF);
11893 regMask = genRegMask(reg);
11894 // FIXNOW review the other places and relax the assert there too
11896 // The reg must currently be holding either a gcref or a byref
11897 // GCT_GCREF+int = GCT_BYREF, and GCT_BYREF+/-int = GCT_BYREF
11898 if (emitThisGCrefRegs & regMask)
11900 assert(ins == INS_add);
11902 if (emitThisByrefRegs & regMask)
11904 assert(ins == INS_add || ins == INS_sub);
11907 // Mark it as holding a GCT_BYREF
11908 emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
11913 emitDispIns(id, false, false, false);
11915 assert(!"unexpected GC ref instruction format");
11918 // mul can never produce a GC ref
11919 assert(!instrIs3opImul(ins));
11920 assert(ins != INS_mulEAX && ins != INS_imulEAX);
11924 switch (id->idInsFmt())
11927 // INS_mulEAX can not be used with any of these formats
11928 assert(ins != INS_mulEAX && ins != INS_imulEAX);
11930 // For the three operand imul instruction the target
11931 // register is encoded in the opcode
11933 if (instrIs3opImul(ins))
11935 regNumber tgtReg = inst3opImulReg(ins);
11936 emitGCregDeadUpd(tgtReg, dst);
11942 assert(!instrIs3opImul(ins));
11944 emitGCregDeadUpd(id->idReg1(), dst);
// Unrecognized format: display the instruction (emitDispIns) and then assert.
11949 emitDispIns(id, false, false, false);
11951 assert(!"unexpected GC ref instruction format");
11958 /*****************************************************************************
11960 * Output an instruction with a constant operand.
// emitOutputIV: encodes an instruction whose only operand is a constant (emitGetInsSC).
//   dst - output cursor; every emitOutput* call below advances it.
//   id  - instruction descriptor supplying the instruction, operand size and constant.
// SSE and AVX instructions are rejected by the asserts at the top. Only the push
// instructions may carry a relocatable constant (asserted below).
// NOTE(review): the switch/case labels that select between the paths below, and the
// return statement, are not visible in this excerpt.
11963 BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id)
11966 instruction ins = id->idIns();
11967 emitAttr size = id->idOpSize();
11968 ssize_t val = emitGetInsSC(id);
// valInByte: the constant survives a round trip through signed char.
11969 bool valInByte = ((signed char)val == val);
11971 // We would need to update GC info correctly (so SSE/AVX instructions are not expected here)
11972 assert(!IsSSEInstruction(ins));
11973 assert(!IsAVXInstruction(ins));
11975 #ifdef _TARGET_AMD64_
11976 // all these opcodes take a sign-extended 4-byte immediate, max
11977 noway_assert(size < EA_8BYTE || ((int)val == val && !id->idIsCnsReloc()));
11980 if (id->idIsCnsReloc())
11982 valInByte = false; // relocs can't be placed in a byte
11984 // Of these instructions only the push instruction can have reloc
11985 assert(ins == INS_push || ins == INS_push_hide);
// One opcode byte (from insCode) followed by a one-byte immediate, range-checked first.
11991 assert((val >= -128) && (val <= 127));
11992 dst += emitOutputByte(dst, insCode(ins));
11993 dst += emitOutputByte(dst, val);
// Same shape, but the opcode byte comes from insCodeMI.
11997 assert((val >= -128) && (val <= 127));
11998 dst += emitOutputByte(dst, insCodeMI(ins));
11999 dst += emitOutputByte(dst, val);
// One opcode byte followed by a 16-bit immediate.
12004 dst += emitOutputByte(dst, insCodeMI(ins));
12005 dst += emitOutputWord(dst, val);
12008 case INS_push_hide:
12010 code = insCodeMI(ins);
12012 // Does the operand fit in a byte?
// Byte-sized immediate form: the opcode with bit 1 set, followed by one immediate byte.
12015 dst += emitOutputByte(dst, code | 2);
12016 dst += emitOutputByte(dst, val);
12020 if (TakesRexWPrefix(ins, size))
12022 code = AddRexWPrefix(ins, code);
12023 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
// Full form: the opcode byte followed by a 4-byte immediate.
12026 dst += emitOutputByte(dst, code);
12027 dst += emitOutputLong(dst, val);
12028 if (id->idIsCnsReloc())
// The relocation covers the 4 immediate bytes just written by emitOutputLong.
12030 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
12034 // Did we push a GC ref value?
// Recording a pushed GC ref constant is not implemented; only a diagnostic is printed.
12038 printf("UNDONE: record GCref push [cns]\n");
12045 assert(!"unexpected instruction");
12051 /*****************************************************************************
12053 * Output a local jump instruction.
12054 * This function also handles non-jumps that have jump-like characteristics, like RIP-relative LEA of a label that
12055 * needs to get bound to an actual address and processed by branch shortening.
// emitOutputLJ: writes the encoding of an instruction that refers to a local
// label into the code buffer. Besides jumps, the visible branches handle call,
// push, mov and lea forms.
//
// Arguments:
//    dst - position in the code buffer at which the bytes are written
//    i   - the instruction descriptor; it is reinterpreted as an instrDescJmp
//
// NOTE(review): this excerpt omits several lines of the function (local
// declarations, most case labels, braces/else lines and the return statement),
// so some guards mentioned below are inferred from the surrounding comments and
// should be confirmed against the full source.
12058 BYTE* emitter::emitOutputLJ(BYTE* dst, instrDesc* i)
12064 instrDescJmp* id = (instrDescJmp*)i;
12065 instruction ins = id->idIns();
12067 bool relAddr = true; // does the instruction use relative-addressing?
12069 // SSE/AVX instructions don't make any sense here
12070 assert(!IsSSEInstruction(ins));
12071 assert(!IsAVXInstruction(ins));
// Pick the encoding sizes: 'ssz' is the size used when the descriptor is marked
// short (idjShort) and 'lsz' the size used otherwise (see the adjustment of
// distVal further down). The case labels choosing between these assignments
// are mostly not visible in this excerpt.
12079 ssz = JCC_SIZE_SMALL;
12080 lsz = JCC_SIZE_LARGE;
12085 ssz = JMP_SIZE_SMALL;
12086 lsz = JMP_SIZE_LARGE;
12091 ssz = lsz = CALL_INST_SIZE;
12095 case INS_push_hide:
// No separate short form here: both sizes are the descriptor's recorded size.
12104 ssz = lsz = id->idCodeSize();
12110 // Figure out the distance to the target
12111 srcOffs = emitCurCodeOffs(dst);
12112 dstOffs = id->idAddr()->iiaIGlabel->igOffs;
// Two alternatives follow: the difference between the target and source
// addresses, or the target address itself.
// NOTE(review): presumably selected by 'relAddr'; the condition is not visible
// in this excerpt.
12116 distVal = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs));
12120 distVal = (ssize_t)emitOffsetToPtr(dstOffs);
12123 if (dstOffs <= srcOffs)
12125 // This is a backward jump - distance is known at this point
12126 CLANG_FORMAT_COMMENT_ANCHOR;
// Diagnostic dump for the jump selected by INTERESTING_JUMP_NUM (0 selects all).
12129 if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
12131 size_t blkOffs = id->idjIG->igOffs;
12133 if (INTERESTING_JUMP_NUM == 0)
12135 printf("[3] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
12137 printf("[3] Jump block is at %08X - %02X = %08X\n", blkOffs, emitOffsAdj, blkOffs - emitOffsAdj);
12138 printf("[3] Jump is at %08X - %02X = %08X\n", srcOffs, emitOffsAdj, srcOffs - emitOffsAdj);
12139 printf("[3] Label block is at %08X - %02X = %08X\n", dstOffs, emitOffsAdj, dstOffs - emitOffsAdj);
12143 // Can we use a short jump?
12144 if (jmp && distVal - ssz >= (size_t)JMP_DIST_SMALL_MAX_NEG)
12146 emitSetShortJump(id);
12151 // This is a forward jump - distance will be an upper limit
12152 emitFwdJumps = true;
12154 // The target offset will be closer by at least 'emitOffsAdj', but only if this
12155 // jump doesn't cross the hot-cold boundary.
12156 if (!emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
12158 dstOffs -= emitOffsAdj;
12159 distVal -= emitOffsAdj;
12162 // Record the location of the jump for later patching
12163 id->idjOffs = dstOffs;
12165 // Are we overflowing the id->idjOffs bitfield?
// (the value read back differs from what was stored if it did not fit)
12166 if (id->idjOffs != dstOffs)
12168 IMPL_LIMITATION("Method is too large");
// Diagnostic dump for the jump selected by INTERESTING_JUMP_NUM (0 selects all).
12172 if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
12174 size_t blkOffs = id->idjIG->igOffs;
12176 if (INTERESTING_JUMP_NUM == 0)
12178 printf("[4] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
12180 printf("[4] Jump block is at %08X\n", blkOffs);
12181 printf("[4] Jump is at %08X\n", srcOffs);
12182 printf("[4] Label block is at %08X - %02X = %08X\n", dstOffs + emitOffsAdj, emitOffsAdj, dstOffs);
12186 // Can we use a short jump?
12187 if (jmp && distVal - ssz <= (size_t)JMP_DIST_SMALL_MAX_POS)
12189 emitSetShortJump(id);
12193 // Adjust the offset to emit relative to the end of the instruction
12196 distVal -= id->idjShort ? ssz : lsz;
// Verbose dump; permanently disabled by the leading '0 &&'.
// NOTE(review): the label below prints "Fwd" when dstOffs <= srcOffs, which the
// code above treats as the backward case - the two strings look swapped.
12200 if (0 && emitComp->verbose)
12202 size_t sz = id->idjShort ? ssz : lsz;
12203 int distValSize = id->idjShort ? 4 : 8;
12204 printf("; %s jump [%08X/%03u] from %0*X to %0*X: dist = %08XH\n", (dstOffs <= srcOffs) ? "Fwd" : "Bwd",
12205 emitComp->dspPtr(id), id->idDebugOnlyInfo()->idNum, distValSize, srcOffs + sz, distValSize, dstOffs,
12210 // What size jump should we use?
// Short form. A short jump must not have been required to stay long and must
// not cross the hot-cold boundary.
12214 assert(!id->idjKeepLong);
12215 assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false);
12217 assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL);
12218 assert(JMP_SIZE_SMALL == 2);
// If the size estimated for this jump was not the short size, the difference is
// added to emitOffsAdj.
12222 if (emitInstCodeSz(id) != JMP_SIZE_SMALL)
12224 emitOffsAdj += emitInstCodeSz(id) - JMP_SIZE_SMALL;
12227 if (emitComp->verbose)
12229 printf("; NOTE: size of jump [%08X] mis-predicted\n", emitComp->dspPtr(id));
// Emit the opcode byte followed by the distance as a single byte.
12234 dst += emitOutputByte(dst, insCode(ins));
12236 // For forward jumps, record the address of the distance value
12237 id->idjTemp.idjAddr = (distVal > 0) ? dst : nullptr;
12239 dst += emitOutputByte(dst, distVal);
// Long form. The asserts verify that every short jump and its long counterpart
// are separated by the same enum distance (INS_l_jmp - INS_jmp), which is what
// allows the long opcode to be looked up by adding that distance to 'ins'.
12248 // clang-format off
12249 assert(INS_jmp + (INS_l_jmp - INS_jmp) == INS_l_jmp);
12250 assert(INS_jo + (INS_l_jmp - INS_jmp) == INS_l_jo);
12251 assert(INS_jb + (INS_l_jmp - INS_jmp) == INS_l_jb);
12252 assert(INS_jae + (INS_l_jmp - INS_jmp) == INS_l_jae);
12253 assert(INS_je + (INS_l_jmp - INS_jmp) == INS_l_je);
12254 assert(INS_jne + (INS_l_jmp - INS_jmp) == INS_l_jne);
12255 assert(INS_jbe + (INS_l_jmp - INS_jmp) == INS_l_jbe);
12256 assert(INS_ja + (INS_l_jmp - INS_jmp) == INS_l_ja);
12257 assert(INS_js + (INS_l_jmp - INS_jmp) == INS_l_js);
12258 assert(INS_jns + (INS_l_jmp - INS_jmp) == INS_l_jns);
12259 assert(INS_jp + (INS_l_jmp - INS_jmp) == INS_l_jp);
12260 assert(INS_jnp + (INS_l_jmp - INS_jmp) == INS_l_jnp);
12261 assert(INS_jl + (INS_l_jmp - INS_jmp) == INS_l_jl);
12262 assert(INS_jge + (INS_l_jmp - INS_jmp) == INS_l_jge);
12263 assert(INS_jle + (INS_l_jmp - INS_jmp) == INS_l_jle);
12264 assert(INS_jg + (INS_l_jmp - INS_jmp) == INS_l_jg);
12267 code = insCode((instruction)(ins + (INS_l_jmp - INS_jmp)));
12269 else if (ins == INS_push || ins == INS_push_hide)
12271 assert(insCodeMI(INS_push) == 0x68);
12274 else if (ins == INS_mov)
12276 // Make it look like IF_SWR_CNS so that emitOutputSV emits the r/m32 for us
// The three fields overwritten below are saved first and restored afterwards.
12277 insFormat tmpInsFmt = id->idInsFmt();
12278 insGroup* tmpIGlabel = id->idAddr()->iiaIGlabel;
12279 bool tmpDspReloc = id->idIsDspReloc();
12281 id->idInsFmt(IF_SWR_CNS);
12282 id->idAddr()->iiaLclVar = ((instrDescLbl*)id)->dstLclVar;
12283 id->idSetIsDspReloc(false);
12285 dst = emitOutputSV(dst, id, insCodeMI(ins));
12287 // Restore id fields with original values
12288 id->idInsFmt(tmpInsFmt);
12289 id->idAddr()->iiaIGlabel = tmpIGlabel;
12290 id->idSetIsDspReloc(tmpDspReloc);
12293 else if (ins == INS_lea)
12295 // Make an instrDesc that looks like IF_RWR_ARD so that emitOutputAM emits the r/m32 for us.
12296 // We basically are doing what emitIns_R_AI does.
12297 // TODO-XArch-Cleanup: revisit this.
// A stack-local descriptor is used so that 'id' itself is left unmodified.
12298 instrDescAmd idAmdStackLocal;
12299 instrDescAmd* idAmd = &idAmdStackLocal;
12300 *(instrDesc*)idAmd = *(instrDesc*)id; // copy all the "core" fields
12301 memset((BYTE*)idAmd + sizeof(instrDesc), 0,
12302 sizeof(instrDescAmd) - sizeof(instrDesc)); // zero out the tail that wasn't copied
// No base and no index register: the address is the displacement alone.
12304 idAmd->idInsFmt(IF_RWR_ARD);
12305 idAmd->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
12306 idAmd->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
12307 emitSetAmdDisp(idAmd, distVal); // set the displacement
12308 idAmd->idSetIsDspReloc(id->idIsDspReloc());
12309 assert(emitGetInsAmdAny(idAmd) == distVal); // make sure "disp" is stored properly
// Recompute the code size for the temporary descriptor.
12311 UNATIVE_OFFSET sz = emitInsSizeAM(idAmd, insCodeRM(ins));
12312 idAmd->idCodeSize(sz);
// Fold the encoding of the target register (idReg1) into the opcode, shifted
// into the second byte.
12314 code = insCodeRM(ins);
12315 code |= (insEncodeReg345(ins, id->idReg1(), EA_PTRSIZE, &code) << 8);
12317 dst = emitOutputAM(dst, idAmd, code, nullptr);
12321 // For forward jumps, record the address of the distance value
12322 // Hard-coded 4 here because we already output the displacement, as the last thing.
12323 id->idjTemp.idjAddr = (dstOffs > srcOffs) ? (dst - 4) : nullptr;
// mov has already been emitted in full by emitOutputSV above, so the opcode is
// only written here for the other instructions.
12333 if (ins != INS_mov)
12335 dst += emitOutputByte(dst, code);
// Second opcode byte.
// NOTE(review): presumably only emitted when 'code' has one; the guard is not
// visible in this excerpt.
12339 dst += emitOutputByte(dst, code >> 8);
12343 // For forward jumps, record the address of the distance value
12344 id->idjTemp.idjAddr = (dstOffs > srcOffs) ? dst : nullptr;
12346 dst += emitOutputLong(dst, distVal);
12348 #ifndef _TARGET_AMD64_ // all REL32 on AMD have to go through recordRelocation
12349 if (emitComp->opts.compReloc)
// Both relocations below are recorded at the address of the 4-byte distance
// value just written (sizeof(INT32) bytes before 'dst').
12354 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)distVal, IMAGE_REL_BASED_HIGHLOW);
12356 else if (emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
12358 assert(id->idjKeepLong);
12359 emitRecordRelocation((void*)(dst - sizeof(INT32)), dst + distVal, IMAGE_REL_BASED_REL32);
12364 // Local calls kill all registers
// so every register currently tracked as holding a GC ref or byref is marked dead
12365 if (ins == INS_call && (emitThisGCrefRegs | emitThisByrefRegs))
12367 emitGCregDeadUpdMask(emitThisGCrefRegs | emitThisByrefRegs, dst);
12373 /*****************************************************************************
12375 * Append the machine code corresponding to the given instruction descriptor
12376 * to the code block at '*dp'; the base of the code block is 'bp', and 'ig'
12377 * is the instruction group that contains the instruction. Updates '*dp' to
12378 * point past the generated code, and returns the size of the instruction
12379 * descriptor in bytes.
12383 #pragma warning(push)
12384 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
12386 size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
12388 assert(emitIssuing);
12391 size_t sz = sizeof(instrDesc);
12392 instruction ins = id->idIns();
12393 unsigned char callInstrSize = 0;
12396 bool dspOffs = emitComp->opts.dspGCtbls;
12399 emitAttr size = id->idOpSize();
12401 assert(REG_NA == (int)REG_NA);
12403 assert(ins != INS_imul || size >= EA_4BYTE); // Has no 'w' bit
12404 assert(instrIs3opImul(id->idIns()) == 0 || size >= EA_4BYTE); // Has no 'w' bit
12406 VARSET_TP GCvars(VarSetOps::UninitVal());
12408 // What instruction format have we got?
12409 switch (id->idInsFmt())
12419 regMaskTP gcrefRegs;
12420 regMaskTP byrefRegs;
12422 /********************************************************************/
12424 /********************************************************************/
12426 // the loop alignment pseudo instruction
12427 if (ins == INS_align)
12429 sz = SMALL_IDSC_SIZE;
12430 dst = emitOutputNOP(dst, (-(int)(size_t)dst) & 0x0f);
12431 assert(((size_t)dst & 0x0f) == 0);
12435 if (ins == INS_nop)
12437 dst = emitOutputNOP(dst, id->idCodeSize());
12441 // the cdq instruction kills the EDX register implicitly
12442 if (ins == INS_cdq)
12444 emitGCregDeadUpd(REG_EDX, dst);
12447 assert(id->idGCref() == GCT_NONE);
12449 code = insCodeMR(ins);
12451 #ifdef _TARGET_AMD64_
12452 // Support only scalar AVX instructions and hence size is hard coded to 4-byte.
12453 code = AddVexPrefixIfNeeded(ins, code, EA_4BYTE);
12455 if (ins == INS_cdq && TakesRexWPrefix(ins, id->idOpSize()))
12457 code = AddRexWPrefix(ins, code);
12459 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
12461 // Is this a 'big' opcode?
12462 if (code & 0xFF000000)
12464 // The high word and then the low word
12465 dst += emitOutputWord(dst, code >> 16);
12466 code &= 0x0000FFFF;
12467 dst += emitOutputWord(dst, code);
12469 else if (code & 0x00FF0000)
12471 // The high byte and then the low word
12472 dst += emitOutputByte(dst, code >> 16);
12473 code &= 0x0000FFFF;
12474 dst += emitOutputWord(dst, code);
12476 else if (code & 0xFF00)
12478 // The 2 byte opcode
12479 dst += emitOutputWord(dst, code);
12483 // The 1 byte opcode
12484 dst += emitOutputByte(dst, code);
12489 /********************************************************************/
12490 /* Simple constant, local label, method */
12491 /********************************************************************/
12494 dst = emitOutputIV(dst, id);
12495 sz = emitSizeOfInsDsc(id);
12501 assert(id->idGCref() == GCT_NONE);
12502 assert(id->idIsBound());
12504 // TODO-XArch-Cleanup: handle IF_RWR_LABEL in emitOutputLJ() or change it to emitOutputAM()?
12505 dst = emitOutputLJ(dst, id);
12506 sz = (id->idInsFmt() == IF_SWR_LABEL ? sizeof(instrDescLbl) : sizeof(instrDescJmp));
12511 // Assume we'll be recording this call
12514 // Get hold of the argument count and field Handle
12515 args = emitGetInsCDinfo(id);
12517 // Is this a "fat" call descriptor?
12518 if (id->idIsLargeCall())
12520 instrDescCGCA* idCall = (instrDescCGCA*)id;
12521 gcrefRegs = idCall->idcGcrefRegs;
12522 byrefRegs = idCall->idcByrefRegs;
12523 VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
12524 sz = sizeof(instrDescCGCA);
12528 assert(!id->idIsLargeDsp());
12529 assert(!id->idIsLargeCns());
12531 gcrefRegs = emitDecodeCallGCregs(id);
12533 VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
12534 sz = sizeof(instrDesc);
12537 addr = (BYTE*)id->idAddr()->iiaAddr;
12538 assert(addr != nullptr);
12540 // Some helpers don't get recorded in GC tables
12541 if (id->idIsNoGC())
12546 // What kind of a call do we have here?
12547 if (id->idInsFmt() == IF_METHPTR)
12549 // This is call indirect via a method pointer
12551 code = insCodeMR(ins);
12552 if (ins == INS_i_jmp)
12557 if (id->idIsDspReloc())
12559 dst += emitOutputWord(dst, code | 0x0500);
12560 #ifdef _TARGET_AMD64_
12561 dst += emitOutputLong(dst, 0);
12563 dst += emitOutputLong(dst, (int)addr);
12565 emitRecordRelocation((void*)(dst - sizeof(int)), addr, IMAGE_REL_BASED_DISP32);
12569 #ifdef _TARGET_X86_
12570 dst += emitOutputWord(dst, code | 0x0500);
12571 #else //_TARGET_AMD64_
12572 // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
12573 // This addr mode should never be used while generating relocatable ngen code nor if
12574 // the addr can be encoded as pc-relative address.
12575 noway_assert(!emitComp->opts.compReloc);
12576 noway_assert(codeGen->genAddrRelocTypeHint((size_t)addr) != IMAGE_REL_BASED_REL32);
12577 noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (ssize_t)addr);
12579 // This requires specifying a SIB byte after the ModRM byte.
12580 dst += emitOutputWord(dst, code | 0x0400);
12581 dst += emitOutputByte(dst, 0x25);
12582 #endif //_TARGET_AMD64_
12583 dst += emitOutputLong(dst, static_cast<int>(reinterpret_cast<intptr_t>(addr)));
12589 // This is call direct where we know the target, thus we can
12590 // use a direct call; the target to jump to is in iiaAddr.
12591 assert(id->idInsFmt() == IF_METHOD);
12593 // Output the call opcode followed by the target distance
12594 dst += (ins == INS_l_jmp) ? emitOutputByte(dst, insCode(ins)) : emitOutputByte(dst, insCodeMI(ins));
12597 #ifdef _TARGET_AMD64_
12598 // All REL32 on Amd64 go through recordRelocation. Here we will output zero to advance dst.
12600 assert(id->idIsDspReloc());
12602 // Calculate PC relative displacement.
12603 // Although one might expect sizeof(void*) here, the x86 and x64 instruction sets
12604 // only allow a 32-bit offset, so sizeof(INT32) is the correct size to use.
12605 offset = addr - (dst + sizeof(INT32));
12608 dst += emitOutputLong(dst, offset);
12610 if (id->idIsDspReloc())
12612 emitRecordRelocation((void*)(dst - sizeof(INT32)), addr, IMAGE_REL_BASED_REL32);
12617 /* We update the GC info before the call as the variables cannot be
12618 used by the call. Killing variables before the call helps with
12619 boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029.
12620 If we ever track aliased variables (which could be used by the
12621 call), we would have to keep them alive past the call.
12623 assert(FitsIn<unsigned char>(dst - *dp));
12624 callInstrSize = static_cast<unsigned char>(dst - *dp);
12625 emitUpdateLiveGCvars(GCvars, *dp);
12627 // If the method returns a GC ref, mark EAX appropriately
12628 if (id->idGCref() == GCT_GCREF)
12630 gcrefRegs |= RBM_EAX;
12632 else if (id->idGCref() == GCT_BYREF)
12634 byrefRegs |= RBM_EAX;
12637 #ifdef UNIX_AMD64_ABI
12638 // If a multi-register return method is called, mark RDX appropriately (for System V AMD64).
12639 if (id->idIsLargeCall())
12641 instrDescCGCA* idCall = (instrDescCGCA*)id;
12642 if (idCall->idSecondGCref() == GCT_GCREF)
12644 gcrefRegs |= RBM_RDX;
12646 else if (idCall->idSecondGCref() == GCT_BYREF)
12648 byrefRegs |= RBM_RDX;
12651 #endif // UNIX_AMD64_ABI
12653 // If the GC register set has changed, report the new set
12654 if (gcrefRegs != emitThisGCrefRegs)
12656 emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);
12659 if (byrefRegs != emitThisByrefRegs)
12661 emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);
12664 if (recCall || args)
12666 // For callee-pop, all arguments will be popped after the call.
12667 // For caller-pop, any GC arguments will go dead after the call.
12669 assert(callInstrSize != 0);
12673 emitStackPop(dst, /*isCall*/ true, callInstrSize, args);
12677 emitStackKillArgs(dst, -args, callInstrSize);
12681 // Do we need to record a call location for GC purposes?
12682 if (!emitFullGCinfo && recCall)
12684 assert(callInstrSize != 0);
12685 emitRecordGCcall(dst, callInstrSize);
12689 if (ins == INS_call)
12691 emitRecordCallSite(emitCurCodeOffs(*dp), id->idDebugOnlyInfo()->idCallSig,
12692 (CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
12698 /********************************************************************/
12699 /* One register operand */
12700 /********************************************************************/
12705 dst = emitOutputR(dst, id);
12706 sz = SMALL_IDSC_SIZE;
12709 /********************************************************************/
12710 /* Register and register/constant */
12711 /********************************************************************/
12714 code = insCodeMR(ins);
12715 // Emit the VEX prefix if it exists
12716 code = AddVexPrefixIfNeeded(ins, code, size);
12717 code = insEncodeMRreg(ins, id->idReg1(), size, code);
12720 if (size != EA_1BYTE)
12725 // Emit the REX prefix if it exists
12726 if (TakesRexWPrefix(ins, size))
12728 code = AddRexWPrefix(ins, code);
12731 // Output a size prefix for a 16-bit operand
12732 if (size == EA_2BYTE)
12734 dst += emitOutputByte(dst, 0x66);
12737 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
12738 dst += emitOutputWord(dst, code);
12739 dst += emitOutputByte(dst, emitGetInsSC(id));
12740 sz = emitSizeOfInsDsc(id);
12743 assert(!id->idGCref());
12744 emitGCregDeadUpd(id->idReg1(), dst);
12751 dst = emitOutputRR(dst, id);
12752 sz = SMALL_IDSC_SIZE;
12758 dst = emitOutputRI(dst, id);
12759 sz = emitSizeOfInsDsc(id);
12762 case IF_RWR_RRD_RRD:
12763 dst = emitOutputRRR(dst, id);
12764 sz = emitSizeOfInsDsc(id);
12766 case IF_RWR_RRD_RRD_CNS:
12767 case IF_RWR_RRD_RRD_RRD:
12768 dst = emitOutputRRR(dst, id);
12769 sz = emitSizeOfInsDsc(id);
12770 dst += emitOutputByte(dst, emitGetInsSC(id));
12773 case IF_RRW_RRW_CNS:
12774 assert(id->idGCref() == GCT_NONE);
12776 // Get the 'base' opcode (it's a big one)
12777 // Also, determine which operand goes where in the ModRM byte.
12780 if (hasCodeMR(ins))
12782 code = insCodeMR(ins);
12783 // Emit the VEX prefix if it exists
12784 code = AddVexPrefixIfNeeded(ins, code, size);
12785 code = insEncodeMRreg(ins, code);
12786 mReg = id->idReg1();
12787 rReg = id->idReg2();
12789 else if (hasCodeMI(ins))
12791 code = insCodeMI(ins);
12793 // Emit the VEX prefix if it exists
12794 code = AddVexPrefixIfNeeded(ins, code, size);
12796 assert((code & 0xC000) == 0);
12799 mReg = id->idReg2();
12801 // The left and right shifts use the same encoding, and are distinguished by the Reg/Opcode field.
12802 rReg = getSseShiftRegNumber(ins);
12806 code = insCodeRM(ins);
12807 // Emit the VEX prefix if it exists
12808 code = AddVexPrefixIfNeeded(ins, code, size);
12809 code = insEncodeRMreg(ins, code);
12810 mReg = id->idReg2();
12811 rReg = id->idReg1();
12813 assert(code & 0x00FF0000);
12815 if (TakesRexWPrefix(ins, size))
12817 code = AddRexWPrefix(ins, code);
12820 if (TakesVexPrefix(ins))
12822 if (IsDstDstSrcAVXInstruction(ins))
12824 // Encode source/dest operand reg in 'vvvv' bits in 1's complement form
12825 // This code will have to change when we support 3 operands.
12826 // For now, we always overload this source with the destination (always reg1).
12827 // (Though we will need to handle the few ops that can have the 'vvvv' bits as destination,
12828 // e.g. pslldq, when/if we support those instructions with 2 registers.)
12829 // (see x64 manual Table 2-9. Instructions with a VEX.vvvv destination)
12830 code = insEncodeReg3456(ins, id->idReg1(), size, code);
12832 else if (IsDstSrcSrcAVXInstruction(ins))
12834 // This is a "merge" move instruction.
12835 // Encode source operand reg in 'vvvv' bits in 1's complement form
12836 code = insEncodeReg3456(ins, id->idReg2(), size, code);
12840 regcode = (insEncodeReg345(ins, rReg, size, &code) | insEncodeReg012(ins, mReg, size, &code));
12842 // Output the REX prefix
12843 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
12845 if (code & 0xFF000000)
12847 // Output the highest word of the opcode
12848 dst += emitOutputWord(dst, code >> 16);
12849 code &= 0x0000FFFF;
12851 if (Is4ByteSSEInstruction(ins))
12853 // Output 3rd byte of the opcode
12854 dst += emitOutputByte(dst, code);
12858 else if (code & 0x00FF0000)
12860 dst += emitOutputByte(dst, code >> 16);
12861 code &= 0x0000FFFF;
12864 // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
12865 if ((code & 0xFF00) == 0xC000)
12867 dst += emitOutputWord(dst, code | (regcode << 8));
12869 else if ((code & 0xFF) == 0x00)
12871 // This case happens for some SSE/AVX instructions only
12872 assert(IsAVXInstruction(ins) || Is4ByteSSEInstruction(ins));
12874 dst += emitOutputByte(dst, (code >> 8) & 0xFF);
12875 dst += emitOutputByte(dst, (0xC0 | regcode));
12879 dst += emitOutputWord(dst, code);
12880 dst += emitOutputByte(dst, (0xC0 | regcode));
12883 dst += emitOutputByte(dst, emitGetInsSC(id));
12884 sz = emitSizeOfInsDsc(id);
12886 // Kill any GC ref in the destination register if necessary.
12887 if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
12889 emitGCregDeadUpd(id->idReg1(), dst);
12893 /********************************************************************/
12894 /* Address mode operand */
12895 /********************************************************************/
12901 dst = emitCodeWithInstructionSize(dst, emitOutputAM(dst, id, insCodeMR(ins)), &callInstrSize);
12908 // Get hold of the argument count and method handle
12909 args = emitGetInsCIargs(id);
12911 // Is this a "fat" call descriptor?
12912 if (id->idIsLargeCall())
12914 instrDescCGCA* idCall = (instrDescCGCA*)id;
12916 gcrefRegs = idCall->idcGcrefRegs;
12917 byrefRegs = idCall->idcByrefRegs;
12918 VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
12919 sz = sizeof(instrDescCGCA);
12923 assert(!id->idIsLargeDsp());
12924 assert(!id->idIsLargeCns());
12926 gcrefRegs = emitDecodeCallGCregs(id);
12928 VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
12929 sz = sizeof(instrDesc);
12937 sz = emitSizeOfInsDsc(id);
12942 case IF_RRW_ARD_CNS:
12943 case IF_RWR_ARD_CNS:
12944 assert(IsSSEOrAVXInstruction(ins));
12945 emitGetInsAmdCns(id, &cnsVal);
12946 code = insCodeRM(ins);
12948 // Special case 4-byte AVX instructions
12949 if (EncodedBySSE38orSSE3A(ins))
12951 dst = emitOutputAM(dst, id, code, &cnsVal);
12955 code = AddVexPrefixIfNeeded(ins, code, size);
12956 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12957 dst = emitOutputAM(dst, id, code | regcode, &cnsVal);
12960 sz = emitSizeOfInsDsc(id);
12963 case IF_AWR_RRD_CNS:
12964 assert(ins == INS_vextracti128 || ins == INS_vextractf128);
12965 assert(UseVEXEncoding());
12966 emitGetInsAmdCns(id, &cnsVal);
12967 code = insCodeMR(ins);
12968 dst = emitOutputAM(dst, id, code, &cnsVal);
12969 sz = emitSizeOfInsDsc(id);
12975 case IF_RWR_RRD_ARD:
12977 code = insCodeRM(ins);
12978 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
12980 dst = emitOutputAM(dst, id, code);
12984 code = AddVexPrefixIfNeeded(ins, code, size);
12985 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12986 dst = emitOutputAM(dst, id, code | regcode);
12988 sz = emitSizeOfInsDsc(id);
12992 case IF_RWR_ARD_RRD:
12994 assert(IsAVX2GatherInstruction(ins));
12995 code = insCodeRM(ins);
12996 dst = emitOutputAM(dst, id, code);
12997 sz = emitSizeOfInsDsc(id);
13001 case IF_RWR_RRD_ARD_CNS:
13002 case IF_RWR_RRD_ARD_RRD:
13004 assert(IsSSEOrAVXInstruction(ins));
13005 emitGetInsAmdCns(id, &cnsVal);
13006 code = insCodeRM(ins);
13007 if (EncodedBySSE38orSSE3A(ins))
13009 dst = emitOutputAM(dst, id, code, &cnsVal);
13013 code = AddVexPrefixIfNeeded(ins, code, size);
13014 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13015 dst = emitOutputAM(dst, id, code | regcode, &cnsVal);
13017 sz = emitSizeOfInsDsc(id);
13024 code = insCodeMR(ins);
13025 code = AddVexPrefixIfNeeded(ins, code, size);
13026 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13027 dst = emitOutputAM(dst, id, code | regcode);
13028 sz = emitSizeOfInsDsc(id);
13031 case IF_AWR_RRD_RRD:
13033 code = insCodeMR(ins);
13034 code = AddVexPrefixIfNeeded(ins, code, size);
13035 dst = emitOutputAM(dst, id, code);
13036 sz = emitSizeOfInsDsc(id);
13043 emitGetInsAmdCns(id, &cnsVal);
13044 dst = emitOutputAM(dst, id, insCodeMI(ins), &cnsVal);
13045 sz = emitSizeOfInsDsc(id);
13049 emitGetInsAmdCns(id, &cnsVal);
13050 dst = emitOutputAM(dst, id, insCodeMR(ins), &cnsVal);
13051 sz = emitSizeOfInsDsc(id);
13054 /********************************************************************/
13055 /* Stack-based operand */
13056 /********************************************************************/
13062 assert(ins != INS_pop_hide);
13063 if (ins == INS_pop)
13065 // The offset in "pop [ESP+xxx]" is relative to the new ESP value
13066 CLANG_FORMAT_COMMENT_ANCHOR;
13068 #if !FEATURE_FIXED_OUT_ARGS
13069 emitCurStackLvl -= sizeof(int);
13071 dst = emitOutputSV(dst, id, insCodeMR(ins));
13073 #if !FEATURE_FIXED_OUT_ARGS
13074 emitCurStackLvl += sizeof(int);
13079 dst = emitCodeWithInstructionSize(dst, emitOutputSV(dst, id, insCodeMR(ins)), &callInstrSize);
13081 if (ins == INS_call)
13091 emitGetInsCns(id, &cnsVal);
13092 dst = emitOutputSV(dst, id, insCodeMI(ins), &cnsVal);
13093 sz = emitSizeOfInsDsc(id);
13097 emitGetInsCns(id, &cnsVal);
13098 dst = emitOutputSV(dst, id, insCodeMR(ins), &cnsVal);
13099 sz = emitSizeOfInsDsc(id);
13102 case IF_RRW_SRD_CNS:
13103 case IF_RWR_SRD_CNS:
13104 assert(IsSSEOrAVXInstruction(ins));
13105 emitGetInsCns(id, &cnsVal);
13106 code = insCodeRM(ins);
13108 // Special case 4-byte AVX instructions
13109 if (EncodedBySSE38orSSE3A(ins))
13111 dst = emitOutputSV(dst, id, code, &cnsVal);
13115 code = AddVexPrefixIfNeeded(ins, code, size);
13117 // In case of AVX instructions that take 3 operands, encode reg1 as first source.
13118 // Note that reg1 is both a source and a destination.
13120 // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
13121 // now we use the single source as source1 and source2.
13122 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
13123 if (IsDstDstSrcAVXInstruction(ins))
13125 // encode source operand reg in 'vvvv' bits in 1's complement form
13126 code = insEncodeReg3456(ins, id->idReg1(), size, code);
13129 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13130 dst = emitOutputSV(dst, id, code | regcode, &cnsVal);
13133 sz = emitSizeOfInsDsc(id);
13140 code = insCodeRM(ins);
13142 // 4-byte AVX instructions are special cased inside emitOutputSV
13143 // since they do not have space to encode ModRM byte.
13144 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
13146 dst = emitOutputSV(dst, id, code);
13150 code = AddVexPrefixIfNeeded(ins, code, size);
13152 if (IsDstDstSrcAVXInstruction(ins))
13154 // encode source operand reg in 'vvvv' bits in 1's complement form
13155 code = insEncodeReg3456(ins, id->idReg1(), size, code);
13158 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13159 dst = emitOutputSV(dst, id, code | regcode);
13162 sz = emitSizeOfInsDsc(id);
13166 case IF_RWR_RRD_SRD:
13168 // This should only be called on AVX instructions
13169 assert(IsAVXInstruction(ins));
13171 code = insCodeRM(ins);
13172 code = AddVexPrefixIfNeeded(ins, code, size);
13173 code = insEncodeReg3456(ins, id->idReg2(), size,
13174 code); // encode source operand reg in 'vvvv' bits in 1's complement form
13176 // 4-byte AVX instructions are special cased inside emitOutputSV
13177 // since they do not have space to encode ModRM byte.
13178 if (EncodedBySSE38orSSE3A(ins))
13180 dst = emitOutputSV(dst, id, code);
13184 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13185 dst = emitOutputSV(dst, id, code | regcode);
13190 case IF_RWR_RRD_SRD_CNS:
13191 case IF_RWR_RRD_SRD_RRD:
13193 // This should only be called on AVX instructions
13194 assert(IsAVXInstruction(ins));
13195 emitGetInsCns(id, &cnsVal);
13197 code = insCodeRM(ins);
13198 code = AddVexPrefixIfNeeded(ins, code, size);
13199 code = insEncodeReg3456(ins, id->idReg2(), size,
13200 code); // encode source operand reg in 'vvvv' bits in 1's complement form
13202 // 4-byte AVX instructions are special cased inside emitOutputSV
13203 // since they do not have space to encode ModRM byte.
13204 if (EncodedBySSE38orSSE3A(ins))
13206 dst = emitOutputSV(dst, id, code, &cnsVal);
13210 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13211 dst = emitOutputSV(dst, id, code | regcode, &cnsVal);
13214 sz = emitSizeOfInsDsc(id);
13221 code = insCodeMR(ins);
13222 code = AddVexPrefixIfNeeded(ins, code, size);
13224 // In case of AVX instructions that take 3 operands, encode reg1 as first source.
13225 // Note that reg1 is both a source and a destination.
13227 // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
13228 // now we use the single source as source1 and source2.
13229 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
13230 if (IsDstDstSrcAVXInstruction(ins))
13232 // encode source operand reg in 'vvvv' bits in 1's complement form
13233 code = insEncodeReg3456(ins, id->idReg1(), size, code);
13236 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13237 dst = emitOutputSV(dst, id, code | regcode);
13240 /********************************************************************/
13241 /* Direct memory address */
13242 /********************************************************************/
13248 noway_assert(ins != INS_call);
13249 dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500);
13250 sz = emitSizeOfInsDsc(id);
13254 dst = emitOutputCV(dst, id, insCodeMI(ins));
13257 case IF_RRW_MRD_CNS:
13258 case IF_RWR_MRD_CNS:
13259 assert(IsSSEOrAVXInstruction(ins));
13260 emitGetInsDcmCns(id, &cnsVal);
13261 code = insCodeRM(ins);
13263 // Special case 4-byte AVX instructions
13264 if (EncodedBySSE38orSSE3A(ins))
13266 dst = emitOutputCV(dst, id, code, &cnsVal);
13270 code = AddVexPrefixIfNeeded(ins, code, size);
13272 // In case of AVX instructions that take 3 operands, encode reg1 as first source.
13273 // Note that reg1 is both a source and a destination.
13275 // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
13276 // now we use the single source as source1 and source2.
13277 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
13278 if (IsDstDstSrcAVXInstruction(ins))
13280 // encode source operand reg in 'vvvv' bits in 1's complement form
13281 code = insEncodeReg3456(ins, id->idReg1(), size, code);
13284 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13285 dst = emitOutputCV(dst, id, code | regcode | 0x0500, &cnsVal);
13288 sz = emitSizeOfInsDsc(id);
13291 case IF_MWR_RRD_CNS:
13292 assert(ins == INS_vextracti128 || ins == INS_vextractf128);
13293 assert(UseVEXEncoding());
13294 emitGetInsDcmCns(id, &cnsVal);
13295 code = insCodeMR(ins);
13296 // only AVX2 vextracti128 and AVX vextractf128 can reach this path,
13297 // they do not need VEX.vvvv to encode the register operand
13298 dst = emitOutputCV(dst, id, code, &cnsVal);
13299 sz = emitSizeOfInsDsc(id);
13306 code = insCodeRM(ins);
13308 // Special case 4-byte AVX instructions
13309 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
13311 dst = emitOutputCV(dst, id, code);
13315 code = AddVexPrefixIfNeeded(ins, code, size);
13317 if (IsDstDstSrcAVXInstruction(ins))
13319 // encode source operand reg in 'vvvv' bits in 1's complement form
13320 code = insEncodeReg3456(ins, id->idReg1(), size, code);
13323 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13324 dst = emitOutputCV(dst, id, code | regcode | 0x0500);
13327 sz = emitSizeOfInsDsc(id);
13331 case IF_RWR_RRD_MRD:
13333 // This should only be called on AVX instructions
13334 assert(IsAVXInstruction(ins));
13336 code = insCodeRM(ins);
13337 code = AddVexPrefixIfNeeded(ins, code, size);
13338 code = insEncodeReg3456(ins, id->idReg2(), size,
13339 code); // encode source operand reg in 'vvvv' bits in 1's complement form
13341 // Special case 4-byte AVX instructions
13342 if (EncodedBySSE38orSSE3A(ins))
13344 dst = emitOutputCV(dst, id, code);
13348 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13349 dst = emitOutputCV(dst, id, code | regcode | 0x0500);
13351 sz = emitSizeOfInsDsc(id);
13355 case IF_RWR_RRD_MRD_CNS:
13356 case IF_RWR_RRD_MRD_RRD:
13358 // This should only be called on AVX instructions
13359 assert(IsAVXInstruction(ins));
13360 emitGetInsCns(id, &cnsVal);
13362 code = insCodeRM(ins);
13363 code = AddVexPrefixIfNeeded(ins, code, size);
13364 code = insEncodeReg3456(ins, id->idReg2(), size,
13365 code); // encode source operand reg in 'vvvv' bits in 1's complement form
13367 // Special case 4-byte AVX instructions
13368 if (EncodedBySSE38orSSE3A(ins))
13370 dst = emitOutputCV(dst, id, code, &cnsVal);
13374 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13375 dst = emitOutputCV(dst, id, code | regcode | 0x0500, &cnsVal);
13377 sz = emitSizeOfInsDsc(id);
13381 case IF_RWR_MRD_OFF:
13382 code = insCode(ins);
13383 code = AddVexPrefixIfNeeded(ins, code, size);
13385 // In case of AVX instructions that take 3 operands, encode reg1 as first source.
13386 // Note that reg1 is both a source and a destination.
13388 // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
13389 // now we use the single source as source1 and source2.
13390 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
13391 if (IsDstDstSrcAVXInstruction(ins))
13393 // encode source operand reg in 'vvvv' bits in 1's complement form
13394 code = insEncodeReg3456(ins, id->idReg1(), size, code);
13397 regcode = insEncodeReg012(id->idIns(), id->idReg1(), size, &code);
13398 dst = emitOutputCV(dst, id, code | 0x30 | regcode);
13399 sz = emitSizeOfInsDsc(id);
13405 code = insCodeMR(ins);
13406 code = AddVexPrefixIfNeeded(ins, code, size);
13408 // In case of AVX instructions that take 3 operands, encode reg1 as first source.
13409 // Note that reg1 is both a source and a destination.
13411 // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
13412 // now we use the single source as source1 and source2.
13413 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
13414 if (IsDstDstSrcAVXInstruction(ins))
13416 // encode source operand reg in 'vvvv' bits in 1's complement form
13417 code = insEncodeReg3456(ins, id->idReg1(), size, code);
13420 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13421 dst = emitOutputCV(dst, id, code | regcode | 0x0500);
13422 sz = emitSizeOfInsDsc(id);
13428 emitGetInsDcmCns(id, &cnsVal);
13429 dst = emitOutputCV(dst, id, insCodeMI(ins) | 0x0500, &cnsVal);
13430 sz = emitSizeOfInsDsc(id);
13434 emitGetInsDcmCns(id, &cnsVal);
13435 dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500, &cnsVal);
13436 sz = emitSizeOfInsDsc(id);
13439 /********************************************************************/
13441 /********************************************************************/
13446 printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
13447 assert(!"don't know how to encode this instruction");
13452 // Make sure we set the instruction descriptor size correctly
13453 assert(sz == emitSizeOfInsDsc(id));
13455 #if !FEATURE_FIXED_OUT_ARGS
13456 bool updateStackLevel = !emitIGisInProlog(ig) && !emitIGisInEpilog(ig);
13458 #if FEATURE_EH_FUNCLETS
13459 updateStackLevel = updateStackLevel && !emitIGisInFuncletProlog(ig) && !emitIGisInFuncletEpilog(ig);
13460 #endif // FEATURE_EH_FUNCLETS
13462 // Make sure we keep the current stack level up to date
13463 if (updateStackLevel)
13468 // Please note: {INS_push_hide,IF_LABEL} is used to push the address of the
13469 // finally block for calling it locally for an op_leave.
13470 emitStackPush(dst, id->idGCref());
13474 emitStackPop(dst, false, /*callInstrSize*/ 0, 1);
13478 // Check for "sub ESP, icon"
13479 if (ins == INS_sub && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
13481 assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
13482 emitStackPushN(dst, (unsigned)(emitGetInsSC(id) / TARGET_POINTER_SIZE));
13487 // Check for "add ESP, icon"
13488 if (ins == INS_add && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
13490 assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
13491 emitStackPop(dst, /*isCall*/ false, /*callInstrSize*/ 0,
13492 (unsigned)(emitGetInsSC(id) / TARGET_POINTER_SIZE));
13501 #endif // !FEATURE_FIXED_OUT_ARGS
13503 assert((int)emitCurStackLvl >= 0);
13505 // Only epilog "instructions" and some pseudo-instrs
13506 // are allowed not to generate any code
13508 assert(*dp != dst || emitInstHasNoCode(ins));
13511 if (emitComp->opts.disAsm || emitComp->opts.dspEmit || emitComp->verbose)
13513 emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(*dp), *dp, (dst - *dp));
13516 if (emitComp->compDebugBreak)
13518 // Setting JitEmitPrintRefRegs=1 will print out emitThisGCrefRegs and emitThisByrefRegs
13519 // at the beginning of this method.
13520 if (JitConfig.JitEmitPrintRefRegs() != 0)
13522 printf("Before emitOutputInstr for id->idDebugOnlyInfo()->idNum=0x%02x\n", id->idDebugOnlyInfo()->idNum);
13523 printf(" emitThisGCrefRegs(0x%p)=", emitComp->dspPtr(&emitThisGCrefRegs));
13524 printRegMaskInt(emitThisGCrefRegs);
13525 emitDispRegSet(emitThisGCrefRegs);
13527 printf(" emitThisByrefRegs(0x%p)=", emitComp->dspPtr(&emitThisByrefRegs));
13528 printRegMaskInt(emitThisByrefRegs);
13529 emitDispRegSet(emitThisByrefRegs);
13533 // For example, setting JitBreakEmitOutputInstr=a6 will break when this method is called for
13534 // emitting instruction a6 (i.e. IN00a6 in jitdump).
13535 if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum)
13537 assert(!"JitBreakEmitOutputInstr reached");
13542 #ifdef TRANSLATE_PDB
13545 // only map instruction groups to instruction groups
13546 MapCode(id->idDebugOnlyInfo()->idilStart, *dp);
13553 if (ins == INS_mulEAX || ins == INS_imulEAX)
13555 // INS_mulEAX and INS_imulEAX have an implicit target of Edx:Eax. Make sure
13556 // that we detected this and cleared its GC-status.
13558 assert(((RBM_EAX | RBM_EDX) & (emitThisGCrefRegs | emitThisByrefRegs)) == 0);
13561 if (instrIs3opImul(ins))
13563 // The target of the 3-operand imul is implicitly encoded. Make sure
13564 // that we detected the implicit register and cleared its GC-status.
13566 regMaskTP regMask = genRegMask(inst3opImulReg(ins));
13567 assert((regMask & (emitThisGCrefRegs | emitThisByrefRegs)) == 0);
13574 #pragma warning(pop)
13577 /*****************************************************************************/
13578 /*****************************************************************************/
13580 #endif // defined(_TARGET_XARCH_)