// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                        Amd64/x86 Code Generator                           XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif

#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator.

#ifdef _TARGET_XARCH_
#include "emit.h"
#include "codegen.h"
#include "lower.h"
#include "gcinfo.h"
#include "gcinfoencoder.h"
/*****************************************************************************
 *
 *  Generate code that will set the given register to the integer constant.
 */

void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
{
    // Reg cannot be a FP reg
    assert(!genIsValidFloatReg(reg));

    // The only TYP_REF constant that can come down this path is a managed 'null' since it is not
    // relocatable. Other ref type constants (e.g. string objects) go through a different
    // code path.
    noway_assert(type != TYP_REF || val == 0);

    if (val == 0)
    {
        instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
    }
    else
    {
        // TODO-XArch-CQ: needs all the optimized cases
        getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(type), reg, val);
    }
}
/*****************************************************************************
 *
 *   Generate code to check that the GS cookie wasn't thrashed by a buffer
 *   overrun. If pushReg is true, preserve all registers around the code sequence.
 *   Otherwise, ECX could be modified.
 *
 *   Implementation Note: pushReg = true, in case of tail calls.
 */
void CodeGen::genEmitGSCookieCheck(bool pushReg)
{
    noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);

    // Make sure that EAX is reported as live GC-ref so that any GC that kicks in while
    // executing the GS cookie check will not collect the object pointed to by EAX.
    //
    // For Amd64 System V, a two-register-returned struct could be returned in RAX and RDX.
    // In such a case, make sure that the correct GC-ness of RDX is reported as well, so
    // that a GC object pointed to by RDX will not be collected.
    if (!pushReg)
    {
        // Handle multi-reg return type values
        if (compiler->compMethodReturnsMultiRegRetType())
        {
            ReturnTypeDesc retTypeDesc;
            if (varTypeIsLong(compiler->info.compRetNativeType))
            {
                retTypeDesc.InitializeLongReturnType(compiler);
            }
            else // we must have a struct return type
            {
                retTypeDesc.InitializeStructReturnType(compiler, compiler->info.compMethodInfo->args.retTypeClass);
            }

            unsigned regCount = retTypeDesc.GetReturnRegCount();

            // Only the x86 and x64 Unix ABIs allow multi-reg returns, and the
            // number of result regs should equal MAX_RET_REG_COUNT.
            assert(regCount == MAX_RET_REG_COUNT);

            for (unsigned i = 0; i < regCount; ++i)
            {
                gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i));
            }
        }
97 else if (compiler->compMethodReturnsRetBufAddr())
99 // This is for returning in an implicit RetBuf.
100 // If the address of the buffer is returned in REG_INTRET, mark the content of INTRET as ByRef.
102 // In case the return is in an implicit RetBuf, the native return type should be a struct
103 assert(varTypeIsStruct(compiler->info.compRetNativeType));
105 gcInfo.gcMarkRegPtrVal(REG_INTRET, TYP_BYREF);
107 // ... all other cases.
110 #ifdef _TARGET_AMD64_
111 // For x64, structs that are not returned in registers are always
112 // returned in implicit RetBuf. If we reached here, we should not have
113 // a RetBuf and the return type should not be a struct.
114 assert(compiler->info.compRetBuffArg == BAD_VAR_NUM);
115 assert(!varTypeIsStruct(compiler->info.compRetNativeType));
116 #endif // _TARGET_AMD64_
118 // For x86 Windows we can't make such assertions since we generate code for returning of
119 // the RetBuf in REG_INTRET only when the ProfilerHook is enabled. Otherwise
120 // compRetNativeType could be TYP_STRUCT.
121 gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType);
    regNumber regGSCheck;
    regMaskTP regMaskGSCheck = RBM_NONE;

    if (!pushReg)
    {
        // Non-tail call: we can use any callee trash register that is not
        // a return register and does not contain the 'this' pointer (keep alive this), since
        // we are generating the GS cookie check after a GT_RETURN block.
        // Note: On Amd64 System V RDX is an arg register - REG_ARG_2 - as well
        // as return register for two-register-returned structs.
        if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister &&
            (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ARG_0))
        {
            regGSCheck = REG_ARG_1;
        }
        else
        {
            regGSCheck = REG_ARG_0;
        }
    }
    else
    {
#ifdef _TARGET_X86_
        // It doesn't matter which register we pick, since we're going to save and restore it
        // around the check.
        // TODO-CQ: Can we optimize the choice of register to avoid doing the push/pop sometimes?
        regGSCheck     = REG_EAX;
        regMaskGSCheck = RBM_EAX;
#else  // !_TARGET_X86_
        // Tail calls from methods that need GS check: We need to preserve registers while
        // emitting GS cookie check for a tail prefixed call or a jmp. To emit GS cookie
        // check, we might need a register. This won't be an issue for jmp calls for the
        // reason mentioned below (see comment starting with "Jmp Calls:").
        //
        // The following are the possible solutions in case of tail prefixed calls:
        // 1) Use R11 - ignore tail prefix on calls that need to pass a param in R11 when
        //    present in methods that require GS cookie check. Rest of the tail calls that
        //    do not require R11 will be honored.
        // 2) Internal register - GT_CALL node reserves an internal register and emits GS
        //    cookie check as part of tail call codegen. GenExitCode() needs to special case
        //    fast tail calls implemented as epilog+jmp, or such tail calls should always get
        //    dispatched via helper.
        // 3) Materialize GS cookie check as a separate node hanging off the GT_CALL node in
        //    the right execution order during rationalization.
        //
        // There are two calls that use R11: VSD and calli pinvokes with cookie param. Tail
        // prefix on pinvokes is ignored. That is, options 2 and 3 will allow tail prefixed
        // VSD calls from methods that need GS check.
        //
        // Tail prefixed calls: Right now for Jit64 compat, a method requiring GS cookie check
        // ignores the tail prefix. In future, if we intend to support tail calls from such a method,
        // consider one of the options mentioned above. For now adding an assert that we don't
        // expect to see a tail call in a method that requires GS check.
        noway_assert(!compiler->compTailCallUsed);

        // Jmp calls: specify a method handle using which the JIT queries the VM for its entry point
        // address; hence it can neither be a VSD call nor a PInvoke calli with a cookie
        // parameter. Therefore, in case of jmp calls it is safe to use R11.
        regGSCheck = REG_R11;
#endif // !_TARGET_X86_
    }
    regMaskTP byrefPushedRegs = RBM_NONE;
    regMaskTP norefPushedRegs = RBM_NONE;
    regMaskTP pushedRegs      = RBM_NONE;

    if (compiler->gsGlobalSecurityCookieAddr == nullptr)
    {
#if defined(_TARGET_AMD64_)
        // If the GS cookie value fits within 32 bits, we can use 'cmp mem64, imm32'.
        // Otherwise, load the value into a reg and use 'cmp mem64, reg64'.
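        // (The 32-bit immediate form works because 'cmp mem64, imm32' sign-extends the
        // immediate to 64 bits; the check below verifies that the cookie value survives
        // an int round-trip, i.e. that it is representable as a sign-extended imm32.)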
        if ((int)compiler->gsGlobalSecurityCookieVal != (ssize_t)compiler->gsGlobalSecurityCookieVal)
        {
            genSetRegToIcon(regGSCheck, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
            getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
        }
        else
#endif // defined(_TARGET_AMD64_)
        {
            assert((int)compiler->gsGlobalSecurityCookieVal == (ssize_t)compiler->gsGlobalSecurityCookieVal);
            getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
                                      (int)compiler->gsGlobalSecurityCookieVal);
        }
    }
    else
    {
        // Ngen case - GS cookie value needs to be accessed through an indirection.

        pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs);

        instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
        getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSCheck, regGSCheck, 0);
        getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
    }

    BasicBlock*  gsCheckBlk = genCreateTempLabel();
    emitJumpKind jmpEqual   = genJumpKindForOper(GT_EQ, CK_SIGNED);
    inst_JMP(jmpEqual, gsCheckBlk);
    genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
    genDefineTempLabel(gsCheckBlk);

    genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs);
}
BasicBlock* CodeGen::genCallFinally(BasicBlock* block)
{
#if FEATURE_EH_FUNCLETS
    // Generate a call to the finally, like this:
    //      mov         rcx,qword ptr [rbp + 20H]       // Load rcx with PSPSym
    //      call        finally-funclet
    //      jmp         finally-return                  // Only for non-retless finally calls
    // The jmp can be a NOP if we're going to the next block.
    // If we're generating code for the main function (not a funclet), and there is no localloc,
    // then RSP at this point is the same value as that stored in the PSPSym. So just copy RSP
    // instead of loading the PSPSym in this case, or if PSPSym is not used (CoreRT ABI).

    if ((compiler->lvaPSPSym == BAD_VAR_NUM) ||
        (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT)))
    {
#ifndef UNIX_X86_ABI
        inst_RV_RV(INS_mov, REG_ARG_0, REG_SPBASE, TYP_I_IMPL);
#endif // !UNIX_X86_ABI
    }
    else
    {
        getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0);
    }
    getEmitter()->emitIns_J(INS_call, block->bbJumpDest);

    if (block->bbFlags & BBF_RETLESS_CALL)
    {
        // We have a retless call, and the last instruction generated was a call.
        // If the next block is in a different EH region (or is the end of the code
        // block), then we need to generate a breakpoint here (since it will never
        // get executed) to get proper unwind behavior.

        if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
        {
            instGen(INS_BREAKPOINT); // This should never get executed
        }
    }
    else
    {
// TODO-Linux-x86: Do we need to handle the GC information for this NOP or JMP specially, as is done for other
// architectures?
#ifndef JIT32_GCENCODER
        // Because of the way the flowgraph is connected, the liveness info for this one instruction
        // after the call is not (can not be) correct in cases where a variable has a last use in the
        // handler. So turn off GC reporting for this single instruction.
        getEmitter()->emitDisableGC();
#endif // JIT32_GCENCODER

        // Now go to where the finally funclet needs to return to.
        if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
        {
            // Fall-through.
            // TODO-XArch-CQ: Can we get rid of this instruction, and just have the call return directly
            // to the next instruction? This would depend on stack walking from within the finally
            // handler working without this instruction being in this special EH region.
            instGen(INS_nop);
        }
        else
        {
            inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
        }

#ifndef JIT32_GCENCODER
        getEmitter()->emitEnableGC();
#endif // JIT32_GCENCODER
    }

#else // !FEATURE_EH_FUNCLETS
    // If we are about to invoke a finally locally from a try block, we have to set the ShadowSP slot
    // corresponding to the finally's nesting level. When invoked in response to an exception, the
    // EE does this.
    //
    // We have a BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
    //
    // We will emit :
    //      mov [ebp - (n + 1)], 0
    //      mov [ebp -  n     ], 0xFC
    //      push &step
    //      jmp  finallyBlock

    noway_assert(isFramePointerUsed());

    // Get the nesting level which contains the finally
    unsigned finallyNesting = 0;
    compiler->fgGetNestingLevel(block, &finallyNesting);

    // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
    unsigned filterEndOffsetSlotOffs;
    filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);

    unsigned curNestingSlotOffs;
    curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE));

    // Zero out the slot for the next nesting level
    instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
                               curNestingSlotOffs - TARGET_POINTER_SIZE);
    instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
                               curNestingSlotOffs);

    // Now push the address where the finally funclet should return to directly.
    if (!(block->bbFlags & BBF_RETLESS_CALL))
    {
        assert(block->isBBCallAlwaysPair());
        getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
    }
    else
    {
        // EE expects a DWORD, so we give it 0
        inst_IV(INS_push_hide, 0);
    }

    // Jump to the finally BB
    inst_JMP(EJ_jmp, block->bbJumpDest);

#endif // !FEATURE_EH_FUNCLETS
    // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
    // jump target using bbJumpDest - that is already used to point
    // to the finally block. So just skip past the BBJ_ALWAYS unless the
    // block is RETLESS.
    if (!(block->bbFlags & BBF_RETLESS_CALL))
    {
        assert(block->isBBCallAlwaysPair());
        block = block->bbNext;
    }
    return block;
}
#if FEATURE_EH_FUNCLETS
void CodeGen::genEHCatchRet(BasicBlock* block)
{
    // Set RAX to the address the VM should return to after the catch.
    // Generate a RIP-relative
    //         lea reg, [rip + disp32] ; the RIP is implicit
    // which will be position-independent.
    getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, block->bbJumpDest, REG_INTRET);
}

#else // !FEATURE_EH_FUNCLETS
void CodeGen::genEHFinallyOrFilterRet(BasicBlock* block)
{
    // The last statement of the block must be a GT_RETFILT, which has already been generated.
    assert(block->lastNode() != nullptr);
    assert(block->lastNode()->OperGet() == GT_RETFILT);

    if (block->bbJumpKind == BBJ_EHFINALLYRET)
    {
        assert(block->lastNode()->gtOp.gtOp1 == nullptr); // op1 == nullptr means endfinally

        // Return using a pop-jmp sequence. As the "try" block calls
        // the finally with a jmp, this leaves the x86 call-ret stack
        // balanced in the normal flow path.

        noway_assert(isFramePointerRequired());
        inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
        inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
    }
    else
    {
        assert(block->bbJumpKind == BBJ_EHFILTERRET);

        // The return value has already been computed.
        instGen_Return(0);
    }
}

#endif // !FEATURE_EH_FUNCLETS
// Move an immediate value into an integer register

void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
{
    // reg cannot be a FP register
    assert(!genIsValidFloatReg(reg));

    if (!compiler->opts.compReloc)
    {
        size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
    }

    if ((imm == 0) && !EA_IS_RELOC(size))
    {
        instGen_Set_Reg_To_Zero(size, reg, flags);
    }
    else
    {
        if (genDataIndirAddrCanBeEncodedAsPCRelOffset(imm))
        {
            getEmitter()->emitIns_R_AI(INS_lea, EA_PTR_DSP_RELOC, reg, imm);
        }
        else
        {
            getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
        }
    }
    regTracker.rsTrackRegIntCns(reg, imm);
}
/***********************************************************************************
 *
 * Generate code to set a register 'targetReg' of type 'targetType' to the constant
 * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call
 * genProduceReg() on the target register.
 */
void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTree* tree)
{
    switch (tree->gtOper)
    {
        case GT_CNS_INT:
        {
            // relocatable values tend to come down as a CNS_INT of native int type
            // so the line between these two opcodes is kind of blurry
            GenTreeIntConCommon* con    = tree->AsIntConCommon();
            ssize_t              cnsVal = con->IconValue();

            if (con->ImmedValNeedsReloc(compiler))
            {
                instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal);
                regTracker.rsTrackRegTrash(targetReg);
            }
            else
            {
                genSetRegToIcon(targetReg, cnsVal, targetType);
            }
        }
        break;

        case GT_CNS_DBL:
        {
            emitter* emit       = getEmitter();
            emitAttr size       = emitTypeSize(targetType);
            double   constValue = tree->gtDblCon.gtDconVal;

            // Make sure we use "xorps reg, reg" only for +ve zero constant (0.0) and not for -ve zero (-0.0)
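            // (-0.0 is not all-zero bits: it has the sign bit set, i.e. the pattern
            // 0x8000000000000000, so the raw 64-bit compare below correctly rejects it
            // while accepting +0.0, whose bit pattern is zero.)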
            if (*(__int64*)&constValue == 0)
            {
                // A faster/smaller way to generate 0
                emit->emitIns_R_R(INS_xorps, size, targetReg, targetReg);
            }
            else
            {
                CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(constValue, size);
                emit->emitIns_R_C(ins_Load(targetType), size, targetReg, hnd, 0);
            }
        }
        break;

        default:
            unreached();
    }
}
//------------------------------------------------------------------------
// genCodeForNegNot: Produce code for a GT_NEG/GT_NOT node.
//
// Arguments:
//    tree - the node
//
void CodeGen::genCodeForNegNot(GenTree* tree)
{
    assert(tree->OperIs(GT_NEG, GT_NOT));

    regNumber targetReg  = tree->gtRegNum;
    var_types targetType = tree->TypeGet();

    if (varTypeIsFloating(targetType))
    {
        assert(tree->gtOper == GT_NEG);
        genSSE2BitwiseOp(tree);
    }
    else
    {
        GenTree* operand = tree->gtGetOp1();
        assert(operand->isUsedFromReg());
        regNumber operandReg = genConsumeReg(operand);

        if (operandReg != targetReg)
        {
            inst_RV_RV(INS_mov, targetReg, operandReg, targetType);
        }

        instruction ins = genGetInsForOper(tree->OperGet(), targetType);
        inst_RV(ins, targetReg, targetType);
    }

    genProduceReg(tree);
}
// Generate code to get the high N bits of a N*N=2N bit multiplication result
void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
{
    assert(!treeNode->gtOverflowEx());

    regNumber targetReg  = treeNode->gtRegNum;
    var_types targetType = treeNode->TypeGet();
    emitter*  emit       = getEmitter();
    emitAttr  size       = emitTypeSize(treeNode);
    GenTree*  op1        = treeNode->gtOp.gtOp1;
    GenTree*  op2        = treeNode->gtOp.gtOp2;

    // to get the high bits of the multiply, we are constrained to using the
    // 1-op form:  RDX:RAX = RAX * rm
    // The 3-op form (Rx=Ry*Rz) does not support it.

    genConsumeOperands(treeNode->AsOp());

    GenTree* regOp = op1;
    GenTree* rmOp  = op2;

    // Set rmOp to the memory operand (if any)
    if (op1->isUsedFromMemory() || (op2->isUsedFromReg() && (op2->gtRegNum == REG_RAX)))
    {
        regOp = op2;
        rmOp  = op1;
    }
    assert(regOp->isUsedFromReg());

    // Setup targetReg when neither of the source operands was a matching register
    if (regOp->gtRegNum != REG_RAX)
    {
        inst_RV_RV(ins_Copy(targetType), REG_RAX, regOp->gtRegNum, targetType);
    }

    instruction ins;
    if ((treeNode->gtFlags & GTF_UNSIGNED) == 0)
    {
        ins = INS_imulEAX;
    }
    else
    {
        ins = INS_mulEAX;
    }
    emit->emitInsBinary(ins, size, treeNode, rmOp);

    // Move the result to the desired register, if necessary
    if (treeNode->OperGet() == GT_MULHI && targetReg != REG_RDX)
    {
        inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
    }

    genProduceReg(treeNode);
}
#ifdef _TARGET_X86_

//------------------------------------------------------------------------
// genCodeForLongUMod: Generate code for a tree of the form
//                     `(umod (gt_long x y) (const int))`
//
// Arguments:
//   node - the node for which to generate code
//
void CodeGen::genCodeForLongUMod(GenTreeOp* node)
{
    assert(node != nullptr);
    assert(node->OperGet() == GT_UMOD);
    assert(node->TypeGet() == TYP_INT);

    GenTreeOp* const dividend = node->gtOp1->AsOp();
    assert(dividend->OperGet() == GT_LONG);
    assert(varTypeIsLong(dividend));

    genConsumeOperands(node);

    GenTree* const dividendLo = dividend->gtOp1;
    GenTree* const dividendHi = dividend->gtOp2;
    assert(dividendLo->isUsedFromReg());
    assert(dividendHi->isUsedFromReg());

    GenTree* const divisor = node->gtOp2;
    assert(divisor->gtSkipReloadOrCopy()->OperGet() == GT_CNS_INT);
    assert(divisor->gtSkipReloadOrCopy()->isUsedFromReg());
    assert(divisor->gtSkipReloadOrCopy()->AsIntCon()->gtIconVal >= 2);
    assert(divisor->gtSkipReloadOrCopy()->AsIntCon()->gtIconVal <= 0x3fffffff);

    // dividendLo must be in RAX; dividendHi must be in RDX
    genCopyRegIfNeeded(dividendLo, REG_EAX);
    genCopyRegIfNeeded(dividendHi, REG_EDX);

    // At this point, EAX:EDX contains the 64bit dividend and op2->gtRegNum
    // contains the 32bit divisor. We want to generate the following code:
    //
    //   cmp edx, divisor->gtRegNum
    //   jb noOverflow
    //
    //   mov temp, eax
    //   mov eax, edx
    //   xor edx, edx
    //   div divisor->gtRegNum
    //   mov eax, temp
    //
    // noOverflow:
    //   div divisor->gtRegNum
    //
    // This works because (a * 2^32 + b) % c = ((a % c) * 2^32 + b) % c.
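    //
    // (The guard is needed because a 32-bit 'div' raises #DE when the quotient does not
    // fit in 32 bits, which happens exactly when the high word is >= the divisor. The
    // overflow path first reduces the high word modulo the divisor, so the final divide
    // cannot fault. For example, with a = 7, b = 2, c = 5: (7 * 2^32 + 2) % 5 ==
    // ((7 % 5) * 2^32 + 2) % 5 == (2 * 2^32 + 2) % 5, and now the high word 2 < 5.)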
    BasicBlock* const noOverflow = genCreateTempLabel();

    //   cmp edx, divisor->gtRegNum
    //   jb noOverflow
    inst_RV_RV(INS_cmp, REG_EDX, divisor->gtRegNum);
    inst_JMP(EJ_jb, noOverflow);

    //   mov temp, eax
    //   mov eax, edx
    //   xor edx, edx
    //   div divisor->gtRegNum
    //   mov eax, temp
    const regNumber tempReg = node->GetSingleTempReg();
    inst_RV_RV(INS_mov, tempReg, REG_EAX, TYP_INT);
    inst_RV_RV(INS_mov, REG_EAX, REG_EDX, TYP_INT);
    instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
    inst_RV(INS_div, divisor->gtRegNum, TYP_INT);
    inst_RV_RV(INS_mov, REG_EAX, tempReg, TYP_INT);

    // noOverflow:
    //   div divisor->gtRegNum
    genDefineTempLabel(noOverflow);
    inst_RV(INS_div, divisor->gtRegNum, TYP_INT);

    const regNumber targetReg = node->gtRegNum;
    if (targetReg != REG_EDX)
    {
        inst_RV_RV(INS_mov, targetReg, REG_RDX, TYP_INT);
    }

    genProduceReg(node);
}
#endif // _TARGET_X86_
//------------------------------------------------------------------------
// genCodeForDivMod: Generate code for a DIV or MOD operation.
//
// Arguments:
//    treeNode - the node to generate the code for
//
void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
{
    assert(treeNode->OperIs(GT_DIV, GT_UDIV, GT_MOD, GT_UMOD));

    // We shouldn't be seeing GT_MOD on float/double args as it should get morphed into a
    // helper call by the front-end. Similarly, we shouldn't be seeing GT_UDIV and GT_UMOD
    // on float/double args.
    assert(treeNode->OperIs(GT_DIV) || !varTypeIsFloating(treeNode));

    GenTree* dividend = treeNode->gtOp1;

#ifdef _TARGET_X86_
    if (varTypeIsLong(dividend->TypeGet()))
    {
        genCodeForLongUMod(treeNode);
        return;
    }
#endif // _TARGET_X86_

    GenTree*   divisor    = treeNode->gtOp2;
    genTreeOps oper       = treeNode->OperGet();
    emitAttr   size       = emitTypeSize(treeNode);
    regNumber  targetReg  = treeNode->gtRegNum;
    var_types  targetType = treeNode->TypeGet();
    emitter*   emit       = getEmitter();

    // dividend is in a register.
    assert(dividend->isUsedFromReg());

    genConsumeOperands(treeNode->AsOp());
    if (varTypeIsFloating(targetType))
    {
        // Floating point div/rem operation
        assert(oper == GT_DIV || oper == GT_MOD);

        if (dividend->gtRegNum == targetReg)
        {
            emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
        }
        else if (divisor->isUsedFromReg() && divisor->gtRegNum == targetReg)
        {
            // It is not possible to generate 2-operand divss or divsd where reg2 = reg1 / reg2
            // because divss/divsd reg1, reg2 will over-write reg1. Therefore, in case of AMD64
            // LSRA has to make sure that such a register assignment is not generated for floating
            // point div/rem operations.
            noway_assert(
                !"GT_DIV/GT_MOD (float): case of reg2 = reg1 / reg2, LSRA should never generate such a reg assignment");
        }
        else
        {
            inst_RV_RV(ins_Copy(targetType), targetReg, dividend->gtRegNum, targetType);
            emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
        }
    }
    else
    {
        // dividend must be in RAX
        genCopyRegIfNeeded(dividend, REG_RAX);

        // zero or sign extend rax to rdx
        if (oper == GT_UMOD || oper == GT_UDIV)
        {
            instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
        }
        else
        {
            emit->emitIns(INS_cdq, size);
            // the cdq instruction writes RDX, so clear the gcInfo for RDX
            gcInfo.gcMarkRegSetNpt(RBM_RDX);
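            // (With an 8-byte operand size the same opcode is emitted with REX.W and acts
            // as 'cqo'; either way it sign-extends RAX into RDX:RAX, the dividend form
            // that idiv expects.)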
        }

        // Perform the 'targetType' (64-bit or 32-bit) divide instruction
        instruction ins;
        if (oper == GT_UMOD || oper == GT_UDIV)
        {
            ins = INS_div;
        }
        else
        {
            ins = INS_idiv;
        }

        emit->emitInsBinary(ins, size, treeNode, divisor);

        // DIV/IDIV instructions always store the quotient in RAX and the remainder in RDX.
        // Move the result to the desired register, if necessary
        if (oper == GT_DIV || oper == GT_UDIV)
        {
            if (targetReg != REG_RAX)
            {
                inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
            }
        }
        else
        {
            assert((oper == GT_MOD) || (oper == GT_UMOD));
            if (targetReg != REG_RDX)
            {
                inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
            }
        }
    }
    genProduceReg(treeNode);
}
//------------------------------------------------------------------------
// genCodeForBinary: Generate code for many binary arithmetic operators
// The caller is expected to have called genConsumeOperands() before calling this method.
//
// Arguments:
//    treeNode - The binary operation for which we are generating code.
//
// Return Value:
//    None.
//
// Notes:
//    Mul and div variants have special constraints on x64 so are not handled here.
//    See the assert below for the operators that are handled.

void CodeGen::genCodeForBinary(GenTree* treeNode)
{
    const genTreeOps oper       = treeNode->OperGet();
    regNumber        targetReg  = treeNode->gtRegNum;
    var_types        targetType = treeNode->TypeGet();
    emitter*         emit       = getEmitter();

#if defined(_TARGET_64BIT_)
    assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD || oper == GT_SUB);
#else  // !defined(_TARGET_64BIT_)
    assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD_LO || oper == GT_ADD_HI ||
           oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_ADD || oper == GT_SUB);
#endif // !defined(_TARGET_64BIT_)
    GenTree* op1 = treeNode->gtGetOp1();
    GenTree* op2 = treeNode->gtGetOp2();

    // Commutative operations can mark op1 as contained or reg-optional to generate "op reg, memop/immed"
    if (!op1->isUsedFromReg())
    {
        assert(treeNode->OperIsCommutative());
        assert(op1->isMemoryOp() || op1->IsLocal() || op1->IsCnsNonZeroFltOrDbl() || op1->IsIntCnsFitsInI32() ||
               op1->IsRegOptional());

        op1 = treeNode->gtGetOp2();
        op2 = treeNode->gtGetOp1();
    }

    instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);

    // The arithmetic node must be sitting in a register (since it's not contained)
    noway_assert(targetReg != REG_NA);

    regNumber op1reg = op1->isUsedFromReg() ? op1->gtRegNum : REG_NA;
    regNumber op2reg = op2->isUsedFromReg() ? op2->gtRegNum : REG_NA;

    GenTree* dst;
    GenTree* src;

    // This is the case of reg1 = reg1 op reg2
    // We're ready to emit the instruction without any moves
    if (op1reg == targetReg)
    {
        dst = op1;
        src = op2;
    }
    // We have reg1 = reg2 op reg1
    // In order for this operation to be correct
    // we need that op is a commutative operation so
    // we can convert it into reg1 = reg1 op reg2 and emit
    // the same code as above
    else if (op2reg == targetReg)
    {
        noway_assert(GenTree::OperIsCommutative(oper));
        dst = op2;
        src = op1;
    }
    // now we know there are 3 different operands so attempt to use LEA
    else if (oper == GT_ADD && !varTypeIsFloating(treeNode) && !treeNode->gtOverflowEx() // LEA does not set flags
             && (op2->isContainedIntOrIImmed() || op2->isUsedFromReg()) && !treeNode->gtSetFlags())
    {
        if (op2->isContainedIntOrIImmed())
        {
            emit->emitIns_R_AR(INS_lea, emitTypeSize(treeNode), targetReg, op1reg,
                               (int)op2->AsIntConCommon()->IconValue());
        }
        else
        {
            assert(op2reg != REG_NA);
            emit->emitIns_R_ARX(INS_lea, emitTypeSize(treeNode), targetReg, op1reg, op2reg, 1, 0);
        }
        genProduceReg(treeNode);
        return;
    }
    else
    {
        // dest, op1 and op2 registers are different:
        // reg3 = reg1 op reg2
        // We can implement this by issuing a mov:
        // reg3 = reg1
        // reg3 = reg3 op reg2
        dst = treeNode;
        src = op2;
        inst_RV_RV(ins_Copy(targetType), targetReg, op1reg, targetType);
        regTracker.rsTrackRegCopy(targetReg, op1reg);
        gcInfo.gcMarkRegPtrVal(targetReg, targetType);
    }

    // try to use an inc or dec
    if (oper == GT_ADD && !varTypeIsFloating(treeNode) && src->isContainedIntOrIImmed() && !treeNode->gtOverflowEx())
    {
        if (src->IsIntegralConst(1))
        {
            emit->emitIns_R(INS_inc, emitTypeSize(treeNode), targetReg);
            genProduceReg(treeNode);
            return;
        }
        else if (src->IsIntegralConst(-1))
        {
            emit->emitIns_R(INS_dec, emitTypeSize(treeNode), targetReg);
            genProduceReg(treeNode);
            return;
        }
    }
    regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src);
    noway_assert(r == targetReg);

    if (treeNode->gtOverflowEx())
    {
#if !defined(_TARGET_64BIT_)
        assert(oper == GT_ADD || oper == GT_SUB || oper == GT_ADD_HI || oper == GT_SUB_HI);
#else
        assert(oper == GT_ADD || oper == GT_SUB);
#endif
        genCheckOverflow(treeNode);
    }
    genProduceReg(treeNode);
}
//------------------------------------------------------------------------
// genCodeForMul: Generate code for a MUL operation.
//
// Arguments:
//    treeNode - the node to generate the code for
//
void CodeGen::genCodeForMul(GenTreeOp* treeNode)
{
    assert(treeNode->OperIs(GT_MUL));

    regNumber targetReg  = treeNode->gtRegNum;
    var_types targetType = treeNode->TypeGet();
    emitter*  emit       = getEmitter();

    instruction ins;
    emitAttr    size                  = emitTypeSize(treeNode);
    bool        isUnsignedMultiply    = ((treeNode->gtFlags & GTF_UNSIGNED) != 0);
    bool        requiresOverflowCheck = treeNode->gtOverflowEx();

    GenTree* op1 = treeNode->gtGetOp1();
    GenTree* op2 = treeNode->gtGetOp2();

    // there are 3 forms of x64 multiply:
    // 1-op form with 128 result: RDX:RAX = RAX * rm
    // 2-op form: reg *= rm
    // 3-op form: reg = rm * imm

    genConsumeOperands(treeNode->AsOp());

    // This matches the 'mul' lowering in Lowering::SetMulOpCounts()
    //
    // immOp :: Only one operand can be an immediate
    // rmOp  :: Only one operand can be a memory op.
    // regOp :: A register op (especially the operand that matches 'targetReg')
    //          (can be nullptr when we have both a memory op and an immediate op)

    GenTree* immOp = nullptr;
    GenTree* rmOp  = op1;
    GenTree* regOp;

    if (op2->isContainedIntOrIImmed())
    {
        immOp = op2;
    }
    else if (op1->isContainedIntOrIImmed())
    {
        immOp = op1;
        rmOp  = op2;
    }

    if (immOp != nullptr)
    {
        // This must be a non-floating point operation.
        assert(!varTypeIsFloating(treeNode));

        // CQ: When possible use LEA for mul by imm 3, 5 or 9
        ssize_t imm = immOp->AsIntConCommon()->IconValue();

        if (!requiresOverflowCheck && rmOp->isUsedFromReg() && ((imm == 3) || (imm == 5) || (imm == 9)))
        {
            // We will use the LEA instruction to perform this multiply
            // Note that an LEA with base=x, index=x and scale=(imm-1) computes x*imm when imm=3, 5 or 9.
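            // e.g. for imm == 5, "lea targetReg, [x + x*4]" computes x + 4*x == 5*x in a
            // single instruction that, unlike imul, leaves the flags untouched. (The
            // hardware scale is limited to 1, 2, 4 or 8, hence only imm 3, 5 and 9.)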
            unsigned int scale = (unsigned int)(imm - 1);
            getEmitter()->emitIns_R_ARX(INS_lea, size, targetReg, rmOp->gtRegNum, rmOp->gtRegNum, scale, 0);
        }
        else if (!requiresOverflowCheck && rmOp->isUsedFromReg() && (imm == genFindLowestBit(imm)) && (imm != 0))
        {
            // Use shift for constant multiply when legal
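            // ((imm == genFindLowestBit(imm)) holds exactly when imm has a single bit set,
            // i.e. is a power of two, so x * imm == x << log2(imm); e.g. x * 8 == x << 3.)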
            uint64_t     zextImm     = static_cast<uint64_t>(static_cast<size_t>(imm));
            unsigned int shiftAmount = genLog2(zextImm);

            if (targetReg != rmOp->gtRegNum)
            {
                // Copy reg src to dest register
                inst_RV_RV(ins_Copy(targetType), targetReg, rmOp->gtRegNum, targetType);
            }
            inst_RV_SH(INS_shl, size, targetReg, shiftAmount);
        }
        else
        {
            // use the 3-op form with immediate
            ins = getEmitter()->inst3opImulForReg(targetReg);
            emit->emitInsBinary(ins, size, rmOp, immOp);
        }
    }
    else // we have no contained immediate operand
    {
        regOp = op1;
        rmOp  = op2;

        regNumber mulTargetReg = targetReg;
        if (isUnsignedMultiply && requiresOverflowCheck)
        {
            ins          = INS_mulEAX;
            mulTargetReg = REG_RAX;
        }
        else
        {
            ins = genGetInsForOper(GT_MUL, targetType);
        }

        // Set rmOp to the memory operand (if any)
        // or set regOp to the op2 when it has the matching target register for our multiply op
        //
        if (op1->isUsedFromMemory() || (op2->isUsedFromReg() && (op2->gtRegNum == mulTargetReg)))
        {
            regOp = op2;
            rmOp  = op1;
        }
        assert(regOp->isUsedFromReg());

        // Setup targetReg when neither of the source operands was a matching register
        if (regOp->gtRegNum != mulTargetReg)
        {
            inst_RV_RV(ins_Copy(targetType), mulTargetReg, regOp->gtRegNum, targetType);
        }

        emit->emitInsBinary(ins, size, treeNode, rmOp);

        // Move the result to the desired register, if necessary
        if ((ins == INS_mulEAX) && (targetReg != REG_RAX))
        {
            inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
        }
    }

    if (requiresOverflowCheck)
    {
        // Overflow checking is only used for non-floating point types
        noway_assert(!varTypeIsFloating(treeNode));

        genCheckOverflow(treeNode);
    }

    genProduceReg(treeNode);
}
//------------------------------------------------------------------------
// isStructReturn: Returns whether the 'treeNode' is returning a struct.
//
// Arguments:
//    treeNode - The tree node to evaluate whether it is a struct return.
//
// Return Value:
//    For AMD64 *nix: returns true if the 'treeNode' is a GT_RETURN node of type struct.
//                    Otherwise returns false.
//    For other platforms: always returns false.
//
bool CodeGen::isStructReturn(GenTree* treeNode)
{
    // This method could be called for 'treeNode' of GT_RET_FILT or GT_RETURN.
    // For the GT_RET_FILT, the return is always
    // a bool or a void, for the end of a finally block.
    noway_assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
    if (treeNode->OperGet() != GT_RETURN)
    {
        return false;
    }

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
    return varTypeIsStruct(treeNode);
#else  // !FEATURE_UNIX_AMD64_STRUCT_PASSING
    assert(!varTypeIsStruct(treeNode));
    return false;
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
}
//------------------------------------------------------------------------
// genStructReturn: Generates code for returning a struct.
//
// Arguments:
//    treeNode - The GT_RETURN tree node.
//
// Return Value:
//    None
//
// Assumption:
//    op1 of GT_RETURN node is either GT_LCL_VAR or multi-reg GT_CALL
//
void CodeGen::genStructReturn(GenTree* treeNode)
{
    assert(treeNode->OperGet() == GT_RETURN);
    GenTree* op1 = treeNode->gtGetOp1();

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
    if (op1->OperGet() == GT_LCL_VAR)
    {
        GenTreeLclVarCommon* lclVar = op1->AsLclVarCommon();
        LclVarDsc*           varDsc = &(compiler->lvaTable[lclVar->gtLclNum]);
        assert(varDsc->lvIsMultiRegRet);

        ReturnTypeDesc retTypeDesc;
        retTypeDesc.InitializeStructReturnType(compiler, varDsc->lvVerTypeInfo.GetClassHandle());
        unsigned regCount = retTypeDesc.GetReturnRegCount();
        assert(regCount == MAX_RET_REG_COUNT);

        if (varTypeIsEnregisterableStruct(op1))
        {
            // Right now the only enregistrable structs supported are SIMD vector types.
            assert(varTypeIsSIMD(op1));
            assert(op1->isUsedFromReg());

            // This is the case where the operand is in a single reg and needs to be
            // returned in multiple ABI return registers.
            regNumber opReg = genConsumeReg(op1);
            regNumber reg0  = retTypeDesc.GetABIReturnReg(0);
            regNumber reg1  = retTypeDesc.GetABIReturnReg(1);

            if (opReg != reg0 && opReg != reg1)
            {
                // Operand reg is different from return regs.
                // Copy opReg to reg0 and let it be handled by one of the
                // cases below.
                inst_RV_RV(ins_Copy(TYP_DOUBLE), reg0, opReg, TYP_DOUBLE);
                opReg = reg0;
            }

            if (opReg == reg0)
            {
                assert(opReg != reg1);

                // reg0 - already has required 8-byte in bit position [63:0].
                // reg1 = opReg.
                // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0].
                inst_RV_RV(ins_Copy(TYP_DOUBLE), reg1, opReg, TYP_DOUBLE);
            }
            else
            {
                assert(opReg == reg1);

                // reg0 = opReg.
                // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0].
                inst_RV_RV(ins_Copy(TYP_DOUBLE), reg0, opReg, TYP_DOUBLE);
            }
            inst_RV_RV_IV(INS_shufpd, EA_16BYTE, reg1, reg1, 0x01);
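            // (shufpd with identical source and destination and imm8 == 0x01 writes the old
            // high qword to bits [63:0] and the old low qword to bits [127:64], i.e. it
            // swaps the two 8-byte halves of reg1 in place.)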
        }
        else
        {
            assert(op1->isUsedFromMemory());

            // Copy var on stack into ABI return registers
            int offset = 0;
            for (unsigned i = 0; i < regCount; ++i)
            {
                var_types type = retTypeDesc.GetReturnRegType(i);
                regNumber reg  = retTypeDesc.GetABIReturnReg(i);
                getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), reg, lclVar->gtLclNum, offset);
                offset += genTypeSize(type);
            }
        }
    }
    else
    {
        assert(op1->IsMultiRegCall() || op1->IsCopyOrReloadOfMultiRegCall());

        genConsumeRegs(op1);

        GenTree*        actualOp1   = op1->gtSkipReloadOrCopy();
        GenTreeCall*    call        = actualOp1->AsCall();
        ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
        unsigned        regCount    = retTypeDesc->GetReturnRegCount();
        assert(regCount == MAX_RET_REG_COUNT);

        // Handle circular dependency between call allocated regs and ABI return regs.
        //
        // It is possible under LSRA stress that originally allocated regs of call node,
        // say rax and rdx, are spilled and reloaded to rdx and rax respectively. But
        // GT_RETURN needs to move values as follows: rdx->rax, rax->rdx. A similar
        // kind of circular dependency could arise between the xmm0 and xmm1 return regs.
        // Codegen is expected to handle such circular dependencies.
        //
        var_types regType0      = retTypeDesc->GetReturnRegType(0);
        regNumber returnReg0    = retTypeDesc->GetABIReturnReg(0);
        regNumber allocatedReg0 = call->GetRegNumByIdx(0);

        var_types regType1      = retTypeDesc->GetReturnRegType(1);
        regNumber returnReg1    = retTypeDesc->GetABIReturnReg(1);
        regNumber allocatedReg1 = call->GetRegNumByIdx(1);

        if (op1->IsCopyOrReload())
        {
            // GT_COPY/GT_RELOAD will have valid reg for those positions
            // that need to be copied or reloaded.
            regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0);
            if (reloadReg != REG_NA)
            {
                allocatedReg0 = reloadReg;
            }

            reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(1);
            if (reloadReg != REG_NA)
            {
                allocatedReg1 = reloadReg;
            }
        }

        if (allocatedReg0 == returnReg1 && allocatedReg1 == returnReg0)
        {
            // Circular dependency - swap allocatedReg0 and allocatedReg1
            if (varTypeIsFloating(regType0))
            {
                assert(varTypeIsFloating(regType1));

                // The fastest way to swap two XMM regs is using PXOR
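                // (three XORs exchange two registers without a scratch register:
                // a ^= b; b ^= a; a ^= b leaves a and b swapped)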
                inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE);
                inst_RV_RV(INS_pxor, allocatedReg1, allocatedReg0, TYP_DOUBLE);
                inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE);
            }
            else
            {
                assert(varTypeIsIntegral(regType0));
                assert(varTypeIsIntegral(regType1));
                inst_RV_RV(INS_xchg, allocatedReg1, allocatedReg0, TYP_I_IMPL);
            }
        }
        else if (allocatedReg1 == returnReg0)
        {
            // Change the order of moves to correctly handle dependency.
            if (allocatedReg1 != returnReg1)
            {
                inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1);
            }

            if (allocatedReg0 != returnReg0)
            {
                inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0);
            }
        }
        else
        {
            // No circular dependency case.
            if (allocatedReg0 != returnReg0)
            {
                inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0);
            }

            if (allocatedReg1 != returnReg1)
            {
                inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1);
            }
        }
    }
#else
    unreached();
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
}
#if defined(_TARGET_X86_)

//------------------------------------------------------------------------
// genFloatReturn: Generates code for float return statement for x86.
//
// Note: treeNode's and op1's registers are already consumed.
//
// Arguments:
//    treeNode - The GT_RETURN or GT_RETFILT tree node with float type.
//
// Return Value:
//    None
//
void CodeGen::genFloatReturn(GenTree* treeNode)
{
    assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
    assert(varTypeIsFloating(treeNode));

    GenTree* op1 = treeNode->gtGetOp1();
    // Spill the return value register from an XMM register to the stack, then load it on the x87 stack.
    // If it already has a home location, use that. Otherwise, we need a temp.
    if (genIsRegCandidateLocal(op1) && compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvOnFrame)
    {
        if (compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegNum != REG_STK)
        {
            op1->gtFlags |= GTF_SPILL;
            inst_TT_RV(ins_Store(op1->gtType, compiler->isSIMDTypeLocalAligned(op1->gtLclVarCommon.gtLclNum)), op1,
                       op1->gtRegNum);
        }
        // Now, load it to the fp stack.
        getEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->gtLclNum, 0);
    }
    else
    {
        // Spill the value, which should be in a register, then load it to the fp stack.
        // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet).
        op1->gtFlags |= GTF_SPILL;
        regSet.rsSpillTree(op1->gtRegNum, op1);
        op1->gtFlags |= GTF_SPILLED;
        op1->gtFlags &= ~GTF_SPILL;

        TempDsc* t = regSet.rsUnspillInPlace(op1, op1->gtRegNum);
        inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0);
        op1->gtFlags &= ~GTF_SPILLED;
        compiler->tmpRlsTemp(t);
    }
}
#endif // _TARGET_X86_
//------------------------------------------------------------------------
// genCodeForCompare: Produce code for a GT_EQ/GT_NE/GT_LT/GT_LE/GT_GE/GT_GT/GT_TEST_EQ/GT_TEST_NE/GT_CMP node.
//
// Arguments:
//    tree - the node
//
void CodeGen::genCodeForCompare(GenTreeOp* tree)
{
    assert(tree->OperIs(GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT, GT_TEST_EQ, GT_TEST_NE, GT_CMP));

    // TODO-XArch-CQ: Check if we can use the currently set flags.
    // TODO-XArch-CQ: Check for the case where we can simply transfer the carry bit to a register
    //         (signed < or >= where targetReg != REG_NA)

    GenTree*  op1     = tree->gtOp1;
    var_types op1Type = op1->TypeGet();

    if (varTypeIsFloating(op1Type))
    {
        genCompareFloat(tree);
    }
    else
    {
        genCompareInt(tree);
    }
}
//------------------------------------------------------------------------
// genCodeForBT: Generates code for a GT_BT node.
//
// Arguments:
//    bt - the node
//
void CodeGen::genCodeForBT(GenTreeOp* bt)
{
    assert(bt->OperIs(GT_BT));

    GenTree*  op1  = bt->gtGetOp1();
    GenTree*  op2  = bt->gtGetOp2();
    var_types type = genActualType(op1->TypeGet());

    assert(op1->isUsedFromReg() && op2->isUsedFromReg());
    assert((genTypeSize(type) >= genTypeSize(TYP_INT)) && (genTypeSize(type) <= genTypeSize(TYP_I_IMPL)));

    genConsumeOperands(bt);
    // Note that the emitter doesn't fully support INS_bt, it only supports the reg,reg
    // form and encodes the registers in reverse order. To get the correct order we need
    // to reverse the operands when calling emitIns_R_R.
    getEmitter()->emitIns_R_R(INS_bt, emitTypeSize(type), op2->gtRegNum, op1->gtRegNum);
}
//------------------------------------------------------------------------
// genCodeForJumpTrue: Generates code for jmpTrue statement.
//
// Arguments:
//    tree - The GT_JTRUE tree node.
//
// Return Value:
//    None
//
void CodeGen::genCodeForJumpTrue(GenTree* tree)
{
    GenTree* cmp = tree->gtOp.gtOp1;

    assert(cmp->OperIsCompare());
    assert(compiler->compCurBB->bbJumpKind == BBJ_COND);

#if !defined(_TARGET_64BIT_)
    // Long-typed compares should have been handled by Lowering::LowerCompare.
    assert(!varTypeIsLong(cmp->gtGetOp1()));
#endif

    // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp
    // is governed by a flag NOT by the inherent type of the node
    // TODO-XArch-CQ: Check if we can use the currently set flags.
    emitJumpKind jumpKind[2];
    bool         branchToTrueLabel[2];
    genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel);
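    // (Two jump kinds can be needed because a single x86 conditional branch cannot
    // represent every floating point relation: an ordered equality, for instance, must
    // first branch away on PF (the unordered/NaN case) before JE can test equality.)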
    BasicBlock* skipLabel = nullptr;
    if (jumpKind[0] != EJ_NONE)
    {
        BasicBlock* jmpTarget;
        if (branchToTrueLabel[0])
        {
            jmpTarget = compiler->compCurBB->bbJumpDest;
        }
        else
        {
            // This case arises only for ordered GT_EQ right now
            assert((cmp->gtOper == GT_EQ) && ((cmp->gtFlags & GTF_RELOP_NAN_UN) == 0));
            skipLabel = genCreateTempLabel();
            jmpTarget = skipLabel;
        }

        inst_JMP(jumpKind[0], jmpTarget);
    }

    if (jumpKind[1] != EJ_NONE)
    {
        // the second conditional branch always has to be to the true label
        assert(branchToTrueLabel[1]);
        inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest);
    }

    if (skipLabel != nullptr)
    {
        genDefineTempLabel(skipLabel);
    }
}
//------------------------------------------------------------------------
// genCodeForJcc: Produce code for a GT_JCC node.
//
// Arguments:
//    tree - the node
//
void CodeGen::genCodeForJcc(GenTreeCC* tree)
{
    assert(compiler->compCurBB->bbJumpKind == BBJ_COND);

    CompareKind  compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
    emitJumpKind jumpKind    = genJumpKindForOper(tree->gtCondition, compareKind);

    inst_JMP(jumpKind, compiler->compCurBB->bbJumpDest);
}
//------------------------------------------------------------------------
// genCodeForSetcc: Generates a setcc instruction for a GT_SETCC node.
//
// Arguments:
//    setcc - the GT_SETCC node
//
// Assumptions:
//    The condition represents an integer comparison. This code doesn't
//    have the necessary logic to deal with floating point comparisons,
//    in fact it doesn't even know if the comparison is integer or floating
//    point because SETCC nodes do not have any operands.
//
void CodeGen::genCodeForSetcc(GenTreeCC* setcc)
{
    regNumber    dstReg      = setcc->gtRegNum;
    CompareKind  compareKind = setcc->IsUnsigned() ? CK_UNSIGNED : CK_SIGNED;
    emitJumpKind jumpKind    = genJumpKindForOper(setcc->gtCondition, compareKind);

    assert(genIsValidIntReg(dstReg) && isByteReg(dstReg));
    // Make sure nobody is setting GTF_RELOP_NAN_UN on this node as it is ignored.
    assert((setcc->gtFlags & GTF_RELOP_NAN_UN) == 0);

    inst_SET(jumpKind, dstReg);
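    // (setcc writes only the low byte of dstReg, leaving the upper bits stale, so the
    // move-with-zero-extend below is required to materialize a clean 0/1 value.)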
    inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), dstReg, dstReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
    genProduceReg(setcc);
}
//------------------------------------------------------------------------
// genCodeForReturnTrap: Produce code for a GT_RETURNTRAP node.
//
// Arguments:
//    tree - the GT_RETURNTRAP node
//
void CodeGen::genCodeForReturnTrap(GenTreeOp* tree)
{
    assert(tree->OperGet() == GT_RETURNTRAP);

    // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
    // based on the contents of 'data'

    GenTree* data = tree->gtOp1;
    genConsumeRegs(data);
    GenTreeIntCon cns = intForm(TYP_INT, 0);
    cns.SetContained();
    getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(TYP_INT), data, &cns);

    BasicBlock* skipLabel = genCreateTempLabel();

    emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
    inst_JMP(jmpEqual, skipLabel);

    // emit the call to the EE-helper that stops for GC (or other reasons)
    regNumber tmpReg = tree->GetSingleTempReg(RBM_ALLINT);
    assert(genIsValidIntReg(tmpReg));

    genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN, tmpReg);
    genDefineTempLabel(skipLabel);
}
/*****************************************************************************
 *
 * Generate code for a single node in the tree.
 * Preconditions: All operands have been evaluated
 *
 */
void CodeGen::genCodeForTreeNode(GenTree* treeNode)
{
    regNumber targetReg;
#if !defined(_TARGET_64BIT_)
    if (treeNode->TypeGet() == TYP_LONG)
    {
        // All long enregistered nodes will have been decomposed into their
        // constituent lo and hi nodes.
        targetReg = REG_NA;
    }
    else
#endif // !defined(_TARGET_64BIT_)
    {
        targetReg = treeNode->gtRegNum;
    }

    var_types targetType = treeNode->TypeGet();
    emitter*  emit       = getEmitter();

#ifdef DEBUG
    // Validate that all the operands for the current node are consumed in order.
    // This is important because LSRA ensures that any necessary copies will be
    // handled correctly.
    lastConsumedNode = nullptr;
    if (compiler->verbose)
    {
        unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
        compiler->gtDispLIRNode(treeNode, "Generating: ");
    }
#endif // DEBUG

    // Is this a node whose value is already in a register? LSRA denotes this by
    // setting the GTF_REUSE_REG_VAL flag.
    if (treeNode->IsReuseRegVal())
    {
        // For now, this is only used for constant nodes.
        assert((treeNode->OperIsConst()));
        JITDUMP("  TreeNode is marked ReuseReg\n");
        return;
    }

    // contained nodes are part of their parents for codegen purposes
    // ex : immediates, most LEAs
    if (treeNode->isContained())
    {
        return;
    }
    switch (treeNode->gtOper)
    {
#ifndef JIT32_GCENCODER
        case GT_START_NONGC:
            getEmitter()->emitDisableGC();
            break;
#endif // !defined(JIT32_GCENCODER)

        case GT_PROF_HOOK:
#ifdef PROFILING_SUPPORTED
            // We should be seeing this only if profiler hook is needed
            noway_assert(compiler->compIsProfilerHookNeeded());

            // Right now this node is used only for tail calls. In future if
            // we intend to use it for Enter or Leave hooks, add a data member
            // to this node indicating the kind of profiler hook. For example,
            // helper number can be used.
            genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
#endif // PROFILING_SUPPORTED
            break;

        case GT_LCLHEAP:
            genLclHeap(treeNode);
            break;

        case GT_CNS_INT:
#ifdef _TARGET_X86_
            assert(!treeNode->IsIconHandle(GTF_ICON_TLS_HDL));
#endif // _TARGET_X86_
            __fallthrough;

        case GT_CNS_DBL:
            genSetRegToConst(targetReg, targetType, treeNode);
            genProduceReg(treeNode);
            break;

        case GT_NOT:
        case GT_NEG:
            genCodeForNegNot(treeNode);
            break;

        case GT_MOD:
        case GT_UMOD:
        case GT_DIV:
        case GT_UDIV:
            genCodeForDivMod(treeNode->AsOp());
            break;

        case GT_OR:
        case GT_XOR:
        case GT_AND:
            assert(varTypeIsIntegralOrI(treeNode));

            __fallthrough;

#if !defined(_TARGET_64BIT_)
        case GT_ADD_LO:
        case GT_ADD_HI:
        case GT_SUB_LO:
        case GT_SUB_HI:
#endif // !defined(_TARGET_64BIT_)

        case GT_ADD:
        case GT_SUB:
            genConsumeOperands(treeNode->AsOp());
            genCodeForBinary(treeNode);
            break;

        case GT_MUL:
            genCodeForMul(treeNode->AsOp());
            break;

        case GT_LSH:
        case GT_RSH:
        case GT_RSZ:
        case GT_ROL:
        case GT_ROR:
            genCodeForShift(treeNode);
            break;

#if !defined(_TARGET_64BIT_)

        case GT_LSH_HI:
        case GT_RSH_LO:
            genCodeForShiftLong(treeNode);
            break;

#endif // !defined(_TARGET_64BIT_)

        case GT_CAST:
            genCodeForCast(treeNode->AsOp());
            break;

        case GT_BITCAST:
        {
            GenTree* const op1 = treeNode->AsOp()->gtOp1;
            genConsumeReg(op1);

            const bool srcFltReg = varTypeIsFloating(op1) || varTypeIsSIMD(op1);
            const bool dstFltReg = varTypeIsFloating(treeNode) || varTypeIsSIMD(treeNode);
            if (srcFltReg != dstFltReg)
            {
                instruction ins;
                regNumber   fltReg;
                regNumber   intReg;
                if (dstFltReg)
                {
                    ins    = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet());
                    fltReg = treeNode->gtRegNum;
                    intReg = op1->gtRegNum;
                }
                else
                {
                    ins    = ins_CopyFloatToInt(op1->TypeGet(), treeNode->TypeGet());
                    intReg = treeNode->gtRegNum;
                    fltReg = op1->gtRegNum;
                }
                inst_RV_RV(ins, fltReg, intReg, treeNode->TypeGet());
            }
            else if (treeNode->gtRegNum != op1->gtRegNum)
            {
                inst_RV_RV(ins_Copy(treeNode->TypeGet()), treeNode->gtRegNum, op1->gtRegNum, treeNode->TypeGet());
            }

            genProduceReg(treeNode);
            break;
        }

        case GT_LCL_FLD_ADDR:
        case GT_LCL_VAR_ADDR:
            genCodeForLclAddr(treeNode);
            break;

        case GT_LCL_FLD:
            genCodeForLclFld(treeNode->AsLclFld());
            break;

        case GT_LCL_VAR:
            genCodeForLclVar(treeNode->AsLclVar());
            break;

        case GT_STORE_LCL_FLD:
            genCodeForStoreLclFld(treeNode->AsLclFld());
            break;

        case GT_STORE_LCL_VAR:
            genCodeForStoreLclVar(treeNode->AsLclVar());
            break;

        case GT_RETFILT:
        case GT_RETURN:
            genReturn(treeNode);
            break;

        case GT_LEA:
            // If we are here, it is the case where there is an LEA that cannot be folded into a parent instruction.
            genLeaInstruction(treeNode->AsAddrMode());
            break;

        case GT_INDEX_ADDR:
            genCodeForIndexAddr(treeNode->AsIndexAddr());
            break;

        case GT_IND:
            genCodeForIndir(treeNode->AsIndir());
            break;

        case GT_MULHI:
#ifdef _TARGET_X86_
        case GT_MUL_LONG:
#endif
            genCodeForMulHi(treeNode->AsOp());
            break;

        case GT_INTRINSIC:
            genIntrinsic(treeNode);
            break;

#ifdef FEATURE_SIMD
        case GT_SIMD:
            genSIMDIntrinsic(treeNode->AsSIMD());
            break;
#endif // FEATURE_SIMD

#ifdef FEATURE_HW_INTRINSICS
        case GT_HWIntrinsic:
            genHWIntrinsic(treeNode->AsHWIntrinsic());
            break;
#endif // FEATURE_HW_INTRINSICS

        case GT_CKFINITE:
            genCkfinite(treeNode);
            break;

        case GT_EQ:
        case GT_NE:
        case GT_LT:
        case GT_LE:
        case GT_GE:
        case GT_GT:
        case GT_TEST_EQ:
        case GT_TEST_NE:
        case GT_CMP:
            genCodeForCompare(treeNode->AsOp());
            break;

        case GT_JTRUE:
            genCodeForJumpTrue(treeNode);
            break;

        case GT_JCC:
            genCodeForJcc(treeNode->AsCC());
            break;

        case GT_SETCC:
            genCodeForSetcc(treeNode->AsCC());
            break;

        case GT_BT:
            genCodeForBT(treeNode->AsOp());
            break;

        case GT_RETURNTRAP:
            genCodeForReturnTrap(treeNode->AsOp());
            break;

        case GT_STOREIND:
            genCodeForStoreInd(treeNode->AsStoreInd());
            break;

        case GT_COPY:
            // This is handled at the time we call genConsumeReg() on the GT_COPY
            break;

        case GT_LIST:
        case GT_FIELD_LIST:
            // Should always be marked contained.
            assert(!"LIST, FIELD_LIST nodes should always be marked contained.");
            break;

        case GT_SWAP:
            genCodeForSwap(treeNode->AsOp());
            break;

        case GT_PUTARG_STK:
            genPutArgStk(treeNode->AsPutArgStk());
            break;

        case GT_PUTARG_REG:
            genPutArgReg(treeNode->AsOp());
            break;

        case GT_CALL:
            genCallInstruction(treeNode->AsCall());
            break;

        case GT_JMP:
            genJmpMethod(treeNode);
            break;

        case GT_LOCKADD:
        case GT_XCHG:
        case GT_XADD:
            genLockedInstructions(treeNode->AsOp());
            break;

        case GT_MEMORYBARRIER:
            instGen_MemoryBarrier();
            break;

        case GT_CMPXCHG:
            genCodeForCmpXchg(treeNode->AsCmpXchg());
            break;

        case GT_RELOAD:
            // do nothing - reload is just a marker.
            // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
            // into the register specified in this node.
            break;

        case GT_NOP:
            break;

        case GT_NO_OP:
            getEmitter()->emitIns_Nop(1);
            break;

        case GT_ARR_BOUNDS_CHECK:
#ifdef FEATURE_SIMD
        case GT_SIMD_CHK:
#endif // FEATURE_SIMD
#ifdef FEATURE_HW_INTRINSICS
        case GT_HW_INTRINSIC_CHK:
#endif // FEATURE_HW_INTRINSICS
            genRangeCheck(treeNode);
            break;

        case GT_PHYSREG:
            genCodeForPhysReg(treeNode->AsPhysReg());
            break;

        case GT_NULLCHECK:
            genCodeForNullCheck(treeNode->AsOp());
            break;

        case GT_CATCH_ARG:

            noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));

            /* Catch arguments get passed in a register. genCodeForBBlist()
               would have marked it as holding a GC object, but not used. */

            noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
            genConsumeReg(treeNode);
            break;

#if !FEATURE_EH_FUNCLETS
        case GT_END_LFIN:

            // Have to clear the ShadowSP of the nesting level which encloses the finally. Generates:
            //     mov dword ptr [ebp-0xC], 0  // for some slot of the ShadowSP local var

            unsigned finallyNesting;
            finallyNesting = treeNode->gtVal.gtVal1;
            noway_assert(treeNode->gtVal.gtVal1 < compiler->compHndBBtabCount);
            noway_assert(finallyNesting < compiler->compHndBBtabCount);

            // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
            unsigned filterEndOffsetSlotOffs;
            PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) >
                          TARGET_POINTER_SIZE); // below doesn't underflow.
            filterEndOffsetSlotOffs =
                (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);

            unsigned curNestingSlotOffs;
            curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE);
            instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, curNestingSlotOffs);
            break;
#endif // !FEATURE_EH_FUNCLETS
        case GT_PINVOKE_PROLOG:
            noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);

            // the runtime side requires the codegen here to be consistent
            emit->emitDisableRandomNops();
            break;

        case GT_LABEL:
            genPendingCallLabel       = genCreateTempLabel();
            treeNode->gtLabel.gtLabBB = genPendingCallLabel;
            emit->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, genPendingCallLabel, treeNode->gtRegNum);
            break;

        case GT_STORE_OBJ:
        case GT_STORE_DYN_BLK:
        case GT_STORE_BLK:
            genCodeForStoreBlk(treeNode->AsBlk());
            break;

        case GT_JMPTABLE:
            genJumpTable(treeNode);
            break;

        case GT_SWITCH_TABLE:
            genTableBasedSwitch(treeNode);
            break;

        case GT_ARR_INDEX:
            genCodeForArrIndex(treeNode->AsArrIndex());
            break;

        case GT_ARR_OFFSET:
            genCodeForArrOffset(treeNode->AsArrOffs());
            break;

        case GT_CLS_VAR_ADDR:
            emit->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0);
            genProduceReg(treeNode);
            break;

#if !defined(_TARGET_64BIT_)
        case GT_LONG:
            assert(treeNode->isUsedFromReg());
            genConsumeRegs(treeNode);
            break;
#endif

        case GT_IL_OFFSET:
            // Do nothing; these nodes are simply markers for debug info.
            break;

        default:
        {
#ifdef DEBUG
            char message[256];
            _snprintf_s(message, _countof(message), _TRUNCATE, "NYI: Unimplemented node type %s\n",
                        GenTree::OpName(treeNode->OperGet()));
            NYIRAW(message);
#endif // DEBUG
            assert(!"Unknown node in codegen");
        }
        break;
    }
}
//----------------------------------------------------------------------------------
// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
//
// Arguments:
//    treeNode - GenTree of GT_STORE_LCL_VAR
//
// Return Value:
//    None
//
// Assumption:
//    The child of store is a multi-reg call node.
//    genProduceReg() on treeNode is made by caller of this routine.
//
void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode)
{
    assert(treeNode->OperGet() == GT_STORE_LCL_VAR);

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
    // Structs of size >= 9 and <= 16 are returned in two return registers on x64 Unix.
    assert(varTypeIsStruct(treeNode));

    // Assumption: current x64 Unix implementation requires that a multi-reg struct
    // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
    // being struct promoted.
    unsigned   lclNum = treeNode->AsLclVarCommon()->gtLclNum;
    LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
    noway_assert(varDsc->lvIsMultiRegRet);

    GenTree*     op1       = treeNode->gtGetOp1();
    GenTree*     actualOp1 = op1->gtSkipReloadOrCopy();
    GenTreeCall* call      = actualOp1->AsCall();
    assert(call->HasMultiRegRetVal());

    genConsumeRegs(op1);

    ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
    assert(retTypeDesc->GetReturnRegCount() == MAX_RET_REG_COUNT);
    unsigned regCount = retTypeDesc->GetReturnRegCount();

    if (treeNode->gtRegNum != REG_NA)
    {
        // Right now the only enregistrable structs supported are SIMD types.
        assert(varTypeIsSIMD(treeNode));
        assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(0)));
        assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(1)));

        // This is the case where the two 8-byte halves that comprise the operand are in
        // two different xmm registers and need to be assembled into a single
        // xmm register.
        regNumber targetReg = treeNode->gtRegNum;
        regNumber reg0      = call->GetRegNumByIdx(0);
        regNumber reg1      = call->GetRegNumByIdx(1);

        if (op1->IsCopyOrReload())
        {
            // GT_COPY/GT_RELOAD will have valid reg for those positions
            // that need to be copied or reloaded.
            regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0);
            if (reloadReg != REG_NA)
            {
                reg0 = reloadReg;
            }

            reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(1);
            if (reloadReg != REG_NA)
            {
                reg1 = reloadReg;
            }
        }

        if (targetReg != reg0 && targetReg != reg1)
        {
            // Copy reg0 into targetReg and let it be handled by one
            // of the cases below.
            inst_RV_RV(ins_Copy(TYP_DOUBLE), targetReg, reg0, TYP_DOUBLE);
            reg0 = targetReg;
        }

        if (targetReg == reg0)
        {
            // targetReg[63:0] = targetReg[63:0]
            // targetReg[127:64] = reg1[127:64]
            inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg1, 0x00);
        }
        else
        {
            assert(targetReg == reg1);

            // We need two shuffles to achieve this
            // First:
            //     targetReg[63:0] = targetReg[63:0]
            //     targetReg[127:64] = reg0[63:0]
            //
            // Second:
            //     targetReg[63:0] = targetReg[127:64]
            //     targetReg[127:64] = targetReg[63:0]
            //
            // Essentially copy low 8-bytes from reg0 to high 8-bytes of targetReg
            // and next swap low and high 8-bytes of targetReg to have them
            // rearranged in the right order.
            inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg0, 0x00);
            inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, targetReg, 0x01);
        }
    }
    else
    {
        // Stack store
        int offset = 0;
        for (unsigned i = 0; i < regCount; ++i)
        {
            var_types type = retTypeDesc->GetReturnRegType(i);
            regNumber reg  = call->GetRegNumByIdx(i);
            if (op1->IsCopyOrReload())
            {
                // GT_COPY/GT_RELOAD will have valid reg for those positions
                // that need to be copied or reloaded.
                regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
                if (reloadReg != REG_NA)
                {
                    reg = reloadReg;
                }
            }

            assert(reg != REG_NA);
            getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
            offset += genTypeSize(type);
        }

        varDsc->lvRegNum = REG_STK;
    }
2079 #elif defined(_TARGET_X86_)
2080 // Longs are returned in two return registers on x86.
2081 assert(varTypeIsLong(treeNode));
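// For example, a TYP_LONG call result comes back in EDX:EAX on x86
// (low 32 bits in EAX, high 32 bits in EDX).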
2083 // Assumption: current x86 implementation requires that a multi-reg long
2084 // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
2085 // being promoted.
2086 unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
2087 LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
2088 noway_assert(varDsc->lvIsMultiRegRet);
2090 GenTree* op1 = treeNode->gtGetOp1();
2091 GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
2092 GenTreeCall* call = actualOp1->AsCall();
2093 assert(call->HasMultiRegRetVal());
2095 genConsumeRegs(op1);
2097 ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
2098 unsigned regCount = retTypeDesc->GetReturnRegCount();
2099 assert(regCount == MAX_RET_REG_COUNT);
2103 for (unsigned i = 0; i < regCount; ++i)
2105 var_types type = retTypeDesc->GetReturnRegType(i);
2106 regNumber reg = call->GetRegNumByIdx(i);
2107 if (op1->IsCopyOrReload())
2109 // GT_COPY/GT_RELOAD will have valid reg for those positions
2110 // that need to be copied or reloaded.
2111 regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
2112 if (reloadReg != REG_NA)
2118 assert(reg != REG_NA);
2119 getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
2120 offset += genTypeSize(type);
2123 varDsc->lvRegNum = REG_STK;
2124 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING && !_TARGET_X86_
2125 assert(!"Unreached");
2126 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING && !_TARGET_X86_
2129 //------------------------------------------------------------------------
2130 // genLclHeap: Generate code for localloc.
2133 // tree - the localloc tree to generate.
2136 // Note that for x86, we don't track ESP movements while generating the localloc code.
2137 // The ESP tracking is used to report stack pointer-relative GC info, which is not
2138 // interesting while doing the localloc construction. Also, functions with localloc
2139 // use EBP frames, with EBP-relative locals, and ESP-relative accesses only for function
2140 // call arguments. We store the ESP after the localloc is complete in the LocAllocSP
2141 // variable. This variable is implicitly reported to the VM in the GC info (its position
2142 // is defined by convention relative to other items), and is used by the GC to find the
2143 // "base" stack pointer in functions with localloc.
2145 void CodeGen::genLclHeap(GenTree* tree)
2147 assert(tree->OperGet() == GT_LCLHEAP);
2148 assert(compiler->compLocallocUsed);
2150 GenTree* size = tree->gtOp.gtOp1;
2151 noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
2153 regNumber targetReg = tree->gtRegNum;
2154 regNumber regCnt = REG_NA;
2155 var_types type = genActualType(size->gtType);
2156 emitAttr easz = emitTypeSize(type);
2157 BasicBlock* endLabel = nullptr;
2161 if (compiler->opts.compStackCheckOnRet)
2163 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
2164 compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
2165 compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
2166 getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
2168 BasicBlock* esp_check = genCreateTempLabel();
2169 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
2170 inst_JMP(jmpEqual, esp_check);
2171 getEmitter()->emitIns(INS_BREAKPOINT);
2172 genDefineTempLabel(esp_check);
2176 noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
2177 noway_assert(genStackLevel == 0); // Can't have anything on the stack
2179 unsigned stackAdjustment = 0;
2180 BasicBlock* loop = nullptr;
2182 // Compute the amount of memory to allocate, rounding up to a STACK_ALIGN boundary.
2184 if (size->IsCnsIntOrI())
2186 // If size is a constant, then it must be contained.
2187 assert(size->isContained());
2189 // If amount is zero then return null in targetReg
2190 amount = size->gtIntCon.gtIconVal;
2193 instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg);
2197 // 'amount' is the total number of bytes to localloc; round it up to STACK_ALIGN
2198 amount = AlignUp(amount, STACK_ALIGN);
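// For example, a localloc request of 17 bytes on x64 (STACK_ALIGN == 16) is rounded up to 32 here.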
2202 // The localloc requested memory size is non-constant.
2204 // Put the size value in targetReg. If it is zero, bail out by returning null in targetReg.
2205 genConsumeRegAndCopy(size, targetReg);
2206 endLabel = genCreateTempLabel();
2207 getEmitter()->emitIns_R_R(INS_test, easz, targetReg, targetReg);
2208 inst_JMP(EJ_je, endLabel);
2210 // Compute the size of the block to allocate and perform alignment.
2211 // If compInitMem=true, we can reuse targetReg as regcnt,
2212 // since we don't need any internal registers.
2213 if (compiler->info.compInitMem)
2215 assert(tree->AvailableTempRegCount() == 0);
2220 regCnt = tree->ExtractTempReg();
2221 if (regCnt != targetReg)
2223 // Above, we put the size in targetReg. Now, copy it to our new temp register if necessary.
2224 inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet());
2228 // Round up the number of bytes to allocate to a STACK_ALIGN boundary. This is done
2229 // by code like:
2230 //      add reg, 15
2231 //      and reg, -16
2232 // However, in the initialized memory case, we need the count of STACK_ALIGN-sized
2233 // elements, not a byte count, after the alignment. So instead of the "and", which
2234 // becomes unnecessary, generate a shift, e.g.:
2235 //      add reg, 15
2236 //      shr reg, 4
2238 inst_RV_IV(INS_add, regCnt, STACK_ALIGN - 1, emitActualTypeSize(type));
2240 if (compiler->info.compInitMem)
2242 // Convert the count from a count of bytes to a loop count. We will loop once per
2243 // stack alignment size, so each iteration will zero 4 bytes on x86 and 16 bytes on x64.
2244 // Note that we zero a single reg-size word per iteration on x86, and 2 reg-size
2245 // words per iteration on x64. We will shift off all the stack alignment bits
2246 // added above, so there is no need for an 'and' instruction.
2248 // --- shr regCnt, 2 (or 4) ---
2249 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT_ALL);
2253 // Otherwise, mask off the low bits to align the byte count.
2254 inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
2258 #if FEATURE_FIXED_OUT_ARGS
2259 // If we have an outgoing arg area then we must adjust the SP by popping off the
2260 // outgoing arg area. We will restore it right before we return from this method.
2262 // Localloc returns stack space that is aligned to STACK_ALIGN bytes. The following
2263 // are the cases that need to be handled:
2264 // i) Method has out-going arg area.
2265 // It is guaranteed that the size of the out-going arg area is STACK_ALIGN'ed (see fgMorphArgs).
2266 // Therefore, we will pop off the out-going arg area from RSP before allocating the localloc space.
2267 // ii) Method has no out-going arg area.
2268 // Nothing to pop off from the stack.
2269 if (compiler->lvaOutgoingArgSpaceSize > 0)
2271 assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain
2272 // aligned
2273 inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
2274 stackAdjustment += compiler->lvaOutgoingArgSpaceSize;
2278 if (size->IsCnsIntOrI())
2280 // We should reach here only for non-zero, constant size allocations.
2282 assert((amount % STACK_ALIGN) == 0);
2283 assert((amount % REGSIZE_BYTES) == 0);
2285 // For small allocations we will generate up to six 'push 0' instructions inline
2286 size_t cntRegSizedWords = amount / REGSIZE_BYTES;
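// For example (illustrative), a 24-byte localloc on x64 gives cntRegSizedWords == 3,
// so we emit three 'push 0' instructions, each allocating and zeroing 8 bytes.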
2287 if (cntRegSizedWords <= 6)
2289 for (; cntRegSizedWords != 0; cntRegSizedWords--)
2291 inst_IV(INS_push_hide, 0); // push_hide means don't track the stack
2296 bool doNoInitLessThanOnePageAlloc =
2297 !compiler->info.compInitMem && (amount < compiler->eeGetPageSize()); // must be < not <=
2300 bool needRegCntRegister = true;
2301 #else // !_TARGET_X86_
2302 bool needRegCntRegister = !doNoInitLessThanOnePageAlloc;
2303 #endif // !_TARGET_X86_
2305 if (needRegCntRegister)
2307 // If compInitMem=true, we can reuse targetReg as regcnt.
2308 // Since size is a constant, regCnt is not yet initialized.
2309 assert(regCnt == REG_NA);
2310 if (compiler->info.compInitMem)
2312 assert(tree->AvailableTempRegCount() == 0);
2317 regCnt = tree->ExtractTempReg();
2321 if (doNoInitLessThanOnePageAlloc)
2323 // Since the size is less than a page, simply adjust ESP.
2324 // ESP might already be in the guard page, so we must touch it BEFORE
2325 // the alloc, not after.
2326 CLANG_FORMAT_COMMENT_ANCHOR;
2329 // For x86, we don't want to use "sub ESP" because we don't want the emitter to track the adjustment
2330 // to ESP. So do the work in the count register.
2331 // TODO-CQ: manipulate ESP directly, to share code, reduce #ifdefs, and improve CQ. This would require
2332 // creating a way to temporarily turn off the emitter's tracking of ESP, maybe marking instrDescs as "don't
2333 // track".
2334 inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
2335 getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
2336 inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE);
2337 inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL);
2338 #else // !_TARGET_X86_
2339 getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
2340 inst_RV_IV(INS_sub, REG_SPBASE, amount, EA_PTRSIZE);
2341 #endif // !_TARGET_X86_
2346 // else, "mov regCnt, amount"
2348 if (compiler->info.compInitMem)
2350 // When initializing memory, we want 'amount' to be the loop count.
2351 assert((amount % STACK_ALIGN) == 0);
2352 amount /= STACK_ALIGN;
2355 genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? TYP_INT : TYP_LONG);
2358 loop = genCreateTempLabel();
2359 if (compiler->info.compInitMem)
2361 // At this point 'regCnt' is set to the number of loop iterations for this loop, if each
2362 // iteration zeros (and subtracts from the stack pointer) STACK_ALIGN bytes.
2363 // Since we have to zero out the allocated memory AND ensure that RSP is always valid
2364 // by tickling the pages, we will just push 0's on the stack.
2366 assert(genIsValidIntReg(regCnt));
2369 genDefineTempLabel(loop);
2371 static_assert_no_msg((STACK_ALIGN % REGSIZE_BYTES) == 0);
2372 unsigned const count = (STACK_ALIGN / REGSIZE_BYTES);
2374 for (unsigned i = 0; i < count; i++)
2376 inst_IV(INS_push_hide, 0); // --- push REG_SIZE bytes of 0
2378 // Note that the stack must always be aligned to STACK_ALIGN bytes
2380 // Decrement the loop counter and loop if not done.
2381 inst_RV(INS_dec, regCnt, TYP_I_IMPL);
2382 inst_JMP(EJ_jne, loop);
2386 // At this point 'regCnt' is set to the total number of bytes to localloc.
2388 // We don't need to zero out the allocated memory. However, we do have
2389 // to tickle the pages to ensure that ESP is always valid and is
2390 // in sync with the "stack guard page". Note that in the worst
2391 // case ESP is on the last byte of the guard page. Thus you must
2392 // touch ESP+0 first, not ESP+0x1000.
2394 // Another subtlety is that you don't want ESP to be exactly on the
2395 // boundary of the guard page because PUSH is pre-decrement; thus
2396 // call setup would not touch the guard page but just beyond it.
2398 // Note that we go through a few hoops so that ESP never points to
2399 // illegal pages at any time during the tickling process
2402 // add REGCNT, ESP // reg now holds ultimate ESP
2403 // jb loop // result is smaller than original ESP (no wrap around)
2404 // xor REGCNT, REGCNT // Overflow, pick lowest possible number
2406 // test ESP, [ESP+0] // tickle the page
2408 // sub REGTMP, GetOsPageSize()
2415 inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
2416 inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
2417 inst_JMP(EJ_jb, loop);
2419 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
2421 genDefineTempLabel(loop);
2423 // Tickle the decremented value and then move it back to ESP. Note that
2424 // the tickle has to be done BEFORE the update of ESP, since ESP might
2425 // already be on the guard page. It is OK to leave the final value of
2426 // ESP on the guard page.
2427 getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
2429 // This is a harmless trick to avoid the emitter trying to track the
2430 // decrement of the ESP - we do the subtraction in another reg instead
2431 // of adjusting ESP directly.
2432 regNumber regTmp = tree->GetSingleTempReg();
2434 inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL);
2435 inst_RV_IV(INS_sub, regTmp, compiler->eeGetPageSize(), EA_PTRSIZE);
2436 inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL);
2438 inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
2439 inst_JMP(EJ_jae, loop);
2441 // Move the final value to ESP
2442 inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
2446 // Re-adjust SP to allocate out-going arg area
2447 if (stackAdjustment > 0)
2449 assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
2450 inst_RV_IV(INS_sub, REG_SPBASE, stackAdjustment, EA_PTRSIZE);
2453 // Return the stackalloc'ed address in result register.
2454 // TargetReg = RSP + stackAdjustment.
2455 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, targetReg, REG_SPBASE, stackAdjustment);
2457 if (endLabel != nullptr)
2459 genDefineTempLabel(endLabel);
2464 // Write the lvaLocAllocSPvar stack frame slot
2465 if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
2467 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
2471 if (compiler->opts.compNeedStackProbes)
2473 genGenerateStackProbe();
2479 if (compiler->opts.compStackCheckOnRet)
2481 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
2482 compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
2483 compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
2484 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
2488 genProduceReg(tree);
2491 void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode)
2493 assert(storeBlkNode->OperIs(GT_STORE_OBJ, GT_STORE_DYN_BLK, GT_STORE_BLK));
2495 if (storeBlkNode->OperIs(GT_STORE_OBJ) && storeBlkNode->OperIsCopyBlkOp() && !storeBlkNode->gtBlkOpGcUnsafe)
2497 assert(storeBlkNode->AsObj()->gtGcPtrCount != 0);
2498 genCodeForCpObj(storeBlkNode->AsObj());
2502 #ifdef JIT32_GCENCODER
2503 assert(!storeBlkNode->gtBlkOpGcUnsafe);
2505 if (storeBlkNode->gtBlkOpGcUnsafe)
2507 getEmitter()->emitDisableGC();
2509 #endif // JIT32_GCENCODER
2511 bool isCopyBlk = storeBlkNode->OperIsCopyBlkOp();
2513 switch (storeBlkNode->gtBlkOpKind)
2515 #ifdef _TARGET_AMD64_
2516 case GenTreeBlk::BlkOpKindHelper:
2519 genCodeForCpBlk(storeBlkNode);
2523 genCodeForInitBlk(storeBlkNode);
2526 #endif // _TARGET_AMD64_
2527 case GenTreeBlk::BlkOpKindRepInstr:
2530 genCodeForCpBlkRepMovs(storeBlkNode);
2534 genCodeForInitBlkRepStos(storeBlkNode);
2537 case GenTreeBlk::BlkOpKindUnroll:
2540 genCodeForCpBlkUnroll(storeBlkNode);
2544 genCodeForInitBlkUnroll(storeBlkNode);
2551 #ifndef JIT32_GCENCODER
2552 if (storeBlkNode->gtBlkOpGcUnsafe)
2554 getEmitter()->emitEnableGC();
2556 #endif // !defined(JIT32_GCENCODER)
2560 //------------------------------------------------------------------------
2561 // genCodeForInitBlkRepStos: Generate code for InitBlk using rep stos.
2564 // initBlkNode - The Block store for which we are generating code.
2568 // On x64: the size of the buffer must be a constant and also less than INITBLK_STOS_LIMIT bytes.
2569 // For any value larger than that, we'll use the helper even if both the fill byte and the
2570 // size are integer constants.
2571 // On x86:
2572 // the size must either be a non-constant or less than INITBLK_STOS_LIMIT bytes.
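// For illustration (x64 register names): after genConsumeBlockOp places dstAddr
// in RDI, the fill value in RAX, and the size in RCX, the block initialization
// reduces to a single
//      rep stosb
// instruction.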
2574 void CodeGen::genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode)
2576 // Make sure we got the arguments of the initblk/initobj operation in the right registers.
2577 unsigned size = initBlkNode->Size();
2578 GenTree* dstAddr = initBlkNode->Addr();
2579 GenTree* initVal = initBlkNode->Data();
2580 if (initVal->OperIsInitVal())
2582 initVal = initVal->gtGetOp1();
2586 assert(dstAddr->isUsedFromReg());
2587 assert(initVal->isUsedFromReg());
2588 #ifdef _TARGET_AMD64_
2591 if (initVal->IsCnsIntOrI())
2593 #ifdef _TARGET_AMD64_
2594 assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
2596 // Note that a size of zero means a non-constant size.
2597 assert((size == 0) || (size > CPBLK_UNROLL_LIMIT));
2603 genConsumeBlockOp(initBlkNode, REG_RDI, REG_RAX, REG_RCX);
2604 instGen(INS_r_stosb);
2607 // Generate code for InitBlk by performing a loop unroll.
2608 // Preconditions:
2609 // a) Both the size and fill byte value are integer constants.
2610 // b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes.
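// For example (an illustrative sketch, not exact emitter output), zero-initializing
// 24 bytes on x64 unrolls to roughly:
//      xorps  xmm0, xmm0               ; zero the SSE temp
//      movdqu xmmword ptr [dst], xmm0  ; one 16-byte store
//      mov    qword ptr [dst+16], rax  ; rax holds the (zero) fill value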
2612 void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
2614 // Make sure we got the arguments of the initblk/initobj operation in the right registers
2615 unsigned size = initBlkNode->Size();
2616 GenTree* dstAddr = initBlkNode->Addr();
2617 GenTree* initVal = initBlkNode->Data();
2618 if (initVal->OperIsInitVal())
2620 initVal = initVal->gtGetOp1();
2623 assert(dstAddr->isUsedFromReg());
2624 assert(initVal->isUsedFromReg() || (initVal->IsIntegralConst(0) && ((size & 0xf) == 0)));
2626 assert(size <= INITBLK_UNROLL_LIMIT);
2627 assert(initVal->gtSkipReloadOrCopy()->IsCnsIntOrI());
2629 emitter* emit = getEmitter();
2631 genConsumeOperands(initBlkNode);
2633 // If the initVal was moved, or spilled and reloaded to a different register,
2634 // get the original initVal from below the GT_RELOAD, but only after capturing the valReg,
2635 // which needs to be the new register.
2636 regNumber valReg = initVal->gtRegNum;
2637 initVal = initVal->gtSkipReloadOrCopy();
2639 unsigned offset = 0;
2641 // Perform an unroll using SSE2 loads and stores.
2642 if (size >= XMM_REGSIZE_BYTES)
2644 regNumber tmpReg = initBlkNode->GetSingleTempReg();
2645 assert(genIsValidFloatReg(tmpReg));
2647 if (initVal->gtIntCon.gtIconVal != 0)
2649 emit->emitIns_R_R(INS_mov_i2xmm, EA_PTRSIZE, tmpReg, valReg);
2650 emit->emitIns_R_R(INS_punpckldq, EA_8BYTE, tmpReg, tmpReg);
2652 // For x86, we need one more punpckldq to widen it from 8 bytes to 16 bytes.
2653 emit->emitIns_R_R(INS_punpckldq, EA_8BYTE, tmpReg, tmpReg);
2654 #endif // _TARGET_X86_
2658 emit->emitIns_R_R(INS_xorps, EA_8BYTE, tmpReg, tmpReg);
2661 // Determine how many 16 byte slots we're going to fill using SSE movs.
2662 size_t slots = size / XMM_REGSIZE_BYTES;
2666 emit->emitIns_AR_R(INS_movdqu, EA_8BYTE, tmpReg, dstAddr->gtRegNum, offset);
2667 offset += XMM_REGSIZE_BYTES;
2671 // Fill the remainder (or a < 16 byte sized struct)
2672 if ((size & 8) != 0)
2675 // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
2676 emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
2678 emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
2680 #else // !_TARGET_X86_
2682 emit->emitIns_AR_R(INS_mov, EA_8BYTE, valReg, dstAddr->gtRegNum, offset);
2685 #endif // !_TARGET_X86_
2687 if ((size & 4) != 0)
2689 emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
2692 if ((size & 2) != 0)
2694 emit->emitIns_AR_R(INS_mov, EA_2BYTE, valReg, dstAddr->gtRegNum, offset);
2697 if ((size & 1) != 0)
2699 emit->emitIns_AR_R(INS_mov, EA_1BYTE, valReg, dstAddr->gtRegNum, offset);
2703 // Generates code for InitBlk by calling the VM memset helper function.
2704 // Preconditions:
2705 // a) The size argument of the InitBlk is not an integer constant.
2706 // b) The size argument of the InitBlk is >= INITBLK_STOS_LIMIT bytes.
2707 void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
2709 #ifdef _TARGET_AMD64_
2710 // Make sure we got the arguments of the initblk operation in the right registers
2711 unsigned blockSize = initBlkNode->Size();
2712 GenTree* dstAddr = initBlkNode->Addr();
2713 GenTree* initVal = initBlkNode->Data();
2714 if (initVal->OperIsInitVal())
2716 initVal = initVal->gtGetOp1();
2719 assert(dstAddr->isUsedFromReg());
2720 assert(initVal->isUsedFromReg());
2724 assert(blockSize >= CPBLK_MOVS_LIMIT);
2727 genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
2729 genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
2730 #else // !_TARGET_AMD64_
2731 NYI_X86("Helper call for InitBlk");
2732 #endif // !_TARGET_AMD64_
2735 // Generate code for a load from some address + offset
2736 // baseNode: tree node which can be either a local address or arbitrary node
2737 // offset: distance from the baseNode from which to load
2738 void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* baseNode, unsigned offset)
2740 emitter* emit = getEmitter();
2742 if (baseNode->OperIsLocalAddr())
2744 if (baseNode->gtOper == GT_LCL_FLD_ADDR)
2746 offset += baseNode->gtLclFld.gtLclOffs;
2748 emit->emitIns_R_S(ins, size, dst, baseNode->gtLclVarCommon.gtLclNum, offset);
2752 emit->emitIns_R_AR(ins, size, dst, baseNode->gtRegNum, offset);
2756 //------------------------------------------------------------------------
2757 // genCodeForStoreOffset: Generate code to store a reg to [base + offset].
2760 // ins - the instruction to generate.
2761 // size - the size that needs to be stored.
2762 // src - the register which needs to be stored.
2763 // baseNode - the base, relative to which to store the src register.
2764 // offset - the offset that is added to the baseNode to calculate the address to store into.
2766 void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* baseNode, unsigned offset)
2768 emitter* emit = getEmitter();
2770 if (baseNode->OperIsLocalAddr())
2772 if (baseNode->gtOper == GT_LCL_FLD_ADDR)
2774 offset += baseNode->gtLclFld.gtLclOffs;
2777 emit->emitIns_S_R(ins, size, src, baseNode->AsLclVarCommon()->GetLclNum(), offset);
2781 emit->emitIns_AR_R(ins, size, src, baseNode->gtRegNum, offset);
2785 // Generates CpBlk code by performing a loop unroll
2787 // The size argument of the CpBlk node is a constant and <= 64 bytes.
2788 // This may seem small but covers >95% of the cases in several framework assemblies.
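// For example (illustrative), copying a 20-byte struct unrolls to one 16-byte
// movdqu load/store pair plus one 4-byte mov load/store pair for the remainder.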
2790 void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
2792 // Make sure we got the arguments of the cpblk operation in the right registers
2793 unsigned size = cpBlkNode->Size();
2794 GenTree* dstAddr = cpBlkNode->Addr();
2795 GenTree* source = cpBlkNode->Data();
2796 GenTree* srcAddr = nullptr;
2797 assert(size <= CPBLK_UNROLL_LIMIT);
2799 emitter* emit = getEmitter();
2801 if (dstAddr->isUsedFromReg())
2803 genConsumeReg(dstAddr);
2806 if (source->gtOper == GT_IND)
2808 srcAddr = source->gtGetOp1();
2809 if (srcAddr->isUsedFromReg())
2811 genConsumeReg(srcAddr);
2816 noway_assert(source->IsLocal());
2817 // TODO-Cleanup: Consider making the addrForm() method in Rationalize public, e.g. in GenTree.
2818 // OR: transform source to GT_IND(GT_LCL_VAR_ADDR)
2819 if (source->OperGet() == GT_LCL_VAR)
2821 source->SetOper(GT_LCL_VAR_ADDR);
2825 assert(source->OperGet() == GT_LCL_FLD);
2826 source->SetOper(GT_LCL_FLD_ADDR);
2831 unsigned offset = 0;
2833 // If the size of this struct is 16 bytes or more, use SSE2
2834 // to do the loads and stores 16 bytes at a time.
2837 if (size >= XMM_REGSIZE_BYTES)
2839 regNumber xmmReg = cpBlkNode->GetSingleTempReg(RBM_ALLFLOAT);
2840 assert(genIsValidFloatReg(xmmReg));
2841 size_t slots = size / XMM_REGSIZE_BYTES;
2843 // TODO: In the below code the load and store instructions are for 16 bytes, but the
2844 // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
2845 // this probably needs to be changed.
2849 genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr, offset);
2851 genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset);
2852 offset += XMM_REGSIZE_BYTES;
2856 // Fill the remainder (15 bytes or less) if there's one.
2857 if ((size & 0xf) != 0)
2859 // Grab the integer temp register to emit the remaining loads and stores.
2860 regNumber tmpReg = cpBlkNode->GetSingleTempReg(RBM_ALLINT);
2862 if ((size & 8) != 0)
2865 // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
2866 for (unsigned savedOffs = offset; offset < savedOffs + 8; offset += 4)
2868 genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
2869 genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
2871 #else // !_TARGET_X86_
2872 genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr, offset);
2873 genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset);
2875 #endif // !_TARGET_X86_
2877 if ((size & 4) != 0)
2879 genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
2880 genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
2883 if ((size & 2) != 0)
2885 genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr, offset);
2886 genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset);
2889 if ((size & 1) != 0)
2891 genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr, offset);
2892 genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset);
2897 // Generate code for CpBlk by using rep movs
2899 // The size argument of the CpBlk is a constant and is between
2900 // CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
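// For illustration (x64 register names): after genConsumeBlockOp places the
// destination in RDI, the source in RSI, and the byte count in RCX, the copy
// reduces to a single
//      rep movsb
// instruction.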
2901 void CodeGen::genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode)
2903 // Make sure we got the arguments of the cpblk operation in the right registers
2904 unsigned size = cpBlkNode->Size();
2905 GenTree* dstAddr = cpBlkNode->Addr();
2906 GenTree* source = cpBlkNode->Data();
2907 GenTree* srcAddr = nullptr;
2910 assert(dstAddr->isUsedFromReg());
2911 assert(source->isContained());
2916 noway_assert(cpBlkNode->OperGet() == GT_STORE_DYN_BLK);
2922 assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
2924 assert(size > CPBLK_UNROLL_LIMIT);
2929 genConsumeBlockOp(cpBlkNode, REG_RDI, REG_RSI, REG_RCX);
2930 instGen(INS_r_movsb);
2933 #ifdef FEATURE_PUT_STRUCT_ARG_STK
2934 //------------------------------------------------------------------------
2935 // CodeGen::genMove8IfNeeded: Conditionally move 8 bytes of a struct to the argument area
2938 // size - The size of bytes remaining to be moved
2939 // longTmpReg - The tmp register to be used for the long value
2940 // srcAddr - The address of the source struct
2941 // offset - The current offset being copied
2944 // Returns the number of bytes moved (8 or 0).
2947 // This is used in the PutArgStkKindUnroll case, to move any bytes that are
2948 // not an even multiple of 16.
2949 // On x86, longTmpReg must be an xmm reg; on x64 it must be an integer register.
2950 // This is checked by genStoreRegToStackArg.
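// For example, a 30-byte struct leaves 14 bytes after the 16-byte SSE copies;
// genMove8IfNeeded, genMove4IfNeeded and genMove2IfNeeded then move an 8-byte,
// a 4-byte and a 2-byte chunk respectively (14 == 8 + 4 + 2).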
2952 unsigned CodeGen::genMove8IfNeeded(unsigned size, regNumber longTmpReg, GenTree* srcAddr, unsigned offset)
2955 instruction longMovIns = INS_movq;
2956 #else // !_TARGET_X86_
2957 instruction longMovIns = INS_mov;
2958 #endif // !_TARGET_X86_
2959 if ((size & 8) != 0)
2961 genCodeForLoadOffset(longMovIns, EA_8BYTE, longTmpReg, srcAddr, offset);
2962 genStoreRegToStackArg(TYP_LONG, longTmpReg, offset);
2968 //------------------------------------------------------------------------
2969 // CodeGen::genMove4IfNeeded: Conditionally move 4 bytes of a struct to the argument area
2972 // size - The size of bytes remaining to be moved
2973 // intTmpReg - The tmp register to be used for the int value
2974 // srcAddr - The address of the source struct
2975 // offset - The current offset being copied
2978 // Returns the number of bytes moved (4 or 0).
2981 // This is used in the PutArgStkKindUnroll case, to move any bytes that are
2982 // not an even multiple of 16.
2983 // intTmpReg must be an integer register.
2984 // This is checked by genStoreRegToStackArg.
2986 unsigned CodeGen::genMove4IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset)
2988 if ((size & 4) != 0)
2990 genCodeForLoadOffset(INS_mov, EA_4BYTE, intTmpReg, srcAddr, offset);
2991 genStoreRegToStackArg(TYP_INT, intTmpReg, offset);
2997 //------------------------------------------------------------------------
2998 // CodeGen::genMove2IfNeeded: Conditionally move 2 bytes of a struct to the argument area
3001 // size - The size of bytes remaining to be moved
3002 // intTmpReg - The tmp register to be used for the short value
3003 // srcAddr - The address of the source struct
3004 // offset - The current offset being copied
3007 // Returns the number of bytes moved (2 or 0).
3010 // This is used in the PutArgStkKindUnroll case, to move any bytes that are
3011 // not an even multiple of 16.
3012 // intTmpReg must be an integer register.
3013 // This is checked by genStoreRegToStackArg.
3015 unsigned CodeGen::genMove2IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset)
3017 if ((size & 2) != 0)
3019 genCodeForLoadOffset(INS_mov, EA_2BYTE, intTmpReg, srcAddr, offset);
3020 genStoreRegToStackArg(TYP_SHORT, intTmpReg, offset);
3026 //------------------------------------------------------------------------
3027 // CodeGen::genMove1IfNeeded: Conditionally move 1 byte of a struct to the argument area
3030 // size - The size of bytes remaining to be moved
3031 // intTmpReg - The tmp register to be used for the byte value
3032 // srcAddr - The address of the source struct
3033 // offset - The current offset being copied
3036 // Returns the number of bytes moved (1 or 0).
3039 // This is used in the PutArgStkKindUnroll case, to move any bytes that are
3040 // not an even multiple of 16.
3041 // intTmpReg must be an integer register.
3042 // This is checked by genStoreRegToStackArg.
3044 unsigned CodeGen::genMove1IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset)
3046 if ((size & 1) != 0)
3048 genCodeForLoadOffset(INS_mov, EA_1BYTE, intTmpReg, srcAddr, offset);
3049 genStoreRegToStackArg(TYP_BYTE, intTmpReg, offset);
3055 //---------------------------------------------------------------------------------------------------------------//
3056 // genStructPutArgUnroll: Generates code for passing a struct arg on stack by value using loop unrolling.
3059 // putArgNode - the PutArgStk tree.
3062 // m_stkArgVarNum must be set to the base var number, relative to which the by-val struct will be copied to the
3063 // stack.
3065 // TODO-Amd64-Unix: Try to share code with copyblk.
3066 // Need refactoring of copyblk before it could be used for putarg_stk.
3067 // The difference for now is that a putarg_stk contains its children, while cpyblk does not.
3068 // This creates differences in code. After some significant refactoring it could be reused.
3070 void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode)
3072 GenTree* src = putArgNode->gtOp.gtOp1;
3073 // We will never call this method for SIMD types, which are stored directly
3074 // in genPutStructArgStk().
3075 noway_assert(src->TypeGet() == TYP_STRUCT);
3077 unsigned size = putArgNode->getArgSize();
3078 assert(size <= CPBLK_UNROLL_LIMIT);
3080 emitter* emit = getEmitter();
3081 unsigned putArgOffset = putArgNode->getArgOffset();
3083 assert(src->isContained());
3085 assert(src->gtOper == GT_OBJ);
3087 if (src->gtOp.gtOp1->isUsedFromReg())
3089 genConsumeReg(src->gtOp.gtOp1);
3092 unsigned offset = 0;
3094 regNumber xmmTmpReg = REG_NA;
3095 regNumber intTmpReg = REG_NA;
3096 regNumber longTmpReg = REG_NA;
3098 // On x86 we use an XMM register for both 16 and 8-byte chunks, but if it's
3099 // less than 16 bytes, we will just be using pushes
3102 xmmTmpReg = putArgNode->GetSingleTempReg(RBM_ALLFLOAT);
3103 longTmpReg = xmmTmpReg;
3105 if ((size & 0x7) != 0)
3107 intTmpReg = putArgNode->GetSingleTempReg(RBM_ALLINT);
3109 #else // !_TARGET_X86_
3110 // On x64 we use an XMM register only for 16-byte chunks.
3111 if (size >= XMM_REGSIZE_BYTES)
3113 xmmTmpReg = putArgNode->GetSingleTempReg(RBM_ALLFLOAT);
3115 if ((size & 0xf) != 0)
3117 intTmpReg = putArgNode->GetSingleTempReg(RBM_ALLINT);
3118 longTmpReg = intTmpReg;
3120 #endif // !_TARGET_X86_
3122 // If the size of this struct is 16 bytes or more, use SSE2
3123 // to do the loads and stores 16 bytes at a time.
3125 if (size >= XMM_REGSIZE_BYTES)
3128 assert(!m_pushStkArg);
3129 #endif // _TARGET_X86_
3130 size_t slots = size / XMM_REGSIZE_BYTES;
3132 assert(putArgNode->gtGetOp1()->isContained());
3133 assert(putArgNode->gtGetOp1()->gtOp.gtOper == GT_OBJ);
3135 // TODO: In the below code the load and store instructions are for 16 bytes, but the
3136 // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
3137 // this probably needs to be changed.
3141 genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmTmpReg, src->gtGetOp1(), offset);
3144 genStoreRegToStackArg(TYP_STRUCT, xmmTmpReg, offset);
3146 offset += XMM_REGSIZE_BYTES;
3150 // Fill the remainder (15 bytes or less) if there's one.
3151 if ((size & 0xf) != 0)
3156 // This case is currently supported only for the case where the total size is
3157 // less than XMM_REGSIZE_BYTES. We need to push the remaining chunks in reverse
3158 // order. However, morph has ensured that we have a struct that is an even
3159 // multiple of TARGET_POINTER_SIZE, so we don't need to worry about alignment.
3160 assert(((size & 0xc) == size) && (offset == 0));
3161 // If we have a 4 byte chunk, load it from either offset 0 or 8, depending on
3162 // whether we've got an 8 byte chunk, and then push it on the stack.
3163 unsigned pushedBytes = genMove4IfNeeded(size, intTmpReg, src->gtOp.gtOp1, size & 0x8);
3164 // Now if we have an 8 byte chunk, load it from offset 0 (it's the first chunk)
3165 // and push it on the stack.
3166 pushedBytes += genMove8IfNeeded(size, longTmpReg, src->gtOp.gtOp1, 0);
3169 #endif // _TARGET_X86_
3171 offset += genMove8IfNeeded(size, longTmpReg, src->gtOp.gtOp1, offset);
3172 offset += genMove4IfNeeded(size, intTmpReg, src->gtOp.gtOp1, offset);
3173 offset += genMove2IfNeeded(size, intTmpReg, src->gtOp.gtOp1, offset);
3174 offset += genMove1IfNeeded(size, intTmpReg, src->gtOp.gtOp1, offset);
3175 assert(offset == size);
3180 //------------------------------------------------------------------------
3181 // genStructPutArgRepMovs: Generates code for passing a struct arg by value on stack using Rep Movs.
3184 // putArgNode - the PutArgStk tree.
3187 // The size argument of the PutArgStk (for structs) is a constant and is between
3188 // CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
3189 // m_stkArgVarNum must be set to the base var number, relative to which the by-val struct bits will go.
3191 void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode)
3193 GenTree* srcAddr = putArgNode->gtGetOp1();
3194 assert(srcAddr->TypeGet() == TYP_STRUCT);
3195 assert(putArgNode->getArgSize() > CPBLK_UNROLL_LIMIT);
3197 // Make sure we got the arguments of the cpblk operation in the right registers, and that
3198 // 'srcAddr' is contained as expected.
3199 assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI));
3200 assert(srcAddr->isContained());
3202 genConsumePutStructArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX);
3203 instGen(INS_r_movsb);
3206 //------------------------------------------------------------------------
3207 // If any Vector3 args are on the stack and are not passed by reference, their upper 32 bits
3208 // must be cleared to zero. The native compiler doesn't clear the upper bits,
3209 // and there is no way to know if the caller is native or not. So, the upper
3210 // 32 bits of a Vector3 argument on the stack are always cleared to zero.
3211 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
3212 void CodeGen::genClearStackVec3ArgUpperBits()
3217 printf("*************** In genClearStackVec3ArgUpperBits()\n");
3221 assert(compiler->compGeneratingProlog);
3225 for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; varNum++)
3227 LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
3228 assert(varDsc->lvIsParam);
3230 // Does the var have a SIMD12 type?
3231 if (varDsc->lvType != TYP_SIMD12)
3236 if (!varDsc->lvIsRegArg)
3238 // Clear the upper 32 bits by mov dword ptr [V_ARG_BASE+0xC], 0
3239 getEmitter()->emitIns_S_I(ins_Store(TYP_INT), EA_4BYTE, varNum, genTypeSize(TYP_FLOAT) * 3, 0);
3243 // Assume that for x64 Linux, an argument is fully in registers
3244 // or fully on stack.
3245 regNumber argReg = varDsc->GetOtherArgReg();
3247 // Clear the upper 32 bits by two shift instructions.
3248 // argReg = argReg << 96
3249 getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), argReg, 12);
3250 // argReg = argReg >> 96
3251 getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), argReg, 12);
3255 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
3256 #endif // FEATURE_PUT_STRUCT_ARG_STK
3258 // Generate code for CpObj nodes which copy structs that have interleaved
3259 // GC pointers.
3260 // This will generate a sequence of movsp instructions for runs of non-GC members,
3261 // and calls to the CORINFO_HELP_ASSIGN_BYREF helper for the GC pointers.
3262 // Note that movsp is an alias for movsd on x86 and movsq on x64.
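// For illustration (a rough sketch assuming x64 and this field order), copying a
// struct laid out as { object o; long a; long b; } produces approximately:
//      call CORINFO_HELP_ASSIGN_BYREF   ; writes 'o' with a barrier, advances RSI/RDI
//      movsq                            ; copies 'a'
//      movsq                            ; copies 'b'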
3263 void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
3265 // Make sure we got the arguments of the cpobj operation in the right registers
3266 GenTree* dstAddr = cpObjNode->Addr();
3267 GenTree* source = cpObjNode->Data();
3268 GenTree* srcAddr = nullptr;
3269 var_types srcAddrType = TYP_BYREF;
3270 bool sourceIsLocal = false;
3272 assert(source->isContained());
3273 if (source->gtOper == GT_IND)
3275 srcAddr = source->gtGetOp1();
3276 assert(srcAddr->isUsedFromReg());
3280 noway_assert(source->IsLocal());
3281 sourceIsLocal = true;
3284 bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIsLocalAddr();
3288 assert(dstAddr->isUsedFromReg());
3290 // If the GenTree node has data about GC pointers, this means we're dealing
3291 // with CpObj, so this requires special logic.
3292 assert(cpObjNode->gtGcPtrCount > 0);
3294 // MovSp (alias for movsq on x64 and movsd on x86) instruction is used for copying non-gcref fields
3295 // and it needs src = RSI and dst = RDI.
3296 // Either these registers must not contain lclVars, or they must be dying or marked for spill.
3297 // This is because these registers are incremented as we go through the struct.
3300 GenTree* actualSrcAddr = srcAddr->gtSkipReloadOrCopy();
3301 GenTree* actualDstAddr = dstAddr->gtSkipReloadOrCopy();
3302 unsigned srcLclVarNum = BAD_VAR_NUM;
3303 unsigned dstLclVarNum = BAD_VAR_NUM;
3304 bool isSrcAddrLiveOut = false;
3305 bool isDstAddrLiveOut = false;
3306 if (genIsRegCandidateLocal(actualSrcAddr))
3308 srcLclVarNum = actualSrcAddr->AsLclVarCommon()->gtLclNum;
3309 isSrcAddrLiveOut = ((actualSrcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
3311 if (genIsRegCandidateLocal(actualDstAddr))
3313 dstLclVarNum = actualDstAddr->AsLclVarCommon()->gtLclNum;
3314 isDstAddrLiveOut = ((actualDstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
3316 assert((actualSrcAddr->gtRegNum != REG_RSI) || !isSrcAddrLiveOut ||
3317 ((srcLclVarNum == dstLclVarNum) && !isDstAddrLiveOut));
3318 assert((actualDstAddr->gtRegNum != REG_RDI) || !isDstAddrLiveOut ||
3319 ((srcLclVarNum == dstLclVarNum) && !isSrcAddrLiveOut));
3320 srcAddrType = srcAddr->TypeGet();
3324 // Consume the operands and get them into the right registers.
3325 // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
3326 genConsumeBlockOp(cpObjNode, REG_RDI, REG_RSI, REG_NA);
3327 gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddrType);
3328 gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet());
3330 unsigned slots = cpObjNode->gtSlots;
3332 // If we can prove it's on the stack we don't need to use the write barrier.
3335 if (slots >= CPOBJ_NONGC_SLOTS_LIMIT)
3337 // If the destination of the CpObj is on the stack, make sure we allocated
3338 // RCX to emit the movsp (alias for movsd or movsq for 32 and 64 bits respectively).
3339 assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0);
3341 getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, slots);
3342 instGen(INS_r_movsp);
3346 // For small structs, it's better to emit a sequence of movsp than to
3347 // emit a rep movsp instruction.
3357 BYTE* gcPtrs = cpObjNode->gtGcPtrs;
3358 unsigned gcPtrCount = cpObjNode->gtGcPtrCount;
3366 // Let's see if we can use rep movsp instead of a sequence of movsp instructions
3367 // to save cycles and code size.
3369 unsigned nonGcSlotCount = 0;
3375 } while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
3377 // If we have a very small contiguous non-gc region, it's better just to
3378 // emit a sequence of movsp instructions
3379 if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
3381 while (nonGcSlotCount > 0)
3389 // Otherwise, we can save code-size and improve CQ by emitting
3390 // rep movsp (alias for movsd/movsq for x86/x64)
3391 assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0);
3393 getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
3394 instGen(INS_r_movsp);
3399 // We have a GC pointer; call the byref write barrier helper.
3400 genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
3406 assert(gcPtrCount == 0);
3409 // Clear the gcInfo for RSI and RDI.
3410 // While we normally update GC info prior to the last instruction that uses them,
3411 // these actually live into the helper call.
3412 gcInfo.gcMarkRegSetNpt(RBM_RSI);
3413 gcInfo.gcMarkRegSetNpt(RBM_RDI);
3416 // Generate code for a CpBlk node by means of the VM memcpy helper call.
3417 // Preconditions:
3418 // a) The size argument of the CpBlk is not an integer constant.
3419 // b) The size argument is a constant but is larger than CPBLK_MOVS_LIMIT bytes.
3420 void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode)
3422 #ifdef _TARGET_AMD64_
3423 // Make sure we got the arguments of the cpblk operation in the right registers
3424 unsigned blockSize = cpBlkNode->Size();
3425 GenTree* dstAddr = cpBlkNode->Addr();
3426 GenTree* source = cpBlkNode->Data();
3427 GenTree* srcAddr = nullptr;
3429 // Size goes in arg2
3432 assert(blockSize >= CPBLK_MOVS_LIMIT);
3433 assert((cpBlkNode->gtRsvdRegs & RBM_ARG_2) != 0);
3437 noway_assert(cpBlkNode->gtOper == GT_STORE_DYN_BLK);
3440 // Source address goes in arg1
3441 if (source->gtOper == GT_IND)
3443 srcAddr = source->gtGetOp1();
3444 assert(srcAddr->isUsedFromReg());
3448 noway_assert(source->IsLocal());
3449 assert((cpBlkNode->gtRsvdRegs & RBM_ARG_1) != 0);
3450 inst_RV_TT(INS_lea, REG_ARG_1, source, 0, EA_BYREF);
3453 genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
3455 genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
3456 #else // !_TARGET_AMD64_
3457 noway_assert(false && "Helper call for CpBlk is not needed.");
3458 #endif // !_TARGET_AMD64_
3461 // generate code to do a switch statement based on a table of ip-relative offsets
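// For illustration, the emitted sequence is roughly:
//      mov baseReg, [baseReg + idxReg*4]  ; load the 32-bit offset from the table
//      lea tmpReg, [fgFirstBB]            ; absolute address of the method start
//      add baseReg, tmpReg
//      jmp baseReg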
3462 void CodeGen::genTableBasedSwitch(GenTree* treeNode)
3464 genConsumeOperands(treeNode->AsOp());
3465 regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum;
3466 regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum;
3468 regNumber tmpReg = treeNode->GetSingleTempReg();
3470 // load the ip-relative offset (which is relative to start of fgFirstBB)
3471 getEmitter()->emitIns_R_ARX(INS_mov, EA_4BYTE, baseReg, baseReg, idxReg, 4, 0);
3473 // add it to the absolute address of fgFirstBB
3474 compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
3475 getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, compiler->fgFirstBB, tmpReg);
3476 getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, baseReg, tmpReg);
3478 getEmitter()->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), baseReg);
3481 // emits the table and an instruction to get the address of the first element
3482 void CodeGen::genJumpTable(GenTree* treeNode)
3484 noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
3485 assert(treeNode->OperGet() == GT_JMPTABLE);
3487 unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount;
3488 BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab;
3489 unsigned jmpTabOffs;
3490 unsigned jmpTabBase;
3492 jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCount, true);
3496 JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
3498 for (unsigned i = 0; i < jumpCount; i++)
3500 BasicBlock* target = *jumpTable++;
3501 noway_assert(target->bbFlags & BBF_JMP_TARGET);
3503 JITDUMP(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
3505 getEmitter()->emitDataGenData(i, target);
3508 getEmitter()->emitDataGenEnd();
3510 // Access to inline data is 'abstracted' by a special type of static member
3511 // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
3512 // to constant data, not a real static field.
3513 getEmitter()->emitIns_R_C(INS_lea, emitTypeSize(TYP_I_IMPL), treeNode->gtRegNum,
3514 compiler->eeFindJitDataOffs(jmpTabBase), 0);
3515 genProduceReg(treeNode);
3518 // generate code for the locked operations:
3519 // GT_LOCKADD, GT_XCHG, GT_XADD
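// For example (illustrative), GT_XADD on a TYP_INT address conceptually becomes
//      lock xadd dword ptr [addrReg], dataReg
// leaving the original memory value in dataReg (and copied to the target
// register when one is allocated).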
3520 void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
3522 GenTree* data = treeNode->gtOp.gtOp2;
3523 GenTree* addr = treeNode->gtOp.gtOp1;
3524 regNumber targetReg = treeNode->gtRegNum;
3525 regNumber dataReg = data->gtRegNum;
3526 regNumber addrReg = addr->gtRegNum;
3527 var_types type = genActualType(data->TypeGet());
3530 // The register allocator should have extended the lifetime of the address
3531 // so that it is not used as the target.
3532 noway_assert(addrReg != targetReg);
3534 // If data is a lclVar that's not a last use, we'd better have allocated a register
3535 // for the result (except in the case of GT_LOCKADD which does not produce a register result).
3536 assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) ||
3537 (data->gtFlags & GTF_VAR_DEATH) != 0);
3539 genConsumeOperands(treeNode);
3540 if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg)
3542 inst_RV_RV(ins_Copy(type), targetReg, dataReg);
3543 data->gtRegNum = targetReg;
3545 // TODO-XArch-Cleanup: Consider whether it is worth it, for debugging purposes, to restore the
3546 // original gtRegNum on data, after calling emitInsBinary below.
3548 switch (treeNode->OperGet())
3555 // lock is implied by xchg
3566 // all of these nodes implicitly do an indirection on op1
3567 // so create a temporary node to feed into the pattern matching
3568 GenTreeIndir i = indirForm(type, addr);
3570 getEmitter()->emitInsBinary(ins, emitTypeSize(type), &i, data);
3572 if (treeNode->gtRegNum != REG_NA)
3574 genProduceReg(treeNode);
3578 //------------------------------------------------------------------------
3579 // genCodeForCmpXchg: Produce code for a GT_CMPXCHG node.
3582 // tree - the GT_CMPXCHG node
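// For illustration, the sequence generated below is essentially:
//      mov rax, <comparand>                ; if not already in RAX
//      lock cmpxchg [<location>], <value>  ; RAX receives the original memory value
//      mov <targetReg>, rax                ; if targetReg != RAX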
3584 void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* tree)
3586 assert(tree->OperIs(GT_CMPXCHG));
3588 var_types targetType = tree->TypeGet();
3589 regNumber targetReg = tree->gtRegNum;
3591 GenTree* location = tree->gtOpLocation; // arg1
3592 GenTree* value = tree->gtOpValue; // arg2
3593 GenTree* comparand = tree->gtOpComparand; // arg3
3595 assert(location->gtRegNum != REG_NA && location->gtRegNum != REG_RAX);
3596 assert(value->gtRegNum != REG_NA && value->gtRegNum != REG_RAX);
3598 genConsumeReg(location);
3599 genConsumeReg(value);
3600 genConsumeReg(comparand);
3602 // comparand goes to RAX;
3603 // Note that we must issue this move after the genConsumeRegs(), in case any of the above
3604 // have a GT_COPY from RAX.
3605 if (comparand->gtRegNum != REG_RAX)
3607 inst_RV_RV(ins_Copy(comparand->TypeGet()), REG_RAX, comparand->gtRegNum, comparand->TypeGet());
3613 getEmitter()->emitIns_AR_R(INS_cmpxchg, emitTypeSize(targetType), value->gtRegNum, location->gtRegNum, 0);
3616 if (targetReg != REG_RAX)
3618 inst_RV_RV(ins_Copy(targetType), targetReg, REG_RAX, targetType);
3621 genProduceReg(tree);
3624 // generate code for BoundsCheck nodes
3625 void CodeGen::genRangeCheck(GenTree* oper)
3627 noway_assert(oper->OperIsBoundsCheck());
3628 GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
3630 GenTree* arrIndex = bndsChk->gtIndex;
3631 GenTree* arrLen = bndsChk->gtArrLen;
3632 GenTree* arrRef = nullptr;
3635 GenTree * src1, *src2;
3636 emitJumpKind jmpKind;
3638 genConsumeRegs(arrIndex);
3639 genConsumeRegs(arrLen);
3641 if (arrIndex->isContainedIntOrIImmed())
3643 // arrIndex is a contained constant. In this case
3644 // we will generate one of the following
3645 // cmp [mem], immed (if arrLen is a memory op)
3646 // cmp reg, immed (if arrLen is in a reg)
3648 // That is arrLen cannot be a contained immed.
3649 assert(!arrLen->isContainedIntOrIImmed());
3657 // arrIndex could either be a contained memory op or a reg
3658 // In this case we will generate one of the following
3659 // cmp [mem], immed (if arrLen is a constant)
3660 // cmp [mem], reg (if arrLen is in a reg)
3661 // cmp reg, immed (if arrIndex is in a reg)
3662 // cmp reg1, reg2 (if arrIndex is in reg1)
3663 // cmp reg, [mem] (if arrLen is a memory op)
3665 // That is only one of arrIndex or arrLen can be a memory op.
3666 assert(!arrIndex->isUsedFromMemory() || !arrLen->isUsedFromMemory());
3673 var_types bndsChkType = src2->TypeGet();
3675 // Bounds checks can only be 32 or 64 bit sized comparisons.
3676 assert(bndsChkType == TYP_INT || bndsChkType == TYP_LONG);
3678 // The type of the bounds check should always be wide enough to compare against the index.
3679 assert(emitTypeSize(bndsChkType) >= emitTypeSize(src1->TypeGet()));
3682 getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(bndsChkType), src1, src2);
3683 genJumpToThrowHlpBlk(jmpKind, bndsChk->gtThrowKind, bndsChk->gtIndRngFailBB);
3686 //---------------------------------------------------------------------
3687 // genCodeForPhysReg - generate code for a GT_PHYSREG node
3690 // tree - the GT_PHYSREG node
3695 void CodeGen::genCodeForPhysReg(GenTreePhysReg* tree)
3697 assert(tree->OperIs(GT_PHYSREG));
3699 var_types targetType = tree->TypeGet();
3700 regNumber targetReg = tree->gtRegNum;
3702 if (targetReg != tree->gtSrcReg)
3704 inst_RV_RV(ins_Copy(targetType), targetReg, tree->gtSrcReg, targetType);
3705 genTransferRegGCState(targetReg, tree->gtSrcReg);
3708 genProduceReg(tree);
3711 //---------------------------------------------------------------------
3712 // genCodeForNullCheck - generate code for a GT_NULLCHECK node
3715 // tree - the GT_NULLCHECK node
3720 void CodeGen::genCodeForNullCheck(GenTreeOp* tree)
3722 assert(tree->OperIs(GT_NULLCHECK));
3724 assert(tree->gtOp1->isUsedFromReg());
3725 regNumber reg = genConsumeReg(tree->gtOp1);
3726 getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, reg, reg, 0);
3729 //------------------------------------------------------------------------
3730 // genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the
3731 // lower bound for the given dimension.
3734 // elemType - the element type of the array
3735 // rank - the rank of the array
3736 // dimension - the dimension for which the lower bound offset will be returned.
3741 unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension)
3743 // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
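// For example, with rank == 2 the dimension sizes occupy the first two INTs past
// the array data offset and the lower bounds the next two, so the lower bound for
// dimension 1 lives at eeGetArrayDataOffset(elemType) + 4 * 3.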
3744 return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank);
3747 //------------------------------------------------------------------------
3748 // genOffsetOfMDArrayDimensionSize: Returns the offset from the Array object to the
3749 // size for the given dimension.
3752 // elemType - the element type of the array
3753 // rank - the rank of the array
3754 // dimension - the dimension for which the size offset will be returned.
3759 unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension)
3761 // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
3762 return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension;
3765 //------------------------------------------------------------------------
3766 // genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference,
3767 // producing the effective index by subtracting the lower bound.
3770 // arrIndex - the node for which we're generating code
3776 void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex)
3778 GenTree* arrObj = arrIndex->ArrObj();
3779 GenTree* indexNode = arrIndex->IndexExpr();
3781 regNumber arrReg = genConsumeReg(arrObj);
3782 regNumber indexReg = genConsumeReg(indexNode);
3783 regNumber tgtReg = arrIndex->gtRegNum;
3785 unsigned dim = arrIndex->gtCurrDim;
3786 unsigned rank = arrIndex->gtArrRank;
3787 var_types elemType = arrIndex->gtArrElemType;
3789 noway_assert(tgtReg != REG_NA);
3791 // Subtract the lower bound for this dimension.
3792 // TODO-XArch-CQ: make this contained if it's an immediate that fits.
3793 if (tgtReg != indexReg)
3795 inst_RV_RV(INS_mov, tgtReg, indexReg, indexNode->TypeGet());
3797 getEmitter()->emitIns_R_AR(INS_sub, emitActualTypeSize(TYP_INT), tgtReg, arrReg,
3798 genOffsetOfMDArrayLowerBound(elemType, rank, dim));
3799 getEmitter()->emitIns_R_AR(INS_cmp, emitActualTypeSize(TYP_INT), tgtReg, arrReg,
3800 genOffsetOfMDArrayDimensionSize(elemType, rank, dim));
3801 genJumpToThrowHlpBlk(EJ_jae, SCK_RNGCHK_FAIL);
3803 genProduceReg(arrIndex);
3806 //------------------------------------------------------------------------
3807 // genCodeForArrOffset: Generates code to compute the flattened array offset for
3808 // one dimension of an array reference:
3809 // result = (prevDimOffset * dimSize) + effectiveIndex
3810 // where dimSize is obtained from the arrObj operand
3813 // arrOffset - the node for which we're generating code
3819 // dimSize and effectiveIndex are always non-negative, the former by design,
3820 // and the latter because it has been normalized to be zero-based.
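// For example, indexing arr[i][j] into a 10x20 array: after dimension 0 the running
// offset is simply i, and processing dimension 1 computes i * 20 + j.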
3822 void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
3824 GenTree* offsetNode = arrOffset->gtOffset;
3825 GenTree* indexNode = arrOffset->gtIndex;
3826 GenTree* arrObj = arrOffset->gtArrObj;
3828 regNumber tgtReg = arrOffset->gtRegNum;
3829 assert(tgtReg != REG_NA);
3831 unsigned dim = arrOffset->gtCurrDim;
3832 unsigned rank = arrOffset->gtArrRank;
3833 var_types elemType = arrOffset->gtArrElemType;
3835 // First, consume the operands in the correct order.
3836 regNumber offsetReg = REG_NA;
3837 regNumber tmpReg = REG_NA;
3838 if (!offsetNode->IsIntegralConst(0))
3840 offsetReg = genConsumeReg(offsetNode);
3842 // We will use a temp register for the offset*scale+effectiveIndex computation.
3843 tmpReg = arrOffset->GetSingleTempReg();
3847 assert(offsetNode->isContained());
3849 regNumber indexReg = genConsumeReg(indexNode);
3850 // Although arrReg may not be used in the constant-index case, if we have generated
3851 // the value into a register, we must consume it, otherwise we will fail to end the
3852 // live range of the gc ptr.
3853 // TODO-CQ: Currently arrObj will always have a register allocated to it.
3854 // We could avoid allocating a register for it, which would be of value if the arrObj
3855 // is an on-stack lclVar.
3856 regNumber arrReg = REG_NA;
3857 if (arrObj->gtHasReg())
3859 arrReg = genConsumeReg(arrObj);
3862 if (!offsetNode->IsIntegralConst(0))
3864 assert(tmpReg != REG_NA);
3865 assert(arrReg != REG_NA);
3867 // Evaluate tgtReg = offsetReg*dim_size + indexReg.
3868 // tmpReg is used to load dim_size and the result of the multiplication.
3869 // Note that dim_size will never be negative.
3871 getEmitter()->emitIns_R_AR(INS_mov, emitActualTypeSize(TYP_INT), tmpReg, arrReg,
3872 genOffsetOfMDArrayDimensionSize(elemType, rank, dim));
3873 inst_RV_RV(INS_imul, tmpReg, offsetReg);
3875 if (tmpReg == tgtReg)
3877 inst_RV_RV(INS_add, tmpReg, indexReg);
3881 if (indexReg != tgtReg)
3883 inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_I_IMPL);
3885 inst_RV_RV(INS_add, tgtReg, tmpReg);
3890 if (indexReg != tgtReg)
3892 inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT);
3895 genProduceReg(arrOffset);
3898 instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
3902 // Operations on SIMD vectors shouldn't come down this path
3903 assert(!varTypeIsSIMD(type));
3904 if (varTypeIsFloating(type))
3906 return ins_MathOp(oper, type);
3950 #if !defined(_TARGET_64BIT_)
3969 #endif // !defined(_TARGET_64BIT_)
3977 //------------------------------------------------------------------------
3978 // genCodeForShift: Generates the code sequence for a GenTree node that
3979 // represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
3982 // tree - the bit shift node (that specifies the type of bit shift to perform).
3985 // a) All GenTrees are register allocated.
3986 // b) The shift-by-amount in tree->gtOp.gtOp2 is either a contained constant or
3987 // it's a register-allocated expression. If it is in a register that is
3988 // not RCX, it will be moved to RCX (so RCX better not be in use!).
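// For illustration only: a constant shift "x << 3" with x in RAX and the result
// register RCX emits roughly
//     mov rcx, rax
//     shl rcx, 3
// while a variable shift count is first moved to RCX (if needed) and emits
//     shl rax, cl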
3990 void CodeGen::genCodeForShift(GenTree* tree)
3992 // Only the non-RMW case here.
3993 assert(tree->OperIsShiftOrRotate());
3994 assert(tree->gtOp.gtOp1->isUsedFromReg());
3995 assert(tree->gtRegNum != REG_NA);
3997 genConsumeOperands(tree->AsOp());
3999 var_types targetType = tree->TypeGet();
4000 instruction ins = genGetInsForOper(tree->OperGet(), targetType);
4002 GenTree* operand = tree->gtGetOp1();
4003 regNumber operandReg = operand->gtRegNum;
4005 GenTree* shiftBy = tree->gtGetOp2();
4007 if (shiftBy->isContainedIntOrIImmed())
4009 // First, move the operand to the destination register and
4010 // later on perform the shift in-place.
4011 // (LSRA will try to avoid this situation through preferencing.)
4012 if (tree->gtRegNum != operandReg)
4014 inst_RV_RV(INS_mov, tree->gtRegNum, operandReg, targetType);
4017 int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
4018 inst_RV_SH(ins, emitTypeSize(tree), tree->gtRegNum, shiftByValue);
// The shift count must be in ECX, since we constrained this node to sit in ECX.
// If it didn't end up there, LSRA expects the code generator to move it, because
// ECX is a fixed single-register requirement.
4025 genCopyRegIfNeeded(shiftBy, REG_RCX);
4027 // The operand to be shifted must not be in ECX
4028 noway_assert(operandReg != REG_RCX);
4030 if (tree->gtRegNum != operandReg)
4032 inst_RV_RV(INS_mov, tree->gtRegNum, operandReg, targetType);
4034 inst_RV_CL(ins, tree->gtRegNum, targetType);
4037 genProduceReg(tree);
4041 //------------------------------------------------------------------------
4042 // genCodeForShiftLong: Generates the code sequence for a GenTree node that
// represents a three-operand bit shift or rotate operation (<<Hi, >>Lo).
4046 // tree - the bit shift node (that specifies the type of bit shift to perform).
4049 // a) All GenTrees are register allocated.
4050 // b) The shift-by-amount in tree->gtOp.gtOp2 is a contained constant
// TODO-X86-CQ: This only handles the case where the operand being shifted is in a register. We don't
// need sourceHi to always be in a register for GT_LSH_HI (it could be moved from memory to
// targetReg if sourceHi is a memory operand). Similarly, for GT_RSH_LO, sourceLo could be a
// contained memory operand. Even when it is not a memory op, we could mark it as reg-optional.
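// Illustrative sketch (x86, registers hypothetical): for a 64-bit "x << 10", the
// GT_LSH_HI part emits
//     shld edx, eax, 10   ; hi = (hi << 10) | (lo >> 22)
// and for "x >> 10" the GT_RSH_LO part emits
//     shrd eax, edx, 10   ; lo = (lo >> 10) | (hi << 22)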
4057 void CodeGen::genCodeForShiftLong(GenTree* tree)
4059 // Only the non-RMW case here.
4060 genTreeOps oper = tree->OperGet();
4061 assert(oper == GT_LSH_HI || oper == GT_RSH_LO);
4063 GenTree* operand = tree->gtOp.gtOp1;
4064 assert(operand->OperGet() == GT_LONG);
4065 assert(operand->gtOp.gtOp1->isUsedFromReg());
4066 assert(operand->gtOp.gtOp2->isUsedFromReg());
4068 GenTree* operandLo = operand->gtGetOp1();
4069 GenTree* operandHi = operand->gtGetOp2();
4071 regNumber regLo = operandLo->gtRegNum;
4072 regNumber regHi = operandHi->gtRegNum;
4074 genConsumeOperands(tree->AsOp());
4076 var_types targetType = tree->TypeGet();
4077 instruction ins = genGetInsForOper(oper, targetType);
4079 GenTree* shiftBy = tree->gtGetOp2();
4081 assert(shiftBy->isContainedIntOrIImmed());
4083 unsigned int count = shiftBy->AsIntConCommon()->IconValue();
4085 regNumber regResult = (oper == GT_LSH_HI) ? regHi : regLo;
4087 if (regResult != tree->gtRegNum)
4089 inst_RV_RV(INS_mov, tree->gtRegNum, regResult, targetType);
4092 if (oper == GT_LSH_HI)
4094 inst_RV_RV_IV(ins, emitTypeSize(targetType), tree->gtRegNum, regLo, count);
4098 assert(oper == GT_RSH_LO);
4099 inst_RV_RV_IV(ins, emitTypeSize(targetType), tree->gtRegNum, regHi, count);
4102 genProduceReg(tree);
4106 //------------------------------------------------------------------------
4107 // genCodeForShiftRMW: Generates the code sequence for a GT_STOREIND GenTree node that
4108 // represents a RMW bit shift or rotate operation (<<, >>, >>>, rol, ror), for example:
4109 // GT_STOREIND( AddressTree, GT_SHL( Ind ( AddressTree ), Operand ) )
4112 // storeIndNode: the GT_STOREIND node.
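// For example (a sketch; the addressing mode is illustrative), "*p = *p << 5" emits
//     shl dword ptr [rax], 5
// A count of 1 uses the short form with the count embedded in the opcode, and a
// variable count is moved to CL first:
//     shl dword ptr [rax], cl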
4114 void CodeGen::genCodeForShiftRMW(GenTreeStoreInd* storeInd)
4116 GenTree* data = storeInd->Data();
4117 GenTree* addr = storeInd->Addr();
4119 assert(data->OperIsShiftOrRotate());
4121 // This function only handles the RMW case.
4122 assert(data->gtOp.gtOp1->isUsedFromMemory());
4123 assert(data->gtOp.gtOp1->isIndir());
4124 assert(Lowering::IndirsAreEquivalent(data->gtOp.gtOp1, storeInd));
4125 assert(data->gtRegNum == REG_NA);
4127 var_types targetType = data->TypeGet();
4128 genTreeOps oper = data->OperGet();
4129 instruction ins = genGetInsForOper(oper, targetType);
4130 emitAttr attr = EA_ATTR(genTypeSize(targetType));
4132 GenTree* shiftBy = data->gtOp.gtOp2;
4133 if (shiftBy->isContainedIntOrIImmed())
4135 int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
4136 ins = genMapShiftInsToShiftByConstantIns(ins, shiftByValue);
4137 if (shiftByValue == 1)
// There is no source in this case, as the shift-by count is encoded in the instruction opcode itself.
4140 getEmitter()->emitInsRMW(ins, attr, storeInd);
4144 getEmitter()->emitInsRMW(ins, attr, storeInd, shiftBy);
// The shift count must be in ECX, since we constrained this node to sit in ECX.
// If it didn't end up there, LSRA expects the code generator to move it, because
// ECX is a fixed single-register requirement.
4152 regNumber shiftReg = shiftBy->gtRegNum;
4153 genCopyRegIfNeeded(shiftBy, REG_RCX);
4155 // The shiftBy operand is implicit, so call the unary version of emitInsRMW.
4156 getEmitter()->emitInsRMW(ins, attr, storeInd);
4160 //------------------------------------------------------------------------
4161 // genCodeForLclAddr: Generates the code for GT_LCL_FLD_ADDR/GT_LCL_VAR_ADDR.
4166 void CodeGen::genCodeForLclAddr(GenTree* tree)
4168 assert(tree->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR));
4170 var_types targetType = tree->TypeGet();
4171 regNumber targetReg = tree->gtRegNum;
4173 // Address of a local var.
4174 noway_assert(targetType == TYP_BYREF);
4176 inst_RV_TT(INS_lea, targetReg, tree, 0, EA_BYREF);
4177 genProduceReg(tree);
4180 //------------------------------------------------------------------------
4181 // genCodeForLclFld: Produce code for a GT_LCL_FLD node.
4184 // tree - the GT_LCL_FLD node
4186 void CodeGen::genCodeForLclFld(GenTreeLclFld* tree)
4188 assert(tree->OperIs(GT_LCL_FLD));
4190 var_types targetType = tree->TypeGet();
4191 regNumber targetReg = tree->gtRegNum;
4193 noway_assert(targetReg != REG_NA);
4196 // Loading of TYP_SIMD12 (i.e. Vector3) field
4197 if (targetType == TYP_SIMD12)
4199 genLoadLclTypeSIMD12(tree);
4204 noway_assert(targetType != TYP_STRUCT);
4206 emitAttr size = emitTypeSize(targetType);
4207 unsigned offs = tree->gtLclOffs;
4208 unsigned varNum = tree->gtLclNum;
4209 assert(varNum < compiler->lvaCount);
4211 getEmitter()->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offs);
4213 genProduceReg(tree);
4216 //------------------------------------------------------------------------
4217 // genCodeForLclVar: Produce code for a GT_LCL_VAR node.
4220 // tree - the GT_LCL_VAR node
4222 void CodeGen::genCodeForLclVar(GenTreeLclVar* tree)
4224 assert(tree->OperIs(GT_LCL_VAR));
4226 // lcl_vars are not defs
4227 assert((tree->gtFlags & GTF_VAR_DEF) == 0);
4229 bool isRegCandidate = compiler->lvaTable[tree->gtLclNum].lvIsRegCandidate();
4231 // If this is a register candidate that has been spilled, genConsumeReg() will
4232 // reload it at the point of use. Otherwise, if it's not in a register, we load it here.
4234 if (!isRegCandidate && !(tree->gtFlags & GTF_SPILLED))
4236 #if defined(FEATURE_SIMD) && defined(_TARGET_X86_)
4237 // Loading of TYP_SIMD12 (i.e. Vector3) variable
4238 if (tree->TypeGet() == TYP_SIMD12)
4240 genLoadLclTypeSIMD12(tree);
4243 #endif // defined(FEATURE_SIMD) && defined(_TARGET_X86_)
4245 getEmitter()->emitIns_R_S(ins_Load(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(tree->gtLclNum)),
4246 emitTypeSize(tree), tree->gtRegNum, tree->gtLclNum, 0);
4247 genProduceReg(tree);
4251 //------------------------------------------------------------------------
4252 // genCodeForStoreLclFld: Produce code for a GT_STORE_LCL_FLD node.
4255 // tree - the GT_STORE_LCL_FLD node
4257 void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree)
4259 assert(tree->OperIs(GT_STORE_LCL_FLD));
4261 var_types targetType = tree->TypeGet();
4262 noway_assert(targetType != TYP_STRUCT);
4263 assert(!varTypeIsFloating(targetType) || (targetType == tree->gtOp1->TypeGet()));
4266 // storing of TYP_SIMD12 (i.e. Vector3) field
4267 if (tree->TypeGet() == TYP_SIMD12)
4269 genStoreLclTypeSIMD12(tree);
4272 #endif // FEATURE_SIMD
4274 GenTree* op1 = tree->gtGetOp1();
4275 genConsumeRegs(op1);
4276 getEmitter()->emitInsBinary(ins_Store(targetType), emitTypeSize(tree), tree, op1);
4278 genUpdateLife(tree);
4281 //------------------------------------------------------------------------
4282 // genCodeForStoreLclVar: Produce code for a GT_STORE_LCL_VAR node.
4285 // tree - the GT_STORE_LCL_VAR node
4287 void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* tree)
4289 assert(tree->OperIs(GT_STORE_LCL_VAR));
4291 var_types targetType = tree->TypeGet();
4292 regNumber targetReg = tree->gtRegNum;
4293 emitter* emit = getEmitter();
4295 GenTree* op1 = tree->gtGetOp1();
// The "var = call" case, where the call returns a multi-reg value,
// is handled separately.
4299 if (op1->gtSkipReloadOrCopy()->IsMultiRegCall())
4301 genMultiRegCallStoreToLocal(tree);
4305 noway_assert(targetType != TYP_STRUCT);
4306 assert(!varTypeIsFloating(targetType) || (targetType == op1->TypeGet()));
4308 unsigned lclNum = tree->gtLclNum;
4309 LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
4311 // Ensure that lclVar nodes are typed correctly.
4312 assert(!varDsc->lvNormalizeOnStore() || (targetType == genActualType(varDsc->TypeGet())));
4314 #if !defined(_TARGET_64BIT_)
4315 if (targetType == TYP_LONG)
4317 genStoreLongLclVar(tree);
4320 #endif // !defined(_TARGET_64BIT_)
4323 // storing of TYP_SIMD12 (i.e. Vector3) field
4324 if (targetType == TYP_SIMD12)
4326 genStoreLclTypeSIMD12(tree);
4330 if (varTypeIsSIMD(targetType) && (targetReg != REG_NA) && op1->IsCnsIntOrI())
4332 // This is only possible for a zero-init.
4333 noway_assert(op1->IsIntegralConst(0));
4334 genSIMDZero(targetType, varDsc->lvBaseType, targetReg);
4335 genProduceReg(tree);
4338 #endif // FEATURE_SIMD
4340 genConsumeRegs(op1);
4342 if (targetReg == REG_NA)
4345 emit->emitInsStoreLcl(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)),
4346 emitTypeSize(targetType), tree);
4347 varDsc->lvRegNum = REG_STK;
4351 // Look for the case where we have a constant zero which we've marked for reuse,
4352 // but which isn't actually in the register we want. In that case, it's better to create
4353 // zero in the target register, because an xor is smaller than a copy. Note that we could
4354 // potentially handle this in the register allocator, but we can't always catch it there
4355 // because the target may not have a register allocated for it yet.
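// Illustrative sketch: rather than copying a reused zero with "mov eax, ecx", resetting
// the node below lets genSetRegToConst emit "xor eax, eax", which is never larger.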
4356 if (op1->isUsedFromReg() && (op1->gtRegNum != targetReg) && (op1->IsIntegralConst(0) || op1->IsFPZero()))
4358 op1->gtRegNum = REG_NA;
4359 op1->ResetReuseRegVal();
4360 op1->SetContained();
4363 if (!op1->isUsedFromReg())
// Currently, we assume that the non-reg source of a GT_STORE_LCL_VAR writing to a register
// must be a constant. However, in the future we might want to support an operand used from
// memory. This is tricky because we would have to decide that it can be used from memory
// before register allocation, and then mark the node as always requiring a register - which
// we always assume now anyway, but once we "optimize" that we'll have to take cases like
// this into account.
4372 assert((op1->gtRegNum == REG_NA) && op1->OperIsConst());
4373 genSetRegToConst(targetReg, targetType, op1);
4375 else if (op1->gtRegNum != targetReg)
4377 assert(op1->gtRegNum != REG_NA);
4378 emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(tree), tree, op1);
4383 if (targetReg != REG_NA)
4385 genProduceReg(tree);
4389 //------------------------------------------------------------------------
4390 // genCodeForIndexAddr: Produce code for a GT_INDEX_ADDR node.
4393 // tree - the GT_INDEX_ADDR node
4395 void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node)
4397 GenTree* const base = node->Arr();
4398 GenTree* const index = node->Index();
4400 genConsumeReg(base);
4401 genConsumeReg(index);
4403 // NOTE: `genConsumeReg` marks the consumed register as not a GC pointer, as it assumes that the input registers
4404 // die at the first instruction generated by the node. This is not the case for `INDEX_ADDR`, however, as the
4405 // base register is multiply-used. As such, we need to mark the base register as containing a GC pointer until
4406 // we are finished generating the code for this node.
4408 gcInfo.gcMarkRegPtrVal(base->gtRegNum, base->TypeGet());
4409 assert(!varTypeIsGC(index->TypeGet()));
4411 regNumber tmpReg = REG_NA;
4413 // Generate the bounds check if necessary.
4414 if ((node->gtFlags & GTF_INX_RNGCHK) != 0)
// Create a GT_IND(GT_LEA) tree for the array length access.
4417 GenTreeAddrMode arrLenAddr(base->TypeGet(), base, nullptr, 0, node->gtLenOffset);
4418 arrLenAddr.gtRegNum = REG_NA;
4419 arrLenAddr.SetContained();
4421 GenTreeIndir arrLen = indirForm(TYP_INT, &arrLenAddr);
4423 #ifdef _TARGET_64BIT_
4424 // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case that the index
4425 // is a native int on a 64-bit platform, we will need to widen the array length and the compare.
4426 if (index->TypeGet() == TYP_I_IMPL)
4428 // Load the array length into a register.
4429 tmpReg = node->GetSingleTempReg();
4430 arrLen.gtRegNum = tmpReg;
4431 arrLen.ClearContained();
4432 getEmitter()->emitInsLoadInd(ins_Load(TYP_INT), EA_4BYTE, arrLen.gtRegNum, &arrLen);
4437 assert(varTypeIsIntegral(index->TypeGet()));
4439 arrLen.gtRegNum = REG_NA;
4440 arrLen.SetContained();
4443 // Generate the range check.
4444 getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(TYP_I_IMPL), index, &arrLen);
4445 genJumpToThrowHlpBlk(EJ_jae, SCK_RNGCHK_FAIL, node->gtIndRngFailBB);
4448 // Compute the address of the array element.
switch (node->gtElemSize)
{
    case 1:
    case 2:
    case 4:
    case 8:
        getEmitter()->emitIns_R_ARX(INS_lea, emitTypeSize(node), node->gtRegNum, base->gtRegNum, index->gtRegNum,
                                    node->gtElemSize, static_cast<int>(node->gtElemOffset));
        break;
    default:
        // Multiply the index by the element size.
        //
        // TODO-CQ: this should really just use `imul index, index, #gtElemSize`
4464 tmpReg = (tmpReg == REG_NA) ? node->GetSingleTempReg() : tmpReg;
4465 CodeGen::genSetRegToIcon(tmpReg, (ssize_t)node->gtElemSize, TYP_INT);
4466 inst_RV_RV(INS_imul, tmpReg, index->gtRegNum);
4467 getEmitter()->emitIns_R_ARX(INS_lea, emitTypeSize(node), node->gtRegNum, base->gtRegNum, tmpReg, 1,
                                    static_cast<int>(node->gtElemOffset));
        break;
}
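// Sketch of the two paths above (registers and offsets illustrative): when the element
// size is a valid LEA scale (1, 2, 4 or 8),
//     lea rax, [rbx + rcx*4 + <elemOffset>]
// and otherwise
//     mov  r8d, <elemSize>
//     imul r8, rcx
//     lea  rax, [rbx + r8 + <elemOffset>]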
4473 gcInfo.gcMarkRegSetNpt(base->gtGetRegMask());
4475 genProduceReg(node);
4478 //------------------------------------------------------------------------
4479 // genCodeForIndir: Produce code for a GT_IND node.
4482 // tree - the GT_IND node
4484 void CodeGen::genCodeForIndir(GenTreeIndir* tree)
4486 assert(tree->OperIs(GT_IND));
4489 // Handling of Vector3 type values loaded through indirection.
4490 if (tree->TypeGet() == TYP_SIMD12)
4492 genLoadIndTypeSIMD12(tree);
4495 #endif // FEATURE_SIMD
4497 var_types targetType = tree->TypeGet();
4498 emitter* emit = getEmitter();
4500 GenTree* addr = tree->Addr();
4501 if (addr->IsCnsIntOrI() && addr->IsIconHandle(GTF_ICON_TLS_HDL))
4503 noway_assert(EA_ATTR(genTypeSize(targetType)) == EA_PTRSIZE);
4504 emit->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, tree->gtRegNum, FLD_GLOBAL_FS,
4505 (int)addr->gtIntCon.gtIconVal);
4509 genConsumeAddress(addr);
4510 emit->emitInsLoadInd(ins_Load(targetType), emitTypeSize(tree), tree->gtRegNum, tree);
4513 genProduceReg(tree);
4516 void CodeGen::genRegCopy(GenTree* treeNode)
4518 assert(treeNode->OperGet() == GT_COPY);
4519 GenTree* op1 = treeNode->gtOp.gtOp1;
4521 if (op1->IsMultiRegCall())
4525 GenTreeCopyOrReload* copyTree = treeNode->AsCopyOrReload();
4526 GenTreeCall* call = op1->AsCall();
4527 ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
4528 unsigned regCount = retTypeDesc->GetReturnRegCount();
4530 for (unsigned i = 0; i < regCount; ++i)
4532 var_types type = retTypeDesc->GetReturnRegType(i);
4533 regNumber fromReg = call->GetRegNumByIdx(i);
4534 regNumber toReg = copyTree->GetRegNumByIdx(i);
// A multi-reg GT_COPY node will have a valid register only for those
// positions for which the corresponding result reg of the call node needs to be copied.
4539 if (toReg != REG_NA)
4541 assert(toReg != fromReg);
4542 inst_RV_RV(ins_Copy(type), toReg, fromReg, type);
4548 var_types targetType = treeNode->TypeGet();
4549 regNumber targetReg = treeNode->gtRegNum;
4550 assert(targetReg != REG_NA);
4552 // Check whether this node and the node from which we're copying the value have
4553 // different register types. This can happen if (currently iff) we have a SIMD
4554 // vector type that fits in an integer register, in which case it is passed as
4555 // an argument, or returned from a call, in an integer register and must be
4556 // copied if it's in an xmm register.
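// E.g. (illustrative) copying a value from XMM0 into an integer register emits
//     movd eax, xmm0
// via ins_CopyFloatToInt, and the reverse direction uses ins_CopyIntToFloat.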
4558 bool srcFltReg = (varTypeIsFloating(op1) || varTypeIsSIMD(op1));
4559 bool tgtFltReg = (varTypeIsFloating(treeNode) || varTypeIsSIMD(treeNode));
if (srcFltReg != tgtFltReg)
{
    instruction ins;
    regNumber   fpReg;
    regNumber   intReg;
    if (tgtFltReg)
    {
        ins    = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet());
        fpReg  = targetReg;
        intReg = op1->gtRegNum;
    }
    else
    {
        ins    = ins_CopyFloatToInt(op1->TypeGet(), treeNode->TypeGet());
        intReg = targetReg;
        fpReg  = op1->gtRegNum;
    }
    inst_RV_RV(ins, fpReg, intReg, targetType);
}
else
{
    inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
}
4586 // The lclVar will never be a def.
4587 // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
4588 // appropriately set the gcInfo for the copied value.
4589 // If not, there are two cases we need to handle:
4590 // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
4591 // will remain live in its original register.
4592 // genProduceReg() will appropriately set the gcInfo for the copied value,
4593 // and genConsumeReg will reset it.
4594 // - Otherwise, we need to update register info for the lclVar.
4596 GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
4597 assert((lcl->gtFlags & GTF_VAR_DEF) == 0);
4599 if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
4601 LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
4603 // If we didn't just spill it (in genConsumeReg, above), then update the register info
4604 if (varDsc->lvRegNum != REG_STK)
4606 // The old location is dying
4607 genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
4609 gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum));
4611 genUpdateVarReg(varDsc, treeNode);
4613 // The new location is going live
4614 genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
4620 genProduceReg(treeNode);
4623 //------------------------------------------------------------------------
4624 // genCodeForStoreInd: Produce code for a GT_STOREIND node.
4627 // tree - the GT_STOREIND node
4629 void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
4631 assert(tree->OperIs(GT_STOREIND));
4634 // Storing Vector3 of size 12 bytes through indirection
4635 if (tree->TypeGet() == TYP_SIMD12)
4637 genStoreIndTypeSIMD12(tree);
4640 #endif // FEATURE_SIMD
4642 GenTree* data = tree->Data();
4643 GenTree* addr = tree->Addr();
4644 var_types targetType = tree->TypeGet();
4646 assert(!varTypeIsFloating(targetType) || (targetType == data->TypeGet()));
4648 GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree, data);
4649 if (writeBarrierForm != GCInfo::WBF_NoBarrier)
4651 // data and addr must be in registers.
4652 // Consume both registers so that any copies of interfering registers are taken care of.
4653 genConsumeOperands(tree);
4655 if (genEmitOptimizedGCWriteBarrier(writeBarrierForm, addr, data))
4660 // At this point, we should not have any interference.
4661 // That is, 'data' must not be in REG_ARG_0, as that is where 'addr' must go.
4662 noway_assert(data->gtRegNum != REG_ARG_0);
4664 // addr goes in REG_ARG_0
4665 genCopyRegIfNeeded(addr, REG_ARG_0);
4667 // data goes in REG_ARG_1
4668 genCopyRegIfNeeded(data, REG_ARG_1);
4670 genGCWriteBarrier(tree, writeBarrierForm);
4674 bool dataIsUnary = false;
4675 bool isRMWMemoryOp = tree->IsRMWMemoryOp();
4676 GenTree* rmwSrc = nullptr;
4678 // We must consume the operands in the proper execution order, so that liveness is
4679 // updated appropriately.
4680 genConsumeAddress(addr);
// If the tree represents an RMW memory op, then its data is a non-leaf node marked as contained,
// and the non-indir operand of data is the source of the RMW memory op.
4686 assert(data->isContained() && !data->OperIsLeaf());
4688 GenTree* rmwDst = nullptr;
4690 dataIsUnary = (GenTree::OperIsUnary(data->OperGet()) != 0);
4693 if (tree->IsRMWDstOp1())
4695 rmwDst = data->gtGetOp1();
4696 rmwSrc = data->gtGetOp2();
4700 assert(tree->IsRMWDstOp2());
4701 rmwDst = data->gtGetOp2();
4702 rmwSrc = data->gtGetOp1();
4705 genConsumeRegs(rmwSrc);
// *(p) = oper *(p): Here addr = p, rmwSrc = rmwDst = *(p) i.e. GT_IND(p)
// For unary RMW ops, the src and dst of the RMW memory op are the same. Lower
// clears the operand counts on rmwSrc, so we don't need to perform a
// genConsumeReg() on it.
4713 assert(tree->IsRMWDstOp1());
4714 rmwSrc = data->gtGetOp1();
4715 rmwDst = data->gtGetOp1();
4716 assert(rmwSrc->isUsedFromMemory());
4719 assert(rmwSrc != nullptr);
4720 assert(rmwDst != nullptr);
4721 assert(Lowering::IndirsAreEquivalent(rmwDst, tree));
4725 genConsumeRegs(data);
4732 // generate code for unary RMW memory ops like neg/not
4733 getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(tree), tree);
4737 if (data->OperIsShiftOrRotate())
4739 // Generate code for shift RMW memory ops.
4740 // The data address needs to be op1 (it must be [addr] = [addr] <shift> <amount>, not [addr] =
4741 // <amount> <shift> [addr]).
4742 assert(tree->IsRMWDstOp1());
4743 assert(rmwSrc == data->gtGetOp2());
4744 genCodeForShiftRMW(tree);
4746 else if (data->OperGet() == GT_ADD && (rmwSrc->IsIntegralConst(1) || rmwSrc->IsIntegralConst(-1)))
// Generate "inc/dec [mem]" instead of "add/sub [mem], 1".
//
// Notes:
//  1) Global morph transforms GT_SUB(x, +/-1) into GT_ADD(x, -/+1).
4752 // 2) TODO-AMD64: Debugger routine NativeWalker::Decode() runs into
4753 // an assert while decoding ModR/M byte of "inc dword ptr [rax]".
4754 // It is not clear whether Decode() can handle all possible
4755 // addr modes with inc/dec. For this reason, inc/dec [mem]
4756 // is not generated while generating debuggable code. Update
4757 // the above if condition once Decode() routine is fixed.
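// E.g. "*p = *p + 1" becomes "inc dword ptr [rax]" rather than
// "add dword ptr [rax], 1", saving the one-byte immediate (sketch only).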
4758 assert(rmwSrc->isContainedIntOrIImmed());
4759 instruction ins = rmwSrc->IsIntegralConst(1) ? INS_inc : INS_dec;
4760 getEmitter()->emitInsRMW(ins, emitTypeSize(tree), tree);
4764 // generate code for remaining binary RMW memory ops like add/sub/and/or/xor
getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(tree),
                         tree, rmwSrc);
4772 getEmitter()->emitInsStoreInd(ins_Store(data->TypeGet()), emitTypeSize(tree), tree);
4777 //------------------------------------------------------------------------
4778 // genCodeForSwap: Produce code for a GT_SWAP node.
4781 // tree - the GT_SWAP node
4783 void CodeGen::genCodeForSwap(GenTreeOp* tree)
4785 assert(tree->OperIs(GT_SWAP));
4787 // Swap is only supported for lclVar operands that are enregistered
4788 // We do not consume or produce any registers. Both operands remain enregistered.
4789 // However, the gc-ness may change.
4790 assert(genIsRegCandidateLocal(tree->gtOp1) && genIsRegCandidateLocal(tree->gtOp2));
4792 GenTreeLclVarCommon* lcl1 = tree->gtOp1->AsLclVarCommon();
4793 LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]);
4794 var_types type1 = varDsc1->TypeGet();
4795 GenTreeLclVarCommon* lcl2 = tree->gtOp2->AsLclVarCommon();
4796 LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]);
4797 var_types type2 = varDsc2->TypeGet();
// We must have both int regs or both fp regs
4800 assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2));
4802 // FP swap is not yet implemented (and should have NYI'd in LSRA)
4803 assert(!varTypeIsFloating(type1));
4805 regNumber oldOp1Reg = lcl1->gtRegNum;
4806 regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg);
4807 regNumber oldOp2Reg = lcl2->gtRegNum;
4808 regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg);
4810 // We don't call genUpdateVarReg because we don't have a tree node with the new register.
4811 varDsc1->lvRegNum = oldOp2Reg;
4812 varDsc2->lvRegNum = oldOp1Reg;
4815 emitAttr size = EA_PTRSIZE;
if (varTypeGCtype(type1) != varTypeGCtype(type2))
{
    // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers.
    // Otherwise it will leave them alone, which is correct if they have the same GC-ness.
    size = EA_GCREF;
}
4822 inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size);
4824 // Update the gcInfo.
4825 // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output)
4826 gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
4827 gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
4829 // gcMarkRegPtrVal will do the appropriate thing for non-gc types.
4830 // It will also dump the updates.
4831 gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1);
4832 gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2);
4835 //------------------------------------------------------------------------
4836 // genEmitOptimizedGCWriteBarrier: Generate write barrier store using the optimized
4837 // helper functions.
4840 // writeBarrierForm - the write barrier form to use
4841 // addr - the address at which to do the store
4842 // data - the data to store
4845 // true if an optimized write barrier form was used, false if not. If this
4846 // function returns false, the caller must emit a "standard" write barrier.
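// E.g. (x86 sketch) storing the value in EAX to an address known to be in the GC heap
// becomes
//     call CORINFO_HELP_ASSIGN_REF_EAX
// after the target address has been copied into EDX (REG_WRITE_BARRIER) below.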
4848 bool CodeGen::genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data)
4850 assert(writeBarrierForm != GCInfo::WBF_NoBarrier);
4852 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
if (!genUseOptimizedWriteBarriers(writeBarrierForm))
{
    return false;
}

static const int regToHelper[2][8] = {
    // If the target is known to be in managed memory
    {
        CORINFO_HELP_ASSIGN_REF_EAX, // EAX
        CORINFO_HELP_ASSIGN_REF_ECX, // ECX
        -1,                          // EDX (always the target address)
        CORINFO_HELP_ASSIGN_REF_EBX, // EBX
        -1,                          // ESP
        CORINFO_HELP_ASSIGN_REF_EBP, // EBP
        CORINFO_HELP_ASSIGN_REF_ESI, // ESI
        CORINFO_HELP_ASSIGN_REF_EDI, // EDI
    },

    // Don't know if the target is in managed memory
    {
        CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, // EAX
        CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, // ECX
        -1,                                  // EDX (always the target address)
        CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, // EBX
        -1,                                  // ESP
        CORINFO_HELP_CHECKED_ASSIGN_REF_EBP, // EBP
        CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, // ESI
        CORINFO_HELP_CHECKED_ASSIGN_REF_EDI, // EDI
    },
};
4884 noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX);
4885 noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX);
4886 noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX);
4887 noway_assert(regToHelper[0][REG_ESP] == -1);
4888 noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP);
4889 noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI);
4890 noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI);
4892 noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX);
4893 noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX);
4894 noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX);
4895 noway_assert(regToHelper[1][REG_ESP] == -1);
4896 noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP);
4897 noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI);
4898 noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI);
4900 regNumber reg = data->gtRegNum;
4901 noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER));
4903 // Generate the following code:
4905 // call write_barrier_helper_reg
// addr goes in REG_WRITE_BARRIER (EDX on x86)
4908 genCopyRegIfNeeded(addr, REG_WRITE_BARRIER);
4910 unsigned tgtAnywhere = 0;
if (writeBarrierForm != GCInfo::WBF_BarrierUnchecked)
{
    tgtAnywhere = 1;
}
4916 // We might want to call a modified version of genGCWriteBarrier() to get the benefit of
4917 // the FEATURE_COUNT_GC_WRITE_BARRIERS code there, but that code doesn't look like it works
4918 // with rationalized RyuJIT IR. So, for now, just emit the helper call directly here.
genEmitHelperCall(regToHelper[tgtAnywhere][reg],
                  0,           // argSize
                  EA_PTRSIZE); // retSize

return true;
#else  // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS
return false;
#endif // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS
4930 // Produce code for a GT_CALL node
4931 void CodeGen::genCallInstruction(GenTreeCall* call)
4933 genAlignStackBeforeCall(call);
4935 gtCallTypes callType = (gtCallTypes)call->gtCallType;
4937 IL_OFFSETX ilOffset = BAD_IL_OFFSET;
4939 // all virtuals should have been expanded into a control expression
4940 assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
4942 // Insert a GS check if necessary
4943 if (call->IsTailCallViaHelper())
4945 if (compiler->getNeedsGSSecurityCookie())
4947 #if FEATURE_FIXED_OUT_ARGS
4948 // If either of the conditions below is true, we will need a temporary register in order to perform the GS
4949 // cookie check. When FEATURE_FIXED_OUT_ARGS is disabled, we save and restore the temporary register using
4950 // push/pop. When FEATURE_FIXED_OUT_ARGS is enabled, however, we need an alternative solution. For now,
// though, the tail prefix is ignored on all platforms that use fixed out args, so we should never hit this case.
4953 assert(compiler->gsGlobalSecurityCookieAddr == nullptr);
4954 assert((int)compiler->gsGlobalSecurityCookieVal == (ssize_t)compiler->gsGlobalSecurityCookieVal);
4956 genEmitGSCookieCheck(true);
4960 // Consume all the arg regs
4961 for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext())
4963 assert(list->OperIsList());
4965 GenTree* argNode = list->Current();
4967 fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy());
4968 assert(curArgTabEntry);
4970 if (curArgTabEntry->regNum == REG_STK)
4975 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4976 // Deal with multi register passed struct args.
4977 if (argNode->OperGet() == GT_FIELD_LIST)
4979 GenTreeFieldList* fieldListPtr = argNode->AsFieldList();
4980 unsigned iterationNum = 0;
4981 for (; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), iterationNum++)
4983 GenTree* putArgRegNode = fieldListPtr->gtOp.gtOp1;
4984 assert(putArgRegNode->gtOper == GT_PUTARG_REG);
4985 regNumber argReg = REG_NA;
4987 if (iterationNum == 0)
4989 argReg = curArgTabEntry->regNum;
4993 assert(iterationNum == 1);
4994 argReg = curArgTabEntry->otherRegNum;
4997 genConsumeReg(putArgRegNode);
4999 // Validate the putArgRegNode has the right type.
5000 assert(putArgRegNode->TypeGet() ==
5001 compiler->GetTypeFromClassificationAndSizes(curArgTabEntry->structDesc
5002 .eightByteClassifications[iterationNum],
5003 curArgTabEntry->structDesc
5004 .eightByteSizes[iterationNum]));
5005 if (putArgRegNode->gtRegNum != argReg)
5007 inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), false), argReg, putArgRegNode->gtRegNum);
5012 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
5014 regNumber argReg = curArgTabEntry->regNum;
5015 genConsumeReg(argNode);
5016 if (argNode->gtRegNum != argReg)
5018 inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), false), argReg, argNode->gtRegNum);
5023 // In the case of a varargs call,
5024 // the ABI dictates that if we have floating point args,
5025 // we must pass the enregistered arguments in both the
// integer and floating point registers, so let's do that.
5027 if (call->IsVarargs() && varTypeIsFloating(argNode))
5029 regNumber targetReg = compiler->getCallArgIntRegister(argNode->gtRegNum);
5030 instruction ins = ins_CopyFloatToInt(argNode->TypeGet(), TYP_LONG);
5031 inst_RV_RV(ins, argNode->gtRegNum, targetReg);
5033 #endif // FEATURE_VARARG
5036 #if defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// The call will pop its arguments.
// Compute stackArgBytes by summing the size of each putarg_stk:
5039 ssize_t stackArgBytes = 0;
5040 GenTree* args = call->gtCallArgs;
while (args != nullptr)
{
    GenTree* arg = args->gtOp.gtOp1;
5044 if (arg->OperGet() != GT_ARGPLACE && !(arg->gtFlags & GTF_LATE_ARG))
5046 if (arg->OperGet() == GT_PUTARG_STK)
5048 GenTree* source = arg->gtOp.gtOp1;
5049 unsigned size = arg->AsPutArgStk()->getArgSize();
5050 stackArgBytes += size;
5052 fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
5053 assert(curArgTabEntry);
5054 assert(size == (curArgTabEntry->numSlots * TARGET_POINTER_SIZE));
5055 #ifdef FEATURE_PUT_STRUCT_ARG_STK
5056 if (source->TypeGet() == TYP_STRUCT)
5058 GenTreeObj* obj = source->AsObj();
5059 unsigned argBytes = (unsigned)roundUp(obj->gtBlkSize, TARGET_POINTER_SIZE);
5060 assert((curArgTabEntry->numSlots * TARGET_POINTER_SIZE) == argBytes);
5062 #endif // FEATURE_PUT_STRUCT_ARG_STK
    args = args->gtOp.gtOp2;
}
5068 #endif // defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5070 // Insert a null check on "this" pointer if asked.
5071 if (call->NeedsNullCheck())
5073 const regNumber regThis = genGetThisArgReg(call);
5074 getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
5077 // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method.
5078 CORINFO_METHOD_HANDLE methHnd;
5079 GenTree* target = call->gtControlExpr;
5080 if (callType == CT_INDIRECT)
5082 assert(target == nullptr);
5083 target = call->gtCallAddr;
5088 methHnd = call->gtCallMethHnd;
5091 CORINFO_SIG_INFO* sigInfo = nullptr;
5093 // Pass the call signature information down into the emitter so the emitter can associate
5094 // native call sites with the signatures they were generated from.
5095 if (callType != CT_HELPER)
5097 sigInfo = call->callSig;
5101 // If fast tail call, then we are done. In this case we setup the args (both reg args
5102 // and stack args in incoming arg area) and call target in rax. Epilog sequence would
5103 // generate "jmp rax".
5104 if (call->IsFastTailCall())
5106 // Don't support fast tail calling JIT helpers
5107 assert(callType != CT_HELPER);
5109 // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
5110 assert(target != nullptr);
5112 genConsumeReg(target);
5113 genCopyRegIfNeeded(target, REG_RAX);
// For a pinvoke to unmanaged code we emit a label to clear
5118 // the GC pointer state before the callsite.
5119 // We can't utilize the typical lazy killing of GC pointers
5120 // at (or inside) the callsite.
5121 if (compiler->killGCRefs(call))
5123 genDefineTempLabel(genCreateTempLabel());
5126 // Determine return value size(s).
5127 ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
5128 emitAttr retSize = EA_PTRSIZE;
5129 emitAttr secondRetSize = EA_UNKNOWN;
5131 if (call->HasMultiRegRetVal())
5133 retSize = emitTypeSize(retTypeDesc->GetReturnRegType(0));
5134 secondRetSize = emitTypeSize(retTypeDesc->GetReturnRegType(1));
5138 assert(!varTypeIsStruct(call));
if (call->gtType == TYP_REF)
{
    retSize = EA_GCREF;
}
else if (call->gtType == TYP_BYREF)
{
    retSize = EA_BYREF;
}
5150 bool fPossibleSyncHelperCall = false;
5151 CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF;
5153 // We need to propagate the IL offset information to the call instruction, so we can emit
5154 // an IL to native mapping record for the call, to support managed return value debugging.
5155 // We don't want tail call helper calls that were converted from normal calls to get a record,
5156 // so we skip this hash table lookup logic in that case.
5157 if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
5159 (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
5162 #if defined(_TARGET_X86_)
5163 bool fCallerPop = call->CallerPop();
#if defined(UNIX_X86_ABI)
if (!call->IsUnmanaged())
5168 CorInfoCallConv callConv = CORINFO_CALLCONV_DEFAULT;
5170 if ((callType != CT_HELPER) && call->callSig)
5172 callConv = call->callSig->callConv;
5175 fCallerPop |= IsCallerPop(callConv);
5177 #endif // UNIX_X86_ABI
5179 // If the callee pops the arguments, we pass a positive value as the argSize, and the emitter will
5180 // adjust its stack level accordingly.
5181 // If the caller needs to explicitly pop its arguments, we must pass a negative value, and then do the
5182 // pop when we're done.
5183 ssize_t argSizeForEmitter = stackArgBytes;
if (fCallerPop)
{
    argSizeForEmitter = -stackArgBytes;
}
5188 #endif // defined(_TARGET_X86_)
// When this is a PInvoke call and the call type is a USER function, we issue VZEROUPPER here
// if the function contains 256-bit AVX instructions; this avoids the AVX-256 to legacy SSE
// transition penalty, assuming the unmanaged function contains legacy SSE instructions.
// To limit the code size impact, we only issue VZEROUPPER before the PInvoke call, not
// after it, because the transition penalty from legacy SSE to AVX only happens when there is
// a preceding 256-bit AVX to legacy SSE transition penalty.
5196 if (call->IsPInvoke() && (call->gtCallType == CT_USER_FUNC) && getEmitter()->Contains256bitAVX())
5198 assert(compiler->canUseVexEncoding());
5199 instGen(INS_vzeroupper);
5202 if (target != nullptr)
5205 if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT))
// On x86, we need to generate a very specific pattern for indirect VSD calls:
//
//    3-byte nop
//    call dword ptr [eax]
5212 // Where EAX is also used as an argument to the stub dispatch helper. Make
5213 // sure that the call target address is computed into EAX in this case.
5215 assert(compiler->virtualStubParamInfo->GetReg() == REG_VIRTUAL_STUB_TARGET);
5217 assert(target->isContainedIndir());
5218 assert(target->OperGet() == GT_IND);
5220 GenTree* addr = target->AsIndir()->Addr();
5221 assert(addr->isUsedFromReg());
5223 genConsumeReg(addr);
5224 genCopyRegIfNeeded(addr, REG_VIRTUAL_STUB_TARGET);
5226 getEmitter()->emitIns_Nop(3);
5229 getEmitter()->emitIns_Call(emitter::EmitCallType(emitter::EC_INDIR_ARD),
5231 INDEBUG_LDISASM_COMMA(sigInfo)
5235 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
5236 gcInfo.gcVarPtrSetCur,
5237 gcInfo.gcRegGCrefSetCur,
5238 gcInfo.gcRegByrefSetCur,
5239 ilOffset, REG_VIRTUAL_STUB_TARGET, REG_NA, 1, 0);
5244 if (target->isContainedIndir())
5246 if (target->AsIndir()->HasBase() && target->AsIndir()->Base()->isContainedIntOrIImmed())
5248 // Note that if gtControlExpr is an indir of an absolute address, we mark it as
// contained only if it can be encoded as a PC-relative offset.
5250 assert(target->AsIndir()->Base()->AsIntConCommon()->FitsInAddrBase(compiler));
5253 genEmitCall(emitter::EC_FUNC_TOKEN_INDIR,
5255 INDEBUG_LDISASM_COMMA(sigInfo)
5256 (void*) target->AsIndir()->Base()->AsIntConCommon()->IconValue()
5257 X86_ARG(argSizeForEmitter),
5259 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
5266 genEmitCall(emitter::EC_INDIR_ARD,
5268 INDEBUG_LDISASM_COMMA(sigInfo)
5270 X86_ARG(argSizeForEmitter),
5272 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
5279 // We have already generated code for gtControlExpr evaluating it into a register.
5280 // We just need to emit "call reg" in this case.
5281 assert(genIsValidIntReg(target->gtRegNum));
5284 genEmitCall(emitter::EC_INDIR_R,
5286 INDEBUG_LDISASM_COMMA(sigInfo)
5288 X86_ARG(argSizeForEmitter),
5290 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
5292 genConsumeReg(target));
5296 #ifdef FEATURE_READYTORUN_COMPILER
5297 else if (call->gtEntryPoint.addr != nullptr)
5300 genEmitCall((call->gtEntryPoint.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN
5301 : emitter::EC_FUNC_TOKEN_INDIR,
5303 INDEBUG_LDISASM_COMMA(sigInfo)
5304 (void*) call->gtEntryPoint.addr
5305 X86_ARG(argSizeForEmitter),
5307 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
5314 // Generate a direct call to a non-virtual user defined or helper method
5315 assert(callType == CT_HELPER || callType == CT_USER_FUNC);
5317 void* addr = nullptr;
5318 if (callType == CT_HELPER)
5320 // Direct call to a helper method.
5321 helperNum = compiler->eeGetHelperNum(methHnd);
5322 noway_assert(helperNum != CORINFO_HELP_UNDEF);
5324 void* pAddr = nullptr;
5325 addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
5326 assert(pAddr == nullptr);
5328 // tracking of region protected by the monitor in synchronized methods
5329 if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
5331 fPossibleSyncHelperCall = true;
5336 // Direct call to a non-virtual user function.
5337 addr = call->gtDirectCallAddress;
5340 assert(addr != nullptr);
5342 // Non-virtual direct calls to known addresses
5345 genEmitCall(emitter::EC_FUNC_TOKEN,
5347 INDEBUG_LDISASM_COMMA(sigInfo)
5349 X86_ARG(argSizeForEmitter),
5351 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
// If it was a pinvoke, we may have needed to get the address of a label.
5357 if (genPendingCallLabel)
5359 assert(call->IsUnmanaged());
5360 genDefineTempLabel(genPendingCallLabel);
5361 genPendingCallLabel = nullptr;
// All callee arg registers are trashed and no longer contain any GC pointers.
5366 // TODO-XArch-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here?
5367 // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other
5368 // registers from RBM_CALLEE_TRASH.
5369 assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
5370 assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
5371 gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
5372 gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;
5374 var_types returnType = call->TypeGet();
5375 if (returnType != TYP_VOID)
5378 if (varTypeIsFloating(returnType))
5380 // Spill the value from the fp stack.
5381 // Then, load it into the target register.
5382 call->gtFlags |= GTF_SPILL;
5383 regSet.rsSpillFPStack(call);
5384 call->gtFlags |= GTF_SPILLED;
5385 call->gtFlags &= ~GTF_SPILL;
5388 #endif // _TARGET_X86_
5390 regNumber returnReg;
5392 if (call->HasMultiRegRetVal())
5394 assert(retTypeDesc != nullptr);
5395 unsigned regCount = retTypeDesc->GetReturnRegCount();
5397 // If regs allocated to call node are different from ABI return
5398 // regs in which the call has returned its result, move the result
5399 // to regs allocated to call node.
5400 for (unsigned i = 0; i < regCount; ++i)
5402 var_types regType = retTypeDesc->GetReturnRegType(i);
5403 returnReg = retTypeDesc->GetABIReturnReg(i);
5404 regNumber allocatedReg = call->GetRegNumByIdx(i);
5405 if (returnReg != allocatedReg)
5407 inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
5412 // A Vector3 return value is stored in xmm0 and xmm1.
5413 // RyuJIT assumes that the upper unused bits of xmm1 are cleared but
5414 // the native compiler doesn't guarantee it.
5415 if (returnType == TYP_SIMD12)
5417 returnReg = retTypeDesc->GetABIReturnReg(1);
5418 // Clear the upper 32 bits by two shift instructions.
5419 // retReg = retReg << 96
5420 // retReg = retReg >> 96
5421 getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12);
5422 getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12);
5424 #endif // FEATURE_SIMD
5429 if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
5431 // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
5432 // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the
5433 // correct argument registers.
5434 returnReg = REG_PINVOKE_TCB;
5437 #endif // _TARGET_X86_
5438 if (varTypeIsFloating(returnType))
5440 returnReg = REG_FLOATRET;
5444 returnReg = REG_INTRET;
5447 if (call->gtRegNum != returnReg)
5449 inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
5453 genProduceReg(call);
5457 // If there is nothing next, that means the result is thrown away, so this value is not live.
5458 // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
5459 if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
5461 gcInfo.gcMarkRegSetNpt(RBM_INTRET);
5464 #if !FEATURE_EH_FUNCLETS
5465 //-------------------------------------------------------------------------
5466 // Create a label for tracking of region protected by the monitor in synchronized methods.
5467 // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
5468 // so the GC state vars have been updated before creating the label.
if (fPossibleSyncHelperCall)
{
    switch (helperNum)
    {
        case CORINFO_HELP_MON_ENTER:
        case CORINFO_HELP_MON_ENTER_STATIC:
            noway_assert(compiler->syncStartEmitCookie == NULL);
            compiler->syncStartEmitCookie =
                getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
            noway_assert(compiler->syncStartEmitCookie != NULL);
            break;

        case CORINFO_HELP_MON_EXIT:
        case CORINFO_HELP_MON_EXIT_STATIC:
            noway_assert(compiler->syncEndEmitCookie == NULL);
            compiler->syncEndEmitCookie =
                getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
            noway_assert(compiler->syncEndEmitCookie != NULL);
            break;

        default:
            break;
    }
}
5492 #endif // !FEATURE_EH_FUNCLETS
5494 unsigned stackAdjustBias = 0;
5496 #if defined(_TARGET_X86_)
5497 // Is the caller supposed to pop the arguments?
5498 if (fCallerPop && (stackArgBytes != 0))
5500 stackAdjustBias = stackArgBytes;
5503 SubtractStackLevel(stackArgBytes);
5504 #endif // _TARGET_X86_
5506 genRemoveAlignmentAfterCall(call, stackAdjustBias);
// Produce code for a GT_JMP node.
// The arguments of the caller need to be transferred to the callee before exiting the caller.
// The actual jump to the callee is generated as part of the caller's epilog sequence.
// Therefore, the codegen for GT_JMP just ensures that the callee's arguments are set up correctly.
5513 void CodeGen::genJmpMethod(GenTree* jmp)
5515 assert(jmp->OperGet() == GT_JMP);
5516 assert(compiler->compJmpOpUsed);
5518 // If no arguments, nothing to do
5519 if (compiler->info.compArgsCount == 0)
5524 // Make sure register arguments are in their initial registers
5525 // and stack arguments are put back as well.
unsigned   varNum;
LclVarDsc* varDsc;

// First move any en-registered stack arguments back to the stack.
5530 // At the same time any reg arg not in correct reg is moved back to its stack location.
// We are not strictly required to spill reg args that are not in the desired reg for a jmp call;
// but that would require us to deal with circularity while moving values around. Spilling
// to the stack keeps the implementation simple, which is not a bad trade-off given that jmp calls
// are not frequent.
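// Illustrative sketch (offsets and registers hypothetical): an int arg homed in RCX but
// currently living in RSI is spilled here with
//     mov qword ptr [rbp + <argOffs>], rsi
// and reloaded into RCX by the second loop below.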
5536 for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
5538 varDsc = compiler->lvaTable + varNum;
5540 if (varDsc->lvPromoted)
5542 noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
5544 unsigned fieldVarNum = varDsc->lvFieldLclStart;
5545 varDsc = compiler->lvaTable + fieldVarNum;
5547 noway_assert(varDsc->lvIsParam);
5549 if (varDsc->lvIsRegArg && (varDsc->lvRegNum != REG_STK))
// Skip reg args that are already in the right register for the jmp call.
// If not, we will spill such args to their stack locations.
5554 // If we need to generate a tail call profiler hook, then spill all
5555 // arg regs to free them up for the callback.
5556 if (!compiler->compIsProfilerHookNeeded() && (varDsc->lvRegNum == varDsc->lvArgReg))
5561 else if (varDsc->lvRegNum == REG_STK)
// Skip args that currently live on the stack.
5567 // If we came here it means either a reg argument not in the right register or
5568 // a stack argument currently living in a register. In either case the following
5569 // assert should hold.
5570 assert(varDsc->lvRegNum != REG_STK);
5572 assert(!varDsc->lvIsStructField || (compiler->lvaTable[varDsc->lvParentLcl].lvFieldCnt == 1));
5573 var_types storeType = genActualType(varDsc->lvaArgType()); // We own the memory and can use the full move.
5574 getEmitter()->emitIns_S_R(ins_Store(storeType), emitTypeSize(storeType), varDsc->lvRegNum, varNum, 0);
5576 // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live.
5577 // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
5578 // Therefore manually update life of varDsc->lvRegNum.
5579 regMaskTP tempMask = varDsc->lvRegMask();
5580 regSet.RemoveMaskVars(tempMask);
5581 gcInfo.gcMarkRegSetNpt(tempMask);
5582 if (compiler->lvaIsGCTracked(varDsc))
5585 if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
5587 JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
5591 JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
5595 VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
5599 #ifdef PROFILING_SUPPORTED
5600 // At this point all arg regs are free.
5601 // Emit tail call profiler callback.
5602 genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
// Next, move any register arguments that are not currently in their registers back into them.
5606 regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method.
5607 unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method.
5608 for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
5610 varDsc = compiler->lvaTable + varNum;
5611 if (varDsc->lvPromoted)
5613 noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
5615 unsigned fieldVarNum = varDsc->lvFieldLclStart;
5616 varDsc = compiler->lvaTable + fieldVarNum;
5618 noway_assert(varDsc->lvIsParam);
5620 // Skip if arg not passed in a register.
5621 if (!varDsc->lvIsRegArg)
5626 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5627 if (varTypeIsStruct(varDsc))
5629 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
5630 assert(typeHnd != nullptr);
5632 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
5633 compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
5634 assert(structDesc.passedInRegisters);
5636 unsigned __int8 offset0 = 0;
5637 unsigned __int8 offset1 = 0;
5638 var_types type0 = TYP_UNKNOWN;
5639 var_types type1 = TYP_UNKNOWN;
5641 // Get the eightbyte data
5642 compiler->GetStructTypeOffset(structDesc, &type0, &type1, &offset0, &offset1);
5644 // Move the values into the right registers.
5647 // Update varDsc->lvArgReg and lvOtherArgReg life and GC Info to indicate varDsc stack slot is dead and
5648 // argReg is going live. Note that we cannot modify varDsc->lvRegNum and lvOtherArgReg here because another
5649 // basic block may not be expecting it. Therefore manually update life of argReg. Note that GT_JMP marks
5650 // the end of the basic block and after which reg life and gc info will be recomputed for the new block in
5651 // genCodeForBBList().
5652 if (type0 != TYP_UNKNOWN)
5654 getEmitter()->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), varDsc->lvArgReg, varNum, offset0);
5655 regSet.rsMaskVars |= genRegMask(varDsc->lvArgReg);
5656 gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, type0);
5659 if (type1 != TYP_UNKNOWN)
5661 getEmitter()->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), varDsc->lvOtherArgReg, varNum, offset1);
5662 regSet.rsMaskVars |= genRegMask(varDsc->lvOtherArgReg);
5663 gcInfo.gcMarkRegPtrVal(varDsc->lvOtherArgReg, type1);
5666 if (varDsc->lvTracked)
5668 VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
5672 #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5674 // Register argument
5675 noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
5677 // Is register argument already in the right register?
5678 // If not load it from its stack location.
5679 var_types loadType = varDsc->lvaArgType();
5680 regNumber argReg = varDsc->lvArgReg; // incoming arg register
5682 if (varDsc->lvRegNum != argReg)
5684 assert(genIsValidReg(argReg));
5685 getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
5687 // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
5688 // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
5689 // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
5690 // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
5691 regSet.AddMaskVars(genRegMask(argReg));
5692 gcInfo.gcMarkRegPtrVal(argReg, loadType);
5693 if (compiler->lvaIsGCTracked(varDsc))
5696 if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
5698 JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming dead\n", varNum);
5702 JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing dead\n", varNum);
5706 VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
5711 #if FEATURE_VARARG && defined(_TARGET_AMD64_)
5712 // In case of a jmp call to a vararg method also pass the float/double arg in the corresponding int arg
5713 // register. This is due to the AMD64 ABI which requires floating point values passed to varargs functions to
5714 // be passed in both integer and floating point registers. It doesn't apply to x86, which passes floating point
5715 // values on the stack.
5716 if (compiler->info.compIsVarArgs)
5718 regNumber intArgReg;
5719 var_types loadType = varDsc->lvaArgType();
5720 regNumber argReg = varDsc->lvArgReg; // incoming arg register
if (varTypeIsFloating(loadType))
{
    intArgReg       = compiler->getCallArgIntRegister(argReg);
    instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
    inst_RV_RV(ins, argReg, intArgReg, loadType);
}
else
{
    intArgReg = argReg;
}

fixedIntArgMask |= genRegMask(intArgReg);
5735 if (intArgReg == REG_ARG_0)
5737 assert(firstArgVarNum == BAD_VAR_NUM);
5738 firstArgVarNum = varNum;
5741 #endif // FEATURE_VARARG
5744 #if FEATURE_VARARG && defined(_TARGET_AMD64_)
5745 // Jmp call to a vararg method - if the method has fewer than 4 fixed arguments,
5746 // load the remaining arg registers (both int and float) from the corresponding
// shadow stack slots. This is because we don't know the number and type
// of the non-fixed params passed by the caller; therefore we have to assume the worst
// case, in which the caller passes float/double args in both the int and float arg regs.
// This doesn't apply to x86, which doesn't pass floating point values in floating
// point registers.
//
// The caller could have passed gc-ref/byref type var args. Since these are var args,
// the callee has no way of knowing their gc-ness. Therefore, mark the region that loads
5756 // remaining arg registers from shadow stack slots as non-gc interruptible.
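// Illustrative sketch for one remaining register (offsets hypothetical):
//     mov  rdx, qword ptr [rbp + <shadowSlotOffs>]
//     movd xmm1, rdx    ; also populate the matching float arg reg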
5757 if (fixedIntArgMask != RBM_NONE)
5759 assert(compiler->info.compIsVarArgs);
5760 assert(firstArgVarNum != BAD_VAR_NUM);
5762 regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
5763 if (remainingIntArgMask != RBM_NONE)
5765 instruction insCopyIntToFloat = ins_CopyIntToFloat(TYP_LONG, TYP_DOUBLE);
5766 getEmitter()->emitDisableGC();
5767 for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
5769 regNumber argReg = intArgRegs[argNum];
5770 regMaskTP argRegMask = genRegMask(argReg);
5772 if ((remainingIntArgMask & argRegMask) != 0)
5774 remainingIntArgMask &= ~argRegMask;
5775 getEmitter()->emitIns_R_S(INS_mov, EA_8BYTE, argReg, firstArgVarNum, argOffset);
5777 // also load it in corresponding float arg reg
5778 regNumber floatReg = compiler->getCallArgFloatRegister(argReg);
5779 inst_RV_RV(insCopyIntToFloat, floatReg, argReg);
5782 argOffset += REGSIZE_BYTES;
5784 getEmitter()->emitEnableGC();
5787 #endif // FEATURE_VARARG
5790 // produce code for a GT_LEA subnode
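// Illustrative examples (hypothetical registers and constants) of the three address forms
// handled below, assuming the result lands in RAX:
//     base + index*scale + offset:  lea rax, [rbx+rcx*4+24]
//     base + offset:                lea rax, [rbx+24]
//     index*scale + offset:         lea rax, [rcx*4+24]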
5791 void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
5793 emitAttr size = emitTypeSize(lea);
5794 genConsumeOperands(lea);
5796 if (lea->Base() && lea->Index())
5798 regNumber baseReg = lea->Base()->gtRegNum;
5799 regNumber indexReg = lea->Index()->gtRegNum;
5800 getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, baseReg, indexReg, lea->gtScale, lea->Offset());
5802 else if (lea->Base())
5804 getEmitter()->emitIns_R_AR(INS_lea, size, lea->gtRegNum, lea->Base()->gtRegNum, lea->Offset());
5806 else if (lea->Index())
getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, REG_NA, lea->Index()->gtRegNum, lea->gtScale,
                            lea->Offset());

genProduceReg(lea);
5815 //-------------------------------------------------------------------------------------------
5816 // genJumpKindsForTree: Determine the number and kinds of conditional branches
5817 // necessary to implement the given GT_CMP node
// Arguments:
//    cmpTree           - (input) The GenTree Relop node that was used to set the Condition codes
//    jmpKind[2]        - (output) One or two conditional branch instructions
//    jmpToTrueLabel[2] - (output) When true we branch to the true case
//                        When false we create a second label and branch to the false case
//                        Only GT_EQ for a floating point compare can have a false value.
5828 // Sets the proper values into the array elements of jmpKind[] and jmpToTrueLabel[]
5831 // At least one conditional branch instruction will be returned.
5832 // Typically only one conditional branch is needed
5833 // and the second jmpKind[] value is set to EJ_NONE
5836 // jmpToTrueLabel[i]= true implies branch when the compare operation is true.
5837 // jmpToTrueLabel[i]= false implies branch when the compare operation is false.
5838 //-------------------------------------------------------------------------------------------
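// Example (illustration only): for an ordered GT_EQ of two doubles this function returns
// jmpKind = { EJ_jpe, EJ_je } and jmpToTrueLabel = { false, true }, i.e. the caller emits
// "jpe <false label>; je <true label>".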
5841 void CodeGen::genJumpKindsForTree(GenTree* cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2])
5843 // Except for BEQ (= ordered GT_EQ) both jumps are to the true label.
5844 jmpToTrueLabel[0] = true;
5845 jmpToTrueLabel[1] = true;
5847 // For integer comparisons just use genJumpKindForOper
5848 if (!varTypeIsFloating(cmpTree->gtOp.gtOp1))
5850 CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
5851 jmpKind[0] = genJumpKindForOper(cmpTree->gtOper, compareKind);
5852 jmpKind[1] = EJ_NONE;
5856 assert(cmpTree->OperIsCompare());
// For details on how we arrived at this mapping, see the comment block in genCodeForTreeNode()
// while generating code for compare operators (e.g. GT_EQ etc).
5860 if ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) != 0)
// Must branch if we have a NaN, unordered
5863 switch (cmpTree->gtOper)
5868 jmpKind[1] = EJ_NONE;
5873 jmpKind[0] = EJ_jbe;
5874 jmpKind[1] = EJ_NONE;
5878 jmpKind[0] = EJ_jpe;
5879 jmpKind[1] = EJ_jne;
5884 jmpKind[1] = EJ_NONE;
5891 else // ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) == 0)
// Do not branch if we have a NaN, unordered
5894 switch (cmpTree->gtOper)
5899 jmpKind[1] = EJ_NONE;
5904 jmpKind[0] = EJ_jae;
5905 jmpKind[1] = EJ_NONE;
5909 jmpKind[0] = EJ_jne;
5910 jmpKind[1] = EJ_NONE;
jmpKind[0]        = EJ_jpe;
jmpKind[1]        = EJ_je;
jmpToTrueLabel[0] = false;
5926 //------------------------------------------------------------------------
5927 // genCompareFloat: Generate code for comparing two floating point values
5930 // treeNode - the compare tree
// SSE2 instruction ucomis[s|d] performs an unordered comparison and
// updates the rFLAGS register as follows:
//
//    Result of compare     ZF  PF  CF
//    -----------------     --  --  --
//    Unordered              1   1   1   <-- this result implies one of the operands of the compare is a NaN.
//    Greater                0   0   0
//    Less Than              0   0   1
//    Equal                  1   0   0
//
// From the above table the following equalities follow. As per the ECMA spec, *.UN opcodes perform an
// unordered comparison of floating point values. That is, *.UN comparisons result in true when
// one of the operands is a NaN, whereas ordered comparisons result in false.
//
//    Opcode          Amd64 equivalent        Comment
//    ------          ----------------        --------
//    BLT.UN(a,b)     ucomis[s|d] a, b        jb branches if CF=1, which means either a<b or unordered from the
//                    jb                      above table.
//
//    BLT(a,b)        ucomis[s|d] b, a        ja branches if CF=0 and ZF=0, which means b>a, which in turn implies a<b.
//                    ja
//
//    BGT.UN(a,b)     ucomis[s|d] b, a        branch if b<a or unordered ==> branch if a>b or unordered
//                    jb
//
//    BGT(a, b)       ucomis[s|d] a, b        branch if a>b
//                    ja
//
//    BLE.UN(a,b)     ucomis[s|d] a, b        jbe branches if CF=1 or ZF=1, which implies a<=b or unordered
//                    jbe
//
//    BLE(a,b)        ucomis[s|d] b, a        jae branches if CF=0, which means b>=a, i.e. a<=b
//                    jae
//
//    BGE.UN(a,b)     ucomis[s|d] b, a        branch if b<=a or unordered ==> branch if a>=b or unordered
//                    jbe
//
//    BGE(a,b)        ucomis[s|d] a, b        branch if a>=b
//                    jae
//
//    BEQ.UN(a,b)     ucomis[s|d] a, b        branch if a==b or unordered. There is no BEQ.UN opcode in the ECMA
//                    je                      spec. This case is given for completeness, in case the JIT generates
//                                            such a gentree internally.
//
//    BEQ(a,b)        ucomis[s|d] a, b        From the above table, PF=0 and ZF=1 corresponds to a==b.
//                    jpe L1
//                    je <true label>
//                 L1:
//
//    BNE(a,b)        ucomis[s|d] a, b        branch if a!=b. There is no BNE opcode in the ECMA spec. This case is
//                    jne                     given for completeness, in case the JIT generates such a gentree
//                                            internally.
//
//    BNE.UN(a,b)     ucomis[s|d] a, b        From the above table, PF=1 or ZF=0 implies unordered or a!=b.
//                    jpe <true label>
//                    jne <true label>
//
// As we can see from the above equalities, the operands of a compare operator need to be
// reversed in the case of BLT/CLT, BGT.UN/CGT.UN, BLE/CLE and BGE.UN/CGE.UN.
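// For example (illustrative registers): for BLT(a, b) with a in xmm0 and b in xmm1, the
// operands are reversed and we emit:
//     ucomiss xmm1, xmm0    ; compare b against a
//     ja      <true label>  ; CF==0 && ZF==0  =>  b > a  =>  a < b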
5993 void CodeGen::genCompareFloat(GenTree* treeNode)
5995 assert(treeNode->OperIsCompare());
5997 GenTreeOp* tree = treeNode->AsOp();
5998 GenTree* op1 = tree->gtOp1;
5999 GenTree* op2 = tree->gtOp2;
6000 var_types op1Type = op1->TypeGet();
6001 var_types op2Type = op2->TypeGet();
6003 genConsumeOperands(tree);
6005 assert(varTypeIsFloating(op1Type));
6006 assert(op1Type == op2Type);
6008 regNumber targetReg = treeNode->gtRegNum;
6013 if ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0)
6015 // Unordered comparison case
6016 reverseOps = (tree->gtOper == GT_GT || tree->gtOper == GT_GE);
6020 reverseOps = (tree->gtOper == GT_LT || tree->gtOper == GT_LE);
6030 ins = ins_FloatCompare(op1Type);
6031 cmpAttr = emitTypeSize(op1Type);
6033 getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2);
6035 // Are we evaluating this into a register?
6036 if (targetReg != REG_NA)
6038 genSetRegToCond(targetReg, tree);
6039 genProduceReg(tree);
6043 //------------------------------------------------------------------------
6044 // genCompareInt: Generate code for comparing ints or, on amd64, longs.
6047 // treeNode - the compare tree
6051 void CodeGen::genCompareInt(GenTree* treeNode)
6053 assert(treeNode->OperIsCompare() || treeNode->OperIs(GT_CMP));
6055 GenTreeOp* tree = treeNode->AsOp();
6056 GenTree* op1 = tree->gtOp1;
6057 GenTree* op2 = tree->gtOp2;
6058 var_types op1Type = op1->TypeGet();
6059 var_types op2Type = op2->TypeGet();
6060 regNumber targetReg = tree->gtRegNum;
6062 genConsumeOperands(tree);
6064 assert(!op1->isContainedIntOrIImmed());
6065 assert(!varTypeIsFloating(op2Type));
6068 var_types type = TYP_UNKNOWN;
6070 if (tree->OperIs(GT_TEST_EQ, GT_TEST_NE))
// Unlike many xarch instructions TEST doesn't have a form with a 16/32/64 bit first operand and
// an 8 bit immediate second operand. But if the immediate value fits in 8 bits then we can simply
// emit an 8 bit TEST instruction, unless we're targeting x86 and the first operand is a non-byteable
// register.
// Note that lowering does something similar but its main purpose is to allow memory operands to be
// contained, so it doesn't handle other kinds of operands. It could do more but on x86 that results
// in additional register constraints and that may be worse than wasting 3 bytes on an immediate.
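// For example (illustration only): "test al, 0x20" (2 bytes) can replace "test eax, 0x20"
// (5 bytes) because only the low byte participates in the test.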
6083 (!op1->isUsedFromReg() || isByteReg(op1->gtRegNum)) &&
6085 (op2->IsCnsIntOrI() && genSmallTypeCanRepresentValue(TYP_UBYTE, op2->AsIntCon()->IconValue())))
6090 else if (op1->isUsedFromReg() && op2->IsIntegralConst(0))
6092 // We're comparing a register to 0 so we can generate "test reg1, reg1"
6093 // instead of the longer "cmp reg1, 0"
6102 if (type == TYP_UNKNOWN)
6104 if (op1Type == op2Type)
6108 else if (genTypeSize(op1Type) == genTypeSize(op2Type))
// If the types are different but have the same size then we'll use TYP_INT or TYP_LONG.
// This primarily deals with small type mixes (e.g. byte/ubyte) that need to be widened
// and compared as int. We should never get long type mixes here, but handle that case
// as well, just in case.
type = genTypeSize(op1Type) == 8 ? TYP_LONG : TYP_INT;
// If the types are different simply use TYP_INT. This deals with small type/int type
// mixes (e.g. byte/short ubyte/int) that need to be widened and compared as int.
// Lowering is expected to handle any mixes that involve long types (e.g. int/long).
type = TYP_INT;
6124 // The common type cannot be smaller than any of the operand types, we're probably mixing int/long
6125 assert(genTypeSize(type) >= max(genTypeSize(op1Type), genTypeSize(op2Type)));
6126 // Small unsigned int types (TYP_BOOL can use anything) should use unsigned comparisons
6127 assert(!(varTypeIsSmallInt(type) && varTypeIsUnsigned(type)) || ((tree->gtFlags & GTF_UNSIGNED) != 0));
// If op1 is smaller then it cannot be in memory; we're probably missing a cast
assert((genTypeSize(op1Type) >= genTypeSize(type)) || !op1->isUsedFromMemory());
// If op2 is smaller then it cannot be in memory; we're probably missing a cast
assert((genTypeSize(op2Type) >= genTypeSize(type)) || !op2->isUsedFromMemory());
6132 // If we ended up with a small type and op2 is a constant then make sure we don't lose constant bits
6133 assert(!op2->IsCnsIntOrI() || !varTypeIsSmall(type) ||
6134 genSmallTypeCanRepresentValue(type, op2->AsIntCon()->IconValue()));
6137 // The type cannot be larger than the machine word size
6138 assert(genTypeSize(type) <= genTypeSize(TYP_I_IMPL));
6139 // TYP_UINT and TYP_ULONG should not appear here, only small types can be unsigned
6140 assert(!varTypeIsUnsigned(type) || varTypeIsSmall(type));
6142 getEmitter()->emitInsBinary(ins, emitTypeSize(type), op1, op2);
6144 // Are we evaluating this into a register?
6145 if (targetReg != REG_NA)
6147 genSetRegToCond(targetReg, tree);
6148 genProduceReg(tree);
6152 //-------------------------------------------------------------------------------------------
6153 // genSetRegToCond: Set a register 'dstReg' to the appropriate one or zero value
6154 // corresponding to a binary Relational operator result.
6157 // dstReg - The target register to set to 1 or 0
6158 // tree - The GenTree Relop node that was used to set the Condition codes
6160 // Return Value: none
// A full 64-bit value of either 1 or 0 is set up in 'dstReg'
6164 //-------------------------------------------------------------------------------------------
6166 void CodeGen::genSetRegToCond(regNumber dstReg, GenTree* tree)
6168 noway_assert((genRegMask(dstReg) & RBM_BYTE_REGS) != 0);
6170 emitJumpKind jumpKind[2];
6171 bool branchToTrueLabel[2];
6172 genJumpKindsForTree(tree, jumpKind, branchToTrueLabel);
6174 if (jumpKind[1] == EJ_NONE)
6176 // Set (lower byte of) reg according to the flags
6177 inst_SET(jumpKind[0], dstReg);
// jmpKind[1] != EJ_NONE implies BEQ and BNE.UN of floating point values.
6183 // These are represented by two conditions.
6184 if (tree->gtOper == GT_EQ)
6186 // This must be an ordered comparison.
6187 assert((tree->gtFlags & GTF_RELOP_NAN_UN) == 0);
6191 // This must be BNE.UN
6192 assert((tree->gtOper == GT_NE) && ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0));
// Here is the sample code generated in each case:
// BEQ == cmp, jpe <false label>, je <true label>
// That is, to materialize the comparison, reg needs to be set if PF=0 and ZF=1:
//     setnp reg  // if (PF==0) reg = 1 else reg = 0
//     jpe L1     // jump if PF==1
//     sete reg
// L1:
//
// BNE.UN == cmp, jpe <true label>, jne <true label>
// That is, to materialize the comparison, reg needs to be set if either PF=1 or ZF=0:
//     setp reg
//     jpe L1
//     setne reg
// L1:
// Reverse the jmpKind condition before setting dstReg if the first branch is to the false label.
inst_SET(branchToTrueLabel[0] ? jumpKind[0] : emitter::emitReverseJumpKind(jumpKind[0]), dstReg);
6214 BasicBlock* label = genCreateTempLabel();
6215 inst_JMP(jumpKind[0], label);
6217 // second branch is always to true label
6218 assert(branchToTrueLabel[1]);
6219 inst_SET(jumpKind[1], dstReg);
6220 genDefineTempLabel(label);
6223 var_types treeType = tree->TypeGet();
6224 if (treeType == TYP_INT || treeType == TYP_LONG)
6226 // Set the higher bytes to 0
6227 inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), dstReg, dstReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
6231 noway_assert(treeType == TYP_BYTE);
6235 #if !defined(_TARGET_64BIT_)
6236 //------------------------------------------------------------------------
6237 // genLongToIntCast: Generate code for long to int casts on x86.
6240 // cast - The GT_CAST node
6246 // The cast node and its sources (via GT_LONG) must have been assigned registers.
6247 // The destination cannot be a floating point type or a small integer type.
6249 void CodeGen::genLongToIntCast(GenTree* cast)
6251 assert(cast->OperGet() == GT_CAST);
6253 GenTree* src = cast->gtGetOp1();
6254 noway_assert(src->OperGet() == GT_LONG);
6256 genConsumeRegs(src);
6258 var_types srcType = ((cast->gtFlags & GTF_UNSIGNED) != 0) ? TYP_ULONG : TYP_LONG;
6259 var_types dstType = cast->CastToType();
6260 regNumber loSrcReg = src->gtGetOp1()->gtRegNum;
6261 regNumber hiSrcReg = src->gtGetOp2()->gtRegNum;
6262 regNumber dstReg = cast->gtRegNum;
6264 assert((dstType == TYP_INT) || (dstType == TYP_UINT));
6265 assert(genIsValidIntReg(loSrcReg));
6266 assert(genIsValidIntReg(hiSrcReg));
6267 assert(genIsValidIntReg(dstReg));
6269 if (cast->gtOverflow())
6272 // Generate an overflow check for [u]long to [u]int casts:
6274 // long -> int - check if the upper 33 bits are all 0 or all 1
6276 // ulong -> int - check if the upper 33 bits are all 0
6278 // long -> uint - check if the upper 32 bits are all 0
6279 // ulong -> uint - check if the upper 32 bits are all 0
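// Illustrative sequence (hypothetical registers) for the checked long -> int case with the
// source in edx:eax, matching the code below:
//     test eax, eax       ; sign of the low half selects which hi pattern is allowed
//     js   allOne
//     test edx, edx       ; non-negative low half: upper 33 bits must be all 0
//     jne  <throw overflow>
//     jmp  success
// allOne:
//     cmp  edx, -1        ; negative low half: upper 33 bits must be all 1
//     jne  <throw overflow>
// success: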
6282 if ((srcType == TYP_LONG) && (dstType == TYP_INT))
6284 BasicBlock* allOne = genCreateTempLabel();
6285 BasicBlock* success = genCreateTempLabel();
6287 inst_RV_RV(INS_test, loSrcReg, loSrcReg, TYP_INT, EA_4BYTE);
6288 inst_JMP(EJ_js, allOne);
6290 inst_RV_RV(INS_test, hiSrcReg, hiSrcReg, TYP_INT, EA_4BYTE);
6291 genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
6292 inst_JMP(EJ_jmp, success);
6294 genDefineTempLabel(allOne);
6295 inst_RV_IV(INS_cmp, hiSrcReg, -1, EA_4BYTE);
6296 genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
6298 genDefineTempLabel(success);
6302 if ((srcType == TYP_ULONG) && (dstType == TYP_INT))
6304 inst_RV_RV(INS_test, loSrcReg, loSrcReg, TYP_INT, EA_4BYTE);
6305 genJumpToThrowHlpBlk(EJ_js, SCK_OVERFLOW);
6308 inst_RV_RV(INS_test, hiSrcReg, hiSrcReg, TYP_INT, EA_4BYTE);
6309 genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
6313 if (dstReg != loSrcReg)
6315 inst_RV_RV(INS_mov, dstReg, loSrcReg, TYP_INT, EA_4BYTE);
6318 genProduceReg(cast);
6322 //------------------------------------------------------------------------
6323 // genIntToIntCast: Generate code for an integer cast
6324 // This method handles integer overflow checking casts
6325 // as well as ordinary integer casts.
6328 // treeNode - The GT_CAST node
6334 // The treeNode is not a contained node and must have an assigned register.
6335 // For a signed convert from byte, the source must be in a byte-addressable register.
6336 // Neither the source nor target type can be a floating point type.
6338 // TODO-XArch-CQ: Allow castOp to be a contained node without an assigned register.
6339 // TODO: refactor to use getCastDescription
6341 void CodeGen::genIntToIntCast(GenTree* treeNode)
6343 assert(treeNode->OperGet() == GT_CAST);
6345 GenTree* castOp = treeNode->gtCast.CastOp();
6346 var_types srcType = genActualType(castOp->TypeGet());
6347 noway_assert(genTypeSize(srcType) >= 4);
6348 assert(genTypeSize(srcType) <= genTypeSize(TYP_I_IMPL));
6350 regNumber targetReg = treeNode->gtRegNum;
6351 regNumber sourceReg = castOp->gtRegNum;
6352 var_types dstType = treeNode->CastToType();
6353 bool isUnsignedDst = varTypeIsUnsigned(dstType);
6354 bool isUnsignedSrc = varTypeIsUnsigned(srcType);
6356 // if necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set
6357 if (!isUnsignedSrc && treeNode->IsUnsigned())
6359 srcType = genUnsignedType(srcType);
6360 isUnsignedSrc = true;
6363 bool requiresOverflowCheck = false;
6365 assert(genIsValidIntReg(targetReg));
6366 assert(genIsValidIntReg(sourceReg));
6368 instruction ins = INS_invalid;
6369 emitAttr srcSize = EA_ATTR(genTypeSize(srcType));
6370 emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
6372 if (srcSize < dstSize)
6375 // dstType cannot be a long type on x86, such casts should have been decomposed.
6376 // srcType cannot be a small type since it's the "actual type" of the cast operand.
6377 // This means that widening casts do not actually occur on x86.
6380 // This is a widening cast from TYP_(U)INT to TYP_(U)LONG.
6381 assert(dstSize == EA_8BYTE);
6382 assert(srcSize == EA_4BYTE);
6384 // When widening, overflows can only happen if the source type is signed and the
6385 // destination type is unsigned. Since the overflow check ensures that the value
6386 // is positive a cheaper mov instruction can be used instead of movsxd.
6387 if (treeNode->gtOverflow() && !isUnsignedSrc && isUnsignedDst)
6389 requiresOverflowCheck = true;
6394 ins = isUnsignedSrc ? INS_mov : INS_movsxd;
6400 // Narrowing cast, or sign-changing cast
6401 noway_assert(srcSize >= dstSize);
6403 // Is this an Overflow checking cast?
6404 if (treeNode->gtOverflow())
6406 requiresOverflowCheck = true;
6411 ins = ins_Move_Extend(dstType, false);
6415 noway_assert(ins != INS_invalid);
6417 genConsumeReg(castOp);
6419 if (requiresOverflowCheck)
6421 ssize_t typeMin = 0;
6422 ssize_t typeMax = 0;
6423 ssize_t typeMask = 0;
6424 bool needScratchReg = false;
6425 bool signCheckOnly = false;
6427 /* Do we need to compare the value, or just check masks */
6432 typeMask = ssize_t((int)0xFFFFFF80);
6433 typeMin = SCHAR_MIN;
6434 typeMax = SCHAR_MAX;
6438 typeMask = ssize_t((int)0xFFFFFF00L);
6442 typeMask = ssize_t((int)0xFFFF8000);
6448 typeMask = ssize_t((int)0xFFFF0000L);
6452 if (srcType == TYP_UINT)
6454 signCheckOnly = true;
6458 typeMask = ssize_t((int)0x80000000);
6465 if (srcType == TYP_INT)
6467 signCheckOnly = true;
6471 needScratchReg = true;
6476 noway_assert(srcType == TYP_ULONG);
6477 signCheckOnly = true;
6481 noway_assert((srcType == TYP_LONG) || (srcType == TYP_INT));
6482 signCheckOnly = true;
6486 NO_WAY("Unknown type");
6492 // We only need to check for a negative value in sourceReg
6493 inst_RV_RV(INS_test, sourceReg, sourceReg, srcType, srcSize);
6494 genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
6498 // When we are converting from unsigned or to unsigned, we
6499 // will only have to check for any bits set using 'typeMask'
6500 if (isUnsignedSrc || isUnsignedDst)
6504 regNumber tmpReg = treeNode->GetSingleTempReg();
6505 inst_RV_RV(INS_mov, tmpReg, sourceReg, TYP_LONG); // Move the 64-bit value to a writeable temp reg
6506 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, srcSize, tmpReg, 32); // Shift right by 32 bits
genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);               // Throw if the shifted result is non-zero
6511 noway_assert(typeMask != 0);
6512 inst_RV_IV(INS_TEST, sourceReg, typeMask, srcSize);
6513 genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
6518 // For a narrowing signed cast
6520 // We must check the value is in a signed range.
6522 // Compare with the MAX
6524 noway_assert((typeMin != 0) && (typeMax != 0));
6526 inst_RV_IV(INS_cmp, sourceReg, typeMax, srcSize);
6527 genJumpToThrowHlpBlk(EJ_jg, SCK_OVERFLOW);
6529 // Compare with the MIN
6531 inst_RV_IV(INS_cmp, sourceReg, typeMin, srcSize);
6532 genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
6536 if (targetReg != sourceReg
6537 #ifdef _TARGET_AMD64_
6538 // On amd64, we can hit this path for a same-register
6539 // 4-byte to 8-byte widening conversion, and need to
6540 // emit the instruction to set the high bits correctly.
6541 || (dstSize == EA_8BYTE && srcSize == EA_4BYTE)
6542 #endif // _TARGET_AMD64_
6544 inst_RV_RV(ins, targetReg, sourceReg, srcType, srcSize);
6546 else // non-overflow checking cast
6548 // We may have code transformations that result in casts where srcType is the same as dstType.
6549 // e.g. Bug 824281, in which a comma is split by the rationalizer, leaving an assignment of a
6550 // long constant to a long lclVar.
6551 if (srcType == dstType)
6558 if (targetReg != sourceReg
6559 #ifdef _TARGET_AMD64_
6560 // On amd64, 'mov' is the opcode used to zero-extend from
6561 // 4 bytes to 8 bytes.
6562 || (dstSize == EA_8BYTE && srcSize == EA_4BYTE)
6563 #endif // _TARGET_AMD64_
6566 inst_RV_RV(ins, targetReg, sourceReg, srcType, srcSize);
6569 #ifdef _TARGET_AMD64_
6570 else if (ins == INS_movsxd)
6572 inst_RV_RV(ins, targetReg, sourceReg, srcType, srcSize);
6574 #endif // _TARGET_AMD64_
6577 noway_assert(ins == INS_movsx || ins == INS_movzx);
6578 noway_assert(srcSize >= dstSize);
/* Generate "mov targetReg, castOp->gtReg" */
6581 inst_RV_RV(ins, targetReg, sourceReg, srcType, dstSize);
6585 genProduceReg(treeNode);
6588 //------------------------------------------------------------------------
6589 // genFloatToFloatCast: Generate code for a cast between float and double
6592 // treeNode - The GT_CAST node
6598 // Cast is a non-overflow conversion.
6599 // The treeNode must have an assigned register.
6600 // The cast is between float and double or vice versa.
6602 void CodeGen::genFloatToFloatCast(GenTree* treeNode)
6604 // float <--> double conversions are always non-overflow ones
6605 assert(treeNode->OperGet() == GT_CAST);
6606 assert(!treeNode->gtOverflow());
6608 regNumber targetReg = treeNode->gtRegNum;
6609 assert(genIsValidFloatReg(targetReg));
6611 GenTree* op1 = treeNode->gtOp.gtOp1;
6613 // If not contained, must be a valid float reg.
6614 if (op1->isUsedFromReg())
6616 assert(genIsValidFloatReg(op1->gtRegNum));
6620 var_types dstType = treeNode->CastToType();
6621 var_types srcType = op1->TypeGet();
6622 assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
6624 genConsumeOperands(treeNode->AsOp());
6625 if (srcType == dstType && (op1->isUsedFromReg() && (targetReg == op1->gtRegNum)))
// The source and destination types are the same and also reside in the same register;
// we just need to consume and produce the reg in this case.
6633 instruction ins = ins_FloatConv(dstType, srcType);
6634 getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
6637 genProduceReg(treeNode);
6640 //------------------------------------------------------------------------
6641 // genIntToFloatCast: Generate code to cast an int/long to float/double
6644 // treeNode - The GT_CAST node
6650 // Cast is a non-overflow conversion.
6651 // The treeNode must have an assigned register.
6652 // SrcType= int32/uint32/int64/uint64 and DstType=float/double.
6654 void CodeGen::genIntToFloatCast(GenTree* treeNode)
6656 // int type --> float/double conversions are always non-overflow ones
6657 assert(treeNode->OperGet() == GT_CAST);
6658 assert(!treeNode->gtOverflow());
6660 regNumber targetReg = treeNode->gtRegNum;
6661 assert(genIsValidFloatReg(targetReg));
6663 GenTree* op1 = treeNode->gtOp.gtOp1;
6665 if (op1->isUsedFromReg())
6667 assert(genIsValidIntReg(op1->gtRegNum));
6671 var_types dstType = treeNode->CastToType();
6672 var_types srcType = op1->TypeGet();
6673 assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
6675 #if !defined(_TARGET_64BIT_)
6676 // We expect morph to replace long to float/double casts with helper calls
6677 noway_assert(!varTypeIsLong(srcType));
6678 #endif // !defined(_TARGET_64BIT_)
6680 // Since xarch emitter doesn't handle reporting gc-info correctly while casting away gc-ness we
6681 // ensure srcType of a cast is non gc-type. Codegen should never see BYREF as source type except
6682 // for GT_LCL_VAR_ADDR and GT_LCL_FLD_ADDR that represent stack addresses and can be considered
// as TYP_I_IMPL. In all other cases, where the src operand is a gc-type and not known to be on the
// stack, the front-end (see fgMorphCast()) ensures this by assigning the gc-type local to a
// non gc-type temp and using the temp as the operand of the cast operation.
6686 if (srcType == TYP_BYREF)
6688 noway_assert(op1->OperGet() == GT_LCL_VAR_ADDR || op1->OperGet() == GT_LCL_FLD_ADDR);
6689 srcType = TYP_I_IMPL;
6692 // force the srcType to unsigned if GT_UNSIGNED flag is set
6693 if (treeNode->gtFlags & GTF_UNSIGNED)
6695 srcType = genUnsignedType(srcType);
6698 noway_assert(!varTypeIsGC(srcType));
6700 // We should never be seeing srcType whose size is not sizeof(int) nor sizeof(long).
// For conversions from byte/sbyte/int16/uint16 to float/double, we would expect
// either the front-end or the lowering phase to have generated two levels of cast.
// The first one widens the smaller int type to int32 and the second one converts
// it to the float/double.
6705 emitAttr srcSize = EA_ATTR(genTypeSize(srcType));
6706 noway_assert((srcSize == EA_ATTR(genTypeSize(TYP_INT))) || (srcSize == EA_ATTR(genTypeSize(TYP_LONG))));
// Also we don't expect to see uint32 -> float/double and uint64 -> float conversions
// here since they should have been lowered appropriately.
6710 noway_assert(srcType != TYP_UINT);
6711 noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT));
// To convert an int to a float/double, the cvtsi2ss/sd SSE2 instruction is used,
// which does a partial write to the lower 4/8 bytes of the xmm register, keeping the
// upper bytes unmodified. If "cvtsi2ss/sd xmmReg, r32/r64" occurs inside a loop,
// the partial write could introduce a false dependency and could cause a stall
// if there are further uses of xmmReg. We have such a case occurring with a
// customer-reported version of the SpectralNorm benchmark, resulting in a 2x perf
// regression. To avoid the false dependency, we emit "xorps xmmReg, xmmReg" before
// the cvtsi2ss/sd instruction.
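// For example (illustrative registers), for (double)someLong with the source in RCX and the
// result in XMM0 we emit:
//     xorps    xmm0, xmm0   ; break the false dependency on xmm0's previous value
//     cvtsi2sd xmm0, rcx    ; partial write of only the low 8 bytes of xmm0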
6722 genConsumeOperands(treeNode->AsOp());
6723 getEmitter()->emitIns_R_R(INS_xorps, EA_4BYTE, treeNode->gtRegNum, treeNode->gtRegNum);
6725 // Note that here we need to specify srcType that will determine
6726 // the size of source reg/mem operand and rex.w prefix.
6727 instruction ins = ins_FloatConv(dstType, TYP_INT);
6728 getEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1);
6730 // Handle the case of srcType = TYP_ULONG. SSE2 conversion instruction
6731 // will interpret ULONG value as LONG. Hence we need to adjust the
6732 // result if sign-bit of srcType is set.
6733 if (srcType == TYP_ULONG)
// The instruction sequence below is less accurate than what clang
// and gcc generate. However, we keep the current sequence for backward compatibility.
// If we change the instructions below, FloatingPointUtils::convertUInt64ToDouble
// should also be updated for a consistent conversion result.
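//
// Note: 0x43f0000000000000 is the IEEE754 double encoding of 2^64 (biased exponent
// 0x43f = 1087, zero mantissa). cvtsi2sd interpreted the ULONG as a signed LONG, so when
// the sign bit was set it produced (value - 2^64); adding 2^64 back corrects the result.
// Illustrative sequence (hypothetical registers):
//     test rcx, rcx              ; source value
//     jge  L1                    ; values < 2^63 need no adjustment
//     addsd xmm0, [2^64 const]   ; result += 18446744073709551616.0
// L1: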
6739 assert(dstType == TYP_DOUBLE);
6740 assert(op1->isUsedFromReg());
6742 // Set the flags without modifying op1.
6743 // test op1Reg, op1Reg
6744 inst_RV_RV(INS_test, op1->gtRegNum, op1->gtRegNum, srcType);
6746 // No need to adjust result if op1 >= 0 i.e. positive
6748 BasicBlock* label = genCreateTempLabel();
6749 inst_JMP(EJ_jge, label);
6751 // Adjust the result
6752 // result = result + 0x43f00000 00000000
6753 // addsd resultReg, 0x43f00000 00000000
6754 CORINFO_FIELD_HANDLE* cns = &u8ToDblBitmask;
6755 if (*cns == nullptr)
6758 static_assert_no_msg(sizeof(double) == sizeof(__int64));
6759 *((__int64*)&d) = 0x43f0000000000000LL;
6761 *cns = getEmitter()->emitFltOrDblConst(d, EA_8BYTE);
6763 getEmitter()->emitIns_R_C(INS_addsd, EA_8BYTE, treeNode->gtRegNum, *cns, 0);
6765 genDefineTempLabel(label);
6768 genProduceReg(treeNode);
6771 //------------------------------------------------------------------------
6772 // genFloatToIntCast: Generate code to cast float/double to int/long
6775 // treeNode - The GT_CAST node
6781 // Cast is a non-overflow conversion.
6782 // The treeNode must have an assigned register.
6783 // SrcType=float/double and DstType= int32/uint32/int64/uint64
6785 // TODO-XArch-CQ: (Low-pri) - generate in-line code when DstType = uint64
6787 void CodeGen::genFloatToIntCast(GenTree* treeNode)
6789 // we don't expect to see overflow detecting float/double --> int type conversions here
6790 // as they should have been converted into helper calls by front-end.
6791 assert(treeNode->OperGet() == GT_CAST);
6792 assert(!treeNode->gtOverflow());
6794 regNumber targetReg = treeNode->gtRegNum;
6795 assert(genIsValidIntReg(targetReg));
6797 GenTree* op1 = treeNode->gtOp.gtOp1;
6799 if (op1->isUsedFromReg())
6801 assert(genIsValidFloatReg(op1->gtRegNum));
6805 var_types dstType = treeNode->CastToType();
6806 var_types srcType = op1->TypeGet();
6807 assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));
6809 // We should never be seeing dstType whose size is neither sizeof(TYP_INT) nor sizeof(TYP_LONG).
6810 // For conversions to byte/sbyte/int16/uint16 from float/double, we would expect the
6811 // front-end or lowering phase to have generated two levels of cast. The first one is
6812 // for float or double to int32/uint32 and the second one for narrowing int32/uint32 to
6813 // the required smaller int type.
6814 emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
6815 noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG))));
6817 // We shouldn't be seeing uint64 here as it should have been converted
6818 // into a helper call by either front-end or lowering phase.
6819 noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))));
// If the dstType is TYP_UINT, we have 32 bits to encode the
// float number; the sign bit would fall in bit 33 or above.
// To achieve this we pretend as if we are converting it to a long.
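// For example (illustration only), (uint)someFloat with the value in xmm0 is emitted as a
// 64-bit truncating conversion whose low 32 bits hold the result:
//     cvttss2si rax, xmm0   ; rex.w form; any value in the uint32 range is exact in eax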
6824 if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))))
6829 // Note that we need to specify dstType here so that it will determine
6830 // the size of destination integer register and also the rex.w prefix.
6831 genConsumeOperands(treeNode->AsOp());
6832 instruction ins = ins_FloatConv(TYP_INT, srcType);
6833 getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
6834 genProduceReg(treeNode);
6837 //------------------------------------------------------------------------
6838 // genCkfinite: Generate code for ckfinite opcode.
6841 // treeNode - The GT_CKFINITE node
6847 // GT_CKFINITE node has reserved an internal register.
6849 // TODO-XArch-CQ - mark the operand as contained if known to be in
6850 // memory (e.g. field or an array element).
6852 void CodeGen::genCkfinite(GenTree* treeNode)
6854 assert(treeNode->OperGet() == GT_CKFINITE);
6856 GenTree* op1 = treeNode->gtOp.gtOp1;
6857 var_types targetType = treeNode->TypeGet();
6858 int expMask = (targetType == TYP_FLOAT) ? 0x7F800000 : 0x7FF00000; // Bit mask to extract exponent.
6859 regNumber targetReg = treeNode->gtRegNum;
6861 // Extract exponent into a register.
6862 regNumber tmpReg = treeNode->GetSingleTempReg();
6866 #ifdef _TARGET_64BIT_
6868 // Copy the floating-point value to an integer register. If we copied a float to a long, then
6869 // right-shift the value so the high 32 bits of the floating-point value sit in the low 32
6870 // bits of the integer register.
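// Illustrative sequence (hypothetical registers) for TYP_DOUBLE on 64-bit targets:
//     movd rax, xmm0          ; copy the raw bits to an integer register
//     shr  rax, 32            ; bring the exponent into the low 32 bits
//     and  eax, 0x7FF00000    ; isolate the exponent field
//     cmp  eax, 0x7FF00000    ; an all-1's exponent means NaN or +/-Inf
//     je   <throw ArithmeticException>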
6871 instruction ins = ins_CopyFloatToInt(targetType, (targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG);
6872 inst_RV_RV(ins, op1->gtRegNum, tmpReg, targetType);
6873 if (targetType == TYP_DOUBLE)
6875 // right shift by 32 bits to get to exponent.
6876 inst_RV_SH(INS_shr, EA_8BYTE, tmpReg, 32);
6879 // Mask exponent with all 1's and check if the exponent is all 1's
6880 inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
6881 inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE);
6883 // If exponent is all 1's, throw ArithmeticException
6884 genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);
6886 // if it is a finite value copy it to targetReg
6887 if (targetReg != op1->gtRegNum)
6889 inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
6892 #else // !_TARGET_64BIT_
6894 // If the target type is TYP_DOUBLE, we want to extract the high 32 bits into the register.
// There is no easy way to do this. To avoid requiring an extra register, we'll use shuffles
// to move the high 32 bits into the low 32 bits, then shuffle it back, since we
// need to produce the value into the target register.
6899 // For TYP_DOUBLE, we'll generate (for targetReg != op1->gtRegNum):
6900 // movaps targetReg, op1->gtRegNum
6901 // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
6902 // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
6903 // and tmpReg, <mask>
6904 // cmp tmpReg, <mask>
6906 // movaps targetReg, op1->gtRegNum // copy the value again, instead of un-shuffling it
6908 // For TYP_DOUBLE with (targetReg == op1->gtRegNum):
6909 // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
6910 // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
6911 // and tmpReg, <mask>
6912 // cmp tmpReg, <mask>
6914 // shufps targetReg, targetReg, 0xB1 // ZWXY => WZYX
6916 // For TYP_FLOAT, it's the same as _TARGET_64BIT_:
6917 // mov_xmm2i tmpReg, targetReg // tmpReg <= low 32 bits
6918 // and tmpReg, <mask>
6919 // cmp tmpReg, <mask>
6921 // movaps targetReg, op1->gtRegNum // only if targetReg != op1->gtRegNum
6923 regNumber copyToTmpSrcReg; // The register we'll copy to the integer temp.
6925 if (targetType == TYP_DOUBLE)
6927 if (targetReg != op1->gtRegNum)
6929 inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
6931 inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, 0xb1);
6932 copyToTmpSrcReg = targetReg;
6936 copyToTmpSrcReg = op1->gtRegNum;
6939 // Copy only the low 32 bits. This will be the high order 32 bits of the floating-point
6940 // value, no matter the floating-point type.
6941 inst_RV_RV(ins_CopyFloatToInt(TYP_FLOAT, TYP_INT), copyToTmpSrcReg, tmpReg, TYP_FLOAT);
6943 // Mask exponent with all 1's and check if the exponent is all 1's
6944 inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
6945 inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE);
6947 // If exponent is all 1's, throw ArithmeticException
6948 genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);
6950 if (targetReg != op1->gtRegNum)
6952 // In both the TYP_FLOAT and TYP_DOUBLE case, the op1 register is untouched,
6953 // so copy it to the targetReg. This is faster and smaller for TYP_DOUBLE
6954 // than re-shuffling the targetReg.
6955 inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
6957 else if (targetType == TYP_DOUBLE)
6959 // We need to re-shuffle the targetReg to get the correct result.
6960 inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, 0xb1);
6963 #endif // !_TARGET_64BIT_
6965 genProduceReg(treeNode);
6968 #ifdef _TARGET_AMD64_
6969 int CodeGenInterface::genSPtoFPdelta()
6973 #ifdef UNIX_AMD64_ABI
6975 // We require frame chaining on Unix to support native tool unwinding (such as
6976 // unwinding by the native debugger). We have a CLR-only extension to the
6977 // unwind codes (UWOP_SET_FPREG_LARGE) to support SP->FP offsets larger than 240.
6978 // If Unix ever supports EnC, the RSP == RBP assumption will have to be reevaluated.
6979 delta = genTotalFrameSize();
6981 #else // !UNIX_AMD64_ABI
// As per the Amd64 ABI, the RBP offset from the initial RSP can be between 0 and 240 if
// RBP needs to be reported in unwind codes. This case would arise for methods
// that use localloc.
6986 if (compiler->compLocallocUsed)
6988 // We cannot base delta computation on compLclFrameSize since it changes from
6989 // tentative to final frame layout and hence there is a possibility of
6990 // under-estimating offset of vars from FP, which in turn results in under-
6991 // estimating instruction size.
6993 // To be predictive and so as never to under-estimate offset of vars from FP
6994 // we will always position FP at min(240, outgoing arg area size).
6995 delta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize);
else if (compiler->opts.compDbgEnC)
{
    // The VM's assumption for EnC methods is that rsp and rbp are equal
    delta = 0;
}
else
{
    delta = genTotalFrameSize();
}
7007 #endif // !UNIX_AMD64_ABI
7012 //---------------------------------------------------------------------
7013 // genTotalFrameSize - return the total size of the stack frame, including local size,
// callee-saved register size, etc. For AMD64, this does not include the caller-pushed
// return address.
7021 int CodeGenInterface::genTotalFrameSize()
7023 assert(!IsUninitialized(compiler->compCalleeRegsPushed));
7025 int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
7027 assert(totalFrameSize >= 0);
7028 return totalFrameSize;
7031 //---------------------------------------------------------------------
7032 // genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
7033 // This number is going to be negative, since the Caller-SP is at a higher
7034 // address than the frame pointer.
7036 // There must be a frame pointer to call this function!
7038 // We can't compute this directly from the Caller-SP, since the frame pointer
7039 // is based on a maximum delta from Initial-SP, so first we find SP, then
7040 // compute the FP offset.
7042 int CodeGenInterface::genCallerSPtoFPdelta()
7044 assert(isFramePointerUsed());
7045 int callerSPtoFPdelta;
7047 callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta();
7049 assert(callerSPtoFPdelta <= 0);
7050 return callerSPtoFPdelta;
7053 //---------------------------------------------------------------------
7054 // genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
7056 // This number will be negative.
7058 int CodeGenInterface::genCallerSPtoInitialSPdelta()
7060 int callerSPtoSPdelta = 0;
7062 callerSPtoSPdelta -= genTotalFrameSize();
7063 callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address
7065 // compCalleeRegsPushed does not account for the frame pointer
7066 // TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
7067 if (isFramePointerUsed())
7069 callerSPtoSPdelta -= REGSIZE_BYTES;
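// Worked example (hypothetical frame): with 3 callee-saved pushes (not counting RBP) and
// 0x40 bytes of locals, genTotalFrameSize() = 3*8 + 0x40 = 0x58, so with a frame pointer
// callerSPtoSPdelta = -(0x58 + 8 /*return address*/ + 8 /*pushed RBP*/) = -0x68.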
7072 assert(callerSPtoSPdelta <= 0);
7073 return callerSPtoSPdelta;
7075 #endif // _TARGET_AMD64_
7077 //-----------------------------------------------------------------------------------------
7078 // genSSE2BitwiseOp - generate SSE2 code for the given oper as "Operand BitWiseOp BitMask"
7081 // treeNode - tree node
7087 // i) tree oper is one of GT_NEG or GT_INTRINSIC Abs()
7088 // ii) tree type is floating point type.
7089 // iii) caller of this routine needs to call genProduceReg()
7090 void CodeGen::genSSE2BitwiseOp(GenTree* treeNode)
7092 regNumber targetReg = treeNode->gtRegNum;
7093 var_types targetType = treeNode->TypeGet();
7094 assert(varTypeIsFloating(targetType));
7098 CORINFO_FIELD_HANDLE* bitMask = nullptr;
7099 instruction ins = INS_invalid;
7100 void* cnsAddr = nullptr;
7101 bool dblAlign = false;
7103 switch (treeNode->OperGet())
7106 // Neg(x) = flip the sign bit.
7107 // Neg(f) = f ^ 0x80000000
7108 // Neg(d) = d ^ 0x8000000000000000
7110 if (targetType == TYP_FLOAT)
7112 bitMask = &negBitmaskFlt;
7114 static_assert_no_msg(sizeof(float) == sizeof(int));
7115 *((int*)&f) = 0x80000000;
7120 bitMask = &negBitmaskDbl;
7122 static_assert_no_msg(sizeof(double) == sizeof(__int64));
7123 *((__int64*)&d) = 0x8000000000000000LL;
7130 assert(treeNode->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs);
7132 // Abs(x) = set sign-bit to zero
7133 // Abs(f) = f & 0x7fffffff
7134 // Abs(d) = d & 0x7fffffffffffffff
7136 if (targetType == TYP_FLOAT)
7138 bitMask = &absBitmaskFlt;
7140 static_assert_no_msg(sizeof(float) == sizeof(int));
7141 *((int*)&f) = 0x7fffffff;
7146 bitMask = &absBitmaskDbl;
7148 static_assert_no_msg(sizeof(double) == sizeof(__int64));
7149 *((__int64*)&d) = 0x7fffffffffffffffLL;
7156 assert(!"genSSE2: unsupported oper");
7161 if (*bitMask == nullptr)
7163 assert(cnsAddr != nullptr);
7164 *bitMask = getEmitter()->emitAnyConst(cnsAddr, genTypeSize(targetType), dblAlign);
7167 // We need an additional register for bitmask.
7168 regNumber tmpReg = treeNode->GetSingleTempReg();
7170 // Move operand into targetReg only if the reg reserved for
7171 // internal purpose is not the same as targetReg.
7172 GenTree* op1 = treeNode->gtOp.gtOp1;
7173 assert(op1->isUsedFromReg());
7174 regNumber operandReg = genConsumeReg(op1);
7175 if (tmpReg != targetReg)
7177 if (operandReg != targetReg)
7179 inst_RV_RV(ins_Copy(targetType), targetReg, operandReg, targetType);
7182 operandReg = tmpReg;
7185 getEmitter()->emitIns_R_C(ins_Load(targetType, false), emitTypeSize(targetType), tmpReg, *bitMask, 0);
7186 assert(ins != INS_invalid);
7187 inst_RV_RV(ins, targetReg, operandReg, targetType);
7190 //-----------------------------------------------------------------------------------------
7191 // genSSE41RoundOp - generate SSE41 code for the given tree as a round operation
7194 // treeNode - tree node
7200 // i) SSE4.1 is supported by the underlying hardware
7201 // ii) treeNode oper is a GT_INTRINSIC
7202 // iii) treeNode type is a floating point type
7203 // iv) treeNode is not used from memory
7204 // v) tree oper is CORINFO_INTRINSIC_Round, _Ceiling, or _Floor
7205 // vi) caller of this routine needs to call genProduceReg()
7206 void CodeGen::genSSE41RoundOp(GenTreeOp* treeNode)
7208 // i) SSE4.1 is supported by the underlying hardware
7209 assert(compiler->compSupports(InstructionSet_SSE41));
7211 // ii) treeNode oper is a GT_INTRINSIC
7212 assert(treeNode->OperGet() == GT_INTRINSIC);
7214 GenTree* srcNode = treeNode->gtGetOp1();
7216 // iii) treeNode type is floating point type
7217 assert(varTypeIsFloating(srcNode));
7218 assert(srcNode->TypeGet() == treeNode->TypeGet());
7220 // iv) treeNode is not used from memory
7221 assert(!treeNode->isUsedFromMemory());
7223 genConsumeOperands(treeNode);
7225 instruction ins = (treeNode->TypeGet() == TYP_FLOAT) ? INS_roundss : INS_roundsd;
7226 emitAttr size = emitTypeSize(treeNode);
7228 regNumber dstReg = treeNode->gtRegNum;
7232 // v) tree oper is CORINFO_INTRINSIC_Round, _Ceiling, or _Floor
7233 switch (treeNode->gtIntrinsic.gtIntrinsicId)
7235 case CORINFO_INTRINSIC_Round:
7239 case CORINFO_INTRINSIC_Ceiling:
7243 case CORINFO_INTRINSIC_Floor:
7249 assert(!"genSSE41RoundOp: unsupported intrinsic");
7253 if (srcNode->isContained() || srcNode->isUsedFromSpillTemp())
7255 emitter* emit = getEmitter();
7257 TempDsc* tmpDsc = nullptr;
7258 unsigned varNum = BAD_VAR_NUM;
7259 unsigned offset = (unsigned)-1;
7261 if (srcNode->isUsedFromSpillTemp())
7263 assert(srcNode->IsRegOptional());
7265 tmpDsc = getSpillTempDsc(srcNode);
7266 varNum = tmpDsc->tdTempNum();
7269 compiler->tmpRlsTemp(tmpDsc);
7271 else if (srcNode->isIndir())
7273 GenTreeIndir* memIndir = srcNode->AsIndir();
7274 GenTree* memBase = memIndir->gtOp1;
7276 switch (memBase->OperGet())
7278 case GT_LCL_VAR_ADDR:
7280 varNum = memBase->AsLclVarCommon()->GetLclNum();
7283 // Ensure that all the GenTreeIndir values are set to their defaults.
7284 assert(memBase->gtRegNum == REG_NA);
7285 assert(!memIndir->HasIndex());
7286 assert(memIndir->Scale() == 1);
7287 assert(memIndir->Offset() == 0);
7292 case GT_CLS_VAR_ADDR:
7294 emit->emitIns_R_C_I(ins, size, dstReg, memBase->gtClsVar.gtClsVarHnd, 0, ival);
7300 emit->emitIns_R_A_I(ins, size, dstReg, memIndir, ival);
7307 switch (srcNode->OperGet())
7311 GenTreeDblCon* dblConst = srcNode->AsDblCon();
7312 CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(dblConst->gtDconVal, emitTypeSize(dblConst));
7314 emit->emitIns_R_C_I(ins, size, dstReg, hnd, 0, ival);
7320 GenTreeLclFld* lclField = srcNode->AsLclFld();
7322 varNum = lclField->GetLclNum();
7323 offset = lclField->gtLclFld.gtLclOffs;
7329 assert(srcNode->IsRegOptional() ||
7330 !compiler->lvaTable[srcNode->gtLclVar.gtLclNum].lvIsRegCandidate());
7332 varNum = srcNode->AsLclVar()->GetLclNum();
7343 // Ensure we got a good varNum and offset.
7344 // We also need to check for `tmpDsc != nullptr` since spill temp numbers
7345 // are negative and start with -1, which also happens to be BAD_VAR_NUM.
7346 assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
7347 assert(offset != (unsigned)-1);
7349 emit->emitIns_R_S_I(ins, size, dstReg, varNum, offset, ival);
7353 inst_RV_RV_IV(ins, size, dstReg, srcNode->gtRegNum, ival);
7357 //---------------------------------------------------------------------
7358 // genIntrinsic - generate code for a given intrinsic
7361 // treeNode - the GT_INTRINSIC node
7366 void CodeGen::genIntrinsic(GenTree* treeNode)
// Right now only Sqrt/Abs and the SSE4.1 round/ceiling/floor operations are treated as math intrinsics.
7369 switch (treeNode->gtIntrinsic.gtIntrinsicId)
7371 case CORINFO_INTRINSIC_Sqrt:
7373 // Both operand and its result must be of the same floating point type.
7374 GenTree* srcNode = treeNode->gtOp.gtOp1;
7375 assert(varTypeIsFloating(srcNode));
7376 assert(srcNode->TypeGet() == treeNode->TypeGet());
7378 genConsumeOperands(treeNode->AsOp());
7379 getEmitter()->emitInsBinary(ins_FloatSqrt(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode, srcNode);
7383 case CORINFO_INTRINSIC_Abs:
7384 genSSE2BitwiseOp(treeNode);
7387 case CORINFO_INTRINSIC_Round:
7388 case CORINFO_INTRINSIC_Ceiling:
7389 case CORINFO_INTRINSIC_Floor:
7390 genSSE41RoundOp(treeNode->AsOp());
7394 assert(!"genIntrinsic: Unsupported intrinsic");
7398 genProduceReg(treeNode);
//---------------------------------------------------------------------------
7402 // getBaseVarForPutArgStk - returns the baseVarNum for passing a stack arg.
7405 // treeNode - the GT_PUTARG_STK node
7408 // The number of the base variable.
7411 // If tail call the outgoing args are placed in the caller's incoming arg stack space.
7412 // Otherwise, they go in the outgoing arg area on the current frame.
7414 // On Windows the caller always creates slots (homing space) in its frame for the
7415 // first 4 arguments of a callee (register passed args). So, the baseVarNum is always 0.
7416 // For System V systems there is no such calling convention requirement, and the code needs to find
// the first stack passed argument from the caller. This is done by iterating over
// all the lvParam variables and finding the first one whose lvArgReg equals REG_STK.
7420 unsigned CodeGen::getBaseVarForPutArgStk(GenTree* treeNode)
7422 assert(treeNode->OperGet() == GT_PUTARG_STK);
7424 unsigned baseVarNum;
// Whether to set up the stack arg in the incoming or the outgoing arg area?
// Fast tail calls are implemented as epilog+jmp, so the stack arg is set up in the incoming arg area.
// For all other calls, the stack arg is set up in the outgoing arg area.
7429 if (treeNode->AsPutArgStk()->putInIncomingArgArea())
7431 // See the note in the function header re: finding the first stack passed argument.
7432 baseVarNum = getFirstArgWithStackSlot();
7433 assert(baseVarNum != BAD_VAR_NUM);
7436 // This must be a fast tail call.
7437 assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall());
// Since it is a fast tail call, the existence of the first incoming arg is guaranteed
// because a fast tail call requires that the incoming arg area of the caller is >= the
// outgoing arg area required for the tail call.
7442 LclVarDsc* varDsc = &(compiler->lvaTable[baseVarNum]);
7443 assert(varDsc != nullptr);
7445 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
7446 assert(!varDsc->lvIsRegArg && varDsc->lvArgReg == REG_STK);
7447 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
7448 // On Windows this assert is always true. The first argument will always be in REG_ARG_0 or REG_FLTARG_0.
7449 assert(varDsc->lvIsRegArg && (varDsc->lvArgReg == REG_ARG_0 || varDsc->lvArgReg == REG_FLTARG_0));
7450 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
7455 #if FEATURE_FIXED_OUT_ARGS
7456 baseVarNum = compiler->lvaOutgoingArgSpaceVar;
7457 #else // !FEATURE_FIXED_OUT_ARGS
7458 assert(!"No BaseVarForPutArgStk on x86");
7459 baseVarNum = BAD_VAR_NUM;
7460 #endif // !FEATURE_FIXED_OUT_ARGS
7466 //---------------------------------------------------------------------
7467 // genAlignStackBeforeCall: Align the stack if necessary before a call.
7470 // putArgStk - the putArgStk node.
7472 void CodeGen::genAlignStackBeforeCall(GenTreePutArgStk* putArgStk)
7474 #if defined(UNIX_X86_ABI)
7476 genAlignStackBeforeCall(putArgStk->gtCall);
7478 #endif // UNIX_X86_ABI
7481 //---------------------------------------------------------------------
7482 // genAlignStackBeforeCall: Align the stack if necessary before a call.
7485 // call - the call node.
7487 void CodeGen::genAlignStackBeforeCall(GenTreeCall* call)
7489 #if defined(UNIX_X86_ABI)
7491 // Have we aligned the stack yet?
7492 if (!call->fgArgInfo->IsStkAlignmentDone())
7494 // We haven't done any stack alignment yet for this call. We might need to create
7495 // an alignment adjustment, even if this function itself doesn't have any stack args.
7496 // This can happen if this function call is part of a nested call sequence, and the outer
7497 // call has already pushed some arguments.
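// For example (illustrative sizes): if an enclosing call has already pushed 4 bytes and this
// call will push 8 bytes of arguments, ComputeStackAlignment can request a 4-byte pad so that
// 4 + 4 + 8 reaches the 16-byte alignment required at call sites by the UNIX_X86_ABI.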
7499 unsigned stkLevel = genStackLevel + call->fgArgInfo->GetStkSizeBytes();
7500 call->fgArgInfo->ComputeStackAlignment(stkLevel);
7502 unsigned padStkAlign = call->fgArgInfo->GetStkAlign();
7503 if (padStkAlign != 0)
7505 // Now generate the alignment
7506 inst_RV_IV(INS_sub, REG_SPBASE, padStkAlign, EA_PTRSIZE);
7507 AddStackLevel(padStkAlign);
7508 AddNestedAlignment(padStkAlign);
7511 call->fgArgInfo->SetStkAlignmentDone();
7514 #endif // UNIX_X86_ABI
7517 //---------------------------------------------------------------------
7518 // genRemoveAlignmentAfterCall: After a call, remove the alignment
7519 // added before the call, if any.
7522 // call - the call node.
7523 // bias - additional stack adjustment
// When bias > 0, the caller should adjust the stack level appropriately, as
// the bias is not considered when adjusting the stack level here.
7529 void CodeGen::genRemoveAlignmentAfterCall(GenTreeCall* call, unsigned bias)
7531 #if defined(_TARGET_X86_)
7532 #if defined(UNIX_X86_ABI)
7533 // Put back the stack pointer if there was any padding for stack alignment
7534 unsigned padStkAlign = call->fgArgInfo->GetStkAlign();
7535 unsigned padStkAdjust = padStkAlign + bias;
7537 if (padStkAdjust != 0)
7539 inst_RV_IV(INS_add, REG_SPBASE, padStkAdjust, EA_PTRSIZE);
7540 SubtractStackLevel(padStkAlign);
7541 SubtractNestedAlignment(padStkAlign);
7543 #else // UNIX_X86_ABI
#endif // !UNIX_X86_ABI
#else  // _TARGET_X86_
#endif // !_TARGET_X86_
7556 //---------------------------------------------------------------------
7557 // genAdjustStackForPutArgStk:
7558 // adjust the stack pointer for a putArgStk node if necessary.
7561 // putArgStk - the putArgStk node.
7563 // Returns: true if the stack pointer was adjusted; false otherwise.
7566 // Sets `m_pushStkArg` to true if the stack arg needs to be pushed,
7567 // false if the stack arg needs to be stored at the current stack
7568 // pointer address. This is exactly the opposite of the return value
7569 // of this function.
7571 bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk)
7573 const unsigned argSize = putArgStk->getArgSize();
7574 GenTree* source = putArgStk->gtGetOp1();
7577 if (!source->OperIs(GT_FIELD_LIST) && varTypeIsSIMD(source))
7579 inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE);
7580 AddStackLevel(argSize);
7581 m_pushStkArg = false;
7584 #endif // FEATURE_SIMD
7586 // If the gtPutArgStkKind is one of the push types, we do not pre-adjust the stack.
7587 // This is set in Lowering, and is true if and only if:
7588 // - This argument contains any GC pointers OR
7589 // - It is a GT_FIELD_LIST OR
7590 // - It is less than 16 bytes in size.
7591 CLANG_FORMAT_COMMENT_ANCHOR;
7594 switch (putArgStk->gtPutArgStkKind)
7596 case GenTreePutArgStk::Kind::RepInstr:
7597 case GenTreePutArgStk::Kind::Unroll:
7598 assert((putArgStk->gtNumberReferenceSlots == 0) && (source->OperGet() != GT_FIELD_LIST) && (argSize >= 16));
7600 case GenTreePutArgStk::Kind::Push:
7601 case GenTreePutArgStk::Kind::PushAllSlots:
7602 assert((putArgStk->gtNumberReferenceSlots != 0) || (source->OperGet() == GT_FIELD_LIST) || (argSize < 16));
7604 case GenTreePutArgStk::Kind::Invalid:
7606 assert(!"Uninitialized GenTreePutArgStk::Kind");
7611 if (putArgStk->isPushKind())
7613 m_pushStkArg = true;
7618 m_pushStkArg = false;
7619 inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE);
7620 AddStackLevel(argSize);
7625 //---------------------------------------------------------------------
7626 // genPutArgStkFieldList - generate code for passing a GT_FIELD_LIST arg on the stack.
7629 // treeNode - the GT_PUTARG_STK node whose op1 is a GT_FIELD_LIST
7634 void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
7636 GenTreeFieldList* const fieldList = putArgStk->gtOp1->AsFieldList();
7637 assert(fieldList != nullptr);
7639 // Set m_pushStkArg and pre-adjust the stack if necessary.
7640 const bool preAdjustedStack = genAdjustStackForPutArgStk(putArgStk);
7642 // For now, we only support the "push" case; we will push a full slot for the first field of each slot
7643 // within the struct.
7644 assert((putArgStk->isPushKind()) && !preAdjustedStack && m_pushStkArg);
7646 // If we have pre-adjusted the stack and are simply storing the fields in order, set the offset to 0.
7647 // (Note that this mode is not currently being used.)
7648 // If we are pushing the arguments (i.e. we have not pre-adjusted the stack), then we are pushing them
7649 // in reverse order, so we start with the current field offset at the size of the struct arg (which must be
7650 // a multiple of the target pointer size).
7651 unsigned currentOffset = (preAdjustedStack) ? 0 : putArgStk->getArgSize();
7652 unsigned prevFieldOffset = currentOffset;
7653 regNumber intTmpReg = REG_NA;
7654 regNumber simdTmpReg = REG_NA;
7655 if (putArgStk->AvailableTempRegCount() != 0)
7657 regMaskTP rsvdRegs = putArgStk->gtRsvdRegs;
7658 if ((rsvdRegs & RBM_ALLINT) != 0)
7660 intTmpReg = putArgStk->GetSingleTempReg(RBM_ALLINT);
7661 assert(genIsValidIntReg(intTmpReg));
7663 if ((rsvdRegs & RBM_ALLFLOAT) != 0)
7665 simdTmpReg = putArgStk->GetSingleTempReg(RBM_ALLFLOAT);
7666 assert(genIsValidFloatReg(simdTmpReg));
7668 assert(genCountBits(rsvdRegs) == (unsigned)((intTmpReg == REG_NA) ? 0 : 1) + ((simdTmpReg == REG_NA) ? 0 : 1));
7671 for (GenTreeFieldList* current = fieldList; current != nullptr; current = current->Rest())
7673 GenTree* const fieldNode = current->Current();
7674 const unsigned fieldOffset = current->gtFieldOffset;
7675 var_types fieldType = current->gtFieldType;
7677 // Long-typed nodes should have been handled by the decomposition pass, and lowering should have sorted the
7678 // field list in descending order by offset.
7679 assert(!varTypeIsLong(fieldType));
7680 assert(fieldOffset <= prevFieldOffset);
7682 // Consume the register, if any, for this field. Note that genConsumeRegs() will appropriately
7683 // update the liveness info for a lclVar that has been marked RegOptional, which hasn't been
7684 // assigned a register, and which is therefore contained.
7685 // Unlike genConsumeReg(), it handles the case where no registers are being consumed.
7686 genConsumeRegs(fieldNode);
7687 regNumber argReg = fieldNode->isUsedFromSpillTemp() ? REG_NA : fieldNode->gtRegNum;
7689 // If the field is slot-like, we can use a push instruction to store the entire register no matter the type.
7691 // The GC encoder requires that the stack remain 4-byte aligned at all times. Round the adjustment up
7692 // to the next multiple of 4. If we are going to generate a `push` instruction, the adjustment must
7693 // not require rounding.
7694 // NOTE: if the field is of GC type, we must use a push instruction, since the emitter is not otherwise
7695 // able to detect stores into the outgoing argument area of the stack on x86.
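// For example (illustrative layout): for a 12-byte struct with int fields at offsets 8 and 0
// and a 4-byte hole at offset 4, the loop below emits "push <field@8>", then "push 0" for the
// hole, then "push <field@0>", keeping the stack 4-byte aligned throughout.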
7696 const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevFieldOffset - fieldOffset) >= 4);
7697 int adjustment = roundUp(currentOffset - fieldOffset, 4);
        if (fieldIsSlot && !varTypeIsSIMD(fieldType))
        {
            fieldType = genActualType(fieldType);
            unsigned pushSize = genTypeSize(fieldType);
            assert((pushSize % 4) == 0);
            adjustment -= pushSize;
            while (adjustment != 0)
            {
                inst_IV(INS_push, 0);
                currentOffset -= pushSize;
                AddStackLevel(pushSize);
                adjustment -= pushSize;
            }
            m_pushStkArg = true;
        }
        else
        {
            m_pushStkArg = false;
7717 // We always "push" floating point fields (i.e. they are full slot values that don't
7718 // require special handling).
7719 assert(varTypeIsIntegralOrI(fieldNode) || varTypeIsSIMD(fieldNode));
7721 // If we can't push this field, it needs to be in a register so that we can store
7722 // it to the stack location.
            if (adjustment != 0)
            {
                // This moves the stack pointer to fieldOffset.
                // For this case, we must adjust the stack and generate stack-relative stores rather than pushes.
                // Adjust the stack pointer to the next slot boundary.
                inst_RV_IV(INS_sub, REG_SPBASE, adjustment, EA_PTRSIZE);
                currentOffset -= adjustment;
                AddStackLevel(adjustment);
            }
        }
7733 // Does it need to be in a byte register?
7734 // If so, we'll use intTmpReg, which must have been allocated as a byte register.
7735 // If it's already in a register, but not a byteable one, then move it.
7736 if (varTypeIsByte(fieldType) && ((argReg == REG_NA) || ((genRegMask(argReg) & RBM_BYTE_REGS) == 0)))
7738 assert(intTmpReg != REG_NA);
7739 noway_assert((genRegMask(intTmpReg) & RBM_BYTE_REGS) != 0);
            if (argReg != REG_NA)
            {
                inst_RV_RV(INS_mov, intTmpReg, argReg, fieldType);
                argReg = intTmpReg;
            }
        }

        if (argReg == REG_NA)
        {
            if (m_pushStkArg)
            {
                if (fieldNode->isUsedFromSpillTemp())
                {
7754 assert(!varTypeIsSIMD(fieldType)); // Q: can we get here with SIMD?
7755 assert(fieldNode->IsRegOptional());
7756 TempDsc* tmp = getSpillTempDsc(fieldNode);
7757 getEmitter()->emitIns_S(INS_push, emitActualTypeSize(fieldNode->TypeGet()), tmp->tdTempNum(), 0);
7758 compiler->tmpRlsTemp(tmp);
                }
                else
                {
                    assert(varTypeIsIntegralOrI(fieldNode));
                    switch (fieldNode->OperGet())
                    {
                        case GT_LCL_VAR:
                            inst_TT(INS_push, fieldNode, 0, 0, emitActualTypeSize(fieldNode->TypeGet()));
                            break;
                        case GT_CNS_INT:
                            if (fieldNode->IsIconHandle())
                            {
                                inst_IV_handle(INS_push, fieldNode->gtIntCon.gtIconVal);
                            }
                            else
                            {
                                inst_IV(INS_push, fieldNode->gtIntCon.gtIconVal);
                            }
                            break;
                        default:
                            unreached();
                    }
                }
7782 currentOffset -= TARGET_POINTER_SIZE;
7783 AddStackLevel(TARGET_POINTER_SIZE);
            }
            else
            {
                // The stack has been adjusted and we will load the field into intTmpReg and then store it on the stack.
7788 assert(varTypeIsIntegralOrI(fieldNode));
                switch (fieldNode->OperGet())
                {
                    case GT_LCL_VAR:
                        inst_RV_TT(INS_mov, intTmpReg, fieldNode);
                        break;
                    case GT_CNS_INT:
                        genSetRegToConst(intTmpReg, fieldNode->TypeGet(), fieldNode);
                        break;
                    default:
                        unreached();
                }
7800 genStoreRegToStackArg(fieldType, intTmpReg, fieldOffset - currentOffset);
7805 #if defined(FEATURE_SIMD)
7806 if (fieldType == TYP_SIMD12)
7808 assert(genIsValidFloatReg(simdTmpReg));
7809 genStoreSIMD12ToStack(argReg, simdTmpReg);
            else
#endif // defined(FEATURE_SIMD)
7814 genStoreRegToStackArg(fieldType, argReg, fieldOffset - currentOffset);
7818 // We always push a slot-rounded size
7819 currentOffset -= genTypeSize(fieldType);
7823 prevFieldOffset = fieldOffset;
    if (currentOffset != 0)
    {
        // We don't expect padding at the beginning of a struct, but it could happen with explicit layout.
        inst_RV_IV(INS_sub, REG_SPBASE, currentOffset, EA_PTRSIZE);
        AddStackLevel(currentOffset);
    }
}
7832 #endif // _TARGET_X86_
//---------------------------------------------------------------------
// genPutArgStk - generate code for passing an arg on the stack.
//
// Arguments:
//    putArgStk - the GT_PUTARG_STK node
//
// Return value:
//    None
//
7844 void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
7846 GenTree* data = putArgStk->gtOp1;
7847 var_types targetType = genActualType(data->TypeGet());
#ifdef _TARGET_X86_

    genAlignStackBeforeCall(putArgStk);
    if ((data->OperGet() != GT_FIELD_LIST) && varTypeIsStruct(targetType))
    {
        (void)genAdjustStackForPutArgStk(putArgStk);
        genPutStructArgStk(putArgStk);
        return;
    }
7860 // On a 32-bit target, all of the long arguments are handled with GT_FIELD_LISTs of TYP_INT.
7861 assert(targetType != TYP_LONG);
7863 const unsigned argSize = putArgStk->getArgSize();
7864 assert((argSize % TARGET_POINTER_SIZE) == 0);
    if (data->isContainedIntOrIImmed())
    {
        if (data->IsIconHandle())
        {
            inst_IV_handle(INS_push, data->gtIntCon.gtIconVal);
        }
        else
        {
            inst_IV(INS_push, data->gtIntCon.gtIconVal);
        }
        AddStackLevel(argSize);
    }
    else if (data->OperGet() == GT_FIELD_LIST)
    {
        genPutArgStkFieldList(putArgStk);
    }
    else
    {
        // We should not see any contained nodes that are not immediates.
        assert(data->isUsedFromReg());
        genConsumeReg(data);
        genPushReg(targetType, data->gtRegNum);
    }
7889 #else // !_TARGET_X86_
7891 unsigned baseVarNum = getBaseVarForPutArgStk(putArgStk);
7893 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
    if (varTypeIsStruct(targetType))
    {
        m_stkArgVarNum = baseVarNum;
        m_stkArgOffset = putArgStk->getArgOffset();
        genPutStructArgStk(putArgStk);
        m_stkArgVarNum = BAD_VAR_NUM;
        return;
    }
7903 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
7905 noway_assert(targetType != TYP_STRUCT);
    // Get argument offset on stack.
    // Here we cross-check that the argument offset hasn't changed between lowering and codegen,
    // since we store the arg slot number in the GT_PUTARG_STK node during lowering.
7910 int argOffset = putArgStk->getArgOffset();
#ifdef DEBUG
    fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(putArgStk->gtCall, putArgStk);
    assert(curArgTabEntry);
    assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE);
#endif // DEBUG
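    // The invariant checked above is simple arithmetic; a standalone sketch with a
    // hypothetical slot number and the amd64 pointer size:
#if 0
#include <cassert>

    int main()
    {
        const int TARGET_POINTER_SIZE = 8;  // amd64
        unsigned  slotNum             = 3;  // hypothetical fgArgTabEntry::slotNum
        int       argOffset           = 24; // recorded on the GT_PUTARG_STK node during lowering
        assert(argOffset == (int)slotNum * TARGET_POINTER_SIZE);
        return 0;
    }
#endif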
7918 if (data->isContainedIntOrIImmed())
7920 getEmitter()->emitIns_S_I(ins_Store(targetType), emitTypeSize(targetType), baseVarNum, argOffset,
7921 (int)data->AsIntConCommon()->IconValue());
    }
    else
    {
        assert(data->isUsedFromReg());
7926 genConsumeReg(data);
        getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, baseVarNum,
                                  argOffset);
    }
7931 #endif // !_TARGET_X86_
//---------------------------------------------------------------------
// genPutArgReg - generate code for a GT_PUTARG_REG node
//
// Arguments:
//    tree - the GT_PUTARG_REG node
//
// Return value:
//    None
//
7943 void CodeGen::genPutArgReg(GenTreeOp* tree)
7945 assert(tree->OperIs(GT_PUTARG_REG));
7947 var_types targetType = tree->TypeGet();
7948 regNumber targetReg = tree->gtRegNum;
7950 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
7951 assert(targetType != TYP_STRUCT);
7952 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
    GenTree* op1 = tree->gtOp1;
    genConsumeReg(op1);

    // If the child node is not already in the register we need, move it.
7958 if (targetReg != op1->gtRegNum)
7960 inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
7963 genProduceReg(tree);
//---------------------------------------------------------------------
// genPushReg: Push a register value onto the stack and adjust the stack level
//
// Arguments:
//    type   - the type of value to be stored
//    srcReg - the register containing the value
//
// Notes:
//    For TYP_LONG, srcReg must be a floating point register.
//    Otherwise, the register type must be consistent with the given type.
//
7977 void CodeGen::genPushReg(var_types type, regNumber srcReg)
7979 unsigned size = genTypeSize(type);
7980 if (varTypeIsIntegralOrI(type) && type != TYP_LONG)
7982 assert(genIsValidIntReg(srcReg));
7983 inst_RV(INS_push, srcReg, type);
        instruction ins;
        emitAttr    attr = emitTypeSize(type);
        if (type == TYP_LONG)
        {
            // On x86, the only way we can push a TYP_LONG from a register is if it is in an xmm reg.
            // This is only used when we are pushing a struct from memory to memory, and basically is
            // handling an 8-byte "chunk", as opposed to strictly a long type.
            ins = INS_movq;
        }
        else
        {
            ins = ins_Store(type);
        }
8000 assert(genIsValidFloatReg(srcReg));
8001 inst_RV_IV(INS_sub, REG_SPBASE, size, EA_PTRSIZE);
8002 getEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, 0);
8004 AddStackLevel(size);
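// For reference, the TYP_LONG path above materializes the following shape (illustrative
// x86 listing, assuming the 8-byte chunk is in xmm0; shown for exposition only):
//
//     sub  esp, 8            ; inst_RV_IV(INS_sub, REG_SPBASE, size, EA_PTRSIZE)
//     movq [esp], xmm0       ; getEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, 0)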
8006 #endif // _TARGET_X86_
8008 #if defined(FEATURE_PUT_STRUCT_ARG_STK)
//---------------------------------------------------------------------
// genStoreRegToStackArg: Store a register value into the stack argument area
//
// Arguments:
//    type   - the type of value to be stored
//    srcReg - the register containing the value
//    offset - the offset from the base (see Assumptions below)
//
// Notes:
//    A type of TYP_STRUCT instructs this method to store a 16-byte chunk
//    at the given offset (i.e. not the full struct).
//
// Assumptions:
//    The caller must set the context appropriately before calling this method:
//    - On x64, m_stkArgVarNum must be set according to whether this is a regular or tail call.
//    - On x86, the caller must set m_pushStkArg if this method should push the argument.
//      Otherwise, the argument is stored at the given offset from sp.
//
// TODO: In the below code the load and store instructions are for 16 bytes, but the
//       type is EA_8BYTE. The movdqa/u are 16-byte instructions, so it works, but
//       this probably needs to be changed.
//
8030 void CodeGen::genStoreRegToStackArg(var_types type, regNumber srcReg, int offset)
8032 assert(srcReg != REG_NA);
    instruction ins;
    emitAttr    attr;
    unsigned    size;

    if (type == TYP_STRUCT)
    {
        ins = INS_movdqu;
        // This should be changed!
        attr = EA_8BYTE;
        size = 16;
    }
    else
    {
#ifdef FEATURE_SIMD
        if (varTypeIsSIMD(type))
        {
            assert(genIsValidFloatReg(srcReg));
            ins = ins_Store(type); // TODO-CQ: pass 'aligned' correctly
        }
        else
#endif // FEATURE_SIMD
#ifdef _TARGET_X86_
            if (type == TYP_LONG)
        {
            assert(genIsValidFloatReg(srcReg));
            ins = INS_movq;
        }
        else
#endif // _TARGET_X86_
        {
            assert((varTypeIsFloating(type) && genIsValidFloatReg(srcReg)) ||
                   (varTypeIsIntegralOrI(type) && genIsValidIntReg(srcReg)));
            ins = ins_Store(type);
        }
        attr = emitTypeSize(type);
        size = genTypeSize(type);
    }
#ifdef _TARGET_X86_
    if (m_pushStkArg)
    {
        genPushReg(type, srcReg);
    }
    else
    {
        getEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, offset);
    }
8080 #else // !_TARGET_X86_
8081 assert(m_stkArgVarNum != BAD_VAR_NUM);
8082 getEmitter()->emitIns_S_R(ins, attr, srcReg, m_stkArgVarNum, m_stkArgOffset + offset);
8083 #endif // !_TARGET_X86_
//---------------------------------------------------------------------
// genPutStructArgStk - generate code for copying a struct arg on the stack by value.
//                      In case there are references to heap objects in the struct,
//                      it generates the gcinfo as well.
//
// Arguments:
//    putArgStk - the GT_PUTARG_STK node
//
// Notes:
//    In the case of fixed out args, the caller must have set m_stkArgVarNum to the variable number
//    corresponding to the argument area (where we will put the argument on the stack).
//    For tail calls this is the baseVarNum = 0.
//    For non-tail calls this is the outgoingArgSpace.
//
8099 void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
8101 GenTree* source = putArgStk->gtGetOp1();
8102 var_types targetType = source->TypeGet();
8104 #if defined(_TARGET_X86_) && defined(FEATURE_SIMD)
    if (putArgStk->isSIMD12())
    {
        genPutArgStkSIMD12(putArgStk);
        return;
    }
8110 #endif // defined(_TARGET_X86_) && defined(FEATURE_SIMD)
    if (varTypeIsSIMD(targetType))
    {
        regNumber srcReg = genConsumeReg(source);
        assert((srcReg != REG_NA) && (genIsValidFloatReg(srcReg)));
        genStoreRegToStackArg(targetType, srcReg, 0);
        return;
    }
8120 assert(targetType == TYP_STRUCT);
    if (putArgStk->gtNumberReferenceSlots == 0)
    {
        switch (putArgStk->gtPutArgStkKind)
        {
            case GenTreePutArgStk::Kind::RepInstr:
                genStructPutArgRepMovs(putArgStk);
                break;
            case GenTreePutArgStk::Kind::Unroll:
                genStructPutArgUnroll(putArgStk);
                break;
            case GenTreePutArgStk::Kind::Push:
                genStructPutArgUnroll(putArgStk);
                break;
            default:
                unreached();
        }
    }
    else
    {
        // No need to disable GC the way COPYOBJ does. Here the refs are always copied with atomic operations.
        CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_X86_)
        // On x86, any struct that contains GC references must be stored to the stack using `push` instructions, so
        // that the emitter properly detects the need to update the method's GC information.
        //
        // Strictly speaking, it is only necessary to use `push` to store the GC references themselves, so for structs
        // with large numbers of consecutive non-GC-ref-typed fields we may be able to improve the code size in the
        // future.
8151 assert(m_pushStkArg);
8153 GenTree* srcAddr = source->gtGetOp1();
8154 BYTE* gcPtrs = putArgStk->gtGcPtrs;
8155 const unsigned numSlots = putArgStk->gtNumSlots;
8157 regNumber srcRegNum = srcAddr->gtRegNum;
8158 const bool srcAddrInReg = srcRegNum != REG_NA;
8160 unsigned srcLclNum = 0;
8161 unsigned srcLclOffset = 0;
    if (srcAddrInReg)
    {
        genConsumeReg(srcAddr);
    }
    else
    {
        assert(srcAddr->OperIsLocalAddr());

        srcLclNum = srcAddr->AsLclVarCommon()->gtLclNum;
8171 if (srcAddr->OperGet() == GT_LCL_FLD_ADDR)
8173 srcLclOffset = srcAddr->AsLclFld()->gtLclOffs;
    }

    for (int i = numSlots - 1; i >= 0; --i)
    {
        emitAttr slotAttr;
        if (gcPtrs[i] == TYPE_GC_NONE)
8182 slotAttr = EA_4BYTE;
8184 else if (gcPtrs[i] == TYPE_GC_REF)
8186 slotAttr = EA_GCREF;
        else
        {
            assert(gcPtrs[i] == TYPE_GC_BYREF);
            slotAttr = EA_BYREF;
        }
        const unsigned offset = i * TARGET_POINTER_SIZE;
        if (srcAddrInReg)
        {
            getEmitter()->emitIns_AR_R(INS_push, slotAttr, REG_NA, srcRegNum, offset);
        }
        else
        {
            getEmitter()->emitIns_S(INS_push, slotAttr, srcLclNum, srcLclOffset + offset);
        }
        AddStackLevel(TARGET_POINTER_SIZE);
    }
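    // A standalone sketch of the loop above (hypothetical three-slot layout; x86 4-byte
    // slots): slots are pushed from the highest index down so that slot 0 ends up at the
    // lowest address, and each push is tagged with the GC-ness the emitter must track:
#if 0
#include <cstdio>

    int main()
    {
        enum { TYPE_GC_NONE, TYPE_GC_REF, TYPE_GC_BYREF };
        const int gcPtrs[]            = {TYPE_GC_REF, TYPE_GC_NONE, TYPE_GC_BYREF}; // hypothetical
        const int numSlots            = 3;
        const int TARGET_POINTER_SIZE = 4; // x86
        for (int i = numSlots - 1; i >= 0; --i)
        {
            const char* attr = (gcPtrs[i] == TYPE_GC_NONE)
                                   ? "EA_4BYTE"
                                   : (gcPtrs[i] == TYPE_GC_REF) ? "EA_GCREF" : "EA_BYREF";
            printf("push slot %d (offset %d) as %s\n", i, i * TARGET_POINTER_SIZE, attr);
        }
        return 0;
    }
#endif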
8205 #else // !defined(_TARGET_X86_)
8207 // Consume these registers.
8208 // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
8209 genConsumePutStructArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA);
8211 const bool srcIsLocal = putArgStk->gtOp1->AsObj()->gtOp1->OperIsLocalAddr();
8212 const emitAttr srcAddrAttr = srcIsLocal ? EA_PTRSIZE : EA_BYREF;
8215 unsigned numGCSlotsCopied = 0;
8218 BYTE* gcPtrs = putArgStk->gtGcPtrs;
8219 const unsigned numSlots = putArgStk->gtNumSlots;
    for (unsigned i = 0; i < numSlots;)
    {
        if (gcPtrs[i] == TYPE_GC_NONE)
        {
8224 // Let's see if we can use rep movsp (alias for movsd or movsq for 32 and 64 bits respectively)
8225 // instead of a sequence of movsp instructions to save cycles and code size.
            unsigned adjacentNonGCSlotCount = 0;
            do
            {
                adjacentNonGCSlotCount++;
                i++;
            } while ((i < numSlots) && (gcPtrs[i] == TYPE_GC_NONE));
8233 // If we have a very small contiguous non-ref region, it's better just to
8234 // emit a sequence of movsp instructions
            if (adjacentNonGCSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
            {
                for (; adjacentNonGCSlotCount > 0; adjacentNonGCSlotCount--)
                {
                    instGen(INS_movsp);
                }
            }
            else
            {
                getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, adjacentNonGCSlotCount);
                instGen(INS_r_movsp);
            }
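            // A standalone model of the slot-scanning decision above (hypothetical GC layout
            // and threshold; the real CPOBJ_NONGC_SLOTS_LIMIT lives in the JIT headers):
#if 0
#include <cstdio>

            int main()
            {
                enum { NONE, REF };
                const unsigned limit    = 4; // stand-in for CPOBJ_NONGC_SLOTS_LIMIT
                const int      gcPtrs[] = {NONE, NONE, REF, NONE, NONE, NONE, NONE, NONE};
                const unsigned numSlots = 8;
                for (unsigned i = 0; i < numSlots;)
                {
                    if (gcPtrs[i] == NONE)
                    {
                        unsigned run = 0;
                        do
                        {
                            run++;
                            i++;
                        } while ((i < numSlots) && (gcPtrs[i] == NONE));
                        if (run < limit)
                            printf("%u x movsp\n", run);
                        else
                            printf("rep movsp (%u slots)\n", run);
                    }
                    else
                    {
                        printf("load+store for GC slot %u\n", i);
                        i++;
                    }
                }
                return 0;
            }
#endif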
        }
        else
        {
            assert((gcPtrs[i] == TYPE_GC_REF) || (gcPtrs[i] == TYPE_GC_BYREF));
8252 // We have a GC (byref or ref) pointer
8253 // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsp instruction,
8254 // but the logic for emitting a GC info record is not available (it is internal for the emitter
8255 // only.) See emitGCVarLiveUpd function. If we could call it separately, we could do
8256 // instGen(INS_movsp); and emission of gc info.
8258 var_types memType = (gcPtrs[i] == TYPE_GC_REF) ? TYP_REF : TYP_BYREF;
8259 getEmitter()->emitIns_R_AR(ins_Load(memType), emitTypeSize(memType), REG_RCX, REG_RSI, 0);
            genStoreRegToStackArg(memType, REG_RCX, i * TARGET_POINTER_SIZE);
            numGCSlotsCopied++;

            i++;
            if (i < numSlots)
            {
8268 // Source for the copy operation.
8269 // If a LocalAddr, use EA_PTRSIZE - copy from stack.
8270 // If not a LocalAddr, use EA_BYREF - the source location is not on the stack.
8271 getEmitter()->emitIns_R_I(INS_add, srcAddrAttr, REG_RSI, TARGET_POINTER_SIZE);
8273 // Always copying to the stack - outgoing arg area
8274 // (or the outgoing arg area of the caller for a tail call) - use EA_PTRSIZE.
                getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_RDI, TARGET_POINTER_SIZE);
            }
        }
    }
8280 assert(numGCSlotsCopied == putArgStk->gtNumberReferenceSlots);
8281 #endif // _TARGET_X86_
8284 #endif // defined(FEATURE_PUT_STRUCT_ARG_STK)
8286 /*****************************************************************************
8288 * Create and record GC Info for the function.
#ifndef JIT32_GCENCODER
void
#else  // !JIT32_GCENCODER
void*
#endif // !JIT32_GCENCODER
CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
{
8297 #ifdef JIT32_GCENCODER
8298 return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
8299 #else // !JIT32_GCENCODER
8300 genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
8301 #endif // !JIT32_GCENCODER
8304 #ifdef JIT32_GCENCODER
8305 void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize,
8306 unsigned prologSize,
                                            unsigned epilogSize DEBUGARG(void* codePtr))
{
    BYTE    headerBuf[64];
    InfoHdr header;

    int s_cached;
8314 #ifdef WIN64EXCEPTIONS
8315 // We should do this before gcInfoBlockHdrSave since varPtrTableSize must be finalized before it
8316 if (compiler->ehAnyFunclets())
    {
        gcInfo.gcMarkFilterVarsPinned();
    }
#endif // WIN64EXCEPTIONS

#ifdef DEBUG
    size_t headerSize =
#endif
        compiler->compInfoBlkSize =
            gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached);
8328 size_t argTabOffset = 0;
8329 size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset);
#if DISPLAY_SIZES

    if (genInterruptible)
    {
        gcHeaderISize += compiler->compInfoBlkSize;
        gcPtrMapISize += ptrMapSize;
    }
    else
    {
        gcHeaderNSize += compiler->compInfoBlkSize;
        gcPtrMapNSize += ptrMapSize;
    }

#endif // DISPLAY_SIZES
8346 compiler->compInfoBlkSize += ptrMapSize;
8348 /* Allocate the info block for the method */
8350 compiler->compInfoBlkAddr = (BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize);
8352 #if 0 // VERBOSE_SIZES
8353 // TODO-X86-Cleanup: 'dataSize', below, is not defined
8355 // if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100)
8357 printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n",
8358 compiler->info.compILCodeSize,
8359 compiler->compInfoBlkSize,
8360 codeSize + dataSize,
8361 codeSize + dataSize - prologSize - epilogSize,
8362 100 * (codeSize + dataSize) / compiler->info.compILCodeSize,
8363 100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize,
8364 compiler->info.compClassName,
           compiler->info.compMethodName);
    }
#endif
8370 /* Fill in the info block and return it to the caller */
8372 void* infoPtr = compiler->compInfoBlkAddr;
8374 /* Create the method info block: header followed by GC tracking tables */
8376 compiler->compInfoBlkAddr +=
8377 gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached);
8379 assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize);
8380 compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset);
8381 assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize);
    if (0)
    {
        BYTE*    temp = (BYTE*)infoPtr;
        unsigned size = compiler->compInfoBlkAddr - temp;
        BYTE*    ptab = temp + headerSize;

        noway_assert(size == headerSize + ptrMapSize);

        printf("Method info block - header [%u bytes]:", headerSize);

        for (unsigned i = 0; i < size; i++)
        {
            if (temp == ptab)
            {
                printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize);
                printf("\n    %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' ');
            }
            else
            {
                if (!(i % 16))
                    printf("\n    %04X: ", i);
            }

            printf("%02X ", *temp++);
        }

        printf("\n");
    }
#if DUMP_GC_TABLES
    if (compiler->opts.dspGCtbls)
    {
        const BYTE* base = (BYTE*)infoPtr;
        size_t      size;
        unsigned    methodSize;
        InfoHdr     dumpHeader;

        printf("GC Info for method %s\n", compiler->info.compFullName);
        printf("GC info size = %3u\n", compiler->compInfoBlkSize);

        size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize);
        // printf("size of header encoding is %3u\n", size);

        if (compiler->opts.dspGCtbls)
        {
            base += size;
            size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize);
            // printf("size of pointer table is %3u\n", size);
            printf("\n");
            noway_assert(compiler->compInfoBlkAddr == (base + size));
        }
    }
8443 if (jitOpts.testMask & 128)
8445 for (unsigned offs = 0; offs < codeSize; offs++)
8447 gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs);
8451 #endif // DUMP_GC_TABLES
8453 /* Make sure we ended up generating the expected number of bytes */
    noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize);

    return infoPtr;
}
8460 #else // !JIT32_GCENCODER
8461 void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
8463 IAllocator* allowZeroAlloc = new (compiler, CMK_GC) CompIAllocator(compiler->getAllocatorGC());
8464 GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
8465 GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
8466 assert(gcInfoEncoder);
8468 // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
8469 gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
8471 // We keep the call count for the second call to gcMakeRegPtrTable() below.
8472 unsigned callCnt = 0;
8473 // First we figure out the encoder ID's for the stack slots and registers.
8474 gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt);
8475 // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
8476 gcInfoEncoder->FinalizeSlotIds();
8477 // Now we can actually use those slot ID's to declare live ranges.
8478 gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt);
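    // The assign-then-finalize-then-use pattern above is a common two-pass shape; a minimal
    // standalone analogue (hypothetical names, not the GcInfoEncoder API):
#if 0
#include <cstdio>
#include <vector>

    struct SlotTable
    {
        std::vector<int> slots;

        // Request an ID for a slot; new slots may only be added before finalization.
        int GetId(int slot)
        {
            for (size_t j = 0; j < slots.size(); j++)
            {
                if (slots[j] == slot)
                {
                    return (int)j;
                }
            }
            slots.push_back(slot);
            return (int)slots.size() - 1;
        }
    };

    int main()
    {
        SlotTable table;
        const int liveSlots[] = {8, 16, 8};
        for (int s : liveSlots) // pass 1: declare every slot we will need
            table.GetId(s);
        // FinalizeSlotIds analogue: from here on the IDs are stable.
        for (int s : liveSlots) // pass 2: report live ranges using the stable IDs
            printf("slot %d -> id %d\n", s, table.GetId(s));
        return 0;
    }
#endif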
8480 if (compiler->opts.compDbgEnC)
        // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp)
        // which is:
        //  -return address
        //  -saved off RBP
        //  -saved 'this' pointer and bool for synchronized methods
8488 // 4 slots for RBP + return address + RSI + RDI
8489 int preservedAreaSize = 4 * REGSIZE_BYTES;
        if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
        {
            if (!(compiler->info.compFlags & CORINFO_FLG_STATIC))
            {
                preservedAreaSize += REGSIZE_BYTES;
            }

            // bool in synchronized methods that tracks whether the lock has been taken (takes 4 bytes on stack)
            preservedAreaSize += 4;
        }
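        // Worked example of the computation above: for a synchronized instance method on
        // amd64 the preserved area is 4 * 8 (RBP, return address, RSI, RDI) + 8 ('this')
        // + 4 (lock-taken bool) = 44 bytes. A standalone sketch:
#if 0
#include <cassert>

        int main()
        {
            const int REGSIZE_BYTES     = 8;    // amd64
            bool      isSynchronized    = true; // hypothetical method flags
            bool      isStatic          = false;
            int       preservedAreaSize = 4 * REGSIZE_BYTES; // RBP + return address + RSI + RDI
            if (isSynchronized)
            {
                if (!isStatic)
                {
                    preservedAreaSize += REGSIZE_BYTES; // saved 'this'
                }
                preservedAreaSize += 4; // lock-taken bool
            }
            assert(preservedAreaSize == 44);
            return 0;
        }
#endif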
        // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the
        // frame.
8504 gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
8507 if (compiler->opts.IsReversePInvoke())
8509 unsigned reversePInvokeFrameVarNumber = compiler->lvaReversePInvokeFrameVar;
8510 assert(reversePInvokeFrameVarNumber != BAD_VAR_NUM && reversePInvokeFrameVarNumber < compiler->lvaRefCount);
8511 LclVarDsc& reversePInvokeFrameVar = compiler->lvaTable[reversePInvokeFrameVarNumber];
8512 gcInfoEncoder->SetReversePInvokeFrameSlot(reversePInvokeFrameVar.lvStkOffs);
8515 gcInfoEncoder->Build();
8517 // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
8518 // let's save the values anyway for debugging purposes
8519 compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
8520 compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
8522 #endif // !JIT32_GCENCODER
8524 /*****************************************************************************
8525 * Emit a call to a helper function.
8529 void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg)
8531 void* addr = nullptr;
8532 void* pAddr = nullptr;
8534 emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
8535 addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr);
8536 regNumber callTarget = REG_NA;
8537 regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
    if (!addr)
    {
        assert(pAddr != nullptr);
8543 // Absolute indirect call addr
        // Note: the order of checks is important. Always check the pc-relative encoding first and the
        // zero-relative encoding second, because the former is one byte smaller than the latter.
8546 if (genCodeIndirAddrCanBeEncodedAsPCRelOffset((size_t)pAddr) ||
8547 genCodeIndirAddrCanBeEncodedAsZeroRelOffset((size_t)pAddr))
            // generate call whose target is specified by 32-bit offset relative to PC or zero.
            callType = emitter::EC_FUNC_TOKEN_INDIR;
            addr     = pAddr;
        }
        else
        {
8555 #ifdef _TARGET_AMD64_
8556 // If this indirect address cannot be encoded as 32-bit offset relative to PC or Zero,
            // load it into REG_HELPER_CALL_TARGET and use register indirect addressing mode to
            // make the call.
8562 if (callTargetReg == REG_NA)
8564 // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
8565 // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
8566 callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
8567 regMaskTP callTargetMask = genRegMask(callTargetReg);
8568 noway_assert((callTargetMask & killMask) == callTargetMask);
8572 // The call target must not overwrite any live variable, though it may not be in the
8573 // kill set for the call.
8574 regMaskTP callTargetMask = genRegMask(callTargetReg);
8575 noway_assert((callTargetMask & regSet.rsMaskVars) == RBM_NONE);
8579 callTarget = callTargetReg;
8580 CodeGen::genSetRegToIcon(callTarget, (ssize_t)pAddr, TYP_I_IMPL);
8581 callType = emitter::EC_INDIR_ARD;
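            // A standalone sketch of the two encodability tests used above (the real helpers
            // also account for where the code will finally be loaded; this shows only the
            // 32-bit range checks, with hypothetical addresses):
#if 0
#include <cstdint>
#include <cstdio>

            static bool fitsIn32Bits(int64_t value)
            {
                return value == (int64_t)(int32_t)value;
            }

            int main()
            {
                uint64_t codeAddr = 0x00007ff812340000ULL; // hypothetical call-site address
                uint64_t target   = 0x00007ff812350000ULL; // hypothetical helper address cell
                bool pcRel   = fitsIn32Bits((int64_t)(target - codeAddr)); // checked first: 1 byte smaller
                bool zeroRel = fitsIn32Bits((int64_t)target);
                printf("pcRel=%d zeroRel=%d\n", pcRel, zeroRel);
                return 0;
            }
#endif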
    getEmitter()->emitIns_Call(callType,
                               compiler->eeFindHelper(helper),
                               INDEBUG_LDISASM_COMMA(nullptr) addr,
                               argSize,
                               retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN),
                               gcInfo.gcVarPtrSetCur,
                               gcInfo.gcRegGCrefSetCur,
                               gcInfo.gcRegByrefSetCur,
                               BAD_IL_OFFSET, // IL offset
                               callTarget,    // ireg
                               REG_NA, 0, 0,  // xreg, xmul, disp
                               false,         // isJump
                               emitter::emitNoGChelper(helper));
8602 regTracker.rsTrashRegSet(killMask);
8605 #if !defined(_TARGET_64BIT_)
8606 //-----------------------------------------------------------------------------
8608 // Code Generation for Long integers
8610 //-----------------------------------------------------------------------------
8612 //------------------------------------------------------------------------
// genStoreLongLclVar: Generate code to store a non-enregistered long lclVar
//
// Arguments:
//    treeNode - A TYP_LONG lclVar node.
//
// Return Value:
//    None.
//
// Assumptions:
//    'treeNode' must be a TYP_LONG lclVar node for a lclVar that has NOT been promoted.
//    Its operand must be a GT_LONG node.
//
8625 void CodeGen::genStoreLongLclVar(GenTree* treeNode)
8627 emitter* emit = getEmitter();
8629 GenTreeLclVarCommon* lclNode = treeNode->AsLclVarCommon();
8630 unsigned lclNum = lclNode->gtLclNum;
8631 LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
8632 assert(varDsc->TypeGet() == TYP_LONG);
8633 assert(!varDsc->lvPromoted);
8634 GenTree* op1 = treeNode->gtOp.gtOp1;
8635 noway_assert(op1->OperGet() == GT_LONG || op1->OperGet() == GT_MUL_LONG);
8636 genConsumeRegs(op1);
    if (op1->OperGet() == GT_LONG)
    {
8640 GenTree* loVal = op1->gtGetOp1();
8641 GenTree* hiVal = op1->gtGetOp2();
8643 noway_assert((loVal->gtRegNum != REG_NA) && (hiVal->gtRegNum != REG_NA));
8645 emit->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0);
        emit->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT));
    }
    else if (op1->OperGet() == GT_MUL_LONG)
    {
        assert((op1->gtFlags & GTF_MUL_64RSLT) != 0);
8653 getEmitter()->emitIns_S_R(ins_Store(TYP_INT), emitTypeSize(TYP_INT), REG_LNGRET_LO, lclNum, 0);
8654 getEmitter()->emitIns_S_R(ins_Store(TYP_INT), emitTypeSize(TYP_INT), REG_LNGRET_HI, lclNum,
                                  genTypeSize(TYP_INT));
    }
}
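// Standalone sketch of the lo/hi split stored by genStoreLongLclVar above (little-endian:
// the low half lives at offset 0, the high half at offset genTypeSize(TYP_INT) == 4):
#if 0
#include <cassert>
#include <cstdint>
#include <cstring>

int main()
{
    uint64_t value = 0x1122334455667788ULL;
    uint8_t  frameSlot[8];
    uint32_t lo = (uint32_t)value;
    uint32_t hi = (uint32_t)(value >> 32);
    memcpy(frameSlot + 0, &lo, 4); // store lo at offset 0
    memcpy(frameSlot + 4, &hi, 4); // store hi at offset 4
    uint64_t readBack;
    memcpy(&readBack, frameSlot, 8);
    assert(readBack == value);
    return 0;
}
#endif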
8658 #endif // !defined(_TARGET_64BIT_)
8660 /*****************************************************************************
8661 * Unit testing of the XArch emitter: generate a bunch of instructions into the prolog
8662 * (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
 * disassembler thinks the instructions are the same as we do.
 */

// Uncomment "#define ALL_XARCH_EMITTER_UNIT_TESTS" to run all the unit tests here.
8667 // After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time.
8668 //#define ALL_XARCH_EMITTER_UNIT_TESTS
8670 #if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
8671 void CodeGen::genAmd64EmitterUnitTests()
    if (!compiler->opts.altJit)
    {
        // No point doing this in a "real" JIT.
        return;
    }

    // Mark the "fake" instructions in the output.
    printf("*************** In genAmd64EmitterUnitTests()\n");

    // We use this:
    //      genDefineTempLabel(genCreateTempLabel());
    // to create artificial labels to help separate groups of tests.
8694 CLANG_FORMAT_COMMENT_ANCHOR;
8696 #ifdef ALL_XARCH_EMITTER_UNIT_TESTS
8697 genDefineTempLabel(genCreateTempLabel());
8699 // vhaddpd ymm0,ymm1,ymm2
8700 getEmitter()->emitIns_R_R_R(INS_haddpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8701 // vaddss xmm0,xmm1,xmm2
8702 getEmitter()->emitIns_R_R_R(INS_addss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8703 // vaddsd xmm0,xmm1,xmm2
8704 getEmitter()->emitIns_R_R_R(INS_addsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8705 // vaddps xmm0,xmm1,xmm2
8706 getEmitter()->emitIns_R_R_R(INS_addps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8707 // vaddps ymm0,ymm1,ymm2
8708 getEmitter()->emitIns_R_R_R(INS_addps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8709 // vaddpd xmm0,xmm1,xmm2
8710 getEmitter()->emitIns_R_R_R(INS_addpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8711 // vaddpd ymm0,ymm1,ymm2
8712 getEmitter()->emitIns_R_R_R(INS_addpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8713 // vsubss xmm0,xmm1,xmm2
8714 getEmitter()->emitIns_R_R_R(INS_subss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8715 // vsubsd xmm0,xmm1,xmm2
8716 getEmitter()->emitIns_R_R_R(INS_subsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vsubps  xmm0,xmm1,xmm2
8718 getEmitter()->emitIns_R_R_R(INS_subps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8719 // vsubps ymm0,ymm1,ymm2
8720 getEmitter()->emitIns_R_R_R(INS_subps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8721 // vsubpd xmm0,xmm1,xmm2
8722 getEmitter()->emitIns_R_R_R(INS_subpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8723 // vsubpd ymm0,ymm1,ymm2
8724 getEmitter()->emitIns_R_R_R(INS_subpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8725 // vmulss xmm0,xmm1,xmm2
8726 getEmitter()->emitIns_R_R_R(INS_mulss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8727 // vmulsd xmm0,xmm1,xmm2
8728 getEmitter()->emitIns_R_R_R(INS_mulsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8729 // vmulps xmm0,xmm1,xmm2
8730 getEmitter()->emitIns_R_R_R(INS_mulps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8731 // vmulpd xmm0,xmm1,xmm2
8732 getEmitter()->emitIns_R_R_R(INS_mulpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8733 // vmulps ymm0,ymm1,ymm2
8734 getEmitter()->emitIns_R_R_R(INS_mulps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8735 // vmulpd ymm0,ymm1,ymm2
8736 getEmitter()->emitIns_R_R_R(INS_mulpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8737 // vandps xmm0,xmm1,xmm2
8738 getEmitter()->emitIns_R_R_R(INS_andps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8739 // vandpd xmm0,xmm1,xmm2
8740 getEmitter()->emitIns_R_R_R(INS_andpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8741 // vandps ymm0,ymm1,ymm2
8742 getEmitter()->emitIns_R_R_R(INS_andps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8743 // vandpd ymm0,ymm1,ymm2
8744 getEmitter()->emitIns_R_R_R(INS_andpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8745 // vorps xmm0,xmm1,xmm2
8746 getEmitter()->emitIns_R_R_R(INS_orps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8747 // vorpd xmm0,xmm1,xmm2
8748 getEmitter()->emitIns_R_R_R(INS_orpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8749 // vorps ymm0,ymm1,ymm2
8750 getEmitter()->emitIns_R_R_R(INS_orps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8751 // vorpd ymm0,ymm1,ymm2
8752 getEmitter()->emitIns_R_R_R(INS_orpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8753 // vdivss xmm0,xmm1,xmm2
8754 getEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8755 // vdivsd xmm0,xmm1,xmm2
8756 getEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8757 // vdivss xmm0,xmm1,xmm2
8758 getEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8759 // vdivsd xmm0,xmm1,xmm2
8760 getEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vcvtss2sd  xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_cvtss2sd, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vcvtsd2ss  xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_cvtsd2ss, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8766 #endif // ALL_XARCH_EMITTER_UNIT_TESTS
8767 printf("*************** End of genAmd64EmitterUnitTests()\n");
8770 #endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
8772 #endif // _TARGET_AMD64_
8774 #endif // !LEGACY_BACKEND