1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8 XX Amd64/x86 Code Generator XX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
18 #ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator.
25 #include "gcinfoencoder.h"
27 /*****************************************************************************
29 * Generate code that will set the given register to the integer constant.
32 void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
34 // Reg cannot be a FP reg
35 assert(!genIsValidFloatReg(reg));
37 // The only TYP_REF constant that can come this path is a managed 'null' since it is not
38 // relocatable. Other ref type constants (e.g. string objects) go through a different
40 noway_assert(type != TYP_REF || val == 0);
44 instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
48 // TODO-XArch-CQ: needs all the optimized cases
49 getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(type), reg, val);
53 /*****************************************************************************
55 * Generate code to check that the GS cookie wasn't thrashed by a buffer
56 * overrun. If pushReg is true, preserve all registers around code sequence.
57 * Otherwise ECX could be modified.
59 * Implementation Note: pushReg = true, in case of tail calls.
61 void CodeGen::genEmitGSCookieCheck(bool pushReg)
63 noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
65 // Make sure that EAX is reported as live GC-ref so that any GC that kicks in while
66 // executing GS cookie check will not collect the object pointed to by EAX.
68 // For Amd64 System V, a two-register-returned struct could be returned in RAX and RDX
69 // In such case make sure that the correct GC-ness of RDX is reported as well, so
70 // a GC object pointed by RDX will not be collected.
73 // Handle multi-reg return type values
74 if (compiler->compMethodReturnsMultiRegRetType())
76 ReturnTypeDesc retTypeDesc;
77 if (varTypeIsLong(compiler->info.compRetNativeType))
79 retTypeDesc.InitializeLongReturnType(compiler);
81 else // we must have a struct return type
83 retTypeDesc.InitializeStructReturnType(compiler, compiler->info.compMethodInfo->args.retTypeClass);
86 unsigned regCount = retTypeDesc.GetReturnRegCount();
88 // Only x86 and x64 Unix ABI allows multi-reg return and
89 // number of result regs should be equal to MAX_RET_REG_COUNT.
90 assert(regCount == MAX_RET_REG_COUNT);
92 for (unsigned i = 0; i < regCount; ++i)
94 gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i));
97 else if (compiler->compMethodReturnsRetBufAddr())
99 // This is for returning in an implicit RetBuf.
100 // If the address of the buffer is returned in REG_INTRET, mark the content of INTRET as ByRef.
102 // In case the return is in an implicit RetBuf, the native return type should be a struct
103 assert(varTypeIsStruct(compiler->info.compRetNativeType));
105 gcInfo.gcMarkRegPtrVal(REG_INTRET, TYP_BYREF);
107 // ... all other cases.
110 #ifdef _TARGET_AMD64_
111 // For x64, structs that are not returned in registers are always
112 // returned in implicit RetBuf. If we reached here, we should not have
113 // a RetBuf and the return type should not be a struct.
114 assert(compiler->info.compRetBuffArg == BAD_VAR_NUM);
115 assert(!varTypeIsStruct(compiler->info.compRetNativeType));
116 #endif // _TARGET_AMD64_
118 // For x86 Windows we can't make such assertions since we generate code for returning of
119 // the RetBuf in REG_INTRET only when the ProfilerHook is enabled. Otherwise
120 // compRetNativeType could be TYP_STRUCT.
121 gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType);
125 regNumber regGSCheck;
126 regMaskTP regMaskGSCheck = RBM_NONE;
130 // Non-tail call: we can use any callee trash register that is not
131 // a return register or contain 'this' pointer (keep alive this), since
132 // we are generating GS cookie check after a GT_RETURN block.
133 // Note: On Amd64 System V RDX is an arg register - REG_ARG_2 - as well
134 // as return register for two-register-returned structs.
135 if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister &&
136 (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ARG_0))
138 regGSCheck = REG_ARG_1;
142 regGSCheck = REG_ARG_0;
148 // It doesn't matter which register we pick, since we're going to save and restore it
150 // TODO-CQ: Can we optimize the choice of register to avoid doing the push/pop sometimes?
151 regGSCheck = REG_EAX;
152 regMaskGSCheck = RBM_EAX;
153 #else // !_TARGET_X86_
154 // Tail calls from methods that need GS check: We need to preserve registers while
155 // emitting GS cookie check for a tail prefixed call or a jmp. To emit GS cookie
156 // check, we might need a register. This won't be an issue for jmp calls for the
157 // reason mentioned below (see comment starting with "Jmp Calls:").
159 // The following are the possible solutions in case of tail prefixed calls:
160 // 1) Use R11 - ignore tail prefix on calls that need to pass a param in R11 when
161 // present in methods that require GS cookie check. Rest of the tail calls that
162 // do not require R11 will be honored.
163 // 2) Internal register - GT_CALL node reserves an internal register and emits GS
164 // cookie check as part of tail call codegen. GenExitCode() needs to special case
165 // fast tail calls implemented as epilog+jmp or such tail calls should always get
166 // dispatched via helper.
167 // 3) Materialize GS cookie check as a sperate node hanging off GT_CALL node in
168 // right execution order during rationalization.
170 // There are two calls that use R11: VSD and calli pinvokes with cookie param. Tail
171 // prefix on pinvokes is ignored. That is, options 2 and 3 will allow tail prefixed
172 // VSD calls from methods that need GS check.
174 // Tail prefixed calls: Right now for Jit64 compat, method requiring GS cookie check
175 // ignores tail prefix. In future, if we intend to support tail calls from such a method,
176 // consider one of the options mentioned above. For now adding an assert that we don't
177 // expect to see a tail call in a method that requires GS check.
178 noway_assert(!compiler->compTailCallUsed);
180 // Jmp calls: specify method handle using which JIT queries VM for its entry point
181 // address and hence it can neither be a VSD call nor PInvoke calli with cookie
182 // parameter. Therefore, in case of jmp calls it is safe to use R11.
183 regGSCheck = REG_R11;
184 #endif // !_TARGET_X86_
187 regMaskTP byrefPushedRegs = RBM_NONE;
188 regMaskTP norefPushedRegs = RBM_NONE;
189 regMaskTP pushedRegs = RBM_NONE;
191 if (compiler->gsGlobalSecurityCookieAddr == nullptr)
193 #if defined(_TARGET_AMD64_)
194 // If GS cookie value fits within 32-bits we can use 'cmp mem64, imm32'.
195 // Otherwise, load the value into a reg and use 'cmp mem64, reg64'.
196 if ((int)compiler->gsGlobalSecurityCookieVal != (ssize_t)compiler->gsGlobalSecurityCookieVal)
198 genSetRegToIcon(regGSCheck, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
199 getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
202 #endif // defined(_TARGET_AMD64_)
204 assert((int)compiler->gsGlobalSecurityCookieVal == (ssize_t)compiler->gsGlobalSecurityCookieVal);
205 getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
206 (int)compiler->gsGlobalSecurityCookieVal);
211 // Ngen case - GS cookie value needs to be accessed through an indirection.
213 pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs);
215 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
216 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSCheck, regGSCheck, 0);
217 getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
220 BasicBlock* gsCheckBlk = genCreateTempLabel();
221 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
222 inst_JMP(jmpEqual, gsCheckBlk);
223 genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
224 genDefineTempLabel(gsCheckBlk);
226 genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs);
229 BasicBlock* CodeGen::genCallFinally(BasicBlock* block)
231 #if FEATURE_EH_FUNCLETS
232 // Generate a call to the finally, like this:
233 // mov rcx,qword ptr [rbp + 20H] // Load rcx with PSPSym
234 // call finally-funclet
235 // jmp finally-return // Only for non-retless finally calls
236 // The jmp can be a NOP if we're going to the next block.
237 // If we're generating code for the main function (not a funclet), and there is no localloc,
238 // then RSP at this point is the same value as that stored in the PSPSym. So just copy RSP
239 // instead of loading the PSPSym in this case, or if PSPSym is not used (CoreRT ABI).
241 if ((compiler->lvaPSPSym == BAD_VAR_NUM) ||
242 (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT)))
245 inst_RV_RV(INS_mov, REG_ARG_0, REG_SPBASE, TYP_I_IMPL);
246 #endif // !UNIX_X86_ABI
250 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0);
252 getEmitter()->emitIns_J(INS_call, block->bbJumpDest);
254 if (block->bbFlags & BBF_RETLESS_CALL)
256 // We have a retless call, and the last instruction generated was a call.
257 // If the next block is in a different EH region (or is the end of the code
258 // block), then we need to generate a breakpoint here (since it will never
259 // get executed) to get proper unwind behavior.
261 if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
263 instGen(INS_BREAKPOINT); // This should never get executed
268 // TODO-Linux-x86: Do we need to handle the GC information for this NOP or JMP specially, as is done for other
270 #ifndef JIT32_GCENCODER
271 // Because of the way the flowgraph is connected, the liveness info for this one instruction
272 // after the call is not (can not be) correct in cases where a variable has a last use in the
273 // handler. So turn off GC reporting for this single instruction.
274 getEmitter()->emitDisableGC();
275 #endif // JIT32_GCENCODER
277 // Now go to where the finally funclet needs to return to.
278 if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
281 // TODO-XArch-CQ: Can we get rid of this instruction, and just have the call return directly
282 // to the next instruction? This would depend on stack walking from within the finally
283 // handler working without this instruction being in this special EH region.
288 inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
291 #ifndef JIT32_GCENCODER
292 getEmitter()->emitEnableGC();
293 #endif // JIT32_GCENCODER
296 #else // !FEATURE_EH_FUNCLETS
298 // If we are about to invoke a finally locally from a try block, we have to set the ShadowSP slot
299 // corresponding to the finally's nesting level. When invoked in response to an exception, the
302 // We have a BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
305 // mov [ebp - (n + 1)], 0
306 // mov [ebp - n ], 0xFC
316 noway_assert(isFramePointerUsed());
318 // Get the nesting level which contains the finally
319 unsigned finallyNesting = 0;
320 compiler->fgGetNestingLevel(block, &finallyNesting);
322 // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
323 unsigned filterEndOffsetSlotOffs;
324 filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
326 unsigned curNestingSlotOffs;
327 curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE));
329 // Zero out the slot for the next nesting level
330 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
331 curNestingSlotOffs - TARGET_POINTER_SIZE);
332 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
335 // Now push the address where the finally funclet should return to directly.
336 if (!(block->bbFlags & BBF_RETLESS_CALL))
338 assert(block->isBBCallAlwaysPair());
339 getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
343 // EE expects a DWORD, so we give him 0
344 inst_IV(INS_push_hide, 0);
347 // Jump to the finally BB
348 inst_JMP(EJ_jmp, block->bbJumpDest);
350 #endif // !FEATURE_EH_FUNCLETS
352 // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
353 // jump target using bbJumpDest - that is already used to point
354 // to the finally block. So just skip past the BBJ_ALWAYS unless the
356 if (!(block->bbFlags & BBF_RETLESS_CALL))
358 assert(block->isBBCallAlwaysPair());
359 block = block->bbNext;
364 #if FEATURE_EH_FUNCLETS
365 void CodeGen::genEHCatchRet(BasicBlock* block)
367 // Set RAX to the address the VM should return to after the catch.
368 // Generate a RIP-relative
369 // lea reg, [rip + disp32] ; the RIP is implicit
370 // which will be position-indepenent.
371 getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, block->bbJumpDest, REG_INTRET);
374 #else // !FEATURE_EH_FUNCLETS
376 void CodeGen::genEHFinallyOrFilterRet(BasicBlock* block)
378 // The last statement of the block must be a GT_RETFILT, which has already been generated.
379 assert(block->lastNode() != nullptr);
380 assert(block->lastNode()->OperGet() == GT_RETFILT);
382 if (block->bbJumpKind == BBJ_EHFINALLYRET)
384 assert(block->lastNode()->gtOp.gtOp1 == nullptr); // op1 == nullptr means endfinally
386 // Return using a pop-jmp sequence. As the "try" block calls
387 // the finally with a jmp, this leaves the x86 call-ret stack
388 // balanced in the normal flow of path.
390 noway_assert(isFramePointerRequired());
391 inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
392 inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
396 assert(block->bbJumpKind == BBJ_EHFILTERRET);
398 // The return value has already been computed.
403 #endif // !FEATURE_EH_FUNCLETS
405 // Move an immediate value into an integer register
407 void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
409 // reg cannot be a FP register
410 assert(!genIsValidFloatReg(reg));
412 if (!compiler->opts.compReloc)
414 size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
417 if ((imm == 0) && !EA_IS_RELOC(size))
419 instGen_Set_Reg_To_Zero(size, reg, flags);
423 if (genDataIndirAddrCanBeEncodedAsPCRelOffset(imm))
425 getEmitter()->emitIns_R_AI(INS_lea, EA_PTR_DSP_RELOC, reg, imm);
429 getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
432 regTracker.rsTrackRegIntCns(reg, imm);
435 /***********************************************************************************
437 * Generate code to set a register 'targetReg' of type 'targetType' to the constant
438 * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call
439 * genProduceReg() on the target register.
441 void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree)
444 switch (tree->gtOper)
448 // relocatable values tend to come down as a CNS_INT of native int type
449 // so the line between these two opcodes is kind of blurry
450 GenTreeIntConCommon* con = tree->AsIntConCommon();
451 ssize_t cnsVal = con->IconValue();
453 if (con->ImmedValNeedsReloc(compiler))
455 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal);
456 regTracker.rsTrackRegTrash(targetReg);
460 genSetRegToIcon(targetReg, cnsVal, targetType);
467 double constValue = tree->gtDblCon.gtDconVal;
469 // Make sure we use "xorpd reg, reg" only for +ve zero constant (0.0) and not for -ve zero (-0.0)
470 if (*(__int64*)&constValue == 0)
472 // A faster/smaller way to generate 0
473 instruction ins = genGetInsForOper(GT_XOR, targetType);
474 inst_RV_RV(ins, targetReg, targetReg, targetType);
479 if (targetType == TYP_FLOAT)
481 float f = forceCastToFloat(constValue);
482 cns = genMakeConst(&f, targetType, tree, false);
486 cns = genMakeConst(&constValue, targetType, tree, true);
489 inst_RV_TT(ins_Load(targetType), targetReg, cns);
499 // Generate code to get the high N bits of a N*N=2N bit multiplication result
500 void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
502 if (treeNode->OperGet() == GT_MULHI)
504 assert(!(treeNode->gtFlags & GTF_UNSIGNED));
506 assert(!treeNode->gtOverflowEx());
508 regNumber targetReg = treeNode->gtRegNum;
509 var_types targetType = treeNode->TypeGet();
510 emitter* emit = getEmitter();
511 emitAttr size = emitTypeSize(treeNode);
512 GenTree* op1 = treeNode->gtOp.gtOp1;
513 GenTree* op2 = treeNode->gtOp.gtOp2;
515 // to get the high bits of the multiply, we are constrained to using the
516 // 1-op form: RDX:RAX = RAX * rm
517 // The 3-op form (Rx=Ry*Rz) does not support it.
519 genConsumeOperands(treeNode->AsOp());
521 GenTree* regOp = op1;
524 // Set rmOp to the memory operand (if any)
525 if (op1->isUsedFromMemory() || (op2->isUsedFromReg() && (op2->gtRegNum == REG_RAX)))
530 assert(regOp->isUsedFromReg());
532 // Setup targetReg when neither of the source operands was a matching register
533 if (regOp->gtRegNum != REG_RAX)
535 inst_RV_RV(ins_Copy(targetType), REG_RAX, regOp->gtRegNum, targetType);
539 if ((treeNode->gtFlags & GTF_UNSIGNED) == 0)
547 emit->emitInsBinary(ins, size, treeNode, rmOp);
549 // Move the result to the desired register, if necessary
550 if (treeNode->OperGet() == GT_MULHI && targetReg != REG_RDX)
552 inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
557 //------------------------------------------------------------------------
558 // genCodeForLongUMod: Generate code for a tree of the form
559 // `(umod (gt_long x y) (const int))`
562 // node - the node for which to generate code
564 void CodeGen::genCodeForLongUMod(GenTreeOp* node)
566 assert(node != nullptr);
567 assert(node->OperGet() == GT_UMOD);
568 assert(node->TypeGet() == TYP_INT);
570 GenTreeOp* const dividend = node->gtOp1->AsOp();
571 assert(dividend->OperGet() == GT_LONG);
572 assert(varTypeIsLong(dividend));
574 genConsumeOperands(node);
576 GenTree* const dividendLo = dividend->gtOp1;
577 GenTree* const dividendHi = dividend->gtOp2;
578 assert(dividendLo->isUsedFromReg());
579 assert(dividendHi->isUsedFromReg());
581 GenTree* const divisor = node->gtOp2;
582 assert(divisor->gtSkipReloadOrCopy()->OperGet() == GT_CNS_INT);
583 assert(divisor->gtSkipReloadOrCopy()->isUsedFromReg());
584 assert(divisor->gtSkipReloadOrCopy()->AsIntCon()->gtIconVal >= 2);
585 assert(divisor->gtSkipReloadOrCopy()->AsIntCon()->gtIconVal <= 0x3fffffff);
587 // dividendLo must be in RAX; dividendHi must be in RDX
588 genCopyRegIfNeeded(dividendLo, REG_EAX);
589 genCopyRegIfNeeded(dividendHi, REG_EDX);
591 // At this point, EAX:EDX contains the 64bit dividend and op2->gtRegNum
592 // contains the 32bit divisor. We want to generate the following code:
594 // cmp edx, divisor->gtRegNum
600 // div divisor->gtRegNum
604 // div divisor->gtRegNum
606 // This works because (a * 2^32 + b) % c = ((a % c) * 2^32 + b) % c.
608 BasicBlock* const noOverflow = genCreateTempLabel();
610 // cmp edx, divisor->gtRegNum
612 inst_RV_RV(INS_cmp, REG_EDX, divisor->gtRegNum);
613 inst_JMP(EJ_jb, noOverflow);
618 // div divisor->gtRegNum
620 const regNumber tempReg = genRegNumFromMask(node->gtRsvdRegs);
621 inst_RV_RV(INS_mov, tempReg, REG_EAX, TYP_INT);
622 inst_RV_RV(INS_mov, REG_EAX, REG_EDX, TYP_INT);
623 instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
624 inst_RV(INS_div, divisor->gtRegNum, TYP_INT);
625 inst_RV_RV(INS_mov, REG_EAX, tempReg, TYP_INT);
628 // div divisor->gtRegNum
629 genDefineTempLabel(noOverflow);
630 inst_RV(INS_div, divisor->gtRegNum, TYP_INT);
632 const regNumber targetReg = node->gtRegNum;
633 if (targetReg != REG_EDX)
635 inst_RV_RV(INS_mov, targetReg, REG_RDX, TYP_INT);
639 #endif // _TARGET_X86_
641 //------------------------------------------------------------------------
642 // genCodeForDivMod: Generate code for a DIV or MOD operation.
645 // treeNode - the node to generate the code for
647 void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
649 GenTree* dividend = treeNode->gtOp1;
651 if (varTypeIsLong(dividend->TypeGet()))
653 genCodeForLongUMod(treeNode);
656 #endif // _TARGET_X86_
658 GenTree* divisor = treeNode->gtOp2;
659 genTreeOps oper = treeNode->OperGet();
660 emitAttr size = emitTypeSize(treeNode);
661 regNumber targetReg = treeNode->gtRegNum;
662 var_types targetType = treeNode->TypeGet();
663 emitter* emit = getEmitter();
665 // dividend is in a register.
666 assert(dividend->isUsedFromReg());
668 genConsumeOperands(treeNode->AsOp());
669 if (varTypeIsFloating(targetType))
671 // Check that divisor is a valid operand.
672 // Note that a reg optional operand is a treated as a memory op
673 // if no register is allocated to it.
674 assert(divisor->isUsedFromReg() || divisor->isMemoryOp() || divisor->IsCnsFltOrDbl() ||
675 divisor->IsRegOptional());
677 // Floating point div/rem operation
678 assert(oper == GT_DIV || oper == GT_MOD);
680 if (dividend->gtRegNum == targetReg)
682 emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
684 else if (divisor->isUsedFromReg() && divisor->gtRegNum == targetReg)
686 // It is not possible to generate 2-operand divss or divsd where reg2 = reg1 / reg2
687 // because divss/divsd reg1, reg2 will over-write reg1. Therefore, in case of AMD64
688 // LSRA has to make sure that such a register assignment is not generated for floating
689 // point div/rem operations.
691 !"GT_DIV/GT_MOD (float): case of reg2 = reg1 / reg2, LSRA should never generate such a reg assignment");
695 inst_RV_RV(ins_Copy(targetType), targetReg, dividend->gtRegNum, targetType);
696 emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
701 // dividend must be in RAX
702 genCopyRegIfNeeded(dividend, REG_RAX);
704 // zero or sign extend rax to rdx
705 if (oper == GT_UMOD || oper == GT_UDIV)
707 instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
711 emit->emitIns(INS_cdq, size);
712 // the cdq instruction writes RDX, So clear the gcInfo for RDX
713 gcInfo.gcMarkRegSetNpt(RBM_RDX);
716 // Perform the 'targetType' (64-bit or 32-bit) divide instruction
718 if (oper == GT_UMOD || oper == GT_UDIV)
727 emit->emitInsBinary(ins, size, treeNode, divisor);
729 // DIV/IDIV instructions always store the quotient in RAX and the remainder in RDX.
730 // Move the result to the desired register, if necessary
731 if (oper == GT_DIV || oper == GT_UDIV)
733 if (targetReg != REG_RAX)
735 inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
740 assert((oper == GT_MOD) || (oper == GT_UMOD));
741 if (targetReg != REG_RDX)
743 inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
747 genProduceReg(treeNode);
750 //------------------------------------------------------------------------
751 // genCodeForBinary: Generate code for many binary arithmetic operators
752 // This method is expected to have called genConsumeOperands() before calling it.
755 // treeNode - The binary operation for which we are generating code.
761 // Mul and div variants have special constraints on x64 so are not handled here.
762 // See teh assert below for the operators that are handled.
764 void CodeGen::genCodeForBinary(GenTree* treeNode)
766 const genTreeOps oper = treeNode->OperGet();
767 regNumber targetReg = treeNode->gtRegNum;
768 var_types targetType = treeNode->TypeGet();
769 emitter* emit = getEmitter();
771 #if defined(_TARGET_64BIT_)
772 assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD || oper == GT_SUB);
773 #else // !defined(_TARGET_64BIT_)
774 assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD_LO || oper == GT_ADD_HI ||
775 oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_MUL_LONG || oper == GT_DIV_HI || oper == GT_MOD_HI ||
776 oper == GT_ADD || oper == GT_SUB);
777 #endif // !defined(_TARGET_64BIT_)
779 GenTreePtr op1 = treeNode->gtGetOp1();
780 GenTreePtr op2 = treeNode->gtGetOp2();
782 // Commutative operations can mark op1 as contained or reg-optional to generate "op reg, memop/immed"
783 if (!op1->isUsedFromReg())
785 assert(treeNode->OperIsCommutative());
786 assert(op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl() || op1->IsIntCnsFitsInI32() || op1->IsRegOptional());
788 op1 = treeNode->gtGetOp2();
789 op2 = treeNode->gtGetOp1();
792 instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
794 // The arithmetic node must be sitting in a register (since it's not contained)
795 noway_assert(targetReg != REG_NA);
797 regNumber op1reg = op1->isUsedFromReg() ? op1->gtRegNum : REG_NA;
798 regNumber op2reg = op2->isUsedFromReg() ? op2->gtRegNum : REG_NA;
803 // This is the case of reg1 = reg1 op reg2
804 // We're ready to emit the instruction without any moves
805 if (op1reg == targetReg)
810 // We have reg1 = reg2 op reg1
811 // In order for this operation to be correct
812 // we need that op is a commutative operation so
813 // we can convert it into reg1 = reg1 op reg2 and emit
814 // the same code as above
815 else if (op2reg == targetReg)
817 noway_assert(GenTree::OperIsCommutative(oper));
821 // now we know there are 3 different operands so attempt to use LEA
822 else if (oper == GT_ADD && !varTypeIsFloating(treeNode) && !treeNode->gtOverflowEx() // LEA does not set flags
823 && (op2->isContainedIntOrIImmed() || op2->isUsedFromReg()) && !treeNode->gtSetFlags())
825 if (op2->isContainedIntOrIImmed())
827 emit->emitIns_R_AR(INS_lea, emitTypeSize(treeNode), targetReg, op1reg,
828 (int)op2->AsIntConCommon()->IconValue());
832 assert(op2reg != REG_NA);
833 emit->emitIns_R_ARX(INS_lea, emitTypeSize(treeNode), targetReg, op1reg, op2reg, 1, 0);
835 genProduceReg(treeNode);
838 // dest, op1 and op2 registers are different:
839 // reg3 = reg1 op reg2
840 // We can implement this by issuing a mov:
842 // reg3 = reg3 op reg2
845 inst_RV_RV(ins_Copy(targetType), targetReg, op1reg, targetType);
846 regTracker.rsTrackRegCopy(targetReg, op1reg);
847 gcInfo.gcMarkRegPtrVal(targetReg, targetType);
852 // try to use an inc or dec
853 if (oper == GT_ADD && !varTypeIsFloating(treeNode) && src->isContainedIntOrIImmed() && !treeNode->gtOverflowEx())
855 if (src->IsIntegralConst(1))
857 emit->emitIns_R(INS_inc, emitTypeSize(treeNode), targetReg);
858 genProduceReg(treeNode);
861 else if (src->IsIntegralConst(-1))
863 emit->emitIns_R(INS_dec, emitTypeSize(treeNode), targetReg);
864 genProduceReg(treeNode);
868 regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src);
869 noway_assert(r == targetReg);
871 if (treeNode->gtOverflowEx())
873 #if !defined(_TARGET_64BIT_)
874 assert(oper == GT_ADD || oper == GT_SUB || oper == GT_ADD_HI || oper == GT_SUB_HI);
876 assert(oper == GT_ADD || oper == GT_SUB);
878 genCheckOverflow(treeNode);
880 genProduceReg(treeNode);
883 //------------------------------------------------------------------------
884 // isStructReturn: Returns whether the 'treeNode' is returning a struct.
887 // treeNode - The tree node to evaluate whether is a struct return.
890 // For AMD64 *nix: returns true if the 'treeNode" is a GT_RETURN node, of type struct.
891 // Otherwise returns false.
892 // For other platforms always returns false.
894 bool CodeGen::isStructReturn(GenTreePtr treeNode)
896 // This method could be called for 'treeNode' of GT_RET_FILT or GT_RETURN.
897 // For the GT_RET_FILT, the return is always
898 // a bool or a void, for the end of a finally block.
899 noway_assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
900 if (treeNode->OperGet() != GT_RETURN)
905 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
906 return varTypeIsStruct(treeNode);
907 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
908 assert(!varTypeIsStruct(treeNode));
910 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
913 //------------------------------------------------------------------------
914 // genStructReturn: Generates code for returning a struct.
917 // treeNode - The GT_RETURN tree node.
923 // op1 of GT_RETURN node is either GT_LCL_VAR or multi-reg GT_CALL
924 void CodeGen::genStructReturn(GenTreePtr treeNode)
926 assert(treeNode->OperGet() == GT_RETURN);
927 GenTreePtr op1 = treeNode->gtGetOp1();
929 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
930 if (op1->OperGet() == GT_LCL_VAR)
932 GenTreeLclVarCommon* lclVar = op1->AsLclVarCommon();
933 LclVarDsc* varDsc = &(compiler->lvaTable[lclVar->gtLclNum]);
934 assert(varDsc->lvIsMultiRegRet);
936 ReturnTypeDesc retTypeDesc;
937 retTypeDesc.InitializeStructReturnType(compiler, varDsc->lvVerTypeInfo.GetClassHandle());
938 unsigned regCount = retTypeDesc.GetReturnRegCount();
939 assert(regCount == MAX_RET_REG_COUNT);
941 if (varTypeIsEnregisterableStruct(op1))
943 // Right now the only enregistrable structs supported are SIMD vector types.
944 assert(varTypeIsSIMD(op1));
945 assert(op1->isUsedFromReg());
947 // This is a case of operand is in a single reg and needs to be
948 // returned in multiple ABI return registers.
949 regNumber opReg = genConsumeReg(op1);
950 regNumber reg0 = retTypeDesc.GetABIReturnReg(0);
951 regNumber reg1 = retTypeDesc.GetABIReturnReg(1);
953 if (opReg != reg0 && opReg != reg1)
955 // Operand reg is different from return regs.
956 // Copy opReg to reg0 and let it to be handled by one of the
958 inst_RV_RV(ins_Copy(TYP_DOUBLE), reg0, opReg, TYP_DOUBLE);
964 assert(opReg != reg1);
966 // reg0 - already has required 8-byte in bit position [63:0].
968 // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0].
969 inst_RV_RV(ins_Copy(TYP_DOUBLE), reg1, opReg, TYP_DOUBLE);
973 assert(opReg == reg1);
976 // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0].
977 inst_RV_RV(ins_Copy(TYP_DOUBLE), reg0, opReg, TYP_DOUBLE);
979 inst_RV_RV_IV(INS_shufpd, EA_16BYTE, reg1, reg1, 0x01);
983 assert(op1->isUsedFromMemory());
985 // Copy var on stack into ABI return registers
987 for (unsigned i = 0; i < regCount; ++i)
989 var_types type = retTypeDesc.GetReturnRegType(i);
990 regNumber reg = retTypeDesc.GetABIReturnReg(i);
991 getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), reg, lclVar->gtLclNum, offset);
992 offset += genTypeSize(type);
998 assert(op1->IsMultiRegCall() || op1->IsCopyOrReloadOfMultiRegCall());
1000 genConsumeRegs(op1);
1002 GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
1003 GenTreeCall* call = actualOp1->AsCall();
1004 ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
1005 unsigned regCount = retTypeDesc->GetReturnRegCount();
1006 assert(regCount == MAX_RET_REG_COUNT);
1008 // Handle circular dependency between call allocated regs and ABI return regs.
1010 // It is possible under LSRA stress that originally allocated regs of call node,
1011 // say rax and rdx, are spilled and reloaded to rdx and rax respectively. But
1012 // GT_RETURN needs to move values as follows: rdx->rax, rax->rdx. Similar kind
1013 // kind of circular dependency could arise between xmm0 and xmm1 return regs.
1014 // Codegen is expected to handle such circular dependency.
1016 var_types regType0 = retTypeDesc->GetReturnRegType(0);
1017 regNumber returnReg0 = retTypeDesc->GetABIReturnReg(0);
1018 regNumber allocatedReg0 = call->GetRegNumByIdx(0);
1020 var_types regType1 = retTypeDesc->GetReturnRegType(1);
1021 regNumber returnReg1 = retTypeDesc->GetABIReturnReg(1);
1022 regNumber allocatedReg1 = call->GetRegNumByIdx(1);
1024 if (op1->IsCopyOrReload())
1026 // GT_COPY/GT_RELOAD will have valid reg for those positions
1027 // that need to be copied or reloaded.
1028 regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0);
1029 if (reloadReg != REG_NA)
1031 allocatedReg0 = reloadReg;
1034 reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(1);
1035 if (reloadReg != REG_NA)
1037 allocatedReg1 = reloadReg;
1041 if (allocatedReg0 == returnReg1 && allocatedReg1 == returnReg0)
1043 // Circular dependency - swap allocatedReg0 and allocatedReg1
1044 if (varTypeIsFloating(regType0))
1046 assert(varTypeIsFloating(regType1));
1048 // The fastest way to swap two XMM regs is using PXOR
1049 inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE);
1050 inst_RV_RV(INS_pxor, allocatedReg1, allocatedReg0, TYP_DOUBLE);
1051 inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE);
1055 assert(varTypeIsIntegral(regType0));
1056 assert(varTypeIsIntegral(regType1));
1057 inst_RV_RV(INS_xchg, allocatedReg1, allocatedReg0, TYP_I_IMPL);
1060 else if (allocatedReg1 == returnReg0)
1062 // Change the order of moves to correctly handle dependency.
1063 if (allocatedReg1 != returnReg1)
1065 inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1);
1068 if (allocatedReg0 != returnReg0)
1070 inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0);
1075 // No circular dependency case.
1076 if (allocatedReg0 != returnReg0)
1078 inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0);
1081 if (allocatedReg1 != returnReg1)
1083 inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1);
1092 //------------------------------------------------------------------------
1093 // genReturn: Generates code for return statement.
1094 // In case of struct return, delegates to the genStructReturn method.
1097 // treeNode - The GT_RETURN or GT_RETFILT tree node.
1102 void CodeGen::genReturn(GenTreePtr treeNode)
1104 assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
1105 GenTreePtr op1 = treeNode->gtGetOp1();
1106 var_types targetType = treeNode->TypeGet();
1109 if (targetType == TYP_VOID)
1111 assert(op1 == nullptr);
1116 if (treeNode->TypeGet() == TYP_LONG)
1118 assert(op1 != nullptr);
1119 noway_assert(op1->OperGet() == GT_LONG);
1120 GenTree* loRetVal = op1->gtGetOp1();
1121 GenTree* hiRetVal = op1->gtGetOp2();
1122 noway_assert((loRetVal->gtRegNum != REG_NA) && (hiRetVal->gtRegNum != REG_NA));
1124 genConsumeReg(loRetVal);
1125 genConsumeReg(hiRetVal);
1126 if (loRetVal->gtRegNum != REG_LNGRET_LO)
1128 inst_RV_RV(ins_Copy(targetType), REG_LNGRET_LO, loRetVal->gtRegNum, TYP_INT);
1130 if (hiRetVal->gtRegNum != REG_LNGRET_HI)
1132 inst_RV_RV(ins_Copy(targetType), REG_LNGRET_HI, hiRetVal->gtRegNum, TYP_INT);
1136 #endif // !defined(_TARGET_X86_)
1138 if (isStructReturn(treeNode))
1140 genStructReturn(treeNode);
1142 else if (targetType != TYP_VOID)
1144 assert(op1 != nullptr);
1145 noway_assert(op1->gtRegNum != REG_NA);
1147 // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
1148 // consumed a reg for the operand. This is because the variable
1149 // is dead after return. But we are issuing more instructions
1150 // like "profiler leave callback" after this consumption. So
1151 // if you are issuing more instructions after this point,
1152 // remember to keep the variable live up until the new method
1153 // exit point where it is actually dead.
1156 regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
1158 if (varTypeIsFloating(treeNode))
1160 // Spill the return value register from an XMM register to the stack, then load it on the x87 stack.
1161 // If it already has a home location, use that. Otherwise, we need a temp.
1162 if (genIsRegCandidateLocal(op1) && compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvOnFrame)
1164 // Store local variable to its home location, if necessary.
1165 if ((op1->gtFlags & GTF_REG_VAL) != 0)
1167 op1->gtFlags &= ~GTF_REG_VAL;
1168 inst_TT_RV(ins_Store(op1->gtType,
1169 compiler->isSIMDTypeLocalAligned(op1->gtLclVarCommon.gtLclNum)),
1170 op1, op1->gtRegNum);
1172 // Now, load it to the fp stack.
1173 getEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->gtLclNum, 0);
1177 // Spill the value, which should be in a register, then load it to the fp stack.
1178 // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet).
1179 op1->gtFlags |= GTF_SPILL;
1180 regSet.rsSpillTree(op1->gtRegNum, op1);
1181 op1->gtFlags |= GTF_SPILLED;
1182 op1->gtFlags &= ~GTF_SPILL;
1184 TempDsc* t = regSet.rsUnspillInPlace(op1, op1->gtRegNum);
1185 inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0);
1186 op1->gtFlags &= ~GTF_SPILLED;
1187 compiler->tmpRlsTemp(t);
1191 #endif // _TARGET_X86_
1193 if (op1->gtRegNum != retReg)
1195 inst_RV_RV(ins_Copy(targetType), retReg, op1->gtRegNum, targetType);
1201 #ifdef PROFILING_SUPPORTED
1203 // TODO-AMD64-Unix: If the profiler hook is implemented on *nix, make sure for 2 register returned structs
1204 // the RAX and RDX needs to be kept alive. Make the necessary changes in lowerxarch.cpp
1205 // in the handling of the GT_RETURN statement.
1206 // Such structs containing GC pointers need to be handled by calling gcInfo.gcMarkRegSetNpt
1207 // for the return registers containing GC refs.
1209 // There will be a single return block while generating profiler ELT callbacks.
1211 // Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN:
1212 // In flowgraph and other places assert that the last node of a block marked as
1213 // BBJ_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
1214 // maintain such an invariant irrespective of whether profiler hook needed or not.
1215 // Also, there is not much to be gained by materializing it as an explicit node.
1216 if (compiler->compCurBB == compiler->genReturnBB)
1219 // Since we are invalidating the assumption that we would slip into the epilog
1220 // right after the "return", we need to preserve the return reg's GC state
1221 // across the call until actual method return.
1222 if (varTypeIsGC(compiler->info.compRetType))
1224 gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetType);
1227 genProfilingLeaveCallback();
1229 if (varTypeIsGC(compiler->info.compRetType))
1231 gcInfo.gcMarkRegSetNpt(REG_INTRET);
1237 /*****************************************************************************
1239 * Generate code for a single node in the tree.
1240 * Preconditions: All operands have been evaluated
1243 void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
1245 regNumber targetReg;
1246 #if !defined(_TARGET_64BIT_)
1247 if (treeNode->TypeGet() == TYP_LONG)
1249 // All long enregistered nodes will have been decomposed into their
1250 // constituent lo and hi nodes.
1254 #endif // !defined(_TARGET_64BIT_)
1256 targetReg = treeNode->gtRegNum;
1258 var_types targetType = treeNode->TypeGet();
1259 emitter* emit = getEmitter();
1262 // Validate that all the operands for the current node are consumed in order.
1263 // This is important because LSRA ensures that any necessary copies will be
1264 // handled correctly.
1265 lastConsumedNode = nullptr;
1266 if (compiler->verbose)
1268 unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
1269 compiler->gtDispLIRNode(treeNode, "Generating: ");
1273 // Is this a node whose value is already in a register? LSRA denotes this by
1274 // setting the GTF_REUSE_REG_VAL flag.
1275 if (treeNode->IsReuseRegVal())
1277 // For now, this is only used for constant nodes.
1278 assert((treeNode->OperIsConst()));
1279 JITDUMP(" TreeNode is marked ReuseReg\n");
1283 // contained nodes are part of their parents for codegen purposes
1284 // ex : immediates, most LEAs
1285 if (treeNode->isContained())
1290 switch (treeNode->gtOper)
1292 #ifndef JIT32_GCENCODER
1293 case GT_START_NONGC:
1294 getEmitter()->emitDisableGC();
1296 #endif // !defined(JIT32_GCENCODER)
1299 #ifdef PROFILING_SUPPORTED
1300 // We should be seeing this only if profiler hook is needed
1301 noway_assert(compiler->compIsProfilerHookNeeded());
1303 // Right now this node is used only for tail calls. In future if
1304 // we intend to use it for Enter or Leave hooks, add a data member
1305 // to this node indicating the kind of profiler hook. For example,
1306 // helper number can be used.
1307 genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
1308 #endif // PROFILING_SUPPORTED
1312 genLclHeap(treeNode);
1317 assert(!treeNode->IsIconHandle(GTF_ICON_TLS_HDL));
1318 #endif // _TARGET_X86_
1322 genSetRegToConst(targetReg, targetType, treeNode);
1323 genProduceReg(treeNode);
1328 if (varTypeIsFloating(targetType))
1330 assert(treeNode->gtOper == GT_NEG);
1331 genSSE2BitwiseOp(treeNode);
1335 GenTreePtr operand = treeNode->gtGetOp1();
1336 assert(operand->isUsedFromReg());
1337 regNumber operandReg = genConsumeReg(operand);
1339 if (operandReg != targetReg)
1341 inst_RV_RV(INS_mov, targetReg, operandReg, targetType);
1344 instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
1345 inst_RV(ins, targetReg, targetType);
1347 genProduceReg(treeNode);
1353 assert(varTypeIsIntegralOrI(treeNode));
1356 #if !defined(_TARGET_64BIT_)
1361 #endif // !defined(_TARGET_64BIT_)
1364 genConsumeOperands(treeNode->AsOp());
1365 genCodeForBinary(treeNode);
1373 genCodeForShift(treeNode);
1374 // genCodeForShift() calls genProduceReg()
1377 #if !defined(_TARGET_64BIT_)
1380 // TODO-X86-CQ: This only handles the case where the operand being shifted is in a register. We don't
1381 // need sourceHi to be always in reg in case of GT_LSH_HI (because it could be moved from memory to
1382 // targetReg if sourceHi is a memory operand). Similarly for GT_RSH_LO, sourceLo could be marked as
1383 // contained memory-op. Even if not a memory-op, we could mark it as reg-optional.
1384 genCodeForShiftLong(treeNode);
1389 if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1))
1391 // Casts float/double <--> double/float
1392 genFloatToFloatCast(treeNode);
1394 else if (varTypeIsFloating(treeNode->gtOp.gtOp1))
1396 // Casts float/double --> int32/int64
1397 genFloatToIntCast(treeNode);
1399 else if (varTypeIsFloating(targetType))
1401 // Casts int32/uint32/int64/uint64 --> float/double
1402 genIntToFloatCast(treeNode);
1406 // Casts int <--> int
1407 genIntToIntCast(treeNode);
1409 // The per-case functions call genProduceReg()
1414 // lcl_vars are not defs
1415 assert((treeNode->gtFlags & GTF_VAR_DEF) == 0);
1417 GenTreeLclVarCommon* lcl = treeNode->AsLclVarCommon();
1418 bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate();
1420 if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH))
1422 assert(treeNode->InReg() || (treeNode->gtFlags & GTF_SPILLED));
1425 // If this is a register candidate that has been spilled, genConsumeReg() will
1426 // reload it at the point of use. Otherwise, if it's not in a register, we load it here.
1428 if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED))
1430 assert(!isRegCandidate);
1431 #if defined(FEATURE_SIMD) && defined(_TARGET_X86_)
1432 // Loading of TYP_SIMD12 (i.e. Vector3) variable
1433 if (treeNode->TypeGet() == TYP_SIMD12)
1435 genLoadLclTypeSIMD12(treeNode);
1438 #endif // defined(FEATURE_SIMD) && defined(_TARGET_X86_)
1440 emit->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)),
1441 emitTypeSize(treeNode), treeNode->gtRegNum, lcl->gtLclNum, 0);
1442 genProduceReg(treeNode);
1447 case GT_LCL_FLD_ADDR:
1448 case GT_LCL_VAR_ADDR:
1449 // Address of a local var. This by itself should never be allocated a register.
1450 // If it is worth storing the address in a register then it should be cse'ed into
1451 // a temp and that would be allocated a register.
1452 noway_assert(targetType == TYP_BYREF);
1453 noway_assert(!treeNode->InReg());
1455 inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF);
1456 genProduceReg(treeNode);
1461 noway_assert(targetType != TYP_STRUCT);
1462 noway_assert(treeNode->gtRegNum != REG_NA);
1465 // Loading of TYP_SIMD12 (i.e. Vector3) field
1466 if (treeNode->TypeGet() == TYP_SIMD12)
1468 genLoadLclTypeSIMD12(treeNode);
1473 emitAttr size = emitTypeSize(targetType);
1474 unsigned offs = treeNode->gtLclFld.gtLclOffs;
1475 unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
1476 assert(varNum < compiler->lvaCount);
1478 emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), size, targetReg, varNum, offs);
1480 genProduceReg(treeNode);
1483 case GT_STORE_LCL_FLD:
1485 noway_assert(targetType != TYP_STRUCT);
1486 noway_assert(!treeNode->InReg());
1487 assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
1490 // storing of TYP_SIMD12 (i.e. Vector3) field
1491 if (treeNode->TypeGet() == TYP_SIMD12)
1493 genStoreLclTypeSIMD12(treeNode);
1496 #endif // FEATURE_SIMD
1498 GenTreePtr op1 = treeNode->gtGetOp1();
1499 genConsumeRegs(op1);
1500 emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1);
1504 case GT_STORE_LCL_VAR:
1506 GenTreePtr op1 = treeNode->gtGetOp1();
1508 // var = call, where call returns a multi-reg return value
1509 // case is handled separately.
1510 if (op1->gtSkipReloadOrCopy()->IsMultiRegCall())
1512 genMultiRegCallStoreToLocal(treeNode);
1516 noway_assert(targetType != TYP_STRUCT);
1517 assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
1519 unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
1520 LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
1522 // Ensure that lclVar nodes are typed correctly.
1523 assert(!varDsc->lvNormalizeOnStore() || treeNode->TypeGet() == genActualType(varDsc->TypeGet()));
1525 #if !defined(_TARGET_64BIT_)
1526 if (treeNode->TypeGet() == TYP_LONG)
1528 genStoreLongLclVar(treeNode);
1531 #endif // !defined(_TARGET_64BIT_)
1534 // storing of TYP_SIMD12 (i.e. Vector3) field
1535 if (treeNode->TypeGet() == TYP_SIMD12)
1537 genStoreLclTypeSIMD12(treeNode);
1541 if (varTypeIsSIMD(targetType) && (targetReg != REG_NA) && op1->IsCnsIntOrI())
1543 // This is only possible for a zero-init.
1544 noway_assert(op1->IsIntegralConst(0));
1545 genSIMDZero(targetType, varDsc->lvBaseType, targetReg);
1546 genProduceReg(treeNode);
1549 #endif // FEATURE_SIMD
1551 genConsumeRegs(op1);
1553 if (treeNode->gtRegNum == REG_NA)
1556 emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)),
1557 emitTypeSize(targetType), treeNode);
1558 varDsc->lvRegNum = REG_STK;
1562 // Look for the case where we have a constant zero which we've marked for reuse,
1563 // but which isn't actually in the register we want. In that case, it's better to create
1564 // zero in the target register, because an xor is smaller than a copy. Note that we could
1565 // potentially handle this in the register allocator, but we can't always catch it there
1566 // because the target may not have a register allocated for it yet.
1567 if (op1->isUsedFromReg() && (op1->gtRegNum != treeNode->gtRegNum) &&
1568 (op1->IsIntegralConst(0) || op1->IsFPZero()))
1570 op1->gtRegNum = REG_NA;
1571 op1->ResetReuseRegVal();
1574 if (!op1->isUsedFromReg())
1576 // Currently, we assume that the non-reg source of a GT_STORE_LCL_VAR writing to a register
1577 // must be a constant. However, in the future we might want to support an operand used from
1578 // memory. This is a bit tricky because we have to decide it can be used from memory before
1579 // register allocation,
1580 // and this would be a case where, once that's done, we need to mark that node as always
1581 // requiring a register - which we always assume now anyway, but once we "optimize" that
1582 // we'll have to take cases like this into account.
1583 assert((op1->gtRegNum == REG_NA) && op1->OperIsConst());
1584 genSetRegToConst(treeNode->gtRegNum, targetType, op1);
1586 else if (op1->gtRegNum != treeNode->gtRegNum)
1588 assert(op1->gtRegNum != REG_NA);
1589 emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1);
1594 if (treeNode->gtRegNum != REG_NA)
1596 genProduceReg(treeNode);
1602 // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in
1603 // the return register, if it's not already there. The processing is the same as GT_RETURN.
1604 if (targetType != TYP_VOID)
1606 // For filters, the IL spec says the result is type int32. Further, the only specified legal values
1607 // are 0 or 1, with the use of other values "undefined".
1608 assert(targetType == TYP_INT);
1614 genReturn(treeNode);
1619 // if we are here, it is the case where there is an LEA that cannot
1620 // be folded into a parent instruction
1621 GenTreeAddrMode* lea = treeNode->AsAddrMode();
1622 genLeaInstruction(lea);
1624 // genLeaInstruction calls genProduceReg()
1630 // Handling of Vector3 type values loaded through indirection.
1631 if (treeNode->TypeGet() == TYP_SIMD12)
1633 genLoadIndTypeSIMD12(treeNode);
1636 #endif // FEATURE_SIMD
1638 GenTree* addr = treeNode->AsIndir()->Addr();
1639 if (addr->IsCnsIntOrI() && addr->IsIconHandle(GTF_ICON_TLS_HDL))
1641 noway_assert(EA_ATTR(genTypeSize(treeNode->gtType)) == EA_PTRSIZE);
1642 emit->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, treeNode->gtRegNum, FLD_GLOBAL_FS,
1643 (int)addr->gtIntCon.gtIconVal);
1647 genConsumeAddress(addr);
1648 emit->emitInsMov(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode);
1650 genProduceReg(treeNode);
1658 genCodeForMulHi(treeNode->AsOp());
1659 genProduceReg(treeNode);
1665 emitAttr size = emitTypeSize(treeNode);
1666 bool isUnsignedMultiply = ((treeNode->gtFlags & GTF_UNSIGNED) != 0);
1667 bool requiresOverflowCheck = treeNode->gtOverflowEx();
1669 GenTree* op1 = treeNode->gtGetOp1();
1670 GenTree* op2 = treeNode->gtGetOp2();
1672 // there are 3 forms of x64 multiply:
1673 // 1-op form with 128 result: RDX:RAX = RAX * rm
1674 // 2-op form: reg *= rm
1675 // 3-op form: reg = rm * imm
1677 genConsumeOperands(treeNode->AsOp());
1679 // This matches the 'mul' lowering in Lowering::SetMulOpCounts()
1681 // immOp :: Only one operand can be an immediate
1682 // rmOp :: Only one operand can be a memory op.
1683 // regOp :: A register op (especially the operand that matches 'targetReg')
1684 // (can be nullptr when we have both a memory op and an immediate op)
1686 GenTree* immOp = nullptr;
1687 GenTree* rmOp = op1;
1690 if (op2->isContainedIntOrIImmed())
1694 else if (op1->isContainedIntOrIImmed())
1700 if (immOp != nullptr)
1702 // This must be a non-floating point operation.
1703 assert(!varTypeIsFloating(treeNode));
1705 // CQ: When possible use LEA for mul by imm 3, 5 or 9
1706 ssize_t imm = immOp->AsIntConCommon()->IconValue();
1708 if (!requiresOverflowCheck && rmOp->isUsedFromReg() && ((imm == 3) || (imm == 5) || (imm == 9)))
1710 // We will use the LEA instruction to perform this multiply
1711 // Note that an LEA with base=x, index=x and scale=(imm-1) computes x*imm when imm=3,5 or 9.
1712 unsigned int scale = (unsigned int)(imm - 1);
1713 getEmitter()->emitIns_R_ARX(INS_lea, size, targetReg, rmOp->gtRegNum, rmOp->gtRegNum, scale, 0);
1717 // use the 3-op form with immediate
1718 ins = getEmitter()->inst3opImulForReg(targetReg);
1719 emit->emitInsBinary(ins, size, rmOp, immOp);
1722 else // we have no contained immediate operand
1727 regNumber mulTargetReg = targetReg;
1728 if (isUnsignedMultiply && requiresOverflowCheck)
1731 mulTargetReg = REG_RAX;
1735 ins = genGetInsForOper(GT_MUL, targetType);
1738 // Set rmOp to the memory operand (if any)
1739 // or set regOp to the op2 when it has the matching target register for our multiply op
1741 if (op1->isUsedFromMemory() || (op2->isUsedFromReg() && (op2->gtRegNum == mulTargetReg)))
1746 assert(regOp->isUsedFromReg());
1748 // Setup targetReg when neither of the source operands was a matching register
1749 if (regOp->gtRegNum != mulTargetReg)
1751 inst_RV_RV(ins_Copy(targetType), mulTargetReg, regOp->gtRegNum, targetType);
1754 emit->emitInsBinary(ins, size, treeNode, rmOp);
1756 // Move the result to the desired register, if necessary
1757 if ((ins == INS_mulEAX) && (targetReg != REG_RAX))
1759 inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
1763 if (requiresOverflowCheck)
1765 // Overflow checking is only used for non-floating point types
1766 noway_assert(!varTypeIsFloating(treeNode));
1768 genCheckOverflow(treeNode);
1771 genProduceReg(treeNode);
1777 // We shouldn't be seeing GT_MOD on float/double args as it should get morphed into a
1778 // helper call by front-end. Similarly we shouldn't be seeing GT_UDIV and GT_UMOD
1779 // on float/double args.
1780 noway_assert(!varTypeIsFloating(treeNode));
1784 genCodeForDivMod(treeNode->AsOp());
1788 genIntrinsic(treeNode);
1793 genSIMDIntrinsic(treeNode->AsSIMD());
1795 #endif // FEATURE_SIMD
1798 genCkfinite(treeNode);
1810 // TODO-XArch-CQ: Check if we can use the currently set flags.
1811 // TODO-XArch-CQ: Check for the case where we can simply transfer the carry bit to a register
1812 // (signed < or >= where targetReg != REG_NA)
1814 GenTreePtr op1 = treeNode->gtGetOp1();
1815 var_types op1Type = op1->TypeGet();
1817 if (varTypeIsFloating(op1Type))
1819 genCompareFloat(treeNode);
1821 #if !defined(_TARGET_64BIT_)
1822 // X86 Long comparison
1823 else if (varTypeIsLong(op1Type))
1826 // The result of an unlowered long compare on a 32-bit target must either be
1827 // a) materialized into a register, or
1830 // A long compare that has a result that is used but not materialized into a register should
1831 // have been handled by Lowering::LowerCompare.
1834 assert((treeNode->gtRegNum != REG_NA) || !LIR::AsRange(compiler->compCurBB).TryGetUse(treeNode, &use));
1836 genCompareLong(treeNode);
1838 #endif // !defined(_TARGET_64BIT_)
1841 genCompareInt(treeNode);
1848 GenTree* cmp = treeNode->gtOp.gtOp1;
1850 assert(cmp->OperIsCompare());
1851 assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
1853 #if !defined(_TARGET_64BIT_)
1854 // Long-typed compares should have been handled by Lowering::LowerCompare.
1855 assert(!varTypeIsLong(cmp->gtGetOp1()));
1858 // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp
1859 // is governed by a flag NOT by the inherent type of the node
1860 // TODO-XArch-CQ: Check if we can use the currently set flags.
1861 emitJumpKind jumpKind[2];
1862 bool branchToTrueLabel[2];
1863 genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel);
1865 BasicBlock* skipLabel = nullptr;
1866 if (jumpKind[0] != EJ_NONE)
1868 BasicBlock* jmpTarget;
1869 if (branchToTrueLabel[0])
1871 jmpTarget = compiler->compCurBB->bbJumpDest;
1875 // This case arises only for ordered GT_EQ right now
1876 assert((cmp->gtOper == GT_EQ) && ((cmp->gtFlags & GTF_RELOP_NAN_UN) == 0));
1877 skipLabel = genCreateTempLabel();
1878 jmpTarget = skipLabel;
1881 inst_JMP(jumpKind[0], jmpTarget);
1884 if (jumpKind[1] != EJ_NONE)
1886 // the second conditional branch always has to be to the true label
1887 assert(branchToTrueLabel[1]);
1888 inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest);
1891 if (skipLabel != nullptr)
1893 genDefineTempLabel(skipLabel);
1900 GenTreeJumpCC* jcc = treeNode->AsJumpCC();
1902 assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
1904 CompareKind compareKind = ((jcc->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
1905 emitJumpKind jumpKind = genJumpKindForOper(jcc->gtCondition, compareKind);
1907 inst_JMP(jumpKind, compiler->compCurBB->bbJumpDest);
1913 // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
1914 // based on the contents of 'data'
1916 GenTree* data = treeNode->gtOp.gtOp1;
1917 genConsumeRegs(data);
1918 GenTreeIntCon cns = intForm(TYP_INT, 0);
1919 emit->emitInsBinary(INS_cmp, emitTypeSize(TYP_INT), data, &cns);
1921 BasicBlock* skipLabel = genCreateTempLabel();
1923 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
1924 inst_JMP(jmpEqual, skipLabel);
1926 // emit the call to the EE-helper that stops for GC (or other reasons)
1927 assert(treeNode->gtRsvdRegs != RBM_NONE);
1928 assert(genCountBits(treeNode->gtRsvdRegs) == 1);
1929 regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
1930 assert(genIsValidIntReg(tmpReg));
1932 genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN, tmpReg);
1933 genDefineTempLabel(skipLabel);
1938 genStoreInd(treeNode);
1942 // This is handled at the time we call genConsumeReg() on the GT_COPY
1947 // Swap is only supported for lclVar operands that are enregistered
1948 // We do not consume or produce any registers. Both operands remain enregistered.
1949 // However, the gc-ness may change.
1950 assert(genIsRegCandidateLocal(treeNode->gtOp.gtOp1) && genIsRegCandidateLocal(treeNode->gtOp.gtOp2));
1952 GenTreeLclVarCommon* lcl1 = treeNode->gtOp.gtOp1->AsLclVarCommon();
1953 LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]);
1954 var_types type1 = varDsc1->TypeGet();
1955 GenTreeLclVarCommon* lcl2 = treeNode->gtOp.gtOp2->AsLclVarCommon();
1956 LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]);
1957 var_types type2 = varDsc2->TypeGet();
1959 // We must have both int or both fp regs
1960 assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2));
1962 // FP swap is not yet implemented (and should have NYI'd in LSRA)
1963 assert(!varTypeIsFloating(type1));
1965 regNumber oldOp1Reg = lcl1->gtRegNum;
1966 regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg);
1967 regNumber oldOp2Reg = lcl2->gtRegNum;
1968 regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg);
1970 // We don't call genUpdateVarReg because we don't have a tree node with the new register.
1971 varDsc1->lvRegNum = oldOp2Reg;
1972 varDsc2->lvRegNum = oldOp1Reg;
1975 emitAttr size = EA_PTRSIZE;
1976 if (varTypeGCtype(type1) != varTypeGCtype(type2))
1978 // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers.
1979 // Otherwise it will leave them alone, which is correct if they have the same GC-ness.
1982 inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size);
1984 // Update the gcInfo.
1985 // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output)
1986 gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
1987 gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
1989 // gcMarkRegPtrVal will do the appropriate thing for non-gc types.
1990 // It will also dump the updates.
1991 gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1);
1992 gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2);
2003 genPutArgStk(treeNode->AsPutArgStk());
2008 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
2009 noway_assert(targetType != TYP_STRUCT);
2010 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
2011 // commas show up here commonly, as part of a nullchk operation
2012 GenTree* op1 = treeNode->gtOp.gtOp1;
2013 // If child node is not already in the register we need, move it
2015 if (treeNode->gtRegNum != op1->gtRegNum)
2017 inst_RV_RV(ins_Copy(targetType), treeNode->gtRegNum, op1->gtRegNum, targetType);
2019 genProduceReg(treeNode);
2024 genCallInstruction(treeNode->AsCall());
2028 genJmpMethod(treeNode);
2034 genLockedInstructions(treeNode->AsOp());
2037 case GT_MEMORYBARRIER:
2038 instGen_MemoryBarrier();
2043 GenTreePtr location = treeNode->gtCmpXchg.gtOpLocation; // arg1
2044 GenTreePtr value = treeNode->gtCmpXchg.gtOpValue; // arg2
2045 GenTreePtr comparand = treeNode->gtCmpXchg.gtOpComparand; // arg3
2047 assert(location->gtRegNum != REG_NA && location->gtRegNum != REG_RAX);
2048 assert(value->gtRegNum != REG_NA && value->gtRegNum != REG_RAX);
2050 genConsumeReg(location);
2051 genConsumeReg(value);
2052 genConsumeReg(comparand);
2053 // comparand goes to RAX;
2054 // Note that we must issue this move after the genConsumeRegs(), in case any of the above
2055 // have a GT_COPY from RAX.
2056 if (comparand->gtRegNum != REG_RAX)
2058 inst_RV_RV(ins_Copy(comparand->TypeGet()), REG_RAX, comparand->gtRegNum, comparand->TypeGet());
2064 emit->emitIns_AR_R(INS_cmpxchg, emitTypeSize(targetType), value->gtRegNum, location->gtRegNum, 0);
2067 if (targetReg != REG_RAX)
2069 inst_RV_RV(ins_Copy(targetType), targetReg, REG_RAX, targetType);
2072 genProduceReg(treeNode);
2076 // do nothing - reload is just a marker.
2077 // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
2078 // into the register specified in this node.
2085 if (treeNode->gtFlags & GTF_NO_OP_NO)
2087 noway_assert(!"GTF_NO_OP_NO should not be set");
2091 getEmitter()->emitIns_Nop(1);
2095 case GT_ARR_BOUNDS_CHECK:
2098 #endif // FEATURE_SIMD
2099 genRangeCheck(treeNode);
2103 if (treeNode->gtRegNum != treeNode->AsPhysReg()->gtSrcReg)
2105 inst_RV_RV(INS_mov, treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg, targetType);
2107 genTransferRegGCState(treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg);
2109 genProduceReg(treeNode);
2117 assert(treeNode->gtOp.gtOp1->isUsedFromReg());
2118 regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1);
2119 emit->emitIns_AR_R(INS_cmp, EA_4BYTE, reg, reg, 0);
2125 noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
2127 /* Catch arguments get passed in a register. genCodeForBBlist()
2128 would have marked it as holding a GC object, but not used. */
2130 noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
2131 genConsumeReg(treeNode);
2134 #if !FEATURE_EH_FUNCLETS
2137 // Have to clear the ShadowSP of the nesting level which encloses the finally. Generates:
2138 // mov dword ptr [ebp-0xC], 0 // for some slot of the ShadowSP local var
2140 unsigned finallyNesting;
2141 finallyNesting = treeNode->gtVal.gtVal1;
2142 noway_assert(treeNode->gtVal.gtVal1 < compiler->compHndBBtabCount);
2143 noway_assert(finallyNesting < compiler->compHndBBtabCount);
2145 // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
2146 unsigned filterEndOffsetSlotOffs;
2147 PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) >
2148 TARGET_POINTER_SIZE); // below doesn't underflow.
2149 filterEndOffsetSlotOffs =
2150 (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
2152 unsigned curNestingSlotOffs;
2153 curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE);
2154 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, curNestingSlotOffs);
2156 #endif // !FEATURE_EH_FUNCLETS
2158 case GT_PINVOKE_PROLOG:
2159 noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);
2161 // the runtime side requires the codegen here to be consistent
2162 emit->emitDisableRandomNops();
2166 genPendingCallLabel = genCreateTempLabel();
2167 treeNode->gtLabel.gtLabBB = genPendingCallLabel;
2168 emit->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, genPendingCallLabel, treeNode->gtRegNum);
2172 if (treeNode->OperIsCopyBlkOp() && !treeNode->AsBlk()->gtBlkOpGcUnsafe)
2174 assert(treeNode->AsObj()->gtGcPtrCount != 0);
2175 genCodeForCpObj(treeNode->AsObj());
2180 case GT_STORE_DYN_BLK:
2182 genCodeForStoreBlk(treeNode->AsBlk());
2186 genJumpTable(treeNode);
2189 case GT_SWITCH_TABLE:
2190 genTableBasedSwitch(treeNode);
2194 genCodeForArrIndex(treeNode->AsArrIndex());
2198 genCodeForArrOffset(treeNode->AsArrOffs());
2201 case GT_CLS_VAR_ADDR:
2202 getEmitter()->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0);
2203 genProduceReg(treeNode);
2206 #if !defined(_TARGET_64BIT_)
2208 assert(treeNode->isUsedFromReg());
2209 genConsumeRegs(treeNode);
2214 // Do nothing; these nodes are simply markers for debug info.
2221 _snprintf_s(message, _countof(message), _TRUNCATE, "Unimplemented node type %s\n",
2222 GenTree::NodeName(treeNode->OperGet()));
2224 assert(!"Unknown node in codegen");
2230 //----------------------------------------------------------------------------------
2231 // genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
2234 // treeNode - Gentree of GT_STORE_LCL_VAR
2240 // The child of store is a multi-reg call node.
2241 // genProduceReg() on treeNode is made by caller of this routine.
2243 void CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode)
2245 assert(treeNode->OperGet() == GT_STORE_LCL_VAR);
2247 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2248 // Structs of size >=9 and <=16 are returned in two return registers on x64 Unix.
2249 assert(varTypeIsStruct(treeNode));
2251 // Assumption: current x64 Unix implementation requires that a multi-reg struct
2252 // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
2253 // being struct promoted.
2254 unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
2255 LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
2256 noway_assert(varDsc->lvIsMultiRegRet);
2258 GenTree* op1 = treeNode->gtGetOp1();
2259 GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
2260 GenTreeCall* call = actualOp1->AsCall();
2261 assert(call->HasMultiRegRetVal());
2263 genConsumeRegs(op1);
2265 ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
2266 assert(retTypeDesc->GetReturnRegCount() == MAX_RET_REG_COUNT);
2267 unsigned regCount = retTypeDesc->GetReturnRegCount();
2269 if (treeNode->gtRegNum != REG_NA)
2271 // Right now the only enregistrable structs supported are SIMD types.
2272 assert(varTypeIsSIMD(treeNode));
2273 assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(0)));
2274 assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(1)));
2276 // This is a case of two 8-bytes that comprise the operand is in
2277 // two different xmm registers and needs to assembled into a single
2279 regNumber targetReg = treeNode->gtRegNum;
2280 regNumber reg0 = call->GetRegNumByIdx(0);
2281 regNumber reg1 = call->GetRegNumByIdx(1);
2283 if (op1->IsCopyOrReload())
2285 // GT_COPY/GT_RELOAD will have valid reg for those positions
2286 // that need to be copied or reloaded.
2287 regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0);
2288 if (reloadReg != REG_NA)
2293 reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(1);
2294 if (reloadReg != REG_NA)
2300 if (targetReg != reg0 && targetReg != reg1)
2302 // Copy reg0 into targetReg and let it to be handled by one
2303 // of the cases below.
2304 inst_RV_RV(ins_Copy(TYP_DOUBLE), targetReg, reg0, TYP_DOUBLE);
2308 if (targetReg == reg0)
2310 // targeReg[63:0] = targetReg[63:0]
2311 // targetReg[127:64] = reg1[127:64]
2312 inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg1, 0x00);
2316 assert(targetReg == reg1);
2318 // We need two shuffles to achieve this
2320 // targeReg[63:0] = targetReg[63:0]
2321 // targetReg[127:64] = reg0[63:0]
2324 // targeReg[63:0] = targetReg[127:64]
2325 // targetReg[127:64] = targetReg[63:0]
2327 // Essentially copy low 8-bytes from reg0 to high 8-bytes of targetReg
2328 // and next swap low and high 8-bytes of targetReg to have them
2329 // rearranged in the right order.
2330 inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg0, 0x00);
2331 inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, targetReg, 0x01);
2338 for (unsigned i = 0; i < regCount; ++i)
2340 var_types type = retTypeDesc->GetReturnRegType(i);
2341 regNumber reg = call->GetRegNumByIdx(i);
2342 if (op1->IsCopyOrReload())
2344 // GT_COPY/GT_RELOAD will have valid reg for those positions
2345 // that need to be copied or reloaded.
2346 regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
2347 if (reloadReg != REG_NA)
2353 assert(reg != REG_NA);
2354 getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
2355 offset += genTypeSize(type);
2358 varDsc->lvRegNum = REG_STK;
2360 #elif defined(_TARGET_X86_)
2361 // Longs are returned in two return registers on x86.
2362 assert(varTypeIsLong(treeNode));
2364 // Assumption: current x86 implementation requires that a multi-reg long
2365 // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
2367 unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
2368 LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
2369 noway_assert(varDsc->lvIsMultiRegRet);
2371 GenTree* op1 = treeNode->gtGetOp1();
2372 GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
2373 GenTreeCall* call = actualOp1->AsCall();
2374 assert(call->HasMultiRegRetVal());
2376 genConsumeRegs(op1);
2378 ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
2379 unsigned regCount = retTypeDesc->GetReturnRegCount();
2380 assert(regCount == MAX_RET_REG_COUNT);
2384 for (unsigned i = 0; i < regCount; ++i)
2386 var_types type = retTypeDesc->GetReturnRegType(i);
2387 regNumber reg = call->GetRegNumByIdx(i);
2388 if (op1->IsCopyOrReload())
2390 // GT_COPY/GT_RELOAD will have valid reg for those positions
2391 // that need to be copied or reloaded.
2392 regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
2393 if (reloadReg != REG_NA)
2399 assert(reg != REG_NA);
2400 getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
2401 offset += genTypeSize(type);
2404 varDsc->lvRegNum = REG_STK;
2405 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING && !_TARGET_X86_
2406 assert(!"Unreached");
2407 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING && !_TARGET_X86_
2410 //------------------------------------------------------------------------
2411 // genLclHeap: Generate code for localloc.
2414 // tree - the localloc tree to generate.
2417 // Note that for x86, we don't track ESP movements while generating the localloc code.
2418 // The ESP tracking is used to report stack pointer-relative GC info, which is not
2419 // interesting while doing the localloc construction. Also, for functions with localloc,
2420 // we have EBP frames, and EBP-relative locals, and ESP-relative accesses only for function
2421 // call arguments. We store the ESP after the localloc is complete in the LocAllocSP
2422 // variable. This variable is implicitly reported to the VM in the GC info (its position
2423 // is defined by convention relative to other items), and is used by the GC to find the
2424 // "base" stack pointer in functions with localloc.
2426 void CodeGen::genLclHeap(GenTreePtr tree)
2428 assert(tree->OperGet() == GT_LCLHEAP);
2429 assert(compiler->compLocallocUsed);
2431 GenTreePtr size = tree->gtOp.gtOp1;
2432 noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
2434 regNumber targetReg = tree->gtRegNum;
2435 regMaskTP tmpRegsMask = tree->gtRsvdRegs;
2436 regNumber regCnt = REG_NA;
2437 var_types type = genActualType(size->gtType);
2438 emitAttr easz = emitTypeSize(type);
2439 BasicBlock* endLabel = nullptr;
2443 if (compiler->opts.compStackCheckOnRet)
2445 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
2446 compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
2447 compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
2448 getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
2450 BasicBlock* esp_check = genCreateTempLabel();
2451 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
2452 inst_JMP(jmpEqual, esp_check);
2453 getEmitter()->emitIns(INS_BREAKPOINT);
2454 genDefineTempLabel(esp_check);
2458 noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
2459 noway_assert(genStackLevel == 0); // Can't have anything on the stack
2461 unsigned stackAdjustment = 0;
2462 BasicBlock* loop = nullptr;
2464 // compute the amount of memory to allocate to properly STACK_ALIGN.
2466 if (size->IsCnsIntOrI())
2468 // If size is a constant, then it must be contained.
2469 assert(size->isContained());
2471 // If amount is zero then return null in targetReg
2472 amount = size->gtIntCon.gtIconVal;
2475 instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg);
2479 // 'amount' is the total number of bytes to localloc to properly STACK_ALIGN
2480 amount = AlignUp(amount, STACK_ALIGN);
2484 // The localloc requested memory size is non-constant.
2486 // Put the size value in targetReg. If it is zero, bail out by returning null in targetReg.
2487 genConsumeRegAndCopy(size, targetReg);
2488 endLabel = genCreateTempLabel();
2489 getEmitter()->emitIns_R_R(INS_test, easz, targetReg, targetReg);
2490 inst_JMP(EJ_je, endLabel);
2492 // Compute the size of the block to allocate and perform alignment.
2493 // If compInitMem=true, we can reuse targetReg as regcnt,
2494 // since we don't need any internal registers.
2495 if (compiler->info.compInitMem)
2497 assert(genCountBits(tmpRegsMask) == 0);
2502 assert(genCountBits(tmpRegsMask) >= 1);
2503 regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
2504 tmpRegsMask &= ~regCntMask;
2505 regCnt = genRegNumFromMask(regCntMask);
2506 if (regCnt != targetReg)
2508 // Above, we put the size in targetReg. Now, copy it to our new temp register if necessary.
2509 inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet());
2513 // Round up the number of bytes to allocate to a STACK_ALIGN boundary. This is done
2517 // However, in the initialized memory case, we need the count of STACK_ALIGN-sized
2518 // elements, not a byte count, after the alignment. So instead of the "and", which
2519 // becomes unnecessary, generate a shift, e.g.:
2523 inst_RV_IV(INS_add, regCnt, STACK_ALIGN - 1, emitActualTypeSize(type));
2525 if (compiler->info.compInitMem)
2527 // Convert the count from a count of bytes to a loop count. We will loop once per
2528 // stack alignment size, so each loop will zero 4 bytes on x86 and 16 bytes on x64.
2529 // Note that we zero a single reg-size word per iteration on x86, and 2 reg-size
2530 // words per iteration on x64. We will shift off all the stack alignment bits
2531 // added above, so there is no need for an 'and' instruction.
2533 // --- shr regCnt, 2 (or 4) ---
2534 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT_ALL);
2538 // Otherwise, mask off the low bits to align the byte count.
2539 inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
2543 #if FEATURE_FIXED_OUT_ARGS
2544 // If we have an outgoing arg area then we must adjust the SP by popping off the
2545 // outgoing arg area. We will restore it right before we return from this method.
2547 // Localloc returns stack space that aligned to STACK_ALIGN bytes. The following
2548 // are the cases that need to be handled:
2549 // i) Method has out-going arg area.
2550 // It is guaranteed that size of out-going arg area is STACK_ALIGN'ed (see fgMorphArgs).
2551 // Therefore, we will pop off the out-going arg area from RSP before allocating the localloc space.
2552 // ii) Method has no out-going arg area.
2553 // Nothing to pop off from the stack.
2554 if (compiler->lvaOutgoingArgSpaceSize > 0)
2556 assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain
2558 inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
2559 stackAdjustment += compiler->lvaOutgoingArgSpaceSize;
2563 if (size->IsCnsIntOrI())
2565 // We should reach here only for non-zero, constant size allocations.
2567 assert((amount % STACK_ALIGN) == 0);
2568 assert((amount % REGSIZE_BYTES) == 0);
2570 // For small allocations we will generate up to six push 0 inline
2571 size_t cntRegSizedWords = amount / REGSIZE_BYTES;
2572 if (cntRegSizedWords <= 6)
2574 for (; cntRegSizedWords != 0; cntRegSizedWords--)
2576 inst_IV(INS_push_hide, 0); // push_hide means don't track the stack
2581 bool doNoInitLessThanOnePageAlloc =
2582 !compiler->info.compInitMem && (amount < compiler->eeGetPageSize()); // must be < not <=
2585 bool needRegCntRegister = true;
2586 #else // !_TARGET_X86_
2587 bool needRegCntRegister = !doNoInitLessThanOnePageAlloc;
2588 #endif // !_TARGET_X86_
2590 if (needRegCntRegister)
2592 // If compInitMem=true, we can reuse targetReg as regcnt.
2593 // Since size is a constant, regCnt is not yet initialized.
2594 assert(regCnt == REG_NA);
2595 if (compiler->info.compInitMem)
2597 assert(genCountBits(tmpRegsMask) == 0);
2602 assert(genCountBits(tmpRegsMask) >= 1);
2603 regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
2604 tmpRegsMask &= ~regCntMask;
2605 regCnt = genRegNumFromMask(regCntMask);
2609 if (doNoInitLessThanOnePageAlloc)
2611 // Since the size is less than a page, simply adjust ESP.
2612 // ESP might already be in the guard page, so we must touch it BEFORE
2613 // the alloc, not after.
2614 CLANG_FORMAT_COMMENT_ANCHOR;
2617 // For x86, we don't want to use "sub ESP" because we don't want the emitter to track the adjustment
2618 // to ESP. So do the work in the count register.
2619 // TODO-CQ: manipulate ESP directly, to share code, reduce #ifdefs, and improve CQ. This would require
2620 // creating a way to temporarily turn off the emitter's tracking of ESP, maybe marking instrDescs as "don't
2622 inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
2623 getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
2624 inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE);
2625 inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL);
2626 #else // !_TARGET_X86_
2627 getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
2628 inst_RV_IV(INS_sub, REG_SPBASE, amount, EA_PTRSIZE);
2629 #endif // !_TARGET_X86_
2634 // else, "mov regCnt, amount"
2636 if (compiler->info.compInitMem)
2638 // When initializing memory, we want 'amount' to be the loop count.
2639 assert((amount % STACK_ALIGN) == 0);
2640 amount /= STACK_ALIGN;
2643 genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? TYP_INT : TYP_LONG);
2646 loop = genCreateTempLabel();
2647 if (compiler->info.compInitMem)
2649 // At this point 'regCnt' is set to the number of loop iterations for this loop, if each
2650 // iteration zeros (and subtracts from the stack pointer) STACK_ALIGN bytes.
2651 // Since we have to zero out the allocated memory AND ensure that RSP is always valid
2652 // by tickling the pages, we will just push 0's on the stack.
2654 assert(genIsValidIntReg(regCnt));
2657 genDefineTempLabel(loop);
2659 static_assert_no_msg((STACK_ALIGN % REGSIZE_BYTES) == 0);
2660 unsigned const count = (STACK_ALIGN / REGSIZE_BYTES);
2662 for (unsigned i = 0; i < count; i++)
2664 inst_IV(INS_push_hide, 0); // --- push REG_SIZE bytes of 0
2666 // Note that the stack must always be aligned to STACK_ALIGN bytes
2668 // Decrement the loop counter and loop if not done.
2669 inst_RV(INS_dec, regCnt, TYP_I_IMPL);
2670 inst_JMP(EJ_jne, loop);
2674 // At this point 'regCnt' is set to the total number of bytes to localloc.
2676 // We don't need to zero out the allocated memory. However, we do have
2677 // to tickle the pages to ensure that ESP is always valid and is
2678 // in sync with the "stack guard page". Note that in the worst
2679 // case ESP is on the last byte of the guard page. Thus you must
2680 // touch ESP+0 first not ESP+x01000.
2682 // Another subtlety is that you don't want ESP to be exactly on the
2683 // boundary of the guard page because PUSH is predecrement, thus
2684 // call setup would not touch the guard page but just beyond it
2686 // Note that we go through a few hoops so that ESP never points to
2687 // illegal pages at any time during the tickling process
2690 // add REGCNT, ESP // reg now holds ultimate ESP
2691 // jb loop // result is smaller than orignial ESP (no wrap around)
2692 // xor REGCNT, REGCNT, // Overflow, pick lowest possible number
2694 // test ESP, [ESP+0] // tickle the page
2696 // sub REGTMP, PAGE_SIZE
2703 inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
2704 inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
2705 inst_JMP(EJ_jb, loop);
2707 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
2709 genDefineTempLabel(loop);
2711 // Tickle the decremented value, and move back to ESP,
2712 // note that it has to be done BEFORE the update of ESP since
2713 // ESP might already be on the guard page. It is OK to leave
2714 // the final value of ESP on the guard page
2715 getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
2717 // This is a harmless trick to avoid the emitter trying to track the
2718 // decrement of the ESP - we do the subtraction in another reg instead
2719 // of adjusting ESP directly.
2720 assert(tmpRegsMask != RBM_NONE);
2721 assert(genCountBits(tmpRegsMask) == 1);
2722 regNumber regTmp = genRegNumFromMask(tmpRegsMask);
2724 inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL);
2725 inst_RV_IV(INS_sub, regTmp, compiler->eeGetPageSize(), EA_PTRSIZE);
2726 inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL);
2728 inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
2729 inst_JMP(EJ_jae, loop);
2731 // Move the final value to ESP
2732 inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
2736 // Re-adjust SP to allocate out-going arg area
2737 if (stackAdjustment > 0)
2739 assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
2740 inst_RV_IV(INS_sub, REG_SPBASE, stackAdjustment, EA_PTRSIZE);
2743 // Return the stackalloc'ed address in result register.
2744 // TargetReg = RSP + stackAdjustment.
2745 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, targetReg, REG_SPBASE, stackAdjustment);
2747 if (endLabel != nullptr)
2749 genDefineTempLabel(endLabel);
2754 // Write the lvaLocAllocSPvar stack frame slot
2755 if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
2757 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
2761 if (compiler->opts.compNeedStackProbes)
2763 genGenerateStackProbe();
2769 if (compiler->opts.compStackCheckOnRet)
2771 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
2772 compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
2773 compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
2774 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
2778 genProduceReg(tree);
2781 void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode)
2783 #ifdef JIT32_GCENCODER
2784 assert(!storeBlkNode->gtBlkOpGcUnsafe);
2786 if (storeBlkNode->gtBlkOpGcUnsafe)
2788 getEmitter()->emitDisableGC();
2790 #endif // JIT32_GCENCODER
2792 bool isCopyBlk = storeBlkNode->OperIsCopyBlkOp();
2794 switch (storeBlkNode->gtBlkOpKind)
2796 #ifdef _TARGET_AMD64_
2797 case GenTreeBlk::BlkOpKindHelper:
2800 genCodeForCpBlk(storeBlkNode);
2804 genCodeForInitBlk(storeBlkNode);
2807 #endif // _TARGET_AMD64_
2808 case GenTreeBlk::BlkOpKindRepInstr:
2811 genCodeForCpBlkRepMovs(storeBlkNode);
2815 genCodeForInitBlkRepStos(storeBlkNode);
2818 case GenTreeBlk::BlkOpKindUnroll:
2821 genCodeForCpBlkUnroll(storeBlkNode);
2825 genCodeForInitBlkUnroll(storeBlkNode);
2832 #ifndef JIT32_GCENCODER
2833 if (storeBlkNode->gtBlkOpGcUnsafe)
2835 getEmitter()->emitEnableGC();
2837 #endif // !defined(JIT32_GCENCODER)
2841 //------------------------------------------------------------------------
2842 // genCodeForInitBlkRepStos: Generate code for InitBlk using rep stos.
2845 // initBlkNode - The Block store for which we are generating code.
2849 // The size of the buffers must be a constant and also less than INITBLK_STOS_LIMIT bytes.
2850 // Any value larger than that, we'll use the helper even if both the fill byte and the
2851 // size are integer constants.
2853 // The size must either be a non-constant or less than INITBLK_STOS_LIMIT bytes.
2855 void CodeGen::genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode)
2857 // Make sure we got the arguments of the initblk/initobj operation in the right registers.
2858 unsigned size = initBlkNode->Size();
2859 GenTreePtr dstAddr = initBlkNode->Addr();
2860 GenTreePtr initVal = initBlkNode->Data();
2861 if (initVal->OperIsInitVal())
2863 initVal = initVal->gtGetOp1();
2867 assert(dstAddr->isUsedFromReg());
2868 assert(initVal->isUsedFromReg());
2869 #ifdef _TARGET_AMD64_
2872 if (initVal->IsCnsIntOrI())
2874 #ifdef _TARGET_AMD64_
2875 assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
2877 // Note that a size of zero means a non-constant size.
2878 assert((size == 0) || (size > CPBLK_UNROLL_LIMIT));
2884 genConsumeBlockOp(initBlkNode, REG_RDI, REG_RAX, REG_RCX);
2885 instGen(INS_r_stosb);
2888 // Generate code for InitBlk by performing a loop unroll
2890 // a) Both the size and fill byte value are integer constants.
2891 // b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes.
2893 void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
2895 // Make sure we got the arguments of the initblk/initobj operation in the right registers
2896 unsigned size = initBlkNode->Size();
2897 GenTreePtr dstAddr = initBlkNode->Addr();
2898 GenTreePtr initVal = initBlkNode->Data();
2899 if (initVal->OperIsInitVal())
2901 initVal = initVal->gtGetOp1();
2904 assert(dstAddr->isUsedFromReg());
2905 assert(initVal->isUsedFromReg() || (initVal->IsIntegralConst(0) && ((size & 0xf) == 0)));
2907 assert(size <= INITBLK_UNROLL_LIMIT);
2908 assert(initVal->gtSkipReloadOrCopy()->IsCnsIntOrI());
2910 emitter* emit = getEmitter();
2912 genConsumeOperands(initBlkNode);
2914 // If the initVal was moved, or spilled and reloaded to a different register,
2915 // get the original initVal from below the GT_RELOAD, but only after capturing the valReg,
2916 // which needs to be the new register.
2917 regNumber valReg = initVal->gtRegNum;
2918 initVal = initVal->gtSkipReloadOrCopy();
2920 unsigned offset = 0;
2922 // Perform an unroll using SSE2 loads and stores.
2923 if (size >= XMM_REGSIZE_BYTES)
2925 regNumber tmpReg = genRegNumFromMask(initBlkNode->gtRsvdRegs);
2928 assert(initBlkNode->gtRsvdRegs != RBM_NONE);
2929 assert(genCountBits(initBlkNode->gtRsvdRegs) == 1);
2930 assert(genIsValidFloatReg(tmpReg));
2933 if (initVal->gtIntCon.gtIconVal != 0)
2935 emit->emitIns_R_R(INS_mov_i2xmm, EA_PTRSIZE, tmpReg, valReg);
2936 emit->emitIns_R_R(INS_punpckldq, EA_8BYTE, tmpReg, tmpReg);
2938 // For x86, we need one more to convert it from 8 bytes to 16 bytes.
2939 emit->emitIns_R_R(INS_punpckldq, EA_8BYTE, tmpReg, tmpReg);
2940 #endif // _TARGET_X86_
2944 emit->emitIns_R_R(INS_xorpd, EA_8BYTE, tmpReg, tmpReg);
2947 // Determine how many 16 byte slots we're going to fill using SSE movs.
2948 size_t slots = size / XMM_REGSIZE_BYTES;
2952 emit->emitIns_AR_R(INS_movdqu, EA_8BYTE, tmpReg, dstAddr->gtRegNum, offset);
2953 offset += XMM_REGSIZE_BYTES;
2957 // Fill the remainder (or a < 16 byte sized struct)
2958 if ((size & 8) != 0)
2961 // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
2962 emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
2964 emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
2966 #else // !_TARGET_X86_
2968 emit->emitIns_AR_R(INS_mov, EA_8BYTE, valReg, dstAddr->gtRegNum, offset);
2971 #endif // !_TARGET_X86_
2973 if ((size & 4) != 0)
2975 emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
2978 if ((size & 2) != 0)
2980 emit->emitIns_AR_R(INS_mov, EA_2BYTE, valReg, dstAddr->gtRegNum, offset);
2983 if ((size & 1) != 0)
2985 emit->emitIns_AR_R(INS_mov, EA_1BYTE, valReg, dstAddr->gtRegNum, offset);
2989 // Generates code for InitBlk by calling the VM memset helper function.
2991 // a) The size argument of the InitBlk is not an integer constant.
2992 // b) The size argument of the InitBlk is >= INITBLK_STOS_LIMIT bytes.
2993 void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
2995 #ifdef _TARGET_AMD64_
2996 // Make sure we got the arguments of the initblk operation in the right registers
2997 unsigned blockSize = initBlkNode->Size();
2998 GenTreePtr dstAddr = initBlkNode->Addr();
2999 GenTreePtr initVal = initBlkNode->Data();
3000 if (initVal->OperIsInitVal())
3002 initVal = initVal->gtGetOp1();
3005 assert(dstAddr->isUsedFromReg());
3006 assert(initVal->isUsedFromReg());
3010 assert(blockSize >= CPBLK_MOVS_LIMIT);
3013 genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
3015 genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
3016 #else // !_TARGET_AMD64_
3017 NYI_X86("Helper call for InitBlk");
3018 #endif // !_TARGET_AMD64_
3021 // Generate code for a load from some address + offset
3022 // baseNode: tree node which can be either a local address or arbitrary node
3023 // offset: distance from the baseNode from which to load
3024 void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* baseNode, unsigned offset)
3026 emitter* emit = getEmitter();
3028 if (baseNode->OperIsLocalAddr())
3030 if (baseNode->gtOper == GT_LCL_FLD_ADDR)
3032 offset += baseNode->gtLclFld.gtLclOffs;
3034 emit->emitIns_R_S(ins, size, dst, baseNode->gtLclVarCommon.gtLclNum, offset);
3038 emit->emitIns_R_AR(ins, size, dst, baseNode->gtRegNum, offset);
3042 //------------------------------------------------------------------------
3043 // genCodeForStoreOffset: Generate code to store a reg to [base + offset].
3046 // ins - the instruction to generate.
3047 // size - the size that needs to be stored.
3048 // src - the register which needs to be stored.
3049 // baseNode - the base, relative to which to store the src register.
3050 // offset - the offset that is added to the baseNode to calculate the address to store into.
3052 void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* baseNode, unsigned offset)
3054 emitter* emit = getEmitter();
3056 if (baseNode->OperIsLocalAddr())
3058 if (baseNode->gtOper == GT_LCL_FLD_ADDR)
3060 offset += baseNode->gtLclFld.gtLclOffs;
3063 emit->emitIns_S_R(ins, size, src, baseNode->AsLclVarCommon()->GetLclNum(), offset);
3067 emit->emitIns_AR_R(ins, size, src, baseNode->gtRegNum, offset);
3071 // Generates CpBlk code by performing a loop unroll
3073 // The size argument of the CpBlk node is a constant and <= 64 bytes.
3074 // This may seem small but covers >95% of the cases in several framework assemblies.
3076 void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
3078 // Make sure we got the arguments of the cpblk operation in the right registers
3079 unsigned size = cpBlkNode->Size();
3080 GenTreePtr dstAddr = cpBlkNode->Addr();
3081 GenTreePtr source = cpBlkNode->Data();
3082 GenTreePtr srcAddr = nullptr;
3083 assert(size <= CPBLK_UNROLL_LIMIT);
3085 emitter* emit = getEmitter();
3087 if (source->gtOper == GT_IND)
3089 srcAddr = source->gtGetOp1();
3090 if (srcAddr->isUsedFromReg())
3092 genConsumeReg(srcAddr);
3097 noway_assert(source->IsLocal());
3098 // TODO-Cleanup: Consider making the addrForm() method in Rationalize public, e.g. in GenTree.
3099 // OR: transform source to GT_IND(GT_LCL_VAR_ADDR)
3100 if (source->OperGet() == GT_LCL_VAR)
3102 source->SetOper(GT_LCL_VAR_ADDR);
3106 assert(source->OperGet() == GT_LCL_FLD);
3107 source->SetOper(GT_LCL_FLD_ADDR);
3112 if (dstAddr->isUsedFromReg())
3114 genConsumeReg(dstAddr);
3117 unsigned offset = 0;
3119 // If the size of this struct is larger than 16 bytes
3120 // let's use SSE2 to be able to do 16 byte at a time
3121 // loads and stores.
3123 if (size >= XMM_REGSIZE_BYTES)
3125 assert(cpBlkNode->gtRsvdRegs != RBM_NONE);
3126 regNumber xmmReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLFLOAT);
3127 assert(genIsValidFloatReg(xmmReg));
3128 size_t slots = size / XMM_REGSIZE_BYTES;
3130 // TODO: In the below code the load and store instructions are for 16 bytes, but the
3131 // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
3132 // this probably needs to be changed.
3136 genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr, offset);
3138 genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset);
3139 offset += XMM_REGSIZE_BYTES;
3143 // Fill the remainder (15 bytes or less) if there's one.
3144 if ((size & 0xf) != 0)
3146 // Grab the integer temp register to emit the remaining loads and stores.
3147 regNumber tmpReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLINT);
3149 if ((size & 8) != 0)
3152 // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
3153 for (unsigned savedOffs = offset; offset < savedOffs + 8; offset += 4)
3155 genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
3156 genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
3158 #else // !_TARGET_X86_
3159 genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr, offset);
3160 genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset);
3162 #endif // !_TARGET_X86_
3164 if ((size & 4) != 0)
3166 genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
3167 genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
3170 if ((size & 2) != 0)
3172 genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr, offset);
3173 genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset);
3176 if ((size & 1) != 0)
3178 genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr, offset);
3179 genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset);
3184 // Generate code for CpBlk by using rep movs
3186 // The size argument of the CpBlk is a constant and is between
3187 // CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
3188 void CodeGen::genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode)
3190 // Make sure we got the arguments of the cpblk operation in the right registers
3191 unsigned size = cpBlkNode->Size();
3192 GenTreePtr dstAddr = cpBlkNode->Addr();
3193 GenTreePtr source = cpBlkNode->Data();
3194 GenTreePtr srcAddr = nullptr;
3197 assert(dstAddr->isUsedFromReg());
3198 assert(source->isContained());
3203 noway_assert(cpBlkNode->OperGet() == GT_STORE_DYN_BLK);
3209 assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
3211 assert(size > CPBLK_UNROLL_LIMIT);
3216 genConsumeBlockOp(cpBlkNode, REG_RDI, REG_RSI, REG_RCX);
3217 instGen(INS_r_movsb);
3220 #ifdef FEATURE_PUT_STRUCT_ARG_STK
3221 //------------------------------------------------------------------------
3222 // CodeGen::genMove8IfNeeded: Conditionally move 8 bytes of a struct to the argument area
3225 // size - The size of bytes remaining to be moved
3226 // longTmpReg - The tmp register to be used for the long value
3227 // srcAddr - The address of the source struct
3228 // offset - The current offset being copied
3231 // Returns the number of bytes moved (8 or 0).
3234 // This is used in the PutArgStkKindUnroll case, to move any bytes that are
3235 // not an even multiple of 16.
3236 // On x86, longTmpReg must be an xmm reg; on x64 it must be an integer register.
3237 // This is checked by genStoreRegToStackArg.
3239 unsigned CodeGen::genMove8IfNeeded(unsigned size, regNumber longTmpReg, GenTree* srcAddr, unsigned offset)
3242 instruction longMovIns = INS_movq;
3243 #else // !_TARGET_X86_
3244 instruction longMovIns = INS_mov;
3245 #endif // !_TARGET_X86_
3246 if ((size & 8) != 0)
3248 genCodeForLoadOffset(longMovIns, EA_8BYTE, longTmpReg, srcAddr, offset);
3249 genStoreRegToStackArg(TYP_LONG, longTmpReg, offset);
3255 //------------------------------------------------------------------------
3256 // CodeGen::genMove4IfNeeded: Conditionally move 4 bytes of a struct to the argument area
3259 // size - The size of bytes remaining to be moved
3260 // intTmpReg - The tmp register to be used for the long value
3261 // srcAddr - The address of the source struct
3262 // offset - The current offset being copied
3265 // Returns the number of bytes moved (4 or 0).
3268 // This is used in the PutArgStkKindUnroll case, to move any bytes that are
3269 // not an even multiple of 16.
3270 // intTmpReg must be an integer register.
3271 // This is checked by genStoreRegToStackArg.
3273 unsigned CodeGen::genMove4IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset)
3275 if ((size & 4) != 0)
3277 genCodeForLoadOffset(INS_mov, EA_4BYTE, intTmpReg, srcAddr, offset);
3278 genStoreRegToStackArg(TYP_INT, intTmpReg, offset);
3284 //------------------------------------------------------------------------
3285 // CodeGen::genMove2IfNeeded: Conditionally move 2 bytes of a struct to the argument area
3288 // size - The size of bytes remaining to be moved
3289 // intTmpReg - The tmp register to be used for the long value
3290 // srcAddr - The address of the source struct
3291 // offset - The current offset being copied
3294 // Returns the number of bytes moved (2 or 0).
3297 // This is used in the PutArgStkKindUnroll case, to move any bytes that are
3298 // not an even multiple of 16.
3299 // intTmpReg must be an integer register.
3300 // This is checked by genStoreRegToStackArg.
3302 unsigned CodeGen::genMove2IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset)
3304 if ((size & 2) != 0)
3306 genCodeForLoadOffset(INS_mov, EA_2BYTE, intTmpReg, srcAddr, offset);
3307 genStoreRegToStackArg(TYP_SHORT, intTmpReg, offset);
3313 //------------------------------------------------------------------------
3314 // CodeGen::genMove1IfNeeded: Conditionally move 1 byte of a struct to the argument area
3317 // size - The size of bytes remaining to be moved
3318 // intTmpReg - The tmp register to be used for the long value
3319 // srcAddr - The address of the source struct
3320 // offset - The current offset being copied
3323 // Returns the number of bytes moved (1 or 0).
3326 // This is used in the PutArgStkKindUnroll case, to move any bytes that are
3327 // not an even multiple of 16.
3328 // intTmpReg must be an integer register.
3329 // This is checked by genStoreRegToStackArg.
3331 unsigned CodeGen::genMove1IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset)
3334 if ((size & 1) != 0)
3336 genCodeForLoadOffset(INS_mov, EA_1BYTE, intTmpReg, srcAddr, offset);
3337 genStoreRegToStackArg(TYP_BYTE, intTmpReg, offset);
3343 //---------------------------------------------------------------------------------------------------------------//
3344 // genStructPutArgUnroll: Generates code for passing a struct arg on stack by value using loop unrolling.
3347 // putArgNode - the PutArgStk tree.
3350 // m_stkArgVarNum must be set to the base var number, relative to which the by-val struct will be copied to the
3353 // TODO-Amd64-Unix: Try to share code with copyblk.
3354 // Need refactoring of copyblk before it could be used for putarg_stk.
3355 // The difference for now is that a putarg_stk contains its children, while cpyblk does not.
3356 // This creates differences in code. After some significant refactoring it could be reused.
3358 void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode)
3360 // We will never call this method for SIMD types, which are stored directly
3361 // in genPutStructArgStk().
3362 noway_assert(putArgNode->TypeGet() == TYP_STRUCT);
3364 // Make sure we got the arguments of the cpblk operation in the right registers
3365 GenTreePtr dstAddr = putArgNode;
3366 GenTreePtr src = putArgNode->gtOp.gtOp1;
3368 unsigned size = putArgNode->getArgSize();
3369 assert(size <= CPBLK_UNROLL_LIMIT);
3371 emitter* emit = getEmitter();
3372 unsigned putArgOffset = putArgNode->getArgOffset();
3374 assert(src->isContained());
3376 assert(src->gtOper == GT_OBJ);
3378 if (src->gtOp.gtOp1->isUsedFromReg())
3380 genConsumeReg(src->gtOp.gtOp1);
3383 unsigned offset = 0;
3385 regNumber xmmTmpReg = REG_NA;
3386 regNumber intTmpReg = REG_NA;
3387 regNumber longTmpReg = REG_NA;
3389 // On x86 we use an XMM register for both 16 and 8-byte chunks, but if it's
3390 // less than 16 bytes, we will just be using pushes
3393 xmmTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
3394 longTmpReg = xmmTmpReg;
3396 if ((size & 0x7) != 0)
3398 intTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
3400 #else // !_TARGET_X86_
3401 // On x64 we use an XMM register only for 16-byte chunks.
3402 if (size >= XMM_REGSIZE_BYTES)
3404 xmmTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
3406 if ((size & 0xf) != 0)
3408 intTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
3409 longTmpReg = intTmpReg;
3411 #endif // !_TARGET_X86_
3413 // If the size of this struct is larger than 16 bytes
3414 // let's use SSE2 to be able to do 16 byte at a time
3415 // loads and stores.
3416 if (size >= XMM_REGSIZE_BYTES)
3419 assert(!m_pushStkArg);
3420 #endif // _TARGET_X86_
3421 assert(putArgNode->gtRsvdRegs != RBM_NONE);
3422 size_t slots = size / XMM_REGSIZE_BYTES;
3424 assert(putArgNode->gtGetOp1()->isContained());
3425 assert(putArgNode->gtGetOp1()->gtOp.gtOper == GT_OBJ);
3427 // TODO: In the below code the load and store instructions are for 16 bytes, but the
3428 // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
3429 // this probably needs to be changed.
3433 genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmTmpReg, src->gtGetOp1(), offset);
3436 genStoreRegToStackArg(TYP_STRUCT, xmmTmpReg, offset);
3438 offset += XMM_REGSIZE_BYTES;
3442 // Fill the remainder (15 bytes or less) if there's one.
3443 if ((size & 0xf) != 0)
3448 // This case is currently supported only for the case where the total size is
3449 // less than XMM_REGSIZE_BYTES. We need to push the remaining chunks in reverse
3450 // order. However, morph has ensured that we have a struct that is an even
3451 // multiple of TARGET_POINTER_SIZE, so we don't need to worry about alignment.
3452 assert(((size & 0xc) == size) && (offset == 0));
3453 // If we have a 4 byte chunk, load it from either offset 0 or 8, depending on
3454 // whether we've got an 8 byte chunk, and then push it on the stack.
3455 unsigned pushedBytes = genMove4IfNeeded(size, intTmpReg, src->gtOp.gtOp1, size & 0x8);
3456 // Now if we have an 8 byte chunk, load it from offset 0 (it's the first chunk)
3457 // and push it on the stack.
3458 pushedBytes += genMove8IfNeeded(size, longTmpReg, src->gtOp.gtOp1, 0);
3461 #endif // _TARGET_X86_
3463 offset += genMove8IfNeeded(size, longTmpReg, src->gtOp.gtOp1, offset);
3464 offset += genMove4IfNeeded(size, intTmpReg, src->gtOp.gtOp1, offset);
3465 offset += genMove2IfNeeded(size, intTmpReg, src->gtOp.gtOp1, offset);
3466 offset += genMove1IfNeeded(size, intTmpReg, src->gtOp.gtOp1, offset);
3467 assert(offset == size);
3472 //------------------------------------------------------------------------
3473 // genStructPutArgRepMovs: Generates code for passing a struct arg by value on stack using Rep Movs.
3476 // putArgNode - the PutArgStk tree.
3479 // The size argument of the PutArgStk (for structs) is a constant and is between
3480 // CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
3481 // m_stkArgVarNum must be set to the base var number, relative to which the by-val struct bits will go.
3483 void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode)
3485 assert(putArgNode->TypeGet() == TYP_STRUCT);
3486 assert(putArgNode->getArgSize() > CPBLK_UNROLL_LIMIT);
3488 // Make sure we got the arguments of the cpblk operation in the right registers
3489 GenTreePtr dstAddr = putArgNode;
3490 GenTreePtr srcAddr = putArgNode->gtGetOp1();
3493 assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI));
3494 assert(srcAddr->isContained());
3496 genConsumePutStructArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX);
3497 instGen(INS_r_movsb);
3500 //------------------------------------------------------------------------
3501 // If any Vector3 args are on stack and they are not pass-by-ref, the upper 32bits
3502 // must be cleared to zeroes. The native compiler doesn't clear the upper bits
3503 // and there is no way to know if the caller is native or not. So, the upper
3504 // 32 bits of Vector argument on stack are always cleared to zero.
3505 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
3506 void CodeGen::genClearStackVec3ArgUpperBits()
3511 printf("*************** In genClearStackVec3ArgUpperBits()\n");
3515 assert(compiler->compGeneratingProlog);
3517 unsigned varNum = 0;
3519 for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; varNum++)
3521 LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
3522 assert(varDsc->lvIsParam);
3524 // Does var has simd12 type?
3525 if (varDsc->lvType != TYP_SIMD12)
3530 if (!varDsc->lvIsRegArg)
3532 // Clear the upper 32 bits by mov dword ptr [V_ARG_BASE+0xC], 0
3533 getEmitter()->emitIns_S_I(ins_Store(TYP_INT), EA_4BYTE, varNum, genTypeSize(TYP_FLOAT) * 3, 0);
3537 // Assume that for x64 linux, an argument is fully in registers
3538 // or fully on stack.
3539 regNumber argReg = varDsc->GetOtherArgReg();
3541 // Clear the upper 32 bits by two shift instructions.
3542 // argReg = argReg << 96
3543 getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), argReg, 12);
3544 // argReg = argReg >> 96
3545 getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), argReg, 12);
3549 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
3550 #endif // FEATURE_PUT_STRUCT_ARG_STK
3552 // Generate code for CpObj nodes wich copy structs that have interleaved
3554 // This will generate a sequence of movsp instructions for the cases of non-gc members.
3555 // Note that movsp is an alias for movsd on x86 and movsq on x64.
3556 // and calls to the BY_REF_ASSIGN helper otherwise.
3557 void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
3559 // Make sure we got the arguments of the cpobj operation in the right registers
3560 GenTreePtr dstAddr = cpObjNode->Addr();
3561 GenTreePtr source = cpObjNode->Data();
3562 GenTreePtr srcAddr = nullptr;
3563 var_types srcAddrType = TYP_BYREF;
3564 bool sourceIsLocal = false;
3566 assert(source->isContained());
3567 if (source->gtOper == GT_IND)
3569 srcAddr = source->gtGetOp1();
3570 assert(srcAddr->isUsedFromReg());
3574 noway_assert(source->IsLocal());
3575 sourceIsLocal = true;
3578 bool dstOnStack = dstAddr->OperIsLocalAddr();
3581 bool isRepMovspUsed = false;
3583 assert(dstAddr->isUsedFromReg());
3585 // If the GenTree node has data about GC pointers, this means we're dealing
3586 // with CpObj, so this requires special logic.
3587 assert(cpObjNode->gtGcPtrCount > 0);
3589 // MovSp (alias for movsq on x64 and movsd on x86) instruction is used for copying non-gcref fields
3590 // and it needs src = RSI and dst = RDI.
3591 // Either these registers must not contain lclVars, or they must be dying or marked for spill.
3592 // This is because these registers are incremented as we go through the struct.
3595 GenTree* actualSrcAddr = srcAddr->gtSkipReloadOrCopy();
3596 GenTree* actualDstAddr = dstAddr->gtSkipReloadOrCopy();
3597 unsigned srcLclVarNum = BAD_VAR_NUM;
3598 unsigned dstLclVarNum = BAD_VAR_NUM;
3599 bool isSrcAddrLiveOut = false;
3600 bool isDstAddrLiveOut = false;
3601 if (genIsRegCandidateLocal(actualSrcAddr))
3603 srcLclVarNum = actualSrcAddr->AsLclVarCommon()->gtLclNum;
3604 isSrcAddrLiveOut = ((actualSrcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
3606 if (genIsRegCandidateLocal(actualDstAddr))
3608 dstLclVarNum = actualDstAddr->AsLclVarCommon()->gtLclNum;
3609 isDstAddrLiveOut = ((actualDstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
3611 assert((actualSrcAddr->gtRegNum != REG_RSI) || !isSrcAddrLiveOut ||
3612 ((srcLclVarNum == dstLclVarNum) && !isDstAddrLiveOut));
3613 assert((actualDstAddr->gtRegNum != REG_RDI) || !isDstAddrLiveOut ||
3614 ((srcLclVarNum == dstLclVarNum) && !isSrcAddrLiveOut));
3615 srcAddrType = srcAddr->TypeGet();
3619 // Consume the operands and get them into the right registers.
3620 // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
3621 genConsumeBlockOp(cpObjNode, REG_RDI, REG_RSI, REG_NA);
3622 gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddrType);
3623 gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet());
3625 unsigned slots = cpObjNode->gtSlots;
3627 // If we can prove it's on the stack we don't need to use the write barrier.
3630 if (slots >= CPOBJ_NONGC_SLOTS_LIMIT)
3633 // If the destination of the CpObj is on the stack, make sure we allocated
3634 // RCX to emit the movsp (alias for movsd or movsq for 32 and 64 bits respectively).
3635 assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0);
3636 regNumber tmpReg = REG_RCX;
3637 isRepMovspUsed = true;
3640 getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, slots);
3641 instGen(INS_r_movsp);
3645 // For small structs, it's better to emit a sequence of movsp than to
3646 // emit a rep movsp instruction.
3656 BYTE* gcPtrs = cpObjNode->gtGcPtrs;
3657 unsigned gcPtrCount = cpObjNode->gtGcPtrCount;
3665 // Let's see if we can use rep movsp instead of a sequence of movsp instructions
3666 // to save cycles and code size.
3668 unsigned nonGcSlotCount = 0;
3674 } while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
3676 // If we have a very small contiguous non-gc region, it's better just to
3677 // emit a sequence of movsp instructions
3678 if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
3680 while (nonGcSlotCount > 0)
3689 // Otherwise, we can save code-size and improve CQ by emitting
3690 // rep movsp (alias for movsd/movsq for x86/x64)
3691 assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0);
3692 regNumber tmpReg = REG_RCX;
3693 isRepMovspUsed = true;
3695 getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
3696 instGen(INS_r_movsp);
3701 // We have a GC pointer, call the memory barrier.
3702 genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
3708 assert(gcPtrCount == 0);
3711 // Clear the gcInfo for RSI and RDI.
3712 // While we normally update GC info prior to the last instruction that uses them,
3713 // these actually live into the helper call.
3714 gcInfo.gcMarkRegSetNpt(RBM_RSI);
3715 gcInfo.gcMarkRegSetNpt(RBM_RDI);
3718 // Generate code for a CpBlk node by the means of the VM memcpy helper call
3720 // a) The size argument of the CpBlk is not an integer constant
3721 // b) The size argument is a constant but is larger than CPBLK_MOVS_LIMIT bytes.
3722 void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode)
3724 #ifdef _TARGET_AMD64_
3725 // Make sure we got the arguments of the cpblk operation in the right registers
3726 unsigned blockSize = cpBlkNode->Size();
3727 GenTreePtr dstAddr = cpBlkNode->Addr();
3728 GenTreePtr source = cpBlkNode->Data();
3729 GenTreePtr srcAddr = nullptr;
3731 // Size goes in arg2
3734 assert(blockSize >= CPBLK_MOVS_LIMIT);
3735 assert((cpBlkNode->gtRsvdRegs & RBM_ARG_2) != 0);
3739 noway_assert(cpBlkNode->gtOper == GT_STORE_DYN_BLK);
3742 // Source address goes in arg1
3743 if (source->gtOper == GT_IND)
3745 srcAddr = source->gtGetOp1();
3746 assert(srcAddr->isUsedFromReg());
3750 noway_assert(source->IsLocal());
3751 assert((cpBlkNode->gtRsvdRegs & RBM_ARG_1) != 0);
3752 inst_RV_TT(INS_lea, REG_ARG_1, source, 0, EA_BYREF);
3755 genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
3757 genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
3758 #else // !_TARGET_AMD64_
3759 noway_assert(false && "Helper call for CpBlk is not needed.");
3760 #endif // !_TARGET_AMD64_
3763 // generate code do a switch statement based on a table of ip-relative offsets
3764 void CodeGen::genTableBasedSwitch(GenTree* treeNode)
3766 genConsumeOperands(treeNode->AsOp());
3767 regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum;
3768 regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum;
3770 regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
3772 // load the ip-relative offset (which is relative to start of fgFirstBB)
3773 getEmitter()->emitIns_R_ARX(INS_mov, EA_4BYTE, baseReg, baseReg, idxReg, 4, 0);
3775 // add it to the absolute address of fgFirstBB
3776 compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
3777 getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, compiler->fgFirstBB, tmpReg);
3778 getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, baseReg, tmpReg);
3780 getEmitter()->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), baseReg);
3783 // emits the table and an instruction to get the address of the first element
3784 void CodeGen::genJumpTable(GenTree* treeNode)
3786 noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
3787 assert(treeNode->OperGet() == GT_JMPTABLE);
3789 unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount;
3790 BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab;
3791 unsigned jmpTabOffs;
3792 unsigned jmpTabBase;
3794 jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCount, true);
3798 JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
3800 for (unsigned i = 0; i < jumpCount; i++)
3802 BasicBlock* target = *jumpTable++;
3803 noway_assert(target->bbFlags & BBF_JMP_TARGET);
3805 JITDUMP(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
3807 getEmitter()->emitDataGenData(i, target);
3810 getEmitter()->emitDataGenEnd();
3812 // Access to inline data is 'abstracted' by a special type of static member
3813 // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
3814 // to constant data, not a real static field.
3815 getEmitter()->emitIns_R_C(INS_lea, emitTypeSize(TYP_I_IMPL), treeNode->gtRegNum,
3816 compiler->eeFindJitDataOffs(jmpTabBase), 0);
3817 genProduceReg(treeNode);
3820 // generate code for the locked operations:
3821 // GT_LOCKADD, GT_XCHG, GT_XADD
3822 void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
3824 GenTree* data = treeNode->gtOp.gtOp2;
3825 GenTree* addr = treeNode->gtOp.gtOp1;
3826 regNumber targetReg = treeNode->gtRegNum;
3827 regNumber dataReg = data->gtRegNum;
3828 regNumber addrReg = addr->gtRegNum;
3829 var_types type = genActualType(data->TypeGet());
3832 // The register allocator should have extended the lifetime of the address
3833 // so that it is not used as the target.
3834 noway_assert(addrReg != targetReg);
3836 // If data is a lclVar that's not a last use, we'd better have allocated a register
3837 // for the result (except in the case of GT_LOCKADD which does not produce a register result).
3838 assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) ||
3839 (data->gtFlags & GTF_VAR_DEATH) != 0);
3841 genConsumeOperands(treeNode);
3842 if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg)
3844 inst_RV_RV(ins_Copy(type), targetReg, dataReg);
3845 data->gtRegNum = targetReg;
3847 // TODO-XArch-Cleanup: Consider whether it is worth it, for debugging purposes, to restore the
3848 // original gtRegNum on data, after calling emitInsBinary below.
3850 switch (treeNode->OperGet())
3857 // lock is implied by xchg
3868 // all of these nodes implicitly do an indirection on op1
3869 // so create a temporary node to feed into the pattern matching
3870 GenTreeIndir i = indirForm(type, addr);
3871 getEmitter()->emitInsBinary(ins, emitTypeSize(type), &i, data);
3873 if (treeNode->gtRegNum != REG_NA)
3875 genProduceReg(treeNode);
3879 // generate code for BoundsCheck nodes
3880 void CodeGen::genRangeCheck(GenTreePtr oper)
3883 noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK || oper->OperGet() == GT_SIMD_CHK);
3884 #else // !FEATURE_SIMD
3885 noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
3886 #endif // !FEATURE_SIMD
3888 GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
3890 GenTreePtr arrIndex = bndsChk->gtIndex;
3891 GenTreePtr arrLen = bndsChk->gtArrLen;
3892 GenTreePtr arrRef = nullptr;
3895 GenTree * src1, *src2;
3896 emitJumpKind jmpKind;
3898 genConsumeRegs(arrIndex);
3899 genConsumeRegs(arrLen);
3901 if (arrIndex->isContainedIntOrIImmed())
3903 // arrIndex is a contained constant. In this case
3904 // we will generate one of the following
3905 // cmp [mem], immed (if arrLen is a memory op)
3906 // cmp reg, immed (if arrLen is in a reg)
3908 // That is arrLen cannot be a contained immed.
3909 assert(!arrLen->isContainedIntOrIImmed());
3917 // arrIndex could either be a contained memory op or a reg
3918 // In this case we will generate one of the following
3919 // cmp [mem], immed (if arrLen is a constant)
3920 // cmp [mem], reg (if arrLen is in a reg)
3921 // cmp reg, immed (if arrIndex is in a reg)
3922 // cmp reg1, reg2 (if arraIndex is in reg1)
3923 // cmp reg, [mem] (if arrLen is a memory op)
3925 // That is only one of arrIndex or arrLen can be a memory op.
3926 assert(!arrIndex->isUsedFromMemory() || !arrLen->isUsedFromMemory());
3933 var_types bndsChkType = src2->TypeGet();
3935 // Bounds checks can only be 32 or 64 bit sized comparisons.
3936 assert(bndsChkType == TYP_INT || bndsChkType == TYP_LONG);
3938 // The type of the bounds check should always wide enough to compare against the index.
3939 assert(emitTypeSize(bndsChkType) >= emitTypeSize(src1->TypeGet()));
3942 getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(bndsChkType), src1, src2);
3943 genJumpToThrowHlpBlk(jmpKind, bndsChk->gtThrowKind, bndsChk->gtIndRngFailBB);
3946 //------------------------------------------------------------------------
3947 // genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the
3948 // lower bound for the given dimension.
3951 // elemType - the element type of the array
3952 // rank - the rank of the array
3953 // dimension - the dimension for which the lower bound offset will be returned.
3958 unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension)
3960 // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
3961 return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank);
3964 //------------------------------------------------------------------------
3965 // genOffsetOfMDArrayLength: Returns the offset from the Array object to the
3966 // size for the given dimension.
3969 // elemType - the element type of the array
3970 // rank - the rank of the array
3971 // dimension - the dimension for which the lower bound offset will be returned.
3976 unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension)
3978 // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
3979 return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension;
3982 //------------------------------------------------------------------------
3983 // genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference,
3984 // producing the effective index by subtracting the lower bound.
3987 // arrIndex - the node for which we're generating code
3993 void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex)
3995 GenTreePtr arrObj = arrIndex->ArrObj();
3996 GenTreePtr indexNode = arrIndex->IndexExpr();
3998 regNumber arrReg = genConsumeReg(arrObj);
3999 regNumber indexReg = genConsumeReg(indexNode);
4000 regNumber tgtReg = arrIndex->gtRegNum;
4002 unsigned dim = arrIndex->gtCurrDim;
4003 unsigned rank = arrIndex->gtArrRank;
4004 var_types elemType = arrIndex->gtArrElemType;
4006 noway_assert(tgtReg != REG_NA);
4008 // Subtract the lower bound for this dimension.
4009 // TODO-XArch-CQ: make this contained if it's an immediate that fits.
4010 if (tgtReg != indexReg)
4012 inst_RV_RV(INS_mov, tgtReg, indexReg, indexNode->TypeGet());
4014 getEmitter()->emitIns_R_AR(INS_sub, emitActualTypeSize(TYP_INT), tgtReg, arrReg,
4015 genOffsetOfMDArrayLowerBound(elemType, rank, dim));
4016 getEmitter()->emitIns_R_AR(INS_cmp, emitActualTypeSize(TYP_INT), tgtReg, arrReg,
4017 genOffsetOfMDArrayDimensionSize(elemType, rank, dim));
4018 genJumpToThrowHlpBlk(EJ_jae, SCK_RNGCHK_FAIL);
4020 genProduceReg(arrIndex);
4023 //------------------------------------------------------------------------
4024 // genCodeForArrOffset: Generates code to compute the flattened array offset for
4025 // one dimension of an array reference:
4026 // result = (prevDimOffset * dimSize) + effectiveIndex
4027 // where dimSize is obtained from the arrObj operand
4030 // arrOffset - the node for which we're generating code
4036 // dimSize and effectiveIndex are always non-negative, the former by design,
4037 // and the latter because it has been normalized to be zero-based.
4039 void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
4041 GenTreePtr offsetNode = arrOffset->gtOffset;
4042 GenTreePtr indexNode = arrOffset->gtIndex;
4043 GenTreePtr arrObj = arrOffset->gtArrObj;
4045 regNumber tgtReg = arrOffset->gtRegNum;
4046 assert(tgtReg != REG_NA);
4048 unsigned dim = arrOffset->gtCurrDim;
4049 unsigned rank = arrOffset->gtArrRank;
4050 var_types elemType = arrOffset->gtArrElemType;
4052 // First, consume the operands in the correct order.
4053 regNumber offsetReg = REG_NA;
4054 regNumber tmpReg = REG_NA;
4055 if (!offsetNode->IsIntegralConst(0))
4057 offsetReg = genConsumeReg(offsetNode);
4059 // We will use a temp register for the offset*scale+effectiveIndex computation.
4060 regMaskTP tmpRegMask = arrOffset->gtRsvdRegs;
4061 tmpReg = genRegNumFromMask(tmpRegMask);
4065 assert(offsetNode->isContained());
4067 regNumber indexReg = genConsumeReg(indexNode);
4068 // Although arrReg may not be used in the constant-index case, if we have generated
4069 // the value into a register, we must consume it, otherwise we will fail to end the
4070 // live range of the gc ptr.
4071 // TODO-CQ: Currently arrObj will always have a register allocated to it.
4072 // We could avoid allocating a register for it, which would be of value if the arrObj
4073 // is an on-stack lclVar.
4074 regNumber arrReg = REG_NA;
4075 if (arrObj->gtHasReg())
4077 arrReg = genConsumeReg(arrObj);
4080 if (!offsetNode->IsIntegralConst(0))
4082 assert(tmpReg != REG_NA);
4083 assert(arrReg != REG_NA);
4085 // Evaluate tgtReg = offsetReg*dim_size + indexReg.
4086 // tmpReg is used to load dim_size and the result of the multiplication.
4087 // Note that dim_size will never be negative.
4089 getEmitter()->emitIns_R_AR(INS_mov, emitActualTypeSize(TYP_INT), tmpReg, arrReg,
4090 genOffsetOfMDArrayDimensionSize(elemType, rank, dim));
4091 inst_RV_RV(INS_imul, tmpReg, offsetReg);
4093 if (tmpReg == tgtReg)
4095 inst_RV_RV(INS_add, tmpReg, indexReg);
4099 if (indexReg != tgtReg)
4101 inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_I_IMPL);
4103 inst_RV_RV(INS_add, tgtReg, tmpReg);
4108 if (indexReg != tgtReg)
4110 inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT);
4113 genProduceReg(arrOffset);
4116 // make a temporary indir we can feed to pattern matching routines
4117 // in cases where we don't want to instantiate all the indirs that happen
4119 GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
4121 GenTreeIndir i(GT_IND, type, base, nullptr);
4122 i.gtRegNum = REG_NA;
4123 // has to be nonnull (because contained nodes can't be the last in block)
4124 // but don't want it to be a valid pointer
4125 i.gtNext = (GenTree*)(-1);
4129 // make a temporary int we can feed to pattern matching routines
4130 // in cases where we don't want to instantiate
4132 GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
4134 GenTreeIntCon i(type, value);
4135 i.gtRegNum = REG_NA;
4136 // has to be nonnull (because contained nodes can't be the last in block)
4137 // but don't want it to be a valid pointer
4138 i.gtNext = (GenTree*)(-1);
4142 instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
4146 // Operations on SIMD vectors shouldn't come this path
4147 assert(!varTypeIsSIMD(type));
4148 if (varTypeIsFloating(type))
4150 return ins_MathOp(oper, type);
4194 #if !defined(_TARGET_64BIT_)
4213 #endif // !defined(_TARGET_64BIT_)
4221 //------------------------------------------------------------------------
4222 // genCodeForShift: Generates the code sequence for a GenTree node that
4223 // represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
4226 // tree - the bit shift node (that specifies the type of bit shift to perform).
4229 // a) All GenTrees are register allocated.
4230 // b) The shift-by-amount in tree->gtOp.gtOp2 is either a contained constant or
4231 // it's a register-allocated expression. If it is in a register that is
4232 // not RCX, it will be moved to RCX (so RCX better not be in use!).
4234 void CodeGen::genCodeForShift(GenTreePtr tree)
4236 // Only the non-RMW case here.
4237 assert(tree->OperIsShiftOrRotate());
4238 assert(tree->gtOp.gtOp1->isUsedFromReg());
4239 assert(tree->gtRegNum != REG_NA);
4241 genConsumeOperands(tree->AsOp());
4243 var_types targetType = tree->TypeGet();
4244 instruction ins = genGetInsForOper(tree->OperGet(), targetType);
4246 GenTreePtr operand = tree->gtGetOp1();
4247 regNumber operandReg = operand->gtRegNum;
4249 GenTreePtr shiftBy = tree->gtGetOp2();
4251 if (shiftBy->isContainedIntOrIImmed())
4253 // First, move the operand to the destination register and
4254 // later on perform the shift in-place.
4255 // (LSRA will try to avoid this situation through preferencing.)
4256 if (tree->gtRegNum != operandReg)
4258 inst_RV_RV(INS_mov, tree->gtRegNum, operandReg, targetType);
4261 int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
4262 inst_RV_SH(ins, emitTypeSize(tree), tree->gtRegNum, shiftByValue);
4266 // We must have the number of bits to shift stored in ECX, since we constrained this node to
4267 // sit in ECX. In case this didn't happen, LSRA expects the code generator to move it since it's a single
4268 // register destination requirement.
4269 genCopyRegIfNeeded(shiftBy, REG_RCX);
4271 // The operand to be shifted must not be in ECX
4272 noway_assert(operandReg != REG_RCX);
4274 if (tree->gtRegNum != operandReg)
4276 inst_RV_RV(INS_mov, tree->gtRegNum, operandReg, targetType);
4278 inst_RV_CL(ins, tree->gtRegNum, targetType);
4281 genProduceReg(tree);
4285 //------------------------------------------------------------------------
4286 // genCodeForShiftLong: Generates the code sequence for a GenTree node that
4287 // represents a three operand bit shift or rotate operation (<<Hi, >>Lo).
4290 // tree - the bit shift node (that specifies the type of bit shift to perform).
4293 // a) All GenTrees are register allocated.
4294 // b) The shift-by-amount in tree->gtOp.gtOp2 is a contained constant
4296 void CodeGen::genCodeForShiftLong(GenTreePtr tree)
4298 // Only the non-RMW case here.
4299 genTreeOps oper = tree->OperGet();
4300 assert(oper == GT_LSH_HI || oper == GT_RSH_LO);
4302 GenTree* operand = tree->gtOp.gtOp1;
4303 assert(operand->OperGet() == GT_LONG);
4304 assert(operand->gtOp.gtOp1->isUsedFromReg());
4305 assert(operand->gtOp.gtOp2->isUsedFromReg());
4307 GenTree* operandLo = operand->gtGetOp1();
4308 GenTree* operandHi = operand->gtGetOp2();
4310 regNumber regLo = operandLo->gtRegNum;
4311 regNumber regHi = operandHi->gtRegNum;
4313 genConsumeOperands(tree->AsOp());
4315 var_types targetType = tree->TypeGet();
4316 instruction ins = genGetInsForOper(oper, targetType);
4318 GenTreePtr shiftBy = tree->gtGetOp2();
4320 assert(shiftBy->isContainedIntOrIImmed());
4322 unsigned int count = shiftBy->AsIntConCommon()->IconValue();
4324 regNumber regResult = (oper == GT_LSH_HI) ? regHi : regLo;
4326 if (regResult != tree->gtRegNum)
4328 inst_RV_RV(INS_mov, tree->gtRegNum, regResult, targetType);
4331 if (oper == GT_LSH_HI)
4333 inst_RV_RV_IV(ins, emitTypeSize(targetType), tree->gtRegNum, regLo, count);
4337 assert(oper == GT_RSH_LO);
4338 inst_RV_RV_IV(ins, emitTypeSize(targetType), tree->gtRegNum, regHi, count);
4341 genProduceReg(tree);
4345 //------------------------------------------------------------------------
4346 // genCodeForShiftRMW: Generates the code sequence for a GT_STOREIND GenTree node that
4347 // represents a RMW bit shift or rotate operation (<<, >>, >>>, rol, ror), for example:
4348 // GT_STOREIND( AddressTree, GT_SHL( Ind ( AddressTree ), Operand ) )
4351 // storeIndNode: the GT_STOREIND node.
4353 void CodeGen::genCodeForShiftRMW(GenTreeStoreInd* storeInd)
4355 GenTree* data = storeInd->Data();
4356 GenTree* addr = storeInd->Addr();
4358 assert(data->OperIsShiftOrRotate());
4360 // This function only handles the RMW case.
4361 assert(data->gtOp.gtOp1->isUsedFromMemory());
4362 assert(data->gtOp.gtOp1->isIndir());
4363 assert(Lowering::IndirsAreEquivalent(data->gtOp.gtOp1, storeInd));
4364 assert(data->gtRegNum == REG_NA);
4366 var_types targetType = data->TypeGet();
4367 genTreeOps oper = data->OperGet();
4368 instruction ins = genGetInsForOper(oper, targetType);
4369 emitAttr attr = EA_ATTR(genTypeSize(targetType));
4371 GenTree* shiftBy = data->gtOp.gtOp2;
4372 if (shiftBy->isContainedIntOrIImmed())
4374 int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
4375 ins = genMapShiftInsToShiftByConstantIns(ins, shiftByValue);
4376 if (shiftByValue == 1)
4378 // There is no source in this case, as the shift by count is embedded in the instruction opcode itself.
4379 getEmitter()->emitInsRMW(ins, attr, storeInd);
4383 getEmitter()->emitInsRMW(ins, attr, storeInd, shiftBy);
4388 // We must have the number of bits to shift stored in ECX, since we constrained this node to
4389 // sit in ECX. In case this didn't happen, LSRA expects the code generator to move it since it's a single
4390 // register destination requirement.
4391 regNumber shiftReg = shiftBy->gtRegNum;
4392 genCopyRegIfNeeded(shiftBy, REG_RCX);
4394 // The shiftBy operand is implicit, so call the unary version of emitInsRMW.
4395 getEmitter()->emitInsRMW(ins, attr, storeInd);
4399 void CodeGen::genRegCopy(GenTree* treeNode)
4401 assert(treeNode->OperGet() == GT_COPY);
4402 GenTree* op1 = treeNode->gtOp.gtOp1;
4404 if (op1->IsMultiRegCall())
4408 GenTreeCopyOrReload* copyTree = treeNode->AsCopyOrReload();
4409 GenTreeCall* call = op1->AsCall();
4410 ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
4411 unsigned regCount = retTypeDesc->GetReturnRegCount();
4413 for (unsigned i = 0; i < regCount; ++i)
4415 var_types type = retTypeDesc->GetReturnRegType(i);
4416 regNumber fromReg = call->GetRegNumByIdx(i);
4417 regNumber toReg = copyTree->GetRegNumByIdx(i);
4419 // A Multi-reg GT_COPY node will have valid reg only for those
4420 // positions that corresponding result reg of call node needs
4422 if (toReg != REG_NA)
4424 assert(toReg != fromReg);
4425 inst_RV_RV(ins_Copy(type), toReg, fromReg, type);
4431 var_types targetType = treeNode->TypeGet();
4432 regNumber targetReg = treeNode->gtRegNum;
4433 assert(targetReg != REG_NA);
4435 // Check whether this node and the node from which we're copying the value have
4436 // different register types. This can happen if (currently iff) we have a SIMD
4437 // vector type that fits in an integer register, in which case it is passed as
4438 // an argument, or returned from a call, in an integer register and must be
4439 // copied if it's in an xmm register.
4441 bool srcFltReg = (varTypeIsFloating(op1) || varTypeIsSIMD(op1));
4442 bool tgtFltReg = (varTypeIsFloating(treeNode) || varTypeIsSIMD(treeNode));
4443 if (srcFltReg != tgtFltReg)
4450 ins = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet());
4452 intReg = op1->gtRegNum;
4456 ins = ins_CopyFloatToInt(op1->TypeGet(), treeNode->TypeGet());
4458 fpReg = op1->gtRegNum;
4460 inst_RV_RV(ins, fpReg, intReg, targetType);
4464 inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
4469 // The lclVar will never be a def.
4470 // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
4471 // appropriately set the gcInfo for the copied value.
4472 // If not, there are two cases we need to handle:
4473 // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
4474 // will remain live in its original register.
4475 // genProduceReg() will appropriately set the gcInfo for the copied value,
4476 // and genConsumeReg will reset it.
4477 // - Otherwise, we need to update register info for the lclVar.
4479 GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
4480 assert((lcl->gtFlags & GTF_VAR_DEF) == 0);
4482 if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
4484 LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
4486 // If we didn't just spill it (in genConsumeReg, above), then update the register info
4487 if (varDsc->lvRegNum != REG_STK)
4489 // The old location is dying
4490 genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
4492 gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum));
4494 genUpdateVarReg(varDsc, treeNode);
4496 // The new location is going live
4497 genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
4503 genProduceReg(treeNode);
4506 //------------------------------------------------------------------------
4507 // genStoreInd: Generate code for a GT_STOREIND node.
4510 // treeNode - The GT_STOREIND node for which to generate code.
4515 void CodeGen::genStoreInd(GenTreePtr node)
4517 assert(node->OperGet() == GT_STOREIND);
4520 // Storing Vector3 of size 12 bytes through indirection
4521 if (node->TypeGet() == TYP_SIMD12)
4523 genStoreIndTypeSIMD12(node);
4526 #endif // FEATURE_SIMD
4528 GenTreeStoreInd* storeInd = node->AsStoreInd();
4529 GenTree* data = storeInd->Data();
4530 GenTree* addr = storeInd->Addr();
4531 var_types targetType = storeInd->TypeGet();
4533 assert(!varTypeIsFloating(targetType) || (targetType == data->TypeGet()));
4535 GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(storeInd, data);
4536 if (writeBarrierForm != GCInfo::WBF_NoBarrier)
4538 // data and addr must be in registers.
4539 // Consume both registers so that any copies of interfering registers are taken care of.
4540 genConsumeOperands(storeInd->AsOp());
4542 if (genEmitOptimizedGCWriteBarrier(writeBarrierForm, addr, data))
4547 // At this point, we should not have any interference.
4548 // That is, 'data' must not be in REG_ARG_0, as that is where 'addr' must go.
4549 noway_assert(data->gtRegNum != REG_ARG_0);
4551 // addr goes in REG_ARG_0
4552 genCopyRegIfNeeded(addr, REG_ARG_0);
4554 // data goes in REG_ARG_1
4555 genCopyRegIfNeeded(data, REG_ARG_1);
4557 genGCWriteBarrier(storeInd, writeBarrierForm);
4561 bool reverseOps = ((storeInd->gtFlags & GTF_REVERSE_OPS) != 0);
4562 bool dataIsUnary = false;
4563 bool isRMWMemoryOp = storeInd->IsRMWMemoryOp();
4564 GenTree* rmwSrc = nullptr;
4566 // We must consume the operands in the proper execution order, so that liveness is
4567 // updated appropriately.
4570 genConsumeAddress(addr);
4573 // If storeInd represents a RMW memory op then its data is a non-leaf node marked as contained
4574 // and non-indir operand of data is the source of RMW memory op.
4577 assert(data->isContained() && !data->OperIsLeaf());
4579 GenTreePtr rmwDst = nullptr;
4581 dataIsUnary = (GenTree::OperIsUnary(data->OperGet()) != 0);
4584 if (storeInd->IsRMWDstOp1())
4586 rmwDst = data->gtGetOp1();
4587 rmwSrc = data->gtGetOp2();
4591 assert(storeInd->IsRMWDstOp2());
4592 rmwDst = data->gtGetOp2();
4593 rmwSrc = data->gtGetOp1();
4596 genConsumeRegs(rmwSrc);
4600 // *(p) = oper *(p): Here addr = p, rmwsrc=rmwDst = *(p) i.e. GT_IND(p)
4601 // For unary RMW ops, src and dst of RMW memory op is the same. Lower
4602 // clears operand counts on rmwSrc and we don't need to perform a
4603 // genConsumeReg() on it.
4604 assert(storeInd->IsRMWDstOp1());
4605 rmwSrc = data->gtGetOp1();
4606 rmwDst = data->gtGetOp1();
4607 assert(rmwSrc->isUsedFromMemory());
4610 assert(rmwSrc != nullptr);
4611 assert(rmwDst != nullptr);
4612 assert(Lowering::IndirsAreEquivalent(rmwDst, storeInd));
4616 genConsumeRegs(data);
4621 genConsumeAddress(addr);
4628 // generate code for unary RMW memory ops like neg/not
4629 getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(storeInd),
4634 if (data->OperIsShiftOrRotate())
4636 // Generate code for shift RMW memory ops.
4637 // The data address needs to be op1 (it must be [addr] = [addr] <shift> <amount>, not [addr] =
4638 // <amount> <shift> [addr]).
4639 assert(storeInd->IsRMWDstOp1());
4640 assert(rmwSrc == data->gtGetOp2());
4641 genCodeForShiftRMW(storeInd);
4643 else if (data->OperGet() == GT_ADD && (rmwSrc->IsIntegralConst(1) || rmwSrc->IsIntegralConst(-1)))
4645 // Generate "inc/dec [mem]" instead of "add/sub [mem], 1".
4648 // 1) Global morph transforms GT_SUB(x, +/-1) into GT_ADD(x, -/+1).
4649 // 2) TODO-AMD64: Debugger routine NativeWalker::Decode() runs into
4650 // an assert while decoding ModR/M byte of "inc dword ptr [rax]".
4651 // It is not clear whether Decode() can handle all possible
4652 // addr modes with inc/dec. For this reason, inc/dec [mem]
4653 // is not generated while generating debuggable code. Update
4654 // the above if condition once Decode() routine is fixed.
4655 assert(rmwSrc->isContainedIntOrIImmed());
4656 instruction ins = rmwSrc->IsIntegralConst(1) ? INS_inc : INS_dec;
4657 getEmitter()->emitInsRMW(ins, emitTypeSize(storeInd), storeInd);
4661 // generate code for remaining binary RMW memory ops like add/sub/and/or/xor
4662 getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(storeInd),
4669 getEmitter()->emitInsMov(ins_Store(data->TypeGet()), emitTypeSize(storeInd), storeInd);
4674 //------------------------------------------------------------------------
4675 // genEmitOptimizedGCWriteBarrier: Generate write barrier store using the optimized
4676 // helper functions.
4679 // writeBarrierForm - the write barrier form to use
4680 // addr - the address at which to do the store
4681 // data - the data to store
4684 // true if an optimized write barrier form was used, false if not. If this
4685 // function returns false, the caller must emit a "standard" write barrier.
4687 bool CodeGen::genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data)
4689 assert(writeBarrierForm != GCInfo::WBF_NoBarrier);
4691 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
4692 bool useOptimizedWriteBarriers = true;
4695 useOptimizedWriteBarriers =
4696 (writeBarrierForm != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
4699 if (!useOptimizedWriteBarriers)
4704 const static int regToHelper[2][8] = {
4705 // If the target is known to be in managed memory
4707 CORINFO_HELP_ASSIGN_REF_EAX, CORINFO_HELP_ASSIGN_REF_ECX, -1, CORINFO_HELP_ASSIGN_REF_EBX, -1,
4708 CORINFO_HELP_ASSIGN_REF_EBP, CORINFO_HELP_ASSIGN_REF_ESI, CORINFO_HELP_ASSIGN_REF_EDI,
4711 // Don't know if the target is in managed memory
4713 CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, -1,
4714 CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, -1, CORINFO_HELP_CHECKED_ASSIGN_REF_EBP,
4715 CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, CORINFO_HELP_CHECKED_ASSIGN_REF_EDI,
4719 noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX);
4720 noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX);
4721 noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX);
4722 noway_assert(regToHelper[0][REG_ESP] == -1);
4723 noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP);
4724 noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI);
4725 noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI);
4727 noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX);
4728 noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX);
4729 noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX);
4730 noway_assert(regToHelper[1][REG_ESP] == -1);
4731 noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP);
4732 noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI);
4733 noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI);
4735 regNumber reg = data->gtRegNum;
4736 noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER));
4738 // Generate the following code:
4740 // call write_barrier_helper_reg
4742 // addr goes in REG_ARG_0
4743 genCopyRegIfNeeded(addr, REG_WRITE_BARRIER);
4745 unsigned tgtAnywhere = 0;
4746 if (writeBarrierForm != GCInfo::WBF_BarrierUnchecked)
4751 // We might want to call a modified version of genGCWriteBarrier() to get the benefit of
4752 // the FEATURE_COUNT_GC_WRITE_BARRIERS code there, but that code doesn't look like it works
4753 // with rationalized RyuJIT IR. So, for now, just emit the helper call directly here.
4755 genEmitHelperCall(regToHelper[tgtAnywhere][reg],
4757 EA_PTRSIZE); // retSize
4760 #else // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS
4762 #endif // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS
4765 // Produce code for a GT_CALL node
4766 void CodeGen::genCallInstruction(GenTreeCall* call)
4768 genAlignStackBeforeCall(call);
4770 gtCallTypes callType = (gtCallTypes)call->gtCallType;
4772 IL_OFFSETX ilOffset = BAD_IL_OFFSET;
4774 // all virtuals should have been expanded into a control expression
4775 assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
4777 // Insert a GS check if necessary
4778 if (call->IsTailCallViaHelper())
4780 if (compiler->getNeedsGSSecurityCookie())
4782 #if FEATURE_FIXED_OUT_ARGS
4783 // If either of the conditions below is true, we will need a temporary register in order to perform the GS
4784 // cookie check. When FEATURE_FIXED_OUT_ARGS is disabled, we save and restore the temporary register using
4785 // push/pop. When FEATURE_FIXED_OUT_ARGS is enabled, however, we need an alternative solution. For now,
4786 // though, the tail prefix is ignored on all platforms that use fixed out args, so we should never hit this
4788 assert(compiler->gsGlobalSecurityCookieAddr == nullptr);
4789 assert((int)compiler->gsGlobalSecurityCookieVal == (ssize_t)compiler->gsGlobalSecurityCookieVal);
4791 genEmitGSCookieCheck(true);
4795 // Consume all the arg regs
4796 for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4798 assert(list->OperIsList());
4800 GenTreePtr argNode = list->Current();
4802 fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy());
4803 assert(curArgTabEntry);
4805 if (curArgTabEntry->regNum == REG_STK)
4810 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4811 // Deal with multi register passed struct args.
4812 if (argNode->OperGet() == GT_FIELD_LIST)
4814 GenTreeFieldList* fieldListPtr = argNode->AsFieldList();
4815 unsigned iterationNum = 0;
4816 for (; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), iterationNum++)
4818 GenTreePtr putArgRegNode = fieldListPtr->gtOp.gtOp1;
4819 assert(putArgRegNode->gtOper == GT_PUTARG_REG);
4820 regNumber argReg = REG_NA;
4822 if (iterationNum == 0)
4824 argReg = curArgTabEntry->regNum;
4828 assert(iterationNum == 1);
4829 argReg = curArgTabEntry->otherRegNum;
4832 genConsumeReg(putArgRegNode);
4834 // Validate the putArgRegNode has the right type.
4835 assert(putArgRegNode->TypeGet() ==
4836 compiler->GetTypeFromClassificationAndSizes(curArgTabEntry->structDesc
4837 .eightByteClassifications[iterationNum],
4838 curArgTabEntry->structDesc
4839 .eightByteSizes[iterationNum]));
4840 if (putArgRegNode->gtRegNum != argReg)
4842 inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg,
4843 putArgRegNode->gtRegNum);
4848 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4850 regNumber argReg = curArgTabEntry->regNum;
4851 genConsumeReg(argNode);
4852 if (argNode->gtRegNum != argReg)
4854 inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
4859 // In the case of a varargs call,
4860 // the ABI dictates that if we have floating point args,
4861 // we must pass the enregistered arguments in both the
4862 // integer and floating point registers so, let's do that.
4863 if (call->IsVarargs() && varTypeIsFloating(argNode))
4865 regNumber targetReg = compiler->getCallArgIntRegister(argNode->gtRegNum);
4866 instruction ins = ins_CopyFloatToInt(argNode->TypeGet(), TYP_LONG);
4867 inst_RV_RV(ins, argNode->gtRegNum, targetReg);
4869 #endif // FEATURE_VARARG
4872 #if defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4873 // The call will pop its arguments.
4874 // for each putarg_stk:
4875 ssize_t stackArgBytes = 0;
4876 GenTreePtr args = call->gtCallArgs;
4879 GenTreePtr arg = args->gtOp.gtOp1;
4880 if (arg->OperGet() != GT_ARGPLACE && !(arg->gtFlags & GTF_LATE_ARG))
4882 #if defined(_TARGET_X86_)
4883 if ((arg->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp1()->OperGet() == GT_FIELD_LIST))
4885 fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
4886 assert(curArgTabEntry);
4887 stackArgBytes += curArgTabEntry->numSlots * TARGET_POINTER_SIZE;
4890 #endif // defined(_TARGET_X86_)
4892 #ifdef FEATURE_PUT_STRUCT_ARG_STK
4893 if (genActualType(arg->TypeGet()) == TYP_STRUCT)
4895 assert(arg->OperGet() == GT_PUTARG_STK);
4897 GenTreeObj* obj = arg->gtGetOp1()->AsObj();
4898 unsigned argBytes = (unsigned)roundUp(obj->gtBlkSize, TARGET_POINTER_SIZE);
4900 fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
4901 assert((curArgTabEntry->numSlots * TARGET_POINTER_SIZE) == argBytes);
4903 stackArgBytes += argBytes;
4906 #endif // FEATURE_PUT_STRUCT_ARG_STK
4909 stackArgBytes += genTypeSize(genActualType(arg->TypeGet()));
4912 args = args->gtOp.gtOp2;
4914 #endif // defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4916 // Insert a null check on "this" pointer if asked.
4917 if (call->NeedsNullCheck())
4919 const regNumber regThis = genGetThisArgReg(call);
4920 getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
4923 // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method.
4924 CORINFO_METHOD_HANDLE methHnd;
4925 GenTree* target = call->gtControlExpr;
4926 if (callType == CT_INDIRECT)
4928 assert(target == nullptr);
4929 target = call->gtCallAddr;
4934 methHnd = call->gtCallMethHnd;
4937 CORINFO_SIG_INFO* sigInfo = nullptr;
4939 // Pass the call signature information down into the emitter so the emitter can associate
4940 // native call sites with the signatures they were generated from.
4941 if (callType != CT_HELPER)
4943 sigInfo = call->callSig;
4947 // If fast tail call, then we are done. In this case we setup the args (both reg args
4948 // and stack args in incoming arg area) and call target in rax. Epilog sequence would
4949 // generate "jmp rax".
4950 if (call->IsFastTailCall())
4952 // Don't support fast tail calling JIT helpers
4953 assert(callType != CT_HELPER);
4955 // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
4956 assert(target != nullptr);
4958 genConsumeReg(target);
4959 genCopyRegIfNeeded(target, REG_RAX);
4963 // For a pinvoke to unmanged code we emit a label to clear
4964 // the GC pointer state before the callsite.
4965 // We can't utilize the typical lazy killing of GC pointers
4966 // at (or inside) the callsite.
4967 if (call->IsUnmanaged())
4969 genDefineTempLabel(genCreateTempLabel());
4972 // Determine return value size(s).
4973 ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
4974 emitAttr retSize = EA_PTRSIZE;
4975 emitAttr secondRetSize = EA_UNKNOWN;
4977 if (call->HasMultiRegRetVal())
4979 retSize = emitTypeSize(retTypeDesc->GetReturnRegType(0));
4980 secondRetSize = emitTypeSize(retTypeDesc->GetReturnRegType(1));
4984 assert(!varTypeIsStruct(call));
4986 if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
4990 else if (call->gtType == TYP_BYREF)
4996 bool fPossibleSyncHelperCall = false;
4997 CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF;
4999 // We need to propagate the IL offset information to the call instruction, so we can emit
5000 // an IL to native mapping record for the call, to support managed return value debugging.
5001 // We don't want tail call helper calls that were converted from normal calls to get a record,
5002 // so we skip this hash table lookup logic in that case.
5003 if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
5005 (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
5008 #if defined(_TARGET_X86_)
5009 bool fCallerPop = (call->gtFlags & GTF_CALL_POP_ARGS) != 0;
5013 CorInfoCallConv callConv = CORINFO_CALLCONV_DEFAULT;
5015 if ((callType != CT_HELPER) && call->callSig)
5017 callConv = call->callSig->callConv;
5020 fCallerPop |= IsCallerPop(callConv);
5022 #endif // UNIX_X86_ABI
5024 // If the callee pops the arguments, we pass a positive value as the argSize, and the emitter will
5025 // adjust its stack level accordingly.
5026 // If the caller needs to explicitly pop its arguments, we must pass a negative value, and then do the
5027 // pop when we're done.
5028 ssize_t argSizeForEmitter = stackArgBytes;
5031 argSizeForEmitter = -stackArgBytes;
5033 #endif // defined(_TARGET_X86_)
5035 #ifdef FEATURE_AVX_SUPPORT
5036 // When it's a PInvoke call and the call type is USER function, we issue VZEROUPPER here
5037 // if the function contains 256bit AVX instructions, this is to avoid AVX-256 to Legacy SSE
5038 // transition penalty, assuming the user function contains legacy SSE instruction.
5039 // To limit code size increase impact: we only issue VZEROUPPER before PInvoke call, not issue
5040 // VZEROUPPER after PInvoke call because transition penalty from legacy SSE to AVX only happens
5041 // when there's preceding 256-bit AVX to legacy SSE transition penalty.
5042 if (call->IsPInvoke() && (call->gtCallType == CT_USER_FUNC) && getEmitter()->Contains256bitAVX())
5044 assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX);
5045 instGen(INS_vzeroupper);
5049 if (target != nullptr)
5052 if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT))
5054 // On x86, we need to generate a very specific pattern for indirect VSD calls:
5057 // call dword ptr [eax]
5059 // Where EAX is also used as an argument to the stub dispatch helper. Make
5060 // sure that the call target address is computed into EAX in this case.
5062 assert(REG_VIRTUAL_STUB_PARAM == REG_VIRTUAL_STUB_TARGET);
5064 assert(target->isContainedIndir());
5065 assert(target->OperGet() == GT_IND);
5067 GenTree* addr = target->AsIndir()->Addr();
5068 assert(addr->isUsedFromReg());
5070 genConsumeReg(addr);
5071 genCopyRegIfNeeded(addr, REG_VIRTUAL_STUB_TARGET);
5073 getEmitter()->emitIns_Nop(3);
5076 getEmitter()->emitIns_Call(emitter::EmitCallType(emitter::EC_INDIR_ARD),
5078 INDEBUG_LDISASM_COMMA(sigInfo)
5082 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
5083 gcInfo.gcVarPtrSetCur,
5084 gcInfo.gcRegGCrefSetCur,
5085 gcInfo.gcRegByrefSetCur,
5086 ilOffset, REG_VIRTUAL_STUB_TARGET, REG_NA, 1, 0);
5091 if (target->isContainedIndir())
5093 if (target->AsIndir()->HasBase() && target->AsIndir()->Base()->isContainedIntOrIImmed())
5095 // Note that if gtControlExpr is an indir of an absolute address, we mark it as
5096 // contained only if it can be encoded as PC-relative offset.
5097 assert(target->AsIndir()->Base()->AsIntConCommon()->FitsInAddrBase(compiler));
5100 genEmitCall(emitter::EC_FUNC_TOKEN_INDIR,
5102 INDEBUG_LDISASM_COMMA(sigInfo)
5103 (void*) target->AsIndir()->Base()->AsIntConCommon()->IconValue()
5104 X86_ARG(argSizeForEmitter),
5106 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
5113 genEmitCall(emitter::EC_INDIR_ARD,
5115 INDEBUG_LDISASM_COMMA(sigInfo)
5117 X86_ARG(argSizeForEmitter),
5119 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
5126 // We have already generated code for gtControlExpr evaluating it into a register.
5127 // We just need to emit "call reg" in this case.
5128 assert(genIsValidIntReg(target->gtRegNum));
5131 genEmitCall(emitter::EC_INDIR_R,
5133 INDEBUG_LDISASM_COMMA(sigInfo)
5135 X86_ARG(argSizeForEmitter),
5137 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
5139 genConsumeReg(target));
5143 #ifdef FEATURE_READYTORUN_COMPILER
5144 else if (call->gtEntryPoint.addr != nullptr)
5147 genEmitCall((call->gtEntryPoint.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN
5148 : emitter::EC_FUNC_TOKEN_INDIR,
5150 INDEBUG_LDISASM_COMMA(sigInfo)
5151 (void*) call->gtEntryPoint.addr
5152 X86_ARG(argSizeForEmitter),
5154 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
5161 // Generate a direct call to a non-virtual user defined or helper method
5162 assert(callType == CT_HELPER || callType == CT_USER_FUNC);
5164 void* addr = nullptr;
5165 if (callType == CT_HELPER)
5167 // Direct call to a helper method.
5168 helperNum = compiler->eeGetHelperNum(methHnd);
5169 noway_assert(helperNum != CORINFO_HELP_UNDEF);
5171 void* pAddr = nullptr;
5172 addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
5174 if (addr == nullptr)
5179 // tracking of region protected by the monitor in synchronized methods
5180 if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
5182 fPossibleSyncHelperCall = true;
5187 // Direct call to a non-virtual user function.
5188 addr = call->gtDirectCallAddress;
5191 // Non-virtual direct calls to known addresses
5194 genEmitCall(emitter::EC_FUNC_TOKEN,
5196 INDEBUG_LDISASM_COMMA(sigInfo)
5198 X86_ARG(argSizeForEmitter),
5200 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
5205 // if it was a pinvoke we may have needed to get the address of a label
5206 if (genPendingCallLabel)
5208 assert(call->IsUnmanaged());
5209 genDefineTempLabel(genPendingCallLabel);
5210 genPendingCallLabel = nullptr;
5214 // All Callee arg registers are trashed and no longer contain any GC pointers.
5215 // TODO-XArch-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here?
5216 // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other
5217 // registers from RBM_CALLEE_TRASH.
5218 assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
5219 assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
5220 gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
5221 gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;
5223 var_types returnType = call->TypeGet();
5224 if (returnType != TYP_VOID)
5227 if (varTypeIsFloating(returnType))
5229 // Spill the value from the fp stack.
5230 // Then, load it into the target register.
5231 call->gtFlags |= GTF_SPILL;
5232 regSet.rsSpillFPStack(call);
5233 call->gtFlags |= GTF_SPILLED;
5234 call->gtFlags &= ~GTF_SPILL;
5237 #endif // _TARGET_X86_
5239 regNumber returnReg;
5241 if (call->HasMultiRegRetVal())
5243 assert(retTypeDesc != nullptr);
5244 unsigned regCount = retTypeDesc->GetReturnRegCount();
5246 // If regs allocated to call node are different from ABI return
5247 // regs in which the call has returned its result, move the result
5248 // to regs allocated to call node.
5249 for (unsigned i = 0; i < regCount; ++i)
5251 var_types regType = retTypeDesc->GetReturnRegType(i);
5252 returnReg = retTypeDesc->GetABIReturnReg(i);
5253 regNumber allocatedReg = call->GetRegNumByIdx(i);
5254 if (returnReg != allocatedReg)
5256 inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
5261 // A Vector3 return value is stored in xmm0 and xmm1.
5262 // RyuJIT assumes that the upper unused bits of xmm1 are cleared but
5263 // the native compiler doesn't guarantee it.
5264 if (returnType == TYP_SIMD12)
5266 returnReg = retTypeDesc->GetABIReturnReg(1);
5267 // Clear the upper 32 bits by two shift instructions.
5268 // retReg = retReg << 96
5269 // retReg = retReg >> 96
5270 getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12);
5271 getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12);
5273 #endif // FEATURE_SIMD
5278 if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
5280 // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
5281 // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the
5282 // correct argument registers.
5283 returnReg = REG_PINVOKE_TCB;
5286 #endif // _TARGET_X86_
5287 if (varTypeIsFloating(returnType))
5289 returnReg = REG_FLOATRET;
5293 returnReg = REG_INTRET;
5296 if (call->gtRegNum != returnReg)
5298 inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
5302 genProduceReg(call);
5306 // If there is nothing next, that means the result is thrown away, so this value is not live.
5307 // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
5308 if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
5310 gcInfo.gcMarkRegSetNpt(RBM_INTRET);
5313 unsigned stackAdjustBias = 0;
5315 #if defined(_TARGET_X86_)
5316 //-------------------------------------------------------------------------
5317 // Create a label for tracking of region protected by the monitor in synchronized methods.
5318 // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
5319 // so the GC state vars have been updated before creating the label.
5321 if (fPossibleSyncHelperCall)
5325 case CORINFO_HELP_MON_ENTER:
5326 case CORINFO_HELP_MON_ENTER_STATIC:
5327 noway_assert(compiler->syncStartEmitCookie == NULL);
5328 compiler->syncStartEmitCookie =
5329 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
5330 noway_assert(compiler->syncStartEmitCookie != NULL);
5332 case CORINFO_HELP_MON_EXIT:
5333 case CORINFO_HELP_MON_EXIT_STATIC:
5334 noway_assert(compiler->syncEndEmitCookie == NULL);
5335 compiler->syncEndEmitCookie =
5336 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
5337 noway_assert(compiler->syncEndEmitCookie != NULL);
5344 // Is the caller supposed to pop the arguments?
5345 if (fCallerPop && (stackArgBytes != 0))
5347 stackAdjustBias = stackArgBytes;
5350 SubtractStackLevel(stackArgBytes);
5351 #endif // _TARGET_X86_
5353 genRemoveAlignmentAfterCall(call, stackAdjustBias);
5356 // Produce code for a GT_JMP node.
5357 // The arguments of the caller needs to be transferred to the callee before exiting caller.
5358 // The actual jump to callee is generated as part of caller epilog sequence.
5359 // Therefore the codegen of GT_JMP is to ensure that the callee arguments are correctly setup.
5360 void CodeGen::genJmpMethod(GenTreePtr jmp)
5362 assert(jmp->OperGet() == GT_JMP);
5363 assert(compiler->compJmpOpUsed);
5365 // If no arguments, nothing to do
5366 if (compiler->info.compArgsCount == 0)
5371 // Make sure register arguments are in their initial registers
5372 // and stack arguments are put back as well.
5376 // First move any en-registered stack arguments back to the stack.
5377 // At the same time any reg arg not in correct reg is moved back to its stack location.
5379 // We are not strictly required to spill reg args that are not in the desired reg for a jmp call
5380 // But that would require us to deal with circularity while moving values around. Spilling
5381 // to stack makes the implementation simple, which is not a bad trade off given Jmp calls
5382 // are not frequent.
5383 for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
5385 varDsc = compiler->lvaTable + varNum;
5387 if (varDsc->lvPromoted)
5389 noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
5391 unsigned fieldVarNum = varDsc->lvFieldLclStart;
5392 varDsc = compiler->lvaTable + fieldVarNum;
5394 noway_assert(varDsc->lvIsParam);
5396 if (varDsc->lvIsRegArg && (varDsc->lvRegNum != REG_STK))
5398 // Skip reg args which are already in its right register for jmp call.
5399 // If not, we will spill such args to their stack locations.
5401 // If we need to generate a tail call profiler hook, then spill all
5402 // arg regs to free them up for the callback.
5403 if (!compiler->compIsProfilerHookNeeded() && (varDsc->lvRegNum == varDsc->lvArgReg))
5408 else if (varDsc->lvRegNum == REG_STK)
5410 // Skip args which are currently living in stack.
5414 // If we came here it means either a reg argument not in the right register or
5415 // a stack argument currently living in a register. In either case the following
5416 // assert should hold.
5417 assert(varDsc->lvRegNum != REG_STK);
5419 var_types loadType = varDsc->lvaArgType();
5420 getEmitter()->emitIns_S_R(ins_Store(loadType), emitTypeSize(loadType), varDsc->lvRegNum, varNum, 0);
5422 // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live.
5423 // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
5424 // Therefore manually update life of varDsc->lvRegNum.
5425 regMaskTP tempMask = varDsc->lvRegMask();
5426 regSet.RemoveMaskVars(tempMask);
5427 gcInfo.gcMarkRegSetNpt(tempMask);
5428 if (compiler->lvaIsGCTracked(varDsc))
5431 if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
5433 JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
5437 JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
5441 VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
5445 #ifdef PROFILING_SUPPORTED
5446 // At this point all arg regs are free.
5447 // Emit tail call profiler callback.
5448 genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
5451 // Next move any un-enregistered register arguments back to their register.
5452 regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method.
5453 unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method.
5454 for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
5456 varDsc = compiler->lvaTable + varNum;
5457 if (varDsc->lvPromoted)
5459 noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
5461 unsigned fieldVarNum = varDsc->lvFieldLclStart;
5462 varDsc = compiler->lvaTable + fieldVarNum;
5464 noway_assert(varDsc->lvIsParam);
5466 // Skip if arg not passed in a register.
5467 if (!varDsc->lvIsRegArg)
5472 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5473 if (varTypeIsStruct(varDsc))
5475 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
5476 assert(typeHnd != nullptr);
5478 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
5479 compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
5480 assert(structDesc.passedInRegisters);
5482 unsigned __int8 offset0 = 0;
5483 unsigned __int8 offset1 = 0;
5484 var_types type0 = TYP_UNKNOWN;
5485 var_types type1 = TYP_UNKNOWN;
5487 // Get the eightbyte data
5488 compiler->GetStructTypeOffset(structDesc, &type0, &type1, &offset0, &offset1);
5490 // Move the values into the right registers.
5493 // Update varDsc->lvArgReg and lvOtherArgReg life and GC Info to indicate varDsc stack slot is dead and
5494 // argReg is going live. Note that we cannot modify varDsc->lvRegNum and lvOtherArgReg here because another
5495 // basic block may not be expecting it. Therefore manually update life of argReg. Note that GT_JMP marks
5496 // the end of the basic block and after which reg life and gc info will be recomputed for the new block in
5497 // genCodeForBBList().
5498 if (type0 != TYP_UNKNOWN)
5500 getEmitter()->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), varDsc->lvArgReg, varNum, offset0);
5501 regSet.rsMaskVars |= genRegMask(varDsc->lvArgReg);
5502 gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, type0);
5505 if (type1 != TYP_UNKNOWN)
5507 getEmitter()->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), varDsc->lvOtherArgReg, varNum, offset1);
5508 regSet.rsMaskVars |= genRegMask(varDsc->lvOtherArgReg);
5509 gcInfo.gcMarkRegPtrVal(varDsc->lvOtherArgReg, type1);
5512 if (varDsc->lvTracked)
5514 VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
5518 #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5520 // Register argument
5521 noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
5523 // Is register argument already in the right register?
5524 // If not load it from its stack location.
5525 var_types loadType = varDsc->lvaArgType();
5526 regNumber argReg = varDsc->lvArgReg; // incoming arg register
5528 if (varDsc->lvRegNum != argReg)
5530 assert(genIsValidReg(argReg));
5531 getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
5533 // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
5534 // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
5535 // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
5536 // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
5537 regSet.AddMaskVars(genRegMask(argReg));
5538 gcInfo.gcMarkRegPtrVal(argReg, loadType);
5539 if (compiler->lvaIsGCTracked(varDsc))
5542 if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
5544 JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming dead\n", varNum);
5548 JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing dead\n", varNum);
5552 VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
5557 #if FEATURE_VARARG && defined(_TARGET_AMD64_)
5558 // In case of a jmp call to a vararg method also pass the float/double arg in the corresponding int arg
5559 // register. This is due to the AMD64 ABI which requires floating point values passed to varargs functions to
5560 // be passed in both integer and floating point registers. It doesn't apply to x86, which passes floating point
5561 // values on the stack.
5562 if (compiler->info.compIsVarArgs)
5564 regNumber intArgReg;
5565 var_types loadType = varDsc->lvaArgType();
5566 regNumber argReg = varDsc->lvArgReg; // incoming arg register
5568 if (varTypeIsFloating(loadType))
5570 intArgReg = compiler->getCallArgIntRegister(argReg);
5571 instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
5572 inst_RV_RV(ins, argReg, intArgReg, loadType);
5579 fixedIntArgMask |= genRegMask(intArgReg);
5581 if (intArgReg == REG_ARG_0)
5583 assert(firstArgVarNum == BAD_VAR_NUM);
5584 firstArgVarNum = varNum;
5587 #endif // FEATURE_VARARG
5590 #if FEATURE_VARARG && defined(_TARGET_AMD64_)
5591 // Jmp call to a vararg method - if the method has fewer than 4 fixed arguments,
5592 // load the remaining arg registers (both int and float) from the corresponding
5593 // shadow stack slots. This is for the reason that we don't know the number and type
5594 // of non-fixed params passed by the caller, therefore we have to assume the worst case
5595 // of caller passing float/double args both in int and float arg regs.
5597 // This doesn't apply to x86, which doesn't pass floating point values in floating
5600 // The caller could have passed gc-ref/byref type var args. Since these are var args
5601 // the callee no way of knowing their gc-ness. Therefore, mark the region that loads
5602 // remaining arg registers from shadow stack slots as non-gc interruptible.
5603 if (fixedIntArgMask != RBM_NONE)
5605 assert(compiler->info.compIsVarArgs);
5606 assert(firstArgVarNum != BAD_VAR_NUM);
5608 regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
5609 if (remainingIntArgMask != RBM_NONE)
5611 instruction insCopyIntToFloat = ins_CopyIntToFloat(TYP_LONG, TYP_DOUBLE);
5612 getEmitter()->emitDisableGC();
5613 for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
5615 regNumber argReg = intArgRegs[argNum];
5616 regMaskTP argRegMask = genRegMask(argReg);
5618 if ((remainingIntArgMask & argRegMask) != 0)
5620 remainingIntArgMask &= ~argRegMask;
5621 getEmitter()->emitIns_R_S(INS_mov, EA_8BYTE, argReg, firstArgVarNum, argOffset);
5623 // also load it in corresponding float arg reg
5624 regNumber floatReg = compiler->getCallArgFloatRegister(argReg);
5625 inst_RV_RV(insCopyIntToFloat, floatReg, argReg);
5628 argOffset += REGSIZE_BYTES;
5630 getEmitter()->emitEnableGC();
5633 #endif // FEATURE_VARARG
5636 // produce code for a GT_LEA subnode
5637 void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
5639 emitAttr size = emitTypeSize(lea);
5640 genConsumeOperands(lea);
5642 if (lea->Base() && lea->Index())
5644 regNumber baseReg = lea->Base()->gtRegNum;
5645 regNumber indexReg = lea->Index()->gtRegNum;
5646 getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, baseReg, indexReg, lea->gtScale, lea->gtOffset);
5648 else if (lea->Base())
5650 getEmitter()->emitIns_R_AR(INS_lea, size, lea->gtRegNum, lea->Base()->gtRegNum, lea->gtOffset);
5652 else if (lea->Index())
5654 getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, REG_NA, lea->Index()->gtRegNum, lea->gtScale,
5661 //-------------------------------------------------------------------------------------------
5662 // genJumpKindsForTree: Determine the number and kinds of conditional branches
5663 // necessary to implement the given GT_CMP node
5666 // cmpTree - (input) The GenTree node that is used to set the Condition codes
5667 // - The GenTree Relop node that was used to set the Condition codes
5668 // jmpKind[2] - (output) One or two conditional branch instructions
5669 // jmpToTrueLabel[2] - (output) When true we branch to the true case
5670 // When false we create a second label and branch to the false case
5671 // Only GT_EQ for a floating point compares can have a false value.
5674 // Sets the proper values into the array elements of jmpKind[] and jmpToTrueLabel[]
5677 // At least one conditional branch instruction will be returned.
5678 // Typically only one conditional branch is needed
5679 // and the second jmpKind[] value is set to EJ_NONE
5682 // jmpToTrueLabel[i]= true implies branch when the compare operation is true.
5683 // jmpToTrueLabel[i]= false implies branch when the compare operation is false.
5684 //-------------------------------------------------------------------------------------------
5687 void CodeGen::genJumpKindsForTree(GenTreePtr cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2])
5689 // Except for BEQ (= ordered GT_EQ) both jumps are to the true label.
5690 jmpToTrueLabel[0] = true;
5691 jmpToTrueLabel[1] = true;
5693 // For integer comparisons just use genJumpKindForOper
5694 if (!varTypeIsFloating(cmpTree->gtOp.gtOp1->gtEffectiveVal()))
5696 CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
5697 jmpKind[0] = genJumpKindForOper(cmpTree->gtOper, compareKind);
5698 jmpKind[1] = EJ_NONE;
5702 assert(cmpTree->OperIsCompare());
5704 // For details on how we arrived at this mapping, see the comment block in genCodeForTreeNode()
5705 // while generating code for compare opererators (e.g. GT_EQ etc).
5706 if ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) != 0)
5708 // Must branch if we have an NaN, unordered
5709 switch (cmpTree->gtOper)
5714 jmpKind[1] = EJ_NONE;
5719 jmpKind[0] = EJ_jbe;
5720 jmpKind[1] = EJ_NONE;
5724 jmpKind[0] = EJ_jpe;
5725 jmpKind[1] = EJ_jne;
5730 jmpKind[1] = EJ_NONE;
5737 else // ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) == 0)
5739 // Do not branch if we have an NaN, unordered
5740 switch (cmpTree->gtOper)
5745 jmpKind[1] = EJ_NONE;
5750 jmpKind[0] = EJ_jae;
5751 jmpKind[1] = EJ_NONE;
5755 jmpKind[0] = EJ_jne;
5756 jmpKind[1] = EJ_NONE;
5760 jmpKind[0] = EJ_jpe;
5762 jmpToTrueLabel[0] = false;
5772 #if !defined(_TARGET_64BIT_)
5773 //------------------------------------------------------------------------
5774 // genJumpKindsForTreeLongHi: Generate the jump types for compare
5775 // operators of the high parts of a compare with long type operands
5776 // on x86 for the case where rel-op result needs to be materialized into a
5780 // cmpTree - The GT_CMP node
5781 // jmpKind - Return array of jump kinds
5782 // jmpToTrueLabel - Return array of if the jump is going to true label
5787 void CodeGen::genJumpKindsForTreeLongHi(GenTreePtr cmpTree, emitJumpKind jmpKind[2])
5789 assert(cmpTree->OperIsCompare());
5790 CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
5792 switch (cmpTree->gtOper)
5796 if (compareKind == CK_SIGNED)
5810 if (compareKind == CK_SIGNED)
5823 // GT_EQ will not jump to the true label if the hi parts are equal
5824 jmpKind[0] = EJ_NONE;
5825 jmpKind[1] = EJ_jne;
5829 // GT_NE will always jump to the true label if the high parts are not equal
5830 jmpKind[0] = EJ_jne;
5831 jmpKind[1] = EJ_NONE;
5839 //------------------------------------------------------------------------
5840 // genCompareLong: Generate code for comparing two longs on x86 when the result of the compare
5841 // is manifested in a register.
5844 // treeNode - the compare tree
5849 // For long compares, we need to compare the high parts of operands first, then the low parts.
5850 // If the high compare is false, we do not need to compare the low parts. For less than and
5851 // greater than, if the high compare is true, we can assume the entire compare is true. For
5852 // compares that are realized in a register, we will generate:
5854 // Opcode x86 equivalent Comment
5855 // ------ -------------- -------
5856 // GT_EQ cmp hiOp1,hiOp2 If any part is not equal, the entire compare
5857 // jne label is false.
5861 // GT_NE cmp hiOp1,hiOp2 If any part is not equal, the entire compare
5862 // jne label is true.
5866 // GT_LT; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
5867 // jne label correctly and we do not need to check lo. Otherwise,
5868 // cmp loOp1,loOp2 we need to compare the lo halves
5871 // GT_LE; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
5872 // jne label correctly and we do not need to check lo. Otherwise,
5873 // cmp loOp1,loOp2 we need to compare the lo halves
5876 // GT_GT; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
5877 // jne label correctly and we do not need to check lo. Otherwise,
5878 // cmp loOp1,loOp2 we need to compare the lo halves
5881 // GT_GE; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
5882 // jne label correctly and we do not need to check lo. Otherwise,
5883 // cmp loOp1,loOp2 we need to compare the lo halves
5886 // For signed long comparisons, we need additional labels, as we need to use signed conditions on the
5887 // "set" instruction:
5889 // GT_LT; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
5890 // jne labelHi correctly and we do not need to check lo. Otherwise,
5891 // cmp loOp1,loOp2 we need to compare the lo halves
5892 // setb Unsigned set for lo compare
5894 // labelHi: setl Signed set for high compare
5897 // GT_LE; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
5898 // jne labelHi correctly and we do not need to check lo. Otherwise,
5899 // cmp loOp1,loOp2 we need to compare the lo halves
5900 // setbe Unsigend set for lo compare
5902 // labelHi: setle Signed set for hi compare
5905 // GT_GT; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
5906 // jne labelHi correctly and we do not need to check lo. Otherwise,
5907 // cmp loOp1,loOp2 we need to compare the lo halves
5908 // seta Unsigned set for lo compare
5910 // labelHi: setg Signed set for high compare
5913 // GT_GE; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
5914 // jne labelHi correctly and we do not need to check lo. Otherwise,
5915 // cmp loOp1,loOp2 we need to compare the lo halves
5916 // setae Unsigned set for lo compare
5918 // labelHi: setge Signed set for hi compare
5921 // TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test.
5922 void CodeGen::genCompareLong(GenTreePtr treeNode)
5924 assert(treeNode->OperIsCompare());
5926 GenTreeOp* tree = treeNode->AsOp();
5927 GenTreePtr op1 = tree->gtOp1;
5928 GenTreePtr op2 = tree->gtOp2;
5930 assert(varTypeIsLong(op1->TypeGet()));
5931 assert(varTypeIsLong(op2->TypeGet()));
5933 regNumber targetReg = treeNode->gtRegNum;
5935 genConsumeOperands(tree);
5937 GenTreePtr loOp1 = op1->gtGetOp1();
5938 GenTreePtr hiOp1 = op1->gtGetOp2();
5939 GenTreePtr loOp2 = op2->gtGetOp1();
5940 GenTreePtr hiOp2 = op2->gtGetOp2();
5942 // Create compare for the high parts
5943 instruction ins = INS_cmp;
5944 var_types cmpType = TYP_INT;
5945 emitAttr cmpAttr = emitTypeSize(cmpType);
5947 // Emit the compare instruction
5948 getEmitter()->emitInsBinary(ins, cmpAttr, hiOp1, hiOp2);
5950 // If the result is not being materialized in a register, we're done.
5951 if (targetReg == REG_NA)
5956 // Generate the first jump for the high compare
5957 CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
5959 BasicBlock* labelHi = genCreateTempLabel();
5960 BasicBlock* labelFinal = genCreateTempLabel();
5962 if (compareKind == CK_SIGNED && (tree->gtOper != GT_NE && tree->gtOper != GT_EQ))
5964 // If we are doing a signed comparison, we need to do a signed set if the high compare is true,
5965 // but an unsigned set if we fall through to the low compare. If we have a GT_NE or GT_EQ, we do not
5966 // need to worry about the sign of the comparison, so we can use the simplified case.
5968 // We only have to check for equality for the hi comparison. If they are not equal, then the set will
5969 // do the right thing. If they are equal, we have to check the lo halves.
5970 inst_JMP(EJ_jne, labelHi);
5972 // Emit the comparison. Perform the set for the lo. Jump to labelFinal
5973 getEmitter()->emitInsBinary(ins, cmpAttr, loOp1, loOp2);
5975 // The low set must be unsigned
5976 emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
5978 inst_SET(jumpKindLo, targetReg);
5979 inst_JMP(EJ_jmp, labelFinal);
5981 // Define the label for hi jump target here. If we have jumped here, we want to set
5982 // the target register based on the jump kind of the actual compare type.
5984 genDefineTempLabel(labelHi);
5985 inst_SET(genJumpKindForOper(tree->gtOper, compareKind), targetReg);
5987 genDefineTempLabel(labelFinal);
5988 // Set the higher bytes to 0
5989 inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
5990 genProduceReg(tree);
5994 // If the compare is unsigned, or if the sign doesn't change the set instruction, we can use
5995 // the same set logic for both the hi and lo compare, so we don't need to jump to a high label,
5996 // we can just jump to the set that the lo compare will use.
5998 // We only have to check for equality for the hi comparison. If they are not equal, then the set will
5999 // do the right thing. If they are equal, we have to check the lo halves.
6000 inst_JMP(EJ_jne, labelFinal);
6002 // Emit the comparison
6003 getEmitter()->emitInsBinary(ins, cmpAttr, loOp1, loOp2);
6005 // Define the label for hi jump target here. If we have jumped here, we want to set
6006 // the target register based on the jump kind of the lower half (the actual compare
6007 // type). If we have fallen through, then we are doing a normal int compare for the
6010 genDefineTempLabel(labelFinal);
6012 // The low set must be unsigned
6013 emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
6015 inst_SET(jumpKindLo, targetReg);
6016 // Set the higher bytes to 0
6017 inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
6018 genProduceReg(tree);
6021 #endif //! defined(_TARGET_64BIT_)
6023 //------------------------------------------------------------------------
6024 // genCompareFloat: Generate code for comparing two floating point values
6027 // treeNode - the compare tree
6032 // SSE2 instruction ucomis[s|d] is performs unordered comparison and
6033 // updates rFLAGS register as follows.
6034 // Result of compare ZF PF CF
6035 // ----------------- ------------
6036 // Unordered 1 1 1 <-- this result implies one of operands of compare is a NAN.
6041 // From the above table the following equalities follow. As per ECMA spec *.UN opcodes perform
6042 // unordered comparison of floating point values. That is *.UN comparisons result in true when
6043 // one of the operands is a NaN whereas ordered comparisons results in false.
6045 // Opcode Amd64 equivalent Comment
6046 // ------ ----------------- --------
6047 // BLT.UN(a,b) ucomis[s|d] a, b Jb branches if CF=1, which means either a<b or unordered from the above
6050 // BLT(a,b) ucomis[s|d] b, a Ja branches if CF=0 and ZF=0, which means b>a that in turn implies a<b
6053 // BGT.UN(a,b) ucomis[s|d] b, a branch if b<a or unordered ==> branch if a>b or unordered
6056 // BGT(a, b) ucomis[s|d] a, b branch if a>b
6059 // BLE.UN(a,b) ucomis[s|d] a, b jbe branches if CF=1 or ZF=1, which implies a<=b or unordered
6062 // BLE(a,b) ucomis[s|d] b, a jae branches if CF=0, which mean b>=a or a<=b
6065 // BGE.UN(a,b) ucomis[s|d] b, a branch if b<=a or unordered ==> branch if a>=b or unordered
6068 // BGE(a,b) ucomis[s|d] a, b branch if a>=b
6071 // BEQ.UN(a,b) ucomis[s|d] a, b branch if a==b or unordered. There is no BEQ.UN opcode in ECMA spec.
6072 // je This case is given for completeness, in case if JIT generates such
6073 // a gentree internally.
6075 // BEQ(a,b) ucomis[s|d] a, b From the above table, PF=0 and ZF=1 corresponds to a==b.
6080 // BNE(a,b) ucomis[s|d] a, b branch if a!=b. There is no BNE opcode in ECMA spec. This case is
6081 // jne given for completeness, in case if JIT generates such a gentree
6084 // BNE.UN(a,b) ucomis[s|d] a, b From the above table, PF=1 or ZF=0 implies unordered or a!=b
6088 // As we can see from the above equalities that the operands of a compare operator need to be
6089 // reveresed in case of BLT/CLT, BGT.UN/CGT.UN, BLE/CLE, BGE.UN/CGE.UN.
6090 void CodeGen::genCompareFloat(GenTreePtr treeNode)
6092 assert(treeNode->OperIsCompare());
6094 GenTreeOp* tree = treeNode->AsOp();
6095 GenTreePtr op1 = tree->gtOp1;
6096 GenTreePtr op2 = tree->gtOp2;
6097 var_types op1Type = op1->TypeGet();
6098 var_types op2Type = op2->TypeGet();
6100 genConsumeOperands(tree);
6102 assert(varTypeIsFloating(op1Type));
6103 assert(op1Type == op2Type);
6105 regNumber targetReg = treeNode->gtRegNum;
6110 if ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0)
6112 // Unordered comparison case
6113 reverseOps = (tree->gtOper == GT_GT || tree->gtOper == GT_GE);
6117 reverseOps = (tree->gtOper == GT_LT || tree->gtOper == GT_LE);
6122 GenTreePtr tmp = op1;
6127 ins = ins_FloatCompare(op1Type);
6128 cmpAttr = emitTypeSize(op1Type);
6130 getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2);
6132 // Are we evaluating this into a register?
6133 if (targetReg != REG_NA)
6135 genSetRegToCond(targetReg, tree);
6136 genProduceReg(tree);
6140 //------------------------------------------------------------------------
6141 // genCompareInt: Generate code for comparing ints or, on amd64, longs.
6144 // treeNode - the compare tree
6148 void CodeGen::genCompareInt(GenTreePtr treeNode)
6150 assert(treeNode->OperIsCompare());
6152 GenTreeOp* tree = treeNode->AsOp();
6153 GenTreePtr op1 = tree->gtOp1;
6154 GenTreePtr op2 = tree->gtOp2;
6155 var_types op1Type = op1->TypeGet();
6156 var_types op2Type = op2->TypeGet();
6157 regNumber targetReg = tree->gtRegNum;
6159 // Case of op1 == 0 or op1 != 0:
6160 // Optimize generation of 'test' instruction if op1 sets flags.
6162 // Note that if LSRA has inserted any GT_RELOAD/GT_COPY before
6163 // op1, it will not modify the flags set by codegen of op1.
6164 // Similarly op1 could also be reg-optional at its use and
6165 // it was spilled after producing its result in a register.
6166 // Spill code too will not modify the flags set by op1.
6167 GenTree* realOp1 = op1->gtSkipReloadOrCopy();
6168 if (realOp1->gtSetFlags())
6170 // op1 must set ZF and SF flags
6171 assert(realOp1->gtSetZSFlags());
6173 // Must be (in)equality against zero.
6174 assert(tree->OperIs(GT_EQ, GT_NE));
6175 assert(op2->IsIntegralConst(0));
6176 assert(op2->isContained());
6178 // Just consume the operands
6179 genConsumeOperands(tree);
6181 // No need to generate test instruction since
6184 // Are we evaluating this into a register?
6185 if (targetReg != REG_NA)
6187 genSetRegToCond(targetReg, tree);
6188 genProduceReg(tree);
6195 // If we have GT_JTRUE(GT_EQ/NE(GT_SIMD((in)Equality, v1, v2), true/false)),
6196 // then we don't need to generate code for GT_EQ/GT_NE, since SIMD (in)Equality intrinsic
6197 // would set or clear Zero flag.
6198 if ((targetReg == REG_NA) && tree->OperIs(GT_EQ, GT_NE))
6200 // Is it a SIMD (in)Equality that doesn't need to materialize result into a register?
6201 if ((op1->gtRegNum == REG_NA) && op1->IsSIMDEqualityOrInequality())
6203 // Must be comparing against true or false.
6204 assert(op2->IsIntegralConst(0) || op2->IsIntegralConst(1));
6205 assert(op2->isContainedIntOrIImmed());
6207 // In this case SIMD (in)Equality will set or clear
6208 // Zero flag, based on which GT_JTRUE would generate
6209 // the right conditional jump.
6213 #endif // FEATURE_SIMD
6215 genConsumeOperands(tree);
6217 // TODO-CQ: We should be able to support swapping op1 and op2 to generate cmp reg, imm.
6218 // https://github.com/dotnet/coreclr/issues/7270
6219 assert(!op1->isContainedIntOrIImmed()); // We no longer support
6220 assert(!varTypeIsFloating(op2Type));
6224 if (tree->OperIs(GT_TEST_EQ, GT_TEST_NE))
6228 else if (op1->isUsedFromReg() && op2->IsIntegralConst(0))
6230 // We're comparing a register to 0 so we can generate "test reg1, reg1"
6231 // instead of the longer "cmp reg1, 0"
6242 if (op1Type == op2Type)
6246 else if (genTypeSize(op1Type) == genTypeSize(op2Type))
6248 // If the types are different but have the same size then we'll use TYP_INT or TYP_LONG.
6249 // This primarily deals with small type mixes (e.g. byte/ubyte) that need to be widened
6250 // and compared as int. We should not get long type mixes here but handle that as well
6252 type = genTypeSize(op1Type) == 8 ? TYP_LONG : TYP_INT;
6256 // In the types are different simply use TYP_INT. This deals with small type/int type
6257 // mixes (e.g. byte/short ubyte/int) that need to be widened and compared as int.
6258 // Lowering is expected to handle any mixes that involve long types (e.g. int/long).
6262 // The common type cannot be larger than the machine word size
6263 assert(genTypeSize(type) <= genTypeSize(TYP_I_IMPL));
6264 // The common type cannot be smaller than any of the operand types, we're probably mixing int/long
6265 assert(genTypeSize(type) >= max(genTypeSize(op1Type), genTypeSize(op2Type)));
6266 // TYP_UINT and TYP_ULONG should not appear here, only small types can be unsigned
6267 assert(!varTypeIsUnsigned(type) || varTypeIsSmall(type));
6268 // Small unsigned int types (TYP_BOOL can use anything) should use unsigned comparisons
6269 assert(!(varTypeIsSmallInt(type) && varTypeIsUnsigned(type)) || ((tree->gtFlags & GTF_UNSIGNED) != 0));
6270 // If op1 is smaller then it cannot be in memory, we're probably missing a cast
6271 assert((genTypeSize(op1Type) >= genTypeSize(type)) || !op1->isUsedFromMemory());
6272 // If op2 is smaller then it cannot be in memory, we're probably missing a cast
6273 assert((genTypeSize(op2Type) >= genTypeSize(type)) || !op2->isUsedFromMemory());
6274 // If op2 is a constant then it should fit in the common type
6275 assert(!op2->IsCnsIntOrI() || genTypeCanRepresentValue(type, op2->AsIntCon()->IconValue()));
6277 getEmitter()->emitInsBinary(ins, emitTypeSize(type), op1, op2);
6279 // Are we evaluating this into a register?
6280 if (targetReg != REG_NA)
6282 genSetRegToCond(targetReg, tree);
6283 genProduceReg(tree);
6287 //-------------------------------------------------------------------------------------------
6288 // genSetRegToCond: Set a register 'dstReg' to the appropriate one or zero value
6289 // corresponding to a binary Relational operator result.
6292 // dstReg - The target register to set to 1 or 0
6293 // tree - The GenTree Relop node that was used to set the Condition codes
6295 // Return Value: none
6298 // A full 64-bit value of either 1 or 0 is setup in the 'dstReg'
6299 //-------------------------------------------------------------------------------------------
6301 void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
6303 noway_assert((genRegMask(dstReg) & RBM_BYTE_REGS) != 0);
6305 emitJumpKind jumpKind[2];
6306 bool branchToTrueLabel[2];
6307 genJumpKindsForTree(tree, jumpKind, branchToTrueLabel);
6309 if (jumpKind[1] == EJ_NONE)
6311 // Set (lower byte of) reg according to the flags
6312 inst_SET(jumpKind[0], dstReg);
6317 // jmpKind[1] != EJ_NONE implies BEQ and BEN.UN of floating point values.
6318 // These are represented by two conditions.
6319 if (tree->gtOper == GT_EQ)
6321 // This must be an ordered comparison.
6322 assert((tree->gtFlags & GTF_RELOP_NAN_UN) == 0);
6326 // This must be BNE.UN
6327 assert((tree->gtOper == GT_NE) && ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0));
6331 // Here is the sample code generated in each case:
6332 // BEQ == cmp, jpe <false label>, je <true label>
6333 // That is, to materialize comparison reg needs to be set if PF=0 and ZF=1
6334 // setnp reg // if (PF==0) reg = 1 else reg = 0
6335 // jpe L1 // Jmp if PF==1
6339 // BNE.UN == cmp, jpe <true label>, jne <true label>
6340 // That is, to materialize the comparison reg needs to be set if either PF=1 or ZF=0;
6346 // reverse the jmpkind condition before setting dstReg if it is to false label.
6347 inst_SET(branchToTrueLabel[0] ? jumpKind[0] : emitter::emitReverseJumpKind(jumpKind[0]), dstReg);
6349 BasicBlock* label = genCreateTempLabel();
6350 inst_JMP(jumpKind[0], label);
6352 // second branch is always to true label
6353 assert(branchToTrueLabel[1]);
6354 inst_SET(jumpKind[1], dstReg);
6355 genDefineTempLabel(label);
6358 var_types treeType = tree->TypeGet();
6359 if (treeType == TYP_INT || treeType == TYP_LONG)
6361 // Set the higher bytes to 0
6362 inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), dstReg, dstReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
6366 noway_assert(treeType == TYP_BYTE);
6370 #if !defined(_TARGET_64BIT_)
6371 //------------------------------------------------------------------------
6372 // genIntToIntCast: Generate code for long to int casts on x86.
6375 // cast - The GT_CAST node
6381 // The cast node and its sources (via GT_LONG) must have been assigned registers.
6382 // The destination cannot be a floating point type or a small integer type.
6384 void CodeGen::genLongToIntCast(GenTree* cast)
6386 assert(cast->OperGet() == GT_CAST);
6388 GenTree* src = cast->gtGetOp1();
6389 noway_assert(src->OperGet() == GT_LONG);
6391 genConsumeRegs(src);
6393 var_types srcType = ((cast->gtFlags & GTF_UNSIGNED) != 0) ? TYP_ULONG : TYP_LONG;
6394 var_types dstType = cast->CastToType();
6395 regNumber loSrcReg = src->gtGetOp1()->gtRegNum;
6396 regNumber hiSrcReg = src->gtGetOp2()->gtRegNum;
6397 regNumber dstReg = cast->gtRegNum;
6399 assert((dstType == TYP_INT) || (dstType == TYP_UINT));
6400 assert(genIsValidIntReg(loSrcReg));
6401 assert(genIsValidIntReg(hiSrcReg));
6402 assert(genIsValidIntReg(dstReg));
6404 if (cast->gtOverflow())
6407 // Generate an overflow check for [u]long to [u]int casts:
6409 // long -> int - check if the upper 33 bits are all 0 or all 1
6411 // ulong -> int - check if the upper 33 bits are all 0
6413 // long -> uint - check if the upper 32 bits are all 0
6414 // ulong -> uint - check if the upper 32 bits are all 0
6417 if ((srcType == TYP_LONG) && (dstType == TYP_INT))
6419 BasicBlock* allOne = genCreateTempLabel();
6420 BasicBlock* success = genCreateTempLabel();
6422 inst_RV_RV(INS_test, loSrcReg, loSrcReg, TYP_INT, EA_4BYTE);
6423 inst_JMP(EJ_js, allOne);
6425 inst_RV_RV(INS_test, hiSrcReg, hiSrcReg, TYP_INT, EA_4BYTE);
6426 genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
6427 inst_JMP(EJ_jmp, success);
6429 genDefineTempLabel(allOne);
6430 inst_RV_IV(INS_cmp, hiSrcReg, -1, EA_4BYTE);
6431 genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
6433 genDefineTempLabel(success);
6437 if ((srcType == TYP_ULONG) && (dstType == TYP_INT))
6439 inst_RV_RV(INS_test, loSrcReg, loSrcReg, TYP_INT, EA_4BYTE);
6440 genJumpToThrowHlpBlk(EJ_js, SCK_OVERFLOW);
6443 inst_RV_RV(INS_test, hiSrcReg, hiSrcReg, TYP_INT, EA_4BYTE);
6444 genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
6448 if (dstReg != loSrcReg)
6450 inst_RV_RV(INS_mov, dstReg, loSrcReg, TYP_INT, EA_4BYTE);
6453 genProduceReg(cast);
6457 //------------------------------------------------------------------------
6458 // genIntToIntCast: Generate code for an integer cast
6459 // This method handles integer overflow checking casts
6460 // as well as ordinary integer casts.
6463 // treeNode - The GT_CAST node
6469 // The treeNode is not a contained node and must have an assigned register.
6470 // For a signed convert from byte, the source must be in a byte-addressable register.
6471 // Neither the source nor target type can be a floating point type.
6473 // TODO-XArch-CQ: Allow castOp to be a contained node without an assigned register.
6474 // TODO: refactor to use getCastDescription
6476 void CodeGen::genIntToIntCast(GenTreePtr treeNode)
6478 assert(treeNode->OperGet() == GT_CAST);
6480 GenTreePtr castOp = treeNode->gtCast.CastOp();
6481 var_types srcType = genActualType(castOp->TypeGet());
6483 #if !defined(_TARGET_64BIT_)
6484 if (varTypeIsLong(srcType))
6486 genLongToIntCast(treeNode);
6489 #endif // !defined(_TARGET_64BIT_)
6491 regNumber targetReg = treeNode->gtRegNum;
6492 regNumber sourceReg = castOp->gtRegNum;
6493 var_types dstType = treeNode->CastToType();
6494 bool isUnsignedDst = varTypeIsUnsigned(dstType);
6495 bool isUnsignedSrc = varTypeIsUnsigned(srcType);
6497 // if necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set
6498 if (!isUnsignedSrc && (treeNode->gtFlags & GTF_UNSIGNED) != 0)
6500 srcType = genUnsignedType(srcType);
6501 isUnsignedSrc = true;
6504 bool requiresOverflowCheck = false;
6505 bool needAndAfter = false;
6507 assert(genIsValidIntReg(targetReg));
6508 assert(genIsValidIntReg(sourceReg));
6510 instruction ins = INS_invalid;
6511 emitAttr size = EA_UNKNOWN;
6513 if (genTypeSize(srcType) < genTypeSize(dstType))
6517 // Is this an Overflow checking cast?
6518 // We only need to handle one case, as the other casts can never overflow.
6519 // cast from TYP_INT to TYP_ULONG
6521 if (treeNode->gtOverflow() && (srcType == TYP_INT) && (dstType == TYP_ULONG))
6523 requiresOverflowCheck = true;
6524 size = EA_ATTR(genTypeSize(srcType));
6529 // we need the source size
6530 size = EA_ATTR(genTypeSize(srcType));
6531 noway_assert(size < EA_PTRSIZE);
6533 ins = ins_Move_Extend(srcType, castOp->InReg());
6536 Special case: ins_Move_Extend assumes the destination type is no bigger
6537 than TYP_INT. movsx and movzx can already extend all the way to
6538 64-bit, and a regular 32-bit mov clears the high 32 bits (like the non-existant movzxd),
6539 but for a sign extension from TYP_INT to TYP_LONG, we need to use movsxd opcode.
6541 if (!isUnsignedSrc && !isUnsignedDst && (size == EA_4BYTE) && (genTypeSize(dstType) > EA_4BYTE))
6544 NYI_X86("Cast to 64 bit for x86/RyuJIT");
6545 #else // !_TARGET_X86_
6547 #endif // !_TARGET_X86_
6551 Special case: for a cast of byte to char we first
6552 have to expand the byte (w/ sign extension), then
6553 mask off the high bits.
6554 Use 'movsx' followed by 'and'
6556 if (!isUnsignedSrc && isUnsignedDst && (genTypeSize(dstType) < EA_4BYTE))
6558 noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE);
6559 needAndAfter = true;
6565 // Narrowing cast, or sign-changing cast
6566 noway_assert(genTypeSize(srcType) >= genTypeSize(dstType));
6568 // Is this an Overflow checking cast?
6569 if (treeNode->gtOverflow())
6571 requiresOverflowCheck = true;
6572 size = EA_ATTR(genTypeSize(srcType));
6577 size = EA_ATTR(genTypeSize(dstType));
6578 ins = ins_Move_Extend(dstType, castOp->InReg());
6582 noway_assert(ins != INS_invalid);
6584 genConsumeReg(castOp);
6586 if (requiresOverflowCheck)
6588 ssize_t typeMin = 0;
6589 ssize_t typeMax = 0;
6590 ssize_t typeMask = 0;
6591 bool needScratchReg = false;
6592 bool signCheckOnly = false;
6594 /* Do we need to compare the value, or just check masks */
6599 typeMask = ssize_t((int)0xFFFFFF80);
6600 typeMin = SCHAR_MIN;
6601 typeMax = SCHAR_MAX;
6605 typeMask = ssize_t((int)0xFFFFFF00L);
6609 typeMask = ssize_t((int)0xFFFF8000);
6615 typeMask = ssize_t((int)0xFFFF0000L);
6619 if (srcType == TYP_UINT)
6621 signCheckOnly = true;
6625 typeMask = 0xFFFFFFFF80000000LL;
6632 if (srcType == TYP_INT)
6634 signCheckOnly = true;
6638 needScratchReg = true;
6643 noway_assert(srcType == TYP_ULONG);
6644 signCheckOnly = true;
6648 noway_assert((srcType == TYP_LONG) || (srcType == TYP_INT));
6649 signCheckOnly = true;
6653 NO_WAY("Unknown type");
6659 // We only need to check for a negative value in sourceReg
6660 inst_RV_IV(INS_cmp, sourceReg, 0, size);
6661 genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
6665 regNumber tmpReg = REG_NA;
6669 // We need an additional temp register
6670 // Make sure we have exactly one allocated.
6671 assert(treeNode->gtRsvdRegs != RBM_NONE);
6672 assert(genCountBits(treeNode->gtRsvdRegs) == 1);
6673 tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
6676 // When we are converting from unsigned or to unsigned, we
6677 // will only have to check for any bits set using 'typeMask'
6678 if (isUnsignedSrc || isUnsignedDst)
6682 inst_RV_RV(INS_mov, tmpReg, sourceReg, TYP_LONG); // Move the 64-bit value to a writeable temp reg
6683 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, size, tmpReg, 32); // Shift right by 32 bits
6684 genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); // Thow if result shift is non-zero
6688 noway_assert(typeMask != 0);
6689 inst_RV_IV(INS_TEST, sourceReg, typeMask, size);
6690 genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
6695 // For a narrowing signed cast
6697 // We must check the value is in a signed range.
6699 // Compare with the MAX
6701 noway_assert((typeMin != 0) && (typeMax != 0));
6703 inst_RV_IV(INS_cmp, sourceReg, typeMax, size);
6704 genJumpToThrowHlpBlk(EJ_jg, SCK_OVERFLOW);
6706 // Compare with the MIN
6708 inst_RV_IV(INS_cmp, sourceReg, typeMin, size);
6709 genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
6713 if (targetReg != sourceReg
6714 #ifdef _TARGET_AMD64_
6715 // On amd64, we can hit this path for a same-register
6716 // 4-byte to 8-byte widening conversion, and need to
6717 // emit the instruction to set the high bits correctly.
6718 || (EA_ATTR(genTypeSize(dstType)) == EA_8BYTE && EA_ATTR(genTypeSize(srcType)) == EA_4BYTE)
6719 #endif // _TARGET_AMD64_
6721 inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
6723 else // non-overflow checking cast
6725 noway_assert(size < EA_PTRSIZE || srcType == dstType);
6727 // We may have code transformations that result in casts where srcType is the same as dstType.
6728 // e.g. Bug 824281, in which a comma is split by the rationalizer, leaving an assignment of a
6729 // long constant to a long lclVar.
6730 if (srcType == dstType)
6734 /* Is the value sitting in a non-byte-addressable register? */
6735 else if (castOp->InReg() && (size == EA_1BYTE) && !isByteReg(sourceReg))
6739 // for unsigned values we can AND, so it need not be a byte register
6744 // Move the value into a byte register
6745 noway_assert(!"Signed byte convert from non-byte-addressable register");
6748 /* Generate "mov targetReg, castOp->gtReg */
6749 if (targetReg != sourceReg)
6751 inst_RV_RV(INS_mov, targetReg, sourceReg, srcType);
6757 noway_assert((needAndAfter == false) && isUnsignedDst);
6759 /* Generate "and reg, MASK */
6760 unsigned fillPattern;
6761 if (size == EA_1BYTE)
6765 else if (size == EA_2BYTE)
6767 fillPattern = 0xffff;
6771 fillPattern = 0xffffffff;
6774 inst_RV_IV(INS_AND, targetReg, fillPattern, EA_4BYTE);
6776 #ifdef _TARGET_AMD64_
6777 else if (ins == INS_movsxd)
6779 noway_assert(!needAndAfter);
6780 inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
6782 #endif // _TARGET_AMD64_
6783 else if (ins == INS_mov)
6785 noway_assert(!needAndAfter);
6786 if (targetReg != sourceReg
6787 #ifdef _TARGET_AMD64_
6788 // On amd64, 'mov' is the opcode used to zero-extend from
6789 // 4 bytes to 8 bytes.
6790 || (EA_ATTR(genTypeSize(dstType)) == EA_8BYTE && EA_ATTR(genTypeSize(srcType)) == EA_4BYTE)
6791 #endif // _TARGET_AMD64_
6794 inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
6799 noway_assert(ins == INS_movsx || ins == INS_movzx);
6801 /* Generate "mov targetReg, castOp->gtReg */
6802 inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
6804 /* Mask off high bits for cast from byte to char */
6807 noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx);
6808 inst_RV_IV(INS_AND, targetReg, 0xFFFF, EA_4BYTE);
6813 genProduceReg(treeNode);
6816 //------------------------------------------------------------------------
6817 // genFloatToFloatCast: Generate code for a cast between float and double
6820 // treeNode - The GT_CAST node
6826 // Cast is a non-overflow conversion.
6827 // The treeNode must have an assigned register.
6828 // The cast is between float and double or vice versa.
6830 void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
6832 // float <--> double conversions are always non-overflow ones
6833 assert(treeNode->OperGet() == GT_CAST);
6834 assert(!treeNode->gtOverflow());
6836 regNumber targetReg = treeNode->gtRegNum;
6837 assert(genIsValidFloatReg(targetReg));
6839 GenTreePtr op1 = treeNode->gtOp.gtOp1;
6841 // If not contained, must be a valid float reg.
6842 if (op1->isUsedFromReg())
6844 assert(genIsValidFloatReg(op1->gtRegNum));
6848 var_types dstType = treeNode->CastToType();
6849 var_types srcType = op1->TypeGet();
6850 assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
6852 genConsumeOperands(treeNode->AsOp());
6853 if (srcType == dstType && (op1->isUsedFromReg() && (targetReg == op1->gtRegNum)))
6855 // source and destinations types are the same and also reside in the same register.
6856 // we just need to consume and produce the reg in this case.
6861 instruction ins = ins_FloatConv(dstType, srcType);
6862 getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
6865 genProduceReg(treeNode);
6868 //------------------------------------------------------------------------
6869 // genIntToFloatCast: Generate code to cast an int/long to float/double
6872 // treeNode - The GT_CAST node
6878 // Cast is a non-overflow conversion.
6879 // The treeNode must have an assigned register.
6880 // SrcType= int32/uint32/int64/uint64 and DstType=float/double.
6882 void CodeGen::genIntToFloatCast(GenTreePtr treeNode)
6884 // int type --> float/double conversions are always non-overflow ones
6885 assert(treeNode->OperGet() == GT_CAST);
6886 assert(!treeNode->gtOverflow());
6888 regNumber targetReg = treeNode->gtRegNum;
6889 assert(genIsValidFloatReg(targetReg));
6891 GenTreePtr op1 = treeNode->gtOp.gtOp1;
6893 if (op1->isUsedFromReg())
6895 assert(genIsValidIntReg(op1->gtRegNum));
6899 var_types dstType = treeNode->CastToType();
6900 var_types srcType = op1->TypeGet();
6901 assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
6903 #if !defined(_TARGET_64BIT_)
6904 // We expect morph to replace long to float/double casts with helper calls
6905 noway_assert(!varTypeIsLong(srcType));
6906 #endif // !defined(_TARGET_64BIT_)
6908 // Since xarch emitter doesn't handle reporting gc-info correctly while casting away gc-ness we
6909 // ensure srcType of a cast is non gc-type. Codegen should never see BYREF as source type except
6910 // for GT_LCL_VAR_ADDR and GT_LCL_FLD_ADDR that represent stack addresses and can be considered
6911 // as TYP_I_IMPL. In all other cases where src operand is a gc-type and not known to be on stack,
6912 // Front-end (see fgMorphCast()) ensures this by assigning gc-type local to a non gc-type
6913 // temp and using temp as operand of cast operation.
6914 if (srcType == TYP_BYREF)
6916 noway_assert(op1->OperGet() == GT_LCL_VAR_ADDR || op1->OperGet() == GT_LCL_FLD_ADDR);
6917 srcType = TYP_I_IMPL;
6920 // force the srcType to unsigned if GT_UNSIGNED flag is set
6921 if (treeNode->gtFlags & GTF_UNSIGNED)
6923 srcType = genUnsignedType(srcType);
6926 noway_assert(!varTypeIsGC(srcType));
6928 // We should never be seeing srcType whose size is not sizeof(int) nor sizeof(long).
6929 // For conversions from byte/sbyte/int16/uint16 to float/double, we would expect
6930 // either the front-end or lowering phase to have generated two levels of cast.
6931 // The first one is for widening smaller int type to int32 and the second one is
6932 // to the float/double.
6933 emitAttr srcSize = EA_ATTR(genTypeSize(srcType));
6934 noway_assert((srcSize == EA_ATTR(genTypeSize(TYP_INT))) || (srcSize == EA_ATTR(genTypeSize(TYP_LONG))));
6936 // Also we don't expect to see uint32 -> float/double and uint64 -> float conversions
6937 // here since they should have been lowered apropriately.
6938 noway_assert(srcType != TYP_UINT);
6939 noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT));
6941 // To convert int to a float/double, cvtsi2ss/sd SSE2 instruction is used
6942 // which does a partial write to lower 4/8 bytes of xmm register keeping the other
6943 // upper bytes unmodified. If "cvtsi2ss/sd xmmReg, r32/r64" occurs inside a loop,
6944 // the partial write could introduce a false dependency and could cause a stall
6945 // if there are further uses of xmmReg. We have such a case occuring with a
6946 // customer reported version of SpectralNorm benchmark, resulting in 2x perf
6947 // regression. To avoid false dependency, we emit "xorps xmmReg, xmmReg" before
6948 // cvtsi2ss/sd instruction.
6950 genConsumeOperands(treeNode->AsOp());
6951 getEmitter()->emitIns_R_R(INS_xorps, EA_4BYTE, treeNode->gtRegNum, treeNode->gtRegNum);
6953 // Note that here we need to specify srcType that will determine
6954 // the size of source reg/mem operand and rex.w prefix.
6955 instruction ins = ins_FloatConv(dstType, TYP_INT);
6956 getEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1);
6958 // Handle the case of srcType = TYP_ULONG. SSE2 conversion instruction
6959 // will interpret ULONG value as LONG. Hence we need to adjust the
6960 // result if sign-bit of srcType is set.
6961 if (srcType == TYP_ULONG)
6963 // The instruction sequence below is less accurate than what clang
6964 // and gcc generate. However, we keep the current sequence for backward compatiblity.
6965 // If we change the instructions below, FloatingPointUtils::convertUInt64ToDobule
6966 // should be also updated for consistent conversion result.
6967 assert(dstType == TYP_DOUBLE);
6968 assert(op1->isUsedFromReg());
6970 // Set the flags without modifying op1.
6971 // test op1Reg, op1Reg
6972 inst_RV_RV(INS_test, op1->gtRegNum, op1->gtRegNum, srcType);
6974 // No need to adjust result if op1 >= 0 i.e. positive
6976 BasicBlock* label = genCreateTempLabel();
6977 inst_JMP(EJ_jge, label);
6979 // Adjust the result
6980 // result = result + 0x43f00000 00000000
6981 // addsd resultReg, 0x43f00000 00000000
6982 GenTreePtr* cns = &u8ToDblBitmask;
6983 if (*cns == nullptr)
6986 static_assert_no_msg(sizeof(double) == sizeof(__int64));
6987 *((__int64*)&d) = 0x43f0000000000000LL;
6989 *cns = genMakeConst(&d, dstType, treeNode, true);
6991 inst_RV_TT(INS_addsd, treeNode->gtRegNum, *cns);
6993 genDefineTempLabel(label);
6996 genProduceReg(treeNode);
6999 //------------------------------------------------------------------------
7000 // genFloatToIntCast: Generate code to cast float/double to int/long
7003 // treeNode - The GT_CAST node
7009 // Cast is a non-overflow conversion.
7010 // The treeNode must have an assigned register.
7011 // SrcType=float/double and DstType= int32/uint32/int64/uint64
7013 // TODO-XArch-CQ: (Low-pri) - generate in-line code when DstType = uint64
7015 void CodeGen::genFloatToIntCast(GenTreePtr treeNode)
7017 // we don't expect to see overflow detecting float/double --> int type conversions here
7018 // as they should have been converted into helper calls by front-end.
7019 assert(treeNode->OperGet() == GT_CAST);
7020 assert(!treeNode->gtOverflow());
7022 regNumber targetReg = treeNode->gtRegNum;
7023 assert(genIsValidIntReg(targetReg));
7025 GenTreePtr op1 = treeNode->gtOp.gtOp1;
7027 if (op1->isUsedFromReg())
7029 assert(genIsValidFloatReg(op1->gtRegNum));
7033 var_types dstType = treeNode->CastToType();
7034 var_types srcType = op1->TypeGet();
7035 assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));
7037 // We should never be seeing dstType whose size is neither sizeof(TYP_INT) nor sizeof(TYP_LONG).
7038 // For conversions to byte/sbyte/int16/uint16 from float/double, we would expect the
7039 // front-end or lowering phase to have generated two levels of cast. The first one is
7040 // for float or double to int32/uint32 and the second one for narrowing int32/uint32 to
7041 // the required smaller int type.
7042 emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
7043 noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG))));
7045 // We shouldn't be seeing uint64 here as it should have been converted
7046 // into a helper call by either front-end or lowering phase.
7047 noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))));
7049 // If the dstType is TYP_UINT, we have 32-bits to encode the
7050 // float number. Any of 33rd or above bits can be the sign bit.
7051 // To acheive it we pretend as if we are converting it to a long.
7052 if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))))
7057 // Note that we need to specify dstType here so that it will determine
7058 // the size of destination integer register and also the rex.w prefix.
7059 genConsumeOperands(treeNode->AsOp());
7060 instruction ins = ins_FloatConv(TYP_INT, srcType);
7061 getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
7062 genProduceReg(treeNode);
7065 //------------------------------------------------------------------------
7066 // genCkfinite: Generate code for ckfinite opcode.
7069 // treeNode - The GT_CKFINITE node
7075 // GT_CKFINITE node has reserved an internal register.
7077 // TODO-XArch-CQ - mark the operand as contained if known to be in
7078 // memory (e.g. field or an array element).
7080 void CodeGen::genCkfinite(GenTreePtr treeNode)
7082 assert(treeNode->OperGet() == GT_CKFINITE);
7084 GenTreePtr op1 = treeNode->gtOp.gtOp1;
7085 var_types targetType = treeNode->TypeGet();
7086 int expMask = (targetType == TYP_FLOAT) ? 0x7F800000 : 0x7FF00000; // Bit mask to extract exponent.
7087 regNumber targetReg = treeNode->gtRegNum;
7089 // Extract exponent into a register.
7090 assert(treeNode->gtRsvdRegs != RBM_NONE);
7091 assert(genCountBits(treeNode->gtRsvdRegs) == 1);
7092 regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
7096 #ifdef _TARGET_64BIT_
7098 // Copy the floating-point value to an integer register. If we copied a float to a long, then
7099 // right-shift the value so the high 32 bits of the floating-point value sit in the low 32
7100 // bits of the integer register.
7101 instruction ins = ins_CopyFloatToInt(targetType, (targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG);
7102 inst_RV_RV(ins, op1->gtRegNum, tmpReg, targetType);
7103 if (targetType == TYP_DOUBLE)
7105 // right shift by 32 bits to get to exponent.
7106 inst_RV_SH(INS_shr, EA_8BYTE, tmpReg, 32);
7109 // Mask exponent with all 1's and check if the exponent is all 1's
7110 inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
7111 inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE);
7113 // If exponent is all 1's, throw ArithmeticException
7114 genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);
7116 // if it is a finite value copy it to targetReg
7117 if (targetReg != op1->gtRegNum)
7119 inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
7122 #else // !_TARGET_64BIT_
7124 // If the target type is TYP_DOUBLE, we want to extract the high 32 bits into the register.
7125 // There is no easy way to do this. To not require an extra register, we'll use shuffles
7126 // to move the high 32 bits into the low 32 bits, then then shuffle it back, since we
7127 // need to produce the value into the target register.
7129 // For TYP_DOUBLE, we'll generate (for targetReg != op1->gtRegNum):
7130 // movaps targetReg, op1->gtRegNum
7131 // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
7132 // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
7133 // and tmpReg, <mask>
7134 // cmp tmpReg, <mask>
7136 // movaps targetReg, op1->gtRegNum // copy the value again, instead of un-shuffling it
7138 // For TYP_DOUBLE with (targetReg == op1->gtRegNum):
7139 // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
7140 // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
7141 // and tmpReg, <mask>
7142 // cmp tmpReg, <mask>
7144 // shufps targetReg, targetReg, 0xB1 // ZWXY => WZYX
7146 // For TYP_FLOAT, it's the same as _TARGET_64BIT_:
7147 // mov_xmm2i tmpReg, targetReg // tmpReg <= low 32 bits
7148 // and tmpReg, <mask>
7149 // cmp tmpReg, <mask>
7151 // movaps targetReg, op1->gtRegNum // only if targetReg != op1->gtRegNum
7153 regNumber copyToTmpSrcReg; // The register we'll copy to the integer temp.
7155 if (targetType == TYP_DOUBLE)
7157 if (targetReg != op1->gtRegNum)
7159 inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
7161 inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, 0xb1);
7162 copyToTmpSrcReg = targetReg;
7166 copyToTmpSrcReg = op1->gtRegNum;
7169 // Copy only the low 32 bits. This will be the high order 32 bits of the floating-point
7170 // value, no matter the floating-point type.
7171 inst_RV_RV(ins_CopyFloatToInt(TYP_FLOAT, TYP_INT), copyToTmpSrcReg, tmpReg, TYP_FLOAT);
7173 // Mask exponent with all 1's and check if the exponent is all 1's
7174 inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
7175 inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE);
7177 // If exponent is all 1's, throw ArithmeticException
7178 genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);
7180 if (targetReg != op1->gtRegNum)
7182 // In both the TYP_FLOAT and TYP_DOUBLE case, the op1 register is untouched,
7183 // so copy it to the targetReg. This is faster and smaller for TYP_DOUBLE
7184 // than re-shuffling the targetReg.
7185 inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
7187 else if (targetType == TYP_DOUBLE)
7189 // We need to re-shuffle the targetReg to get the correct result.
7190 inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, 0xb1);
7193 #endif // !_TARGET_64BIT_
7195 genProduceReg(treeNode);
7198 #ifdef _TARGET_AMD64_
7199 int CodeGenInterface::genSPtoFPdelta()
7203 #ifdef UNIX_AMD64_ABI
7205 // We require frame chaining on Unix to support native tool unwinding (such as
7206 // unwinding by the native debugger). We have a CLR-only extension to the
7207 // unwind codes (UWOP_SET_FPREG_LARGE) to support SP->FP offsets larger than 240.
7208 // If Unix ever supports EnC, the RSP == RBP assumption will have to be reevaluated.
7209 delta = genTotalFrameSize();
7211 #else // !UNIX_AMD64_ABI
7213 // As per Amd64 ABI, RBP offset from initial RSP can be between 0 and 240 if
7214 // RBP needs to be reported in unwind codes. This case would arise for methods
7216 if (compiler->compLocallocUsed)
7218 // We cannot base delta computation on compLclFrameSize since it changes from
7219 // tentative to final frame layout and hence there is a possibility of
7220 // under-estimating offset of vars from FP, which in turn results in under-
7221 // estimating instruction size.
7223 // To be predictive and so as never to under-estimate offset of vars from FP
7224 // we will always position FP at min(240, outgoing arg area size).
7225 delta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize);
7227 else if (compiler->opts.compDbgEnC)
7229 // vm assumption on EnC methods is that rsp and rbp are equal
7234 delta = genTotalFrameSize();
7237 #endif // !UNIX_AMD64_ABI
7242 //---------------------------------------------------------------------
7243 // genTotalFrameSize - return the total size of the stack frame, including local size,
7244 // callee-saved register size, etc. For AMD64, this does not include the caller-pushed
7251 int CodeGenInterface::genTotalFrameSize()
7253 assert(!IsUninitialized(compiler->compCalleeRegsPushed));
7255 int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
7257 assert(totalFrameSize >= 0);
7258 return totalFrameSize;
7261 //---------------------------------------------------------------------
7262 // genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
7263 // This number is going to be negative, since the Caller-SP is at a higher
7264 // address than the frame pointer.
7266 // There must be a frame pointer to call this function!
7268 // We can't compute this directly from the Caller-SP, since the frame pointer
7269 // is based on a maximum delta from Initial-SP, so first we find SP, then
7270 // compute the FP offset.
7272 int CodeGenInterface::genCallerSPtoFPdelta()
7274 assert(isFramePointerUsed());
7275 int callerSPtoFPdelta;
7277 callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta();
7279 assert(callerSPtoFPdelta <= 0);
7280 return callerSPtoFPdelta;
7283 //---------------------------------------------------------------------
7284 // genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
7286 // This number will be negative.
7288 int CodeGenInterface::genCallerSPtoInitialSPdelta()
7290 int callerSPtoSPdelta = 0;
7292 callerSPtoSPdelta -= genTotalFrameSize();
7293 callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address
7295 // compCalleeRegsPushed does not account for the frame pointer
7296 // TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
7297 if (isFramePointerUsed())
7299 callerSPtoSPdelta -= REGSIZE_BYTES;
7302 assert(callerSPtoSPdelta <= 0);
7303 return callerSPtoSPdelta;
7305 #endif // _TARGET_AMD64_
7307 //-----------------------------------------------------------------------------------------
7308 // genSSE2BitwiseOp - generate SSE2 code for the given oper as "Operand BitWiseOp BitMask"
7311 // treeNode - tree node
7317 // i) tree oper is one of GT_NEG or GT_INTRINSIC Abs()
7318 // ii) tree type is floating point type.
7319 // iii) caller of this routine needs to call genProduceReg()
7320 void CodeGen::genSSE2BitwiseOp(GenTreePtr treeNode)
7322 regNumber targetReg = treeNode->gtRegNum;
7323 var_types targetType = treeNode->TypeGet();
7324 assert(varTypeIsFloating(targetType));
7328 GenTreePtr* bitMask = nullptr;
7329 instruction ins = INS_invalid;
7330 void* cnsAddr = nullptr;
7331 bool dblAlign = false;
7333 switch (treeNode->OperGet())
7336 // Neg(x) = flip the sign bit.
7337 // Neg(f) = f ^ 0x80000000
7338 // Neg(d) = d ^ 0x8000000000000000
7339 ins = genGetInsForOper(GT_XOR, targetType);
7340 if (targetType == TYP_FLOAT)
7342 bitMask = &negBitmaskFlt;
7344 static_assert_no_msg(sizeof(float) == sizeof(int));
7345 *((int*)&f) = 0x80000000;
7350 bitMask = &negBitmaskDbl;
7352 static_assert_no_msg(sizeof(double) == sizeof(__int64));
7353 *((__int64*)&d) = 0x8000000000000000LL;
7360 assert(treeNode->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs);
7362 // Abs(x) = set sign-bit to zero
7363 // Abs(f) = f & 0x7fffffff
7364 // Abs(d) = d & 0x7fffffffffffffff
7365 ins = genGetInsForOper(GT_AND, targetType);
7366 if (targetType == TYP_FLOAT)
7368 bitMask = &absBitmaskFlt;
7370 static_assert_no_msg(sizeof(float) == sizeof(int));
7371 *((int*)&f) = 0x7fffffff;
7376 bitMask = &absBitmaskDbl;
7378 static_assert_no_msg(sizeof(double) == sizeof(__int64));
7379 *((__int64*)&d) = 0x7fffffffffffffffLL;
7386 assert(!"genSSE2: unsupported oper");
7391 if (*bitMask == nullptr)
7393 assert(cnsAddr != nullptr);
7394 *bitMask = genMakeConst(cnsAddr, targetType, treeNode, dblAlign);
7397 // We need an additional register for bitmask.
7398 // Make sure we have one allocated.
7399 assert(treeNode->gtRsvdRegs != RBM_NONE);
7400 assert(genCountBits(treeNode->gtRsvdRegs) == 1);
7401 regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
7403 // Move operand into targetReg only if the reg reserved for
7404 // internal purpose is not the same as targetReg.
7405 GenTreePtr op1 = treeNode->gtOp.gtOp1;
7406 assert(op1->isUsedFromReg());
7407 regNumber operandReg = genConsumeReg(op1);
7408 if (tmpReg != targetReg)
7410 if (operandReg != targetReg)
7412 inst_RV_RV(ins_Copy(targetType), targetReg, operandReg, targetType);
7415 operandReg = tmpReg;
7418 inst_RV_TT(ins_Load(targetType, false), tmpReg, *bitMask);
7419 assert(ins != INS_invalid);
7420 inst_RV_RV(ins, targetReg, operandReg, targetType);
7423 //---------------------------------------------------------------------
7424 // genIntrinsic - generate code for a given intrinsic
7427 // treeNode - the GT_INTRINSIC node
7432 void CodeGen::genIntrinsic(GenTreePtr treeNode)
7434 // Right now only Sqrt/Abs are treated as math intrinsics.
7435 switch (treeNode->gtIntrinsic.gtIntrinsicId)
7437 case CORINFO_INTRINSIC_Sqrt:
7439 // Both operand and its result must be of the same floating point type.
7440 GenTreePtr srcNode = treeNode->gtOp.gtOp1;
7441 assert(varTypeIsFloating(srcNode));
7442 assert(srcNode->TypeGet() == treeNode->TypeGet());
7444 genConsumeOperands(treeNode->AsOp());
7445 getEmitter()->emitInsBinary(ins_FloatSqrt(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode, srcNode);
7449 case CORINFO_INTRINSIC_Abs:
7450 genSSE2BitwiseOp(treeNode);
7454 assert(!"genIntrinsic: Unsupported intrinsic");
7458 genProduceReg(treeNode);
7461 //-------------------------------------------------------------------------- //
7462 // getBaseVarForPutArgStk - returns the baseVarNum for passing a stack arg.
7465 // treeNode - the GT_PUTARG_STK node
7468 // The number of the base variable.
7471 // If tail call the outgoing args are placed in the caller's incoming arg stack space.
7472 // Otherwise, they go in the outgoing arg area on the current frame.
7474 // On Windows the caller always creates slots (homing space) in its frame for the
7475 // first 4 arguments of a callee (register passed args). So, the baseVarNum is always 0.
7476 // For System V systems there is no such calling convention requirement, and the code needs to find
7477 // the first stack passed argument from the caller. This is done by iterating over
7478 // all the lvParam variables and finding the first with lvArgReg equals to REG_STK.
7480 unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
7482 assert(treeNode->OperGet() == GT_PUTARG_STK);
7484 unsigned baseVarNum;
7486 // Whether to setup stk arg in incoming or out-going arg area?
7487 // Fast tail calls implemented as epilog+jmp = stk arg is setup in incoming arg area.
7488 // All other calls - stk arg is setup in out-going arg area.
7489 if (treeNode->AsPutArgStk()->putInIncomingArgArea())
7491 // See the note in the function header re: finding the first stack passed argument.
7492 baseVarNum = getFirstArgWithStackSlot();
7493 assert(baseVarNum != BAD_VAR_NUM);
7496 // This must be a fast tail call.
7497 assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall());
7499 // Since it is a fast tail call, the existence of first incoming arg is guaranteed
7500 // because fast tail call requires that in-coming arg area of caller is >= out-going
7501 // arg area required for tail call.
7502 LclVarDsc* varDsc = &(compiler->lvaTable[baseVarNum]);
7503 assert(varDsc != nullptr);
7505 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
7506 assert(!varDsc->lvIsRegArg && varDsc->lvArgReg == REG_STK);
7507 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
7508 // On Windows this assert is always true. The first argument will always be in REG_ARG_0 or REG_FLTARG_0.
7509 assert(varDsc->lvIsRegArg && (varDsc->lvArgReg == REG_ARG_0 || varDsc->lvArgReg == REG_FLTARG_0));
7510 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
7515 #if FEATURE_FIXED_OUT_ARGS
7516 baseVarNum = compiler->lvaOutgoingArgSpaceVar;
7517 #else // !FEATURE_FIXED_OUT_ARGS
7518 assert(!"No BaseVarForPutArgStk on x86");
7519 baseVarNum = BAD_VAR_NUM;
7520 #endif // !FEATURE_FIXED_OUT_ARGS
7526 //---------------------------------------------------------------------
7527 // genAlignStackBeforeCall: Align the stack if necessary before a call.
7530 // putArgStk - the putArgStk node.
7532 void CodeGen::genAlignStackBeforeCall(GenTreePutArgStk* putArgStk)
7534 #if defined(UNIX_X86_ABI)
7536 genAlignStackBeforeCall(putArgStk->gtCall);
7538 #endif // UNIX_X86_ABI
7541 //---------------------------------------------------------------------
7542 // genAlignStackBeforeCall: Align the stack if necessary before a call.
7545 // call - the call node.
7547 void CodeGen::genAlignStackBeforeCall(GenTreeCall* call)
7549 #if defined(UNIX_X86_ABI)
7551 // Have we aligned the stack yet?
7552 if (!call->fgArgInfo->IsStkAlignmentDone())
7554 // We haven't done any stack alignment yet for this call. We might need to create
7555 // an alignment adjustment, even if this function itself doesn't have any stack args.
7556 // This can happen if this function call is part of a nested call sequence, and the outer
7557 // call has already pushed some arguments.
7559 unsigned stkLevel = genStackLevel + call->fgArgInfo->GetStkSizeBytes();
7560 call->fgArgInfo->ComputeStackAlignment(stkLevel);
7562 unsigned padStkAlign = call->fgArgInfo->GetStkAlign();
7563 if (padStkAlign != 0)
7565 // Now generate the alignment
7566 inst_RV_IV(INS_sub, REG_SPBASE, padStkAlign, EA_PTRSIZE);
7567 AddStackLevel(padStkAlign);
7568 AddNestedAlignment(padStkAlign);
7571 call->fgArgInfo->SetStkAlignmentDone();
7574 #endif // UNIX_X86_ABI
7577 //---------------------------------------------------------------------
7578 // genRemoveAlignmentAfterCall: After a call, remove the alignment
7579 // added before the call, if any.
7582 // call - the call node.
7583 // bias - additional stack adjustment
7586 // When bias > 0, caller should adjust stack level appropriately as
7587 // bias is not considered when adjusting stack level.
7589 void CodeGen::genRemoveAlignmentAfterCall(GenTreeCall* call, unsigned bias)
7591 #if defined(_TARGET_X86_)
7592 #if defined(UNIX_X86_ABI)
7593 // Put back the stack pointer if there was any padding for stack alignment
7594 unsigned padStkAlign = call->fgArgInfo->GetStkAlign();
7595 unsigned padStkAdjust = padStkAlign + bias;
7597 if (padStkAdjust != 0)
7599 inst_RV_IV(INS_add, REG_SPBASE, padStkAdjust, EA_PTRSIZE);
7600 SubtractStackLevel(padStkAlign);
7601 SubtractNestedAlignment(padStkAlign);
7603 #else // UNIX_X86_ABI
7608 #endif // !UNIX_X86_ABI_
7609 #else // _TARGET_X86_
7611 #endif // !_TARGET_X86
7616 //---------------------------------------------------------------------
7617 // genAdjustStackForPutArgStk:
7618 // adjust the stack pointer for a putArgStk node if necessary.
7621 // putArgStk - the putArgStk node.
7623 // Returns: true if the stack pointer was adjusted; false otherwise.
7626 // Sets `m_pushStkArg` to true if the stack arg needs to be pushed,
7627 // false if the stack arg needs to be stored at the current stack
7628 // pointer address. This is exactly the opposite of the return value
7629 // of this function.
7631 bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk)
7634 if (varTypeIsSIMD(putArgStk))
7636 const unsigned argSize = genTypeSize(putArgStk);
7637 inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE);
7638 AddStackLevel(argSize);
7639 m_pushStkArg = false;
7642 #endif // FEATURE_SIMD
7644 const unsigned argSize = putArgStk->getArgSize();
7646 // If the gtPutArgStkKind is one of the push types, we do not pre-adjust the stack.
7647 // This is set in Lowering, and is true if and only if:
7648 // - This argument contains any GC pointers OR
7649 // - It is a GT_FIELD_LIST OR
7650 // - It is less than 16 bytes in size.
7651 CLANG_FORMAT_COMMENT_ANCHOR;
7654 switch (putArgStk->gtPutArgStkKind)
7656 case GenTreePutArgStk::Kind::RepInstr:
7657 case GenTreePutArgStk::Kind::Unroll:
7658 assert((putArgStk->gtNumberReferenceSlots == 0) && (putArgStk->gtGetOp1()->OperGet() != GT_FIELD_LIST) &&
7661 case GenTreePutArgStk::Kind::Push:
7662 case GenTreePutArgStk::Kind::PushAllSlots:
7663 assert((putArgStk->gtNumberReferenceSlots != 0) || (putArgStk->gtGetOp1()->OperGet() == GT_FIELD_LIST) ||
7666 case GenTreePutArgStk::Kind::Invalid:
7668 assert(!"Uninitialized GenTreePutArgStk::Kind");
7673 if (putArgStk->isPushKind())
7675 m_pushStkArg = true;
7680 m_pushStkArg = false;
7681 inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE);
7682 AddStackLevel(argSize);
7687 //---------------------------------------------------------------------
7688 // genPutArgStkFieldList - generate code for passing a GT_FIELD_LIST arg on the stack.
7691 // treeNode - the GT_PUTARG_STK node whose op1 is a GT_FIELD_LIST
7696 void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
7698 GenTreeFieldList* const fieldList = putArgStk->gtOp1->AsFieldList();
7699 assert(fieldList != nullptr);
7701 // Set m_pushStkArg and pre-adjust the stack if necessary.
7702 const bool preAdjustedStack = genAdjustStackForPutArgStk(putArgStk);
7704 // For now, we only support the "push" case; we will push a full slot for the first field of each slot
7705 // within the struct.
7706 assert((putArgStk->isPushKind()) && !preAdjustedStack && m_pushStkArg);
7708 // If we have pre-adjusted the stack and are simply storing the fields in order, set the offset to 0.
7709 // (Note that this mode is not currently being used.)
7710 // If we are pushing the arguments (i.e. we have not pre-adjusted the stack), then we are pushing them
7711 // in reverse order, so we start with the current field offset at the size of the struct arg (which must be
7712 // a multiple of the target pointer size).
7713 unsigned currentOffset = (preAdjustedStack) ? 0 : putArgStk->getArgSize();
7714 unsigned prevFieldOffset = currentOffset;
7715 regNumber intTmpReg = REG_NA;
7716 regNumber simdTmpReg = REG_NA;
7717 if (putArgStk->gtRsvdRegs != RBM_NONE)
7719 regMaskTP rsvdRegs = putArgStk->gtRsvdRegs;
7720 if ((rsvdRegs & RBM_ALLINT) != 0)
7722 intTmpReg = genRegNumFromMask(rsvdRegs & RBM_ALLINT);
7723 assert(genIsValidIntReg(intTmpReg));
7725 if ((rsvdRegs & RBM_ALLFLOAT) != 0)
7727 simdTmpReg = genRegNumFromMask(rsvdRegs & RBM_ALLFLOAT);
7728 assert(genIsValidFloatReg(simdTmpReg));
7730 assert(genCountBits(rsvdRegs) == (unsigned)((intTmpReg == REG_NA) ? 0 : 1) + ((simdTmpReg == REG_NA) ? 0 : 1));
7733 for (GenTreeFieldList* current = fieldList; current != nullptr; current = current->Rest())
7735 GenTree* const fieldNode = current->Current();
7736 const unsigned fieldOffset = current->gtFieldOffset;
7737 var_types fieldType = current->gtFieldType;
7739 // Long-typed nodes should have been handled by the decomposition pass, and lowering should have sorted the
7740 // field list in descending order by offset.
7741 assert(!varTypeIsLong(fieldType));
7742 assert(fieldOffset <= prevFieldOffset);
7744 // Consume the register, if any, for this field. Note that genConsumeRegs() will appropriately
7745 // update the liveness info for a lclVar that has been marked RegOptional, which hasn't been
7746 // assigned a register, and which is therefore contained.
7747 // Unlike genConsumeReg(), it handles the case where no registers are being consumed.
7748 genConsumeRegs(fieldNode);
7749 regNumber argReg = fieldNode->isUsedFromSpillTemp() ? REG_NA : fieldNode->gtRegNum;
7751 // If the field is slot-like, we can use a push instruction to store the entire register no matter the type.
7753 // The GC encoder requires that the stack remain 4-byte aligned at all times. Round the adjustment up
7754 // to the next multiple of 4. If we are going to generate a `push` instruction, the adjustment must
7755 // not require rounding.
7756 // NOTE: if the field is of GC type, we must use a push instruction, since the emitter is not otherwise
7757 // able to detect stores into the outgoing argument area of the stack on x86.
7758 const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevFieldOffset - fieldOffset) >= 4);
7759 int adjustment = roundUp(currentOffset - fieldOffset, 4);
7760 if (fieldIsSlot && !varTypeIsSIMD(fieldType))
7762 fieldType = genActualType(fieldType);
7763 unsigned pushSize = genTypeSize(fieldType);
7764 assert((pushSize % 4) == 0);
7765 adjustment -= pushSize;
7766 while (adjustment != 0)
7768 inst_IV(INS_push, 0);
7769 currentOffset -= pushSize;
7770 AddStackLevel(pushSize);
7771 adjustment -= pushSize;
7773 m_pushStkArg = true;
7777 m_pushStkArg = false;
7779 // We always "push" floating point fields (i.e. they are full slot values that don't
7780 // require special handling).
7781 assert(varTypeIsIntegralOrI(fieldNode) || varTypeIsSIMD(fieldNode));
7783 // If we can't push this field, it needs to be in a register so that we can store
7784 // it to the stack location.
7785 if (adjustment != 0)
7787 // This moves the stack pointer to fieldOffset.
7788 // For this case, we must adjust the stack and generate stack-relative stores rather than pushes.
7789 // Adjust the stack pointer to the next slot boundary.
7790 inst_RV_IV(INS_sub, REG_SPBASE, adjustment, EA_PTRSIZE);
7791 currentOffset -= adjustment;
7792 AddStackLevel(adjustment);
7795 // Does it need to be in a byte register?
7796 // If so, we'll use intTmpReg, which must have been allocated as a byte register.
7797 // If it's already in a register, but not a byteable one, then move it.
7798 if (varTypeIsByte(fieldType) && ((argReg == REG_NA) || ((genRegMask(argReg) & RBM_BYTE_REGS) == 0)))
7800 assert(intTmpReg != REG_NA);
7801 noway_assert((genRegMask(intTmpReg) & RBM_BYTE_REGS) != 0);
7802 if (argReg != REG_NA)
7804 inst_RV_RV(INS_mov, intTmpReg, argReg, fieldType);
7810 if (argReg == REG_NA)
7814 if (fieldNode->isUsedFromSpillTemp())
7816 assert(!varTypeIsSIMD(fieldType)); // Q: can we get here with SIMD?
7817 assert(fieldNode->IsRegOptional());
7818 TempDsc* tmp = getSpillTempDsc(fieldNode);
7819 getEmitter()->emitIns_S(INS_push, emitActualTypeSize(fieldNode->TypeGet()), tmp->tdTempNum(), 0);
7820 compiler->tmpRlsTemp(tmp);
7824 assert(varTypeIsIntegralOrI(fieldNode));
7825 switch (fieldNode->OperGet())
7828 inst_TT(INS_push, fieldNode, 0, 0, emitActualTypeSize(fieldNode->TypeGet()));
7831 if (fieldNode->IsIconHandle())
7833 inst_IV_handle(INS_push, fieldNode->gtIntCon.gtIconVal);
7837 inst_IV(INS_push, fieldNode->gtIntCon.gtIconVal);
7844 currentOffset -= TARGET_POINTER_SIZE;
7845 AddStackLevel(TARGET_POINTER_SIZE);
7849 // The stack has been adjusted and we will load the field to intTmpReg and then store it on the stack.
7850 assert(varTypeIsIntegralOrI(fieldNode));
7851 switch (fieldNode->OperGet())
7854 inst_RV_TT(INS_mov, intTmpReg, fieldNode);
7857 genSetRegToConst(intTmpReg, fieldNode->TypeGet(), fieldNode);
7862 genStoreRegToStackArg(fieldType, intTmpReg, fieldOffset - currentOffset);
7867 #if defined(FEATURE_SIMD)
7868 if (fieldType == TYP_SIMD12)
7870 assert(genIsValidFloatReg(simdTmpReg));
7871 genStoreSIMD12ToStack(argReg, simdTmpReg);
7874 #endif // defined(FEATURE_SIMD)
7876 genStoreRegToStackArg(fieldType, argReg, fieldOffset - currentOffset);
7880 // We always push a slot-rounded size
7881 currentOffset -= genTypeSize(fieldType);
7885 prevFieldOffset = fieldOffset;
7887 if (currentOffset != 0)
7889 // We don't expect padding at the beginning of a struct, but it could happen with explicit layout.
7890 inst_RV_IV(INS_sub, REG_SPBASE, currentOffset, EA_PTRSIZE);
7891 AddStackLevel(currentOffset);
7894 #endif // _TARGET_X86_
7896 //---------------------------------------------------------------------
7897 // genPutArgStk - generate code for passing an arg on the stack.
7900 // treeNode - the GT_PUTARG_STK node
7901 // targetType - the type of the treeNode
7906 void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
7908 var_types targetType = putArgStk->TypeGet();
7912 genAlignStackBeforeCall(putArgStk);
7914 if (varTypeIsStruct(targetType))
7916 (void)genAdjustStackForPutArgStk(putArgStk);
7917 genPutStructArgStk(putArgStk);
7921 // The following logic is applicable for x86 arch.
7922 assert(!varTypeIsFloating(targetType) || (targetType == putArgStk->gtOp1->TypeGet()));
7924 GenTreePtr data = putArgStk->gtOp1;
7926 // On a 32-bit target, all of the long arguments are handled with GT_FIELD_LIST,
7927 // and the type of the putArgStk is TYP_VOID.
7928 assert(targetType != TYP_LONG);
7930 const unsigned argSize = putArgStk->getArgSize();
7931 assert((argSize % TARGET_POINTER_SIZE) == 0);
7933 if (data->isContainedIntOrIImmed())
7935 if (data->IsIconHandle())
7937 inst_IV_handle(INS_push, data->gtIntCon.gtIconVal);
7941 inst_IV(INS_push, data->gtIntCon.gtIconVal);
7943 AddStackLevel(argSize);
7945 else if (data->OperGet() == GT_FIELD_LIST)
7947 genPutArgStkFieldList(putArgStk);
7951 // We should not see any contained nodes that are not immediates.
7952 assert(data->isUsedFromReg());
7953 genConsumeReg(data);
7954 genPushReg(targetType, data->gtRegNum);
7956 #else // !_TARGET_X86_
7958 unsigned baseVarNum = getBaseVarForPutArgStk(putArgStk);
7960 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
7962 if (varTypeIsStruct(targetType))
7964 m_stkArgVarNum = baseVarNum;
7965 m_stkArgOffset = putArgStk->getArgOffset();
7966 genPutStructArgStk(putArgStk);
7967 m_stkArgVarNum = BAD_VAR_NUM;
7970 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
7972 noway_assert(targetType != TYP_STRUCT);
7973 assert(!varTypeIsFloating(targetType) || (targetType == putArgStk->gtOp1->TypeGet()));
7975 // Get argument offset on stack.
7976 // Here we cross check that argument offset hasn't changed from lowering to codegen since
7977 // we are storing arg slot number in GT_PUTARG_STK node in lowering phase.
7978 int argOffset = putArgStk->getArgOffset();
7981 fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(putArgStk->gtCall, putArgStk);
7982 assert(curArgTabEntry);
7983 assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE);
7986 GenTreePtr data = putArgStk->gtOp1;
7988 if (data->isContainedIntOrIImmed())
7990 getEmitter()->emitIns_S_I(ins_Store(targetType), emitTypeSize(targetType), baseVarNum, argOffset,
7991 (int)data->AsIntConCommon()->IconValue());
7995 assert(data->isUsedFromReg());
7996 genConsumeReg(data);
7997 getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, baseVarNum,
8001 #endif // !_TARGET_X86_
8005 // genPushReg: Push a register value onto the stack and adjust the stack level
8008 // type - the type of value to be stored
8009 // reg - the register containing the value
8012 // For TYP_LONG, the srcReg must be a floating point register.
8013 // Otherwise, the register type must be consistent with the given type.
8015 void CodeGen::genPushReg(var_types type, regNumber srcReg)
8017 unsigned size = genTypeSize(type);
8018 if (varTypeIsIntegralOrI(type) && type != TYP_LONG)
8020 assert(genIsValidIntReg(srcReg));
8021 inst_RV(INS_push, srcReg, type);
8026 emitAttr attr = emitTypeSize(type);
8027 if (type == TYP_LONG)
8029 // On x86, the only way we can push a TYP_LONG from a register is if it is in an xmm reg.
8030 // This is only used when we are pushing a struct from memory to memory, and basically is
8031 // handling an 8-byte "chunk", as opposed to strictly a long type.
8036 ins = ins_Store(type);
8038 assert(genIsValidFloatReg(srcReg));
8039 inst_RV_IV(INS_sub, REG_SPBASE, size, EA_PTRSIZE);
8040 getEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, 0);
8042 AddStackLevel(size);
8044 #endif // _TARGET_X86_
8046 #if defined(FEATURE_PUT_STRUCT_ARG_STK)
8047 // genStoreRegToStackArg: Store a register value into the stack argument area
8050 // type - the type of value to be stored
8051 // reg - the register containing the value
8052 // offset - the offset from the base (see Assumptions below)
8055 // A type of TYP_STRUCT instructs this method to store a 16-byte chunk
8056 // at the given offset (i.e. not the full struct).
8059 // The caller must set the context appropriately before calling this method:
8060 // - On x64, m_stkArgVarNum must be set according to whether this is a regular or tail call.
8061 // - On x86, the caller must set m_pushStkArg if this method should push the argument.
8062 // Otherwise, the argument is stored at the given offset from sp.
8064 // TODO: In the below code the load and store instructions are for 16 bytes, but the
8065 // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
8066 // this probably needs to be changed.
8068 void CodeGen::genStoreRegToStackArg(var_types type, regNumber srcReg, int offset)
8070 assert(srcReg != REG_NA);
8075 if (type == TYP_STRUCT)
8078 // This should be changed!
8085 if (varTypeIsSIMD(type))
8087 assert(genIsValidFloatReg(srcReg));
8088 ins = ins_Store(type); // TODO-CQ: pass 'aligned' correctly
8091 #endif // FEATURE_SIMD
8093 if (type == TYP_LONG)
8095 assert(genIsValidFloatReg(srcReg));
8099 #endif // _TARGET_X86_
8101 assert((varTypeIsFloating(type) && genIsValidFloatReg(srcReg)) ||
8102 (varTypeIsIntegralOrI(type) && genIsValidIntReg(srcReg)));
8103 ins = ins_Store(type);
8105 attr = emitTypeSize(type);
8106 size = genTypeSize(type);
8112 genPushReg(type, srcReg);
8116 getEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, offset);
8118 #else // !_TARGET_X86_
8119 assert(m_stkArgVarNum != BAD_VAR_NUM);
8120 getEmitter()->emitIns_S_R(ins, attr, srcReg, m_stkArgVarNum, m_stkArgOffset + offset);
8121 #endif // !_TARGET_X86_
8124 //---------------------------------------------------------------------
8125 // genPutStructArgStk - generate code for copying a struct arg on the stack by value.
8126 // In case there are references to heap object in the struct,
8127 // it generates the gcinfo as well.
8130 // putArgStk - the GT_PUTARG_STK node
8133 // In the case of fixed out args, the caller must have set m_stkArgVarNum to the variable number
8134 // corresponding to the argument area (where we will put the argument on the stack).
8135 // For tail calls this is the baseVarNum = 0.
8136 // For non tail calls this is the outgoingArgSpace.
8137 void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
8139 var_types targetType = putArgStk->TypeGet();
8141 #if defined(_TARGET_X86_) && defined(FEATURE_SIMD)
8142 if (targetType == TYP_SIMD12)
8144 genPutArgStkSIMD12(putArgStk);
8147 #endif // defined(_TARGET_X86_) && defined(FEATURE_SIMD)
8149 if (varTypeIsSIMD(targetType))
8151 regNumber srcReg = genConsumeReg(putArgStk->gtGetOp1());
8152 assert((srcReg != REG_NA) && (genIsValidFloatReg(srcReg)));
8153 genStoreRegToStackArg(targetType, srcReg, 0);
8157 assert(targetType == TYP_STRUCT);
8159 if (putArgStk->gtNumberReferenceSlots == 0)
8161 switch (putArgStk->gtPutArgStkKind)
8163 case GenTreePutArgStk::Kind::RepInstr:
8164 genStructPutArgRepMovs(putArgStk);
8166 case GenTreePutArgStk::Kind::Unroll:
8167 genStructPutArgUnroll(putArgStk);
8169 case GenTreePutArgStk::Kind::Push:
8170 genStructPutArgUnroll(putArgStk);
8178 // No need to disable GC the way COPYOBJ does. Here the refs are copied in atomic operations always.
8179 CLANG_FORMAT_COMMENT_ANCHOR;
8182 // On x86, any struct that has contains GC references must be stored to the stack using `push` instructions so
8183 // that the emitter properly detects the need to update the method's GC information.
8185 // Strictly speaking, it is only necessary to use `push` to store the GC references themselves, so for structs
8186 // with large numbers of consecutive non-GC-ref-typed fields, we may be able to improve the code size in the
8188 assert(m_pushStkArg);
8190 GenTree* srcAddr = putArgStk->gtGetOp1()->gtGetOp1();
8191 BYTE* gcPtrs = putArgStk->gtGcPtrs;
8192 const unsigned numSlots = putArgStk->gtNumSlots;
8194 regNumber srcRegNum = srcAddr->gtRegNum;
8195 const bool srcAddrInReg = srcRegNum != REG_NA;
8197 unsigned srcLclNum = 0;
8198 unsigned srcLclOffset = 0;
8201 genConsumeReg(srcAddr);
8205 assert(srcAddr->OperIsLocalAddr());
8207 srcLclNum = srcAddr->AsLclVarCommon()->gtLclNum;
8208 if (srcAddr->OperGet() == GT_LCL_FLD_ADDR)
8210 srcLclOffset = srcAddr->AsLclFld()->gtLclOffs;
8214 for (int i = numSlots - 1; i >= 0; --i)
8217 if (gcPtrs[i] == TYPE_GC_NONE)
8219 slotAttr = EA_4BYTE;
8221 else if (gcPtrs[i] == TYPE_GC_REF)
8223 slotAttr = EA_GCREF;
8227 assert(gcPtrs[i] == TYPE_GC_BYREF);
8228 slotAttr = EA_BYREF;
8231 const unsigned offset = i * TARGET_POINTER_SIZE;
8234 getEmitter()->emitIns_AR_R(INS_push, slotAttr, REG_NA, srcRegNum, offset);
8238 getEmitter()->emitIns_S(INS_push, slotAttr, srcLclNum, srcLclOffset + offset);
8240 AddStackLevel(TARGET_POINTER_SIZE);
8242 #else // !defined(_TARGET_X86_)
8244 // Consume these registers.
8245 // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
8246 genConsumePutStructArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA);
8248 const bool srcIsLocal = putArgStk->gtOp1->AsObj()->gtOp1->OperIsLocalAddr();
8249 const emitAttr srcAddrAttr = srcIsLocal ? EA_PTRSIZE : EA_BYREF;
8252 unsigned numGCSlotsCopied = 0;
8255 BYTE* gcPtrs = putArgStk->gtGcPtrs;
8256 const unsigned numSlots = putArgStk->gtNumSlots;
8257 for (unsigned i = 0; i < numSlots;)
8259 if (gcPtrs[i] == TYPE_GC_NONE)
8261 // Let's see if we can use rep movsp (alias for movsd or movsq for 32 and 64 bits respectively)
8262 // instead of a sequence of movsp instructions to save cycles and code size.
8263 unsigned adjacentNonGCSlotCount = 0;
8266 adjacentNonGCSlotCount++;
8268 } while ((i < numSlots) && (gcPtrs[i] == TYPE_GC_NONE));
8270 // If we have a very small contiguous non-ref region, it's better just to
8271 // emit a sequence of movsp instructions
8272 if (adjacentNonGCSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
8274 for (; adjacentNonGCSlotCount > 0; adjacentNonGCSlotCount--)
8281 getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, adjacentNonGCSlotCount);
8282 instGen(INS_r_movsp);
8287 assert((gcPtrs[i] == TYPE_GC_REF) || (gcPtrs[i] == TYPE_GC_BYREF));
8289 // We have a GC (byref or ref) pointer
8290 // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsp instruction,
8291 // but the logic for emitting a GC info record is not available (it is internal for the emitter
8292 // only.) See emitGCVarLiveUpd function. If we could call it separately, we could do
8293 // instGen(INS_movsp); and emission of gc info.
8295 var_types memType = (gcPtrs[i] == TYPE_GC_REF) ? TYP_REF : TYP_BYREF;
8296 getEmitter()->emitIns_R_AR(ins_Load(memType), emitTypeSize(memType), REG_RCX, REG_RSI, 0);
8297 genStoreRegToStackArg(memType, REG_RCX, i * TARGET_POINTER_SIZE);
8306 // Source for the copy operation.
8307 // If a LocalAddr, use EA_PTRSIZE - copy from stack.
8308 // If not a LocalAddr, use EA_BYREF - the source location is not on the stack.
8309 getEmitter()->emitIns_R_I(INS_add, srcAddrAttr, REG_RSI, TARGET_POINTER_SIZE);
8311 // Always copying to the stack - outgoing arg area
8312 // (or the outgoing arg area of the caller for a tail call) - use EA_PTRSIZE.
8313 getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_RDI, TARGET_POINTER_SIZE);
8318 assert(numGCSlotsCopied == putArgStk->gtNumberReferenceSlots);
8319 #endif // _TARGET_X86_
8322 #endif // defined(FEATURE_PUT_STRUCT_ARG_STK)
8324 /*****************************************************************************
8326 * Create and record GC Info for the function.
8328 #ifndef JIT32_GCENCODER
8330 #else // !JIT32_GCENCODER
8332 #endif // !JIT32_GCENCODER
8333 CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
8335 #ifdef JIT32_GCENCODER
8336 return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
8337 #else // !JIT32_GCENCODER
8338 genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
8339 #endif // !JIT32_GCENCODER
8342 #ifdef JIT32_GCENCODER
8343 void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize,
8344 unsigned prologSize,
8345 unsigned epilogSize DEBUGARG(void* codePtr))
8354 compiler->compInfoBlkSize =
8355 gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached);
8357 size_t argTabOffset = 0;
8358 size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset);
8362 if (genInterruptible)
8364 gcHeaderISize += compiler->compInfoBlkSize;
8365 gcPtrMapISize += ptrMapSize;
8369 gcHeaderNSize += compiler->compInfoBlkSize;
8370 gcPtrMapNSize += ptrMapSize;
8373 #endif // DISPLAY_SIZES
8375 compiler->compInfoBlkSize += ptrMapSize;
8377 /* Allocate the info block for the method */
8379 compiler->compInfoBlkAddr = (BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize);
8381 #if 0 // VERBOSE_SIZES
8382 // TODO-X86-Cleanup: 'dataSize', below, is not defined
8384 // if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100)
8386 printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n",
8387 compiler->info.compILCodeSize,
8388 compiler->compInfoBlkSize,
8389 codeSize + dataSize,
8390 codeSize + dataSize - prologSize - epilogSize,
8391 100 * (codeSize + dataSize) / compiler->info.compILCodeSize,
8392 100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize,
8393 compiler->info.compClassName,
8394 compiler->info.compMethodName);
8399 /* Fill in the info block and return it to the caller */
8401 void* infoPtr = compiler->compInfoBlkAddr;
8403 /* Create the method info block: header followed by GC tracking tables */
8405 compiler->compInfoBlkAddr +=
8406 gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached);
8408 assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize);
8409 compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset);
8410 assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize);
8416 BYTE* temp = (BYTE*)infoPtr;
8417 unsigned size = compiler->compInfoBlkAddr - temp;
8418 BYTE* ptab = temp + headerSize;
8420 noway_assert(size == headerSize + ptrMapSize);
8422 printf("Method info block - header [%u bytes]:", headerSize);
8424 for (unsigned i = 0; i < size; i++)
8428 printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize);
8429 printf("\n %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' ');
8434 printf("\n %04X: ", i);
8437 printf("%02X ", *temp++);
8447 if (compiler->opts.dspGCtbls)
8449 const BYTE* base = (BYTE*)infoPtr;
8451 unsigned methodSize;
8454 printf("GC Info for method %s\n", compiler->info.compFullName);
8455 printf("GC info size = %3u\n", compiler->compInfoBlkSize);
8457 size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize);
8458 // printf("size of header encoding is %3u\n", size);
8461 if (compiler->opts.dspGCtbls)
8464 size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize);
8465 // printf("size of pointer table is %3u\n", size);
8467 noway_assert(compiler->compInfoBlkAddr == (base + size));
8472 if (jitOpts.testMask & 128)
8474 for (unsigned offs = 0; offs < codeSize; offs++)
8476 gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs);
8480 #endif // DUMP_GC_TABLES
8482 /* Make sure we ended up generating the expected number of bytes */
8484 noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize);
8489 #else // !JIT32_GCENCODER
8490 void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
8492 IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
8493 GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
8494 GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
8495 assert(gcInfoEncoder);
8497 // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
8498 gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
8500 // We keep the call count for the second call to gcMakeRegPtrTable() below.
8501 unsigned callCnt = 0;
8502 // First we figure out the encoder ID's for the stack slots and registers.
8503 gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt);
8504 // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
8505 gcInfoEncoder->FinalizeSlotIds();
8506 // Now we can actually use those slot ID's to declare live ranges.
8507 gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt);
8509 if (compiler->opts.compDbgEnC)
8511 // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp)
8515 // -saved 'this' pointer and bool for synchronized methods
8517 // 4 slots for RBP + return address + RSI + RDI
8518 int preservedAreaSize = 4 * REGSIZE_BYTES;
8520 if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
8522 if (!(compiler->info.compFlags & CORINFO_FLG_STATIC))
8524 preservedAreaSize += REGSIZE_BYTES;
8527 // bool in synchronized methods that tracks whether the lock has been taken (takes 4 bytes on stack)
8528 preservedAreaSize += 4;
8531 // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the
8533 gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
8536 if (compiler->opts.IsReversePInvoke())
8538 unsigned reversePInvokeFrameVarNumber = compiler->lvaReversePInvokeFrameVar;
8539 assert(reversePInvokeFrameVarNumber != BAD_VAR_NUM && reversePInvokeFrameVarNumber < compiler->lvaRefCount);
8540 LclVarDsc& reversePInvokeFrameVar = compiler->lvaTable[reversePInvokeFrameVarNumber];
8541 gcInfoEncoder->SetReversePInvokeFrameSlot(reversePInvokeFrameVar.lvStkOffs);
8544 gcInfoEncoder->Build();
8546 // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
8547 // let's save the values anyway for debugging purposes
8548 compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
8549 compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
8551 #endif // !JIT32_GCENCODER
8553 /*****************************************************************************
8554 * Emit a call to a helper function.
8558 void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg)
8560 void* addr = nullptr;
8561 void* pAddr = nullptr;
8563 emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
8564 addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr);
8565 regNumber callTarget = REG_NA;
8566 regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
8570 assert(pAddr != nullptr);
8572 // Absolute indirect call addr
8573 // Note: Order of checks is important. First always check for pc-relative and next
8574 // zero-relative. Because the former encoding is 1-byte smaller than the latter.
8575 if (genCodeIndirAddrCanBeEncodedAsPCRelOffset((size_t)pAddr) ||
8576 genCodeIndirAddrCanBeEncodedAsZeroRelOffset((size_t)pAddr))
8578 // generate call whose target is specified by 32-bit offset relative to PC or zero.
8579 callType = emitter::EC_FUNC_TOKEN_INDIR;
8584 #ifdef _TARGET_AMD64_
8585 // If this indirect address cannot be encoded as 32-bit offset relative to PC or Zero,
8586 // load it into REG_HELPER_CALL_TARGET and use register indirect addressing mode to
8591 if (callTargetReg == REG_NA)
8593 // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
8594 // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
8595 callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
8596 regMaskTP callTargetMask = genRegMask(callTargetReg);
8597 noway_assert((callTargetMask & killMask) == callTargetMask);
8601 // The call target must not overwrite any live variable, though it may not be in the
8602 // kill set for the call.
8603 regMaskTP callTargetMask = genRegMask(callTargetReg);
8604 noway_assert((callTargetMask & regSet.rsMaskVars) == RBM_NONE);
8608 callTarget = callTargetReg;
8609 CodeGen::genSetRegToIcon(callTarget, (ssize_t)pAddr, TYP_I_IMPL);
8610 callType = emitter::EC_INDIR_ARD;
8615 getEmitter()->emitIns_Call(callType,
8616 compiler->eeFindHelper(helper),
8617 INDEBUG_LDISASM_COMMA(nullptr) addr,
8620 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN),
8621 gcInfo.gcVarPtrSetCur,
8622 gcInfo.gcRegGCrefSetCur,
8623 gcInfo.gcRegByrefSetCur,
8624 BAD_IL_OFFSET, // IL offset
8626 REG_NA, 0, 0, // xreg, xmul, disp
8628 emitter::emitNoGChelper(helper));
8631 regTracker.rsTrashRegSet(killMask);
8632 regTracker.rsTrashRegsForGCInterruptability();
8635 #if !defined(_TARGET_64BIT_)
8636 //-----------------------------------------------------------------------------
8638 // Code Generation for Long integers
8640 //-----------------------------------------------------------------------------
8642 //------------------------------------------------------------------------
8643 // genStoreLongLclVar: Generate code to store a non-enregistered long lclVar
8646 // treeNode - A TYP_LONG lclVar node.
8652 // 'treeNode' must be a TYP_LONG lclVar node for a lclVar that has NOT been promoted.
8653 // Its operand must be a GT_LONG node.
8655 void CodeGen::genStoreLongLclVar(GenTree* treeNode)
8657 emitter* emit = getEmitter();
8659 GenTreeLclVarCommon* lclNode = treeNode->AsLclVarCommon();
8660 unsigned lclNum = lclNode->gtLclNum;
8661 LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
8662 assert(varDsc->TypeGet() == TYP_LONG);
8663 assert(!varDsc->lvPromoted);
8664 GenTreePtr op1 = treeNode->gtOp.gtOp1;
8665 noway_assert(op1->OperGet() == GT_LONG || op1->OperGet() == GT_MUL_LONG);
8666 genConsumeRegs(op1);
8668 if (op1->OperGet() == GT_LONG)
8670 // Definitions of register candidates will have been lowered to 2 int lclVars.
8671 assert(!treeNode->InReg());
8673 GenTreePtr loVal = op1->gtGetOp1();
8674 GenTreePtr hiVal = op1->gtGetOp2();
8676 // NYI: Contained immediates.
8677 NYI_IF((loVal->gtRegNum == REG_NA) || (hiVal->gtRegNum == REG_NA),
8678 "Store of long lclVar with contained immediate");
8680 emit->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0);
8681 emit->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT));
8683 else if (op1->OperGet() == GT_MUL_LONG)
8685 assert((op1->gtFlags & GTF_MUL_64RSLT) != 0);
8688 getEmitter()->emitIns_S_R(ins_Store(TYP_INT), emitTypeSize(TYP_INT), REG_LNGRET_LO, lclNum, 0);
8689 getEmitter()->emitIns_S_R(ins_Store(TYP_INT), emitTypeSize(TYP_INT), REG_LNGRET_HI, lclNum,
8690 genTypeSize(TYP_INT));
8693 #endif // !defined(_TARGET_64BIT_)
8695 /*****************************************************************************
8696 * Unit testing of the XArch emitter: generate a bunch of instructions into the prolog
8697 * (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
8698 * disassembler thinks the instructions as the same as we do.
8701 // Uncomment "#define ALL_ARM64_EMITTER_UNIT_TESTS" to run all the unit tests here.
8702 // After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time.
8703 //#define ALL_XARCH_EMITTER_UNIT_TESTS
8705 #if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
8706 void CodeGen::genAmd64EmitterUnitTests()
8713 if (!compiler->opts.altJit)
8715 // No point doing this in a "real" JIT.
8719 // Mark the "fake" instructions in the output.
8720 printf("*************** In genAmd64EmitterUnitTests()\n");
8723 // genDefineTempLabel(genCreateTempLabel());
8724 // to create artificial labels to help separate groups of tests.
8729 CLANG_FORMAT_COMMENT_ANCHOR;
8731 #ifdef ALL_XARCH_EMITTER_UNIT_TESTS
8732 #ifdef FEATURE_AVX_SUPPORT
8733 genDefineTempLabel(genCreateTempLabel());
8735 // vhaddpd ymm0,ymm1,ymm2
8736 getEmitter()->emitIns_R_R_R(INS_haddpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8737 // vaddss xmm0,xmm1,xmm2
8738 getEmitter()->emitIns_R_R_R(INS_addss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8739 // vaddsd xmm0,xmm1,xmm2
8740 getEmitter()->emitIns_R_R_R(INS_addsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8741 // vaddps xmm0,xmm1,xmm2
8742 getEmitter()->emitIns_R_R_R(INS_addps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8743 // vaddps ymm0,ymm1,ymm2
8744 getEmitter()->emitIns_R_R_R(INS_addps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8745 // vaddpd xmm0,xmm1,xmm2
8746 getEmitter()->emitIns_R_R_R(INS_addpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8747 // vaddpd ymm0,ymm1,ymm2
8748 getEmitter()->emitIns_R_R_R(INS_addpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8749 // vsubss xmm0,xmm1,xmm2
8750 getEmitter()->emitIns_R_R_R(INS_subss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8751 // vsubsd xmm0,xmm1,xmm2
8752 getEmitter()->emitIns_R_R_R(INS_subsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8753 // vsubps ymm0,ymm1,ymm2
8754 getEmitter()->emitIns_R_R_R(INS_subps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8755 // vsubps ymm0,ymm1,ymm2
8756 getEmitter()->emitIns_R_R_R(INS_subps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8757 // vsubpd xmm0,xmm1,xmm2
8758 getEmitter()->emitIns_R_R_R(INS_subpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8759 // vsubpd ymm0,ymm1,ymm2
8760 getEmitter()->emitIns_R_R_R(INS_subpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8761 // vmulss xmm0,xmm1,xmm2
8762 getEmitter()->emitIns_R_R_R(INS_mulss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8763 // vmulsd xmm0,xmm1,xmm2
8764 getEmitter()->emitIns_R_R_R(INS_mulsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8765 // vmulps xmm0,xmm1,xmm2
8766 getEmitter()->emitIns_R_R_R(INS_mulps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8767 // vmulpd xmm0,xmm1,xmm2
8768 getEmitter()->emitIns_R_R_R(INS_mulpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8769 // vmulps ymm0,ymm1,ymm2
8770 getEmitter()->emitIns_R_R_R(INS_mulps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8771 // vmulpd ymm0,ymm1,ymm2
8772 getEmitter()->emitIns_R_R_R(INS_mulpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8773 // vandps xmm0,xmm1,xmm2
8774 getEmitter()->emitIns_R_R_R(INS_andps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8775 // vandpd xmm0,xmm1,xmm2
8776 getEmitter()->emitIns_R_R_R(INS_andpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8777 // vandps ymm0,ymm1,ymm2
8778 getEmitter()->emitIns_R_R_R(INS_andps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8779 // vandpd ymm0,ymm1,ymm2
8780 getEmitter()->emitIns_R_R_R(INS_andpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8781 // vorps xmm0,xmm1,xmm2
8782 getEmitter()->emitIns_R_R_R(INS_orps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8783 // vorpd xmm0,xmm1,xmm2
8784 getEmitter()->emitIns_R_R_R(INS_orpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8785 // vorps ymm0,ymm1,ymm2
8786 getEmitter()->emitIns_R_R_R(INS_orps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8787 // vorpd ymm0,ymm1,ymm2
8788 getEmitter()->emitIns_R_R_R(INS_orpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8789 // vdivss xmm0,xmm1,xmm2
8790 getEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8791 // vdivsd xmm0,xmm1,xmm2
8792 getEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8793 // vdivss xmm0,xmm1,xmm2
8794 getEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8795 // vdivsd xmm0,xmm1,xmm2
8796 getEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8798 // vdivss xmm0,xmm1,xmm2
8799 getEmitter()->emitIns_R_R_R(INS_cvtss2sd, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8800 // vdivsd xmm0,xmm1,xmm2
8801 getEmitter()->emitIns_R_R_R(INS_cvtsd2ss, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8802 #endif // FEATURE_AVX_SUPPORT
8803 #endif // ALL_XARCH_EMITTER_UNIT_TESTS
8804 printf("*************** End of genAmd64EmitterUnitTests()\n");
8807 #endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
8809 #endif // _TARGET_AMD64_
8811 #endif // !LEGACY_BACKEND