// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                        Amd64/x86 Code Generator                           XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator.

#include "gcinfoencoder.h"

// Get the register assigned to the given node
regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree)
{
    return tree->gtRegNum;
}
//------------------------------------------------------------------------
// genSpillVar: Spill a local variable
//
// Arguments:
//    tree - the lclVar node for the variable being spilled
//
// Assumptions:
//    The lclVar must be a register candidate (lvRegCandidate)
//
void CodeGen::genSpillVar(GenTreePtr tree)
{
    unsigned   varNum = tree->gtLclVarCommon.gtLclNum;
    LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);

    assert(varDsc->lvIsRegCandidate());

    // We don't actually need to spill if it is already living in memory
    bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg());
    if (needsSpill)
    {
        var_types lclTyp = varDsc->TypeGet();
        if (varDsc->lvNormalizeOnStore())
        {
            lclTyp = genActualType(lclTyp);
        }
        emitAttr size = emitTypeSize(lclTyp);

        bool restoreRegVar = false;
        if (tree->gtOper == GT_REG_VAR)
        {
            tree->SetOper(GT_LCL_VAR);
            restoreRegVar = true;
        }

        // mask off the flag to generate the right spill code, then bring it back
        tree->gtFlags &= ~GTF_REG_VAL;

        instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum));
#if CPU_LONG_USES_REGPAIR
        if (varTypeIsMultiReg(tree))
        {
            assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair));
            assert(varDsc->lvOtherReg == genRegPairHi(tree->gtRegPair));
            regNumber regLo = genRegPairLo(tree->gtRegPair);
            regNumber regHi = genRegPairHi(tree->gtRegPair);
            inst_TT_RV(storeIns, tree, regLo);
            inst_TT_RV(storeIns, tree, regHi, 4);
        }
        else
#endif
        {
            assert(varDsc->lvRegNum == tree->gtRegNum);
            inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size);
        }
        tree->gtFlags |= GTF_REG_VAL;

        if (restoreRegVar)
        {
            tree->SetOper(GT_REG_VAR);
        }

        genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree));
        gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask());

        if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
        {
            if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
            {
                JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
            }
            else
            {
                JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
            }

            VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
        }
    }

    tree->gtFlags &= ~GTF_SPILL;
    varDsc->lvRegNum = REG_STK;
    if (varTypeIsMultiReg(tree))
    {
        varDsc->lvOtherReg = REG_STK;
    }
}
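// Illustrative only: for a local V03 spilled from rcx at frame offset -0x10 (the
// variable number, register, and offset are made up for this sketch), the store
// above would be something like
//      mov     qword ptr [rbp-10H], rcx
// after which lvRegNum becomes REG_STK and the GC info stops tracking rcx for V03.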
void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree)
{
    assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY));
    varDsc->lvRegNum = tree->gtRegNum;
}

/*****************************************************************************/
/*****************************************************************************/
/*****************************************************************************
 *
 * Generate code that will set the given register to the integer constant.
 */

void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
{
    // Reg cannot be a FP reg
    assert(!genIsValidFloatReg(reg));

    // The only TYP_REF constant that can come down this path is a managed 'null' since it is not
    // relocatable. Other ref type constants (e.g. string objects) go through a different
    // code path.
    noway_assert(type != TYP_REF || val == 0);

    if (val == 0)
    {
        instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
    }
    else
    {
        // TODO-XArch-CQ: needs all the optimized cases
        getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(type), reg, val);
    }
}
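// A minimal sketch of the two paths above (illustrative, not verbatim emission):
//      genSetRegToIcon(REG_EAX, 0, TYP_INT)   ->   xor eax, eax    ; smaller, and breaks the
//                                                                  ; dependency on the old value
//      genSetRegToIcon(REG_ECX, 42, TYP_INT)  ->   mov ecx, 42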
/*****************************************************************************
 *
 * Generate code to check that the GS cookie wasn't thrashed by a buffer
 * overrun. If pushReg is true, preserve all registers around the code sequence.
 * Otherwise ECX could be modified.
 *
 * Implementation Note: pushReg = true, in case of tail calls.
 */

void CodeGen::genEmitGSCookieCheck(bool pushReg)
{
    noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);

    // Make sure that EAX is reported as live GC-ref so that any GC that kicks in while
    // executing the GS cookie check will not collect the object pointed to by EAX.
    //
    // For Amd64 System V, a two-register-returned struct could be returned in RAX and RDX.
    // In such a case, make sure that the correct GC-ness of RDX is reported as well, so
    // that a GC object pointed to by RDX will not be collected.

    // Handle multi-reg return type values
    if (compiler->compMethodReturnsMultiRegRetType())
    {
        ReturnTypeDesc retTypeDesc;
        if (varTypeIsLong(compiler->info.compRetNativeType))
        {
            retTypeDesc.InitializeLongReturnType(compiler);
        }
        else // we must have a struct return type
        {
            retTypeDesc.InitializeStructReturnType(compiler, compiler->info.compMethodInfo->args.retTypeClass);
        }

        unsigned regCount = retTypeDesc.GetReturnRegCount();

        // Only the x86 and x64 Unix ABIs allow multi-reg returns, and the
        // number of result regs should equal MAX_RET_REG_COUNT.
        assert(regCount == MAX_RET_REG_COUNT);

        for (unsigned i = 0; i < regCount; ++i)
        {
            gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i));
        }
    }
    else if (compiler->compMethodReturnsRetBufAddr())
    {
        // This is for returning in an implicit RetBuf.
        // If the address of the buffer is returned in REG_INTRET, mark the content of INTRET as ByRef.

        // In case the return is in an implicit RetBuf, the native return type should be a struct
        assert(varTypeIsStruct(compiler->info.compRetNativeType));

        gcInfo.gcMarkRegPtrVal(REG_INTRET, TYP_BYREF);
    }
    // ... all other cases.
    else
    {
#ifdef _TARGET_AMD64_
        // For x64, structs that are not returned in registers are always
        // returned in an implicit RetBuf. If we reached here, we should not have
        // a RetBuf and the return type should not be a struct.
        assert(compiler->info.compRetBuffArg == BAD_VAR_NUM);
        assert(!varTypeIsStruct(compiler->info.compRetNativeType));
#endif // _TARGET_AMD64_

        // For x86 Windows we can't make such assertions since we generate code that returns
        // the RetBuf in REG_INTRET only when the ProfilerHook is enabled. Otherwise
        // compRetNativeType could be TYP_STRUCT.
        gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType);
    }
    regNumber regGSCheck;
    regMaskTP regMaskGSCheck = RBM_NONE;

    if (!pushReg)
    {
        // Non-tail call: we can use any callee trash register that is not
        // a return register and does not contain the 'this' pointer (which must be kept alive),
        // since we are generating the GS cookie check after a GT_RETURN block.
        // Note: On Amd64 System V RDX is an arg register - REG_ARG_2 - as well
        // as a return register for two-register-returned structs.
        if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister &&
            (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ARG_0))
        {
            regGSCheck = REG_ARG_1;
        }
        else
        {
            regGSCheck = REG_ARG_0;
        }
    }
    else
    {
#ifdef _TARGET_X86_
        // It doesn't matter which register we pick, since we're going to save and restore it
        // around the check.
        // TODO-CQ: Can we optimize the choice of register to avoid doing the push/pop sometimes?
        regGSCheck     = REG_EAX;
        regMaskGSCheck = RBM_EAX;
#else  // !_TARGET_X86_
        // Tail calls from methods that need a GS check: We need to preserve registers while
        // emitting the GS cookie check for a tail prefixed call or a jmp. To emit the GS cookie
        // check, we might need a register. This won't be an issue for jmp calls for the
        // reason mentioned below (see comment starting with "Jmp Calls:").
        //
        // The following are the possible solutions in case of tail prefixed calls:
        // 1) Use R11 - ignore tail prefix on calls that need to pass a param in R11 when
        //    present in methods that require GS cookie check. Rest of the tail calls that
        //    do not require R11 will be honored.
        // 2) Internal register - GT_CALL node reserves an internal register and emits GS
        //    cookie check as part of tail call codegen. GenExitCode() needs to special case
        //    fast tail calls implemented as epilog+jmp or such tail calls should always get
        //    dispatched via helper.
        // 3) Materialize GS cookie check as a separate node hanging off GT_CALL node in
        //    right execution order during rationalization.
        //
        // There are two calls that use R11: VSD and calli pinvokes with cookie param. Tail
        // prefix on pinvokes is ignored. That is, options 2 and 3 will allow tail prefixed
        // VSD calls from methods that need GS check.
        //
        // Tail prefixed calls: Right now for Jit64 compat, a method requiring a GS cookie check
        // ignores the tail prefix. In future, if we intend to support tail calls from such a method,
        // consider one of the options mentioned above. For now adding an assert that we don't
        // expect to see a tail call in a method that requires GS check.
        noway_assert(!compiler->compTailCallUsed);

        // Jmp calls: a jmp call specifies the method handle from which the JIT queries the VM
        // for its entry point address; hence it can be neither a VSD call nor a PInvoke calli
        // with a cookie parameter. Therefore, in the case of jmp calls it is safe to use R11.
        regGSCheck = REG_R11;
#endif // !_TARGET_X86_
    }
    regMaskTP byrefPushedRegs = RBM_NONE;
    regMaskTP norefPushedRegs = RBM_NONE;
    regMaskTP pushedRegs      = RBM_NONE;

    if (compiler->gsGlobalSecurityCookieAddr == nullptr)
    {
#if defined(_TARGET_AMD64_)
        // If the GS cookie value fits within 32 bits we can use 'cmp mem64, imm32'.
        // Otherwise, load the value into a reg and use 'cmp mem64, reg64'.
        if ((int)compiler->gsGlobalSecurityCookieVal != (ssize_t)compiler->gsGlobalSecurityCookieVal)
        {
            genSetRegToIcon(regGSCheck, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
            getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
        }
        else
#endif // defined(_TARGET_AMD64_)
        {
            assert((int)compiler->gsGlobalSecurityCookieVal == (ssize_t)compiler->gsGlobalSecurityCookieVal);
            getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
                                      (int)compiler->gsGlobalSecurityCookieVal);
        }
    }
    else
    {
        // Ngen case - GS cookie value needs to be accessed through an indirection.

        pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs);

        instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
        getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSCheck, regGSCheck, 0);
        getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
    }

    BasicBlock*  gsCheckBlk = genCreateTempLabel();
    emitJumpKind jmpEqual   = genJumpKindForOper(GT_EQ, CK_SIGNED);
    inst_JMP(jmpEqual, gsCheckBlk);
    genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
    genDefineTempLabel(gsCheckBlk);

    genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs);
}
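// An illustrative (not verbatim) sequence for the non-ngen AMD64 case with a cookie
// value that fits in 32 bits; the frame offset and constant are made up:
//      cmp     qword ptr [rbp-08H], 12345678H  ; compare the stack cookie slot
//      je      SHORT L_ok
//      call    CORINFO_HELP_FAIL_FAST          ; cookie was trashed: fail fast
//  L_ok: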
/*****************************************************************************
 *
 *  Generate code for all the basic blocks in the function.
 */

void CodeGen::genCodeForBBlist()
{
    unsigned   varNum;
    LclVarDsc* varDsc;

    unsigned savedStkLvl;

#ifdef DEBUG
    genInterruptibleUsed = true;

    // You have to be careful if you create basic blocks from now on
    compiler->fgSafeBasicBlockCreation = false;

    // This stress mode is not compatible with fully interruptible GC
    if (genInterruptible && compiler->opts.compStackCheckOnCall)
    {
        compiler->opts.compStackCheckOnCall = false;
    }

    // This stress mode is not compatible with fully interruptible GC
    if (genInterruptible && compiler->opts.compStackCheckOnRet)
    {
        compiler->opts.compStackCheckOnRet = false;
    }
#endif // DEBUG

    // Prepare the blocks for exception handling codegen: mark the blocks that need labels.
    genPrepForEHCodegen();

    assert(!compiler->fgFirstBBScratch ||
           compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
    /* Initialize the spill tracking logic */

    regSet.rsSpillBeg();

#ifdef DEBUGGING_SUPPORT
    /* Initialize the line# tracking logic */

    if (compiler->opts.compScopeInfo)
    {
        siInit();
    }
#endif

    // The current implementation of switch tables requires the first block to have a label so it
    // can generate offsets to the switch label targets.
    // TODO-XArch-CQ: remove this when switches have been re-implemented to not use this.
    if (compiler->fgHasSwitch)
    {
        compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
    }

    genPendingCallLabel = nullptr;

    /* Initialize the pointer tracking code */

    gcInfo.gcRegPtrSetInit();
    gcInfo.gcVarPtrSetInit();

    /* If any arguments live in registers, mark those regs as such */

    for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
    {
        /* Is this variable a parameter assigned to a register? */

        if (!varDsc->lvIsParam || !varDsc->lvRegister)
        {
            continue;
        }

        /* Is the argument live on entry to the method? */

        if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
        {
            continue;
        }

        /* Is this a floating-point argument? */

        if (varDsc->IsFloatRegType())
        {
            continue;
        }

        noway_assert(!varTypeIsFloating(varDsc->TypeGet()));

        /* Mark the register as holding the variable */

        regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
    }

    unsigned finallyNesting = 0;

    // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
    // allocation at the start of each basic block.
    VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
    /*-------------------------------------------------------------------------
     *
     *  Walk the basic blocks and generate code for each one
     *
     */

    BasicBlock* block;
    BasicBlock* lblk; /* previous block */

    for (lblk = nullptr, block = compiler->fgFirstBB; block != nullptr; lblk = block, block = block->bbNext)
    {
#ifdef DEBUG
        if (compiler->verbose)
        {
            printf("\n=============== Generating ");
            block->dspBlockHeader(compiler, true, true);
            compiler->fgDispBBLiveness(block);
        }
#endif // DEBUG
        // Figure out which registers hold variables on entry to this block

        regSet.ClearMaskVars();
        gcInfo.gcRegGCrefSetCur = RBM_NONE;
        gcInfo.gcRegByrefSetCur = RBM_NONE;

        compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block);

        genUpdateLife(block->bbLiveIn);

        // Even if liveness didn't change, we need to update the registers containing GC references.
        // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
        // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
        // here. That would require handling the changes in recordVarLocationsAtStartOfBB().

        regMaskTP newLiveRegSet  = RBM_NONE;
        regMaskTP newRegGCrefSet = RBM_NONE;
        regMaskTP newRegByrefSet = RBM_NONE;
#ifdef DEBUG
        VARSET_TP VARSET_INIT_NOCOPY(removedGCVars, VarSetOps::MakeEmpty(compiler));
        VARSET_TP VARSET_INIT_NOCOPY(addedGCVars, VarSetOps::MakeEmpty(compiler));
#endif
        VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex);
        while (iter.NextElem(compiler, &varIndex))
        {
            unsigned   varNum = compiler->lvaTrackedToVarNum[varIndex];
            LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);

            if (varDsc->lvIsInReg())
            {
                newLiveRegSet |= varDsc->lvRegMask();
                if (varDsc->lvType == TYP_REF)
                {
                    newRegGCrefSet |= varDsc->lvRegMask();
                }
                else if (varDsc->lvType == TYP_BYREF)
                {
                    newRegByrefSet |= varDsc->lvRegMask();
                }
#ifdef DEBUG
                if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
                {
                    VarSetOps::AddElemD(compiler, removedGCVars, varIndex);
                }
#endif // DEBUG
                VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
            }
            else if (compiler->lvaIsGCTracked(varDsc))
            {
#ifdef DEBUG
                if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
                {
                    VarSetOps::AddElemD(compiler, addedGCVars, varIndex);
                }
#endif // DEBUG
                VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
            }
        }

        regSet.rsMaskVars = newLiveRegSet;
#ifdef DEBUG
        if (compiler->verbose)
        {
            if (!VarSetOps::IsEmpty(compiler, addedGCVars))
            {
                printf("\t\t\t\t\t\t\tAdded GCVars: ");
                dumpConvertedVarSet(compiler, addedGCVars);
                printf("\n");
            }
            if (!VarSetOps::IsEmpty(compiler, removedGCVars))
            {
                printf("\t\t\t\t\t\t\tRemoved GCVars: ");
                dumpConvertedVarSet(compiler, removedGCVars);
                printf("\n");
            }
        }
#endif // DEBUG

        gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUGARG(true));
        gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUGARG(true));
        /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
           represent the exception object (TYP_REF).
           We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
           to the block, it will be the first thing evaluated
           (thanks to GTF_ORDER_SIDEEFF).
         */

        if (handlerGetsXcptnObj(block->bbCatchTyp))
        {
            for (GenTree* node : LIR::AsRange(block))
            {
                if (node->OperGet() == GT_CATCH_ARG)
                {
                    gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT);
                    break;
                }
            }
        }
        /* Start a new code output block */

        genUpdateCurrentFunclet(block);

        if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
        {
            getEmitter()->emitLoopAlign();
        }

#ifdef DEBUG
        if (compiler->opts.dspCode)
        {
            printf("\n      L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
        }
#endif

        block->bbEmitCookie = nullptr;

        if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
        {
            /* Mark a label and update the current set of live GC refs */

            block->bbEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
                                                             gcInfo.gcRegByrefSetCur, FALSE);
        }

        if (block == compiler->fgFirstColdBlock)
        {
#ifdef DEBUG
            if (compiler->verbose)
            {
                printf("\nThis is the start of the cold region of the method\n");
            }
#endif
            // We should never have a block that falls through into the Cold section
            noway_assert(!lblk->bbFallsThrough());

            // We require the block that starts the Cold section to have a label
            noway_assert(block->bbEmitCookie);
            getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
        }
        /* Both stacks are always empty on entry to a basic block */

        genStackLevel = 0;

        savedStkLvl = genStackLevel;

        /* Tell everyone which basic block we're working on */

        compiler->compCurBB = block;

#ifdef DEBUGGING_SUPPORT
        siBeginBlock(block);

        // BBF_INTERNAL blocks don't correspond to any single IL instruction.
        if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) &&
            !compiler->fgBBisScratch(block)) // If the block is the distinguished first scratch block, then no need to
                                             // emit a NO_MAPPING entry, immediately after the prolog.
        {
            genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
        }

        bool firstMapping = true;
#endif // DEBUGGING_SUPPORT
        /*---------------------------------------------------------------------
         *
         *  Generate code for each statement-tree in the block
         *
         */
        CLANG_FORMAT_COMMENT_ANCHOR;

#if FEATURE_EH_FUNCLETS
        if (block->bbFlags & BBF_FUNCLET_BEG)
        {
            genReserveFuncletProlog(block);
        }
#endif // FEATURE_EH_FUNCLETS

        // Clear compCurStmt and compCurLifeTree.
        compiler->compCurStmt     = nullptr;
        compiler->compCurLifeTree = nullptr;

        // Traverse the block in linear order, generating code for each node as we
        // encounter it.
        CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUGGING_SUPPORT
        IL_OFFSETX currentILOffset = BAD_IL_OFFSET;
#endif
        for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
        {
#ifdef DEBUGGING_SUPPORT
            // Do we have a new IL offset?
            if (node->OperGet() == GT_IL_OFFSET)
            {
                genEnsureCodeEmitted(currentILOffset);
                currentILOffset = node->gtStmt.gtStmtILoffsx;
                genIPmappingAdd(currentILOffset, firstMapping);
                firstMapping = false;
            }
#endif // DEBUGGING_SUPPORT

#ifdef DEBUG
            if (node->OperGet() == GT_IL_OFFSET)
            {
                noway_assert(node->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize ||
                             node->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET);

                if (compiler->opts.dspCode && compiler->opts.dspInstrs &&
                    node->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
                {
                    while (genCurDispOffset <= node->gtStmt.gtStmtLastILoffs)
                    {
                        genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, ">    ");
                    }
                }
            }
#endif // DEBUG

            genCodeForTreeNode(node);
            if (node->gtHasReg() && node->gtLsraInfo.isLocalDefUse)
            {
                genConsumeReg(node);
            }
        } // end for each node in block
        // The following set of register spill checks and GC pointer tracking checks used to be
        // performed at statement boundaries. Now, with LIR, there are no statements, so they are
        // performed at the end of each block.
        // TODO: could these checks be performed more frequently? E.g., at each location where
        // the register allocator says there are no live non-variable registers. Perhaps this could
        // be done by (a) keeping a running count of live non-variable registers by using
        // gtLsraInfo.srcCount and gtLsraInfo.dstCount to decrement and increment the count, respectively,
        // and running the checks when the count is zero. Or, (b) use the map maintained by LSRA
        // (operandToLocationInfoMap) to mark a node somehow when, after the execution of that node,
        // there will be no live non-variable registers.

        regSet.rsSpillChk();

#ifdef DEBUG
        /* Make sure we didn't bungle pointer register tracking */

        regMaskTP ptrRegs       = gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
        regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;

        // If return is a GC-type, clear it. Note that if a common
        // epilog is generated (genReturnBB) it has a void return
        // even though we might return a ref. We can't use the compRetType
        // as the determiner because something we are tracking as a byref
        // might be used as a return value of an int function (which is legal)
        GenTree* blockLastNode = block->lastNode();
        if ((blockLastNode != nullptr) && (blockLastNode->gtOper == GT_RETURN) &&
            (varTypeIsGC(compiler->info.compRetType) ||
             (blockLastNode->gtOp.gtOp1 != nullptr && varTypeIsGC(blockLastNode->gtOp.gtOp1->TypeGet()))))
        {
            nonVarPtrRegs &= ~RBM_INTRET;
        }

        if (nonVarPtrRegs)
        {
            printf("Regset after BB%02u gcr=", block->bbNum);
            printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
            compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
            printf(", byr=");
            printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
            compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
            printf(", regVars=");
            printRegMaskInt(regSet.rsMaskVars);
            compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
            printf("\n");
        }

        noway_assert(nonVarPtrRegs == RBM_NONE);
#endif // DEBUG
#if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
        if (block->bbNext == nullptr)
        {
            // Unit testing of the AMD64 emitter: generate a bunch of instructions into the last block
            // (it's as good as any, but better than the prolog, which can only be a single instruction
            // group) then use COMPlus_JitLateDisasm=* to see if the late disassembler
            // thinks the instructions are the same as we do.
            genAmd64EmitterUnitTests();
        }
#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
#ifdef DEBUGGING_SUPPORT
        // It is possible to reach the end of the block without generating code for the current IL offset.
        // For example, if the following IR ends the current block, no code will have been generated for
        // IL offset 21:
        //
        //          (  0,  0) [000040] ------------                il_offset void   IL offset: 21
        //
        //     N001 (  0,  0) [000039] ------------                nop       void
        //
        // This can lead to problems when debugging the generated code. To prevent these issues, make sure
        // we've generated code for the last IL offset we saw in the block.
        genEnsureCodeEmitted(currentILOffset);

        if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
        {
            siEndBlock(block);

            /* Is this the last block, and are there any open scopes left ? */

            bool isLastBlockProcessed = (block->bbNext == nullptr);
            if (block->isBBCallAlwaysPair())
            {
                isLastBlockProcessed = (block->bbNext->bbNext == nullptr);
            }

            if (isLastBlockProcessed && siOpenScopeList.scNext)
            {
                /* This assert no longer holds, because we may insert a throw
                   block to demarcate the end of a try or finally region when they
                   are at the end of the method. It would be nice if we could fix
                   our code so that this throw block will no longer be necessary. */

                // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);

                siCloseAllOpenScopes();
            }
        }
#endif // DEBUGGING_SUPPORT
        genStackLevel -= savedStkLvl;

#ifdef DEBUG
        // compCurLife should be equal to the liveOut set, except that we don't keep
        // it up to date for vars that are not register candidates
        // (it would be nice to have a xor set function)

        VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife));
        VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut));
        VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex);
        while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex))
        {
            unsigned   varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex];
            LclVarDsc* varDsc = compiler->lvaTable + varNum;
            assert(!varDsc->lvIsRegCandidate());
        }
#endif

        /* Both stacks should always be empty on exit from a basic block */
        noway_assert(genStackLevel == 0);
#ifdef _TARGET_AMD64_
        // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several
        // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack
        // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region.
        // The document "X64 and ARM ABIs.docx" has more details. The situations:
        // 1. If the call instruction is in a different EH region from the instruction that follows it.
        // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might
        //    be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that
        //    matters here.)
        // We handle case #1 here, and case #2 in the emitter.
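        // For example (illustrative; "SomeHelper" is a made-up callee): if a call ends a try
        // region and the next block starts the associated handler region, we emit
        //      call    SomeHelper
        //      nop                     ; keeps the post-call "return address" inside the try region
        // so that an instruction pointer computed by the stack walker maps to the correct EH region.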
        if (getEmitter()->emitIsLastInsCall())
        {
            // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold?
            // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically,
            // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions
            // generated before the OS epilog starts, such as a GS cookie check.
            if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
            {
                // We only need the NOP if we're not going to generate any more code as part of the block end.

                switch (block->bbJumpKind)
                {
                    case BBJ_ALWAYS:
                    case BBJ_THROW:
                    case BBJ_CALLFINALLY:
                    case BBJ_EHCATCHRET:
                        // We're going to generate more code below anyway, so no need for the NOP.

                    case BBJ_RETURN:
                    case BBJ_EHFINALLYRET:
                    case BBJ_EHFILTERRET:
                        // These are the "epilog follows" case, handled in the emitter.

                        break;

                    case BBJ_NONE:
                        if (block->bbNext == nullptr)
                        {
                            // Call immediately before the end of the code; we should never get here.
                            instGen(INS_BREAKPOINT); // This should never get executed
                        }
                        else
                        {
                            // We need the NOP
                            instGen(INS_nop);
                        }
                        break;

                    case BBJ_COND:
                    case BBJ_SWITCH:
                        // These can't have a call as the last instruction!

                    default:
                        noway_assert(!"Unexpected bbJumpKind");
                        break;
                }
            }
        }
#endif // _TARGET_AMD64_
        /* Do we need to generate a jump or return? */

        switch (block->bbJumpKind)
        {
            case BBJ_ALWAYS:
                inst_JMP(EJ_jmp, block->bbJumpDest);
                break;

            case BBJ_RETURN:
                genExitCode(block);
                break;

            case BBJ_THROW:
                // If we have a throw at the end of a function or funclet, we need to emit another instruction
                // afterwards to help the OS unwinder determine the correct context during unwind.
                // We insert an unexecuted breakpoint instruction in several situations
                // following a throw instruction:
                // 1. If the throw is the last instruction of the function or funclet. This helps
                //    the OS unwinder determine the correct context during an unwind from the
                //    unexecuted breakpoint.
                // 2. If this is the last block of the hot section.
                // 3. If the subsequent block is a special throw block.
                // 4. On AMD64, if the next block is in a different EH region.
                if ((block->bbNext == nullptr) || (block->bbNext->bbFlags & BBF_FUNCLET_BEG) ||
                    !BasicBlock::sameEHRegion(block, block->bbNext) ||
                    (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
                    block->bbNext == compiler->fgFirstColdBlock)
                {
                    instGen(INS_BREAKPOINT); // This should never get executed
                }
                break;
            case BBJ_CALLFINALLY:

#if FEATURE_EH_FUNCLETS

                // Generate a call to the finally, like this:
                //      mov         rcx,qword ptr [rbp + 20H]       // Load rcx with PSPSym
                //      call        finally-funclet
                //      jmp         finally-return                  // Only for non-retless finally calls
                // The jmp can be a NOP if we're going to the next block.
                // If we're generating code for the main function (not a funclet), and there is no localloc,
                // then RSP at this point is the same value as that stored in the PSPSym. So just copy RSP
                // instead of loading the PSPSym in this case.

                if (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT))
                {
                    inst_RV_RV(INS_mov, REG_ARG_0, REG_SPBASE, TYP_I_IMPL);
                }
                else
                {
                    getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0);
                }
                getEmitter()->emitIns_J(INS_call, block->bbJumpDest);

                if (block->bbFlags & BBF_RETLESS_CALL)
                {
                    // We have a retless call, and the last instruction generated was a call.
                    // If the next block is in a different EH region (or is the end of the code
                    // block), then we need to generate a breakpoint here (since it will never
                    // get executed) to get proper unwind behavior.

                    if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
                    {
                        instGen(INS_BREAKPOINT); // This should never get executed
                    }
                }
                else
                {
                    // Because of the way the flowgraph is connected, the liveness info for this one instruction
                    // after the call is not (cannot be) correct in cases where a variable has a last use in the
                    // handler. So turn off GC reporting for this single instruction.
                    getEmitter()->emitDisableGC();

                    // Now go to where the finally funclet needs to return to.
                    if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
                    {
                        // Fall-through.
                        // TODO-XArch-CQ: Can we get rid of this instruction, and just have the call return directly
                        // to the next instruction? This would depend on stack walking from within the finally
                        // handler working without this instruction being in this special EH region.
                    }
                    else
                    {
                        inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
                    }

                    getEmitter()->emitEnableGC();
                }
#else // !FEATURE_EH_FUNCLETS

                // If we are about to invoke a finally locally from a try block, we have to set the ShadowSP slot
                // corresponding to the finally's nesting level. When invoked in response to an exception, the
                // EE does this.
                //
                // We have a BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
                //
                // We will emit :
                //      mov [ebp - (n + 1)], 0
                //      mov [ebp -  n     ], 0xFC

                noway_assert(isFramePointerUsed());

                // Get the nesting level which contains the finally
                compiler->fgGetNestingLevel(block, &finallyNesting);

                // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
                unsigned filterEndOffsetSlotOffs;
                filterEndOffsetSlotOffs =
                    (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);

                unsigned curNestingSlotOffs;
                curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE));

                // Zero out the slot for the next nesting level
                instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
                                           curNestingSlotOffs - TARGET_POINTER_SIZE);
                instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
                                           curNestingSlotOffs);

                // Now push the address where the finally funclet should return to directly.
                if (!(block->bbFlags & BBF_RETLESS_CALL))
                {
                    assert(block->isBBCallAlwaysPair());
                    getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
                }
                else
                {
                    // EE expects a DWORD, so we give it 0
                    inst_IV(INS_push_hide, 0);
                }

                // Jump to the finally BB
                inst_JMP(EJ_jmp, block->bbJumpDest);

#endif // !FEATURE_EH_FUNCLETS
                // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
                // jump target using bbJumpDest - that is already used to point
                // to the finally block. So just skip past the BBJ_ALWAYS unless the
                // block is RETLESS.
                if (!(block->bbFlags & BBF_RETLESS_CALL))
                {
                    assert(block->isBBCallAlwaysPair());

                    lblk  = block;
                    block = block->bbNext;
                }
                break;
#if FEATURE_EH_FUNCLETS

            case BBJ_EHCATCHRET:
                // Set RAX to the address the VM should return to after the catch.
                // Generate a RIP-relative
                //         lea reg, [rip + disp32] ; the RIP is implicit
                // which will be position-independent.
                getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, block->bbJumpDest, REG_INTRET);
                __fallthrough;

            case BBJ_EHFINALLYRET:
            case BBJ_EHFILTERRET:
                genReserveFuncletEpilog(block);
                break;
#else // !FEATURE_EH_FUNCLETS

            case BBJ_EHCATCHRET:
                noway_assert(!"Unexpected BBJ_EHCATCHRET"); // not used on x86

            case BBJ_EHFINALLYRET:
            case BBJ_EHFILTERRET:
            {
                // The last statement of the block must be a GT_RETFILT, which has already been generated.
                assert(block->lastNode() != nullptr);
                assert(block->lastNode()->OperGet() == GT_RETFILT);

                if (block->bbJumpKind == BBJ_EHFINALLYRET)
                {
                    assert(block->lastNode()->gtOp.gtOp1 == nullptr); // op1 == nullptr means endfinally

                    // Return using a pop-jmp sequence. As the "try" block calls
                    // the finally with a jmp, this leaves the x86 call-ret stack
                    // balanced in the normal flow path.
                    noway_assert(isFramePointerRequired());
                    inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
                    inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
                }
                else
                {
                    assert(block->bbJumpKind == BBJ_EHFILTERRET);

                    // The return value has already been computed.
                    instGen_Return(0);
                }
            }
            break;

#endif // !FEATURE_EH_FUNCLETS
            case BBJ_NONE:
            case BBJ_COND:
            case BBJ_SWITCH:
                break;

            default:
                noway_assert(!"Unexpected bbJumpKind");
                break;
        }

#ifdef DEBUG
        compiler->compCurBB = nullptr;
#endif // DEBUG

    } //------------------ END-FOR each block of the method -------------------

    /* Nothing is live at this point */
    genUpdateLife(VarSetOps::MakeEmpty(compiler));

    /* Finalize the spill tracking logic */

    regSet.rsSpillEnd();

    /* Finalize the temp tracking logic */

    compiler->tmpEnd();

#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate,
               compiler->compSizeEstimate);
        printf("%s\n", compiler->info.compFullName);
    }
#endif
}
// return the child that has the same reg as the dst (if any)
// other child returned (out param) in 'other'
GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/)
{
    if (tree->gtRegNum == REG_NA)
    {
        other = nullptr;
        return nullptr;
    }

    GenTreePtr op1 = tree->gtOp.gtOp1;
    GenTreePtr op2 = tree->gtOp.gtOp2;
    if (op1->gtRegNum == tree->gtRegNum)
    {
        other = op2;
        return op1;
    }
    if (op2->gtRegNum == tree->gtRegNum)
    {
        other = op1;
        return op2;
    }
    else
    {
        other = nullptr;
        return nullptr;
    }
}
// Move an immediate value into an integer register
void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
{
    // reg cannot be a FP register
    assert(!genIsValidFloatReg(reg));

    if (!compiler->opts.compReloc)
    {
        size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
    }

    if ((imm == 0) && !EA_IS_RELOC(size))
    {
        instGen_Set_Reg_To_Zero(size, reg, flags);
    }
    else
    {
        if (genDataIndirAddrCanBeEncodedAsPCRelOffset(imm))
        {
            getEmitter()->emitIns_R_AI(INS_lea, EA_PTR_DSP_RELOC, reg, imm);
        }
        else
        {
            getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
        }
    }
    regTracker.rsTrackRegIntCns(reg, imm);
}
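// Illustrative outcomes of instGen_Set_Reg_To_Imm (assumed shapes, not verbatim):
//      imm == 0, no reloc       ->  xor ecx, ecx
//      plain immediate          ->  mov rcx, imm
//      PC-relative encodable    ->  lea rcx, [rip+disp32]   ; used for relocatable addresses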
/***********************************************************************************
 *
 * Generate code to set a register 'targetReg' of type 'targetType' to the constant
 * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call
 * genProduceReg() on the target register.
 */
void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree)
{
    switch (tree->gtOper)
    {
        case GT_CNS_INT:
        {
            // relocatable values tend to come down as a CNS_INT of native int type
            // so the line between these two opcodes is kind of blurry
            GenTreeIntConCommon* con    = tree->AsIntConCommon();
            ssize_t              cnsVal = con->IconValue();

            if (con->ImmedValNeedsReloc(compiler))
            {
                instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal);
                regTracker.rsTrackRegTrash(targetReg);
            }
            else
            {
                genSetRegToIcon(targetReg, cnsVal, targetType);
            }
        }
        break;

        case GT_CNS_DBL:
        {
            double constValue = tree->gtDblCon.gtDconVal;

            // Make sure we use "xorpd reg, reg" only for +ve zero constant (0.0) and not for -ve zero (-0.0)
            if (*(__int64*)&constValue == 0)
            {
                // A faster/smaller way to generate 0
                instruction ins = genGetInsForOper(GT_XOR, targetType);
                inst_RV_RV(ins, targetReg, targetReg, targetType);
            }
            else
            {
                GenTreePtr cns;
                if (targetType == TYP_FLOAT)
                {
                    float f = forceCastToFloat(constValue);
                    cns     = genMakeConst(&f, targetType, tree, false);
                }
                else
                {
                    cns = genMakeConst(&constValue, targetType, tree, true);
                }

                inst_RV_TT(ins_Load(targetType), targetReg, cns);
            }
        }
        break;

        default:
            unreached();
    }
}
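// Illustrative emissions for the GT_CNS_DBL paths above (not verbatim):
//      0.0 (TYP_DOUBLE)  ->  xorpd xmm0, xmm0          ; cheap all-zero idiom
//      1.5 (TYP_DOUBLE)  ->  movsd xmm0, [reloc data]  ; constant materialized in the data section
// Note that -0.0 must take the load path: its bit pattern (sign bit set) is not all zeros,
// which is exactly why the check above compares the raw 64-bit pattern against 0.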
// Generate code to get the high N bits of a N*N=2N bit multiplication result
void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
{
    if (treeNode->OperGet() == GT_MULHI)
    {
        assert(!(treeNode->gtFlags & GTF_UNSIGNED));
    }
    assert(!treeNode->gtOverflowEx());

    regNumber targetReg  = treeNode->gtRegNum;
    var_types targetType = treeNode->TypeGet();
    emitter*  emit       = getEmitter();
    emitAttr  size       = emitTypeSize(treeNode);
    GenTree*  op1        = treeNode->gtOp.gtOp1;
    GenTree*  op2        = treeNode->gtOp.gtOp2;

    // to get the high bits of the multiply, we are constrained to using the
    // 1-op form:  RDX:RAX = RAX * rm
    // The 3-op form (Rx=Ry*Rz) does not support it.

    genConsumeOperands(treeNode->AsOp());

    GenTree* regOp = op1;
    GenTree* rmOp  = op2;

    // Set rmOp to the contained memory operand (if any)
    if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == targetReg)))
    {
        regOp = op2;
        rmOp  = op1;
    }
    assert(!regOp->isContained());

    // Setup targetReg when neither of the source operands was a matching register
    if (regOp->gtRegNum != targetReg)
    {
        inst_RV_RV(ins_Copy(targetType), targetReg, regOp->gtRegNum, targetType);
    }

    instruction ins;
    if ((treeNode->gtFlags & GTF_UNSIGNED) == 0)
    {
        ins = INS_imulEAX;
    }
    else
    {
        ins = INS_mulEAX;
    }
    emit->emitInsBinary(ins, size, treeNode, rmOp);

    // Move the result to the desired register, if necessary
    if (treeNode->OperGet() == GT_MULHI && targetReg != REG_RDX)
    {
        inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
    }
}
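// A minimal sketch of the sequence produced for "hi = mulhi(a, b)" (illustrative only):
//      mov     rax, a              ; regOp copied into RAX if it isn't already there
//      imul    qword ptr [b]       ; one-operand form: RDX:RAX = RAX * rm
//      mov     dst, rdx            ; the high half of the double-width product is the result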
//------------------------------------------------------------------------
// genCodeForDivMod: Generate code for a DIV or MOD operation.
//
// Arguments:
//    treeNode - the node to generate the code for
//
void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
{
    GenTree*   dividend   = treeNode->gtOp1;
    GenTree*   divisor    = treeNode->gtOp2;
    genTreeOps oper       = treeNode->OperGet();
    emitAttr   size       = emitTypeSize(treeNode);
    regNumber  targetReg  = treeNode->gtRegNum;
    var_types  targetType = treeNode->TypeGet();
    emitter*   emit       = getEmitter();

#ifdef _TARGET_X86_
    bool     dividendIsLong = varTypeIsLong(dividend->TypeGet());
    GenTree* dividendLo     = nullptr;
    GenTree* dividendHi     = nullptr;

    if (dividendIsLong)
    {
        // If dividend is a GT_LONG, then we need to make sure its lo and hi parts are not contained.
        dividendLo = dividend->gtGetOp1();
        dividendHi = dividend->gtGetOp2();

        assert(!dividendLo->isContained());
        assert(!dividendHi->isContained());
        assert(divisor->IsCnsIntOrI());
    }
    else
#endif
    {
        // dividend is not contained.
        assert(!dividend->isContained());
    }
    genConsumeOperands(treeNode->AsOp());
    if (varTypeIsFloating(targetType))
    {
        // divisor is not contained or, if contained, is a memory op.
        // Note that a reg optional operand is treated as a memory op
        // if no register is allocated to it.
        assert(!divisor->isContained() || divisor->isMemoryOp() || divisor->IsCnsFltOrDbl() ||
               divisor->IsRegOptional());

        // Floating point div/rem operation
        assert(oper == GT_DIV || oper == GT_MOD);

        if (dividend->gtRegNum == targetReg)
        {
            emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
        }
        else if (!divisor->isContained() && divisor->gtRegNum == targetReg)
        {
            // It is not possible to generate 2-operand divss or divsd where reg2 = reg1 / reg2
            // because divss/divsd reg1, reg2 will over-write reg1. Therefore, in case of AMD64
            // LSRA has to make sure that such a register assignment is not generated for floating
            // point div/rem operations.
            noway_assert(
                !"GT_DIV/GT_MOD (float): case of reg2 = reg1 / reg2, LSRA should never generate such a reg assignment");
        }
        else
        {
            inst_RV_RV(ins_Copy(targetType), targetReg, dividend->gtRegNum, targetType);
            emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
        }
    }
    else
    {
#ifdef _TARGET_X86_
        if (dividendIsLong)
        {
            assert(dividendLo != nullptr && dividendHi != nullptr);

            // dividendLo must be in RAX; dividendHi must be in RDX
            if (dividendLo->gtRegNum != REG_EAX)
            {
                inst_RV_RV(INS_mov, REG_EAX, dividendLo->gtRegNum, targetType);
            }
            if (dividendHi->gtRegNum != REG_EDX)
            {
                inst_RV_RV(INS_mov, REG_EDX, dividendHi->gtRegNum, targetType);
            }
        }
        else
#endif
        if (dividend->gtRegNum != REG_RAX)
        {
            // dividend must be in RAX
            inst_RV_RV(INS_mov, REG_RAX, dividend->gtRegNum, targetType);
        }

        // zero or sign extend rax to rdx
#ifdef _TARGET_X86_
        if (!dividendIsLong)
#endif
        {
            if (oper == GT_UMOD || oper == GT_UDIV)
            {
                instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
            }
            else
            {
                emit->emitIns(INS_cdq, size);
                // the cdq instruction writes RDX, so clear the gcInfo for RDX
                gcInfo.gcMarkRegSetNpt(RBM_RDX);
            }
        }

        // Perform the 'targetType' (64-bit or 32-bit) divide instruction
        instruction ins;
        if (oper == GT_UMOD || oper == GT_UDIV)
        {
            ins = INS_div;
        }
        else
        {
            ins = INS_idiv;
        }

        emit->emitInsBinary(ins, size, treeNode, divisor);

        // DIV/IDIV instructions always store the quotient in RAX and the remainder in RDX.
        // Move the result to the desired register, if necessary
        if (oper == GT_DIV || oper == GT_UDIV)
        {
            if (targetReg != REG_RAX)
            {
                inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
            }
        }
        else
        {
            assert((oper == GT_MOD) || (oper == GT_UMOD));
            if (targetReg != REG_RDX)
            {
                inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
            }
        }
    }
    genProduceReg(treeNode);
}
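// Illustrative signed-divide sequence for "dst = a / b" at 32-bit size (not verbatim):
//      mov     eax, a          ; dividend must be in EAX
//      cdq                     ; sign-extend EAX into EDX:EAX
//      idiv    b               ; quotient -> EAX, remainder -> EDX
//      mov     dst, eax        ; only if dst wasn't allocated to EAX
// For GT_UDIV/GT_UMOD the cdq becomes "xor edx, edx" and the idiv becomes div.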
//------------------------------------------------------------------------
// genCodeForBinary: Generate code for many binary arithmetic operators
//                   The caller is expected to have called genConsumeOperands() already.
//
// Arguments:
//    treeNode - The binary operation for which we are generating code.
//
// Return Value:
//    None.
//
// Notes:
//    Mul and div variants have special constraints on x64 so are not handled here.
//    See the assert below for the operators that are handled.
//
void CodeGen::genCodeForBinary(GenTree* treeNode)
{
    const genTreeOps oper       = treeNode->OperGet();
    regNumber        targetReg  = treeNode->gtRegNum;
    var_types        targetType = treeNode->TypeGet();
    emitter*         emit       = getEmitter();

#if defined(_TARGET_64BIT_)
    assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD || oper == GT_SUB);
#else  // !defined(_TARGET_64BIT_)
    assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD_LO || oper == GT_ADD_HI ||
           oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_MUL_LONG || oper == GT_DIV_HI || oper == GT_MOD_HI ||
           oper == GT_ADD || oper == GT_SUB);
#endif // !defined(_TARGET_64BIT_)

    GenTreePtr op1 = treeNode->gtGetOp1();
    GenTreePtr op2 = treeNode->gtGetOp2();

    // Commutative operations can mark op1 as contained to generate "op reg, memop/immed"
    if (op1->isContained())
    {
        assert(treeNode->OperIsCommutative());
        assert(op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl() || op1->IsIntCnsFitsInI32() || op1->IsRegOptional());

        op1 = treeNode->gtGetOp2();
        op2 = treeNode->gtGetOp1();
    }
    instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);

    // The arithmetic node must be sitting in a register (since it's not contained)
    noway_assert(targetReg != REG_NA);

    regNumber op1reg = op1->isContained() ? REG_NA : op1->gtRegNum;
    regNumber op2reg = op2->isContained() ? REG_NA : op2->gtRegNum;

    GenTreePtr dst;
    GenTreePtr src;

    // This is the case of reg1 = reg1 op reg2
    // We're ready to emit the instruction without any moves
    if (op1reg == targetReg)
    {
        dst = op1;
        src = op2;
    }
    // We have reg1 = reg2 op reg1
    // In order for this operation to be correct,
    // we need op to be a commutative operation so
    // we can convert it into reg1 = reg1 op reg2 and emit
    // the same code as above
    else if (op2reg == targetReg)
    {
        noway_assert(GenTree::OperIsCommutative(oper));
        dst = op2;
        src = op1;
    }
    // now we know there are 3 different operands so attempt to use LEA
    else if (oper == GT_ADD && !varTypeIsFloating(treeNode) && !treeNode->gtOverflowEx() // LEA does not set flags
             && (op2->isContainedIntOrIImmed() || !op2->isContained()))
    {
        if (op2->isContainedIntOrIImmed())
        {
            emit->emitIns_R_AR(INS_lea, emitTypeSize(treeNode), targetReg, op1reg,
                               (int)op2->AsIntConCommon()->IconValue());
        }
        else
        {
            assert(op2reg != REG_NA);
            emit->emitIns_R_ARX(INS_lea, emitTypeSize(treeNode), targetReg, op1reg, op2reg, 1, 0);
        }
        genProduceReg(treeNode);
        return;
    }
    // dest, op1 and op2 registers are different:
    // reg3 = reg1 op reg2
    // We can implement this by issuing a mov:
    // reg3 = reg1
    // reg3 = reg3 op reg2
    else
    {
        inst_RV_RV(ins_Copy(targetType), targetReg, op1reg, targetType);
        regTracker.rsTrackRegCopy(targetReg, op1reg);
        gcInfo.gcMarkRegPtrVal(targetReg, targetType);
        dst = treeNode;
        src = op2;
    }
    // try to use an inc or dec
    if (oper == GT_ADD && !varTypeIsFloating(treeNode) && src->isContainedIntOrIImmed() && !treeNode->gtOverflowEx())
    {
        if (src->IsIntegralConst(1))
        {
            emit->emitIns_R(INS_inc, emitTypeSize(treeNode), targetReg);
            genProduceReg(treeNode);
            return;
        }
        else if (src->IsIntegralConst(-1))
        {
            emit->emitIns_R(INS_dec, emitTypeSize(treeNode), targetReg);
            genProduceReg(treeNode);
            return;
        }
    }
    regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src);
    noway_assert(r == targetReg);

    if (treeNode->gtOverflowEx())
    {
#if !defined(_TARGET_64BIT_)
        assert(oper == GT_ADD || oper == GT_SUB || oper == GT_ADD_HI || oper == GT_SUB_HI);
#else
        assert(oper == GT_ADD || oper == GT_SUB);
#endif
        genCheckOverflow(treeNode);
    }
    genProduceReg(treeNode);
}
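// Illustrative register shapes handled above for "dst = a + b" (not verbatim):
//      dst == a                 ->  add dst, b
//      dst == b (commutative)   ->  add dst, a
//      dst, a, b all different  ->  lea dst, [a+b]   ; saves a mov, and LEA leaves flags alone
//      b is the constant 1      ->  inc dst          ; after dst has been set up to hold a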
//------------------------------------------------------------------------
// isStructReturn: Returns whether the 'treeNode' is returning a struct.
//
// Arguments:
//    treeNode - The tree node to evaluate whether it is a struct return.
//
// Return Value:
//    For AMD64 *nix: returns true if the 'treeNode' is a GT_RETURN node of type struct.
//                    Otherwise returns false.
//    For other platforms: always returns false.
//
bool CodeGen::isStructReturn(GenTreePtr treeNode)
{
    // This method could be called for 'treeNode' of GT_RET_FILT or GT_RETURN.
    // For the GT_RET_FILT, the return is always
    // a bool or a void, for the end of a finally block.
    noway_assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
    if (treeNode->OperGet() != GT_RETURN)
    {
        return false;
    }

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
    return varTypeIsStruct(treeNode);
#else  // !FEATURE_UNIX_AMD64_STRUCT_PASSING
    assert(!varTypeIsStruct(treeNode));
    return false;
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
}
//------------------------------------------------------------------------
// genStructReturn: Generates code for returning a struct.
//
// Arguments:
//    treeNode - The GT_RETURN tree node.
//
// Return Value:
//    None
//
// Assumption:
//    op1 of the GT_RETURN node is either a GT_LCL_VAR or a multi-reg GT_CALL
//
void CodeGen::genStructReturn(GenTreePtr treeNode)
{
    assert(treeNode->OperGet() == GT_RETURN);
    GenTreePtr op1 = treeNode->gtGetOp1();

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
    if (op1->OperGet() == GT_LCL_VAR)
    {
        GenTreeLclVarCommon* lclVar = op1->AsLclVarCommon();
        LclVarDsc*           varDsc = &(compiler->lvaTable[lclVar->gtLclNum]);
        assert(varDsc->lvIsMultiRegRet);

        ReturnTypeDesc retTypeDesc;
        retTypeDesc.InitializeStructReturnType(compiler, varDsc->lvVerTypeInfo.GetClassHandle());
        unsigned regCount = retTypeDesc.GetReturnRegCount();
        assert(regCount == MAX_RET_REG_COUNT);

        if (varTypeIsEnregisterableStruct(op1))
        {
            // Right now the only enregisterable structs supported are SIMD vector types.
            assert(varTypeIsSIMD(op1));
            assert(!op1->isContained());

            // This is a case where the operand is in a single register and needs to be
            // returned in multiple ABI return registers.
            regNumber opReg = genConsumeReg(op1);
            regNumber reg0  = retTypeDesc.GetABIReturnReg(0);
            regNumber reg1  = retTypeDesc.GetABIReturnReg(1);

            if (opReg != reg0 && opReg != reg1)
            {
                // Operand reg is different from return regs.
                // Copy opReg to reg0 and let it be handled by one of the
                // two cases below.
                inst_RV_RV(ins_Copy(TYP_DOUBLE), reg0, opReg, TYP_DOUBLE);
                opReg = reg0;
            }

            if (opReg == reg0)
            {
                assert(opReg != reg1);

                // reg0 - already has the required 8 bytes in bit position [63:0].
                // reg1 = opReg.
                // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0].
                inst_RV_RV(ins_Copy(TYP_DOUBLE), reg1, opReg, TYP_DOUBLE);
            }
            else
            {
                assert(opReg == reg1);

                // reg0 = opReg.
                // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0].
                inst_RV_RV(ins_Copy(TYP_DOUBLE), reg0, opReg, TYP_DOUBLE);
            }
            inst_RV_RV_IV(INS_shufpd, EA_16BYTE, reg1, reg1, 0x01);
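            // Illustrative note: shufpd with immediate 0x01 applied to the same source and
            // destination swaps the two 8-byte halves of the 16-byte register, so each ABI
            // return register ends up with its required 8-byte chunk in bits [63:0].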
        }
        else
        {
            assert(op1->isContained());

            // Copy var on stack into ABI return registers
            int offset = 0;
            for (unsigned i = 0; i < regCount; ++i)
            {
                var_types type = retTypeDesc.GetReturnRegType(i);
                regNumber reg  = retTypeDesc.GetABIReturnReg(i);
                getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), reg, lclVar->gtLclNum, offset);
                offset += genTypeSize(type);
            }
        }
    }
    else
    {
        assert(op1->IsMultiRegCall() || op1->IsCopyOrReloadOfMultiRegCall());

        genConsumeRegs(op1);

        GenTree*        actualOp1   = op1->gtSkipReloadOrCopy();
        GenTreeCall*    call        = actualOp1->AsCall();
        ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
        unsigned        regCount    = retTypeDesc->GetReturnRegCount();
        assert(regCount == MAX_RET_REG_COUNT);

        // Handle circular dependency between call allocated regs and ABI return regs.
        //
        // It is possible under LSRA stress that the originally allocated regs of the call node,
        // say rax and rdx, are spilled and reloaded to rdx and rax respectively. But
        // GT_RETURN needs to move values as follows: rdx->rax, rax->rdx. A similar
        // kind of circular dependency could arise between the xmm0 and xmm1 return regs.
        // Codegen is expected to handle such circular dependencies.
        //
        var_types regType0      = retTypeDesc->GetReturnRegType(0);
        regNumber returnReg0    = retTypeDesc->GetABIReturnReg(0);
        regNumber allocatedReg0 = call->GetRegNumByIdx(0);

        var_types regType1      = retTypeDesc->GetReturnRegType(1);
        regNumber returnReg1    = retTypeDesc->GetABIReturnReg(1);
        regNumber allocatedReg1 = call->GetRegNumByIdx(1);

        if (op1->IsCopyOrReload())
        {
            // GT_COPY/GT_RELOAD will have a valid reg for those positions
            // that need to be copied or reloaded.
            regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0);
            if (reloadReg != REG_NA)
            {
                allocatedReg0 = reloadReg;
            }

            reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(1);
            if (reloadReg != REG_NA)
            {
                allocatedReg1 = reloadReg;
            }
        }
        if (allocatedReg0 == returnReg1 && allocatedReg1 == returnReg0)
        {
            // Circular dependency - swap allocatedReg0 and allocatedReg1
            if (varTypeIsFloating(regType0))
            {
                assert(varTypeIsFloating(regType1));

                // The fastest way to swap two XMM regs is using PXOR
                inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE);
                inst_RV_RV(INS_pxor, allocatedReg1, allocatedReg0, TYP_DOUBLE);
                inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE);
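                // Illustrative note: this is the classic three-XOR swap (a ^= b; b ^= a; a ^= b)
                // applied to XMM registers, e.g.
                //      pxor xmm0, xmm1
                //      pxor xmm1, xmm0
                //      pxor xmm0, xmm1
                // so the two values trade places without needing a scratch register.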
            }
            else
            {
                assert(varTypeIsIntegral(regType0));
                assert(varTypeIsIntegral(regType1));

                inst_RV_RV(INS_xchg, allocatedReg1, allocatedReg0, TYP_I_IMPL);
            }
        }
        else if (allocatedReg1 == returnReg0)
        {
            // Change the order of moves to correctly handle dependency.
            if (allocatedReg1 != returnReg1)
            {
                inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1);
            }

            if (allocatedReg0 != returnReg0)
            {
                inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0);
            }
        }
        else
        {
            // No circular dependency case.
            if (allocatedReg0 != returnReg0)
            {
                inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0);
            }

            if (allocatedReg1 != returnReg1)
            {
                inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1);
            }
        }
    }
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
}
//------------------------------------------------------------------------
// genReturn: Generates code for a return statement.
//            In case of a struct return, delegates to the genStructReturn method.
//
// Arguments:
//    treeNode - The GT_RETURN or GT_RETFILT tree node.
//
// Return Value:
//    None
//
void CodeGen::genReturn(GenTreePtr treeNode)
{
    assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
    GenTreePtr op1        = treeNode->gtGetOp1();
    var_types  targetType = treeNode->TypeGet();

#ifdef DEBUG
    if (targetType == TYP_VOID)
    {
        assert(op1 == nullptr);
    }
#endif // DEBUG

#ifdef _TARGET_X86_
    if (treeNode->TypeGet() == TYP_LONG)
    {
        assert(op1 != nullptr);
        noway_assert(op1->OperGet() == GT_LONG);
        GenTree* loRetVal = op1->gtGetOp1();
        GenTree* hiRetVal = op1->gtGetOp2();
        noway_assert((loRetVal->gtRegNum != REG_NA) && (hiRetVal->gtRegNum != REG_NA));

        genConsumeReg(loRetVal);
        genConsumeReg(hiRetVal);
        if (loRetVal->gtRegNum != REG_LNGRET_LO)
        {
            inst_RV_RV(ins_Copy(targetType), REG_LNGRET_LO, loRetVal->gtRegNum, TYP_INT);
        }
        if (hiRetVal->gtRegNum != REG_LNGRET_HI)
        {
            inst_RV_RV(ins_Copy(targetType), REG_LNGRET_HI, hiRetVal->gtRegNum, TYP_INT);
        }
    }
    else
#endif // _TARGET_X86_
    if (isStructReturn(treeNode))
    {
        genStructReturn(treeNode);
    }
    else if (targetType != TYP_VOID)
    {
        assert(op1 != nullptr);
        noway_assert(op1->gtRegNum != REG_NA);

        // !! NOTE !! genConsumeReg will clear op1 as a GC ref after it has
        // consumed a reg for the operand. This is because the variable
        // is dead after return. But we are issuing more instructions
        // like "profiler leave callback" after this consumption. So
        // if you are issuing more instructions after this point,
        // remember to keep the variable live up until the new method
        // exit point where it is actually dead.
        genConsumeReg(op1);

        regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
#ifdef _TARGET_X86_
        if (varTypeIsFloating(treeNode))
        {
            // Spill the return value register from an XMM register to the stack, then load it on the x87 stack.
            // If it already has a home location, use that. Otherwise, we need a temp.
            if (genIsRegCandidateLocal(op1) && compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvOnFrame)
            {
                // Store local variable to its home location, if necessary.
                if ((op1->gtFlags & GTF_REG_VAL) != 0)
                {
                    op1->gtFlags &= ~GTF_REG_VAL;
                    inst_TT_RV(ins_Store(op1->gtType,
                                         compiler->isSIMDTypeLocalAligned(op1->gtLclVarCommon.gtLclNum)),
                               op1, op1->gtRegNum);
                }
                // Now, load it to the fp stack.
                getEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->gtLclNum, 0);
            }
            else
            {
                // Spill the value, which should be in a register, then load it to the fp stack.
                // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet).
                op1->gtFlags |= GTF_SPILL;
                regSet.rsSpillTree(op1->gtRegNum, op1);
                op1->gtFlags |= GTF_SPILLED;
                op1->gtFlags &= ~GTF_SPILL;

                TempDsc* t = regSet.rsUnspillInPlace(op1, op1->gtRegNum);
                inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0);
                op1->gtFlags &= ~GTF_SPILLED;
                compiler->tmpRlsTemp(t);
            }
        }
        else
#endif // _TARGET_X86_
        {
            if (op1->gtRegNum != retReg)
            {
                inst_RV_RV(ins_Copy(targetType), retReg, op1->gtRegNum, targetType);
            }
        }
    }
#ifdef PROFILING_SUPPORTED
    // !! NOTE !!
    // TODO-AMD64-Unix: If the profiler hook is implemented on *nix, make sure that for two-register-returned
    // structs RAX and RDX are kept alive. Make the necessary changes in lowerxarch.cpp
    // in the handling of the GT_RETURN statement.
    // Such structs containing GC pointers need to be handled by calling gcInfo.gcMarkRegSetNpt
    // for the return registers containing GC refs.
    //
    // There will be a single return block while generating profiler ELT callbacks.
    //
    // Reason for not materializing the Leave callback as a GT_PROF_HOOK node after GT_RETURN:
    // The flowgraph and other places assert that the last node of a block marked as
    // BBJ_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
    // maintain such an invariant irrespective of whether a profiler hook is needed or not.
    // Also, there is not much to be gained by materializing it as an explicit node.
    if (compiler->compCurBB == compiler->genReturnBB)
    {
        // !! NOTE !!
        // Since we are invalidating the assumption that we would slip into the epilog
        // right after the "return", we need to preserve the return reg's GC state
        // across the call until actual method return.
        if (varTypeIsGC(compiler->info.compRetType))
        {
            gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetType);
        }

        genProfilingLeaveCallback();

        if (varTypeIsGC(compiler->info.compRetType))
        {
            gcInfo.gcMarkRegSetNpt(REG_INTRET);
        }
    }
#endif // PROFILING_SUPPORTED
}
1933 /*****************************************************************************
1935 * Generate code for a single node in the tree.
1936 * Preconditions: All operands have been evaluated
1939 void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
1941 regNumber targetReg;
1942 #if !defined(_TARGET_64BIT_)
1943 if (treeNode->TypeGet() == TYP_LONG)
1945 // All long enregistered nodes will have been decomposed into their
1946 // constituent lo and hi nodes.
1950 #endif // !defined(_TARGET_64BIT_)
1952 targetReg = treeNode->gtRegNum;
1954 var_types targetType = treeNode->TypeGet();
1955 emitter* emit = getEmitter();
1958 // Validate that all the operands for the current node are consumed in order.
1959 // This is important because LSRA ensures that any necessary copies will be
1960 // handled correctly.
1961 lastConsumedNode = nullptr;
1962 if (compiler->verbose)
1964 unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
1965 printf("Generating: ");
1966 compiler->gtDispTree(treeNode, nullptr, nullptr, true);
1970 // Is this a node whose value is already in a register? LSRA denotes this by
1971 // setting the GTF_REUSE_REG_VAL flag.
1972 if (treeNode->IsReuseRegVal())
1974 // For now, this is only used for constant nodes.
1975 assert((treeNode->OperIsConst()));
1976 JITDUMP(" TreeNode is marked ReuseReg\n");
1980 // contained nodes are part of their parents for codegen purposes
1981 // e.g., immediates, most LEAs
1982 if (treeNode->isContained())
1987 switch (treeNode->gtOper)
1989 case GT_START_NONGC:
1990 getEmitter()->emitDisableGC();
1994 #ifdef PROFILING_SUPPORTED
1995 // We should be seeing this only if profiler hook is needed
1996 noway_assert(compiler->compIsProfilerHookNeeded());
1998 // Right now this node is used only for tail calls. In future if
1999 // we intend to use it for Enter or Leave hooks, add a data member
2000 // to this node indicating the kind of profiler hook. For example,
2001 // helper number can be used.
2002 genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
2003 #endif // PROFILING_SUPPORTED
2007 genLclHeap(treeNode);
2012 NYI_IF(treeNode->IsIconHandle(GTF_ICON_TLS_HDL), "TLS constants");
2013 #endif // _TARGET_X86_
2017 genSetRegToConst(targetReg, targetType, treeNode);
2018 genProduceReg(treeNode);
2023 if (varTypeIsFloating(targetType))
2025 assert(treeNode->gtOper == GT_NEG);
2026 genSSE2BitwiseOp(treeNode);
2030 GenTreePtr operand = treeNode->gtGetOp1();
2031 assert(!operand->isContained());
2032 regNumber operandReg = genConsumeReg(operand);
2034 if (operandReg != targetReg)
2036 inst_RV_RV(INS_mov, targetReg, operandReg, targetType);
2039 instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
2040 inst_RV(ins, targetReg, targetType);
2042 genProduceReg(treeNode);
2048 assert(varTypeIsIntegralOrI(treeNode));
2051 #if !defined(_TARGET_64BIT_)
2056 #endif // !defined(_TARGET_64BIT_)
2059 genConsumeOperands(treeNode->AsOp());
2060 genCodeForBinary(treeNode);
2068 genCodeForShift(treeNode);
2069 // genCodeForShift() calls genProduceReg()
2073 if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1))
2075 // Casts float <--> double
2076 genFloatToFloatCast(treeNode);
2078 else if (varTypeIsFloating(treeNode->gtOp.gtOp1))
2080 // Casts float/double --> int32/int64
2081 genFloatToIntCast(treeNode);
2083 else if (varTypeIsFloating(targetType))
2085 // Casts int32/uint32/int64/uint64 --> float/double
2086 genIntToFloatCast(treeNode);
2090 // Casts int <--> int
2091 genIntToIntCast(treeNode);
2093 // The per-case functions call genProduceReg()
2098 // lcl_vars are not defs
2099 assert((treeNode->gtFlags & GTF_VAR_DEF) == 0);
2101 GenTreeLclVarCommon* lcl = treeNode->AsLclVarCommon();
2102 bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate();
2104 if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH))
2106 assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED));
2109 // If this is a register candidate that has been spilled, genConsumeReg() will
2110 // reload it at the point of use. Otherwise, if it's not in a register, we load it here.
2112 if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED))
2114 assert(!isRegCandidate);
2116 emit->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)),
2117 emitTypeSize(treeNode), treeNode->gtRegNum, lcl->gtLclNum, 0);
2118 genProduceReg(treeNode);
2123 case GT_LCL_FLD_ADDR:
2124 case GT_LCL_VAR_ADDR:
2125 // Address of a local var. This by itself should never be allocated a register.
2126 // If it is worth storing the address in a register then it should be cse'ed into
2127 // a temp and that would be allocated a register.
2128 noway_assert(targetType == TYP_BYREF);
2129 noway_assert(!treeNode->InReg());
2131 inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF);
2132 genProduceReg(treeNode);
2137 noway_assert(targetType != TYP_STRUCT);
2138 noway_assert(treeNode->gtRegNum != REG_NA);
2141 // Loading of TYP_SIMD12 (i.e. Vector3) field
2142 if (treeNode->TypeGet() == TYP_SIMD12)
2144 genLoadLclFldTypeSIMD12(treeNode);
2149 emitAttr size = emitTypeSize(targetType);
2150 unsigned offs = treeNode->gtLclFld.gtLclOffs;
2151 unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
2152 assert(varNum < compiler->lvaCount);
2154 emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), size, targetReg, varNum, offs);
2156 genProduceReg(treeNode);
2159 case GT_STORE_LCL_FLD:
2161 noway_assert(targetType != TYP_STRUCT);
2162 noway_assert(!treeNode->InReg());
2163 assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
2166 // storing of TYP_SIMD12 (i.e. Vector3) field
2167 if (treeNode->TypeGet() == TYP_SIMD12)
2169 genStoreLclFldTypeSIMD12(treeNode);
2173 GenTreePtr op1 = treeNode->gtGetOp1();
2174 genConsumeRegs(op1);
2175 emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1);
2179 case GT_STORE_LCL_VAR:
2181 GenTreePtr op1 = treeNode->gtGetOp1();
2183 // The 'var = call' case, where the call returns a multi-reg return value,
2184 // is handled separately.
2185 if (op1->gtSkipReloadOrCopy()->IsMultiRegCall())
2187 genMultiRegCallStoreToLocal(treeNode);
2191 noway_assert(targetType != TYP_STRUCT);
2192 assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
2194 unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
2195 LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
2197 // Ensure that lclVar nodes are typed correctly.
2198 assert(!varDsc->lvNormalizeOnStore() || treeNode->TypeGet() == genActualType(varDsc->TypeGet()));
2200 #if !defined(_TARGET_64BIT_)
2201 if (treeNode->TypeGet() == TYP_LONG)
2203 genStoreLongLclVar(treeNode);
2206 #endif // !defined(_TARGET_64BIT_)
2209 if (varTypeIsSIMD(targetType) && (targetReg != REG_NA) && op1->IsCnsIntOrI())
2211 // This is only possible for a zero-init.
2212 noway_assert(op1->IsIntegralConst(0));
2213 genSIMDZero(targetType, varDsc->lvBaseType, targetReg);
2214 genProduceReg(treeNode);
2217 #endif // FEATURE_SIMD
2219 genConsumeRegs(op1);
2221 if (treeNode->gtRegNum == REG_NA)
2224 emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)),
2225 emitTypeSize(targetType), treeNode);
2226 varDsc->lvRegNum = REG_STK;
2230 bool containedOp1 = op1->isContained();
2231 // Look for the case where we have a constant zero which we've marked for reuse,
2232 // but which isn't actually in the register we want. In that case, it's better to create
2233 // zero in the target register, because an xor is smaller than a copy. Note that we could
2234 // potentially handle this in the register allocator, but we can't always catch it there
2235 // because the target may not have a register allocated for it yet.
2236 if (!containedOp1 && (op1->gtRegNum != treeNode->gtRegNum) &&
2237 (op1->IsIntegralConst(0) || op1->IsFPZero()))
2239 op1->gtRegNum = REG_NA;
2240 op1->ResetReuseRegVal();
2241 containedOp1 = true;
2246 // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register
2247 // must be a constant. However, in the future we might want to support a contained memory op.
2248 // This is a bit tricky because we have to decide it's contained before register allocation,
2249 // and this would be a case where, once that's done, we need to mark that node as always
2250 // requiring a register - which we always assume now anyway, but once we "optimize" that
2251 // we'll have to take cases like this into account.
2252 assert((op1->gtRegNum == REG_NA) && op1->OperIsConst());
2253 genSetRegToConst(treeNode->gtRegNum, targetType, op1);
2255 else if (op1->gtRegNum != treeNode->gtRegNum)
2257 assert(op1->gtRegNum != REG_NA);
2258 emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1);
2263 if (treeNode->gtRegNum != REG_NA)
2265 genProduceReg(treeNode);
2271 // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in
2272 // the return register, if it's not already there. The processing is the same as GT_RETURN.
2273 if (targetType != TYP_VOID)
2275 // For filters, the IL spec says the result is type int32. Further, the only specified legal values
2276 // are 0 or 1, with the use of other values "undefined".
2277 assert(targetType == TYP_INT);
2283 genReturn(treeNode);
2288 // if we are here, it is the case where there is an LEA that cannot
2289 // be folded into a parent instruction
2290 GenTreeAddrMode* lea = treeNode->AsAddrMode();
2291 genLeaInstruction(lea);
2293 // genLeaInstruction calls genProduceReg()
2298 // Handling of Vector3 type values loaded through indirection.
2299 if (treeNode->TypeGet() == TYP_SIMD12)
2301 genLoadIndTypeSIMD12(treeNode);
2304 #endif // FEATURE_SIMD
2306 genConsumeAddress(treeNode->AsIndir()->Addr());
2307 emit->emitInsMov(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode);
2308 genProduceReg(treeNode);
2315 genCodeForMulHi(treeNode->AsOp());
2316 genProduceReg(treeNode);
2322 emitAttr size = emitTypeSize(treeNode);
2323 bool isUnsignedMultiply = ((treeNode->gtFlags & GTF_UNSIGNED) != 0);
2324 bool requiresOverflowCheck = treeNode->gtOverflowEx();
2326 GenTree* op1 = treeNode->gtGetOp1();
2327 GenTree* op2 = treeNode->gtGetOp2();
2329 // There are 3 forms of x64 multiply:
2330 // 1-op form with 128-bit result: RDX:RAX = RAX * rm
2331 // 2-op form: reg *= rm
2332 // 3-op form: reg = rm * imm
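// For illustration (editorial sketch, hypothetical registers), the three forms are:
//   mul  rcx           ; 1-op form: RDX:RAX = RAX * RCX
//   imul rax, rcx      ; 2-op form: RAX = RAX * RCX
//   imul rax, rcx, 9   ; 3-op form: RAX = RCX * 9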
2334 genConsumeOperands(treeNode->AsOp());
2336 // This matches the 'mul' lowering in Lowering::SetMulOpCounts()
2338 // immOp :: Only one operand can be an immediate
2339 // rmOp :: Only one operand can be a memory op.
2340 // regOp :: A register op (especially the operand that matches 'targetReg')
2341 // (can be nullptr when we have both a memory op and an immediate op)
2343 GenTree* immOp = nullptr;
2344 GenTree* rmOp = op1;
2347 if (op2->isContainedIntOrIImmed())
2351 else if (op1->isContainedIntOrIImmed())
2357 if (immOp != nullptr)
2359 // This must be a non-floating point operation.
2360 assert(!varTypeIsFloating(treeNode));
2362 // CQ: When possible use LEA for mul by imm 3, 5 or 9
2363 ssize_t imm = immOp->AsIntConCommon()->IconValue();
2365 if (!requiresOverflowCheck && !rmOp->isContained() && ((imm == 3) || (imm == 5) || (imm == 9)))
2367 // We will use the LEA instruction to perform this multiply
2368 // Note that an LEA with base=x, index=x and scale=(imm-1) computes x*imm when imm=3,5 or 9.
2369 unsigned int scale = (unsigned int)(imm - 1);
2370 getEmitter()->emitIns_R_ARX(INS_lea, size, targetReg, rmOp->gtRegNum, rmOp->gtRegNum, scale, 0);
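// For example (editorial sketch, hypothetical registers): with imm == 5 and the
// operand in RCX, scale == 4 and the emitted instruction has the form
//   lea rax, [rcx + rcx*4]   ; rax = rcx * 5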
2374 // use the 3-op form with immediate
2375 ins = getEmitter()->inst3opImulForReg(targetReg);
2376 emit->emitInsBinary(ins, size, rmOp, immOp);
2379 else // we have no contained immediate operand
2384 regNumber mulTargetReg = targetReg;
2385 if (isUnsignedMultiply && requiresOverflowCheck)
2388 mulTargetReg = REG_RAX;
2392 ins = genGetInsForOper(GT_MUL, targetType);
2395 // Set rmOp to the contained memory operand (if any)
2396 // or set regOp to op2 when it has the matching target register for our multiply op
2398 if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == mulTargetReg)))
2403 assert(!regOp->isContained());
2405 // Set up targetReg when neither of the source operands was a matching register
2406 if (regOp->gtRegNum != mulTargetReg)
2408 inst_RV_RV(ins_Copy(targetType), mulTargetReg, regOp->gtRegNum, targetType);
2411 emit->emitInsBinary(ins, size, treeNode, rmOp);
2413 // Move the result to the desired register, if necessary
2414 if ((ins == INS_mulEAX) && (targetReg != REG_RAX))
2416 inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
2420 if (requiresOverflowCheck)
2422 // Overflow checking is only used for non-floating point types
2423 noway_assert(!varTypeIsFloating(treeNode));
2425 genCheckOverflow(treeNode);
2428 genProduceReg(treeNode);
2434 // We shouldn't be seeing GT_MOD on float/double args as it should get morphed into a
2435 // helper call by the front end. Similarly, we shouldn't be seeing GT_UDIV and GT_UMOD
2436 // on float/double args.
2437 noway_assert(!varTypeIsFloating(treeNode));
2441 genCodeForDivMod(treeNode->AsOp());
2445 genIntrinsic(treeNode);
2450 genSIMDIntrinsic(treeNode->AsSIMD());
2452 #endif // FEATURE_SIMD
2455 genCkfinite(treeNode);
2465 // TODO-XArch-CQ: Check if we can use the currently set flags.
2466 // TODO-XArch-CQ: Check for the case where we can simply transfer the carry bit to a register
2467 // (signed < or >= where targetReg != REG_NA)
2469 GenTreePtr op1 = treeNode->gtGetOp1();
2470 var_types op1Type = op1->TypeGet();
2472 if (varTypeIsFloating(op1Type))
2474 genCompareFloat(treeNode);
2476 #if !defined(_TARGET_64BIT_)
2477 // X86 Long comparison
2478 else if (varTypeIsLong(op1Type))
2481 // The result of an unlowered long compare on a 32-bit target must either be
2482 // a) materialized into a register, or
2483 // b) unused.
2485 // A long compare that has a result that is used but not materialized into a register should
2486 // have been handled by Lowering::LowerCompare.
2489 assert((treeNode->gtRegNum != REG_NA) || !LIR::AsRange(compiler->compCurBB).TryGetUse(treeNode, &use));
2491 genCompareLong(treeNode);
2493 #endif // !defined(_TARGET_64BIT_)
2496 genCompareInt(treeNode);
2503 GenTree* cmp = treeNode->gtOp.gtOp1;
2505 assert(cmp->OperIsCompare());
2506 assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
2508 #if !defined(_TARGET_64BIT_)
2509 // Long-typed compares should have been handled by Lowering::LowerCompare.
2510 assert(!varTypeIsLong(cmp->gtGetOp1()));
2513 // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp
2514 // is governed by a flag, NOT by the inherent type of the node.
2515 // TODO-XArch-CQ: Check if we can use the currently set flags.
2516 emitJumpKind jumpKind[2];
2517 bool branchToTrueLabel[2];
2518 genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel);
2520 BasicBlock* skipLabel = nullptr;
2521 if (jumpKind[0] != EJ_NONE)
2523 BasicBlock* jmpTarget;
2524 if (branchToTrueLabel[0])
2526 jmpTarget = compiler->compCurBB->bbJumpDest;
2530 // This case arises only for ordered GT_EQ right now
2531 assert((cmp->gtOper == GT_EQ) && ((cmp->gtFlags & GTF_RELOP_NAN_UN) == 0));
2532 skipLabel = genCreateTempLabel();
2533 jmpTarget = skipLabel;
2536 inst_JMP(jumpKind[0], jmpTarget);
2539 if (jumpKind[1] != EJ_NONE)
2541 // the second conditional branch always has to be to the true label
2542 assert(branchToTrueLabel[1]);
2543 inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest);
2546 if (skipLabel != nullptr)
2548 genDefineTempLabel(skipLabel);
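// For illustration (editorial sketch): an ordered floating-point GT_EQ needs both
// branches because ucomiss/ucomisd sets PF for unordered (NaN) operands:
//   jp  skipLabel     ; unordered => not equal, skip the true branch
//   je  trueLabel     ; equal and ordered
// skipLabel: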
2555 GenTreeJumpCC* jcc = treeNode->AsJumpCC();
2557 assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
2559 CompareKind compareKind = ((jcc->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
2560 emitJumpKind jumpKind = genJumpKindForOper(jcc->gtCondition, compareKind);
2562 inst_JMP(jumpKind, compiler->compCurBB->bbJumpDest);
2568 // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
2569 // based on the contents of 'data'
2571 GenTree* data = treeNode->gtOp.gtOp1;
2572 genConsumeRegs(data);
2573 GenTreeIntCon cns = intForm(TYP_INT, 0);
2574 emit->emitInsBinary(INS_cmp, emitTypeSize(TYP_INT), data, &cns);
2576 BasicBlock* skipLabel = genCreateTempLabel();
2578 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
2579 inst_JMP(jmpEqual, skipLabel);
2581 // emit the call to the EE-helper that stops for GC (or other reasons)
2582 assert(treeNode->gtRsvdRegs != RBM_NONE);
2583 assert(genCountBits(treeNode->gtRsvdRegs) == 1);
2584 regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
2585 assert(genIsValidIntReg(tmpReg));
2587 genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN, tmpReg);
2588 genDefineTempLabel(skipLabel);
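// For illustration (editorial sketch), the emitted return trap has the form:
//   cmp  <data>, 0
//   je   skipLabel
//   call CORINFO_HELP_STOP_FOR_GC    ; possibly dispatched through tmpReg
// skipLabel: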
2593 genStoreInd(treeNode);
2597 // This is handled at the time we call genConsumeReg() on the GT_COPY
2602 // Swap is only supported for lclVar operands that are enregistered
2603 // We do not consume or produce any registers. Both operands remain enregistered.
2604 // However, the gc-ness may change.
2605 assert(genIsRegCandidateLocal(treeNode->gtOp.gtOp1) && genIsRegCandidateLocal(treeNode->gtOp.gtOp2));
2607 GenTreeLclVarCommon* lcl1 = treeNode->gtOp.gtOp1->AsLclVarCommon();
2608 LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]);
2609 var_types type1 = varDsc1->TypeGet();
2610 GenTreeLclVarCommon* lcl2 = treeNode->gtOp.gtOp2->AsLclVarCommon();
2611 LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]);
2612 var_types type2 = varDsc2->TypeGet();
2614 // We must have both int or both fp regs
2615 assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2));
2617 // FP swap is not yet implemented (and should have NYI'd in LSRA)
2618 assert(!varTypeIsFloating(type1));
2620 regNumber oldOp1Reg = lcl1->gtRegNum;
2621 regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg);
2622 regNumber oldOp2Reg = lcl2->gtRegNum;
2623 regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg);
2625 // We don't call genUpdateVarReg because we don't have a tree node with the new register.
2626 varDsc1->lvRegNum = oldOp2Reg;
2627 varDsc2->lvRegNum = oldOp1Reg;
2630 emitAttr size = EA_PTRSIZE;
2631 if (varTypeGCtype(type1) != varTypeGCtype(type2))
2633 // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers.
2634 // Otherwise it will leave them alone, which is correct if they have the same GC-ness.
2637 inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size);
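// For reference (editorial note): a single register-register xchg (e.g.
// xchg rax, rcx) swaps the two values without a temporary; the implicit lock
// prefix applies only to the memory form of xchg, not to this form.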
2639 // Update the gcInfo.
2640 // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output)
2641 gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
2642 gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
2644 // gcMarkRegPtrVal will do the appropriate thing for non-gc types.
2645 // It will also dump the updates.
2646 gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1);
2647 gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2);
2657 genPutArgStk(treeNode);
2662 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
2663 noway_assert(targetType != TYP_STRUCT);
2664 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
2665 // commas show up here commonly, as part of a nullchk operation
2666 GenTree* op1 = treeNode->gtOp.gtOp1;
2667 // If the child node is not already in the register we need, move it
2669 if (treeNode->gtRegNum != op1->gtRegNum)
2671 inst_RV_RV(ins_Copy(targetType), treeNode->gtRegNum, op1->gtRegNum, targetType);
2673 genProduceReg(treeNode);
2678 genCallInstruction(treeNode);
2682 genJmpMethod(treeNode);
2688 genLockedInstructions(treeNode);
2691 case GT_MEMORYBARRIER:
2692 instGen_MemoryBarrier();
2697 GenTreePtr location = treeNode->gtCmpXchg.gtOpLocation; // arg1
2698 GenTreePtr value = treeNode->gtCmpXchg.gtOpValue; // arg2
2699 GenTreePtr comparand = treeNode->gtCmpXchg.gtOpComparand; // arg3
2701 assert(location->gtRegNum != REG_NA && location->gtRegNum != REG_RAX);
2702 assert(value->gtRegNum != REG_NA && value->gtRegNum != REG_RAX);
2704 genConsumeReg(location);
2705 genConsumeReg(value);
2706 genConsumeReg(comparand);
2707 // comparand goes to RAX;
2708 // Note that we must issue this move after the genConsumeReg() calls above, in case
2709 // any of them have a GT_COPY from RAX.
2710 if (comparand->gtRegNum != REG_RAX)
2712 inst_RV_RV(ins_Copy(comparand->TypeGet()), REG_RAX, comparand->gtRegNum, comparand->TypeGet());
2718 emit->emitIns_AR_R(INS_cmpxchg, emitTypeSize(targetType), value->gtRegNum, location->gtRegNum, 0);
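// For reference (editorial note): cmpxchg [mem], reg compares RAX with [mem];
// if they are equal, reg is stored to [mem], otherwise RAX is loaded from [mem].
// Either way RAX ends up holding the original memory value, which is the result
// of this node.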
2721 if (targetReg != REG_RAX)
2723 inst_RV_RV(ins_Copy(targetType), targetReg, REG_RAX, targetType);
2726 genProduceReg(treeNode);
2730 // do nothing - reload is just a marker.
2731 // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
2732 // into the register specified in this node.
2739 if (treeNode->gtFlags & GTF_NO_OP_NO)
2741 noway_assert(!"GTF_NO_OP_NO should not be set");
2745 getEmitter()->emitIns_Nop(1);
2749 case GT_ARR_BOUNDS_CHECK:
2752 #endif // FEATURE_SIMD
2753 genRangeCheck(treeNode);
2757 if (treeNode->gtRegNum != treeNode->AsPhysReg()->gtSrcReg)
2759 inst_RV_RV(INS_mov, treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg, targetType);
2761 genTransferRegGCState(treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg);
2763 genProduceReg(treeNode);
2771 assert(!treeNode->gtOp.gtOp1->isContained());
2772 regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1);
2773 emit->emitIns_AR_R(INS_cmp, EA_4BYTE, reg, reg, 0);
2779 noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
2781 /* Catch arguments get passed in a register. genCodeForBBlist()
2782 would have marked it as holding a GC object, but not used. */
2784 noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
2785 genConsumeReg(treeNode);
2788 #if !FEATURE_EH_FUNCLETS
2791 // Have to clear the ShadowSP of the nesting level which encloses the finally. Generates:
2792 // mov dword ptr [ebp-0xC], 0 // for some slot of the ShadowSP local var
2794 unsigned finallyNesting;
2795 finallyNesting = treeNode->gtVal.gtVal1;
2796 noway_assert(treeNode->gtVal.gtVal1 < compiler->compHndBBtabCount);
2797 noway_assert(finallyNesting < compiler->compHndBBtabCount);
2799 // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
2800 unsigned filterEndOffsetSlotOffs;
2801 PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) >
2802 TARGET_POINTER_SIZE); // below doesn't underflow.
2803 filterEndOffsetSlotOffs =
2804 (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
2806 unsigned curNestingSlotOffs;
2807 curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE);
2808 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, curNestingSlotOffs);
2810 #endif // !FEATURE_EH_FUNCLETS
2812 case GT_PINVOKE_PROLOG:
2813 noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);
2815 // the runtime side requires the codegen here to be consistent
2816 emit->emitDisableRandomNops();
2820 genPendingCallLabel = genCreateTempLabel();
2821 treeNode->gtLabel.gtLabBB = genPendingCallLabel;
2822 emit->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, genPendingCallLabel, treeNode->gtRegNum);
2826 if (treeNode->OperIsCopyBlkOp() && !treeNode->AsBlk()->gtBlkOpGcUnsafe)
2828 assert(treeNode->AsObj()->gtGcPtrCount != 0);
2829 genCodeForCpObj(treeNode->AsObj());
2834 case GT_STORE_DYN_BLK:
2836 genCodeForStoreBlk(treeNode->AsBlk());
2840 genJumpTable(treeNode);
2843 case GT_SWITCH_TABLE:
2844 genTableBasedSwitch(treeNode);
2848 genCodeForArrIndex(treeNode->AsArrIndex());
2852 genCodeForArrOffset(treeNode->AsArrOffs());
2855 case GT_CLS_VAR_ADDR:
2856 getEmitter()->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0);
2857 genProduceReg(treeNode);
2860 #if !defined(_TARGET_64BIT_)
2862 assert(!treeNode->isContained());
2863 genConsumeRegs(treeNode);
2868 // Do nothing; these nodes are simply markers for debug info.
2875 sprintf(message, "Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet()));
2877 assert(!"Unknown node in codegen");
2883 //----------------------------------------------------------------------------------
2884 // genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
2887 // treeNode - Gentree of GT_STORE_LCL_VAR
2893 // The child of the store is a multi-reg call node.
2894 // genProduceReg() on treeNode is performed by the caller of this routine.
2896 void CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode)
2898 assert(treeNode->OperGet() == GT_STORE_LCL_VAR);
2900 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2901 // Structs of size >=9 and <=16 are returned in two return registers on x64 Unix.
2902 assert(varTypeIsStruct(treeNode));
2904 // Assumption: current x64 Unix implementation requires that a multi-reg struct
2905 // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
2906 // being struct promoted.
2907 unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
2908 LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
2909 noway_assert(varDsc->lvIsMultiRegRet);
2911 GenTree* op1 = treeNode->gtGetOp1();
2912 GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
2913 GenTreeCall* call = actualOp1->AsCall();
2914 assert(call->HasMultiRegRetVal());
2916 genConsumeRegs(op1);
2918 ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
2919 assert(retTypeDesc->GetReturnRegCount() == MAX_RET_REG_COUNT);
2920 unsigned regCount = retTypeDesc->GetReturnRegCount();
2922 if (treeNode->gtRegNum != REG_NA)
2924 // Right now the only enregistrable structs supported are SIMD types.
2925 assert(varTypeIsSIMD(treeNode));
2926 assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(0)));
2927 assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(1)));
2929 // This is a case where the two 8-byte halves that comprise the operand are in
2930 // two different xmm registers and need to be assembled into a single
2931 // xmm register.
2932 regNumber targetReg = treeNode->gtRegNum;
2933 regNumber reg0 = call->GetRegNumByIdx(0);
2934 regNumber reg1 = call->GetRegNumByIdx(1);
2936 if (op1->IsCopyOrReload())
2938 // GT_COPY/GT_RELOAD will have valid reg for those positions
2939 // that need to be copied or reloaded.
2940 regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0);
2941 if (reloadReg != REG_NA)
2946 reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(1);
2947 if (reloadReg != REG_NA)
2953 if (targetReg != reg0 && targetReg != reg1)
2955 // Copy reg0 into targetReg and let it to be handled by one
2956 // of the cases below.
2957 inst_RV_RV(ins_Copy(TYP_DOUBLE), targetReg, reg0, TYP_DOUBLE);
2961 if (targetReg == reg0)
2963 // targetReg[63:0] = targetReg[63:0]
2964 // targetReg[127:64] = reg1[63:0]
2965 inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg1, 0x00);
2969 assert(targetReg == reg1);
2971 // We need two shuffles to achieve this.
2973 // First:  targetReg[63:0]   = targetReg[63:0]
2974 //         targetReg[127:64] = reg0[63:0]
2977 // Second: targetReg[63:0]   = targetReg[127:64]
2978 //         targetReg[127:64] = targetReg[63:0]
2980 // Essentially: copy the low 8 bytes from reg0 to the high 8 bytes of targetReg,
2981 // then swap the low and high 8 bytes of targetReg to get them
2982 // arranged in the right order.
2983 inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg0, 0x00);
2984 inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, targetReg, 0x01);
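// For reference (editorial note): shufpd dst, src, imm selects dst[63:0] or
// dst[127:64] for the result's low qword (imm bit 0), and src[63:0] or
// src[127:64] for the high qword (imm bit 1). So 0x00 pairs the two low qwords,
// and 0x01 with src == dst swaps the register's own qwords.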
2991 for (unsigned i = 0; i < regCount; ++i)
2993 var_types type = retTypeDesc->GetReturnRegType(i);
2994 regNumber reg = call->GetRegNumByIdx(i);
2995 if (op1->IsCopyOrReload())
2997 // GT_COPY/GT_RELOAD will have valid reg for those positions
2998 // that need to be copied or reloaded.
2999 regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
3000 if (reloadReg != REG_NA)
3006 assert(reg != REG_NA);
3007 getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
3008 offset += genTypeSize(type);
3011 varDsc->lvRegNum = REG_STK;
3013 #elif defined(_TARGET_X86_)
3014 // Longs are returned in two return registers on x86.
3015 assert(varTypeIsLong(treeNode));
3017 // Assumption: the current x86 implementation requires that a multi-reg long
3018 // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from being promoted.
3020 unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
3021 LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
3022 noway_assert(varDsc->lvIsMultiRegRet);
3024 GenTree* op1 = treeNode->gtGetOp1();
3025 GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
3026 GenTreeCall* call = actualOp1->AsCall();
3027 assert(call->HasMultiRegRetVal());
3029 genConsumeRegs(op1);
3031 ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
3032 unsigned regCount = retTypeDesc->GetReturnRegCount();
3033 assert(regCount == MAX_RET_REG_COUNT);
3037 for (unsigned i = 0; i < regCount; ++i)
3039 var_types type = retTypeDesc->GetReturnRegType(i);
3040 regNumber reg = call->GetRegNumByIdx(i);
3041 if (op1->IsCopyOrReload())
3043 // GT_COPY/GT_RELOAD will have valid reg for those positions
3044 // that need to be copied or reloaded.
3045 regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
3046 if (reloadReg != REG_NA)
3052 assert(reg != REG_NA);
3053 getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
3054 offset += genTypeSize(type);
3057 varDsc->lvRegNum = REG_STK;
3058 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING && !_TARGET_X86_
3059 assert(!"Unreached");
3060 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING && !_TARGET_X86_
3063 //------------------------------------------------------------------------
3064 // genLclHeap: Generate code for localloc.
3067 // tree - the localloc tree to generate.
3070 // Note that for x86, we don't track ESP movements while generating the localloc code.
3071 // The ESP tracking is used to report stack pointer-relative GC info, which is not
3072 // interesting while doing the localloc construction. Also, for functions with localloc,
3073 // we have EBP frames, and EBP-relative locals, and ESP-relative accesses only for function
3074 // call arguments. We store the ESP after the localloc is complete in the LocAllocSP
3075 // variable. This variable is implicitly reported to the VM in the GC info (its position
3076 // is defined by convention relative to other items), and is used by the GC to find the
3077 // "base" stack pointer in functions with localloc.
3079 void CodeGen::genLclHeap(GenTreePtr tree)
3081 assert(tree->OperGet() == GT_LCLHEAP);
3082 assert(compiler->compLocallocUsed);
3084 GenTreePtr size = tree->gtOp.gtOp1;
3085 noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
3087 regNumber targetReg = tree->gtRegNum;
3088 regMaskTP tmpRegsMask = tree->gtRsvdRegs;
3089 regNumber regCnt = REG_NA;
3090 var_types type = genActualType(size->gtType);
3091 emitAttr easz = emitTypeSize(type);
3092 BasicBlock* endLabel = nullptr;
3096 if (compiler->opts.compStackCheckOnRet)
3098 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
3099 compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
3100 compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
3101 getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
3103 BasicBlock* esp_check = genCreateTempLabel();
3104 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
3105 inst_JMP(jmpEqual, esp_check);
3106 getEmitter()->emitIns(INS_BREAKPOINT);
3107 genDefineTempLabel(esp_check);
3111 noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
3112 noway_assert(genStackLevel == 0); // Can't have anything on the stack
3114 unsigned stackAdjustment = 0;
3115 BasicBlock* loop = nullptr;
3117 // Compute the amount of memory to allocate so that it is properly STACK_ALIGN'ed.
3119 if (size->IsCnsIntOrI())
3121 // If size is a constant, then it must be contained.
3122 assert(size->isContained());
3124 // If amount is zero then return null in targetReg
3125 amount = size->gtIntCon.gtIconVal;
3128 instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg);
3132 // Round 'amount', the total number of bytes to localloc, up to a STACK_ALIGN boundary.
3133 amount = AlignUp(amount, STACK_ALIGN);
3137 // The localloc requested memory size is non-constant.
3139 // Put the size value in targetReg. If it is zero, bail out by returning null in targetReg.
3140 genConsumeRegAndCopy(size, targetReg);
3141 endLabel = genCreateTempLabel();
3142 getEmitter()->emitIns_R_R(INS_test, easz, targetReg, targetReg);
3143 inst_JMP(EJ_je, endLabel);
3145 // Compute the size of the block to allocate and perform alignment.
3146 // If compInitMem=true, we can reuse targetReg as regcnt,
3147 // since we don't need any internal registers.
3148 if (compiler->info.compInitMem)
3150 assert(genCountBits(tmpRegsMask) == 0);
3155 assert(genCountBits(tmpRegsMask) >= 1);
3156 regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
3157 tmpRegsMask &= ~regCntMask;
3158 regCnt = genRegNumFromMask(regCntMask);
3159 if (regCnt != targetReg)
3161 // Above, we put the size in targetReg. Now, copy it to our new temp register if necessary.
3162 inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet());
3166 // Round up the number of bytes to allocate to a STACK_ALIGN boundary. This is done
3167 // by code like:
3168 //      add reg, STACK_ALIGN - 1
3169 //      and reg, ~(STACK_ALIGN - 1)
3170 // However, in the initialized memory case, we need the count of STACK_ALIGN-sized
3171 // elements, not a byte count, after the alignment. So instead of the "and", which
3172 // becomes unnecessary, generate a shift, e.g.:
3173 //      add reg, STACK_ALIGN - 1
3174 //      shr reg, STACK_ALIGN_SHIFT
3176 inst_RV_IV(INS_add, regCnt, STACK_ALIGN - 1, emitActualTypeSize(type));
3178 if (compiler->info.compInitMem)
3180 // Convert the count from a count of bytes to a loop count. We will loop once per
3181 // stack alignment size, so each iteration will zero 4 bytes on x86 and 16 bytes on x64.
3182 // Note that we zero a single reg-size word per iteration on x86, and 2 reg-size
3183 // words per iteration on x64. We will shift off all the stack alignment bits
3184 // added above, so there is no need for an 'and' instruction.
3186 // --- shr regCnt, 2 (or 4) ---
3187 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT_ALL);
3191 // Otherwise, mask off the low bits to align the byte count.
3192 inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
3196 #if FEATURE_FIXED_OUT_ARGS
3197 // If we have an outgoing arg area then we must adjust the SP by popping off the
3198 // outgoing arg area. We will restore it right before we return from this method.
3200 // Localloc returns stack space that aligned to STACK_ALIGN bytes. The following
3201 // are the cases that need to be handled:
3202 // i) Method has out-going arg area.
3203 // It is guaranteed that size of out-going arg area is STACK_ALIGN'ed (see fgMorphArgs).
3204 // Therefore, we will pop off the out-going arg area from RSP before allocating the localloc space.
3205 // ii) Method has no out-going arg area.
3206 // Nothing to pop off from the stack.
3207 if (compiler->lvaOutgoingArgSpaceSize > 0)
3209 assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain
3211 inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
3212 stackAdjustment += compiler->lvaOutgoingArgSpaceSize;
3216 if (size->IsCnsIntOrI())
3218 // We should reach here only for non-zero, constant size allocations.
3220 assert((amount % STACK_ALIGN) == 0);
3221 assert((amount % REGSIZE_BYTES) == 0);
3223 // For small allocations we will generate up to six "push 0" instructions inline
3224 size_t cntRegSizedWords = amount / REGSIZE_BYTES;
3225 if (cntRegSizedWords <= 6)
3227 for (; cntRegSizedWords != 0; cntRegSizedWords--)
3229 inst_IV(INS_push_hide, 0); // push_hide means don't track the stack
3234 bool doNoInitLessThanOnePageAlloc =
3235 !compiler->info.compInitMem && (amount < compiler->eeGetPageSize()); // must be < not <=
3238 bool needRegCntRegister = true;
3239 #else // !_TARGET_X86_
3240 bool needRegCntRegister = !doNoInitLessThanOnePageAlloc;
3241 #endif // !_TARGET_X86_
3243 if (needRegCntRegister)
3245 // If compInitMem=true, we can reuse targetReg as regcnt.
3246 // Since size is a constant, regCnt is not yet initialized.
3247 assert(regCnt == REG_NA);
3248 if (compiler->info.compInitMem)
3250 assert(genCountBits(tmpRegsMask) == 0);
3255 assert(genCountBits(tmpRegsMask) >= 1);
3256 regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
3257 tmpRegsMask &= ~regCntMask;
3258 regCnt = genRegNumFromMask(regCntMask);
3262 if (doNoInitLessThanOnePageAlloc)
3264 // Since the size is less than a page, simply adjust ESP.
3265 // ESP might already be in the guard page, so we must touch it BEFORE
3266 // the alloc, not after.
3267 CLANG_FORMAT_COMMENT_ANCHOR;
3270 // For x86, we don't want to use "sub ESP" because we don't want the emitter to track the adjustment
3271 // to ESP. So do the work in the count register.
3272 // TODO-CQ: manipulate ESP directly, to share code, reduce #ifdefs, and improve CQ. This would require
3273 // creating a way to temporarily turn off the emitter's tracking of ESP, maybe marking instrDescs as "don't
3274 // track".
3275 inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
3276 getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
3277 inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE);
3278 inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL);
3279 #else // !_TARGET_X86_
3280 getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
3281 inst_RV_IV(INS_sub, REG_SPBASE, amount, EA_PTRSIZE);
3282 #endif // !_TARGET_X86_
3287 // else, "mov regCnt, amount"
3289 if (compiler->info.compInitMem)
3291 // When initializing memory, we want 'amount' to be the loop count.
3292 assert((amount % STACK_ALIGN) == 0);
3293 amount /= STACK_ALIGN;
3296 genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? TYP_INT : TYP_LONG);
3299 loop = genCreateTempLabel();
3300 if (compiler->info.compInitMem)
3302 // At this point 'regCnt' is set to the number of loop iterations for this loop, where each
3303 // iteration zeros (and subtracts from the stack pointer) STACK_ALIGN bytes.
3304 // Since we have to zero out the allocated memory AND ensure that RSP is always valid
3305 // by tickling the pages, we will just push 0's on the stack.
3307 assert(genIsValidIntReg(regCnt));
3310 genDefineTempLabel(loop);
3312 #if defined(_TARGET_AMD64_)
3313 // Push two 8-byte zeros. This matches the 16-byte STACK_ALIGN value.
3314 static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2));
3315 inst_IV(INS_push_hide, 0); // --- push 8-byte 0
3316 inst_IV(INS_push_hide, 0); // --- push 8-byte 0
3317 #elif defined(_TARGET_X86_)
3318 // Push a single 4-byte zero. This matches the 4-byte STACK_ALIGN value.
3319 static_assert_no_msg(STACK_ALIGN == REGSIZE_BYTES);
3320 inst_IV(INS_push_hide, 0); // --- push 4-byte 0
3321 #endif // _TARGET_X86_
3323 // Decrement the loop counter and loop if not done.
3324 inst_RV(INS_dec, regCnt, TYP_I_IMPL);
3325 inst_JMP(EJ_jne, loop);
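// For illustration (editorial sketch), the zeroing loop emitted above has the
// form (AMD64 shown):
// loop:
//   push 0         ; zeros 8 bytes and grows the stack
//   push 0         ; two pushes cover the 16-byte STACK_ALIGN
//   dec  regCnt
//   jne  loop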
3329 // At this point 'regCnt' is set to the total number of bytes to localloc.
3331 // We don't need to zero out the allocated memory. However, we do have
3332 // to tickle the pages to ensure that ESP is always valid and is
3333 // in sync with the "stack guard page". Note that in the worst
3334 // case ESP is on the last byte of the guard page. Thus you must
3335 // touch ESP+0 first, not ESP-0x1000.
3337 // Another subtlety is that you don't want ESP to be exactly on the
3338 // boundary of the guard page because PUSH is predecrement, thus
3339 // call setup would not touch the guard page but just beyond it
3341 // Note that we go through a few hoops so that ESP never points to
3342 // illegal pages at any time during the tickling process
3344 //      neg  REGCNT
3345 //      add  REGCNT, ESP       // reg now holds ultimate ESP
3346 //      jb   loop              // result is smaller than original ESP (no wrap around)
3347 //      xor  REGCNT, REGCNT    // Overflow, pick lowest possible number
3348 // loop:
3349 //      test ESP, [ESP+0]      // tickle the page
3350 //      mov  REGTMP, ESP
3351 //      sub  REGTMP, PAGE_SIZE
3352 //      mov  ESP, REGTMP
3353 //      cmp  ESP, REGCNT
3354 //      jae  loop
3355 //      mov  ESP, REGCNT       // move the final value to ESP
3358 inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
3359 inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
3360 inst_JMP(EJ_jb, loop);
3362 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
3364 genDefineTempLabel(loop);
3366 // Tickle the decremented value, and move back to ESP,
3367 // note that it has to be done BEFORE the update of ESP since
3368 // ESP might already be on the guard page. It is OK to leave
3369 // the final value of ESP on the guard page
3370 getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
3372 // This is a harmless trick to avoid the emitter trying to track the
3373 // decrement of the ESP - we do the subtraction in another reg instead
3374 // of adjusting ESP directly.
3375 assert(tmpRegsMask != RBM_NONE);
3376 assert(genCountBits(tmpRegsMask) == 1);
3377 regNumber regTmp = genRegNumFromMask(tmpRegsMask);
3379 inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL);
3380 inst_RV_IV(INS_sub, regTmp, compiler->eeGetPageSize(), EA_PTRSIZE);
3381 inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL);
3383 inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
3384 inst_JMP(EJ_jae, loop);
3386 // Move the final value to ESP
3387 inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
3391 // Re-adjust SP to allocate out-going arg area
3392 if (stackAdjustment > 0)
3394 assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
3395 inst_RV_IV(INS_sub, REG_SPBASE, stackAdjustment, EA_PTRSIZE);
3398 // Return the stackalloc'ed address in result register.
3399 // TargetReg = RSP + stackAdjustment.
3400 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, targetReg, REG_SPBASE, stackAdjustment);
3402 if (endLabel != nullptr)
3404 genDefineTempLabel(endLabel);
3409 // Write the lvaLocAllocSPvar stack frame slot
3410 noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
3411 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
3414 if (compiler->opts.compNeedStackProbes)
3416 genGenerateStackProbe();
3422 if (compiler->opts.compStackCheckOnRet)
3424 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
3425 compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
3426 compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
3427 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
3431 genProduceReg(tree);
3434 void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode)
3436 if (storeBlkNode->gtBlkOpGcUnsafe)
3438 getEmitter()->emitDisableGC();
3440 bool isCopyBlk = storeBlkNode->OperIsCopyBlkOp();
3442 switch (storeBlkNode->gtBlkOpKind)
3444 #ifdef _TARGET_AMD64_
3445 case GenTreeBlk::BlkOpKindHelper:
3448 genCodeForCpBlk(storeBlkNode);
3452 genCodeForInitBlk(storeBlkNode);
3455 #endif // _TARGET_AMD64_
3456 case GenTreeBlk::BlkOpKindRepInstr:
3459 genCodeForCpBlkRepMovs(storeBlkNode);
3463 genCodeForInitBlkRepStos(storeBlkNode);
3466 case GenTreeBlk::BlkOpKindUnroll:
3469 genCodeForCpBlkUnroll(storeBlkNode);
3473 genCodeForInitBlkUnroll(storeBlkNode);
3479 if (storeBlkNode->gtBlkOpGcUnsafe)
3481 getEmitter()->emitEnableGC();
3485 // Generate code for InitBlk using rep stos.
3487 // The size of the buffers must be a constant and also less than INITBLK_STOS_LIMIT bytes.
3488 // For any size larger than that, we'll use the helper even if both the
3489 // fill byte and the size are integer constants.
3490 void CodeGen::genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode)
3492 // Make sure we got the arguments of the initblk/initobj operation in the right registers
3493 unsigned size = initBlkNode->Size();
3494 GenTreePtr dstAddr = initBlkNode->Addr();
3495 GenTreePtr initVal = initBlkNode->Data();
3498 assert(!dstAddr->isContained());
3499 assert(!initVal->isContained());
3500 #ifdef _TARGET_AMD64_
3503 if (initVal->IsCnsIntOrI())
3505 #ifdef _TARGET_AMD64_
3506 assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
3508 assert(size > CPBLK_UNROLL_LIMIT);
3514 genConsumeBlockOp(initBlkNode, REG_RDI, REG_RAX, REG_RCX);
3515 instGen(INS_r_stosb);
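// For reference (editorial note): rep stosb stores AL to [RDI], incrementing RDI
// and decrementing RCX until RCX is zero; hence the fixed assignments above
// (RDI = dst address, RAX = fill value, RCX = byte count).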
3518 // Generate code for InitBlk by performing a loop unroll
3520 // a) Both the size and fill byte value are integer constants.
3521 // b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes.
3523 void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
3525 // Make sure we got the arguments of the initblk/initobj operation in the right registers
3526 unsigned size = initBlkNode->Size();
3527 GenTreePtr dstAddr = initBlkNode->Addr();
3528 GenTreePtr initVal = initBlkNode->Data();
3530 assert(!dstAddr->isContained());
3531 assert(!initVal->isContained());
3533 assert(size <= INITBLK_UNROLL_LIMIT);
3534 assert(initVal->gtSkipReloadOrCopy()->IsCnsIntOrI());
3536 emitter* emit = getEmitter();
3538 genConsumeOperands(initBlkNode);
3540 // If the initVal was moved, or spilled and reloaded to a different register,
3541 // get the original initVal from below the GT_RELOAD, but only after capturing the valReg,
3542 // which needs to be the new register.
3543 regNumber valReg = initVal->gtRegNum;
3544 initVal = initVal->gtSkipReloadOrCopy();
3546 unsigned offset = 0;
3548 // Perform an unroll using SSE2 loads and stores.
3549 if (size >= XMM_REGSIZE_BYTES)
3551 regNumber tmpReg = genRegNumFromMask(initBlkNode->gtRsvdRegs);
3554 assert(initBlkNode->gtRsvdRegs != RBM_NONE);
3555 assert(genCountBits(initBlkNode->gtRsvdRegs) == 1);
3556 assert(genIsValidFloatReg(tmpReg));
3559 if (initVal->gtIntCon.gtIconVal != 0)
3561 emit->emitIns_R_R(INS_mov_i2xmm, EA_PTRSIZE, tmpReg, valReg);
3562 emit->emitIns_R_R(INS_punpckldq, EA_8BYTE, tmpReg, tmpReg);
3564 // For x86, we need one more to convert it from 8 bytes to 16 bytes.
3565 emit->emitIns_R_R(INS_punpckldq, EA_8BYTE, tmpReg, tmpReg);
3566 #endif // _TARGET_X86_
3570 emit->emitIns_R_R(INS_xorpd, EA_8BYTE, tmpReg, tmpReg);
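// For reference (editorial note, assuming lowering has already replicated the
// fill byte through the integer register, as this unroll path expects): on the
// non-zero path, mov_i2xmm moves the replicated pattern into the low bytes of
// the xmm temp and each punpckldq doubles the initialized width until all 16
// bytes hold the fill pattern; on the zero path, xorpd simply clears the temp.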
3573 // Determine how many 16 byte slots we're going to fill using SSE movs.
3574 size_t slots = size / XMM_REGSIZE_BYTES;
3578 emit->emitIns_AR_R(INS_movdqu, EA_8BYTE, tmpReg, dstAddr->gtRegNum, offset);
3579 offset += XMM_REGSIZE_BYTES;
3583 // Fill the remainder (or a < 16 byte sized struct)
3584 if ((size & 8) != 0)
3587 // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
3588 emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
3590 emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
3592 #else // !_TARGET_X86_
3593 emit->emitIns_AR_R(INS_mov, EA_8BYTE, valReg, dstAddr->gtRegNum, offset);
3595 #endif // !_TARGET_X86_
3597 if ((size & 4) != 0)
3599 emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
3602 if ((size & 2) != 0)
3604 emit->emitIns_AR_R(INS_mov, EA_2BYTE, valReg, dstAddr->gtRegNum, offset);
3607 if ((size & 1) != 0)
3609 emit->emitIns_AR_R(INS_mov, EA_1BYTE, valReg, dstAddr->gtRegNum, offset);
3613 // Generates code for InitBlk by calling the VM memset helper function.
3615 // a) The size argument of the InitBlk is not an integer constant.
3616 // b) The size argument of the InitBlk is >= INITBLK_STOS_LIMIT bytes.
3617 void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
3619 #ifdef _TARGET_AMD64_
3620 // Make sure we got the arguments of the initblk operation in the right registers
3621 unsigned blockSize = initBlkNode->Size();
3622 GenTreePtr dstAddr = initBlkNode->Addr();
3623 GenTreePtr initVal = initBlkNode->Data();
3625 assert(!dstAddr->isContained());
3626 assert(!initVal->isContained());
3630 assert(blockSize >= CPBLK_MOVS_LIMIT);
3633 genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
3635 genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
3636 #else // !_TARGET_AMD64_
3637 NYI_X86("Helper call for InitBlk");
3638 #endif // !_TARGET_AMD64_
3641 // Generate code for a load from some address + offset
3642 // baseNode: tree node which can be either a local address or arbitrary node
3643 // offset: distance from the baseNode from which to load
3644 void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* baseNode, unsigned offset)
3646 emitter* emit = getEmitter();
3648 if (baseNode->OperIsLocalAddr())
3650 if (baseNode->gtOper == GT_LCL_FLD_ADDR)
3652 offset += baseNode->gtLclFld.gtLclOffs;
3654 emit->emitIns_R_S(ins, size, dst, baseNode->gtLclVarCommon.gtLclNum, offset);
3658 emit->emitIns_R_AR(ins, size, dst, baseNode->gtRegNum, offset);
3662 //------------------------------------------------------------------------
3663 // genCodeForStoreOffset: Generate code to store a reg to [base + offset].
3666 // ins - the instruction to generate.
3667 // size - the size that needs to be stored.
3668 // src - the register which needs to be stored.
3669 // baseNode - the base, relative to which to store the src register.
3670 // offset - the offset that is added to the baseNode to calculate the address to store into.
3672 void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* baseNode, unsigned offset)
3674 emitter* emit = getEmitter();
3676 if (baseNode->OperIsLocalAddr())
3678 if (baseNode->gtOper == GT_LCL_FLD_ADDR)
3680 offset += baseNode->gtLclFld.gtLclOffs;
3683 emit->emitIns_S_R(ins, size, src, baseNode->AsLclVarCommon()->GetLclNum(), offset);
3687 emit->emitIns_AR_R(ins, size, src, baseNode->gtRegNum, offset);
3691 // Generates CpBlk code by performing a loop unroll
3693 // The size argument of the CpBlk node is a constant and <= 64 bytes.
3694 // This may seem small but covers >95% of the cases in several framework assemblies.
3696 void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
3698 // Make sure we got the arguments of the cpblk operation in the right registers
3699 unsigned size = cpBlkNode->Size();
3700 GenTreePtr dstAddr = cpBlkNode->Addr();
3701 GenTreePtr source = cpBlkNode->Data();
3702 GenTreePtr srcAddr = nullptr;
3703 assert(size <= CPBLK_UNROLL_LIMIT);
3705 emitter* emit = getEmitter();
3707 if (source->gtOper == GT_IND)
3709 srcAddr = source->gtGetOp1();
3710 if (!srcAddr->isContained())
3712 genConsumeReg(srcAddr);
3717 noway_assert(source->IsLocal());
3718 // TODO-Cleanup: Consider making the addrForm() method in Rationalize public, e.g. in GenTree.
3719 // OR: transform source to GT_IND(GT_LCL_VAR_ADDR)
3720 if (source->OperGet() == GT_LCL_VAR)
3722 source->SetOper(GT_LCL_VAR_ADDR);
3726 assert(source->OperGet() == GT_LCL_FLD);
3727 source->SetOper(GT_LCL_FLD_ADDR);
3732 if (!dstAddr->isContained())
3734 genConsumeReg(dstAddr);
3737 unsigned offset = 0;
3739 // If the size of this struct is larger than 16 bytes,
3740 // let's use SSE2 to be able to do 16-byte-at-a-time
3741 // loads and stores.
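// For illustration (editorial sketch, hypothetical registers): each 16-byte
// chunk is copied through an xmm temp with a pair of unaligned moves:
//   movdqu xmm0, [srcReg + offset]
//   movdqu [dstReg + offset], xmm0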
3743 if (size >= XMM_REGSIZE_BYTES)
3745 assert(cpBlkNode->gtRsvdRegs != RBM_NONE);
3746 regNumber xmmReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLFLOAT);
3747 assert(genIsValidFloatReg(xmmReg));
3748 size_t slots = size / XMM_REGSIZE_BYTES;
3750 // TODO: In the below code the load and store instructions are for 16 bytes, but the
3751 // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
3752 // this probably needs to be changed.
3756 genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr, offset);
3758 genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset);
3759 offset += XMM_REGSIZE_BYTES;
3763 // Fill the remainder (15 bytes or less) if there's one.
3764 if ((size & 0xf) != 0)
3766 // Grab the integer temp register to emit the remaining loads and stores.
3767 regNumber tmpReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLINT);
3769 if ((size & 8) != 0)
3772 // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
3773 for (unsigned savedOffs = offset; offset < savedOffs + 8; offset += 4)
3775 genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
3776 genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
3778 #else // !_TARGET_X86_
3779 genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr, offset);
3780 genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset);
3782 #endif // !_TARGET_X86_
3784 if ((size & 4) != 0)
3786 genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
3787 genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
3790 if ((size & 2) != 0)
3792 genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr, offset);
3793 genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset);
3796 if ((size & 1) != 0)
3798 genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr, offset);
3799 genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset);
3804 // Generate code for CpBlk by using rep movs
3806 // The size argument of the CpBlk is a constant and is between
3807 // CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
3808 void CodeGen::genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode)
3810 // Make sure we got the arguments of the cpblk operation in the right registers
3811 unsigned size = cpBlkNode->Size();
3812 GenTreePtr dstAddr = cpBlkNode->Addr();
3813 GenTreePtr source = cpBlkNode->Data();
3814 GenTreePtr srcAddr = nullptr;
3817 assert(!dstAddr->isContained());
3818 assert(source->isContained());
3823 noway_assert(cpBlkNode->OperGet() == GT_STORE_DYN_BLK);
3829 assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
3831 assert(size > CPBLK_UNROLL_LIMIT);
3836 genConsumeBlockOp(cpBlkNode, REG_RDI, REG_RSI, REG_RCX);
3837 instGen(INS_r_movsb);
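// For reference (editorial note): rep movsb copies RCX bytes from [RSI] to [RDI],
// which is why the block operands are fixed to RDI/RSI/RCX above.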
3840 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3842 //---------------------------------------------------------------------------------------------------------------//
3843 // genStructPutArgUnroll: Generates code for passing a struct arg on stack by value using loop unrolling.
3846 // putArgNode - the PutArgStk tree.
3847 // baseVarNum - the base var number, relative to which the by-val struct will be copied on the stack.
3849 // TODO-Amd64-Unix: Try to share code with copyblk.
3850 // Need refactoring of copyblk before it can be used for putarg_stk.
3851 // The difference for now is that a putarg_stk contains its children, while cpblk does not.
3852 // This creates differences in code. After some significant refactoring it could be reused.
3854 void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseVarNum)
3856 // We will never call this method for SIMD types, which are stored directly
3857 // in genPutStructArgStk().
3858 noway_assert(putArgNode->TypeGet() == TYP_STRUCT);
3860 // Make sure we got the arguments of the cpblk operation in the right registers
3861 GenTreePtr dstAddr = putArgNode;
3862 GenTreePtr src = putArgNode->gtOp.gtOp1;
3864 size_t size = putArgNode->getArgSize();
3865 assert(size <= CPBLK_UNROLL_LIMIT);
3867 emitter* emit = getEmitter();
3868 unsigned putArgOffset = putArgNode->getArgOffset();
3870 assert(src->isContained());
3872 assert(src->gtOper == GT_OBJ);
3874 if (!src->gtOp.gtOp1->isContained())
3876 genConsumeReg(src->gtOp.gtOp1);
3879 unsigned offset = 0;
3881 // If the size of this struct is larger than 16 bytes,
3882 // let's use SSE2 to be able to do 16-byte-at-a-time
3883 // loads and stores.
3884 if (size >= XMM_REGSIZE_BYTES)
3886 assert(putArgNode->gtRsvdRegs != RBM_NONE);
3887 regNumber xmmReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
3888 assert(genIsValidFloatReg(xmmReg));
3889 size_t slots = size / XMM_REGSIZE_BYTES;
3891 assert(putArgNode->gtGetOp1()->isContained());
3892 assert(putArgNode->gtGetOp1()->gtOp.gtOper == GT_OBJ);
3894 // TODO: In the below code the load and store instructions are for 16 bytes, but the
3895 // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
3896 // this probably needs to be changed.
3900 genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, src->gtGetOp1(),
3901 offset); // Load 16 bytes from the address held by the Obj node's child, plus offset.
3904 emit->emitIns_S_R(INS_movdqu, EA_8BYTE, xmmReg, baseVarNum, putArgOffset + offset);
3906 offset += XMM_REGSIZE_BYTES;
3910 // Fill the remainder (15 bytes or less) if there's one.
3911 if ((size & 0xf) != 0)
3913 // Grab the integer temp register to emit the remaining loads and stores.
3914 regNumber tmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
3915 assert(genIsValidIntReg(tmpReg));
3917 if ((size & 8) != 0)
3919 genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, src->gtOp.gtOp1, offset);
3921 emit->emitIns_S_R(INS_mov, EA_8BYTE, tmpReg, baseVarNum, putArgOffset + offset);
3926 if ((size & 4) != 0)
3928 genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, src->gtOp.gtOp1, offset);
3930 emit->emitIns_S_R(INS_mov, EA_4BYTE, tmpReg, baseVarNum, putArgOffset + offset);
3935 if ((size & 2) != 0)
3937 genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, src->gtOp.gtOp1, offset);
3939 emit->emitIns_S_R(INS_mov, EA_2BYTE, tmpReg, baseVarNum, putArgOffset + offset);
3944 if ((size & 1) != 0)
3946 genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, src->gtOp.gtOp1, offset);
3947 emit->emitIns_S_R(INS_mov, EA_1BYTE, tmpReg, baseVarNum, putArgOffset + offset);
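// As an illustration (a sketch only; the registers and offsets are made up for
// the example), a 24-byte struct copied to outgoing-arg offset 0x20 emits roughly:
//      movdqu  xmm0, xmmword ptr [rax]         ; 16-byte SSE2 chunk
//      movdqu  xmmword ptr [rsp+20H], xmm0
//      mov     rcx, qword ptr [rax+16]         ; 8-byte remainder
//      mov     qword ptr [rsp+30H], rcx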
3952 //------------------------------------------------------------------------
3953 // genStructPutArgRepMovs: Generates code for passing a struct arg by value on stack using Rep Movs.
3956 // putArgNode - the PutArgStk tree.
3957 // baseVarNum - the base var number, relative to which the by-val struct bits will go.
3960 // The size argument of the PutArgStk (for structs) is a constant and is between
3961 // CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
3963 void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode, unsigned baseVarNum)
3965 assert(putArgNode->TypeGet() == TYP_STRUCT);
3966 assert(putArgNode->getArgSize() > CPBLK_UNROLL_LIMIT);
3967 assert(baseVarNum != BAD_VAR_NUM);
3969 // Make sure we got the arguments of the cpblk operation in the right registers
3970 GenTreePtr dstAddr = putArgNode;
3971 GenTreePtr srcAddr = putArgNode->gtGetOp1();
3974 assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI));
3975 assert(srcAddr->isContained());
3977 genConsumePutStructArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX, baseVarNum);
3978 instGen(INS_r_movsb);
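// A sketch of the emitted sequence (the dst is the outgoing arg area, the src
// address comes from the contained child; the exact address forms vary):
//      lea     rdi, [rsp+<argOffset>]
//      lea     rsi, [<srcAddr>]
//      mov     ecx, <argSize>
//      rep movsb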
3981 //------------------------------------------------------------------------
3982 // If any Vector3 args are on the stack and are not passed by reference, the upper 32 bits
3983 // must be cleared to zero. The native compiler doesn't clear the upper bits,
3984 // and there is no way to know whether the caller is native or managed. So the upper
3985 // 32 bits of a Vector3 argument on the stack are always cleared to zero.
3987 void CodeGen::genClearStackVec3ArgUpperBits()
3991 printf("*************** In genClearStackVec3ArgUpperBits()\n");
3994 assert(compiler->compGeneratingProlog);
3998 for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; varNum++)
4000 LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
4001 assert(varDsc->lvIsParam);
4003 // Does the var have a SIMD12 type?
4004 if (varDsc->lvType != TYP_SIMD12)
4009 if (!varDsc->lvIsRegArg)
4011 // Clear the upper 32 bits by mov dword ptr [V_ARG_BASE+0xC], 0
4012 getEmitter()->emitIns_S_I(ins_Store(TYP_INT), EA_4BYTE, varNum, genTypeSize(TYP_FLOAT) * 3, 0);
4016 // Assume that for x64 Linux, an argument is either fully in registers
4017 // or fully on the stack.
4018 regNumber argReg = varDsc->GetOtherArgReg();
4020 // Clear the upper 32 bits by two shift instructions.
4021 // argReg = argReg << 96
4022 getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), argReg, 12);
4023 // argReg = argReg >> 96
4024 getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), argReg, 12);
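// For example, if xmm1 is the second argument register and holds the Z component
// of the Vector3 in its low 4 bytes, the pair
//      pslldq  xmm1, 12
//      psrldq  xmm1, 12
// leaves only those low 4 bytes intact and zeroes the rest of the register, so
// no garbage can be stored in the upper bits.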
4028 #endif // FEATURE_SIMD
4029 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4031 // Generate code for CpObj nodes which copy structs that have interleaved GC pointers.
4033 // This will generate a sequence of movs{d,q} instructions for the non-GC members,
4034 // and calls to the CORINFO_HELP_ASSIGN_BYREF helper otherwise.
4035 void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
4037 // Make sure we got the arguments of the cpobj operation in the right registers
4038 GenTreePtr dstAddr = cpObjNode->Addr();
4039 GenTreePtr source = cpObjNode->Data();
4040 GenTreePtr srcAddr = nullptr;
4041 bool sourceIsLocal = false;
4043 assert(source->isContained());
4044 if (source->gtOper == GT_IND)
4046 srcAddr = source->gtGetOp1();
4047 assert(!srcAddr->isContained());
4051 noway_assert(source->IsLocal());
4052 sourceIsLocal = true;
4053 // TODO: Consider making the addrForm() method in Rationalize public, e.g. in GenTree.
4054 // OR: transform source to GT_IND(GT_LCL_VAR_ADDR)
4055 if (source->OperGet() == GT_LCL_VAR)
4057 source->SetOper(GT_LCL_VAR_ADDR);
4061 assert(source->OperGet() == GT_LCL_FLD);
4062 source->SetOper(GT_LCL_FLD_ADDR);
4067 bool dstOnStack = dstAddr->OperIsLocalAddr();
4070 bool isRepMovsPtrUsed = false;
4072 assert(!dstAddr->isContained());
4074 // If the GenTree node has data about GC pointers, this means we're dealing
4075 // with CpObj, so this requires special logic.
4076 assert(cpObjNode->gtGcPtrCount > 0);
4078 // The movs{d,q} instruction is used for copying non-gcref fields, and it needs
4079 // src = RSI and dst = RDI.
4080 // Either these registers must not contain lclVars, or they must be dying or marked for spill.
4081 // This is because these registers are incremented as we go through the struct.
4082 GenTree* actualSrcAddr = srcAddr->gtSkipReloadOrCopy();
4083 GenTree* actualDstAddr = dstAddr->gtSkipReloadOrCopy();
4084 unsigned srcLclVarNum = BAD_VAR_NUM;
4085 unsigned dstLclVarNum = BAD_VAR_NUM;
4086 bool isSrcAddrLiveOut = false;
4087 bool isDstAddrLiveOut = false;
4088 if (genIsRegCandidateLocal(actualSrcAddr))
4090 srcLclVarNum = actualSrcAddr->AsLclVarCommon()->gtLclNum;
4091 isSrcAddrLiveOut = ((actualSrcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
4093 if (genIsRegCandidateLocal(actualDstAddr))
4095 dstLclVarNum = actualDstAddr->AsLclVarCommon()->gtLclNum;
4096 isDstAddrLiveOut = ((actualDstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
4098 assert((actualSrcAddr->gtRegNum != REG_RSI) || !isSrcAddrLiveOut ||
4099 ((srcLclVarNum == dstLclVarNum) && !isDstAddrLiveOut));
4100 assert((actualDstAddr->gtRegNum != REG_RDI) || !isDstAddrLiveOut ||
4101 ((srcLclVarNum == dstLclVarNum) && !isSrcAddrLiveOut));
4104 // Consume these registers.
4105 // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
4108 inst_RV_TT(INS_lea, REG_RSI, source, 0, EA_BYREF);
4109 genConsumeBlockOp(cpObjNode, REG_RDI, REG_NA, REG_NA);
4113 genConsumeBlockOp(cpObjNode, REG_RDI, REG_RSI, REG_NA);
4115 gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddr->TypeGet());
4116 gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet());
4118 unsigned slots = cpObjNode->gtSlots;
4120 // If we can prove it's on the stack we don't need to use the write barrier.
4123 if (slots >= CPOBJ_NONGC_SLOTS_LIMIT)
4126 // If the destination of the CpObj is on the stack
4127 // make sure we allocated RCX to emit rep movs{d,q}.
4128 regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT);
4129 assert(tmpReg == REG_RCX);
4130 isRepMovsPtrUsed = true;
4133 getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, slots);
4134 instGen(INS_r_movs_ptr);
4138 // For small structs, it's better to emit a sequence of movsq instructions than to
4139 // emit a rep movsq instruction.
4142 instGen(INS_movs_ptr);
4149 BYTE* gcPtrs = cpObjNode->gtGcPtrs;
4150 unsigned gcPtrCount = cpObjNode->gtGcPtrCount;
4158 // Let's see if we can use rep movs{d,q} instead of a sequence of movs{d,q} instructions
4159 // to save cycles and code size.
4161 unsigned nonGcSlotCount = 0;
4167 } while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
4169 // If we have a very small contiguous non-gc region, it's better just to
4170 // emit a sequence of movs{d,q} instructions
4171 if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
4173 while (nonGcSlotCount > 0)
4175 instGen(INS_movs_ptr);
4182 // Otherwise, we can save code-size and improve CQ by emitting rep movs{d,q}.
4184 regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT);
4185 assert(tmpReg == REG_RCX);
4186 isRepMovsPtrUsed = true;
4188 getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
4189 instGen(INS_r_movs_ptr);
4194 // We have a GC pointer; call the byref assignment (write barrier) helper.
4195 genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
4201 assert(gcPtrCount == 0);
4204 // Clear the gcInfo for RSI and RDI.
4205 // While we normally update GC info prior to the last instruction that uses them,
4206 // these actually live into the helper call.
4207 gcInfo.gcMarkRegSetNpt(RBM_RSI);
4208 gcInfo.gcMarkRegSetNpt(RBM_RDI);
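// As an example (a sketch), a 4-slot struct whose layout is
// { gc-ptr, non-gc, non-gc, gc-ptr }, with the addresses already in RSI/RDI,
// emits roughly (on AMD64):
//      call    CORINFO_HELP_ASSIGN_BYREF   ; copies the gc slot, advances RSI/RDI
//      movsq
//      movsq
//      call    CORINFO_HELP_ASSIGN_BYREF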
4211 // Generate code for a CpBlk node by means of the VM memcpy helper call, used when either:
4213 // a) the size argument of the CpBlk is not an integer constant, or
4214 // b) the size argument is a constant but is larger than CPBLK_MOVS_LIMIT bytes.
4215 void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode)
4217 #ifdef _TARGET_AMD64_
4218 // Make sure we got the arguments of the cpblk operation in the right registers
4219 unsigned blockSize = cpBlkNode->Size();
4220 GenTreePtr dstAddr = cpBlkNode->Addr();
4221 GenTreePtr source = cpBlkNode->Data();
4222 GenTreePtr srcAddr = nullptr;
4224 // Size goes in arg2
4227 assert(blockSize >= CPBLK_MOVS_LIMIT);
4228 assert((cpBlkNode->gtRsvdRegs & RBM_ARG_2) != 0);
4232 noway_assert(cpBlkNode->gtOper == GT_STORE_DYN_BLK);
4235 // Source address goes in arg1
4236 if (source->gtOper == GT_IND)
4238 srcAddr = source->gtGetOp1();
4239 assert(!srcAddr->isContained());
4243 noway_assert(source->IsLocal());
4244 assert((cpBlkNode->gtRsvdRegs & RBM_ARG_1) != 0);
4245 inst_RV_TT(INS_lea, REG_ARG_1, source, 0, EA_BYREF);
4248 genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
4250 genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
4251 #else // !_TARGET_AMD64_
4252 noway_assert(false && "Helper call for CpBlk is not needed.");
4253 #endif // !_TARGET_AMD64_
4256 // generate code to do a switch statement based on a table of ip-relative offsets
4257 void CodeGen::genTableBasedSwitch(GenTree* treeNode)
4259 genConsumeOperands(treeNode->AsOp());
4260 regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum;
4261 regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum;
4263 regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
4265 // load the ip-relative offset (which is relative to the start of fgFirstBB)
4266 getEmitter()->emitIns_R_ARX(INS_mov, EA_4BYTE, baseReg, baseReg, idxReg, 4, 0);
4268 // add it to the absolute address of fgFirstBB
4269 compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
4270 getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, compiler->fgFirstBB, tmpReg);
4271 getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, baseReg, tmpReg);
4273 getEmitter()->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), baseReg);
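// A sketch of the emitted sequence (illustrative register assignments):
//      mov     eax, dword ptr [rax+rcx*4]  ; baseReg = table base, idxReg = switch value
//      lea     r8, [L_firstBB]             ; absolute address of fgFirstBB
//      add     rax, r8
//      jmp     rax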
4276 // emits the table and an instruction to get the address of the first element
4277 void CodeGen::genJumpTable(GenTree* treeNode)
4279 noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
4280 assert(treeNode->OperGet() == GT_JMPTABLE);
4282 unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount;
4283 BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab;
4284 unsigned jmpTabOffs;
4285 unsigned jmpTabBase;
4287 jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCount, true);
4291 JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
4293 for (unsigned i = 0; i < jumpCount; i++)
4295 BasicBlock* target = *jumpTable++;
4296 noway_assert(target->bbFlags & BBF_JMP_TARGET);
4298 JITDUMP(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
4300 getEmitter()->emitDataGenData(i, target);
4303 getEmitter()->emitDataGenEnd();
4305 // Access to inline data is 'abstracted' by a special type of static member
4306 // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
4307 // to constant data, not a real static field.
4308 getEmitter()->emitIns_R_C(INS_lea, emitTypeSize(TYP_I_IMPL), treeNode->gtRegNum,
4309 compiler->eeFindJitDataOffs(jmpTabBase), 0);
4310 genProduceReg(treeNode);
4313 // generate code for the locked operations:
4314 // GT_LOCKADD, GT_XCHG, GT_XADD
4315 void CodeGen::genLockedInstructions(GenTree* treeNode)
4317 GenTree* data = treeNode->gtOp.gtOp2;
4318 GenTree* addr = treeNode->gtOp.gtOp1;
4319 regNumber targetReg = treeNode->gtRegNum;
4320 regNumber dataReg = data->gtRegNum;
4321 regNumber addrReg = addr->gtRegNum;
4324 // all of these nodes implicitly do an indirection on op1
4325 // so create a temporary node to feed into the pattern matching
4326 GenTreeIndir i = indirForm(data->TypeGet(), addr);
4327 genConsumeReg(addr);
4329 // The register allocator should have extended the lifetime of the address
4330 // so that it is not used as the target.
4331 noway_assert(addrReg != targetReg);
4333 // If data is a lclVar that's not a last use, we'd better have allocated a register
4334 // for the result (except in the case of GT_LOCKADD which does not produce a register result).
4335 assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) ||
4336 (data->gtFlags & GTF_VAR_DEATH) != 0);
4338 genConsumeIfReg(data);
4339 if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg)
4341 inst_RV_RV(ins_Copy(data->TypeGet()), targetReg, dataReg);
4342 data->gtRegNum = targetReg;
4344 // TODO-XArch-Cleanup: Consider whether it is worth it, for debugging purposes, to restore the
4345 // original gtRegNum on data, after calling emitInsBinary below.
4347 switch (treeNode->OperGet())
4354 // lock is implied by xchg
4364 getEmitter()->emitInsBinary(ins, emitTypeSize(data), &i, data);
4366 if (treeNode->gtRegNum != REG_NA)
4368 genProduceReg(treeNode);
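// For illustration (a sketch, with the address in RCX and the data in EDX):
//      GT_LOCKADD:  lock add  dword ptr [rcx], edx
//      GT_XADD:     lock xadd dword ptr [rcx], edx
//      GT_XCHG:     xchg      dword ptr [rcx], edx   ; the lock prefix is implicit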
4372 // generate code for BoundsCheck nodes
4373 void CodeGen::genRangeCheck(GenTreePtr oper)
4376 noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK || oper->OperGet() == GT_SIMD_CHK);
4377 #else // !FEATURE_SIMD
4378 noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
4379 #endif // !FEATURE_SIMD
4381 GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
4383 GenTreePtr arrLen = bndsChk->gtArrLen;
4384 GenTreePtr arrIndex = bndsChk->gtIndex;
4385 GenTreePtr arrRef = nullptr;
4388 GenTree * src1, *src2;
4389 emitJumpKind jmpKind;
4391 genConsumeRegs(arrLen);
4392 genConsumeRegs(arrIndex);
4394 if (arrIndex->isContainedIntOrIImmed())
4396 // arrIndex is a contained constant. In this case
4397 // we will generate one of the following
4398 // cmp [mem], immed (if arrLen is a memory op)
4399 // cmp reg, immed (if arrLen is in a reg)
4401 // That is, arrLen cannot be a contained immediate.
4402 assert(!arrLen->isContainedIntOrIImmed());
4410 // arrIndex could either be a contained memory op or a reg
4411 // In this case we will generate one of the following
4412 // cmp [mem], immed (if arrLen is a constant)
4413 // cmp [mem], reg (if arrLen is in a reg)
4414 // cmp reg, immed (if arrIndex is in a reg)
4415 // cmp reg1, reg2 (if arrIndex is in reg1)
4416 // cmp reg, [mem] (if arrLen is a memory op)
4418 // That is, only one of arrIndex or arrLen can be a memory op.
4419 assert(!arrIndex->isContainedMemoryOp() || !arrLen->isContainedMemoryOp());
4426 var_types bndsChkType = src2->TypeGet();
4428 // Bounds checks can only be 32 or 64 bit sized comparisons.
4429 assert(bndsChkType == TYP_INT || bndsChkType == TYP_LONG);
4431 // The type of the bounds check should always be wide enough to compare against the index.
4432 assert(emitTypeSize(bndsChkType) >= emitTypeSize(src1->TypeGet()));
4435 getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(bndsChkType), src1, src2);
4436 genJumpToThrowHlpBlk(jmpKind, bndsChk->gtThrowKind, bndsChk->gtIndRngFailBB);
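// For example (a sketch), with the index in a register and the length in memory:
//      cmp     eax, dword ptr [<arrLen>]
//      jae     <throw helper block>   ; the unsigned compare also rejects negative indices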
4439 //------------------------------------------------------------------------
4440 // genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the
4441 // lower bound for the given dimension.
4444 // elemType - the element type of the array
4445 // rank - the rank of the array
4446 // dimension - the dimension for which the lower bound offset will be returned.
4451 unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension)
4453 // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
4454 return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank);
4457 //------------------------------------------------------------------------
4458 // genOffsetOfMDArrayDimensionSize: Returns the offset from the Array object to the
4459 // length (size) of the given dimension.
4462 // elemType - the element type of the array
4463 // rank - the rank of the array
4464 // dimension - the dimension for which the length offset will be returned.
4469 unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension)
4471 // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
4472 return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension;
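// Taken together, the two offset functions above describe the MD array metadata
// layout implied by this code: starting at eeGetArrayDataOffset() there are
// 'rank' 4-byte dimension lengths followed by 'rank' 4-byte lower bounds, e.g.
// for a rank-2 array:
//      [len0][len1][lb0][lb1][elements...]
// so dimensionSize(d) = dataOffset + 4*d, and lowerBound(d) = dataOffset + 4*(d + rank).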
4475 //------------------------------------------------------------------------
4476 // genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference,
4477 // producing the effective index by subtracting the lower bound.
4480 // arrIndex - the node for which we're generating code
4486 void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex)
4488 GenTreePtr arrObj = arrIndex->ArrObj();
4489 GenTreePtr indexNode = arrIndex->IndexExpr();
4491 regNumber arrReg = genConsumeReg(arrObj);
4492 regNumber indexReg = genConsumeReg(indexNode);
4493 regNumber tgtReg = arrIndex->gtRegNum;
4495 unsigned dim = arrIndex->gtCurrDim;
4496 unsigned rank = arrIndex->gtArrRank;
4497 var_types elemType = arrIndex->gtArrElemType;
4499 noway_assert(tgtReg != REG_NA);
4501 // Subtract the lower bound for this dimension.
4502 // TODO-XArch-CQ: make this contained if it's an immediate that fits.
4503 if (tgtReg != indexReg)
4505 inst_RV_RV(INS_mov, tgtReg, indexReg, indexNode->TypeGet());
4507 getEmitter()->emitIns_R_AR(INS_sub, emitActualTypeSize(TYP_INT), tgtReg, arrReg,
4508 genOffsetOfMDArrayLowerBound(elemType, rank, dim));
4509 getEmitter()->emitIns_R_AR(INS_cmp, emitActualTypeSize(TYP_INT), tgtReg, arrReg,
4510 genOffsetOfMDArrayDimensionSize(elemType, rank, dim));
4511 genJumpToThrowHlpBlk(EJ_jae, SCK_RNGCHK_FAIL);
4513 genProduceReg(arrIndex);
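// A sketch of the emitted sequence (illustrative registers):
//      mov     ecx, eax                         ; tgtReg = indexReg, if they differ
//      sub     ecx, dword ptr [rdx+<lbOffs>]    ; subtract the lower bound
//      cmp     ecx, dword ptr [rdx+<lenOffs>]   ; compare against the dimension length
//      jae     <throw helper block>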
4516 //------------------------------------------------------------------------
4517 // genCodeForArrOffset: Generates code to compute the flattened array offset for
4518 // one dimension of an array reference:
4519 // result = (prevDimOffset * dimSize) + effectiveIndex
4520 // where dimSize is obtained from the arrObj operand
4523 // arrOffset - the node for which we're generating code
4529 // dimSize and effectiveIndex are always non-negative, the former by design,
4530 // and the latter because it has been normalized to be zero-based.
4532 void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
4534 GenTreePtr offsetNode = arrOffset->gtOffset;
4535 GenTreePtr indexNode = arrOffset->gtIndex;
4536 GenTreePtr arrObj = arrOffset->gtArrObj;
4538 regNumber tgtReg = arrOffset->gtRegNum;
4540 noway_assert(tgtReg != REG_NA);
4542 unsigned dim = arrOffset->gtCurrDim;
4543 unsigned rank = arrOffset->gtArrRank;
4544 var_types elemType = arrOffset->gtArrElemType;
4546 // We will use a temp register for the offset*scale+effectiveIndex computation.
4547 regMaskTP tmpRegMask = arrOffset->gtRsvdRegs;
4548 regNumber tmpReg = genRegNumFromMask(tmpRegMask);
4550 // First, consume the operands in the correct order.
4551 regNumber offsetReg = REG_NA;
4552 if (!offsetNode->IsIntegralConst(0))
4554 offsetReg = genConsumeReg(offsetNode);
4558 assert(offsetNode->isContained());
4560 regNumber indexReg = genConsumeReg(indexNode);
4561 // Although arrReg may not be used in the constant-index case, if we have generated
4562 // the value into a register, we must consume it, otherwise we will fail to end the
4563 // live range of the gc ptr.
4564 // TODO-CQ: Currently arrObj will always have a register allocated to it.
4565 // We could avoid allocating a register for it, which would be of value if the arrObj
4566 // is an on-stack lclVar.
4567 regNumber arrReg = REG_NA;
4568 if (arrObj->gtHasReg())
4570 arrReg = genConsumeReg(arrObj);
4573 if (!offsetNode->IsIntegralConst(0))
4575 // Evaluate tgtReg = offsetReg*dim_size + indexReg.
4576 // tmpReg is used to load dim_size and the result of the multiplication.
4577 // Note that dim_size will never be negative.
4579 getEmitter()->emitIns_R_AR(INS_mov, emitActualTypeSize(TYP_INT), tmpReg, arrReg,
4580 genOffsetOfMDArrayDimensionSize(elemType, rank, dim));
4581 inst_RV_RV(INS_imul, tmpReg, offsetReg);
4583 if (tmpReg == tgtReg)
4585 inst_RV_RV(INS_add, tmpReg, indexReg);
4589 if (indexReg != tgtReg)
4591 inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_I_IMPL);
4593 inst_RV_RV(INS_add, tgtReg, tmpReg);
4598 if (indexReg != tgtReg)
4600 inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT);
4603 genProduceReg(arrOffset);
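// A sketch of the non-constant-offset case (illustrative registers):
//      mov     ecx, dword ptr [rdx+<dimSizeOffs>]  ; tmpReg = dimension size
//      imul    ecx, edi                            ; tmpReg *= prevDimOffset
//      mov     eax, esi                            ; tgtReg = effectiveIndex, if they differ
//      add     eax, ecx                            ; tgtReg += tmpReg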
4606 // make a temporary indir we can feed to pattern matching routines
4607 // in cases where we don't want to instantiate all the indirs that happen
4609 GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
4611 GenTreeIndir i(GT_IND, type, base, nullptr);
4612 i.gtRegNum = REG_NA;
4613 // it has to be non-null (because contained nodes can't be the last in a block),
4614 // but we don't want it to be a valid pointer
4615 i.gtNext = (GenTree*)(-1);
4619 // make a temporary int we can feed to pattern matching routines
4620 // in cases where we don't want to instantiate one
4622 GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
4624 GenTreeIntCon i(type, value);
4625 i.gtRegNum = REG_NA;
4626 // it has to be non-null (because contained nodes can't be the last in a block),
4627 // but we don't want it to be a valid pointer
4628 i.gtNext = (GenTree*)(-1);
4632 instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
4636 // Operations on SIMD vectors shouldn't come down this path
4637 assert(!varTypeIsSIMD(type));
4638 if (varTypeIsFloating(type))
4640 return ins_MathOp(oper, type);
4684 #if !defined(_TARGET_64BIT_)
4697 #endif // !defined(_TARGET_64BIT_)
4705 //------------------------------------------------------------------------
4706 // genCodeForShift: Generates the code sequence for a GenTree node that
4707 // represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
4710 // tree - the bit shift node (that specifies the type of bit shift to perform).
4713 // a) All GenTrees are register allocated.
4714 // b) The shift-by-amount in tree->gtOp.gtOp2 is either a contained constant or
4715 // it's a register-allocated expression. If it is in a register that is
4716 // not RCX, it will be moved to RCX (so RCX better not be in use!).
4718 void CodeGen::genCodeForShift(GenTreePtr tree)
4720 // Only the non-RMW case here.
4721 assert(tree->OperIsShiftOrRotate());
4722 assert(!tree->gtOp.gtOp1->isContained());
4723 assert(tree->gtRegNum != REG_NA);
4725 genConsumeOperands(tree->AsOp());
4727 var_types targetType = tree->TypeGet();
4728 instruction ins = genGetInsForOper(tree->OperGet(), targetType);
4730 GenTreePtr operand = tree->gtGetOp1();
4731 regNumber operandReg = operand->gtRegNum;
4733 GenTreePtr shiftBy = tree->gtGetOp2();
4734 if (shiftBy->isContainedIntOrIImmed())
4736 // First, move the operand to the destination register and
4737 // later on perform the shift in-place.
4738 // (LSRA will try to avoid this situation through preferencing.)
4739 if (tree->gtRegNum != operandReg)
4741 inst_RV_RV(INS_mov, tree->gtRegNum, operandReg, targetType);
4744 int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
4745 inst_RV_SH(ins, emitTypeSize(tree), tree->gtRegNum, shiftByValue);
4749 // We must have the number of bits to shift stored in ECX, since we constrained this node to
4750 // sit in ECX. If it didn't end up there, LSRA expects the code generator to move it, since ECX
4751 // is a fixed single-register requirement.
4752 regNumber shiftReg = shiftBy->gtRegNum;
4753 if (shiftReg != REG_RCX)
4755 // Issue the mov to RCX:
4756 inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet());
4759 // The operand to be shifted must not be in ECX
4760 noway_assert(operandReg != REG_RCX);
4762 if (tree->gtRegNum != operandReg)
4764 inst_RV_RV(INS_mov, tree->gtRegNum, operandReg, targetType);
4766 inst_RV_CL(ins, tree->gtRegNum, targetType);
4769 genProduceReg(tree);
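// For illustration (a sketch): a shift by a contained constant emits
//      mov     eax, edx        ; only if the target differs from the operand reg
//      shl     eax, 5
// while a variable shift amount emits
//      mov     ecx, ebx        ; only if the amount wasn't already in RCX
//      mov     eax, edx
//      shl     eax, cl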
4772 //------------------------------------------------------------------------
4773 // genCodeForShiftRMW: Generates the code sequence for a GT_STOREIND GenTree node that
4774 // represents a RMW bit shift or rotate operation (<<, >>, >>>, rol, ror), for example:
4775 // GT_STOREIND( AddressTree, GT_SHL( Ind ( AddressTree ), Operand ) )
4778 // storeIndNode: the GT_STOREIND node.
4780 void CodeGen::genCodeForShiftRMW(GenTreeStoreInd* storeInd)
4782 GenTree* data = storeInd->Data();
4783 GenTree* addr = storeInd->Addr();
4785 assert(data->OperIsShiftOrRotate());
4787 // This function only handles the RMW case.
4788 assert(data->gtOp.gtOp1->isContained());
4789 assert(data->gtOp.gtOp1->isIndir());
4790 assert(Lowering::IndirsAreEquivalent(data->gtOp.gtOp1, storeInd));
4791 assert(data->gtRegNum == REG_NA);
4793 var_types targetType = data->TypeGet();
4794 genTreeOps oper = data->OperGet();
4795 instruction ins = genGetInsForOper(oper, targetType);
4796 emitAttr attr = EA_ATTR(genTypeSize(targetType));
4798 GenTree* shiftBy = data->gtOp.gtOp2;
4799 if (shiftBy->isContainedIntOrIImmed())
4801 int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
4802 ins = genMapShiftInsToShiftByConstantIns(ins, shiftByValue);
4803 if (shiftByValue == 1)
4805 // There is no source in this case, as the shift by count is embedded in the instruction opcode itself.
4806 getEmitter()->emitInsRMW(ins, attr, storeInd);
4810 getEmitter()->emitInsRMW(ins, attr, storeInd, shiftBy);
4815 // We must have the number of bits to shift stored in ECX, since we constrained this node to
4816 // sit in ECX. If it didn't end up there, LSRA expects the code generator to move it, since ECX
4817 // is a fixed single-register requirement.
4818 regNumber shiftReg = shiftBy->gtRegNum;
4819 if (shiftReg != REG_RCX)
4821 // Issue the mov to RCX:
4822 inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet());
4825 // The shiftBy operand is implicit, so call the unary version of emitInsRMW.
4826 getEmitter()->emitInsRMW(ins, attr, storeInd);
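// For illustration (a sketch), for [addr] = [addr] << n this emits one of:
//      shl     dword ptr [rax], 1      ; n == 1, the count is embedded in the opcode
//      shl     dword ptr [rax], 5      ; n is a contained constant
//      shl     dword ptr [rax], cl     ; variable n, moved to RCX first if needed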
4830 void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
4832 regNumber dstReg = tree->gtRegNum;
4833 GenTree* unspillTree = tree;
4835 if (tree->gtOper == GT_RELOAD)
4837 unspillTree = tree->gtOp.gtOp1;
4840 if ((unspillTree->gtFlags & GTF_SPILLED) != 0)
4842 if (genIsRegCandidateLocal(unspillTree))
4844 // Reset spilled flag, since we are going to load a local variable from its home location.
4845 unspillTree->gtFlags &= ~GTF_SPILLED;
4847 GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon();
4848 LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
4850 // Load local variable from its home location.
4851 // In most cases the tree type will indicate the correct type to use for the load.
4852 // However, if it is NOT a normalizeOnLoad lclVar (i.e. NOT a small int that always gets
4853 // widened when loaded into a register), and its size is not the same as genActualType of
4854 // the type of the lclVar, then we need to change the type of the tree node when loading.
4855 // This situation happens due to "optimizations" that avoid a cast and
4856 // simply retype the node when using a long lclVar as an int.
4857 // While loading the int in that case would work for this use of the lclVar, if it is
4858 // later used as a long, we would have incorrectly truncated the long.
4859 // In the normalizeOnLoad case, ins_Load will return an appropriate sign- or zero-extending load.
4862 var_types treeType = unspillTree->TypeGet();
4863 if (treeType != genActualType(varDsc->lvType) && !varTypeIsGC(treeType) && !varDsc->lvNormalizeOnLoad())
4865 assert(!varTypeIsGC(varDsc));
4866 var_types spillType = genActualType(varDsc->lvType);
4867 unspillTree->gtType = spillType;
4868 inst_RV_TT(ins_Load(spillType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree);
4869 unspillTree->gtType = treeType;
4873 inst_RV_TT(ins_Load(treeType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree);
4876 unspillTree->SetInReg();
4878 // TODO-Review: We would like to call:
4879 // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree));
4880 // instead of the following code, but this ends up hitting this assert:
4881 // assert((regSet.rsMaskVars & regMask) == 0);
4882 // due to issues with LSRA resolution moves.
4883 // So, just force it for now. This probably indicates a condition that creates a GC hole!
4885 // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove,
4886 // because the variable is not really going live or dead, but that method is somewhat poorly
4887 // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo.
4888 // TODO-Cleanup: This code exists in other CodeGen*.cpp files, and should be moved to CodeGenCommon.cpp.
4890 // Don't update the variable's location if we are just re-spilling it again.
4892 if ((unspillTree->gtFlags & GTF_SPILL) == 0)
4894 genUpdateVarReg(varDsc, tree);
4896 if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
4898 JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum);
4901 VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
4904 if (compiler->verbose)
4906 printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum);
4907 varDsc->PrintVarReg();
4908 printf(" is becoming live ");
4909 compiler->printTreeID(unspillTree);
4914 regSet.AddMaskVars(genGetRegMask(varDsc));
4917 gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
4919 else if (unspillTree->IsMultiRegCall())
4921 GenTreeCall* call = unspillTree->AsCall();
4922 ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
4923 unsigned regCount = retTypeDesc->GetReturnRegCount();
4924 GenTreeCopyOrReload* reloadTree = nullptr;
4925 if (tree->OperGet() == GT_RELOAD)
4927 reloadTree = tree->AsCopyOrReload();
4930 // In case of multi-reg call node, GTF_SPILLED flag on it indicates that
4931 // one or more of its result regs are spilled. Call node needs to be
4932 // queried to know which specific result regs need to be unspilled.
4933 for (unsigned i = 0; i < regCount; ++i)
4935 unsigned flags = call->GetRegSpillFlagByIdx(i);
4936 if ((flags & GTF_SPILLED) != 0)
4938 var_types dstType = retTypeDesc->GetReturnRegType(i);
4939 regNumber unspillTreeReg = call->GetRegNumByIdx(i);
4941 if (reloadTree != nullptr)
4943 dstReg = reloadTree->GetRegNumByIdx(i);
4944 if (dstReg == REG_NA)
4946 dstReg = unspillTreeReg;
4951 dstReg = unspillTreeReg;
4954 TempDsc* t = regSet.rsUnspillInPlace(call, unspillTreeReg, i);
4955 getEmitter()->emitIns_R_S(ins_Load(dstType), emitActualTypeSize(dstType), dstReg, t->tdTempNum(),
4957 compiler->tmpRlsTemp(t);
4958 gcInfo.gcMarkRegPtrVal(dstReg, dstType);
4962 unspillTree->gtFlags &= ~GTF_SPILLED;
4963 unspillTree->SetInReg();
4967 TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum);
4968 getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->TypeGet()), dstReg,
4970 compiler->tmpRlsTemp(t);
4972 unspillTree->gtFlags &= ~GTF_SPILLED;
4973 unspillTree->SetInReg();
4974 gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
4979 // Do liveness update for a subnode that is being consumed by codegen,
4980 // including the logic for reload in case it is needed, and also take care
4981 // of placing the value in the desired register.
4982 void CodeGen::genConsumeRegAndCopy(GenTree* tree, regNumber needReg)
4984 if (needReg == REG_NA)
4988 regNumber treeReg = genConsumeReg(tree);
4989 if (treeReg != needReg)
4991 inst_RV_RV(INS_mov, needReg, treeReg, tree->TypeGet());
4995 void CodeGen::genRegCopy(GenTree* treeNode)
4997 assert(treeNode->OperGet() == GT_COPY);
4998 GenTree* op1 = treeNode->gtOp.gtOp1;
5000 if (op1->IsMultiRegCall())
5004 GenTreeCopyOrReload* copyTree = treeNode->AsCopyOrReload();
5005 GenTreeCall* call = op1->AsCall();
5006 ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
5007 unsigned regCount = retTypeDesc->GetReturnRegCount();
5009 for (unsigned i = 0; i < regCount; ++i)
5011 var_types type = retTypeDesc->GetReturnRegType(i);
5012 regNumber fromReg = call->GetRegNumByIdx(i);
5013 regNumber toReg = copyTree->GetRegNumByIdx(i);
5015 // A multi-reg GT_COPY node will have a valid reg only for those
5016 // positions for which the corresponding result reg of the call node needs to be copied.
5018 if (toReg != REG_NA)
5020 assert(toReg != fromReg);
5021 inst_RV_RV(ins_Copy(type), toReg, fromReg, type);
5027 var_types targetType = treeNode->TypeGet();
5028 regNumber targetReg = treeNode->gtRegNum;
5029 assert(targetReg != REG_NA);
5031 // Check whether this node and the node from which we're copying the value have
5032 // different register types. This can happen if (currently iff) we have a SIMD
5033 // vector type that fits in an integer register, in which case it is passed as
5034 // an argument, or returned from a call, in an integer register and must be
5035 // copied if it's in an xmm register.
5037 bool srcFltReg = (varTypeIsFloating(op1) || varTypeIsSIMD(op1));
5038 bool tgtFltReg = (varTypeIsFloating(treeNode) || varTypeIsSIMD(treeNode));
5039 if (srcFltReg != tgtFltReg)
5046 ins = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet());
5048 intReg = op1->gtRegNum;
5052 ins = ins_CopyFloatToInt(op1->TypeGet(), treeNode->TypeGet());
5054 fpReg = op1->gtRegNum;
5056 inst_RV_RV(ins, fpReg, intReg, targetType);
5060 inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
5065 // The lclVar will never be a def.
5066 // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
5067 // appropriately set the gcInfo for the copied value.
5068 // If not, there are two cases we need to handle:
5069 // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
5070 // will remain live in its original register.
5071 // genProduceReg() will appropriately set the gcInfo for the copied value,
5072 // and genConsumeReg will reset it.
5073 // - Otherwise, we need to update register info for the lclVar.
5075 GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
5076 assert((lcl->gtFlags & GTF_VAR_DEF) == 0);
5078 if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
5080 LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
5082 // If we didn't just spill it (in genConsumeReg, above), then update the register info
5083 if (varDsc->lvRegNum != REG_STK)
5085 // The old location is dying
5086 genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
5088 gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum));
5090 genUpdateVarReg(varDsc, treeNode);
5092 // The new location is going live
5093 genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
5099 genProduceReg(treeNode);
5102 // Check that registers are consumed in the right order for the current node being generated.
5104 void CodeGen::genCheckConsumeNode(GenTree* treeNode)
5106 // GT_PUTARG_REG is consumed out of order.
5107 if (treeNode->gtSeqNum != 0 && treeNode->OperGet() != GT_PUTARG_REG)
5109 if (lastConsumedNode != nullptr)
5111 if (treeNode == lastConsumedNode)
5115 printf("Node was consumed twice:\n ");
5116 compiler->gtDispTree(treeNode, nullptr, nullptr, true);
5121 if (verbose && (lastConsumedNode->gtSeqNum > treeNode->gtSeqNum))
5123 printf("Nodes were consumed out-of-order:\n");
5124 compiler->gtDispTree(lastConsumedNode, nullptr, nullptr, true);
5125 compiler->gtDispTree(treeNode, nullptr, nullptr, true);
5127 // assert(lastConsumedNode->gtSeqNum < treeNode->gtSeqNum);
5130 lastConsumedNode = treeNode;
5135 //--------------------------------------------------------------------
5136 // genConsumeReg: Do liveness update for a subnode that is being
5137 // consumed by codegen.
5140 // tree - GenTree node
5143 // Returns the reg number of tree.
5144 // In case of multi-reg call node returns the first reg number
5145 // of the multi-reg return.
5146 regNumber CodeGen::genConsumeReg(GenTree* tree)
5148 if (tree->OperGet() == GT_COPY)
5153 // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it
5154 // interferes with one of the other sources (or the target, if it's a "delayed use" register)).
5155 // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and
5156 // always using GT_COPY to make the lclVar location explicit.
5157 // Note that we have to do this before calling genUpdateLife because otherwise if we spill it
5158 // the lvRegNum will be set to REG_STK and we will lose track of what register currently holds
5159 // the lclVar (normally when a lclVar is spilled it is then used from its former register
5160 // location, which matches the gtRegNum on the node).
5161 // (Note that it doesn't matter if we call this before or after genUnspillRegIfNeeded
5162 // because if it's on the stack it will always get reloaded into tree->gtRegNum).
5163 if (genIsRegCandidateLocal(tree))
5165 GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
5166 LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
5167 if (varDsc->lvRegNum != REG_STK && varDsc->lvRegNum != tree->gtRegNum)
5169 inst_RV_RV(INS_mov, tree->gtRegNum, varDsc->lvRegNum);
5173 genUnspillRegIfNeeded(tree);
5175 // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
5176 genUpdateLife(tree);
5178 assert(tree->gtHasReg());
5180 // there are three cases where consuming a reg means clearing the bit in the live mask
5181 // 1. it was not produced by a local
5182 // 2. it was produced by a local that is going dead
5183 // 3. it was produced by a local that does not live in that reg (like one allocated on the stack)
5185 if (genIsRegCandidateLocal(tree))
5187 GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
5188 LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
5189 assert(varDsc->lvLRACandidate);
5191 if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
5193 gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->lvRegNum));
5195 else if (varDsc->lvRegNum == REG_STK)
5197 // We have loaded this into a register only temporarily
5198 gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
5203 gcInfo.gcMarkRegSetNpt(tree->gtGetRegMask());
5206 genCheckConsumeNode(tree);
5207 return tree->gtRegNum;
5210 // Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect).
5211 void CodeGen::genConsumeAddress(GenTree* addr)
5213 if (addr->OperGet() == GT_LEA)
5215 genConsumeAddrMode(addr->AsAddrMode());
5217 else if (!addr->isContained())
5219 genConsumeReg(addr);
5223 // do liveness update for a subnode that is being consumed by codegen
5224 void CodeGen::genConsumeAddrMode(GenTreeAddrMode* addr)
5226 genConsumeOperands(addr);
5229 void CodeGen::genConsumeRegs(GenTree* tree)
5231 #if !defined(_TARGET_64BIT_)
5232 if (tree->OperGet() == GT_LONG)
5234 genConsumeRegs(tree->gtGetOp1());
5235 genConsumeRegs(tree->gtGetOp2());
5238 #endif // !defined(_TARGET_64BIT_)
5240 if (tree->isContained())
5242 if (tree->isContainedSpillTemp())
5244 // spill temps are untracked, hence there is no need to update life
5246 else if (tree->isIndir())
5248 genConsumeAddress(tree->AsIndir()->Addr());
5250 else if (tree->OperGet() == GT_AND)
5252 // This is the special contained GT_AND that we created in Lowering::TreeNodeInfoInitCmp()
5253 // Now we need to consume the operands of the GT_AND node.
5254 genConsumeOperands(tree->AsOp());
5256 else if (tree->OperGet() == GT_LCL_VAR)
5258 // A contained lclVar must be living on the stack and marked as reg-optional.
5259 unsigned varNum = tree->AsLclVarCommon()->GetLclNum();
5260 LclVarDsc* varDsc = compiler->lvaTable + varNum;
5262 noway_assert(varDsc->lvRegNum == REG_STK);
5263 noway_assert(tree->IsRegOptional());
5265 // Update the life of reg optional lcl var.
5266 genUpdateLife(tree);
5270 assert(tree->OperIsLeaf());
5275 genConsumeReg(tree);
5279 //------------------------------------------------------------------------
5280 // genConsumeOperands: Do liveness update for the operands of a unary or binary tree
5283 // tree - the GenTreeOp whose operands will have their liveness updated.
5289 // Note that this logic is localized here because we must do the liveness update in
5290 // the correct execution order. This is important because we may have two operands
5291 // that involve the same lclVar, and if one is marked "lastUse" we must handle it after the first.
5294 void CodeGen::genConsumeOperands(GenTreeOp* tree)
5296 GenTree* firstOp = tree->gtOp1;
5297 GenTree* secondOp = tree->gtOp2;
5298 if ((tree->gtFlags & GTF_REVERSE_OPS) != 0)
5300 assert(secondOp != nullptr);
5302 secondOp = tree->gtOp1;
5304 if (firstOp != nullptr)
5306 genConsumeRegs(firstOp);
5308 if (secondOp != nullptr)
5310 genConsumeRegs(secondOp);
5314 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
5315 //------------------------------------------------------------------------
5316 // genConsumePutStructArgStk: Do liveness update for the operands of a PutArgStk node.
5317 // Also loads the src/dst addresses into the right registers
5318 // for the rep movs operation.
5321 // putArgNode - the PUTARG_STK tree.
5322 // dstReg - the dstReg for the rep move operation.
5323 // srcReg - the srcReg for the rep move operation.
5324 // sizeReg - the sizeReg for the rep move operation.
5325 // baseVarNum - the varnum for the local used for placing the "by-value" args on the stack.
5330 // Note: sizeReg can be REG_NA when this function is used to consume the dstReg and srcReg
5331 // when copying a struct with references onto the stack.
5332 // The source address/offset is determined from the address on the GT_OBJ node, while
5333 // the destination address is the address contained in 'baseVarNum' plus the offset
5334 // provided in the 'putArgNode'.
5336 void CodeGen::genConsumePutStructArgStk(
5337 GenTreePutArgStk* putArgNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg, unsigned baseVarNum)
5339 assert(varTypeIsStruct(putArgNode));
5340 assert(baseVarNum != BAD_VAR_NUM);
5342 // The putArgNode children are always contained. We should not consume any registers.
5343 assert(putArgNode->gtGetOp1()->isContained());
5345 GenTree* dstAddr = putArgNode;
5347 // Get the source address.
5348 GenTree* src = putArgNode->gtGetOp1();
5349 assert((src->gtOper == GT_OBJ) || ((src->gtOper == GT_IND && varTypeIsSIMD(src))));
5350 GenTree* srcAddr = src->gtGetOp1();
5352 size_t size = putArgNode->getArgSize();
5354 assert(dstReg != REG_NA);
5355 assert(srcReg != REG_NA);
5357 // Consume the registers only if they are not contained or set to REG_NA.
5358 if (srcAddr->gtRegNum != REG_NA)
5360 genConsumeReg(srcAddr);
5363 // If the op1 is already in the dstReg - nothing to do.
5364 // Otherwise load the op1 (GT_ADDR) into the dstReg to copy the struct on the stack by value.
5365 if (dstAddr->gtRegNum != dstReg)
5367 // Generate an LEA instruction to load the address of the outgoing arg var + slot offset (or of the
5368 // incoming arg area for tail calls) into RDI (the dst register).
5369 // Destination is always local (on the stack) - use EA_PTRSIZE.
5370 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, dstReg, baseVarNum, putArgNode->getArgOffset());
5373 if (srcAddr->gtRegNum != srcReg)
5375 if (srcAddr->OperIsLocalAddr())
5377 // The OperLocalAddr is always contained.
5378 assert(srcAddr->isContained());
5379 GenTreeLclVarCommon* lclNode = srcAddr->AsLclVarCommon();
5381 // Generate LEA instruction to load the LclVar address in RSI.
5382 // Source is known to be on the stack. Use EA_PTRSIZE.
5383 unsigned int offset = 0;
5384 if (srcAddr->OperGet() == GT_LCL_FLD_ADDR)
5386 offset = srcAddr->AsLclFld()->gtLclOffs;
5388 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, srcReg, lclNode->gtLclNum, offset);
5392 assert(srcAddr->gtRegNum != REG_NA);
5393 // Source is not known to be on the stack. Use EA_BYREF.
5394 getEmitter()->emitIns_R_R(INS_mov, EA_BYREF, srcReg, srcAddr->gtRegNum);
5398 if (sizeReg != REG_NA)
5400 inst_RV_IV(INS_mov, sizeReg, size, EA_8BYTE);
5403 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
5405 //------------------------------------------------------------------------
5406 // genConsumeBlockSize: Ensure that the block size is in the given register
5409 // blkNode - The block node
5410 // sizeReg - The register into which the block's size should go
5413 void CodeGen::genConsumeBlockSize(GenTreeBlk* blkNode, regNumber sizeReg)
5415 if (sizeReg != REG_NA)
5417 unsigned blockSize = blkNode->Size();
5420 assert(blkNode->gtRsvdRegs == genRegMask(sizeReg));
5421 genSetRegToIcon(sizeReg, blockSize);
5425 noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
5426 genConsumeReg(blkNode->AsDynBlk()->gtDynamicSize);
5431 //------------------------------------------------------------------------
5432 // genConsumeBlockDst: Ensure that the block destination address is in its
5433 // allocated register.
5435 // blkNode - The block node
5438 void CodeGen::genConsumeBlockDst(GenTreeBlk* blkNode)
5440 GenTree* dstAddr = blkNode->Addr();
5441 genConsumeReg(dstAddr);
5444 //------------------------------------------------------------------------
5445 // genConsumeBlockSrc: Ensure that the block source address is in its
5446 // allocated register if it is non-local.
5448 // blkNode - The block node
5451 // Returns the source address node, if it is non-local,
5452 // and nullptr otherwise.
5454 GenTree* CodeGen::genConsumeBlockSrc(GenTreeBlk* blkNode)
5456 GenTree* src = blkNode->Data();
5457 if (blkNode->OperIsCopyBlkOp())
5459 // For a CopyBlk we need the address of the source.
5460 if (src->OperGet() == GT_IND)
5462 src = src->gtOp.gtOp1;
5466 // This must be a local.
5467 // For this case, there is no source address register, as it is a
5468 // stack-based address.
5469 assert(src->OperIsLocal());
5477 //------------------------------------------------------------------------
5478 // genConsumeBlockOp: Ensure that the block's operands are enregistered
5481 // blkNode - The block node
5484 // This ensures that the operands are consumed in the proper order to
5485 // obey liveness modeling.
5487 void CodeGen::genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg)
5489 // We have to consume the registers, and perform any copies, in the actual execution order.
5490 // The nominal order is: dst, src, size. However, this may have been changed
5491 // by reverse flags on the blkNode, or by the setting of gtEvalSizeFirst in the case of a dynamically-sized block.
5493 // Note that the register allocator ensures that the registers ON THE NODES will not interfere
5494 // with one another if consumed (i.e. reloaded or moved to their ASSIGNED reg) in execution order.
5495 // Further, it ensures that they will not interfere with one another if they are then copied
5496 // to the REQUIRED register (if a fixed register requirement) in execution order. This requires,
5497 // then, that we first consume all the operands, then do any necessary moves.
5499 GenTree* dstAddr = blkNode->Addr();
5500 GenTree* src = nullptr;
5501 unsigned blockSize = blkNode->Size();
5502 GenTree* size = nullptr;
5503 bool evalSizeFirst = true;
5505 if (blkNode->OperGet() == GT_STORE_DYN_BLK)
5507 evalSizeFirst = blkNode->AsDynBlk()->gtEvalSizeFirst;
5508 size = blkNode->AsDynBlk()->gtDynamicSize;
5511 // First, consume all the sources in order
5514 genConsumeBlockSize(blkNode, sizeReg);
5516 if (blkNode->IsReverseOp())
5518 src = genConsumeBlockSrc(blkNode);
5519 genConsumeBlockDst(blkNode);
5523 genConsumeBlockDst(blkNode);
5524 src = genConsumeBlockSrc(blkNode);
5528 genConsumeBlockSize(blkNode, sizeReg);
5530 // Next, perform any necessary moves.
5531 if (evalSizeFirst && (size != nullptr) && (size->gtRegNum != sizeReg))
5533 inst_RV_RV(INS_mov, sizeReg, size->gtRegNum, size->TypeGet());
5535 if (blkNode->IsReverseOp())
5537 if ((src != nullptr) && (src->gtRegNum != srcReg))
5539 inst_RV_RV(INS_mov, srcReg, src->gtRegNum, src->TypeGet());
5541 if (dstAddr->gtRegNum != dstReg)
5543 inst_RV_RV(INS_mov, dstReg, dstAddr->gtRegNum, dstAddr->TypeGet());
5548 if (dstAddr->gtRegNum != dstReg)
5550 inst_RV_RV(INS_mov, dstReg, dstAddr->gtRegNum, dstAddr->TypeGet());
5552 if ((src != nullptr) && (src->gtRegNum != srcReg))
5554 inst_RV_RV(INS_mov, srcReg, src->gtRegNum, src->TypeGet());
5557 if (!evalSizeFirst && size != nullptr && (size->gtRegNum != sizeReg))
5559 inst_RV_RV(INS_mov, sizeReg, size->gtRegNum, size->TypeGet());
5563 //-------------------------------------------------------------------------
5564 // genProduceReg: do liveness update for the register produced by the current node.
5568 // tree - GenTree node
5572 void CodeGen::genProduceReg(GenTree* tree)
5574 if (tree->gtFlags & GTF_SPILL)
5576 // Code for a GT_COPY node gets generated as part of consuming regs by its parent.
5577 // A GT_COPY node in turn produces a reg result and should never be marked to spill.
5580 // Similarly, a GT_RELOAD node gets generated as part of consuming regs by its
5581 // parent and should never be marked for spilling.
5582 noway_assert(!tree->IsCopyOrReload());
5584 if (genIsRegCandidateLocal(tree))
5586 // Store local variable to its home location.
5587 tree->gtFlags &= ~GTF_REG_VAL;
5588 // Ensure that lclVar stores are typed correctly.
5589 unsigned varNum = tree->gtLclVarCommon.gtLclNum;
5590 assert(!compiler->lvaTable[varNum].lvNormalizeOnStore() ||
5591 (tree->TypeGet() == genActualType(compiler->lvaTable[varNum].TypeGet())));
5592 inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(varNum)), tree, tree->gtRegNum);
5596 // In case of multi-reg call node, spill flag on call node
5597 // indicates that one or more of its allocated regs need to
5598 // be spilled. Call node needs to be further queried to
5599 // know which of its result regs needs to be spilled.
5600 if (tree->IsMultiRegCall())
5602 GenTreeCall* call = tree->AsCall();
5603 ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
5604 unsigned regCount = retTypeDesc->GetReturnRegCount();
5606 for (unsigned i = 0; i < regCount; ++i)
5608 unsigned flags = call->GetRegSpillFlagByIdx(i);
5609 if ((flags & GTF_SPILL) != 0)
5611 regNumber reg = call->GetRegNumByIdx(i);
5613 regSet.rsSpillTree(reg, call, i);
5614 gcInfo.gcMarkRegSetNpt(genRegMask(reg));
5621 regSet.rsSpillTree(tree->gtRegNum, tree);
5622 gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
5625 tree->gtFlags |= GTF_SPILLED;
5626 tree->gtFlags &= ~GTF_SPILL;
5632 genUpdateLife(tree);
5634 // If we've produced a register, mark it as a pointer, as needed.
5635 if (tree->gtHasReg())
5637 // We only mark the register in the following cases:
5638 // 1. It is not a register candidate local. In this case, we're producing a
5639 // register from a local, but the local is not a register candidate. Thus,
5640 // we must be loading it as a temp register, and any "last use" flag on
5641 // the register wouldn't be relevant.
5642 // 2. The register candidate local is going dead. There's no point to mark
5643 // the register as live, with a GC pointer, if the variable is dead.
5644 if (!genIsRegCandidateLocal(tree) || ((tree->gtFlags & GTF_VAR_DEATH) == 0))
5646 // Multi-reg call node will produce more than one register result.
5647 // Mark all the regs produced by call node.
5648 if (tree->IsMultiRegCall())
5650 GenTreeCall* call = tree->AsCall();
5651 ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
5652 unsigned regCount = retTypeDesc->GetReturnRegCount();
5654 for (unsigned i = 0; i < regCount; ++i)
5656 regNumber reg = call->GetRegNumByIdx(i);
5657 var_types type = retTypeDesc->GetReturnRegType(i);
5658 gcInfo.gcMarkRegPtrVal(reg, type);
5661 else if (tree->IsCopyOrReloadOfMultiRegCall())
5663 // we should never see reload of multi-reg call here
5664 // because GT_RELOAD gets generated in reg consuming path.
5665 noway_assert(tree->OperGet() == GT_COPY);
5667 // A multi-reg GT_COPY node produces those regs to which
5668 // copy has taken place.
5669 GenTreeCopyOrReload* copy = tree->AsCopyOrReload();
5670 GenTreeCall* call = copy->gtGetOp1()->AsCall();
5671 ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
5672 unsigned regCount = retTypeDesc->GetReturnRegCount();
5674 for (unsigned i = 0; i < regCount; ++i)
5676 var_types type = retTypeDesc->GetReturnRegType(i);
5677 regNumber fromReg = call->GetRegNumByIdx(i);
5678 regNumber toReg = copy->GetRegNumByIdx(i);
5680 if (toReg != REG_NA)
5682 gcInfo.gcMarkRegPtrVal(toReg, type);
5688 gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet());
5695 // transfer gc/byref status of src reg to dst reg
5696 void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
5698 regMaskTP srcMask = genRegMask(src);
5699 regMaskTP dstMask = genRegMask(dst);
5701 if (gcInfo.gcRegGCrefSetCur & srcMask)
5703 gcInfo.gcMarkRegSetGCref(dstMask);
5705 else if (gcInfo.gcRegByrefSetCur & srcMask)
5707 gcInfo.gcMarkRegSetByref(dstMask);
5711 gcInfo.gcMarkRegSetNpt(dstMask);
5715 // generates an ip-relative call or indirect call via reg ('call reg')
5716 // pass in 'addr' for a relative call or 'base' for an indirect register call
5717 // methHnd - optional, only used for pretty printing
5718 // retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
5719 void CodeGen::genEmitCall(int callType,
5720 CORINFO_METHOD_HANDLE methHnd,
5721 INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) void* addr X86_ARG(ssize_t argSize),
5722 emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
5723 IL_OFFSETX ilOffset,
5728 #if !defined(_TARGET_X86_)
5729 ssize_t argSize = 0;
5730 #endif // !defined(_TARGET_X86_)
5731 getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, argSize,
5732 retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), gcInfo.gcVarPtrSetCur,
5733 gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset, base, REG_NA, 0, 0, isJump,
5734 emitter::emitNoGChelper(compiler->eeGetHelperNum(methHnd)));
5737 // generates an indirect call via addressing mode (call []) given an indir node
5738 // methHnd - optional, only used for pretty printing
5739 // retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
5740 void CodeGen::genEmitCall(int callType,
5741 CORINFO_METHOD_HANDLE methHnd,
5742 INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) GenTreeIndir* indir X86_ARG(ssize_t argSize),
5743 emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
5744 IL_OFFSETX ilOffset)
5746 #if !defined(_TARGET_X86_)
5747 ssize_t argSize = 0;
5748 #endif // !defined(_TARGET_X86_)
5749 genConsumeAddress(indir->Addr());
5751 getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr,
5752 argSize, retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
5753 gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
5754 indir->Base() ? indir->Base()->gtRegNum : REG_NA,
5755 indir->Index() ? indir->Index()->gtRegNum : REG_NA, indir->Scale(), indir->Offset());
5758 //------------------------------------------------------------------------
5759 // genStoreInd: Generate code for a GT_STOREIND node.
5762 // treeNode - The GT_STOREIND node for which to generate code.
5767 void CodeGen::genStoreInd(GenTreePtr node)
5769 assert(node->OperGet() == GT_STOREIND);
5772 // Storing Vector3 of size 12 bytes through indirection
5773 if (node->TypeGet() == TYP_SIMD12)
5775 genStoreIndTypeSIMD12(node);
5778 #endif // FEATURE_SIMD
5780 GenTreeStoreInd* storeInd = node->AsStoreInd();
5781 GenTree* data = storeInd->Data();
5782 GenTree* addr = storeInd->Addr();
5783 var_types targetType = storeInd->TypeGet();
5785 assert(!varTypeIsFloating(targetType) || (targetType == data->TypeGet()));
5787 GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(storeInd, data);
5788 if (writeBarrierForm != GCInfo::WBF_NoBarrier)
5790 // data and addr must be in registers.
5791 // Consume both registers so that any copies of interfering registers are taken care of.
5792 genConsumeOperands(storeInd->AsOp());
if (genEmitOptimizedGCWriteBarrier(writeBarrierForm, addr, data))
{
return;
}
5799 // At this point, we should not have any interference.
5800 // That is, 'data' must not be in REG_ARG_0, as that is where 'addr' must go.
5801 noway_assert(data->gtRegNum != REG_ARG_0);
5803 // addr goes in REG_ARG_0
5804 if (addr->gtRegNum != REG_ARG_0)
5806 inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet());
5809 // data goes in REG_ARG_1
5810 if (data->gtRegNum != REG_ARG_1)
5812 inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet());
5815 genGCWriteBarrier(storeInd, writeBarrierForm);
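// Illustrative sketch (not from the original source): on AMD64 this path typically
// boils down to
//     mov  rcx, <addr>   ; addr -> REG_ARG_0
//     mov  rdx, <data>   ; data -> REG_ARG_1
//     call CORINFO_HELP_ASSIGN_REF
// where the helper performs both the reference store and the card-table update.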
}
else
{
bool reverseOps = ((storeInd->gtFlags & GTF_REVERSE_OPS) != 0);
5820 bool dataIsUnary = false;
5821 bool isRMWMemoryOp = storeInd->IsRMWMemoryOp();
5822 GenTree* rmwSrc = nullptr;
5824 // We must consume the operands in the proper execution order, so that liveness is
5825 // updated appropriately.
if (!reverseOps)
{
genConsumeAddress(addr);
}
// If storeInd represents a RMW memory op, then its data is a non-leaf node marked as contained,
// and the non-indir operand of data is the source of the RMW memory op.
if (isRMWMemoryOp)
{
assert(data->isContained() && !data->OperIsLeaf());
5837 GenTreePtr rmwDst = nullptr;
5839 dataIsUnary = (GenTree::OperIsUnary(data->OperGet()) != 0);
if (!dataIsUnary)
{
if (storeInd->IsRMWDstOp1())
{
5844 rmwDst = data->gtGetOp1();
5845 rmwSrc = data->gtGetOp2();
}
else
{
assert(storeInd->IsRMWDstOp2());
5850 rmwDst = data->gtGetOp2();
5851 rmwSrc = data->gtGetOp1();
5854 genConsumeRegs(rmwSrc);
}
else
{
// *(p) = oper *(p): Here addr = p, rmwSrc = rmwDst = *(p), i.e. GT_IND(p).
// For unary RMW ops, the src and dst of the RMW memory op are the same. Lower
// clears the operand counts on rmwSrc, so we don't need to perform a
// genConsumeReg() on it.
5862 assert(storeInd->IsRMWDstOp1());
5863 rmwSrc = data->gtGetOp1();
5864 rmwDst = data->gtGetOp1();
5865 assert(rmwSrc->isContained());
5868 assert(rmwSrc != nullptr);
5869 assert(rmwDst != nullptr);
5870 assert(Lowering::IndirsAreEquivalent(rmwDst, storeInd));
}
else
{
genConsumeRegs(data);
}

if (reverseOps)
{
genConsumeAddress(addr);
}
if (isRMWMemoryOp)
{
if (dataIsUnary)
{
// generate code for unary RMW memory ops like neg/not
getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(storeInd),
storeInd);
}
else
{
5892 if (data->OperIsShiftOrRotate())
5894 // Generate code for shift RMW memory ops.
5895 // The data address needs to be op1 (it must be [addr] = [addr] <shift> <amount>, not [addr] =
5896 // <amount> <shift> [addr]).
5897 assert(storeInd->IsRMWDstOp1());
5898 assert(rmwSrc == data->gtGetOp2());
5899 genCodeForShiftRMW(storeInd);
}
else
{
// generate code for remaining binary RMW memory ops like add/sub/and/or/xor
getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(storeInd),
storeInd, rmwSrc);
}
}
}
else
{
5911 getEmitter()->emitInsMov(ins_Store(data->TypeGet()), emitTypeSize(storeInd), storeInd);
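// Illustrative examples (not from the original source) of what the paths above emit:
//     add dword ptr [rax], 5   ; binary RMW: *p = *p + 5 (contained data)
//     not dword ptr [rax]      ; unary RMW:  *p = ~*p
//     shl dword ptr [rax], 3   ; shift RMW:  *p = *p << 3
// whereas the plain (non-RMW) store here is a simple "mov [rax], edx".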
5916 //------------------------------------------------------------------------
5917 // genEmitOptimizedGCWriteBarrier: Generate write barrier store using the optimized
5918 // helper functions.
5921 // writeBarrierForm - the write barrier form to use
5922 // addr - the address at which to do the store
5923 // data - the data to store
5926 // true if an optimized write barrier form was used, false if not. If this
5927 // function returns false, the caller must emit a "standard" write barrier.
5929 bool CodeGen::genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data)
5931 assert(writeBarrierForm != GCInfo::WBF_NoBarrier);
5933 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
5934 bool useOptimizedWriteBarriers = true;
5937 useOptimizedWriteBarriers =
5938 (writeBarrierForm != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
if (!useOptimizedWriteBarriers)
{
return false;
}
5946 const static int regToHelper[2][8] = {
5947 // If the target is known to be in managed memory
5949 CORINFO_HELP_ASSIGN_REF_EAX, CORINFO_HELP_ASSIGN_REF_ECX, -1, CORINFO_HELP_ASSIGN_REF_EBX, -1,
5950 CORINFO_HELP_ASSIGN_REF_EBP, CORINFO_HELP_ASSIGN_REF_ESI, CORINFO_HELP_ASSIGN_REF_EDI,
5953 // Don't know if the target is in managed memory
5955 CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, -1,
5956 CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, -1, CORINFO_HELP_CHECKED_ASSIGN_REF_EBP,
5957 CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, CORINFO_HELP_CHECKED_ASSIGN_REF_EDI,
5961 noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX);
5962 noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX);
5963 noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX);
5964 noway_assert(regToHelper[0][REG_ESP] == -1);
5965 noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP);
5966 noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI);
5967 noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI);
5969 noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX);
5970 noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX);
5971 noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX);
5972 noway_assert(regToHelper[1][REG_ESP] == -1);
5973 noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP);
5974 noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI);
5975 noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI);
5977 regNumber reg = data->gtRegNum;
5978 noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER));
// Generate the following code:
//     mov  REG_WRITE_BARRIER, addr
//     call write_barrier_helper_reg
// addr goes in REG_WRITE_BARRIER (the helper's fixed address register)
if (addr->gtRegNum != REG_WRITE_BARRIER) // REVIEW: can it ever not already be in this register?
5987 inst_RV_RV(INS_mov, REG_WRITE_BARRIER, addr->gtRegNum, addr->TypeGet());
5990 unsigned tgtAnywhere = 0;
if (writeBarrierForm != GCInfo::WBF_BarrierUnchecked)
{
tgtAnywhere = 1;
}
5996 // We might want to call a modified version of genGCWriteBarrier() to get the benefit of
5997 // the FEATURE_COUNT_GC_WRITE_BARRIERS code there, but that code doesn't look like it works
5998 // with rationalized RyuJIT IR. So, for now, just emit the helper call directly here.
genEmitHelperCall(regToHelper[tgtAnywhere][reg],
0,           // argSize
EA_PTRSIZE); // retSize
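// Illustrative example (not from the original source): storing a ref held in EBX
// through an address whose target may be outside the GC heap moves the address
// into EDX (REG_WRITE_BARRIER) and emits "call CORINFO_HELP_CHECKED_ASSIGN_REF_EBX";
// the data register is encoded in the helper's name, so no second move is needed.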
return true;

#else  // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS
return false;
#endif // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS
6010 // Produce code for a GT_CALL node
6011 void CodeGen::genCallInstruction(GenTreePtr node)
6013 GenTreeCall* call = node->AsCall();
6014 assert(call->gtOper == GT_CALL);
6016 gtCallTypes callType = (gtCallTypes)call->gtCallType;
6018 IL_OFFSETX ilOffset = BAD_IL_OFFSET;
6020 // all virtuals should have been expanded into a control expression
6021 assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
6023 // Consume all the arg regs
6024 for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
6026 assert(list->IsList());
6028 GenTreePtr argNode = list->Current();
6030 fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy());
6031 assert(curArgTabEntry);
if (curArgTabEntry->regNum == REG_STK)
{
continue;
}
6038 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
6039 // Deal with multi register passed struct args.
6040 if (argNode->OperGet() == GT_LIST)
6042 GenTreeArgList* argListPtr = argNode->AsArgList();
6043 unsigned iterationNum = 0;
6044 for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++)
6046 GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
6047 assert(putArgRegNode->gtOper == GT_PUTARG_REG);
6048 regNumber argReg = REG_NA;
6050 if (iterationNum == 0)
6052 argReg = curArgTabEntry->regNum;
}
else
{
assert(iterationNum == 1);
6057 argReg = curArgTabEntry->otherRegNum;
6060 genConsumeReg(putArgRegNode);
6062 // Validate the putArgRegNode has the right type.
6063 assert(putArgRegNode->TypeGet() ==
6064 compiler->GetTypeFromClassificationAndSizes(curArgTabEntry->structDesc
6065 .eightByteClassifications[iterationNum],
6066 curArgTabEntry->structDesc
6067 .eightByteSizes[iterationNum]));
6068 if (putArgRegNode->gtRegNum != argReg)
6070 inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg,
6071 putArgRegNode->gtRegNum);
}
else
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
6078 regNumber argReg = curArgTabEntry->regNum;
6079 genConsumeReg(argNode);
6080 if (argNode->gtRegNum != argReg)
6082 inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
#if FEATURE_VARARG
// In the case of a varargs call,
6088 // the ABI dictates that if we have floating point args,
6089 // we must pass the enregistered arguments in both the
6090 // integer and floating point registers so, let's do that.
6091 if (call->IsVarargs() && varTypeIsFloating(argNode))
6093 regNumber targetReg = compiler->getCallArgIntRegister(argNode->gtRegNum);
6094 instruction ins = ins_CopyFloatToInt(argNode->TypeGet(), TYP_LONG);
6095 inst_RV_RV(ins, argNode->gtRegNum, targetReg);
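// Illustrative example (not from the original source): a double argument already
// in xmm1 is duplicated into rdx with roughly "movd rdx, xmm1", so a varargs
// callee can read the value from either register bank.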
6097 #endif // FEATURE_VARARG
6100 #if defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// The call will pop its arguments.
// Sum the size of each putarg_stk below:
6103 ssize_t stackArgBytes = 0;
GenTreePtr args = call->gtCallArgs;
while (args)
{
GenTreePtr arg = args->gtOp.gtOp1;
6108 if (arg->OperGet() != GT_ARGPLACE && !(arg->gtFlags & GTF_LATE_ARG))
6110 #if defined(_TARGET_X86_)
6111 assert((arg->OperGet() == GT_PUTARG_STK) || (arg->OperGet() == GT_LONG));
6112 if (arg->OperGet() == GT_LONG)
6114 assert((arg->gtGetOp1()->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp2()->OperGet() == GT_PUTARG_STK));
6116 #endif // defined(_TARGET_X86_)
6118 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
6119 if (genActualType(arg->TypeGet()) == TYP_STRUCT)
6121 assert(arg->OperGet() == GT_PUTARG_STK);
6123 GenTreeObj* obj = arg->gtGetOp1()->AsObj();
6124 stackArgBytes = compiler->info.compCompHnd->getClassSize(obj->gtClass);
}
else
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
6129 stackArgBytes += genTypeSize(genActualType(arg->TypeGet()));
args = args->gtOp.gtOp2;
}
6133 #endif // defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
6135 // Insert a null check on "this" pointer if asked.
6136 if (call->NeedsNullCheck())
6138 const regNumber regThis = genGetThisArgReg(call);
6139 getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
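// Illustrative note (not from the original source): with 'this' in rcx the line
// above emits "cmp dword ptr [rcx], ecx", a compact probe that faults if 'this'
// is null without consuming a scratch register.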
6142 // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method.
6143 CORINFO_METHOD_HANDLE methHnd;
6144 GenTree* target = call->gtControlExpr;
6145 if (callType == CT_INDIRECT)
6147 assert(target == nullptr);
6148 target = call->gtCall.gtCallAddr;
6153 methHnd = call->gtCallMethHnd;
6156 CORINFO_SIG_INFO* sigInfo = nullptr;
6158 // Pass the call signature information down into the emitter so the emitter can associate
6159 // native call sites with the signatures they were generated from.
6160 if (callType != CT_HELPER)
6162 sigInfo = call->callSig;
// If this is a fast tail call, then we are done. In this case we set up the args (both reg args
// and stack args in the incoming arg area) and the call target in rax; the epilog sequence will
// generate "jmp rax".
6169 if (call->IsFastTailCall())
6171 // Don't support fast tail calling JIT helpers
6172 assert(callType != CT_HELPER);
6174 // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
6175 assert(target != nullptr);
6177 genConsumeReg(target);
6178 if (target->gtRegNum != REG_RAX)
6180 inst_RV_RV(INS_mov, REG_RAX, target->gtRegNum);
// For a pinvoke to unmanaged code we emit a label to clear
6186 // the GC pointer state before the callsite.
6187 // We can't utilize the typical lazy killing of GC pointers
6188 // at (or inside) the callsite.
6189 if (call->IsUnmanaged())
6191 genDefineTempLabel(genCreateTempLabel());
6194 // Determine return value size(s).
6195 ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
6196 emitAttr retSize = EA_PTRSIZE;
6197 emitAttr secondRetSize = EA_UNKNOWN;
6199 if (call->HasMultiRegRetVal())
6201 retSize = emitTypeSize(retTypeDesc->GetReturnRegType(0));
6202 secondRetSize = emitTypeSize(retTypeDesc->GetReturnRegType(1));
6206 assert(!varTypeIsStruct(call));
if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
{
retSize = EA_GCREF;
}
else if (call->gtType == TYP_BYREF)
{
retSize = EA_BYREF;
}
6218 bool fPossibleSyncHelperCall = false;
6219 CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF;
6221 #ifdef DEBUGGING_SUPPORT
6222 // We need to propagate the IL offset information to the call instruction, so we can emit
6223 // an IL to native mapping record for the call, to support managed return value debugging.
6224 // We don't want tail call helper calls that were converted from normal calls to get a record,
6225 // so we skip this hash table lookup logic in that case.
6226 if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
6228 (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
6230 #endif // DEBUGGING_SUPPORT
6232 #if defined(_TARGET_X86_)
6233 // If the callee pops the arguments, we pass a positive value as the argSize, and the emitter will
6234 // adjust its stack level accordingly.
6235 // If the caller needs to explicitly pop its arguments, we must pass a negative value, and then do the
6236 // pop when we're done.
6237 ssize_t argSizeForEmitter = stackArgBytes;
6238 if ((call->gtFlags & GTF_CALL_POP_ARGS) != 0)
6240 argSizeForEmitter = -stackArgBytes;
6243 #endif // defined(_TARGET_X86_)
6245 if (call->IsTailCallViaHelper())
6247 if (compiler->getNeedsGSSecurityCookie())
6249 genEmitGSCookieCheck(true);
6253 if (target != nullptr)
6255 if (target->isContainedIndir())
6257 if (target->AsIndir()->HasBase() && target->AsIndir()->Base()->isContainedIntOrIImmed())
6259 // Note that if gtControlExpr is an indir of an absolute address, we mark it as
6260 // contained only if it can be encoded as PC-relative offset.
6261 assert(target->AsIndir()->Base()->AsIntConCommon()->FitsInAddrBase(compiler));
6263 genEmitCall(emitter::EC_FUNC_TOKEN_INDIR, methHnd,
INDEBUG_LDISASM_COMMA(sigInfo)(void*) target->AsIndir()
->Base()
->AsIntConCommon()
->IconValue() X86_ARG(argSizeForEmitter),
6268 retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
6272 genEmitCall(emitter::EC_INDIR_ARD, methHnd,
6273 INDEBUG_LDISASM_COMMA(sigInfo) target->AsIndir() X86_ARG(argSizeForEmitter),
6274 retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
6279 // We have already generated code for gtControlExpr evaluating it into a register.
6280 // We just need to emit "call reg" in this case.
6281 assert(genIsValidIntReg(target->gtRegNum));
6282 genEmitCall(emitter::EC_INDIR_R, methHnd,
6283 INDEBUG_LDISASM_COMMA(sigInfo) nullptr // addr
6284 X86_ARG(argSizeForEmitter),
6285 retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset, genConsumeReg(target));
6288 #ifdef FEATURE_READYTORUN_COMPILER
6289 else if (call->gtEntryPoint.addr != nullptr)
6291 genEmitCall((call->gtEntryPoint.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN
6292 : emitter::EC_FUNC_TOKEN_INDIR,
6293 methHnd, INDEBUG_LDISASM_COMMA(sigInfo)(void*) call->gtEntryPoint.addr X86_ARG(argSizeForEmitter),
6294 retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
#endif // FEATURE_READYTORUN_COMPILER
else
{
// Generate a direct call to a non-virtual user defined or helper method
6300 assert(callType == CT_HELPER || callType == CT_USER_FUNC);
6302 void* addr = nullptr;
6303 if (callType == CT_HELPER)
6305 // Direct call to a helper method.
6306 helperNum = compiler->eeGetHelperNum(methHnd);
6307 noway_assert(helperNum != CORINFO_HELP_UNDEF);
6309 void* pAddr = nullptr;
6310 addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
if (addr == nullptr)
{
addr = pAddr;
}
6317 // tracking of region protected by the monitor in synchronized methods
6318 if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
6320 fPossibleSyncHelperCall = true;
6325 // Direct call to a non-virtual user function.
6326 addr = call->gtDirectCallAddress;
6329 // Non-virtual direct calls to known addresses
6330 genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr X86_ARG(argSizeForEmitter),
6331 retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
6334 // if it was a pinvoke we may have needed to get the address of a label
6335 if (genPendingCallLabel)
6337 assert(call->IsUnmanaged());
6338 genDefineTempLabel(genPendingCallLabel);
6339 genPendingCallLabel = nullptr;
6342 #if defined(_TARGET_X86_)
6343 // The call will pop its arguments.
6344 genStackLevel -= stackArgBytes;
6345 #endif // defined(_TARGET_X86_)
6348 // All Callee arg registers are trashed and no longer contain any GC pointers.
6349 // TODO-XArch-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here?
6350 // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other
6351 // registers from RBM_CALLEE_TRASH.
6352 assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
6353 assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
6354 gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
6355 gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;
6357 var_types returnType = call->TypeGet();
6358 if (returnType != TYP_VOID)
#ifdef _TARGET_X86_
if (varTypeIsFloating(returnType))
6363 // Spill the value from the fp stack.
6364 // Then, load it into the target register.
6365 call->gtFlags |= GTF_SPILL;
6366 regSet.rsSpillFPStack(call);
6367 call->gtFlags |= GTF_SPILLED;
6368 call->gtFlags &= ~GTF_SPILL;
6371 #endif // _TARGET_X86_
6373 regNumber returnReg;
6375 if (call->HasMultiRegRetVal())
6377 assert(retTypeDesc != nullptr);
6378 unsigned regCount = retTypeDesc->GetReturnRegCount();
6380 // If regs allocated to call node are different from ABI return
6381 // regs in which the call has returned its result, move the result
6382 // to regs allocated to call node.
6383 for (unsigned i = 0; i < regCount; ++i)
6385 var_types regType = retTypeDesc->GetReturnRegType(i);
6386 returnReg = retTypeDesc->GetABIReturnReg(i);
6387 regNumber allocatedReg = call->GetRegNumByIdx(i);
6388 if (returnReg != allocatedReg)
6390 inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
#ifdef FEATURE_SIMD
// A Vector3 return value is stored in xmm0 and xmm1.
6396 // RyuJIT assumes that the upper unused bits of xmm1 are cleared but
6397 // the native compiler doesn't guarantee it.
6398 if (returnType == TYP_SIMD12)
6400 returnReg = retTypeDesc->GetABIReturnReg(1);
// Clear the upper 96 bits of xmm1 with two byte-shift instructions:
// retReg = retReg << 96
// retReg = retReg >> 96
6404 getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12);
6405 getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12);
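// Illustrative note (not from the original source): the pair above is
// "pslldq xmm1, 12" followed by "psrldq xmm1, 12", which round-trips the low
// 4 bytes (the z component) and forces bits 32..127 to zero.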
6407 #endif // FEATURE_SIMD
#ifdef _TARGET_X86_
if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
6414 // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
6415 // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the
6416 // correct argument registers.
6417 returnReg = REG_PINVOKE_TCB;
else
#endif // _TARGET_X86_
6421 if (varTypeIsFloating(returnType))
6423 returnReg = REG_FLOATRET;
else
{
returnReg = REG_INTRET;
}
6430 if (call->gtRegNum != returnReg)
6432 inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
6436 genProduceReg(call);
6440 // If there is nothing next, that means the result is thrown away, so this value is not live.
6441 // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
6442 if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
6444 gcInfo.gcMarkRegSetNpt(RBM_INTRET);
6447 #if defined(_TARGET_X86_)
6448 //-------------------------------------------------------------------------
6449 // Create a label for tracking of region protected by the monitor in synchronized methods.
6450 // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
6451 // so the GC state vars have been updated before creating the label.
6453 if (fPossibleSyncHelperCall)
switch (helperNum)
{
case CORINFO_HELP_MON_ENTER:
6458 case CORINFO_HELP_MON_ENTER_STATIC:
6459 noway_assert(compiler->syncStartEmitCookie == NULL);
6460 compiler->syncStartEmitCookie =
6461 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
noway_assert(compiler->syncStartEmitCookie != NULL);
break;
6464 case CORINFO_HELP_MON_EXIT:
6465 case CORINFO_HELP_MON_EXIT_STATIC:
6466 noway_assert(compiler->syncEndEmitCookie == NULL);
6467 compiler->syncEndEmitCookie =
6468 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
noway_assert(compiler->syncEndEmitCookie != NULL);
break;
default:
break;
}
6476 // Is the caller supposed to pop the arguments?
6477 if (((call->gtFlags & GTF_CALL_POP_ARGS) != 0) && (stackArgBytes != 0))
6479 genAdjustSP(stackArgBytes);
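// Illustrative example (not from the original source): for a cdecl-style call
// site that pushed two int args, stackArgBytes == 8 and this emits "add esp, 8"
// to restore the caller's stack level.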
6481 #endif // _TARGET_X86_
6484 // Produce code for a GT_JMP node.
// The arguments of the caller need to be transferred to the callee before exiting the caller.
6486 // The actual jump to callee is generated as part of caller epilog sequence.
6487 // Therefore the codegen of GT_JMP is to ensure that the callee arguments are correctly setup.
6488 void CodeGen::genJmpMethod(GenTreePtr jmp)
6490 assert(jmp->OperGet() == GT_JMP);
6491 assert(compiler->compJmpOpUsed);
6493 // If no arguments, nothing to do
6494 if (compiler->info.compArgsCount == 0)
6499 // Make sure register arguments are in their initial registers
6500 // and stack arguments are put back as well.
unsigned varNum;
LclVarDsc* varDsc;

// First move any enregistered stack arguments back to the stack.
// At the same time, any reg arg not in the correct register is moved back to its stack location.
6507 // We are not strictly required to spill reg args that are not in the desired reg for a jmp call
6508 // But that would require us to deal with circularity while moving values around. Spilling
6509 // to stack makes the implementation simple, which is not a bad trade off given Jmp calls
6510 // are not frequent.
6511 for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
6513 varDsc = compiler->lvaTable + varNum;
6515 if (varDsc->lvPromoted)
6517 noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
6519 unsigned fieldVarNum = varDsc->lvFieldLclStart;
6520 varDsc = compiler->lvaTable + fieldVarNum;
6522 noway_assert(varDsc->lvIsParam);
6524 if (varDsc->lvIsRegArg && (varDsc->lvRegNum != REG_STK))
// Skip reg args which are already in the right register for the jmp call.
// If not, we will spill such args to their stack locations.
6529 // If we need to generate a tail call profiler hook, then spill all
6530 // arg regs to free them up for the callback.
if (!compiler->compIsProfilerHookNeeded() && (varDsc->lvRegNum == varDsc->lvArgReg))
{
continue;
}
else if (varDsc->lvRegNum == REG_STK)
{
// Skip args which are currently living on the stack.
continue;
}
6542 // If we came here it means either a reg argument not in the right register or
6543 // a stack argument currently living in a register. In either case the following
6544 // assert should hold.
6545 assert(varDsc->lvRegNum != REG_STK);
6547 var_types loadType = varDsc->lvaArgType();
6548 getEmitter()->emitIns_S_R(ins_Store(loadType), emitTypeSize(loadType), varDsc->lvRegNum, varNum, 0);
6550 // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live.
6551 // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
6552 // Therefore manually update life of varDsc->lvRegNum.
6553 regMaskTP tempMask = varDsc->lvRegMask();
6554 regSet.RemoveMaskVars(tempMask);
6555 gcInfo.gcMarkRegSetNpt(tempMask);
6556 if (compiler->lvaIsGCTracked(varDsc))
6559 if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
6561 JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
6565 JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
6569 VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
6573 #ifdef PROFILING_SUPPORTED
6574 // At this point all arg regs are free.
6575 // Emit tail call profiler callback.
genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
#endif // PROFILING_SUPPORTED
// Next, move any register arguments that are not currently enregistered back to their registers.
6580 regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method.
6581 unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method.
6582 for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
6584 varDsc = compiler->lvaTable + varNum;
6585 if (varDsc->lvPromoted)
6587 noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
6589 unsigned fieldVarNum = varDsc->lvFieldLclStart;
6590 varDsc = compiler->lvaTable + fieldVarNum;
6592 noway_assert(varDsc->lvIsParam);
6594 // Skip if arg not passed in a register.
if (!varDsc->lvIsRegArg)
{
continue;
}
6600 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
6601 if (varTypeIsStruct(varDsc))
6603 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
6604 assert(typeHnd != nullptr);
6606 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
6607 compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
6608 assert(structDesc.passedInRegisters);
6610 unsigned __int8 offset0 = 0;
6611 unsigned __int8 offset1 = 0;
6612 var_types type0 = TYP_UNKNOWN;
6613 var_types type1 = TYP_UNKNOWN;
6615 // Get the eightbyte data
6616 compiler->GetStructTypeOffset(structDesc, &type0, &type1, &offset0, &offset1);
6618 // Move the values into the right registers.
6621 // Update varDsc->lvArgReg and lvOtherArgReg life and GC Info to indicate varDsc stack slot is dead and
6622 // argReg is going live. Note that we cannot modify varDsc->lvRegNum and lvOtherArgReg here because another
6623 // basic block may not be expecting it. Therefore manually update life of argReg. Note that GT_JMP marks
6624 // the end of the basic block and after which reg life and gc info will be recomputed for the new block in
6625 // genCodeForBBList().
6626 if (type0 != TYP_UNKNOWN)
6628 getEmitter()->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), varDsc->lvArgReg, varNum, offset0);
6629 regSet.rsMaskVars |= genRegMask(varDsc->lvArgReg);
6630 gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, type0);
6633 if (type1 != TYP_UNKNOWN)
6635 getEmitter()->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), varDsc->lvOtherArgReg, varNum, offset1);
6636 regSet.rsMaskVars |= genRegMask(varDsc->lvOtherArgReg);
6637 gcInfo.gcMarkRegPtrVal(varDsc->lvOtherArgReg, type1);
6640 if (varDsc->lvTracked)
6642 VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
6648 // Register argument
6649 noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
6651 // Is register argument already in the right register?
// If not, load it from its stack location.
6653 var_types loadType = varDsc->lvaArgType();
6654 regNumber argReg = varDsc->lvArgReg; // incoming arg register
6656 if (varDsc->lvRegNum != argReg)
6658 assert(genIsValidReg(argReg));
6659 getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
6661 // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
6662 // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
6663 // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
6664 // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
6665 regSet.AddMaskVars(genRegMask(argReg));
6666 gcInfo.gcMarkRegPtrVal(argReg, loadType);
6667 if (compiler->lvaIsGCTracked(varDsc))
6670 if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
6672 JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming dead\n", varNum);
6676 JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing dead\n", varNum);
6680 VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
6685 #if FEATURE_VARARG && defined(_TARGET_AMD64_)
6686 // In case of a jmp call to a vararg method also pass the float/double arg in the corresponding int arg
6687 // register. This is due to the AMD64 ABI which requires floating point values passed to varargs functions to
6688 // be passed in both integer and floating point registers. It doesn't apply to x86, which passes floating point
6689 // values on the stack.
6690 if (compiler->info.compIsVarArgs)
6692 regNumber intArgReg;
6693 var_types loadType = varDsc->lvaArgType();
6694 regNumber argReg = varDsc->lvArgReg; // incoming arg register
6696 if (varTypeIsFloating(loadType))
6698 intArgReg = compiler->getCallArgIntRegister(argReg);
6699 instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
6700 inst_RV_RV(ins, argReg, intArgReg, loadType);
else
{
intArgReg = argReg;
}

fixedIntArgMask |= genRegMask(intArgReg);
6709 if (intArgReg == REG_ARG_0)
6711 assert(firstArgVarNum == BAD_VAR_NUM);
6712 firstArgVarNum = varNum;
6715 #endif // FEATURE_VARARG
6718 #if FEATURE_VARARG && defined(_TARGET_AMD64_)
6719 // Jmp call to a vararg method - if the method has fewer than 4 fixed arguments,
6720 // load the remaining arg registers (both int and float) from the corresponding
// shadow stack slots. This is because we don't know the number and type
// of non-fixed params passed by the caller; therefore we have to assume the worst case
// of the caller passing float/double args in both int and float arg regs.
// This doesn't apply to x86, which doesn't pass floating point values in floating
// point registers.
6728 // The caller could have passed gc-ref/byref type var args. Since these are var args
// the callee has no way of knowing their gc-ness. Therefore, mark the region that loads
6730 // remaining arg registers from shadow stack slots as non-gc interruptible.
6731 if (fixedIntArgMask != RBM_NONE)
6733 assert(compiler->info.compIsVarArgs);
6734 assert(firstArgVarNum != BAD_VAR_NUM);
6736 regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
6737 if (remainingIntArgMask != RBM_NONE)
6739 instruction insCopyIntToFloat = ins_CopyIntToFloat(TYP_LONG, TYP_DOUBLE);
6740 getEmitter()->emitDisableGC();
6741 for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
6743 regNumber argReg = intArgRegs[argNum];
6744 regMaskTP argRegMask = genRegMask(argReg);
6746 if ((remainingIntArgMask & argRegMask) != 0)
6748 remainingIntArgMask &= ~argRegMask;
6749 getEmitter()->emitIns_R_S(INS_mov, EA_8BYTE, argReg, firstArgVarNum, argOffset);
// also load it into the corresponding float arg reg
6752 regNumber floatReg = compiler->getCallArgFloatRegister(argReg);
6753 inst_RV_RV(insCopyIntToFloat, floatReg, argReg);
6756 argOffset += REGSIZE_BYTES;
6758 getEmitter()->emitEnableGC();
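// Illustrative sketch (not from the original source): if only the first int arg
// reg held a fixed arg, the loop above emits roughly
//     mov  rdx, [shadow slot +8]
//     movd xmm1, rdx
//     mov  r8,  [shadow slot +16]
//     movd xmm2, r8
//     mov  r9,  [shadow slot +24]
//     movd xmm3, r9
// all inside the no-GC region opened by emitDisableGC() above.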
6761 #endif // FEATURE_VARARG
6764 // produce code for a GT_LEA subnode
6765 void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
6767 emitAttr size = emitTypeSize(lea);
6768 genConsumeOperands(lea);
6770 if (lea->Base() && lea->Index())
6772 regNumber baseReg = lea->Base()->gtRegNum;
6773 regNumber indexReg = lea->Index()->gtRegNum;
6774 getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, baseReg, indexReg, lea->gtScale, lea->gtOffset);
6776 else if (lea->Base())
6778 getEmitter()->emitIns_R_AR(INS_lea, size, lea->gtRegNum, lea->Base()->gtRegNum, lea->gtOffset);
6780 else if (lea->Index())
getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, REG_NA, lea->Index()->gtRegNum, lea->gtScale,
lea->gtOffset);

genProduceReg(lea);
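// Illustrative examples (not from the original source) of the three address forms:
//     lea rax, [rbx+rcx*4+24]   ; base + index
//     lea rax, [rbx+24]         ; base only
//     lea rax, [rcx*4+24]       ; index only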
6789 //-------------------------------------------------------------------------------------------
6790 // genJumpKindsForTree: Determine the number and kinds of conditional branches
6791 // necessary to implement the given GT_CMP node
// cmpTree - (input) The GenTree relop node that is used to set the condition codes
6796 // jmpKind[2] - (output) One or two conditional branch instructions
6797 // jmpToTrueLabel[2] - (output) When true we branch to the true case
6798 // When false we create a second label and branch to the false case
// Only GT_EQ for floating point compares can have a false value.
6802 // Sets the proper values into the array elements of jmpKind[] and jmpToTrueLabel[]
6805 // At least one conditional branch instruction will be returned.
6806 // Typically only one conditional branch is needed
6807 // and the second jmpKind[] value is set to EJ_NONE
6810 // jmpToTrueLabel[i]= true implies branch when the compare operation is true.
6811 // jmpToTrueLabel[i]= false implies branch when the compare operation is false.
6812 //-------------------------------------------------------------------------------------------
6815 void CodeGen::genJumpKindsForTree(GenTreePtr cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2])
6817 // Except for BEQ (= ordered GT_EQ) both jumps are to the true label.
6818 jmpToTrueLabel[0] = true;
6819 jmpToTrueLabel[1] = true;
6821 // For integer comparisons just use genJumpKindForOper
6822 if (!varTypeIsFloating(cmpTree->gtOp.gtOp1->gtEffectiveVal()))
6824 CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
6825 jmpKind[0] = genJumpKindForOper(cmpTree->gtOper, compareKind);
6826 jmpKind[1] = EJ_NONE;
6830 assert(cmpTree->OperIsCompare());
// For details on how we arrived at this mapping, see the comment block in genCodeForTreeNode()
// while generating code for compare operators (e.g. GT_EQ etc.).
6834 if ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) != 0)
// Must branch if we have a NaN, unordered
6837 switch (cmpTree->gtOper)
{
case GT_LT:
case GT_GT:
jmpKind[0] = EJ_jb;
jmpKind[1] = EJ_NONE;
break;
case GT_LE:
case GT_GE:
jmpKind[0] = EJ_jbe;
jmpKind[1] = EJ_NONE;
break;
case GT_NE:
jmpKind[0] = EJ_jpe;
jmpKind[1] = EJ_jne;
break;
case GT_EQ:
jmpKind[0] = EJ_je;
jmpKind[1] = EJ_NONE;
break;

default:
unreached();
}
6865 else // ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) == 0)
// Do not branch if we have a NaN, unordered
6868 switch (cmpTree->gtOper)
{
case GT_LT:
case GT_GT:
jmpKind[0] = EJ_ja;
jmpKind[1] = EJ_NONE;
break;
case GT_LE:
case GT_GE:
jmpKind[0] = EJ_jae;
jmpKind[1] = EJ_NONE;
break;
case GT_NE:
jmpKind[0] = EJ_jne;
jmpKind[1] = EJ_NONE;
break;
case GT_EQ:
jmpKind[0] = EJ_jpe;
jmpKind[1] = EJ_je;
jmpToTrueLabel[0] = false;
break;

default:
unreached();
}
6900 #if !defined(_TARGET_64BIT_)
6901 //------------------------------------------------------------------------
6902 // genJumpKindsForTreeLongHi: Generate the jump types for compare
6903 // operators of the high parts of a compare with long type operands
// on x86 for the case where the rel-op result needs to be materialized into a
// register.
6908 // cmpTree - The GT_CMP node
6909 // jmpKind - Return array of jump kinds
6915 void CodeGen::genJumpKindsForTreeLongHi(GenTreePtr cmpTree, emitJumpKind jmpKind[2])
6917 assert(cmpTree->OperIsCompare());
6918 CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
6920 switch (cmpTree->gtOper)
6924 if (compareKind == CK_SIGNED)
6938 if (compareKind == CK_SIGNED)
6951 // GT_EQ will not jump to the true label if the hi parts are equal
6952 jmpKind[0] = EJ_NONE;
6953 jmpKind[1] = EJ_jne;
6957 // GT_NE will always jump to the true label if the high parts are not equal
6958 jmpKind[0] = EJ_jne;
6959 jmpKind[1] = EJ_NONE;
6967 //------------------------------------------------------------------------
6968 // genCompareLong: Generate code for comparing two longs on x86 when the result of the compare
6969 // is manifested in a register.
6972 // treeNode - the compare tree
6977 // For long compares, we need to compare the high parts of operands first, then the low parts.
6978 // If the high compare is false, we do not need to compare the low parts. For less than and
6979 // greater than, if the high compare is true, we can assume the entire compare is true. For
6980 // compares that are realized in a register, we will generate:
6982 // Opcode x86 equivalent Comment
6983 // ------ -------------- -------
6984 // GT_EQ cmp hiOp1,hiOp2 If any part is not equal, the entire compare
6985 // jne label is false.
6989 // GT_NE cmp hiOp1,hiOp2 If any part is not equal, the entire compare
6990 // jne label is true.
6994 // GT_LT; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
6995 // jne label correctly and we do not need to check lo. Otherwise,
6996 // cmp loOp1,loOp2 we need to compare the lo halves
6999 // GT_LE; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
7000 // jne label correctly and we do not need to check lo. Otherwise,
7001 // cmp loOp1,loOp2 we need to compare the lo halves
7004 // GT_GT; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
7005 // jne label correctly and we do not need to check lo. Otherwise,
7006 // cmp loOp1,loOp2 we need to compare the lo halves
7009 // GT_GE; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
7010 // jne label correctly and we do not need to check lo. Otherwise,
7011 // cmp loOp1,loOp2 we need to compare the lo halves
7014 // For signed long comparisons, we need additional labels, as we need to use signed conditions on the
7015 // "set" instruction:
7017 // GT_LT; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
7018 // jne labelHi correctly and we do not need to check lo. Otherwise,
7019 // cmp loOp1,loOp2 we need to compare the lo halves
7020 // setb Unsigned set for lo compare
7022 // labelHi: setl Signed set for high compare
7025 // GT_LE; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
7026 // jne labelHi correctly and we do not need to check lo. Otherwise,
7027 // cmp loOp1,loOp2 we need to compare the lo halves
// setbe Unsigned set for lo compare
7030 // labelHi: setle Signed set for hi compare
7033 // GT_GT; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
7034 // jne labelHi correctly and we do not need to check lo. Otherwise,
7035 // cmp loOp1,loOp2 we need to compare the lo halves
7036 // seta Unsigned set for lo compare
7038 // labelHi: setg Signed set for high compare
7041 // GT_GE; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
7042 // jne labelHi correctly and we do not need to check lo. Otherwise,
7043 // cmp loOp1,loOp2 we need to compare the lo halves
7044 // setae Unsigned set for lo compare
7046 // labelHi: setge Signed set for hi compare
7049 // TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test.
7050 void CodeGen::genCompareLong(GenTreePtr treeNode)
7052 assert(treeNode->OperIsCompare());
7054 GenTreeOp* tree = treeNode->AsOp();
7055 GenTreePtr op1 = tree->gtOp1;
7056 GenTreePtr op2 = tree->gtOp2;
7058 assert(varTypeIsLong(op1->TypeGet()));
7059 assert(varTypeIsLong(op2->TypeGet()));
7061 regNumber targetReg = treeNode->gtRegNum;
7063 genConsumeOperands(tree);
7065 GenTreePtr loOp1 = op1->gtGetOp1();
7066 GenTreePtr hiOp1 = op1->gtGetOp2();
7067 GenTreePtr loOp2 = op2->gtGetOp1();
7068 GenTreePtr hiOp2 = op2->gtGetOp2();
7070 // Create compare for the high parts
7071 instruction ins = INS_cmp;
7072 var_types cmpType = TYP_INT;
7073 emitAttr cmpAttr = emitTypeSize(cmpType);
7075 // Emit the compare instruction
7076 getEmitter()->emitInsBinary(ins, cmpAttr, hiOp1, hiOp2);
7078 // If the result is not being materialized in a register, we're done.
if (targetReg == REG_NA)
{
return;
}
7084 // Generate the first jump for the high compare
7085 CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
7087 BasicBlock* labelHi = genCreateTempLabel();
7088 BasicBlock* labelFinal = genCreateTempLabel();
7090 if (compareKind == CK_SIGNED && (tree->gtOper != GT_NE && tree->gtOper != GT_EQ))
7092 // If we are doing a signed comparison, we need to do a signed set if the high compare is true,
7093 // but an unsigned set if we fall through to the low compare. If we have a GT_NE or GT_EQ, we do not
7094 // need to worry about the sign of the comparison, so we can use the simplified case.
7096 // We only have to check for equality for the hi comparison. If they are not equal, then the set will
7097 // do the right thing. If they are equal, we have to check the lo halves.
7098 inst_JMP(EJ_jne, labelHi);
7100 // Emit the comparison. Perform the set for the lo. Jump to labelFinal
7101 getEmitter()->emitInsBinary(ins, cmpAttr, loOp1, loOp2);
7103 // The low set must be unsigned
7104 emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
7106 inst_SET(jumpKindLo, targetReg);
7107 // Set the higher bytes to 0
7108 inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
7109 genProduceReg(tree);
7111 inst_JMP(EJ_jmp, labelFinal);
7113 // Define the label for hi jump target here. If we have jumped here, we want to set
7114 // the target register based on the jump kind of the actual compare type.
7116 genDefineTempLabel(labelHi);
7117 inst_SET(genJumpKindForOper(tree->gtOper, compareKind), targetReg);
7119 // Set the higher bytes to 0
7120 inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
7121 genProduceReg(tree);
7123 genDefineTempLabel(labelFinal);
7127 // If the compare is unsigned, or if the sign doesn't change the set instruction, we can use
7128 // the same set logic for both the hi and lo compare, so we don't need to jump to a high label,
7129 // we can just jump to the set that the lo compare will use.
7131 // We only have to check for equality for the hi comparison. If they are not equal, then the set will
7132 // do the right thing. If they are equal, we have to check the lo halves.
7133 inst_JMP(EJ_jne, labelFinal);
7135 // Emit the comparison
7136 getEmitter()->emitInsBinary(ins, cmpAttr, loOp1, loOp2);
7138 // Define the label for hi jump target here. If we have jumped here, we want to set
7139 // the target register based on the jump kind of the lower half (the actual compare
7140 // type). If we have fallen through, then we are doing a normal int compare for the
7143 genDefineTempLabel(labelFinal);
7145 // The low set must be unsigned
7146 emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
7148 inst_SET(jumpKindLo, targetReg);
7149 // Set the higher bytes to 0
7150 inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
7151 genProduceReg(tree);
7154 #endif //! defined(_TARGET_64BIT_)
7156 //------------------------------------------------------------------------
7157 // genCompareFloat: Generate code for comparing two floating point values
7160 // treeNode - the compare tree
// The SSE2 instruction ucomis[s|d] performs an unordered comparison and
// updates the rFLAGS register as follows.
//
//    Result of compare    ZF  PF  CF
//    -----------------    ----------
//    Unordered             1   1   1   <-- this result implies one of operands of compare is a NaN.
//    Greater               0   0   0
//    Less Than             0   0   1
//    Equal                 1   0   0
//
7174 // From the above table the following equalities follow. As per ECMA spec *.UN opcodes perform
7175 // unordered comparison of floating point values. That is *.UN comparisons result in true when
7176 // one of the operands is a NaN whereas ordered comparisons results in false.
7178 // Opcode Amd64 equivalent Comment
7179 // ------ ----------------- --------
7180 // BLT.UN(a,b) ucomis[s|d] a, b Jb branches if CF=1, which means either a<b or unordered from the above
// BLT(a,b)       ucomis[s|d] b, a      Ja branches if CF=0 and ZF=0, which means b>a, which in turn implies a<b
7186 // BGT.UN(a,b) ucomis[s|d] b, a branch if b<a or unordered ==> branch if a>b or unordered
7189 // BGT(a, b) ucomis[s|d] a, b branch if a>b
7192 // BLE.UN(a,b) ucomis[s|d] a, b jbe branches if CF=1 or ZF=1, which implies a<=b or unordered
7195 // BLE(a,b) ucomis[s|d] b, a jae branches if CF=0, which mean b>=a or a<=b
7198 // BGE.UN(a,b) ucomis[s|d] b, a branch if b<=a or unordered ==> branch if a>=b or unordered
7201 // BGE(a,b) ucomis[s|d] a, b branch if a>=b
7204 // BEQ.UN(a,b) ucomis[s|d] a, b branch if a==b or unordered. There is no BEQ.UN opcode in ECMA spec.
// je                  This case is given for completeness, in case the JIT generates such
//                     a gentree internally.
7208 // BEQ(a,b) ucomis[s|d] a, b From the above table, PF=0 and ZF=1 corresponds to a==b.
// BNE(a,b)       ucomis[s|d] a, b      branch if a!=b. There is no BNE opcode in ECMA spec. This case is
// jne                 given for completeness, in case the JIT generates such a gentree
//                     internally.
//
// BNE.UN(a,b)    ucomis[s|d] a, b      From the above table, PF=1 or ZF=0 implies unordered or a!=b
// As we can see from the above equalities, the operands of a compare operator need to be
// reversed in the case of BLT/CLT, BGT.UN/CGT.UN, BLE/CLE, and BGE.UN/CGE.UN.
7223 void CodeGen::genCompareFloat(GenTreePtr treeNode)
7225 assert(treeNode->OperIsCompare());
7227 GenTreeOp* tree = treeNode->AsOp();
7228 GenTreePtr op1 = tree->gtOp1;
7229 GenTreePtr op2 = tree->gtOp2;
7230 var_types op1Type = op1->TypeGet();
7231 var_types op2Type = op2->TypeGet();
7233 genConsumeOperands(tree);
7235 assert(varTypeIsFloating(op1Type));
7236 assert(op1Type == op2Type);
7238 regNumber targetReg = treeNode->gtRegNum;
7243 if ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0)
7245 // Unordered comparison case
7246 reverseOps = (tree->gtOper == GT_GT || tree->gtOper == GT_GE);
else
{
reverseOps = (tree->gtOper == GT_LT || tree->gtOper == GT_LE);
}

if (reverseOps)
{
GenTreePtr tmp = op1;
op1 = op2;
op2 = tmp;
}
7260 ins = ins_FloatCompare(op1Type);
7261 cmpAttr = emitTypeSize(op1Type);
7263 getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2);
7265 // Are we evaluating this into a register?
7266 if (targetReg != REG_NA)
7268 genSetRegToCond(targetReg, tree);
7269 genProduceReg(tree);
7273 //------------------------------------------------------------------------
7274 // genCompareInt: Generate code for comparing ints or, on amd64, longs.
7277 // treeNode - the compare tree
7281 void CodeGen::genCompareInt(GenTreePtr treeNode)
7283 assert(treeNode->OperIsCompare());
7285 GenTreeOp* tree = treeNode->AsOp();
7286 GenTreePtr op1 = tree->gtOp1;
7287 GenTreePtr op2 = tree->gtOp2;
7288 var_types op1Type = op1->TypeGet();
7289 var_types op2Type = op2->TypeGet();
7291 genConsumeOperands(tree);
7296 regNumber targetReg = treeNode->gtRegNum;
7297 assert(!op1->isContainedIntOrIImmed()); // We no longer support swapping op1 and op2 to generate cmp reg, imm
7298 assert(!varTypeIsFloating(op2Type));
7301 assert(!varTypeIsLong(op1Type) && !varTypeIsLong(op2Type));
7302 #endif // _TARGET_X86_
7304 // By default we use an int32 sized cmp instruction
instruction ins = INS_cmp;
var_types cmpType = TYP_INT;
emitAttr cmpAttr;
7309 // In the if/then/else statement below we may change the
7310 // 'cmpType' and/or 'ins' to generate a smaller instruction
7312 // Are we comparing two values that are the same size?
7314 if (genTypeSize(op1Type) == genTypeSize(op2Type))
if (op1Type == op2Type)
{
// If both types are exactly the same we can use that type
cmpType = op1Type;
}
else if (genTypeSize(op1Type) == 8)
{
// If we have two different int64 types we need to use a long compare
cmpType = TYP_LONG;
}
cmpAttr = emitTypeSize(cmpType);
}
7329 else // Here we know that (op1Type != op2Type)
// Do we have a short compare against a constant in op2?
//
// We checked for this case in TreeNodeInfoInitCmp(); if we can perform a small
// compare immediate, we labeled this compare with GTF_RELOP_SMALL,
// and for unsigned small non-equality compares, with the GTF_UNSIGNED flag.
7337 if (op2->isContainedIntOrIImmed() && ((tree->gtFlags & GTF_RELOP_SMALL) != 0))
assert(varTypeIsSmall(op1Type));
cmpType = op1Type;
7342 #ifdef _TARGET_AMD64_
7343 else // compare two different sized operands
7345 // For this case we don't want any memory operands, only registers or immediates
7347 assert(!op1->isContainedMemoryOp());
7348 assert(!op2->isContainedMemoryOp());
7350 // Check for the case where one operand is an int64 type
7351 // Lower should have placed 32-bit operand in a register
7352 // for signed comparisons we will sign extend the 32-bit value in place.
7354 bool op1Is64Bit = (genTypeSize(op1Type) == 8);
7355 bool op2Is64Bit = (genTypeSize(op2Type) == 8);
7359 if (!(tree->gtFlags & GTF_UNSIGNED) && !op2Is64Bit)
7361 assert(op2->gtRegNum != REG_NA);
7362 inst_RV_RV(INS_movsxd, op2->gtRegNum, op2->gtRegNum, op2Type);
7365 else if (op2Is64Bit)
7368 if (!(tree->gtFlags & GTF_UNSIGNED) && !op1Is64Bit)
assert(op1->gtRegNum != REG_NA);
inst_RV_RV(INS_movsxd, op1->gtRegNum, op1->gtRegNum, op1Type);
7374 #endif // _TARGET_AMD64_
7376 cmpAttr = emitTypeSize(cmpType);
7379 // See if we can generate a "test" instruction instead of a "cmp".
7380 // For this to generate the correct conditional branch we must have
7381 // a compare against zero.
7383 if (op2->IsIntegralConst(0))
7385 if (op1->isContained())
7387 // op1 can be a contained memory op
7388 // or the special contained GT_AND that we created in Lowering::TreeNodeInfoInitCmp()
7390 if ((op1->OperGet() == GT_AND))
7392 noway_assert(op1->gtOp.gtOp2->isContainedIntOrIImmed());
7394 ins = INS_test; // we will generate "test andOp1, andOp2CnsVal"
7395 op2 = op1->gtOp.gtOp2; // must assign op2 before we overwrite op1
7396 op1 = op1->gtOp.gtOp1; // overwrite op1
7398 if (op1->isContainedMemoryOp())
// use the size of andOp1 if it is a contained memory op
7401 cmpAttr = emitTypeSize(op1->TypeGet());
// fall through to getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2);
7406 else // op1 is not contained thus it must be in a register
ins = INS_test;
op2 = op1; // we will generate "test reg1,reg1"
7410 // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2);
7414 getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2);
7416 // Are we evaluating this into a register?
7417 if (targetReg != REG_NA)
7419 genSetRegToCond(targetReg, tree);
7420 genProduceReg(tree);
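// Illustrative example (not from the original source): materializing (x == 0)
// with x in eax takes the test path above and emits
//     test  eax, eax
//     sete  al
//     movzx eax, al
// rather than "cmp eax, 0", producing the same flags with a shorter encoding.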
7424 //-------------------------------------------------------------------------------------------
7425 // genSetRegToCond: Set a register 'dstReg' to the appropriate one or zero value
7426 // corresponding to a binary Relational operator result.
7429 // dstReg - The target register to set to 1 or 0
7430 // tree - The GenTree Relop node that was used to set the Condition codes
7432 // Return Value: none
7435 // A full 64-bit value of either 1 or 0 is setup in the 'dstReg'
7436 //-------------------------------------------------------------------------------------------
7438 void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
7440 noway_assert((genRegMask(dstReg) & RBM_BYTE_REGS) != 0);
7442 emitJumpKind jumpKind[2];
7443 bool branchToTrueLabel[2];
7444 genJumpKindsForTree(tree, jumpKind, branchToTrueLabel);
7446 if (jumpKind[1] == EJ_NONE)
7448 // Set (lower byte of) reg according to the flags
7449 inst_SET(jumpKind[0], dstReg);
// jmpKind[1] != EJ_NONE implies BEQ or BNE.UN of floating point values.
7455 // These are represented by two conditions.
7456 if (tree->gtOper == GT_EQ)
7458 // This must be an ordered comparison.
7459 assert((tree->gtFlags & GTF_RELOP_NAN_UN) == 0);
7463 // This must be BNE.UN
7464 assert((tree->gtOper == GT_NE) && ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0));
7468 // Here is the sample code generated in each case:
7469 // BEQ == cmp, jpe <false label>, je <true label>
// That is, to materialize the comparison, reg needs to be set if PF=0 and ZF=1
7471 // setnp reg // if (PF==0) reg = 1 else reg = 0
7472 // jpe L1 // Jmp if PF==1
7476 // BNE.UN == cmp, jpe <true label>, jne <true label>
7477 // That is, to materialize the comparison reg needs to be set if either PF=1 or ZF=0;
// Reverse the jmpkind condition before setting dstReg if the first branch is to the false label.
7484 inst_SET(branchToTrueLabel[0] ? jumpKind[0] : emitter::emitReverseJumpKind(jumpKind[0]), dstReg);
7486 BasicBlock* label = genCreateTempLabel();
7487 inst_JMP(jumpKind[0], label);
7489 // second branch is always to true label
7490 assert(branchToTrueLabel[1]);
7491 inst_SET(jumpKind[1], dstReg);
7492 genDefineTempLabel(label);
7495 var_types treeType = tree->TypeGet();
7496 if (treeType == TYP_INT || treeType == TYP_LONG)
7498 // Set the higher bytes to 0
7499 inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), dstReg, dstReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
7503 noway_assert(treeType == TYP_BYTE);
7507 #if !defined(_TARGET_64BIT_)
7508 //------------------------------------------------------------------------
// genLongToIntCast: Generate code for long to int casts on x86.
7512 // cast - The GT_CAST node
7518 // The cast node and its sources (via GT_LONG) must have been assigned registers.
7519 // The destination cannot be a floating point type or a small integer type.
7521 void CodeGen::genLongToIntCast(GenTree* cast)
7523 assert(cast->OperGet() == GT_CAST);
7525 GenTree* src = cast->gtGetOp1();
7526 noway_assert(src->OperGet() == GT_LONG);
7528 genConsumeRegs(src);
7530 var_types srcType = ((cast->gtFlags & GTF_UNSIGNED) != 0) ? TYP_ULONG : TYP_LONG;
7531 var_types dstType = cast->CastToType();
7532 regNumber loSrcReg = src->gtGetOp1()->gtRegNum;
7533 regNumber hiSrcReg = src->gtGetOp2()->gtRegNum;
7534 regNumber dstReg = cast->gtRegNum;
7536 assert((dstType == TYP_INT) || (dstType == TYP_UINT));
7537 assert(genIsValidIntReg(loSrcReg));
7538 assert(genIsValidIntReg(hiSrcReg));
7539 assert(genIsValidIntReg(dstReg));
7541 if (cast->gtOverflow())
7544 // Generate an overflow check for [u]long to [u]int casts:
7546 // long -> int - check if the upper 33 bits are all 0 or all 1
7548 // ulong -> int - check if the upper 33 bits are all 0
7550 // long -> uint - check if the upper 32 bits are all 0
7551 // ulong -> uint - check if the upper 32 bits are all 0
7554 if ((srcType == TYP_LONG) && (dstType == TYP_INT))
7556 BasicBlock* allOne = genCreateTempLabel();
7557 BasicBlock* success = genCreateTempLabel();
7559 inst_RV_RV(INS_test, loSrcReg, loSrcReg, TYP_INT, EA_4BYTE);
7560 inst_JMP(EJ_js, allOne);
7562 inst_RV_RV(INS_test, hiSrcReg, hiSrcReg, TYP_INT, EA_4BYTE);
7563 genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
7564 inst_JMP(EJ_jmp, success);
7566 genDefineTempLabel(allOne);
7567 inst_RV_IV(INS_cmp, hiSrcReg, -1, EA_4BYTE);
7568 genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
7570 genDefineTempLabel(success);
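// Illustrative sketch (not from the original source) of the sequence emitted
// above, with lo in eax and hi in edx:
//     test eax, eax    ; sign of lo
//     js   allOne      ; negative lo => upper 33 bits must be all 1
//     test edx, edx    ; hi must be all 0
//     jne  <throw>
//     jmp  success
// allOne:
//     cmp  edx, -1     ; hi must be all 1
//     jne  <throw>
// success: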
7574 if ((srcType == TYP_ULONG) && (dstType == TYP_INT))
7576 inst_RV_RV(INS_test, loSrcReg, loSrcReg, TYP_INT, EA_4BYTE);
7577 genJumpToThrowHlpBlk(EJ_js, SCK_OVERFLOW);
7580 inst_RV_RV(INS_test, hiSrcReg, hiSrcReg, TYP_INT, EA_4BYTE);
7581 genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
7585 if (dstReg != loSrcReg)
7587 inst_RV_RV(INS_mov, dstReg, loSrcReg, TYP_INT, EA_4BYTE);
7590 genProduceReg(cast);
#endif // !defined(_TARGET_64BIT_)

//------------------------------------------------------------------------
7595 // genIntToIntCast: Generate code for an integer cast
7596 // This method handles integer overflow checking casts
7597 // as well as ordinary integer casts.
7600 // treeNode - The GT_CAST node
7606 // The treeNode is not a contained node and must have an assigned register.
7607 // For a signed convert from byte, the source must be in a byte-addressable register.
7608 // Neither the source nor target type can be a floating point type.
7610 // TODO-XArch-CQ: Allow castOp to be a contained node without an assigned register.
7611 // TODO: refactor to use getCastDescription
7613 void CodeGen::genIntToIntCast(GenTreePtr treeNode)
7615 assert(treeNode->OperGet() == GT_CAST);
7617 GenTreePtr castOp = treeNode->gtCast.CastOp();
7618 var_types srcType = genActualType(castOp->TypeGet());
7620 #if !defined(_TARGET_64BIT_)
7621 if (varTypeIsLong(srcType))
7623 genLongToIntCast(treeNode);
7626 #endif // !defined(_TARGET_64BIT_)
7628 regNumber targetReg = treeNode->gtRegNum;
7629 regNumber sourceReg = castOp->gtRegNum;
7630 var_types dstType = treeNode->CastToType();
7631 bool isUnsignedDst = varTypeIsUnsigned(dstType);
7632 bool isUnsignedSrc = varTypeIsUnsigned(srcType);
7634 // if necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set
7635 if (!isUnsignedSrc && (treeNode->gtFlags & GTF_UNSIGNED) != 0)
7637 srcType = genUnsignedType(srcType);
7638 isUnsignedSrc = true;
7641 bool requiresOverflowCheck = false;
7642 bool needAndAfter = false;
7644 assert(genIsValidIntReg(targetReg));
7645 assert(genIsValidIntReg(sourceReg));
7647 instruction ins = INS_invalid;
7648 emitAttr size = EA_UNKNOWN;
7650 if (genTypeSize(srcType) < genTypeSize(dstType))
7654 // Is this an Overflow checking cast?
7655 // We only need to handle one case, as the other casts can never overflow.
7656 // cast from TYP_INT to TYP_ULONG
7658 if (treeNode->gtOverflow() && (srcType == TYP_INT) && (dstType == TYP_ULONG))
7660 requiresOverflowCheck = true;
size = EA_ATTR(genTypeSize(srcType));
ins = INS_mov;
}
else
{
7666 // we need the source size
7667 size = EA_ATTR(genTypeSize(srcType));
7668 noway_assert(size < EA_PTRSIZE);
7670 ins = ins_Move_Extend(srcType, castOp->InReg());
Special case: ins_Move_Extend assumes the destination type is no bigger
than TYP_INT. movsx and movzx can already extend all the way to 64-bit,
and a regular 32-bit mov clears the high 32 bits (like the non-existent
movzxd), but for a sign extension from TYP_INT to TYP_LONG, we need to
use the movsxd opcode.
7678 if (!isUnsignedSrc && !isUnsignedDst && (size == EA_4BYTE) && (genTypeSize(dstType) > EA_4BYTE))
7681 NYI_X86("Cast to 64 bit for x86/RyuJIT");
7682 #else // !_TARGET_X86_
7684 #endif // !_TARGET_X86_
7688 Special case: for a cast of byte to char we first
7689 have to expand the byte (w/ sign extension), then
7690 mask off the high bits.
7691 Use 'movsx' followed by 'and'
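// For illustration, an sbyte -> char cast would emit roughly (a sketch;
// register names are representative):
//       movsx targetReg, sourceByteReg
//       and   targetReg, 0xFFFF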
7693 if (!isUnsignedSrc && isUnsignedDst && (genTypeSize(dstType) < EA_4BYTE))
7695 noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE);
7696 needAndAfter = true;
7702 // Narrowing cast, or sign-changing cast
7703 noway_assert(genTypeSize(srcType) >= genTypeSize(dstType));
7705 // Is this an Overflow checking cast?
7706 if (treeNode->gtOverflow())
7708 requiresOverflowCheck = true;
7709 size = EA_ATTR(genTypeSize(srcType));
7714 size = EA_ATTR(genTypeSize(dstType));
7715 ins = ins_Move_Extend(dstType, castOp->InReg());
7719 noway_assert(ins != INS_invalid);
7721 genConsumeReg(castOp);
7723 if (requiresOverflowCheck)
7725 ssize_t typeMin = 0;
7726 ssize_t typeMax = 0;
7727 ssize_t typeMask = 0;
7728 bool needScratchReg = false;
7729 bool signCheckOnly = false;
7731 /* Do we need to compare the value, or just check masks */
7736 typeMask = ssize_t((int)0xFFFFFF80);
7737 typeMin = SCHAR_MIN;
7738 typeMax = SCHAR_MAX;
7742 typeMask = ssize_t((int)0xFFFFFF00L);
7746 typeMask = ssize_t((int)0xFFFF8000);
7752 typeMask = ssize_t((int)0xFFFF0000L);
7756 if (srcType == TYP_UINT)
7758 signCheckOnly = true;
7762 typeMask = 0xFFFFFFFF80000000LL;
7769 if (srcType == TYP_INT)
7771 signCheckOnly = true;
7775 needScratchReg = true;
7780 noway_assert(srcType == TYP_ULONG);
7781 signCheckOnly = true;
7785 noway_assert((srcType == TYP_LONG) || (srcType == TYP_INT));
7786 signCheckOnly = true;
7790 NO_WAY("Unknown type");
7796 // We only need to check for a negative value in sourceReg
7797 inst_RV_IV(INS_cmp, sourceReg, 0, size);
7798 genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
7802 regNumber tmpReg = REG_NA;
7806 // We need an additional temp register
7807 // Make sure we have exactly one allocated.
7808 assert(treeNode->gtRsvdRegs != RBM_NONE);
7809 assert(genCountBits(treeNode->gtRsvdRegs) == 1);
7810 tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
7813 // When we are converting from unsigned or to unsigned, we
7814 // will only have to check for any bits set using 'typeMask'
7815 if (isUnsignedSrc || isUnsignedDst)
7819 inst_RV_RV(INS_mov, tmpReg, sourceReg, TYP_LONG); // Move the 64-bit value to a writeable temp reg
7820 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, size, tmpReg, 32); // Shift right by 32 bits
genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); // Throw if the shifted result is non-zero
7825 noway_assert(typeMask != 0);
7826 inst_RV_IV(INS_TEST, sourceReg, typeMask, size);
7827 genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
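// For illustration, an overflow checking uint -> ushort cast would emit
// roughly (a sketch; the immediate is the typeMask computed above):
//       test  sourceReg, 0xFFFF0000
//       jne   <throw SCK_OVERFLOW>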
7832 // For a narrowing signed cast
7834 // We must check the value is in a signed range.
7836 // Compare with the MAX
7838 noway_assert((typeMin != 0) && (typeMax != 0));
7840 inst_RV_IV(INS_cmp, sourceReg, typeMax, size);
7841 genJumpToThrowHlpBlk(EJ_jg, SCK_OVERFLOW);
7843 // Compare with the MIN
7845 inst_RV_IV(INS_cmp, sourceReg, typeMin, size);
7846 genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
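// For illustration, an overflow checking int -> sbyte cast would emit
// roughly (a sketch):
//       cmp   sourceReg, 127            ; SCHAR_MAX
//       jg    <throw SCK_OVERFLOW>
//       cmp   sourceReg, -128           ; SCHAR_MIN
//       jl    <throw SCK_OVERFLOW>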
7850 if (targetReg != sourceReg
7851 #ifdef _TARGET_AMD64_
7852 // On amd64, we can hit this path for a same-register
7853 // 4-byte to 8-byte widening conversion, and need to
7854 // emit the instruction to set the high bits correctly.
7855 || (EA_ATTR(genTypeSize(dstType)) == EA_8BYTE && EA_ATTR(genTypeSize(srcType)) == EA_4BYTE)
7856 #endif // _TARGET_AMD64_
7858 inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
7860 else // non-overflow checking cast
7862 noway_assert(size < EA_PTRSIZE || srcType == dstType);
7864 // We may have code transformations that result in casts where srcType is the same as dstType.
7865 // e.g. Bug 824281, in which a comma is split by the rationalizer, leaving an assignment of a
7866 // long constant to a long lclVar.
7867 if (srcType == dstType)
7871 /* Is the value sitting in a non-byte-addressable register? */
7872 else if (castOp->InReg() && (size == EA_1BYTE) && !isByteReg(sourceReg))
7876 // for unsigned values we can AND, so it need not be a byte register
7881 // Move the value into a byte register
7882 noway_assert(!"Signed byte convert from non-byte-addressable register");
/* Generate "mov targetReg, castOp->gtReg" */
7886 if (targetReg != sourceReg)
7888 inst_RV_RV(INS_mov, targetReg, sourceReg, srcType);
7894 noway_assert((needAndAfter == false) && isUnsignedDst);
/* Generate "and reg, MASK" */
7897 unsigned fillPattern;
if (size == EA_1BYTE)
{
    fillPattern = 0xff;
}
else if (size == EA_2BYTE)
{
    fillPattern = 0xffff;
}
else
{
    fillPattern = 0xffffffff;
}
7911 inst_RV_IV(INS_AND, targetReg, fillPattern, EA_4BYTE);
7913 #ifdef _TARGET_AMD64_
7914 else if (ins == INS_movsxd)
7916 noway_assert(!needAndAfter);
7917 inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
7919 #endif // _TARGET_AMD64_
7920 else if (ins == INS_mov)
7922 noway_assert(!needAndAfter);
7923 if (targetReg != sourceReg
7924 #ifdef _TARGET_AMD64_
7925 // On amd64, 'mov' is the opcode used to zero-extend from
7926 // 4 bytes to 8 bytes.
7927 || (EA_ATTR(genTypeSize(dstType)) == EA_8BYTE && EA_ATTR(genTypeSize(srcType)) == EA_4BYTE)
7928 #endif // _TARGET_AMD64_
7931 inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
7936 noway_assert(ins == INS_movsx || ins == INS_movzx);
/* Generate "mov targetReg, castOp->gtReg" */
7939 inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
7941 /* Mask off high bits for cast from byte to char */
7944 noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx);
7945 inst_RV_IV(INS_AND, targetReg, 0xFFFF, EA_4BYTE);
7950 genProduceReg(treeNode);
7953 //------------------------------------------------------------------------
7954 // genFloatToFloatCast: Generate code for a cast between float and double
7957 // treeNode - The GT_CAST node
7963 // Cast is a non-overflow conversion.
7964 // The treeNode must have an assigned register.
7965 // The cast is between float and double or vice versa.
7967 void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
7969 // float <--> double conversions are always non-overflow ones
7970 assert(treeNode->OperGet() == GT_CAST);
7971 assert(!treeNode->gtOverflow());
7973 regNumber targetReg = treeNode->gtRegNum;
7974 assert(genIsValidFloatReg(targetReg));
7976 GenTreePtr op1 = treeNode->gtOp.gtOp1;
7978 // If not contained, must be a valid float reg.
7979 if (!op1->isContained())
7981 assert(genIsValidFloatReg(op1->gtRegNum));
7985 var_types dstType = treeNode->CastToType();
7986 var_types srcType = op1->TypeGet();
7987 assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
7989 genConsumeOperands(treeNode->AsOp());
7990 if (srcType == dstType && targetReg == op1->gtRegNum)
// The source and destination types are the same and also reside in the same register;
// we just need to consume and produce the reg in this case.
7998 instruction ins = ins_FloatConv(dstType, srcType);
7999 getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
8002 genProduceReg(treeNode);
8005 //------------------------------------------------------------------------
8006 // genIntToFloatCast: Generate code to cast an int/long to float/double
8009 // treeNode - The GT_CAST node
8015 // Cast is a non-overflow conversion.
8016 // The treeNode must have an assigned register.
8017 // SrcType= int32/uint32/int64/uint64 and DstType=float/double.
8019 void CodeGen::genIntToFloatCast(GenTreePtr treeNode)
8021 // int type --> float/double conversions are always non-overflow ones
8022 assert(treeNode->OperGet() == GT_CAST);
8023 assert(!treeNode->gtOverflow());
8025 regNumber targetReg = treeNode->gtRegNum;
8026 assert(genIsValidFloatReg(targetReg));
8028 GenTreePtr op1 = treeNode->gtOp.gtOp1;
8030 if (!op1->isContained())
8032 assert(genIsValidIntReg(op1->gtRegNum));
8036 var_types dstType = treeNode->CastToType();
8037 var_types srcType = op1->TypeGet();
8038 assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
8040 #if !defined(_TARGET_64BIT_)
8041 // We expect morph to replace long to float/double casts with helper calls
8042 noway_assert(!varTypeIsLong(srcType));
8043 #endif // !defined(_TARGET_64BIT_)
// Since the xarch emitter doesn't handle reporting gc-info correctly while casting away gc-ness, we
// ensure that the srcType of a cast is a non gc-type. Codegen should never see BYREF as a source type except
// for GT_LCL_VAR_ADDR and GT_LCL_FLD_ADDR, which represent stack addresses and can be considered
// as TYP_I_IMPL. In all other cases where the src operand is a gc-type and not known to be on the stack,
// the front-end (see fgMorphCast()) ensures this by assigning the gc-type local to a non gc-type
// temp and using the temp as the operand of the cast operation.
8051 if (srcType == TYP_BYREF)
8053 noway_assert(op1->OperGet() == GT_LCL_VAR_ADDR || op1->OperGet() == GT_LCL_FLD_ADDR);
8054 srcType = TYP_I_IMPL;
8057 // force the srcType to unsigned if GT_UNSIGNED flag is set
8058 if (treeNode->gtFlags & GTF_UNSIGNED)
8060 srcType = genUnsignedType(srcType);
8063 noway_assert(!varTypeIsGC(srcType));
// We should never be seeing a srcType whose size is neither sizeof(int) nor sizeof(long).
8066 // For conversions from byte/sbyte/int16/uint16 to float/double, we would expect
8067 // either the front-end or lowering phase to have generated two levels of cast.
8068 // The first one is for widening smaller int type to int32 and the second one is
8069 // to the float/double.
8070 emitAttr srcSize = EA_ATTR(genTypeSize(srcType));
8071 noway_assert((srcSize == EA_ATTR(genTypeSize(TYP_INT))) || (srcSize == EA_ATTR(genTypeSize(TYP_LONG))));
// Also we don't expect to see uint32 -> float/double and uint64 -> float conversions
// here since they should have been lowered appropriately.
8075 noway_assert(srcType != TYP_UINT);
8076 noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT));
8078 // To convert int to a float/double, cvtsi2ss/sd SSE2 instruction is used
8079 // which does a partial write to lower 4/8 bytes of xmm register keeping the other
8080 // upper bytes unmodified. If "cvtsi2ss/sd xmmReg, r32/r64" occurs inside a loop,
8081 // the partial write could introduce a false dependency and could cause a stall
// if there are further uses of xmmReg. We have such a case occurring with a
// customer-reported version of the SpectralNorm benchmark, resulting in a 2x perf
// regression. To avoid the false dependency, we emit "xorps xmmReg, xmmReg" before the
8085 // cvtsi2ss/sd instruction.
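//
// For illustration, an int -> double cast therefore emits roughly (a sketch;
// register names are representative):
//       xorps    xmm0, xmm0             ; break the false dependency
//       cvtsi2sd xmm0, eax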
8087 genConsumeOperands(treeNode->AsOp());
8088 getEmitter()->emitIns_R_R(INS_xorps, EA_4BYTE, treeNode->gtRegNum, treeNode->gtRegNum);
// Note that here we need to specify srcType, since it determines
// the size of the source reg/mem operand and the rex.w prefix.
8092 instruction ins = ins_FloatConv(dstType, TYP_INT);
8093 getEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1);
// Handle the case of srcType = TYP_ULONG. The SSE2 conversion instruction
// will interpret the ULONG value as a LONG. Hence we need to adjust the
// result if the sign bit of srcType is set.
8098 if (srcType == TYP_ULONG)
// The instruction sequence below is less accurate than what clang
// and gcc generate. However, we keep the current sequence for backward compatibility.
// If we change the instructions below, FloatingPointUtils::convertUInt64ToDouble
// should also be updated for a consistent conversion result.
8104 assert(dstType == TYP_DOUBLE);
8105 assert(!op1->isContained());
8107 // Set the flags without modifying op1.
8108 // test op1Reg, op1Reg
8109 inst_RV_RV(INS_test, op1->gtRegNum, op1->gtRegNum, srcType);
8111 // No need to adjust result if op1 >= 0 i.e. positive
8113 BasicBlock* label = genCreateTempLabel();
8114 inst_JMP(EJ_jge, label);
8116 // Adjust the result
8117 // result = result + 0x43f00000 00000000
8118 // addsd resultReg, 0x43f00000 00000000
8119 GenTreePtr* cns = &u8ToDblBitmask;
8120 if (*cns == nullptr)
8123 static_assert_no_msg(sizeof(double) == sizeof(__int64));
8124 *((__int64*)&d) = 0x43f0000000000000LL;
8126 *cns = genMakeConst(&d, dstType, treeNode, true);
8128 inst_RV_TT(INS_addsd, treeNode->gtRegNum, *cns);
8130 genDefineTempLabel(label);
8133 genProduceReg(treeNode);
8136 //------------------------------------------------------------------------
8137 // genFloatToIntCast: Generate code to cast float/double to int/long
8140 // treeNode - The GT_CAST node
8146 // Cast is a non-overflow conversion.
8147 // The treeNode must have an assigned register.
8148 // SrcType=float/double and DstType= int32/uint32/int64/uint64
8150 // TODO-XArch-CQ: (Low-pri) - generate in-line code when DstType = uint64
8152 void CodeGen::genFloatToIntCast(GenTreePtr treeNode)
8154 // we don't expect to see overflow detecting float/double --> int type conversions here
8155 // as they should have been converted into helper calls by front-end.
8156 assert(treeNode->OperGet() == GT_CAST);
8157 assert(!treeNode->gtOverflow());
8159 regNumber targetReg = treeNode->gtRegNum;
8160 assert(genIsValidIntReg(targetReg));
8162 GenTreePtr op1 = treeNode->gtOp.gtOp1;
8164 if (!op1->isContained())
8166 assert(genIsValidFloatReg(op1->gtRegNum));
8170 var_types dstType = treeNode->CastToType();
8171 var_types srcType = op1->TypeGet();
8172 assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));
8174 // We should never be seeing dstType whose size is neither sizeof(TYP_INT) nor sizeof(TYP_LONG).
8175 // For conversions to byte/sbyte/int16/uint16 from float/double, we would expect the
8176 // front-end or lowering phase to have generated two levels of cast. The first one is
8177 // for float or double to int32/uint32 and the second one for narrowing int32/uint32 to
8178 // the required smaller int type.
8179 emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
8180 noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG))));
8182 // We shouldn't be seeing uint64 here as it should have been converted
8183 // into a helper call by either front-end or lowering phase.
8184 noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))));
// If the dstType is TYP_UINT, we need all 32 bits of the result to encode the
// magnitude, so the sign bit must fall in the 33rd bit or above.
// To achieve this we pretend we are converting to a long.
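//
// For illustration, a double -> uint cast is therefore emitted as if it were a
// double -> long conversion (a sketch; register names are representative):
//       cvttsd2si targetReg64, xmmReg   ; low 32 bits of the 64-bit result hold the uint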
8189 if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))))
// Note that we need to specify dstType here, since it determines
// the size of the destination integer register and also the rex.w prefix.
8196 genConsumeOperands(treeNode->AsOp());
8197 instruction ins = ins_FloatConv(TYP_INT, srcType);
8198 getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
8199 genProduceReg(treeNode);
8202 //------------------------------------------------------------------------
8203 // genCkfinite: Generate code for ckfinite opcode.
8206 // treeNode - The GT_CKFINITE node
8212 // GT_CKFINITE node has reserved an internal register.
8214 // TODO-XArch-CQ - mark the operand as contained if known to be in
8215 // memory (e.g. field or an array element).
8217 void CodeGen::genCkfinite(GenTreePtr treeNode)
8219 assert(treeNode->OperGet() == GT_CKFINITE);
8221 GenTreePtr op1 = treeNode->gtOp.gtOp1;
8222 var_types targetType = treeNode->TypeGet();
8223 int expMask = (targetType == TYP_FLOAT) ? 0x7F800000 : 0x7FF00000; // Bit mask to extract exponent.
8224 regNumber targetReg = treeNode->gtRegNum;
8226 // Extract exponent into a register.
8227 assert(treeNode->gtRsvdRegs != RBM_NONE);
8228 assert(genCountBits(treeNode->gtRsvdRegs) == 1);
8229 regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
8233 #ifdef _TARGET_64BIT_
8235 // Copy the floating-point value to an integer register. If we copied a float to a long, then
8236 // right-shift the value so the high 32 bits of the floating-point value sit in the low 32
8237 // bits of the integer register.
8238 instruction ins = ins_CopyFloatToInt(targetType, (targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG);
8239 inst_RV_RV(ins, op1->gtRegNum, tmpReg, targetType);
8240 if (targetType == TYP_DOUBLE)
8242 // right shift by 32 bits to get to exponent.
8243 inst_RV_SH(INS_shr, EA_8BYTE, tmpReg, 32);
8246 // Mask exponent with all 1's and check if the exponent is all 1's
8247 inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
8248 inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE);
8250 // If exponent is all 1's, throw ArithmeticException
8251 genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);
8253 // if it is a finite value copy it to targetReg
8254 if (targetReg != op1->gtRegNum)
8256 inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
8259 #else // !_TARGET_64BIT_
8261 // If the target type is TYP_DOUBLE, we want to extract the high 32 bits into the register.
// There is no easy way to do this. To avoid requiring an extra register, we'll use shuffles
// to move the high 32 bits into the low 32 bits, then shuffle it back, since we
// need to produce the value into the target register.
8266 // For TYP_DOUBLE, we'll generate (for targetReg != op1->gtRegNum):
8267 // movaps targetReg, op1->gtRegNum
8268 // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
8269 // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
8270 // and tmpReg, <mask>
8271 // cmp tmpReg, <mask>
8273 // movaps targetReg, op1->gtRegNum // copy the value again, instead of un-shuffling it
8275 // For TYP_DOUBLE with (targetReg == op1->gtRegNum):
8276 // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
8277 // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
8278 // and tmpReg, <mask>
8279 // cmp tmpReg, <mask>
8281 // shufps targetReg, targetReg, 0xB1 // ZWXY => WZYX
8283 // For TYP_FLOAT, it's the same as _TARGET_64BIT_:
8284 // mov_xmm2i tmpReg, targetReg // tmpReg <= low 32 bits
8285 // and tmpReg, <mask>
8286 // cmp tmpReg, <mask>
8288 // movaps targetReg, op1->gtRegNum // only if targetReg != op1->gtRegNum
8290 regNumber copyToTmpSrcReg; // The register we'll copy to the integer temp.
8292 if (targetType == TYP_DOUBLE)
8294 if (targetReg != op1->gtRegNum)
8296 inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
8298 inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, 0xb1);
8299 copyToTmpSrcReg = targetReg;
8303 copyToTmpSrcReg = op1->gtRegNum;
8306 // Copy only the low 32 bits. This will be the high order 32 bits of the floating-point
8307 // value, no matter the floating-point type.
8308 inst_RV_RV(ins_CopyFloatToInt(TYP_FLOAT, TYP_INT), copyToTmpSrcReg, tmpReg, TYP_FLOAT);
8310 // Mask exponent with all 1's and check if the exponent is all 1's
8311 inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
8312 inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE);
8314 // If exponent is all 1's, throw ArithmeticException
8315 genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);
8317 if (targetReg != op1->gtRegNum)
8319 // In both the TYP_FLOAT and TYP_DOUBLE case, the op1 register is untouched,
8320 // so copy it to the targetReg. This is faster and smaller for TYP_DOUBLE
8321 // than re-shuffling the targetReg.
8322 inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
8324 else if (targetType == TYP_DOUBLE)
8326 // We need to re-shuffle the targetReg to get the correct result.
8327 inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, 0xb1);
8330 #endif // !_TARGET_64BIT_
8332 genProduceReg(treeNode);
8335 #ifdef _TARGET_AMD64_
8336 int CodeGenInterface::genSPtoFPdelta()
8340 #ifdef PLATFORM_UNIX
8342 // We require frame chaining on Unix to support native tool unwinding (such as
8343 // unwinding by the native debugger). We have a CLR-only extension to the
8344 // unwind codes (UWOP_SET_FPREG_LARGE) to support SP->FP offsets larger than 240.
8345 // If Unix ever supports EnC, the RSP == RBP assumption will have to be reevaluated.
8346 delta = genTotalFrameSize();
8348 #else // !PLATFORM_UNIX
8350 // As per Amd64 ABI, RBP offset from initial RSP can be between 0 and 240 if
// RBP needs to be reported in unwind codes. This case would arise for methods
// with localloc.
8353 if (compiler->compLocallocUsed)
// We cannot base the delta computation on compLclFrameSize since it changes from
// tentative to final frame layout, and hence there is a possibility of
// underestimating the offset of vars from FP, which in turn results in
// underestimating instruction size.
8360 // To be predictive and so as never to under-estimate offset of vars from FP
8361 // we will always position FP at min(240, outgoing arg area size).
8362 delta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize);
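// For illustration: lvaOutgoingArgSpaceSize = 0x20 gives delta = 0x20, while
// lvaOutgoingArgSpaceSize = 0x400 is capped at delta = 240.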
8364 else if (compiler->opts.compDbgEnC)
// The VM's assumption for EnC methods is that rsp and rbp are equal
8371 delta = genTotalFrameSize();
8374 #endif // !PLATFORM_UNIX
8379 //---------------------------------------------------------------------
8380 // genTotalFrameSize - return the total size of the stack frame, including local size,
// callee-saved register size, etc. For AMD64, this does not include the caller-pushed
// return address.
8388 int CodeGenInterface::genTotalFrameSize()
8390 assert(!IsUninitialized(compiler->compCalleeRegsPushed));
8392 int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
8394 assert(totalFrameSize >= 0);
8395 return totalFrameSize;
8398 //---------------------------------------------------------------------
8399 // genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
8400 // This number is going to be negative, since the Caller-SP is at a higher
8401 // address than the frame pointer.
8403 // There must be a frame pointer to call this function!
8405 // We can't compute this directly from the Caller-SP, since the frame pointer
8406 // is based on a maximum delta from Initial-SP, so first we find SP, then
8407 // compute the FP offset.
8409 int CodeGenInterface::genCallerSPtoFPdelta()
8411 assert(isFramePointerUsed());
8412 int callerSPtoFPdelta;
8414 callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta();
8416 assert(callerSPtoFPdelta <= 0);
8417 return callerSPtoFPdelta;
8420 //---------------------------------------------------------------------
8421 // genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
8423 // This number will be negative.
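//
// For illustration (numbers are representative): with 2 callee-saved registers
// pushed, a 0x40-byte local frame, and a frame pointer, the delta would be
// -(2 * 8 + 0x40) - 8 (return address) - 8 (RBP) = -0x60.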
8425 int CodeGenInterface::genCallerSPtoInitialSPdelta()
8427 int callerSPtoSPdelta = 0;
8429 callerSPtoSPdelta -= genTotalFrameSize();
8430 callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address
8432 // compCalleeRegsPushed does not account for the frame pointer
8433 // TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
8434 if (isFramePointerUsed())
8436 callerSPtoSPdelta -= REGSIZE_BYTES;
8439 assert(callerSPtoSPdelta <= 0);
8440 return callerSPtoSPdelta;
8442 #endif // _TARGET_AMD64_
8444 //-----------------------------------------------------------------------------------------
8445 // genSSE2BitwiseOp - generate SSE2 code for the given oper as "Operand BitWiseOp BitMask"
8448 // treeNode - tree node
8454 // i) tree oper is one of GT_NEG or GT_INTRINSIC Abs()
8455 // ii) tree type is floating point type.
8456 // iii) caller of this routine needs to call genProduceReg()
8457 void CodeGen::genSSE2BitwiseOp(GenTreePtr treeNode)
8459 regNumber targetReg = treeNode->gtRegNum;
8460 var_types targetType = treeNode->TypeGet();
8461 assert(varTypeIsFloating(targetType));
8465 GenTreePtr* bitMask = nullptr;
8466 instruction ins = INS_invalid;
8467 void* cnsAddr = nullptr;
8468 bool dblAlign = false;
8470 switch (treeNode->OperGet())
8473 // Neg(x) = flip the sign bit.
8474 // Neg(f) = f ^ 0x80000000
8475 // Neg(d) = d ^ 0x8000000000000000
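//
// For illustration, Neg(float) emits roughly the following (a sketch; the
// actual mnemonics are chosen by ins_Load and genGetInsForOper):
//       movss tmpReg, [negBitmaskFlt]   ; 0x80000000
//       xorps targetReg, tmpReg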
8476 ins = genGetInsForOper(GT_XOR, targetType);
8477 if (targetType == TYP_FLOAT)
8479 bitMask = &negBitmaskFlt;
8481 static_assert_no_msg(sizeof(float) == sizeof(int));
8482 *((int*)&f) = 0x80000000;
8487 bitMask = &negBitmaskDbl;
8489 static_assert_no_msg(sizeof(double) == sizeof(__int64));
8490 *((__int64*)&d) = 0x8000000000000000LL;
8497 assert(treeNode->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs);
8499 // Abs(x) = set sign-bit to zero
8500 // Abs(f) = f & 0x7fffffff
8501 // Abs(d) = d & 0x7fffffffffffffff
8502 ins = genGetInsForOper(GT_AND, targetType);
8503 if (targetType == TYP_FLOAT)
8505 bitMask = &absBitmaskFlt;
8507 static_assert_no_msg(sizeof(float) == sizeof(int));
8508 *((int*)&f) = 0x7fffffff;
8513 bitMask = &absBitmaskDbl;
8515 static_assert_no_msg(sizeof(double) == sizeof(__int64));
8516 *((__int64*)&d) = 0x7fffffffffffffffLL;
8523 assert(!"genSSE2: unsupported oper");
8528 if (*bitMask == nullptr)
8530 assert(cnsAddr != nullptr);
8531 *bitMask = genMakeConst(cnsAddr, targetType, treeNode, dblAlign);
8534 // We need an additional register for bitmask.
8535 // Make sure we have one allocated.
8536 assert(treeNode->gtRsvdRegs != RBM_NONE);
8537 assert(genCountBits(treeNode->gtRsvdRegs) == 1);
8538 regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
// Move the operand into targetReg only if the register reserved for
// internal purposes is not the same as targetReg.
8542 GenTreePtr op1 = treeNode->gtOp.gtOp1;
8543 assert(!op1->isContained());
8544 regNumber operandReg = genConsumeReg(op1);
8545 if (tmpReg != targetReg)
8547 if (operandReg != targetReg)
8549 inst_RV_RV(ins_Copy(targetType), targetReg, operandReg, targetType);
8552 operandReg = tmpReg;
8555 inst_RV_TT(ins_Load(targetType, false), tmpReg, *bitMask);
8556 assert(ins != INS_invalid);
8557 inst_RV_RV(ins, targetReg, operandReg, targetType);
8560 //---------------------------------------------------------------------
8561 // genIntrinsic - generate code for a given intrinsic
8564 // treeNode - the GT_INTRINSIC node
8569 void CodeGen::genIntrinsic(GenTreePtr treeNode)
8571 // Right now only Sqrt/Abs are treated as math intrinsics.
8572 switch (treeNode->gtIntrinsic.gtIntrinsicId)
8574 case CORINFO_INTRINSIC_Sqrt:
8575 noway_assert(treeNode->TypeGet() == TYP_DOUBLE);
8576 genConsumeOperands(treeNode->AsOp());
8577 getEmitter()->emitInsBinary(ins_FloatSqrt(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode,
8578 treeNode->gtOp.gtOp1);
8581 case CORINFO_INTRINSIC_Abs:
8582 genSSE2BitwiseOp(treeNode);
8586 assert(!"genIntrinsic: Unsupported intrinsic");
8590 genProduceReg(treeNode);
8593 //-------------------------------------------------------------------------- //
8594 // getBaseVarForPutArgStk - returns the baseVarNum for passing a stack arg.
8597 // treeNode - the GT_PUTARG_STK node
8600 // The number of the base variable.
// If this is a tail call, the outgoing args are placed in the caller's incoming arg stack space.
8604 // Otherwise, they go in the outgoing arg area on the current frame.
8606 // On Windows the caller always creates slots (homing space) in its frame for the
8607 // first 4 arguments of a callee (register passed args). So, the baseVarNum is always 0.
8608 // For System V systems there is no such calling convention requirement, and the code needs to find
8609 // the first stack passed argument from the caller. This is done by iterating over
// all the lvParam variables and finding the first one whose lvArgReg equals REG_STK.
8612 unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
8614 assert(treeNode->OperGet() == GT_PUTARG_STK);
8616 unsigned baseVarNum;
8618 #if FEATURE_FASTTAILCALL
8619 bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea;
8621 const bool putInIncomingArgArea = false;
// Whether to set up the stack arg in the incoming or the outgoing arg area?
// Fast tail calls implemented as epilog+jmp: the stack arg is set up in the incoming arg area.
// All other calls: the stack arg is set up in the outgoing arg area.
8627 if (putInIncomingArgArea)
8629 // See the note in the function header re: finding the first stack passed argument.
8630 baseVarNum = getFirstArgWithStackSlot();
8631 assert(baseVarNum != BAD_VAR_NUM);
8634 // This must be a fast tail call.
8635 assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall());
// Since it is a fast tail call, the existence of a first incoming arg is guaranteed
// because a fast tail call requires that the incoming arg area of the caller is >= the
// outgoing arg area required for the tail call.
8640 LclVarDsc* varDsc = &(compiler->lvaTable[baseVarNum]);
8641 assert(varDsc != nullptr);
8643 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
8644 assert(!varDsc->lvIsRegArg && varDsc->lvArgReg == REG_STK);
8645 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
8646 // On Windows this assert is always true. The first argument will always be in REG_ARG_0 or REG_FLTARG_0.
8647 assert(varDsc->lvIsRegArg && (varDsc->lvArgReg == REG_ARG_0 || varDsc->lvArgReg == REG_FLTARG_0));
8648 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
8653 #if FEATURE_FIXED_OUT_ARGS
8654 baseVarNum = compiler->lvaOutgoingArgSpaceVar;
8655 #else // !FEATURE_FIXED_OUT_ARGS
8656 NYI_X86("Stack args for x86/RyuJIT");
8657 baseVarNum = BAD_VAR_NUM;
8658 #endif // !FEATURE_FIXED_OUT_ARGS
8664 //--------------------------------------------------------------------- //
8665 // genPutStructArgStk - generate code for passing an arg on the stack.
8668 // treeNode - the GT_PUTARG_STK node
8669 // targetType - the type of the treeNode
8674 void CodeGen::genPutArgStk(GenTreePtr treeNode)
8676 var_types targetType = treeNode->TypeGet();
8678 noway_assert(targetType != TYP_STRUCT);
8680 // The following logic is applicable for x86 arch.
8681 assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
8683 GenTreePtr data = treeNode->gtOp.gtOp1;
8685 // On a 32-bit target, all of the long arguments have been decomposed into
8686 // a separate putarg_stk for each of the upper and lower halves.
8687 noway_assert(targetType != TYP_LONG);
8689 int argSize = genTypeSize(genActualType(targetType));
8690 genStackLevel += argSize;
8692 // TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp?
8693 if (data->isContainedIntOrIImmed())
8695 if (data->IsIconHandle())
8697 inst_IV_handle(INS_push, data->gtIntCon.gtIconVal);
8701 inst_IV(INS_push, data->gtIntCon.gtIconVal);
8704 else if (data->isContained())
8706 NYI_X86("Contained putarg_stk of non-constant");
8710 genConsumeReg(data);
8711 if (varTypeIsIntegralOrI(targetType))
8713 inst_RV(INS_push, data->gtRegNum, targetType);
8718 inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL));
8719 getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0);
8722 #else // !_TARGET_X86_
8724 unsigned baseVarNum = getBaseVarForPutArgStk(treeNode);
8726 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
8728 if (varTypeIsStruct(targetType))
8730 genPutStructArgStk(treeNode, baseVarNum);
8733 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
8735 noway_assert(targetType != TYP_STRUCT);
8736 assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
8738 // Get argument offset on stack.
// Here we cross-check that the argument offset hasn't changed from lowering to codegen, since
// we store the arg slot number in the GT_PUTARG_STK node during the lowering phase.
8741 int argOffset = treeNode->AsPutArgStk()->getArgOffset();
8744 fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode);
8745 assert(curArgTabEntry);
8746 assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE);
8749 GenTreePtr data = treeNode->gtGetOp1();
8751 if (data->isContained())
8753 getEmitter()->emitIns_S_I(ins_Store(targetType), emitTypeSize(targetType), baseVarNum, argOffset,
8754 (int)data->AsIntConCommon()->IconValue());
8758 genConsumeReg(data);
8759 getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, baseVarNum,
8763 #endif // !_TARGET_X86_
8766 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
8768 //---------------------------------------------------------------------
8769 // genPutStructArgStk - generate code for copying a struct arg on the stack by value.
// If there are references to heap objects in the struct,
// it generates the gc info as well.
8774 // treeNode - the GT_PUTARG_STK node
8775 // baseVarNum - the variable number relative to which to put the argument on the stack.
// For tail calls this is baseVarNum = 0.
// For non-tail calls this is the outgoingArgSpace.
8782 void CodeGen::genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum)
8784 assert(treeNode->OperGet() == GT_PUTARG_STK);
8785 assert(baseVarNum != BAD_VAR_NUM);
8787 var_types targetType = treeNode->TypeGet();
8789 if (varTypeIsSIMD(targetType))
8791 regNumber srcReg = genConsumeReg(treeNode->gtGetOp1());
8792 assert((srcReg != REG_NA) && (genIsValidFloatReg(srcReg)));
8793 getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), srcReg, baseVarNum,
8794 treeNode->AsPutArgStk()->getArgOffset());
8798 assert(targetType == TYP_STRUCT);
8800 GenTreePutArgStk* putArgStk = treeNode->AsPutArgStk();
8801 if (putArgStk->gtNumberReferenceSlots == 0)
8803 switch (putArgStk->gtPutArgStkKind)
8805 case GenTreePutArgStk::PutArgStkKindRepInstr:
8806 genStructPutArgRepMovs(putArgStk, baseVarNum);
8808 case GenTreePutArgStk::PutArgStkKindUnroll:
8809 genStructPutArgUnroll(putArgStk, baseVarNum);
// No need to disable GC the way COPYOBJ does. Here the refs are always copied with atomic operations.
8819 // Consume these registers.
8820 // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
8821 genConsumePutStructArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA, baseVarNum);
8822 GenTreePtr dstAddr = putArgStk;
8823 GenTreePtr src = putArgStk->gtOp.gtOp1;
8824 assert(src->OperGet() == GT_OBJ);
8825 GenTreePtr srcAddr = src->gtGetOp1();
8827 unsigned slots = putArgStk->gtNumSlots;
// Since we are always copying to the stack, we don't need to use the write barrier.
8830 BYTE* gcPtrs = putArgStk->gtGcPtrs;
8831 unsigned gcPtrCount = putArgStk->gtNumberReferenceSlots;
8834 unsigned copiedSlots = 0;
8840 // Let's see if we can use rep movs{d,q} instead of a sequence of movs{d,q} instructions
8841 // to save cycles and code size.
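//
// For illustration, a run of 6 contiguous non-gc slots would be copied as
// (a sketch):
//       mov   ecx, 6
//       rep movsq            ; rep movsd on x86
// while a run below CPOBJ_NONGC_SLOTS_LIMIT is copied with individual
// movsq (movsd) instructions.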
8843 unsigned nonGcSlotCount = 0;
8849 } while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
8851 // If we have a very small contiguous non-gc region, it's better just to
8852 // emit a sequence of movs{d,q} instructions
8853 if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
8855 copiedSlots += nonGcSlotCount;
8856 while (nonGcSlotCount > 0)
8858 instGen(INS_movs_ptr);
8864 getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
8865 copiedSlots += nonGcSlotCount;
8866 instGen(INS_r_movs_ptr);
8871 case TYPE_GC_REF: // Is an object ref
8872 case TYPE_GC_BYREF: // Is an interior pointer - promote it but don't scan it
8874 // We have a GC (byref or ref) pointer
// TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use the movs{d,q} instruction,
// but the logic for emitting a GC info record is not available (it is internal to the emitter
// only). See the emitGCVarLiveUpd function. If we could call it separately, we could do
// instGen(INS_movs{d,q}); and the emission of gc info.
var_types memType;
if (gcPtrs[i] == TYPE_GC_REF)
{
    memType = TYP_REF;
}
else
{
    assert(gcPtrs[i] == TYPE_GC_BYREF);
    memType = TYP_BYREF;
}
8891 getEmitter()->emitIns_R_AR(ins_Load(memType), emitTypeSize(memType), REG_RCX, REG_RSI, 0);
8892 getEmitter()->emitIns_S_R(ins_Store(memType), emitTypeSize(memType), REG_RCX, baseVarNum,
8893 ((copiedSlots + putArgStk->gtSlotNum) * TARGET_POINTER_SIZE));
8895 // Source for the copy operation.
8896 // If a LocalAddr, use EA_PTRSIZE - copy from stack.
8897 // If not a LocalAddr, use EA_BYREF - the source location is not on the stack.
8898 getEmitter()->emitIns_R_I(INS_add, ((src->OperIsLocalAddr()) ? EA_PTRSIZE : EA_BYREF), REG_RSI,
8899 TARGET_POINTER_SIZE);
8901 // Always copying to the stack - outgoing arg area
8902 // (or the outgoing arg area of the caller for a tail call) - use EA_PTRSIZE.
8903 getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_RDI, TARGET_POINTER_SIZE);
8916 assert(gcPtrCount == 0);
8919 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
8921 /*****************************************************************************
8923 * Create and record GC Info for the function.
8925 #ifdef _TARGET_AMD64_
8927 #else // !_TARGET_AMD64_
8929 #endif // !_TARGET_AMD64_
8930 CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
8932 #ifdef JIT32_GCENCODER
8933 return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
8934 #else // !JIT32_GCENCODER
8935 genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
8936 #endif // !JIT32_GCENCODER
8939 #ifdef JIT32_GCENCODER
8940 void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize,
8941 unsigned prologSize,
8942 unsigned epilogSize DEBUGARG(void* codePtr))
8951 compiler->compInfoBlkSize =
8952 gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached);
8954 size_t argTabOffset = 0;
8955 size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset);
8959 if (genInterruptible)
8961 gcHeaderISize += compiler->compInfoBlkSize;
8962 gcPtrMapISize += ptrMapSize;
8966 gcHeaderNSize += compiler->compInfoBlkSize;
8967 gcPtrMapNSize += ptrMapSize;
8970 #endif // DISPLAY_SIZES
8972 compiler->compInfoBlkSize += ptrMapSize;
8974 /* Allocate the info block for the method */
8976 compiler->compInfoBlkAddr = (BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize);
8978 #if 0 // VERBOSE_SIZES
8979 // TODO-X86-Cleanup: 'dataSize', below, is not defined
8981 // if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100)
8983 printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n",
8984 compiler->info.compILCodeSize,
8985 compiler->compInfoBlkSize,
8986 codeSize + dataSize,
8987 codeSize + dataSize - prologSize - epilogSize,
8988 100 * (codeSize + dataSize) / compiler->info.compILCodeSize,
8989 100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize,
8990 compiler->info.compClassName,
8991 compiler->info.compMethodName);
8996 /* Fill in the info block and return it to the caller */
8998 void* infoPtr = compiler->compInfoBlkAddr;
9000 /* Create the method info block: header followed by GC tracking tables */
9002 compiler->compInfoBlkAddr +=
9003 gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached);
9005 assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize);
9006 compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset);
9007 assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize);
9013 BYTE* temp = (BYTE*)infoPtr;
9014 unsigned size = compiler->compInfoBlkAddr - temp;
9015 BYTE* ptab = temp + headerSize;
9017 noway_assert(size == headerSize + ptrMapSize);
9019 printf("Method info block - header [%u bytes]:", headerSize);
9021 for (unsigned i = 0; i < size; i++)
9025 printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize);
9026 printf("\n %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' ');
9031 printf("\n %04X: ", i);
9034 printf("%02X ", *temp++);
9044 if (compiler->opts.dspGCtbls)
9046 const BYTE* base = (BYTE*)infoPtr;
9048 unsigned methodSize;
9051 printf("GC Info for method %s\n", compiler->info.compFullName);
9052 printf("GC info size = %3u\n", compiler->compInfoBlkSize);
9054 size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize);
9055 // printf("size of header encoding is %3u\n", size);
9058 if (compiler->opts.dspGCtbls)
9061 size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize);
9062 // printf("size of pointer table is %3u\n", size);
9064 noway_assert(compiler->compInfoBlkAddr == (base + size));
9069 if (jitOpts.testMask & 128)
9071 for (unsigned offs = 0; offs < codeSize; offs++)
9073 gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs);
9077 #endif // DUMP_GC_TABLES
9079 /* Make sure we ended up generating the expected number of bytes */
9081 noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize);
9086 #else // !JIT32_GCENCODER
9087 void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
9089 IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
9090 GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
9091 GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
9092 assert(gcInfoEncoder);
9094 // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
9095 gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
9097 // First we figure out the encoder ID's for the stack slots and registers.
9098 gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
9099 // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
9100 gcInfoEncoder->FinalizeSlotIds();
9101 // Now we can actually use those slot ID's to declare live ranges.
9102 gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
9104 #if defined(DEBUGGING_SUPPORT)
9105 if (compiler->opts.compDbgEnC)
9107 // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp)
9111 // -saved 'this' pointer and bool for synchronized methods
9113 // 4 slots for RBP + return address + RSI + RDI
9114 int preservedAreaSize = 4 * REGSIZE_BYTES;
9116 if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
9118 if (!(compiler->info.compFlags & CORINFO_FLG_STATIC))
9120 preservedAreaSize += REGSIZE_BYTES;
9123 // bool in synchronized methods that tracks whether the lock has been taken (takes 4 bytes on stack)
9124 preservedAreaSize += 4;
// Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the
// frame.
9129 gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
9133 gcInfoEncoder->Build();
9135 // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
9136 // let's save the values anyway for debugging purposes
9137 compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
9138 compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
9140 #endif // !JIT32_GCENCODER
9142 /*****************************************************************************
9143 * Emit a call to a helper function.
9147 void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg)
9149 void* addr = nullptr;
9150 void* pAddr = nullptr;
9152 emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
9153 addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr);
9154 regNumber callTarget = REG_NA;
9155 regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
9159 assert(pAddr != nullptr);
9161 // Absolute indirect call addr
// Note: the order of checks is important. Always check for the pc-relative encoding first and
// zero-relative next, because the former encoding is 1 byte smaller than the latter.
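// For illustration: on amd64, the pc-relative form "call [rip + disp32]"
// encodes as FF /2 with a 4-byte displacement (6 bytes), while the
// zero-relative form needs an additional SIB byte (7 bytes).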
9164 if (genCodeIndirAddrCanBeEncodedAsPCRelOffset((size_t)pAddr) ||
9165 genCodeIndirAddrCanBeEncodedAsZeroRelOffset((size_t)pAddr))
// Generate a call whose target is specified by a 32-bit offset relative to PC or zero.
9168 callType = emitter::EC_FUNC_TOKEN_INDIR;
9173 #ifdef _TARGET_AMD64_
9174 // If this indirect address cannot be encoded as 32-bit offset relative to PC or Zero,
// load it into REG_HELPER_CALL_TARGET and use register indirect addressing mode to
// make the call.
9180 if (callTargetReg == REG_NA)
9182 // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
9183 // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
9184 callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
9185 regMaskTP callTargetMask = genRegMask(callTargetReg);
9186 noway_assert((callTargetMask & killMask) == callTargetMask);
9190 // The call target must not overwrite any live variable, though it may not be in the
9191 // kill set for the call.
9192 regMaskTP callTargetMask = genRegMask(callTargetReg);
9193 noway_assert((callTargetMask & regSet.rsMaskVars) == RBM_NONE);
9197 callTarget = callTargetReg;
9198 CodeGen::genSetRegToIcon(callTarget, (ssize_t)pAddr, TYP_I_IMPL);
9199 callType = emitter::EC_INDIR_ARD;
9203 getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize,
9204 retSize FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(EA_UNKNOWN), gcInfo.gcVarPtrSetCur,
9205 gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
9206 BAD_IL_OFFSET, // IL offset
9208 REG_NA, 0, 0, // xreg, xmul, disp
9210 emitter::emitNoGChelper(helper));
9212 regTracker.rsTrashRegSet(killMask);
9213 regTracker.rsTrashRegsForGCInterruptability();
9216 #if !defined(_TARGET_64BIT_)
9217 //-----------------------------------------------------------------------------
9219 // Code Generation for Long integers
9221 //-----------------------------------------------------------------------------
9223 //------------------------------------------------------------------------
9224 // genStoreLongLclVar: Generate code to store a non-enregistered long lclVar
9227 // treeNode - A TYP_LONG lclVar node.
9233 // 'treeNode' must be a TYP_LONG lclVar node for a lclVar that has NOT been promoted.
9234 // Its operand must be a GT_LONG node.
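//
// For illustration, storing GT_LONG(loReg, hiReg) into a stack-based long V02
// emits roughly (a sketch; register and variable names are representative):
//       mov   dword ptr [V02], loReg
//       mov   dword ptr [V02+4], hiReg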
9236 void CodeGen::genStoreLongLclVar(GenTree* treeNode)
9238 emitter* emit = getEmitter();
9240 GenTreeLclVarCommon* lclNode = treeNode->AsLclVarCommon();
9241 unsigned lclNum = lclNode->gtLclNum;
9242 LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
9243 assert(varDsc->TypeGet() == TYP_LONG);
9244 assert(!varDsc->lvPromoted);
9245 GenTreePtr op1 = treeNode->gtOp.gtOp1;
9246 noway_assert(op1->OperGet() == GT_LONG || op1->OperGet() == GT_MUL_LONG);
9247 genConsumeRegs(op1);
9249 if (op1->OperGet() == GT_LONG)
9251 // Definitions of register candidates will have been lowered to 2 int lclVars.
9252 assert(!treeNode->InReg());
9254 GenTreePtr loVal = op1->gtGetOp1();
9255 GenTreePtr hiVal = op1->gtGetOp2();
9256 // NYI: Contained immediates.
9257 NYI_IF((loVal->gtRegNum == REG_NA) || (hiVal->gtRegNum == REG_NA), "Store of long lclVar with contained immediate");
9258 emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0);
9259 emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT));
9261 else if (op1->OperGet() == GT_MUL_LONG)
9263 assert((op1->gtFlags & GTF_MUL_64RSLT) != 0);
9266 getEmitter()->emitIns_S_R(ins_Store(TYP_INT), emitTypeSize(TYP_INT), REG_LNGRET_LO, lclNum, 0);
9267 getEmitter()->emitIns_S_R(ins_Store(TYP_INT), emitTypeSize(TYP_INT), REG_LNGRET_HI, lclNum, genTypeSize(TYP_INT));
9270 #endif // !defined(_TARGET_64BIT_)
9272 /*****************************************************************************
9273 * Unit testing of the XArch emitter: generate a bunch of instructions into the prolog
9274 * (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
* disassembler thinks the instructions are the same as we do.
// Uncomment "#define ALL_XARCH_EMITTER_UNIT_TESTS" to run all the unit tests here.
9279 // After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time.
9280 //#define ALL_XARCH_EMITTER_UNIT_TESTS
9282 #if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
9283 void CodeGen::genAmd64EmitterUnitTests()
9290 if (!compiler->opts.altJit)
9292 // No point doing this in a "real" JIT.
9296 // Mark the "fake" instructions in the output.
9297 printf("*************** In genAmd64EmitterUnitTests()\n");
9300 // genDefineTempLabel(genCreateTempLabel());
9301 // to create artificial labels to help separate groups of tests.
9306 CLANG_FORMAT_COMMENT_ANCHOR;
9308 #ifdef ALL_XARCH_EMITTER_UNIT_TESTS
9309 #ifdef FEATURE_AVX_SUPPORT
9310 genDefineTempLabel(genCreateTempLabel());
9312 // vhaddpd ymm0,ymm1,ymm2
9313 getEmitter()->emitIns_R_R_R(INS_haddpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9314 // vaddss xmm0,xmm1,xmm2
9315 getEmitter()->emitIns_R_R_R(INS_addss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9316 // vaddsd xmm0,xmm1,xmm2
9317 getEmitter()->emitIns_R_R_R(INS_addsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9318 // vaddps xmm0,xmm1,xmm2
9319 getEmitter()->emitIns_R_R_R(INS_addps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9320 // vaddps ymm0,ymm1,ymm2
9321 getEmitter()->emitIns_R_R_R(INS_addps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9322 // vaddpd xmm0,xmm1,xmm2
9323 getEmitter()->emitIns_R_R_R(INS_addpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9324 // vaddpd ymm0,ymm1,ymm2
9325 getEmitter()->emitIns_R_R_R(INS_addpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9326 // vsubss xmm0,xmm1,xmm2
9327 getEmitter()->emitIns_R_R_R(INS_subss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9328 // vsubsd xmm0,xmm1,xmm2
9329 getEmitter()->emitIns_R_R_R(INS_subsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
// vsubps xmm0,xmm1,xmm2
9331 getEmitter()->emitIns_R_R_R(INS_subps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9332 // vsubps ymm0,ymm1,ymm2
9333 getEmitter()->emitIns_R_R_R(INS_subps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9334 // vsubpd xmm0,xmm1,xmm2
9335 getEmitter()->emitIns_R_R_R(INS_subpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9336 // vsubpd ymm0,ymm1,ymm2
9337 getEmitter()->emitIns_R_R_R(INS_subpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9338 // vmulss xmm0,xmm1,xmm2
9339 getEmitter()->emitIns_R_R_R(INS_mulss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9340 // vmulsd xmm0,xmm1,xmm2
9341 getEmitter()->emitIns_R_R_R(INS_mulsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9342 // vmulps xmm0,xmm1,xmm2
9343 getEmitter()->emitIns_R_R_R(INS_mulps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9344 // vmulpd xmm0,xmm1,xmm2
9345 getEmitter()->emitIns_R_R_R(INS_mulpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9346 // vmulps ymm0,ymm1,ymm2
9347 getEmitter()->emitIns_R_R_R(INS_mulps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9348 // vmulpd ymm0,ymm1,ymm2
9349 getEmitter()->emitIns_R_R_R(INS_mulpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9350 // vandps xmm0,xmm1,xmm2
9351 getEmitter()->emitIns_R_R_R(INS_andps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9352 // vandpd xmm0,xmm1,xmm2
9353 getEmitter()->emitIns_R_R_R(INS_andpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9354 // vandps ymm0,ymm1,ymm2
9355 getEmitter()->emitIns_R_R_R(INS_andps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9356 // vandpd ymm0,ymm1,ymm2
9357 getEmitter()->emitIns_R_R_R(INS_andpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9358 // vorps xmm0,xmm1,xmm2
9359 getEmitter()->emitIns_R_R_R(INS_orps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9360 // vorpd xmm0,xmm1,xmm2
9361 getEmitter()->emitIns_R_R_R(INS_orpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9362 // vorps ymm0,ymm1,ymm2
9363 getEmitter()->emitIns_R_R_R(INS_orps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9364 // vorpd ymm0,ymm1,ymm2
9365 getEmitter()->emitIns_R_R_R(INS_orpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9366 // vdivss xmm0,xmm1,xmm2
9367 getEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9368 // vdivsd xmm0,xmm1,xmm2
9369 getEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9370 // vdivss xmm0,xmm1,xmm2
9371 getEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9372 // vdivsd xmm0,xmm1,xmm2
9373 getEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
// vcvtss2sd xmm0,xmm1,xmm2
9376 getEmitter()->emitIns_R_R_R(INS_cvtss2sd, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
// vcvtsd2ss xmm0,xmm1,xmm2
9378 getEmitter()->emitIns_R_R_R(INS_cvtsd2ss, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
9379 #endif // FEATURE_AVX_SUPPORT
9380 #endif // ALL_XARCH_EMITTER_UNIT_TESTS
9381 printf("*************** End of genAmd64EmitterUnitTests()\n");
9384 #endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
9386 /*****************************************************************************/
9387 #ifdef DEBUGGING_SUPPORT
9388 /*****************************************************************************
* Called for every scope info piece to be recorded by the main genSetScopeInfo()
9394 void CodeGen::genSetScopeInfo(unsigned which,
9395 UNATIVE_OFFSET startOffs,
9396 UNATIVE_OFFSET length,
9400 Compiler::siVarLoc& varLoc)
9402 /* We need to do some mapping while reporting back these variables */
9404 unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
9405 noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
9407 VarName name = nullptr;
9411 for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
9413 if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
9415 name = compiler->info.compVarScopes[scopeNum].vsdName;
// Hang on to this info.
9421 TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
9423 tlvi.tlviVarNum = ilVarNum;
9424 tlvi.tlviLVnum = LVnum;
9425 tlvi.tlviName = name;
9426 tlvi.tlviStartPC = startOffs;
9427 tlvi.tlviLength = length;
9428 tlvi.tlviAvailable = avail;
9429 tlvi.tlviVarLoc = varLoc;
9433 compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
9435 #endif // DEBUGGING_SUPPORT
9437 #endif // _TARGET_AMD64_
9439 #endif // !LEGACY_BACKEND